test(connlib): relax site online/unknown assertions (#10963)

Within the proptests, we assert that connlib correctly reports the
online status of resources. For resources where we establish TCP
connections, that is difficult to model exactly due to the number of
error cases we also run through in the tests like rejecting connections
with ICMP errors, rebooting relays etc.

Up until now, we tried to model this quite precisely by only allowing
deviations of the resource status for TCP connections that have received
ICMP errors. With the prolonged ICE timeout on the Gateway, this is
turning out to not be enough.

We therefore relax the assertion here to allow all resources that are
within sites that we made a TCP connection to deviate from their
expected online/unknown status.
This commit is contained in:
Thomas Eizinger
2025-11-25 15:15:14 +11:00
committed by GitHub
parent bcf4ccf817
commit d6080e3ab1
4 changed files with 38 additions and 30 deletions

View File

@@ -253,3 +253,4 @@ cc 6c01d07f2820e879d267944e063a55d0f4f07777a925a532f9da312b9b398db1
cc 9fac3117f02193b4b1dfbbfd02fa6062c8f62eb5c94d36d9bd9c694dcda5ddab
cc 659d967c73dae7e617a63ff86978636ef8aac193b70236adb823354067523629
cc 1c531f3260ae1f80cf5ab4fb8c0cef5dd1d468ed4da1ff7170a12d203b5546a5
cc 1788a0e92cbc246bf55a3a5e8720069309bbd3e010e4dd5babe851dedc49bfaa

View File

@@ -1,6 +1,6 @@
//! Internal model of resources as used by connlib's client code.
use std::collections::BTreeSet;
use std::{collections::BTreeSet, fmt};
use connlib_model::{
CidrResourceView, DnsResourceView, InternetResourceView, IpStack, ResourceId, ResourceStatus,
@@ -139,7 +139,9 @@ impl Resource {
}
/// Returns the [`Site`] of a [`Resource`] if there is exactly one site.
pub fn site(&self) -> Result<&Site, itertools::ExactlyOneError<impl Iterator<Item = &Site>>> {
pub fn site(
&self,
) -> Result<&Site, itertools::ExactlyOneError<impl Iterator<Item = &Site> + fmt::Debug>> {
self.sites().into_iter().exactly_one()
}

View File

@@ -138,23 +138,21 @@ pub(crate) fn assert_tcp_connections(ref_client: &RefClient, sim_client: &SimCli
pub(crate) fn assert_resource_status(ref_client: &RefClient, sim_client: &SimClient) {
use connlib_model::ResourceStatus::*;
let (expected_status_map, tcp_resources) = &ref_client
.expected_resource_status(|tuple| sim_client.failed_tcp_packets.contains_key(&tuple));
let expected_status_map = &ref_client.expected_resource_status();
let actual_status_map = &sim_client.resource_status;
let maybe_online_resources = ref_client.maybe_online_resources();
if expected_status_map != actual_status_map {
for (resource, expected_status) in expected_status_map {
match actual_status_map.get(resource) {
// For resources with TCP connections, the expected status might be wrong.
// We generally expect them to always be online because the TCP client sends its own keep-alive's.
// However, if we have sent an ICMP error back, the client may have given up and therefore it is okay for the site to be in `Unknown` then.
Some(&Online)
if expected_status == &Unknown && tcp_resources.contains(resource) => {}
if expected_status == &Unknown && maybe_online_resources.contains(resource) => {
}
Some(&Unknown)
if expected_status == &Online && tcp_resources.contains(resource) => {}
if expected_status == &Online && maybe_online_resources.contains(resource) => {}
Some(actual_status) if actual_status != expected_status => {
tracing::error!(target: "assertions", %expected_status, %actual_status, %resource, "Resource status doesn't match");
tracing::error!(target: "assertions", %expected_status, %actual_status, %resource, ?maybe_online_resources, "Resource status doesn't match");
}
Some(_) => {}
None => {

View File

@@ -716,24 +716,8 @@ impl RefClient {
}
}
pub(crate) fn expected_resource_status(
&self,
has_failed_tcp_connection: impl Fn((SPort, DPort)) -> bool,
) -> (BTreeMap<ResourceId, ResourceStatus>, BTreeSet<ResourceId>) {
let resources_with_failed_tcp_connections = self
.expected_tcp_connections
.iter()
.filter(|((_, _, sport, dport), _)| has_failed_tcp_connection((*sport, *dport)))
.filter_map(|(_, resource)| self.site_for_resource(*resource))
.flat_map(|site| {
self.resources
.iter()
.filter_map(move |r| r.sites().contains(&site).then_some(r.id()))
})
.collect();
let resource_status = self
.resources
pub(crate) fn expected_resource_status(&self) -> BTreeMap<ResourceId, ResourceStatus> {
self.resources
.iter()
.filter_map(|r| {
let status = self
@@ -744,9 +728,32 @@ impl RefClient {
Some((r.id(), status))
})
.collect();
.collect()
}
(resource_status, resources_with_failed_tcp_connections)
/// Returns the list of resources where we are not "sure" whether they are online or unknown.
///
/// Resources with TCP connections have an automatic retry and therefore, modelling their exact online/unknown state is difficult.
pub(crate) fn maybe_online_resources(&self) -> BTreeSet<ResourceId> {
let resources_with_tcp_connections = self
.expected_tcp_connections
.values()
.copied()
.collect::<BTreeSet<_>>();
let maybe_online_sites = resources_with_tcp_connections
.into_iter()
.flat_map(|r| self.site_for_resource(r))
.collect::<BTreeSet<_>>();
self.resources
.iter()
.filter_map(move |r| {
maybe_online_sites
.contains(r.site().unwrap())
.then_some(r.id())
})
.collect()
}
pub(crate) fn tunnel_ip_for(&self, dst: IpAddr) -> IpAddr {