fix(gateway): answer with empty list of addresses on DNS resolution failure (#4266)

Currently, a failure during DNS resolution results in the client hanging
during the connection setup. Instead, we fall back to an empty list
which results in an empty DNS query result for the client.

That in turn will make most application consider the DNS request failed.
As far as I know, we don't currently retry these DNS requests, meaning a
user would have to sign-in and out again to fix this state.

Whilst not ideal, I think this is a better behaviour and what we
currently have where the initial connection just hangs.
This commit is contained in:
Thomas Eizinger
2024-03-23 09:16:38 +11:00
committed by GitHub
parent 248abffc2d
commit e8f2320d08
4 changed files with 132 additions and 127 deletions

View File

@@ -147,6 +147,25 @@ pub enum ResourceDescription<TDNS = ResourceDescriptionDns> {
Cidr(ResourceDescriptionCidr),
}
impl ResourceDescription<ResourceDescriptionDns> {
pub fn into_resolved(
self,
addresses: Vec<IpNetwork>,
) -> ResourceDescription<ResolvedResourceDescriptionDns> {
match self {
ResourceDescription::Dns(ResourceDescriptionDns { id, address, name }) => {
ResourceDescription::Dns(ResolvedResourceDescriptionDns {
id,
domain: address,
name,
addresses,
})
}
ResourceDescription::Cidr(c) => ResourceDescription::Cidr(c),
}
}
}
impl PartialOrd for ResourceDescription {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
@@ -207,6 +226,20 @@ pub struct ResourceDescriptionDns {
pub name: String,
}
/// Description of a resource that maps to a DNS record which had its domain already resolved.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ResolvedResourceDescriptionDns {
pub id: ResourceId,
/// Internal resource's domain name.
pub domain: String,
/// Name of the resource.
///
/// Used only for display.
pub name: String,
pub addresses: Vec<IpNetwork>,
}
impl ResourceDescription {
pub fn dns_name(&self) -> Option<&str> {
match self {

View File

@@ -7,7 +7,7 @@ use boringtun::x25519::PublicKey;
use chrono::{DateTime, Utc};
use connlib_shared::messages::{
Answer, ClientId, ConnectionAccepted, DomainResponse, Interface as InterfaceConfig, Key, Offer,
Relay, ResourceId,
Relay, ResolvedResourceDescriptionDns, ResourceDescription, ResourceId,
};
use connlib_shared::{Callbacks, Dname, Error, Result, StaticSecret};
use ip_network::IpNetwork;
@@ -22,23 +22,6 @@ const PEERS_IPV6: &str = "fd00:2021:1111::/107";
const EXPIRE_RESOURCES_INTERVAL: Duration = Duration::from_secs(1);
/// Description of a resource that maps to a DNS record which had its domain already resolved.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ResolvedResourceDescriptionDns {
pub id: ResourceId,
/// Internal resource's domain name.
pub domain: String,
/// Name of the resource.
///
/// Used only for display.
pub name: String,
pub addresses: Vec<IpNetwork>,
}
pub type ResourceDescription =
connlib_shared::messages::ResourceDescription<ResolvedResourceDescriptionDns>;
impl<CB> GatewayTunnel<CB>
where
CB: Callbacks + 'static,
@@ -81,7 +64,7 @@ where
relays: Vec<Relay>,
domain: Option<Dname>,
expires_at: Option<DateTime<Utc>>,
resource: ResourceDescription,
resource: ResourceDescription<ResolvedResourceDescriptionDns>,
) -> Result<ConnectionAccepted> {
let (resource_addresses, id) = match &resource {
ResourceDescription::Dns(r) => {
@@ -137,7 +120,7 @@ where
pub fn allow_access(
&mut self,
resource: ResourceDescription,
resource: ResourceDescription<ResolvedResourceDescriptionDns>,
client: ClientId,
expires_at: Option<DateTime<Utc>>,
domain: Option<Dname>,

View File

@@ -16,7 +16,7 @@ use std::{
};
pub use client::{ClientState, Request};
pub use gateway::{GatewayState, ResolvedResourceDescriptionDns};
pub use gateway::GatewayState;
mod client;
mod device_channel;

View File

@@ -3,16 +3,17 @@ use crate::messages::{
EgressMessages, IngressMessages, RejectAccess, RequestConnection,
};
use crate::CallbackHandler;
use anyhow::{bail, Result};
use anyhow::Result;
use boringtun::x25519::PublicKey;
use connlib_shared::{
messages::{GatewayResponse, ResourceAccepted, ResourceDescription},
messages::{GatewayResponse, ResourceAccepted},
Dname,
};
#[cfg(not(target_os = "windows"))]
use dns_lookup::{AddrInfoHints, AddrInfoIter, LookupError};
use either::Either;
use firezone_tunnel::{GatewayTunnel, ResolvedResourceDescriptionDns};
use firezone_tunnel::GatewayTunnel;
use futures_bounded::Timeout;
use ip_network::IpNetwork;
use phoenix_channel::PhoenixChannel;
use std::convert::Infallible;
@@ -25,10 +26,8 @@ pub struct Eventloop {
tunnel: GatewayTunnel<CallbackHandler>,
portal: PhoenixChannel<(), IngressMessages, EgressMessages>,
resolve_tasks: futures_bounded::FuturesTupleSet<
Result<ResourceDescription<ResolvedResourceDescriptionDns>>,
Either<RequestConnection, AllowAccess>,
>,
resolve_tasks:
futures_bounded::FuturesTupleSet<Vec<IpNetwork>, Either<RequestConnection, AllowAccess>>,
}
impl Eventloop {
@@ -73,77 +72,13 @@ impl Eventloop {
}
match self.resolve_tasks.poll_unpin(cx) {
Poll::Ready((Ok(Ok(resource)), Either::Left(req))) => {
let ips = req.client.peer.ips();
match self.tunnel.accept(
req.client.id,
req.client.peer.preshared_key,
req.client.payload.ice_parameters,
PublicKey::from(req.client.peer.public_key.0),
ips,
req.relays,
req.client.payload.domain,
req.expires_at,
resource,
) {
Ok(accepted) => {
self.portal.send(
PHOENIX_TOPIC,
EgressMessages::ConnectionReady(ConnectionReady {
reference: req.reference,
gateway_payload: GatewayResponse::ConnectionAccepted(accepted),
}),
);
// TODO: If outbound request fails, cleanup connection.
}
Err(e) => {
let client = req.client.id;
self.tunnel.cleanup_connection(&client);
tracing::debug!(%client, "Connection request failed: {:#}", anyhow::Error::new(e));
}
}
Poll::Ready((result, Either::Left(req))) => {
self.accept_connection(result, req);
continue;
}
Poll::Ready((Ok(Ok(resource)), Either::Right(req))) => {
let maybe_domain_response = self.tunnel.allow_access(
resource,
req.client_id,
req.expires_at,
req.payload,
);
Poll::Ready((result, Either::Right(req))) => {
self.allow_access(result, req);
if let Some(domain_response) = maybe_domain_response {
self.portal.send(
PHOENIX_TOPIC,
EgressMessages::ConnectionReady(ConnectionReady {
reference: req.reference,
gateway_payload: GatewayResponse::ResourceAccepted(
ResourceAccepted { domain_response },
),
}),
);
}
continue;
}
Poll::Ready((Ok(Err(dns_error)), Either::Left(req))) => {
tracing::debug!(client = %req.client.id, reference = %req.reference, "Failed to resolve domains as part of connection request: {dns_error}");
continue;
}
Poll::Ready((Ok(Err(dns_error)), Either::Right(req))) => {
tracing::debug!(client = %req.client_id, reference = %req.reference, "Failed to resolve domains as part of allow access request: {dns_error}");
continue;
}
Poll::Ready((Err(dns_timeout), Either::Left(req))) => {
tracing::debug!(client = %req.client.id, reference = %req.reference, "DNS resolution timed out as part of connection request: {dns_timeout}");
continue;
}
Poll::Ready((Err(dns_timeout), Either::Right(req))) => {
tracing::debug!(client = %req.client_id, reference = %req.reference, "DNS resolution timed out as part of allow access request: {dns_timeout}");
continue;
}
Poll::Pending => {}
@@ -156,10 +91,7 @@ impl Eventloop {
if self
.resolve_tasks
.try_push(
resolve_resource_description(
req.resource.clone(),
req.client.payload.domain.clone(),
),
resolve(req.client.payload.domain.clone()),
Either::Left(req),
)
.is_err()
@@ -175,10 +107,7 @@ impl Eventloop {
}) => {
if self
.resolve_tasks
.try_push(
resolve_resource_description(req.resource.clone(), req.payload.clone()),
Either::Right(req),
)
.try_push(resolve(req.payload.clone()), Either::Right(req))
.is_err()
{
tracing::warn!("Too many allow access requests, dropping existing one");
@@ -228,34 +157,94 @@ impl Eventloop {
return Poll::Pending;
}
}
pub fn accept_connection(
&mut self,
result: Result<Vec<IpNetwork>, Timeout>,
req: RequestConnection,
) {
let addresses = result
.inspect_err(|e| tracing::debug!(client = %req.client.id, reference = %req.reference, "DNS resolution timed out as part of connection request: {e}"))
.unwrap_or_default();
let ips = req.client.peer.ips();
match self.tunnel.accept(
req.client.id,
req.client.peer.preshared_key,
req.client.payload.ice_parameters,
PublicKey::from(req.client.peer.public_key.0),
ips,
req.relays,
req.client.payload.domain,
req.expires_at,
req.resource.into_resolved(addresses),
) {
Ok(accepted) => {
self.portal.send(
PHOENIX_TOPIC,
EgressMessages::ConnectionReady(ConnectionReady {
reference: req.reference,
gateway_payload: GatewayResponse::ConnectionAccepted(accepted),
}),
);
// TODO: If outbound request fails, cleanup connection.
}
Err(e) => {
let client = req.client.id;
self.tunnel.cleanup_connection(&client);
tracing::debug!(%client, "Connection request failed: {:#}", anyhow::Error::new(e));
}
}
}
pub fn allow_access(&mut self, result: Result<Vec<IpNetwork>, Timeout>, req: AllowAccess) {
let addresses = result
.inspect_err(|e| tracing::debug!(client = %req.client_id, reference = %req.reference, "DNS resolution timed out as part of allow access request: {e}"))
.unwrap_or_default();
let maybe_domain_response = self.tunnel.allow_access(
req.resource.into_resolved(addresses),
req.client_id,
req.expires_at,
req.payload,
);
if let Some(domain_response) = maybe_domain_response {
self.portal.send(
PHOENIX_TOPIC,
EgressMessages::ConnectionReady(ConnectionReady {
reference: req.reference,
gateway_payload: GatewayResponse::ResourceAccepted(ResourceAccepted {
domain_response,
}),
}),
);
}
}
}
async fn resolve_resource_description(
resource: ResourceDescription,
domain: Option<Dname>,
) -> Result<ResourceDescription<ResolvedResourceDescriptionDns>> {
match resource {
ResourceDescription::Dns(dns) => {
let Some(domain) = domain.clone() else {
debug_assert!(
false,
"We should never get a DNS resource access request without the subdomain"
);
bail!("Protocol error: Request for DNS resource without the subdomain being tried to access.")
};
async fn resolve(domain: Option<Dname>) -> Vec<IpNetwork> {
let Some(domain) = domain.clone() else {
return vec![];
};
let addresses =
tokio::task::spawn_blocking(move || resolve_addresses(&domain.to_string()))
.await??;
let dname = domain.to_string();
Ok(ResourceDescription::Dns(ResolvedResourceDescriptionDns {
id: dns.id,
domain: dns.address,
name: dns.name,
addresses,
}))
match tokio::task::spawn_blocking(move || resolve_addresses(&dname)).await {
Ok(Ok(addresses)) => addresses,
Ok(Err(e)) => {
tracing::warn!("Failed to resolve '{domain}': {e}");
vec![]
}
Err(e) => {
tracing::warn!("Failed to resolve '{domain}': {e}");
vec![]
}
ResourceDescription::Cidr(cdir) => Ok(ResourceDescription::Cidr(cdir)),
}
}