mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-27 18:18:55 +00:00
feat(gateway): use hickory resolver to resolve A/AAAA queries (#10373)
At present, the Gateway performs DNS resolution for A & AAAA queries via `libc`. The `resolve` call only provides us with the resolved IPs but not any of the metadata around the query, such as the TTL. As a result, we can only cache DNS responses for a static amount of time, currently 30s. It would be more correct to cache them for their TTL instead. To do so, we re-introduce `hickory-resolver` to our codebase. Deliberately, we only use it for resolving A and AAAA records on the Gateway for now. DNS resolution for SRV & TXT records happens one layer below and uses the same infrastructure as DNS resolution on the Client. Merging the two is difficult, however, because the Gateway still supports the control protocol of 1.3.x clients. That protocol requires DNS resolution prior to setting up the connection of DNS resources, which means it needs to happen in the event-loop of the Gateway binary and cannot be moved into the `Tunnel`, where DNS resolution for the Client and for SRV/TXT records happens. Once we can drop support for 1.3.x clients, the Gateway's event-loop will simplify drastically, which will allow us to refactor this to a more unified approach to DNS resolution. Until then, we can at least fix the hardcoded TTL by using `hickory-resolver` in the event-loop. The functionality is guarded behind a feature flag which - as usual - is off by default (i.e. for as long as we haven't fetched the flags). The feature flag is already configured to `true` for staging and production so we can test the new behaviour. Resolves: #8232 Related: #10385
This commit is contained in:
93
rust/Cargo.lock
generated
93
rust/Cargo.lock
generated
@@ -1578,6 +1578,12 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "critical-section"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b"
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.15"
|
||||
@@ -2035,14 +2041,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "dns-lookup"
|
||||
version = "2.0.4"
|
||||
version = "2.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5766087c2235fec47fafa4cfecc81e494ee679d0fd4a59887ea0919bfb0e4fc"
|
||||
checksum = "cf5597a4b7fe5275fc9dcf88ce26326bc8e4cb87d0130f33752d4c5f717793cf"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"socket2 0.5.10",
|
||||
"windows-sys 0.48.0",
|
||||
"socket2 0.6.0",
|
||||
"windows-sys 0.60.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2197,6 +2203,18 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a3d8a32ae18130a3c84dd492d4215c3d913c3b07c6b63c2eb3eb7ff1101ab7bf"
|
||||
|
||||
[[package]]
|
||||
name = "enum-as-inner"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc"
|
||||
dependencies = [
|
||||
"heck 0.5.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.106",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "enumflags2"
|
||||
version = "0.7.11"
|
||||
@@ -2402,6 +2420,7 @@ dependencies = [
|
||||
"firezone-tunnel",
|
||||
"futures",
|
||||
"futures-bounded",
|
||||
"hickory-resolver",
|
||||
"ip-packet",
|
||||
"ip_network",
|
||||
"libc",
|
||||
@@ -3361,6 +3380,52 @@ version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46"
|
||||
|
||||
[[package]]
|
||||
name = "hickory-proto"
|
||||
version = "0.25.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"cfg-if",
|
||||
"data-encoding",
|
||||
"enum-as-inner",
|
||||
"futures-channel",
|
||||
"futures-io",
|
||||
"futures-util",
|
||||
"idna",
|
||||
"ipnet",
|
||||
"once_cell",
|
||||
"rand 0.9.1",
|
||||
"ring",
|
||||
"thiserror 2.0.16",
|
||||
"tinyvec",
|
||||
"tokio",
|
||||
"tracing",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hickory-resolver"
|
||||
version = "0.25.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"futures-util",
|
||||
"hickory-proto",
|
||||
"ipconfig",
|
||||
"moka",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"rand 0.9.1",
|
||||
"resolv-conf",
|
||||
"smallvec",
|
||||
"thiserror 2.0.16",
|
||||
"tokio",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hkdf"
|
||||
version = "0.12.4"
|
||||
@@ -4161,7 +4226,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6a793df0d7afeac54f95b471d3af7f0d4fb975699f972341a4b76988d49cdf0c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-targets 0.53.0",
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4975,6 +5040,10 @@ name = "once_cell"
|
||||
version = "1.21.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||
dependencies = [
|
||||
"critical-section",
|
||||
"portable-atomic",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "opaque-debug"
|
||||
@@ -9314,6 +9383,15 @@ dependencies = [
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.60.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
|
||||
dependencies = [
|
||||
"windows-targets 0.53.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.42.2"
|
||||
@@ -9362,10 +9440,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.53.0"
|
||||
version = "0.53.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b"
|
||||
checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91"
|
||||
dependencies = [
|
||||
"windows-link 0.1.3",
|
||||
"windows_aarch64_gnullvm 0.53.0",
|
||||
"windows_aarch64_msvc 0.53.0",
|
||||
"windows_i686_gnu 0.53.0",
|
||||
|
||||
@@ -93,6 +93,7 @@ glob = "0.3.3"
|
||||
hex = "0.4.3"
|
||||
hex-display = "0.3.0"
|
||||
hex-literal = "0.4.1"
|
||||
hickory-resolver = "0.25.2"
|
||||
humantime = "2.3"
|
||||
ip-packet = { path = "connlib/ip-packet" }
|
||||
ip_network = { version = "0.4", default-features = false }
|
||||
|
||||
@@ -21,6 +21,7 @@ firezone-telemetry = { workspace = true }
|
||||
firezone-tunnel = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
futures-bounded = { workspace = true }
|
||||
hickory-resolver = { workspace = true }
|
||||
ip-packet = { workspace = true }
|
||||
ip_network = { workspace = true }
|
||||
libc = { workspace = true, features = ["std", "const-extern-fn", "extra_traits"] }
|
||||
|
||||
@@ -16,6 +16,8 @@ use firezone_tunnel::{
|
||||
DnsResourceNatEntry, GatewayEvent, GatewayTunnel, IPV4_TUNNEL, IPV6_TUNNEL, IpConfig,
|
||||
ResolveDnsRequest, TunnelError,
|
||||
};
|
||||
use futures::FutureExt as _;
|
||||
use hickory_resolver::TokioResolver;
|
||||
use phoenix_channel::{PhoenixChannel, PublicKeyParam};
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
use std::future::{self, Future, poll_fn};
|
||||
@@ -32,7 +34,7 @@ use crate::RELEASE;
|
||||
|
||||
pub const PHOENIX_TOPIC: &str = "gateway";
|
||||
|
||||
/// How long we allow a DNS resolution via `libc::get_addr_info`.
|
||||
/// How long we allow a DNS resolution via hickory.
|
||||
const DNS_RESOLUTION_TIMEOUT: Duration = Duration::from_secs(10);
|
||||
|
||||
/// Cache DNS responses for 30 seconds.
|
||||
@@ -55,6 +57,7 @@ pub struct Eventloop {
|
||||
// Tunnel is `Option` because we need to take ownership on shutdown.
|
||||
tunnel: Option<GatewayTunnel>,
|
||||
tun_device_manager: TunDeviceManager,
|
||||
resolver: TokioResolver,
|
||||
|
||||
resolve_tasks:
|
||||
futures_bounded::FuturesTupleSet<Result<Vec<IpAddr>, Arc<anyhow::Error>>, ResolveTrigger>,
|
||||
@@ -79,6 +82,7 @@ impl Eventloop {
|
||||
tunnel: GatewayTunnel,
|
||||
mut portal: PhoenixChannel<(), IngressMessages, PublicKeyParam>,
|
||||
tun_device_manager: TunDeviceManager,
|
||||
resolver: TokioResolver,
|
||||
) -> Result<Self> {
|
||||
portal.connect(PublicKeyParam(tunnel.public_key().to_bytes()));
|
||||
|
||||
@@ -94,6 +98,7 @@ impl Eventloop {
|
||||
Ok(Self {
|
||||
tunnel: Some(tunnel),
|
||||
tun_device_manager,
|
||||
resolver,
|
||||
resolve_tasks: futures_bounded::FuturesTupleSet::new(DNS_RESOLUTION_TIMEOUT, 1000),
|
||||
logged_permission_denied: false,
|
||||
dns_cache: moka::future::Cache::builder()
|
||||
@@ -651,10 +656,26 @@ impl Eventloop {
|
||||
&self,
|
||||
domain: DomainName,
|
||||
) -> impl Future<Output = Result<Vec<IpAddr>, Arc<anyhow::Error>>> + use<> {
|
||||
let do_resolve = resolve(domain.clone());
|
||||
let cache = self.dns_cache.clone();
|
||||
if firezone_telemetry::feature_flags::gateway_userspace_dns_a_aaaa_records() {
|
||||
let resolver = self.resolver.clone();
|
||||
|
||||
async move { cache.try_get_with(domain, do_resolve).await }
|
||||
async move {
|
||||
let ips = resolver
|
||||
.lookup_ip(domain.to_string())
|
||||
.await
|
||||
.with_context(|| format!("Failed to lookup domain '{domain}'"))?
|
||||
.iter()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(ips)
|
||||
}
|
||||
.boxed()
|
||||
} else {
|
||||
let do_resolve = resolve(domain.clone());
|
||||
let cache = self.dns_cache.clone();
|
||||
|
||||
async move { cache.try_get_with(domain, do_resolve).await }.boxed()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ use firezone_telemetry::{
|
||||
MaybePushMetricsExporter, NoopPushMetricsExporter, Telemetry, feature_flags, otel,
|
||||
};
|
||||
use firezone_tunnel::GatewayTunnel;
|
||||
use hickory_resolver::config::ResolveHosts;
|
||||
use ip_packet::IpPacket;
|
||||
use opentelemetry_otlp::WithExportConfig;
|
||||
use opentelemetry_sdk::metrics::SdkMeterProvider;
|
||||
@@ -210,7 +211,13 @@ async fn try_main(cli: Cli, telemetry: &mut Telemetry) -> Result<()> {
|
||||
|| true,
|
||||
));
|
||||
|
||||
Eventloop::new(tunnel, portal, tun_device_manager)?
|
||||
let mut resolver_builder = hickory_resolver::TokioResolver::builder_tokio()?;
|
||||
resolver_builder.options_mut().cache_size = 512;
|
||||
resolver_builder.options_mut().use_hosts_file = ResolveHosts::Always;
|
||||
|
||||
let resolver = resolver_builder.build();
|
||||
|
||||
Eventloop::new(tunnel, portal, tun_device_manager, resolver)?
|
||||
.run()
|
||||
.await?;
|
||||
|
||||
|
||||
@@ -40,6 +40,10 @@ pub fn map_enobufs_to_would_block() -> bool {
|
||||
FEATURE_FLAGS.map_enobufs_to_wouldblock()
|
||||
}
|
||||
|
||||
pub fn gateway_userspace_dns_a_aaaa_records() -> bool {
|
||||
FEATURE_FLAGS.gateway_userspace_dns_a_aaaa_records()
|
||||
}
|
||||
|
||||
pub fn export_metrics() -> bool {
|
||||
false // Placeholder until we actually deploy an OTEL collector.
|
||||
}
|
||||
@@ -160,6 +164,8 @@ struct FeatureFlagsResponse {
|
||||
stream_logs: bool,
|
||||
#[serde(default)]
|
||||
map_enobufs_to_wouldblock: bool,
|
||||
#[serde(default)]
|
||||
gateway_userspace_dns_a_aaaa_records: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Default, Clone)]
|
||||
@@ -175,6 +181,7 @@ struct FeatureFlags {
|
||||
drop_llmnr_nxdomain_responses: AtomicBool,
|
||||
stream_logs: RwLock<LogFilter>,
|
||||
map_enobufs_to_wouldblock: AtomicBool,
|
||||
gateway_userspace_dns_a_aaaa_records: AtomicBool,
|
||||
}
|
||||
|
||||
/// Accessors to the actual feature flags.
|
||||
@@ -191,6 +198,7 @@ impl FeatureFlags {
|
||||
drop_llmnr_nxdomain_responses,
|
||||
stream_logs,
|
||||
map_enobufs_to_wouldblock,
|
||||
gateway_userspace_dns_a_aaaa_records,
|
||||
}: FeatureFlagsResponse,
|
||||
payloads: FeatureFlagPayloadsResponse,
|
||||
) {
|
||||
@@ -200,6 +208,8 @@ impl FeatureFlags {
|
||||
.store(drop_llmnr_nxdomain_responses, Ordering::Relaxed);
|
||||
self.map_enobufs_to_wouldblock
|
||||
.store(map_enobufs_to_wouldblock, Ordering::Relaxed);
|
||||
self.gateway_userspace_dns_a_aaaa_records
|
||||
.store(gateway_userspace_dns_a_aaaa_records, Ordering::Relaxed);
|
||||
|
||||
let log_filter = if stream_logs {
|
||||
LogFilter::parse(payloads.stream_logs)
|
||||
@@ -226,6 +236,11 @@ impl FeatureFlags {
|
||||
fn map_enobufs_to_wouldblock(&self) -> bool {
|
||||
self.map_enobufs_to_wouldblock.load(Ordering::Relaxed)
|
||||
}
|
||||
|
||||
fn gateway_userspace_dns_a_aaaa_records(&self) -> bool {
|
||||
self.gateway_userspace_dns_a_aaaa_records
|
||||
.load(Ordering::Relaxed)
|
||||
}
|
||||
}
|
||||
|
||||
fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context {
|
||||
@@ -236,6 +251,7 @@ fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context
|
||||
DropLlmnrNxdomainResponses { result: bool },
|
||||
StreamLogs { result: bool },
|
||||
MapENOBUFSToWouldBlock { result: bool },
|
||||
GatewayUserspaceDnsAAaaaRecords { result: bool },
|
||||
}
|
||||
|
||||
// Exhaustive destruction so we don't forget to update this when we add a flag.
|
||||
@@ -244,6 +260,7 @@ fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context
|
||||
drop_llmnr_nxdomain_responses,
|
||||
stream_logs,
|
||||
map_enobufs_to_wouldblock,
|
||||
gateway_userspace_dns_a_aaaa_records,
|
||||
} = flags;
|
||||
|
||||
let value = serde_json::json!({
|
||||
@@ -254,6 +271,7 @@ fn sentry_flag_context(flags: FeatureFlagsResponse) -> sentry::protocol::Context
|
||||
SentryFlag::DropLlmnrNxdomainResponses { result: drop_llmnr_nxdomain_responses },
|
||||
SentryFlag::StreamLogs { result: stream_logs },
|
||||
SentryFlag::MapENOBUFSToWouldBlock { result: map_enobufs_to_wouldblock },
|
||||
SentryFlag::GatewayUserspaceDnsAAaaaRecords { result: gateway_userspace_dns_a_aaaa_records },
|
||||
]
|
||||
});
|
||||
|
||||
|
||||
@@ -38,6 +38,10 @@ export default function Gateway() {
|
||||
</Link>
|
||||
section for details.
|
||||
</ChangeItem>
|
||||
<ChangeItem pull="10373">
|
||||
Switches to user-space DNS resolution, allowing for accurate caching
|
||||
based on the TTL in the DNS response.
|
||||
</ChangeItem>
|
||||
</Unreleased>
|
||||
<Entry version="1.4.16" date={new Date("2025-09-10")}>
|
||||
<ChangeItem pull="10231">
|
||||
|
||||
Reference in New Issue
Block a user