From 9d640b52b4691357a339e3413ca2ffeab3a0be57 Mon Sep 17 00:00:00 2001 From: Jamil Date: Mon, 20 Oct 2025 20:35:41 -0700 Subject: [PATCH] feat(website): 09/2025 devlog (#10571) First in a series of new monthly devlog posts to summarize what we've shipped over the previous month. Intentionally detailed and engineering focused - added a new `Engineering` section to the blog. --------- Signed-off-by: Jamil Co-authored-by: Thomas Eizinger --- website/.gitignore | 4 + website/src/app/blog/devlog/2025-09/_page.tsx | 18 ++ website/src/app/blog/devlog/2025-09/page.tsx | 11 ++ .../src/app/blog/devlog/2025-09/readme.mdx | 164 ++++++++++++++++++ website/src/app/blog/posts.tsx | 27 ++- .../components/Blog/AboutFirezone/index.tsx | 30 ++++ 6 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 website/src/app/blog/devlog/2025-09/_page.tsx create mode 100644 website/src/app/blog/devlog/2025-09/page.tsx create mode 100644 website/src/app/blog/devlog/2025-09/readme.mdx create mode 100644 website/src/components/Blog/AboutFirezone/index.tsx diff --git a/website/.gitignore b/website/.gitignore index d766da20f..4ea8555be 100644 --- a/website/.gitignore +++ b/website/.gitignore @@ -42,3 +42,7 @@ next-env.d.ts # generated timestamps timestamps.json + +# AI +.rules +scrape_commits.py diff --git a/website/src/app/blog/devlog/2025-09/_page.tsx b/website/src/app/blog/devlog/2025-09/_page.tsx new file mode 100644 index 000000000..d133d59d2 --- /dev/null +++ b/website/src/app/blog/devlog/2025-09/_page.tsx @@ -0,0 +1,18 @@ +"use client"; +import Post from "@/components/Blog/Post"; +import Content from "./readme.mdx"; +import gravatar from "@/lib/gravatar"; + +export default function _Page() { + return ( + + + + ); +} diff --git a/website/src/app/blog/devlog/2025-09/page.tsx b/website/src/app/blog/devlog/2025-09/page.tsx new file mode 100644 index 000000000..19b4e71a5 --- /dev/null +++ b/website/src/app/blog/devlog/2025-09/page.tsx @@ -0,0 +1,11 @@ +import { Metadata } from 
"next"; +import _Page from "./_page"; + +export const metadata: Metadata = { + title: "September 2025 Devlog • Firezone Blog", + description: "September 2025 Devlog", +}; + +export default function Page() { + return <_Page />; +} diff --git a/website/src/app/blog/devlog/2025-09/readme.mdx b/website/src/app/blog/devlog/2025-09/readme.mdx new file mode 100644 index 000000000..7c6716d77 --- /dev/null +++ b/website/src/app/blog/devlog/2025-09/readme.mdx @@ -0,0 +1,164 @@ +import AboutFirezone from "@/components/Blog/AboutFirezone"; + +September brought major improvements to Firezone's reliability, performance, and administrative capabilities. +Our focus this month was on making connections more resilient, helping admins manage client versions more effectively, and optimizing our networking stack for real-world conditions. + +## Smarter Admin Tools + +Managing a fleet of Clients just got easier. +The admin portal now shows client versions directly in the clients table with a sortable version column.[^10456] +When planning Gateway upgrades, admins can quickly identify which clients might lose connectivity due to version incompatibility. + +We've also enhanced the "outdated Gateway" email notification system, which is now enabled by default for all accounts.[^10281] +These emails now include a count of clients that will be affected by the Gateway upgrade, with a direct link to view them in the portal. + +For organizations with large user bases, we added batch upsert operations for directory sync.[^10369] +This lays the groundwork for more efficient directory synchronization that can handle thousands of users without overwhelming the system. + +A new `/account` API endpoint provides programmatic access to billing details and seat usage.[^10302] +This enables better integration with capacity planning and billing automation tools. + +## Rock-Solid Connections + +Connection reliability saw significant improvements across the board. 
+We implemented graceful connection shutdown using a new peer-to-peer control protocol message.[^10076] +When Gateways restart for maintenance or upgrades, Clients now immediately fail over to alternative Gateways instead of waiting through a 15-second ICE timeout.[^10400] + +This graceful handling also applies when Clients sign out, making it much easier to distinguish between actual network problems and normal disconnections in the logs. + +We fixed a synchronization issue where Clients and Gateways could disagree on authorization state.[^10462] +Now when a Client receives an ICMP "prohibited" error from a Gateway, it automatically re-authorizes access for that Resource. +This keeps both sides in sync even when authorization states drift out of alignment. + +DNS resource handling got more reliable too. +Proxy IP assignments for DNS resources now persist across Client sessions.[^10104] +Previously, restarting the Client could reassign `100.96.0.1` to a different resource, breaking applications that cached the IP address. +Now these mappings stay consistent for the lifetime of the tunnel process, fixing issues with long-lived connections like SSH sessions. + +## Better DNS Resolution + +The Gateway switched from using the system's `libc` resolver to the `hickory-resolver` library for A and AAAA record resolution.[^10373] +This change enables proper TTL-based caching instead of the previous hardcoded 30-second cache. +DNS responses are now cached for exactly as long as the authoritative nameserver specifies, reducing unnecessary queries while ensuring timely updates. + +## Performance Under Load + +We optimized the event-loop to prevent starvation of lower-priority inputs.[^10347] +The tunnel now batch-processes input from all sources rather than prioritizing high-traffic sources to completion. +This prevents timeout checks and DNS resolution from being delayed when UDP sockets or the TUN device are extremely busy. 
+ +Linux systems got a significant performance boost through better socket buffer management.[^10349] +Clients and Gateways now attempt to set UDP receive and send buffer maximums to 128 MB and 16 MB respectively at startup. +The default 200 KB buffers were causing packet drops during high-throughput scenarios, directly correlating with buffer errors visible in `nstat`. + +We also limited the number of optimistic ICE candidates to prevent CPU spikes.[^10367] +When clients advertise many IPv6 addresses, the previous unlimited candidate generation could cause performance issues. +Optimistic candidates are now disabled entirely for IPv6 and limited to 2 for IPv4, focusing on the scenarios where they're most beneficial. + +## Relay Infrastructure Advances + +Our relay infrastructure received important eBPF improvements. +The relay can now handle relay-to-relay candidate pairs in the eBPF kernel, which occur when both Client and Gateway allocate from the same relay.[^10286] +Previously these packets would need to traverse userspace or fail entirely. + +We restructured the eBPF code into modular components that handle all cross-stack translation cases, including IPv4↔IPv6 transformations. +A comprehensive integration test with double symmetric NAT validates this behavior in continuous integration. + +We also fixed several edge cases in the eBPF layer, including properly handling DNS replies[^10330] and re-populating the channel map when TURN channels are refreshed.[^10291] + +## Platform-Specific Improvements + +The Android client now launches authentication in a CustomTab instead of the default browser.[^10371] +This fixes a Firefox bug where only the first browser tab could intercept the custom URI scheme, making subsequent sign-ins fail until users manually closed the old tab. +CustomTabs ensure only one sandboxed authentication instance exists at a time. +It also results in a nice UX improvement where the "You have been signed-in" tabs are no longer lingering around. 
+ +macOS developers got quality-of-life improvements: the workspace now builds correctly on macOS thanks to appropriate stubs and conditional compilation.[^10363] + +The macOS client now detects and alerts users when multiple instances are running, preventing interference with tunnel state.[^10313] + +## Infrastructure and Tooling + +All CI runners now use Ubuntu 24.04 to match production relay environments.[^10288] +This ensures builds and tests run on the same kernel version as production, catching platform-specific issues earlier. + +The docker-compose test environment now uses a realistic network topology with separate subnets for Clients, Gateways, relays, and backend.[^10301] +Each component has a dedicated router container performing NAT and enforcing firewall rules, enabling proper testing of relayed connections with port randomization. + +Database performance improved with the addition of missing indexes on foreign key columns.[^10396][^10403] +These indexes ensure efficient cascade deletes now that hard-delete has been fully rolled out. + +## Notable Bug Fixes + +Several important bugs were squashed: + +- Gateway re-joins Phoenix channel topic on send errors to prevent message loss[^10397] +- Fixed poll-after-completion panics in Client session event-loop[^10399] +- Fixed DNS resource NAT reset when Client reassigns proxy IPs after sign out[^10310] +- Relay filters traces by log filter to respect OTEL configuration[^10317] +- Internet site no longer counts against Starter plan resource limits[^10336] + +--- + +That's September in a nutshell. +See our [changelog](https://www.firezone.dev/changelog) for a more compact version of the above or view the [full diff on GitHub](https://github.com/firezone/firezone/compare/61e0a228869f646d4ae7b4d5ee19ce57f5697df8...f07d2932dcc04441bc16f01febe011131523a39a). 
+ + + +--- + +#### Footnotes + +[^10456]: [feat(portal): show outdated clients](https://github.com/firezone/firezone/pull/10456) + +[^10281]: [feat(portal): enable outdated gateway email](https://github.com/firezone/firezone/pull/10281) + +[^10369]: [feat(portal): batch_upsert and delete_unsynced functions](https://github.com/firezone/firezone/pull/10369) + +[^10302]: [feat(api): GET /account API](https://github.com/firezone/firezone/pull/10302) + +[^10076]: [feat(connlib): gracefully shutdown connections](https://github.com/firezone/firezone/pull/10076) + +[^10400]: [feat(clients): gracefully close connections on shutdown](https://github.com/firezone/firezone/pull/10400) + +[^10462]: [feat(connlib): create flow on ICMP error "prohibited"](https://github.com/firezone/firezone/pull/10462) + +[^10104]: [feat(connlib): persistent DNS resource records across sessions](https://github.com/firezone/firezone/pull/10104) + +[^10373]: [feat(gateway): use hickory resolver to resolve A/AAAA queries](https://github.com/firezone/firezone/pull/10373) + +[^10347]: [refactor(connlib): improve fairness of event-loop](https://github.com/firezone/firezone/pull/10347) + +[^10349]: [feat(linux): try to set `rmem_max` and `wmem_max` on startup](https://github.com/firezone/firezone/pull/10349) + +[^10367]: [fix(connlib): limit the number of optimistic candidates](https://github.com/firezone/firezone/pull/10367) + +[^10286]: [fix(relay): handle relay-relay candidate pairs in eBPF](https://github.com/firezone/firezone/pull/10286) + +[^10330]: [fix(relay): XDP_PASS DNS replies](https://github.com/firezone/firezone/pull/10330) + +[^10291]: [fix(relay): re-add eBPF channel map entry on refresh](https://github.com/firezone/firezone/pull/10291) + +[^10371]: [fix(android): launch auth in CustomTab](https://github.com/firezone/firezone/pull/10371) + +[^10363]: [chore: improve macos dev experience](https://github.com/firezone/firezone/pull/10363) + +[^10313]: [fix(apple): Enforce single Firezone 
instance](https://github.com/firezone/firezone/pull/10313) + +[^10288]: [ci: bump Ubuntu runners to 24.04](https://github.com/firezone/firezone/pull/10288) + +[^10301]: [ci: create a more realistic network setup](https://github.com/firezone/firezone/pull/10301) + +[^10396]: [chore(portal): add non-composite indexes](https://github.com/firezone/firezone/pull/10396) + +[^10403]: [chore(portal): add remaining simple indexes](https://github.com/firezone/firezone/pull/10403) + +[^10397]: [fix(gateway): re-join topic in phoenix-channel on error](https://github.com/firezone/firezone/pull/10397) + +[^10399]: [fix(connlib): fuse event-loop future inside client session](https://github.com/firezone/firezone/pull/10399) + +[^10310]: [fix(gateway): reset DNS resource NAT if proxy IPs change](https://github.com/firezone/firezone/pull/10310) + +[^10317]: [fix(relay): filter traces by log filter](https://github.com/firezone/firezone/pull/10317) + +[^10336]: [fix(portal): don't count internet site in limits](https://github.com/firezone/firezone/pull/10336) diff --git a/website/src/app/blog/posts.tsx b/website/src/app/blog/posts.tsx index 1ae7e94c2..462533e21 100644 --- a/website/src/app/blog/posts.tsx +++ b/website/src/app/blog/posts.tsx @@ -11,6 +11,25 @@ import { Badge } from "@/components/Badges"; export default function Posts() { const [filters, setFilters] = useState("All Posts"); const posts = [ + { + title: "September 2025 Devlog", + date: "September 30, 2025", + href: "/blog/devlog/2025-09", + authorName: "Jamil Bou Kheir", + authorAvatarSrc: gravatar("jamil@firezone.dev"), + type: "Engineering", + description: ( +

+ September brought significant improvements to Firezone's + networking stack, administrative tooling, and cross-platform + reliability. This month's work focused on optimizing relay + performance through eBPF, improving DNS resolution behavior, and + enhancing the admin portal's visibility into client and Gateway + states. +

+ ), + }, + { title: "Migrate your Internet Resource by March 15, 2024", date: "February 16, 2024", @@ -405,7 +424,13 @@ export default function Posts() { <>
diff --git a/website/src/components/Blog/AboutFirezone/index.tsx b/website/src/components/Blog/AboutFirezone/index.tsx new file mode 100644 index 000000000..c56439e76 --- /dev/null +++ b/website/src/components/Blog/AboutFirezone/index.tsx @@ -0,0 +1,30 @@ +import Link from "next/link"; + +export default function AboutFirezone() { + return ( +
+

About Firezone

+

+ Firezone is an open source platform for securely managing remote access + to your organization's networks and applications. Unlike + traditional VPNs, Firezone takes a granular, least-privileged approach + with group-based policies that control access to individual + applications, entire subnets, and everything in between.{" "} + + Get started for free + {" "} + or{" "} + + learn more + {" "} + about how Firezone can help secure your organization. +

+
+ ); +}