chore(gui-client): improve logging around Client startup and IPC connections (#5216)

Closes #3567 (again)
Closes #5214

Ready for review

```[tasklist]
### Before merging
- [x] The IPC service should report system uptime when it starts. This will tell us whether the computer was rebooted or just the IPC service itself was upgraded / rebooted.
- [x] The IPC service should report the PID of itself and the GUI if possible
- [x] The GUI should report the PID of the IPC service if possible
- [x] Extra logging between `GIT_VERSION = ` and the token loading log line, especially right before and right after the critical Tauri launching step
- [x] If a 2nd GUI or IPC service runs and exits due to single-instance, it must log that
- [x] Remove redundant DNS deactivation when IPC service starts (I think conectado noticed this in another PR)
- [x] Manually test that the GUI logs something on clean shutdown
- [x] Logarithmic heartbeat?
- [x] If possible, log monotonic time somewhere so NTP syncs don't make the logs unreadable (uptime in the heartbeat should be monotonic, mostly)
- [x] Apply the same logging fix to the IPC service
- [x] Ensure log zips include GUI crash dumps
- [x] ~~Fix #5042~~ (that's a separate issue, I don't want to drag this PR out)
- [x] Test IPC service restart (logs as a stop event)
- [x] Test IPC service stop
- [x] Test IPC service logs during system suspend (Not logged, maybe because we aren't subscribed to power events)
- [x] Test IPC service logs during system reboot (Logged as shutdown, we exit gracefully)
- [x] Test IPC service logs during system shut down (Logged as a suspend)
- [x] Test IPC service upgrade (Logged as a stop)
- [x] Log unhandled events from the Windows service controller (Power events like suspend and resume are logged and not handled)
```

---------

Signed-off-by: Reactor Scram <ReactorScram@users.noreply.github.com>
This commit is contained in:
Reactor Scram
2024-06-06 17:10:03 -05:00
committed by GitHub
parent ba6685e2f2
commit 609ba73f84
16 changed files with 257 additions and 103 deletions

12
rust/Cargo.lock generated
View File

@@ -2009,6 +2009,7 @@ dependencies = [
"tokio-util",
"tracing",
"tracing-subscriber",
"uptime_lib",
"url",
"uuid",
"windows 0.56.0",
@@ -7052,6 +7053,17 @@ version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
[[package]]
name = "uptime_lib"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4e71ddbefed856d5881821d6ada4e606bbb91fd332296963ed596e2ad2100f3"
dependencies = [
"libc",
"thiserror",
"windows 0.52.0",
]
[[package]]
name = "url"
version = "2.5.0"

View File

@@ -75,6 +75,7 @@ features = [
"Win32_Networking_NetworkListManager",
# COM is needed to listen for network change events
"Win32_System_Com",
"Win32_System_Pipes", # Needed to get PID of pipe server
# Needed to listen for system DNS changes
"Win32_System_Registry",
"Win32_System_Threading",

View File

@@ -1,7 +1,10 @@
use anyhow::{bail, Result};
use anyhow::{bail, Context as _, Result};
use clap::{Args, Parser};
use connlib_client_shared::file_logger;
use firezone_headless_client::FIREZONE_GROUP;
use std::path::PathBuf;
use tracing::instrument;
use tracing_subscriber::EnvFilter;
mod about;
mod auth;
@@ -18,6 +21,8 @@ mod updates;
mod uptime;
mod welcome;
use settings::AdvancedSettings;
/// Output of `git describe` at compile time
/// e.g. `1.0.0-pre.4-20-ged5437c88-modified` where:
///
@@ -57,6 +62,7 @@ pub(crate) enum Error {
///
/// In total there are 4 subcommands (non-elevated, elevated GUI, crash handler, and deep link process)
/// In steady state, the only processes running will be the GUI and the crash handler.
#[instrument(skip_all)]
pub(crate) fn run() -> Result<()> {
std::panic::set_hook(Box::new(tracing_panic::panic_hook));
let cli = Cli::parse();
@@ -89,14 +95,17 @@ pub(crate) fn run() -> Result<()> {
anyhow::bail!("`smoke-test` must run as a normal user");
}
let result = gui::run(cli);
let settings = settings::load_advanced_settings().unwrap_or_default();
// Don't fix the log filter for smoke tests
let _logger = start_logging(&settings.log_filter)?;
let result = gui::run(cli, settings);
if let Err(error) = &result {
// In smoke-test mode, don't show the dialog, since it might be running
// unattended in CI and the dialog would hang forever
// Because of <https://github.com/firezone/firezone/issues/3567>,
// errors returned from `gui::run` may not be logged correctly
tracing::error!(?error, "gui::run error");
tracing::error!(?error);
}
Ok(result?)
}
@@ -106,21 +115,55 @@ pub(crate) fn run() -> Result<()> {
/// `gui::run` but wrapped in `anyhow::Result`
///
/// Automatically logs or shows error dialogs for important user-actionable errors
// Can't `instrument` this because logging isn't running when we enter it.
fn run_gui(cli: Cli) -> Result<()> {
let result = gui::run(cli);
let mut settings = settings::load_advanced_settings().unwrap_or_default();
fix_log_filter(&mut settings)?;
let _logger = start_logging(&settings.log_filter)?;
let result = gui::run(cli, settings);
// Make sure errors get logged, at least to stderr
if let Err(error) = &result {
tracing::error!(?error, "gui::run error");
tracing::error!(?error, error_msg = %error);
show_error_dialog(error)?;
}
Ok(result?)
}
/// Parse the log filter from settings, showing an error and fixing it if needed
fn fix_log_filter(settings: &mut AdvancedSettings) -> Result<()> {
if EnvFilter::try_new(&settings.log_filter).is_ok() {
return Ok(());
}
settings.log_filter = AdvancedSettings::default().log_filter;
native_dialog::MessageDialog::new()
.set_title("Log filter error")
.set_text("The custom log filter is not parsable. Using the default log filter.")
.set_type(native_dialog::MessageType::Error)
.show_alert()
.context("Can't show log filter error dialog")?;
Ok(())
}
/// Starts logging
///
/// Don't drop the log handle or logging will stop.
fn start_logging(directives: &str) -> Result<file_logger::Handle> {
let logging_handles = logging::setup(directives)?;
tracing::info!(?GIT_VERSION, "`gui-client` started logging");
Ok(logging_handles.logger)
}
#[instrument(skip_all)]
fn show_error_dialog(error: &gui::Error) -> Result<()> {
// Decision to put the error strings here: <https://github.com/firezone/firezone/pull/3464#discussion_r1473608415>
let error_msg = match error {
// This message gets shown to users in the GUI and could be localized, unlike
// messages in the log which only need to be used for `git grep`.
let user_friendly_error_msg = match error {
// TODO: Update this URL
gui::Error::WebViewNotInstalled => "Firezone cannot start because WebView2 is not installed. Follow the instructions at <https://www.firezone.dev/kb/user-guides/windows-client>.".to_string(),
gui::Error::DeepLink(deep_link::Error::CantListen) => "Firezone is already running. If it's not responding, force-stop it.".to_string(),
@@ -129,10 +172,11 @@ fn show_error_dialog(error: &gui::Error) -> Result<()> {
gui::Error::UserNotInFirezoneGroup => format!("You are not a member of the group `{FIREZONE_GROUP}`. Try `sudo usermod -aG {FIREZONE_GROUP} $USER` and then reboot"),
gui::Error::Other(error) => error.to_string(),
};
tracing::error!(?user_friendly_error_msg);
native_dialog::MessageDialog::new()
.set_title("Firezone Error")
.set_text(&error_msg)
.set_text(&user_friendly_error_msg)
.set_type(native_dialog::MessageType::Error)
.show_alert()?;
Ok(())

View File

@@ -40,7 +40,7 @@ fn crash() -> Result<()> {
let _handles = client::logging::setup("debug")?;
tracing::info!("started log (DebugCrash)");
panic!("purposely crashing to see if it shows up in logs");
panic!("purposely panicking to see if it shows up in logs");
}
#[allow(clippy::print_stdout)]

View File

@@ -16,6 +16,7 @@ use std::{path::PathBuf, str::FromStr, sync::Arc, time::Duration};
use system_tray_menu::Event as TrayMenuEvent;
use tauri::{Manager, SystemTray, SystemTrayEvent};
use tokio::sync::{mpsc, oneshot, Notify};
use tracing::instrument;
use url::Url;
use ControllerRequest as Req;
@@ -71,38 +72,11 @@ pub(crate) enum Error {
/// Runs the Tauri GUI and returns on exit or unrecoverable error
///
/// Still uses `thiserror` so we can catch the deep_link `CantListen` error
pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
let advanced_settings = settings::load_advanced_settings().unwrap_or_default();
// If the log filter is unparsable, show an error and use the default
// Fixes <https://github.com/firezone/firezone/issues/3452>
let advanced_settings =
match tracing_subscriber::EnvFilter::from_str(&advanced_settings.log_filter) {
Ok(_) => advanced_settings,
Err(_) => {
native_dialog::MessageDialog::new()
.set_title("Log filter error")
.set_text(
"The custom log filter is not parsable. Using the default log filter.",
)
.set_type(native_dialog::MessageType::Error)
.show_alert()
.context("Can't show log filter error dialog")?;
AdvancedSettings {
log_filter: AdvancedSettings::default().log_filter,
..advanced_settings
}
}
};
// Start logging
// TODO: Try using an Arc to keep the file logger alive even if Tauri bails out
// That may fix <https://github.com/firezone/firezone/issues/3567>
let logging_handles = client::logging::setup(&advanced_settings.log_filter)?;
tracing::info!("started log");
tracing::info!("GIT_VERSION = {}", crate::client::GIT_VERSION);
#[instrument(skip_all)]
pub(crate) fn run(
cli: client::Cli,
advanced_settings: settings::AdvancedSettings,
) -> Result<(), Error> {
// Need to keep this alive so crashes will be handled. Dropping detaches it.
let _crash_handler = match client::crash_handling::attach_handler() {
Ok(x) => Some(x),
@@ -117,6 +91,7 @@ pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
// Needed for the deep link server
let rt = tokio::runtime::Runtime::new().context("Couldn't start Tokio runtime")?;
let _guard = rt.enter();
rt.spawn(firezone_headless_client::heartbeat::heartbeat());
let (ctlr_tx, ctlr_rx) = mpsc::channel(5);
@@ -127,7 +102,7 @@ pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
let tray = SystemTray::new().with_menu(system_tray_menu::signed_out());
tracing::debug!("Setting up Tauri app instance...");
tracing::info!("Setting up Tauri app instance...");
let (setup_result_tx, mut setup_result_rx) =
tokio::sync::oneshot::channel::<Result<(), Error>>();
let app = tauri::Builder::default()
@@ -171,7 +146,7 @@ pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
}
})
.setup(move |app| {
tracing::debug!("Entered Tauri's `setup`");
tracing::info!("Entered Tauri's `setup`");
let setup_inner = move || {
// Check for updates
@@ -237,7 +212,6 @@ pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
app_handle_2,
ctlr_tx,
ctlr_rx,
logging_handles,
advanced_settings,
)
.await
@@ -460,8 +434,6 @@ struct Controller {
ctlr_tx: CtlrTx,
/// connlib session for the currently signed-in user, if there is one
session: Option<Session>,
/// Must be kept alive so the logger will keep running
_logging_handles: client::logging::Handles,
/// Tells us when to wake up and look for a new resource list. Tokio docs say that memory reads and writes are synchronized when notifying, so we don't need an extra mutex on the resources.
notify_controller: Arc<Notify>,
tunnel_ready: bool,
@@ -725,16 +697,15 @@ async fn run_controller(
app: tauri::AppHandle,
ctlr_tx: CtlrTx,
mut rx: mpsc::Receiver<ControllerRequest>,
logging_handles: client::logging::Handles,
advanced_settings: AdvancedSettings,
) -> Result<()> {
tracing::info!("Entered `run_controller`");
let mut controller = Controller {
advanced_settings,
app: app.clone(),
auth: client::auth::Auth::new(),
ctlr_tx,
session: None,
_logging_handles: logging_handles,
notify_controller: Arc::new(Notify::new()), // TODO: Fix cancel-safety
tunnel_ready: false,
uptime: Default::default(),
@@ -808,7 +779,10 @@ async fn run_controller(
},
Req::Fail(Failure::Error) => bail!("Test error"),
Req::Fail(Failure::Panic) => panic!("Test panic"),
Req::SystemTrayMenu(TrayMenuEvent::Quit) => break,
Req::SystemTrayMenu(TrayMenuEvent::Quit) => {
tracing::info!("User clicked Quit in the menu");
break
}
req => if let Err(error) = controller.handle_request(req).await {
tracing::error!(?error, "Failed to handle a ControllerRequest");
}

View File

@@ -88,7 +88,10 @@ impl Client {
callback_handler: CallbackHandler,
tokio_handle: tokio::runtime::Handle,
) -> Result<Self> {
tracing::info!(pid = std::process::id(), "Connecting to IPC service...");
tracing::info!(
client_pid = std::process::id(),
"Connecting to IPC service..."
);
let stream = platform::connect_to_service().await?;
let (rx, tx) = tokio::io::split(stream);
// Receives messages from the IPC service

View File

@@ -14,5 +14,12 @@ pub(crate) async fn connect_to_service() -> Result<IpcStream> {
path.display()
)
})?;
let cred = stream.peer_cred()?;
tracing::info!(
uid = cred.uid(),
gid = cred.gid(),
pid = cred.pid(),
"Made an IPC connection"
);
Ok(stream)
}

View File

@@ -1,5 +1,7 @@
use anyhow::{Context as _, Result};
use std::os::windows::io::AsRawHandle;
use tokio::net::windows::named_pipe;
use windows::Win32::{Foundation::HANDLE, System::Pipes::GetNamedPipeServerProcessId};
/// A type alias to abstract over the Windows and Unix IPC primitives
pub(crate) type IpcStream = named_pipe::NamedPipeClient;
@@ -13,5 +15,12 @@ pub(crate) async fn connect_to_service() -> Result<IpcStream> {
let stream = named_pipe::ClientOptions::new()
.open(path)
.with_context(|| "Couldn't connect to named pipe server at `{path}`")?;
let handle = HANDLE(stream.as_raw_handle() as isize);
let mut server_pid: u32 = 0;
// SAFETY: Windows doesn't store this pointer or handle, and we just got the handle
// from Tokio, so it should be valid.
unsafe { GetNamedPipeServerProcessId(handle, &mut server_pid) }
.context("Couldn't get PID of named pipe server")?;
tracing::info!(?server_pid, "Made IPC connection");
Ok(stream)
}

View File

@@ -9,7 +9,6 @@ use std::{
fs, io,
path::{Path, PathBuf},
result::Result as StdResult,
str::FromStr,
};
use tokio::task::spawn_blocking;
use tracing::subscriber::set_global_default;
@@ -20,7 +19,7 @@ use tracing_subscriber::{fmt, layer::SubscriberExt, reload, EnvFilter, Layer, Re
/// resulting in empty log files.
#[must_use]
pub(crate) struct Handles {
pub _logger: file_logger::Handle,
pub logger: file_logger::Handle,
pub _reloader: reload::Handle<EnvFilter, Registry>,
}
@@ -48,16 +47,19 @@ pub(crate) enum Error {
}
/// Set up logs after the process has started
pub(crate) fn setup(log_filter: &str) -> Result<Handles> {
///
/// We need two of these filters for some reason, and `EnvFilter` doesn't implement
/// `Clone` yet, so that's why we take the directives string
/// <https://github.com/tokio-rs/tracing/issues/2360>
pub(crate) fn setup(directives: &str) -> Result<Handles> {
let log_path = known_dirs::logs().context("Can't compute app log dir")?;
std::fs::create_dir_all(&log_path).map_err(Error::CreateDirAll)?;
let (layer, logger) = file_logger::layer(&log_path);
let filter = EnvFilter::from_str(log_filter)?;
let (filter, reloader) = reload::Layer::new(filter);
let (filter_2, reloader) = reload::Layer::new(EnvFilter::try_new(directives)?);
let subscriber = Registry::default()
.with(layer.with_filter(filter))
.with(fmt::layer().with_filter(EnvFilter::from_str(log_filter)?));
.with(layer.with_filter(filter_2))
.with(fmt::layer().with_filter(EnvFilter::try_new(directives)?));
set_global_default(subscriber)?;
if let Err(error) = output_vt100::try_init() {
tracing::warn!(
@@ -68,7 +70,7 @@ pub(crate) fn setup(log_filter: &str) -> Result<Handles> {
LogTracer::init()?;
tracing::debug!(?log_path, "Log path");
Ok(Handles {
_logger: logger,
logger,
_reloader: reloader,
})
}

View File

@@ -90,7 +90,9 @@ pub(crate) async fn get_advanced_settings(
"couldn't request advanced settings from controller task"
);
}
Ok(rx.await.unwrap())
rx.await.map_err(|_| {
"Couldn't get settings from `Controller`, maybe the program is crashing".to_string()
})
}
/// Saves the settings to disk and then tells `Controller` to apply them in-memory

View File

@@ -35,8 +35,7 @@ pub(crate) async fn check() -> Result<Release> {
.head(update_url.clone())
.header("User-Agent", user_agent)
.send()
.await
.context("Couldn't fetch from update URL `{update_url}`")?;
.await?;
let status = response.status();
if status != reqwest::StatusCode::TEMPORARY_REDIRECT {
anyhow::bail!("HTTP status: {status} from update URL `{update_url}`");

View File

@@ -27,6 +27,7 @@ tokio = { version = "1.38.0", features = ["macros", "signal"] }
tokio-util = { version = "0.7.11", features = ["codec"] }
tracing = { workspace = true }
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
uptime_lib = "0.3.0"
url = { version = "2.3.1", default-features = false }
uuid = { version = "1.7", default-features = false, features = ["std", "v4", "serde"] }

View File

@@ -0,0 +1,60 @@
//! A heartbeat that logs to `INFO` at exponentially increasing intervals
//! so it won't take up much disk space
//!
//! The IPC service is quiet when signed out, and the GUI is quiet when it's in steady
//! state, so this heartbeat allows us to estimate roughly how long each process stayed
//! up when looking at user logs, using unlimited disk space per run of the app.
use std::time::Duration;
use tokio::time::{sleep_until, Instant};
/// Logs a heartbeat to `tracing::info!`. Put this in a Tokio task and forget about it.
/// Emits an `INFO`-level heartbeat log line forever.
///
/// Spawn this in a Tokio task and forget about it. The gap between beats
/// grows exponentially (see [`Heartbeat`]), so total log volume stays
/// bounded regardless of how long the process runs.
pub async fn heartbeat() {
    let mut schedule = Heartbeat::default();
    loop {
        sleep_until(schedule.next_instant).await;
        // `None` means we couldn't read the system uptime; log the beat anyway.
        let system_uptime = uptime_lib::get().ok();
        tracing::info!(?system_uptime, "Heartbeat");
        schedule.tick();
    }
}
/// Schedule state for the exponential heartbeat.
///
/// The interval doubles on every tick, so the Nth beat happens roughly
/// 2^N seconds after start — a few dozen log lines cover decades.
struct Heartbeat {
    // Monotonic deadline for the next heartbeat log line.
    next_instant: Instant,
    // Interval to add after the next beat fires; doubles on every `tick`.
    dur: Duration,
}
impl Default for Heartbeat {
    /// A fresh heartbeat fires immediately, then backs off starting
    /// from a 1-second interval.
    fn default() -> Self {
        let first_beat = Instant::now();
        Self {
            next_instant: first_beat,
            dur: Duration::from_secs(1),
        }
    }
}
impl Heartbeat {
    /// Advances the schedule: push the deadline out by the *current*
    /// interval, then double the interval for the beat after that.
    /// (Order matters — the deadline must move by the pre-doubling value.)
    fn tick(&mut self) {
        let step = self.dur;
        self.next_instant += step;
        self.dur = step * 2;
    }
}
#[cfg(test)]
mod tests {
    /// The schedule must comfortably outlast decades of uptime:
    /// after only 50 ticks the next deadline is more than 50 years away,
    /// so the heartbeat prints on the order of 32 lines over that span.
    #[test]
    fn years() {
        const SECS_PER_YEAR: u64 = 86_400 * 365;
        let mut hb = super::Heartbeat::default();
        // Capture the 50-year mark *before* ticking, relative to the first beat.
        let far_future =
            hb.next_instant + std::time::Duration::from_secs(SECS_PER_YEAR * 50);
        // Exponential doubling: 50 ticks push the deadline out by ~2^50 seconds.
        for _ in 0..50 {
            hb.tick();
        }
        assert!(hb.next_instant > far_future);
    }
}

View File

@@ -37,6 +37,7 @@ use platform::Signals;
/// Generate a persistent device ID, stores it to disk, and reads it back.
pub(crate) mod device_id;
pub mod dns_control;
pub mod heartbeat;
pub mod known_dirs;
#[cfg(target_os = "linux")]
@@ -219,6 +220,8 @@ pub fn run_only_headless_client() -> Result<()> {
}
assert!(std::env::var(TOKEN_ENV_KEY).is_err());
// TODO: This might have the same issue with fatal errors not getting logged
// as addressed for the IPC service in PR #5216
let (layer, _handle) = cli
.common
.log_dir
@@ -347,6 +350,7 @@ pub(crate) fn run_debug_ipc_service() -> Result<()> {
debug_command_setup()?;
let rt = tokio::runtime::Runtime::new()?;
let _guard = rt.enter();
rt.spawn(crate::heartbeat::heartbeat());
let mut signals = Signals::new()?;
// Couldn't get the loop to work here yet, so SIGHUP is not implemented

View File

@@ -105,7 +105,10 @@ pub(crate) fn run_ipc_service(cli: CliCommon) -> Result<()> {
anyhow::bail!("This is the IPC service binary, it's not meant to run interactively.");
}
let rt = tokio::runtime::Runtime::new()?;
rt.block_on(crate::ipc_listen())?;
rt.spawn(crate::heartbeat::heartbeat());
if let Err(error) = rt.block_on(crate::ipc_listen()) {
tracing::error!(?error, "`ipc_listen` failed");
}
Ok(())
}
@@ -156,14 +159,11 @@ impl IpcServer {
tracing::info!("Listening for GUI to connect over IPC...");
let (stream, _) = self.listener.accept().await?;
let cred = stream.peer_cred()?;
// I'm not sure if we can enforce group membership here - Docker
// might just be enforcing it with filesystem permissions.
// Checking the secondary groups of another user looks complicated.
tracing::info!(
uid = cred.uid(),
gid = cred.gid(),
pid = cred.pid(),
"Got an IPC connection"
"Accepted an IPC connection"
);
Ok(stream)
}

View File

@@ -10,6 +10,7 @@ use connlib_client_shared::file_logger;
use connlib_shared::BUNDLE_ID;
use std::{
ffi::{c_void, OsString},
os::windows::io::AsRawHandle,
path::{Path, PathBuf},
str::FromStr,
time::Duration,
@@ -17,7 +18,9 @@ use std::{
use tokio::net::windows::named_pipe;
use tracing::subscriber::set_global_default;
use tracing_subscriber::{layer::SubscriberExt as _, EnvFilter, Layer, Registry};
use windows::Win32::Security as WinSec;
use windows::Win32::{
Foundation::HANDLE, Security as WinSec, System::Pipes::GetNamedPipeClientProcessId,
};
use windows_service::{
service::{
ServiceControl, ServiceControlAccept, ServiceExitCode, ServiceState, ServiceStatus,
@@ -78,30 +81,33 @@ pub(crate) fn run_ipc_service(_cli: CliCommon) -> Result<()> {
windows_service::define_windows_service!(ffi_service_run, windows_service_run);
fn windows_service_run(arguments: Vec<OsString>) {
if let Err(error) = fallible_windows_service_run(arguments) {
tracing::error!(?error, "fallible_windows_service_run returned an error");
let log_path = crate::known_dirs::ipc_service_logs()
.expect("Should be able to compute IPC service logs dir");
std::fs::create_dir_all(&log_path).expect("We should have permissions to create our log dir");
let (layer, handle) = file_logger::layer(&log_path);
let filter = EnvFilter::from_str(SERVICE_RUST_LOG)
.expect("Hard-coded log filter should always be parsable");
let subscriber = Registry::default().with(layer.with_filter(filter));
set_global_default(subscriber).expect("`set_global_default` should always work)");
tracing::info!(git_version = crate::GIT_VERSION);
if let Err(error) = fallible_windows_service_run(arguments, handle) {
tracing::error!(?error, "`fallible_windows_service_run` returned an error");
}
}
// Most of the Windows-specific service stuff should go here
//
// The arguments don't seem to match the ones passed to the main thread at all.
fn fallible_windows_service_run(arguments: Vec<OsString>) -> Result<()> {
let log_path =
crate::known_dirs::ipc_service_logs().context("Can't compute IPC service logs dir")?;
std::fs::create_dir_all(&log_path)?;
let (layer, _handle) = file_logger::layer(&log_path);
let filter = EnvFilter::from_str(SERVICE_RUST_LOG)?;
let subscriber = Registry::default().with(layer.with_filter(filter));
set_global_default(subscriber)?;
tracing::info!(git_version = crate::GIT_VERSION);
//
// If Windows stops us gracefully, this function may never return.
fn fallible_windows_service_run(
arguments: Vec<OsString>,
logging_handle: file_logger::Handle,
) -> Result<()> {
tracing::info!(?arguments, "fallible_windows_service_run");
let rt = tokio::runtime::Runtime::new()?;
// Fixes <https://github.com/firezone/firezone/issues/4899>,
// DNS rules persisting after reboot
crate::dns_control::deactivate().ok();
rt.spawn(crate::heartbeat::heartbeat());
let ipc_task = rt.spawn(super::ipc_listen());
let ipc_task_ah = ipc_task.abort_handle();
@@ -111,8 +117,12 @@ fn fallible_windows_service_run(arguments: Vec<OsString>) -> Result<()> {
match control_event {
// TODO
ServiceControl::Interrogate => ServiceControlHandlerResult::NoError,
ServiceControl::Stop => {
tracing::info!("Got stop signal from service controller");
ServiceControl::PowerEvent(event) => {
tracing::info!(?event, "Power event");
ServiceControlHandlerResult::NoError
}
ServiceControl::Shutdown | ServiceControl::Stop => {
tracing::info!(?control_event, "Got stop signal from service controller");
ipc_task_ah.abort();
ServiceControlHandlerResult::NoError
}
@@ -125,12 +135,13 @@ fn fallible_windows_service_run(arguments: Vec<OsString>) -> Result<()> {
| ServiceControl::ParamChange
| ServiceControl::Pause
| ServiceControl::Preshutdown
| ServiceControl::Shutdown
| ServiceControl::HardwareProfileChange(_)
| ServiceControl::PowerEvent(_)
| ServiceControl::SessionChange(_)
| ServiceControl::TimeChange
| ServiceControl::TriggerEvent => ServiceControlHandlerResult::NotImplemented,
| ServiceControl::TriggerEvent => {
tracing::warn!(?control_event, "Unhandled service control event");
ServiceControlHandlerResult::NotImplemented
}
_ => ServiceControlHandlerResult::NotImplemented,
}
};
@@ -140,7 +151,9 @@ fn fallible_windows_service_run(arguments: Vec<OsString>) -> Result<()> {
status_handle.set_service_status(ServiceStatus {
service_type: SERVICE_TYPE,
current_state: ServiceState::Running,
controls_accepted: ServiceControlAccept::STOP,
controls_accepted: ServiceControlAccept::POWER_EVENT
| ServiceControlAccept::SHUTDOWN
| ServiceControlAccept::STOP,
exit_code: ServiceExitCode::Win32(0),
checkpoint: 0,
wait_hint: Duration::default(),
@@ -152,25 +165,38 @@ fn fallible_windows_service_run(arguments: Vec<OsString>) -> Result<()> {
// We cancelled because Windows asked us to shut down.
Ok(())
}
Err(join_error) => {
// The IPC task may have panicked
Err(join_error.into())
}
Ok(Err(error)) => Err(error.context("ipc_listen failed")),
Err(join_error) => Err(anyhow::Error::from(join_error).context("`ipc_listen` panicked")),
Ok(Err(error)) => Err(error.context("`ipc_listen` threw an error")),
Ok(Ok(impossible)) => match impossible {},
};
if let Err(error) = &result {
tracing::error!(?error, "`ipc_listen` failed");
}
// Drop the logging handle so it flushes the logs before we let Windows kill our process.
// There is no obvious and elegant way to do this, since the logging and `ServiceState`
// changes are interleaved, not nested:
// - Start logging
// - ServiceState::Running
// - Stop logging
// - ServiceState::Stopped
std::mem::drop(logging_handle);
// Tell Windows that we're stopping
status_handle.set_service_status(ServiceStatus {
service_type: SERVICE_TYPE,
current_state: ServiceState::Stopped,
controls_accepted: ServiceControlAccept::empty(),
exit_code: ServiceExitCode::Win32(if result.is_ok() { 0 } else { 1 }),
checkpoint: 0,
wait_hint: Duration::default(),
process_id: None,
})?;
result
// Per Windows docs, this will cause Windows to kill our process eventually.
status_handle
.set_service_status(ServiceStatus {
service_type: SERVICE_TYPE,
current_state: ServiceState::Stopped,
controls_accepted: ServiceControlAccept::empty(),
exit_code: ServiceExitCode::Win32(if result.is_ok() { 0 } else { 1 }),
checkpoint: 0,
wait_hint: Duration::default(),
process_id: None,
})
.expect("Should be able to tell Windows we're stopping");
// Generally unreachable
Ok(())
}
pub(crate) struct IpcServer {
@@ -219,11 +245,21 @@ impl IpcServer {
.bind_to_pipe()
.await
.context("Couldn't bind to named pipe")?;
tracing::info!("Listening for GUI to connect over IPC...");
tracing::info!(
server_pid = std::process::id(),
"Listening for GUI to connect over IPC..."
);
server
.connect()
.await
.context("Couldn't accept IPC connection from GUI")?;
let handle = HANDLE(server.as_raw_handle() as isize);
let mut client_pid: u32 = 0;
// SAFETY: Windows doesn't store this pointer or handle, and we just got the handle
// from Tokio, so it should be valid.
unsafe { GetNamedPipeClientProcessId(handle, &mut client_pid) }
.context("Couldn't get PID of named pipe client")?;
tracing::info!(?client_pid, "Accepted IPC connection");
Ok(server)
}