mirror of
https://github.com/outbackdingo/firezone.git
synced 2026-01-28 02:18:50 +00:00
chore(gui-client): improve logging around Client startup and IPC connections (#5216)
Closes #3567 (again) Closes #5214 Ready for review ```[tasklist] ### Before merging - [x] The IPC service should report system uptime when it starts. This will tell us whether the computer was rebooted or just the IPC service itself was upgraded / rebooted. - [x] The IPC service should report the PID of itself and the GUI if possible - [x] The GUI should report the PID of the IPC service if possible - [x] Extra logging between `GIT_VERSION = ` and the token loading log line, especially right before and right after the critical Tauri launching step - [x] If a 2nd GUI or IPC service runs and exits due to single-instance, it must log that - [x] Remove redundant DNS deactivation when IPC service starts (I think conectado noticed this in another PR) - [x] Manually test that the GUI logs something on clean shutdown - [x] Logarithmic heartbeat? - [x] If possible, log monotonic time somewhere so NTP syncs don't make the logs unreadable (uptime in the heartbeat should be monotonic, mostly) - [x] Apply the same logging fix to the IPC service - [x] Ensure log zips include GUI crash dumps - [x] ~~Fix #5042~~ (that's a separate issue, I don't want to drag this PR out) - [x] Test IPC service restart (logs as a stop event) - [x] Test IPC service stop - [x] Test IPC service logs during system suspend (Not logged, maybe because we aren't subscribed to power events) - [x] Test IPC service logs during system reboot (Logged as shutdown, we exit gracefully) - [x] Test IPC service logs during system shut down (Logged as a suspend) - [x] Test IPC service upgrade (Logged as a stop) - [x] Log unhandled events from the Windows service controller (Power events like suspend and resume are logged and not handled) ``` --------- Signed-off-by: Reactor Scram <ReactorScram@users.noreply.github.com>
This commit is contained in:
12
rust/Cargo.lock
generated
12
rust/Cargo.lock
generated
@@ -2009,6 +2009,7 @@ dependencies = [
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"uptime_lib",
|
||||
"url",
|
||||
"uuid",
|
||||
"windows 0.56.0",
|
||||
@@ -7052,6 +7053,17 @@ version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
|
||||
|
||||
[[package]]
|
||||
name = "uptime_lib"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4e71ddbefed856d5881821d6ada4e606bbb91fd332296963ed596e2ad2100f3"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"thiserror",
|
||||
"windows 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "url"
|
||||
version = "2.5.0"
|
||||
|
||||
@@ -75,6 +75,7 @@ features = [
|
||||
"Win32_Networking_NetworkListManager",
|
||||
# COM is needed to listen for network change events
|
||||
"Win32_System_Com",
|
||||
"Win32_System_Pipes", # Needed to get PID of pipe server
|
||||
# Needed to listen for system DNS changes
|
||||
"Win32_System_Registry",
|
||||
"Win32_System_Threading",
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
use anyhow::{bail, Result};
|
||||
use anyhow::{bail, Context as _, Result};
|
||||
use clap::{Args, Parser};
|
||||
use connlib_client_shared::file_logger;
|
||||
use firezone_headless_client::FIREZONE_GROUP;
|
||||
use std::path::PathBuf;
|
||||
use tracing::instrument;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
mod about;
|
||||
mod auth;
|
||||
@@ -18,6 +21,8 @@ mod updates;
|
||||
mod uptime;
|
||||
mod welcome;
|
||||
|
||||
use settings::AdvancedSettings;
|
||||
|
||||
/// Output of `git describe` at compile time
|
||||
/// e.g. `1.0.0-pre.4-20-ged5437c88-modified` where:
|
||||
///
|
||||
@@ -57,6 +62,7 @@ pub(crate) enum Error {
|
||||
///
|
||||
/// In total there are 4 subcommands (non-elevated, elevated GUI, crash handler, and deep link process)
|
||||
/// In steady state, the only processes running will be the GUI and the crash handler.
|
||||
#[instrument(skip_all)]
|
||||
pub(crate) fn run() -> Result<()> {
|
||||
std::panic::set_hook(Box::new(tracing_panic::panic_hook));
|
||||
let cli = Cli::parse();
|
||||
@@ -89,14 +95,17 @@ pub(crate) fn run() -> Result<()> {
|
||||
anyhow::bail!("`smoke-test` must run as a normal user");
|
||||
}
|
||||
|
||||
let result = gui::run(cli);
|
||||
let settings = settings::load_advanced_settings().unwrap_or_default();
|
||||
// Don't fix the log filter for smoke tests
|
||||
let _logger = start_logging(&settings.log_filter)?;
|
||||
let result = gui::run(cli, settings);
|
||||
if let Err(error) = &result {
|
||||
// In smoke-test mode, don't show the dialog, since it might be running
|
||||
// unattended in CI and the dialog would hang forever
|
||||
|
||||
// Because of <https://github.com/firezone/firezone/issues/3567>,
|
||||
// errors returned from `gui::run` may not be logged correctly
|
||||
tracing::error!(?error, "gui::run error");
|
||||
tracing::error!(?error);
|
||||
}
|
||||
Ok(result?)
|
||||
}
|
||||
@@ -106,21 +115,55 @@ pub(crate) fn run() -> Result<()> {
|
||||
/// `gui::run` but wrapped in `anyhow::Result`
|
||||
///
|
||||
/// Automatically logs or shows error dialogs for important user-actionable errors
|
||||
// Can't `instrument` this because logging isn't running when we enter it.
|
||||
fn run_gui(cli: Cli) -> Result<()> {
|
||||
let result = gui::run(cli);
|
||||
let mut settings = settings::load_advanced_settings().unwrap_or_default();
|
||||
fix_log_filter(&mut settings)?;
|
||||
let _logger = start_logging(&settings.log_filter)?;
|
||||
let result = gui::run(cli, settings);
|
||||
|
||||
// Make sure errors get logged, at least to stderr
|
||||
if let Err(error) = &result {
|
||||
tracing::error!(?error, "gui::run error");
|
||||
tracing::error!(?error, error_msg = %error);
|
||||
show_error_dialog(error)?;
|
||||
}
|
||||
|
||||
Ok(result?)
|
||||
}
|
||||
|
||||
/// Parse the log filter from settings, showing an error and fixing it if needed
|
||||
fn fix_log_filter(settings: &mut AdvancedSettings) -> Result<()> {
|
||||
if EnvFilter::try_new(&settings.log_filter).is_ok() {
|
||||
return Ok(());
|
||||
}
|
||||
settings.log_filter = AdvancedSettings::default().log_filter;
|
||||
|
||||
native_dialog::MessageDialog::new()
|
||||
.set_title("Log filter error")
|
||||
.set_text("The custom log filter is not parsable. Using the default log filter.")
|
||||
.set_type(native_dialog::MessageType::Error)
|
||||
.show_alert()
|
||||
.context("Can't show log filter error dialog")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Starts logging
|
||||
///
|
||||
/// Don't drop the log handle or logging will stop.
|
||||
fn start_logging(directives: &str) -> Result<file_logger::Handle> {
|
||||
let logging_handles = logging::setup(directives)?;
|
||||
tracing::info!(?GIT_VERSION, "`gui-client` started logging");
|
||||
|
||||
Ok(logging_handles.logger)
|
||||
}
|
||||
|
||||
#[instrument(skip_all)]
|
||||
fn show_error_dialog(error: &gui::Error) -> Result<()> {
|
||||
// Decision to put the error strings here: <https://github.com/firezone/firezone/pull/3464#discussion_r1473608415>
|
||||
let error_msg = match error {
|
||||
// This message gets shown to users in the GUI and could be localized, unlike
|
||||
// messages in the log which only need to be used for `git grep`.
|
||||
let user_friendly_error_msg = match error {
|
||||
// TODO: Update this URL
|
||||
gui::Error::WebViewNotInstalled => "Firezone cannot start because WebView2 is not installed. Follow the instructions at <https://www.firezone.dev/kb/user-guides/windows-client>.".to_string(),
|
||||
gui::Error::DeepLink(deep_link::Error::CantListen) => "Firezone is already running. If it's not responding, force-stop it.".to_string(),
|
||||
@@ -129,10 +172,11 @@ fn show_error_dialog(error: &gui::Error) -> Result<()> {
|
||||
gui::Error::UserNotInFirezoneGroup => format!("You are not a member of the group `{FIREZONE_GROUP}`. Try `sudo usermod -aG {FIREZONE_GROUP} $USER` and then reboot"),
|
||||
gui::Error::Other(error) => error.to_string(),
|
||||
};
|
||||
tracing::error!(?user_friendly_error_msg);
|
||||
|
||||
native_dialog::MessageDialog::new()
|
||||
.set_title("Firezone Error")
|
||||
.set_text(&error_msg)
|
||||
.set_text(&user_friendly_error_msg)
|
||||
.set_type(native_dialog::MessageType::Error)
|
||||
.show_alert()?;
|
||||
Ok(())
|
||||
|
||||
@@ -40,7 +40,7 @@ fn crash() -> Result<()> {
|
||||
let _handles = client::logging::setup("debug")?;
|
||||
tracing::info!("started log (DebugCrash)");
|
||||
|
||||
panic!("purposely crashing to see if it shows up in logs");
|
||||
panic!("purposely panicking to see if it shows up in logs");
|
||||
}
|
||||
|
||||
#[allow(clippy::print_stdout)]
|
||||
|
||||
@@ -16,6 +16,7 @@ use std::{path::PathBuf, str::FromStr, sync::Arc, time::Duration};
|
||||
use system_tray_menu::Event as TrayMenuEvent;
|
||||
use tauri::{Manager, SystemTray, SystemTrayEvent};
|
||||
use tokio::sync::{mpsc, oneshot, Notify};
|
||||
use tracing::instrument;
|
||||
use url::Url;
|
||||
|
||||
use ControllerRequest as Req;
|
||||
@@ -71,38 +72,11 @@ pub(crate) enum Error {
|
||||
/// Runs the Tauri GUI and returns on exit or unrecoverable error
|
||||
///
|
||||
/// Still uses `thiserror` so we can catch the deep_link `CantListen` error
|
||||
pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
|
||||
let advanced_settings = settings::load_advanced_settings().unwrap_or_default();
|
||||
|
||||
// If the log filter is unparsable, show an error and use the default
|
||||
// Fixes <https://github.com/firezone/firezone/issues/3452>
|
||||
let advanced_settings =
|
||||
match tracing_subscriber::EnvFilter::from_str(&advanced_settings.log_filter) {
|
||||
Ok(_) => advanced_settings,
|
||||
Err(_) => {
|
||||
native_dialog::MessageDialog::new()
|
||||
.set_title("Log filter error")
|
||||
.set_text(
|
||||
"The custom log filter is not parsable. Using the default log filter.",
|
||||
)
|
||||
.set_type(native_dialog::MessageType::Error)
|
||||
.show_alert()
|
||||
.context("Can't show log filter error dialog")?;
|
||||
|
||||
AdvancedSettings {
|
||||
log_filter: AdvancedSettings::default().log_filter,
|
||||
..advanced_settings
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Start logging
|
||||
// TODO: Try using an Arc to keep the file logger alive even if Tauri bails out
|
||||
// That may fix <https://github.com/firezone/firezone/issues/3567>
|
||||
let logging_handles = client::logging::setup(&advanced_settings.log_filter)?;
|
||||
tracing::info!("started log");
|
||||
tracing::info!("GIT_VERSION = {}", crate::client::GIT_VERSION);
|
||||
|
||||
#[instrument(skip_all)]
|
||||
pub(crate) fn run(
|
||||
cli: client::Cli,
|
||||
advanced_settings: settings::AdvancedSettings,
|
||||
) -> Result<(), Error> {
|
||||
// Need to keep this alive so crashes will be handled. Dropping detaches it.
|
||||
let _crash_handler = match client::crash_handling::attach_handler() {
|
||||
Ok(x) => Some(x),
|
||||
@@ -117,6 +91,7 @@ pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
|
||||
// Needed for the deep link server
|
||||
let rt = tokio::runtime::Runtime::new().context("Couldn't start Tokio runtime")?;
|
||||
let _guard = rt.enter();
|
||||
rt.spawn(firezone_headless_client::heartbeat::heartbeat());
|
||||
|
||||
let (ctlr_tx, ctlr_rx) = mpsc::channel(5);
|
||||
|
||||
@@ -127,7 +102,7 @@ pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
|
||||
|
||||
let tray = SystemTray::new().with_menu(system_tray_menu::signed_out());
|
||||
|
||||
tracing::debug!("Setting up Tauri app instance...");
|
||||
tracing::info!("Setting up Tauri app instance...");
|
||||
let (setup_result_tx, mut setup_result_rx) =
|
||||
tokio::sync::oneshot::channel::<Result<(), Error>>();
|
||||
let app = tauri::Builder::default()
|
||||
@@ -171,7 +146,7 @@ pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
|
||||
}
|
||||
})
|
||||
.setup(move |app| {
|
||||
tracing::debug!("Entered Tauri's `setup`");
|
||||
tracing::info!("Entered Tauri's `setup`");
|
||||
|
||||
let setup_inner = move || {
|
||||
// Check for updates
|
||||
@@ -237,7 +212,6 @@ pub(crate) fn run(cli: client::Cli) -> Result<(), Error> {
|
||||
app_handle_2,
|
||||
ctlr_tx,
|
||||
ctlr_rx,
|
||||
logging_handles,
|
||||
advanced_settings,
|
||||
)
|
||||
.await
|
||||
@@ -460,8 +434,6 @@ struct Controller {
|
||||
ctlr_tx: CtlrTx,
|
||||
/// connlib session for the currently signed-in user, if there is one
|
||||
session: Option<Session>,
|
||||
/// Must be kept alive so the logger will keep running
|
||||
_logging_handles: client::logging::Handles,
|
||||
/// Tells us when to wake up and look for a new resource list. Tokio docs say that memory reads and writes are synchronized when notifying, so we don't need an extra mutex on the resources.
|
||||
notify_controller: Arc<Notify>,
|
||||
tunnel_ready: bool,
|
||||
@@ -725,16 +697,15 @@ async fn run_controller(
|
||||
app: tauri::AppHandle,
|
||||
ctlr_tx: CtlrTx,
|
||||
mut rx: mpsc::Receiver<ControllerRequest>,
|
||||
logging_handles: client::logging::Handles,
|
||||
advanced_settings: AdvancedSettings,
|
||||
) -> Result<()> {
|
||||
tracing::info!("Entered `run_controller`");
|
||||
let mut controller = Controller {
|
||||
advanced_settings,
|
||||
app: app.clone(),
|
||||
auth: client::auth::Auth::new(),
|
||||
ctlr_tx,
|
||||
session: None,
|
||||
_logging_handles: logging_handles,
|
||||
notify_controller: Arc::new(Notify::new()), // TODO: Fix cancel-safety
|
||||
tunnel_ready: false,
|
||||
uptime: Default::default(),
|
||||
@@ -808,7 +779,10 @@ async fn run_controller(
|
||||
},
|
||||
Req::Fail(Failure::Error) => bail!("Test error"),
|
||||
Req::Fail(Failure::Panic) => panic!("Test panic"),
|
||||
Req::SystemTrayMenu(TrayMenuEvent::Quit) => break,
|
||||
Req::SystemTrayMenu(TrayMenuEvent::Quit) => {
|
||||
tracing::info!("User clicked Quit in the menu");
|
||||
break
|
||||
}
|
||||
req => if let Err(error) = controller.handle_request(req).await {
|
||||
tracing::error!(?error, "Failed to handle a ControllerRequest");
|
||||
}
|
||||
|
||||
@@ -88,7 +88,10 @@ impl Client {
|
||||
callback_handler: CallbackHandler,
|
||||
tokio_handle: tokio::runtime::Handle,
|
||||
) -> Result<Self> {
|
||||
tracing::info!(pid = std::process::id(), "Connecting to IPC service...");
|
||||
tracing::info!(
|
||||
client_pid = std::process::id(),
|
||||
"Connecting to IPC service..."
|
||||
);
|
||||
let stream = platform::connect_to_service().await?;
|
||||
let (rx, tx) = tokio::io::split(stream);
|
||||
// Receives messages from the IPC service
|
||||
|
||||
@@ -14,5 +14,12 @@ pub(crate) async fn connect_to_service() -> Result<IpcStream> {
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
let cred = stream.peer_cred()?;
|
||||
tracing::info!(
|
||||
uid = cred.uid(),
|
||||
gid = cred.gid(),
|
||||
pid = cred.pid(),
|
||||
"Made an IPC connection"
|
||||
);
|
||||
Ok(stream)
|
||||
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
use anyhow::{Context as _, Result};
|
||||
use std::os::windows::io::AsRawHandle;
|
||||
use tokio::net::windows::named_pipe;
|
||||
use windows::Win32::{Foundation::HANDLE, System::Pipes::GetNamedPipeServerProcessId};
|
||||
|
||||
/// A type alias to abstract over the Windows and Unix IPC primitives
|
||||
pub(crate) type IpcStream = named_pipe::NamedPipeClient;
|
||||
@@ -13,5 +15,12 @@ pub(crate) async fn connect_to_service() -> Result<IpcStream> {
|
||||
let stream = named_pipe::ClientOptions::new()
|
||||
.open(path)
|
||||
.with_context(|| "Couldn't connect to named pipe server at `{path}`")?;
|
||||
let handle = HANDLE(stream.as_raw_handle() as isize);
|
||||
let mut server_pid: u32 = 0;
|
||||
// SAFETY: Windows doesn't store this pointer or handle, and we just got the handle
|
||||
// from Tokio, so it should be valid.
|
||||
unsafe { GetNamedPipeServerProcessId(handle, &mut server_pid) }
|
||||
.context("Couldn't get PID of named pipe server")?;
|
||||
tracing::info!(?server_pid, "Made IPC connection");
|
||||
Ok(stream)
|
||||
}
|
||||
|
||||
@@ -9,7 +9,6 @@ use std::{
|
||||
fs, io,
|
||||
path::{Path, PathBuf},
|
||||
result::Result as StdResult,
|
||||
str::FromStr,
|
||||
};
|
||||
use tokio::task::spawn_blocking;
|
||||
use tracing::subscriber::set_global_default;
|
||||
@@ -20,7 +19,7 @@ use tracing_subscriber::{fmt, layer::SubscriberExt, reload, EnvFilter, Layer, Re
|
||||
/// resulting in empty log files.
|
||||
#[must_use]
|
||||
pub(crate) struct Handles {
|
||||
pub _logger: file_logger::Handle,
|
||||
pub logger: file_logger::Handle,
|
||||
pub _reloader: reload::Handle<EnvFilter, Registry>,
|
||||
}
|
||||
|
||||
@@ -48,16 +47,19 @@ pub(crate) enum Error {
|
||||
}
|
||||
|
||||
/// Set up logs after the process has started
|
||||
pub(crate) fn setup(log_filter: &str) -> Result<Handles> {
|
||||
///
|
||||
/// We need two of these filters for some reason, and `EnvFilter` doesn't implement
|
||||
/// `Clone` yet, so that's why we take the directives string
|
||||
/// <https://github.com/tokio-rs/tracing/issues/2360>
|
||||
pub(crate) fn setup(directives: &str) -> Result<Handles> {
|
||||
let log_path = known_dirs::logs().context("Can't compute app log dir")?;
|
||||
|
||||
std::fs::create_dir_all(&log_path).map_err(Error::CreateDirAll)?;
|
||||
let (layer, logger) = file_logger::layer(&log_path);
|
||||
let filter = EnvFilter::from_str(log_filter)?;
|
||||
let (filter, reloader) = reload::Layer::new(filter);
|
||||
let (filter_2, reloader) = reload::Layer::new(EnvFilter::try_new(directives)?);
|
||||
let subscriber = Registry::default()
|
||||
.with(layer.with_filter(filter))
|
||||
.with(fmt::layer().with_filter(EnvFilter::from_str(log_filter)?));
|
||||
.with(layer.with_filter(filter_2))
|
||||
.with(fmt::layer().with_filter(EnvFilter::try_new(directives)?));
|
||||
set_global_default(subscriber)?;
|
||||
if let Err(error) = output_vt100::try_init() {
|
||||
tracing::warn!(
|
||||
@@ -68,7 +70,7 @@ pub(crate) fn setup(log_filter: &str) -> Result<Handles> {
|
||||
LogTracer::init()?;
|
||||
tracing::debug!(?log_path, "Log path");
|
||||
Ok(Handles {
|
||||
_logger: logger,
|
||||
logger,
|
||||
_reloader: reloader,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -90,7 +90,9 @@ pub(crate) async fn get_advanced_settings(
|
||||
"couldn't request advanced settings from controller task"
|
||||
);
|
||||
}
|
||||
Ok(rx.await.unwrap())
|
||||
rx.await.map_err(|_| {
|
||||
"Couldn't get settings from `Controller`, maybe the program is crashing".to_string()
|
||||
})
|
||||
}
|
||||
|
||||
/// Saves the settings to disk and then tells `Controller` to apply them in-memory
|
||||
|
||||
@@ -35,8 +35,7 @@ pub(crate) async fn check() -> Result<Release> {
|
||||
.head(update_url.clone())
|
||||
.header("User-Agent", user_agent)
|
||||
.send()
|
||||
.await
|
||||
.context("Couldn't fetch from update URL `{update_url}`")?;
|
||||
.await?;
|
||||
let status = response.status();
|
||||
if status != reqwest::StatusCode::TEMPORARY_REDIRECT {
|
||||
anyhow::bail!("HTTP status: {status} from update URL `{update_url}`");
|
||||
|
||||
@@ -27,6 +27,7 @@ tokio = { version = "1.38.0", features = ["macros", "signal"] }
|
||||
tokio-util = { version = "0.7.11", features = ["codec"] }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
|
||||
uptime_lib = "0.3.0"
|
||||
url = { version = "2.3.1", default-features = false }
|
||||
uuid = { version = "1.7", default-features = false, features = ["std", "v4", "serde"] }
|
||||
|
||||
|
||||
60
rust/headless-client/src/heartbeat.rs
Normal file
60
rust/headless-client/src/heartbeat.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
//! A heartbeat that logs to `INFO` at exponentially increasing intervals
|
||||
//! so it won't take up much disk space
|
||||
//!
|
||||
//! The IPC service is quiet when signed out, and the GUI is quiet when it's in steady
|
||||
//! state, so this heartbeat allows us to estimate roughly how long each process stayed
|
||||
//! up when looking at user logs, using unlimited disk space per run of the app.
|
||||
|
||||
use std::time::Duration;
|
||||
use tokio::time::{sleep_until, Instant};
|
||||
|
||||
/// Logs a heartbeat to `tracing::info!`. Put this in a Tokio task and forget about it.
|
||||
pub async fn heartbeat() {
|
||||
let mut hb = Heartbeat::default();
|
||||
loop {
|
||||
sleep_until(hb.next_instant).await;
|
||||
let system_uptime = uptime_lib::get().ok();
|
||||
tracing::info!(?system_uptime, "Heartbeat");
|
||||
hb.tick();
|
||||
}
|
||||
}
|
||||
|
||||
struct Heartbeat {
|
||||
next_instant: Instant,
|
||||
dur: Duration,
|
||||
}
|
||||
|
||||
impl Default for Heartbeat {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
next_instant: Instant::now(),
|
||||
dur: Duration::from_secs(1),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Heartbeat {
|
||||
fn tick(&mut self) {
|
||||
self.next_instant += self.dur;
|
||||
self.dur *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
/// Make sure this can run for a few years with no issue
|
||||
#[test]
|
||||
fn years() {
|
||||
let mut hb = super::Heartbeat::default();
|
||||
const SECONDS_PER_DAY: u64 = 86_400;
|
||||
const DAYS_PER_YEAR: u64 = 365;
|
||||
let far_future =
|
||||
hb.next_instant + std::time::Duration::from_secs(SECONDS_PER_DAY * DAYS_PER_YEAR * 50);
|
||||
|
||||
// It will only print 32 lines or so for the next 50+ years
|
||||
for _ in 0..50 {
|
||||
hb.tick();
|
||||
}
|
||||
assert!(hb.next_instant > far_future);
|
||||
}
|
||||
}
|
||||
@@ -37,6 +37,7 @@ use platform::Signals;
|
||||
/// Generate a persistent device ID, stores it to disk, and reads it back.
|
||||
pub(crate) mod device_id;
|
||||
pub mod dns_control;
|
||||
pub mod heartbeat;
|
||||
pub mod known_dirs;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
@@ -219,6 +220,8 @@ pub fn run_only_headless_client() -> Result<()> {
|
||||
}
|
||||
assert!(std::env::var(TOKEN_ENV_KEY).is_err());
|
||||
|
||||
// TODO: This might have the same issue with fatal errors not getting logged
|
||||
// as addressed for the IPC service in PR #5216
|
||||
let (layer, _handle) = cli
|
||||
.common
|
||||
.log_dir
|
||||
@@ -347,6 +350,7 @@ pub(crate) fn run_debug_ipc_service() -> Result<()> {
|
||||
debug_command_setup()?;
|
||||
let rt = tokio::runtime::Runtime::new()?;
|
||||
let _guard = rt.enter();
|
||||
rt.spawn(crate::heartbeat::heartbeat());
|
||||
let mut signals = Signals::new()?;
|
||||
|
||||
// Couldn't get the loop to work here yet, so SIGHUP is not implemented
|
||||
|
||||
@@ -105,7 +105,10 @@ pub(crate) fn run_ipc_service(cli: CliCommon) -> Result<()> {
|
||||
anyhow::bail!("This is the IPC service binary, it's not meant to run interactively.");
|
||||
}
|
||||
let rt = tokio::runtime::Runtime::new()?;
|
||||
rt.block_on(crate::ipc_listen())?;
|
||||
rt.spawn(crate::heartbeat::heartbeat());
|
||||
if let Err(error) = rt.block_on(crate::ipc_listen()) {
|
||||
tracing::error!(?error, "`ipc_listen` failed");
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -156,14 +159,11 @@ impl IpcServer {
|
||||
tracing::info!("Listening for GUI to connect over IPC...");
|
||||
let (stream, _) = self.listener.accept().await?;
|
||||
let cred = stream.peer_cred()?;
|
||||
// I'm not sure if we can enforce group membership here - Docker
|
||||
// might just be enforcing it with filesystem permissions.
|
||||
// Checking the secondary groups of another user looks complicated.
|
||||
tracing::info!(
|
||||
uid = cred.uid(),
|
||||
gid = cred.gid(),
|
||||
pid = cred.pid(),
|
||||
"Got an IPC connection"
|
||||
"Accepted an IPC connection"
|
||||
);
|
||||
Ok(stream)
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@ use connlib_client_shared::file_logger;
|
||||
use connlib_shared::BUNDLE_ID;
|
||||
use std::{
|
||||
ffi::{c_void, OsString},
|
||||
os::windows::io::AsRawHandle,
|
||||
path::{Path, PathBuf},
|
||||
str::FromStr,
|
||||
time::Duration,
|
||||
@@ -17,7 +18,9 @@ use std::{
|
||||
use tokio::net::windows::named_pipe;
|
||||
use tracing::subscriber::set_global_default;
|
||||
use tracing_subscriber::{layer::SubscriberExt as _, EnvFilter, Layer, Registry};
|
||||
use windows::Win32::Security as WinSec;
|
||||
use windows::Win32::{
|
||||
Foundation::HANDLE, Security as WinSec, System::Pipes::GetNamedPipeClientProcessId,
|
||||
};
|
||||
use windows_service::{
|
||||
service::{
|
||||
ServiceControl, ServiceControlAccept, ServiceExitCode, ServiceState, ServiceStatus,
|
||||
@@ -78,30 +81,33 @@ pub(crate) fn run_ipc_service(_cli: CliCommon) -> Result<()> {
|
||||
windows_service::define_windows_service!(ffi_service_run, windows_service_run);
|
||||
|
||||
fn windows_service_run(arguments: Vec<OsString>) {
|
||||
if let Err(error) = fallible_windows_service_run(arguments) {
|
||||
tracing::error!(?error, "fallible_windows_service_run returned an error");
|
||||
let log_path = crate::known_dirs::ipc_service_logs()
|
||||
.expect("Should be able to compute IPC service logs dir");
|
||||
std::fs::create_dir_all(&log_path).expect("We should have permissions to create our log dir");
|
||||
let (layer, handle) = file_logger::layer(&log_path);
|
||||
let filter = EnvFilter::from_str(SERVICE_RUST_LOG)
|
||||
.expect("Hard-coded log filter should always be parsable");
|
||||
let subscriber = Registry::default().with(layer.with_filter(filter));
|
||||
set_global_default(subscriber).expect("`set_global_default` should always work)");
|
||||
tracing::info!(git_version = crate::GIT_VERSION);
|
||||
if let Err(error) = fallible_windows_service_run(arguments, handle) {
|
||||
tracing::error!(?error, "`fallible_windows_service_run` returned an error");
|
||||
}
|
||||
}
|
||||
|
||||
// Most of the Windows-specific service stuff should go here
|
||||
//
|
||||
// The arguments don't seem to match the ones passed to the main thread at all.
|
||||
fn fallible_windows_service_run(arguments: Vec<OsString>) -> Result<()> {
|
||||
let log_path =
|
||||
crate::known_dirs::ipc_service_logs().context("Can't compute IPC service logs dir")?;
|
||||
std::fs::create_dir_all(&log_path)?;
|
||||
let (layer, _handle) = file_logger::layer(&log_path);
|
||||
let filter = EnvFilter::from_str(SERVICE_RUST_LOG)?;
|
||||
let subscriber = Registry::default().with(layer.with_filter(filter));
|
||||
set_global_default(subscriber)?;
|
||||
tracing::info!(git_version = crate::GIT_VERSION);
|
||||
//
|
||||
// If Windows stops us gracefully, this function may never return.
|
||||
fn fallible_windows_service_run(
|
||||
arguments: Vec<OsString>,
|
||||
logging_handle: file_logger::Handle,
|
||||
) -> Result<()> {
|
||||
tracing::info!(?arguments, "fallible_windows_service_run");
|
||||
|
||||
let rt = tokio::runtime::Runtime::new()?;
|
||||
|
||||
// Fixes <https://github.com/firezone/firezone/issues/4899>,
|
||||
// DNS rules persisting after reboot
|
||||
crate::dns_control::deactivate().ok();
|
||||
rt.spawn(crate::heartbeat::heartbeat());
|
||||
|
||||
let ipc_task = rt.spawn(super::ipc_listen());
|
||||
let ipc_task_ah = ipc_task.abort_handle();
|
||||
@@ -111,8 +117,12 @@ fn fallible_windows_service_run(arguments: Vec<OsString>) -> Result<()> {
|
||||
match control_event {
|
||||
// TODO
|
||||
ServiceControl::Interrogate => ServiceControlHandlerResult::NoError,
|
||||
ServiceControl::Stop => {
|
||||
tracing::info!("Got stop signal from service controller");
|
||||
ServiceControl::PowerEvent(event) => {
|
||||
tracing::info!(?event, "Power event");
|
||||
ServiceControlHandlerResult::NoError
|
||||
}
|
||||
ServiceControl::Shutdown | ServiceControl::Stop => {
|
||||
tracing::info!(?control_event, "Got stop signal from service controller");
|
||||
ipc_task_ah.abort();
|
||||
ServiceControlHandlerResult::NoError
|
||||
}
|
||||
@@ -125,12 +135,13 @@ fn fallible_windows_service_run(arguments: Vec<OsString>) -> Result<()> {
|
||||
| ServiceControl::ParamChange
|
||||
| ServiceControl::Pause
|
||||
| ServiceControl::Preshutdown
|
||||
| ServiceControl::Shutdown
|
||||
| ServiceControl::HardwareProfileChange(_)
|
||||
| ServiceControl::PowerEvent(_)
|
||||
| ServiceControl::SessionChange(_)
|
||||
| ServiceControl::TimeChange
|
||||
| ServiceControl::TriggerEvent => ServiceControlHandlerResult::NotImplemented,
|
||||
| ServiceControl::TriggerEvent => {
|
||||
tracing::warn!(?control_event, "Unhandled service control event");
|
||||
ServiceControlHandlerResult::NotImplemented
|
||||
}
|
||||
_ => ServiceControlHandlerResult::NotImplemented,
|
||||
}
|
||||
};
|
||||
@@ -140,7 +151,9 @@ fn fallible_windows_service_run(arguments: Vec<OsString>) -> Result<()> {
|
||||
status_handle.set_service_status(ServiceStatus {
|
||||
service_type: SERVICE_TYPE,
|
||||
current_state: ServiceState::Running,
|
||||
controls_accepted: ServiceControlAccept::STOP,
|
||||
controls_accepted: ServiceControlAccept::POWER_EVENT
|
||||
| ServiceControlAccept::SHUTDOWN
|
||||
| ServiceControlAccept::STOP,
|
||||
exit_code: ServiceExitCode::Win32(0),
|
||||
checkpoint: 0,
|
||||
wait_hint: Duration::default(),
|
||||
@@ -152,25 +165,38 @@ fn fallible_windows_service_run(arguments: Vec<OsString>) -> Result<()> {
|
||||
// We cancelled because Windows asked us to shut down.
|
||||
Ok(())
|
||||
}
|
||||
Err(join_error) => {
|
||||
// The IPC task may have panicked
|
||||
Err(join_error.into())
|
||||
}
|
||||
Ok(Err(error)) => Err(error.context("ipc_listen failed")),
|
||||
Err(join_error) => Err(anyhow::Error::from(join_error).context("`ipc_listen` panicked")),
|
||||
Ok(Err(error)) => Err(error.context("`ipc_listen` threw an error")),
|
||||
Ok(Ok(impossible)) => match impossible {},
|
||||
};
|
||||
if let Err(error) = &result {
|
||||
tracing::error!(?error, "`ipc_listen` failed");
|
||||
}
|
||||
|
||||
// Drop the logging handle so it flushes the logs before we let Windows kill our process.
|
||||
// There is no obvious and elegant way to do this, since the logging and `ServiceState`
|
||||
// changes are interleaved, not nested:
|
||||
// - Start logging
|
||||
// - ServiceState::Running
|
||||
// - Stop logging
|
||||
// - ServiceState::Stopped
|
||||
std::mem::drop(logging_handle);
|
||||
|
||||
// Tell Windows that we're stopping
|
||||
status_handle.set_service_status(ServiceStatus {
|
||||
service_type: SERVICE_TYPE,
|
||||
current_state: ServiceState::Stopped,
|
||||
controls_accepted: ServiceControlAccept::empty(),
|
||||
exit_code: ServiceExitCode::Win32(if result.is_ok() { 0 } else { 1 }),
|
||||
checkpoint: 0,
|
||||
wait_hint: Duration::default(),
|
||||
process_id: None,
|
||||
})?;
|
||||
result
|
||||
// Per Windows docs, this will cause Windows to kill our process eventually.
|
||||
status_handle
|
||||
.set_service_status(ServiceStatus {
|
||||
service_type: SERVICE_TYPE,
|
||||
current_state: ServiceState::Stopped,
|
||||
controls_accepted: ServiceControlAccept::empty(),
|
||||
exit_code: ServiceExitCode::Win32(if result.is_ok() { 0 } else { 1 }),
|
||||
checkpoint: 0,
|
||||
wait_hint: Duration::default(),
|
||||
process_id: None,
|
||||
})
|
||||
.expect("Should be able to tell Windows we're stopping");
|
||||
// Generally unreachable
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) struct IpcServer {
|
||||
@@ -219,11 +245,21 @@ impl IpcServer {
|
||||
.bind_to_pipe()
|
||||
.await
|
||||
.context("Couldn't bind to named pipe")?;
|
||||
tracing::info!("Listening for GUI to connect over IPC...");
|
||||
tracing::info!(
|
||||
server_pid = std::process::id(),
|
||||
"Listening for GUI to connect over IPC..."
|
||||
);
|
||||
server
|
||||
.connect()
|
||||
.await
|
||||
.context("Couldn't accept IPC connection from GUI")?;
|
||||
let handle = HANDLE(server.as_raw_handle() as isize);
|
||||
let mut client_pid: u32 = 0;
|
||||
// SAFETY: Windows doesn't store this pointer or handle, and we just got the handle
|
||||
// from Tokio, so it should be valid.
|
||||
unsafe { GetNamedPipeClientProcessId(handle, &mut client_pid) }
|
||||
.context("Couldn't get PID of named pipe client")?;
|
||||
tracing::info!(?client_pid, "Accepted IPC connection");
|
||||
Ok(server)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user