chore(relay): reduce instrumentation overhead (#4426)

Previously, we were creating a lot of spans because they were all set to
`level = error`. We now reduce those spans to `debug` which should help
with the CPU utilization.

Related: #4366.
This commit is contained in:
Thomas Eizinger
2024-04-02 10:02:59 +11:00
committed by GitHub
parent a2a86703e7
commit a1bd9248d0
3 changed files with 17 additions and 16 deletions

View File

@@ -73,6 +73,10 @@ struct Args {
#[arg(long, env, hide = true)]
otlp_grpc_endpoint: Option<SocketAddr>,
/// The address of the local interface where we should serve the profiling controller.
#[arg(long, env, hide = true)]
profiler_ctrl_addr: Option<SocketAddr>,
/// The Google Project ID to embed in spans.
///
/// Set this if you are running on Google Cloud but using the OTLP trace collector.
@@ -117,7 +121,7 @@ async fn main() -> Result<()> {
let base_url = args.api_url.clone();
let stamp_secret = server.auth_secret();
let span = tracing::error_span!("connect_to_portal", config_url = %base_url);
let span = tracing::debug_span!("connect_to_portal", config_url = %base_url);
connect_to_portal(&args, token, base_url, stamp_secret)
.instrument(span)
@@ -354,9 +358,6 @@ where
}
fn poll(&mut self, cx: &mut std::task::Context<'_>) -> Poll<Result<()>> {
let span = tracing::error_span!("Eventloop::poll");
let _guard = span.enter();
loop {
// Don't fail these results. One of the senders might not be active because we might not be listening on IP4 / IP6.
let _ = ready!(self.outbound_ip4_data_sender.poll_ready_unpin(cx));
@@ -368,7 +369,7 @@ where
if let Some(next_command) = self.server.next_command() {
match next_command {
Command::SendMessage { payload, recipient } => {
let span = tracing::error_span!("Command::SendMessage");
let span = tracing::debug_span!("Command::SendMessage");
let _guard = span.enter();
let sender = match recipient.family() {
@@ -391,7 +392,7 @@ where
}
Command::CreateAllocation { id, family, port } => {
let span =
tracing::error_span!("Command::CreateAllocation", %id, %family, %port);
tracing::debug_span!("Command::CreateAllocation", %id, %family, %port);
let _guard = span.enter();
self.allocations.insert(
@@ -400,7 +401,7 @@ where
);
}
Command::FreeAllocation { id, family } => {
let span = tracing::error_span!("Command::FreeAllocation", %id, %family);
let span = tracing::debug_span!("Command::FreeAllocation", %id, %family);
let _guard = span.enter();
if self.allocations.remove(&(id, family)).is_none() {
@@ -411,7 +412,7 @@ where
tracing::info!(target: "relay", "Freeing addresses of allocation {id}");
}
Command::Wake { deadline } => {
let span = tracing::error_span!("Command::Wake", ?deadline);
let span = tracing::debug_span!("Command::Wake", ?deadline);
let _guard = span.enter();
match deadline.duration_since(now) {
@@ -431,7 +432,7 @@ where
Pin::new(&mut self.sleep).reset(deadline);
}
Command::ForwardData { id, data, receiver } => {
let span = tracing::error_span!("Command::ForwardData", %id, %receiver);
let span = tracing::debug_span!("Command::ForwardData", %id, %receiver);
let _guard = span.enter();
let mut allocation = match self.allocations.entry((id, receiver.family())) {

View File

@@ -229,14 +229,14 @@ where
/// Process the bytes received from a client.
///
/// After calling this method, you should call [`Server::next_command`] until it returns `None`.
#[tracing::instrument(skip_all, fields(transaction_id, %sender, allocation, channel, recipient, peer), level = "error")]
#[tracing::instrument(level = "debug", skip_all, fields(transaction_id, %sender, allocation, channel, recipient, peer))]
pub fn handle_client_input(&mut self, bytes: &[u8], sender: ClientSocket, now: SystemTime) {
tracing::trace!(target: "wire", num_bytes = %bytes.len());
match self.decoder.decode(bytes) {
Ok(Ok(message)) => {
if let Some(id) = message.transaction_id() {
Span::current().record("transaction_id", hex::encode(id.as_bytes()));
Span::current().record("transaction_id", field::debug(id));
}
self.handle_client_message(message, sender, now);
@@ -321,7 +321,7 @@ where
}
/// Process the bytes received from an allocation.
#[tracing::instrument(skip_all, fields(%sender, %allocation, recipient, channel), level = "error")]
#[tracing::instrument(level = "debug", skip_all, fields(%sender, %allocation, recipient, channel))]
pub fn handle_peer_traffic(
&mut self,
bytes: &[u8],
@@ -378,7 +378,7 @@ where
})
}
#[tracing::instrument(skip(self), level = "error")]
#[tracing::instrument(level = "debug", skip(self))]
pub fn handle_deadline_reached(&mut self, now: SystemTime) {
for action in self.time_events.pending_actions(now) {
match action {
@@ -423,7 +423,7 @@ where
}
/// An allocation failed.
#[tracing::instrument(skip(self), fields(%allocation), level = "error")]
#[tracing::instrument(level = "debug", skip(self), fields(%allocation))]
pub fn handle_allocation_failed(&mut self, allocation: AllocationId) {
self.delete_allocation(allocation)
}
@@ -723,7 +723,7 @@ where
///
/// This TURN server implementation does not support relaying data other than through channels.
/// Thus, creating a permission is a no-op that always succeeds.
#[tracing::instrument(skip(self, message, now), fields(%sender), level = "error")]
#[tracing::instrument(level = "debug", skip(self, message, now), fields(%sender))]
fn handle_create_permission_request(
&mut self,
message: CreatePermission,

View File

@@ -47,7 +47,7 @@ impl Decoder {
let transaction_id = broken_message.transaction_id();
let error = broken_message.error().clone();
tracing::debug!(transaction_id = %hex::encode(transaction_id.as_bytes()), %method, %error, "Failed to decode attributes of message");
tracing::debug!(transaction_id = ?transaction_id, %method, %error, "Failed to decode attributes of message");
let error_code = ErrorCode::from(error);