From 55c97acfc3e75bd90cc9538020430af3b2014875 Mon Sep 17 00:00:00 2001 From: Thomas Eizinger Date: Tue, 13 Aug 2024 23:17:21 +0100 Subject: [PATCH] feat(relay): record error code as label in response counter metric (#6274) This will allow us to write queries and thus alerts for increased number of error responses such as `Allocation Mismatch`. When attaching labels to metrics, it is important to avoid cardinality explosions. Thus, the possible label values should always be a fixed, bounded set of values. The possible error codes could be quite a few but in practise, we only use a handful and clients cannot influence, which error codes we send. Thus, it is safe to create labels for these codes. The same would not be true for IP addresses or ports for example. --- rust/Cargo.lock | 1 + rust/relay/Cargo.toml | 1 + rust/relay/src/server.rs | 21 ++++++++++++++------- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index a56d3ff66..dbb62876b 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1964,6 +1964,7 @@ dependencies = [ "secrecy", "serde", "sha2", + "smallvec 1.13.2", "socket-factory", "socket2", "stun_codec", diff --git a/rust/relay/Cargo.toml b/rust/relay/Cargo.toml index 6be4e0c27..479b19ab7 100644 --- a/rust/relay/Cargo.toml +++ b/rust/relay/Cargo.toml @@ -26,6 +26,7 @@ rand = "0.8.5" secrecy = { workspace = true } serde = { version = "1.0.204", features = ["derive"] } sha2 = "0.10.8" +smallvec = "1.13.2" socket-factory = { workspace = true } socket2 = { workspace = true } stun_codec = "0.3.4" diff --git a/rust/relay/src/server.rs b/rust/relay/src/server.rs index da5a6f40c..fba5389be 100644 --- a/rust/relay/src/server.rs +++ b/rust/relay/src/server.rs @@ -17,6 +17,7 @@ use opentelemetry::metrics::{Counter, UpDownCounter}; use opentelemetry::KeyValue; use rand::Rng; use secrecy::SecretString; +use smallvec::SmallVec; use std::collections::{HashMap, VecDeque}; use std::hash::Hash; use std::net::{IpAddr, SocketAddr}; @@ -862,6 +863,7 @@ where fn send_message(&mut self, message: Message, recipient: ClientSocket) { let method = message.method(); let class = message.class(); + let error_code = message.get_attribute::().map(|e| e.code()); tracing::trace!(target: "relay", method = %message.method(), class = %message.class(), "Sending message"); let Ok(bytes) = self.encoder.encode_into_bytes(message) else { @@ -890,13 +892,18 @@ where CREATE_PERMISSION => "createpermission", _ => return, }; - self.responses_counter.add( - 1, - &[ - KeyValue::new("response_class", response_class), - KeyValue::new("message_type", message_type), - ], - ); + let error_code = error_code.map(|c| opentelemetry::Value::from(c as i64)); + + // Use a `SmallVec` to avoid heap-allocations when collecting metrics. + let mut attributes = SmallVec::<[KeyValue; 3]>::with_capacity(3); + attributes.push(KeyValue::new("response_class", response_class)); + attributes.push(KeyValue::new("message_type", message_type)); + + if let Some(error_code) = error_code { + attributes.push(KeyValue::new("error_code", error_code)); + } + + self.responses_counter.add(1, &attributes); } fn delete_allocation(&mut self, port: AllocationPort) {