feat(spanv2): Implement AI normalizations

Dav1dde · Dav1dde · commit 5fcda120b860 · 2025-11-04T16:22:39.000+01:00
diff --git a/relay-conventions/src/consts.rs b/relay-conventions/src/consts.rs
@@ -21,7 +21,18 @@ convention_attributes!(
     DB_SYSTEM_NAME => "db.system.name",
     DESCRIPTION => "sentry.description",
     FAAS_TRIGGER => "faas.trigger",
+    GEN_AI_COST_INPUT_TOKENS => "gen_ai.cost.input_tokens",
+    GEN_AI_COST_OUTPUT_TOKENS => "gen_ai.cost.output_tokens",
+    GEN_AI_COST_TOTAL_TOKENS => "gen_ai.cost.total_tokens",
+    GEN_AI_REQUEST_MODEL => "gen_ai.request.model",
+    GEN_AI_RESPONSE_MODEL => "gen_ai.response.model",
+    GEN_AI_RESPONSE_TPS => "gen_ai.response.tokens_per_second",
     GEN_AI_SYSTEM => "gen_ai.system",
+    GEN_AI_USAGE_INPUT_CACHED_TOKENS => "gen_ai.usage.input_tokens.cached",
+    GEN_AI_USAGE_INPUT_TOKENS => "gen_ai.usage.input_tokens",
+    GEN_AI_USAGE_OUTPUT_REASONING_TOKENS => "gen_ai.usage.output_tokens.reasoning",
+    GEN_AI_USAGE_OUTPUT_TOKENS => "gen_ai.usage.output_tokens",
+    GEN_AI_USAGE_TOTAL_TOKENS => "gen_ai.usage.total_tokens",
     HTTP_PREFETCH => "sentry.http.prefetch",
     HTTP_REQUEST_METHOD => "http.request.method",
     HTTP_RESPONSE_STATUS_CODE => "http.response.status_code",
diff --git a/relay-event-normalization/src/eap/ai.rs b/relay-event-normalization/src/eap/ai.rs
@@ -0,0 +1,108 @@
+use std::time::Duration;
+
+use relay_conventions::{
+    GEN_AI_COST_INPUT_TOKENS, GEN_AI_COST_OUTPUT_TOKENS, GEN_AI_COST_TOTAL_TOKENS,
+    GEN_AI_REQUEST_MODEL, GEN_AI_RESPONSE_MODEL, GEN_AI_RESPONSE_TPS,
+    GEN_AI_USAGE_INPUT_CACHED_TOKENS, GEN_AI_USAGE_INPUT_TOKENS,
+    GEN_AI_USAGE_OUTPUT_REASONING_TOKENS, GEN_AI_USAGE_OUTPUT_TOKENS, GEN_AI_USAGE_TOTAL_TOKENS,
+};
+use relay_event_schema::protocol::Attributes;
+use relay_protocol::Annotated;
+
+use crate::ModelCosts;
+use crate::span::ai;
+
+/// Normalizes AI attributes.
+///
+/// Sets total tokens, tokens per second and costs, in this order, if attributes are present.
+pub fn normalize_ai(
+    attributes: &mut Annotated<Attributes>,
+    duration: Option<Duration>,
+    costs: Option<&ModelCosts>,
+) {
+    if let Some(attrs) = attributes.value_mut() {
+        normalize_total_tokens(attrs);
+        normalize_tokens_per_second(attrs, duration);
+        normalize_ai_costs(attrs, costs);
+    }
+}
+
+/// Calculates the [`GEN_AI_USAGE_TOTAL_TOKENS`] attribute.
+///
+/// An existing total is never overwritten. The total is the sum of the input and output token
+/// counts, where a missing count is treated as zero. Nothing is written if both are missing.
+fn normalize_total_tokens(attributes: &mut Attributes) {
+    if attributes.contains_key(GEN_AI_USAGE_TOTAL_TOKENS) {
+        return;
+    }
+
+    let tokens = |key| attributes.get_value(key).and_then(|v| v.as_f64());
+
+    let total_tokens = match (
+        tokens(GEN_AI_USAGE_INPUT_TOKENS),
+        tokens(GEN_AI_USAGE_OUTPUT_TOKENS),
+    ) {
+        (None, None) => return,
+        (input, output) => input.unwrap_or(0.0) + output.unwrap_or(0.0),
+    };
+
+    attributes.insert(GEN_AI_USAGE_TOTAL_TOKENS, total_tokens);
+}
+
+/// Calculates the [`GEN_AI_RESPONSE_TPS`] attribute.
+///
+/// Skipped if already set, without a non-zero duration, or without positive output tokens.
+fn normalize_tokens_per_second(attributes: &mut Attributes, duration: Option<Duration>) {
+    let seconds = match duration {
+        Some(duration) if !duration.is_zero() => duration.as_secs_f64(),
+        _ => return,
+    };
+
+    if attributes.contains_key(GEN_AI_RESPONSE_TPS) {
+        return;
+    }
+
+    let output_tokens = attributes.get_value(GEN_AI_USAGE_OUTPUT_TOKENS);
+    let output_tokens = output_tokens.and_then(|v| v.as_f64());
+
+    if let Some(output_tokens) = output_tokens.filter(|tokens| *tokens > 0.0) {
+        attributes.insert(GEN_AI_RESPONSE_TPS, output_tokens / seconds);
+    }
+}
+
+/// Calculates model costs and serializes them into attributes.
+///
+/// Uses [`GEN_AI_REQUEST_MODEL`], or [`GEN_AI_RESPONSE_MODEL`] if absent, to look up the costs.
+/// Missing token counts are zero. Skipped if a total cost exists or no costs can be determined.
+fn normalize_ai_costs(attributes: &mut Attributes, model_costs: Option<&ModelCosts>) {
+    if attributes.contains_key(GEN_AI_COST_TOTAL_TOKENS) {
+        return;
+    }
+
+    let model = attributes
+        .get_value(GEN_AI_REQUEST_MODEL)
+        .or_else(|| attributes.get_value(GEN_AI_RESPONSE_MODEL))
+        .and_then(|v| v.as_str());
+
+    let Some(model_cost) = model.and_then(|model| model_costs?.cost_per_token(model)) else {
+        return;
+    };
+
+    let token_count = |key| {
+        let count = attributes.get_value(key).and_then(|v| v.as_f64());
+        count.unwrap_or(0.0)
+    };
+
+    let used_tokens = ai::UsedTokens {
+        input_tokens: token_count(GEN_AI_USAGE_INPUT_TOKENS),
+        input_cached_tokens: token_count(GEN_AI_USAGE_INPUT_CACHED_TOKENS),
+        output_tokens: token_count(GEN_AI_USAGE_OUTPUT_TOKENS),
+        output_reasoning_tokens: token_count(GEN_AI_USAGE_OUTPUT_REASONING_TOKENS),
+    };
+
+    if let Some(costs) = ai::calculate_costs(model_cost, used_tokens) {
+        attributes.insert(GEN_AI_COST_INPUT_TOKENS, costs.input);
+        attributes.insert(GEN_AI_COST_OUTPUT_TOKENS, costs.output);
+        attributes.insert(GEN_AI_COST_TOTAL_TOKENS, costs.total());
+    }
+}
diff --git a/relay-event-normalization/src/eap/mod.rs b/relay-event-normalization/src/eap/mod.rs
@@ -16,6 +16,10 @@ use relay_protocol::{Annotated, ErrorKind, Meta, Remark, RemarkType, Value};
 
 use crate::{ClientHints, FromUserAgentInfo as _, RawUserAgentInfo};
 
+mod ai;
+
+pub use self::ai::normalize_ai;
+
 /// Normalizes/validates all attribute types.
 ///
 /// Removes and marks all attributes with an error for which the specified [`AttributeType`]
diff --git a/relay-event-normalization/src/normalize/span/ai.rs b/relay-event-normalization/src/normalize/span/ai.rs
@@ -144,6 +144,10 @@ fn map_ai_measurements_to_data(span: &mut Span) {
         &mut data.gen_ai_usage_output_tokens,
         "ai_completion_tokens_used",
     );
+}
+
+fn set_total_tokens(span: &mut Span) {
+    let data = span.data.get_or_insert_with(SpanData::default);
 
     // It might be that 'total_tokens' is not set in which case we need to calculate it
     if data.gen_ai_usage_total_tokens.value().is_none() {
@@ -214,6 +218,8 @@ pub fn enrich_ai_span_data(
     }
 
     map_ai_measurements_to_data(span);
+    set_total_tokens(span);
+
     if let Some(model_costs) = model_costs {
         extract_ai_data(span, model_costs);
     }
diff --git a/relay-server/src/processing/spans/mod.rs b/relay-server/src/processing/spans/mod.rs
@@ -159,7 +159,7 @@ impl processing::Processor for SpansProcessor {
 
         dynamic_sampling::validate_dsc(&spans).reject(&spans)?;
 
-        process::normalize(&mut spans, &self.geo_lookup);
+        process::normalize(&mut spans, &self.geo_lookup, ctx);
         filter::filter(&mut spans, ctx);
 
         self.limiter.enforce_quotas(&mut spans, ctx).await?;
diff --git a/relay-server/src/processing/spans/process.rs b/relay-server/src/processing/spans/process.rs
@@ -1,3 +1,5 @@
+use std::time::Duration;
+
 use relay_event_normalization::{
     GeoIpLookup, RequiredMode, SchemaProcessor, TimestampProcessor, TrimmingProcessor, eap,
 };
@@ -67,11 +69,11 @@ fn expand_legacy_span(item: &Item) -> Result<WithHeader<SpanV2>> {
 }
 
 /// Normalizes individual spans.
-pub fn normalize(spans: &mut Managed<ExpandedSpans>, geo_lookup: &GeoIpLookup) {
+pub fn normalize(spans: &mut Managed<ExpandedSpans>, geo_lookup: &GeoIpLookup, ctx: Context<'_>) {
     spans.retain_with_context(
         |spans| (&mut spans.spans, spans.headers.meta()),
         |span, meta, _| {
-            normalize_span(span, meta, geo_lookup).inspect_err(|err| {
+            normalize_span(span, meta, geo_lookup, ctx).inspect_err(|err| {
                 relay_log::debug!("failed to normalize span: {err}");
             })
         },
@@ -82,6 +84,7 @@ fn normalize_span(
     span: &mut Annotated<SpanV2>,
     meta: &RequestMeta,
     geo_lookup: &GeoIpLookup,
+    ctx: Context<'_>,
 ) -> Result<()> {
     process_value(span, &mut TimestampProcessor, ProcessingState::root())?;
 
@@ -97,7 +100,9 @@ fn normalize_span(
             meta.client_addr().and_then(|ip| geo_lookup.lookup(ip))
         });
 
-        // TODO: ai model costs
+        let duration = span_duration(span);
+        let model_costs = ctx.global_config.ai_model_costs.as_ref().ok();
+        eap::normalize_ai(&mut span.attributes, duration, model_costs);
     };
 
     process_value(span, &mut TrimmingProcessor::new(), ProcessingState::root())?;
@@ -144,6 +149,12 @@ fn scrub_span(span: &mut Annotated<SpanV2>, ctx: Context<'_>) -> Result<()> {
     Ok(())
 }
 
+/// Time from span start to end; `None` if a timestamp is missing or the conversion fails.
+fn span_duration(span: &SpanV2) -> Option<Duration> {
+    let (start, end) = (span.start_timestamp.value()?, span.end_timestamp.value()?);
+    (*end - *start).to_std().ok()
+}
+
 #[cfg(test)]
 mod tests {
     use relay_pii::{DataScrubbingConfig, PiiConfig};