Commit 95ca225

feat(spanv2): Implement AI normalizations
1 parent 29ea47c commit 95ca225

5 files changed: +136 -4 lines changed

relay-conventions/src/consts.rs

Lines changed: 11 additions & 0 deletions
@@ -21,7 +21,18 @@ convention_attributes!(
     DB_SYSTEM_NAME => "db.system.name",
     DESCRIPTION => "sentry.description",
     FAAS_TRIGGER => "faas.trigger",
+    GEN_AI_COST_INPUT_TOKENS => "gen_ai.cost.input_tokens",
+    GEN_AI_COST_OUTPUT_TOKENS => "gen_ai.cost.output_tokens",
+    GEN_AI_COST_TOTAL_TOKENS => "gen_ai.cost.total_tokens",
+    GEN_AI_REQUEST_MODEL => "gen_ai.request.model",
+    GEN_AI_RESPONSE_MODEL => "gen_ai.response.model",
+    GEN_AI_RESPONSE_TPS => "gen_ai.response.tokens_per_second",
     GEN_AI_SYSTEM => "gen_ai.system",
+    GEN_AI_USAGE_INPUT_CACHED_TOKENS => "gen_ai.usage.input_tokens.cached",
+    GEN_AI_USAGE_INPUT_TOKENS => "gen_ai.usage.input_tokens",
+    GEN_AI_USAGE_OUTPUT_REASONING_TOKENS => "gen_ai.usage.output_tokens.reasoning",
+    GEN_AI_USAGE_OUTPUT_TOKENS => "gen_ai.usage.output_tokens",
+    GEN_AI_USAGE_TOTAL_TOKENS => "gen_ai.usage.total_tokens",
     HTTP_PREFETCH => "sentry.http.prefetch",
     HTTP_REQUEST_METHOD => "http.request.method",
     HTTP_RESPONSE_STATUS_CODE => "http.response.status_code",
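
Note: the new keys fall into two groups — raw usage counters reported by SDKs (the gen_ai.usage.* counters and the request/response model names) and values Relay derives during normalization (gen_ai.usage.total_tokens, gen_ai.response.tokens_per_second, and the gen_ai.cost.* attributes). A minimal sketch of the derivations with made-up numbers, just to illustrate how the keys relate; the real logic operates on SpanV2 attributes in the new module below:

// Illustrative arithmetic only; values here are invented, not real span data.
fn main() {
    let input_tokens: f64 = 1_200.0; // gen_ai.usage.input_tokens
    let output_tokens: f64 = 300.0; // gen_ai.usage.output_tokens
    let duration_secs: f64 = 2.5; // span end_timestamp - start_timestamp

    let total_tokens = input_tokens + output_tokens; // gen_ai.usage.total_tokens
    let tokens_per_second = output_tokens / duration_secs; // gen_ai.response.tokens_per_second

    assert_eq!(total_tokens, 1_500.0);
    assert_eq!(tokens_per_second, 120.0);
}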
relay-event-normalization/src/eap/ai.rs

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
+use std::time::Duration;
+
+use relay_conventions::{
+    GEN_AI_COST_INPUT_TOKENS, GEN_AI_COST_OUTPUT_TOKENS, GEN_AI_COST_TOTAL_TOKENS,
+    GEN_AI_REQUEST_MODEL, GEN_AI_RESPONSE_MODEL, GEN_AI_RESPONSE_TPS,
+    GEN_AI_USAGE_INPUT_CACHED_TOKENS, GEN_AI_USAGE_INPUT_TOKENS,
+    GEN_AI_USAGE_OUTPUT_REASONING_TOKENS, GEN_AI_USAGE_OUTPUT_TOKENS, GEN_AI_USAGE_TOTAL_TOKENS,
+};
+use relay_event_schema::protocol::{Attributes, SpanV2};
+
+use crate::ModelCosts;
+use crate::span::ai;
+
+/// Normalizes AI attributes.
+pub fn normalize_ai(span: &mut SpanV2, costs: Option<&ModelCosts>) {
+    let duration = span_duration(span);
+
+    let Some(attributes) = span.attributes.value_mut() else {
+        return;
+    };
+
+    normalize_total_tokens(attributes);
+    normalize_tokens_per_second(attributes, duration);
+    normalize_ai_costs(attributes, costs);
+}
+
+/// Calculates the [`GEN_AI_USAGE_TOTAL_TOKENS`] attribute.
+fn normalize_total_tokens(attributes: &mut Attributes) {
+    if attributes.contains_key(GEN_AI_USAGE_TOTAL_TOKENS) {
+        return;
+    }
+
+    let input_tokens = attributes
+        .get_value(GEN_AI_USAGE_INPUT_TOKENS)
+        .and_then(|v| v.as_f64());
+
+    let output_tokens = attributes
+        .get_value(GEN_AI_USAGE_OUTPUT_TOKENS)
+        .and_then(|v| v.as_f64());
+
+    if input_tokens.is_none() && output_tokens.is_none() {
+        return;
+    }
+
+    let total_tokens = input_tokens.unwrap_or(0.0) + output_tokens.unwrap_or(0.0);
+    attributes.insert(GEN_AI_USAGE_TOTAL_TOKENS, total_tokens);
+}
+
+/// Calculates the [`GEN_AI_RESPONSE_TPS`] attribute.
+fn normalize_tokens_per_second(attributes: &mut Attributes, duration: Option<Duration>) {
+    let Some(duration) = duration.filter(|d| !d.is_zero()) else {
+        return;
+    };
+
+    if attributes.contains_key(GEN_AI_RESPONSE_TPS) {
+        return;
+    }
+
+    let output_tokens = attributes
+        .get_value(GEN_AI_USAGE_OUTPUT_TOKENS)
+        .and_then(|v| v.as_f64())
+        .filter(|v| *v > 0.0);
+
+    if let Some(output_tokens) = output_tokens {
+        let tps = output_tokens / duration.as_secs_f64();
+        attributes.insert(GEN_AI_RESPONSE_TPS, tps);
+    }
+}
+
+/// Calculates model costs and serializes them into attributes.
+fn normalize_ai_costs(attributes: &mut Attributes, model_costs: Option<&ModelCosts>) {
+    if attributes.contains_key(GEN_AI_COST_TOTAL_TOKENS) {
+        return;
+    }
+
+    let model_cost = attributes
+        .get_value(GEN_AI_REQUEST_MODEL)
+        .or_else(|| attributes.get_value(GEN_AI_RESPONSE_MODEL))
+        .and_then(|v| v.as_str())
+        .and_then(|model| model_costs?.cost_per_token(model));
+
+    let Some(model_cost) = model_cost else { return };
+
+    let get_tokens = |key| {
+        attributes
+            .get_value(key)
+            .and_then(|v| v.as_f64())
+            .unwrap_or(0.0)
+    };
+
+    let tokens = ai::UsedTokens {
+        input_tokens: get_tokens(GEN_AI_USAGE_INPUT_TOKENS),
+        input_cached_tokens: get_tokens(GEN_AI_USAGE_INPUT_CACHED_TOKENS),
+        output_tokens: get_tokens(GEN_AI_USAGE_OUTPUT_TOKENS),
+        output_reasoning_tokens: get_tokens(GEN_AI_USAGE_OUTPUT_REASONING_TOKENS),
+    };
+
+    let Some(costs) = ai::calculate_costs(model_cost, tokens) else {
+        return;
+    };
+
+    attributes.insert_if_missing(GEN_AI_COST_INPUT_TOKENS, || costs.input);
+    attributes.insert_if_missing(GEN_AI_COST_OUTPUT_TOKENS, || costs.output);
+    attributes.insert_if_missing(GEN_AI_COST_TOTAL_TOKENS, || costs.total());
+}
+
+fn span_duration(span: &SpanV2) -> Option<Duration> {
+    let start_timestamp = *span.start_timestamp.value()?;
+    let timestamp = *span.end_timestamp.value()?;
+    (timestamp - start_timestamp).to_std().ok()
+}
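
Note: the module leans on two helpers from crate::span::ai that are not part of this diff: ai::UsedTokens and ai::calculate_costs. The sketch below reconstructs their rough shape purely from how they are used above (field names, costs.input, costs.output, costs.total()); the actual definitions and the exact pricing formula live elsewhere in relay-event-normalization and may differ.

// Hedged sketch only: shapes inferred from the call sites above, not the real
// definitions. The price type and the cost formula here are illustrative assumptions.
pub struct UsedTokens {
    pub input_tokens: f64,
    pub input_cached_tokens: f64,
    pub output_tokens: f64,
    pub output_reasoning_tokens: f64,
}

pub struct Costs {
    pub input: f64,
    pub output: f64,
}

impl Costs {
    pub fn total(&self) -> f64 {
        self.input + self.output
    }
}

// Hypothetical per-token price entry; the real value comes from ModelCosts::cost_per_token.
pub struct ModelPrice {
    pub input_per_token: f64,
    pub output_per_token: f64,
}

pub fn calculate_costs(price: &ModelPrice, tokens: UsedTokens) -> Option<Costs> {
    // Simplified: bill all input tokens at one rate and all output tokens at another.
    // The real implementation may apply separate rates for cached input and
    // reasoning output tokens, which are carried in UsedTokens but ignored here.
    Some(Costs {
        input: tokens.input_tokens * price.input_per_token,
        output: tokens.output_tokens * price.output_per_token,
    })
}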

relay-event-normalization/src/eap/mod.rs

Lines changed: 4 additions & 0 deletions
@@ -16,6 +16,10 @@ use relay_protocol::{Annotated, ErrorKind, Meta, Remark, RemarkType, Value};
 
 use crate::{ClientHints, FromUserAgentInfo as _, RawUserAgentInfo};
 
+mod ai;
+
+pub use self::ai::normalize_ai;
+
 /// Normalizes/validates all attribute types.
 ///
 /// Removes and marks all attributes with an error for which the specified [`AttributeType`]

relay-event-normalization/src/normalize/span/ai.rs

Lines changed: 6 additions & 0 deletions
@@ -144,6 +144,10 @@ fn map_ai_measurements_to_data(span: &mut Span) {
         &mut data.gen_ai_usage_output_tokens,
         "ai_completion_tokens_used",
     );
+}
+
+fn set_total_tokens(span: &mut Span) {
+    let data = span.data.get_or_insert_with(SpanData::default);
 
     // It might be that 'total_tokens' is not set in which case we need to calculate it
    if data.gen_ai_usage_total_tokens.value().is_none() {
@@ -214,6 +218,8 @@ pub fn enrich_ai_span_data(
     }
 
     map_ai_measurements_to_data(span);
+    set_total_tokens(span);
+
     if let Some(model_costs) = model_costs {
         extract_ai_data(span, model_costs);
     }
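
Note: the body of the extracted set_total_tokens helper is mostly elided from this hunk; only its opening lines and the pre-existing comment are visible. Based on that comment and on the SpanV2 counterpart above, it presumably falls back to input + output when the SDK did not report a total. A standalone sketch of that fallback, with hypothetical plain-Option arguments instead of the real Annotated SpanData fields:

// Hedged sketch of the fallback only, not the real helper; the actual function
// operates on SpanData fields that are elided from this diff.
fn total_tokens_fallback(
    input_tokens: Option<f64>,
    output_tokens: Option<f64>,
    total_tokens: Option<f64>,
) -> Option<f64> {
    // Keep an SDK-provided total untouched.
    if total_tokens.is_some() {
        return total_tokens;
    }
    // With neither counter present there is nothing to derive.
    if input_tokens.is_none() && output_tokens.is_none() {
        return None;
    }
    Some(input_tokens.unwrap_or(0.0) + output_tokens.unwrap_or(0.0))
}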

relay-server/src/processing/spans/process.rs

Lines changed: 4 additions & 4 deletions
@@ -67,11 +67,11 @@ fn expand_legacy_span(item: &Item) -> Result<WithHeader<SpanV2>>
 }
 
 /// Normalizes individual spans.
-pub fn normalize(spans: &mut Managed<ExpandedSpans>, geo_lookup: &GeoIpLookup) {
+pub fn normalize(spans: &mut Managed<ExpandedSpans>, geo_lookup: &GeoIpLookup, ctx: Context<'_>) {
     spans.retain_with_context(
         |spans| (&mut spans.spans, spans.headers.meta()),
         |span, meta, _| {
-            normalize_span(span, meta, geo_lookup).inspect_err(|err| {
+            normalize_span(span, meta, geo_lookup, ctx).inspect_err(|err| {
                 relay_log::debug!("failed to normalize span: {err}");
             })
         },
@@ -82,6 +82,7 @@ fn normalize_span(
     span: &mut Annotated<SpanV2>,
     meta: &RequestMeta,
     geo_lookup: &GeoIpLookup,
+    ctx: Context<'_>,
 ) -> Result<()> {
     process_value(span, &mut TimestampProcessor, ProcessingState::root())?;
 
@@ -96,8 +97,7 @@
         eap::normalize_user_geo(&mut span.attributes, || {
            meta.client_addr().and_then(|ip| geo_lookup.lookup(ip))
         });
-
-        // TODO: ai model costs
+        eap::normalize_ai(span, ctx.global_config.ai_model_costs.as_ref().ok());
     };
 
     process_value(span, &mut TrimmingProcessor::new(), ProcessingState::root())?;
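
Note: the signature change threads the processing Context through normalize() so the span normalizer can reach the global config's AI model costs. The .as_ref().ok() call implies ai_model_costs sits behind a Result-like wrapper (for example an error boundary around config parsing), so a missing or unparsable cost table simply disables cost calculation instead of failing the span. A tiny runnable sketch of that shape, with stand-in types instead of the real GlobalConfig and ModelCosts:

// Stand-in types only; the real GlobalConfig and ModelCosts live in Relay's crates.
struct FakeModelCosts;

struct FakeGlobalConfig {
    // Assumed Result-like wrapper, mirroring `ai_model_costs.as_ref().ok()` in the diff.
    ai_model_costs: Result<FakeModelCosts, String>,
}

fn main() {
    let config = FakeGlobalConfig {
        ai_model_costs: Err("failed to deserialize model costs".to_owned()),
    };

    // Same pattern as the call site: a parse failure becomes None, and normalize_ai
    // then skips the gen_ai.cost.* attributes rather than erroring.
    let costs: Option<&FakeModelCosts> = config.ai_model_costs.as_ref().ok();
    assert!(costs.is_none());
}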
