Skip to content

Commit b2ff00b

Browse files
committed
feat(spanv2): Implement AI normalizations
1 parent a8b51eb commit b2ff00b

File tree

7 files changed

+408
-5
lines changed

7 files changed

+408
-5
lines changed

relay-conventions/src/consts.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,18 @@ convention_attributes!(
2121
DB_SYSTEM_NAME => "db.system.name",
2222
DESCRIPTION => "sentry.description",
2323
FAAS_TRIGGER => "faas.trigger",
24+
GEN_AI_COST_INPUT_TOKENS => "gen_ai.cost.input_tokens",
25+
GEN_AI_COST_OUTPUT_TOKENS => "gen_ai.cost.output_tokens",
26+
GEN_AI_COST_TOTAL_TOKENS => "gen_ai.cost.total_tokens",
27+
GEN_AI_REQUEST_MODEL => "gen_ai.request.model",
28+
GEN_AI_RESPONSE_MODEL => "gen_ai.response.model",
29+
GEN_AI_RESPONSE_TPS => "gen_ai.response.tokens_per_second",
2430
GEN_AI_SYSTEM => "gen_ai.system",
31+
GEN_AI_USAGE_INPUT_CACHED_TOKENS => "gen_ai.usage.input_tokens.cached",
32+
GEN_AI_USAGE_INPUT_TOKENS => "gen_ai.usage.input_tokens",
33+
GEN_AI_USAGE_OUTPUT_REASONING_TOKENS => "gen_ai.usage.output_tokens.reasoning",
34+
GEN_AI_USAGE_OUTPUT_TOKENS => "gen_ai.usage.output_tokens",
35+
GEN_AI_USAGE_TOTAL_TOKENS => "gen_ai.usage.total_tokens",
2536
HTTP_PREFETCH => "sentry.http.prefetch",
2637
HTTP_REQUEST_METHOD => "http.request.method",
2738
HTTP_RESPONSE_STATUS_CODE => "http.response.status_code",
Lines changed: 362 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,362 @@
1+
use std::time::Duration;
2+
3+
use relay_conventions::{
4+
GEN_AI_COST_INPUT_TOKENS, GEN_AI_COST_OUTPUT_TOKENS, GEN_AI_COST_TOTAL_TOKENS,
5+
GEN_AI_REQUEST_MODEL, GEN_AI_RESPONSE_MODEL, GEN_AI_RESPONSE_TPS,
6+
GEN_AI_USAGE_INPUT_CACHED_TOKENS, GEN_AI_USAGE_INPUT_TOKENS,
7+
GEN_AI_USAGE_OUTPUT_REASONING_TOKENS, GEN_AI_USAGE_OUTPUT_TOKENS, GEN_AI_USAGE_TOTAL_TOKENS,
8+
};
9+
use relay_event_schema::protocol::Attributes;
10+
use relay_protocol::Annotated;
11+
12+
use crate::ModelCosts;
13+
use crate::span::ai;
14+
15+
/// Normalizes AI attributes.
16+
pub fn normalize_ai(
17+
attributes: &mut Annotated<Attributes>,
18+
duration: Option<Duration>,
19+
costs: Option<&ModelCosts>,
20+
) {
21+
let Some(attributes) = attributes.value_mut() else {
22+
return;
23+
};
24+
25+
normalize_total_tokens(attributes);
26+
normalize_tokens_per_second(attributes, duration);
27+
normalize_ai_costs(attributes, costs);
28+
}
29+
30+
/// Calculates the [`GEN_AI_USAGE_TOTAL_TOKENS`] attribute.
31+
fn normalize_total_tokens(attributes: &mut Attributes) {
32+
if attributes.contains_key(GEN_AI_USAGE_TOTAL_TOKENS) {
33+
return;
34+
}
35+
36+
let input_tokens = attributes
37+
.get_value(GEN_AI_USAGE_INPUT_TOKENS)
38+
.and_then(|v| v.as_u64());
39+
40+
let output_tokens = attributes
41+
.get_value(GEN_AI_USAGE_OUTPUT_TOKENS)
42+
.and_then(|v| v.as_u64());
43+
44+
if input_tokens.is_none() && output_tokens.is_none() {
45+
return;
46+
}
47+
48+
let total_tokens = input_tokens.unwrap_or(0) + output_tokens.unwrap_or(0);
49+
if let Ok(total_tokens) = i64::try_from(total_tokens) {
50+
attributes.insert(GEN_AI_USAGE_TOTAL_TOKENS, total_tokens);
51+
}
52+
}
53+
54+
/// Calculates the [`GEN_AI_RESPONSE_TPS`] attribute.
55+
fn normalize_tokens_per_second(attributes: &mut Attributes, duration: Option<Duration>) {
56+
let Some(duration) = duration.filter(|d| !d.is_zero()) else {
57+
return;
58+
};
59+
60+
if attributes.contains_key(GEN_AI_RESPONSE_TPS) {
61+
return;
62+
}
63+
64+
let output_tokens = attributes
65+
.get_value(GEN_AI_USAGE_OUTPUT_TOKENS)
66+
.and_then(|v| v.as_f64())
67+
.filter(|v| *v > 0.0);
68+
69+
if let Some(output_tokens) = output_tokens {
70+
let tps = output_tokens / duration.as_secs_f64();
71+
attributes.insert(GEN_AI_RESPONSE_TPS, tps);
72+
}
73+
}
74+
75+
/// Calculates model costs and serializes them into attributes.
76+
fn normalize_ai_costs(attributes: &mut Attributes, model_costs: Option<&ModelCosts>) {
77+
if attributes.contains_key(GEN_AI_COST_TOTAL_TOKENS) {
78+
return;
79+
}
80+
81+
let model_cost = attributes
82+
.get_value(GEN_AI_REQUEST_MODEL)
83+
.or_else(|| attributes.get_value(GEN_AI_RESPONSE_MODEL))
84+
.and_then(|v| v.as_str())
85+
.and_then(|model| model_costs?.cost_per_token(model));
86+
87+
let Some(model_cost) = model_cost else { return };
88+
89+
let get_tokens = |key| {
90+
attributes
91+
.get_value(key)
92+
.and_then(|v| v.as_f64())
93+
.unwrap_or(0.0)
94+
};
95+
96+
let tokens = ai::UsedTokens {
97+
input_tokens: get_tokens(GEN_AI_USAGE_INPUT_TOKENS),
98+
input_cached_tokens: get_tokens(GEN_AI_USAGE_INPUT_CACHED_TOKENS),
99+
output_tokens: get_tokens(GEN_AI_USAGE_OUTPUT_TOKENS),
100+
output_reasoning_tokens: get_tokens(GEN_AI_USAGE_OUTPUT_REASONING_TOKENS),
101+
};
102+
103+
let Some(costs) = ai::calculate_costs(model_cost, tokens) else {
104+
return;
105+
};
106+
107+
attributes.insert(GEN_AI_COST_INPUT_TOKENS, costs.input);
108+
attributes.insert(GEN_AI_COST_OUTPUT_TOKENS, costs.output);
109+
attributes.insert(GEN_AI_COST_TOTAL_TOKENS, costs.total());
110+
}
111+
112+
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use relay_pattern::Pattern;
    use relay_protocol::assert_annotated_snapshot;

    use crate::ModelCostV2;

    use super::*;

    /// Builds an [`Attributes`] map from `"key" => value` pairs.
    macro_rules! attributes {
        ($($key:expr => $value:expr),* $(,)?) => {
            Attributes::from([
                $(($key.into(), Annotated::new($value.into())),)*
            ])
        };
    }

    /// Fixture with cost entries for two models: one with reasoning/cached
    /// token rates (`claude-2.1`) and one without (`gpt4-21-04`).
    fn model_costs() -> ModelCosts {
        ModelCosts {
            version: 2,
            models: HashMap::from([
                (
                    Pattern::new("claude-2.1").unwrap(),
                    ModelCostV2 {
                        input_per_token: 0.01,
                        output_per_token: 0.02,
                        output_reasoning_per_token: 0.03,
                        input_cached_per_token: 0.04,
                    },
                ),
                (
                    Pattern::new("gpt4-21-04").unwrap(),
                    ModelCostV2 {
                        input_per_token: 0.09,
                        output_per_token: 0.05,
                        output_reasoning_per_token: 0.0,
                        input_cached_per_token: 0.0,
                    },
                ),
            ]),
        }
    }

    // All four token kinds present: costs account for cached/reasoning
    // rates, TPS and total tokens are derived.
    #[test]
    fn test_normalize_ai_all_tokens() {
        let mut attributes = Annotated::new(attributes! {
            "gen_ai.usage.input_tokens" => 1000,
            "gen_ai.usage.output_tokens" => 2000,
            "gen_ai.usage.output_tokens.reasoning" => 1000,
            "gen_ai.usage.input_tokens.cached" => 500,
            "gen_ai.request.model" => "claude-2.1".to_owned(),
        });

        normalize_ai(
            &mut attributes,
            Some(Duration::from_secs(1)),
            Some(&model_costs()),
        );

        assert_annotated_snapshot!(attributes, @r#"
        {
          "gen_ai.cost.input_tokens": {
            "type": "double",
            "value": 25.0
          },
          "gen_ai.cost.output_tokens": {
            "type": "double",
            "value": 50.0
          },
          "gen_ai.cost.total_tokens": {
            "type": "double",
            "value": 75.0
          },
          "gen_ai.request.model": {
            "type": "string",
            "value": "claude-2.1"
          },
          "gen_ai.response.tokens_per_second": {
            "type": "double",
            "value": 2000.0
          },
          "gen_ai.usage.input_tokens": {
            "type": "integer",
            "value": 1000
          },
          "gen_ai.usage.input_tokens.cached": {
            "type": "integer",
            "value": 500
          },
          "gen_ai.usage.output_tokens": {
            "type": "integer",
            "value": 2000
          },
          "gen_ai.usage.output_tokens.reasoning": {
            "type": "integer",
            "value": 1000
          },
          "gen_ai.usage.total_tokens": {
            "type": "integer",
            "value": 3000
          }
        }
        "#);
    }

    // Only input/output tokens present: flat per-token rates apply and
    // TPS uses the sub-second duration (2000 tokens / 0.5s = 4000).
    #[test]
    fn test_normalize_ai_basic_tokens() {
        let mut attributes = Annotated::new(attributes! {
            "gen_ai.usage.input_tokens" => 1000,
            "gen_ai.usage.output_tokens" => 2000,
            "gen_ai.request.model" => "gpt4-21-04".to_owned(),
        });

        normalize_ai(
            &mut attributes,
            Some(Duration::from_millis(500)),
            Some(&model_costs()),
        );

        assert_annotated_snapshot!(attributes, @r#"
        {
          "gen_ai.cost.input_tokens": {
            "type": "double",
            "value": 90.0
          },
          "gen_ai.cost.output_tokens": {
            "type": "double",
            "value": 100.0
          },
          "gen_ai.cost.total_tokens": {
            "type": "double",
            "value": 190.0
          },
          "gen_ai.request.model": {
            "type": "string",
            "value": "gpt4-21-04"
          },
          "gen_ai.response.tokens_per_second": {
            "type": "double",
            "value": 4000.0
          },
          "gen_ai.usage.input_tokens": {
            "type": "integer",
            "value": 1000
          },
          "gen_ai.usage.output_tokens": {
            "type": "integer",
            "value": 2000
          },
          "gen_ai.usage.total_tokens": {
            "type": "integer",
            "value": 3000
          }
        }
        "#);
    }

    // Zero duration suppresses TPS, an unknown model suppresses costs;
    // only the total token count is still derived.
    #[test]
    fn test_normalize_ai_basic_tokens_no_duration_no_cost() {
        let mut attributes = Annotated::new(attributes! {
            "gen_ai.usage.input_tokens" => 1000,
            "gen_ai.usage.output_tokens" => 2000,
            "gen_ai.request.model" => "unknown".to_owned(),
        });

        normalize_ai(&mut attributes, Some(Duration::ZERO), Some(&model_costs()));

        assert_annotated_snapshot!(attributes, @r#"
        {
          "gen_ai.request.model": {
            "type": "string",
            "value": "unknown"
          },
          "gen_ai.usage.input_tokens": {
            "type": "integer",
            "value": 1000
          },
          "gen_ai.usage.output_tokens": {
            "type": "integer",
            "value": 2000
          },
          "gen_ai.usage.total_tokens": {
            "type": "integer",
            "value": 3000
          }
        }
        "#);
    }

    // Attributes already set by the SDK (total tokens, costs, TPS) must
    // survive normalization untouched.
    #[test]
    fn test_normalize_ai_does_not_override() {
        let mut attributes = Annotated::new(attributes! {
            "gen_ai.usage.input_tokens" => 1000,
            "gen_ai.usage.output_tokens" => 2000,
            "gen_ai.request.model" => "gpt4-21-04".to_owned(),

            "gen_ai.usage.total_tokens" => 1337,

            "gen_ai.cost.input_tokens" => 99.0,
            "gen_ai.cost.output_tokens" => 99.0,
            "gen_ai.cost.total_tokens" => 123.0,

            "gen_ai.response.tokens_per_second" => 42.0,
        });

        normalize_ai(
            &mut attributes,
            Some(Duration::from_millis(500)),
            Some(&model_costs()),
        );

        assert_annotated_snapshot!(attributes, @r#"
        {
          "gen_ai.cost.input_tokens": {
            "type": "double",
            "value": 99.0
          },
          "gen_ai.cost.output_tokens": {
            "type": "double",
            "value": 99.0
          },
          "gen_ai.cost.total_tokens": {
            "type": "double",
            "value": 123.0
          },
          "gen_ai.request.model": {
            "type": "string",
            "value": "gpt4-21-04"
          },
          "gen_ai.response.tokens_per_second": {
            "type": "double",
            "value": 42.0
          },
          "gen_ai.usage.input_tokens": {
            "type": "integer",
            "value": 1000
          },
          "gen_ai.usage.output_tokens": {
            "type": "integer",
            "value": 2000
          },
          "gen_ai.usage.total_tokens": {
            "type": "integer",
            "value": 1337
          }
        }
        "#);
    }
}

relay-event-normalization/src/eap/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ use relay_sampling::DynamicSamplingContext;
1414

1515
use crate::{ClientHints, FromUserAgentInfo as _, RawUserAgentInfo};
1616

17+
mod ai;
18+
19+
pub use self::ai::normalize_ai;
20+
1721
/// Normalizes/validates all attribute types.
1822
///
1923
/// Removes and marks all attributes with an error for which the specified [`AttributeType`]

0 commit comments

Comments
 (0)