Skip to content

Commit 4502c52

Browse files
committed
feat(spanv2): Implement AI normalizations
1 parent a8b51eb commit 4502c52

File tree

7 files changed

+403
-5
lines changed

7 files changed

+403
-5
lines changed

relay-conventions/src/consts.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,18 @@ convention_attributes!(
2121
DB_SYSTEM_NAME => "db.system.name",
2222
DESCRIPTION => "sentry.description",
2323
FAAS_TRIGGER => "faas.trigger",
24+
GEN_AI_COST_INPUT_TOKENS => "gen_ai.cost.input_tokens",
25+
GEN_AI_COST_OUTPUT_TOKENS => "gen_ai.cost.output_tokens",
26+
GEN_AI_COST_TOTAL_TOKENS => "gen_ai.cost.total_tokens",
27+
GEN_AI_REQUEST_MODEL => "gen_ai.request.model",
28+
GEN_AI_RESPONSE_MODEL => "gen_ai.response.model",
29+
GEN_AI_RESPONSE_TPS => "gen_ai.response.tokens_per_second",
2430
GEN_AI_SYSTEM => "gen_ai.system",
31+
GEN_AI_USAGE_INPUT_CACHED_TOKENS => "gen_ai.usage.input_tokens.cached",
32+
GEN_AI_USAGE_INPUT_TOKENS => "gen_ai.usage.input_tokens",
33+
GEN_AI_USAGE_OUTPUT_REASONING_TOKENS => "gen_ai.usage.output_tokens.reasoning",
34+
GEN_AI_USAGE_OUTPUT_TOKENS => "gen_ai.usage.output_tokens",
35+
GEN_AI_USAGE_TOTAL_TOKENS => "gen_ai.usage.total_tokens",
2536
HTTP_PREFETCH => "sentry.http.prefetch",
2637
HTTP_REQUEST_METHOD => "http.request.method",
2738
HTTP_RESPONSE_STATUS_CODE => "http.response.status_code",
Lines changed: 357 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,357 @@
1+
use std::time::Duration;
2+
3+
use relay_conventions::consts::*;
4+
use relay_event_schema::protocol::Attributes;
5+
use relay_protocol::Annotated;
6+
7+
use crate::ModelCosts;
8+
use crate::span::ai;
9+
10+
/// Normalizes AI attributes.
11+
pub fn normalize_ai(
12+
attributes: &mut Annotated<Attributes>,
13+
duration: Option<Duration>,
14+
costs: Option<&ModelCosts>,
15+
) {
16+
let Some(attributes) = attributes.value_mut() else {
17+
return;
18+
};
19+
20+
normalize_total_tokens(attributes);
21+
normalize_tokens_per_second(attributes, duration);
22+
normalize_ai_costs(attributes, costs);
23+
}
24+
25+
/// Calculates the [`GEN_AI_USAGE_TOTAL_TOKENS`] attribute.
26+
fn normalize_total_tokens(attributes: &mut Attributes) {
27+
if attributes.contains_key(GEN_AI_USAGE_TOTAL_TOKENS) {
28+
return;
29+
}
30+
31+
let input_tokens = attributes
32+
.get_value(GEN_AI_USAGE_INPUT_TOKENS)
33+
.and_then(|v| v.as_u64());
34+
35+
let output_tokens = attributes
36+
.get_value(GEN_AI_USAGE_OUTPUT_TOKENS)
37+
.and_then(|v| v.as_u64());
38+
39+
if input_tokens.is_none() && output_tokens.is_none() {
40+
return;
41+
}
42+
43+
let total_tokens = input_tokens.unwrap_or(0) + output_tokens.unwrap_or(0);
44+
if let Ok(total_tokens) = i64::try_from(total_tokens) {
45+
attributes.insert(GEN_AI_USAGE_TOTAL_TOKENS, total_tokens);
46+
}
47+
}
48+
49+
/// Calculates the [`GEN_AI_RESPONSE_TPS`] attribute.
50+
fn normalize_tokens_per_second(attributes: &mut Attributes, duration: Option<Duration>) {
51+
let Some(duration) = duration.filter(|d| !d.is_zero()) else {
52+
return;
53+
};
54+
55+
if attributes.contains_key(GEN_AI_RESPONSE_TPS) {
56+
return;
57+
}
58+
59+
let output_tokens = attributes
60+
.get_value(GEN_AI_USAGE_OUTPUT_TOKENS)
61+
.and_then(|v| v.as_f64())
62+
.filter(|v| *v > 0.0);
63+
64+
if let Some(output_tokens) = output_tokens {
65+
let tps = output_tokens / duration.as_secs_f64();
66+
attributes.insert(GEN_AI_RESPONSE_TPS, tps);
67+
}
68+
}
69+
70+
/// Calculates model costs and serializes them into attributes.
71+
fn normalize_ai_costs(attributes: &mut Attributes, model_costs: Option<&ModelCosts>) {
72+
if attributes.contains_key(GEN_AI_COST_TOTAL_TOKENS) {
73+
return;
74+
}
75+
76+
let model_cost = attributes
77+
.get_value(GEN_AI_REQUEST_MODEL)
78+
.or_else(|| attributes.get_value(GEN_AI_RESPONSE_MODEL))
79+
.and_then(|v| v.as_str())
80+
.and_then(|model| model_costs?.cost_per_token(model));
81+
82+
let Some(model_cost) = model_cost else { return };
83+
84+
let get_tokens = |key| {
85+
attributes
86+
.get_value(key)
87+
.and_then(|v| v.as_f64())
88+
.unwrap_or(0.0)
89+
};
90+
91+
let tokens = ai::UsedTokens {
92+
input_tokens: get_tokens(GEN_AI_USAGE_INPUT_TOKENS),
93+
input_cached_tokens: get_tokens(GEN_AI_USAGE_INPUT_CACHED_TOKENS),
94+
output_tokens: get_tokens(GEN_AI_USAGE_OUTPUT_TOKENS),
95+
output_reasoning_tokens: get_tokens(GEN_AI_USAGE_OUTPUT_REASONING_TOKENS),
96+
};
97+
98+
let Some(costs) = ai::calculate_costs(model_cost, tokens) else {
99+
return;
100+
};
101+
102+
attributes.insert(GEN_AI_COST_INPUT_TOKENS, costs.input);
103+
attributes.insert(GEN_AI_COST_OUTPUT_TOKENS, costs.output);
104+
attributes.insert(GEN_AI_COST_TOTAL_TOKENS, costs.total());
105+
}
106+
107+
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use relay_pattern::Pattern;
    use relay_protocol::assert_annotated_snapshot;

    use crate::ModelCostV2;

    use super::*;

    /// Builds an [`Attributes`] map from `key => value` pairs.
    macro_rules! attributes {
        ($($key:expr => $value:expr),* $(,)?) => {
            Attributes::from([
                $(($key.into(), Annotated::new($value.into())),)*
            ])
        };
    }

    /// Cost table with two models: `claude-2.1` prices all four token kinds,
    /// while `gpt4-21-04` only prices basic input/output tokens.
    fn model_costs() -> ModelCosts {
        ModelCosts {
            version: 2,
            models: HashMap::from([
                (
                    Pattern::new("claude-2.1").unwrap(),
                    ModelCostV2 {
                        input_per_token: 0.01,
                        output_per_token: 0.02,
                        output_reasoning_per_token: 0.03,
                        input_cached_per_token: 0.04,
                    },
                ),
                (
                    Pattern::new("gpt4-21-04").unwrap(),
                    ModelCostV2 {
                        input_per_token: 0.09,
                        output_per_token: 0.05,
                        output_reasoning_per_token: 0.0,
                        input_cached_per_token: 0.0,
                    },
                ),
            ]),
        }
    }

    // All token kinds present: the total, the tokens-per-second rate and all
    // three cost attributes are derived.
    #[test]
    fn test_normalize_ai_all_tokens() {
        let mut attributes = Annotated::new(attributes! {
            "gen_ai.usage.input_tokens" => 1000,
            "gen_ai.usage.output_tokens" => 2000,
            "gen_ai.usage.output_tokens.reasoning" => 1000,
            "gen_ai.usage.input_tokens.cached" => 500,
            "gen_ai.request.model" => "claude-2.1".to_owned(),
        });

        normalize_ai(
            &mut attributes,
            Some(Duration::from_secs(1)),
            Some(&model_costs()),
        );

        assert_annotated_snapshot!(attributes, @r#"
        {
          "gen_ai.cost.input_tokens": {
            "type": "double",
            "value": 25.0
          },
          "gen_ai.cost.output_tokens": {
            "type": "double",
            "value": 50.0
          },
          "gen_ai.cost.total_tokens": {
            "type": "double",
            "value": 75.0
          },
          "gen_ai.request.model": {
            "type": "string",
            "value": "claude-2.1"
          },
          "gen_ai.response.tokens_per_second": {
            "type": "double",
            "value": 2000.0
          },
          "gen_ai.usage.input_tokens": {
            "type": "integer",
            "value": 1000
          },
          "gen_ai.usage.input_tokens.cached": {
            "type": "integer",
            "value": 500
          },
          "gen_ai.usage.output_tokens": {
            "type": "integer",
            "value": 2000
          },
          "gen_ai.usage.output_tokens.reasoning": {
            "type": "integer",
            "value": 1000
          },
          "gen_ai.usage.total_tokens": {
            "type": "integer",
            "value": 3000
          }
        }
        "#);
    }

    // Only basic input/output tokens: TPS uses a sub-second duration
    // (2000 tokens / 0.5s = 4000 tps) and costs use the basic-only model.
    #[test]
    fn test_normalize_ai_basic_tokens() {
        let mut attributes = Annotated::new(attributes! {
            "gen_ai.usage.input_tokens" => 1000,
            "gen_ai.usage.output_tokens" => 2000,
            "gen_ai.request.model" => "gpt4-21-04".to_owned(),
        });

        normalize_ai(
            &mut attributes,
            Some(Duration::from_millis(500)),
            Some(&model_costs()),
        );

        assert_annotated_snapshot!(attributes, @r#"
        {
          "gen_ai.cost.input_tokens": {
            "type": "double",
            "value": 90.0
          },
          "gen_ai.cost.output_tokens": {
            "type": "double",
            "value": 100.0
          },
          "gen_ai.cost.total_tokens": {
            "type": "double",
            "value": 190.0
          },
          "gen_ai.request.model": {
            "type": "string",
            "value": "gpt4-21-04"
          },
          "gen_ai.response.tokens_per_second": {
            "type": "double",
            "value": 4000.0
          },
          "gen_ai.usage.input_tokens": {
            "type": "integer",
            "value": 1000
          },
          "gen_ai.usage.output_tokens": {
            "type": "integer",
            "value": 2000
          },
          "gen_ai.usage.total_tokens": {
            "type": "integer",
            "value": 3000
          }
        }
        "#);
    }

    // Zero duration suppresses TPS and an unknown model suppresses costs;
    // only the total token count is still derived.
    #[test]
    fn test_normalize_ai_basic_tokens_no_duration_no_cost() {
        let mut attributes = Annotated::new(attributes! {
            "gen_ai.usage.input_tokens" => 1000,
            "gen_ai.usage.output_tokens" => 2000,
            "gen_ai.request.model" => "unknown".to_owned(),
        });

        normalize_ai(&mut attributes, Some(Duration::ZERO), Some(&model_costs()));

        assert_annotated_snapshot!(attributes, @r#"
        {
          "gen_ai.request.model": {
            "type": "string",
            "value": "unknown"
          },
          "gen_ai.usage.input_tokens": {
            "type": "integer",
            "value": 1000
          },
          "gen_ai.usage.output_tokens": {
            "type": "integer",
            "value": 2000
          },
          "gen_ai.usage.total_tokens": {
            "type": "integer",
            "value": 3000
          }
        }
        "#);
    }

    // Pre-existing total, cost and TPS attributes must win over the derived
    // values — normalization never overwrites what the SDK sent.
    #[test]
    fn test_normalize_ai_does_not_override() {
        let mut attributes = Annotated::new(attributes! {
            "gen_ai.usage.input_tokens" => 1000,
            "gen_ai.usage.output_tokens" => 2000,
            "gen_ai.request.model" => "gpt4-21-04".to_owned(),

            "gen_ai.usage.total_tokens" => 1337,

            "gen_ai.cost.input_tokens" => 99.0,
            "gen_ai.cost.output_tokens" => 99.0,
            "gen_ai.cost.total_tokens" => 123.0,

            "gen_ai.response.tokens_per_second" => 42.0,
        });

        normalize_ai(
            &mut attributes,
            Some(Duration::from_millis(500)),
            Some(&model_costs()),
        );

        assert_annotated_snapshot!(attributes, @r#"
        {
          "gen_ai.cost.input_tokens": {
            "type": "double",
            "value": 99.0
          },
          "gen_ai.cost.output_tokens": {
            "type": "double",
            "value": 99.0
          },
          "gen_ai.cost.total_tokens": {
            "type": "double",
            "value": 123.0
          },
          "gen_ai.request.model": {
            "type": "string",
            "value": "gpt4-21-04"
          },
          "gen_ai.response.tokens_per_second": {
            "type": "double",
            "value": 42.0
          },
          "gen_ai.usage.input_tokens": {
            "type": "integer",
            "value": 1000
          },
          "gen_ai.usage.output_tokens": {
            "type": "integer",
            "value": 2000
          },
          "gen_ai.usage.total_tokens": {
            "type": "integer",
            "value": 1337
          }
        }
        "#);
    }
}

relay-event-normalization/src/eap/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ use relay_sampling::DynamicSamplingContext;
1414

1515
use crate::{ClientHints, FromUserAgentInfo as _, RawUserAgentInfo};
1616

17+
mod ai;
18+
19+
pub use self::ai::normalize_ai;
20+
1721
/// Normalizes/validates all attribute types.
1822
///
1923
/// Removes and marks all attributes with an error for which the specified [`AttributeType`]

0 commit comments

Comments
 (0)