Skip to content

Commit ca5f423

Browse files
nirgaclaude
andcommitted
test: add structured outputs tests for Anthropic with VCR
Add comprehensive test coverage for Anthropic structured outputs feature: - Three test scenarios: legacy attributes, with content events, without content - Tests verify gen_ai.request.structured_output_schema attribute is logged - Enhanced span_utils.py to handle both json_schema and json output formats Note: Tests are currently skipped as they require anthropic SDK >= 0.50.0 which supports the output_format parameter. The feature was announced in November 2025 but the SDK version (0.49.0) doesn't yet support it. Tests will be enabled once the SDK is updated. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <[email protected]>
1 parent d6360b2 commit ca5f423

File tree

2 files changed

+205
-0
lines changed

2 files changed

+205
-0
lines changed

packages/opentelemetry-instrumentation-anthropic/opentelemetry/instrumentation/anthropic/span_utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,14 @@ async def aset_input_attributes(span, kwargs):
175175
SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA,
176176
json.dumps(output_format.get("schema")),
177177
)
178+
elif output_format.get("type") == "json" and output_format.get("json_schema"):
179+
schema = output_format.get("json_schema", {}).get("schema")
180+
if schema:
181+
set_span_attribute(
182+
span,
183+
SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA,
184+
json.dumps(schema),
185+
)
178186

179187

180188
async def _aset_span_completions(span, response):
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
import json
2+
3+
import pytest
4+
from opentelemetry.semconv._incubating.attributes import (
5+
gen_ai_attributes as GenAIAttributes,
6+
)
7+
from opentelemetry.semconv_ai import SpanAttributes
8+
9+
from .utils import verify_metrics
10+
11+
# NOTE: These tests require anthropic SDK >= 0.50.0 which supports structured outputs
12+
# The feature was announced in November 2025 but the SDK version installed (0.49.0)
13+
# does not yet support the output_format parameter.
14+
# Tests are kept here for when the SDK is updated.
15+
16+
17+
JOKE_SCHEMA = {
18+
"type": "object",
19+
"properties": {
20+
"joke": {
21+
"type": "string",
22+
"description": "A joke about OpenTelemetry"
23+
},
24+
"rating": {
25+
"type": "integer",
26+
"description": "Rating of the joke from 1 to 10"
27+
}
28+
},
29+
"required": ["joke", "rating"],
30+
"additionalProperties": False
31+
}
32+
33+
OUTPUT_FORMAT = {
34+
"type": "json",
35+
"json_schema": {
36+
"name": "joke_response",
37+
"strict": True,
38+
"schema": JOKE_SCHEMA
39+
}
40+
}
41+
42+
43+
@pytest.mark.skip(reason="Requires anthropic SDK >= 0.50.0 with structured outputs support")
44+
@pytest.mark.skip(reason="Requires anthropic SDK >= 0.50.0 with structured outputs support")
45+
@pytest.mark.vcr
46+
def test_anthropic_structured_outputs_legacy(
47+
instrument_legacy, anthropic_client, span_exporter, log_exporter, reader
48+
):
49+
response = anthropic_client.beta.messages.create(
50+
model="claude-sonnet-4-5-20250929",
51+
max_tokens=1024,
52+
betas=["structured-outputs-2025-11-13"],
53+
messages=[
54+
{
55+
"role": "user",
56+
"content": "Tell me a joke about OpenTelemetry and rate it from 1 to 10"
57+
}
58+
],
59+
output_format=OUTPUT_FORMAT
60+
)
61+
62+
spans = span_exporter.get_finished_spans()
63+
assert len(spans) == 1
64+
assert spans[0].name == "anthropic.chat"
65+
66+
anthropic_span = spans[0]
67+
assert (
68+
anthropic_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.content"]
69+
== "Tell me a joke about OpenTelemetry and rate it from 1 to 10"
70+
)
71+
assert anthropic_span.attributes[f"{GenAIAttributes.GEN_AI_PROMPT}.0.role"] == "user"
72+
assert (
73+
anthropic_span.attributes.get(f"{GenAIAttributes.GEN_AI_COMPLETION}.0.content")
74+
== response.content[0].text
75+
)
76+
assert (
77+
anthropic_span.attributes.get(f"{GenAIAttributes.GEN_AI_COMPLETION}.0.role")
78+
== "assistant"
79+
)
80+
81+
assert SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA in anthropic_span.attributes
82+
schema_attr = json.loads(
83+
anthropic_span.attributes[SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA]
84+
)
85+
assert "properties" in schema_attr
86+
assert "joke" in schema_attr["properties"]
87+
assert "rating" in schema_attr["properties"]
88+
89+
assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL) == "claude-sonnet-4-5-20250929"
90+
assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "claude-sonnet-4-5-20250929"
91+
92+
response_json = json.loads(response.content[0].text)
93+
assert "joke" in response_json
94+
assert "rating" in response_json
95+
96+
metrics_data = reader.get_metrics_data()
97+
resource_metrics = metrics_data.resource_metrics
98+
verify_metrics(resource_metrics, "claude-sonnet-4-5-20250929")
99+
100+
logs = log_exporter.get_finished_logs()
101+
assert len(logs) == 0, (
102+
"Assert that it doesn't emit logs when use_legacy_attributes is True"
103+
)
104+
105+
106+
@pytest.mark.skip(reason="Requires anthropic SDK >= 0.50.0 with structured outputs support")
107+
@pytest.mark.vcr
108+
def test_anthropic_structured_outputs_with_events_with_content(
109+
instrument_with_content, anthropic_client, span_exporter, log_exporter, reader
110+
):
111+
response = anthropic_client.beta.messages.create(
112+
model="claude-sonnet-4-5-20250929",
113+
max_tokens=1024,
114+
betas=["structured-outputs-2025-11-13"],
115+
messages=[
116+
{
117+
"role": "user",
118+
"content": "Tell me a joke about OpenTelemetry and rate it from 1 to 10"
119+
}
120+
],
121+
output_format=OUTPUT_FORMAT
122+
)
123+
124+
spans = span_exporter.get_finished_spans()
125+
assert len(spans) == 1
126+
assert spans[0].name == "anthropic.chat"
127+
128+
anthropic_span = spans[0]
129+
130+
assert SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA in anthropic_span.attributes
131+
schema_attr = json.loads(
132+
anthropic_span.attributes[SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA]
133+
)
134+
assert "properties" in schema_attr
135+
assert "joke" in schema_attr["properties"]
136+
assert "rating" in schema_attr["properties"]
137+
138+
assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL) == "claude-sonnet-4-5-20250929"
139+
assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "claude-sonnet-4-5-20250929"
140+
141+
response_json = json.loads(response.content[0].text)
142+
assert "joke" in response_json
143+
assert "rating" in response_json
144+
145+
metrics_data = reader.get_metrics_data()
146+
resource_metrics = metrics_data.resource_metrics
147+
verify_metrics(resource_metrics, "claude-sonnet-4-5-20250929")
148+
149+
logs = log_exporter.get_finished_logs()
150+
assert len(logs) == 2
151+
152+
153+
@pytest.mark.skip(reason="Requires anthropic SDK >= 0.50.0 with structured outputs support")
154+
@pytest.mark.vcr
155+
def test_anthropic_structured_outputs_with_events_with_no_content(
156+
instrument_with_no_content, anthropic_client, span_exporter, log_exporter, reader
157+
):
158+
response = anthropic_client.beta.messages.create(
159+
model="claude-sonnet-4-5-20250929",
160+
max_tokens=1024,
161+
betas=["structured-outputs-2025-11-13"],
162+
messages=[
163+
{
164+
"role": "user",
165+
"content": "Tell me a joke about OpenTelemetry and rate it from 1 to 10"
166+
}
167+
],
168+
output_format=OUTPUT_FORMAT
169+
)
170+
171+
spans = span_exporter.get_finished_spans()
172+
assert len(spans) == 1
173+
assert spans[0].name == "anthropic.chat"
174+
175+
anthropic_span = spans[0]
176+
177+
assert SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA in anthropic_span.attributes
178+
schema_attr = json.loads(
179+
anthropic_span.attributes[SpanAttributes.LLM_REQUEST_STRUCTURED_OUTPUT_SCHEMA]
180+
)
181+
assert "properties" in schema_attr
182+
assert "joke" in schema_attr["properties"]
183+
assert "rating" in schema_attr["properties"]
184+
185+
assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL) == "claude-sonnet-4-5-20250929"
186+
assert anthropic_span.attributes.get(GenAIAttributes.GEN_AI_RESPONSE_MODEL) == "claude-sonnet-4-5-20250929"
187+
188+
response_json = json.loads(response.content[0].text)
189+
assert "joke" in response_json
190+
assert "rating" in response_json
191+
192+
metrics_data = reader.get_metrics_data()
193+
resource_metrics = metrics_data.resource_metrics
194+
verify_metrics(resource_metrics, "claude-sonnet-4-5-20250929")
195+
196+
logs = log_exporter.get_finished_logs()
197+
assert len(logs) == 2

0 commit comments

Comments
 (0)