Skip to content

Commit 02e8b03

Browse files
authored
fix: drop usage token detail fields from OpenAI trace ingest payloads (#2529)
1 parent fabba01 commit 02e8b03

File tree

2 files changed

+274
-13
lines changed

2 files changed

+274
-13
lines changed

src/agents/tracing/processors.py

Lines changed: 99 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
import math
34
import os
45
import queue
56
import random
@@ -33,10 +34,9 @@ class BackendSpanExporter(TracingExporter):
3334
{
3435
"input_tokens",
3536
"output_tokens",
36-
"input_tokens_details",
37-
"output_tokens_details",
3837
}
3938
)
39+
_UNSERIALIZABLE = object()
4040

4141
def __init__(
4242
self,
@@ -181,7 +181,7 @@ def _should_sanitize_for_openai_tracing_api(self) -> bool:
181181
return self.endpoint.rstrip("/") == self._OPENAI_TRACING_INGEST_ENDPOINT.rstrip("/")
182182

183183
def _sanitize_for_openai_tracing_api(self, payload_item: dict[str, Any]) -> dict[str, Any]:
184-
"""Drop fields known to be rejected by OpenAI tracing ingestion."""
184+
"""Move unsupported generation usage fields under usage.details for traces ingest."""
185185
span_data = payload_item.get("span_data")
186186
if not isinstance(span_data, dict):
187187
return payload_item
@@ -193,20 +193,109 @@ def _sanitize_for_openai_tracing_api(self, payload_item: dict[str, Any]) -> dict
193193
if not isinstance(usage, dict):
194194
return payload_item
195195

196-
filtered_usage = {
197-
key: value
198-
for key, value in usage.items()
199-
if key in self._OPENAI_TRACING_ALLOWED_USAGE_KEYS
200-
}
201-
if filtered_usage == usage:
196+
sanitized_usage = self._sanitize_generation_usage_for_openai_tracing_api(usage)
197+
198+
if sanitized_usage is None:
199+
sanitized_span_data = dict(span_data)
200+
sanitized_span_data.pop("usage", None)
201+
sanitized_payload_item = dict(payload_item)
202+
sanitized_payload_item["span_data"] = sanitized_span_data
203+
return sanitized_payload_item
204+
205+
if sanitized_usage == usage:
202206
return payload_item
203207

204208
sanitized_span_data = dict(span_data)
205-
sanitized_span_data["usage"] = filtered_usage
209+
sanitized_span_data["usage"] = sanitized_usage
206210
sanitized_payload_item = dict(payload_item)
207211
sanitized_payload_item["span_data"] = sanitized_span_data
208212
return sanitized_payload_item
209213

214+
def _sanitize_generation_usage_for_openai_tracing_api(
215+
self, usage: dict[str, Any]
216+
) -> dict[str, Any] | None:
217+
input_tokens = usage.get("input_tokens")
218+
output_tokens = usage.get("output_tokens")
219+
if not self._is_finite_json_number(input_tokens) or not self._is_finite_json_number(
220+
output_tokens
221+
):
222+
return None
223+
224+
details: dict[str, Any] = {}
225+
existing_details = usage.get("details")
226+
if isinstance(existing_details, dict):
227+
for key, value in existing_details.items():
228+
if not isinstance(key, str):
229+
continue
230+
sanitized_value = self._sanitize_json_compatible_value(value)
231+
if sanitized_value is self._UNSERIALIZABLE:
232+
continue
233+
details[key] = sanitized_value
234+
235+
for key, value in usage.items():
236+
if key in self._OPENAI_TRACING_ALLOWED_USAGE_KEYS or key == "details" or value is None:
237+
continue
238+
sanitized_value = self._sanitize_json_compatible_value(value)
239+
if sanitized_value is self._UNSERIALIZABLE:
240+
continue
241+
details[key] = sanitized_value
242+
243+
sanitized_usage: dict[str, Any] = {
244+
"input_tokens": input_tokens,
245+
"output_tokens": output_tokens,
246+
}
247+
if details:
248+
sanitized_usage["details"] = details
249+
return sanitized_usage
250+
251+
def _is_finite_json_number(self, value: Any) -> bool:
252+
if isinstance(value, bool):
253+
return False
254+
return isinstance(value, int | float) and not (
255+
isinstance(value, float) and not math.isfinite(value)
256+
)
257+
258+
def _sanitize_json_compatible_value(self, value: Any, seen_ids: set[int] | None = None) -> Any:
259+
if value is None or isinstance(value, str | bool | int):
260+
return value
261+
if isinstance(value, float):
262+
return value if math.isfinite(value) else self._UNSERIALIZABLE
263+
if seen_ids is None:
264+
seen_ids = set()
265+
if isinstance(value, dict):
266+
value_id = id(value)
267+
if value_id in seen_ids:
268+
return self._UNSERIALIZABLE
269+
seen_ids.add(value_id)
270+
sanitized_dict: dict[str, Any] = {}
271+
try:
272+
for key, nested_value in value.items():
273+
if not isinstance(key, str):
274+
continue
275+
sanitized_nested = self._sanitize_json_compatible_value(nested_value, seen_ids)
276+
if sanitized_nested is self._UNSERIALIZABLE:
277+
continue
278+
sanitized_dict[key] = sanitized_nested
279+
finally:
280+
seen_ids.remove(value_id)
281+
return sanitized_dict
282+
if isinstance(value, list | tuple):
283+
value_id = id(value)
284+
if value_id in seen_ids:
285+
return self._UNSERIALIZABLE
286+
seen_ids.add(value_id)
287+
sanitized_list: list[Any] = []
288+
try:
289+
for nested_value in value:
290+
sanitized_nested = self._sanitize_json_compatible_value(nested_value, seen_ids)
291+
if sanitized_nested is self._UNSERIALIZABLE:
292+
continue
293+
sanitized_list.append(sanitized_nested)
294+
finally:
295+
seen_ids.remove(value_id)
296+
return sanitized_list
297+
return self._UNSERIALIZABLE
298+
210299
def close(self):
    """Close the underlying HTTP client."""
    # Release the exporter's pooled connections; intended for shutdown.
    client = self._client
    client.close()

tests/test_trace_processor.py

Lines changed: 175 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -317,10 +317,16 @@ def export(self):
317317
sent_usage = sent_payload["span_data"]["usage"]
318318
assert "requests" not in sent_usage
319319
assert "total_tokens" not in sent_usage
320+
assert "input_tokens_details" not in sent_usage
321+
assert "output_tokens_details" not in sent_usage
320322
assert sent_usage["input_tokens"] == 10
321323
assert sent_usage["output_tokens"] == 5
322-
assert sent_usage["input_tokens_details"] == {"cached_tokens": 1}
323-
assert sent_usage["output_tokens_details"] == {"reasoning_tokens": 2}
324+
assert sent_usage["details"] == {
325+
"requests": 1,
326+
"total_tokens": 15,
327+
"input_tokens_details": {"cached_tokens": 1},
328+
"output_tokens_details": {"reasoning_tokens": 2},
329+
}
324330

325331
# Ensure the original exported object has not been mutated.
326332
assert "requests" in item.exported_payload["span_data"]["usage"]
@@ -401,12 +407,178 @@ def test_sanitize_for_openai_tracing_api_keeps_allowed_generation_usage():
401407
"usage": {
402408
"input_tokens": 1,
403409
"output_tokens": 2,
410+
},
411+
},
412+
}
413+
assert exporter._sanitize_for_openai_tracing_api(payload) is payload
414+
exporter.close()
415+
416+
417+
def test_sanitize_for_openai_tracing_api_moves_unsupported_generation_usage_to_details():
    """Unsupported generation usage fields are relocated under usage['details']."""
    exporter = BackendSpanExporter(api_key="test_key")
    usage = {
        "input_tokens": 1,
        "output_tokens": 2,
        "total_tokens": 3,
        "input_tokens_details": {"cached_tokens": 0},
        "output_tokens_details": {"reasoning_tokens": 0},
        "details": {"provider": "litellm"},
    }
    payload = {
        "object": "trace.span",
        "span_data": {"type": "generation", "usage": usage},
    }

    result = exporter._sanitize_for_openai_tracing_api(payload)

    expected_details = {
        "provider": "litellm",
        "total_tokens": 3,
        "input_tokens_details": {"cached_tokens": 0},
        "output_tokens_details": {"reasoning_tokens": 0},
    }
    assert result["span_data"]["usage"] == {
        "input_tokens": 1,
        "output_tokens": 2,
        "details": expected_details,
    }
    exporter.close()
445+
446+
447+
def test_sanitize_for_openai_tracing_api_filters_non_json_values_in_usage_details():
    """Values that cannot be JSON-encoded are pruned at every nesting level."""
    exporter = BackendSpanExporter(api_key="test_key")
    non_json = object()
    usage = {
        "input_tokens": 1,
        "output_tokens": 2,
        "input_tokens_details": {"cached_tokens": 0, "bad": non_json},
        "output_tokens_details": {"reasoning_tokens": 0},
        "provider_usage": [1, non_json, {"ok": True, "bad": non_json}],
        "details": {
            "provider": "litellm",
            "bad": non_json,
            "nested": {"keep": 1, "bad": non_json},
        },
    }
    payload = {
        "object": "trace.span",
        "span_data": {"type": "generation", "usage": usage},
    }

    result = exporter._sanitize_for_openai_tracing_api(payload)

    expected_details = {
        "provider": "litellm",
        "nested": {"keep": 1},
        "input_tokens_details": {"cached_tokens": 0},
        "output_tokens_details": {"reasoning_tokens": 0},
        "provider_usage": [1, {"ok": True}],
    }
    assert result["span_data"]["usage"] == {
        "input_tokens": 1,
        "output_tokens": 2,
        "details": expected_details,
    }
    exporter.close()
484+
485+
486+
def test_sanitize_for_openai_tracing_api_handles_cyclic_usage_values():
    """Self-referential containers collapse instead of causing infinite recursion."""
    exporter = BackendSpanExporter(api_key="test_key")

    cyclic_dict: dict[str, Any] = {}
    cyclic_dict["self"] = cyclic_dict
    cyclic_list: list[Any] = []
    cyclic_list.append(cyclic_list)

    usage = {
        "input_tokens": 1,
        "output_tokens": 2,
        "input_tokens_details": cyclic_dict,
        "details": {"provider": "litellm", "cycle": cyclic_list},
    }
    payload = {
        "object": "trace.span",
        "span_data": {"type": "generation", "usage": usage},
    }

    result = exporter._sanitize_for_openai_tracing_api(payload)

    assert result["span_data"]["usage"] == {
        "input_tokens": 1,
        "output_tokens": 2,
        "details": {
            "provider": "litellm",
            "cycle": [],
            "input_tokens_details": {},
        },
    }
    exporter.close()
520+
521+
522+
def test_sanitize_for_openai_tracing_api_drops_non_dict_generation_usage_details():
    """A non-dict ``details`` value inside generation usage is discarded."""
    exporter = BackendSpanExporter(api_key="test_key")
    usage = {"input_tokens": 1, "output_tokens": 2, "details": "invalid"}
    payload = {
        "object": "trace.span",
        "span_data": {"type": "generation", "usage": usage},
    }

    result = exporter._sanitize_for_openai_tracing_api(payload)

    assert result["span_data"]["usage"] == {"input_tokens": 1, "output_tokens": 2}
    exporter.close()
541+
542+
543+
def test_sanitize_for_openai_tracing_api_drops_generation_usage_missing_required_tokens():
    """Usage lacking a required token count is removed from the span entirely."""
    exporter = BackendSpanExporter(api_key="test_key")
    incomplete_usage = {
        "input_tokens": 1,
        "total_tokens": 3,
        "input_tokens_details": {"cached_tokens": 0},
        "output_tokens_details": {"reasoning_tokens": 0},
    }
    payload = {
        "object": "trace.span",
        "span_data": {"type": "generation", "usage": incomplete_usage},
    }

    result = exporter._sanitize_for_openai_tracing_api(payload)

    assert result["span_data"] == {"type": "generation"}
    exporter.close()
562+
563+
564+
def test_sanitize_for_openai_tracing_api_rejects_boolean_token_counts():
    """Boolean token counts are not valid JSON numbers, so usage is dropped."""
    exporter = BackendSpanExporter(api_key="test_key")
    bool_usage = {
        "input_tokens": True,
        "output_tokens": False,
        "input_tokens_details": {"cached_tokens": 0},
        "output_tokens_details": {"reasoning_tokens": 0},
    }
    payload = {
        "object": "trace.span",
        "span_data": {"type": "generation", "usage": bool_usage},
    }

    result = exporter._sanitize_for_openai_tracing_api(payload)

    assert result["span_data"] == {"type": "generation"}
    exporter.close()
411583

412584

0 commit comments

Comments
 (0)