Skip to content

Commit 40aada1

Browse files
authored
fix: #2806 sanitize AnyLLM responses replay input before validation (#2813)
1 parent 5e0e6d2 commit 40aada1

File tree

2 files changed: 182 additions (+182) and 19 deletions (-19)

2 files changed: 182 additions (+182) and 19 deletions (-19)

src/agents/extensions/models/any_llm_model.py

Lines changed: 37 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -813,7 +813,7 @@ async def _fetch_responses_response(
813813

814814
list_input = ItemHelpers.input_to_new_input_list(input)
815815
list_input = _to_dump_compatible(list_input)
816-
list_input = self._remove_openai_responses_api_incompatible_fields(list_input)
816+
list_input = self._sanitize_any_llm_responses_input(list_input)
817817

818818
parallel_tool_calls = (
819819
True
@@ -1095,31 +1095,51 @@ def _make_any_llm_responses_params(payload: dict[str, Any]) -> Any:
10951095
AnyLLMResponsesParams = any_llm_responses.ResponsesParams
10961096
return AnyLLMResponsesParams(**payload)
10971097

1098-
def _remove_openai_responses_api_incompatible_fields(self, list_input: list[Any]) -> list[Any]:
1099-
has_provider_data = any(
1100-
isinstance(item, dict) and item.get("provider_data") for item in list_input
1101-
)
1102-
if not has_provider_data:
1103-
return list_input
1098+
def _sanitize_any_llm_responses_input(self, list_input: list[Any]) -> list[Any]:
1099+
"""Normalize replayed Responses input into a shape accepted by any-llm.
11041100
1101+
any-llm validates replayed items against OpenAI-style input models before the request is
1102+
handed to the underlying provider. SDK-produced replay items can legitimately carry
1103+
adapter-only fields such as provider_data or explicit nulls like status=None, which those
1104+
models reject. Strip those fields here while preserving valid replay content.
1105+
"""
11051106
result: list[Any] = []
11061107
for item in list_input:
1107-
cleaned = self._clean_item_for_openai(item)
1108+
cleaned = self._sanitize_any_llm_responses_value(item)
11081109
if cleaned is not None:
11091110
result.append(cleaned)
11101111
return result
11111112

1112-
def _clean_item_for_openai(self, item: Any) -> Any | None:
1113-
if not isinstance(item, dict):
1114-
return item
1113+
def _sanitize_any_llm_responses_value(self, value: Any) -> Any | None:
1114+
if isinstance(value, list):
1115+
sanitized_list = []
1116+
for item in value:
1117+
cleaned_item = self._sanitize_any_llm_responses_value(item)
1118+
if cleaned_item is not None:
1119+
sanitized_list.append(cleaned_item)
1120+
return sanitized_list
1121+
1122+
if not isinstance(value, dict):
1123+
return value
11151124

1116-
if item.get("type") == "reasoning" and item.get("provider_data"):
1125+
# Provider-specific reasoning payloads are not replay-safe across adapter boundaries.
1126+
if value.get("type") == "reasoning" and value.get("provider_data"):
11171127
return None
1118-
if item.get("id") == FAKE_RESPONSES_ID:
1119-
del item["id"]
1120-
if "provider_data" in item:
1121-
del item["provider_data"]
1122-
return item
1128+
1129+
cleaned: dict[str, Any] = {}
1130+
for key, item_value in value.items():
1131+
if key == "provider_data":
1132+
continue
1133+
if key == "id" and item_value == FAKE_RESPONSES_ID:
1134+
continue
1135+
if item_value is None:
1136+
continue
1137+
1138+
sanitized = self._sanitize_any_llm_responses_value(item_value)
1139+
if sanitized is not None:
1140+
cleaned[key] = sanitized
1141+
1142+
return cleaned
11231143

11241144
def _attach_logprobs_to_output(self, output_items: list[Any], logprobs: list[Any]) -> None:
11251145
from openai.types.responses import ResponseOutputMessage, ResponseOutputText

tests/models/test_any_llm_model.py

Lines changed: 145 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import sys
55
import types as pytypes
66
from collections.abc import AsyncIterator
7-
from typing import Any
7+
from typing import Any, Literal, cast
88

99
import pytest
1010
from openai.types.chat import (
@@ -25,9 +25,18 @@
2525
)
2626
from pydantic import BaseModel
2727

28-
from agents import ModelSettings, ModelTracing, __version__
28+
from agents import (
29+
Agent,
30+
Handoff,
31+
ModelSettings,
32+
ModelTracing,
33+
Tool,
34+
TResponseInputItem,
35+
__version__,
36+
)
2937
from agents.exceptions import UserError
3038
from agents.models.chatcmpl_helpers import HEADERS_OVERRIDE
39+
from agents.models.fake_id import FAKE_RESPONSES_ID
3140

3241

3342
class FakeAnyLLMProvider:
@@ -583,6 +592,140 @@ async def test_any_llm_prompt_requests_fail_fast(monkeypatch) -> None:
583592
)
584593

585594

595+
def test_any_llm_responses_input_sanitizer_strips_none_fields_from_reasoning_items() -> None:
596+
pytest.importorskip(
597+
"any_llm",
598+
reason="`any-llm-sdk` is only available when the optional dependency is installed.",
599+
)
600+
from agents.extensions.models.any_llm_model import AnyLLMModel
601+
602+
model = AnyLLMModel(model="openai/gpt-5.4-mini")
603+
raw_input = [
604+
{
605+
"id": "rid1",
606+
"summary": [{"text": "why", "type": "summary_text"}],
607+
"type": "reasoning",
608+
"content": [{"type": "reasoning_text", "text": "thinking"}],
609+
"status": None,
610+
"encrypted_content": None,
611+
}
612+
]
613+
614+
cleaned = model._sanitize_any_llm_responses_input(raw_input)
615+
616+
assert cleaned == [
617+
{
618+
"id": "rid1",
619+
"summary": [{"text": "why", "type": "summary_text"}],
620+
"type": "reasoning",
621+
"content": [{"type": "reasoning_text", "text": "thinking"}],
622+
}
623+
]
624+
625+
ResponsesParams = importlib.import_module("any_llm.types.responses").ResponsesParams
626+
params = ResponsesParams(model="dummy", input=cleaned)
627+
assert isinstance(params.input, list)
628+
629+
630+
@pytest.mark.allow_call_model_methods
631+
@pytest.mark.asyncio
632+
async def test_any_llm_responses_path_sanitizes_replayed_items_before_validation() -> None:
633+
pytest.importorskip(
634+
"any_llm",
635+
reason="`any-llm-sdk` is only available when the optional dependency is installed.",
636+
)
637+
from agents.extensions.models.any_llm_model import AnyLLMModel
638+
639+
class ValidatingProvider:
640+
SUPPORTS_RESPONSES = True
641+
642+
def __init__(self) -> None:
643+
self.private_responses_calls: list[dict[str, Any]] = []
644+
645+
async def aresponses(self, **kwargs: Any) -> Any:
646+
raise AssertionError("public aresponses path should not be used in this test")
647+
648+
async def _aresponses(self, params: Any, **kwargs: Any) -> Response:
649+
self.private_responses_calls.append({"params": params, "kwargs": kwargs})
650+
return _response("Hello from sanitized replay")
651+
652+
class TestAnyLLMModel(AnyLLMModel):
653+
def __init__(self, provider: ValidatingProvider) -> None:
654+
super().__init__(model="openai/gpt-5.4-mini", api="responses")
655+
self._provider = provider
656+
657+
def _get_provider(self) -> Any:
658+
return self._provider
659+
660+
provider = ValidatingProvider()
661+
model = TestAnyLLMModel(provider)
662+
tools: list[Tool] = []
663+
handoffs: list[Handoff[Any, Agent[Any]]] = []
664+
stream_flag: Literal[False] = False
665+
666+
replay_input = cast(
667+
list[TResponseInputItem],
668+
[
669+
{"role": "user", "content": "What's the weather in Tokyo?"},
670+
{
671+
"id": FAKE_RESPONSES_ID,
672+
"summary": [
673+
{"text": "I should call the weather tool first.", "type": "summary_text"}
674+
],
675+
"type": "reasoning",
676+
"content": [{"type": "reasoning_text", "text": "thinking"}],
677+
"status": None,
678+
"provider_data": {"model": "anthropic/fake-responses-model"},
679+
},
680+
{
681+
"id": FAKE_RESPONSES_ID,
682+
"arguments": '{"city": "Tokyo"}',
683+
"call_id": "call_weather_123",
684+
"name": "get_weather",
685+
"type": "function_call",
686+
"status": None,
687+
"provider_data": {"model": "anthropic/fake-responses-model"},
688+
},
689+
{
690+
"type": "function_call_output",
691+
"call_id": "call_weather_123",
692+
"output": "The weather in Tokyo is sunny and 22°C.",
693+
},
694+
],
695+
)
696+
697+
response = await model._fetch_responses_response(
698+
system_instructions=None,
699+
input=replay_input,
700+
model_settings=ModelSettings(),
701+
tools=tools,
702+
output_schema=None,
703+
handoffs=handoffs,
704+
previous_response_id=None,
705+
conversation_id=None,
706+
stream=stream_flag,
707+
prompt=None,
708+
)
709+
710+
assert response.id == "resp_123"
711+
assert len(provider.private_responses_calls) == 1
712+
params = provider.private_responses_calls[0]["params"]
713+
assert params.input == [
714+
{"role": "user", "content": "What's the weather in Tokyo?"},
715+
{
716+
"arguments": '{"city": "Tokyo"}',
717+
"call_id": "call_weather_123",
718+
"name": "get_weather",
719+
"type": "function_call",
720+
},
721+
{
722+
"type": "function_call_output",
723+
"call_id": "call_weather_123",
724+
"output": "The weather in Tokyo is sunny and 22°C.",
725+
},
726+
]
727+
728+
586729
def test_any_llm_provider_passes_api_override() -> None:
587730
pytest.importorskip(
588731
"any_llm",

0 commit comments

Comments (0)