Skip to content

Commit 6375b97

Browse files
authored
fix: #2503 filter reasoning items from nested handoff input (#2508)
1 parent a23fe13 commit 6375b97

4 files changed

Lines changed: 176 additions & 0 deletions

File tree

src/agents/handoffs/history.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,8 @@
3232
# Input item type names treated as "summary only". Per the tests added in this commit,
# such items are represented in the nested-handoff summary transcript and are not
# forwarded as raw items (the usage site is outside this hunk — confirm in history.py).
_SUMMARY_ONLY_INPUT_TYPES = {
3333
"function_call",
3434
"function_call_output",
35+
# Reasoning items can become orphaned once the other summarized items around them
# are filtered out, so they are listed here alongside those items.
36+
"reasoning",
3537
}
3638

3739

tests/test_agent_runner.py

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from openai import BadRequestError
1414
from openai.types.responses import ResponseFunctionToolCall
1515
from openai.types.responses.response_output_text import AnnotationFileCitation, ResponseOutputText
16+
from openai.types.responses.response_reasoning_item import ResponseReasoningItem, Summary
1617
from typing_extensions import TypedDict
1718

1819
from agents import (
@@ -585,6 +586,60 @@ def capture_model_input(data):
585586
assert has_function_call_output
586587

587588

589+
@pytest.mark.asyncio
async def test_nested_handoff_filters_reasoning_items_from_model_input():
    """Nested handoff history must not pass reasoning items to the next model call.

    The first scripted turn emits a reasoning item followed by a handoff call to
    ``delegate``; the second turn emits the final text. Every list-shaped model
    input is recorded through ``call_model_input_filter`` and the second recorded
    input is checked for items whose ``type`` is ``"reasoning"``.
    """
    fake_model = FakeModel()
    delegate = Agent(name="delegate", model=fake_model)
    triage = Agent(name="triage", model=fake_model, handoffs=[delegate])

    # Turn 1: reasoning + handoff call. Turn 2: plain text that ends the run.
    reasoning = ResponseReasoningItem(
        id="reasoning_1",
        type="reasoning",
        summary=[Summary(text="Thinking about a handoff.", type="summary_text")],
    )
    first_turn = [reasoning, get_handoff_tool_call(delegate)]
    second_turn = [get_text_message("done")]
    fake_model.add_multiple_turn_outputs([first_turn, second_turn])

    captured_inputs: list[list[dict[str, Any]]] = []

    def capture_model_input(data):
        # Record only dict-shaped entries; the model data is returned untouched.
        model_input = data.model_data.input
        if isinstance(model_input, list):
            captured_inputs.append([entry for entry in model_input if isinstance(entry, dict)])
        return data.model_data

    run_config = RunConfig(
        nest_handoff_history=True,
        call_model_input_filter=capture_model_input,
    )
    result = await Runner.run(triage, input="user_message", run_config=run_config)

    assert result.final_output == "done"
    assert len(captured_inputs) >= 2
    # Second captured input — presumably the delegate's first model call after the handoff.
    delegate_input = captured_inputs[1]
    seen_types = [
        entry["type"] for entry in delegate_input if isinstance(entry.get("type"), str)
    ]
    assert "reasoning" not in seen_types
642+
588643
@pytest.mark.asyncio
589644
async def test_resume_preserves_filtered_model_input_after_handoff():
590645
model = FakeModel()

tests/test_agent_runner_streamed.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import pytest
88
from openai.types.responses import ResponseFunctionToolCall
9+
from openai.types.responses.response_reasoning_item import ResponseReasoningItem, Summary
910
from typing_extensions import TypedDict
1011

1112
from agents import (
@@ -333,6 +334,61 @@ async def test_handoff_filters():
333334
)
334335

335336

337+
@pytest.mark.asyncio
async def test_streamed_nested_handoff_filters_reasoning_items_from_model_input():
    """Streaming variant: nested handoff history must not pass on reasoning items.

    The first scripted turn emits a reasoning item followed by a handoff call to
    ``delegate``; the second turn emits the final text. Every list-shaped model
    input is recorded through ``call_model_input_filter``, the stream is drained,
    and the second recorded input is checked for ``"reasoning"``-typed items.
    """
    fake_model = FakeModel()
    delegate = Agent(name="delegate", model=fake_model)
    triage = Agent(name="triage", model=fake_model, handoffs=[delegate])

    # Turn 1: reasoning + handoff call. Turn 2: plain text that ends the run.
    reasoning = ResponseReasoningItem(
        id="reasoning_1",
        type="reasoning",
        summary=[Summary(text="Thinking about a handoff.", type="summary_text")],
    )
    first_turn = [reasoning, get_handoff_tool_call(delegate)]
    second_turn = [get_text_message("done")]
    fake_model.add_multiple_turn_outputs([first_turn, second_turn])

    captured_inputs: list[list[dict[str, Any]]] = []

    def capture_model_input(data):
        # Record only dict-shaped entries; the model data is returned untouched.
        model_input = data.model_data.input
        if isinstance(model_input, list):
            captured_inputs.append([entry for entry in model_input if isinstance(entry, dict)])
        return data.model_data

    run_config = RunConfig(
        nest_handoff_history=True,
        call_model_input_filter=capture_model_input,
    )
    result = Runner.run_streamed(triage, input="user_message", run_config=run_config)
    # The stream is consumed in full before any result field is inspected.
    await consume_stream(result)

    assert result.final_output == "done"
    assert len(captured_inputs) >= 2
    # Second captured input — presumably the delegate's first model call after the handoff.
    delegate_input = captured_inputs[1]
    seen_types = [
        entry["type"] for entry in delegate_input if isinstance(entry.get("type"), str)
    ]
    assert "reasoning" not in seen_types
391+
336392
@pytest.mark.asyncio
337393
async def test_async_input_filter_supported():
338394
# DO NOT rename this without updating pyproject.toml

tests/test_handoff_history_duplication.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,15 @@
1212
ResponseOutputMessage,
1313
ResponseOutputText,
1414
)
15+
from openai.types.responses.response_reasoning_item import ResponseReasoningItem, Summary
1516

1617
from agents import Agent
1718
from agents.handoffs import HandoffInputData, nest_handoff_history
1819
from agents.items import (
1920
HandoffCallItem,
2021
HandoffOutputItem,
2122
MessageOutputItem,
23+
ReasoningItem,
2224
ToolApprovalItem,
2325
ToolCallItem,
2426
ToolCallOutputItem,
@@ -97,6 +99,16 @@ def _create_message_item(agent: Agent) -> MessageOutputItem:
9799
return MessageOutputItem(agent=agent, raw_item=raw_item, type="message_output_item")
98100

99101

102+
def _create_reasoning_item(agent: Agent) -> ReasoningItem:
    """Build a ``ReasoningItem`` fixture attached to ``agent``.

    The wrapped raw item is a ``ResponseReasoningItem`` with a fixed id and a
    single ``summary_text`` entry.
    """
    summary_parts = [Summary(text="Thinking about handoff", type="summary_text")]
    return ReasoningItem(
        agent=agent,
        raw_item=ResponseReasoningItem(
            id="reasoning_123",
            type="reasoning",
            summary=summary_parts,
        ),
        type="reasoning_item",
    )
110+
111+
100112
def _create_tool_approval_item(agent: Agent) -> ToolApprovalItem:
101113
"""Create a mock ToolApprovalItem."""
102114
raw_item = {
@@ -157,6 +169,28 @@ def test_tool_approval_items_are_skipped(self):
157169
assert len(nested.pre_handoff_items) == 0
158170
assert nested.input_items == ()
159171

172+
def test_pre_handoff_reasoning_items_are_filtered(self):
    """A ReasoningItem in ``pre_handoff_items`` is dropped by ``nest_handoff_history``.

    The reasoning is expected to show up in the summary transcript (the first
    entry of ``input_history``) instead of being forwarded as a raw item.
    """
    agent = _create_mock_agent()
    reasoning_item = _create_reasoning_item(agent)

    nested = nest_handoff_history(
        HandoffInputData(
            input_history=({"role": "user", "content": "Hello"},),
            pre_handoff_items=(reasoning_item,),
            new_items=(),
        )
    )

    assert len(nested.pre_handoff_items) == 0
    # The first history entry is read as the summary message.
    head = nested.input_history[0]
    assert isinstance(head, dict)
    transcript = str(head.get("content", ""))
    assert "reasoning" in transcript
193+
160194
def test_new_items_handoff_output_is_filtered_for_input(self):
161195
"""Verify HandoffOutputItem in new_items is filtered from input_items.
162196
@@ -209,6 +243,35 @@ def test_message_items_are_preserved_in_new_items(self):
209243
assert len(nested.input_items) == 1, "MessageOutputItem should be preserved in input_items"
210244
assert isinstance(nested.input_items[0], MessageOutputItem)
211245

246+
def test_reasoning_items_are_filtered_from_input_items(self):
    """A ReasoningItem in ``new_items`` does not survive into ``input_items``.

    The reasoning is expected to be summarized in the conversation transcript
    (the first entry of ``input_history``) rather than forwarded verbatim in the
    nested handoff model input.
    """
    agent = _create_mock_agent()
    produced_items = (
        _create_reasoning_item(agent),
        _create_handoff_call_item(agent),
        _create_handoff_output_item(agent),
    )

    nested = nest_handoff_history(
        HandoffInputData(
            input_history=({"role": "user", "content": "Hello"},),
            pre_handoff_items=(),
            new_items=produced_items,
        )
    )

    assert nested.input_items is not None
    reasoning_present = any(isinstance(entry, ReasoningItem) for entry in nested.input_items)
    assert not reasoning_present, "ReasoningItem should be filtered from input_items"

    # The first history entry is read as the summary message.
    head = nested.input_history[0]
    assert isinstance(head, dict)
    transcript = str(head.get("content", ""))
    assert "reasoning" in transcript
274+
212275
def test_summary_contains_filtered_items_as_text(self):
213276
"""Verify the summary message contains the filtered tool items as text.
214277

0 commit comments

Comments
 (0)