Skip to content

Commit 83b3833

Browse files
authored
fix #2151 shield server-managed handoffs from unsupported history rewrites (#2747)
1 parent 5d05d5d commit 83b3833

7 files changed

Lines changed: 243 additions & 5 deletions

File tree

src/agents/handoffs/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,17 @@ class Handoff(Generic[TContext, TAgent]):
134134
input history plus ``input_items`` when provided, otherwise it receives ``new_items``. Use
135135
``input_items`` to filter model input while keeping ``new_items`` intact for session history.
136136
IMPORTANT: in streaming mode, we will not stream anything as a result of this function. The
137-
items generated before will already have been streamed.
137+
items generated before will already have been streamed. Server-managed conversations
138+
(`conversation_id`, `previous_response_id`, or `auto_previous_response_id`) do not support
139+
handoff input filters.
138140
"""
139141

140142
nest_handoff_history: bool | None = None
141-
"""Override the run-level ``nest_handoff_history`` behavior for this handoff only."""
143+
"""Override the run-level ``nest_handoff_history`` behavior for this handoff only.
144+
145+
Server-managed conversations (`conversation_id`, `previous_response_id`, or
146+
`auto_previous_response_id`) automatically disable nested handoff history with a warning.
147+
"""
142148

143149
strict_json_schema: bool = True
144150
"""Whether the input JSON schema is in strict mode. We strongly recommend setting this to True

src/agents/run.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,7 @@ def _finalize_result(result: RunResult) -> RunResult:
837837
hooks=hooks,
838838
context_wrapper=context_wrapper,
839839
run_config=run_config,
840+
server_manages_conversation=server_conversation_tracker is not None,
840841
run_state=run_state,
841842
)
842843

src/agents/run_config.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -157,13 +157,17 @@ class RunConfig:
157157
handoff_input_filter: HandoffInputFilter | None = None
158158
"""A global input filter to apply to all handoffs. If `Handoff.input_filter` is set, then that
159159
will take precedence. The input filter allows you to edit the inputs that are sent to the new
160-
agent. See the documentation in `Handoff.input_filter` for more details.
160+
agent. See the documentation in `Handoff.input_filter` for more details. Server-managed
161+
conversations (`conversation_id`, `previous_response_id`, or `auto_previous_response_id`)
162+
do not support handoff input filters.
161163
"""
162164

163165
nest_handoff_history: bool = False
164166
"""Opt-in beta: wrap prior run history in a single assistant message before handing off when no
165167
custom input filter is set. This is disabled by default while we stabilize nested handoffs; set
166-
to True to enable the collapsed transcript behavior.
168+
to True to enable the collapsed transcript behavior. Server-managed conversations
169+
(`conversation_id`, `previous_response_id`, or `auto_previous_response_id`) automatically
170+
disable this behavior with a warning.
167171
"""
168172

169173
handoff_history_mapper: HandoffHistoryMapper | None = None

src/agents/run_internal/run_loop.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,7 @@ async def _save_stream_items_without_count(
723723
hooks=hooks,
724724
context_wrapper=context_wrapper,
725725
run_config=run_config,
726+
server_manages_conversation=server_conversation_tracker is not None,
726727
run_state=run_state,
727728
)
728729

@@ -1587,6 +1588,7 @@ async def rewind_model_request() -> None:
15871588
context_wrapper=context_wrapper,
15881589
run_config=run_config,
15891590
tool_use_tracker=tool_use_tracker,
1591+
server_manages_conversation=server_conversation_tracker is not None,
15901592
event_queue=streamed_result._event_queue,
15911593
)
15921594

@@ -1717,6 +1719,7 @@ async def run_single_turn(
17171719
context_wrapper=context_wrapper,
17181720
run_config=run_config,
17191721
tool_use_tracker=tool_use_tracker,
1722+
server_manages_conversation=server_conversation_tracker is not None,
17201723
)
17211724

17221725

src/agents/run_internal/turn_resolution.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
from ..agent_output import AgentOutputSchemaBase
4444
from ..agent_tool_state import get_agent_tool_state_scope, peek_agent_tool_run_result
4545
from ..exceptions import ModelBehaviorError, UserError
46-
from ..handoffs import Handoff, HandoffInputData, nest_handoff_history
46+
from ..handoffs import Handoff, HandoffInputData, HandoffInputFilter, nest_handoff_history
4747
from ..items import (
4848
CompactionItem,
4949
HandoffCallItem,
@@ -285,6 +285,38 @@ async def execute_final_output(
285285
)
286286

287287

288+
def _resolve_server_managed_handoff_behavior(
289+
*,
290+
handoff: Handoff[Any, Agent[Any]],
291+
from_agent: Agent[Any],
292+
to_agent: Agent[Any],
293+
run_config: RunConfig,
294+
server_manages_conversation: bool,
295+
input_filter: HandoffInputFilter | None,
296+
should_nest_history: bool,
297+
) -> tuple[HandoffInputFilter | None, bool]:
298+
if not server_manages_conversation:
299+
return input_filter, should_nest_history
300+
301+
if input_filter is not None:
302+
raise UserError(
303+
"Server-managed conversations do not support handoff input filters. "
304+
"Remove Handoff.input_filter or RunConfig.handoff_input_filter, "
305+
"or disable conversation_id, previous_response_id, and auto_previous_response_id."
306+
)
307+
308+
if not should_nest_history:
309+
return input_filter, should_nest_history
310+
311+
logger.warning(
312+
"Server-managed conversations do not support nest_handoff_history for handoff "
313+
"%s -> %s. Disabling nested handoff history and continuing with delta-only input.",
314+
from_agent.name,
315+
to_agent.name,
316+
)
317+
return input_filter, False
318+
319+
288320
async def execute_handoffs(
289321
*,
290322
public_agent: Agent[TContext],
@@ -296,6 +328,7 @@ async def execute_handoffs(
296328
hooks: RunHooks[TContext],
297329
context_wrapper: RunContextWrapper[TContext],
298330
run_config: RunConfig,
331+
server_manages_conversation: bool = False,
299332
nest_handoff_history_fn: Callable[..., HandoffInputData] | None = None,
300333
) -> SingleStepResult:
301334
"""Execute a handoff and prepare the next turn for the new agent."""
@@ -375,6 +408,15 @@ def nest_history(data: HandoffInputData, mapper: Any | None = None) -> HandoffIn
375408
if handoff_nest_setting is not None
376409
else run_config.nest_handoff_history
377410
)
411+
input_filter, should_nest_history = _resolve_server_managed_handoff_behavior(
412+
handoff=handoff,
413+
from_agent=public_agent,
414+
to_agent=new_agent,
415+
run_config=run_config,
416+
server_manages_conversation=server_manages_conversation,
417+
input_filter=input_filter,
418+
should_nest_history=should_nest_history,
419+
)
378420
handoff_input_data: HandoffInputData | None = None
379421
session_step_items: list[RunItem] | None = None
380422
if input_filter or should_nest_history:
@@ -510,6 +552,7 @@ async def execute_tools_and_side_effects(
510552
hooks: RunHooks[TContext],
511553
context_wrapper: RunContextWrapper[TContext],
512554
run_config: RunConfig,
555+
server_manages_conversation: bool = False,
513556
) -> SingleStepResult:
514557
"""Run one turn of the loop, coordinating tools, approvals, guardrails, and handoffs."""
515558
public_agent = bindings.public_agent
@@ -603,6 +646,7 @@ async def execute_tools_and_side_effects(
603646
hooks=hooks,
604647
context_wrapper=context_wrapper,
605648
run_config=run_config,
649+
server_manages_conversation=server_manages_conversation,
606650
)
607651

608652
tool_final_output = await _maybe_finalize_from_tool_results(
@@ -679,6 +723,7 @@ async def resolve_interrupted_turn(
679723
hooks: RunHooks[TContext],
680724
context_wrapper: RunContextWrapper[TContext],
681725
run_config: RunConfig,
726+
server_manages_conversation: bool = False,
682727
run_state: RunState | None = None,
683728
nest_handoff_history_fn: Callable[..., HandoffInputData] | None = None,
684729
) -> SingleStepResult:
@@ -1337,6 +1382,7 @@ def _add_unmatched_pending(approval: ToolApprovalItem) -> None:
13371382
hooks=hooks,
13381383
context_wrapper=context_wrapper,
13391384
run_config=run_config,
1385+
server_manages_conversation=server_manages_conversation,
13401386
nest_handoff_history_fn=nest_history,
13411387
)
13421388

@@ -1807,6 +1853,7 @@ async def get_single_step_result_from_response(
18071853
context_wrapper: RunContextWrapper[TContext],
18081854
run_config: RunConfig,
18091855
tool_use_tracker,
1856+
server_manages_conversation: bool = False,
18101857
event_queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel] | None = None,
18111858
) -> SingleStepResult:
18121859
item_agent = bindings.public_agent
@@ -1838,4 +1885,5 @@ async def get_single_step_result_from_response(
18381885
hooks=hooks,
18391886
context_wrapper=context_wrapper,
18401887
run_config=run_config,
1888+
server_manages_conversation=server_manages_conversation,
18411889
)

tests/test_agent_runner.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,21 @@ async def run_execute_approved_tools(
147147
return generated_items
148148

149149

150+
async def _run_agent_with_optional_streaming(
    agent: Agent[Any],
    *,
    input: str | list[TResponseInputItem],
    streamed: bool,
    **kwargs: Any,
):
    """Run ``agent`` through either ``Runner.run`` or ``Runner.run_streamed``.

    Lets one parametrized test body cover both execution modes. Extra keyword arguments are
    forwarded unchanged to whichever runner entry point is used.

    Returns:
        The awaited ``Runner.run`` result when ``streamed`` is false; otherwise the object
        returned by ``Runner.run_streamed`` after all of its stream events have been consumed.
    """
    if not streamed:
        return await Runner.run(agent, input=input, **kwargs)

    streamed_result = Runner.run_streamed(agent, input=input, **kwargs)
    # Consume every event and discard it; callers only inspect the result object.
    async for _event in streamed_result.stream_events():
        pass
    return streamed_result
163+
164+
150165
def test_set_default_agent_runner_roundtrip():
151166
runner = AgentRunner()
152167
set_default_agent_runner(runner)
@@ -1345,6 +1360,101 @@ async def test_opt_in_handoff_history_accumulates_across_multiple_handoffs():
13451360
assert "user_question" in summary_content
13461361

13471362

1363+
@pytest.mark.asyncio
@pytest.mark.parametrize("streamed", [False, True], ids=["non_streamed", "streamed"])
@pytest.mark.parametrize("nest_source", ["run_config", "handoff"], ids=["run_config", "handoff"])
async def test_server_managed_handoff_history_auto_disables_with_warning(
    streamed: bool,
    nest_source: str,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """Nested handoff history requested on a server-managed run is dropped with a warning.

    ``nest_handoff_history=True`` is supplied either on the handoff or on the run config, and
    the run uses ``auto_previous_response_id=True``. The run must still finish, a warning must
    be logged, and the delegate must receive only the handoff's function-call output.
    """
    delegate_model = FakeModel()
    triage_model = FakeModel()
    delegate = Agent(name="delegate", model=delegate_model)

    # Enable nesting from exactly one place so both configuration routes are exercised.
    triage_handoffs: list[Agent[Any] | Handoff[Any, Any]]
    if nest_source == "handoff":
        run_config = RunConfig()
        triage_handoffs = [handoff(delegate, nest_handoff_history=True)]
    else:
        run_config = RunConfig(nest_handoff_history=True)
        triage_handoffs = [delegate]

    triage = Agent(name="triage", model=triage_model, handoffs=triage_handoffs)
    triage_turn = [get_text_message("triage summary"), get_handoff_tool_call(delegate)]
    triage_model.add_multiple_turn_outputs([triage_turn])
    delegate_model.add_multiple_turn_outputs([[get_text_message("done")]])

    with caplog.at_level("WARNING", logger="openai.agents"):
        result = await _run_agent_with_optional_streaming(
            triage,
            input="user_message",
            streamed=streamed,
            run_config=run_config,
            auto_previous_response_id=True,
        )

    assert result.final_output == "done"
    assert "do not support nest_handoff_history" in caplog.text

    first_turn = delegate_model.first_turn_args
    assert first_turn is not None
    delegate_input = first_turn["input"]
    assert isinstance(delegate_input, list)
    assert len(delegate_input) == 1

    handoff_output = delegate_input[0]
    assert handoff_output.get("type") == "function_call_output"
    assert "delegate" in str(handoff_output.get("output"))

    # No collapsed-transcript assistant message may have been injected for the delegate.
    nested_summaries = [
        item
        for item in delegate_input
        if isinstance(item, dict)
        and item.get("role") == "assistant"
        and "<CONVERSATION HISTORY>" in str(item.get("content"))
    ]
    assert not nested_summaries
1413+
1414+
1415+
@pytest.mark.asyncio
@pytest.mark.parametrize("streamed", [False, True], ids=["non_streamed", "streamed"])
@pytest.mark.parametrize("filter_source", ["run_config", "handoff"], ids=["run_config", "handoff"])
async def test_server_managed_handoff_input_filters_still_raise(
    streamed: bool,
    filter_source: str,
) -> None:
    """A handoff input filter on a server-managed run raises ``UserError``.

    The filter is attached either to the handoff or to the run config, and the run uses
    ``auto_previous_response_id=True``. The error must surface in both streamed and
    non-streamed modes, and the delegate model must record no first-turn arguments.
    """

    def passthrough_filter(data: HandoffInputData) -> HandoffInputData:
        # An identity filter is enough: the test attaches it and expects the run to raise.
        return data

    delegate_model = FakeModel()
    triage_model = FakeModel()
    delegate = Agent(name="delegate", model=delegate_model)

    # Attach the filter from exactly one place so both configuration routes are exercised.
    triage_handoffs: list[Agent[Any] | Handoff[Any, Any]]
    if filter_source == "handoff":
        run_config = RunConfig()
        triage_handoffs = [handoff(delegate, input_filter=passthrough_filter)]
    else:
        run_config = RunConfig(handoff_input_filter=passthrough_filter)
        triage_handoffs = [delegate]

    triage = Agent(name="triage", model=triage_model, handoffs=triage_handoffs)
    triage_turn = [get_text_message("triage summary"), get_handoff_tool_call(delegate)]
    triage_model.add_multiple_turn_outputs([triage_turn])
    delegate_model.add_multiple_turn_outputs([[get_text_message("done")]])

    expected_error = "Server-managed conversations do not support handoff input filters"
    with pytest.raises(UserError, match=expected_error):
        await _run_agent_with_optional_streaming(
            triage,
            input="user_message",
            streamed=streamed,
            run_config=run_config,
            auto_previous_response_id=True,
        )

    assert delegate_model.first_turn_args is None
1456+
1457+
13481458
@pytest.mark.asyncio
13491459
async def test_async_input_filter_supported():
13501460
# DO NOT rename this without updating pyproject.toml

tests/test_run_impl_resume_paths.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,72 @@ async def fake_run_single_turn(**_kwargs):
232232
assert "function_call" in saved_types
233233

234234

235+
@pytest.mark.parametrize(
    ("conversation_id", "previous_response_id", "auto_previous_response_id"),
    [
        ("conv_1", None, False),
        (None, "resp_prev", False),
        (None, None, True),
    ],
)
@pytest.mark.asyncio
async def test_resumed_interruption_passes_server_managed_conversation_flag(
    monkeypatch: pytest.MonkeyPatch,
    conversation_id: str | None,
    previous_response_id: str | None,
    auto_previous_response_id: bool,
) -> None:
    """Resuming an interrupted run forwards ``server_manages_conversation=True``.

    Each parametrized case sets exactly one of ``conversation_id``, ``previous_response_id``
    or ``auto_previous_response_id`` on the ``RunState``. ``resolve_interrupted_turn`` is
    replaced with a stub that records the flag it receives and returns a final output.
    """
    resume_agent = Agent(name="resume-agent")
    wrapper: RunContextWrapper[dict[str, str]] = RunContextWrapper(context={})
    resumed_state = RunState(
        context=wrapper,
        original_input="input",
        starting_agent=resume_agent,
        max_turns=1,
        conversation_id=conversation_id,
        previous_response_id=previous_response_id,
        auto_previous_response_id=auto_previous_response_id,
    )

    # Put the state into an interrupted step with one prior model response and an empty
    # processed response before handing it to the runner.
    resumed_state._current_step = NextStepInterruption(interruptions=[])
    resumed_state._model_responses = [
        ModelResponse(output=[], usage=Usage(), response_id="resp_1"),
    ]
    resumed_state._last_processed_response = ProcessedResponse(
        new_items=[],
        handoffs=[],
        functions=[],
        computer_actions=[],
        local_shell_calls=[],
        shell_calls=[],
        apply_patch_calls=[],
        tools_used=[],
        mcp_approval_requests=[],
        interruptions=[],
    )

    observed_flags: list[bool] = []

    async def fake_resolve_interrupted_turn(**kwargs: object) -> SingleStepResult:
        # Record the flag first, then end the run immediately with a final output.
        observed_flags.append(cast(bool, kwargs["server_manages_conversation"]))
        stub_response = ModelResponse(output=[], usage=Usage(), response_id="resp_resume")
        return SingleStepResult(
            original_input="input",
            model_response=stub_response,
            pre_step_items=[],
            new_step_items=[],
            next_step=NextStepFinalOutput("done"),
            tool_input_guardrail_results=[],
            tool_output_guardrail_results=[],
        )

    monkeypatch.setattr(run_module, "resolve_interrupted_turn", fake_resolve_interrupted_turn)

    agent_runner = run_module.AgentRunner()
    result = await agent_runner.run(resume_agent, resumed_state, run_config=RunConfig())

    assert result.final_output == "done"
    assert observed_flags == [True]
299+
300+
235301
@pytest.mark.asyncio
236302
async def test_resumed_approval_does_not_duplicate_session_items() -> None:
237303
async def test_tool() -> str:

0 commit comments

Comments
 (0)