fix #2151 shield server-managed handoffs from unsupported history rewrites (#2747)

seratch · web-flow · commit 83b38333826b · 2026-04-16T03:17:00.000+09:00
diff --git a/src/agents/handoffs/__init__.py b/src/agents/handoffs/__init__.py
@@ -134,11 +134,17 @@ class Handoff(Generic[TContext, TAgent]):
     input history plus ``input_items`` when provided, otherwise it receives ``new_items``. Use
     ``input_items`` to filter model input while keeping ``new_items`` intact for session history.
     IMPORTANT: in streaming mode, we will not stream anything as a result of this function. The
-    items generated before will already have been streamed.
+    items generated before will already have been streamed. Server-managed conversations
+    (`conversation_id`, `previous_response_id`, or `auto_previous_response_id`) do not support
+    handoff input filters; setting one makes the handoff raise `UserError`.
     """
 
     nest_handoff_history: bool | None = None
-    """Override the run-level ``nest_handoff_history`` behavior for this handoff only."""
+    """Override the run-level ``nest_handoff_history`` behavior for this handoff only.
+
+    Server-managed conversations (`conversation_id`, `previous_response_id`, or
+    `auto_previous_response_id`) automatically disable nested handoff history with a warning.
+    """
 
     strict_json_schema: bool = True
     """Whether the input JSON schema is in strict mode. We strongly recommend setting this to True
diff --git a/src/agents/run.py b/src/agents/run.py
@@ -837,6 +837,7 @@ def _finalize_result(result: RunResult) -> RunResult:
                                 hooks=hooks,
                                 context_wrapper=context_wrapper,
                                 run_config=run_config,
+                                server_manages_conversation=server_conversation_tracker is not None,
                                 run_state=run_state,
                             )
 
diff --git a/src/agents/run_config.py b/src/agents/run_config.py
@@ -157,13 +157,17 @@ class RunConfig:
     handoff_input_filter: HandoffInputFilter | None = None
     """A global input filter to apply to all handoffs. If `Handoff.input_filter` is set, then that
     will take precedence. The input filter allows you to edit the inputs that are sent to the new
-    agent. See the documentation in `Handoff.input_filter` for more details.
+    agent. See the documentation in `Handoff.input_filter` for more details. Server-managed
+    conversations (`conversation_id`, `previous_response_id`, or `auto_previous_response_id`)
+    do not support handoff input filters; a handoff that would apply one raises `UserError`.
     """
 
     nest_handoff_history: bool = False
     """Opt-in beta: wrap prior run history in a single assistant message before handing off when no
     custom input filter is set. This is disabled by default while we stabilize nested handoffs; set
-    to True to enable the collapsed transcript behavior.
+    to True to enable the collapsed transcript behavior. Server-managed conversations
+    (`conversation_id`, `previous_response_id`, or `auto_previous_response_id`) automatically
+    disable this behavior with a warning.
     """
 
     handoff_history_mapper: HandoffHistoryMapper | None = None
diff --git a/src/agents/run_internal/run_loop.py b/src/agents/run_internal/run_loop.py
@@ -723,6 +723,7 @@ async def _save_stream_items_without_count(
                         hooks=hooks,
                         context_wrapper=context_wrapper,
                         run_config=run_config,
+                        server_manages_conversation=server_conversation_tracker is not None,
                         run_state=run_state,
                     )
 
@@ -1587,6 +1588,7 @@ async def rewind_model_request() -> None:
         context_wrapper=context_wrapper,
         run_config=run_config,
         tool_use_tracker=tool_use_tracker,
+        server_manages_conversation=server_conversation_tracker is not None,
         event_queue=streamed_result._event_queue,
     )
 
@@ -1717,6 +1719,7 @@ async def run_single_turn(
         context_wrapper=context_wrapper,
         run_config=run_config,
         tool_use_tracker=tool_use_tracker,
+        server_manages_conversation=server_conversation_tracker is not None,
     )
 
 
diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py
@@ -43,7 +43,7 @@
 from ..agent_output import AgentOutputSchemaBase
 from ..agent_tool_state import get_agent_tool_state_scope, peek_agent_tool_run_result
 from ..exceptions import ModelBehaviorError, UserError
-from ..handoffs import Handoff, HandoffInputData, nest_handoff_history
+from ..handoffs import Handoff, HandoffInputData, HandoffInputFilter, nest_handoff_history
 from ..items import (
     CompactionItem,
     HandoffCallItem,
@@ -285,6 +285,38 @@ async def execute_final_output(
     )
 
 
+def _resolve_server_managed_handoff_behavior(
+    *,
+    handoff: Handoff[Any, Agent[Any]],
+    from_agent: Agent[Any],
+    to_agent: Agent[Any],
+    run_config: RunConfig,
+    server_manages_conversation: bool,
+    input_filter: HandoffInputFilter | None,
+    should_nest_history: bool,
+) -> tuple[HandoffInputFilter | None, bool]:
+    if not server_manages_conversation:
+        return input_filter, should_nest_history
+
+    if input_filter is not None:
+        raise UserError(
+            "Server-managed conversations do not support handoff input filters. "
+            "Remove Handoff.input_filter or RunConfig.handoff_input_filter, "
+            "or disable conversation_id, previous_response_id, and auto_previous_response_id."
+        )
+
+    if not should_nest_history:
+        return input_filter, should_nest_history
+
+    logger.warning(
+        "Server-managed conversations do not support nest_handoff_history for handoff "
+        "%s -> %s. Disabling nested handoff history and continuing with delta-only input.",
+        from_agent.name,
+        to_agent.name,
+    )
+    return input_filter, False
+
+
 async def execute_handoffs(
     *,
     public_agent: Agent[TContext],
@@ -296,6 +328,7 @@ async def execute_handoffs(
     hooks: RunHooks[TContext],
     context_wrapper: RunContextWrapper[TContext],
     run_config: RunConfig,
+    server_manages_conversation: bool = False,
     nest_handoff_history_fn: Callable[..., HandoffInputData] | None = None,
 ) -> SingleStepResult:
     """Execute a handoff and prepare the next turn for the new agent."""
@@ -375,6 +408,15 @@ def nest_history(data: HandoffInputData, mapper: Any | None = None) -> HandoffIn
             if handoff_nest_setting is not None
             else run_config.nest_handoff_history
         )
+        input_filter, should_nest_history = _resolve_server_managed_handoff_behavior(
+            handoff=handoff,
+            from_agent=public_agent,
+            to_agent=new_agent,
+            run_config=run_config,
+            server_manages_conversation=server_manages_conversation,
+            input_filter=input_filter,
+            should_nest_history=should_nest_history,
+        )
         handoff_input_data: HandoffInputData | None = None
         session_step_items: list[RunItem] | None = None
         if input_filter or should_nest_history:
@@ -510,6 +552,7 @@ async def execute_tools_and_side_effects(
     hooks: RunHooks[TContext],
     context_wrapper: RunContextWrapper[TContext],
     run_config: RunConfig,
+    server_manages_conversation: bool = False,
 ) -> SingleStepResult:
     """Run one turn of the loop, coordinating tools, approvals, guardrails, and handoffs."""
     public_agent = bindings.public_agent
@@ -603,6 +646,7 @@ async def execute_tools_and_side_effects(
             hooks=hooks,
             context_wrapper=context_wrapper,
             run_config=run_config,
+            server_manages_conversation=server_manages_conversation,
         )
 
     tool_final_output = await _maybe_finalize_from_tool_results(
@@ -679,6 +723,7 @@ async def resolve_interrupted_turn(
     hooks: RunHooks[TContext],
     context_wrapper: RunContextWrapper[TContext],
     run_config: RunConfig,
+    server_manages_conversation: bool = False,
     run_state: RunState | None = None,
     nest_handoff_history_fn: Callable[..., HandoffInputData] | None = None,
 ) -> SingleStepResult:
@@ -1337,6 +1382,7 @@ def _add_unmatched_pending(approval: ToolApprovalItem) -> None:
             hooks=hooks,
             context_wrapper=context_wrapper,
             run_config=run_config,
+            server_manages_conversation=server_manages_conversation,
             nest_handoff_history_fn=nest_history,
         )
 
@@ -1807,6 +1853,7 @@ async def get_single_step_result_from_response(
     context_wrapper: RunContextWrapper[TContext],
     run_config: RunConfig,
     tool_use_tracker,
+    server_manages_conversation: bool = False,
     event_queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel] | None = None,
 ) -> SingleStepResult:
     item_agent = bindings.public_agent
@@ -1838,4 +1885,5 @@ async def get_single_step_result_from_response(
         hooks=hooks,
         context_wrapper=context_wrapper,
         run_config=run_config,
+        server_manages_conversation=server_manages_conversation,
     )
diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py
@@ -147,6 +147,21 @@ async def run_execute_approved_tools(
     return generated_items
 
 
+async def _run_agent_with_optional_streaming(
+    agent: Agent[Any],
+    *,
+    input: str | list[TResponseInputItem],
+    streamed: bool,
+    **kwargs: Any,
+):
+    if streamed:
+        result = Runner.run_streamed(agent, input=input, **kwargs)
+        async for _ in result.stream_events():
+            pass
+        return result
+    return await Runner.run(agent, input=input, **kwargs)
+
+
 def test_set_default_agent_runner_roundtrip():
     runner = AgentRunner()
     set_default_agent_runner(runner)
@@ -1345,6 +1360,101 @@ async def test_opt_in_handoff_history_accumulates_across_multiple_handoffs():
     assert "user_question" in summary_content
 
 
+@pytest.mark.asyncio
+@pytest.mark.parametrize("streamed", [False, True], ids=["non_streamed", "streamed"])
+@pytest.mark.parametrize("nest_source", ["run_config", "handoff"], ids=["run_config", "handoff"])
+async def test_server_managed_handoff_history_auto_disables_with_warning(
+    streamed: bool,
+    nest_source: str,
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    triage_model = FakeModel()
+    delegate_model = FakeModel()
+    delegate = Agent(name="delegate", model=delegate_model)
+    # Request nested history on the handoff itself or run-wide, depending on ``nest_source``.
+    run_config = RunConfig()
+    triage_handoffs: list[Agent[Any] | Handoff[Any, Any]]
+    if nest_source == "handoff":
+        triage_handoffs = [handoff(delegate, nest_handoff_history=True)]
+    else:
+        triage_handoffs = [delegate]
+        run_config = RunConfig(nest_handoff_history=True)
+    # Triage emits a text message plus a handoff call; the delegate then answers "done".
+    triage = Agent(name="triage", model=triage_model, handoffs=triage_handoffs)
+    triage_model.add_multiple_turn_outputs(
+        [[get_text_message("triage summary"), get_handoff_tool_call(delegate)]]
+    )
+    delegate_model.add_multiple_turn_outputs([[get_text_message("done")]])
+    # Run with auto_previous_response_id=True while capturing "openai.agents" warnings.
+    with caplog.at_level("WARNING", logger="openai.agents"):
+        result = await _run_agent_with_optional_streaming(
+            triage,
+            input="user_message",
+            streamed=streamed,
+            run_config=run_config,
+            auto_previous_response_id=True,
+        )
+    # The run must finish, log the warning, and hand the delegate one function_call_output.
+    assert result.final_output == "done"
+    assert "do not support nest_handoff_history" in caplog.text
+    assert delegate_model.first_turn_args is not None
+    delegate_input = delegate_model.first_turn_args["input"]
+    assert isinstance(delegate_input, list)
+    assert len(delegate_input) == 1
+    handoff_output = delegate_input[0]
+    assert handoff_output.get("type") == "function_call_output"
+    assert "delegate" in str(handoff_output.get("output"))
+    assert not any(  # no assistant item carrying the nested-history marker may be present
+        isinstance(item, dict)
+        and item.get("role") == "assistant"
+        and "<CONVERSATION HISTORY>" in str(item.get("content"))
+        for item in delegate_input
+    )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("streamed", [False, True], ids=["non_streamed", "streamed"])
+@pytest.mark.parametrize("filter_source", ["run_config", "handoff"], ids=["run_config", "handoff"])
+async def test_server_managed_handoff_input_filters_still_raise(
+    streamed: bool,
+    filter_source: str,
+) -> None:
+    triage_model = FakeModel()
+    delegate_model = FakeModel()
+    delegate = Agent(name="delegate", model=delegate_model)
+    # Identity filter: it returns its input unchanged, yet the run is still expected to raise.
+    def passthrough_filter(data: HandoffInputData) -> HandoffInputData:
+        return data
+    # Attach the filter to the handoff or to RunConfig, depending on ``filter_source``.
+    run_config = RunConfig()
+    triage_handoffs: list[Agent[Any] | Handoff[Any, Any]]
+    if filter_source == "handoff":
+        triage_handoffs = [handoff(delegate, input_filter=passthrough_filter)]
+    else:
+        triage_handoffs = [delegate]
+        run_config = RunConfig(handoff_input_filter=passthrough_filter)
+    # Triage emits a text message plus a handoff call that targets the delegate.
+    triage = Agent(name="triage", model=triage_model, handoffs=triage_handoffs)
+    triage_model.add_multiple_turn_outputs(
+        [[get_text_message("triage summary"), get_handoff_tool_call(delegate)]]
+    )
+    delegate_model.add_multiple_turn_outputs([[get_text_message("done")]])
+    # With auto_previous_response_id=True the run must fail with the filter-specific UserError.
+    with pytest.raises(
+        UserError,
+        match="Server-managed conversations do not support handoff input filters",
+    ):
+        await _run_agent_with_optional_streaming(
+            triage,
+            input="user_message",
+            streamed=streamed,
+            run_config=run_config,
+            auto_previous_response_id=True,
+        )
+    # first_turn_args stays None -- presumably meaning the delegate model was never called.
+    assert delegate_model.first_turn_args is None
+
+
 @pytest.mark.asyncio
 async def test_async_input_filter_supported():
     # DO NOT rename this without updating pyproject.toml
diff --git a/tests/test_run_impl_resume_paths.py b/tests/test_run_impl_resume_paths.py
@@ -232,6 +232,72 @@ async def fake_run_single_turn(**_kwargs):
     assert "function_call" in saved_types
 
 
+@pytest.mark.parametrize(
+    ("conversation_id", "previous_response_id", "auto_previous_response_id"),
+    [
+        ("conv_1", None, False),
+        (None, "resp_prev", False),
+        (None, None, True),
+    ],
+)
+@pytest.mark.asyncio
+async def test_resumed_interruption_passes_server_managed_conversation_flag(
+    monkeypatch: pytest.MonkeyPatch,
+    conversation_id: str | None,
+    previous_response_id: str | None,
+    auto_previous_response_id: bool,
+) -> None:
+    agent = Agent(name="resume-agent")
+    context_wrapper: RunContextWrapper[dict[str, str]] = RunContextWrapper(context={})
+    state = RunState(
+        context=context_wrapper,
+        original_input="input",
+        starting_agent=agent,
+        max_turns=1,
+        conversation_id=conversation_id,
+        previous_response_id=previous_response_id,
+        auto_previous_response_id=auto_previous_response_id,
+    )
+    # Seed an interrupted step, one prior model response, and an empty processed response.
+    state._current_step = NextStepInterruption(interruptions=[])
+    state._model_responses = [
+        ModelResponse(output=[], usage=Usage(), response_id="resp_1"),
+    ]
+    state._last_processed_response = ProcessedResponse(
+        new_items=[],
+        handoffs=[],
+        functions=[],
+        computer_actions=[],
+        local_shell_calls=[],
+        shell_calls=[],
+        apply_patch_calls=[],
+        tools_used=[],
+        mcp_approval_requests=[],
+        interruptions=[],
+    )
+    server_managed_values: list[bool] = []
+    # Stub that records the flag it is called with and ends the run with a final output.
+    async def fake_resolve_interrupted_turn(**kwargs: object) -> SingleStepResult:
+        server_managed_values.append(cast(bool, kwargs["server_manages_conversation"]))
+        return SingleStepResult(
+            original_input="input",
+            model_response=ModelResponse(output=[], usage=Usage(), response_id="resp_resume"),
+            pre_step_items=[],
+            new_step_items=[],
+            next_step=NextStepFinalOutput("done"),
+            tool_input_guardrail_results=[],
+            tool_output_guardrail_results=[],
+        )
+    # Swap the stub in for run_module.resolve_interrupted_turn.
+    monkeypatch.setattr(run_module, "resolve_interrupted_turn", fake_resolve_interrupted_turn)
+    # Resume from the prepared state with a default RunConfig.
+    runner = run_module.AgentRunner()
+    result = await runner.run(agent, state, run_config=RunConfig())
+    # Each of the three parametrized settings must yield exactly one call with the flag True.
+    assert result.final_output == "done"
+    assert server_managed_values == [True]
+
+
 @pytest.mark.asyncio
 async def test_resumed_approval_does_not_duplicate_session_items() -> None:
     async def test_tool() -> str:

Original file line number	Diff line number	Diff line change
`@@ -837,6 +837,7 @@ def _finalize_result(result: RunResult) -> RunResult:`
`837`	`837`	`hooks=hooks,`
`838`	`838`	`context_wrapper=context_wrapper,`
`839`	`839`	`run_config=run_config,`
	`840`	`+ server_manages_conversation=server_conversation_tracker is not None,`
`840`	`841`	`run_state=run_state,`
`841`	`842`	`)`
`842`	`843`
Original file line number	Diff line number	Diff line change
`@@ -723,6 +723,7 @@ async def _save_stream_items_without_count(`
`723`	`723`	`hooks=hooks,`
`724`	`724`	`context_wrapper=context_wrapper,`
`725`	`725`	`run_config=run_config,`
	`726`	`+ server_manages_conversation=server_conversation_tracker is not None,`
`726`	`727`	`run_state=run_state,`
`727`	`728`	`)`
`728`	`729`
`@@ -1587,6 +1588,7 @@ async def rewind_model_request() -> None:`
`1587`	`1588`	`context_wrapper=context_wrapper,`
`1588`	`1589`	`run_config=run_config,`
`1589`	`1590`	`tool_use_tracker=tool_use_tracker,`
	`1591`	`+ server_manages_conversation=server_conversation_tracker is not None,`
`1590`	`1592`	`event_queue=streamed_result._event_queue,`
`1591`	`1593`	`)`
`1592`	`1594`
`@@ -1717,6 +1719,7 @@ async def run_single_turn(`
`1717`	`1719`	`context_wrapper=context_wrapper,`
`1718`	`1720`	`run_config=run_config,`
`1719`	`1721`	`tool_use_tracker=tool_use_tracker,`
	`1722`	`+ server_manages_conversation=server_conversation_tracker is not None,`
`1720`	`1723`	`)`
`1721`	`1724`
`1722`	`1725`