test: improve coverage for tracing and runtime helpers (#2635)

seratch · web-flow · commit fc6afebc48d5 · 2026-03-08T02:22:05.000Z
diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py
@@ -62,9 +62,11 @@
 from agents.run_internal.run_loop import get_new_response
 from agents.run_internal.run_steps import NextStepFinalOutput, SingleStepResult
 from agents.run_internal.session_persistence import (
+    persist_session_items_for_guardrail_trip,
     prepare_input_with_session,
     rewind_session_items,
     save_result_to_session,
+    wait_for_session_cleanup,
 )
 from agents.run_internal.tool_execution import execute_approved_tools
 from agents.run_internal.tool_use_tracker import AgentToolUseTracker
@@ -1889,6 +1891,107 @@ def callback(history: list[TResponseInputItem], new_input: list[TResponseInputIt
         await prepare_input_with_session("hello", session, cast(Any, callback))
 
 
+@pytest.mark.asyncio
+async def test_prepare_input_with_session_matches_copied_items_by_content() -> None:
+    """Check session-item detection when the callback returns copies of its inputs.
+
+    The session starts with a single history item. The callback hands back fresh
+    ``dict`` copies of the history item and of the new input item rather than the
+    objects it received. The prepared input must still contain both items in order,
+    and only the new item may be reported in ``session_items``.
+    """
+    history_item = cast(TResponseInputItem, {"role": "user", "content": "history"})
+    session = SimpleListSession(history=[history_item])
+
+    def callback(
+        history: list[TResponseInputItem], new_input: list[TResponseInputItem]
+    ) -> list[TResponseInputItem]:
+        # dict(...) builds new objects, so the results are equal to, but not
+        # identical with, the items passed in.
+        return [
+            cast(TResponseInputItem, dict(cast(dict[str, Any], history[0]))),
+            cast(TResponseInputItem, dict(cast(dict[str, Any], new_input[0]))),
+        ]
+
+    prepared, session_items = await prepare_input_with_session("new", session, callback)
+
+    assert [cast(dict[str, Any], item).get("content") for item in prepared] == [
+        "history",
+        "new",
+    ]
+    # The copied history item must not be counted as a new session item.
+    assert [cast(dict[str, Any], item).get("content") for item in session_items] == ["new"]
+
+
+@pytest.mark.asyncio
+async def test_persist_session_items_for_guardrail_trip_uses_original_input_when_missing() -> None:
+    """Check what is persisted when the helper's two optional arguments are ``None``.
+
+    The helper is called with an empty session, ``None`` for its second and third
+    positional arguments, the string ``"guardrail input"``, and a run state. It must
+    return a single user message built from that string, and the session must
+    contain exactly the returned items afterwards.
+    """
+    session = SimpleListSession()
+    agent = Agent(name="agent", model=FakeModel())
+    run_state: RunState[Any] = RunState(
+        context=RunContextWrapper(context={}),
+        original_input="input",
+        starting_agent=agent,
+        max_turns=1,
+    )
+
+    persisted = await persist_session_items_for_guardrail_trip(
+        session,
+        None,
+        None,
+        "guardrail input",
+        run_state,
+    )
+
+    # The persisted content is the string passed to the helper, not the run
+    # state's original_input ("input").
+    assert persisted == [{"role": "user", "content": "guardrail input"}]
+    assert await session.get_items() == persisted
+
+
+@pytest.mark.asyncio
+async def test_wait_for_session_cleanup_retries_after_get_items_error(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Check that a failing ``get_items`` call is retried instead of propagated.
+
+    The session raises ``RuntimeError`` on its first ``get_items`` call and returns an
+    empty list afterwards. ``asyncio.sleep`` is replaced with a recorder so the test
+    does not actually wait. The helper is expected to call ``get_items`` exactly twice
+    and to sleep exactly once, for 0.1.
+    """
+    target = cast(TResponseInputItem, {"id": "msg-1", "type": "message", "content": "hello"})
+    serialized_target = fingerprint_input_item(target)
+
+    class FlakyCleanupSession(SimpleListSession):
+        # Session double that counts get_items calls and fails the first one.
+        def __init__(self) -> None:
+            super().__init__()
+            self.get_items_calls = 0
+
+        async def get_items(self, limit: int | None = None) -> list[TResponseInputItem]:
+            self.get_items_calls += 1
+            # Fail only the first call; every later call reports an empty session.
+            if self.get_items_calls == 1:
+                raise RuntimeError("temporary failure")
+            return []
+
+    session = FlakyCleanupSession()
+    sleeps: list[float] = []
+
+    async def fake_sleep(delay: float) -> None:
+        # Record the requested delay instead of waiting.
+        sleeps.append(delay)
+
+    monkeypatch.setattr(asyncio, "sleep", fake_sleep)
+
+    # The fingerprint must exist before it is passed on as a cleanup target.
+    assert serialized_target is not None
+    await wait_for_session_cleanup(session, [serialized_target])
+
+    assert session.get_items_calls == 2
+    assert sleeps == [0.1]
+
+
+@pytest.mark.asyncio
+async def test_wait_for_session_cleanup_logs_when_targets_linger(
+    monkeypatch: pytest.MonkeyPatch,
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Check the log output when the target item never leaves the session.
+
+    The session is seeded with the target item and nothing removes it. With
+    ``max_attempts=2`` the helper is expected to sleep twice (0.1, then 0.2) and to
+    emit the "exhausted attempts" message on the ``openai.agents`` logger.
+    """
+    target = cast(TResponseInputItem, {"id": "msg-1", "type": "message", "content": "hello"})
+    session = SimpleListSession(history=[target])
+    serialized_target = fingerprint_input_item(target)
+    sleeps: list[float] = []
+
+    async def fake_sleep(delay: float) -> None:
+        # Record the requested delay instead of waiting.
+        sleeps.append(delay)
+
+    monkeypatch.setattr(asyncio, "sleep", fake_sleep)
+
+    # The fingerprint must exist before it is passed on as a cleanup target.
+    assert serialized_target is not None
+    # Capture from DEBUG upward so the message is seen whatever level it uses.
+    with caplog.at_level("DEBUG", logger="openai.agents"):
+        await wait_for_session_cleanup(session, [serialized_target], max_attempts=2)
+
+    assert sleeps == [0.1, 0.2]
+    assert "Session cleanup verification exhausted attempts" in caplog.text
+
+
 @pytest.mark.asyncio
 async def test_conversation_lock_rewind_skips_when_no_snapshot() -> None:
     history_item = cast(TResponseInputItem, {"id": "old", "type": "message"})
diff --git a/tests/test_agent_tool_state.py b/tests/test_agent_tool_state.py
@@ -1,9 +1,25 @@
 from __future__ import annotations
 
+import gc
+import weakref
+from types import SimpleNamespace
+from typing import Any, cast
+
 import pytest
+from openai.types.responses import ResponseFunctionToolCall
 
 import agents.agent_tool_state as tool_state
 
+from .test_responses import get_function_tool_call
+
+
+@pytest.fixture(autouse=True)
+def reset_tool_state_globals(monkeypatch: pytest.MonkeyPatch) -> None:
+    """Give every test in this module fresh, empty tool-state registries.
+
+    Each of the four module-level mappings on ``agents.agent_tool_state`` is replaced
+    with a new empty ``dict``. ``monkeypatch`` restores the original objects when the
+    test finishes, so entries recorded by one test cannot leak into another.
+    """
+    monkeypatch.setattr(tool_state, "_agent_tool_run_results_by_obj", {})
+    monkeypatch.setattr(tool_state, "_agent_tool_run_results_by_signature", {})
+    monkeypatch.setattr(tool_state, "_agent_tool_run_result_signature_by_obj", {})
+    monkeypatch.setattr(tool_state, "_agent_tool_call_refs_by_obj", {})
+
 
 def test_drop_agent_tool_run_result_handles_cleared_globals(
     monkeypatch: pytest.MonkeyPatch,
@@ -14,3 +30,73 @@ def test_drop_agent_tool_run_result_handles_cleared_globals(
 
     # Should not raise even if globals are cleared during interpreter shutdown.
     tool_state._drop_agent_tool_run_result(123)
+
+
+def test_agent_tool_state_scope_helpers_tolerate_missing_or_readonly_contexts() -> None:
+    """Check the scope getter/setter on ``None``, writable, and non-writable contexts.
+
+    Covers four cases: setting a scope on ``None`` must not raise; a scope set on a
+    ``SimpleNamespace`` is returned by the getter; setting ``None`` clears it; and a
+    bare ``object()`` accepts the setter call without raising but reports no scope.
+    """
+    context = SimpleNamespace()
+
+    # A missing context must be accepted without raising.
+    tool_state.set_agent_tool_state_scope(None, "ignored")
+    tool_state.set_agent_tool_state_scope(context, "scope-1")
+    assert tool_state.get_agent_tool_state_scope(context) == "scope-1"
+
+    tool_state.set_agent_tool_state_scope(context, None)
+    assert tool_state.get_agent_tool_state_scope(context) is None
+
+    # A bare object() has no __dict__, so no attribute can be set on it.
+    # NOTE(review): presumably the scope is stored as an attribute on the context
+    # and this exercises the failure path of that write; confirm in agent_tool_state.
+    readonly_context = object()
+    tool_state.set_agent_tool_state_scope(readonly_context, "scope-2")
+    assert tool_state.get_agent_tool_state_scope(readonly_context) is None
+
+
+def _function_tool_call(name: str, arguments: str, *, call_id: str) -> ResponseFunctionToolCall:
+    """Build a tool call via ``get_function_tool_call`` and check its concrete type.
+
+    The ``isinstance`` assertion fails fast if the shared helper returns something
+    other than a ``ResponseFunctionToolCall``, and narrows the type for callers.
+    """
+    tool_call = get_function_tool_call(name, arguments, call_id=call_id)
+    assert isinstance(tool_call, ResponseFunctionToolCall)
+    return tool_call
+
+
+def test_agent_tool_run_result_supports_signature_fallback_across_instances() -> None:
+    """Check that a result recorded for one tool call is found via an equal copy.
+
+    ``original_call`` and ``restored_call`` are separate objects built from the same
+    name, arguments, and call id. A result recorded against the first must be
+    returned by peek and consume on the second. After consuming, the result is no
+    longer reachable through the original call and the signature registry is empty.
+    """
+    original_call = _function_tool_call("lookup_account", "{}", call_id="call-1")
+    restored_call = _function_tool_call("lookup_account", "{}", call_id="call-1")
+    # Opaque sentinel; the test only checks identity of the stored result.
+    run_result = cast(Any, object())
+
+    tool_state.record_agent_tool_run_result(original_call, run_result, scope_id="scope-1")
+
+    assert tool_state.peek_agent_tool_run_result(restored_call, scope_id="scope-1") is run_result
+    assert tool_state.consume_agent_tool_run_result(restored_call, scope_id="scope-1") is run_result
+    assert tool_state.peek_agent_tool_run_result(original_call, scope_id="scope-1") is None
+    assert tool_state._agent_tool_run_results_by_signature == {}
+
+
+def test_agent_tool_run_result_returns_none_for_ambiguous_signature_matches() -> None:
+    """Check lookups through a third instance when two equal calls hold results.
+
+    Two distinct tool-call objects with identical name, arguments, and call id each
+    get their own result in the same scope. Looking up through a third equal object
+    must return ``None`` for both peek and consume, and dropping through that third
+    object must leave both recorded results in place. The original objects still
+    resolve to their own results, and a different scope id yields ``None``.
+    """
+    first_call = _function_tool_call("lookup_account", "{}", call_id="call-1")
+    second_call = _function_tool_call("lookup_account", "{}", call_id="call-1")
+    restored_call = _function_tool_call("lookup_account", "{}", call_id="call-1")
+    # Opaque sentinels; the test only checks identity of the stored results.
+    first_result = cast(Any, object())
+    second_result = cast(Any, object())
+
+    tool_state.record_agent_tool_run_result(first_call, first_result, scope_id="scope-1")
+    tool_state.record_agent_tool_run_result(second_call, second_result, scope_id="scope-1")
+
+    assert tool_state.peek_agent_tool_run_result(restored_call, scope_id="scope-1") is None
+    assert tool_state.consume_agent_tool_run_result(restored_call, scope_id="scope-1") is None
+
+    # Dropping via the ambiguous instance must not remove either recorded result.
+    tool_state.drop_agent_tool_run_result(restored_call, scope_id="scope-1")
+
+    assert tool_state.peek_agent_tool_run_result(first_call, scope_id="scope-1") is first_result
+    assert tool_state.peek_agent_tool_run_result(second_call, scope_id="scope-1") is second_result
+    assert tool_state.peek_agent_tool_run_result(restored_call, scope_id="other-scope") is None
+
+
+def test_agent_tool_run_result_is_dropped_when_tool_call_is_collected() -> None:
+    """Check that registry entries disappear once the tool call is garbage collected.
+
+    A result is recorded for a tool call, then the only local reference to the call
+    is deleted and a collection is forced. The weak reference must be dead, and the
+    call's ``id`` must no longer be a key in any of the three per-object registries.
+    """
+    tool_call = _function_tool_call("lookup_account", "{}", call_id="call-1")
+    tool_call_ref = weakref.ref(tool_call)
+    # Capture the id now; the object itself is deleted below.
+    tool_call_obj_id = id(tool_call)
+
+    tool_state.record_agent_tool_run_result(tool_call, cast(Any, object()), scope_id="scope-1")
+
+    del tool_call
+    gc.collect()
+
+    assert tool_call_ref() is None
+    assert tool_call_obj_id not in tool_state._agent_tool_run_results_by_obj
+    assert tool_call_obj_id not in tool_state._agent_tool_run_result_signature_by_obj
+    assert tool_call_obj_id not in tool_state._agent_tool_call_refs_by_obj
diff --git a/tests/test_stream_events.py b/tests/test_stream_events.py
@@ -1,6 +1,6 @@
 import asyncio
 import time
-from typing import cast
+from typing import Any, cast
 
 import pytest
 from mcp import Tool as MCPTool
@@ -22,20 +22,33 @@
     ResponseReasoningSummaryTextDoneEvent,
     ResponseTextDeltaEvent,
     ResponseTextDoneEvent,
+    ResponseToolSearchCall,
+    ResponseToolSearchOutputItem,
+)
+from openai.types.responses.response_output_item import (
+    McpApprovalRequest,
+    McpListTools,
+    McpListToolsTool,
 )
 from openai.types.responses.response_reasoning_item import ResponseReasoningItem, Summary
 
 from agents import Agent, HandoffCallItem, Runner, function_tool
 from agents.extensions.handoff_filters import remove_all_tools
 from agents.handoffs import handoff
 from agents.items import (
+    MCPApprovalRequestItem,
+    MCPApprovalResponseItem,
+    MCPListToolsItem,
     MessageOutputItem,
     ReasoningItem,
+    RunItem,
+    ToolApprovalItem,
     ToolCallItem,
     ToolCallOutputItem,
     ToolSearchCallItem,
     ToolSearchOutputItem,
 )
+from agents.run_internal.streaming import stream_step_items_to_queue, stream_step_result_to_queue
 
 from .fake_model import FakeModel
 from .mcp.helpers import FakeMCPServer
@@ -48,6 +61,22 @@ def get_reasoning_item() -> ResponseReasoningItem:
     )
 
 
+def _make_hosted_mcp_list_tools(server_label: str, tool_name: str) -> McpListTools:
+    """Build an ``McpListTools`` item listing a single tool for ``server_label``.
+
+    The item id is derived from the server label (``list_<server_label>``). The one
+    listed tool is named ``tool_name`` and has an empty input schema, a fixed
+    description, and a ``title`` annotation of "Search Docs".
+    """
+    return McpListTools(
+        id=f"list_{server_label}",
+        server_label=server_label,
+        tools=[
+            McpListToolsTool(
+                name=tool_name,
+                input_schema={},
+                description="Search the docs.",
+                annotations={"title": "Search Docs"},
+            )
+        ],
+        type="mcp_list_tools",
+    )
+
+
 @function_tool
 async def foo() -> str:
     await asyncio.sleep(0)
@@ -130,6 +159,100 @@ async def test_stream_events_tool_called_includes_local_mcp_title() -> None:
     assert seen_tool_item.title == "Search Docs"
 
 
+def test_stream_step_items_to_queue_emits_helper_events_and_skips_approvals(
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Check which run items ``stream_step_items_to_queue`` turns into queue events.
+
+    Seven items are streamed: a tool-search call, a tool-search output, an MCP
+    approval request, an MCP approval response, an MCP list-tools item, a
+    ``ToolApprovalItem``, and a bare ``object()``. The first five must each produce
+    one event, in order. The ``ToolApprovalItem`` must produce none, and the
+    unrecognised object must produce no event but log "Unexpected item type".
+    """
+    agent = Agent(name="StreamHelper")
+    queue: asyncio.Queue[Any] = asyncio.Queue()
+    request_item = McpApprovalRequest(
+        id="mcp-approval-1",
+        type="mcp_approval_request",
+        server_label="test-mcp-server",
+        arguments="{}",
+        name="search_docs",
+    )
+
+    items: list[RunItem] = [
+        ToolSearchCallItem(
+            agent=agent,
+            raw_item=ResponseToolSearchCall(
+                id="tsc_123",
+                type="tool_search_call",
+                arguments={"query": "docs"},
+                execution="client",
+                status="completed",
+            ),
+        ),
+        ToolSearchOutputItem(
+            agent=agent,
+            raw_item=ResponseToolSearchOutputItem(
+                id="tso_123",
+                type="tool_search_output",
+                execution="client",
+                status="completed",
+                tools=[],
+            ),
+        ),
+        MCPApprovalRequestItem(agent=agent, raw_item=request_item),
+        MCPApprovalResponseItem(
+            agent=agent,
+            raw_item=cast(
+                Any,
+                {
+                    "type": "mcp_approval_response",
+                    "approval_request_id": "mcp-approval-1",
+                    "approve": True,
+                },
+            ),
+        ),
+        MCPListToolsItem(
+            agent=agent,
+            raw_item=_make_hosted_mcp_list_tools("test-mcp-server", "search_docs"),
+        ),
+        # Expected to be skipped: no event name for it appears in the list below.
+        ToolApprovalItem(
+            agent=agent,
+            raw_item={"type": "function_call", "call_id": "call-1", "name": "tool"},
+        ),
+        # Not a RunItem at all; exercises the unexpected-item warning.
+        cast(Any, object()),
+    ]
+
+    with caplog.at_level("WARNING", logger="openai.agents"):
+        stream_step_items_to_queue(items, queue)
+
+    # Drain the queue synchronously and collect the event names in order.
+    names = []
+    while not queue.empty():
+        event = queue.get_nowait()
+        names.append(event.name)
+
+    assert names == [
+        "tool_search_called",
+        "tool_search_output_created",
+        "mcp_approval_requested",
+        "mcp_approval_response",
+        "mcp_list_tools",
+    ]
+    assert "Unexpected item type" in caplog.text
+
+
+def test_stream_step_result_to_queue_uses_new_step_items() -> None:
+    """Check that ``stream_step_result_to_queue`` streams ``new_step_items``.
+
+    A minimal stand-in step result exposes only a ``new_step_items`` attribute holding
+    one tool-search call item. After streaming, the first queued event must be named
+    ``tool_search_called``.
+    """
+    agent = Agent(name="StreamHelper")
+    queue: asyncio.Queue[Any] = asyncio.Queue()
+
+    tool_search_item = ToolSearchCallItem(
+        agent=agent,
+        raw_item={
+            "type": "tool_search_call",
+            "queries": [{"search_term": "docs"}],
+        },
+    )
+    # Ad-hoc class whose only attribute is new_step_items, so any other field the
+    # helper tried to read would raise AttributeError.
+    step_result = cast(Any, type("StepResult", (), {"new_step_items": [tool_search_item]})())
+
+    stream_step_result_to_queue(step_result, queue)
+
+    event = queue.get_nowait()
+    assert event.name == "tool_search_called"
+
+
 @pytest.mark.asyncio
 async def test_stream_events_main_with_handoff():
     @function_tool
diff --git a/tests/tracing/test_setup.py b/tests/tracing/test_setup.py