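Fix review comments: ignore abandoned tool calls in compaction, snapshot run state on results, keep trace redaction on resume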

seratch · seratch · commit d33cc1e75d53 · 2026-04-08T20:47:12.000+09:00
diff --git a/src/agents/memory/openai_responses_compaction_session.py b/src/agents/memory/openai_responses_compaction_session.py
@@ -28,6 +28,14 @@
 OpenAIResponsesCompactionMode = Literal["previous_response_id", "input", "auto"]
 
 
+def _is_user_message_item(item: TResponseInputItem) -> bool:
+    if not isinstance(item, dict):
+        return False
+    if item.get("type") == "message":
+        return item.get("role") == "user"
+    return item.get("role") == "user" and "content" in item
+
+
 def select_compaction_candidate_items(
     items: list[TResponseInputItem],
 ) -> list[TResponseInputItem]:
@@ -36,18 +44,12 @@ def select_compaction_candidate_items(
     Excludes user messages and compaction items.
     """
 
-    def _is_user_message(item: TResponseInputItem) -> bool:
-        if not isinstance(item, dict):
-            return False
-        if item.get("type") == "message":
-            return item.get("role") == "user"
-        return item.get("role") == "user" and "content" in item
-
     return [
         item
         for item in items
         if not (
-            _is_user_message(item) or (isinstance(item, dict) and item.get("type") == "compaction")
+            _is_user_message_item(item)
+            or (isinstance(item, dict) and item.get("type") == "compaction")
         )
     ]
 
@@ -273,12 +275,12 @@ async def run_compaction(self, args: OpenAIResponsesCompactionArgs | None = None
             )
             return
 
-        unresolved_function_calls = _find_unresolved_function_calls_without_results(session_items)
-        if unresolved_function_calls:
+        frontier_unresolved_function_calls = _find_frontier_unresolved_function_calls(session_items)
+        if frontier_unresolved_function_calls:
             logger.debug(
                 "compact: blocked unresolved function calls for %s: %s",
                 self._response_id,
-                unresolved_function_calls,
+                frontier_unresolved_function_calls,
             )
             return
 
@@ -476,12 +478,19 @@ def _normalize_compaction_session_items(
 _ResolvedCompactionMode = Literal["previous_response_id", "input"]
 
 
-def _find_unresolved_function_calls_without_results(items: list[TResponseInputItem]) -> list[str]:
-    """Return function-call ids that do not yet have matching outputs."""
-    function_calls: dict[str, TResponseInputItem] = {}
+def _find_frontier_unresolved_function_calls(items: list[TResponseInputItem]) -> list[str]:
+    """Return unresolved function-call ids that remain in the active conversation frontier.
+
+    Once a later user message appears, earlier unresolved tool calls are considered abandoned and
+    should no longer block future compaction for the session.
+    """
+    function_call_indices: dict[str, int] = {}
     resolved_call_ids: set[str] = set()
+    last_user_message_index = -1
 
-    for item in items:
+    for index, item in enumerate(items):
+        if _is_user_message_item(item):
+            last_user_message_index = index
         if isinstance(item, dict):
             item_type = item.get("type")
             call_id = item.get("call_id")
@@ -492,11 +501,15 @@ def _find_unresolved_function_calls_without_results(items: list[TResponseInputIt
         if not isinstance(call_id, str):
             continue
         if item_type == "function_call":
-            function_calls[call_id] = item
+            function_call_indices[call_id] = index
         elif item_type == "function_call_output":
             resolved_call_ids.add(call_id)
 
-    return [call_id for call_id in function_calls if call_id not in resolved_call_ids]
+    return [
+        call_id
+        for call_id, index in function_call_indices.items()
+        if call_id not in resolved_call_ids and index > last_user_message_index
+    ]
 
 
 def _resolve_compaction_mode(
diff --git a/src/agents/result.py b/src/agents/result.py
@@ -107,12 +107,36 @@ def _populate_state_from_result(
     if trace_state is None:
         trace_state = TraceState.from_trace(getattr(result, "trace", None))
     state._trace_state = copy.deepcopy(trace_state) if trace_state else None
-    state._trace_include_sensitive_data = getattr(
-        source_state,
-        "_trace_include_sensitive_data",
-        True,
+    trace_include_sensitive_data_snapshot = getattr(
+        result,
+        "_trace_include_sensitive_data_snapshot",
+        None,
     )
-    if isinstance(source_state, RunState):
+    if trace_include_sensitive_data_snapshot is not None:
+        state._trace_include_sensitive_data = trace_include_sensitive_data_snapshot
+    else:
+        state._trace_include_sensitive_data = getattr(
+            source_state,
+            "_trace_include_sensitive_data",
+            True,
+        )
+
+    session_history_mutations_snapshot = getattr(
+        result,
+        "_session_history_mutations_snapshot",
+        None,
+    )
+    execution_only_approval_override_call_ids_snapshot = getattr(
+        result,
+        "_execution_only_approval_override_call_ids_snapshot",
+        None,
+    )
+    if session_history_mutations_snapshot is not None:
+        state._session_history_mutations = copy.deepcopy(session_history_mutations_snapshot)
+        state._execution_only_approval_override_call_ids = list(
+            execution_only_approval_override_call_ids_snapshot or []
+        )
+    elif isinstance(source_state, RunState):
         state._session_history_mutations = source_state.get_session_history_mutations()
         state._execution_only_approval_override_call_ids = list(
             source_state._execution_only_approval_override_call_ids
@@ -332,6 +356,15 @@ class RunResult(RunResultBase):
     to preserve the correct originalInput when serializing state."""
     _state: Any = field(default=None, repr=False)
     """Internal reference to the originating RunState when available."""
+    _trace_include_sensitive_data_snapshot: bool | None = field(default=None, repr=False)
+    """Snapshot of the trace redaction setting used when rebuilding state from a completed
+    result."""
+    _session_history_mutations_snapshot: list[Any] | None = field(default=None, repr=False)
+    """Snapshot of pending session-history rewrites needed by `to_state()`."""
+    _execution_only_approval_override_call_ids_snapshot: list[str] | None = field(
+        default=None, repr=False
+    )
+    """Snapshot of execution-only approval overrides needed by `to_state()`."""
     _conversation_id: str | None = field(default=None, repr=False)
     """Conversation identifier for server-managed runs."""
     _previous_response_id: str | None = field(default=None, repr=False)
diff --git a/src/agents/run_internal/agent_runner_helpers.py b/src/agents/run_internal/agent_runner_helpers.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import copy
 from typing import Any, cast
 
 from ..agent import Agent
@@ -185,9 +186,16 @@ def resolve_trace_include_sensitive_data(
     run_config: RunConfig,
     run_config_was_supplied: bool,
 ) -> bool:
-    """Resolve whether traces may include sensitive data for this run."""
-    if run_state is None or run_config_was_supplied:
+    """Resolve whether traces may include sensitive data for this run.
+
+    Resumed runs preserve the stored setting unless the new RunConfig explicitly narrows it by
+    setting `trace_include_sensitive_data=False`.
+    """
+    del run_config_was_supplied
+    if run_state is None:
         return run_config.trace_include_sensitive_data
+    if run_config.trace_include_sensitive_data is False:
+        return False
     return run_state._trace_include_sensitive_data
 
 
@@ -295,9 +303,15 @@ def attach_run_state_metadata(result: RunResult, *, run_state: RunState | None)
     if run_state is None:
         return result
 
-    result._state = run_state
     result._current_turn_persisted_item_count = run_state._current_turn_persisted_item_count
     result._trace_state = run_state._trace_state
+    result._trace_include_sensitive_data_snapshot = run_state._trace_include_sensitive_data
+    result._session_history_mutations_snapshot = copy.deepcopy(
+        run_state.get_session_history_mutations()
+    )
+    result._execution_only_approval_override_call_ids_snapshot = list(
+        run_state._execution_only_approval_override_call_ids
+    )
     return result
 
 
diff --git a/tests/memory/test_openai_responses_compaction_session.py b/tests/memory/test_openai_responses_compaction_session.py
@@ -623,6 +623,81 @@ async def test_run_compaction_auto_uses_default_store_when_unset(self) -> None:
         assert second_kwargs.get("previous_response_id") == "resp-stored"
         assert "input" not in second_kwargs
 
+    @pytest.mark.asyncio
+    async def test_run_compaction_ignores_abandoned_unresolved_function_calls(self) -> None:
+        mock_session = self.create_mock_session()
+        items: list[TResponseInputItem] = [
+            cast(TResponseInputItem, {"type": "message", "role": "user", "content": "first"}),
+            cast(
+                TResponseInputItem,
+                {
+                    "type": "function_call",
+                    "call_id": "call-abandoned",
+                    "id": "fc_1",
+                    "name": "test_tool",
+                    "arguments": "{}",
+                },
+            ),
+            cast(TResponseInputItem, {"type": "message", "role": "user", "content": "followup"}),
+            cast(
+                TResponseInputItem,
+                {"type": "message", "role": "assistant", "content": "latest response"},
+            ),
+        ]
+        mock_session.get_items.return_value = items
+
+        mock_compact_response = MagicMock()
+        mock_compact_response.output = []
+
+        mock_client = MagicMock()
+        mock_client.responses.compact = AsyncMock(return_value=mock_compact_response)
+
+        session = OpenAIResponsesCompactionSession(
+            session_id="test",
+            underlying_session=mock_session,
+            client=mock_client,
+            compaction_mode="auto",
+        )
+
+        await session.run_compaction({"response_id": "resp-latest", "force": True})
+
+        mock_client.responses.compact.assert_called_once_with(
+            previous_response_id="resp-latest",
+            model="gpt-4.1",
+        )
+
+    @pytest.mark.asyncio
+    async def test_run_compaction_still_blocks_active_unresolved_function_calls(self) -> None:
+        mock_session = self.create_mock_session()
+        items: list[TResponseInputItem] = [
+            cast(TResponseInputItem, {"type": "message", "role": "user", "content": "hello"}),
+            cast(
+                TResponseInputItem,
+                {
+                    "type": "function_call",
+                    "call_id": "call-pending",
+                    "id": "fc_1",
+                    "name": "test_tool",
+                    "arguments": "{}",
+                },
+            ),
+        ]
+        mock_session.get_items.return_value = items
+
+        mock_client = MagicMock()
+        mock_client.responses.compact = AsyncMock()
+
+        session = OpenAIResponsesCompactionSession(
+            session_id="test",
+            underlying_session=mock_session,
+            client=mock_client,
+            compaction_mode="auto",
+        )
+
+        await session.run_compaction({"response_id": "resp-pending", "force": True})
+
+        mock_client.responses.compact.assert_not_called()
+
     @pytest.mark.asyncio
     async def test_run_compaction_auto_uses_input_when_last_response_unstored(self) -> None:
         mock_session = self.create_mock_session()
diff --git a/tests/test_agent_tracing.py b/tests/test_agent_tracing.py
@@ -410,6 +410,45 @@ def send_email(recipient: str) -> str:
     assert function_span["span_data"]["output"] is None
 
 
+@pytest.mark.asyncio
+async def test_resumed_run_preserves_sensitive_trace_flag_for_unrelated_run_config() -> None:
+    model = FakeModel()
+
+    @function_tool(name_override="send_email", needs_approval=True)
+    def send_email(recipient: str) -> str:
+        return recipient
+
+    agent = Agent(name="trace_agent", model=model, tools=[send_email])
+    model.add_multiple_turn_outputs(
+        [
+            [
+                get_function_tool_call(
+                    "send_email", '{"recipient":"alice@example.com"}', call_id="call-1"
+                )
+            ],
+            [get_text_message("done")],
+        ]
+    )
+
+    first = await Runner.run(agent, input="first_test")
+    assert first.interruptions
+
+    state = first.to_state()
+    state.set_trace_include_sensitive_data(False)
+    state.approve(first.interruptions[0], override_arguments={"recipient": "bob@example.com"})
+
+    resumed = await Runner.run(
+        agent,
+        state,
+        run_config=RunConfig(workflow_name="override_workflow"),
+    )
+
+    assert resumed.final_output == "done"
+    function_span = _get_last_function_span_export("send_email")
+    assert function_span["span_data"]["input"] is None
+    assert function_span["span_data"]["output"] is None
+
+
 @pytest.mark.asyncio
 async def test_wrapped_trace_is_single_trace():
     model = FakeModel()
diff --git a/tests/test_result_cast.py b/tests/test_result_cast.py
@@ -15,12 +15,16 @@
     MessageOutputItem,
     RunContextWrapper,
     RunItem,
+    Runner,
     RunResult,
     RunResultStreaming,
 )
 from agents.exceptions import AgentsException
 from agents.tool_context import ToolContext
 
+from .fake_model import FakeModel
+from .test_responses import get_text_message
+
 
 def create_run_result(
     final_output: Any | None,
@@ -261,6 +265,25 @@ def test_run_result_streaming_release_agents_releases_current_agent() -> None:
         _ = streaming_result.last_agent
 
 
+@pytest.mark.asyncio
+async def test_runner_result_does_not_retain_live_run_state() -> None:
+    agent = Agent(
+        name="runner-result-agent",
+        model=FakeModel(initial_output=[get_text_message("done")]),
+    )
+
+    result = await Runner.run(agent, "hello")
+
+    assert result._state is None
+
+    agent_ref = weakref.ref(agent)
+    result.release_agents()
+    del agent
+    gc.collect()
+
+    assert agent_ref() is None
+
+
 def test_run_result_agent_tool_invocation_returns_none_for_plain_context() -> None:
     result = create_run_result("ok")