
Commit ee674b8

docs: update a few docs and code comments (#2564)
1 parent 02bf923 commit ee674b8

File tree

6 files changed (+144, -22 lines)


docs/agents.md

Lines changed: 42 additions & 1 deletion
@@ -211,7 +211,48 @@ agent = Agent[UserContext](
 
 ## Lifecycle events (hooks)
 
-Sometimes, you want to observe the lifecycle of an agent. For example, you may want to log events, or pre-fetch data when certain events occur. You can hook into the agent lifecycle with the `hooks` property. Subclass the [`AgentHooks`][agents.lifecycle.AgentHooks] class, and override the methods you're interested in.
+Sometimes, you want to observe the lifecycle of an agent. For example, you may want to log events, pre-fetch data, or record usage when certain events occur.
+
+There are two hook scopes:
+
+- [`RunHooks`][agents.lifecycle.RunHooks] observe the entire `Runner.run(...)` invocation, including handoffs to other agents.
+- [`AgentHooks`][agents.lifecycle.AgentHooks] are attached to a specific agent instance via `agent.hooks`.
+
+The callback context also changes depending on the event:
+
+- Agent start/end hooks receive [`AgentHookContext`][agents.run_context.AgentHookContext], which wraps your original context and carries the shared run usage state.
+- LLM, tool, and handoff hooks receive [`RunContextWrapper`][agents.run_context.RunContextWrapper].
+
+Typical hook timing:
+
+- `on_agent_start` / `on_agent_end`: when a specific agent begins or finishes producing a final output.
+- `on_llm_start` / `on_llm_end`: immediately around each model call.
+- `on_tool_start` / `on_tool_end`: around each local tool invocation.
+- `on_handoff`: when control moves from one agent to another.
+
+Use `RunHooks` when you want a single observer for the whole workflow, and `AgentHooks` when one agent needs custom side effects.
+
+```python
+from agents import Agent, RunHooks, Runner
+
+
+class LoggingHooks(RunHooks):
+    async def on_agent_start(self, context, agent):
+        print(f"Starting {agent.name}")
+
+    async def on_llm_end(self, context, agent, response):
+        print(f"{agent.name} produced {len(response.output)} output items")
+
+    async def on_agent_end(self, context, agent, output):
+        print(f"{agent.name} finished with usage: {context.usage}")
+
+
+agent = Agent(name="Assistant", instructions="Be concise.")
+result = await Runner.run(agent, "Explain quines", hooks=LoggingHooks())
+print(result.final_output)
+```
+
+For the full callback surface, see the [Lifecycle API reference](ref/lifecycle.md).
 
 ## Guardrails
 
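The hook timing documented above can be made concrete with a plain-Python simulation. This is only an illustrative sketch of the documented ordering for one agent turn that calls one tool; `RecordingHooks` and `simulate_turn` are hypothetical names, not the SDK's dispatch code:

```python
class RecordingHooks:
    """Collects hook event names in the order a run would fire them (illustrative only)."""

    def __init__(self):
        self.events = []

    def fire(self, name):
        self.events.append(name)


def simulate_turn(hooks):
    # Documented ordering for one agent turn that invokes one tool:
    hooks.fire("on_agent_start")  # the agent begins
    hooks.fire("on_llm_start")    # model call that decides to use a tool
    hooks.fire("on_llm_end")
    hooks.fire("on_tool_start")   # local tool invocation
    hooks.fire("on_tool_end")
    hooks.fire("on_llm_start")    # model call that produces the final output
    hooks.fire("on_llm_end")
    hooks.fire("on_agent_end")    # agent finishes with its final output
    return hooks.events


print(simulate_turn(RecordingHooks()))
```

`on_handoff` would slot in between `on_agent_end`-less turns when control moves to another agent; with `RunHooks`, the same observer keeps firing across that boundary.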

docs/mcp.md

Lines changed: 1 addition & 0 deletions
@@ -339,6 +339,7 @@ async with MCPServerStdio(
 ## 5. MCP server manager
 
 When you have multiple MCP servers, use `MCPServerManager` to connect them up front and expose the connected subset to your agents.
+See the [MCPServerManager API reference](ref/mcp/manager.md) for constructor options and reconnect behavior.
 
 ```python
 from agents import Agent, Runner
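The connect-up-front behavior can be sketched in plain Python. `ServerHandle` and `connect_all` below are hypothetical stand-ins, not the SDK's `MCPServerManager` API; they only illustrate "connect everything first, then expose the connected subset":

```python
class ServerHandle:
    """Hypothetical stand-in for an MCP server connection."""

    def __init__(self, name, healthy=True):
        self.name = name
        self.healthy = healthy

    def connect(self):
        # A real server would open a stdio/HTTP session here; we just report health.
        return self.healthy


def connect_all(servers):
    """Connect every server up front and return only the connected subset."""
    connected = []
    for server in servers:
        if server.connect():
            connected.append(server)
        # Servers that fail to connect are simply excluded from what agents see.
    return connected


servers = [ServerHandle("files"), ServerHandle("search", healthy=False)]
print([s.name for s in connect_all(servers)])
```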

docs/tools.md

Lines changed: 24 additions & 13 deletions
@@ -673,10 +673,9 @@ Disabled tools are completely hidden from the LLM at runtime, making this useful
 
 ## Experimental: Codex tool
 
-The `codex_tool` wraps the Codex CLI so an agent can run workspace-scoped tasks (shell, file edits, MCP tools)
-during a tool call. This surface is experimental and may change.
-By default, the tool name is `codex`. If you set a custom name, it must be `codex` or start with `codex_`.
-When an agent includes multiple Codex tools, each must use a unique name (including vs non-Codex tools).
+The `codex_tool` wraps the Codex CLI so an agent can run workspace-scoped tasks (shell, file edits, MCP tools) during a tool call. This surface is experimental and may change.
+
+Use it when you want the main agent to delegate a bounded workspace task to Codex without leaving the current run. By default, the tool name is `codex`. If you set a custom name, it must be `codex` or start with `codex_`. When an agent includes multiple Codex tools, each must use a unique name.
 
 ```python
 from agents import Agent
@@ -705,21 +704,33 @@ agent = Agent(
 )
 ```
 
-What to know:
+Start with these option groups:
+
+- Execution surface: `sandbox_mode` and `working_directory` define where Codex can operate. Pair them together, and set `skip_git_repo_check=True` when the working directory is not inside a Git repository.
+- Thread defaults: `default_thread_options=ThreadOptions(...)` configures the model, reasoning effort, approval policy, additional directories, network access, and web search mode. Prefer `web_search_mode` over the legacy `web_search_enabled`.
+- Turn defaults: `default_turn_options=TurnOptions(...)` configures per-turn behavior such as `idle_timeout_seconds` and the optional cancellation `signal`.
+- Tool I/O: tool calls must include at least one `inputs` item with `{ "type": "text", "text": ... }` or `{ "type": "local_image", "path": ... }`. `output_schema` lets you require structured Codex responses.
+
+Thread reuse and persistence are separate controls:
+
+- `persist_session=True` reuses one Codex thread for repeated calls to the same tool instance.
+- `use_run_context_thread_id=True` stores and reuses the thread ID in run context across runs that share the same mutable context object.
+- Thread ID precedence is: per-call `thread_id`, then run-context thread ID (if enabled), then the configured `thread_id` option.
+- The default run-context key is `codex_thread_id` for `name="codex"` and `codex_thread_id_<suffix>` for `name="codex_<suffix>"`. Override it with `run_context_thread_id_key`.
+
+Runtime configuration:
 
 - Auth: set `CODEX_API_KEY` (preferred) or `OPENAI_API_KEY`, or pass `codex_options={"api_key": "..."}`.
 - Runtime: `codex_options.base_url` overrides the CLI base URL.
 - Binary resolution: set `codex_options.codex_path_override` (or `CODEX_PATH`) to pin the CLI path. Otherwise the SDK resolves `codex` from `PATH`, then falls back to the bundled vendor binary.
 - Environment: `codex_options.env` fully controls the subprocess environment. When it is provided, the subprocess does not inherit `os.environ`.
 - Stream limits: `codex_options.codex_subprocess_stream_limit_bytes` (or `OPENAI_AGENTS_CODEX_SUBPROCESS_STREAM_LIMIT_BYTES`) controls stdout/stderr reader limits. Valid range is `65536` to `67108864`; default is `8388608`.
-- Inputs: tool calls must include at least one item in `inputs` with `{ "type": "text", "text": ... }` or `{ "type": "local_image", "path": ... }`.
-- Thread defaults: configure `default_thread_options` for `model_reasoning_effort`, `web_search_mode` (preferred over legacy `web_search_enabled`), `approval_policy`, and `additional_directories`.
-- Turn defaults: configure `default_turn_options` for `idle_timeout_seconds` and cancellation `signal`.
-- Safety: pair `sandbox_mode` with `working_directory`; set `skip_git_repo_check=True` outside Git repos.
-- Run-context thread persistence: `use_run_context_thread_id=True` stores and reuses `thread_id` in run context, across runs that share that context. This requires a mutable run context (for example, `dict` or a writable object field).
-- Run-context key defaults: the stored key defaults to `codex_thread_id` for `name="codex"`, or `codex_thread_id_<suffix>` for `name="codex_<suffix>"`. Set `run_context_thread_id_key` to override.
-- Thread ID precedence: per-call `thread_id` input takes priority, then run-context `thread_id` (if enabled), then the configured `thread_id` option.
 - Streaming: `on_stream` receives thread/turn lifecycle events and item events (`reasoning`, `command_execution`, `mcp_tool_call`, `file_change`, `web_search`, `todo_list`, and `error` item updates).
 - Outputs: results include `response`, `usage`, and `thread_id`; usage is added to `RunContextWrapper.usage`.
-- Structure: `output_schema` enforces structured Codex responses when you need typed outputs.
+
+Reference:
+
+- [Codex tool API reference](ref/extensions/experimental/codex/codex_tool.md)
+- [ThreadOptions reference](ref/extensions/experimental/codex/thread_options.md)
+- [TurnOptions reference](ref/extensions/experimental/codex/turn_options.md)
 - See `examples/tools/codex.py` and `examples/tools/codex_same_thread.py` for complete runnable samples.
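The thread-ID precedence and default-key rules described in this section can be expressed as a small plain-Python sketch. `resolve_thread_id` and `default_run_context_key` are hypothetical helper names illustrating the documented rules, not SDK functions:

```python
def default_run_context_key(tool_name: str) -> str:
    # Documented defaults: "codex" -> "codex_thread_id",
    # "codex_<suffix>" -> "codex_thread_id_<suffix>".
    if tool_name == "codex":
        return "codex_thread_id"
    assert tool_name.startswith("codex_"), "custom names must start with codex_"
    return "codex_thread_id_" + tool_name[len("codex_"):]


def resolve_thread_id(per_call_thread_id, run_context, configured_thread_id, *,
                      tool_name="codex", use_run_context_thread_id=False,
                      run_context_thread_id_key=None):
    # Documented precedence: per-call thread_id, then run-context thread ID
    # (only if enabled), then the configured thread_id option.
    if per_call_thread_id is not None:
        return per_call_thread_id
    if use_run_context_thread_id:
        key = run_context_thread_id_key or default_run_context_key(tool_name)
        stored = run_context.get(key)
        if stored is not None:
            return stored
    return configured_thread_id


ctx = {"codex_thread_id": "thread_from_context"}
print(resolve_thread_id(None, ctx, "configured", use_run_context_thread_id=True))
```

Note the run context here is a plain `dict`; as the docs state, run-context persistence requires a mutable context object.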

src/agents/run_internal/oai_conversation.py

Lines changed: 27 additions & 2 deletions
@@ -40,20 +40,39 @@ def _fingerprint_for_tracker(item: Any) -> str | None:
 
 @dataclass
 class OpenAIServerConversationTracker:
-    """Track server-side conversation state for conversation-aware runs."""
+    """Track server-side conversation state for conversation-aware runs.
+
+    This tracker keeps three complementary views of what has already been acknowledged:
+
+    - Object identity for prepared items in the current Python process.
+    - Stable server item IDs and tool call IDs returned by the provider.
+    - Content fingerprints for retry/resume paths where object identity changes.
+
+    The runner uses these sets together to decide which deltas are still safe to send when a
+    run is resumed, retried after a transient failure, or rebuilt from serialized RunState.
+    """
 
     conversation_id: str | None = None
     previous_response_id: str | None = None
     auto_previous_response_id: bool = False
+
+    # In-process object identity for items that have already been delivered or acknowledged.
     sent_items: set[int] = field(default_factory=set)
     server_items: set[int] = field(default_factory=set)
+
+    # Stable provider identifiers returned by the Responses API.
     server_item_ids: set[str] = field(default_factory=set)
     server_tool_call_ids: set[str] = field(default_factory=set)
+
+    # Content-based dedupe for resume/retry paths where objects are reconstructed.
     sent_item_fingerprints: set[str] = field(default_factory=set)
     sent_initial_input: bool = False
     remaining_initial_input: list[TResponseInputItem] | None = None
     primed_from_state: bool = False
     reasoning_item_id_policy: ReasoningItemIdPolicy | None = None
+
+    # Mapping from normalized prepared items back to their original source objects so that
+    # mark_input_as_sent() can mark the right object identities after the model call succeeds.
     prepared_item_sources: dict[int, TResponseInputItem] = field(default_factory=dict)
     prepared_item_sources_by_fingerprint: dict[str, list[TResponseInputItem]] = field(
         default_factory=dict
@@ -75,7 +94,13 @@ def hydrate_from_state(
         model_responses: list[ModelResponse],
         session_items: list[TResponseInputItem] | None = None,
     ) -> None:
-        """Seed tracking from prior state so resumed runs do not replay already-sent content."""
+        """Seed tracking from prior state so resumed runs do not replay already-sent content.
+
+        This reconstructs the tracker from the original input, saved model responses, generated
+        run items, and optional session history. After hydration, retry logic can treat rebuilt
+        items as already acknowledged even though their Python object identities may differ from
+        the original run.
+        """
         if self.sent_initial_input:
             return
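The three complementary views the docstring describes can be illustrated with a hedged sketch. `already_sent` is a hypothetical helper (not a tracker method), and `toy_fingerprint` stands in for the module's `_fingerprint_for_tracker`; the real tracker keeps these sets on the dataclass fields listed above:

```python
import json


def already_sent(item, *, sent_items, server_item_ids, sent_item_fingerprints, fingerprint):
    # View 1: object identity -- this exact Python object was already delivered.
    if id(item) in sent_items:
        return True
    # View 2: the provider reported a stable server item ID for it.
    item_id = item.get("id")
    if item_id is not None and item_id in server_item_ids:
        return True
    # View 3: content fingerprint, for retry/resume paths where the object was rebuilt.
    fp = fingerprint(item)
    return fp is not None and fp in sent_item_fingerprints


def toy_fingerprint(item):
    # Stand-in for _fingerprint_for_tracker: a stable serialization of content.
    return json.dumps(item, sort_keys=True)


msg = {"role": "user", "content": "hello"}
rebuilt = {"role": "user", "content": "hello"}  # same content, different object
fps = {toy_fingerprint(msg)}
print(already_sent(rebuilt, sent_items=set(), server_item_ids=set(),
                   sent_item_fingerprints=fps, fingerprint=toy_fingerprint))
```

The fingerprint view is what keeps a resumed run from replaying `rebuilt` even though `id(rebuilt) != id(msg)`.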

src/agents/run_internal/session_persistence.py

Lines changed: 18 additions & 3 deletions
@@ -59,9 +59,19 @@ async def prepare_input_with_session(
     include_history_in_prepared_input: bool = True,
     preserve_dropped_new_items: bool = False,
 ) -> tuple[str | list[TResponseInputItem], list[TResponseInputItem]]:
-    """
-    Prepare input by combining it with session history and applying the optional input callback.
-    Returns the prepared input plus the appended items that should be persisted separately.
+    """Prepare model input from session history plus the new turn input.
+
+    Returns a tuple of:
+
+    1. The prepared input that should be sent to the model after normalization and dedupe.
+    2. The subset of items that should be appended to the session store for this turn.
+
+    The second value is intentionally not "everything returned by the callback". When a
+    ``session_input_callback`` reorders or filters history, we still need to persist only the
+    items that belong to the new turn. This function therefore compares the callback output
+    against deep-copied history and new-input lists, first by object identity and then by
+    content frequency, so retries and custom merge strategies do not accidentally re-persist
+    old history as fresh input.
     """
 
     if session is None:
@@ -102,6 +112,9 @@ async def prepare_input_with_session(
     if not isinstance(combined, list):
         raise UserError("Session input callback must return a list of input items.")
 
+    # The callback may reorder, drop, or duplicate items. Keep separate reference maps for
+    # the copied history and copied new-input lists so we can reconstruct which output items
+    # belong to the new turn and therefore still need to be persisted.
     history_refs = _build_reference_map(history_for_callback)
     new_refs = _build_reference_map(new_items_for_callback)
     history_counts = _build_frequency_map(history_for_callback)
@@ -135,6 +148,8 @@ async def prepare_input_with_session(
     else:
         prepared_items_raw = new_items_for_callback if preserve_dropped_new_items else []
 
+    # Normalize exactly as the runtime does elsewhere so the prepared model input and the
+    # persisted session items are derived from the same item shape and dedupe rules.
     prepared_as_inputs = [ensure_input_item_format(item) for item in prepared_items_raw]
     filtered = drop_orphan_function_calls(prepared_as_inputs)
     normalized = normalize_input_items_for_api(filtered)
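The "identity first, then content frequency" comparison described in the new docstring can be sketched in plain Python. `items_to_persist` is a hypothetical helper, and `repr` here stands in for the module's `_build_reference_map` / `_build_frequency_map` machinery; it only illustrates why reordered or copied history is not re-persisted:

```python
from collections import Counter


def items_to_persist(callback_output, history):
    """Return the callback output items that belong to the new turn (illustrative sketch)."""
    # Pass 1 key: object identity -- exact history objects are old content.
    history_ids = {id(item) for item in history}
    # Pass 2 key: content frequency -- equal-content copies of history items
    # (e.g. rebuilt after a retry) are discounted up to their count in history.
    history_budget = Counter(repr(item) for item in history)

    persist = []
    for item in callback_output:
        if id(item) in history_ids:
            continue  # the exact history object: old content
        key = repr(item)
        if history_budget[key] > 0:
            history_budget[key] -= 1  # a content-equal copy of history
            continue
        persist.append(item)  # genuinely new for this turn
    return persist


h1 = {"role": "user", "content": "hi"}
n1 = {"role": "user", "content": "what's new?"}
print(items_to_persist([h1, n1], [h1]))
```

Without the frequency pass, a callback that deep-copied history (so object identities change) would cause old messages to be appended to the session again on every retry.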

src/agents/run_state.py

Lines changed: 32 additions & 3 deletions
@@ -117,7 +117,19 @@
 
 @dataclass
 class RunState(Generic[TContext, TAgent]):
-    """Serializable snapshot of an agent run, including context, usage, and interruptions."""
+    """Serializable snapshot of an agent run, including context, usage, and interruptions.
+
+    ``RunState`` is the durable pause/resume boundary for human-in-the-loop flows. It stores
+    enough information to continue an interrupted run, including model responses, generated
+    items, approval state, and optional server-managed conversation identifiers.
+
+    Context serialization is intentionally conservative:
+
+    - Mapping contexts round-trip directly.
+    - Custom contexts may require a serializer and deserializer.
+    - When no safe serializer is available, the snapshot is still written but emits warnings and
+      records metadata describing what is required to rebuild the original context type.
+    """
 
     _current_turn: int = 0
     """Current turn number in the conversation."""
@@ -297,7 +309,13 @@ def _serialize_context_payload(
         context_serializer: ContextSerializer | None = None,
         strict_context: bool = False,
     ) -> tuple[dict[str, Any] | None, dict[str, Any]]:
-        """Validate and serialize the stored run context."""
+        """Validate and serialize the stored run context.
+
+        The returned metadata captures how the context was serialized so restore-time code can
+        decide whether a deserializer or override is required. This lets RunState remain durable
+        for simple mapping contexts without silently pretending that richer custom objects can be
+        reconstructed automatically.
+        """
         if self._context is None:
             return None, _build_context_meta(
                 None,
@@ -1906,7 +1924,18 @@ async def _build_run_state_from_json(
     context_deserializer: ContextDeserializer | None = None,
     strict_context: bool = False,
 ) -> RunState[Any, Agent[Any]]:
-    """Shared helper to rebuild RunState from JSON payload."""
+    """Shared helper to rebuild RunState from JSON payload.
+
+    Context restoration follows this precedence order:
+
+    1. ``context_override`` when supplied.
+    2. ``context_deserializer`` applied to serialized mapping data.
+    3. Direct mapping restore for contexts that were serialized as plain mappings.
+
+    When the snapshot metadata indicates that the original context type could not round-trip
+    safely, this function warns or raises (in ``strict_context`` mode) rather than silently
+    claiming that the rebuilt mapping is equivalent to the original object.
+    """
     schema_version = state_json.get("$schemaVersion")
     if not schema_version:
         raise UserError("Run state is missing schema version")
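The restoration precedence documented in that docstring can be sketched as a plain function. `restore_context` is a hypothetical helper illustrating the documented order, not the SDK's implementation:

```python
def restore_context(serialized, *, context_override=None, context_deserializer=None):
    # Documented precedence: explicit override, then custom deserializer,
    # then direct restore for contexts serialized as plain mappings.
    if context_override is not None:
        return context_override
    if context_deserializer is not None:
        return context_deserializer(serialized)
    if isinstance(serialized, dict):
        return dict(serialized)  # mapping contexts round-trip directly
    raise ValueError("cannot restore context without an override or deserializer")


print(restore_context({"user_id": 42}))
```

A richer custom context type would hit the first two branches; the mapping branch is what keeps simple `dict` contexts durable with no extra configuration.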
