diff --git a/docs/running_agents.md b/docs/running_agents.md index f9cfa5e274..b5a52299c6 100644 --- a/docs/running_agents.md +++ b/docs/running_agents.md @@ -441,6 +441,31 @@ print(result.final_output) Set `include_in_history=False` when you do not want the fallback output appended to conversation history. +### Recovering from hallucinated tool calls + +Models occasionally call a tool that was never registered on the agent (issue [#325](https://github.com/openai/openai-agents-python/issues/325)). By default the SDK raises `ModelBehaviorError` and the run ends, discarding prior work. Register a `"tool_not_found"` handler to turn that crash into a recoverable nudge: the handler returns a [`ToolNotFoundAction`][agents.ToolNotFoundAction] with a model-visible error message, the runner injects it as a synthetic tool output, and the model can self-correct on the next turn. The handler may be sync or async and is invoked once for each unknown tool call in the turn. Returning `None` (or not registering a handler) preserves the existing raise behavior. Recovery is bounded by the run's `max_turns`, so a model that keeps hallucinating still terminates (by default with `MaxTurnsExceeded`). + +```python +from agents import Agent, Runner, ToolNotFoundAction, ToolNotFoundErrorHandlerInput + + +def on_tool_not_found(data: ToolNotFoundErrorHandlerInput[None]) -> ToolNotFoundAction: + return ToolNotFoundAction( + error_message=( + f"Tool {data.tool_name!r} does not exist. Available: {data.available_tools}." + ) + ) + + +result = Runner.run_sync( + agent, + "find me profiles related to Anthropic", + error_handlers={"tool_not_found": on_tool_not_found}, +) +``` + +See [`examples/basic/tool_not_found_handler.py`](https://github.com/openai/openai-agents-python/blob/main/examples/basic/tool_not_found_handler.py) for a full runnable example. + ## Durable execution integrations and human-in-the-loop For tool approval pause/resume patterns, start with the dedicated [Human-in-the-loop guide](human_in_the_loop.md). 
diff --git a/examples/basic/tool_not_found_handler.py b/examples/basic/tool_not_found_handler.py new file mode 100644 index 0000000000..ec2bd60504 --- /dev/null +++ b/examples/basic/tool_not_found_handler.py @@ -0,0 +1,140 @@ +"""Recovering from a model that calls a tool that doesn't exist. + +Large models occasionally "hallucinate" a tool name that isn't registered on the agent -- +for example they call ``search_linkedin`` when only ``search_web`` is available. Without a +handler, the SDK raises ``ModelBehaviorError`` and the entire run is lost. + +Registering a ``tool_not_found`` error handler lets you turn that crash into a recoverable +nudge: the handler returns a ``ToolNotFoundAction`` with an error message, the runner +injects that message as a synthetic tool output, and the model self-corrects on the next +turn. + +This example uses a tiny scripted ``Model`` subclass so it runs offline -- no API key +needed. See issue #325 for the real-world report that motivated this API. + + $ python examples/basic/tool_not_found_handler.py +""" + +from __future__ import annotations + +import asyncio +from collections.abc import AsyncIterator +from typing import Any + +from openai.types.responses import ResponseFunctionToolCall, ResponseOutputMessage + +from agents import ( + Agent, + ModelResponse, + Runner, + ToolNotFoundAction, + ToolNotFoundErrorHandlerInput, + Usage, + function_tool, +) +from agents.agent_output import AgentOutputSchemaBase +from agents.handoffs import Handoff +from agents.items import TResponseInputItem, TResponseStreamEvent +from agents.model_settings import ModelSettings +from agents.models.interface import Model, ModelTracing +from agents.tool import Tool + + +@function_tool +def search_web(query: str) -> str: + """The only real tool on the agent.""" + return f"results for: {query}" + + +class ScriptedModel(Model): + """Plays back a fixed script of model responses so the example runs offline.""" + + def __init__(self, scripted_outputs: 
list[list[Any]]) -> None: + self._outputs = list(scripted_outputs) + + async def get_response(self, *args: Any, **kwargs: Any) -> ModelResponse: + output = self._outputs.pop(0) if self._outputs else [] + return ModelResponse(output=output, usage=Usage(), response_id="scripted") + + def stream_response( # pragma: no cover - not exercised here + self, + system_instructions: str | None, + input: str | list[TResponseInputItem], + model_settings: ModelSettings, + tools: list[Tool], + output_schema: AgentOutputSchemaBase | None, + handoffs: list[Handoff], + tracing: ModelTracing, + *, + previous_response_id: str | None = None, + conversation_id: str | None = None, + prompt: Any | None = None, + ) -> AsyncIterator[TResponseStreamEvent]: + raise NotImplementedError("streaming not used in this example") + + +def on_tool_not_found(data: ToolNotFoundErrorHandlerInput[Any]) -> ToolNotFoundAction: + """Build a model-visible error so the model can pick a valid tool on its next step.""" + available = ", ".join(data.available_tools) or "(none)" + return ToolNotFoundAction( + error_message=( + f"Tool {data.tool_name!r} is not registered on this agent. " + f"Available tools: [{available}]. Pick one of those and try again." + ) + ) + + +async def main() -> None: + # Turn 1: the model hallucinates a tool that doesn't exist. + # Turn 2: after the handler injects the error, the model recovers with a final answer. + scripted_model = ScriptedModel( + [ + [ + ResponseFunctionToolCall( + id="call-1", + call_id="call-1", + type="function_call", + name="search_linkedin", # intentionally unknown + arguments='{"query": "Anthropic"}', + ) + ], + [ + ResponseOutputMessage.model_validate( + { + "id": "msg-1", + "type": "message", + "role": "assistant", + "status": "completed", + "content": [ + { + "type": "output_text", + "text": "Sorry, I used the wrong tool. 
Here's what I got from search_web instead.", + "annotations": [], + "logprobs": [], + } + ], + } + ) + ], + ] + ) + + agent = Agent( + name="recoverable_agent", + instructions="You are a helpful assistant.", + model=scripted_model, + tools=[search_web], + ) + + result = await Runner.run( + agent, + input="find me profiles related to Anthropic", + error_handlers={"tool_not_found": on_tool_not_found}, + ) + + print("Final output:") + print(result.final_output) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/src/agents/__init__.py b/src/agents/__init__.py index e3b34d244b..4e3023cf44 100644 --- a/src/agents/__init__.py +++ b/src/agents/__init__.py @@ -112,6 +112,9 @@ RunErrorHandlerInput, RunErrorHandlerResult, RunErrorHandlers, + ToolNotFoundAction, + ToolNotFoundErrorHandler, + ToolNotFoundErrorHandlerInput, ) from .run_state import RunState from .stream_events import ( @@ -420,6 +423,9 @@ def enable_verbose_stdout_logging(): "RunErrorHandlerInput", "RunErrorHandlerResult", "RunErrorHandlers", + "ToolNotFoundAction", + "ToolNotFoundErrorHandler", + "ToolNotFoundErrorHandlerInput", "AgentToolInvocation", "RunResult", "RunResultStreaming", diff --git a/src/agents/run.py b/src/agents/run.py index f116cc1fdd..ce8baae4e9 100644 --- a/src/agents/run.py +++ b/src/agents/run.py @@ -1194,6 +1194,8 @@ def _finalize_result(result: RunResult) -> RunResult: ), reasoning_item_id_policy=resolved_reasoning_item_id_policy, prompt_cache_key_resolver=prompt_cache_key_resolver, + error_handlers=error_handlers, + model_responses_so_far=model_responses, ) ) @@ -1249,6 +1251,8 @@ def _finalize_result(result: RunResult) -> RunResult: ), reasoning_item_id_policy=resolved_reasoning_item_id_policy, prompt_cache_key_resolver=prompt_cache_key_resolver, + error_handlers=error_handlers, + model_responses_so_far=model_responses, ) finally: attach_usage_to_span( diff --git a/src/agents/run_config.py b/src/agents/run_config.py index 7457706cfc..b5a35e55dd 100644 --- 
a/src/agents/run_config.py +++ b/src/agents/run_config.py @@ -284,7 +284,7 @@ class RunOptions(TypedDict, Generic[TContext]): """The session for the run.""" error_handlers: NotRequired[RunErrorHandlers[TContext] | None] - """Error handlers keyed by error kind. Currently supports max_turns.""" + """Error handlers keyed by error kind. Supports ``max_turns`` and ``tool_not_found``.""" __all__ = [ diff --git a/src/agents/run_error_handlers.py b/src/agents/run_error_handlers.py index aee386fbb2..03da849831 100644 --- a/src/agents/run_error_handlers.py +++ b/src/agents/run_error_handlers.py @@ -47,10 +47,58 @@ class RunErrorHandlerResult: ] +@dataclass +class ToolNotFoundErrorHandlerInput(Generic[TContext]): + """Input passed to the ``tool_not_found`` error handler. + + The handler is invoked when the model calls a tool that is not registered on the current + agent. Returning :class:`ToolNotFoundAction` tells the runner to inject a synthetic tool + output with ``error_message`` so the model can self-correct on the next turn. Returning + ``None`` re-raises the original :class:`ModelBehaviorError`. + """ + + tool_name: str + """Name of the tool the model tried to call.""" + + available_tools: list[str] + """Names of tools actually registered on the agent (function + custom + handoffs).""" + + agent: Agent[Any] + """The agent that received the bogus tool call.""" + + context: RunContextWrapper[TContext] + """The run context wrapper.""" + + run_data: RunErrorData + """Snapshot of run data at the moment the error occurred.""" + + +@dataclass +class ToolNotFoundAction: + """Instructs the runner to recover from a tool-not-found error. + + The runner appends a synthetic ``function_call_output`` item containing ``error_message`` to + the conversation, then continues the turn. The model will see the error on its next step and + can retry with a valid tool name. + + Note: recovery is bounded by the run's ``max_turns`` setting. 
A model that repeatedly + hallucinates tool calls will eventually hit that limit and raise ``MaxTurnsExceeded``. + """ + + error_message: str + + +ToolNotFoundErrorHandler = Callable[ + [ToolNotFoundErrorHandlerInput[TContext]], + MaybeAwaitable["ToolNotFoundAction | None"], +] + + class RunErrorHandlers(TypedDict, Generic[TContext], total=False): """Error handlers keyed by error kind.""" max_turns: RunErrorHandler[TContext] + tool_not_found: ToolNotFoundErrorHandler[TContext] __all__ = [ @@ -59,4 +107,7 @@ class RunErrorHandlers(TypedDict, Generic[TContext], total=False): "RunErrorHandlerInput", "RunErrorHandlerResult", "RunErrorHandlers", + "ToolNotFoundAction", + "ToolNotFoundErrorHandler", + "ToolNotFoundErrorHandlerInput", ] diff --git a/src/agents/run_internal/error_handlers.py b/src/agents/run_internal/error_handlers.py index bcb2d9bced..524f5e1813 100644 --- a/src/agents/run_internal/error_handlers.py +++ b/src/agents/run_internal/error_handlers.py @@ -23,6 +23,8 @@ RunErrorHandlerInput, RunErrorHandlerResult, RunErrorHandlers, + ToolNotFoundAction, + ToolNotFoundErrorHandlerInput, ) from .items import ReasoningItemIdPolicy, run_item_to_input_item from .turn_preparation import get_output_schema @@ -161,3 +163,42 @@ async def resolve_run_error_handler_result( raise UserError("Invalid run error handler result.") from exc return RunErrorHandlerResult(final_output=result) return RunErrorHandlerResult(final_output=result) + + +async def resolve_tool_not_found_action( + *, + error_handlers: RunErrorHandlers[TContext] | None, + tool_name: str, + available_tools: list[str], + agent: Agent[Any], + context_wrapper: RunContextWrapper[TContext], + run_data: RunErrorData, +) -> ToolNotFoundAction | None: + """Invoke the ``tool_not_found`` handler (if configured) and normalize its return value. + + Returns a :class:`ToolNotFoundAction` when the handler asks the runner to recover, or + ``None`` when no handler is registered or the handler opts to re-raise. 
+ """ + if not error_handlers: + return None + handler = error_handlers.get("tool_not_found") + if handler is None: + return None + handler_input = ToolNotFoundErrorHandlerInput( + tool_name=tool_name, + available_tools=available_tools, + agent=agent, + context=context_wrapper, + run_data=run_data, + ) + result: Any = handler(handler_input) + if inspect.isawaitable(result): + result = await result + if result is None: + return None + if isinstance(result, ToolNotFoundAction): + return result + raise UserError( + "tool_not_found handler must return ToolNotFoundAction or None, " + f"got {type(result).__name__}." + ) diff --git a/src/agents/run_internal/run_loop.py b/src/agents/run_internal/run_loop.py index e6f1062072..ae405de867 100644 --- a/src/agents/run_internal/run_loop.py +++ b/src/agents/run_internal/run_loop.py @@ -1028,6 +1028,7 @@ async def _save_stream_items_without_count( ), reasoning_item_id_policy=resolved_reasoning_item_id_policy, prompt_cache_key_resolver=prompt_cache_key_resolver, + error_handlers=error_handlers, ) finally: attach_usage_to_span( @@ -1246,6 +1247,7 @@ async def run_single_turn_streamed( pending_server_items: list[RunItem] | None = None, reasoning_item_id_policy: ReasoningItemIdPolicy | None = None, prompt_cache_key_resolver: PromptCacheKeyResolver | None = None, + error_handlers: RunErrorHandlers[TContext] | None = None, ) -> SingleStepResult: """Run a single streamed turn and emit events as results arrive.""" public_agent = bindings.public_agent @@ -1636,6 +1638,8 @@ async def rewind_model_request() -> None: server_manages_conversation=server_conversation_tracker is not None, event_queue=streamed_result._event_queue, before_side_effects=raise_if_input_guardrail_tripwire_known, + error_handlers=error_handlers, + raw_responses_so_far=streamed_result.raw_responses, ) items_to_filter = session_items_for_turn(single_step_result) @@ -1697,6 +1701,8 @@ async def run_single_turn( session_items_to_rewind: list[TResponseInputItem] | None = 
None, reasoning_item_id_policy: ReasoningItemIdPolicy | None = None, prompt_cache_key_resolver: PromptCacheKeyResolver | None = None, + error_handlers: RunErrorHandlers[TContext] | None = None, + model_responses_so_far: list[ModelResponse] | None = None, ) -> SingleStepResult: """Run a single non-streaming turn of the agent loop.""" public_agent = bindings.public_agent @@ -1766,6 +1772,8 @@ async def run_single_turn( run_config=run_config, tool_use_tracker=tool_use_tracker, server_manages_conversation=server_conversation_tracker is not None, + error_handlers=error_handlers, + raw_responses_so_far=model_responses_so_far, ) diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py index e7c059c701..2d66fb42ba 100644 --- a/src/agents/run_internal/turn_resolution.py +++ b/src/agents/run_internal/turn_resolution.py @@ -68,6 +68,7 @@ from ..logger import logger from ..run_config import RunConfig from ..run_context import AgentHookContext, RunContextWrapper, TContext +from ..run_error_handlers import RunErrorHandlers, ToolNotFoundAction from ..run_state import RunState from ..stream_events import StreamEvent from ..tool import ( @@ -1417,6 +1418,79 @@ def _add_unmatched_pending(approval: ToolApprovalItem) -> None: ) +def _build_custom_tool_not_found_output_item( + *, + agent: Agent[Any], + tool_call: ResponseCustomToolCall, + error_message: str, +) -> ToolCallOutputItem: + """Synthesize a ``custom_tool_call_output`` item to feed the model after a not-found error.""" + raw_item: dict[str, Any] = { + "type": "custom_tool_call_output", + "call_id": tool_call.call_id, + "output": error_message, + } + return ToolCallOutputItem( + agent=agent, + output=error_message, + raw_item=cast(Any, raw_item), + ) + + +def collect_tool_not_found_calls( + *, + all_tools: list[Tool], + response: ModelResponse, + handoffs: list[Handoff], + output_schema: AgentOutputSchemaBase | None = None, +) -> list[tuple[str, str]]: + """Return ``(call_id, 
tool_name)`` pairs for every tool call in ``response`` whose + name is not registered on the agent. + + The runner calls this helper to pre-scan model output before invoking an async + ``tool_not_found`` handler and then feeds the resolved actions back into + :func:`process_model_response`. + + ``output_schema`` mirrors the parameter ``process_model_response`` receives so the + pre-scan matches the real lookup's escape hatches. In particular, when a structured + output is in use the LiteLLM path synthesizes a ``json_tool_call`` tool on the fly + rather than raising, so it must not be flagged as unknown here. + """ + handoff_map = {handoff.tool_name: handoff for handoff in handoffs} + function_map = build_function_tool_lookup_map( + [tool for tool in all_tools if isinstance(tool, FunctionTool)] + ) + custom_tool_map = {tool.name: tool for tool in all_tools if isinstance(tool, CustomTool)} + apply_patch_tool = next((tool for tool in all_tools if isinstance(tool, ApplyPatchTool)), None) + + missing: list[tuple[str, str]] = [] + for output in response.output: + if isinstance(output, ResponseCustomToolCall): + if output.name in custom_tool_map: + continue + if is_apply_patch_name(output.name, apply_patch_tool): + continue + missing.append((output.call_id, output.name)) + continue + if isinstance(output, ResponseFunctionToolCall): + if is_apply_patch_name(output.name, apply_patch_tool): + # apply_patch routing happens later; not a tool-not-found. + continue + qualified_name = get_tool_call_qualified_name(output) or output.name + if qualified_name == output.name and output.name in handoff_map: + continue + lookup_key = get_function_tool_lookup_key_for_call(output) + if lookup_key is not None and lookup_key in function_map: + continue + # LiteLLM structured-output escape hatch: `process_model_response` synthesizes a + # ``json_tool_call`` tool when an output schema is in use. Mirror that here so + # the pre-scan doesn't fire the handler on a legitimate pseudo-call. 
+ if output_schema is not None and output.name == "json_tool_call": + continue + missing.append((output.call_id, qualified_name)) + return missing + + def process_model_response( *, agent: Agent[Any], @@ -1425,6 +1499,7 @@ def process_model_response( output_schema: AgentOutputSchemaBase | None, handoffs: list[Handoff], existing_items: Sequence[RunItem] | None = None, + tool_not_found_actions: Mapping[str, Any] | None = None, ) -> ProcessedResponse: items: list[RunItem] = [] @@ -1741,6 +1816,20 @@ def _dump_output_item(raw_item: Any) -> dict[str, Any]: ) else: items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent)) + recovery_action = ( + tool_not_found_actions.get(output.call_id) if tool_not_found_actions else None + ) + if isinstance(recovery_action, ToolNotFoundAction): + # Recovery path: the user handler is rescuing this turn, so don't + # pollute the trace with a span error. + items.append( + _build_custom_tool_not_found_output_item( + agent=agent, + tool_call=output, + error_message=recovery_action.error_message, + ) + ) + continue _error_tracing.attach_error_to_current_span( SpanError( message="Custom tool not found", @@ -1816,6 +1905,28 @@ def _dump_output_item(raw_item: Any) -> dict[str, Any]: ) ) continue + recovery_action = ( + tool_not_found_actions.get(output.call_id) if tool_not_found_actions else None + ) + if isinstance(recovery_action, ToolNotFoundAction): + # Recovery path: the user handler is rescuing this turn, so don't + # pollute the trace with a span error. 
+ items.append( + ToolCallItem( + raw_item=output, + agent=agent, + ) + ) + items.append( + ToolCallOutputItem( + output=recovery_action.error_message, + raw_item=ItemHelpers.tool_call_output_item( + output, recovery_action.error_message + ), + agent=agent, + ) + ) + continue _error_tracing.attach_error_to_current_span( SpanError( message="Tool not found", @@ -1858,6 +1969,84 @@ def _dump_output_item(raw_item: Any) -> dict[str, Any]: ) +async def _resolve_tool_not_found_actions( + *, + error_handlers: RunErrorHandlers[TContext] | None, + agent: Agent[Any], + all_tools: list[Tool], + handoffs: list[Handoff], + response: ModelResponse, + output_schema: AgentOutputSchemaBase | None, + original_input: str | list[TResponseInputItem], + pre_step_items: list[RunItem], + raw_responses_so_far: list[ModelResponse] | None, + context_wrapper: RunContextWrapper[TContext], +) -> dict[str, ToolNotFoundAction] | None: + """Pre-scan the model response for unknown tool calls and invoke the user-supplied + ``tool_not_found`` handler. Returns the map ``{call_id: ToolNotFoundAction}`` that + :func:`process_model_response` consults at each raise site.""" + if not error_handlers or error_handlers.get("tool_not_found") is None: + return None + missing = collect_tool_not_found_calls( + all_tools=all_tools, + response=response, + handoffs=handoffs, + output_schema=output_schema, + ) + if not missing: + return None + # Lazy import to avoid a cycle: error_handlers imports from turn_preparation, which + # is loaded before turn_resolution's module init finishes. 
+ from .error_handlers import build_run_error_data, resolve_tool_not_found_action + + raw_responses = list(raw_responses_so_far or []) + raw_responses.append(response) + run_data = build_run_error_data( + input=original_input, + new_items=list(pre_step_items), + raw_responses=raw_responses, + last_agent=agent, + ) + available_tools = _collect_available_tool_names(all_tools=all_tools, handoffs=handoffs) + resolved: dict[str, ToolNotFoundAction] = {} + for call_id, tool_name in missing: + action = await resolve_tool_not_found_action( + error_handlers=error_handlers, + tool_name=tool_name, + available_tools=list(available_tools), + agent=agent, + context_wrapper=context_wrapper, + run_data=run_data, + ) + if action is not None: + resolved[call_id] = action + return resolved or None + + +def _collect_available_tool_names(*, all_tools: list[Tool], handoffs: list[Handoff]) -> list[str]: + """Best-effort list of tool names the model could have used. + + Includes function tools, custom tools, and handoffs. Other hosted/builtin tools + (shell, apply_patch, computer, MCP) are not addressable by arbitrary name and are + omitted — the handler only needs this to help the model self-correct. + """ + names: list[str] = [] + for tool in all_tools: + if isinstance(tool, FunctionTool | CustomTool): + names.append(tool.name) + for handoff in handoffs: + names.append(handoff.tool_name) + # Preserve order, drop duplicates. 
+ seen: set[str] = set() + deduped: list[str] = [] + for name in names: + if name in seen: + continue + seen.add(name) + deduped.append(name) + return deduped + + async def get_single_step_result_from_response( *, bindings: AgentBindings[TContext], @@ -1874,8 +2063,24 @@ async def get_single_step_result_from_response( server_manages_conversation: bool = False, event_queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel] | None = None, before_side_effects: Callable[[], Awaitable[None]] | None = None, + error_handlers: RunErrorHandlers[TContext] | None = None, + raw_responses_so_far: list[ModelResponse] | None = None, ) -> SingleStepResult: item_agent = bindings.public_agent + + tool_not_found_actions = await _resolve_tool_not_found_actions( + error_handlers=error_handlers, + agent=item_agent, + all_tools=all_tools, + handoffs=handoffs, + response=new_response, + output_schema=output_schema, + original_input=original_input, + pre_step_items=pre_step_items, + raw_responses_so_far=raw_responses_so_far, + context_wrapper=context_wrapper, + ) + processed_response = process_model_response( agent=item_agent, all_tools=all_tools, @@ -1883,6 +2088,7 @@ async def get_single_step_result_from_response( output_schema=output_schema, handoffs=handoffs, existing_items=pre_step_items, + tool_not_found_actions=tool_not_found_actions, ) if before_side_effects is not None: diff --git a/tests/test_tool_not_found_handler.py b/tests/test_tool_not_found_handler.py new file mode 100644 index 0000000000..356a40097d --- /dev/null +++ b/tests/test_tool_not_found_handler.py @@ -0,0 +1,391 @@ +from __future__ import annotations + +from typing import Any + +import pytest +from pydantic import BaseModel + +from agents import ( + Agent, + ModelBehaviorError, + Runner, + ToolCallOutputItem, + ToolNotFoundAction, + ToolNotFoundErrorHandlerInput, +) + +from .fake_model import FakeModel +from .test_responses import get_function_tool, get_function_tool_call, get_text_message + + +def 
_agent_with_one_tool() -> tuple[Agent[Any], FakeModel]: + model = FakeModel() + agent = Agent( + name="test_agent", + model=model, + tools=[get_function_tool("real_tool", "tool_result")], + ) + return agent, model + + +@pytest.mark.asyncio +async def test_no_handler_raises_model_behavior_error() -> None: + """Backward compat: no handler → ``ModelBehaviorError`` bubbles up.""" + agent, model = _agent_with_one_tool() + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("search_linkedin", "{}")], + ] + ) + with pytest.raises( + ModelBehaviorError, match="Tool search_linkedin not found in agent test_agent" + ): + await Runner.run(agent, input="hi") + + +@pytest.mark.asyncio +async def test_handler_returning_none_raises() -> None: + """Handler can opt out by returning ``None``; the runner re-raises.""" + agent, model = _agent_with_one_tool() + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("search_linkedin", "{}")], + ] + ) + + def handler(_: ToolNotFoundErrorHandlerInput[Any]) -> None: + return None + + with pytest.raises(ModelBehaviorError, match="Tool search_linkedin not found"): + await Runner.run( + agent, + input="hi", + error_handlers={"tool_not_found": handler}, + ) + + +@pytest.mark.asyncio +async def test_handler_returns_action_and_model_recovers() -> None: + """Returning a ``ToolNotFoundAction`` injects a synthetic tool output and continues.""" + agent, model = _agent_with_one_tool() + model.add_multiple_turn_outputs( + [ + # Turn 1: model hallucinates a tool + [get_function_tool_call("search_linkedin", "{}")], + # Turn 2: with the injected error, model "self-corrects" to a final answer + [get_text_message("recovered")], + ] + ) + + def handler(data: ToolNotFoundErrorHandlerInput[Any]) -> ToolNotFoundAction: + return ToolNotFoundAction( + error_message=( + f"Tool {data.tool_name!r} is not registered. 
" + f"Available tools: {data.available_tools}" + ) + ) + + result = await Runner.run( + agent, + input="hi", + error_handlers={"tool_not_found": handler}, + ) + + assert result.final_output == "recovered" + # The synthetic tool output was injected with the handler's message. + outputs = [item for item in result.new_items if isinstance(item, ToolCallOutputItem)] + assert len(outputs) == 1 + assert "search_linkedin" in str(outputs[0].output) + assert "real_tool" in str(outputs[0].output) + + +@pytest.mark.asyncio +async def test_async_handler_is_awaited() -> None: + """The handler may be a coroutine; the runner awaits it.""" + agent, model = _agent_with_one_tool() + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("bogus_tool", "{}")], + [get_text_message("ok")], + ] + ) + + called = {"count": 0} + + async def async_handler( + data: ToolNotFoundErrorHandlerInput[Any], + ) -> ToolNotFoundAction: + called["count"] += 1 + return ToolNotFoundAction(error_message=f"no such tool: {data.tool_name}") + + result = await Runner.run( + agent, + input="hi", + error_handlers={"tool_not_found": async_handler}, + ) + assert called["count"] == 1 + assert result.final_output == "ok" + + +@pytest.mark.asyncio +async def test_handler_input_contains_available_tools() -> None: + """The handler input exposes ``available_tools`` — the list of names the agent has.""" + model = FakeModel() + agent = Agent( + name="multi_tool_agent", + model=model, + tools=[ + get_function_tool("alpha", "a"), + get_function_tool("beta", "b"), + ], + ) + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("gamma", "{}")], + [get_text_message("done")], + ] + ) + + seen_inputs: list[ToolNotFoundErrorHandlerInput[Any]] = [] + + def handler(data: ToolNotFoundErrorHandlerInput[Any]) -> ToolNotFoundAction: + seen_inputs.append(data) + return ToolNotFoundAction(error_message="nope") + + await Runner.run(agent, input="hi", error_handlers={"tool_not_found": handler}) + + assert len(seen_inputs) 
== 1 + observed = seen_inputs[0] + assert observed.tool_name == "gamma" + assert set(observed.available_tools) == {"alpha", "beta"} + assert observed.agent is agent + # run_data is populated (defensive; the exact contents aren't part of the contract here). + assert observed.run_data.last_agent is agent + + +@pytest.mark.asyncio +async def test_handler_invalid_return_raises_user_error() -> None: + """Handlers must return ``ToolNotFoundAction | None``; other values fail loudly.""" + from agents.exceptions import UserError + + agent, model = _agent_with_one_tool() + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("bogus", "{}")], + ] + ) + + def bad_handler(_: ToolNotFoundErrorHandlerInput[Any]) -> str: + return "not a ToolNotFoundAction" + + with pytest.raises(UserError, match="tool_not_found handler must return"): + await Runner.run( + agent, + input="hi", + error_handlers={"tool_not_found": bad_handler}, # type: ignore[typeddict-item] + ) + + +@pytest.mark.asyncio +async def test_streamed_runner_invokes_handler_and_recovers() -> None: + """The streaming runner follows the same recovery path as ``Runner.run``.""" + agent, model = _agent_with_one_tool() + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("hallucinated", "{}")], + [get_text_message("done-streamed")], + ] + ) + + def handler(data: ToolNotFoundErrorHandlerInput[Any]) -> ToolNotFoundAction: + return ToolNotFoundAction(error_message=f"unknown tool {data.tool_name}") + + streamed = Runner.run_streamed( + agent, + input="hi", + error_handlers={"tool_not_found": handler}, + ) + async for _ in streamed.stream_events(): + pass + assert streamed.final_output == "done-streamed" + + +class _StructuredPayload(BaseModel): + status: str + + +@pytest.mark.asyncio +async def test_litellm_json_tool_call_does_not_trigger_handler() -> None: + """With ``output_type`` set, ``json_tool_call`` is a LiteLLM structured-output pseudo-call + that :func:`process_model_response` handles by synthesizing 
a tool. The pre-scan must + skip it so the user's ``tool_not_found`` handler is never invoked for a legitimate + structured-output call, and the real lookup must not raise ``ModelBehaviorError``. + """ + from agents import ModelResponse, Usage + from agents.run_context import RunContextWrapper + from agents.run_internal import run_loop + from agents.run_internal.turn_preparation import get_output_schema + from agents.run_internal.turn_resolution import ( + _resolve_tool_not_found_actions, + collect_tool_not_found_calls, + ) + + agent = Agent( + name="structured_agent", + tools=[get_function_tool("real_tool", "tool_result")], + output_type=_StructuredPayload, + ) + response = ModelResponse( + output=[ + get_function_tool_call( + "json_tool_call", + _StructuredPayload(status="ok").model_dump_json(), + call_id="call_json_tool", + ) + ], + usage=Usage(), + response_id="resp_json", + ) + output_schema = get_output_schema(agent) + + # 1. Pre-scan must not flag `json_tool_call` as missing when an output schema is in use. + missing = collect_tool_not_found_calls( + all_tools=list(agent.tools), + response=response, + handoffs=[], + output_schema=output_schema, + ) + assert missing == [] + + # 2. The resolver must return ``None`` — no handler invocation — even with a handler + # registered, because the pre-scan found nothing. + handler_calls: list[str] = [] + + def handler(data: ToolNotFoundErrorHandlerInput[Any]) -> ToolNotFoundAction: + handler_calls.append(data.tool_name) + return ToolNotFoundAction(error_message="should not be called") + + resolved = await _resolve_tool_not_found_actions( + error_handlers={"tool_not_found": handler}, + agent=agent, + all_tools=list(agent.tools), + handoffs=[], + response=response, + output_schema=output_schema, + original_input="hi", + pre_step_items=[], + raw_responses_so_far=[], + context_wrapper=RunContextWrapper(None), + ) + assert resolved is None + assert handler_calls == [] + + # 3. 
The real lookup must not raise — it synthesizes the json_tool_call tool. + processed = run_loop.process_model_response( + agent=agent, + all_tools=list(agent.tools), + response=response, + output_schema=output_schema, + handoffs=[], + ) + assert len(processed.functions) == 1 + assert processed.functions[0].tool_call.name == "json_tool_call" + + +@pytest.mark.asyncio +async def test_handler_exception_propagates() -> None: + """A handler that raises should surface the exception — the SDK must not swallow it. + + This pins the contract: buggy handler code is the caller's bug, not the SDK's. + """ + agent, model = _agent_with_one_tool() + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("bogus", "{}")], + ] + ) + + class HandlerBoom(RuntimeError): + pass + + def handler(_: ToolNotFoundErrorHandlerInput[Any]) -> ToolNotFoundAction: + raise HandlerBoom("handler exploded") + + with pytest.raises(HandlerBoom, match="handler exploded"): + await Runner.run( + agent, + input="hi", + error_handlers={"tool_not_found": handler}, + ) + + +@pytest.mark.asyncio +async def test_multiple_unknown_calls_in_one_batch_all_recover() -> None: + """When the model emits multiple unknown tool calls in a single turn, the handler is + invoked once per call and one synthetic output is produced for each.""" + agent, model = _agent_with_one_tool() + model.add_multiple_turn_outputs( + [ + [ + get_function_tool_call("ghost_a", "{}", call_id="call_1"), + get_function_tool_call("ghost_b", "{}", call_id="call_2"), + get_function_tool_call("ghost_c", "{}", call_id="call_3"), + ], + [get_text_message("recovered-3")], + ] + ) + + seen_names: list[str] = [] + + def handler(data: ToolNotFoundErrorHandlerInput[Any]) -> ToolNotFoundAction: + seen_names.append(data.tool_name) + return ToolNotFoundAction(error_message=f"unknown tool: {data.tool_name}") + + result = await Runner.run( + agent, + input="hi", + error_handlers={"tool_not_found": handler}, + ) + + assert seen_names == ["ghost_a", 
"ghost_b", "ghost_c"] + outputs = [item for item in result.new_items if isinstance(item, ToolCallOutputItem)] + assert len(outputs) == 3 + output_strs = [str(item.output) for item in outputs] + assert any("ghost_a" in s for s in output_strs) + assert any("ghost_b" in s for s in output_strs) + assert any("ghost_c" in s for s in output_strs) + assert result.final_output == "recovered-3" + + +@pytest.mark.asyncio +async def test_synthetic_output_round_trips_through_to_input_list() -> None: + """``result.to_input_list()`` must include the synthesized function_call_output so that + the next turn's model input is well-formed.""" + agent, model = _agent_with_one_tool() + model.add_multiple_turn_outputs( + [ + [get_function_tool_call("phantom", "{}", call_id="call_phantom")], + [get_text_message("ok")], + ] + ) + + recovery_msg = "phantom is not a real tool; use real_tool." + + def handler(_: ToolNotFoundErrorHandlerInput[Any]) -> ToolNotFoundAction: + return ToolNotFoundAction(error_message=recovery_msg) + + result = await Runner.run( + agent, + input="hi", + error_handlers={"tool_not_found": handler}, + ) + + input_list = result.to_input_list() + synthesized = [ + item + for item in input_list + if isinstance(item, dict) and item.get("type") == "function_call_output" + ] + assert len(synthesized) == 1 + assert synthesized[0].get("call_id") == "call_phantom" + assert synthesized[0].get("output") == recovery_msg