feat: #325 add tool_not_found error handler for model-hallucinated tool calls

sjhddh · sjhddh · commit 49ccd77fcb62 · 2026-04-19T15:03:50.000+02:00
When the model calls a tool that isn't registered on the agent, the SDK raises ModelBehaviorError and kills the run — discarding however many turns of work came before it. Users on issue #325 lost multi-minute DeepSearch-style runs to a single bogus tool name. This extends the existing RunErrorHandlers pattern with a new kind, tool_not_found, that lets the caller recover by returning a ToolNotFoundAction(error_message=...). The runner then synthesizes a function_call_output item carrying that message and continues the turn; the model sees the error on its next step and can retry with a valid tool name. Returning None (or not registering a handler) preserves the existing raise behavior. The resolver pre-scans the model response for unknown tool calls, invokes the user handler (sync or async) once per missing call, and passes the resolved {call_id: ToolNotFoundAction} map into process_model_response — which already had two raise sites for function-tool and custom-tool lookups. The pre-scan honors the LiteLLM structured-output escape hatch (json_tool_call under an output_schema) so legitimate pseudo-calls don't spuriously fire the handler, and span errors are only attached when we're actually raising (successful recovery does not pollute traces). Ships with docs under running_agents.md and a self-contained runnable example at examples/basic/tool_not_found_handler.py. Fixes #325
diff --git a/docs/running_agents.md b/docs/running_agents.md
@@ -441,6 +441,31 @@ print(result.final_output)
 
 Set `include_in_history=False` when you do not want the fallback output appended to conversation history.
 
+### Recovering from hallucinated tool calls
+
+Models occasionally call a tool name that was never registered on the agent (issue [#325](https://github.com/openai/openai-agents-python/issues/325)). By default the SDK raises `ModelBehaviorError` and the run ends, discarding prior work. Register a `"tool_not_found"` handler to turn that crash into a recoverable nudge: the handler returns a [`ToolNotFoundAction`][agents.ToolNotFoundAction] with a model-visible error message, the runner injects it as a synthetic tool output, and the model can self-correct on the next turn. The handler may be sync or async and is invoked once per unknown tool call. Returning `None` (or not registering a handler) preserves the existing raise behavior. Recovery is bounded by the run's `max_turns`, so a model that keeps hallucinating still terminates with `MaxTurnsExceeded`.
+
+```python
+from agents import Agent, Runner, ToolNotFoundAction, ToolNotFoundErrorHandlerInput
+
+
+def on_tool_not_found(data: ToolNotFoundErrorHandlerInput[None]) -> ToolNotFoundAction:
+    return ToolNotFoundAction(
+        error_message=(
+            f"Tool {data.tool_name!r} does not exist. Available: {data.available_tools}."
+        )
+    )
+
+
+result = Runner.run_sync(
+    agent,
+    "find me profiles related to Anthropic",
+    error_handlers={"tool_not_found": on_tool_not_found},
+)
+```
+
+See [`examples/basic/tool_not_found_handler.py`](https://github.com/openai/openai-agents-python/blob/main/examples/basic/tool_not_found_handler.py) for a full runnable example.
+
 ## Durable execution integrations and human-in-the-loop
 
 For tool approval pause/resume patterns, start with the dedicated [Human-in-the-loop guide](human_in_the_loop.md).
diff --git a/examples/basic/tool_not_found_handler.py b/examples/basic/tool_not_found_handler.py
@@ -0,0 +1,140 @@
+"""Recovering from a model that calls a tool that doesn't exist.
+
+Large models occasionally "hallucinate" a tool name that isn't registered on the agent --
+for example they call ``search_linkedin`` when only ``search_web`` is available. Without a
+handler, the SDK raises ``ModelBehaviorError`` and the entire run is lost.
+
+Registering a ``tool_not_found`` error handler lets you turn that crash into a recoverable
+nudge: the handler returns a ``ToolNotFoundAction`` with an error message, the runner
+injects that message as a synthetic tool output, and the model self-corrects on the next
+turn.
+
+This example uses a tiny scripted ``Model`` subclass so it runs offline -- no API key
+needed. See issue #325 for the real-world report that motivated this API.
+
+    $ python examples/basic/tool_not_found_handler.py
+"""
+
+from __future__ import annotations
+
+import asyncio
+from collections.abc import AsyncIterator
+from typing import Any
+
+from openai.types.responses import ResponseFunctionToolCall, ResponseOutputMessage
+
+from agents import (
+    Agent,
+    ModelResponse,
+    Runner,
+    ToolNotFoundAction,
+    ToolNotFoundErrorHandlerInput,
+    Usage,
+    function_tool,
+)
+from agents.agent_output import AgentOutputSchemaBase
+from agents.handoffs import Handoff
+from agents.items import TResponseInputItem, TResponseStreamEvent
+from agents.model_settings import ModelSettings
+from agents.models.interface import Model, ModelTracing
+from agents.tool import Tool
+
+
+@function_tool
+def search_web(query: str) -> str:
+    """The only real tool on the agent."""
+    return f"results for: {query}"
+
+
+class ScriptedModel(Model):
+    """Plays back a fixed script of model responses so the example runs offline."""
+
+    def __init__(self, scripted_outputs: list[list[Any]]) -> None:
+        self._outputs = list(scripted_outputs)
+
+    async def get_response(self, *args: Any, **kwargs: Any) -> ModelResponse:
+        output = self._outputs.pop(0) if self._outputs else []
+        return ModelResponse(output=output, usage=Usage(), response_id="scripted")
+
+    def stream_response(  # pragma: no cover - not exercised here
+        self,
+        system_instructions: str | None,
+        input: str | list[TResponseInputItem],
+        model_settings: ModelSettings,
+        tools: list[Tool],
+        output_schema: AgentOutputSchemaBase | None,
+        handoffs: list[Handoff],
+        tracing: ModelTracing,
+        *,
+        previous_response_id: str | None = None,
+        conversation_id: str | None = None,
+        prompt: Any | None = None,
+    ) -> AsyncIterator[TResponseStreamEvent]:
+        raise NotImplementedError("streaming not used in this example")
+
+
+def on_tool_not_found(data: ToolNotFoundErrorHandlerInput[Any]) -> ToolNotFoundAction:
+    """Build a model-visible error so the model can pick a valid tool on its next step."""
+    available = ", ".join(data.available_tools) or "(none)"
+    return ToolNotFoundAction(
+        error_message=(
+            f"Tool {data.tool_name!r} is not registered on this agent. "
+            f"Available tools: [{available}]. Pick one of those and try again."
+        )
+    )
+
+
+async def main() -> None:
+    # Turn 1: the model hallucinates a tool that doesn't exist.
+    # Turn 2: after the handler injects the error, the model recovers with a final answer.
+    scripted_model = ScriptedModel(
+        [
+            [
+                ResponseFunctionToolCall(
+                    id="call-1",
+                    call_id="call-1",
+                    type="function_call",
+                    name="search_linkedin",  # intentionally unknown
+                    arguments='{"query": "Anthropic"}',
+                )
+            ],
+            [
+                ResponseOutputMessage.model_validate(
+                    {
+                        "id": "msg-1",
+                        "type": "message",
+                        "role": "assistant",
+                        "status": "completed",
+                        "content": [
+                            {
+                                "type": "output_text",
+                                "text": "Sorry, I used the wrong tool. Here's what I got from search_web instead.",
+                                "annotations": [],
+                                "logprobs": [],
+                            }
+                        ],
+                    }
+                )
+            ],
+        ]
+    )
+
+    agent = Agent(
+        name="recoverable_agent",
+        instructions="You are a helpful assistant.",
+        model=scripted_model,
+        tools=[search_web],
+    )
+
+    result = await Runner.run(
+        agent,
+        input="find me profiles related to Anthropic",
+        error_handlers={"tool_not_found": on_tool_not_found},
+    )
+
+    print("Final output:")
+    print(result.final_output)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/src/agents/__init__.py b/src/agents/__init__.py
@@ -112,6 +112,9 @@
     RunErrorHandlerInput,
     RunErrorHandlerResult,
     RunErrorHandlers,
+    ToolNotFoundAction,
+    ToolNotFoundErrorHandler,
+    ToolNotFoundErrorHandlerInput,
 )
 from .run_state import RunState
 from .stream_events import (
@@ -420,6 +423,9 @@ def enable_verbose_stdout_logging():
     "RunErrorHandlerInput",
     "RunErrorHandlerResult",
     "RunErrorHandlers",
+    "ToolNotFoundAction",
+    "ToolNotFoundErrorHandler",
+    "ToolNotFoundErrorHandlerInput",
     "AgentToolInvocation",
     "RunResult",
     "RunResultStreaming",
diff --git a/src/agents/run.py b/src/agents/run.py
@@ -1194,6 +1194,8 @@ def _finalize_result(result: RunResult) -> RunResult:
                                     ),
                                     reasoning_item_id_policy=resolved_reasoning_item_id_policy,
                                     prompt_cache_key_resolver=prompt_cache_key_resolver,
+                                    error_handlers=error_handlers,
+                                    model_responses_so_far=model_responses,
                                 )
                             )
 
@@ -1249,6 +1251,8 @@ def _finalize_result(result: RunResult) -> RunResult:
                                 ),
                                 reasoning_item_id_policy=resolved_reasoning_item_id_policy,
                                 prompt_cache_key_resolver=prompt_cache_key_resolver,
+                                error_handlers=error_handlers,
+                                model_responses_so_far=model_responses,
                             )
                     finally:
                         attach_usage_to_span(
diff --git a/src/agents/run_config.py b/src/agents/run_config.py
@@ -284,7 +284,7 @@ class RunOptions(TypedDict, Generic[TContext]):
     """The session for the run."""
 
     error_handlers: NotRequired[RunErrorHandlers[TContext] | None]
-    """Error handlers keyed by error kind. Currently supports max_turns."""
+    """Error handlers keyed by error kind. Supports ``max_turns`` and ``tool_not_found``."""
 
 
 __all__ = [
diff --git a/src/agents/run_error_handlers.py b/src/agents/run_error_handlers.py
@@ -47,10 +47,58 @@ class RunErrorHandlerResult:
 ]
 
 
+@dataclass
+class ToolNotFoundErrorHandlerInput(Generic[TContext]):
+    """Input passed to the ``tool_not_found`` error handler.
+
+    The handler is invoked when the model calls a tool that is not registered on the current
+    agent. It may return its result directly or as an awaitable. Returning
+    :class:`ToolNotFoundAction` tells the runner to inject a synthetic tool output with
+    ``error_message`` so the model can self-correct on the next turn. Returning ``None``
+    re-raises the original :class:`ModelBehaviorError`.
+    """
+
+    tool_name: str
+    """Name of the tool the model tried to call."""
+
+    available_tools: list[str]
+    """Names of tools actually registered on the agent (function + custom + handoffs)."""
+
+    agent: Agent[Any]
+    """The agent that received the bogus tool call."""
+
+    context: RunContextWrapper[TContext]
+    """The run context wrapper."""
+
+    run_data: RunErrorData
+    """Snapshot of run data at the moment the error occurred."""
+
+
+@dataclass
+class ToolNotFoundAction:
+    """Instructs the runner to recover from a tool-not-found error.
+
+    The runner appends a synthetic ``function_call_output`` item containing ``error_message`` to
+    the conversation, then continues the turn. The model will see the error on its next step and
+    can retry with a valid tool name.
+
+    Note: recovery is bounded by the run's ``max_turns`` setting. A model that repeatedly
+    hallucinates tool calls will eventually hit that limit and raise ``MaxTurnsExceeded``.
+    """
+
+    # Model-visible text carried by the synthetic ``function_call_output`` item.
+    error_message: str
+
+
+# Signature of a ``tool_not_found`` handler: it takes the handler input and returns, directly
+# or via an awaitable, a ``ToolNotFoundAction`` to recover or ``None`` to keep the raise.
+ToolNotFoundErrorHandler = Callable[
+    [ToolNotFoundErrorHandlerInput[TContext]],
+    MaybeAwaitable["ToolNotFoundAction | None"],
+]
+
+
 class RunErrorHandlers(TypedDict, Generic[TContext], total=False):
     """Error handlers keyed by error kind."""
 
     max_turns: RunErrorHandler[TContext]
+    # Called when the model requests a tool that is not registered on the agent.
+    tool_not_found: ToolNotFoundErrorHandler[TContext]
 
 
 __all__ = [
@@ -59,4 +107,7 @@ class RunErrorHandlers(TypedDict, Generic[TContext], total=False):
     "RunErrorHandlerInput",
     "RunErrorHandlerResult",
     "RunErrorHandlers",
+    "ToolNotFoundAction",
+    "ToolNotFoundErrorHandler",
+    "ToolNotFoundErrorHandlerInput",
 ]
diff --git a/src/agents/run_internal/error_handlers.py b/src/agents/run_internal/error_handlers.py
@@ -23,6 +23,8 @@
     RunErrorHandlerInput,
     RunErrorHandlerResult,
     RunErrorHandlers,
+    ToolNotFoundAction,
+    ToolNotFoundErrorHandlerInput,
 )
 from .items import ReasoningItemIdPolicy, run_item_to_input_item
 from .turn_preparation import get_output_schema
@@ -161,3 +163,42 @@ async def resolve_run_error_handler_result(
                 raise UserError("Invalid run error handler result.") from exc
         return RunErrorHandlerResult(final_output=result)
     return RunErrorHandlerResult(final_output=result)
+
+
+async def resolve_tool_not_found_action(
+    *,
+    error_handlers: RunErrorHandlers[TContext] | None,
+    tool_name: str,
+    available_tools: list[str],
+    agent: Agent[Any],
+    context_wrapper: RunContextWrapper[TContext],
+    run_data: RunErrorData,
+) -> ToolNotFoundAction | None:
+    """Invoke the ``tool_not_found`` handler (if configured) and normalize its return value.
+
+    Returns a :class:`ToolNotFoundAction` when the handler asks the runner to recover, or
+    ``None`` when no handler is registered or the handler opts to re-raise.
+    """
+    if not error_handlers:
+        return None
+    handler = error_handlers.get("tool_not_found")
+    if handler is None:
+        return None
+    handler_input = ToolNotFoundErrorHandlerInput(
+        tool_name=tool_name,
+        available_tools=available_tools,
+        agent=agent,
+        context=context_wrapper,
+        run_data=run_data,
+    )
+    result: Any = handler(handler_input)
+    if inspect.isawaitable(result):
+        result = await result
+    if result is None:
+        return None
+    if isinstance(result, ToolNotFoundAction):
+        return result
+    raise UserError(
+        "tool_not_found handler must return ToolNotFoundAction or None, "
+        f"got {type(result).__name__}."
+    )
diff --git a/src/agents/run_internal/run_loop.py b/src/agents/run_internal/run_loop.py
@@ -1028,6 +1028,7 @@ async def _save_stream_items_without_count(
                         ),
                         reasoning_item_id_policy=resolved_reasoning_item_id_policy,
                         prompt_cache_key_resolver=prompt_cache_key_resolver,
+                        error_handlers=error_handlers,
                     )
                 finally:
                     attach_usage_to_span(
@@ -1246,6 +1247,7 @@ async def run_single_turn_streamed(
     pending_server_items: list[RunItem] | None = None,
     reasoning_item_id_policy: ReasoningItemIdPolicy | None = None,
     prompt_cache_key_resolver: PromptCacheKeyResolver | None = None,
+    error_handlers: RunErrorHandlers[TContext] | None = None,
 ) -> SingleStepResult:
     """Run a single streamed turn and emit events as results arrive."""
     public_agent = bindings.public_agent
@@ -1636,6 +1638,8 @@ async def rewind_model_request() -> None:
         server_manages_conversation=server_conversation_tracker is not None,
         event_queue=streamed_result._event_queue,
         before_side_effects=raise_if_input_guardrail_tripwire_known,
+        error_handlers=error_handlers,
+        raw_responses_so_far=streamed_result.raw_responses,
     )
 
     items_to_filter = session_items_for_turn(single_step_result)
@@ -1697,6 +1701,8 @@ async def run_single_turn(
     session_items_to_rewind: list[TResponseInputItem] | None = None,
     reasoning_item_id_policy: ReasoningItemIdPolicy | None = None,
     prompt_cache_key_resolver: PromptCacheKeyResolver | None = None,
+    error_handlers: RunErrorHandlers[TContext] | None = None,
+    model_responses_so_far: list[ModelResponse] | None = None,
 ) -> SingleStepResult:
     """Run a single non-streaming turn of the agent loop."""
     public_agent = bindings.public_agent
@@ -1766,6 +1772,8 @@ async def run_single_turn(
         run_config=run_config,
         tool_use_tracker=tool_use_tracker,
         server_manages_conversation=server_conversation_tracker is not None,
+        error_handlers=error_handlers,
+        raw_responses_so_far=model_responses_so_far,
     )
 
 
diff --git a/src/agents/run_internal/turn_resolution.py b/src/agents/run_internal/turn_resolution.py
diff --git a/tests/test_tool_not_found_handler.py b/tests/test_tool_not_found_handler.py

Original file line number	Diff line number	Diff line change
`@@ -1194,6 +1194,8 @@ def _finalize_result(result: RunResult) -> RunResult:`
`1194`	`1194`	`),`
`1195`	`1195`	`reasoning_item_id_policy=resolved_reasoning_item_id_policy,`
`1196`	`1196`	`prompt_cache_key_resolver=prompt_cache_key_resolver,`
	`1197`	`+ error_handlers=error_handlers,`
	`1198`	`+ model_responses_so_far=model_responses,`
`1197`	`1199`	`)`
`1198`	`1200`	`)`
`1199`	`1201`
`@@ -1249,6 +1251,8 @@ def _finalize_result(result: RunResult) -> RunResult:`
`1249`	`1251`	`),`
`1250`	`1252`	`reasoning_item_id_policy=resolved_reasoning_item_id_policy,`
`1251`	`1253`	`prompt_cache_key_resolver=prompt_cache_key_resolver,`
	`1254`	`+ error_handlers=error_handlers,`
	`1255`	`+ model_responses_so_far=model_responses,`
`1252`	`1256`	`)`
`1253`	`1257`	`finally:`
`1254`	`1258`	`attach_usage_to_span(`