Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions docs/running_agents.md
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,31 @@ print(result.final_output)

Set `include_in_history=False` when you do not want the fallback output appended to conversation history.

### Recovering from hallucinated tool calls

Models occasionally call a tool name that was never registered on the agent (issue [#325](https://github.com/openai/openai-agents-python/issues/325)). By default the SDK raises `ModelBehaviorError` and the run ends, discarding prior work. Register a `"tool_not_found"` handler to turn that crash into a recoverable nudge: the handler returns a [`ToolNotFoundAction`][agents.ToolNotFoundAction] with a model-visible error message, the runner injects it as a synthetic tool output, and the model self-corrects on the next turn. Returning `None` (or not registering a handler) preserves the existing raise behavior. Recovery is bounded by the run's `max_turns`, so a model that keeps hallucinating still terminates.

```python
from agents import Agent, Runner, ToolNotFoundAction, ToolNotFoundErrorHandlerInput


def on_tool_not_found(data: ToolNotFoundErrorHandlerInput[None]) -> ToolNotFoundAction:
return ToolNotFoundAction(
error_message=(
f"Tool {data.tool_name!r} does not exist. Available: {data.available_tools}."
)
)


result = Runner.run_sync(
agent,
"find me profiles related to Anthropic",
error_handlers={"tool_not_found": on_tool_not_found},
)
```

See [`examples/basic/tool_not_found_handler.py`](https://github.com/openai/openai-agents-python/blob/main/examples/basic/tool_not_found_handler.py) for a full runnable example.

## Durable execution integrations and human-in-the-loop

For tool approval pause/resume patterns, start with the dedicated [Human-in-the-loop guide](human_in_the_loop.md).
Expand Down
140 changes: 140 additions & 0 deletions examples/basic/tool_not_found_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
"""Recovering from a model that calls a tool that doesn't exist.

Large models occasionally "hallucinate" a tool name that isn't registered on the agent --
for example they call ``search_linkedin`` when only ``search_web`` is available. Without a
handler, the SDK raises ``ModelBehaviorError`` and the entire run is lost.

Registering a ``tool_not_found`` error handler lets you turn that crash into a recoverable
nudge: the handler returns a ``ToolNotFoundAction`` with an error message, the runner
injects that message as a synthetic tool output, and the model self-corrects on the next
turn.

This example uses a tiny scripted ``Model`` subclass so it runs offline -- no API key
needed. See issue #325 for the real-world report that motivated this API.

$ python examples/basic/tool_not_found_handler.py
"""

from __future__ import annotations

import asyncio
from collections.abc import AsyncIterator
from typing import Any

from openai.types.responses import ResponseFunctionToolCall, ResponseOutputMessage

from agents import (
Agent,
ModelResponse,
Runner,
ToolNotFoundAction,
ToolNotFoundErrorHandlerInput,
Usage,
function_tool,
)
from agents.agent_output import AgentOutputSchemaBase
from agents.handoffs import Handoff
from agents.items import TResponseInputItem, TResponseStreamEvent
from agents.model_settings import ModelSettings
from agents.models.interface import Model, ModelTracing
from agents.tool import Tool


@function_tool
def search_web(query: str) -> str:
    """The only real tool registered on the agent.

    Returns a canned result string so the example runs fully offline.
    """
    canned_result = f"results for: {query}"
    return canned_result


class ScriptedModel(Model):
    """Replays a predetermined sequence of model outputs.

    Each call to ``get_response`` consumes the next entry of the script, which
    lets the example run fully offline -- no API key or network needed.
    """

    def __init__(self, scripted_outputs: list[list[Any]]) -> None:
        # Copy so later mutation of the caller's list cannot affect playback.
        self._outputs = list(scripted_outputs)

    async def get_response(self, *args: Any, **kwargs: Any) -> ModelResponse:
        # Once the script is exhausted, keep returning empty output.
        if self._outputs:
            output = self._outputs.pop(0)
        else:
            output = []
        return ModelResponse(output=output, usage=Usage(), response_id="scripted")

    def stream_response(  # pragma: no cover - not exercised here
        self,
        system_instructions: str | None,
        input: str | list[TResponseInputItem],
        model_settings: ModelSettings,
        tools: list[Tool],
        output_schema: AgentOutputSchemaBase | None,
        handoffs: list[Handoff],
        tracing: ModelTracing,
        *,
        previous_response_id: str | None = None,
        conversation_id: str | None = None,
        prompt: Any | None = None,
    ) -> AsyncIterator[TResponseStreamEvent]:
        # This example only uses the non-streaming path.
        raise NotImplementedError("streaming not used in this example")


def on_tool_not_found(data: ToolNotFoundErrorHandlerInput[Any]) -> ToolNotFoundAction:
    """Build a model-visible error so the model can pick a valid tool on its next step."""
    # Placeholder keeps the message readable when the agent exposes no tools.
    available = ", ".join(data.available_tools) or "(none)"
    message = (
        f"Tool {data.tool_name!r} is not registered on this agent. "
        f"Available tools: [{available}]. Pick one of those and try again."
    )
    return ToolNotFoundAction(error_message=message)


async def main() -> None:
    """Run the offline demo: hallucinated tool call -> handler nudge -> recovery."""
    # Turn 1: the model hallucinates a tool name that was never registered.
    hallucinated_call = ResponseFunctionToolCall(
        id="call-1",
        call_id="call-1",
        type="function_call",
        name="search_linkedin",  # intentionally unknown
        arguments='{"query": "Anthropic"}',
    )
    # Turn 2: after the handler's error is injected, the model recovers with a final answer.
    recovery_message = ResponseOutputMessage.model_validate(
        {
            "id": "msg-1",
            "type": "message",
            "role": "assistant",
            "status": "completed",
            "content": [
                {
                    "type": "output_text",
                    "text": "Sorry, I used the wrong tool. Here's what I got from search_web instead.",
                    "annotations": [],
                    "logprobs": [],
                }
            ],
        }
    )
    scripted_model = ScriptedModel([[hallucinated_call], [recovery_message]])

    agent = Agent(
        name="recoverable_agent",
        instructions="You are a helpful assistant.",
        model=scripted_model,
        tools=[search_web],
    )

    result = await Runner.run(
        agent,
        input="find me profiles related to Anthropic",
        error_handlers={"tool_not_found": on_tool_not_found},
    )

    print("Final output:")
    print(result.final_output)


if __name__ == "__main__":
    # Entry point for `python examples/basic/tool_not_found_handler.py`.
    asyncio.run(main())
6 changes: 6 additions & 0 deletions src/agents/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,9 @@
RunErrorHandlerInput,
RunErrorHandlerResult,
RunErrorHandlers,
ToolNotFoundAction,
ToolNotFoundErrorHandler,
ToolNotFoundErrorHandlerInput,
)
from .run_state import RunState
from .stream_events import (
Expand Down Expand Up @@ -420,6 +423,9 @@ def enable_verbose_stdout_logging():
"RunErrorHandlerInput",
"RunErrorHandlerResult",
"RunErrorHandlers",
"ToolNotFoundAction",
"ToolNotFoundErrorHandler",
"ToolNotFoundErrorHandlerInput",
"AgentToolInvocation",
"RunResult",
"RunResultStreaming",
Expand Down
4 changes: 4 additions & 0 deletions src/agents/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,8 @@ def _finalize_result(result: RunResult) -> RunResult:
),
reasoning_item_id_policy=resolved_reasoning_item_id_policy,
prompt_cache_key_resolver=prompt_cache_key_resolver,
error_handlers=error_handlers,
model_responses_so_far=model_responses,
)
)

Expand Down Expand Up @@ -1249,6 +1251,8 @@ def _finalize_result(result: RunResult) -> RunResult:
),
reasoning_item_id_policy=resolved_reasoning_item_id_policy,
prompt_cache_key_resolver=prompt_cache_key_resolver,
error_handlers=error_handlers,
model_responses_so_far=model_responses,
)
finally:
attach_usage_to_span(
Expand Down
2 changes: 1 addition & 1 deletion src/agents/run_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ class RunOptions(TypedDict, Generic[TContext]):
"""The session for the run."""

error_handlers: NotRequired[RunErrorHandlers[TContext] | None]
"""Error handlers keyed by error kind. Currently supports max_turns."""
"""Error handlers keyed by error kind. Supports ``max_turns`` and ``tool_not_found``."""


__all__ = [
Expand Down
51 changes: 51 additions & 0 deletions src/agents/run_error_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,58 @@ class RunErrorHandlerResult:
]


@dataclass
class ToolNotFoundErrorHandlerInput(Generic[TContext]):
    """Input passed to the ``tool_not_found`` error handler.

    The handler is invoked when the model calls a tool that is not registered on the current
    agent. Returning :class:`ToolNotFoundAction` tells the runner to inject a synthetic tool
    output with ``error_message`` so the model can self-correct on the next turn. Returning
    ``None`` re-raises the original :class:`ModelBehaviorError`.

    ``TContext`` is the run's user-context type, surfaced through ``context``.
    """

    tool_name: str
    """Name of the tool the model tried to call."""

    available_tools: list[str]
    """Names of tools actually registered on the agent (function + custom + handoffs)."""

    agent: Agent[Any]
    """The agent that received the bogus tool call."""

    context: RunContextWrapper[TContext]
    """The run context wrapper."""

    run_data: RunErrorData
    """Snapshot of run data at the moment the error occurred."""


@dataclass
class ToolNotFoundAction:
    """Instructs the runner to recover from a tool-not-found error.

    The runner appends a synthetic ``function_call_output`` item containing ``error_message`` to
    the conversation, then continues the turn. The model will see the error on its next step and
    can retry with a valid tool name.

    Note: recovery is bounded by the run's ``max_turns`` setting. A model that repeatedly
    hallucinates tool calls will eventually hit that limit and raise ``MaxTurnsExceeded``.
    """

    # Model-visible text injected as the synthetic tool output.
    error_message: str


# Signature for ``tool_not_found`` handlers. May be sync or async; returns
# ``ToolNotFoundAction`` to recover, or ``None`` to re-raise the original error.
ToolNotFoundErrorHandler = Callable[
    [ToolNotFoundErrorHandlerInput[TContext]],
    MaybeAwaitable["ToolNotFoundAction | None"],
]


class RunErrorHandlers(TypedDict, Generic[TContext], total=False):
    """Error handlers keyed by error kind."""

    # Invoked when the run would otherwise raise ``MaxTurnsExceeded``.
    max_turns: RunErrorHandler[TContext]
    # Invoked when the model calls a tool not registered on the agent.
    tool_not_found: ToolNotFoundErrorHandler[TContext]


__all__ = [
Expand All @@ -59,4 +107,7 @@ class RunErrorHandlers(TypedDict, Generic[TContext], total=False):
"RunErrorHandlerInput",
"RunErrorHandlerResult",
"RunErrorHandlers",
"ToolNotFoundAction",
"ToolNotFoundErrorHandler",
"ToolNotFoundErrorHandlerInput",
]
41 changes: 41 additions & 0 deletions src/agents/run_internal/error_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
RunErrorHandlerInput,
RunErrorHandlerResult,
RunErrorHandlers,
ToolNotFoundAction,
ToolNotFoundErrorHandlerInput,
)
from .items import ReasoningItemIdPolicy, run_item_to_input_item
from .turn_preparation import get_output_schema
Expand Down Expand Up @@ -161,3 +163,42 @@ async def resolve_run_error_handler_result(
raise UserError("Invalid run error handler result.") from exc
return RunErrorHandlerResult(final_output=result)
return RunErrorHandlerResult(final_output=result)


async def resolve_tool_not_found_action(
    *,
    error_handlers: RunErrorHandlers[TContext] | None,
    tool_name: str,
    available_tools: list[str],
    agent: Agent[Any],
    context_wrapper: RunContextWrapper[TContext],
    run_data: RunErrorData,
) -> ToolNotFoundAction | None:
    """Invoke the ``tool_not_found`` handler (if configured) and normalize its return value.

    Returns a :class:`ToolNotFoundAction` when the handler asks the runner to recover, or
    ``None`` when no handler is registered or the handler opts to re-raise.
    """
    handler = error_handlers.get("tool_not_found") if error_handlers else None
    if handler is None:
        # No handler configured: the caller re-raises the original error.
        return None
    outcome: Any = handler(
        ToolNotFoundErrorHandlerInput(
            tool_name=tool_name,
            available_tools=available_tools,
            agent=agent,
            context=context_wrapper,
            run_data=run_data,
        )
    )
    # Handlers may be sync or async; normalize to a resolved value.
    if inspect.isawaitable(outcome):
        outcome = await outcome
    if outcome is None or isinstance(outcome, ToolNotFoundAction):
        return outcome
    raise UserError(
        "tool_not_found handler must return ToolNotFoundAction or None, "
        f"got {type(outcome).__name__}."
    )
8 changes: 8 additions & 0 deletions src/agents/run_internal/run_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,7 @@ async def _save_stream_items_without_count(
),
reasoning_item_id_policy=resolved_reasoning_item_id_policy,
prompt_cache_key_resolver=prompt_cache_key_resolver,
error_handlers=error_handlers,
)
finally:
attach_usage_to_span(
Expand Down Expand Up @@ -1246,6 +1247,7 @@ async def run_single_turn_streamed(
pending_server_items: list[RunItem] | None = None,
reasoning_item_id_policy: ReasoningItemIdPolicy | None = None,
prompt_cache_key_resolver: PromptCacheKeyResolver | None = None,
error_handlers: RunErrorHandlers[TContext] | None = None,
) -> SingleStepResult:
"""Run a single streamed turn and emit events as results arrive."""
public_agent = bindings.public_agent
Expand Down Expand Up @@ -1636,6 +1638,8 @@ async def rewind_model_request() -> None:
server_manages_conversation=server_conversation_tracker is not None,
event_queue=streamed_result._event_queue,
before_side_effects=raise_if_input_guardrail_tripwire_known,
error_handlers=error_handlers,
raw_responses_so_far=streamed_result.raw_responses,
)

items_to_filter = session_items_for_turn(single_step_result)
Expand Down Expand Up @@ -1697,6 +1701,8 @@ async def run_single_turn(
session_items_to_rewind: list[TResponseInputItem] | None = None,
reasoning_item_id_policy: ReasoningItemIdPolicy | None = None,
prompt_cache_key_resolver: PromptCacheKeyResolver | None = None,
error_handlers: RunErrorHandlers[TContext] | None = None,
model_responses_so_far: list[ModelResponse] | None = None,
) -> SingleStepResult:
"""Run a single non-streaming turn of the agent loop."""
public_agent = bindings.public_agent
Expand Down Expand Up @@ -1766,6 +1772,8 @@ async def run_single_turn(
run_config=run_config,
tool_use_tracker=tool_use_tracker,
server_manages_conversation=server_conversation_tracker is not None,
error_handlers=error_handlers,
raw_responses_so_far=model_responses_so_far,
)


Expand Down
Loading