Skip to content

Commit 3a52673

Browse files
authored
feat: add opt-in model retry policies (#2651)
1 parent a8be7c0 commit 3a52673

20 files changed

+5320
-106
lines changed

examples/basic/retry.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
import asyncio
2+
import inspect
3+
4+
from agents import (
5+
Agent,
6+
ModelRetrySettings,
7+
ModelSettings,
8+
RetryDecision,
9+
RunConfig,
10+
Runner,
11+
retry_policies,
12+
)
13+
14+
15+
def format_error(error: object) -> str:
    """Render *error* as a short human-readable string.

    Exceptions use their message, falling back to the class name when the
    message is empty; anything that is not an exception collapses to a
    fixed placeholder.
    """
    if isinstance(error, BaseException):
        message = str(error)
        return message if message else type(error).__name__
    return "Unknown error"
19+
20+
21+
async def main() -> None:
    """Run one agent turn with opt-in retry policies and verbose [retry] logging."""
    combined_policy = retry_policies.any(
        # On OpenAI-backed models, provider_suggested() follows provider retry advice,
        # including fallback retryable statuses when x-should-retry is absent
        # (for example 408/409/429/5xx).
        retry_policies.provider_suggested(),
        retry_policies.retry_after(),
        retry_policies.network_error(),
        retry_policies.http_status([408, 409, 429, 500, 502, 503, 504]),
    )

    async def policy(context) -> bool | RetryDecision:
        # Policies may be sync or async; normalize to a concrete decision first.
        outcome = combined_policy(context)
        decision: bool | RetryDecision = (
            await outcome if inspect.isawaitable(outcome) else outcome
        )

        if isinstance(decision, RetryDecision):
            if not decision.retry:
                print(
                    f"[retry] stop after attempt {context.attempt}/{context.max_retries + 1}: "
                    f"{format_error(context.error)}"
                )
                return False

            # Assemble the log line from the pieces that apply to this decision.
            delay_note = (
                f"waiting {decision.delay:.2f}s"
                if decision.delay is not None
                else "using default backoff"
            )
            segments = [
                f"[retry] retry attempt {context.attempt}/{context.max_retries + 1}",
                delay_note,
                f"reason: {decision.reason}" if decision.reason else None,
                f"error: {format_error(context.error)}",
            ]
            print(" | ".join(segment for segment in segments if segment is not None))
            return decision

        if not decision:
            print(
                f"[retry] stop after attempt {context.attempt}/{context.max_retries + 1}: "
                f"{format_error(context.error)}"
            )
        return decision

    retry = ModelRetrySettings(
        max_retries=4,
        backoff={
            "initial_delay": 0.5,
            "max_delay": 5.0,
            "multiplier": 2.0,
            "jitter": True,
        },
        policy=policy,
    )

    # RunConfig-level model_settings are shared defaults for the run.
    # If an Agent also defines model_settings, the Agent wins for overlapping
    # keys, while nested objects like retry/backoff are merged.
    run_config = RunConfig(model_settings=ModelSettings(retry=retry))

    agent = Agent(
        name="Assistant",
        instructions="You are a concise assistant. Answer in 3 short bullet points at most.",
        # This Agent repeats the same retry config for clarity. In real code you
        # can keep shared defaults in RunConfig and only put per-agent overrides
        # here when you need different retry behavior.
        model_settings=ModelSettings(retry=retry),
    )

    print(
        "Retry support is configured. You will only see [retry] logs if a transient failure happens."
    )

    result = await Runner.run(
        agent,
        "Explain exponential backoff for API retries in plain English.",
        run_config=run_config,
    )

    print("\nFinal output:\n")
    print(result.final_output)
109+
110+
111+
if __name__ == "__main__":
    # Entry point: drive the async example to completion.
    asyncio.run(main())

examples/basic/retry_litellm.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import asyncio
2+
import inspect
3+
4+
from agents import (
5+
Agent,
6+
ModelRetrySettings,
7+
ModelSettings,
8+
RetryDecision,
9+
RunConfig,
10+
Runner,
11+
retry_policies,
12+
)
13+
14+
15+
def format_error(error: object) -> str:
    """Render *error* as a short human-readable string.

    Exceptions use their message, falling back to the class name when the
    message is empty; anything that is not an exception collapses to a
    fixed placeholder.
    """
    if isinstance(error, BaseException):
        message = str(error)
        return message if message else type(error).__name__
    return "Unknown error"
19+
20+
21+
async def main() -> None:
    """Run one LiteLLM-routed agent turn with opt-in retry policies and [retry] logging."""
    combined_policy = retry_policies.any(
        # On OpenAI-backed models, provider_suggested() follows provider retry advice,
        # including fallback retryable statuses when x-should-retry is absent
        # (for example 408/409/429/5xx).
        retry_policies.provider_suggested(),
        retry_policies.retry_after(),
        retry_policies.network_error(),
        retry_policies.http_status([408, 409, 429, 500, 502, 503, 504]),
    )

    async def policy(context) -> bool | RetryDecision:
        # Policies may be sync or async; normalize to a concrete decision first.
        outcome = combined_policy(context)
        decision: bool | RetryDecision = (
            await outcome if inspect.isawaitable(outcome) else outcome
        )

        if isinstance(decision, RetryDecision):
            if not decision.retry:
                print(
                    f"[retry] stop after attempt {context.attempt}/{context.max_retries + 1}: "
                    f"{format_error(context.error)}"
                )
                return False

            # Assemble the log line from the pieces that apply to this decision.
            delay_note = (
                f"waiting {decision.delay:.2f}s"
                if decision.delay is not None
                else "using default backoff"
            )
            segments = [
                f"[retry] retry attempt {context.attempt}/{context.max_retries + 1}",
                delay_note,
                f"reason: {decision.reason}" if decision.reason else None,
                f"error: {format_error(context.error)}",
            ]
            print(" | ".join(segment for segment in segments if segment is not None))
            return decision

        if not decision:
            print(
                f"[retry] stop after attempt {context.attempt}/{context.max_retries + 1}: "
                f"{format_error(context.error)}"
            )
        return decision

    retry = ModelRetrySettings(
        max_retries=4,
        backoff={
            "initial_delay": 0.5,
            "max_delay": 5.0,
            "multiplier": 2.0,
            "jitter": True,
        },
        policy=policy,
    )

    # RunConfig-level model_settings are shared defaults for the run.
    # If an Agent also defines model_settings, the Agent wins for overlapping
    # keys, while nested objects like retry/backoff are merged.
    run_config = RunConfig(model_settings=ModelSettings(retry=retry))

    agent = Agent(
        name="Assistant",
        instructions="You are a concise assistant. Answer in 3 short bullet points at most.",
        # Prefix with litellm/ to route this request through the LiteLLM adapter.
        model="litellm/openai/gpt-4o-mini",
        # This Agent repeats the same retry config for clarity. In real code you
        # can keep shared defaults in RunConfig and only put per-agent overrides
        # here when you need different retry behavior.
        model_settings=ModelSettings(retry=retry),
    )

    print(
        "Retry support is configured. You will only see [retry] logs if a transient failure happens."
    )

    result = await Runner.run(
        agent,
        "Explain exponential backoff for API retries in plain English.",
        run_config=run_config,
    )

    print("\nFinal output:\n")
    print(result.final_output)
111+
112+
113+
if __name__ == "__main__":
    # Entry point: drive the async example to completion.
    asyncio.run(main())

src/agents/__init__.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,17 @@
8787
from .repl import run_demo_loop
8888
from .responses_websocket_session import ResponsesWebSocketSession, responses_websocket_session
8989
from .result import AgentToolInvocation, RunResult, RunResultStreaming
90+
from .retry import (
91+
ModelRetryAdvice,
92+
ModelRetryAdviceRequest,
93+
ModelRetryBackoffSettings,
94+
ModelRetryNormalizedError,
95+
ModelRetrySettings,
96+
RetryDecision,
97+
RetryPolicy,
98+
RetryPolicyContext,
99+
retry_policies,
100+
)
90101
from .run import (
91102
ReasoningItemIdPolicy,
92103
RunConfig,
@@ -284,6 +295,15 @@ def enable_verbose_stdout_logging():
284295
"ModelProvider",
285296
"ModelTracing",
286297
"ModelSettings",
298+
"ModelRetryAdvice",
299+
"ModelRetryAdviceRequest",
300+
"ModelRetryBackoffSettings",
301+
"ModelRetryNormalizedError",
302+
"ModelRetrySettings",
303+
"RetryDecision",
304+
"RetryPolicy",
305+
"RetryPolicyContext",
306+
"retry_policies",
287307
"OpenAIChatCompletionsModel",
288308
"MultiProvider",
289309
"OpenAIProvider",

src/agents/extensions/models/litellm_model.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,15 @@
4141
from ...items import ModelResponse, TResponseInputItem, TResponseStreamEvent
4242
from ...logger import logger
4343
from ...model_settings import ModelSettings
44+
from ...models._openai_retry import get_openai_retry_advice
45+
from ...models._retry_runtime import should_disable_provider_managed_retries
4446
from ...models.chatcmpl_converter import Converter
4547
from ...models.chatcmpl_helpers import HEADERS, HEADERS_OVERRIDE, ChatCmplHelpers
4648
from ...models.chatcmpl_stream_handler import ChatCmplStreamHandler
4749
from ...models.fake_id import FAKE_RESPONSES_ID
4850
from ...models.interface import Model, ModelTracing
4951
from ...models.openai_responses import Converter as OpenAIResponsesConverter
52+
from ...retry import ModelRetryAdvice, ModelRetryAdviceRequest
5053
from ...tool import Tool
5154
from ...tracing import generation_span
5255
from ...tracing.span_data import GenerationSpanData
@@ -148,6 +151,11 @@ def __init__(
148151
self.base_url = base_url
149152
self.api_key = api_key
150153

154+
def get_retry_advice(self, request: ModelRetryAdviceRequest) -> ModelRetryAdvice | None:
    """Translate a failed-request context into provider retry advice, if any.

    Delegates to the OpenAI normalization helper; returns None when the
    error carries no usable retry signal.
    """
    # LiteLLM exceptions mirror OpenAI-style status/header fields.
    # Reuse the same normalization to expose retry-after and explicit retry/no-retry hints.
    return get_openai_retry_advice(request)
158+
151159
async def get_response(
152160
self,
153161
system_instructions: str | None,
@@ -479,7 +487,7 @@ async def _fetch_response(
479487
if stream and model_settings.include_usage is not None:
480488
stream_options = {"include_usage": model_settings.include_usage}
481489

482-
extra_kwargs = {}
490+
extra_kwargs: dict[str, Any] = {}
483491
if model_settings.extra_query:
484492
extra_kwargs["extra_query"] = copy(model_settings.extra_query)
485493
if model_settings.metadata:
@@ -491,6 +499,12 @@ async def _fetch_response(
491499
if model_settings.extra_args:
492500
extra_kwargs.update(model_settings.extra_args)
493501

502+
if should_disable_provider_managed_retries():
503+
# Preserve provider-managed retries on the first attempt, but make runner retries the
504+
# sole retry layer by forcing LiteLLM's retry knobs off on replay attempts.
505+
extra_kwargs["num_retries"] = 0
506+
extra_kwargs["max_retries"] = 0
507+
494508
# Prevent duplicate reasoning_effort kwargs when it was promoted to a top-level argument.
495509
extra_kwargs.pop("reasoning_effort", None)
496510

0 commit comments

Comments
 (0)