fix: #1876 LiteLLM extra_body forwarding (#2900)

yu2001-s · web-flow · commit 4f3c8a5379c1 · 2026-04-16T06:34:22.000Z
diff --git a/src/agents/extensions/models/litellm_model.py b/src/agents/extensions/models/litellm_model.py
@@ -506,8 +506,14 @@ async def _fetch_response(
             extra_kwargs["extra_query"] = copy(model_settings.extra_query)
         if model_settings.metadata:
             extra_kwargs["metadata"] = copy(model_settings.metadata)
-        if model_settings.extra_body and isinstance(model_settings.extra_body, dict):
-            extra_kwargs.update(model_settings.extra_body)
+        if model_settings.extra_body is not None:
+            extra_body = copy(model_settings.extra_body)
+            if isinstance(extra_body, dict) and reasoning_effort is not None:
+                extra_body.pop("reasoning_effort", None)
+                if not extra_body:
+                    extra_body = None
+            if extra_body is not None:
+                extra_kwargs["extra_body"] = extra_body
 
         # Add kwargs from model_settings.extra_args, filtering out None values
         if model_settings.extra_args:
diff --git a/tests/models/test_litellm_extra_body.py b/tests/models/test_litellm_extra_body.py
@@ -13,10 +13,10 @@
 @pytest.mark.asyncio
 async def test_extra_body_is_forwarded(monkeypatch):
     """
-    Forward `extra_body` entries into litellm.acompletion kwargs.
+    Forward `extra_body` via LiteLLM's dedicated kwarg.
 
-    This ensures that user-provided parameters (e.g. cached_content)
-    arrive alongside default arguments.
+    This ensures that provider-specific request fields stay nested under `extra_body`
+    so LiteLLM can merge them into the upstream request body itself.
     """
     captured: dict[str, object] = {}
 
@@ -43,7 +43,9 @@ async def fake_acompletion(model, messages=None, **kwargs):
         previous_response_id=None,
     )
 
-    assert {"cached_content": "some_cache", "foo": 123}.items() <= captured.items()
+    assert captured["extra_body"] == {"cached_content": "some_cache", "foo": 123}
+    assert "cached_content" not in captured
+    assert "foo" not in captured
 
 
 @pytest.mark.allow_call_model_methods
@@ -79,7 +81,7 @@ async def fake_acompletion(model, messages=None, **kwargs):
     )
 
     assert captured["reasoning_effort"] == "none"
-    assert captured["cached_content"] == "some_cache"
+    assert captured["extra_body"] == {"cached_content": "some_cache"}
     assert settings.extra_body == {"reasoning_effort": "none", "cached_content": "some_cache"}
 
 
@@ -119,6 +121,7 @@ async def fake_acompletion(model, messages=None, **kwargs):
 
     # reasoning_effort is string when no summary is provided (backward compatible)
     assert captured["reasoning_effort"] == "low"
+    assert "extra_body" not in captured
     assert settings.extra_body == {"reasoning_effort": "high"}
 
 
@@ -157,9 +160,55 @@ async def fake_acompletion(model, messages=None, **kwargs):
 
     assert captured["reasoning_effort"] == "none"
     assert captured["custom_param"] == "custom"
+    assert "extra_body" not in captured
     assert settings.extra_args == {"reasoning_effort": "low", "custom_param": "custom"}
 
 
+@pytest.mark.allow_call_model_methods
+@pytest.mark.asyncio
+async def test_extra_body_metadata_stays_nested(monkeypatch):
+    """
+    Keep extra_body metadata nested even when top-level metadata is also set.
+
+    LiteLLM resolves top-level metadata and extra_body separately. Flattening the nested
+    metadata dict loses the caller's intended request shape for OpenAI-compatible proxies.
+    """
+    captured: dict[str, object] = {}
+
+    async def fake_acompletion(model, messages=None, **kwargs):
+        captured.update(kwargs)
+        msg = Message(role="assistant", content="ok")
+        choice = Choices(index=0, message=msg)
+        return ModelResponse(choices=[choice], usage=Usage(0, 0, 0))
+
+    monkeypatch.setattr(litellm, "acompletion", fake_acompletion)
+    settings = ModelSettings(
+        metadata={"sdk": "agents"},
+        extra_body={
+            "metadata": {"trace_user_id": "user-123", "generation_id": "gen-456"},
+            "cached_content": "some_cache",
+        },
+    )
+    model = LitellmModel(model="test-model")
+
+    await model.get_response(
+        system_instructions=None,
+        input=[],
+        model_settings=settings,
+        tools=[],
+        output_schema=None,
+        handoffs=[],
+        tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
+    )
+
+    assert captured["metadata"] == {"sdk": "agents"}
+    assert captured["extra_body"] == {
+        "metadata": {"trace_user_id": "user-123", "generation_id": "gen-456"},
+        "cached_content": "some_cache",
+    }
+
+
 @pytest.mark.allow_call_model_methods
 @pytest.mark.asyncio
 @pytest.mark.parametrize(