Skip to content

Commit 8d0c1a3

Browse files
committed
fix: enforce bridge tool choice semantics
- reject non-object tool arguments instead of coercing them to {}
- skip structured tool planning when tool_choice is none
- validate required/specific tool_choice outputs
- tighten prompt instructions and add regression tests
1 parent 078e390 commit 8d0c1a3

File tree

2 files changed

+205
-4
lines changed

2 files changed

+205
-4
lines changed

examples/subscription_bridge/server.py

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,58 @@ def _describe_tool_choice(tool_choice: Any) -> str:
136136
return "auto"
137137

138138

139+
def _required_tool_choice_name(tool_choice: Any) -> str | None:
140+
if not isinstance(tool_choice, dict):
141+
return None
142+
if tool_choice.get("type") == "function":
143+
function = tool_choice.get("function")
144+
if isinstance(function, dict):
145+
name = function.get("name")
146+
if isinstance(name, str) and name.strip():
147+
return name.strip()
148+
name = tool_choice.get("name")
149+
if isinstance(name, str) and name.strip():
150+
return name.strip()
151+
return None
152+
153+
154+
def _tool_choice_requires_tool_calls(tool_choice: Any) -> bool:
    """Return True when ``tool_choice`` mandates that the model call a tool.

    That is the case for the literal string ``"required"`` or for a specific
    function choice (one that names a required tool).
    """
    if tool_choice == "required":
        return True
    return _required_tool_choice_name(tool_choice) is not None
156+
157+
158+
def _tool_choice_allows_structured_tool_calls(tool_choice: Any) -> bool:
159+
return tool_choice != "none"
160+
161+
162+
def _validate_tool_choice_decision(decision: dict[str, Any], payload: dict[str, Any]) -> None:
    """Validate a structured backend decision against the request's tool_choice.

    Args:
        decision: Normalized backend decision with a ``type`` key
            (``"tool_calls"`` or a final answer) and, for tool calls,
            a ``tool_calls`` list of dicts carrying ``name`` entries.
        payload: The original request payload; only ``tool_choice`` is read.

    Raises:
        RuntimeError: If the decision violates the declared tool_choice
            semantics — a tool call under ``"none"``, a missing/empty tool
            call under ``"required"`` or a specific function choice, or a
            call to a tool other than the required one.
    """
    tool_choice = payload.get("tool_choice")
    required_tool_name = _required_tool_choice_name(tool_choice)

    if tool_choice == "none":
        # Defensive: callers skip structured planning for 'none', but reject
        # a stray tool_calls decision if one arrives anyway.
        if decision.get("type") == "tool_calls":
            raise RuntimeError("tool_choice='none' forbids tool calls")
        return

    if required_tool_name is not None:
        tool_calls = decision.get("tool_calls") or []
        # Bug fix: a "tool_calls" decision with an EMPTY list previously
        # passed vacuously; a forced tool choice needs at least one call.
        if decision.get("type") != "tool_calls" or not tool_calls:
            raise RuntimeError(
                f"required tool choice {required_tool_name!r} requires a tool call"
            )
        if any(
            tool_call.get("name") != required_tool_name for tool_call in tool_calls
        ):
            raise RuntimeError(
                f"backend violated required tool choice {required_tool_name!r}"
            )
        return

    if _tool_choice_requires_tool_calls(tool_choice):
        # Same non-empty guard for the plain "required" mode.
        if decision.get("type") != "tool_calls" or not decision.get("tool_calls"):
            raise RuntimeError("tool_choice='required' requires a tool call")
189+
190+
139191
def _chat_message_blocks(messages: Any) -> list[str]:
140192
if not isinstance(messages, list) or not messages:
141193
raise ValueError("chat.completions payload must include non-empty messages")
@@ -239,7 +291,9 @@ def build_responses_prompt(payload: dict[str, Any]) -> str:
239291

240292

241293
def _build_structured_decision_prompt(base_prompt: str, payload: dict[str, Any]) -> str:
242-
tool_choice = _describe_tool_choice(payload.get("tool_choice"))
294+
raw_tool_choice = payload.get("tool_choice")
295+
tool_choice = _describe_tool_choice(raw_tool_choice)
296+
required_tool_name = _required_tool_choice_name(raw_tool_choice)
243297
parallel_tool_calls = bool(payload.get("parallel_tool_calls"))
244298
instructions = [
245299
"Return JSON only.",
@@ -256,6 +310,11 @@ def _build_structured_decision_prompt(base_prompt: str, payload: dict[str, Any])
256310
"When you emit tool_calls, arguments_json must be a valid JSON string encoding an object that matches the tool schema.",
257311
"Do not invent tools.",
258312
]
313+
if raw_tool_choice == "required":
314+
instructions.append("You must return at least one tool call.")
315+
if required_tool_name is not None:
316+
instructions.append("You must return at least one tool call.")
317+
instructions.append(f"Every tool call name must be exactly {required_tool_name}.")
259318
return f"{base_prompt}\n\nDecision rules:\n- " + "\n- ".join(instructions)
260319

261320

@@ -285,8 +344,10 @@ def _coerce_tool_calls(tool_calls: list[dict[str, Any]]) -> list[dict[str, Any]]
285344
arguments = json.loads(arguments)
286345
except json.JSONDecodeError:
287346
arguments = {"value": arguments}
288-
if not isinstance(arguments, dict):
347+
if arguments is None:
289348
arguments = {}
349+
elif not isinstance(arguments, dict):
350+
raise ValueError("tool call arguments must decode to a JSON object")
290351
normalized.append(
291352
{
292353
"call_id": tool_call.get("call_id") or f"call_{uuid.uuid4().hex}",
@@ -625,7 +686,9 @@ def _respond_for_chat_request(
625686
payload: dict[str, Any], *, backend: str, model: str, workdir: Path, request_id: str
626687
) -> dict[str, Any]:
627688
prompt = build_chat_prompt(payload)
628-
if _normalize_tools(payload.get("tools")):
689+
if _normalize_tools(payload.get("tools")) and _tool_choice_allows_structured_tool_calls(
690+
payload.get("tool_choice")
691+
):
629692
try:
630693
decision = run_backend_structured(
631694
backend=backend,
@@ -634,6 +697,7 @@ def _respond_for_chat_request(
634697
workdir=workdir,
635698
schema=DecisionSchema,
636699
)
700+
_validate_tool_choice_decision(decision, payload)
637701
if decision.get("type") == "tool_calls":
638702
return build_chat_completion_response(
639703
model=model,
@@ -656,7 +720,9 @@ def _respond_for_responses_request(
656720
payload: dict[str, Any], *, backend: str, model: str, workdir: Path, request_id: str
657721
) -> dict[str, Any]:
658722
prompt = build_responses_prompt(payload)
659-
if _normalize_tools(payload.get("tools")):
723+
if _normalize_tools(payload.get("tools")) and _tool_choice_allows_structured_tool_calls(
724+
payload.get("tool_choice")
725+
):
660726
try:
661727
decision = run_backend_structured(
662728
backend=backend,
@@ -665,6 +731,7 @@ def _respond_for_responses_request(
665731
workdir=workdir,
666732
schema=DecisionSchema,
667733
)
734+
_validate_tool_choice_decision(decision, payload)
668735
if decision.get("type") == "tool_calls":
669736
return build_responses_api_response(
670737
model=model,

tests/examples/test_subscription_bridge.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,119 @@ def test_build_chat_completion_response_can_emit_tool_calls() -> None:
230230
assert json.loads(tool_call["function"]["arguments"]) == {"city": "Tokyo"}
231231

232232

233+
def test_build_chat_completion_response_rejects_non_object_tool_arguments() -> None:
    """A tool call whose arguments decode to a JSON array must be rejected."""
    bad_tool_call = {"name": "get_weather", "arguments_json": "[]"}

    with pytest.raises(ValueError, match="must decode to a JSON object"):
        server.build_chat_completion_response(
            model="codex/gpt-5.4",
            request_id="req_bad",
            tool_calls=[bad_tool_call],
        )
240+
241+
242+
def test_respond_for_chat_request_skips_structured_tool_mode_when_tool_choice_is_none(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """With tool_choice='none' the plain backend runs; the structured one never does."""
    invocations: dict[str, bool] = {"structured": False, "plain": False}

    def fail_structured(**_: Any) -> dict[str, Any]:
        invocations["structured"] = True
        raise AssertionError("structured backend should not run when tool_choice is none")

    def plain_backend(*, backend: str, prompt: str, model: str | None, workdir: Path) -> str:
        invocations["plain"] = True
        return "No tool call emitted."

    monkeypatch.setattr(server, "run_backend_structured", fail_structured)
    monkeypatch.setattr(server, "run_backend", plain_backend)

    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the weather for a city.",
            "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
        },
    }
    response = server._respond_for_chat_request(
        {
            "messages": [{"role": "user", "content": "Just answer directly."}],
            "tools": [weather_tool],
            "tool_choice": "none",
        },
        backend="codex",
        model="codex/gpt-5.4",
        workdir=tmp_path,
        request_id="req_none",
    )

    assert invocations == {"structured": False, "plain": True}
    first_choice = response["choices"][0]
    assert first_choice["finish_reason"] == "stop"
    assert first_choice["message"]["content"] == "No tool call emitted."
282+
283+
284+
def test_respond_for_chat_request_rejects_tool_calls_outside_required_tool_choice(
285+
monkeypatch: pytest.MonkeyPatch, tmp_path: Path
286+
) -> None:
287+
def fake_run_backend_structured(**_: Any) -> dict[str, Any]:
288+
return {"type": "tool_calls", "tool_calls": [{"name": "other_tool", "arguments": {}}]}
289+
290+
monkeypatch.setattr(server, "run_backend_structured", fake_run_backend_structured)
291+
292+
with pytest.raises(RuntimeError, match="required tool choice"):
293+
server._respond_for_chat_request(
294+
{
295+
"messages": [{"role": "user", "content": "Use the weather tool."}],
296+
"tools": [
297+
{
298+
"type": "function",
299+
"function": {
300+
"name": "get_weather",
301+
"description": "Get the weather for a city.",
302+
"parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
303+
},
304+
}
305+
],
306+
"tool_choice": {"type": "function", "function": {"name": "get_weather"}},
307+
},
308+
backend="codex",
309+
model="codex/gpt-5.4",
310+
workdir=tmp_path,
311+
request_id="req_specific",
312+
)
313+
314+
315+
def test_respond_for_chat_request_requires_tool_calls_when_tool_choice_is_required(
316+
monkeypatch: pytest.MonkeyPatch, tmp_path: Path
317+
) -> None:
318+
def fake_run_backend_structured(**_: Any) -> dict[str, Any]:
319+
return {"type": "final", "content": "Here is a direct answer."}
320+
321+
monkeypatch.setattr(server, "run_backend_structured", fake_run_backend_structured)
322+
323+
with pytest.raises(RuntimeError, match="requires a tool call"):
324+
server._respond_for_chat_request(
325+
{
326+
"messages": [{"role": "user", "content": "Use a tool."}],
327+
"tools": [
328+
{
329+
"type": "function",
330+
"function": {
331+
"name": "get_weather",
332+
"description": "Get the weather for a city.",
333+
"parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
334+
},
335+
}
336+
],
337+
"tool_choice": "required",
338+
},
339+
backend="codex",
340+
model="codex/gpt-5.4",
341+
workdir=tmp_path,
342+
request_id="req_required",
343+
)
344+
345+
233346
def test_build_responses_api_response_can_emit_function_calls() -> None:
234347
response = server.build_responses_api_response(
235348
model="codex/gpt-5.4",
@@ -330,6 +443,27 @@ def test_structured_decision_prompt_requires_plain_text_final_content() -> None:
330443
assert "Do not wrap the final answer in JSON" in prompt
331444

332445

446+
def test_structured_decision_prompt_requires_tool_calls_when_tool_choice_is_required() -> None:
    """tool_choice='required' must inject the mandatory-tool-call instruction."""
    rendered = server._build_structured_decision_prompt(
        "Conversation transcript:\n\n[user]\nUse a tool.",
        {"tool_choice": "required", "parallel_tool_calls": False},
    )

    assert "You must return at least one tool call." in rendered
453+
454+
455+
def test_structured_decision_prompt_limits_specific_tool_choice() -> None:
    """A specific function tool_choice must pin every tool call to that name."""
    specific_choice = {"type": "function", "function": {"name": "get_weather"}}
    rendered = server._build_structured_decision_prompt(
        "Conversation transcript:\n\n[user]\nUse the weather tool.",
        {"tool_choice": specific_choice, "parallel_tool_calls": False},
    )

    assert "Every tool call name must be exactly get_weather." in rendered
465+
466+
333467
def test_normalize_decision_payload_unwraps_nested_final_json_content() -> None:
334468
payload = {
335469
"type": "final",

0 commit comments

Comments
 (0)