diff --git a/src/agents/models/chatcmpl_converter.py b/src/agents/models/chatcmpl_converter.py index 60fa10b6ad..71c25d66ed 100644 --- a/src/agents/models/chatcmpl_converter.py +++ b/src/agents/models/chatcmpl_converter.py @@ -147,23 +147,39 @@ def message_to_output_items( # Store thinking blocks for Anthropic compatibility if hasattr(message, "thinking_blocks") and message.thinking_blocks: - # Store thinking text in content and signature in encrypted_content - reasoning_item.content = [] - signatures: list[str] = [] - for block in message.thinking_blocks: - if isinstance(block, dict): - thinking_text = block.get("thinking", "") - if thinking_text: - reasoning_item.content.append( - Content(text=thinking_text, type="reasoning_text") - ) - # Store the signature if present - if signature := block.get("signature"): - signatures.append(signature) - - # Store the signatures in encrypted_content with newline delimiter - if signatures: - reasoning_item.encrypted_content = "\n".join(signatures) + # Normalise blocks before serialisation. The last-resort fallback + # in LitellmConverter can produce {"thinking": str(block)} dicts + # that are missing a "type" field. Dropping them entirely would + # lose thinking content and could still cause Bedrock to reject the + # next turn; instead, inject "type": "thinking" so the block is + # valid and replayable. Blocks that are not dicts or have neither + # "type" nor "thinking" are discarded as unrecoverable. + blocks_as_dicts = [] + for b in message.thinking_blocks: + if not isinstance(b, dict): + continue + if not b.get("type") and b.get("thinking"): + b = {**b, "type": "thinking"} + if b.get("type"): + blocks_as_dicts.append(b) + + # Serialise the full blocks as JSON so that both thinking and + # redacted_thinking blocks can be reconstructed verbatim on the + # next turn. Providers like Bedrock reject requests where + # thinking/redacted_thinking blocks are modified or dropped + # between turns; redacted_thinking blocks carry a "data" field + # instead of "thinking"/"signature" and were silently lost with + # the previous signature-only serialisation. + if blocks_as_dicts: + reasoning_item.encrypted_content = json.dumps(blocks_as_dicts) + + # Populate content with the visible thinking text so it can be + # used for display and summary purposes. + reasoning_item.content = [ + Content(text=block.get("thinking", ""), type="reasoning_text") + for block in blocks_as_dicts + if block.get("thinking") + ] items.append(reasoning_item) @@ -762,7 +778,7 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam: # 7) reasoning message => extract thinking blocks if present elif reasoning_item := cls.maybe_reasoning_message(item): # Reconstruct thinking blocks from content (text) and encrypted_content (signature) - content_items = reasoning_item.get("content", []) + content_items = reasoning_item.get("content") or [] encrypted_content = reasoning_item.get("encrypted_content") item_provider_data: dict[str, Any] = reasoning_item.get("provider_data", {}) # type: ignore[assignment] @@ -772,33 +788,53 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam: if ( model and ("claude" in model.lower() or "anthropic" in model.lower()) - and content_items and preserve_thinking_blocks + and (content_items or encrypted_content) # Items may not all originate from Claude, so we need to check for model match. # For backward compatibility, if provider_data is missing, we ignore the check. and (model == item_model or item_provider_data == {}) ): - signatures = encrypted_content.split("\n") if encrypted_content else [] - - # Reconstruct thinking blocks from content and signature - reconstructed_thinking_blocks = [] - for content_item in content_items: - if ( - isinstance(content_item, dict) - and content_item.get("type") == "reasoning_text" - ): - thinking_block = { - "type": "thinking", - "thinking": content_item.get("text", ""), - } - # Add signatures if available - if signatures: - thinking_block["signature"] = signatures.pop(0) - reconstructed_thinking_blocks.append(thinking_block) - - # Store thinking blocks as pending for the next assistant message - # This preserves the original behavior - pending_thinking_blocks = reconstructed_thinking_blocks + if encrypted_content: + # Try the JSON format first (current serialisation, preserves + # redacted_thinking verbatim). Fall back to the legacy + # "\n"-joined signatures format so existing in-flight sessions + # with the old encoding are not broken. + try: + decoded = json.loads(encrypted_content) + if not isinstance(decoded, list) or not all( + isinstance(b, dict) and b.get("type") for b in decoded + ): + raise ValueError( + "expected a list of block dicts with a 'type' field" + ) + pending_thinking_blocks = decoded + except (json.JSONDecodeError, TypeError, ValueError): + signatures = encrypted_content.split("\n") + + reconstructed_thinking_blocks = [] + for content_item in content_items: + if ( + isinstance(content_item, dict) + and content_item.get("type") == "reasoning_text" + ): + thinking_block: dict[str, str] = { + "type": "thinking", + "thinking": content_item.get("text", ""), + } + if signatures: + thinking_block["signature"] = signatures.pop(0) + reconstructed_thinking_blocks.append(thinking_block) + + pending_thinking_blocks = reconstructed_thinking_blocks + else: + # No encrypted_content: older persisted turns where signatures + # were absent. Reconstruct thinking blocks from content text + # only so multi-turn history is not silently dropped. + pending_thinking_blocks = [ + {"type": "thinking", "thinking": item.get("text", "")} + for item in content_items + if isinstance(item, dict) and item.get("type") == "reasoning_text" + ] if model is not None: replay_context = ReasoningContentReplayContext( diff --git a/tests/test_anthropic_thinking_blocks.py b/tests/test_anthropic_thinking_blocks.py index e55787730d..0ceea350fa 100644 --- a/tests/test_anthropic_thinking_blocks.py +++ b/tests/test_anthropic_thinking_blocks.py @@ -10,10 +10,12 @@ from __future__ import annotations +import json from typing import Any, cast from openai.types.chat import ChatCompletionMessageToolCall from openai.types.chat.chat_completion_message_tool_call import Function +from openai.types.responses import ResponseReasoningItem from agents.extensions.models.litellm_model import InternalChatCompletionMessage from agents.models.chatcmpl_converter import Converter @@ -160,12 +162,18 @@ def test_anthropic_thinking_blocks_with_tool_calls(): "Content should be reasoning_text type" ) - # Verify signature is stored in encrypted_content - assert hasattr(reasoning_item, "encrypted_content"), ( + # Verify full blocks are stored as JSON in encrypted_content so that both + # thinking and redacted_thinking blocks survive the round-trip verbatim. + assert isinstance(reasoning_item, ResponseReasoningItem) + assert reasoning_item.encrypted_content is not None, ( "Reasoning item should have encrypted_content" ) - assert reasoning_item.encrypted_content == "TestSignature123\nTestSignature456", ( - "Signature should be preserved" + stored_blocks = json.loads(reasoning_item.encrypted_content) + assert stored_blocks[0]["signature"] == "TestSignature123", ( + "Signature of first block should be preserved" + ) + assert stored_blocks[1]["signature"] == "TestSignature456", ( + "Signature of second block should be preserved" ) # Verify tool calls are present @@ -349,11 +357,13 @@ def test_anthropic_thinking_blocks_without_tool_calls(): "Thinking text should be preserved" ) - # Verify signature is stored in encrypted_content - assert hasattr(reasoning_item, "encrypted_content"), ( + # Verify full blocks are stored as JSON in encrypted_content. + assert isinstance(reasoning_item, ResponseReasoningItem) + assert reasoning_item.encrypted_content is not None, ( "Reasoning item should have encrypted_content" ) - assert reasoning_item.encrypted_content == "TestSignatureNoTools123", ( + stored_blocks = json.loads(reasoning_item.encrypted_content) + assert stored_blocks[0]["signature"] == "TestSignatureNoTools123", ( "Signature should be preserved" ) @@ -416,3 +426,157 @@ def test_anthropic_thinking_blocks_without_tool_calls(): assert ( second_content.get("text") == "The weather in Paris is sunny with a temperature of 22°C." ), "Text content should be preserved" + + +def test_redacted_thinking_blocks_preserved_across_turns(): + """ + Regression test for Bedrock redacted_thinking blocks being dropped. + + When Claude (via Bedrock) returns redacted_thinking blocks the previous + serialisation only stored thinking/signature pairs and silently discarded + any block whose type is "redacted_thinking" (they carry a "data" field + instead of "thinking"/"signature"). Bedrock then rejected the next turn + with: "thinking or redacted_thinking blocks in the latest assistant message + cannot be modified". + + The fix serialises the complete block list as JSON so every block type + survives the round-trip verbatim. + """ + redacted_data = "SGVsbG8gV29ybGQ=" # base64 stand-in for encrypted content + message = InternalChatCompletionMessage( + role="assistant", + content="I've investigated the cluster.", + reasoning_content="Thinking was redacted by the provider.", + thinking_blocks=[ + { + "type": "redacted_thinking", + "data": redacted_data, + } + ], + tool_calls=None, + ) + + # Step 1: model response → output items + output_items = Converter.message_to_output_items(message) + + reasoning_items = [i for i in output_items if getattr(i, "type", None) == "reasoning"] + assert len(reasoning_items) == 1 + + reasoning_item = cast(ResponseReasoningItem, reasoning_items[0]) + + # encrypted_content must be present (the block has no "thinking" text, so + # content will be empty — encrypted_content is the only carrier). + assert reasoning_item.encrypted_content is not None, ( + "encrypted_content must be set even for redacted_thinking blocks" + ) + stored_blocks = json.loads(reasoning_item.encrypted_content) + assert len(stored_blocks) == 1 + assert stored_blocks[0]["type"] == "redacted_thinking", "Block type must be preserved verbatim" + assert stored_blocks[0]["data"] == redacted_data, "Encrypted data must be preserved verbatim" + + # Step 2: output items → next-turn messages + items_as_dicts: list[dict[str, Any]] = [ + i.model_dump() if hasattr(i, "model_dump") else cast(dict[str, Any], i) + for i in output_items + ] + messages = Converter.items_to_messages( + items_as_dicts, # type: ignore[arg-type] + model="anthropic/claude-sonnet-4-5", + preserve_thinking_blocks=True, + ) + + assistant_messages = [m for m in messages if m.get("role") == "assistant"] + assert len(assistant_messages) == 1 + + content = assistant_messages[0].get("content") + assert isinstance(content, list) and len(content) >= 1, ( + "Assistant message must contain the redacted_thinking block" + ) + + redacted_block = content[0] + assert redacted_block.get("type") == "redacted_thinking", ( + f"Expected redacted_thinking block, got {redacted_block.get('type')}" + ) + assert redacted_block.get("data") == redacted_data, ( + "data field of redacted_thinking block must be preserved verbatim" + ) + + +def test_mixed_thinking_and_redacted_thinking_blocks_preserved(): + """ + When a response contains both thinking and redacted_thinking blocks, + all blocks must survive the round-trip in their original order and with + their original fields intact. + """ + message = InternalChatCompletionMessage( + role="assistant", + content="Done.", + reasoning_content="Mixed thinking blocks.", + thinking_blocks=[ + { + "type": "thinking", + "thinking": "First, let me check the pods.", + "signature": "SigAAA", + }, + { + "type": "redacted_thinking", + "data": "cmVkYWN0ZWQ=", + }, + { + "type": "thinking", + "thinking": "Now summarising findings.", + "signature": "SigBBB", + }, + ], + tool_calls=None, + ) + + output_items = Converter.message_to_output_items(message) + + reasoning_items = [i for i in output_items if getattr(i, "type", None) == "reasoning"] + assert len(reasoning_items) == 1 + + reasoning_item_mixed = cast(ResponseReasoningItem, reasoning_items[0]) + assert reasoning_item_mixed.encrypted_content is not None + stored_blocks = json.loads(reasoning_item_mixed.encrypted_content) + assert len(stored_blocks) == 3 + assert stored_blocks[0] == { + "type": "thinking", + "thinking": "First, let me check the pods.", + "signature": "SigAAA", + } + assert stored_blocks[1] == {"type": "redacted_thinking", "data": "cmVkYWN0ZWQ="} + assert stored_blocks[2] == { + "type": "thinking", + "thinking": "Now summarising findings.", + "signature": "SigBBB", + } + + items_as_dicts: list[dict[str, Any]] = [ + i.model_dump() if hasattr(i, "model_dump") else cast(dict[str, Any], i) + for i in output_items + ] + messages = Converter.items_to_messages( + items_as_dicts, # type: ignore[arg-type] + model="bedrock/anthropic.claude-sonnet-4-5", + preserve_thinking_blocks=True, + ) + + assistant_messages = [m for m in messages if m.get("role") == "assistant"] + assert len(assistant_messages) == 1 + + content = assistant_messages[0].get("content") + assert isinstance(content, list) + + # First three entries are the thinking blocks (in original order) + assert content[0] == { + "type": "thinking", + "thinking": "First, let me check the pods.", + "signature": "SigAAA", + } + assert content[1] == {"type": "redacted_thinking", "data": "cmVkYWN0ZWQ="} + assert content[2] == { + "type": "thinking", + "thinking": "Now summarising findings.", + "signature": "SigBBB", + }