Skip to content
Open
114 changes: 75 additions & 39 deletions src/agents/models/chatcmpl_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,23 +147,39 @@ def message_to_output_items(

# Store thinking blocks for Anthropic compatibility
if hasattr(message, "thinking_blocks") and message.thinking_blocks:
# Store thinking text in content and signature in encrypted_content
reasoning_item.content = []
signatures: list[str] = []
for block in message.thinking_blocks:
if isinstance(block, dict):
thinking_text = block.get("thinking", "")
if thinking_text:
reasoning_item.content.append(
Content(text=thinking_text, type="reasoning_text")
)
# Store the signature if present
if signature := block.get("signature"):
signatures.append(signature)

# Store the signatures in encrypted_content with newline delimiter
if signatures:
reasoning_item.encrypted_content = "\n".join(signatures)
# Normalise blocks before serialisation. The last-resort fallback
# in LitellmConverter can produce {"thinking": str(block)} dicts
# that are missing a "type" field. Dropping them entirely would
# lose thinking content and could still cause Bedrock to reject the
# next turn; instead, inject "type": "thinking" so the block is
# valid and replayable. Blocks that are not dicts or have neither
# "type" nor "thinking" are discarded as unrecoverable.
blocks_as_dicts = []
for b in message.thinking_blocks:
if not isinstance(b, dict):
continue
if not b.get("type") and b.get("thinking"):
b = {**b, "type": "thinking"}
if b.get("type"):
blocks_as_dicts.append(b)

# Serialise the full blocks as JSON so that both thinking and
# redacted_thinking blocks can be reconstructed verbatim on the
# next turn. Providers like Bedrock reject requests where
# thinking/redacted_thinking blocks are modified or dropped
# between turns; redacted_thinking blocks carry a "data" field
# instead of "thinking"/"signature" and were silently lost with
# the previous signature-only serialisation.
if blocks_as_dicts:
reasoning_item.encrypted_content = json.dumps(blocks_as_dicts)

# Populate content with the visible thinking text so it can be
# used for display and summary purposes.
reasoning_item.content = [
Content(text=block.get("thinking", ""), type="reasoning_text")
for block in blocks_as_dicts
if block.get("thinking")
]

items.append(reasoning_item)

Expand Down Expand Up @@ -772,33 +788,53 @@ def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
if (
model
and ("claude" in model.lower() or "anthropic" in model.lower())
and content_items
and preserve_thinking_blocks
and (content_items or encrypted_content)
# Items may not all originate from Claude, so we need to check for model match.
# For backward compatibility, if provider_data is missing, we ignore the check.
and (model == item_model or item_provider_data == {})
):
signatures = encrypted_content.split("\n") if encrypted_content else []

# Reconstruct thinking blocks from content and signature
reconstructed_thinking_blocks = []
for content_item in content_items:
if (
isinstance(content_item, dict)
and content_item.get("type") == "reasoning_text"
):
thinking_block = {
"type": "thinking",
"thinking": content_item.get("text", ""),
}
# Add signatures if available
if signatures:
thinking_block["signature"] = signatures.pop(0)
reconstructed_thinking_blocks.append(thinking_block)

# Store thinking blocks as pending for the next assistant message
# This preserves the original behavior
pending_thinking_blocks = reconstructed_thinking_blocks
if encrypted_content:
# Try the JSON format first (current serialisation, preserves
# redacted_thinking verbatim). Fall back to the legacy
# "\n"-joined signatures format so existing in-flight sessions
# with the old encoding are not broken.
try:
decoded = json.loads(encrypted_content)
if not isinstance(decoded, list) or not all(
isinstance(b, dict) and b.get("type") for b in decoded
):
raise ValueError(
"expected a list of block dicts with a 'type' field"
)
pending_thinking_blocks = decoded
except (json.JSONDecodeError, TypeError, ValueError):
signatures = encrypted_content.split("\n")

reconstructed_thinking_blocks = []
for content_item in content_items:
if (
isinstance(content_item, dict)
and content_item.get("type") == "reasoning_text"
):
thinking_block: dict[str, str] = {
"type": "thinking",
"thinking": content_item.get("text", ""),
}
if signatures:
thinking_block["signature"] = signatures.pop(0)
reconstructed_thinking_blocks.append(thinking_block)

pending_thinking_blocks = reconstructed_thinking_blocks
else:
# No encrypted_content: older persisted turns where signatures
# were absent. Reconstruct thinking blocks from content text
# only so multi-turn history is not silently dropped.
pending_thinking_blocks = [
{"type": "thinking", "thinking": item.get("text", "")}
for item in content_items
if isinstance(item, dict) and item.get("type") == "reasoning_text"
]

if model is not None:
replay_context = ReasoningContentReplayContext(
Expand Down
178 changes: 171 additions & 7 deletions tests/test_anthropic_thinking_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@

from __future__ import annotations

import json
from typing import Any, cast

from openai.types.chat import ChatCompletionMessageToolCall
from openai.types.chat.chat_completion_message_tool_call import Function
from openai.types.responses import ResponseReasoningItem

from agents.extensions.models.litellm_model import InternalChatCompletionMessage
from agents.models.chatcmpl_converter import Converter
Expand Down Expand Up @@ -160,12 +162,18 @@ def test_anthropic_thinking_blocks_with_tool_calls():
"Content should be reasoning_text type"
)

# Verify signature is stored in encrypted_content
assert hasattr(reasoning_item, "encrypted_content"), (
# Verify full blocks are stored as JSON in encrypted_content so that both
# thinking and redacted_thinking blocks survive the round-trip verbatim.
assert isinstance(reasoning_item, ResponseReasoningItem)
assert reasoning_item.encrypted_content is not None, (
"Reasoning item should have encrypted_content"
)
assert reasoning_item.encrypted_content == "TestSignature123\nTestSignature456", (
"Signature should be preserved"
stored_blocks = json.loads(reasoning_item.encrypted_content)
assert stored_blocks[0]["signature"] == "TestSignature123", (
"Signature of first block should be preserved"
)
assert stored_blocks[1]["signature"] == "TestSignature456", (
"Signature of second block should be preserved"
)

# Verify tool calls are present
Expand Down Expand Up @@ -349,11 +357,13 @@ def test_anthropic_thinking_blocks_without_tool_calls():
"Thinking text should be preserved"
)

# Verify signature is stored in encrypted_content
assert hasattr(reasoning_item, "encrypted_content"), (
# Verify full blocks are stored as JSON in encrypted_content.
assert isinstance(reasoning_item, ResponseReasoningItem)
assert reasoning_item.encrypted_content is not None, (
"Reasoning item should have encrypted_content"
)
assert reasoning_item.encrypted_content == "TestSignatureNoTools123", (
stored_blocks = json.loads(reasoning_item.encrypted_content)
assert stored_blocks[0]["signature"] == "TestSignatureNoTools123", (
"Signature should be preserved"
)

Expand Down Expand Up @@ -416,3 +426,157 @@ def test_anthropic_thinking_blocks_without_tool_calls():
assert (
second_content.get("text") == "The weather in Paris is sunny with a temperature of 22°C."
), "Text content should be preserved"


def test_redacted_thinking_blocks_preserved_across_turns():
    """
    Regression test: Bedrock redacted_thinking blocks must not be dropped.

    Redacted thinking blocks carry a "data" field rather than
    "thinking"/"signature", so the old thinking/signature-pair serialisation
    silently discarded them and Bedrock rejected the follow-up turn with:
    "thinking or redacted_thinking blocks in the latest assistant message
    cannot be modified".

    With full-block JSON serialisation every block type must round-trip
    verbatim: model response -> output items -> next-turn messages.
    """
    opaque_payload = "SGVsbG8gV29ybGQ="  # base64 stand-in for encrypted content
    assistant_response = InternalChatCompletionMessage(
        role="assistant",
        content="I've investigated the cluster.",
        reasoning_content="Thinking was redacted by the provider.",
        thinking_blocks=[
            {
                "type": "redacted_thinking",
                "data": opaque_payload,
            }
        ],
        tool_calls=None,
    )

    # Step 1: model response → output items
    produced_items = Converter.message_to_output_items(assistant_response)

    reasoning_only = []
    for produced in produced_items:
        if getattr(produced, "type", None) == "reasoning":
            reasoning_only.append(produced)
    assert len(reasoning_only) == 1

    reasoning_entry = cast(ResponseReasoningItem, reasoning_only[0])

    # The block carries no "thinking" text, leaving content empty, so
    # encrypted_content is the only place the payload can survive.
    assert reasoning_entry.encrypted_content is not None, (
        "encrypted_content must be set even for redacted_thinking blocks"
    )
    decoded_blocks = json.loads(reasoning_entry.encrypted_content)
    assert len(decoded_blocks) == 1
    first_block = decoded_blocks[0]
    assert first_block["type"] == "redacted_thinking", "Block type must be preserved verbatim"
    assert first_block["data"] == opaque_payload, "Encrypted data must be preserved verbatim"

    # Step 2: output items → next-turn messages
    serialised_items: list[dict[str, Any]] = []
    for produced in produced_items:
        if hasattr(produced, "model_dump"):
            serialised_items.append(produced.model_dump())
        else:
            serialised_items.append(cast(dict[str, Any], produced))
    next_turn = Converter.items_to_messages(
        serialised_items,  # type: ignore[arg-type]
        model="anthropic/claude-sonnet-4-5",
        preserve_thinking_blocks=True,
    )

    from_assistant = [entry for entry in next_turn if entry.get("role") == "assistant"]
    assert len(from_assistant) == 1

    replayed_content = from_assistant[0].get("content")
    assert isinstance(replayed_content, list) and len(replayed_content) >= 1, (
        "Assistant message must contain the redacted_thinking block"
    )

    leading_block = replayed_content[0]
    assert leading_block.get("type") == "redacted_thinking", (
        f"Expected redacted_thinking block, got {leading_block.get('type')}"
    )
    assert leading_block.get("data") == opaque_payload, (
        "data field of redacted_thinking block must be preserved verbatim"
    )


def test_mixed_thinking_and_redacted_thinking_blocks_preserved():
    """
    A response mixing thinking and redacted_thinking blocks must round-trip
    with every block intact: original ordering preserved and all original
    fields (thinking/signature vs data) carried through verbatim.
    """
    mixed_response = InternalChatCompletionMessage(
        role="assistant",
        content="Done.",
        reasoning_content="Mixed thinking blocks.",
        thinking_blocks=[
            {
                "type": "thinking",
                "thinking": "First, let me check the pods.",
                "signature": "SigAAA",
            },
            {
                "type": "redacted_thinking",
                "data": "cmVkYWN0ZWQ=",
            },
            {
                "type": "thinking",
                "thinking": "Now summarising findings.",
                "signature": "SigBBB",
            },
        ],
        tool_calls=None,
    )

    generated_items = Converter.message_to_output_items(mixed_response)

    reasoning_only = [
        entry for entry in generated_items if getattr(entry, "type", None) == "reasoning"
    ]
    assert len(reasoning_only) == 1

    mixed_reasoning = cast(ResponseReasoningItem, reasoning_only[0])
    assert mixed_reasoning.encrypted_content is not None
    round_tripped = json.loads(mixed_reasoning.encrypted_content)
    assert len(round_tripped) == 3

    # Every block must come back exactly as produced, in order.
    expected_blocks = [
        {
            "type": "thinking",
            "thinking": "First, let me check the pods.",
            "signature": "SigAAA",
        },
        {"type": "redacted_thinking", "data": "cmVkYWN0ZWQ="},
        {
            "type": "thinking",
            "thinking": "Now summarising findings.",
            "signature": "SigBBB",
        },
    ]
    assert round_tripped[0] == expected_blocks[0]
    assert round_tripped[1] == expected_blocks[1]
    assert round_tripped[2] == expected_blocks[2]

    dumped: list[dict[str, Any]] = [
        entry.model_dump() if hasattr(entry, "model_dump") else cast(dict[str, Any], entry)
        for entry in generated_items
    ]
    replayed = Converter.items_to_messages(
        dumped,  # type: ignore[arg-type]
        model="bedrock/anthropic.claude-sonnet-4-5",
        preserve_thinking_blocks=True,
    )

    assistant_entries = [entry for entry in replayed if entry.get("role") == "assistant"]
    assert len(assistant_entries) == 1

    replayed_content = assistant_entries[0].get("content")
    assert isinstance(replayed_content, list)

    # First three entries are the thinking blocks (in original order)
    assert replayed_content[0] == expected_blocks[0]
    assert replayed_content[1] == expected_blocks[1]
    assert replayed_content[2] == expected_blocks[2]