Skip to content

Commit c2f6690

Browse files
authored
fix: #2776 keep private tool metadata out of persisted session items (#2781)
1 parent 9a96d9e commit c2f6690

7 files changed

Lines changed: 412 additions & 16 deletions

File tree

src/agents/memory/openai_responses_compaction_session.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from openai import AsyncOpenAI
77

88
from ..models._openai_shared import get_default_openai_client
9+
from ..run_internal.items import normalize_input_items_for_api
910
from .openai_conversations_session import OpenAIConversationsSession
1011
from .session import (
1112
OpenAIResponsesCompactionArgs,
@@ -270,11 +271,12 @@ def _clear_deferred_compaction(self) -> None:
270271
async def add_items(self, items: list[TResponseInputItem]) -> None:
    """Persist ``items`` to the underlying session and refresh the local caches.

    The items are forwarded to the underlying session exactly as received. The
    in-memory caches (``_compaction_candidate_items`` and ``_session_items``)
    only receive the normalized form produced by
    ``_normalize_compaction_session_items``.

    Args:
        items: The new input items to store.
    """
    await self.underlying_session.add_items(items)

    candidate_cache = self._compaction_candidate_items
    session_cache = self._session_items
    if candidate_cache is None and session_cache is None:
        # Neither cache has been populated yet, so there is nothing to update.
        return

    # Normalize once and reuse the result for both caches instead of
    # running the same normalization twice on the same input.
    normalized_items = _normalize_compaction_session_items(items)

    if candidate_cache is not None:
        new_candidates = select_compaction_candidate_items(normalized_items)
        if new_candidates:
            candidate_cache.extend(new_candidates)

    if session_cache is not None:
        session_cache.extend(normalized_items)
278280

279281
async def pop_item(self) -> TResponseInputItem | None:
280282
popped = await self.underlying_session.pop_item()
@@ -296,7 +298,7 @@ async def _ensure_compaction_candidates(
296298
if self._compaction_candidate_items is not None and self._session_items is not None:
297299
return (self._compaction_candidate_items[:], self._session_items[:])
298300

299-
history = await self.underlying_session.get_items()
301+
history = _normalize_compaction_session_items(await self.underlying_session.get_items())
300302
candidates = select_compaction_candidate_items(history)
301303
self._compaction_candidate_items = candidates
302304
self._session_items = history
@@ -336,6 +338,13 @@ def _strip_orphaned_assistant_ids(
336338
return cleaned
337339

338340

341+
def _normalize_compaction_session_items(
    items: list[TResponseInputItem],
) -> list[TResponseInputItem]:
    """Return an API-normalized version of ``items`` for use as compaction input.

    A shallow copy of the list is handed to ``normalize_input_items_for_api`` so
    that SDK-only metadata never reaches ``responses.compact``.
    """
    items_copy = [*items]
    return normalize_input_items_for_api(items_copy)
346+
347+
339348
_ResolvedCompactionMode = Literal["previous_response_id", "input"]
340349

341350

src/agents/run_internal/items.py

Lines changed: 32 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,8 @@
1818
from ..tool import DEFAULT_APPROVAL_REJECTION_MESSAGE
1919

2020
REJECTION_MESSAGE = DEFAULT_APPROVAL_REJECTION_MESSAGE
21+
TOOL_CALL_SESSION_DESCRIPTION_KEY = "_agents_tool_description"
22+
TOOL_CALL_SESSION_TITLE_KEY = "_agents_tool_title"
2123
_TOOL_CALL_TO_OUTPUT_TYPE: dict[str, str] = {
2224
"function_call": "function_call_output",
2325
"shell_call": "shell_call_output",
@@ -30,6 +32,8 @@
3032
__all__ = [
3133
"ReasoningItemIdPolicy",
3234
"REJECTION_MESSAGE",
35+
"TOOL_CALL_SESSION_DESCRIPTION_KEY",
36+
"TOOL_CALL_SESSION_TITLE_KEY",
3337
"copy_input_items",
3438
"drop_orphan_function_calls",
3539
"ensure_input_item_format",
@@ -41,6 +45,7 @@
4145
"fingerprint_input_item",
4246
"deduplicate_input_items",
4347
"deduplicate_input_items_preferring_latest",
48+
"strip_internal_input_item_metadata",
4449
"function_rejection_item",
4550
"shell_rejection_item",
4651
"apply_patch_rejection_item",
@@ -148,8 +153,8 @@ def normalize_input_items_for_api(items: list[TResponseInputItem]) -> list[TResp
148153
normalized.append(item)
149154
continue
150155

151-
normalized_item = dict(coerced)
152-
normalized.append(cast(TResponseInputItem, normalized_item))
156+
normalized_item = strip_internal_input_item_metadata(cast(TResponseInputItem, coerced))
157+
normalized.append(normalized_item)
153158
return normalized
154159

155160

@@ -188,12 +193,25 @@ def fingerprint_input_item(item: Any, *, ignore_ids_for_matching: bool = False)
188193
payload = _model_dump_without_warnings(item)
189194
if payload is None:
190195
return None
196+
if isinstance(payload, dict):
197+
payload = cast(
198+
dict[str, Any],
199+
strip_internal_input_item_metadata(cast(TResponseInputItem, payload)),
200+
)
191201
elif isinstance(item, dict):
192-
payload = dict(item)
202+
payload = cast(
203+
dict[str, Any],
204+
strip_internal_input_item_metadata(cast(TResponseInputItem, item)),
205+
)
193206
if ignore_ids_for_matching:
194207
payload.pop("id", None)
195208
else:
196209
payload = ensure_input_item_format(item)
210+
if isinstance(payload, dict):
211+
payload = cast(
212+
dict[str, Any],
213+
strip_internal_input_item_metadata(cast(TResponseInputItem, payload)),
214+
)
197215
if ignore_ids_for_matching and isinstance(payload, dict):
198216
payload.pop("id", None)
199217

@@ -231,6 +249,17 @@ def _dedupe_key(item: TResponseInputItem) -> str | None:
231249
return None
232250

233251

252+
def strip_internal_input_item_metadata(item: TResponseInputItem) -> TResponseInputItem:
    """Return ``item`` without the SDK-only tool-call session metadata keys.

    Non-dict items are handed back unchanged (the same object). For dict items a
    new dict is built that omits ``TOOL_CALL_SESSION_DESCRIPTION_KEY`` and
    ``TOOL_CALL_SESSION_TITLE_KEY``; the input dict itself is not mutated.
    """
    if isinstance(item, dict):
        internal_keys = (TOOL_CALL_SESSION_DESCRIPTION_KEY, TOOL_CALL_SESSION_TITLE_KEY)
        stripped = {key: value for key, value in item.items() if key not in internal_keys}
        return cast(TResponseInputItem, stripped)
    return item
261+
262+
234263
def _should_omit_reasoning_item_ids(reasoning_item_id_policy: ReasoningItemIdPolicy | None) -> bool:
    """Return True only when the given policy is the string ``"omit"``."""
    omit_requested = reasoning_item_id_policy == "omit"
    return omit_requested
236265

src/agents/run_internal/session_persistence.py

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
fingerprint_input_item,
3434
normalize_input_items_for_api,
3535
run_item_to_input_item,
36+
strip_internal_input_item_metadata,
3637
)
3738
from .oai_conversation import OpenAIServerConversationTracker
3839
from .run_steps import SingleStepResult
@@ -85,7 +86,9 @@ async def prepare_input_with_session(
8586
history = await session.get_items(limit=resolved_settings.limit)
8687
else:
8788
history = await session.get_items()
88-
converted_history = [ensure_input_item_format(item) for item in history]
89+
converted_history = [
90+
strip_internal_input_item_metadata(ensure_input_item_format(item)) for item in history
91+
]
8992

9093
new_input_list = [
9194
ensure_input_item_format(item) for item in ItemHelpers.input_to_new_input_list(input)
@@ -164,7 +167,8 @@ async def prepare_input_with_session(
164167
normalized = normalize_input_items_for_api(filtered)
165168
deduplicated = deduplicate_input_items_preferring_latest(normalized)
166169

167-
return deduplicated, [ensure_input_item_format(item) for item in appended_items]
170+
appended_as_inputs = [ensure_input_item_format(item) for item in appended_items]
171+
return deduplicated, normalize_input_items_for_api(appended_as_inputs)
168172

169173

170174
async def persist_session_items_for_guardrail_trip(
@@ -262,10 +266,12 @@ async def save_result_to_session(
262266

263267
input_list: list[TResponseInputItem] = []
264268
if original_input:
265-
input_list = [
266-
ensure_input_item_format(item)
267-
for item in ItemHelpers.input_to_new_input_list(original_input)
268-
]
269+
input_list = normalize_input_items_for_api(
270+
[
271+
ensure_input_item_format(item)
272+
for item in ItemHelpers.input_to_new_input_list(original_input)
273+
]
274+
)
269275

270276
resolved_reasoning_item_id_policy = (
271277
reasoning_item_id_policy
@@ -562,7 +568,7 @@ def _ignore_ids_for_matching(session: Session) -> bool:
562568
def _sanitize_openai_conversation_item(item: TResponseInputItem) -> TResponseInputItem:
563569
"""Remove provider-specific fields before fingerprinting or persistence."""
564570
if isinstance(item, dict):
565-
clean_item = dict(item)
571+
clean_item = cast(dict[str, Any], strip_internal_input_item_metadata(item))
566572
clean_item.pop("id", None)
567573
clean_item.pop("provider_data", None)
568574
return cast(TResponseInputItem, clean_item)
@@ -585,6 +591,11 @@ def _session_item_key(item: Any) -> str:
585591
payload = item
586592
else:
587593
payload = ensure_input_item_format(item)
594+
if isinstance(payload, dict):
595+
payload = cast(
596+
dict[str, Any],
597+
strip_internal_input_item_metadata(cast(TResponseInputItem, payload)),
598+
)
588599
return json.dumps(payload, sort_keys=True, default=str)
589600
except Exception:
590601
return repr(item)

tests/memory/test_openai_responses_compaction_session.py

Lines changed: 102 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,10 @@
2020
is_openai_model_name,
2121
select_compaction_candidate_items,
2222
)
23+
from agents.run_internal.items import (
24+
TOOL_CALL_SESSION_DESCRIPTION_KEY,
25+
TOOL_CALL_SESSION_TITLE_KEY,
26+
)
2327
from tests.fake_model import FakeModel
2428
from tests.test_responses import get_function_tool, get_function_tool_call, get_text_message
2529
from tests.utils.simple_session import SimpleListSession
@@ -215,6 +219,104 @@ async def test_run_compaction_auto_without_response_id_uses_input(self) -> None:
215219
assert "previous_response_id" not in call_kwargs
216220
assert call_kwargs.get("input") == items
217221

222+
@pytest.mark.asyncio
async def test_run_compaction_input_mode_strips_internal_tool_call_metadata(self) -> None:
    """Input-mode compaction must not send SDK-only tool-call metadata keys."""
    # History as returned by the underlying session: a tool call carrying the
    # internal description/title keys, followed by its output.
    function_call = cast(
        TResponseInputItem,
        {
            "type": "function_call",
            "call_id": "call_123",
            "name": "lookup_account",
            "arguments": "{}",
            TOOL_CALL_SESSION_DESCRIPTION_KEY: "Lookup customer records.",
            TOOL_CALL_SESSION_TITLE_KEY: "Lookup Account",
        },
    )
    function_call_output = cast(
        TResponseInputItem,
        {
            "type": "function_call_output",
            "call_id": "call_123",
            "output": "ok",
        },
    )
    stored_items: list[TResponseInputItem] = [function_call, function_call_output]

    underlying = self.create_mock_session()
    underlying.get_items.return_value = stored_items

    compact_result = MagicMock()
    compact_result.output = []
    client = MagicMock()
    client.responses.compact = AsyncMock(return_value=compact_result)

    session = OpenAIResponsesCompactionSession(
        session_id="test",
        underlying_session=underlying,
        client=client,
        compaction_mode="input",
    )

    await session.run_compaction({"force": True})

    # Inspect what was actually passed to responses.compact.
    compact_kwargs = client.responses.compact.call_args.kwargs
    sent_input = cast(list[dict[str, Any]], compact_kwargs["input"])
    first_item = sent_input[0]
    assert first_item["type"] == "function_call"
    assert TOOL_CALL_SESSION_DESCRIPTION_KEY not in first_item
    assert TOOL_CALL_SESSION_TITLE_KEY not in first_item
268+
269+
@pytest.mark.asyncio
async def test_run_compaction_uses_sanitized_cached_items_after_add(self) -> None:
    """Items added after the caches are primed must be compacted without internal keys."""
    underlying = self.create_mock_session()
    underlying.get_items.return_value = []

    compact_result = MagicMock()
    compact_result.output = []
    client = MagicMock()
    client.responses.compact = AsyncMock(return_value=compact_result)

    session = OpenAIResponsesCompactionSession(
        session_id="test",
        underlying_session=underlying,
        client=client,
        compaction_mode="input",
    )

    # Prime the in-memory caches first so that add_items extends them.
    await session._ensure_compaction_candidates()

    function_call = cast(
        TResponseInputItem,
        {
            "type": "function_call",
            "call_id": "call_cached",
            "name": "lookup_account",
            "arguments": "{}",
            TOOL_CALL_SESSION_DESCRIPTION_KEY: "Lookup customer records.",
            TOOL_CALL_SESSION_TITLE_KEY: "Lookup Account",
        },
    )
    function_call_output = cast(
        TResponseInputItem,
        {
            "type": "function_call_output",
            "call_id": "call_cached",
            "output": "ok",
        },
    )
    await session.add_items([function_call, function_call_output])

    await session.run_compaction({"force": True})

    # Inspect what was actually passed to responses.compact.
    compact_kwargs = client.responses.compact.call_args.kwargs
    sent_input = cast(list[dict[str, Any]], compact_kwargs["input"])
    first_item = sent_input[0]
    assert first_item["type"] == "function_call"
    assert TOOL_CALL_SESSION_DESCRIPTION_KEY not in first_item
    assert TOOL_CALL_SESSION_TITLE_KEY not in first_item
319+
218320
@pytest.mark.asyncio
219321
async def test_run_compaction_auto_uses_input_when_store_false(self) -> None:
220322
mock_session = self.create_mock_session()

0 commit comments

Comments
 (0)