From f9459ac120d2622007c34348207d4436c6f57e6b Mon Sep 17 00:00:00 2001
From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com>
Date: Fri, 26 Jun 2026 18:14:59 +1000
Subject: [PATCH 1/2] fix(anthropic): default max_tokens to the model's output
 ceiling (#849)

Anthropic's Messages API requires `max_tokens`, so the text adapter must
always send a value. It previously hard-coded `?? 1024` when the caller
didn't pass one, silently truncating any non-trivial generation mid-stream
with `stop_reason: "max_tokens"`.

Now default to the resolved model's real `max_output_tokens` from model-meta
(e.g. 64K Sonnet, 128K Opus), falling back to 64K for unrecognized ids.
`max_tokens` is a ceiling, not a reservation, so this costs nothing extra.
Also log a warning when a response is truncated while using the defaulted
cap, so it isn't silently read as the model "doing nothing"; callers that set
`max_tokens` explicitly are unaffected.

The new id -> max_output_tokens map is kept in lockstep with ANTHROPIC_MODELS
by `scripts/sync-provider-models.ts`, so a freshly-synced model resolves to
its real ceiling rather than the fallback.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .changeset/anthropic-max-tokens-default.md    | 15 ++++
 docs/adapters/anthropic.md                    |  4 +
 docs/config.json                              |  3 +-
 packages/ai-anthropic/src/adapters/text.ts    | 30 ++++++-
 packages/ai-anthropic/src/model-meta.ts       | 55 ++++++++++++
 .../tests/anthropic-adapter.test.ts           | 87 ++++++++++++++++++-
 .../ai-anthropic/tests/model-meta.test.ts     | 29 ++++++-
 .../ai-core/adapter-configuration/SKILL.md    | 10 +++
 scripts/sync-provider-models.ts               | 58 +++++++++++++
 9 files changed, 285 insertions(+), 6 deletions(-)
 create mode 100644 .changeset/anthropic-max-tokens-default.md

diff --git a/.changeset/anthropic-max-tokens-default.md b/.changeset/anthropic-max-tokens-default.md
new file mode 100644
index 000000000..11017103d
--- /dev/null
+++ b/.changeset/anthropic-max-tokens-default.md
@@ -0,0 +1,15 @@
+---
+'@tanstack/ai-anthropic': patch
+---
+
+Default Anthropic `max_tokens` to the selected model's real output ceiling
+(`max_output_tokens` from model metadata — e.g. 64K for Sonnet, 128K for Opus)
+when the caller doesn't pass one, instead of a hard-coded `1024` that silently
+truncated long responses with `stop_reason: "max_tokens"` (#849). Unknown
+models fall back to a safe constant. `max_tokens` is a ceiling, not a
+reservation, so this costs nothing unless the model genuinely produces more.
+
+The adapter also now logs a warning when a response is truncated while using the
+defaulted (caller-unspecified) cap, so the truncation isn't silently attributed
+to the model "doing nothing". Callers that set `modelOptions.max_tokens`
+explicitly are unaffected.
diff --git a/docs/adapters/anthropic.md b/docs/adapters/anthropic.md
index a44da5cfd..8e5b6f85d 100644
--- a/docs/adapters/anthropic.md
+++ b/docs/adapters/anthropic.md
@@ -136,6 +136,10 @@ const stream = chat({
 
 > If you previously passed `temperature` / `topP` / `maxTokens` at the root of `chat()`, see [Moving Sampling Options into modelOptions](../migration/sampling-options-to-model-options).
 
+#### `max_tokens` default
+
+Anthropic's Messages API _requires_ `max_tokens` on every request, so the adapter always sends a value. When you don't set `modelOptions.max_tokens`, it defaults to the selected model's full output ceiling (`max_output_tokens` from the model metadata — e.g. 64K for Sonnet, 128K for Opus), falling back to a safe constant for unrecognized models. `max_tokens` is a ceiling, not a reservation — billing is on tokens actually generated — so this default costs nothing extra and avoids the silent mid-response truncation (`stop_reason: "max_tokens"`) that a low default would cause. Set `max_tokens` explicitly only when you want to _cap_ output below the model ceiling. If a response is truncated while using the default cap, the adapter logs a warning (visible with [debug logging](../advanced/debug-logging) enabled).
+
 ### Thinking (Extended Thinking)
 
 Enable extended thinking with a token budget. This allows Claude to show its reasoning process, which is streamed as `thinking` chunks:
diff --git a/docs/config.json b/docs/config.json
index 0e8982869..0074c6638 100644
--- a/docs/config.json
+++ b/docs/config.json
@@ -507,7 +507,8 @@
         {
           "label": "Anthropic",
           "to": "adapters/anthropic",
-          "addedAt": "2026-04-15"
+          "addedAt": "2026-04-15",
+          "updatedAt": "2026-06-26"
         },
         {
           "label": "Google Gemini",
diff --git a/packages/ai-anthropic/src/adapters/text.ts b/packages/ai-anthropic/src/adapters/text.ts
index bdffea41c..5af486fdc 100644
--- a/packages/ai-anthropic/src/adapters/text.ts
+++ b/packages/ai-anthropic/src/adapters/text.ts
@@ -10,7 +10,10 @@ import {
   generateId,
   getAnthropicApiKeyFromEnv,
 } from '../utils'
-import { ANTHROPIC_COMBINED_TOOLS_AND_SCHEMA_MODELS } from '../model-meta'
+import {
+  ANTHROPIC_COMBINED_TOOLS_AND_SCHEMA_MODELS,
+  getAnthropicDefaultMaxTokens,
+} from '../model-meta'
 import type {
   ANTHROPIC_MODELS,
   AnthropicChatModelProviderOptionsByName,
@@ -420,7 +423,14 @@ export class AnthropicTextAdapter<
       validProviderOptions.thinking?.type === 'enabled'
         ? validProviderOptions.thinking.budget_tokens
         : undefined
-    const defaultMaxTokens = modelOptions?.max_tokens ?? 1024
+    // Anthropic's Messages API *requires* `max_tokens`, so we must always send a
+    // value. When the caller doesn't specify one, default to the resolved
+    // model's real output ceiling (from model-meta) rather than a low constant
+    // that silently truncates long responses with `stop_reason: "max_tokens"`
+    // (issue #849). `max_tokens` is a ceiling, not a reservation — billing is on
+    // tokens actually generated, so a higher default costs nothing extra.
+    const defaultMaxTokens =
+      modelOptions?.max_tokens ?? getAnthropicDefaultMaxTokens(this.model)
     const maxTokens =
       thinkingBudget && thinkingBudget >= defaultMaxTokens
         ? thinkingBudget + 1
@@ -1181,6 +1191,22 @@ export class AnthropicTextAdapter<
                 break
               }
               case 'max_tokens': {
+                // Surface a warning when the truncating cap was the
+                // adapter-supplied default (caller didn't pass `max_tokens`), so
+                // the truncation isn't silently attributed to the model "doing
+                // nothing" (issue #849). When the caller set `max_tokens`
+                // themselves, hitting it is their own deliberate ceiling.
+                if (options.modelOptions?.max_tokens == null) {
+                  const defaultedMaxTokens = getAnthropicDefaultMaxTokens(model)
+                  logger.warn(
+                    `anthropic response truncated at the default max_tokens (${defaultedMaxTokens}) for model=${model}; pass maxTokens (or modelOptions.max_tokens) to raise the output ceiling`,
+                    {
+                      source: 'anthropic.processAnthropicStream',
+                      model,
+                      defaultedMaxTokens,
+                    },
+                  )
+                }
                 yield {
                   type: EventType.RUN_ERROR,
                   model,
diff --git a/packages/ai-anthropic/src/model-meta.ts b/packages/ai-anthropic/src/model-meta.ts
index e951d9edb..13785e9fa 100644
--- a/packages/ai-anthropic/src/model-meta.ts
+++ b/packages/ai-anthropic/src/model-meta.ts
@@ -814,6 +814,61 @@ export const ANTHROPIC_MODELS = [
   CLAUDE_SONNET_5.id,
 ] as const
 
+/**
+ * Fallback `max_tokens` ceiling for a model whose metadata carries no
+ * `max_output_tokens` (e.g. an unrecognized model id). Anthropic's Messages
+ * API *requires* `max_tokens`, so the adapter must always send a value. 64K is
+ * the output ceiling of the current mainstream Claude tier (Sonnet/Haiku 4.5),
+ * so it's a sane default for an unknown — almost certainly modern — model and
+ * avoids silently truncating long generations (issue #849). Recognized models
+ * use their exact `max_output_tokens` from {@link ANTHROPIC_MODEL_MAX_OUTPUT_TOKENS}
+ * (e.g. 128K for Opus), so this fallback only ever applies to ids not in the
+ * map.
+ */
+export const ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS = 64_000
+
+/**
+ * Runtime lookup of each model's maximum output-token ceiling, keyed by model
+ * id. Lets the text adapter default the required `max_tokens` request field to
+ * the model's real ceiling when the caller doesn't specify one, rather than a
+ * low constant that truncates responses mid-stream (issue #849).
+ *
+ * Kept in sync with {@link ANTHROPIC_MODELS} by `scripts/sync-provider-models.ts`
+ * — when that script adds a model it also inserts the model's `max_output_tokens`
+ * here, so a freshly-synced model resolves to its real ceiling rather than the
+ * fallback above.
+ */
+const ANTHROPIC_MODEL_MAX_OUTPUT_TOKENS: Record<string, number> = {
+  [CLAUDE_OPUS_4_6.id]: CLAUDE_OPUS_4_6.max_output_tokens,
+  [CLAUDE_OPUS_4_5.id]: CLAUDE_OPUS_4_5.max_output_tokens,
+  [CLAUDE_SONNET_4_6.id]: CLAUDE_SONNET_4_6.max_output_tokens,
+  [CLAUDE_SONNET_4_5.id]: CLAUDE_SONNET_4_5.max_output_tokens,
+  [CLAUDE_HAIKU_4_5.id]: CLAUDE_HAIKU_4_5.max_output_tokens,
+  [CLAUDE_OPUS_4_1.id]: CLAUDE_OPUS_4_1.max_output_tokens,
+  [CLAUDE_SONNET_4.id]: CLAUDE_SONNET_4.max_output_tokens,
+  [CLAUDE_SONNET_3_7.id]: CLAUDE_SONNET_3_7.max_output_tokens,
+  [CLAUDE_OPUS_4.id]: CLAUDE_OPUS_4.max_output_tokens,
+  [CLAUDE_HAIKU_3_5.id]: CLAUDE_HAIKU_3_5.max_output_tokens,
+  [CLAUDE_HAIKU_3.id]: CLAUDE_HAIKU_3.max_output_tokens,
+  [CLAUDE_OPUS_4_6_FAST.id]: CLAUDE_OPUS_4_6_FAST.max_output_tokens,
+  [CLAUDE_OPUS_4_7.id]: CLAUDE_OPUS_4_7.max_output_tokens,
+  [CLAUDE_OPUS_4_7_FAST.id]: CLAUDE_OPUS_4_7_FAST.max_output_tokens,
+  [CLAUDE_OPUS_4_8.id]: CLAUDE_OPUS_4_8.max_output_tokens,
+  [CLAUDE_OPUS_4_8_FAST.id]: CLAUDE_OPUS_4_8_FAST.max_output_tokens,
+}
+
+/**
+ * Resolve the default `max_tokens` for a model: its known `max_output_tokens`
+ * ceiling, or {@link ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS} for unknown models.
+ * Callers that pass an explicit `max_tokens` bypass this entirely.
+ */
+export function getAnthropicDefaultMaxTokens(model: string): number {
+  return (
+    ANTHROPIC_MODEL_MAX_OUTPUT_TOKENS[model] ??
+    ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
+  )
+}
+
 /**
  * Anthropic models that support combining `tools` + JSON-Schema-constrained
  * output in a single streaming Messages request (per issue #605). GA'd
diff --git a/packages/ai-anthropic/tests/anthropic-adapter.test.ts b/packages/ai-anthropic/tests/anthropic-adapter.test.ts
index 611c38a33..3dda7c9d9 100644
--- a/packages/ai-anthropic/tests/anthropic-adapter.test.ts
+++ b/packages/ai-anthropic/tests/anthropic-adapter.test.ts
@@ -444,7 +444,7 @@ describe('Anthropic adapter option mapping', () => {
     expect(payload.top_p).toBe(0.7)
   })
 
-  it('defaults max_tokens to 1024 when not provided via modelOptions', async () => {
+  it("defaults max_tokens to the model's max_output_tokens when not provided via modelOptions (#849)", async () => {
     mocks.betaMessagesCreate.mockResolvedValueOnce(createTextStream('ok'))
 
     const adapter = createAdapter('claude-3-7-sonnet')
@@ -457,7 +457,90 @@ describe('Anthropic adapter option mapping', () => {
     }
 
     const [payload] = mocks.betaMessagesCreate.mock.calls[0]!
-    expect(payload.max_tokens).toBe(1024)
+    // claude-3-7-sonnet's model-meta max_output_tokens is 64_000 — not the old
+    // hard-coded 1024 floor that silently truncated long responses.
+    expect(payload.max_tokens).toBe(64_000)
+  })
+
+  it('warns when the default max_tokens cap truncates the response (#849)', async () => {
+    // Stream that ends with stop_reason: "max_tokens" — the model hit the cap.
+    const truncatedStream = (async function* () {
+      yield {
+        type: 'content_block_start',
+        index: 0,
+        content_block: { type: 'text', text: '' },
+      }
+      yield {
+        type: 'content_block_delta',
+        index: 0,
+        delta: { type: 'text_delta', text: 'partial output' },
+      }
+      yield { type: 'content_block_stop', index: 0 }
+      yield {
+        type: 'message_delta',
+        delta: { stop_reason: 'max_tokens' },
+        usage: { output_tokens: 64_000 },
+      }
+      yield { type: 'message_stop' }
+    })()
+    mocks.betaMessagesCreate.mockResolvedValueOnce(truncatedStream)
+
+    const adapter = createAdapter('claude-3-7-sonnet')
+
+    const logger = {
+      debug: vi.fn(),
+      info: vi.fn(),
+      warn: vi.fn(),
+      error: vi.fn(),
+    }
+
+    for await (const _ of chat({
+      adapter,
+      messages: [{ role: 'user', content: 'Write a long essay' }],
+      debug: { logger, errors: true },
+    })) {
+      // consume stream
+    }
+
+    const truncationWarning = logger.warn.mock.calls.find((call) =>
+      String(call[0]).includes('truncated at the default max_tokens'),
+    )
+    expect(truncationWarning).toBeDefined()
+  })
+
+  it('does not warn about truncation when the caller set max_tokens explicitly (#849)', async () => {
+    const truncatedStream = (async function* () {
+      yield {
+        type: 'message_delta',
+        delta: { stop_reason: 'max_tokens' },
+        usage: { output_tokens: 100 },
+      }
+      yield { type: 'message_stop' }
+    })()
+    mocks.betaMessagesCreate.mockResolvedValueOnce(truncatedStream)
+
+    const adapter = createAdapter('claude-3-7-sonnet')
+
+    const logger = {
+      debug: vi.fn(),
+      info: vi.fn(),
+      warn: vi.fn(),
+      error: vi.fn(),
+    }
+
+    for await (const _ of chat({
+      adapter,
+      messages: [{ role: 'user', content: 'Hi' }],
+      modelOptions: { max_tokens: 100 } satisfies AnthropicTextProviderOptions,
+      debug: { logger, errors: true },
+    })) {
+      // consume stream
+    }
+
+    const truncationWarning = logger.warn.mock.calls.find((call) =>
+      String(call[0]).includes('truncated at the default max_tokens'),
+    )
+    expect(truncationWarning).toBeUndefined()
   })
 
   it('native combined mode (#605): wires outputSchema into output_format alongside tools on Claude 4.5+', async () => {
diff --git a/packages/ai-anthropic/tests/model-meta.test.ts b/packages/ai-anthropic/tests/model-meta.test.ts
index 3a8c5bc44..50fc3b282 100644
--- a/packages/ai-anthropic/tests/model-meta.test.ts
+++ b/packages/ai-anthropic/tests/model-meta.test.ts
@@ -1,4 +1,8 @@
-import { describe, expectTypeOf, it } from 'vitest'
+import { describe, expect, expectTypeOf, it } from 'vitest'
+import {
+  ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
+  getAnthropicDefaultMaxTokens,
+} from '../src/model-meta'
 import type {
   AnthropicChatModelProviderOptionsByName,
   AnthropicModelInputModalitiesByName,
@@ -780,3 +784,26 @@ describe('Anthropic Model Input Modality Type Assertions', () => {
     })
   })
 })
+
+describe('getAnthropicDefaultMaxTokens (#849)', () => {
+  it("returns the model's max_output_tokens for known models", () => {
+    expect(getAnthropicDefaultMaxTokens('claude-opus-4.8')).toBe(128_000)
+    expect(getAnthropicDefaultMaxTokens('claude-opus-4-6')).toBe(128_000)
+    expect(getAnthropicDefaultMaxTokens('claude-sonnet-4-6')).toBe(64_000)
+    expect(getAnthropicDefaultMaxTokens('claude-3-7-sonnet')).toBe(64_000)
+    expect(getAnthropicDefaultMaxTokens('claude-3-haiku')).toBe(4_000)
+  })
+
+  it('falls back to the safe constant for unknown models', () => {
+    expect(ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS).toBe(64_000)
+    expect(getAnthropicDefaultMaxTokens('some-future-claude-model')).toBe(
+      ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
+    )
+  })
+
+  it('never returns the old hard-coded 1024 floor for a known model', () => {
+    expect(getAnthropicDefaultMaxTokens('claude-opus-4.8')).toBeGreaterThan(
+      1024,
+    )
+  })
+})
diff --git a/packages/ai/skills/ai-core/adapter-configuration/SKILL.md b/packages/ai/skills/ai-core/adapter-configuration/SKILL.md
index 2faa4ecd4..f6be3f7b4 100644
--- a/packages/ai/skills/ai-core/adapter-configuration/SKILL.md
+++ b/packages/ai/skills/ai-core/adapter-configuration/SKILL.md
@@ -297,6 +297,16 @@ Per-provider sampling keys (all live inside `modelOptions`):
 some sampling options use provider-native names. Ollama nests all sampling under
 `modelOptions.options`.
 
+> **Anthropic `max_tokens` default:** Anthropic's API _requires_ `max_tokens`,
+> so the adapter always sends one. When you omit `modelOptions.max_tokens`, it
+> defaults to the selected model's full output ceiling (its `max_output_tokens`
+> from model metadata — e.g. 64K for Sonnet, 128K for Opus), not a low constant.
+> `max_tokens` is a ceiling, not a reservation (billing is per token generated),
+> so leaving it unset is the right default for codegen / agentic / long-form
+> output and avoids silent `stop_reason: "max_tokens"` truncation. Set it only to
+> cap output below the model ceiling. Other providers treat token limits as
+> optional, so their adapters don't apply a default like this.
+
 ### 6. Capability Flag: `supportsCombinedToolsAndSchema`
 
 Adapters can declare an optional capability method:
diff --git a/scripts/sync-provider-models.ts b/scripts/sync-provider-models.ts
index efeaf25a5..3dc8233b5 100644
--- a/scripts/sync-provider-models.ts
+++ b/scripts/sync-provider-models.ts
@@ -43,6 +43,13 @@ interface ProviderConfig {
   providerOptionsTypeName: string
   /** Name of the input modalities type map */
   inputModalitiesTypeName: string
+  /**
+   * Name of the runtime `Record<string, number>` mapping model id →
+   * `max_output_tokens`, if the provider maintains one. Anthropic uses this to
+   * default the required `max_tokens` request field to the model's real ceiling
+   * (issue #849); other providers treat token limits as optional and omit it.
+   */
+  maxOutputTokensMapName?: string
   /** The supports block template (minus input modalities, which come from OpenRouter) */
   referenceSupportsBody: string
   /** Valid input modality types for this provider's ModelMeta interface */
@@ -95,6 +102,7 @@ const PROVIDER_MAP: Record<string, ProviderConfig> = {
     chatArrayName: 'ANTHROPIC_MODELS',
     providerOptionsTypeName: 'AnthropicChatModelProviderOptionsByName',
     inputModalitiesTypeName: 'AnthropicModelInputModalitiesByName',
+    maxOutputTokensMapName: 'ANTHROPIC_MODEL_MAX_OUTPUT_TOKENS',
     validInputModalities: ['text', 'image', 'audio', 'video', 'document'],
     referenceSupportsBody: `    extended_thinking: true,
     priority_tier: true,
@@ -500,6 +508,34 @@ function addToTypeMap(
   return content.replace(pattern, () => `${match[1]}\n${newEntries}${match[2]}`)
 }
 
+/**
+ * Add entries to a runtime object literal like:
+ *   const MAP_NAME: Record<string, number> = {
+ *     ...existing entries...
+ *   }
+ * Used for the Anthropic id → max_output_tokens map (issue #849), which is a
+ * value declaration rather than a `type` alias.
+ */
+function addToObjectMap(
+  content: string,
+  mapName: string,
+  entries: Array<string>,
+): string {
+  // Match: const MAP_NAME: Record<string, number> = { ... \n}
+  const pattern = new RegExp(
+    `(const ${mapName}: Record<string, number> = \\{[\\s\\S]*?)(\\n\\})`,
+  )
+  const match = pattern.exec(content)
+  if (!match) {
+    console.warn(`  Warning: Could not find object map '${mapName}' in file`)
+    return content
+  }
+
+  const newEntries = entries.join('\n')
+  // Use replacer function to prevent $-character interpretation in replacement string
+  return content.replace(pattern, () => `${match[1]}\n${newEntries}${match[2]}`)
+}
+
 // ---------------------------------------------------------------------------
 // Git-based change detection
 // ---------------------------------------------------------------------------
@@ -697,6 +733,28 @@ async function main() {
       )
     }
 
+    // Add to the id → max_output_tokens runtime map (Anthropic only). Only
+    // models whose generated constant actually carries `max_output_tokens`
+    // (i.e. OpenRouter reported a `max_completion_tokens`) get an entry; the
+    // rest are omitted and use the lookup's fallback constant. Keeps the map
+    // in lockstep with the chat-model array so a synced model resolves to its
+    // real ceiling instead of the fallback (issue #849).
+    if (config.maxOutputTokensMapName) {
+      const maxOutputEntries = chatModels
+        .filter(({ model }) => model.top_provider.max_completion_tokens)
+        .map(
+          ({ constName }) =>
+            `  [${constName}${config.arrayRef}]: ${constName}.max_output_tokens,`,
+        )
+      if (maxOutputEntries.length > 0) {
+        content = addToObjectMap(
+          content,
+          config.maxOutputTokensMapName,
+          maxOutputEntries,
+        )
+      }
+    }
+
     // Write the modified file
     await writeFile(config.metaFile, content, 'utf-8')
     console.log(`  Wrote updated file: ${config.metaFile}`)

From 92f5b767abd3f841527fbc44a0f602b3f3c60801 Mon Sep 17 00:00:00 2001
From: Tom Beckenham <34339192+tombeckenham@users.noreply.github.com>
Date: Fri, 26 Jun 2026 19:02:00 +1000
Subject: [PATCH 2/2] fix(anthropic): clamp non-streaming structured-output
 max_tokens default (#849)

The #849 default of the model's full output ceiling broke the non-streaming
`structuredOutput()` path: the Anthropic SDK refuses a non-streaming request
whose `max_tokens` could exceed its 10-minute timeout (~21,333 tokens), so
`chat({ outputSchema })` on any model that finalizes structured output through
that non-streaming path (not to be confused with the unknown-model `max_tokens`
fallback) threw "Streaming is required for operations that may take longer
than 10 minutes".

`getAnthropicDefaultMaxTokens(model, { stream })` now clamps the default to
`ANTHROPIC_MAX_NONSTREAMING_TOKENS` when `stream: false`; the streaming chat
path keeps the model's full ceiling. An explicit oversized `max_tokens` still
surfaces the SDK's "use streaming" error.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .changeset/anthropic-max-tokens-default.md    |  8 ++++
 docs/adapters/anthropic.md                    |  2 +
 packages/ai-anthropic/src/adapters/text.ts    | 14 +++++-
 packages/ai-anthropic/src/model-meta.ts       | 32 +++++++++++--
 .../tests/anthropic-adapter.test.ts           | 46 +++++++++++++++++++
 .../ai-anthropic/tests/model-meta.test.ts     | 38 +++++++++++++++
 6 files changed, 135 insertions(+), 5 deletions(-)

diff --git a/.changeset/anthropic-max-tokens-default.md b/.changeset/anthropic-max-tokens-default.md
index 11017103d..57eef68dd 100644
--- a/.changeset/anthropic-max-tokens-default.md
+++ b/.changeset/anthropic-max-tokens-default.md
@@ -13,3 +13,11 @@ The adapter also now logs a warning when a response is truncated while using the
 defaulted (caller-unspecified) cap, so the truncation isn't silently attributed
 to the model "doing nothing". Callers that set `modelOptions.max_tokens`
 explicitly are unaffected.
+
+The non-streaming structured-output path (`structuredOutput()`) clamps this
+default to the Anthropic SDK's non-streaming-safe limit (~21K tokens). The SDK
+refuses a non-streaming request whose `max_tokens` could exceed its 10-minute
+timeout, so without the clamp the full-ceiling default would make every
+`chat({ outputSchema })` call that finalizes through this non-streaming path
+throw "Streaming is required for operations that may take longer than 10
+minutes". The streaming chat path keeps the model's full ceiling.
diff --git a/docs/adapters/anthropic.md b/docs/adapters/anthropic.md
index 8e5b6f85d..0a42a8c2c 100644
--- a/docs/adapters/anthropic.md
+++ b/docs/adapters/anthropic.md
@@ -140,6 +140,8 @@ const stream = chat({
 
 Anthropic's Messages API _requires_ `max_tokens` on every request, so the adapter always sends a value. When you don't set `modelOptions.max_tokens`, it defaults to the selected model's full output ceiling (`max_output_tokens` from the model metadata — e.g. 64K for Sonnet, 128K for Opus), falling back to a safe constant for unrecognized models. `max_tokens` is a ceiling, not a reservation — billing is on tokens actually generated — so this default costs nothing extra and avoids the silent mid-response truncation (`stop_reason: "max_tokens"`) that a low default would cause. Set `max_tokens` explicitly only when you want to _cap_ output below the model ceiling. If a response is truncated while using the default cap, the adapter logs a warning (visible with [debug logging](../advanced/debug-logging) enabled).
 
+One exception: structured output (`chat({ outputSchema })`) on models that use the non-streaming finalization path clamps this default to ~21K tokens. The Anthropic SDK rejects a non-streaming request whose `max_tokens` could exceed its 10-minute timeout, so the full ceiling can't be used there. Streaming chat is unaffected. To raise the structured-output ceiling toward a model's true max, stream the response.
+
 ### Thinking (Extended Thinking)
 
 Enable extended thinking with a token budget. This allows Claude to show its reasoning process, which is streamed as `thinking` chunks:
diff --git a/packages/ai-anthropic/src/adapters/text.ts b/packages/ai-anthropic/src/adapters/text.ts
index 5af486fdc..0c9c8bcdd 100644
--- a/packages/ai-anthropic/src/adapters/text.ts
+++ b/packages/ai-anthropic/src/adapters/text.ts
@@ -266,7 +266,12 @@ export class AnthropicTextAdapter<
     const { chatOptions, outputSchema } = options
     const { logger } = chatOptions
 
-    const requestParams = this.mapCommonOptionsToAnthropic(chatOptions)
+    // `structuredOutput()` issues a non-streaming `messages.create({ stream:
+    // false })` below, so the defaulted `max_tokens` must stay under the SDK's
+    // non-streaming 10-minute guard (issue #849) — pass `stream: false`.
+    const requestParams = this.mapCommonOptionsToAnthropic(chatOptions, {
+      stream: false,
+    })
 
     // Create a tool that will capture the structured output
     // Anthropic's SDK requires input_schema with type: 'object' literal
@@ -355,6 +360,7 @@ export class AnthropicTextAdapter<
 
   private mapCommonOptionsToAnthropic(
     options: TextOptions<AnthropicTextProviderOptions>,
+    { stream = true }: { stream?: boolean } = {},
   ) {
     const modelOptions = options.modelOptions
 
@@ -429,8 +435,12 @@ export class AnthropicTextAdapter<
     // that silently truncates long responses with `stop_reason: "max_tokens"`
     // (issue #849). `max_tokens` is a ceiling, not a reservation — billing is on
     // tokens actually generated, so a higher default costs nothing extra.
+    // For non-streaming requests (the `structuredOutput()` path) the default is
+    // clamped to the SDK's non-streaming-safe limit so it doesn't trip the
+    // "streaming required" 10-minute guard — see getAnthropicDefaultMaxTokens.
     const defaultMaxTokens =
-      modelOptions?.max_tokens ?? getAnthropicDefaultMaxTokens(this.model)
+      modelOptions?.max_tokens ??
+      getAnthropicDefaultMaxTokens(this.model, { stream })
     const maxTokens =
       thinkingBudget && thinkingBudget >= defaultMaxTokens
         ? thinkingBudget + 1
diff --git a/packages/ai-anthropic/src/model-meta.ts b/packages/ai-anthropic/src/model-meta.ts
index 13785e9fa..4cb9f3978 100644
--- a/packages/ai-anthropic/src/model-meta.ts
+++ b/packages/ai-anthropic/src/model-meta.ts
@@ -857,16 +857,42 @@ const ANTHROPIC_MODEL_MAX_OUTPUT_TOKENS: Record<string, number> = {
   [CLAUDE_OPUS_4_8_FAST.id]: CLAUDE_OPUS_4_8_FAST.max_output_tokens,
 }
 
+/**
+ * Largest `max_tokens` the Anthropic SDK permits on a **non-streaming**
+ * request. The SDK refuses to make a non-streaming call it estimates could
+ * exceed its 10-minute timeout, computed as
+ * `(60min * max_tokens) / 128_000 > 10min` — i.e. it throws
+ * `"Streaming is required for operations that may take longer than 10 minutes"`
+ * once `max_tokens > 128_000 * 10 / 60 ≈ 21_333`
+ * (`@anthropic-ai/sdk`'s `calculateNonstreamingTimeout`). The text adapter's
+ * only non-streaming call is the forced-tool `structuredOutput()` request, so
+ * its defaulted ceiling must stay at or below this; the streaming chat path
+ * keeps the model's full {@link getAnthropicDefaultMaxTokens} ceiling. We sit
+ * just under the boundary (`21_333` maps to ~599.99s of the 600s limit). This
+ * caps only the *default* — an explicit oversized `max_tokens` from the caller
+ * still surfaces the SDK's "use streaming" error, which is the correct signal.
+ */
+export const ANTHROPIC_MAX_NONSTREAMING_TOKENS = 21_000
+
 /**
  * Resolve the default `max_tokens` for a model: its known `max_output_tokens`
  * ceiling, or {@link ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS} for unknown models.
  * Callers that pass an explicit `max_tokens` bypass this entirely.
+ *
+ * Pass `stream: false` for non-streaming requests (the `structuredOutput()`
+ * path): the result is then clamped to {@link ANTHROPIC_MAX_NONSTREAMING_TOKENS}
+ * so the defaulted ceiling doesn't trip the SDK's non-streaming 10-minute guard
+ * (issue #849). Streaming requests (the default) are unaffected and get the
+ * model's full ceiling.
  */
-export function getAnthropicDefaultMaxTokens(model: string): number {
-  return (
+export function getAnthropicDefaultMaxTokens(
+  model: string,
+  { stream = true }: { stream?: boolean } = {},
+): number {
+  const ceiling =
     ANTHROPIC_MODEL_MAX_OUTPUT_TOKENS[model] ??
     ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS
-  )
+  return stream ? ceiling : Math.min(ceiling, ANTHROPIC_MAX_NONSTREAMING_TOKENS)
 }
 
 /**
diff --git a/packages/ai-anthropic/tests/anthropic-adapter.test.ts b/packages/ai-anthropic/tests/anthropic-adapter.test.ts
index 3dda7c9d9..8a86baf3f 100644
--- a/packages/ai-anthropic/tests/anthropic-adapter.test.ts
+++ b/packages/ai-anthropic/tests/anthropic-adapter.test.ts
@@ -2,6 +2,7 @@ import { describe, it, expect, beforeEach, vi } from 'vitest'
 import { chat, type Tool, type StreamChunk } from '@tanstack/ai'
 import { AnthropicTextAdapter } from '../src/adapters/text'
 import type { AnthropicTextProviderOptions } from '../src/adapters/text'
+import { ANTHROPIC_MAX_NONSTREAMING_TOKENS } from '../src/model-meta'
 import { z } from 'zod'
 
 const mocks = vi.hoisted(() => {
@@ -543,6 +544,51 @@ describe('Anthropic adapter option mapping', () => {
     expect(truncationWarning).toBeUndefined()
   })
 
+  it('clamps the default max_tokens on the non-streaming structured-output path so it never trips the SDK 10-minute guard (#849)', async () => {
+    // The structured-output fallback issues a NON-streaming
+    // `messages.create({ stream: false })`. The Anthropic SDK throws
+    // "Streaming is required for operations that may take longer than 10
+    // minutes" once max_tokens exceeds ~21_333, so the defaulted ceiling must
+    // be clamped here even though the streaming chat path keeps the full 64K.
+    mocks.betaMessagesCreate.mockResolvedValueOnce({
+      id: 'msg_structured',
+      type: 'message',
+      role: 'assistant',
+      model: 'claude-3-7-sonnet',
+      content: [
+        {
+          type: 'tool_use',
+          id: 'toolu_structured_output',
+          name: 'structured_output',
+          input: { recommendation: 'Strat', price: 1299 },
+        },
+      ],
+      stop_reason: 'tool_use',
+      usage: { input_tokens: 10, output_tokens: 20 },
+    })
+
+    const adapter = createAdapter('claude-3-7-sonnet')
+
+    for await (const _ of chat({
+      adapter,
+      messages: [{ role: 'user', content: 'recommend a guitar as json' }],
+      outputSchema: z.object({
+        recommendation: z.string(),
+        price: z.number(),
+      }),
+      stream: true,
+    })) {
+      // consume stream
+    }
+
+    const [payload] = mocks.betaMessagesCreate.mock.calls[0]!
+    expect(payload.stream).toBe(false)
+    // Clamped to the non-streaming limit — NOT claude-3-7-sonnet's full 64K
+    // streaming ceiling, which would make the SDK throw before the request.
+    expect(payload.max_tokens).toBe(ANTHROPIC_MAX_NONSTREAMING_TOKENS)
+    expect(payload.max_tokens).toBeLessThanOrEqual(21_333)
+  })
+
   it('native combined mode (#605): wires outputSchema into output_format alongside tools on Claude 4.5+', async () => {
     // Final-turn JSON the model emits when output_format is in play.
     const finalJson = JSON.stringify({ city: 'Berlin', temp: 18 })
diff --git a/packages/ai-anthropic/tests/model-meta.test.ts b/packages/ai-anthropic/tests/model-meta.test.ts
index 50fc3b282..283a47368 100644
--- a/packages/ai-anthropic/tests/model-meta.test.ts
+++ b/packages/ai-anthropic/tests/model-meta.test.ts
@@ -1,6 +1,7 @@
 import { describe, expect, expectTypeOf, it } from 'vitest'
 import {
   ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS,
+  ANTHROPIC_MAX_NONSTREAMING_TOKENS,
   getAnthropicDefaultMaxTokens,
 } from '../src/model-meta'
 import type {
@@ -806,4 +807,41 @@ describe('getAnthropicDefaultMaxTokens (#849)', () => {
       1024,
     )
   })
+
+  it('clamps the default to the non-streaming limit for non-streaming requests (#849)', () => {
+    // The Anthropic SDK refuses non-streaming requests whose `max_tokens`
+    // could exceed its 10-minute timeout (~21_333). The streaming path keeps
+    // the full ceiling; the non-streaming (`structuredOutput`) path must clamp.
+    expect(ANTHROPIC_MAX_NONSTREAMING_TOKENS).toBeLessThanOrEqual(21_333)
+
+    // Opus 128K and Sonnet 64K both exceed the non-streaming limit → clamped.
+    expect(
+      getAnthropicDefaultMaxTokens('claude-opus-4.8', { stream: false }),
+    ).toBe(ANTHROPIC_MAX_NONSTREAMING_TOKENS)
+    expect(
+      getAnthropicDefaultMaxTokens('claude-sonnet-4-6', { stream: false }),
+    ).toBe(ANTHROPIC_MAX_NONSTREAMING_TOKENS)
+    // Unknown model fallback (64K) is also above the limit → clamped.
+    expect(
+      getAnthropicDefaultMaxTokens('some-future-claude-model', {
+        stream: false,
+      }),
+    ).toBe(ANTHROPIC_MAX_NONSTREAMING_TOKENS)
+  })
+
+  it('does not clamp a model whose ceiling is already below the non-streaming limit (#849)', () => {
+    // claude-3-haiku's 4K ceiling is under the non-streaming limit, so the
+    // non-streaming path returns the real ceiling, not the (larger) cap.
+    expect(
+      getAnthropicDefaultMaxTokens('claude-3-haiku', { stream: false }),
+    ).toBe(4_000)
+  })
+
+  it('keeps the full ceiling for streaming requests (default) (#849)', () => {
+    expect(
+      getAnthropicDefaultMaxTokens('claude-opus-4.8', { stream: true }),
+    ).toBe(128_000)
+    // Omitting the option defaults to streaming.
+    expect(getAnthropicDefaultMaxTokens('claude-opus-4.8')).toBe(128_000)
+  })
 })