diff --git a/.agent/self-learning/coupling.json b/.agent/self-learning/coupling.json index cd673b5e2..b65b64a77 100644 --- a/.agent/self-learning/coupling.json +++ b/.agent/self-learning/coupling.json @@ -61,6 +61,48 @@ "why": "Per CLAUDE.md, every feature / bug fix / behavior change MUST include E2E test coverage. When a new public capability is added, a corresponding spec under testing/e2e/tests/ plus a fixture under testing/e2e/fixtures/ are required — often plus a new entry in feature-support.ts and types.ts. The full pattern is: add the Feature flag, decide provider support, optionally add a per-feature config (system prompt, schema, tools), wire it through src/routes/api.chat.ts (or the relevant route), write the fixture(s), write the spec iterating `providersFor(feature)`. Spec must run against every supported provider so non-native-streaming providers exercise the fallback path. Skip only for refactors that don't change observable behavior." } ] + }, + { + "id": "sandbox-core-contract-touches-providers", + "trigger": "packages/ai-sandbox/src/**/*.ts", + "impacts": [ + { + "target": [ + "packages/ai-sandbox-local-process/src/**/*.ts", + "packages/ai-sandbox-docker/src/**/*.ts", + "packages/ai-sandbox-cloudflare/src/**/*.ts" + ], + "kind": "change-required", + "why": "The SandboxProvider / SandboxHandle / SandboxCapabilities contracts in @tanstack/ai-sandbox are the seam every provider package implements. When a method, capability flag, lifecycle field, or the ensure/resume algorithm changes in core, every provider package must be updated in the same PR or it falls out of contract (silent type breaks, missing capability handling, broken resume). Also re-check capability degradation: a provider that returns capabilities().snapshots===false must keep working when core adds a snapshot-dependent path." 
+ } + ] + }, + { + "id": "harness-adapter-sandbox-execution", + "trigger": "packages/ai-claude-code/src/**/*.ts", + "impacts": [ + { + "target": [ + "packages/ai-codex/src/**/*.ts", + "packages/ai-gemini-cli/src/**/*.ts", + "packages/ai-opencode/src/**/*.ts", + "packages/ai-sandbox/src/**/*.ts" + ], + "kind": "change-required", + "why": "All four harness adapters share one execution contract: declare requires:[SandboxCapability], spawn the agent CLI via sandbox.process (never local child_process), pipe its native stream-json/ACP stdout through the per-adapter translate layer, and proxy host tools via the MCP-over-channel bridge. When the sandbox-execution pattern, the host MCP tool-bridge shape, the per-run bearer-token/channel contract, or the policy->native-permission mapping changes in one adapter (or in @tanstack/ai-sandbox), mirror it across the other harness adapters so they don't diverge into incompatible execution paths." + } + ] + }, + { + "id": "sandbox-source-persistence-ready", + "trigger": "packages/ai-sandbox*/src/**/*.ts", + "impacts": [ + { + "target": ["packages/ai-sandbox/src/**/*.ts"], + "kind": "change-required", + "why": "The sandbox layer ships with zero persistence package but MUST stay persistence-ready so the persistence proposal drops in without re-architecture. Invariant to preserve on any sandbox change: SandboxStore and LockStore stay PLUGGABLE optional capabilities (in-memory defaults only - never hardcode storage), emitted chunks stay conceptually offset-addressable ({runId, seq, ts, chunk}) so a future EventLog/DurableRunStream can capture+replay by cursor, and approvals keep using the existing resume-based approval-requested flow. Do not introduce a sandbox-owned durable store, a bespoke replay buffer, or a non-AG-UI event type that the persistence layer would later have to rip out." 
+ } + ] } ] } diff --git a/.agentsroom/.gitignore b/.agentsroom/.gitignore new file mode 100644 index 000000000..1acd1a387 --- /dev/null +++ b/.agentsroom/.gitignore @@ -0,0 +1,4 @@ +# AgentsRoom: personal files (not committed to git) +*-personal.json +agents-local.json +sessions/ diff --git a/.agentsroom/agents.json b/.agentsroom/agents.json new file mode 100644 index 000000000..e9a83e418 --- /dev/null +++ b/.agentsroom/agents.json @@ -0,0 +1,10 @@ +[ + { + "role": "fullstack", + "model": "opus", + "customName": "Full-Stack Developer", + "isPersonal": false, + "id": "agent-1776361243376-3sekdc", + "claudeSessionId": "96773a93-be2a-45a9-a732-ceb224d3d0e5" + } +] \ No newline at end of file diff --git a/.agentsroom/prompts.json b/.agentsroom/prompts.json new file mode 100644 index 000000000..f4455d843 --- /dev/null +++ b/.agentsroom/prompts.json @@ -0,0 +1,4 @@ +{ + "folders": [], + "prompts": [] +} \ No newline at end of file diff --git a/.changeset/ai-claude-code-initial.md b/.changeset/ai-claude-code-initial.md new file mode 100644 index 000000000..35a41dfe4 --- /dev/null +++ b/.changeset/ai-claude-code-initial.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-claude-code': minor +--- + +New `@tanstack/ai-claude-code` package: a Claude Code **harness adapter that runs inside a sandbox**. It declares `requires: [SandboxCapability]` and spawns the `claude` CLI (`claude -p --output-format stream-json`) inside the sandbox provided by `withSandbox(...)`, streaming its events back as AG-UI chunks. Claude Code owns the agent loop and executes its own native tools (bash, file edits, search) against the sandbox workspace; their activity streams back as resolved tool-call events. `chat()`-provided server tools are bridged to the in-sandbox agent over a host-side MCP tool-proxy (calls are proxied back to the host where `execute()` runs). 
Sessions are resumable via `modelOptions.sessionId` (surfaced through a `claude-code.session-id` custom event), and the working-tree diff is emitted as a `file.changed` custom event after each run. A `defineSandboxPolicy` (allow/ask/deny command globs + file-write/network capability rules) is enforced via Claude Code's `--permission-prompt-tool`: each native tool use is checked against the policy and the client's approval decisions, and an `ask` action with no decision yet surfaces an `approval-requested` event (the client approves and re-runs to continue). Requires the `claude` executable and `ANTHROPIC_API_KEY` to be available in the sandbox (e.g. via `workspace.secrets`). diff --git a/.changeset/ai-codex-initial.md b/.changeset/ai-codex-initial.md new file mode 100644 index 000000000..c6a4d711c --- /dev/null +++ b/.changeset/ai-codex-initial.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-codex': minor +--- + +New `@tanstack/ai-codex` package: a Codex **harness adapter that runs inside a sandbox**. It declares `requires: [SandboxCapability]` and spawns `codex exec --experimental-json` inside the sandbox provided by `withSandbox(...)` (mirroring `@openai/codex-sdk`'s own CLI invocation), feeding the prompt via stdin and streaming its JSONL thread events back as AG-UI chunks. Codex owns the agent loop and executes its built-in tools (shell, file changes, web search, todo lists) against the sandbox workspace. Threads are resumable via `modelOptions.sessionId` (surfaced through a `codex.session-id` custom event); sandbox mode / approval policy / reasoning effort map to codex CLI flags. Requires the `codex` executable and `CODEX_API_KEY` (or a `codex login`) in the sandbox. chat()-provided server tools are bridged into the agent via the host MCP tool-proxy. 
A `defineSandboxPolicy` is mapped onto Codex's coarse permission knobs (sandbox mode, `approval_policy`, `network_access`); because `codex exec` runs non-interactively with no per-action host callback, the fine-grained resume-based interactive-approval flow is not available for Codex (it refuses, rather than prompts for, actions needing approval). diff --git a/.changeset/ai-gemini-cli-initial.md b/.changeset/ai-gemini-cli-initial.md new file mode 100644 index 000000000..2e77eb65a --- /dev/null +++ b/.changeset/ai-gemini-cli-initial.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-gemini-cli': minor +--- + +New `@tanstack/ai-gemini-cli` package: a Gemini CLI **harness adapter that runs inside a sandbox**. It declares `requires: [SandboxCapability]` and spawns `gemini --acp` (Agent Client Protocol) inside the sandbox provided by `withSandbox(...)`, driving it over the sandbox's duplex process IO (the ACP transport is adapted from the sandbox `SpawnHandle`; all ACP protocol handling is reused). Gemini CLI owns the agent loop and executes its built-in tools (shell, file edits, search) against the sandbox workspace; assistant text/thinking stream as token-level deltas and tool activity as resolved tool-call events. Sessions are resumable via `modelOptions.sessionId` (surfaced through a `gemini-cli.session-id` custom event, with graceful fallback to transcript replay). ACP permission requests are answered by a configurable never-hanging policy (`default` / `acceptEdits` / `bypassPermissions` or a custom handler); an action the policy would reject with no client decision yet surfaces an `approval-requested` event so the client can approve and re-run to grant it (interactive approvals). Headless auth is selectable up front via `authMethodId`. Requires the `gemini` CLI in the sandbox. `chat()`-provided server tools are bridged into the agent via the host MCP tool-proxy. 
diff --git a/.changeset/ai-opencode-initial.md b/.changeset/ai-opencode-initial.md new file mode 100644 index 000000000..a160e9f22 --- /dev/null +++ b/.changeset/ai-opencode-initial.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-opencode': minor +--- + +New `@tanstack/ai-opencode` package: an OpenCode **harness adapter that runs inside a sandbox**. It declares `requires: [SandboxCapability]`, spawns `opencode serve` inside the sandbox provided by `withSandbox(...)`, exposes its port, and connects the `@opencode-ai/sdk` HTTP client to it via `baseUrl`. OpenCode owns the agent loop and executes its built-in tools (shell, file edits, search) against the sandbox workspace; assistant text/thinking stream as token-level deltas and tool activity as resolved tool-call events. Sessions are resumable. OpenCode permission requests are answered by a configurable `permissionMode` (`default` / `acceptEdits` / `bypassPermissions` or a custom handler); a request the policy would reject with no client decision yet surfaces an `approval-requested` event so the client can approve and re-run to grant it (interactive approvals). Requires the `opencode` CLI in the sandbox (Docker: publish the server port via `publishPorts`). `chat()`-provided server tools are bridged into the agent via the host MCP tool-proxy. diff --git a/.changeset/ai-sandbox-cloudflare.md b/.changeset/ai-sandbox-cloudflare.md new file mode 100644 index 000000000..2be9ac1b8 --- /dev/null +++ b/.changeset/ai-sandbox-cloudflare.md @@ -0,0 +1,5 @@ +--- +'@tanstack/ai-sandbox-cloudflare': minor +--- + +New `@tanstack/ai-sandbox-cloudflare` package: a Cloudflare Containers sandbox provider (`cloudflareSandbox`) built on `@cloudflare/sandbox`, for running harness adapters at the edge inside a Worker. Implements the uniform `SandboxHandle` (exec, base64-backed fs, git, `exposePort` preview URLs, env) over the Cloudflare Sandbox Durable Object. 
The container disk is ephemeral and snapshots are not yet GA, so `withSandbox` re-bootstraps under the same identity across cold starts (`durableFilesystem`/`snapshots` are reported false). Background processes don't expose stdin on Cloudflare, so stdin-fed harnesses (e.g. Claude Code) need a stdin-capable provider; `exec` works fully. diff --git a/.changeset/persistence-layer.md b/.changeset/persistence-layer.md new file mode 100644 index 000000000..0ea1e36c8 --- /dev/null +++ b/.changeset/persistence-layer.md @@ -0,0 +1,31 @@ +--- +'@tanstack/ai': minor +'@tanstack/ai-sandbox': patch +'@tanstack/ai-client': minor +'@tanstack/ai-claude-code': patch +'@tanstack/ai-codex': patch +'@tanstack/ai-gemini-cli': patch +'@tanstack/ai-opencode': patch +'@tanstack/ai-persistence': minor +'@tanstack/ai-persistence-sql': minor +'@tanstack/ai-persistence-sqlite': minor +'@tanstack/ai-persistence-postgres': minor +'@tanstack/ai-persistence-cloudflare': minor +'@tanstack/ai-persistence-drizzle': minor +'@tanstack/ai-persistence-prisma': minor +'@tanstack/ai-sandbox-persistence': minor +--- + +Persistence + resumable runs as composable `chat()` middleware. + +`withPersistence(...)` makes any run durable: it loads/saves thread message history (server-authoritative), creates/updates run records, persists every AG-UI `StreamChunk` to an append-only event log, and persists usage. It is fully **optional** — a `chat()` with no persistence middleware is byte-for-byte unchanged, and it works for both non-sandbox and sandbox (agent-mode) runs. + +**Resume.** Each persisted chunk carries an in-band, opaque `cursor` (a monotonic per-run sequence). A client that disconnects mid-run reconnects with the run's `runId` + last `cursor`; `chat({ cursor })` replays the persisted event tail after that cursor, then — for harness adapters that re-attach to their still-running in-sandbox process — continues live. 
The headless `ChatClient` tracks the cursor and exposes `resume()` / `getResumeState()` / `maybeAutoResume()` with an `autoResume` opt-out. + +**Event model.** The persisted log is the AG-UI `StreamChunk` stream itself (no parallel event type); agent activity (file changes, process output, approvals, artifacts, sandbox lifecycle) rides on well-known `CUSTOM` events catalogued in `@tanstack/ai`. + +**Backends (shared SQL core + thin adapters).** One SQL implementation behind a minimal `SqlDriver` (`@tanstack/ai-persistence-sql`), with backends for SQLite (`-sqlite`, node:sqlite/better-sqlite3), Postgres (`-postgres`, pg), Cloudflare D1 (`-cloudflare`), and bring-your-own Drizzle (`-drizzle`) and Prisma (`-prisma`). Raw drivers auto-migrate (versioned, opt-out); ORMs own their schema. `memoryPersistence()` ships in core for tests/examples. + +**Agent mode.** `@tanstack/ai-sandbox-persistence` bridges a durable SQL-backed `SandboxStore` and the durable `LockStore` into `withSandbox`, so sandbox resume and ensure-locking survive across processes. The shared `locks` capability now lives in `@tanstack/ai` (one token across the sandbox and persistence layers); `@tanstack/ai-sandbox` re-exports it for back-compat. + +Approvals are persisted and a durable approval controller feeds decisions back into the existing deny-and-replay flow. Cloudflare is compile-verified (Workers runtime), Postgres runtime-verification is via Docker, and live harness re-attach is verified with the real CLIs; everything else is unit/integration-tested. The Playwright E2E suite is a follow-up. 
diff --git a/.changeset/sandbox-hooks-redesign.md b/.changeset/sandbox-hooks-redesign.md new file mode 100644 index 000000000..b32c1d5df --- /dev/null +++ b/.changeset/sandbox-hooks-redesign.md @@ -0,0 +1,23 @@ +--- +'@tanstack/ai': minor +'@tanstack/ai-sandbox': minor +'@tanstack/ai-sandbox-local-process': minor +--- + +Declarative sandbox file-event hooks: observe file create / change / delete +inside a sandbox and have them fire automatically during a chat run. + +- `@tanstack/ai`: chat middleware gains an optional `sandbox` hook group + (`onFile`/`onFileCreate`/`onFileChange`/`onFileDelete`), a `SandboxFileEvent` + type, and a `sandbox` debug-logging category. The engine auto-emits a + `CUSTOM` `sandbox.file` event per change (client reads it from `parts`). +- `@tanstack/ai-sandbox`: `defineSandbox({ hooks, fileEvents })` declares + file + lifecycle hooks (`onFile*`/`onReady`/`onError`/`onDestroy`) that fire + automatically while the sandbox runs in a chat — `withSandbox` owns the + watcher. The watcher is provider-agnostic: a native `fs.watch` fast-path when + the provider advertises it, otherwise a portable `find -printf` mtime + snapshot-diff poll (no extra deps; `.git`/`node_modules` ignored by default). + `watchWorkspace()` / `diffSnapshots` remain as low-level building blocks. +- `@tanstack/ai-sandbox-local-process`: implements the optional `fs.watch` seam + via Node's recursive `fs.watch` (Windows/macOS); Linux falls back to the core + exec-poll automatically. diff --git a/.changeset/sandbox-layer.md b/.changeset/sandbox-layer.md new file mode 100644 index 000000000..6f2d57dd3 --- /dev/null +++ b/.changeset/sandbox-layer.md @@ -0,0 +1,13 @@ +--- +'@tanstack/ai-sandbox': minor +'@tanstack/ai-sandbox-local-process': minor +'@tanstack/ai-sandbox-docker': minor +'@tanstack/ai': minor +--- + +New provider-agnostic sandbox layer so harness adapters can run **inside** isolated sandboxes. 
+ +- **`@tanstack/ai-sandbox`** — `defineSandbox()` (lazy controller + resume→restoreSnapshot→create+bootstrap ensure algorithm), `withSandbox()` middleware, `defineWorkspace()` (git/local source, package-manager detection, setup, skills, secrets), `defineSandboxPolicy()`, the `SandboxProvider`/`SandboxHandle`/`SandboxCapabilities` contracts, capability tokens (`SandboxCapability` plus the optional `SandboxStore`/`Locks` persistence seams with in-memory defaults), `bootstrapWorkspace`, `createExecBackedGit`, `spawnNdjson` (run an agent CLI in a sandbox and stream its NDJSON stdout), the host MCP tool-proxy bridge (`startHostToolBridge` — exposes `chat()` server tools to the in-sandbox agent, with an optional permission-prompt tool), and the shared interactive-approval primitives (`resolveApproval`, `approvalId`, `buildApprovalRequestedEvent`) harness adapters use to enforce a policy and surface `approval-requested` events for client-in-the-loop approvals. +- **`@tanstack/ai-sandbox-local-process`** — `localProcessSandbox()`: runs the agent on the host through the uniform `SandboxHandle` (no isolation; the fast dev loop). +- **`@tanstack/ai-sandbox-docker`** — `dockerSandbox()`: runs the agent inside an isolated Docker container (dockerode), with commit-based snapshots, fork, and resume-by-id. +- **`@tanstack/ai`** — `TextOptions.capabilities` exposes the middleware capability context to adapters so harness adapters that declare `requires: [...]` can read provided capabilities from `chatStream`; `TextOptions.approvals` threads client approval decisions through to adapters for the interactive-approval (deny + `approval-requested` + re-run) flow; `DefinedChatMiddleware` and `AnyChatMiddleware` are now exported for portable middleware authoring. 
diff --git a/.gitignore b/.gitignore index 6678fb779..92054517b 100644 --- a/.gitignore +++ b/.gitignore @@ -78,3 +78,7 @@ solo.yml # Agent scratch output (gap-analysis reports, triage notes — generated locally) .agent/gap-analysis/ .agent/triage/ + +/OpenCode.md +.agentsroom/ +.opencode/ diff --git a/docs/adapters/claude-code.md b/docs/adapters/claude-code.md new file mode 100644 index 000000000..3f0f6dfbc --- /dev/null +++ b/docs/adapters/claude-code.md @@ -0,0 +1,181 @@ +--- +title: Claude Code +id: claude-code-adapter +order: 11 +description: "Use Claude Code as a chat backend in TanStack AI — agent harness with local tool execution, stateful coding sessions, and tool bridging via @tanstack/ai-claude-code." +keywords: + - tanstack ai + - claude code + - claude agent sdk + - anthropic + - harness + - agent + - coding agent + - adapter +--- + +The Claude Code adapter runs [Claude Code](https://docs.anthropic.com/en/docs/claude-code) (via the `@anthropic-ai/claude-agent-sdk`) as a chat backend. Unlike HTTP provider adapters, this is a **harness adapter**: Claude Code runs its own agent loop and executes its own tools — bash, file reads and edits, glob/grep search, web search — locally on your server. Each `chat()` call runs one full harness turn; the harness's tool activity streams back as already-resolved tool-call events your UI can render. + +> **Server-only.** The harness spawns the Claude Code runtime as a subprocess, so this adapter only works in a Node.js server environment — never in the browser. Treat it like giving Claude a shell on the machine it runs on, and configure permissions accordingly. + +## Installation + +```bash +npm install @tanstack/ai-claude-code +``` + +A runnable demo lives at [`examples/ts-react-coding-agent`](https://github.com/TanStack/ai/tree/main/examples/ts-react-coding-agent) — session resume, the harness tool timeline, permission modes, and tool bridging, wired into a React app. 
+ +## Authentication + +The harness resolves credentials the same way Claude Code does: + +- `ANTHROPIC_API_KEY` in the server's environment (or the `apiKey` config option), or +- an existing Claude subscription login on the machine (`claude login`). + +## Basic Usage + +```typescript +import { chat } from "@tanstack/ai"; +import { claudeCodeText } from "@tanstack/ai-claude-code"; + +const stream = chat({ + adapter: claudeCodeText("claude-opus-4-8", { + cwd: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: [{ role: "user", content: "Fix the failing test in utils.test.ts" }], +}); +``` + +## Configuration + +| Option | Description | +| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | +| `cwd` | Working directory for the harness session. Defaults to `process.cwd()`. | +| `permissionMode` | Claude Code permission mode (`'default'`, `'acceptEdits'`, `'bypassPermissions'`, `'plan'`, `'dontAsk'`, `'auto'`). See the permissions note below. | +| `allowedTools` | Built-in tools the harness may use without prompting (e.g. `['Read', 'Grep', 'Bash(npm test:*)']`). | +| `disallowedTools` | Built-in tools removed from the harness entirely. | +| `maxTurns` | Maximum harness-internal turns per run. | +| `systemPromptMode` | `'append'` (default) keeps Claude Code's preset system prompt and appends your `systemPrompts`; `'replace'` sends yours as the entire prompt. | +| `mcpServers` | Extra MCP servers passed through to the harness untouched. | +| `apiKey` | Anthropic API key for the harness subprocess. | +| `env` | Extra environment variables for the harness subprocess. | +| `pathToClaudeCodeExecutable` | Use a specific Claude Code executable instead of the SDK's bundled one. | +| `streamPartials` | Emit true token-level text deltas (default `true`). 
| +| `canUseTool` | Custom permission handler; replaces the adapter's default handler. | +| `settingSources` | Claude Code settings tiers to load. Default `['project']`: the `cwd`'s CLAUDE.md and project settings apply, but user-level config on the host (`~/.claude` plugins, hooks, skills) is ignored. Pass `['user', 'project', 'local']` for CLI-equivalent behavior, or `[]` for full isolation. | + +**Permissions on headless servers.** Without an explicit `permissionMode` or `canUseTool`, the adapter installs a safe default handler: bridged TanStack tools always run, and any built-in tool call that would normally prompt a human is denied with guidance instead of hanging the request. To let the harness edit files or run commands, set `permissionMode: 'acceptEdits'` / `'bypassPermissions'`, or enumerate `allowedTools`. + +## Stateful Sessions + +Claude Code sessions are stateful — the harness keeps the full working context (files read, commands run, conclusions reached) between turns. The adapter surfaces the session id of every run as a custom stream event named `claude-code.session-id`; thread it back via `modelOptions.sessionId` to resume the session. When resuming, only the latest user message is sent — the harness already holds the prior context. + +Server endpoint: + +```typescript +import { + chat, + chatParamsFromRequest, + toServerSentEventsResponse, +} from "@tanstack/ai"; +import { claudeCodeText } from "@tanstack/ai-claude-code"; + +export async function POST(request: Request) { + const params = await chatParamsFromRequest(request); + + // Extra fields the client puts in the connection `body` arrive here. + const sessionId = + typeof params.forwardedProps.sessionId === "string" + ? 
params.forwardedProps.sessionId + : undefined; + + const stream = chat({ + adapter: claudeCodeText("claude-opus-4-8", { + cwd: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: params.messages, + modelOptions: { sessionId }, + }); + + return toServerSentEventsResponse(stream); +} +``` + +Client (React) — capture the session id from the custom event and send it back on subsequent requests: + +```typescript +import { useState } from "react"; +import { useChat } from "@tanstack/ai-react"; +import { fetchServerSentEvents } from "@tanstack/ai-client"; + +function CodingAssistant() { + const [sessionId, setSessionId] = useState<string | undefined>(undefined); + + const { messages, sendMessage } = useChat({ + connection: fetchServerSentEvents("/api/chat", () => ({ + body: { sessionId }, + })), + onCustomEvent: (name, value) => { + if ( + name === "claude-code.session-id" && + typeof value === "object" && + value !== null && + "sessionId" in value && + typeof value.sessionId === "string" + ) { + setSessionId(value.sessionId); + } + }, + }); + + // ... render messages; harness tool activity (Bash, Edit, Read, ...) + // arrives as regular tool-call parts with their results attached. +} +``` + +Sessions are stored on the machine that ran them (`~/.claude/projects/`), so resuming only works on the same server instance. Pass `modelOptions: { forkSession: true }` alongside `sessionId` to branch a session instead of continuing it. + +## Tools + +Two kinds of tools flow through this adapter: + +1. **Built-in harness tools** (`Bash`, `Read`, `Write`, `Edit`, `Glob`, `Grep`, `WebSearch`, ...) are executed by Claude Code itself. Their activity streams back as tool-call events with results already attached, so `useChat` UIs render them with no extra wiring — but your code never executes them. + +2. **Your TanStack tools** are bridged *into* the harness as an in-process MCP server. 
Define them as usual with `toolDefinition().server()`; the model sees them as `mcp__tanstack__<name>` and the adapter strips the prefix on the way back out, so events match the names you registered. + +```typescript +import { z } from "zod"; +import { chat, toolDefinition } from "@tanstack/ai"; +import { claudeCodeText } from "@tanstack/ai-claude-code"; + +const lookupTicket = toolDefinition({ + name: "lookup_ticket", + description: "Look up an issue ticket by id", + inputSchema: z.object({ ticketId: z.string() }), +}).server(async ({ ticketId }) => { + return { ticketId, status: "open", title: "Crash on startup" }; +}); + +const stream = chat({ + adapter: claudeCodeText("claude-opus-4-8"), + messages: [{ role: "user", content: "What's the status of ticket T-123?" }], + tools: [lookupTicket], +}); +``` + +**Client-side and approval-gated tools are not supported.** The harness executes tools inside a live subprocess, which cannot pause across HTTP requests to wait for a browser round-trip or a human approval. Passing a tool without a server `execute()` implementation — or one marked `needsApproval` — fails fast with a descriptive error. Run those tools outside the harness with a regular provider adapter. + +## Structured Output + +`structuredOutput()` uses the harness's native JSON-schema output format in a one-shot run (single turn, no tools). It works for finalization after a chat, but a plain provider adapter (e.g. `@tanstack/ai-anthropic`) is the better choice when structured extraction is the primary job — it's faster and doesn't spawn a subprocess. + +## Limitations + +- **Server-only (Node).** The harness spawns a subprocess; Windows support is untested. +- **The harness owns the agent loop.** TanStack's agent-loop strategies and per-iteration middleware don't apply inside a harness turn; `maxTurns` is the equivalent control. +- **No sampling controls.** `temperature`-style options don't exist here. 
+- **Sessions are machine-local.** Resume requires hitting the same server instance. +- **Cold starts.** Each call spawns a harness turn; expect higher first-token latency than HTTP adapters. diff --git a/docs/adapters/codex.md b/docs/adapters/codex.md new file mode 100644 index 000000000..199cffe7f --- /dev/null +++ b/docs/adapters/codex.md @@ -0,0 +1,182 @@ +--- +title: Codex +id: codex-adapter +order: 12 +description: "Use OpenAI Codex as a chat backend in TanStack AI — agent harness with local tool execution, stateful coding sessions, and tool bridging via @tanstack/ai-codex." +keywords: + - tanstack ai + - codex + - codex sdk + - openai + - harness + - agent + - coding agent + - adapter +--- + +The Codex adapter runs [OpenAI Codex](https://developers.openai.com/codex) (via the `@openai/codex-sdk`) as a chat backend. Unlike HTTP provider adapters, this is a **harness adapter**: Codex runs its own agent loop and executes its own tools — shell commands, file changes, web search — locally on your server, inside its sandbox. Each `chat()` call runs one full harness turn; the harness's tool activity streams back as already-resolved tool-call events your UI can render. + +> **Server-only.** The harness spawns the Codex runtime (bundled with the SDK) as a subprocess, so this adapter only works in a Node.js server environment — never in the browser. The sandbox mode is the safety boundary; configure it deliberately. + +## Installation + +```bash +npm install @tanstack/ai-codex +``` + +A runnable demo lives at [`examples/ts-react-coding-agent`](https://github.com/TanStack/ai/tree/main/examples/ts-react-coding-agent) — session resume, the harness tool timeline, sandbox modes, and tool bridging, wired into a React app. + +## Authentication + +The harness resolves credentials the same way the Codex CLI does: + +- the `apiKey` config option (exported to the subprocess as `CODEX_API_KEY`; usage-based billing), or +- an existing ChatGPT login on the machine (`codex login`). 
+ +## Basic Usage + +```typescript +import { chat } from "@tanstack/ai"; +import { codexText } from "@tanstack/ai-codex"; + +const stream = chat({ + adapter: codexText("gpt-5.1-codex", { + cwd: "/path/to/project", + sandboxMode: "workspace-write", + }), + messages: [{ role: "user", content: "Fix the failing test in utils.test.ts" }], +}); +``` + +## Configuration + +| Option | Description | +| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | +| `cwd` | Working directory for the harness session. Defaults to `process.cwd()`. | +| `sandboxMode` | Codex sandbox: `'read-only'` (harness default), `'workspace-write'`, or `'danger-full-access'`. This is the safety boundary on a server. | +| `approvalPolicy` | Codex approval policy. Defaults to `'never'` — headless runs have no approval UI, so anything else can stall a turn. | +| `modelReasoningEffort` | `'minimal'` \| `'low'` \| `'medium'` \| `'high'` \| `'xhigh'`. | +| `skipGitRepoCheck` | Skip the harness's git-repo safety check. Defaults to `true` (server adapters routinely point at scratch directories). | +| `networkAccessEnabled` | Allow network access inside the `workspace-write` sandbox. | +| `webSearchMode` | `'disabled'` \| `'cached'` \| `'live'`. | +| `additionalDirectories`| Extra writable directories beyond `cwd`. | +| `apiKey` | OpenAI API key for the harness subprocess. | +| `baseUrl` | Override the Codex backend base URL. | +| `codexPathOverride` | Use a specific codex executable instead of the SDK's bundled binary. | +| `env` | Environment variables for the subprocess. When set, `process.env` is **not** inherited (Codex SDK semantics). | +| `config` | Extra `--config key=value` overrides passed to the Codex CLI (e.g. additional `mcp_servers` entries). 
| + +Per-call overrides — `sessionId`, `sandboxMode`, `approvalPolicy`, `modelReasoningEffort`, `workingDirectory`, `skipGitRepoCheck` — go through `modelOptions`. + +## Stateful Sessions + +Codex threads are stateful — the harness keeps the full working context (files read, commands run, conclusions reached) between turns. The adapter surfaces the thread id of every fresh run as a custom stream event named `codex.session-id`; thread it back via `modelOptions.sessionId` to resume. When resuming, only the latest user message is sent — the harness already holds the prior context. + +Server endpoint: + +```typescript +import { + chat, + chatParamsFromRequest, + toServerSentEventsResponse, +} from "@tanstack/ai"; +import { codexText } from "@tanstack/ai-codex"; + +export async function POST(request: Request) { + const params = await chatParamsFromRequest(request); + + // Extra fields the client puts in the connection `body` arrive here. + const sessionId = + typeof params.forwardedProps.sessionId === "string" + ? 
params.forwardedProps.sessionId + : undefined; + + const stream = chat({ + adapter: codexText("gpt-5.1-codex", { + cwd: "/path/to/project", + sandboxMode: "workspace-write", + }), + messages: params.messages, + modelOptions: { sessionId }, + }); + + return toServerSentEventsResponse(stream); +} +``` + +Client (React) — capture the session id from the custom event and send it back on subsequent requests: + +```typescript +import { useState } from "react"; +import { useChat } from "@tanstack/ai-react"; +import { fetchServerSentEvents } from "@tanstack/ai-client"; + +function CodingAssistant() { + const [sessionId, setSessionId] = useState<string | undefined>(undefined); + + const { messages, sendMessage } = useChat({ + connection: fetchServerSentEvents("/api/chat", () => ({ + body: { sessionId }, + })), + onCustomEvent: (name, value) => { + if ( + name === "codex.session-id" && + typeof value === "object" && + value !== null && + "sessionId" in value && + typeof value.sessionId === "string" + ) { + setSessionId(value.sessionId); + } + }, + }); + + // ... render messages; harness tool activity (command_execution, + // file_change, ...) arrives as regular tool-call parts with results. +} +``` + +Sessions are stored on the machine that ran them (`~/.codex/sessions/`), so resuming only works on the same server instance. + +## Tools + +Two kinds of tools flow through this adapter: + +1. **Built-in harness tools** are executed by Codex itself and stream back as tool-call events with results already attached: `command_execution` (shell), `file_change` (patches), `web_search`, and `todo_list` (the agent's running plan). Your code never executes them. + +2. **Your TanStack tools** are bridged *into* the harness: the adapter starts a short-lived Streamable-HTTP MCP server on `127.0.0.1` for the duration of the turn and points Codex at it. Define tools as usual with `toolDefinition().server()`; tool-call events come back under the names you registered. 
+ +```typescript +import { z } from "zod"; +import { chat, toolDefinition } from "@tanstack/ai"; +import { codexText } from "@tanstack/ai-codex"; + +const lookupTicket = toolDefinition({ + name: "lookup_ticket", + description: "Look up an issue ticket by id", + inputSchema: z.object({ ticketId: z.string() }), +}).server(async ({ ticketId }) => { + return { ticketId, status: "open", title: "Crash on startup" }; +}); + +const stream = chat({ + adapter: codexText("gpt-5.1-codex"), + messages: [{ role: "user", content: "What's the status of ticket T-123?" }], + tools: [lookupTicket], +}); +``` + +**Client-side and approval-gated tools are not supported.** The harness executes tools inside a live subprocess, which cannot pause across HTTP requests to wait for a browser round-trip or a human approval. Passing a tool without a server `execute()` implementation — or one marked `needsApproval` — fails fast with a descriptive error. Run those tools outside the harness with a regular provider adapter. + +## Structured Output + +`structuredOutput()` uses Codex's native `outputSchema` support in a fresh, read-only, one-shot thread whose final message is a JSON string conforming to your schema. It works for finalization after a chat, but a plain provider adapter (e.g. `@tanstack/ai-openai`) is the better choice when structured extraction is the primary job — it's faster and doesn't spawn a subprocess. + +## Limitations + +- **No token-level text streaming.** The Codex SDK reports assistant text and reasoning only as completed items, so text arrives message-at-a-time. Tool activity (commands starting/finishing) still streams live, which keeps the UI feeling alive during long turns. +- **Server-only (Node).** The harness spawns a subprocess. +- **The harness owns the agent loop.** TanStack's agent-loop strategies and per-iteration middleware don't apply inside a harness turn. +- **No sampling controls.** `temperature`-style options don't exist here. 
+- **Sessions are machine-local.** Resume requires hitting the same server instance. +- **Cold starts.** Each call spawns a harness turn; expect higher first-token latency than HTTP adapters. diff --git a/docs/adapters/gemini-cli.md b/docs/adapters/gemini-cli.md new file mode 100644 index 000000000..9822c1298 --- /dev/null +++ b/docs/adapters/gemini-cli.md @@ -0,0 +1,205 @@ +--- +title: Gemini CLI +id: gemini-cli-adapter +order: 13 +description: "Use Gemini CLI as a chat backend in TanStack AI — agent harness with local tool execution, stateful coding sessions, and tool bridging via @tanstack/ai-gemini-cli." +keywords: + - tanstack ai + - gemini cli + - agent client protocol + - acp + - google + - harness + - agent + - coding agent + - adapter +--- + +The Gemini CLI adapter runs [Gemini CLI](https://github.com/google-gemini/gemini-cli) as a chat backend, driving it over the [Agent Client Protocol](https://agentclientprotocol.com) (`gemini --acp`) — the same interface editors like Zed use to embed it. Unlike HTTP provider adapters, this is a **harness adapter**: Gemini CLI runs its own agent loop and executes its own tools — shell commands, file reads and edits, search — locally on your server. Each `chat()` call runs one full harness turn; assistant text and thinking stream as true token-level deltas, and the harness's tool activity streams back as already-resolved tool-call events your UI can render. + +> **Server-only.** The adapter spawns the `gemini` CLI as a subprocess, so it only works in a Node.js server environment — never in the browser. Treat it like giving Gemini a shell on the machine it runs on, and configure permissions accordingly. 
+ +## Installation + +```bash +npm install @tanstack/ai-gemini-cli +``` + +The `gemini` CLI itself is a prerequisite — it is **not** bundled: + +```bash +npm install -g @google/gemini-cli +``` + +A runnable demo lives at [`examples/ts-react-coding-agent`](https://github.com/TanStack/ai/tree/main/examples/ts-react-coding-agent) — session resume, the harness tool timeline, permission modes, and tool bridging, wired into a React app. + +## Authentication + +The harness resolves credentials the same way Gemini CLI does: + +- an existing Google login on the machine (run `gemini` once interactively), or +- `GEMINI_API_KEY` in the server's environment (pass it via the `env` config option if needed). + +**Headless ACP auth.** When driven over ACP, Gemini CLI can't pop an +interactive auth picker, so it needs to be told which method to use. Set +`authMethodId` to one of the methods the CLI advertises — commonly +`'oauth-personal'` (Log in with Google), `'gemini-api-key'`, or `'vertex-ai'`. +The adapter selects it (via the ACP `authenticate` call) before opening the +session, and fails fast with the list of available methods if the one you +asked for isn't offered. Some setups also require trusting the working +directory in headless mode — set `GEMINI_CLI_TRUST_WORKSPACE=true` (or pass +`--skip-trust` via `extraArgs`) when the CLI refuses an untrusted folder. 
+ +```typescript +import { geminiCliText } from "@tanstack/ai-gemini-cli"; + +const adapter = geminiCliText("gemini-3-pro-preview", { + cwd: "/path/to/project", + authMethodId: "oauth-personal", // reuse the machine's Google login +}); +``` + +## Basic Usage + +```typescript +import { chat } from "@tanstack/ai"; +import { geminiCliText } from "@tanstack/ai-gemini-cli"; + +const stream = chat({ + adapter: geminiCliText("gemini-3-pro-preview", { + cwd: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: [{ role: "user", content: "Fix the failing test in utils.test.ts" }], +}); +``` + +## Configuration + +| Option | Description | +| --------------------- | --------------------------------------------------------------------------------------------------------------------- | +| `cwd` | Working directory for the harness session. Defaults to `process.cwd()`. | +| `executablePath` | Path to the Gemini CLI executable. Defaults to `gemini` on `PATH`. | +| `extraArgs` | Extra CLI arguments appended after `--acp` (e.g. `['--sandbox']`). | +| `env` | Extra environment variables merged over `process.env` for the subprocess. | +| `permissionMode` | `'default'`, `'acceptEdits'`, or `'bypassPermissions'`. See the permissions note below. | +| `onPermissionRequest` | Custom permission handler; replaces the adapter's default policy. | +| `authMethodId` | ACP auth method to select before the session starts, e.g. `'oauth-personal'`, `'gemini-api-key'`, `'vertex-ai'`. See Authentication. | + +Per-call overrides — `sessionId`, `permissionMode`, `cwd`, `authMethodId` — go through `modelOptions`. + +**Permissions on headless servers.** ACP routes the harness's tool-approval questions back to the embedding application. 
Without a custom `onPermissionRequest`, the adapter installs a safe default policy that always answers immediately: bridged TanStack tools are approved, `'acceptEdits'` additionally approves file-mutation tools (edit / move / delete kinds), `'bypassPermissions'` approves everything, and anything else is rejected — a headless server must never hang on a question only an interactive user could answer. + +## Stateful Sessions + +Gemini CLI sessions are stateful — the harness keeps the full working context between turns. The adapter surfaces the session id of every run as a custom stream event named `gemini-cli.session-id`; thread it back via `modelOptions.sessionId` to resume the session. When resuming, only the latest user message is sent — the harness already holds the prior context. If the installed CLI can't load the session (older CLI, different machine), the adapter transparently falls back to a fresh session seeded with the flattened transcript, and the new session id is emitted so the client can re-pin it. + +Server endpoint: + +```typescript +import { + chat, + chatParamsFromRequest, + toServerSentEventsResponse, +} from "@tanstack/ai"; +import { geminiCliText } from "@tanstack/ai-gemini-cli"; + +export async function POST(request: Request) { + const params = await chatParamsFromRequest(request); + + // Extra fields the client puts in the connection `body` arrive here. + const sessionId = + typeof params.forwardedProps.sessionId === "string" + ? 
params.forwardedProps.sessionId + : undefined; + + const stream = chat({ + adapter: geminiCliText("gemini-3-pro-preview", { + cwd: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: params.messages, + modelOptions: { sessionId }, + }); + + return toServerSentEventsResponse(stream); +} +``` + +Client (React) — capture the session id from the custom event and send it back on subsequent requests: + +```typescript +import { useState } from "react"; +import { useChat } from "@tanstack/ai-react"; +import { fetchServerSentEvents } from "@tanstack/ai-client"; + +function CodingAssistant() { + const [sessionId, setSessionId] = useState<string | undefined>(undefined); + + const { messages, sendMessage } = useChat({ + connection: fetchServerSentEvents("/api/chat", () => ({ + body: { sessionId }, + })), + onCustomEvent: (name, value) => { + if ( + name === "gemini-cli.session-id" && + typeof value === "object" && + value !== null && + "sessionId" in value && + typeof value.sessionId === "string" + ) { + setSessionId(value.sessionId); + } + }, + }); + + // ... render messages; harness tool activity (execute, edit, read, ...) + // arrives as regular tool-call parts with their results attached. +} +``` + +Sessions are stored on the machine that ran them (under `~/.gemini/tmp/`), so resuming only works on the same server instance. + +## Tools + +Two kinds of tools flow through this adapter: + +1. **Built-in harness tools** (shell, file edits, reads, search, web fetch, ...) are executed by Gemini CLI itself. Their activity streams back as tool-call events — named by their ACP tool kind (`execute`, `edit`, `read`, `search`, ...), with the human-readable title in the arguments — and results attached, so `useChat` UIs render them with no extra wiring. Your code never executes them. The harness's running plan is surfaced as a CUSTOM `gemini-cli.plan` event. + +2. 
**Your TanStack tools** are bridged *into* the harness: the adapter starts a short-lived Streamable-HTTP MCP server on `127.0.0.1` for the duration of the turn and registers it with the ACP session. Define tools as usual with `toolDefinition().server()`; tool-call events come back under the names you registered, and the default permission policy auto-approves them. + +```typescript +import { z } from "zod"; +import { chat, toolDefinition } from "@tanstack/ai"; +import { geminiCliText } from "@tanstack/ai-gemini-cli"; + +const lookupTicket = toolDefinition({ + name: "lookup_ticket", + description: "Look up an issue ticket by id", + inputSchema: z.object({ ticketId: z.string() }), +}).server(async ({ ticketId }) => { + return { ticketId, status: "open", title: "Crash on startup" }; +}); + +const stream = chat({ + adapter: geminiCliText("gemini-3-pro-preview"), + messages: [{ role: "user", content: "What's the status of ticket T-123?" }], + tools: [lookupTicket], +}); +``` + +**Client-side and approval-gated tools are not supported.** The harness executes tools inside a live subprocess, which cannot pause across HTTP requests to wait for a browser round-trip or a human approval. Passing a tool without a server `execute()` implementation — or one marked `needsApproval` — fails fast with a descriptive error. Run those tools outside the harness with a regular provider adapter. + +## Structured Output + +ACP has no native JSON-schema output channel, so `structuredOutput()` is best-effort: the schema is embedded as a prompt instruction in a fresh one-shot session and the final text is parsed (markdown fences are stripped when present). For production structured extraction, use a plain provider adapter (e.g. `@tanstack/ai-gemini`) — it's faster, schema-enforced, and doesn't spawn a subprocess. + +## Limitations + +- **Server-only (Node)**, and the `gemini` CLI must be installed and authenticated on the host. 
+- **Token usage is usually unavailable.** ACP only recently added usage reporting; when the CLI doesn't report it, `RUN_FINISHED` carries no usage. +- **The harness owns the agent loop.** TanStack's agent-loop strategies and per-iteration middleware don't apply inside a harness turn. +- **No sampling controls.** `temperature`-style options don't exist here. +- **Sessions are machine-local.** Resume requires hitting the same server instance (with graceful fallback to a fresh transcript-seeded session). +- **Cold starts.** Each call spawns the CLI; expect higher first-token latency than HTTP adapters. +- **ACP is young.** Gemini CLI's ACP mode is still stabilizing; pin a known-good CLI version in production. diff --git a/docs/adapters/opencode.md b/docs/adapters/opencode.md new file mode 100644 index 000000000..ff2fa70e6 --- /dev/null +++ b/docs/adapters/opencode.md @@ -0,0 +1,186 @@ +--- +title: OpenCode +id: opencode-adapter +order: 14 +description: "Use OpenCode as a chat backend in TanStack AI — agent harness with local tool execution, token-level streaming, stateful sessions, and tool bridging via @tanstack/ai-opencode." +keywords: + - tanstack ai + - opencode + - opencode sdk + - harness + - agent + - coding agent + - adapter +--- + +The OpenCode adapter runs [OpenCode](https://opencode.ai) as a chat backend, driving it over its local HTTP server (`@opencode-ai/sdk`). Unlike HTTP provider adapters, this is a **harness adapter**: OpenCode runs its own agent loop and executes its own tools — shell commands, file reads and edits, search — locally on your server. Each `chat()` call runs one full harness turn; assistant text and reasoning stream as true token-level deltas, and the harness's tool activity streams back as already-resolved tool-call events your UI can render. + +> **Server-only.** The adapter spawns (or attaches to) an `opencode serve` process, so it only works in a Node.js server environment — never in the browser. 
Treat it like giving OpenCode a shell on the machine it runs on, and configure permissions accordingly. + +## Installation + +```bash +npm install @tanstack/ai-opencode +``` + +The `opencode` CLI must be installed and its providers authenticated on the host: + +```bash +npm install -g opencode-ai +opencode auth login +``` + +A runnable demo lives at [`examples/ts-react-coding-agent`](https://github.com/TanStack/ai/tree/main/examples/ts-react-coding-agent) — session resume, the harness tool timeline, permission modes, and tool bridging, wired into a React app. + +## Models + +OpenCode is provider-agnostic: it resolves any `provider/model` id its configured providers support. Address models as `provider/model` (the adapter splits on the first `/`): + +```typescript +import { chat } from "@tanstack/ai"; +import { opencodeText } from "@tanstack/ai-opencode"; + +const stream = chat({ + adapter: opencodeText("anthropic/claude-sonnet-4-5", { + directory: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: [{ role: "user", content: "Fix the failing test in utils.test.ts" }], +}); +``` + +## Configuration + +| Option | Description | +| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `directory` | Working directory for the harness session. Defaults to `process.cwd()`. | +| `baseUrl` | Attach to an already-running `opencode serve` (e.g. `http://127.0.0.1:4096`) instead of spawning a new server per turn. | +| `hostname` | Hostname for the spawned server. Defaults to the SDK default (`127.0.0.1`). | +| `port` | Port for the spawned server. Defaults to the SDK default (`4096`). | +| `permissionMode` | `'default'` (bridged tools run, everything else that prompts is rejected), `'acceptEdits'` (also auto-approves file edits), or `'bypassPermissions'` (allow all). 
| +| `onPermissionRequest` | Custom permission handler; replaces the default policy entirely. | +| `config` | Extra OpenCode config merged with the adapter's MCP and permission config. | + +Per-call overrides — `sessionId`, `permissionMode`, `directory` — go through `modelOptions`. + +## Permissions + +OpenCode asks for permission before mutating files or running commands. A headless server has no one to answer those prompts, so the adapter applies a policy automatically — it never hangs a turn: + +- **`'default'`** — bridged TanStack tools run; anything else that would prompt (edits, shell, web fetch) is rejected. +- **`'acceptEdits'`** — additionally auto-approves file-mutation requests (edit / write / patch). +- **`'bypassPermissions'`** — approves everything. Only use this against a sandbox or scratch directory. + +Provide `onPermissionRequest` to implement your own policy (e.g. allow-list specific commands). + +## Stateful Sessions + +OpenCode sessions are stateful — the harness keeps the full working context (files read, commands run, conclusions reached) between turns. The adapter surfaces the session id of every fresh run as a custom stream event named `opencode.session-id`; thread it back via `modelOptions.sessionId` to resume. When resuming, only the latest user message is sent — the harness already holds the prior context. + +Server endpoint: + +```typescript +import { + chat, + chatParamsFromRequest, + toServerSentEventsResponse, +} from "@tanstack/ai"; +import { opencodeText } from "@tanstack/ai-opencode"; + +export async function POST(request: Request) { + const params = await chatParamsFromRequest(request); + + // Extra fields the client puts in the connection `body` arrive here. + const sessionId = + typeof params.forwardedProps.sessionId === "string" + ? 
params.forwardedProps.sessionId + : undefined; + + const stream = chat({ + adapter: opencodeText("anthropic/claude-sonnet-4-5", { + directory: "/path/to/project", + permissionMode: "acceptEdits", + }), + messages: params.messages, + modelOptions: { sessionId }, + }); + + return toServerSentEventsResponse(stream); +} +``` + +Client (React) — capture the session id from the custom event and send it back on subsequent requests: + +```typescript +import { useState } from "react"; +import { useChat } from "@tanstack/ai-react"; +import { fetchServerSentEvents } from "@tanstack/ai-client"; + +function CodingAssistant() { + const [sessionId, setSessionId] = useState<string | undefined>(undefined); + + const { messages, sendMessage } = useChat({ + connection: fetchServerSentEvents("/api/chat", () => ({ + body: { sessionId }, + })), + onCustomEvent: (name, value) => { + if ( + name === "opencode.session-id" && + typeof value === "object" && + value !== null && + "sessionId" in value && + typeof value.sessionId === "string" + ) { + setSessionId(value.sessionId); + } + }, + }); + + // ... render messages; harness tool activity (bash, edit, read, ...) + // arrives as regular tool-call parts with results. +} +``` + +Sessions live on the server that ran them, so resuming only works against the same server instance (or a shared `baseUrl`). + +## Tools + +Two kinds of tools flow through this adapter: + +1. **Built-in harness tools** are executed by OpenCode itself and stream back as tool-call events with results already attached: `bash`, `edit`, `write`, `read`, `grep`, and the agent's running todo plan (surfaced as an `opencode.todo` custom event). Your code never executes them. + +2. **Your TanStack tools** are bridged *into* the harness: the adapter starts a short-lived Streamable-HTTP MCP server on `127.0.0.1` for the duration of the turn and registers it with OpenCode. 
Define tools as usual with `toolDefinition().server()`; tool-call events come back under the names you registered (OpenCode prefixes MCP tools `tanstack_…` internally, which the adapter strips). + +```typescript +import { z } from "zod"; +import { chat, toolDefinition } from "@tanstack/ai"; +import { opencodeText } from "@tanstack/ai-opencode"; + +const lookupTicket = toolDefinition({ + name: "lookup_ticket", + description: "Look up an issue ticket by id", + inputSchema: z.object({ ticketId: z.string() }), +}).server(async ({ ticketId }) => { + return { ticketId, status: "open", title: "Crash on startup" }; +}); + +const stream = chat({ + adapter: opencodeText("anthropic/claude-sonnet-4-5"), + messages: [{ role: "user", content: "What's the status of ticket T-123?" }], + tools: [lookupTicket], +}); +``` + +**Client-side and approval-gated tools are not supported.** The harness executes tools inside a live process, which cannot pause across HTTP requests to wait for a browser round-trip or a human approval. Passing a tool without a server `execute()` implementation — or one marked `needsApproval` — fails fast with a descriptive error. Run those tools outside the harness with a regular provider adapter. + +## Structured Output + +`structuredOutput()` is best-effort: OpenCode's prompt API has no native JSON-schema channel, so the schema is embedded as a prompt instruction in a fresh, one-shot session and the final text is parsed (markdown fences are stripped when present). It works for finalization after a chat, but a plain provider adapter (e.g. `@tanstack/ai-openai`) is the better choice when structured extraction is the primary job — it's faster, deterministic, and doesn't spawn a harness. + +## Limitations + +- **Server-only (Node).** The adapter spawns or attaches to an `opencode serve` process. +- **The harness owns the agent loop.** TanStack's agent-loop strategies and per-iteration middleware don't apply inside a harness turn. 
+- **No sampling controls.** `temperature`-style options don't exist here. +- **Sessions are server-local.** Resume requires hitting the same server instance (or a shared `baseUrl`). +- **Cold starts.** Spawning a server per turn adds first-token latency; point the adapter at a long-lived `baseUrl` to avoid it. diff --git a/docs/config.json b/docs/config.json index 3a18eb12e..568f1fe91 100644 --- a/docs/config.json +++ b/docs/config.json @@ -291,6 +291,27 @@ } ] }, + { + "label": "Sandboxes", + "children": [ + { + "label": "Overview", + "to": "sandbox/overview", + "addedAt": "2026-06-16", + "updatedAt": "2026-06-16" + } + ] + }, + { + "label": "Persistence", + "children": [ + { + "label": "Overview", + "to": "persistence/overview", + "addedAt": "2026-06-18" + } + ] + }, { "label": "Advanced", "children": [ @@ -458,6 +479,26 @@ "label": "OpenAI-Compatible", "to": "adapters/openai-compatible", "addedAt": "2026-06-01" + }, + { + "label": "Claude Code", + "to": "adapters/claude-code", + "addedAt": "2026-06-12" + }, + { + "label": "Codex", + "to": "adapters/codex", + "addedAt": "2026-06-12" + }, + { + "label": "Gemini CLI", + "to": "adapters/gemini-cli", + "addedAt": "2026-06-12" + }, + { + "label": "OpenCode", + "to": "adapters/opencode", + "addedAt": "2026-06-12" } ] }, diff --git a/docs/persistence/overview.md b/docs/persistence/overview.md new file mode 100644 index 000000000..88e422d32 --- /dev/null +++ b/docs/persistence/overview.md @@ -0,0 +1,164 @@ +--- +title: Persistence Overview +id: overview +--- + +Persistence makes a `chat()` run **durable** and **resumable** — without changing +how you write `chat()`. It is composable middleware, so it is entirely optional: +a run with no persistence middleware behaves exactly as before, and the same +middleware works for plain model adapters and for sandbox-backed harness adapters. 
+ +`withPersistence(...)`: + +- loads and saves the thread's message history (the server is authoritative), +- records each run (status, usage, errors), +- appends every streamed AG-UI event to an append-only **event log**, +- stamps each streamed chunk with an opaque **cursor** so a disconnected client + can resume, +- and (in agent mode) persists approvals and artifacts. + +## Installation + +Pick a backend. SQLite is the simplest durable option: + +```sh +npm install @tanstack/ai-persistence @tanstack/ai-persistence-sqlite +``` + +Other backends: `@tanstack/ai-persistence-postgres`, `-cloudflare`, `-drizzle`, +`-prisma`. For tests and prototypes, `memoryPersistence()` ships in +`@tanstack/ai-persistence`. + +## Server: a persisted, resumable endpoint + +```ts +import { chat } from '@tanstack/ai' +import { anthropicText } from '@tanstack/ai-anthropic/adapters' +import { withPersistence } from '@tanstack/ai-persistence' +import { sqlitePersistence } from '@tanstack/ai-persistence-sqlite' + +// Build once and reuse across requests. +const persistence = sqlitePersistence({ + path: '.tanstack-ai/state.sqlite', + mode: 'chat', +}) + +export async function POST(request: Request) { + // `runId` is reused on a resume; `cursor` is present only when resuming. + const { messages, threadId, runId, cursor } = await request.json() + + return chat({ + threadId, + runId, + cursor, + adapter: anthropicText({ model: 'claude-sonnet-4-6' }), + messages, + middleware: [withPersistence(persistence)], + }).toResponse() +} +``` + +When `cursor` is present, `chat()` replays the persisted events after that +cursor instead of re-running the adapter — so a reconnecting client catches up +without duplicating work or burning tokens. + +## Client: automatic resume + +The headless client tracks the last cursor it saw and can resume an interrupted +run. 
In React: + +```tsx +import { useChat } from '@tanstack/ai-react' + +function Chat() { + const chat = useChat({ + threadId: 'thread-123', + transport: { api: '/api/chat' }, + // Auto-resume is on by default; opt out with `autoResume: false`. + }) + + // Call on mount / when the tab comes back online to continue an + // interrupted run where it left off: + // useEffect(() => { chat.maybeAutoResume() }, []) + + return <>{/* ...render chat.messages... */}</> +} +``` + +`chat.getResumeState()` returns `{ runId, cursor }` for the active/interrupted +run (or `null`), which you can persist to resume across a full page reload; +`chat.resume()` continues it on demand. + +## Modes + +`mode` declares how much is persisted: + +| Mode | Persists | +| --- | --- | +| `'messages'` | thread message history only | +| `'chat'` | messages + runs + event log + usage (resumable conversations) | +| `'agent'` | everything in `chat`, plus sandbox records, approvals, and artifacts | + +## Bring your own database + +`sqlitePersistence` / `postgresPersistence` accept a connection (`{ path }` / +`{ connectionString }`) **or** an existing handle. Drizzle and Prisma users pass +their client directly: + +```ts +import { drizzlePersistence } from '@tanstack/ai-persistence-drizzle' +import { prismaPersistence } from '@tanstack/ai-persistence-prisma' + +const a = drizzlePersistence({ db, dialect: 'postgres', mode: 'chat' }) +const b = prismaPersistence({ prisma, dialect: 'postgres', mode: 'chat' }) +``` + +Raw drivers create and migrate their tables automatically (opt out with +`{ migrate: false }` and apply the exported `ddl(...)` / `migrate(...)` +yourself). Drizzle and Prisma own their own schema/migrations. 
+ +## Agent mode + sandboxes + +For sandbox-backed harness runs, `@tanstack/ai-sandbox-persistence` provides a +durable, SQL-backed sandbox store and a distributed lock so sandbox resume and +ensure-locking survive across processes: + +```ts +import { withSandbox, defineSandbox } from '@tanstack/ai-sandbox' +import { dockerSandbox } from '@tanstack/ai-sandbox-docker' +import { withPersistence } from '@tanstack/ai-persistence' +import { sqlitePersistence, createSqliteDriver } from '@tanstack/ai-persistence-sqlite' +import { + withPersistenceBridge, + createSqlSandboxStore, +} from '@tanstack/ai-sandbox-persistence' +import { claudeCodeText } from '@tanstack/ai-claude-code' + +const dbPath = '.tanstack-ai/state.sqlite' +const driver = createSqliteDriver({ path: dbPath }) +const persistence = sqlitePersistence({ path: dbPath, mode: 'agent' }) + +const repoSandbox = defineSandbox({ + id: 'repo-agent', + provider: dockerSandbox({ image: 'node:22' }), +}) + +chat({ + threadId, + runId, + adapter: claudeCodeText('sonnet'), + messages, + middleware: [ + withPersistence(persistence), + withPersistenceBridge({ + persistence, + sandboxStore: createSqlSandboxStore(driver), + }), + withSandbox(repoSandbox), + ], +}).toResponse() +``` + +A harness adapter (which runs the agent inside the still-running sandbox) can +re-attach to its process on resume and continue live after replaying the event +tail. diff --git a/docs/sandbox/overview.md b/docs/sandbox/overview.md new file mode 100644 index 000000000..314784b0c --- /dev/null +++ b/docs/sandbox/overview.md @@ -0,0 +1,309 @@ +--- +id: overview +title: Sandboxes Overview +--- + +Sandboxes let **harness adapters** (coding agents like Claude Code) run inside +an isolated environment — with a real filesystem, processes, and a cloned repo — +and stream their work back through `chat()`. The same code runs on your laptop, +in CI, in a Docker container, or on the edge: only the **provider** changes. 
+ +```ts +import { chat } from '@tanstack/ai' +import { claudeCodeText } from '@tanstack/ai-claude-code' +import { defineSandbox, defineWorkspace, withSandbox } from '@tanstack/ai-sandbox' +import { dockerSandbox } from '@tanstack/ai-sandbox-docker' + +const repoSandbox = defineSandbox({ + id: 'repo-agent', + provider: dockerSandbox({ image: 'node:22' }), + workspace: defineWorkspace({ + source: { type: 'git', url: 'https://github.com/TanStack/ai' }, + packageManager: 'pnpm', + setup: ['corepack enable', 'pnpm install'], + scripts: { test: 'pnpm test', typecheck: 'pnpm test:types' }, + secrets: { ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY ?? '' }, + }), + lifecycle: { reuse: 'thread', snapshot: 'after-setup', keepAlive: '30m' }, +}) + +chat({ + threadId, + adapter: claudeCodeText('sonnet'), + messages, + middleware: [withSandbox(repoSandbox)], +}) +``` + +## Mental model + +- **`chat()`** owns the execution pipeline. +- **The adapter** decides _how_ a chat executes. A **harness adapter** (e.g. + `claudeCodeText`) runs an external agent runtime and declares + `requires: [SandboxCapability]` — `chat()` errors at the call site if no + middleware provides a sandbox. +- **`withSandbox(...)`** is middleware that _provides_ the `SandboxCapability`: + it resumes-or-creates the sandbox, bootstraps the workspace, and tears it + down per the lifecycle. + +```txt +chat({ adapter: claudeCodeText(), middleware: [withSandbox(repoSandbox)] }) + │ + ├─ withSandbox.setup → ensure sandbox (resume → restore snapshot → create + bootstrap), provide handle + ├─ adapter.chatStream → spawn `claude` INSIDE the sandbox, stream its events back as AG-UI chunks + └─ withSandbox.onFinish→ snapshot / destroy per lifecycle +``` + +## Providers + +A provider owns the isolation primitive. All implement the same +`SandboxProvider` / `SandboxHandle` contract, so adapters and workspaces are +provider-agnostic. 
+ +| Provider | Package | Isolation | Notes | +| --- | --- | --- | --- | +| Local process | `@tanstack/ai-sandbox-local-process` | none (host) | The fast, no-Docker dev loop. Trusted/dev use only. | +| Docker | `@tanstack/ai-sandbox-docker` | container | Real isolation; commit-based snapshots, fork, resume-by-id. | + +```ts +import { localProcessSandbox } from '@tanstack/ai-sandbox-local-process' +import { dockerSandbox } from '@tanstack/ai-sandbox-docker' + +const dev = localProcessSandbox() // runs on your host +const isolated = dockerSandbox({ image: 'node:22' }) // runs in a container +``` + +Providers declare what they support via `capabilities()` +(`fs`, `exec`, `env`, `ports`, `backgroundProcesses`, `snapshots`, +`networkPolicy`, `durableFilesystem`, `fork`). Code that uses an optional +capability checks the flag first and degrades gracefully; calling an +unsupported optional method throws `UnsupportedCapabilityError`. + +## Workspace + +`defineWorkspace()` describes what the agent sees. It is portable; each harness +adapter projects it into its own native format. + +```ts +import { defineWorkspace } from '@tanstack/ai-sandbox' + +defineWorkspace({ + // Where the working tree comes from. + source: { type: 'git', url: 'https://github.com/owner/repo', ref: 'main' }, + // Package manager (auto-detected from the lockfile when omitted). + packageManager: 'pnpm', + // Commands run once during bootstrap. + setup: ['corepack enable', 'pnpm install'], + // Named commands the agent can run. + scripts: { test: 'pnpm test', build: 'pnpm build' }, + // Injected into the sandbox env at create/resume — never persisted to + // snapshots, the sandbox store, or the event log. + secrets: { ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY ?? '' }, +}) +``` + +## Policy + +`defineSandboxPolicy()` is a portable allow/ask/deny description that each +harness adapter maps onto its native permission system. Precedence is +`deny` > `ask` > `allow`, with a configurable `default`. 
+ +```ts group=overview +import { defineSandboxPolicy, defineSandbox } from '@tanstack/ai-sandbox' + +const policy = defineSandboxPolicy({ + commands: { + allow: ['pnpm test', 'pnpm typecheck', 'git diff'], + ask: ['pnpm install', 'curl *'], + deny: ['sudo *', 'rm -rf *'], + }, + capabilities: { fileWrite: 'allow', network: 'ask' }, + default: 'ask', +}) + +const sandbox = defineSandbox({ id: 'repo', provider, policy /* … */ }) +``` + +## Tools + +The agent always has its own native tools (Bash, file edits, search) inside the +sandbox. In addition, `chat()`-provided server tools are **bridged** to the +in-sandbox agent over a host-side MCP tool-proxy: the agent calls them, each call +is proxied back to the host where the tool's `execute()` runs (keeping its +DB/secrets/closures), and the result is returned into the sandbox. The bridge is +gated by a per-run bearer token; the sandbox reaches the host on `localhost` +(local-process) or `host.docker.internal` (Docker). + +```ts +chat({ + threadId, + adapter: claudeCodeText('sonnet'), + messages, + tools: [getTodos.server(async ({ userId }) => db.todos.find({ userId }))], + middleware: [withSandbox(sandbox)], +}) +``` + +## File-event hooks + +Listen to files being created, changed, or deleted inside a sandbox — e.g. to +watch what the agent edits as it works. The watcher is provider-agnostic: it +uses native OS watching where the provider supports it (local-process) and falls +back to a portable `find` poll everywhere else (Docker and other exec-only +providers), with no extra dependencies or image changes. 
+ +Hooks are declared directly on `defineSandbox({ hooks })` (sandbox-scoped, fire +once per file event regardless of how many runs share the sandbox) or on any +chat middleware via the `sandbox` group (run-scoped, fired per-run): + +```ts +import { defineSandbox, defineChatMiddleware, withSandbox } from '@tanstack/ai-sandbox' +import { dockerSandbox } from '@tanstack/ai-sandbox-docker' + +// Sandbox-scoped hooks — declared once on the definition. +const repoSandbox = defineSandbox({ + id: 'repo-agent', + provider: dockerSandbox({ image: 'node:22' }), + hooks: { + // catch-all: fires for every event + onFile: (e) => console.log(`[${e.type}] ${e.path}`), + // type-specific variants + onFileCreate: (e) => console.log('created', e.path), + onFileChange: (e) => console.log('changed', e.path), + onFileDelete: (e) => console.log('deleted', e.path), + // lifecycle + onReady: (handle) => console.log('sandbox ready', handle.id), + onError: (err) => console.error('sandbox error', err), + onDestroy: () => console.log('sandbox destroyed'), + }, +}) +``` + +To handle file events inside a run-scoped middleware (e.g. for per-request +audit logging), use the `sandbox` hook group on `defineChatMiddleware`: + +```ts +const auditMiddleware = defineChatMiddleware({ + name: 'audit', + // ctx is the ChatMiddlewareContext for the current run + sandbox: { + onFile: (ctx, e) => console.log(ctx.runId, e.type, e.path), + onFileCreate: (ctx, e) => db.log({ run: ctx.runId, event: e }), + }, +}) +``` + +Both hook groups fire server-side. The engine automatically emits one `CUSTOM` +`sandbox.file` event per change into the stream — no extra middleware needed. 
+Read it from the stream on the client: + +```ts +for await (const chunk of stream) { + if (chunk.type === 'CUSTOM' && chunk.name === 'sandbox.file') { + const value = chunk.value + if ( + value !== null && + typeof value === 'object' && + 'type' in value && + 'path' in value + ) { + console.log('file event', value) // { type, path, timestamp } + } + } +} +``` + +To disable file watching for a sandbox entirely, set `fileEvents: false`: + +```ts +const sandbox = defineSandbox({ + id: 'quiet-agent', + provider: dockerSandbox({ image: 'node:22' }), + fileEvents: false, // watcher not started; no sandbox.file events emitted +}) +``` + +To log sandbox internals (watcher start/stop, event dispatch, lifecycle +transitions), pass the `sandbox` debug category: + +```ts +chat({ threadId, adapter, messages, debug: true }) +// or selectively: +chat({ threadId, adapter, messages, debug: { sandbox: true } }) +``` + +`watchWorkspace()` remains available as a low-level building block for using +the watcher outside a `chat()` run: + +```ts +import { watchWorkspace } from '@tanstack/ai-sandbox' + +const handle = await sandbox.ensure({ threadId, runId }) +const watcher = await watchWorkspace(handle, { + onEvent: (event) => { + // event.type is 'create' | 'change' | 'delete' + console.log(`${event.type} ${event.path}`) + }, + ignore: ['.git', 'node_modules'], // default +}) +// …do work outside a chat run… +await watcher.stop() +``` + +## Lifecycle & resume + +```ts +lifecycle: { + reuse: 'thread', // one sandbox per threadId ('none' = fresh per run) + snapshot: 'after-setup', // snapshot once bootstrapped (provider-permitting) + keepAlive: '30m', // hint to keep the sandbox warm between runs + destroyOnComplete: false, // keep it for the next run +} +``` + +A sandbox is keyed by a compound `sandboxInstanceKey` = +`hash(threadId + sandbox.id + provider + workspaceHash + tenant?)`, so changing +the repo, setup, image, or tenant safely starts a fresh sandbox rather than +resuming
a stale one. The ensure order is: **resume the running sandbox → +restore the latest snapshot → create fresh and bootstrap**. Providers without +durable disk or snapshots (e.g. ephemeral containers) re-create + re-bootstrap +under the same identity. + +## Events + +Harness runs stream standard AG-UI `StreamChunk`s (text, tool calls, reasoning, +run lifecycle) plus namespaced `CUSTOM` events for sandbox-specifics. Today the +in-sandbox Claude Code adapter emits: + +- `claude-code.session-id` — the resumable harness session id. +- `file.changed` — the working-tree `git diff` after the run. +- `sandbox.file` — emitted per file create/change/delete automatically when a + sandbox is active (see [File-event hooks](#file-event-hooks)). + +```ts +for await (const chunk of stream) { + if (chunk.type === 'CUSTOM' && chunk.name === 'file.changed') { + const value = chunk.value + if (value !== null && typeof value === 'object' && 'diff' in value) { + console.log(value.diff) + } + } +} +``` + +## Try it + +A runnable end-to-end demo lives at `examples/sandbox-coding-agent`: it clones a +tiny repo with a deliberate bug into a sandbox, asks Claude Code to fix it, +streams the agent's output, and prints the resulting diff. Run it with Docker or +with `SANDBOX=local` on your host (requires `ANTHROPIC_API_KEY`). + +`examples/sandbox-issue-triage` goes further: it fetches the first open issue on +`TanStack/ai`, clones the repo into a sandbox, runs Claude Code to triage it, and +writes a Markdown report locally — using **file-event hooks** to log the agent's +edits live. It ships two entrypoints, `pnpm start:process` and `pnpm start:docker`. + +> **Persistence-ready:** the sandbox layer ships with in-memory stores for +> resume bookkeeping. A future persistence package can provide durable +> `SandboxStore` / `LockStore` implementations (and event-log replay) by +> supplying those optional capabilities — no changes to the sandbox layer. 
diff --git a/examples/sandbox-coding-agent/README.md b/examples/sandbox-coding-agent/README.md new file mode 100644 index 000000000..78676301e --- /dev/null +++ b/examples/sandbox-coding-agent/README.md @@ -0,0 +1,85 @@ +# Sandbox coding-agent demo + +Runs **Claude Code inside a sandbox** to fix a bug, end-to-end, through the +public `chat()` API. Use this to manually verify the sandbox layer locally. + +It bootstraps a tiny git repo with a deliberate bug in `add.js` +(`add(a, b)` returns `a - b`), asks Claude Code to fix it, streams the agent's +output, and prints the resulting `git diff`. + +## Prerequisites + +1. **Build the workspace packages first** (examples consume built `dist`): + + ```bash + # from the repo root + pnpm install + pnpm --filter "@tanstack/ai-sandbox..." --filter "@tanstack/ai-claude-code..." --filter "@tanstack/ai" build + # (or simply: pnpm build:all) + ``` + +2. An **Anthropic API key**: `export ANTHROPIC_API_KEY=sk-ant-...` + +## Run it — Docker (isolated, recommended) + +Needs a running Docker daemon. The container image needs `git` + `node` +(default `node:22`); the demo installs the `claude` CLI during bootstrap. + +```bash +cd examples/sandbox-coding-agent +pnpm start +``` + +Override the image (must have git + node): + +```bash +SANDBOX_IMAGE=node:22 pnpm start +``` + +> First run pulls the image and `npm install -g @anthropic-ai/claude-code`, so +> it takes a minute. Subsequent runs on the same `threadId` reuse the container. + +## Run it — local process (no Docker) + +Runs the agent directly on your host (no isolation — dev only). Requires the +`claude` CLI, `git`, and `node` on your `PATH`. A local `claude` login works in +place of `ANTHROPIC_API_KEY`. + +```bash +cd examples/sandbox-coding-agent +SANDBOX=local pnpm start +``` + +## What you should see + +- Streamed reasoning/text from Claude Code as it inspects and edits `add.js`. +- `↳ [tool] …` lines as the agent uses its native tools (Read/Edit/Bash). 
+- A final `===== git diff =====` block showing `- return a - b` → `+ return a + b`. +- `✅ done`. + +## How it works + +```ts +const sandbox = defineSandbox({ + id: 'coding-agent-demo', + provider: dockerSandbox({ image: 'node:22' }), // or localProcessSandbox() + workspace: defineWorkspace({ + source: { type: 'none' }, + setup: ['npm install -g @anthropic-ai/claude-code' /* scaffold repo */], + secrets: { ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY! }, + }), + lifecycle: { reuse: 'thread' }, +}) + +chat({ + threadId, + adapter: claudeCodeText('sonnet'), // declares requires:[SandboxCapability] + messages: [{ role: 'user', content: 'Fix the bug in add.js' }], + middleware: [withSandbox(sandbox)], +}) +``` + +`withSandbox` resumes-or-creates the sandbox and bootstraps the workspace; the +`claudeCodeText` adapter spawns `claude -p --output-format stream-json` **inside** +the sandbox, streams its events back as AG-UI chunks, and emits a `file.changed` +event with the diff. diff --git a/examples/sandbox-coding-agent/package.json b/examples/sandbox-coding-agent/package.json new file mode 100644 index 000000000..fe393636e --- /dev/null +++ b/examples/sandbox-coding-agent/package.json @@ -0,0 +1,18 @@ +{ + "name": "@tanstack/example-sandbox-coding-agent", + "private": true, + "type": "module", + "scripts": { + "start": "tsx run.ts" + }, + "dependencies": { + "@tanstack/ai": "workspace:*", + "@tanstack/ai-claude-code": "workspace:*", + "@tanstack/ai-sandbox": "workspace:*", + "@tanstack/ai-sandbox-docker": "workspace:*", + "@tanstack/ai-sandbox-local-process": "workspace:*" + }, + "devDependencies": { + "tsx": "^4.21.0" + } +} diff --git a/examples/sandbox-coding-agent/run.ts b/examples/sandbox-coding-agent/run.ts new file mode 100644 index 000000000..6a68e779d --- /dev/null +++ b/examples/sandbox-coding-agent/run.ts @@ -0,0 +1,120 @@ +/** + * Local end-to-end demo: run Claude Code INSIDE a sandbox to fix a bug. + * + * What it does: + * 1. 
Spins up a sandbox (Docker by default, or the local host process). + * 2. Bootstraps a tiny git repo containing a deliberate bug in `add.js` + * (`add(a, b)` returns `a - b`). + * 3. Runs `chat()` with the in-sandbox `claudeCodeText` harness adapter and + * asks it to fix the bug — Claude Code edits the file using its OWN native + * tools inside the sandbox. + * 4. Streams the agent's text + tool activity, then prints the git diff it + * produced. + * + * Run it: see README.md. Requires ANTHROPIC_API_KEY (or a local `claude` login + * for SANDBOX=local). + */ +import { chat } from '@tanstack/ai' +import { claudeCodeText } from '@tanstack/ai-claude-code' +import { + defineSandbox, + defineWorkspace, + withSandbox, +} from '@tanstack/ai-sandbox' +import { dockerSandbox } from '@tanstack/ai-sandbox-docker' +import { localProcessSandbox } from '@tanstack/ai-sandbox-local-process' +import type { StreamChunk } from '@tanstack/ai' + +const apiKey = process.env.ANTHROPIC_API_KEY +const useLocal = process.env.SANDBOX === 'local' + +if (!apiKey && !useLocal) { + console.error( + 'Set ANTHROPIC_API_KEY (or use SANDBOX=local with a local `claude` login).', + ) + process.exit(1) +} + +// Scaffold a tiny git repo with a deliberate bug in add.js. +const scaffold = [ + 'git init -q', + 'git config user.email demo@example.com', + 'git config user.name tanstack-ai-demo', + `printf 'export function add(a, b) {\\n return a - b\\n}\\n' > add.js`, + 'git add -A', + 'git commit -q -m "initial (with bug)"', +] + +const provider = useLocal + ? // Runs on the host — needs `claude`, `git`, and `node` on your PATH. + localProcessSandbox() + : // Runs in an isolated Docker container. The image needs git + node; we + // install the claude CLI during setup. Override with SANDBOX_IMAGE. + dockerSandbox({ image: process.env.SANDBOX_IMAGE ?? 'node:22' }) + +const setup = useLocal + ? 
scaffold + : ['npm install -g @anthropic-ai/claude-code', ...scaffold] + +const sandbox = defineSandbox({ + id: 'coding-agent-demo', + provider, + workspace: defineWorkspace({ + source: { type: 'none' }, + setup, + // Injected into the sandbox env (never persisted to snapshots/logs). + secrets: apiKey ? { ANTHROPIC_API_KEY: apiKey } : {}, + }), + lifecycle: { reuse: 'thread' }, +}) + +async function main(): Promise { + console.log( + `\n▶ Sandbox: ${useLocal ? 'local-process (host)' : 'docker'} — bootstrapping + running Claude Code...\n`, + ) + + const stream = chat({ + threadId: `demo-${Date.now()}`, + adapter: claudeCodeText('sonnet'), + messages: [ + { + role: 'user', + content: + 'There is a bug in add.js: add(a, b) returns a - b instead of a + b. ' + + 'Fix it so it correctly adds the two numbers.', + }, + ], + middleware: [withSandbox(sandbox)], + }) as AsyncIterable + + for await (const chunk of stream) { + const c = chunk as Record & { type: string } + switch (c.type) { + case 'TEXT_MESSAGE_CONTENT': + process.stdout.write((c.delta as string) ?? '') + break + case 'TOOL_CALL_START': + console.log(`\n ↳ [tool] ${(c.toolCallName as string) ?? ''}`) + break + case 'CUSTOM': + if (c.name === 'file.changed') { + const value = c.value as { diff?: string } + console.log('\n\n===== git diff =====\n' + (value.diff ?? 
'(none)')) + } + break + case 'RUN_FINISHED': + console.log('\n\n✅ done') + break + case 'RUN_ERROR': + console.error('\n\n❌ error:', c.message) + break + default: + break + } + } +} + +main().catch((error) => { + console.error(error) + process.exit(1) +}) diff --git a/examples/sandbox-issue-triage/README.md b/examples/sandbox-issue-triage/README.md new file mode 100644 index 000000000..992361014 --- /dev/null +++ b/examples/sandbox-issue-triage/README.md @@ -0,0 +1,55 @@ +# Sandbox issue triage + +Fetches the first **open** issue on [`TanStack/ai`](https://github.com/TanStack/ai/issues), +spins up a sandbox with the repo cloned in, runs **Claude Code inside the +sandbox** to investigate and triage the issue, and writes a Markdown report to +your local `reports/` directory. + +It demonstrates three pieces of the sandbox layer together: + +- **`@tanstack/ai-sandbox`** workspace bootstrap (`githubRepo` source → clone). +- The **`@tanstack/ai-claude-code`** harness adapter running the `claude` CLI + inside the sandbox. +- **Sandbox file-event hooks** — `defineSandbox({ hooks })` logs the agent's + create/change/delete events live; `withSandbox` owns the watcher and forwards + events to those hooks. File events are also automatically streamed to clients + as CUSTOM `sandbox.file` chunks. The observed events are appended to the + report. + +Two entrypoints, same logic ([`triage.ts`](./triage.ts)): + +| Command | Sandbox | Isolation | +| -------------------- | -------------------- | -------------------- | +| `pnpm start:process` | local-process (host) | none — fast dev loop | +| `pnpm start:docker` | Docker container | full | + +## Prerequisites + +- **Both:** `ANTHROPIC_API_KEY` in your environment (the local-process variant + can instead use an existing `claude` login). +- **`start:process`:** `git`, `node`, and the `claude` CLI on your PATH. +- **`start:docker`:** a running Docker daemon. 
The base image (`node:22`) + already has `git` + `node`; the `claude` CLI is installed during setup. +- Optional: `GITHUB_TOKEN` to avoid GitHub API rate limits. +- Optional: `SANDBOX_IMAGE` to override the Docker base image. + +## Run + +```bash +# from the repo root, build the workspace packages first +pnpm build + +cd examples/sandbox-issue-triage +pnpm install + +# local-process sandbox +ANTHROPIC_API_KEY=sk-... pnpm start:process + +# docker sandbox +ANTHROPIC_API_KEY=sk-... pnpm start:docker +``` + +The report lands at `reports/issue-<number>-<provider>.md`. + +> Note: the workspace clones the **default branch** of `TanStack/ai` into the +> sandbox. The first run pulls the full repo, so give it a moment. diff --git a/examples/sandbox-issue-triage/docker.ts b/examples/sandbox-issue-triage/docker.ts new file mode 100644 index 000000000..70b575f51 --- /dev/null +++ b/examples/sandbox-issue-triage/docker.ts @@ -0,0 +1,25 @@ +/** + * Issue triage with the DOCKER sandbox (isolated container). + * + * Prerequisites: a running Docker daemon and ANTHROPIC_API_KEY. The base image + * (`node:22` by default; override with SANDBOX_IMAGE) already ships git + node; + * we install the `claude` CLI during setup. Run: `pnpm start:docker`. + */ +import { dockerSandbox } from '@tanstack/ai-sandbox-docker' +import { runTriage } from './triage' + +const apiKey = process.env.ANTHROPIC_API_KEY +if (!apiKey) { + console.error('Set ANTHROPIC_API_KEY to run the Docker triage example.') + process.exit(1) +} + +runTriage({ + provider: dockerSandbox({ image: process.env.SANDBOX_IMAGE ??
'node:22' }), + providerLabel: 'docker', + setup: ['npm install -g @anthropic-ai/claude-code'], + secrets: { ANTHROPIC_API_KEY: apiKey }, +}).catch((error) => { + console.error(error) + process.exit(1) +}) diff --git a/examples/sandbox-issue-triage/package.json b/examples/sandbox-issue-triage/package.json new file mode 100644 index 000000000..4ea3af36f --- /dev/null +++ b/examples/sandbox-issue-triage/package.json @@ -0,0 +1,19 @@ +{ + "name": "@tanstack/example-sandbox-issue-triage", + "private": true, + "type": "module", + "scripts": { + "start:process": "tsx process.ts", + "start:docker": "tsx docker.ts" + }, + "dependencies": { + "@tanstack/ai": "workspace:*", + "@tanstack/ai-claude-code": "workspace:*", + "@tanstack/ai-sandbox": "workspace:*", + "@tanstack/ai-sandbox-docker": "workspace:*", + "@tanstack/ai-sandbox-local-process": "workspace:*" + }, + "devDependencies": { + "tsx": "^4.21.0" + } +} diff --git a/examples/sandbox-issue-triage/process.ts b/examples/sandbox-issue-triage/process.ts new file mode 100644 index 000000000..6285cb3be --- /dev/null +++ b/examples/sandbox-issue-triage/process.ts @@ -0,0 +1,21 @@ +/** + * Issue triage with the LOCAL-PROCESS sandbox (runs on the host — no isolation). + * + * Prerequisites on your PATH: `git`, `node`, and the `claude` CLI (logged in, or + * set ANTHROPIC_API_KEY). Run: `pnpm start:process`. + */ +import { localProcessSandbox } from '@tanstack/ai-sandbox-local-process' +import { runTriage } from './triage' + +const apiKey = process.env.ANTHROPIC_API_KEY + +runTriage({ + provider: localProcessSandbox(), + providerLabel: 'process', + // Host already has the tooling; nothing to install. + setup: [], + secrets: apiKey ? 
{ ANTHROPIC_API_KEY: apiKey } : {}, +}).catch((error) => { + console.error(error) + process.exit(1) +}) diff --git a/examples/sandbox-issue-triage/triage.ts b/examples/sandbox-issue-triage/triage.ts new file mode 100644 index 000000000..6dd62e797 --- /dev/null +++ b/examples/sandbox-issue-triage/triage.ts @@ -0,0 +1,230 @@ +/** + * Shared logic for the issue-triage demo (used by process.ts and docker.ts). + * + * Flow: + * 1. Fetch the first OPEN issue on TanStack/ai from the GitHub API. + * 2. Spin up a sandbox with the repo cloned in. + * 3. Attach file-event hooks on the sandbox definition so we see the agent's edits live. + * 4. Run Claude Code INSIDE the sandbox to investigate the issue and write + * `ISSUE-REPORT.md` at the repo root. + * 5. Read that report back out of the sandbox and persist it to ./reports/ + * on the HOST, with a header + the observed file events appended. + */ +import { mkdir, writeFile } from 'node:fs/promises' +import { dirname, join } from 'node:path' +import { fileURLToPath } from 'node:url' +import { chat } from '@tanstack/ai' +import { claudeCodeText } from '@tanstack/ai-claude-code' +import { + defineSandbox, + defineWorkspace, + githubRepo, + withSandbox, +} from '@tanstack/ai-sandbox' +import type { FileEvent, SandboxProvider } from '@tanstack/ai-sandbox' +import type { StreamChunk } from '@tanstack/ai' + +const REPO = 'TanStack/ai' + +export interface GitHubIssue { + number: number + title: string + body: string + url: string +} + +/** Fetch the oldest open issue (filtering out pull requests). 
*/ +export async function fetchFirstOpenIssue(): Promise { + const headers: Record = { + Accept: 'application/vnd.github+json', + 'User-Agent': 'tanstack-ai-sandbox-issue-triage', + } + if (process.env.GITHUB_TOKEN) { + headers.Authorization = `Bearer ${process.env.GITHUB_TOKEN}` + } + const res = await fetch( + `https://api.github.com/repos/${REPO}/issues?state=open&sort=created&direction=asc&per_page=20`, + { headers }, + ) + if (!res.ok) { + throw new Error( + `GitHub API ${res.status} ${res.statusText}: ${await res.text()}`, + ) + } + const items = (await res.json()) as Array<{ + number: number + title: string + body: string | null + html_url: string + pull_request?: unknown + }> + const issue = items.find((item) => item.pull_request === undefined) + if (!issue) throw new Error(`No open issues found on ${REPO}.`) + return { + number: issue.number, + title: issue.title, + body: issue.body ?? '', + url: issue.html_url, + } +} + +export interface RunTriageOptions { + provider: SandboxProvider + /** Short label used in logs + the report filename (e.g. 'process', 'docker'). */ + providerLabel: string + /** Bootstrap commands run once after the repo is cloned. */ + setup: Array + /** Secrets injected into the sandbox env (never persisted). */ + secrets: Record +} + +const REPORT_FILE = 'ISSUE-REPORT.md' + +function buildPrompt(issue: GitHubIssue): string { + return [ + `You are triaging a GitHub issue in the ${REPO} repository, which is checked`, + `out in your current working directory.`, + '', + `Issue #${issue.number}: ${issue.title}`, + `URL: ${issue.url}`, + '', + 'Issue body:', + issue.body || '(no description provided)', + '', + 'Investigate the repository to understand and triage this issue. Do NOT', + 'change any source code — this is analysis only. 
When done, WRITE your', + `findings to a file named ${REPORT_FILE} in the current working directory`, + '(the repository root), as Markdown with these sections:', + '', + '## Summary', + '## Root cause / analysis', + '## Affected files (with paths)', + '## Proposed fix', + '## Confidence', + ].join('\n') +} + +/** Run one triage end-to-end against the given provider; returns the report path. */ +export async function runTriage(options: RunTriageOptions): Promise { + const issue = await fetchFirstOpenIssue() + console.log( + `\n▶ [${options.providerLabel}] Triaging issue #${issue.number}: ${issue.title}\n ${issue.url}\n`, + ) + + // Collect file events via declarative hooks on the sandbox definition. + const fileEvents: Array = [] + const sandbox = defineSandbox({ + id: `issue-triage-${options.providerLabel}`, + provider: options.provider, + workspace: defineWorkspace({ + source: githubRepo({ repo: REPO }), + setup: options.setup, + secrets: options.secrets, + }), + lifecycle: { reuse: 'thread' }, + hooks: { + onFile: (e) => { + fileEvents.push(e) + const mark = e.type === 'create' ? '+' : e.type === 'delete' ? '-' : '~' + console.log(` [${mark}] ${e.type} ${e.path}`) + }, + }, + }) + + const threadId = `triage-${options.providerLabel}-${issue.number}` + + console.log(' ⧗ Bootstrapping sandbox (clone + setup)…') + + let assistantText = '' + const stream = chat({ + threadId, + adapter: claudeCodeText('sonnet'), + messages: [{ role: 'user', content: buildPrompt(issue) }], + // withSandbox provides the handle and starts the file-event watcher; + // file events are forwarded to the hooks declared above. + middleware: [withSandbox(sandbox)], + }) as AsyncIterable + + for await (const chunk of stream) { + const c = chunk as Record & { type: string } + switch (c.type) { + case 'TEXT_MESSAGE_CONTENT': { + const delta = (c.delta as string) ?? 
'' + assistantText += delta + process.stdout.write(delta) + break + } + case 'TOOL_CALL_START': + console.log(`\n ↳ [tool] ${(c.toolCallName as string) ?? ''}`) + break + case 'CUSTOM': + if (c.name === 'sandbox.file') { + const value = c.value as FileEvent + console.log(` ⟳ [stream] ${value.type} ${value.path}`) + } + break + case 'RUN_FINISHED': + console.log('\n\n✅ agent finished') + break + case 'RUN_ERROR': + console.error('\n\n❌ error:', c.message) + break + default: + break + } + } + + // Obtain the handle after the run (reuse:'thread' returns the same sandbox). + const ensureCtx = { threadId, runId: 'triage-read' } + const handle = await sandbox.ensure(ensureCtx) + + // Read the report back out of the sandbox; fall back to the streamed text. + let report: string + try { + report = await handle.fs.read(REPORT_FILE) + } catch { + report = '' + } + if (report.trim() === '') { + report = assistantText.trim() || '_(the agent produced no report)_' + } + + const observed = + fileEvents.length === 0 + ? 
'_(none observed)_' + : fileEvents.map((e) => `- \`${e.type}\` ${e.path}`).join('\n') + + const stamp = new Date().toISOString() + const out = [ + `# Issue triage — ${REPO}#${issue.number}`, + '', + `- **Issue:** [${issue.title}](${issue.url})`, + `- **Sandbox provider:** ${options.providerLabel} (${handle.provider})`, + `- **Generated:** ${stamp}`, + '', + '---', + '', + report.trim(), + '', + '---', + '', + '## Observed file events (sandbox hooks)', + '', + observed, + '', + ].join('\n') + + const here = dirname(fileURLToPath(import.meta.url)) + const reportPath = join( + here, + 'reports', + `issue-${issue.number}-${options.providerLabel}.md`, + ) + await mkdir(dirname(reportPath), { recursive: true }) + await writeFile(reportPath, out, 'utf8') + + await sandbox.destroy(ensureCtx) + + console.log(`\n📝 Report written to ${reportPath}\n`) + return reportPath +} diff --git a/examples/ts-react-coding-agent/README.md b/examples/ts-react-coding-agent/README.md new file mode 100644 index 000000000..b40587d90 --- /dev/null +++ b/examples/ts-react-coding-agent/README.md @@ -0,0 +1,148 @@ +# TanStack AI — Coding Agent Example + +A React (TanStack Start) app that drives **coding-agent harnesses** through +TanStack AI — [Claude Code](https://docs.anthropic.com/en/docs/claude-code) +via `@tanstack/ai-claude-code`, [Codex](https://developers.openai.com/codex) +via `@tanstack/ai-codex`, +[Gemini CLI](https://github.com/google-gemini/gemini-cli) via +`@tanstack/ai-gemini-cli`, and [OpenCode](https://opencode.ai) via +`@tanstack/ai-opencode`, switchable from a dropdown. + +Unlike a normal chat example, the agent here runs its own loop server-side +and executes its own tools — reading, searching, and (in Edit mode) editing +the files in `workspace/`. Its tool activity streams into the UI as a +timeline of resolved tool calls. 
+ +## What it demonstrates + +- **Session resume** — the server emits the harness session id via a + `<harness>.session-id` custom event (`claude-code.session-id`, + `codex.session-id`, `gemini-cli.session-id`, `opencode.session-id`); the + client pins it and sends + it back through `forwardedProps` → `modelOptions.sessionId`, so follow-ups + continue the same stateful session. Switching agents resets the session. +- **Harness tool timeline** — built-in tools (Read, Grep, Edit, + command_execution, ...) arrive as already-resolved tool-call parts and + render with their inputs/outputs. Note that Codex streams text + message-at-a-time (its SDK has no token deltas), while Claude Code, + Gemini CLI, and OpenCode stream token-by-token. +- **Permission modes** — a Read-only/Edit toggle maps to each harness's + knobs: `disallowedTools` vs `permissionMode: 'acceptEdits'` for Claude + Code, `sandboxMode: 'read-only'` vs `'workspace-write'` for Codex, and + the default-deny vs `acceptEdits` permission policy for Gemini CLI and + OpenCode. With Claude Code, Gemini CLI, and OpenCode, ask it to run a + shell command and watch the denial show up in the timeline. +- **Tool bridging** — `lookup_style_guide` is an ordinary TanStack server + tool the harness calls from inside its own loop (in-process MCP for + Claude Code; a localhost Streamable-HTTP MCP bridge for Codex, + Gemini CLI, and OpenCode). +- **Sandboxed cwd** — the agent only works inside `workspace/`. + +## Running + +This is a server-spawning example: each chat turn launches the selected +harness as a subprocess on your machine. You only need to set up the agent(s) +you actually want to try — the others stay selectable in the UI and pop a +setup dialog explaining what's missing (see [Runtime config detection](#runtime-config-detection)). + +### 1.
Set up the agent(s) you want + +**Claude Code** ([docs](https://docs.anthropic.com/en/docs/claude-code)) + +```bash +npm i -g @anthropic-ai/claude-code # install the CLI +claude login # log in with your Claude subscription +# …or, instead of `claude login`, set an API key in the server env: +export ANTHROPIC_API_KEY=sk-ant-… +``` + +The `claude` binary is spawned per turn, so the CLI must be on `PATH`. + +**Codex** ([docs](https://developers.openai.com/codex)) + +```bash +codex login # log in interactively +# …or set an API key in the server env (forwarded as CODEX_API_KEY): +export OPENAI_API_KEY=sk-… +``` + +The `codex` binary ships with `@openai/codex-sdk`, so there's nothing extra to +install. Note: a **ChatGPT-account** login can't run codex models in headless +mode — use an API key or an entitled account, otherwise the run fails with an +entitlement error from OpenAI. + +**Gemini CLI** ([docs](https://github.com/google-gemini/gemini-cli)) + +```bash +npm i -g @google/gemini-cli # ACP mode needs a current build +gemini # log in with Google once (interactive) +``` + +Headless ACP runs can't show an interactive auth picker, so you must tell the +adapter which method to use via `GEMINI_ACP_AUTH_METHOD` (e.g. `oauth-personal` +for a Google login, or `gemini-api-key`). If the CLI refuses the scratch +workspace as untrusted, also export `GEMINI_CLI_TRUST_WORKSPACE=true`. So, for +a Google-login setup, start the dev server like this: + +```bash +GEMINI_ACP_AUTH_METHOD=oauth-personal GEMINI_CLI_TRUST_WORKSPACE=true pnpm dev +``` + +To use an API key instead, set `GEMINI_API_KEY` and +`GEMINI_ACP_AUTH_METHOD=gemini-api-key`.
+ +**OpenCode** ([docs](https://opencode.ai/docs)) + +```bash +npm i -g opencode-ai # install the CLI +opencode auth login # authenticate a provider (interactive) +# …or set the provider API key in the server env (this example uses Anthropic): +export ANTHROPIC_API_KEY=sk-ant-… +``` + +The adapter spawns `opencode serve` per turn, so the CLI must be on `PATH`. The +example drives the `anthropic/claude-sonnet-4-5` model; point it at a different +`provider/model` in `src/routes/api.chat.ts` to use another provider. + +### 2. Install and run + +```bash +pnpm install +pnpm dev +``` + +### 3. Try it out + +Open http://localhost:3000 and try: + +- "What files are in this project, and what do they do?" (Read-only) +- Switch to **Edit mode**: "Fix the bug in temperature.js" — note it + calls `lookup_style_guide` first. +- "Now update todo.md to check off what you did" — same session, no + re-explaining. + +Reset the demo workspace afterwards with `git checkout -- workspace/`. + +## Runtime config detection + +Environment variables and CLI logins live on the server, not in the browser, so +the route loader calls a `createServerFn` (`src/lib/agent-status.ts`) that +reports which agents are actually runnable. Every agent stays selectable in the +dropdown; picking one that isn't configured — or trying to send to it — opens a +dialog with the exact setup steps (sourced from `AGENT_SETUP` in +`src/lib/agents.ts`, which mirrors the instructions above). An agent counts as +configured when: + +- **Claude Code** — `ANTHROPIC_API_KEY` / `CLAUDE_CODE_OAUTH_TOKEN` is set, or + a `~/.claude.json` login exists. +- **Codex** — `OPENAI_API_KEY` / `CODEX_API_KEY` is set, or a + `~/.codex/auth.json` login exists. +- **Gemini CLI** — `GEMINI_API_KEY` or `GEMINI_ACP_AUTH_METHOD` is set (a + cached Google login alone isn't enough for headless ACP, so it isn't + counted). 
+- **OpenCode** — a provider key (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY` / + `GEMINI_API_KEY`) is set, or an `opencode auth login` credential file + (`~/.local/share/opencode/auth.json`) exists. + +Detection runs on the server for every loader request, so set your +env vars / log in **before** `pnpm dev` (or restart it after). diff --git a/examples/ts-react-coding-agent/package.json b/examples/ts-react-coding-agent/package.json new file mode 100644 index 000000000..2d8e74c09 --- /dev/null +++ b/examples/ts-react-coding-agent/package.json @@ -0,0 +1,39 @@ +{ + "name": "ts-react-coding-agent", + "private": true, + "type": "module", + "scripts": { + "dev": "vite dev --port 3000", + "build": "vite build", + "serve": "vite preview", + "test": "exit 0", + "test:types": "tsc --noEmit" + }, + "dependencies": { + "@tailwindcss/vite": "^4.1.18", + "@tanstack/ai": "workspace:*", + "@tanstack/ai-claude-code": "workspace:*", + "@tanstack/ai-client": "workspace:*", + "@tanstack/ai-codex": "workspace:*", + "@tanstack/ai-gemini-cli": "workspace:*", + "@tanstack/ai-opencode": "workspace:*", + "@tanstack/ai-react": "workspace:*", + "@tanstack/nitro-v2-vite-plugin": "^1.154.7", + "@tanstack/react-router": "^1.158.4", + "@tanstack/react-start": "^1.159.0", + "@tanstack/router-plugin": "^1.158.4", + "react": "^19.2.3", + "react-dom": "^19.2.3", + "tailwindcss": "^4.1.18", + "vite-tsconfig-paths": "^5.1.4", + "zod": "^4.2.0" + }, + "devDependencies": { + "@types/node": "^24.10.1", + "@types/react": "^19.2.7", + "@types/react-dom": "^19.2.3", + "@vitejs/plugin-react": "^5.1.2", + "typescript": "5.9.3", + "vite": "^7.3.3" + } +} diff --git a/examples/ts-react-coding-agent/src/lib/agent-status.ts b/examples/ts-react-coding-agent/src/lib/agent-status.ts new file mode 100644 index 000000000..d2db68e9c --- /dev/null +++ b/examples/ts-react-coding-agent/src/lib/agent-status.ts @@ -0,0 +1,63 @@ +import { createServerFn } from '@tanstack/react-start' +import type { AgentId } from 
'./agents' + +/** Whether a path exists, swallowing any access error. */ +async function fileExists(filePath: string): Promise { + try { + const { access } = await import('node:fs/promises') + await access(filePath) + return true + } catch { + return false + } +} + +/** + * Reports, per agent, whether the server has credentials/config to actually + * run it. Environment variables aren't visible to the browser, so the client + * gets this through a server function (called from the route loader). Each + * agent counts as configured when an API key is present in the environment, or + * when a local CLI login exists — except Gemini CLI, whose headless ACP mode + * additionally needs an auth method selected up front (so we gate on the env + * vars the example's adapter actually reads). + */ +export const getAgentConfigFn = createServerFn({ method: 'GET' }).handler( + async (): Promise> => { + const os = await import('node:os') + const path = await import('node:path') + const home = os.homedir() + const env = process.env + + const claudeCode = + Boolean(env.ANTHROPIC_API_KEY) || + Boolean(env.CLAUDE_CODE_OAUTH_TOKEN) || + (await fileExists(path.join(home, '.claude.json'))) + + const codex = + Boolean(env.OPENAI_API_KEY) || + Boolean(env.CODEX_API_KEY) || + (await fileExists(path.join(home, '.codex', 'auth.json'))) + + // Gemini's headless ACP path needs an auth method (or an API key) chosen + // explicitly — a cached Google login alone isn't enough, so don't count it. + const geminiCli = + Boolean(env.GEMINI_API_KEY) || Boolean(env.GEMINI_ACP_AUTH_METHOD) + + // OpenCode resolves any configured provider — count a provider API key in + // the environment or an `opencode auth login` credential file. 
+ const opencode = + Boolean(env.ANTHROPIC_API_KEY) || + Boolean(env.OPENAI_API_KEY) || + Boolean(env.GEMINI_API_KEY) || + (await fileExists( + path.join(home, '.local', 'share', 'opencode', 'auth.json'), + )) + + return { + 'claude-code': claudeCode, + codex, + 'gemini-cli': geminiCli, + opencode, + } + }, +) diff --git a/examples/ts-react-coding-agent/src/lib/agents.ts b/examples/ts-react-coding-agent/src/lib/agents.ts new file mode 100644 index 000000000..00aaf9bad --- /dev/null +++ b/examples/ts-react-coding-agent/src/lib/agents.ts @@ -0,0 +1,148 @@ +/** + * Registry of coding-agent harnesses this example can drive. + * + * Each entry maps to a harness adapter on the server (see + * `src/routes/api.chat.ts`): Claude Code (`@tanstack/ai-claude-code`), + * Codex (`@tanstack/ai-codex`), Gemini CLI (`@tanstack/ai-gemini-cli`), and + * OpenCode (`@tanstack/ai-opencode`). + */ +export const AGENTS = [ + { id: 'claude-code', label: 'Claude Code' }, + { id: 'codex', label: 'Codex' }, + { id: 'gemini-cli', label: 'Gemini CLI' }, + { id: 'opencode', label: 'OpenCode' }, +] as const + +/** Agent ids with a working adapter behind them. */ +export type AgentId = 'claude-code' | 'codex' | 'gemini-cli' | 'opencode' + +export const DEFAULT_AGENT: AgentId = 'claude-code' + +export function isAgentId(value: unknown): value is AgentId { + return ( + value === 'claude-code' || + value === 'codex' || + value === 'gemini-cli' || + value === 'opencode' + ) +} + +/** A single, optionally command-bearing step in an agent's setup guide. */ +export interface SetupStep { + text: string + /** A shell command to show in a copyable code block. */ + code?: string +} + +export interface AgentSetup { + /** Human label (mirrors the AGENTS entry). */ + label: string + /** One-line description of what drives this agent. */ + summary: string + /** Ordered setup steps shown in the "not configured" dialog. */ + steps: Array + /** Docs link for the underlying CLI/tool. 
*/ + docsUrl: string +} + +/** + * Setup instructions surfaced in the UI when an agent isn't configured on the + * server at runtime. Mirrors the README "Running" section — keep them in sync. + */ +export const AGENT_SETUP: Record = { + 'claude-code': { + label: 'Claude Code', + summary: + 'Drives the Claude Code CLI through @tanstack/ai-claude-code. Needs the CLI installed and authenticated on the server.', + steps: [ + { + text: 'Install the Claude Code CLI:', + code: 'npm i -g @anthropic-ai/claude-code', + }, + { + text: 'Log in interactively (uses your Claude subscription):', + code: 'claude login', + }, + { + text: '…or set an API key in the server environment instead:', + code: 'export ANTHROPIC_API_KEY=sk-ant-…', + }, + { text: 'Restart the dev server so it picks up new credentials.' }, + ], + docsUrl: 'https://docs.anthropic.com/en/docs/claude-code', + }, + codex: { + label: 'Codex', + summary: + 'Drives OpenAI Codex through @tanstack/ai-codex. The codex binary ships with the SDK; you only need credentials.', + steps: [ + { text: 'Log in interactively:', code: 'codex login' }, + { + text: '…or set an API key in the server environment instead:', + code: 'export OPENAI_API_KEY=sk-…', + }, + { + text: 'Heads up: ChatGPT-account logins cannot run codex models in headless mode — an API key or an entitled account is required.', + }, + { text: 'Restart the dev server so it picks up new credentials.' }, + ], + docsUrl: 'https://developers.openai.com/codex', + }, + 'gemini-cli': { + label: 'Gemini CLI', + summary: + 'Drives the Gemini CLI over ACP through @tanstack/ai-gemini-cli. 
Needs a recent CLI and an ACP auth method chosen up front.', + steps: [ + { + text: 'Install a current Gemini CLI (ACP mode needs a recent build):', + code: 'npm i -g @google/gemini-cli', + }, + { text: 'Log in with Google once (interactive):', code: 'gemini' }, + { + text: 'Headless ACP runs can’t show an auth picker, so tell the adapter which method to use and start the server:', + code: 'GEMINI_ACP_AUTH_METHOD=oauth-personal GEMINI_CLI_TRUST_WORKSPACE=true pnpm dev', + }, + { + text: '…or use an API key instead (set GEMINI_ACP_AUTH_METHOD=gemini-api-key):', + code: 'export GEMINI_API_KEY=…', + }, + ], + docsUrl: 'https://github.com/google-gemini/gemini-cli', + }, + opencode: { + label: 'OpenCode', + summary: + 'Drives OpenCode through @tanstack/ai-opencode. Needs the opencode CLI installed and a provider authenticated on the server.', + steps: [ + { + text: 'Install the OpenCode CLI:', + code: 'npm i -g opencode-ai', + }, + { + text: 'Authenticate a provider once (interactive):', + code: 'opencode auth login', + }, + { + text: '…or set the provider API key in the server environment instead:', + code: 'export ANTHROPIC_API_KEY=sk-ant-…', + }, + { text: 'Restart the dev server so it picks up new credentials.' }, + ], + docsUrl: 'https://opencode.ai/docs', + }, +} + +/** + * What the agent is allowed to do in the workspace: + * - `read-only`: it can read and search, but file edits and shell commands + * are blocked. + * - `edit`: file edits are auto-approved; with Claude Code, Gemini CLI, and + * OpenCode, shell commands still get denied by each adapter's default + * permission policy (a deliberate demo of the permission system), while + * Codex sandboxes them inside the workspace instead. 
+ */ +export type AgentMode = 'read-only' | 'edit' + +export function isAgentMode(value: unknown): value is AgentMode { + return value === 'read-only' || value === 'edit' +} diff --git a/examples/ts-react-coding-agent/src/lib/style-guide-tool.ts b/examples/ts-react-coding-agent/src/lib/style-guide-tool.ts new file mode 100644 index 000000000..679356a48 --- /dev/null +++ b/examples/ts-react-coding-agent/src/lib/style-guide-tool.ts @@ -0,0 +1,26 @@ +import { z } from 'zod' +import { toolDefinition } from '@tanstack/ai' + +/** + * A TanStack server tool bridged *into* the harness. The agent sees it as + * `mcp__tanstack__lookup_style_guide`, calls it like any built-in tool, and + * the adapter strips the prefix so the UI shows `lookup_style_guide`. + */ +export const lookupStyleGuide = toolDefinition({ + name: 'lookup_style_guide', + description: + "Look up this project's coding style guide. Call this before writing or editing any code so your changes match the house style.", + inputSchema: z.object({ + topic: z + .string() + .describe('What you are about to write, e.g. "functions", "naming"'), + }), +}).server(({ topic }) => ({ + topic, + rules: [ + 'Use arrow functions assigned to const, never function declarations.', + 'Prefer single quotes and no semicolons.', + 'Every exported function gets a one-line JSDoc comment.', + 'Keep files under 100 lines; split modules instead of growing them.', + ], +})) diff --git a/examples/ts-react-coding-agent/src/routeTree.gen.ts b/examples/ts-react-coding-agent/src/routeTree.gen.ts new file mode 100644 index 000000000..861dc17e2 --- /dev/null +++ b/examples/ts-react-coding-agent/src/routeTree.gen.ts @@ -0,0 +1,86 @@ +/* eslint-disable */ + +// @ts-nocheck + +// noinspection JSUnusedGlobalSymbols + +// This file was automatically generated by TanStack Router. +// You should NOT make any changes in this file as it will be overwritten. 
+// Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified. + +import { Route as rootRouteImport } from './routes/__root' +import { Route as IndexRouteImport } from './routes/index' +import { Route as ApiChatRouteImport } from './routes/api.chat' + +const IndexRoute = IndexRouteImport.update({ + id: '/', + path: '/', + getParentRoute: () => rootRouteImport, +} as any) +const ApiChatRoute = ApiChatRouteImport.update({ + id: '/api/chat', + path: '/api/chat', + getParentRoute: () => rootRouteImport, +} as any) + +export interface FileRoutesByFullPath { + '/': typeof IndexRoute + '/api/chat': typeof ApiChatRoute +} +export interface FileRoutesByTo { + '/': typeof IndexRoute + '/api/chat': typeof ApiChatRoute +} +export interface FileRoutesById { + __root__: typeof rootRouteImport + '/': typeof IndexRoute + '/api/chat': typeof ApiChatRoute +} +export interface FileRouteTypes { + fileRoutesByFullPath: FileRoutesByFullPath + fullPaths: '/' | '/api/chat' + fileRoutesByTo: FileRoutesByTo + to: '/' | '/api/chat' + id: '__root__' | '/' | '/api/chat' + fileRoutesById: FileRoutesById +} +export interface RootRouteChildren { + IndexRoute: typeof IndexRoute + ApiChatRoute: typeof ApiChatRoute +} + +declare module '@tanstack/react-router' { + interface FileRoutesByPath { + '/': { + id: '/' + path: '/' + fullPath: '/' + preLoaderRoute: typeof IndexRouteImport + parentRoute: typeof rootRouteImport + } + '/api/chat': { + id: '/api/chat' + path: '/api/chat' + fullPath: '/api/chat' + preLoaderRoute: typeof ApiChatRouteImport + parentRoute: typeof rootRouteImport + } + } +} + +const rootRouteChildren: RootRouteChildren = { + IndexRoute: IndexRoute, + ApiChatRoute: ApiChatRoute, +} +export const routeTree = rootRouteImport + ._addFileChildren(rootRouteChildren) + ._addFileTypes() + +import type { getRouter } from './router.tsx' +import type { createStart } from '@tanstack/react-start' +declare module 
'@tanstack/react-start' { + interface Register { + ssr: true + router: Awaited> + } +} diff --git a/examples/ts-react-coding-agent/src/router.tsx b/examples/ts-react-coding-agent/src/router.tsx new file mode 100644 index 000000000..ee1edab88 --- /dev/null +++ b/examples/ts-react-coding-agent/src/router.tsx @@ -0,0 +1,13 @@ +import { createRouter } from '@tanstack/react-router' + +// Import the generated route tree +import { routeTree } from './routeTree.gen' + +// Create a new router instance +export const getRouter = () => { + return createRouter({ + routeTree, + scrollRestoration: true, + defaultPreloadStaleTime: 0, + }) +} diff --git a/examples/ts-react-coding-agent/src/routes/__root.tsx b/examples/ts-react-coding-agent/src/routes/__root.tsx new file mode 100644 index 000000000..950ce1bcc --- /dev/null +++ b/examples/ts-react-coding-agent/src/routes/__root.tsx @@ -0,0 +1,41 @@ +import { HeadContent, Scripts, createRootRoute } from '@tanstack/react-router' +import appCss from '../styles.css?url' + +export const Route = createRootRoute({ + head: () => ({ + meta: [ + { + charSet: 'utf-8', + }, + { + name: 'viewport', + content: 'width=device-width, initial-scale=1', + }, + { + title: 'TanStack AI — Coding Agent', + }, + ], + links: [ + { + rel: 'stylesheet', + href: appCss, + }, + ], + }), + + shellComponent: RootDocument, +}) + +function RootDocument({ children }: { children: React.ReactNode }) { + return ( + + + + + + {children} + + + + ) +} diff --git a/examples/ts-react-coding-agent/src/routes/api.chat.ts b/examples/ts-react-coding-agent/src/routes/api.chat.ts new file mode 100644 index 000000000..6e4896936 --- /dev/null +++ b/examples/ts-react-coding-agent/src/routes/api.chat.ts @@ -0,0 +1,127 @@ +import path from 'node:path' +import { createFileRoute } from '@tanstack/react-router' +import { + chat, + chatParamsFromRequestBody, + toServerSentEventsResponse, +} from '@tanstack/ai' +import { claudeCodeText } from '@tanstack/ai-claude-code' +import { codexText } 
from '@tanstack/ai-codex' +import { geminiCliText } from '@tanstack/ai-gemini-cli' +import { opencodeText } from '@tanstack/ai-opencode' +import { isAgentId, isAgentMode } from '@/lib/agents' +import { lookupStyleGuide } from '@/lib/style-guide-tool' +import type { AgentId, AgentMode } from '@/lib/agents' +import type { AnyTextAdapter } from '@tanstack/ai' + +const SYSTEM_PROMPT = `You are a coding assistant working on the small demo +project mounted in your working directory. Before writing or editing any +code, call the lookup_style_guide tool and follow what it says. Keep your +answers short — the user is watching your tool activity stream by.` + +/** One harness adapter per agent id. */ +function createAdapter( + agentId: AgentId, + mode: AgentMode, + cwd: string, +): AnyTextAdapter { + switch (agentId) { + case 'claude-code': + return claudeCodeText('claude-opus-4-8', { + cwd, + maxTurns: 25, + ...(mode === 'edit' + ? // Auto-approve file edits. Shell commands still go through the + // adapter's default permission policy, which denies them with an + // explanatory message — watch for it in the tool timeline. + { permissionMode: 'acceptEdits' } + : // Read-only: searching and reading work, mutating tools are + // removed from the harness entirely. + { disallowedTools: ['Write', 'Edit', 'NotebookEdit', 'Bash'] }), + }) + case 'codex': + // Codex has no per-tool permission prompts in headless mode; the + // sandbox is the safety boundary. Edit mode lets it write inside the + // workspace, read-only keeps every command non-mutating. + return codexText('gpt-5.1-codex', { + cwd, + sandboxMode: mode === 'edit' ? 'workspace-write' : 'read-only', + }) + case 'gemini-cli': + return geminiCliText('gemini-3-pro-preview', { + cwd, + // Edit mode auto-approves file edits; shell commands still get + // rejected by the adapter's default permission policy, same demo + // as Claude Code above. + permissionMode: mode === 'edit' ? 
'acceptEdits' : 'default', + // Headless ACP runs must select an auth method up front (the CLI + // can't pop an interactive picker). Set GEMINI_ACP_AUTH_METHOD to + // the method your CLI is set up for, e.g. `oauth-personal` (Log in + // with Google) or `gemini-api-key`. See this example's README. + ...(process.env.GEMINI_ACP_AUTH_METHOD && { + authMethodId: process.env.GEMINI_ACP_AUTH_METHOD, + }), + }) + case 'opencode': + return opencodeText('anthropic/claude-sonnet-4-5', { + directory: cwd, + // Edit mode auto-approves file edits; shell commands still get + // rejected by the adapter's default permission policy, same demo + // as Claude Code and Gemini CLI above. + permissionMode: mode === 'edit' ? 'acceptEdits' : 'default', + }) + } +} + +export const Route = createFileRoute('/api/chat')({ + server: { + handlers: { + POST: async ({ request }) => { + if (request.signal.aborted) { + return new Response(null, { status: 499 }) + } + const abortController = new AbortController() + + let params + try { + params = await chatParamsFromRequestBody(await request.json()) + } catch (error) { + return new Response( + error instanceof Error ? error.message : 'Bad request', + { status: 400 }, + ) + } + + // Client-sent settings arrive via forwardedProps. Validate against + // the allowlist — never feed client strings straight into config. + const agentId = isAgentId(params.forwardedProps.agentId) + ? params.forwardedProps.agentId + : 'claude-code' + const mode = isAgentMode(params.forwardedProps.mode) + ? params.forwardedProps.mode + : 'read-only' + const sessionId = + typeof params.forwardedProps.sessionId === 'string' && + params.forwardedProps.sessionId !== '' + ? params.forwardedProps.sessionId + : undefined + + // The agent only ever works inside the example's scratch workspace. 
+ const cwd = path.join(process.cwd(), 'workspace') + + const stream = chat({ + adapter: createAdapter(agentId, mode, cwd), + messages: params.messages, + systemPrompts: [SYSTEM_PROMPT], + tools: [lookupStyleGuide], + modelOptions: { sessionId }, + threadId: params.threadId, + runId: params.runId, + abortController, + }) + + return toServerSentEventsResponse(stream, { abortController }) + }, + }, + }, +}) diff --git a/examples/ts-react-coding-agent/src/routes/index.tsx b/examples/ts-react-coding-agent/src/routes/index.tsx new file mode 100644 index 000000000..27c487849 --- /dev/null +++ b/examples/ts-react-coding-agent/src/routes/index.tsx @@ -0,0 +1,324 @@ +import { useMemo, useState } from 'react' +import { createFileRoute } from '@tanstack/react-router' +import { fetchServerSentEvents, useChat } from '@tanstack/ai-react' +import { AGENTS, AGENT_SETUP, DEFAULT_AGENT, isAgentId } from '@/lib/agents' +import { getAgentConfigFn } from '@/lib/agent-status' +import type { UIMessage } from '@tanstack/ai-react' +import type { AgentId, AgentMode } from '@/lib/agents' + +export const Route = createFileRoute('/')({ + component: CodingAgentPage, + // Env vars aren't available client-side, so the loader asks the server which + // agents are actually configured (see src/lib/agent-status.ts). + loader: () => getAgentConfigFn(), +}) + +function ToolCallCard({ + part, +}: { + part: Extract +}) { + const args = useMemo(() => { + try { + return JSON.stringify(JSON.parse(part.arguments), null, 2) + } catch { + return part.arguments + } + }, [part.arguments]) + + const output = useMemo(() => { + if (part.output === undefined) return undefined + return typeof part.output === 'string' + ? part.output + : JSON.stringify(part.output, null, 2) + }, [part.output]) + + return ( +
+ + 🔧 {part.name} + + {output !== undefined ? 'done' : part.state} + + +
+
+          {args}
+        
+ {output !== undefined && ( +
+            {output}
+          
+ )} +
+
+ ) +} + +function Message({ message }: { message: UIMessage }) { + const isUser = message.role === 'user' + return ( +
+
+ {message.parts.map((part, index) => { + if (part.type === 'text' && part.content.trim()) { + return ( +

+ {part.content} +

+ ) + } + if (part.type === 'thinking' && part.content.trim()) { + return ( +
+ + 💭 thinking… + +

+ {part.content} +

+
+ ) + } + if (part.type === 'tool-call') { + return + } + return null + })} +
+
+ ) +} + +function SetupDialog({ + agentId, + onClose, +}: { + agentId: AgentId + onClose: () => void +}) { + const setup = AGENT_SETUP[agentId] + return ( +
+
event.stopPropagation()} + role="dialog" + aria-modal="true" + aria-label={`${setup.label} setup`} + > +
+

Set up {setup.label}

+ +
+

{setup.summary}

+
    + {setup.steps.map((step, index) => ( +
  1. +
    + + {index + 1}. + +
    +

    {step.text}

    + {step.code && ( +
    +                      {step.code}
    +                    
    + )} +
    +
    +
  2. + ))} +
+
+ + Documentation ↗ + + +
+
+
+ ) +} + +function CodingAgentPage() { + const configured = Route.useLoaderData() + const [agentId, setAgentId] = useState(DEFAULT_AGENT) + const [mode, setMode] = useState('read-only') + const [sessionId, setSessionId] = useState(undefined) + const [input, setInput] = useState('') + const [setupOpen, setSetupOpen] = useState(false) + + const isConfigured = configured[agentId] + + const body = useMemo( + () => ({ agentId, mode, sessionId }), + [agentId, mode, sessionId], + ) + + const { messages, sendMessage, isLoading, clear, error } = useChat({ + connection: fetchServerSentEvents('/api/chat'), + body, + onCustomEvent: (eventType, data) => { + // Every harness adapter pins its session with a `.session-id` + // CUSTOM event (claude-code.session-id, codex.session-id, ...). + if ( + eventType.endsWith('.session-id') && + typeof data === 'object' && + data !== null && + 'sessionId' in data && + typeof data.sessionId === 'string' + ) { + setSessionId(data.sessionId) + } + }, + }) + + const newSession = () => { + setSessionId(undefined) + clear() + } + + const send = () => { + const text = input.trim() + if (!text || isLoading) return + // Don't fire a request the server can't fulfil — explain the setup instead. + if (!isConfigured) { + setSetupOpen(true) + return + } + setInput('') + void sendMessage(text) + } + + const selectAgent = (value: string) => { + if (!isAgentId(value)) return + // Sessions aren't portable across harnesses — switching agents starts fresh. + setAgentId(value) + setSessionId(undefined) + // Selecting is always allowed; if it isn't set up, show how to fix it. + if (!configured[value]) setSetupOpen(true) + } + + return ( +
+
+

Coding Agent

+ + + +
+ + {!isConfigured && ( +
+ + ⚠️ {AGENT_SETUP[agentId].label} isn’t configured on the server. + + +
+ )} + +
+ {sessionId + ? `Resuming session ${sessionId.slice(0, 8)}… — follow-ups send only your latest message.` + : `No session yet — the first reply starts one and pins it via the ${agentId}.session-id event.`} +
+ +
+ {messages.length === 0 && ( +

+ Try: “What files are in this project, and what do they do?” — then + switch to Edit mode and ask it to fix the bug in{' '} + workspace/temperature.js. +

+ )} + {messages.map((message) => ( + + ))} + {error && ( +

+ {String(error)} +

+ )} +
+ +
+ setInput(event.target.value)} + onKeyDown={(event) => { + if (event.key === 'Enter') send() + }} + placeholder="Ask the agent to explore or change the workspace…" + className="flex-1 rounded border border-gray-700 bg-gray-900 px-3 py-2 outline-none focus:border-gray-500" + /> + +
+ + {setupOpen && ( + setSetupOpen(false)} /> + )} +
+ ) +} diff --git a/examples/ts-react-coding-agent/src/styles.css b/examples/ts-react-coding-agent/src/styles.css new file mode 100644 index 000000000..d4b507858 --- /dev/null +++ b/examples/ts-react-coding-agent/src/styles.css @@ -0,0 +1 @@ +@import 'tailwindcss'; diff --git a/examples/ts-react-coding-agent/tsconfig.json b/examples/ts-react-coding-agent/tsconfig.json new file mode 100644 index 000000000..477479fb7 --- /dev/null +++ b/examples/ts-react-coding-agent/tsconfig.json @@ -0,0 +1,28 @@ +{ + "include": ["**/*.ts", "**/*.tsx"], + "compilerOptions": { + "target": "ES2022", + "jsx": "react-jsx", + "module": "ESNext", + "lib": ["ES2022", "DOM", "DOM.Iterable"], + "types": ["vite/client"], + + /* Bundler mode */ + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": false, + "noEmit": true, + + /* Linting */ + "skipLibCheck": true, + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedSideEffectImports": true, + "baseUrl": ".", + "paths": { + "@/*": ["./src/*"] + } + } +} diff --git a/examples/ts-react-coding-agent/vite.config.ts b/examples/ts-react-coding-agent/vite.config.ts new file mode 100644 index 000000000..563d73a12 --- /dev/null +++ b/examples/ts-react-coding-agent/vite.config.ts @@ -0,0 +1,30 @@ +import { defineConfig } from 'vite' +import { tanstackStart } from '@tanstack/react-start/plugin/vite' +import viteReact from '@vitejs/plugin-react' +import viteTsConfigPaths from 'vite-tsconfig-paths' +import tailwindcss from '@tailwindcss/vite' +import { nitroV2Plugin } from '@tanstack/nitro-v2-vite-plugin' + +const config = defineConfig({ + // The Claude Agent SDK is server-only and ships its own bundled Claude + // Code runtime — keep it external so the SSR build resolves it at runtime + // via require() instead of inlining it into the rollup chunk. 
+ ssr: { + external: ['@anthropic-ai/claude-agent-sdk'], + }, + plugins: [ + nitroV2Plugin({ + externals: { + external: ['@anthropic-ai/claude-agent-sdk'], + }, + }), + viteTsConfigPaths({ + projects: ['./tsconfig.json'], + }), + tailwindcss(), + tanstackStart(), + viteReact(), + ], +}) + +export default config diff --git a/examples/ts-react-coding-agent/workspace/README.md b/examples/ts-react-coding-agent/workspace/README.md new file mode 100644 index 000000000..47dc36b1a --- /dev/null +++ b/examples/ts-react-coding-agent/workspace/README.md @@ -0,0 +1,13 @@ +# Demo Workspace + +This directory is the coding agent's working directory (`cwd`). Everything +the agent reads, searches, and edits happens in here — nothing outside this +folder is touched. + +Files: + +- `temperature.js` — a tiny conversion module with a deliberate bug for the + agent to find and fix (in Edit mode). +- `todo.md` — a short task list the agent can read or update. + +Feel free to reset this directory with `git checkout -- .` after demos. diff --git a/examples/ts-react-coding-agent/workspace/temperature.js b/examples/ts-react-coding-agent/workspace/temperature.js new file mode 100644 index 000000000..4aaeb517b --- /dev/null +++ b/examples/ts-react-coding-agent/workspace/temperature.js @@ -0,0 +1,12 @@ +/** Convert Celsius to Fahrenheit. */ +const celsiusToFahrenheit = (celsius) => { + return celsius * (9 / 5) + 32 +} + +/** Convert Fahrenheit to Celsius. */ +const fahrenheitToCelsius = (fahrenheit) => { + // BUG: should subtract 32 before scaling, not after. 
+ return fahrenheit * (5 / 9) - 32 +} + +export { celsiusToFahrenheit, fahrenheitToCelsius } diff --git a/examples/ts-react-coding-agent/workspace/todo.md b/examples/ts-react-coding-agent/workspace/todo.md new file mode 100644 index 000000000..945973c61 --- /dev/null +++ b/examples/ts-react-coding-agent/workspace/todo.md @@ -0,0 +1,5 @@ +# Tasks + +- [ ] Fix the Fahrenheit → Celsius conversion bug +- [ ] Add a Kelvin conversion helper +- [ ] Write a usage example in the README diff --git a/knip.json b/knip.json index a5e8a03e1..39655ddab 100644 --- a/knip.json +++ b/knip.json @@ -44,6 +44,9 @@ }, "packages/ai-vue-ui": { "ignore": ["src/use-chat-context.ts"] + }, + "packages/ai-persistence-postgres": { + "ignoreDependencies": ["pg"] } } } diff --git a/packages/ai-claude-code/README.md b/packages/ai-claude-code/README.md new file mode 100644 index 000000000..8532fcaae --- /dev/null +++ b/packages/ai-claude-code/README.md @@ -0,0 +1,18 @@ +# @tanstack/ai-claude-code + +Claude Code harness adapter for [TanStack AI](https://tanstack.com/ai) — run [Claude Code](https://docs.anthropic.com/en/docs/claude-code) (via `@anthropic-ai/claude-agent-sdk`) as a chat backend with local tool execution, stateful coding sessions, and TanStack tool bridging. + +```typescript +import { chat } from '@tanstack/ai' +import { claudeCodeText } from '@tanstack/ai-claude-code' + +const stream = chat({ + adapter: claudeCodeText('claude-opus-4-8', { + cwd: '/path/to/project', + permissionMode: 'acceptEdits', + }), + messages: [{ role: 'user', content: 'Fix the failing test.' }], +}) +``` + +Server-only (Node). See the [Claude Code adapter docs](https://tanstack.com/ai/latest/docs/adapters/claude-code) for sessions, tool bridging, permissions, and limitations. 
diff --git a/packages/ai-claude-code/package.json b/packages/ai-claude-code/package.json new file mode 100644 index 000000000..3fb3ed30b --- /dev/null +++ b/packages/ai-claude-code/package.json @@ -0,0 +1,59 @@ +{ + "name": "@tanstack/ai-claude-code", + "version": "0.1.0", + "description": "Claude Code harness adapter for TanStack AI — run Claude Code as a chat backend with local tool execution and stateful sessions.", + "author": "", + "license": "MIT", + "repository": { + "type": "git", + "url": "git+https://github.com/TanStack/ai.git", + "directory": "packages/ai-claude-code" + }, + "keywords": [ + "ai", + "ai-sdk", + "typescript", + "tanstack", + "anthropic", + "claude", + "claude-code", + "harness", + "agent", + "adapter", + "chat", + "tool-calling" + ], + "type": "module", + "module": "./dist/esm/index.js", + "types": "./dist/esm/index.d.ts", + "exports": { + ".": { + "types": "./dist/esm/index.d.ts", + "import": "./dist/esm/index.js" + } + }, + "files": [ + "dist", + "src" + ], + "scripts": { + "build": "vite build", + "clean": "premove ./build ./dist", + "lint:fix": "eslint ./src --fix", + "test:build": "publint --strict", + "test:eslint": "eslint ./src", + "test:lib": "vitest", + "test:lib:dev": "pnpm test:lib --watch", + "test:types": "tsc" + }, + "peerDependencies": { + "@tanstack/ai": "workspace:^", + "@tanstack/ai-sandbox": "workspace:^" + }, + "devDependencies": { + "@tanstack/ai": "workspace:*", + "@tanstack/ai-sandbox": "workspace:*", + "@tanstack/ai-sandbox-local-process": "workspace:*", + "@vitest/coverage-v8": "4.0.14" + } +} diff --git a/packages/ai-claude-code/src/adapters/policy-map.ts b/packages/ai-claude-code/src/adapters/policy-map.ts new file mode 100644 index 000000000..02a04f664 --- /dev/null +++ b/packages/ai-claude-code/src/adapters/policy-map.ts @@ -0,0 +1,83 @@ +/** + * Map a portable {@link SandboxPolicy} onto Claude Code CLI permission flags. 
/** Claude Code CLI permission settings derived from a portable sandbox policy. */
export interface ClaudePolicyFlags {
  /** `--permission-mode` value; only present when the policy declares a `default` decision. */
  permissionMode?: ClaudeCodePermissionMode
  /** Built-in tool names to add to `--allowedTools` (deduplicated). */
  allowedTools: Array<string>
  /** Built-in tool names to add to `--disallowedTools` (deduplicated). */
  disallowedTools: Array<string>
}

// Built-in tools that write to the workspace. `NotebookEdit` is listed because
// it edits files too and the adapter's permission resolver already classifies
// it under the `fileWrite` capability; leaving it out would let a
// `fileWrite: 'deny'` policy be sidestepped through notebook edits.
const WRITE_TOOLS = ['Write', 'Edit', 'MultiEdit', 'NotebookEdit']
// Built-in tools that reach the network.
const NETWORK_TOOLS = ['WebFetch', 'WebSearch']
// Names that can be passed verbatim to `--allowedTools` / `--disallowedTools`.
const BUILTIN_TOOL_NAMES = new Set([
  'Bash',
  'Read',
  'Write',
  'Edit',
  'MultiEdit',
  'Glob',
  'Grep',
  'WebFetch',
  'WebSearch',
  'NotebookEdit',
  'Task',
])

/** Translate a policy's default decision into a Claude Code permission mode. */
function modeFor(decision: PolicyDecision): ClaudeCodePermissionMode {
  switch (decision) {
    case 'allow':
      return 'bypassPermissions'
    case 'ask':
      return 'acceptEdits'
    case 'deny':
      return 'default'
  }
}

/**
 * Map a portable sandbox policy onto Claude Code CLI permission flags.
 *
 * - `capabilities.fileWrite === 'deny'` disallows every tool in `WRITE_TOOLS`.
 * - `capabilities.network === 'deny'` disallows every tool in `NETWORK_TOOLS`.
 * - `commands.deny` / `commands.allow` entries that exactly match a built-in
 *   tool name are forwarded; any other pattern is ignored here.
 * - `default`, when set, selects the permission mode via `modeFor`.
 *
 * @param policy - The sandbox policy, or `undefined` when none is configured.
 * @returns Deduplicated tool additions, plus a permission mode when the policy
 *   declares a default decision. With no policy both lists are empty.
 */
export function mapPolicyToClaudeFlags(
  policy: SandboxPolicy | undefined,
): ClaudePolicyFlags {
  const allowedTools: Array<string> = []
  const disallowedTools: Array<string> = []
  if (!policy) return { allowedTools, disallowedTools }

  if (policy.capabilities?.fileWrite === 'deny')
    disallowedTools.push(...WRITE_TOOLS)
  if (policy.capabilities?.network === 'deny')
    disallowedTools.push(...NETWORK_TOOLS)

  // Only patterns that are exactly a built-in tool name map onto CLI flags.
  for (const pattern of policy.commands?.deny ?? []) {
    if (BUILTIN_TOOL_NAMES.has(pattern)) disallowedTools.push(pattern)
  }
  for (const pattern of policy.commands?.allow ?? []) {
    if (BUILTIN_TOOL_NAMES.has(pattern)) allowedTools.push(pattern)
  }

  const result: ClaudePolicyFlags = {
    allowedTools: [...new Set(allowedTools)],
    disallowedTools: [...new Set(disallowedTools)],
  }
  if (policy.default !== undefined)
    result.permissionMode = modeFor(policy.default)
  return result
}
/** Permission modes accepted by the `claude` CLI's `--permission-mode` flag. */
export type ClaudeCodePermissionMode =
  | 'default'
  | 'acceptEdits'
  | 'bypassPermissions'
  | 'plan'

// Working directory used when neither the call nor the adapter config sets one.
const DEFAULT_WORKDIR = '/workspace'

/** Adapter-level configuration for the in-sandbox Claude Code harness. */
export interface ClaudeCodeTextConfig {
  /**
   * Working directory inside the sandbox where `claude` runs. Defaults to
   * `/workspace` (the conventional sandbox workspace root).
   */
  cwd?: string
  /**
   * Claude Code permission mode passed via `--permission-mode`. Defaults to
   * `'bypassPermissions'` — a sandbox is isolated, so the agent is allowed to
   * edit files and run commands without prompting. Tighten via `defineSandboxPolicy`
   * / this option for less autonomy.
   */
  permissionMode?: ClaudeCodePermissionMode
  /** Built-in tools the harness may use (`--allowedTools`). */
  allowedTools?: Array<string>
  /** Built-in tools removed from the harness (`--disallowedTools`). */
  disallowedTools?: Array<string>
  /** Extra directories the agent may access (`--add-dir`). */
  addDirs?: Array<string>
  /** Maximum harness-internal turns (`--max-turns`). */
  maxTurns?: number
  /**
   * How `systemPrompts` from `chat()` are applied:
   * - `'append'` (default): `--append-system-prompt` on top of the preset.
   * - `'replace'`: `--system-prompt` as the entire system prompt.
   */
  systemPromptMode?: 'append' | 'replace'
  /** Path/name of the claude executable inside the sandbox. Defaults to `claude`. */
  claudeExecutable?: string
  /** Emit token-level deltas via `--include-partial-messages` (default true). */
  streamPartials?: boolean
  /** Extra environment variables (name → value) for the claude process inside the sandbox. */
  env?: Record<string, string>
  /** Emit a `file.changed` CUSTOM event with the git diff after the run (default true). */
  emitDiff?: boolean
}

/**
 * POSIX single-quote escape for embedding values in the `claude …` command.
 *
 * The value is wrapped in single quotes; each embedded single quote is
 * rewritten as `'\''` (close the quote, emit an escaped quote, reopen).
 *
 * @param value - Raw argument text.
 * @returns The argument quoted as a single shell word.
 */
function q(value: string): string {
  return `'${value.replace(/'/g, `'\\''`)}'`
}
/**
 * Format a host tool-bridge as claude's `--mcp-config` JSON: a single HTTP MCP
 * server keyed by the bridge name, authenticated with the bridge's bearer token.
 */
function bridgeToMcpConfig(bridge: HostToolBridge): string {
  return JSON.stringify({
    mcpServers: {
      [bridge.name]: {
        type: 'http',
        url: bridge.url,
        headers: { Authorization: `Bearer ${bridge.token}` },
      },
    },
  })
}

/**
 * Harness adapter that runs the `claude` CLI inside a sandbox and translates
 * its `stream-json` output into stream chunks.
 *
 * NOTE(review): the generic type arguments in this class were not visible in
 * the reviewed text and have been reconstructed from usage. In particular the
 * element type of the fifth `BaseTextAdapter` argument (`ReadonlyArray<string>`)
 * is an assumption — confirm against `BaseTextAdapter`'s declaration.
 */
export class ClaudeCodeTextAdapter<
  TModel extends ClaudeCodeModel,
> extends BaseTextAdapter<
  TModel,
  ClaudeCodeTextProviderOptions,
  ReadonlyArray<Modality> & readonly ['text'],
  DefaultMessageMetadataByModality,
  ReadonlyArray<string>,
  unknown,
  never
> {
  readonly name = 'claude-code' as const

  // Harness adapter: requires a sandbox to run the agent CLI inside.
  override readonly requires = [SandboxCapability] as const

  // The agent runs inside the (persistent) sandbox, so on resume the engine can
  // re-attach to the still-running process and continue live after replaying the
  // persisted event tail (rather than ending at replay). Live re-attach behavior
  // is verified with the real CLI; the engine seam is unit-tested.
  readonly supportsReattach = true

  private readonly adapterConfig: ClaudeCodeTextConfig

  /**
   * @param config - Adapter-level defaults (cwd, permission mode, tools, …).
   * @param model - Model id passed to `claude --model`.
   */
  constructor(config: ClaudeCodeTextConfig, model: TModel) {
    super({}, model)
    this.adapterConfig = config
  }

  /**
   * Resolve the sandbox handle from the call's capability context.
   *
   * @throws Error when no capability context is present, i.e. the sandbox
   *   middleware was not installed on the `chat()` call.
   */
  private sandboxFrom(
    options: TextOptions<ClaudeCodeTextProviderOptions>,
  ): SandboxHandle {
    const ctx = options.capabilities
    if (!ctx) {
      throw new Error(
        'Adapter "claude-code" requires a sandbox. Add withSandbox(defineSandbox({ ... })) ' +
          'to chat() middleware (e.g. with the local-process or docker provider).',
      )
    }
    return getSandbox(ctx)
  }

  /** Working directory: per-call `modelOptions.cwd`, then adapter config, then the default. */
  private workdir(options: TextOptions<ClaudeCodeTextProviderOptions>): string {
    return (
      options.modelOptions?.cwd ?? this.adapterConfig.cwd ?? DEFAULT_WORKDIR
    )
  }

  /**
   * Build the `claude` command line (prompt goes via stdin, not argv).
   *
   * @param options - Call options (model options and system prompts are read).
   * @param resume - Session id to resume, if any.
   * @param policyFlags - Permission mode / tool additions derived from the policy.
   * @param mcpConfigJson - `--mcp-config` payload when a host bridge is running.
   * @param permissionPromptTool - Fully qualified MCP tool name for
   *   `--permission-prompt-tool`, when approvals are routed through the bridge.
   * @returns A single shell command string with every dynamic value quoted via `q`.
   */
  private buildCommand(
    options: TextOptions<ClaudeCodeTextProviderOptions>,
    resume: string | undefined,
    policyFlags: ClaudePolicyFlags,
    mcpConfigJson: string | undefined,
    permissionPromptTool: string | undefined,
  ): string {
    const config = this.adapterConfig
    const modelOptions = options.modelOptions
    const exe = config.claudeExecutable ?? 'claude'

    const args: Array<string> = [
      '-p',
      '--output-format',
      'stream-json',
      '--verbose',
      '--model',
      q(this.model),
    ]

    if (config.streamPartials !== false) args.push('--include-partial-messages')
    if (resume !== undefined) {
      args.push('--resume', q(resume))
      // Honor the documented `forkSession` provider option: when resuming,
      // `--fork-session` makes the CLI continue under a new session id
      // instead of appending to the original session.
      if (modelOptions?.forkSession === true) args.push('--fork-session')
    }

    // Precedence: per-call modelOptions > adapter config > policy > sandbox default.
    const permissionMode =
      modelOptions?.permissionMode ??
      config.permissionMode ??
      policyFlags.permissionMode ??
      'bypassPermissions'
    args.push('--permission-mode', q(permissionMode))

    const maxTurns = modelOptions?.maxTurns ?? config.maxTurns
    if (maxTurns !== undefined) args.push('--max-turns', String(maxTurns))

    for (const dir of config.addDirs ?? []) args.push('--add-dir', q(dir))

    // Per-call lists replace the configured ones; policy additions are always
    // appended, then the union is deduplicated.
    const allowedTools = [
      ...(modelOptions?.allowedTools ?? config.allowedTools ?? []),
      ...policyFlags.allowedTools,
    ]
    if (allowedTools.length > 0) {
      args.push('--allowedTools', q([...new Set(allowedTools)].join(',')))
    }
    const disallowedTools = [
      ...(modelOptions?.disallowedTools ?? config.disallowedTools ?? []),
      ...policyFlags.disallowedTools,
    ]
    if (disallowedTools.length > 0) {
      args.push('--disallowedTools', q([...new Set(disallowedTools)].join(',')))
    }

    // Blank system prompts are dropped; the rest are joined with a blank line.
    const systemPrompts = normalizeSystemPrompts(options.systemPrompts)
      .map((prompt) => prompt.content)
      .filter((content) => content.trim() !== '')
    if (systemPrompts.length > 0) {
      const joined = systemPrompts.join('\n\n')
      const flag =
        config.systemPromptMode === 'replace'
          ? '--system-prompt'
          : '--append-system-prompt'
      args.push(flag, q(joined))
    }

    if (mcpConfigJson !== undefined) args.push('--mcp-config', q(mcpConfigJson))
    if (permissionPromptTool !== undefined) {
      args.push('--permission-prompt-tool', q(permissionPromptTool))
    }

    return `${exe} ${args.join(' ')}`
  }

  /**
   * Build the permission-prompt resolver the host MCP bridge exposes to claude
   * (`--permission-prompt-tool`). Maps claude's permission request onto the
   * sandbox policy + client approvals; on an `ask` action with no decision yet,
   * records an approval-requested event and denies (the client re-runs to grant).
   *
   * @param policy - Sandbox policy used to decide the request.
   * @param approvals - Client approval decisions forwarded from the call options.
   * @param sink - Array that collects approval-requested events for later emission.
   * @param threadId - Thread id stamped on approval events.
   * @param runId - Run id stamped on approval events.
   */
  private buildPermissionResolver(
    policy: SandboxPolicy | undefined,
    approvals: TextOptions<ClaudeCodeTextProviderOptions>['approvals'],
    sink: Array<StreamChunk>,
    threadId: string,
    runId: string,
  ): (input: { tool_name?: string; input?: unknown }) => PermissionToolResult {
    const writeTools = new Set(['Write', 'Edit', 'MultiEdit', 'NotebookEdit'])
    const networkTools = new Set(['WebFetch', 'WebSearch'])
    return (request) => {
      const toolName = request.tool_name ?? 'tool'
      const cmdInput = request.input
      // Only Bash requests carry a shell command to match against the policy.
      const command =
        toolName === 'Bash' &&
        cmdInput !== null &&
        typeof cmdInput === 'object' &&
        'command' in cmdInput &&
        typeof (cmdInput as { command?: unknown }).command === 'string'
          ? (cmdInput as { command: string }).command
          : undefined
      const capability = writeTools.has(toolName)
        ? 'fileWrite'
        : networkTools.has(toolName)
          ? 'network'
          : undefined
      const id = approvalId({
        provider: 'claude-code',
        kind: command !== undefined ? 'command' : (capability ?? 'tool'),
        target: command ?? toolName,
      })
      const outcome = resolveApproval({
        policy,
        approvals,
        id,
        ...(command !== undefined ? { command } : {}),
        ...(capability !== undefined ? { capability } : {}),
      })
      if (outcome.needsApproval) {
        sink.push(
          buildApprovalRequestedEvent({
            approvalId: id,
            title: `Approve ${toolName}${command !== undefined ? `: ${command}` : ''}`,
            threadId,
            runId,
            detail: { provider: 'claude-code', toolName },
          }),
        )
        return {
          behavior: 'deny',
          message:
            'Awaiting client approval. Approve in the UI and re-run to continue.',
        }
      }
      return outcome.decision === 'allow'
        ? { behavior: 'allow' }
        : { behavior: 'deny', message: 'Denied by sandbox policy.' }
    }
  }

  /**
   * Run one Claude Code turn inside the sandbox and stream the translated events.
   *
   * Starts the host tool bridge when tools or a policy are present, spawns the
   * CLI with the prompt on stdin, pipes its NDJSON output through
   * `translateSdkStream`, then optionally emits a `file.changed` diff event and
   * any collected approval requests. Failures are reported as a `RUN_ERROR`
   * chunk instead of being thrown; the bridge is always closed.
   */
  async *chatStream(
    options: TextOptions<ClaudeCodeTextProviderOptions>,
  ): AsyncIterable<StreamChunk> {
    const { logger } = options
    let bridge: HostToolBridge | undefined
    const approvalRequests: Array<StreamChunk> = []
    try {
      const sandbox = this.sandboxFrom(options)
      const cwd = this.workdir(options)
      const runId = options.runId ?? this.generateId()
      const threadId = options.threadId ?? this.generateId()

      const policy = options.capabilities
        ? getSandboxPolicy(options.capabilities, { optional: true })
        : undefined

      // A permission-prompt tool gates the agent's native tools when a policy
      // can `ask`/`deny` (interactive approvals).
      const permission =
        policy !== undefined
          ? {
              toolName: 'approval_prompt',
              resolve: this.buildPermissionResolver(
                policy,
                options.approvals,
                approvalRequests,
                threadId,
                runId,
              ),
            }
          : undefined

      // Bridge chat()-provided server tools (and/or the permission tool) into
      // the sandbox over MCP.
      const hasTools = options.tools !== undefined && options.tools.length > 0
      if (hasTools || permission !== undefined) {
        bridge = await startHostToolBridge(options.tools ?? [], {
          hostForSandbox: hostForSandbox(sandbox.provider),
          context: options.context,
          ...(permission !== undefined ? { permission } : {}),
          ...(options.abortController?.signal
            ? { signal: options.abortController.signal }
            : {}),
        })
      }

      const { prompt, resume } = buildPrompt(
        options.messages,
        options.modelOptions?.sessionId,
      )
      const command = this.buildCommand(
        options,
        resume,
        mapPolicyToClaudeFlags(policy),
        bridge ? bridgeToMcpConfig(bridge) : undefined,
        bridge && permission
          ? `mcp__${bridge.name}__${permission.toolName}`
          : undefined,
      )

      logger.request(
        `activity=chat provider=claude-code model=${this.model} sandbox=${sandbox.provider} messages=${options.messages.length} resume=${resume ?? 'none'}`,
        { provider: 'claude-code', model: this.model },
      )

      const rawEvents = spawnNdjson(sandbox, command, {
        cwd,
        input: prompt,
        // Only pass `env` when the adapter config provides one; whether
        // `modelOptions` is set has no bearing on the process environment.
        ...(this.adapterConfig.env !== undefined
          ? { env: this.adapterConfig.env }
          : {}),
        ...(options.abortController?.signal
          ? { signal: options.abortController.signal }
          : options.request?.signal
            ? { signal: options.request.signal }
            : {}),
        onNonJsonLine: (line) =>
          logger.provider(`provider=claude-code non-json line: ${line}`, {
            chunk: line,
          }),
      })

      // The NDJSON events are treated as SDK messages without validation.
      async function* asMessages(): AsyncIterable<AgentSdkMessage> {
        for await (const event of rawEvents) yield event as AgentSdkMessage
      }

      yield* translateSdkStream(asMessages(), {
        model: this.model,
        runId,
        threadId,
        ...(options.parentRunId !== undefined && {
          parentRunId: options.parentRunId,
        }),
        genId: () => this.generateId(),
        onSdkMessage: (message) =>
          logger.provider(`provider=claude-code type=${message.type}`, {
            chunk: message,
          }),
      })

      // Surface the working-tree diff so UIs can render what the agent changed.
      if (this.adapterConfig.emitDiff !== false) {
        try {
          const diff = await sandbox.process.exec(`git -C ${q(cwd)} diff`, {
            cwd,
          })
          if (diff.exitCode === 0 && diff.stdout.trim() !== '') {
            yield {
              type: EventType.CUSTOM,
              name: 'file.changed',
              value: { path: '.', diff: diff.stdout },
              timestamp: Date.now(),
              threadId,
              runId,
            }
          }
        } catch {
          // not a git repo / git unavailable — skip the diff event
        }
      }

      // Surface any pending approval requests (policy `ask` actions awaiting a
      // client decision); the client approves and re-runs to continue.
      for (const event of approvalRequests) yield event
    } catch (error: unknown) {
      const err = error as Error & { code?: string }
      const rawEvent = toRunErrorRawEvent(error)
      logger.errors('claude-code.chatStream fatal', {
        error,
        source: 'claude-code.chatStream',
      })
      yield {
        type: EventType.RUN_ERROR,
        model: options.model,
        timestamp: Date.now(),
        message: err.message || 'Unknown error occurred',
        ...(err.code !== undefined && { code: err.code }),
        ...(rawEvent !== undefined && { rawEvent }),
        error: {
          message: err.message || 'Unknown error occurred',
          ...(err.code !== undefined && { code: err.code }),
        },
      }
    } finally {
      if (bridge) await bridge.close()
    }
  }

  /**
   * Structured output is not implemented for this adapter.
   *
   * @returns A promise that always rejects with an explanatory error.
   */
  structuredOutput(
    _options: StructuredOutputOptions<ClaudeCodeTextProviderOptions>,
  ): Promise<StructuredOutputResult<unknown>> {
    return Promise.reject(
      new Error(
        'Structured output is not yet supported by the in-sandbox Claude Code adapter. ' +
          'Use a model adapter (e.g. anthropic) for structured output, or omit outputSchema.',
      ),
    )
  }
}
/** Inputs handed to the Claude Code harness for a single run. */
export interface BuiltPrompt {
  prompt: string
  /** Claude Code session id to resume, when the caller threaded one through. */
  resume?: string
}

/**
 * Flatten message content to plain text.
 *
 * `null` yields an empty string, a string is returned as-is, and for part
 * arrays only `text` parts with string content contribute (concatenated with
 * no separator).
 */
function extractText(content: ModelMessage['content']): string {
  if (content === null) return ''
  if (typeof content === 'string') return content
  return content
    .map((part) =>
      part.type === 'text' && typeof part.content === 'string'
        ? part.content
        : '',
    )
    .join('')
}

/**
 * Convert chat history into the harness's `{ prompt, resume }` inputs.
 *
 * With a `sessionId`, only the trailing user message is sent and the session
 * is resumed. Without one, earlier user/assistant turns that contain text are
 * flattened into a `Previous conversation:` preamble ahead of the trailing
 * user message; all other messages (and turns with no text) are skipped.
 *
 * @param messages - Conversation history; the last entry must be a user
 *   message with non-blank text.
 * @param sessionId - Session to resume, or `undefined` for a fresh run.
 * @returns The prompt text, plus `resume` when a session id was supplied.
 * @throws Error when the last message is missing, is not a user message, or
 *   has no text content.
 */
export function buildPrompt(
  messages: Array<ModelMessage>,
  sessionId: string | undefined,
): BuiltPrompt {
  const lastMessage = messages.at(-1)
  const lastUserText =
    lastMessage?.role === 'user' ? extractText(lastMessage.content).trim() : ''

  if (!lastUserText) {
    throw new Error(
      'Claude Code adapter requires a trailing user message with text content.',
    )
  }

  if (sessionId !== undefined) {
    return { prompt: lastUserText, resume: sessionId }
  }

  // Extract each prior turn's text once and keep only non-empty
  // user/assistant turns.
  const priorTurns: Array<string> = []
  for (const message of messages.slice(0, -1)) {
    if (message.role !== 'user' && message.role !== 'assistant') continue
    const text = extractText(message.content).trim()
    if (text === '') continue
    priorTurns.push(`${message.role === 'user' ? 'User' : 'Assistant'}: ${text}`)
  }

  if (priorTurns.length === 0) {
    return { prompt: lastUserText }
  }

  return {
    prompt: `Previous conversation:\n${priorTurns.join('\n')}\n\n${lastUserText}`,
  }
}
// Local copy of the permission-mode union accepted by `--permission-mode`.
type PermissionMode = 'default' | 'acceptEdits' | 'bypassPermissions' | 'plan'

/**
 * Per-call provider options for the Claude Code adapter, passed via
 * `modelOptions` on `chat()`.
 */
export interface ClaudeCodeTextProviderOptions {
  /**
   * Resume an existing Claude Code session. The adapter emits the session id
   * of every run via a CUSTOM `claude-code.session-id` stream event; thread
   * it back here to continue that session (only the latest user message is
   * sent — the harness already holds the prior context).
   */
  sessionId?: string
  /**
   * When resuming, fork to a new session id instead of continuing the
   * original session.
   */
  forkSession?: boolean
  /** Per-call override of the configured max harness turns. */
  maxTurns?: number
  /** Per-call override of the configured permission mode. */
  permissionMode?: PermissionMode
  /** Per-call override of the allowed built-in tool list (tool names). */
  allowedTools?: Array<string>
  /** Per-call override of the disallowed built-in tool list (tool names). */
  disallowedTools?: Array<string>
  /** Per-call override of the harness working directory. */
  cwd?: string
}
*/ +export type SdkRawStreamEvent = + | { type: 'message_start'; message: { id?: string } } + | { + type: 'content_block_start' + index: number + content_block: { type: string } + } + | { + type: 'content_block_delta' + index: number + delta: { type: string; text?: string; thinking?: string } + } + | { type: 'content_block_stop'; index: number } + | { type: 'message_delta' } + | { type: 'message_stop' } + +export interface SdkPartialAssistantMessage { + type: 'stream_event' + event: SdkRawStreamEvent + parent_tool_use_id: string | null +} + +export interface SdkUsage { + input_tokens?: number + output_tokens?: number + cache_read_input_tokens?: number + cache_creation_input_tokens?: number +} + +export interface SdkResultMessage { + type: 'result' + subtype: + | 'success' + | 'error_max_turns' + | 'error_during_execution' + | 'error_max_budget_usd' + | 'error_max_structured_output_retries' + result?: string + errors?: Array + usage?: SdkUsage + total_cost_usd?: number + structured_output?: unknown +} + +/** + * Harness-internal system messages the translator deliberately ignores. + * (The real SDK union has many more members; unknown runtime types simply + * fall through every branch.) + */ +export interface SdkNoiseSystemMessage { + type: 'system' + subtype: + | 'status' + | 'permission_denied' + | 'plugin_install' + | 'session_state_changed' + | 'task_notification' + | 'task_progress' +} + +/** Other harness-internal top-level message types the translator ignores. 
*/ +export interface SdkNoiseMessage { + type: + | 'tool_progress' + | 'auth_status' + | 'rate_limit_event' + | 'prompt_suggestion' + | 'compact_boundary' +} + +export type AgentSdkMessage = + | SdkInitMessage + | SdkAssistantMessage + | SdkUserMessage + | SdkPartialAssistantMessage + | SdkResultMessage + | SdkNoiseSystemMessage + | SdkNoiseMessage diff --git a/packages/ai-claude-code/src/stream/translate.ts b/packages/ai-claude-code/src/stream/translate.ts new file mode 100644 index 000000000..67271d63c --- /dev/null +++ b/packages/ai-claude-code/src/stream/translate.ts @@ -0,0 +1,483 @@ +import { EventType, buildBaseUsage } from '@tanstack/ai' +import type { StreamChunk, TokenUsage } from '@tanstack/ai' +import type { + AgentSdkMessage, + SdkAssistantMessage, + SdkPartialAssistantMessage, + SdkResultMessage, + SdkToolResultContent, + SdkUsage, + SdkUserMessage, +} from './sdk-types' + +/** Name of the CUSTOM event carrying the Claude Code session id. */ +export const SESSION_ID_EVENT = 'claude-code.session-id' + +/** Server name used for bridged TanStack tools (model sees `mcp__tanstack__`). */ +export const BRIDGED_MCP_SERVER_NAME = 'tanstack' + +const BRIDGED_MCP_PREFIX = `mcp__${BRIDGED_MCP_SERVER_NAME}__` + +/** Claude Code-specific usage details attached to RUN_FINISHED usage. */ +export type ClaudeCodeProviderUsageDetails = { + /** Total cost of the harness run in USD, as reported by Claude Code. */ + totalCostUsd?: number +} + +export interface TranslateContext { + model: string + runId: string + threadId: string + parentRunId?: string + genId: () => string + /** Called as soon as the harness reports its session id. */ + onSessionId?: (sessionId: string) => void + /** Called for each raw SDK message, for logging. */ + onSdkMessage?: (message: AgentSdkMessage) => void +} + +/** + * Strip the bridged MCP server prefix so tool-call events match the TanStack + * tool names the application registered. Built-in harness tools (Bash, Read, + * Edit, ...) 
/**
 * Remove the bridged MCP server prefix from a tool name so it matches the name
 * the application registered. Names without that prefix are returned unchanged.
 */
export function stripMcpPrefix(name: string): string {
  if (!name.startsWith(BRIDGED_MCP_PREFIX)) return name
  return name.slice(BRIDGED_MCP_PREFIX.length)
}

/**
 * Collapse tool-result content to a single string: `undefined` becomes `''`,
 * a string is returned as-is, and for block arrays the string `text` fields
 * are concatenated in order (blocks without one contribute nothing).
 */
function stringifyToolResultContent(
  content: SdkToolResultContent | undefined,
): string {
  if (content === undefined) return ''
  if (typeof content === 'string') return content
  let combined = ''
  for (const block of content) {
    if (typeof block.text === 'string') combined += block.text
  }
  return combined
}

/**
 * Convert the harness's usage report into a `TokenUsage`.
 *
 * Missing token counts default to 0 and the total is prompt + completion.
 * Cache details are attached only for non-zero counts; the run cost is
 * attached whenever it is defined.
 *
 * @returns `undefined` when no usage was reported.
 */
function buildUsage(
  usage: SdkUsage | undefined,
  totalCostUsd: number | undefined,
): TokenUsage | undefined {
  if (!usage) return undefined

  const inputCount = usage.input_tokens ?? 0
  const outputCount = usage.output_tokens ?? 0
  const tokenUsage = buildBaseUsage({
    promptTokens: inputCount,
    completionTokens: outputCount,
    totalTokens: inputCount + outputCount,
  })

  const cacheDetails: { cacheWriteTokens?: number; cachedTokens?: number } = {}
  if (usage.cache_creation_input_tokens) {
    cacheDetails.cacheWriteTokens = usage.cache_creation_input_tokens
  }
  if (usage.cache_read_input_tokens) {
    cacheDetails.cachedTokens = usage.cache_read_input_tokens
  }
  if (Object.keys(cacheDetails).length > 0) {
    tokenUsage.promptTokensDetails = cacheDetails
  }

  if (totalCostUsd !== undefined) {
    tokenUsage.providerUsageDetails = { totalCostUsd }
  }
  return tokenUsage
}
/**
 * Translates a stream of Claude Code SDK (`stream-json`) messages into
 * AG-UI stream chunks.
 *
 * Mapping, as implemented below:
 * - `system` / `init`  -> RUN_STARTED, then a CUSTOM `SESSION_ID_EVENT` chunk
 *   carrying the session id, model and tool list (`ctx.onSessionId` is also
 *   invoked with the session id).
 * - `stream_event`     -> incremental TEXT_MESSAGE_* / REASONING_* chunks.
 *   Message ids announced by `message_start` are remembered so the whole
 *   `assistant` message that follows does not re-emit its text/thinking.
 * - `assistant`        -> whole text / thinking blocks (unless already
 *   streamed) and, for every `tool_use` block, TOOL_CALL_START / ARGS / END.
 * - `user`             -> one TOOL_CALL_RESULT per `tool_result` block.
 * - `result`           -> RUN_FINISHED (`stop` for `success`, `length` for
 *   `error_max_turns`) or RUN_ERROR for any other subtype.
 * - Messages whose `parent_tool_use_id` is not null are skipped; every other
 *   message type is ignored.
 *
 * Invariant: every TOOL_CALL_START is eventually paired with a
 * TOOL_CALL_RESULT (synthesized as `{"status":"interrupted"}` when the run
 * ends or aborts before the harness reported one), and any partially streamed
 * text / reasoning message is closed. This holds on all three exits: a
 * `result` message, the SDK stream simply ending without one, and the SDK
 * stream throwing.
 *
 * @param sdkMessages - SDK messages in arrival order.
 * @param ctx - Run identity (`model`, `runId`, `threadId`, optional
 *   `parentRunId`), the `genId` id factory, and the optional `onSdkMessage` /
 *   `onSessionId` observers.
 * @returns The translated chunks, in emission order.
 * @throws Re-throws whatever `sdkMessages` throws, after flushing open state.
 */
export async function* translateSdkStream(
  sdkMessages: AsyncIterable<AgentSdkMessage>,
  ctx: TranslateContext,
): AsyncIterable<StreamChunk> {
  const { model, runId, threadId, genId } = ctx
  const now = () => Date.now()

  let runStarted = false
  /** Tool calls started but with no result yet. */
  const unresolvedToolCalls = new Set<string>()
  /** Anthropic message ids whose text/thinking already streamed via partials. */
  const streamedMessageIds = new Set<string>()

  // Partial-stream state
  let partialMessageId: string | null = null
  let partialBlockType: string | null = null
  let partialTextMessageId: string | null = null
  let partialTextContent = ''
  let partialTextStarted = false
  let partialReasoningId: string | null = null

  /** Emits RUN_STARTED exactly once, no matter how often it is called. */
  function* startRun(): Generator<StreamChunk> {
    if (runStarted) return
    runStarted = true
    yield {
      type: EventType.RUN_STARTED,
      runId,
      threadId,
      model,
      timestamp: now(),
      ...(ctx.parentRunId !== undefined && { parentRunId: ctx.parentRunId }),
    }
  }

  /** Pairs every still-open tool call with a synthetic "interrupted" result. */
  function* synthesizeUnresolvedResults(): Generator<StreamChunk> {
    for (const toolCallId of unresolvedToolCalls) {
      yield {
        type: EventType.TOOL_CALL_RESULT,
        toolCallId,
        messageId: genId(),
        model,
        timestamp: now(),
        content: JSON.stringify({ status: 'interrupted' }),
      }
    }
    unresolvedToolCalls.clear()
  }

  /** Ends the partially streamed text message (if any) and resets its state. */
  function* closePartialText(): Generator<StreamChunk> {
    if (partialTextStarted && partialTextMessageId) {
      yield {
        type: EventType.TEXT_MESSAGE_END,
        messageId: partialTextMessageId,
        model,
        timestamp: now(),
      }
    }
    partialTextStarted = false
    partialTextMessageId = null
    partialTextContent = ''
  }

  /** Ends the partially streamed reasoning message (if any). */
  function* closePartialReasoning(): Generator<StreamChunk> {
    if (partialReasoningId) {
      yield {
        type: EventType.REASONING_MESSAGE_END,
        messageId: partialReasoningId,
        model,
        timestamp: now(),
      }
      yield {
        type: EventType.REASONING_END,
        messageId: partialReasoningId,
        model,
        timestamp: now(),
      }
    }
    partialReasoningId = null
  }

  /**
   * Closes everything that is still open: partial text, partial reasoning and
   * unresolved tool calls. Each step is a no-op when nothing is open, so it is
   * safe to call on every exit path (and more than once).
   */
  function* flushOpenState(): Generator<StreamChunk> {
    yield* closePartialText()
    yield* closePartialReasoning()
    yield* synthesizeUnresolvedResults()
  }

  /** Emits START / ARGS / END for one tool_use block and marks it unresolved. */
  function* emitToolUse(block: {
    id: string
    name: string
    input: unknown
  }): Generator<StreamChunk> {
    const toolCallName = stripMcpPrefix(block.name)
    const args = JSON.stringify(block.input ?? {})
    yield {
      type: EventType.TOOL_CALL_START,
      toolCallId: block.id,
      toolCallName,
      toolName: toolCallName,
      model,
      timestamp: now(),
    }
    yield {
      type: EventType.TOOL_CALL_ARGS,
      toolCallId: block.id,
      model,
      timestamp: now(),
      delta: args,
      args,
    }
    yield {
      type: EventType.TOOL_CALL_END,
      toolCallId: block.id,
      toolCallName,
      toolName: toolCallName,
      model,
      timestamp: now(),
      input: block.input ?? {},
    }
    unresolvedToolCalls.add(block.id)
  }

  /** Translates a whole (non-partial) assistant message. */
  function* handleAssistant(
    message: SdkAssistantMessage,
  ): Generator<StreamChunk> {
    // Text/thinking of this message was already delivered via stream_event
    // partials; tool_use blocks are still emitted from the whole message.
    const alreadyStreamed =
      message.message.id !== undefined &&
      streamedMessageIds.has(message.message.id)

    for (const block of message.message.content) {
      if (block.type === 'text') {
        if (alreadyStreamed) continue
        const messageId = message.message.id ?? genId()
        const text = (block as { text: string }).text
        yield {
          type: EventType.TEXT_MESSAGE_START,
          messageId,
          model,
          timestamp: now(),
          role: 'assistant',
        }
        yield {
          type: EventType.TEXT_MESSAGE_CONTENT,
          messageId,
          model,
          timestamp: now(),
          delta: text,
          content: text,
        }
        yield {
          type: EventType.TEXT_MESSAGE_END,
          messageId,
          model,
          timestamp: now(),
        }
      } else if (block.type === 'thinking') {
        if (alreadyStreamed) continue
        const reasoningId = genId()
        const thinking = (block as { thinking: string }).thinking
        yield {
          type: EventType.REASONING_START,
          messageId: reasoningId,
          model,
          timestamp: now(),
        }
        yield {
          type: EventType.REASONING_MESSAGE_START,
          messageId: reasoningId,
          role: 'reasoning' as const,
          model,
          timestamp: now(),
        }
        yield {
          type: EventType.REASONING_MESSAGE_CONTENT,
          messageId: reasoningId,
          delta: thinking,
          model,
          timestamp: now(),
        }
        yield {
          type: EventType.REASONING_MESSAGE_END,
          messageId: reasoningId,
          model,
          timestamp: now(),
        }
        yield {
          type: EventType.REASONING_END,
          messageId: reasoningId,
          model,
          timestamp: now(),
        }
      } else if (block.type === 'tool_use') {
        yield* emitToolUse(
          block as { id: string; name: string; input: unknown },
        )
      }
    }
  }

  /** Translates the tool_result blocks of a user message. */
  function* handleUser(message: SdkUserMessage): Generator<StreamChunk> {
    const content = message.message.content
    if (typeof content === 'string') return
    for (const block of content) {
      if (block.type !== 'tool_result') continue
      const toolResult = block as {
        tool_use_id: string
        content?: SdkToolResultContent
        is_error?: boolean
      }
      // A real result arrived, so no synthetic one is needed for this call.
      unresolvedToolCalls.delete(toolResult.tool_use_id)
      yield {
        type: EventType.TOOL_CALL_RESULT,
        toolCallId: toolResult.tool_use_id,
        messageId: genId(),
        model,
        timestamp: now(),
        content: stringifyToolResultContent(toolResult.content),
        ...(toolResult.is_error === true && { state: 'output-error' as const }),
      }
    }
  }

  /** Flushes open state, then emits the terminal RUN_FINISHED / RUN_ERROR. */
  function* handleResult(message: SdkResultMessage): Generator<StreamChunk> {
    yield* flushOpenState()

    const usage = buildUsage(message.usage, message.total_cost_usd)
    if (message.subtype === 'success') {
      yield {
        type: EventType.RUN_FINISHED,
        runId,
        threadId,
        model,
        timestamp: now(),
        finishReason: 'stop',
        ...(usage !== undefined && { usage }),
      }
    } else if (message.subtype === 'error_max_turns') {
      yield {
        type: EventType.RUN_FINISHED,
        runId,
        threadId,
        model,
        timestamp: now(),
        finishReason: 'length',
        ...(usage !== undefined && { usage }),
      }
    } else {
      const errorMessage =
        message.errors && message.errors.length > 0
          ? message.errors.join('; ')
          : `Claude Code run failed: ${message.subtype}`
      yield {
        type: EventType.RUN_ERROR,
        model,
        timestamp: now(),
        message: errorMessage,
        code: message.subtype,
        error: { message: errorMessage, code: message.subtype },
      }
    }
  }

  /** Translates one partial (`stream_event`) message. */
  function* handleStreamEvent(
    message: SdkPartialAssistantMessage,
  ): Generator<StreamChunk> {
    const event = message.event
    if (event.type === 'message_start') {
      partialMessageId = event.message.id ?? genId()
      // Remember the id so handleAssistant skips this message's text/thinking.
      streamedMessageIds.add(partialMessageId)
    } else if (event.type === 'content_block_start') {
      partialBlockType = event.content_block.type
      if (partialBlockType === 'text') {
        partialTextMessageId = partialMessageId ?? genId()
        partialTextContent = ''
        if (!partialTextStarted) {
          partialTextStarted = true
          yield {
            type: EventType.TEXT_MESSAGE_START,
            messageId: partialTextMessageId,
            model,
            timestamp: now(),
            role: 'assistant',
          }
        }
      } else if (partialBlockType === 'thinking') {
        partialReasoningId = genId()
        yield {
          type: EventType.REASONING_START,
          messageId: partialReasoningId,
          model,
          timestamp: now(),
        }
        yield {
          type: EventType.REASONING_MESSAGE_START,
          messageId: partialReasoningId,
          role: 'reasoning' as const,
          model,
          timestamp: now(),
        }
      }
    } else if (event.type === 'content_block_delta') {
      if (
        event.delta.type === 'text_delta' &&
        partialTextStarted &&
        partialTextMessageId &&
        typeof event.delta.text === 'string'
      ) {
        // `content` carries the text accumulated so far, `delta` the new part.
        partialTextContent += event.delta.text
        yield {
          type: EventType.TEXT_MESSAGE_CONTENT,
          messageId: partialTextMessageId,
          model,
          timestamp: now(),
          delta: event.delta.text,
          content: partialTextContent,
        }
      } else if (
        event.delta.type === 'thinking_delta' &&
        partialReasoningId &&
        typeof event.delta.thinking === 'string'
      ) {
        yield {
          type: EventType.REASONING_MESSAGE_CONTENT,
          messageId: partialReasoningId,
          delta: event.delta.thinking,
          model,
          timestamp: now(),
        }
      }
    } else if (event.type === 'content_block_stop') {
      if (partialBlockType === 'text') {
        yield* closePartialText()
      } else if (partialBlockType === 'thinking') {
        yield* closePartialReasoning()
      }
      partialBlockType = null
    }
  }

  try {
    for await (const sdkMessage of sdkMessages) {
      ctx.onSdkMessage?.(sdkMessage)

      if (sdkMessage.type === 'system' && sdkMessage.subtype === 'init') {
        yield* startRun()
        ctx.onSessionId?.(sdkMessage.session_id)
        yield {
          type: EventType.CUSTOM,
          model,
          timestamp: now(),
          name: SESSION_ID_EVENT,
          value: {
            sessionId: sdkMessage.session_id,
            model: sdkMessage.model,
            tools: sdkMessage.tools,
          },
        }
        continue
      }

      // Anything before init still needs RUN_STARTED first.
      yield* startRun()

      if (sdkMessage.type === 'stream_event') {
        if (sdkMessage.parent_tool_use_id !== null) continue
        yield* handleStreamEvent(sdkMessage)
      } else if (sdkMessage.type === 'assistant') {
        if (sdkMessage.parent_tool_use_id !== null) continue
        yield* handleAssistant(sdkMessage)
      } else if (sdkMessage.type === 'user') {
        if (sdkMessage.parent_tool_use_id !== null) continue
        yield* handleUser(sdkMessage)
      } else if (sdkMessage.type === 'result') {
        yield* handleResult(sdkMessage)
      }
      // All other SDK message types (status, hooks, notifications, ...) are
      // harness-internal and intentionally ignored.
    }

    // The SDK stream ended. If it ended without a `result` message, there may
    // still be open tool calls or partial messages; close them so the
    // invariant holds. After a `result` this emits nothing.
    yield* flushOpenState()
  } catch (error) {
    // The run is dying (abort or SDK failure). Close open partial messages and
    // pair any started tool calls with a synthetic result first so the next
    // request's pending-tool-call scan doesn't try to execute them, then let
    // the adapter surface the error as RUN_ERROR.
    yield* flushOpenState()
    throw error
  }
}
default: 'ask' })) + .permissionMode, + ).toBe('acceptEdits') + }) + + it('disallows write tools when fileWrite is denied', () => { + const flags = mapPolicyToClaudeFlags( + defineSandboxPolicy({ capabilities: { fileWrite: 'deny' } }), + ) + expect(flags.disallowedTools).toEqual( + expect.arrayContaining(['Write', 'Edit', 'MultiEdit']), + ) + }) + + it('disallows network tools when network is denied', () => { + const flags = mapPolicyToClaudeFlags( + defineSandboxPolicy({ capabilities: { network: 'deny' } }), + ) + expect(flags.disallowedTools).toEqual( + expect.arrayContaining(['WebFetch', 'WebSearch']), + ) + }) + + it('maps tool-name command rules to allow/deny lists', () => { + const flags = mapPolicyToClaudeFlags( + defineSandboxPolicy({ + commands: { allow: ['Read'], deny: ['Bash', 'pnpm *'] }, + }), + ) + // 'Bash' is a built-in tool name -> disallowed; 'pnpm *' is a command glob, + // not a tool name, so it's left to the permission-prompt tool. + expect(flags.allowedTools).toContain('Read') + expect(flags.disallowedTools).toContain('Bash') + expect(flags.disallowedTools).not.toContain('pnpm *') + }) +}) diff --git a/packages/ai-claude-code/tests/prompt.test.ts b/packages/ai-claude-code/tests/prompt.test.ts new file mode 100644 index 000000000..6e8dfcdf3 --- /dev/null +++ b/packages/ai-claude-code/tests/prompt.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, it } from 'vitest' +import { buildPrompt } from '../src/messages/prompt' +import type { ModelMessage } from '@tanstack/ai' + +const user = (content: ModelMessage['content']): ModelMessage => ({ + role: 'user', + content, +}) +const assistant = (content: ModelMessage['content']): ModelMessage => ({ + role: 'assistant', + content, +}) + +describe('buildPrompt', () => { + it('resumes with only the last user message when sessionId is provided', () => { + const result = buildPrompt( + [ + user('first question'), + assistant('first answer'), + user('follow-up question'), + ], + 'sess-1', + ) + 
expect(result).toEqual({ + prompt: 'follow-up question', + resume: 'sess-1', + }) + }) + + it('throws when sessionId is provided but there is no trailing user message', () => { + expect(() => buildPrompt([user('q'), assistant('a')], 'sess-1')).toThrow( + /user message/i, + ) + }) + + it('sends a single user message as-is for a fresh session', () => { + expect(buildPrompt([user('hello')], undefined)).toEqual({ + prompt: 'hello', + }) + }) + + it('flattens prior turns into a transcript preamble for fresh multi-turn history', () => { + const { prompt, resume } = buildPrompt( + [user('What is 2+2?'), assistant('4'), user('And times 3?')], + undefined, + ) + expect(resume).toBeUndefined() + expect(prompt).toBe( + 'Previous conversation:\nUser: What is 2+2?\nAssistant: 4\n\nAnd times 3?', + ) + }) + + it('skips tool messages and assistant tool-call-only turns when flattening', () => { + const messages: Array = [ + user('list files'), + { + role: 'assistant', + content: null, + toolCalls: [ + { + id: 't1', + type: 'function', + function: { name: 'ls', arguments: '{}' }, + }, + ], + } as unknown as ModelMessage, + { role: 'tool', content: 'file-a', toolCallId: 't1' }, + assistant('There is one file.'), + user('thanks, which one?'), + ] + const { prompt } = buildPrompt(messages, undefined) + expect(prompt).toBe( + 'Previous conversation:\nUser: list files\nAssistant: There is one file.\n\nthanks, which one?', + ) + }) + + it('extracts text from content-part arrays and ignores non-text parts', () => { + const { prompt } = buildPrompt( + [ + user([ + { type: 'text', content: 'describe ' }, + { + type: 'image', + source: { type: 'url', url: 'https://x/y.png' }, + } as never, + { type: 'text', content: 'this' }, + ] as ModelMessage['content']), + ], + undefined, + ) + expect(prompt).toBe('describe this') + }) + + it('throws when there is no usable user content at all', () => { + expect(() => buildPrompt([], undefined)).toThrow(/user message/i) + }) +}) diff --git 
a/packages/ai-claude-code/tests/text-adapter.test.ts b/packages/ai-claude-code/tests/text-adapter.test.ts new file mode 100644 index 000000000..8983050d3 --- /dev/null +++ b/packages/ai-claude-code/tests/text-adapter.test.ts @@ -0,0 +1,159 @@ +/** + * Deterministic test of the in-sandbox Claude Code adapter. + * + * Instead of the real `claude` CLI (nondeterministic, needs an API key — see + * the gated live smoke in testing/e2e), this runs a FAKE agent CLI: a tiny node + * script that reads the prompt from stdin and emits canned `stream-json` + * messages on stdout, exactly as `claude -p --output-format stream-json` would. + * It runs inside a real local-process sandbox, exercising the full + * spawn → stdout NDJSON → translate → StreamChunk path. + */ +import { afterAll, describe, expect, it } from 'vitest' +import * as fsp from 'node:fs/promises' +import * as os from 'node:os' +import * as path from 'node:path' +import { localProcessSandbox } from '@tanstack/ai-sandbox-local-process' +import { SandboxCapability } from '@tanstack/ai-sandbox' +import { claudeCodeText } from '../src/index' +import type { InternalLogger } from '@tanstack/ai/adapter-internals' +import type { CapabilityContext, StreamChunk } from '@tanstack/ai' +import type { SandboxHandle } from '@tanstack/ai-sandbox' + +const baseDir = path.join(os.tmpdir(), `tanstack-ai-cc-test-${Date.now()}`) +const provider = localProcessSandbox({ baseDir, removeOnDestroy: true }) + +afterAll(async () => { + await fsp.rm(baseDir, { recursive: true, force: true }) +}) + +// A stand-in for the `claude` CLI: ignores its flags, reads the prompt from +// stdin, then emits stream-json (system/init → assistant text → result). 
+const FAKE_CLAUDE = [ + `let input = ''`, + `process.stdin.on('data', (d) => { input += d })`, + `process.stdin.on('end', () => {`, + ` const w = (o) => process.stdout.write(JSON.stringify(o) + '\\n')`, + ` w({ type: 'system', subtype: 'init', session_id: 'sess-abc', model: 'haiku', tools: [] })`, + ` w({ type: 'assistant', message: { id: 'msg-1', content: [{ type: 'text', text: 'pong' }] }, parent_tool_use_id: null })`, + ` w({ type: 'result', subtype: 'success', result: 'pong', usage: { input_tokens: 1, output_tokens: 1 } })`, + `})`, +].join('\n') + +const noopLogger = { + request: () => {}, + provider: () => {}, + errors: () => {}, + agentLoop: () => {}, + warnings: () => {}, + debug: () => {}, +} as unknown as InternalLogger + +/** Build a capability context that hands the adapter the given sandbox. */ +function capabilityContextWith(handle: SandboxHandle): CapabilityContext { + const [, provideSandbox] = SandboxCapability + const ctx = { + capabilities: { markProvided: () => {}, has: () => true }, + } as unknown as CapabilityContext + provideSandbox(ctx, handle) + return ctx +} + +async function collect( + stream: AsyncIterable, +): Promise> { + const out: Array = [] + for await (const chunk of stream) out.push(chunk) + return out +} + +describe('claude-code in-sandbox adapter', () => { + it('spawns the agent CLI in the sandbox and streams translated events', async () => { + const sbx = await provider.create({}) + await sbx.fs.write('/workspace/fake-claude.mjs', FAKE_CLAUDE) + + const adapter = claudeCodeText('haiku', { + // Relative executable + cwd=/workspace (mapped to the sandbox root). 
+ claudeExecutable: 'node fake-claude.mjs', + streamPartials: false, + emitDiff: false, + }) + + const chunks = await collect( + adapter.chatStream({ + model: 'haiku', + messages: [{ role: 'user', content: 'say pong' }], + logger: noopLogger, + capabilities: capabilityContextWith(sbx), + }), + ) + + const types = chunks.map((c) => c.type as string) + expect(types[0]).toBe('RUN_STARTED') + + const sessionEvent = chunks.find( + (c) => + c.type === 'CUSTOM' && + (c as { name?: string }).name === 'claude-code.session-id', + ) + expect(sessionEvent).toBeDefined() + expect( + (sessionEvent as { value: { sessionId: string } }).value.sessionId, + ).toBe('sess-abc') + + const text = chunks + .filter((c) => c.type === 'TEXT_MESSAGE_CONTENT') + .map((c) => (c as { delta?: string }).delta ?? '') + .join('') + expect(text).toContain('pong') + + expect(chunks.some((c) => c.type === 'RUN_FINISHED')).toBe(true) + + await sbx.destroy() + }) + + it('requires a sandbox capability', async () => { + const adapter = claudeCodeText('haiku', { emitDiff: false }) + const chunks = await collect( + adapter.chatStream({ + model: 'haiku', + messages: [{ role: 'user', content: 'hi' }], + logger: noopLogger, + // no capabilities provided + }), + ) + const err = chunks.find((c) => c.type === 'RUN_ERROR') + expect(err).toBeDefined() + expect((err as { message?: string }).message).toMatch(/requires a sandbox/i) + }) + + it('bridges chat()-provided tools (starts + tears down the MCP bridge)', async () => { + const sbx = await provider.create({}) + await sbx.fs.write('/workspace/fake-claude.mjs', FAKE_CLAUDE) + const adapter = claudeCodeText('haiku', { + claudeExecutable: 'node fake-claude.mjs', + streamPartials: false, + emitDiff: false, + }) + // The fake claude ignores the injected --mcp-config; this checks that + // passing tools no longer errors and the bridge lifecycle is clean. 
+ const chunks = await collect( + adapter.chatStream({ + model: 'haiku', + messages: [{ role: 'user', content: 'say pong' }], + logger: noopLogger, + capabilities: capabilityContextWith(sbx), + tools: [ + { + name: 'getTime', + description: 'x', + inputSchema: { type: 'object', properties: {} }, + execute: () => Promise.resolve('now'), + } as never, + ], + }), + ) + expect(chunks.some((c) => c.type === 'RUN_ERROR')).toBe(false) + expect(chunks.some((c) => c.type === 'RUN_FINISHED')).toBe(true) + await sbx.destroy() + }) +}) diff --git a/packages/ai-claude-code/tests/translate.test.ts b/packages/ai-claude-code/tests/translate.test.ts new file mode 100644 index 000000000..607d2c457 --- /dev/null +++ b/packages/ai-claude-code/tests/translate.test.ts @@ -0,0 +1,485 @@ +import { describe, expect, it } from 'vitest' +import { translateSdkStream } from '../src/stream/translate' +import type { AgentSdkMessage } from '../src/stream/sdk-types' +import type { StreamChunk } from '@tanstack/ai' + +function makeContext() { + let id = 0 + return { + model: 'claude-opus-4-6', + runId: 'run-1', + threadId: 'thread-1', + genId: () => `gen-${++id}`, + } +} + +async function* fromArray( + messages: Array, +): AsyncIterable { + for (const message of messages) { + yield message + } +} + +async function collect( + messages: Array, +): Promise> { + const chunks: Array = [] + for await (const chunk of translateSdkStream( + fromArray(messages), + makeContext(), + )) { + chunks.push(chunk) + } + return chunks +} + +const init: AgentSdkMessage = { + type: 'system', + subtype: 'init', + session_id: 'sess-abc', + model: 'claude-opus-4-6', + tools: ['Bash', 'Read'], + cwd: '/tmp', +} + +const usage = { + input_tokens: 100, + output_tokens: 50, + cache_read_input_tokens: 10, + cache_creation_input_tokens: 5, +} + +function assistantText(text: string, messageId = 'msg-1'): AgentSdkMessage { + return { + type: 'assistant', + message: { id: messageId, content: [{ type: 'text', text }] }, + 
parent_tool_use_id: null, + } +} + +const resultSuccess: AgentSdkMessage = { + type: 'result', + subtype: 'success', + result: 'done', + usage, + total_cost_usd: 0.12, +} + +describe('translateSdkStream', () => { + it('translates a simple text turn into RUN_STARTED → CUSTOM → TEXT_* → RUN_FINISHED(stop)', async () => { + const chunks = await collect([init, assistantText('Hello!'), resultSuccess]) + + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + + expect(chunks[0]).toMatchObject({ + type: 'RUN_STARTED', + runId: 'run-1', + threadId: 'thread-1', + model: 'claude-opus-4-6', + }) + expect(chunks[3]).toMatchObject({ + type: 'TEXT_MESSAGE_CONTENT', + delta: 'Hello!', + content: 'Hello!', + }) + expect(chunks[5]).toMatchObject({ + type: 'RUN_FINISHED', + finishReason: 'stop', + }) + }) + + it('surfaces the session id via a CUSTOM claude-code.session-id event', async () => { + const chunks = await collect([init, assistantText('hi'), resultSuccess]) + const custom = chunks.find((c) => c.type === 'CUSTOM') + expect(custom).toMatchObject({ + type: 'CUSTOM', + name: 'claude-code.session-id', + value: { + sessionId: 'sess-abc', + model: 'claude-opus-4-6', + tools: ['Bash', 'Read'], + }, + }) + }) + + it('maps usage onto RUN_FINISHED including cache token details', async () => { + const chunks = await collect([init, assistantText('hi'), resultSuccess]) + const finished = chunks.find((c) => c.type === 'RUN_FINISHED') + expect(finished).toMatchObject({ + usage: { + promptTokens: 100, + completionTokens: 50, + totalTokens: 150, + promptTokensDetails: { cachedTokens: 10, cacheWriteTokens: 5 }, + }, + }) + }) + + it('emits resolved TOOL_CALL_* quadruples for harness tool activity and never finishes with tool_calls', async () => { + const messages: Array = [ + init, + { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { + type: 'tool_use', + id: 
'toolu_1', + name: 'Bash', + input: { command: 'ls' }, + }, + ], + }, + parent_tool_use_id: null, + }, + { + type: 'user', + message: { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'toolu_1', + content: 'file-a\nfile-b', + }, + ], + }, + parent_tool_use_id: null, + }, + assistantText('Found two files.', 'msg-2'), + resultSuccess, + ] + + const chunks = await collect(messages) + const types = chunks.map((c) => c.type) + expect(types).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TOOL_CALL_START', + 'TOOL_CALL_ARGS', + 'TOOL_CALL_END', + 'TOOL_CALL_RESULT', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + + expect(chunks[2]).toMatchObject({ + toolCallId: 'toolu_1', + toolCallName: 'Bash', + }) + expect(chunks[3]).toMatchObject({ + toolCallId: 'toolu_1', + delta: JSON.stringify({ command: 'ls' }), + }) + expect(chunks[4]).toMatchObject({ + toolCallId: 'toolu_1', + input: { command: 'ls' }, + }) + expect(chunks[5]).toMatchObject({ + type: 'TOOL_CALL_RESULT', + toolCallId: 'toolu_1', + content: 'file-a\nfile-b', + }) + + const finished = chunks.filter((c) => c.type === 'RUN_FINISHED') + expect(finished).toHaveLength(1) + expect(finished[0]).toMatchObject({ finishReason: 'stop' }) + }) + + it('strips the mcp__tanstack__ prefix from bridged tool names', async () => { + const chunks = await collect([ + init, + { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { + type: 'tool_use', + id: 'toolu_2', + name: 'mcp__tanstack__lookup_user', + input: { userId: 'u1' }, + }, + ], + }, + parent_tool_use_id: null, + }, + resultSuccess, + ]) + + const start = chunks.find((c) => c.type === 'TOOL_CALL_START') + expect(start).toMatchObject({ toolCallName: 'lookup_user' }) + }) + + it('marks errored tool results with state output-error', async () => { + const chunks = await collect([ + init, + { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { type: 'tool_use', id: 'toolu_3', name: 
'Bash', input: {} }, + ], + }, + parent_tool_use_id: null, + }, + { + type: 'user', + message: { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'toolu_3', + content: [{ type: 'text', text: 'command failed' }], + is_error: true, + }, + ], + }, + parent_tool_use_id: null, + }, + resultSuccess, + ]) + + const result = chunks.find((c) => c.type === 'TOOL_CALL_RESULT') + expect(result).toMatchObject({ + toolCallId: 'toolu_3', + content: 'command failed', + state: 'output-error', + }) + }) + + it('synthesizes interrupted tool results for unresolved tool calls before RUN_FINISHED', async () => { + const chunks = await collect([ + init, + { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { type: 'tool_use', id: 'toolu_4', name: 'Bash', input: {} }, + ], + }, + parent_tool_use_id: null, + }, + resultSuccess, + ]) + + const types = chunks.map((c) => c.type as string) + expect(types.indexOf('TOOL_CALL_RESULT')).toBeGreaterThan(-1) + expect(types.indexOf('TOOL_CALL_RESULT')).toBeLessThan( + types.indexOf('RUN_FINISHED'), + ) + expect(chunks.find((c) => c.type === 'TOOL_CALL_RESULT')).toMatchObject({ + toolCallId: 'toolu_4', + content: JSON.stringify({ status: 'interrupted' }), + }) + }) + + it('translates thinking blocks into REASONING_* events', async () => { + const chunks = await collect([ + init, + { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { type: 'thinking', thinking: 'pondering...' }, + { type: 'text', text: 'answer' }, + ], + }, + parent_tool_use_id: null, + }, + resultSuccess, + ]) + + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'REASONING_START', + 'REASONING_MESSAGE_START', + 'REASONING_MESSAGE_CONTENT', + 'REASONING_MESSAGE_END', + 'REASONING_END', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + expect( + chunks.find((c) => c.type === 'REASONING_MESSAGE_CONTENT'), + ).toMatchObject({ delta: 'pondering...' 
}) + }) + + it('maps error_max_turns to RUN_FINISHED(length)', async () => { + const chunks = await collect([ + init, + assistantText('partial'), + { + type: 'result', + subtype: 'error_max_turns', + usage, + total_cost_usd: 0.5, + errors: [], + }, + ]) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_FINISHED', + finishReason: 'length', + }) + }) + + it('maps error_during_execution to RUN_ERROR', async () => { + const chunks = await collect([ + init, + { + type: 'result', + subtype: 'error_during_execution', + usage, + total_cost_usd: 0, + errors: ['boom'], + }, + ]) + expect(chunks.at(-1)).toMatchObject({ + type: 'RUN_ERROR', + message: 'boom', + code: 'error_during_execution', + }) + }) + + it('skips subagent messages (parent_tool_use_id set)', async () => { + const chunks = await collect([ + init, + { + type: 'assistant', + message: { id: 'msg-sub', content: [{ type: 'text', text: 'inner' }] }, + parent_tool_use_id: 'toolu_task', + }, + assistantText('outer'), + resultSuccess, + ]) + + const contents = chunks.filter((c) => c.type === 'TEXT_MESSAGE_CONTENT') + expect(contents).toHaveLength(1) + expect(contents[0]).toMatchObject({ delta: 'outer' }) + }) + + it('streams partial text deltas and dedupes the whole assistant message', async () => { + const chunks = await collect([ + init, + { + type: 'stream_event', + event: { type: 'message_start', message: { id: 'msg-1' } }, + parent_tool_use_id: null, + }, + { + type: 'stream_event', + event: { + type: 'content_block_start', + index: 0, + content_block: { type: 'text' }, + }, + parent_tool_use_id: null, + }, + { + type: 'stream_event', + event: { + type: 'content_block_delta', + index: 0, + delta: { type: 'text_delta', text: 'Hel' }, + }, + parent_tool_use_id: null, + }, + { + type: 'stream_event', + event: { + type: 'content_block_delta', + index: 0, + delta: { type: 'text_delta', text: 'lo' }, + }, + parent_tool_use_id: null, + }, + { + type: 'stream_event', + event: { type: 'content_block_stop', index: 0 }, 
+ parent_tool_use_id: null, + }, + assistantText('Hello', 'msg-1'), + resultSuccess, + ]) + + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + expect(chunks[3]).toMatchObject({ delta: 'Hel', content: 'Hel' }) + expect(chunks[4]).toMatchObject({ delta: 'lo', content: 'Hello' }) + }) + + it('emits synthetic tool results then rethrows when the SDK stream throws mid-run', async () => { + async function* throwing(): AsyncIterable { + yield init + yield { + type: 'assistant', + message: { + id: 'msg-1', + content: [ + { type: 'tool_use', id: 'toolu_5', name: 'Bash', input: {} }, + ], + }, + parent_tool_use_id: null, + } + throw new Error('aborted') + } + + const chunks: Array = [] + await expect(async () => { + for await (const chunk of translateSdkStream(throwing(), makeContext())) { + chunks.push(chunk) + } + }).rejects.toThrow('aborted') + + expect(chunks.find((c) => c.type === 'TOOL_CALL_RESULT')).toMatchObject({ + toolCallId: 'toolu_5', + content: JSON.stringify({ status: 'interrupted' }), + }) + }) + + it('ignores unknown SDK message types', async () => { + const chunks = await collect([ + init, + { + type: 'system', + subtype: 'status', + status: 'compacting', + } as unknown as AgentSdkMessage, + assistantText('hi'), + resultSuccess, + ]) + expect(chunks.map((c) => c.type)).toEqual([ + 'RUN_STARTED', + 'CUSTOM', + 'TEXT_MESSAGE_START', + 'TEXT_MESSAGE_CONTENT', + 'TEXT_MESSAGE_END', + 'RUN_FINISHED', + ]) + }) +}) diff --git a/packages/ai-claude-code/tsconfig.json b/packages/ai-claude-code/tsconfig.json new file mode 100644 index 000000000..c38689f4e --- /dev/null +++ b/packages/ai-claude-code/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "dist" + }, + "include": ["src", "tests"], + "exclude": ["node_modules", "dist"] +} diff --git 
a/packages/ai-claude-code/vite.config.ts b/packages/ai-claude-code/vite.config.ts new file mode 100644 index 000000000..11f5b20b7 --- /dev/null +++ b/packages/ai-claude-code/vite.config.ts @@ -0,0 +1,37 @@ +import { defineConfig, mergeConfig } from 'vitest/config' +import { tanstackViteConfig } from '@tanstack/vite-config' +import packageJson from './package.json' + +const config = defineConfig({ + test: { + name: packageJson.name, + dir: './', + watch: false, + + globals: true, + environment: 'node', + include: ['tests/**/*.test.ts'], + coverage: { + provider: 'v8', + reporter: ['text', 'json', 'html', 'lcov'], + exclude: [ + 'node_modules/', + 'dist/', + 'tests/', + '**/*.test.ts', + '**/*.config.ts', + '**/types.ts', + ], + include: ['src/**/*.ts'], + }, + }, +}) + +export default mergeConfig( + config, + tanstackViteConfig({ + entry: ['./src/index.ts'], + srcDir: './src', + cjs: false, + }), +) diff --git a/packages/ai-client/src/chat-client.ts b/packages/ai-client/src/chat-client.ts index 468dc7618..7f3398d41 100644 --- a/packages/ai-client/src/chat-client.ts +++ b/packages/ai-client/src/chat-client.ts @@ -102,6 +102,14 @@ export class ChatClient< // focused on streaming. Undefined when no `persistence` adapter is configured. private readonly persistor?: ChatPersistor private currentRunId: string | null = null + // Resume tracking: the latest in-band cursor seen for the active run, so a + // reconnect can replay events after it. Cleared when the run terminates. + private lastResume: { runId: string; cursor: string } | null = null + private readonly autoResume: boolean + // When set, the next streamResponse() resumes this run/cursor instead of + // starting a fresh run (consumed once). 
+ private pendingResumeRunId: string | null = null + private pendingResumeCursor: string | null = null // Track the legacy `body` option and the canonical `forwardedProps` // option as separate slots so that `updateOptions({ forwardedProps })` // doesn't wipe a previously-set `body` (and vice versa). They are @@ -170,6 +178,7 @@ export class ChatClient< constructor(options: ChatClientOptions) { this.uniqueId = options.id || this.generateUniqueId('chat') this.threadId = options.threadId || this.generateUniqueId('thread') + this.autoResume = options.autoResume ?? true if (options.persistence) { this.persistor = new ChatPersistor( options.persistence, @@ -489,6 +498,66 @@ export class ChatClient< } } + /** + * Track the in-band resume cursor on each chunk so a reconnect can replay + * after it. Cleared by a terminal chunk that has no run id or the tracked one. + */ + private observeResumeCursor(chunk: StreamChunk): void { + if (chunk.type === 'RUN_FINISHED' || chunk.type === 'RUN_ERROR') { + // A server-signaled terminal event completes the run — drop its resume + // state. (A stream that merely ends without a terminal is an interruption + // and keeps its resume state so it can be continued.) + const runId = getChunkRunId(chunk) + if (!runId || this.lastResume?.runId === runId) { + this.lastResume = null + } + return + } + const cursor = + 'cursor' in chunk && typeof chunk.cursor === 'string' + ? chunk.cursor + : undefined + if (cursor && this.currentRunId) { + this.lastResume = { runId: this.currentRunId, cursor } + } + } + + /** + * The resume state for the active/interrupted run (the run id plus the last + * cursor seen), or null when there is nothing to resume. Apps can persist this + * to resume across a full reload; in-session reconnects use it automatically + * via {@link maybeAutoResume}. + */ + getResumeState(): { runId: string; cursor: string } | null { + return this.lastResume ?
{ ...this.lastResume } : null + } + + /** + * Resume a run by replaying its persisted events after the last cursor, then + * continuing live — without re-sending messages. Uses the supplied state, or + * the tracked in-session state. No-op (resolves to false) when there is nothing + * to resume or a stream is already in flight. + */ + resume(state?: { runId: string; cursor: string }): Promise<boolean> { + const target = state ?? this.lastResume + if (!target || this.isLoading) return Promise.resolve(false) + this.pendingResumeRunId = target.runId + this.pendingResumeCursor = target.cursor + return this.streamResponse() + } + + /** + * Auto-resume hook for framework integrations to call on mount / when the tab + * comes back online. Honors the `autoResume` option (default true) and only + * fires when an interrupted run is tracked and no stream is in flight. + */ + maybeAutoResume(): Promise<boolean> { + if (!this.autoResume || this.isLoading || !this.lastResume) { + return Promise.resolve(false) + } + return this.resume() + } + private generateUniqueId(prefix: string): string { return `${prefix}-${Date.now()}-${Math.random().toString(36).substring(7)}` } @@ -696,6 +765,7 @@ export class ChatClient< // per-run error only clears that run, while a runId-less RUN_ERROR is // treated as a session-level error that clears every active run. this.updateRunLifecycle(chunk) + this.observeResumeCursor(chunk) // Yield control back to event loop for UI updates await new Promise((resolve) => setTimeout(resolve, 0)) } @@ -854,7 +924,14 @@ export class ChatClient< // Track generation so a superseded stream's cleanup doesn't clobber the new one const generation = ++this.streamGeneration - const runId = `run-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` + // Resuming reuses the original runId so the server replays that run's events.
+ const resumeRunId = this.pendingResumeRunId + const resumeCursor = this.pendingResumeCursor + this.pendingResumeRunId = null + this.pendingResumeCursor = null + const runId = + resumeRunId ?? + `run-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` this.currentRunId = runId this.setIsLoading(true) @@ -945,6 +1022,7 @@ export class ChatClient< : { type: 'object' }, })), forwardedProps: { ...mergedBody }, + ...(resumeCursor ? { cursor: resumeCursor } : {}), } this.devtoolsBridge.beginRun(runContext.runId, this.threadId) activeDevtoolsRunId = runContext.runId diff --git a/packages/ai-client/src/connection-adapters.ts b/packages/ai-client/src/connection-adapters.ts index 3c4010047..56e4d2e1c 100644 --- a/packages/ai-client/src/connection-adapters.ts +++ b/packages/ai-client/src/connection-adapters.ts @@ -199,6 +199,12 @@ export interface RunAgentInputContext { threadId: string runId: string parentRunId?: string + /** + * Resume cursor. When set, the request resumes `runId` — the server replays + * persisted events after this cursor (see `chat({ cursor })`). On a resume the + * client sends no new messages. + */ + cursor?: string /** Client-declared tools to advertise in the request payload. */ clientTools?: Array<{ name: string @@ -443,6 +449,7 @@ function buildRunAgentInputBody( ...(runContext?.parentRunId !== undefined && { parentRunId: runContext.parentRunId, }), + ...(runContext?.cursor !== undefined && { cursor: runContext.cursor }), state: {}, messages: wireMessages, tools: runContext?.clientTools ?? [], diff --git a/packages/ai-client/src/types.ts b/packages/ai-client/src/types.ts index fa00811d7..d5cd7d3c4 100644 --- a/packages/ai-client/src/types.ts +++ b/packages/ai-client/src/types.ts @@ -391,6 +391,14 @@ export interface ChatClientBaseOptions< */ threadId?: string + /** + * Whether to auto-resume an interrupted run when {@link maybeAutoResume} is + * called (e.g. by a framework integration on mount / when the tab comes back + * online). 
Requires server-side persistence so the run's events can be + * replayed by `runId + cursor`. Defaults to `true`; set `false` to opt out. + */ + autoResume?: boolean + /** * Arbitrary client-controlled JSON forwarded to the server in the * AG-UI `RunAgentInput.forwardedProps` field. Use this for per-session diff --git a/packages/ai-client/tests/chat-client-resume.test.ts b/packages/ai-client/tests/chat-client-resume.test.ts new file mode 100644 index 000000000..2c5583f9a --- /dev/null +++ b/packages/ai-client/tests/chat-client-resume.test.ts @@ -0,0 +1,130 @@ +import { describe, expect, it } from 'vitest' +import { EventType } from '@tanstack/ai/client' +import { ChatClient } from '../src/chat-client' +import type { + ConnectConnectionAdapter, + RunAgentInputContext, +} from '../src/connection-adapters' +import type { StreamChunk } from '@tanstack/ai/client' + +/** + * Adapter that records each connect's runContext and yields scripted chunks. + * A script can be a function of the live `runContext` (so a test can emit a + * RUN_FINISHED carrying the same runId the client generated and passed in). + */ +type Script = + | Array + | ((ctx: RunAgentInputContext | undefined) => Array) + +function recordingAdapter(scripts: Array