feat: Add @n8n/agents package (#27560)

yehorkardash 2026-03-26 13:32:46 +02:00 committed by GitHub
parent d3e45bc126
commit 58fbaf4a88
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
147 changed files with 24961 additions and 227 deletions

View file

@ -0,0 +1,2 @@
OPENAI_API_KEY=
ANTHROPIC_API_KEY=

View file

@ -0,0 +1,138 @@
# AGENTS.md
Conventions for the `@n8n/agents` package.
## Code Style
- **No `_` prefix on private properties** — use `private` access modifier
without underscore. Write `private name: string`, not `private _name: string`.
- **Builder pattern with lazy build** — all public primitives use a fluent
builder API. **User code never calls `.build()`**. Builders are passed
directly to the consuming method (e.g. `agent.tool(myTool)`) which calls
`.build()` internally. Agent and Network have `run()`/`stream()` directly
on the class, which lazy-build via `ensureBuilt()` on first call. `build()`
is `protected` on Agent and Network to keep it out of the public API.
- **Zod for schemas** — all input/output schemas use Zod.
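A minimal sketch of these conventions (hypothetical class for illustration; the real builders carry far more configuration):
```typescript
class ExampleAgent {
	private name: string; // no `_` prefix on private properties
	private instructionsText = '';
	private built?: { name: string; instructions: string };

	constructor(name: string) {
		this.name = name;
	}

	instructions(text: string): this {
		this.instructionsText = text; // fluent: every setter returns `this`
		return this;
	}

	// Kept out of the public API; user code never calls build() directly.
	protected build() {
		return { name: this.name, instructions: this.instructionsText };
	}

	private ensureBuilt() {
		this.built ??= this.build(); // lazy build on first use
		return this.built;
	}

	async generate(input: string) {
		const built = this.ensureBuilt();
		// ...run the agent loop with `built` and `input`
		return { built, input };
	}
}
```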
## Package Structure
```
src/
  index.ts                        # Public API barrel export
  types/                          # Public TypeScript types
    index.ts                      # Re-exports consumable types
    telemetry.ts
    sdk/                          # Types aligned with builders (agent, eval, guardrail, mcp, memory, message, provider, tool)
    runtime/                      # Serializable runtime shapes (events, message lists)
    utils/                        # JSON typing helpers re-exported with public types
  sdk/                            # Fluent builders and SDK entry points
    agent.ts                      # Agent builder
    catalog.ts                    # Provider catalog fetch
    eval.ts                       # Evaluation primitives
    evaluate.ts                   # Evaluation runner over agents + dataset
    guardrail.ts                  # Guardrail builder
    mcp-client.ts                 # MCP client integration
    memory.ts                     # Memory builder
    message.ts                    # LLM/DB message helpers
    network.ts                    # Network builder
    provider-tools.ts             # Provider-defined tool factories
    telemetry.ts                  # Telemetry builder (OTel, redaction)
    tool.ts                       # Tool builder
    verify.ts                     # Verification utilities
  evals/                          # Built-in eval scorers; exported as namespace `evals` from index
  runtime/                        # Internal — never exported from index.ts
    agent-runtime.ts              # Core agent execution engine (AI SDK)
    tool-adapter.ts               # Tool execution, branded suspend detection
    stream.ts                     # Streaming helpers
    model-factory.ts              # Model instantiation
    memory-store.ts               # Conversation / working-memory persistence hooks
    working-memory.ts             # In-run working memory
    message-list.ts               # Message list + serialization for agent loop
    messages.ts                   # Message normalization
    mcp-connection.ts             # MCP connection lifecycle
    mcp-tool-resolver.ts
    run-state.ts                  # Run / checkpoint state
    event-bus.ts                  # Internal agent events
    runtime-helpers.ts
    title-generation.ts
    strip-orphaned-tool-messages.ts
    logger.ts
  storage/                        # Optional persisted memory backends (exported)
    sqlite-memory.ts
    postgres-memory.ts
  workspace/                      # Workspace, sandbox, filesystem, built-in tools (exported)
  integrations/                   # Optional integrations (exported where applicable)
    langsmith.ts                  # LangSmith telemetry adapter (peer `langsmith`)
  utils/                          # Internal helpers (e.g. Zod utilities); not barrel-exported
examples/
  basic-agent.ts                  # Sample snippet; included in format/lint paths
docs/
  agent-runtime-architecture.md   # In-package runtime notes
```
The **`index.ts`** surface also exports `Workspace` / sandbox / filesystem types,
`SqliteMemory` / `PostgresMemory`, `LangSmithTelemetry`, and `evals` alongside the
core SDK builders.
Optional **peer dependencies** (telemetry): `langsmith`, `@opentelemetry/sdk-trace-node`,
`@opentelemetry/sdk-trace-base`, `@opentelemetry/exporter-trace-otlp-http` — install these
only when wiring that telemetry.
## Credential Pattern
Agents declare credential requirements via `.credential('name')`. The execution
engine resolves the name to an API key and injects it into the model config.
User code never touches raw API keys.
```typescript
const agent = new Agent('assistant')
	.model('anthropic/claude-sonnet-4-5')
	.credential('anthropic')
	.instructions('You are helpful.');
```
## Engine Injection (EngineAgent)
The execution engine extends `Agent` and overrides `protected build()` to
inject infrastructure (checkpoint storage, credentials) before calling
`super.build()`. This is the pattern for all engine-level concerns:
```typescript
class EngineAgent extends Agent {
	protected build() {
		this.checkpoint(store);
		const cred = this.declaredCredential;
		if (cred) this.resolvedApiKey = resolve(cred);
		return super.build();
	}
}
```
## Testing
- Unit tests live in `src/__tests__/`, integration tests in `src/__tests__/integration/`
- Unit tests use Jest (`pnpm test` / `pnpm test:unit`)
- Integration tests use Vitest (`pnpm test:integration`) with real LLM calls
- A `.env` file at the package root is loaded automatically by the vitest config.
Always assume it exists when running integration tests. Never commit it.
- Required keys:
- `ANTHROPIC_API_KEY` — all integration tests
- `OPENAI_API_KEY` — semantic recall tests (embeddings)
- Tests skip automatically when the required API key is not set
- Run from the package directory: `cd packages/@n8n/agents && pnpm test`
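The automatic skipping works roughly like the sketch below (a hypothetical simplification of the `describeIf` helper the integration tests import from their local `helpers` module):
```typescript
import { describe } from 'vitest';

// Skip a whole suite when the provider's API key is missing.
export function describeIf(provider: 'anthropic' | 'openai') {
	const key =
		provider === 'anthropic' ? process.env.ANTHROPIC_API_KEY : process.env.OPENAI_API_KEY;
	return key ? describe : describe.skip;
}
```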
## Documentation
- Runtime architecture notes: `docs/agent-runtime-architecture.md` (this package).
- Spec-driven work in the wider repo may use `.claude/specs/` (see repo
`.claude/skills/spec-driven-development`).
## Building
```bash
cd packages/@n8n/agents
pnpm build # rimraf dist && tsc -p tsconfig.build.json → dist/
pnpm typecheck # tsc --noEmit
pnpm test # jest (unit)
```

View file

@ -0,0 +1,451 @@
# Agent Runtime Architecture
This document describes the internal architecture of the `@n8n/agents` agent
runtime — the execution engine that drives a single agent turn from input to
final response.
---
## Overview
`AgentRuntime` (`src/runtime/agent-runtime.ts`) is the core execution engine
for a single agent turn. It uses the Vercel AI SDK directly (`generateText` /
`streamText`) and is responsible for:
- Building the LLM message context (memory history, semantic recall, working
memory in the system prompt, user input)
- Stripping orphaned tool-call/tool-result pairs before LLM calls
(`stripOrphanedToolMessages`)
- Running the agentic tool-call loop (default **20** iterations,
`MAX_LOOP_ITERATIONS`)
- **Configurable tool-call concurrency** — tools in one LLM turn run in batches
of `toolCallConcurrency` (default `1`; `Infinity` runs all executable calls
in parallel)
- Suspending and resuming runs for Human-in-the-Loop (HITL) **and** for tools
that return a branded suspend result (`suspendSchema` / `resumeSchema`)
- Persisting new messages to a memory store at the end of each completed turn,
optionally saving **embeddings** for semantic recall
- Extracting and persisting **working memory** from assistant output when
configured
- Optional **structured output** (`Output.object` + Zod), **thinking** /
reasoning provider options, **title generation**, and **telemetry** (AI SDK
`experimental_telemetry`)
- **Token usage and cost** (catalog pricing via `getModelCost` / `computeCost`)
- Emitting lifecycle events via `AgentEventBus`
- Tracking run state (`idle` → `running` → `success / failed / suspended / cancelled`)
There are two parallel execution paths — non-streaming (`generate`) and
streaming (`stream`) — that mirror each other in structure.
```mermaid
graph TD
A[User Input] --> B[normalizeInput]
B --> C[buildMessageList]
C --> D{generate or stream?}
D -->|generate| E[runGenerateLoop]
D -->|stream| F[startStreamLoop → runStreamLoop]
E --> G[saveToMemory]
F --> G
G --> H[Return Result]
```
---
## Public API — BuiltAgent
`Agent` implements `BuiltAgent`, which exposes the full public surface:
| Method | Description |
|--------|-------------|
| `generate(input, options?)` | Non-streaming run; returns `GenerateResult` (errors often surface as `finishReason: 'error'` and `error` instead of throwing) |
| `stream(input, options?)` | Streaming run; returns `StreamResult` with `runId` and `stream` |
| `resume(method, data, options)` | Resume a suspended tool with payload `data`; `options` must include `runId` and `toolCallId` |
| `approve(method, options)` | HITL approval — calls `resume` with `{ approved: true }` |
| `deny(method, options)` | HITL decline — calls `resume` with `{ approved: false }` |
| `on(event, handler)` | Register a lifecycle event handler |
| `abort()` | Cancel the currently running agent |
| `getState()` | Return the latest `SerializableAgentState` snapshot |
| `asTool(description)` | Wrap the agent as a `BuiltTool` for multi-agent composition |
`ExecutionOptions` includes `abortSignal?: AbortSignal`, forwarded into
`AgentEventBus.resetAbort()` so callers can cancel via an external signal as
well as `agent.abort()`.
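For example (a sketch; the model id is illustrative, and abort surfaces as a result rather than a throw):
```typescript
import { Agent } from '@n8n/agents';

const agent = new Agent('assistant')
	.model('anthropic/claude-sonnet-4-5')
	.instructions('You are helpful.');

const controller = new AbortController();

// The external signal is forwarded into resetAbort(), so aborting the
// controller cancels the in-flight run just like agent.abort() would.
const pending = agent.generate('Summarize the latest report', {
	abortSignal: controller.signal,
});
controller.abort(); // or: agent.abort()

const result = await pending;
console.log(result.finishReason); // 'error' rather than a thrown exception
```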
---
## Event system
### AgentEventBus
`AgentEventBus` (`src/runtime/event-bus.ts`) is the internal publish/subscribe
channel shared between `Agent` (registers handlers via `on()`) and
`AgentRuntime` (emits events during the loop). A single bus instance is created
when the SDK wires the runtime and passed in via `AgentRuntimeConfig`.
```mermaid
flowchart LR
UserCode -->|"agent.on(event, handler)"| AgentEventBus
AgentEventBus -->|"passed via config"| AgentRuntime
AgentRuntime -->|"bus.emit(data)"| AgentEventBus
AgentEventBus -->|"calls handlers synchronously"| UserCode
```
Handlers have the signature `(data: AgentEventData) => void` — there is **no**
separate “controls” object; cancellation is done with `agent.abort()` on the
same bus that holds the `AbortController`.
`AgentMiddleware` in `types/runtime/event.ts` is a small alias type
(`on` mirrors the agent) for future middleware-style composition.
### Event types
| Event | When emitted | Payload |
|-------|----------------|---------|
| `AgentStart` | Start of `initRun`, right after `status: running`; before `ensureModelCost` / `buildMessageList` | — |
| `AgentEnd` | Successful completion after persistence / cleanup; payload is assistant-facing messages (`finalized.messages` in `generate`, `list.responseDelta()` in `stream`) | `{ messages }` |
| `TurnStart` | Top of each loop iteration, before the LLM call | — |
| `TurnEnd` | After tool calls for the iteration are processed; requires an assistant message in the new messages | `{ message, toolResults }` |
| `ToolExecutionStart` | Before `processToolCall` runs the handler | `{ toolCallId, toolName, args }` |
| `ToolExecutionEnd` | After the tool returns, errors, or is satisfied from an existing AI SDK tool-result | `{ toolCallId, toolName, result, isError }` |
| `Error` | Unhandled failures (not user **abort**); also emitted on some stream failures | `{ message, error }` |
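Registering handlers looks like this (sketch; it assumes the `AgentEvent` enum from `types/runtime/event.ts` is re-exported from the package index):
```typescript
import { Agent, AgentEvent } from '@n8n/agents';

const agent = new Agent('assistant').model('anthropic/claude-sonnet-4-5');

// Handlers receive AgentEventData and are called synchronously on the bus.
agent.on(AgentEvent.ToolExecutionStart, (data) => {
	console.log('tool call starting:', data);
});
agent.on(AgentEvent.AgentEnd, (data) => {
	console.log('turn finished:', data);
});
```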
---
## abort()
`agent.abort()` synchronously aborts the internal `AbortController`. The
resulting signal is passed to `generateText` / `streamText` as `abortSignal`
so in-flight HTTP cancels promptly. The loop also checks `bus.isAborted` at
batch boundaries.
`AgentEventBus.resetAbort(externalSignal?)` runs at the start of each run: it
replaces the controller and, if `ExecutionOptions.abortSignal` is set, forwards
that signal's abort to the internal controller.
### Abort behaviour
| Mode | Behaviour on abort |
|------|-------------------|
| `generate` | Catches abort and returns `{ runId, messages, finishReason: 'error', ... }` without emitting `AgentEvent.Error` |
| `stream` | Writes `{ type: 'error', error }` then finishes / closes cleanly |
State becomes `cancelled`. `resetAbort()` supplies a fresh controller per run
so the same `Agent` instance can run again.
---
## getState()
`agent.getState()` returns a shallow copy of `SerializableAgentState`. Before
the first `generate()` / `stream()`, the `Agent` builder returns a minimal idle
state with an empty `messageList` (`messages`, `historyIds`, `inputIds`,
`responseIds` all empty).
### State machine
```mermaid
stateDiagram-v2
[*] --> idle: constructed
idle --> running: generate() / stream() / resume()
running --> success: loop completes normally
running --> failed: unhandled error
running --> suspended: tool suspends (HITL or suspend/resume)
running --> cancelled: abort() / external signal
suspended --> running: resume() / approve() / deny() loads checkpoint
```
### AgentRunState values
| Status | Meaning |
|--------|---------|
| `idle` | No run yet (or builder before first lazy build) |
| `running` | Loop in progress |
| `success` | Turn finished and checkpoint cleaned up when applicable |
| `failed` | Unrecoverable error path |
| `suspended` | Awaiting resume (checkpoint stored under `runId`) |
| `cancelled` | Aborted |
| `waiting` | Reserved |
### SerializableAgentState
Important fields (see `types/sdk/agent.ts`):
```typescript
interface SerializableAgentState {
persistence?: AgentPersistenceOptions; // threadId + resourceId when using memory
status: AgentRunState;
messageList: SerializedMessageList;
resumeData?: AgentResumeData;
pendingToolCalls: Record<string, PendingToolCall>;
finishReason?: FinishReason;
usage?: TokenUsage;
executionOptions?: PersistedExecutionOptions; // maxIterations only — persisted on suspend
}
```
`PendingToolCall` distinguishes tools already suspended (`suspended: true`,
`suspendPayload`, `resumeSchema`) from calls not yet executed (`suspended:
false`) when a batch stops at the first suspension.
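Reading the snapshot (sketch; field access follows the interface above):
```typescript
import { Agent } from '@n8n/agents';

const agent = new Agent('assistant').model('anthropic/claude-sonnet-4-5');

const state = agent.getState(); // minimal idle state before the first run
if (state.status === 'suspended') {
	for (const [toolCallId, pending] of Object.entries(state.pendingToolCalls)) {
		// Suspended entries carry suspendPayload / resumeSchema; the rest were never executed.
		console.log(toolCallId, pending.suspended ? pending.suspendPayload : 'not yet executed');
	}
}
```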
---
## asTool()
`agent.asTool(description)` wraps the agent as a `BuiltTool`. The handler calls
`agent.generate(input, { telemetry: ctx.parentTelemetry })`, collects assistant
text, and returns `{ result: string }`. When the sub-run produces usage,
results are wrapped so the parent runtime can merge **`SubAgentUsage`** and
**`totalCost`** into the parent `GenerateResult` / stream `finish` chunk.
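Sketch of the composition (model ids and prompts are illustrative):
```typescript
import { Agent } from '@n8n/agents';

const researcher = new Agent('researcher')
	.model('anthropic/claude-sonnet-4-5')
	.instructions('Research the topic and report structured findings.');

const orchestrator = new Agent('orchestrator')
	.model('anthropic/claude-sonnet-4-5')
	.instructions('Delegate research to the researcher tool, then summarize.')
	.tool(researcher.asTool('Delegate research tasks to the research specialist'));

// Sub-run usage is merged into the parent result as subAgentUsage / totalCost.
const result = await orchestrator.generate('Research RAG architectures and summarize them');
```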
---
## Message types
| Type | Definition | Purpose |
|------|------------|---------|
| `AgentMessage` | `Message \| CustomMessage` | Internal representation; custom messages are UI-facing |
| `ModelMessage` (AI SDK) | Roles wired to the provider | LLM-facing; custom messages never appear here |
**Custom messages** are stripped for the model via `filterLlmMessages()` before
`toAiMessages()`.
`messages.ts` provides `toAiMessages`, `fromAiMessages`, and consumers rely on
`filterLlmMessages` from `sdk/message.ts`.
**Tool results vs model:** optional `BuiltTool.toModelOutput` maps the stored /
event result before building the `tool-result` the LLM sees; `toMessage` still
uses the raw result for custom DB messages.
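Sketch of the two mappings on a tool (`toMessage` mirrors the builder usage in the integration tests; treating `toModelOutput` as a builder method is an assumption based on the `BuiltTool` field above):
```typescript
import { z } from 'zod';
import { Tool } from '@n8n/agents';

const deleteTool = new Tool('delete_file')
	.description('Delete a file at the given path')
	.input(z.object({ path: z.string() }))
	.output(z.object({ deleted: z.boolean(), path: z.string() }))
	.handler(async ({ path }) => ({ deleted: true, path }))
	// Assumed setter: shapes the tool-result part the LLM sees.
	.toModelOutput((output) => ({ summary: `deleted ${(output as { path: string }).path}` }))
	// Custom DB/UI message is still built from the raw result.
	.toMessage((output) => ({
		type: 'custom' as const,
		data: { deletedPath: (output as { path: string }).path },
	}));
```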
---
## AgentMessageList
`AgentMessageList` (`src/runtime/message-list.ts`) is the central structure for
one turn. It keeps a single append-only array and **three Sets** for
provenance: history, input, response.
### Sources
| Source | Added by | `turnDelta()` | `responseDelta()` | `forLlm()` |
|--------|----------|---------------|-------------------|------------|
| **history** | `addHistory()` | No | No | Yes (after filters) |
| **input** | `addInput()` | Yes | No | Yes (after filters) |
| **response** | `addResponse()` | Yes | Yes | Yes (after filters) |
### Key methods
```
forLlm(baseInstructions, instructionProviderOptions?)
	→ [system + working memory block, ...toAiMessages(filterLlm(stripOrphaned(all)))]
turnDelta()      → input + response messages (memory persistence)
responseDelta()  → response only (user-facing / GenerateResult.messages)
serialize()      → { messages, historyIds, inputIds, responseIds }
deserialize()    → restores all three sets via stable message ids
```
### Serialization
Serialized state stores **message id arrays** per set (`historyIds`,
`inputIds`, `responseIds`), not a single `historyCount`. After a round-trip,
history / input / response classification is fully restored — required for
correct `turnDelta()` after suspend/resume.
`stripOrphanedToolMessages` runs on loaded history and inside `forLlm()` so
incomplete tool pairs do not reach the model.
---
## Agentic loop
Both `runGenerateLoop` and `runStreamLoop` follow the same high-level pattern:
emit `TurnStart`, call the model with `list.forLlm(...)`, append assistant /
tool traffic via `addResponse`, process tool calls through
`iterateToolCallsConcurrent` (batched by `toolCallConcurrency`), handle
suspension / persistence, repeat until finish or max iterations.
### Tool execution and concurrency
- Executable tool calls (non-provider-executed) are processed in windows of size
`this.concurrency` (`toolCallConcurrency ?? 1`).
- Each window uses `Promise.allSettled` so all tools in the batch settle; a
suspension in the batch stops **subsequent** batches and records remaining
calls in `pending` without `suspendPayload`.
- **HITL** and **suspend/resume** flows share the same pending-map machinery;
`processToolCall` validates JSON Schema or Zod **input** schemas (Ajv / Zod)
before invoking the handler.
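The batching itself is conceptually simple; a simplified sketch (not the runtime's exact code):
```typescript
// Process tool calls in windows of `concurrency`; stop scheduling further
// windows once a suspension has been observed in the current one.
async function runInBatches<T>(
	calls: T[],
	concurrency: number,
	exec: (call: T) => Promise<{ suspended: boolean }>,
) {
	let suspended = false;
	for (let i = 0; i < calls.length && !suspended; i += concurrency) {
		const window = calls.slice(i, i + concurrency);
		// allSettled: every call in the window settles even if one suspends or throws.
		const settled = await Promise.allSettled(window.map(exec));
		suspended = settled.some((r) => r.status === 'fulfilled' && r.value.suspended);
	}
	return suspended;
}
```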
### Loop invariants
1. **Single list** — `addResponse` accumulates assistant, tool, and custom
messages for the turn.
2. **System prompt** — rebuilt each call via `forLlm`; working memory content
is injected there, not as separate list rows.
3. **Suspension preserves pending calls** — remaining calls in the batch and
later calls are recorded for resume.
4. **Max iterations** — default 20 (`MAX_LOOP_ITERATIONS`).
5. **Abort** — checked between batches; signal passed into AI SDK calls.
### Non-streaming vs streaming
| Aspect | `runGenerateLoop` | `runStreamLoop` |
|--------|-------------------|-----------------|
| AI SDK | `generateText()` | `streamText()` |
| Output | `GenerateResult` | `StreamChunk`s via `WritableStream` |
| Errors | Returned on `GenerateResult` (`error`, `finishReason: 'error'`) for many paths | Error chunks + `closeStreamWithError` |
| Suspension | `pendingSuspend` array on `GenerateResult` | `tool-call-suspended` chunks, then `finish` |
---
## HITL and suspend/resume
**HITL (approval):** tools can require approval (`requiresApproval` /
`needsApprovalFn`). The runtime treats approval outcomes like resume data:
`approve()` / `deny()` delegate to `resume()` with `{ approved: true | false }`.
**Programmatic suspend:** tools can return a branded suspend object; the runtime
requires `resumeSchema` (Zod → JSON Schema for clients) and validates
`suspendPayload` when `suspendSchema` is set.
```mermaid
sequenceDiagram
participant Caller
participant AgentRuntime
participant CheckpointStore
participant LLM
Caller->>AgentRuntime: generate/stream(input)
AgentRuntime->>LLM: generateText/streamText
LLM-->>AgentRuntime: tool calls
Note over AgentRuntime: Suspension: persist pendingToolCalls + messageList
AgentRuntime->>CheckpointStore: suspend(runId, state)
AgentRuntime-->>Caller: pendingSuspend / tool-call-suspended chunks
Caller->>AgentRuntime: resume/approve/deny(method, …)
AgentRuntime->>CheckpointStore: resume(runId) — load only
AgentRuntime->>AgentRuntime: processToolCall / iteratePendingToolCallsConcurrent
AgentRuntime->>LLM: Continue loop if needed
AgentRuntime->>CheckpointStore: complete(runId) when finished
```
With **concurrency > 1**, multiple tools may suspend in the same turn; the
stream can emit **multiple** `tool-call-suspended` chunks, and `GenerateResult`
can carry **`pendingSuspend`** with multiple entries.
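From the caller's side the whole cycle looks like this (sketch mirroring the integration tests; it assumes `BuiltAgent` is exported and `agent` was built with a tool that can suspend):
```typescript
import type { BuiltAgent } from '@n8n/agents';

declare const agent: BuiltAgent; // built elsewhere with a suspending (e.g. approval) tool

let result = await agent.generate('Delete /tmp/a.txt and /tmp/b.txt');

// Resume suspended tools one at a time until none remain.
while (result.pendingSuspend && result.pendingSuspend.length > 0) {
	const { runId, toolCallId } = result.pendingSuspend[0];
	result = await agent.resume('generate', { approved: true }, { runId, toolCallId });
}

console.log(result.finishReason); // 'stop' once every suspension has been resolved
```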
---
## RunStateManager
`RunStateManager` (`src/runtime/run-state.ts`) persists suspended runs through
a **`CheckpointStore`**:
- Default: in-memory `MemoryCheckpointStore` when `checkpointStorage` is
`'memory'` or omitted.
- Custom: pass a `CheckpointStore` implementation for durability.
`suspend(runId, state)` writes the state. `resume(runId)` **loads** the state
and returns it with `status: 'running'`; it does **not** delete the key.
`complete(runId)` deletes the checkpoint when the run finishes without remaining
suspensions.
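Configuration happens on the builder (sketch; a custom store must implement `CheckpointStore`):
```typescript
import { Agent } from '@n8n/agents';

// Default: in-memory checkpoints, fine for tests and short-lived processes.
const writer = new Agent('writer')
	.model('anthropic/claude-sonnet-4-5')
	.checkpoint('memory');

// Durable checkpoints: pass your own CheckpointStore implementation instead
// (hypothetical `myStore`):
// const durableWriter = new Agent('writer').checkpoint(myStore);
```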
### Known limitations
In-memory checkpoints grow until `complete()` runs. Production stores should
implement TTL or eviction as needed.
---
## Memory persistence
At end of turn, `saveToMemory()` uses `list.turnDelta()` and
`saveMessagesToThread`. If **semantic recall** is configured with an embedder
and `memory.saveEmbeddings`, new messages are embedded and stored.
**Working memory:** when configured, the runtime parses
`<working_memory>…</working_memory>` regions from assistant text, validates structured JSON if a
schema exists, strips the tags from the visible message, and asynchronously
persists via `memory.saveWorkingMemory`.
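A sketch of the extraction step (hypothetical simplification; the real logic lives in `src/runtime/working-memory.ts` and also validates structured JSON against the schema):
```typescript
const WORKING_MEMORY_RE = /<working_memory>([\s\S]*?)<\/working_memory>/g;

// Pull working-memory blocks out of assistant text and strip the tags
// from the text the user sees.
function extractWorkingMemory(assistantText: string) {
	const blocks = [...assistantText.matchAll(WORKING_MEMORY_RE)].map((m) => m[1].trim());
	const visibleText = assistantText.replace(WORKING_MEMORY_RE, '').trim();
	return { blocks, visibleText };
}
```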
**Thread titles:** `titleGeneration` triggers `generateThreadTitle` (fire-and-forget)
after a successful save when persistence and memory are present.
---
## Stream architecture
The streaming path uses a `TransformStream`: `startStreamLoop` returns the
readable side immediately; the loop writes chunks in the background.
**`convertChunk`** (in `stream.ts`) maps AI SDK v6 `TextStreamPart` values to our
`StreamChunk` union, including `finish-step` / `finish` consolidation.
### StreamChunk types (representative)
| Type | Content |
|------|---------|
| `text-delta` | Incremental text |
| `reasoning-delta` | Thinking / reasoning text |
| `tool-call-delta` | Streaming tool name / arguments |
| `message` | Full assistant or tool message |
| `tool-call-suspended` | Suspension: `runId`, `toolCallId`, tool metadata, optional `resumeSchema`, `suspendPayload` |
| `finish` | `finishReason`, `usage` (with optional **cost**), `model`, optional **`structuredOutput`**, **`subAgentUsage`**, **`totalCost`** |
| `error` | Failure or abort |
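Consuming the stream (sketch; chunk type names follow the table above, and the exact field names on each chunk should be treated as illustrative):
```typescript
import { Agent } from '@n8n/agents';

const agent = new Agent('assistant').model('anthropic/claude-sonnet-4-5');

const { stream } = await agent.stream('Summarize the latest report');
const reader = stream.getReader();

while (true) {
	const { done, value } = await reader.read();
	if (done) break;
	if (value.type === 'text-delta') process.stdout.write(value.delta);
	if (value.type === 'finish') console.log('\nusage:', value.usage);
}
```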
---
## File map
```
src/
  runtime/
    agent-runtime.ts              — AgentRuntime (generate/stream/resume loops, HITL, state)
    event-bus.ts                  — AgentEventBus + AbortController
    message-list.ts               — AgentMessageList
    run-state.ts                  — RunStateManager, generateRunId
    memory-store.ts               — saveMessagesToThread helper
    messages.ts                   — AI SDK message conversion
    model-factory.ts              — createModel / createEmbeddingModel
    tool-adapter.ts               — buildToolMap, executeTool, toAiSdkTools, suspend / agent-result guards
    stream.ts                     — convertChunk, toTokenUsage
    runtime-helpers.ts            — normalizeInput, usage merge, stream error helpers, …
    working-memory.ts             — instruction text, parse/filter for working_memory tags
    strip-orphaned-tool-messages.ts
    title-generation.ts
    logger.ts
  types/
    sdk/agent.ts                  — BuiltAgent, GenerateResult, StreamChunk, SerializableAgentState, …
    sdk/tool.ts, sdk/memory.ts, … — Public SDK contracts
    runtime/event.ts              — AgentEvent enum + AgentEventData
    runtime/message-list.ts       — SerializedMessageList
    telemetry.ts                  — BuiltTelemetry shape
```
---
## Design decisions (selected)
### Set-based message list + id serialization
Three Sets plus stable **`id` on each message** allow `turnDelta()` /
`responseDelta()` without losing custom tool messages, and checkpointed runs
restore history vs turn data correctly after resume.
### `responseDelta()` vs `turnDelta()`
User input must not appear in `GenerateResult.messages`; memory persistence
must store the full turn including input — hence two views over the same list.
### Concurrency preserves suspension semantics
Batches run in parallel when configured, but the first suspension still
captures **unexecuted** tool calls in `pending` so nothing is dropped. Approval
tools and programmatic suspends use the same pending-map format.
### Why one event bus per agent
The bus is shared between `Agent` and `AgentRuntime` so `on()` registrations and
`abort()` always target the controller used by the active loop.
### Why `AbortSignal`
Signals cancel HTTP immediately in the AI SDK and compose with caller-provided
`abortSignal` via `resetAbort`.

View file

@ -0,0 +1,23 @@
import { defineConfig } from 'eslint/config';
import { nodeConfig } from '@n8n/eslint-config/node';
export default defineConfig(
{ ignores: ['examples/**', 'vitest.integration.config.*', 'src/__tests__/fixtures/**'] },
nodeConfig,
{
rules: {
'unicorn/filename-case': ['error', { case: 'kebabCase' }],
'@typescript-eslint/naming-convention': ['error', {
'selector': 'enumMember',
'format': ['UPPER_CASE', 'PascalCase'],
}]
},
},
{
files: ['src/__tests__/integration/**/*.ts'],
rules: {
'@typescript-eslint/require-await': 'off',
'n8n-local-rules/no-uncaught-json-parse': 'off',
},
},
);

View file

@ -0,0 +1,153 @@
/**
* @n8n/agents Full API Demonstration
*
* This example demonstrates the complete builder-pattern API for creating
* and running AI agents. It shows: tools, agents, memory, guardrails,
* scorers, multi-agent patterns (agent-as-tool), and tool interrupts.
*
* To run with real LLM calls, set ANTHROPIC_API_KEY.
* Without keys, the runtime will throw on actual LLM calls.
*/
import { z } from 'zod';
import { Agent, Guardrail, Memory, Tool } from '../src';
// ---------------------------------------------------------------------------
// Tools
// ---------------------------------------------------------------------------
const searchTool = new Tool('web-search')
.description('Search the web for information on a topic')
.input(
z.object({
query: z.string().describe('The search query'),
maxResults: z.number().default(3).describe('Maximum results to return'),
}),
)
.output(
z.object({
results: z.array(
z.object({
title: z.string(),
snippet: z.string(),
}),
),
}),
)
.handler(async ({ query, maxResults }) => ({
results: Array.from({ length: maxResults }, (_, i) => ({
title: `Result ${i + 1} for "${query}"`,
snippet: `This is a mock search result about ${query}.`,
})),
}));
const writeFileTool = new Tool('write-file')
.description('Write content to a file (suspends for confirmation)')
.input(
z.object({
path: z.string().describe('File path to write to'),
content: z.string().describe('Content to write'),
}),
)
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path, content }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Write to "${path}"?`, severity: 'warning' });
}
if (!ctx.resumeData.approved) return { written: false };
console.log(` [Mock] Would write ${content.length} chars to ${path}`);
return { written: true };
});
// ---------------------------------------------------------------------------
// Memory
// ---------------------------------------------------------------------------
const memory = new Memory()
.lastMessages(20)
.semanticRecall({ topK: 4, messageRange: { before: 1, after: 1 } });
// ---------------------------------------------------------------------------
// Agents
// ---------------------------------------------------------------------------
const researcher = new Agent('researcher')
.model('anthropic/claude-sonnet-4')
.instructions(
'You are a research assistant. Search for information and return structured findings.',
)
.tool(searchTool)
.memory(memory)
.inputGuardrail(
new Guardrail('injection-detector').type('prompt-injection').strategy('block').threshold(0.8),
);
const writer = new Agent('writer')
.model('anthropic/claude-sonnet-4')
.instructions('You write clear, engaging content based on research provided to you.')
.tool(writeFileTool)
.checkpoint('memory');
// ---------------------------------------------------------------------------
// Multi-Agent: Agent as Tool
// ---------------------------------------------------------------------------
const orchestrator = new Agent('orchestrator')
.model('anthropic/claude-sonnet-4')
.instructions(
'You coordinate research and writing. Delegate research to the researcher and writing to the writer.',
)
.tool(researcher.asTool('Delegate research tasks to the research specialist'))
.tool(writer.asTool('Delegate writing tasks to the content writer'));
// ---------------------------------------------------------------------------
// Execution
// ---------------------------------------------------------------------------
async function main() {
console.log('=== @n8n/agents ===\n');
// --- 1. Single agent generate ---
console.log('1. Single agent generate:');
try {
const result = await researcher.generate('Find information about RAG architectures', {
persistence: {
resourceId: 'user-123',
threadId: 'session-1',
},
});
const text = result.messages
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
console.log(` Result: ${text.slice(0, 100)}...`);
console.log(
` Usage: ${result.usage?.promptTokens} in, ${result.usage?.completionTokens} out`,
);
} catch (error) {
console.log(` (Expected) Error: ${(error as Error).message}`);
console.log(' (Set ANTHROPIC_API_KEY to run with real LLM calls)');
}
// --- 2. Orchestrator (agent-as-tool pattern) ---
console.log('\n2. Orchestrator (agent-as-tool pattern):');
try {
const orchResult = await orchestrator.generate(
'Research RAG architectures and write a summary',
);
const text = orchResult.messages
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
console.log(` Result: ${text.slice(0, 100)}...`);
} catch (error) {
console.log(` (Expected) Error: ${(error as Error).message}`);
}
console.log('\n=== Complete ===');
}
main().catch(console.error);

View file

@ -0,0 +1,7 @@
/** @type {import('jest').Config} */
const base = require('../../../jest.config');
module.exports = {
...base,
testPathIgnorePatterns: [...(base.testPathIgnorePatterns || []), '/integration/'],
};

View file

@ -0,0 +1,65 @@
{
"name": "@n8n/agents",
"version": "0.1.0",
"description": "AI agent SDK for n8n's code-first execution engine",
"main": "dist/index.js",
"module": "dist/index.js",
"types": "dist/index.d.ts",
"files": [
"dist/**/*"
],
"scripts": {
"clean": "rimraf dist .turbo",
"dev": "pnpm watch",
"typecheck": "tsc --noEmit",
"build": "rimraf dist && tsc -p tsconfig.build.json",
"format": "biome format --write src examples",
"format:check": "biome ci src examples",
"lint": "eslint . --quiet",
"lint:fix": "eslint . --fix",
"watch": "tsc -p tsconfig.build.json --watch",
"test": "jest",
"test:unit": "jest",
"test:dev": "jest --watch",
"test:integration": "vitest run --config vitest.integration.config.mjs"
},
"dependencies": {
"@ai-sdk/anthropic": "^3.0.58",
"@ai-sdk/google": "^3.0.43",
"@ai-sdk/openai": "^3.0.41",
"@ai-sdk/xai": "^3.0.67",
"@ai-sdk/provider-utils": "^4.0.21",
"@modelcontextprotocol/sdk": "1.26.0",
"ajv": "^8.18.0",
"@libsql/client": "^0.17.0",
"ai": "^6.0.116",
"pg": "catalog:",
"zod": "catalog:"
},
"peerDependencies": {
"langsmith": ">=0.3.0",
"@opentelemetry/sdk-trace-node": ">=1.0.0",
"@opentelemetry/sdk-trace-base": ">=1.0.0",
"@opentelemetry/exporter-trace-otlp-http": ">=0.50.0"
},
"peerDependenciesMeta": {
"langsmith": {
"optional": true
},
"@opentelemetry/sdk-trace-node": {
"optional": true
},
"@opentelemetry/sdk-trace-base": {
"optional": true
},
"@opentelemetry/exporter-trace-otlp-http": {
"optional": true
}
},
"devDependencies": {
"@n8n/typescript-config": "workspace:*",
"@types/json-schema": "^7.0.15",
"@types/pg": "^8.15.6",
"testcontainers": "11.11.0"
}
}

File diff suppressed because it is too large

View file

@ -0,0 +1,51 @@
import { AgentEventBus } from '../runtime/event-bus';
describe('AgentEventBus', () => {
describe('resetAbort', () => {
it('should create a fresh signal on reset', () => {
const bus = new AgentEventBus();
bus.resetAbort();
expect(bus.isAborted).toBe(false);
expect(bus.signal.aborted).toBe(false);
});
it('should respect agent.abort()', () => {
const bus = new AgentEventBus();
bus.resetAbort();
bus.abort();
expect(bus.isAborted).toBe(true);
expect(bus.signal.aborted).toBe(true);
});
it('should respect external abort signal', () => {
const bus = new AgentEventBus();
const external = new AbortController();
bus.resetAbort(external.signal);
expect(bus.isAborted).toBe(false);
external.abort();
expect(bus.isAborted).toBe(true);
expect(bus.signal.aborted).toBe(true);
});
it('should abort when either internal or external signal fires', () => {
const bus = new AgentEventBus();
const external = new AbortController();
bus.resetAbort(external.signal);
bus.abort();
expect(bus.isAborted).toBe(true);
expect(external.signal.aborted).toBe(false);
});
it('should allow reuse after reset', () => {
const bus = new AgentEventBus();
bus.resetAbort();
bus.abort();
expect(bus.isAborted).toBe(true);
bus.resetAbort();
expect(bus.isAborted).toBe(false);
});
});
});

View file

@ -0,0 +1,82 @@
/**
* Minimal MCP server for stdio transport integration tests.
* Spawned as a child process by mcp-stdio-transport.test.ts.
* Run with: node mcp-stdio-server.mjs
*/
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprotocol/sdk/types.js';
// 1×1 transparent PNG in base64 (smallest valid PNG)
const TINY_PNG =
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
const server = new Server(
{ name: 'test-stdio-server', version: '1.0.0' },
{ capabilities: { tools: {} } },
);
server.setRequestHandler(ListToolsRequestSchema, async () => ({
tools: [
{
name: 'echo',
description: 'Echo the message back as-is',
inputSchema: {
type: 'object',
properties: { message: { type: 'string', description: 'Message to echo' } },
required: ['message'],
},
},
{
name: 'add',
description: 'Add two numbers together',
inputSchema: {
type: 'object',
properties: {
a: { type: 'number', description: 'First number' },
b: { type: 'number', description: 'Second number' },
},
required: ['a', 'b'],
},
},
{
name: 'image',
description: 'Return a small image with a caption',
inputSchema: {
type: 'object',
properties: { caption: { type: 'string', description: 'Image caption' } },
required: ['caption'],
},
},
],
}));
server.setRequestHandler(CallToolRequestSchema, async (request) => {
const { name, arguments: args = {} } = request.params;
if (name === 'echo') {
return { content: [{ type: 'text', text: String(args.message ?? '') }] };
}
if (name === 'add') {
const sum = Number(args.a ?? 0) + Number(args.b ?? 0);
return { content: [{ type: 'text', text: String(sum) }] };
}
if (name === 'image') {
return {
content: [
{ type: 'text', text: String(args.caption ?? '') },
{ type: 'image', data: TINY_PNG, mimeType: 'image/png' },
],
};
}
return {
isError: true,
content: [{ type: 'text', text: `Unknown tool: ${name}` }],
};
});
const transport = new StdioServerTransport();
await server.connect(transport);

View file

@ -0,0 +1,57 @@
import { InMemoryMemory } from '../runtime/memory-store';
describe('InMemoryMemory working memory', () => {
it('returns null for unknown key', async () => {
const mem = new InMemoryMemory();
expect(await mem.getWorkingMemory({ threadId: 'thread-x', resourceId: 'unknown' })).toBeNull();
});
it('saves and retrieves working memory keyed by resourceId', async () => {
const mem = new InMemoryMemory();
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'user-1' },
'# Context\n- Name: Alice',
);
expect(await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' })).toBe(
'# Context\n- Name: Alice',
);
});
it('overwrites on subsequent save', async () => {
const mem = new InMemoryMemory();
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' }, 'v1');
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' }, 'v2');
expect(await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' })).toBe('v2');
});
it('isolates by resourceId (resource scope)', async () => {
const mem = new InMemoryMemory();
await mem.saveWorkingMemory({ threadId: 'thread-a', resourceId: 'user-1' }, 'Alice data');
await mem.saveWorkingMemory({ threadId: 'thread-b', resourceId: 'user-2' }, 'Bob data');
expect(await mem.getWorkingMemory({ threadId: 'thread-a', resourceId: 'user-1' })).toBe(
'Alice data',
);
expect(await mem.getWorkingMemory({ threadId: 'thread-b', resourceId: 'user-2' })).toBe(
'Bob data',
);
});
it('returns null for unknown threadId (thread scope)', async () => {
const mem = new InMemoryMemory();
expect(await mem.getWorkingMemory({ threadId: 'unknown' })).toBeNull();
});
it('saves and retrieves working memory keyed by threadId', async () => {
const mem = new InMemoryMemory();
await mem.saveWorkingMemory({ threadId: 'thread-1' }, '# Thread Notes');
expect(await mem.getWorkingMemory({ threadId: 'thread-1' })).toBe('# Thread Notes');
});
it('isolates by threadId (thread scope)', async () => {
const mem = new InMemoryMemory();
await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'data for thread 1');
await mem.saveWorkingMemory({ threadId: 'thread-2' }, 'data for thread 2');
expect(await mem.getWorkingMemory({ threadId: 'thread-1' })).toBe('data for thread 1');
expect(await mem.getWorkingMemory({ threadId: 'thread-2' })).toBe('data for thread 2');
});
});

View file

@ -0,0 +1,134 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
createAgentWithBatchedInterruptibleCalls,
createAgentWithBatchedNormalCalls,
} from './helpers';
import type { StreamChunk } from '../../index';
const describe = describeIf('anthropic');
describe('batched tool execution integration', () => {
it('normal tools with bounded concurrency complete without errors (generate)', async () => {
const agent = createAgentWithBatchedNormalCalls('anthropic', 2);
const result = await agent.generate(
'Check if these three files exist: /home/a.txt, /home/b.txt, /home/c.txt. You MUST call check_file for each file using parallel tool calls in the same turn.',
);
expect(result.finishReason).toBe('stop');
expect(result.pendingSuspend).toBeUndefined();
expect(result.toolCalls).toBeDefined();
expect(result.toolCalls!.length).toBeGreaterThanOrEqual(3);
for (const tc of result.toolCalls!) {
expect(tc.tool).toBe('check_file');
expect(tc.output).toEqual(expect.objectContaining({ exists: true }));
}
});
it('normal tools with bounded concurrency complete without errors (stream)', async () => {
const agent = createAgentWithBatchedNormalCalls('anthropic', 2);
const { stream: fullStream } = await agent.stream(
'Check if these three files exist: /home/a.txt, /home/b.txt, /home/c.txt. You MUST call check_file for each file using parallel tool calls in the same turn.',
);
const chunks = await collectStreamChunks(fullStream);
const errorChunks = chunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBe(1);
expect(finishChunks[0].finishReason).toBe('stop');
expect(chunks.filter((c) => c.type === 'tool-call-suspended')).toHaveLength(0);
});
it('bounded concurrency suspends first batch and saves unexecuted tools, then resumes all (generate)', async () => {
const agent = createAgentWithBatchedInterruptibleCalls('anthropic', 2);
const first = await agent.generate(
'Delete these three files: /tmp/a.txt, /tmp/b.txt, /tmp/c.txt. You MUST call delete_file for each file using parallel tool calls in the same turn. After deleting, confirm success.',
);
expect(first.finishReason).toBe('tool-calls');
expect(first.pendingSuspend).toBeDefined();
// With concurrency=2 and 3 tools: batch 1 runs 2 tools (both suspend),
// batch 2 (1 tool) is skipped. So we get 2 suspended + 1 unexecuted.
expect(first.pendingSuspend!.length).toBe(2);
// Resume each suspension one at a time until the LLM loop continues.
// The unexecuted tools from later batches should run during resume
// and suspend in turn, so we expect multiple resume cycles.
let result = first;
let resumeCount = 0;
while (result.pendingSuspend && result.pendingSuspend.length > 0) {
const { runId, toolCallId } = result.pendingSuspend[0];
result = await agent.resume('generate', { approved: true }, { runId, toolCallId });
resumeCount++;
if (resumeCount > 10) {
throw new Error('Too many resume cycles — likely an infinite loop');
}
}
// All tools should eventually be resolved
expect(result.finishReason).toBe('stop');
expect(result.pendingSuspend).toBeUndefined();
expect(resumeCount).toBeGreaterThanOrEqual(2);
});
it('bounded concurrency suspends first batch and saves unexecuted tools, then resumes all (stream)', async () => {
const agent = createAgentWithBatchedInterruptibleCalls('anthropic', 2);
const { stream: fullStream } = await agent.stream(
'Delete these three files: /tmp/a.txt, /tmp/b.txt, /tmp/c.txt. You MUST call delete_file for each file using parallel tool calls in the same turn. After deleting, tell me if you succeeded.',
);
const chunks = await collectStreamChunks(fullStream);
let pendingSuspensions = chunksOfType(chunks, 'tool-call-suspended') as Array<
StreamChunk & { type: 'tool-call-suspended' }
>;
expect(pendingSuspensions.length).toBe(2);
let resumeCount = 0;
while (pendingSuspensions.length > 0) {
const next = pendingSuspensions[0];
const resumedStream = await agent.resume(
'stream',
{ approved: true },
{ runId: next.runId!, toolCallId: next.toolCallId! },
);
const resumedChunks = await collectStreamChunks(resumedStream.stream);
pendingSuspensions = chunksOfType(resumedChunks, 'tool-call-suspended') as Array<
StreamChunk & { type: 'tool-call-suspended' }
>;
resumeCount++;
if (pendingSuspensions.length === 0) {
const errorChunks = resumedChunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(resumedChunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
expect(finishChunks[0].finishReason).not.toBe('error');
}
if (resumeCount > 10) {
throw new Error('Too many resume cycles — likely an infinite loop');
}
}
expect(resumeCount).toBeGreaterThanOrEqual(2);
});
});

View file

@ -0,0 +1,213 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
createAgentWithConcurrentInterruptibleCalls,
createAgentWithConcurrentMixedTools,
collectTextDeltas,
} from './helpers';
import { isLlmMessage, type StreamChunk } from '../../index';
const describe = describeIf('anthropic');
describe('concurrent tool execution integration', () => {
it('suspends all interruptible tool calls concurrently and returns them as an array (generate)', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
const result = await agent.generate(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls.',
);
expect(result.finishReason).toBe('tool-calls');
expect(result.pendingSuspend).toBeDefined();
// With concurrent execution, ALL interruptible tool calls suspend at once
expect(result.pendingSuspend!.length).toBeGreaterThanOrEqual(2);
const toolNames = result.pendingSuspend!.map((s) => s.toolName);
expect(toolNames.every((n) => n === 'delete_file')).toBe(true);
// All entries share the same runId
const runIds = new Set(result.pendingSuspend!.map((s) => s.runId));
expect(runIds.size).toBe(1);
// Each entry has a unique toolCallId and a suspendPayload
const toolCallIds = result.pendingSuspend!.map((s) => s.toolCallId);
expect(new Set(toolCallIds).size).toBe(result.pendingSuspend!.length);
for (const s of result.pendingSuspend!) {
expect(s.suspendPayload).toEqual(
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
expect.objectContaining({ message: expect.any(String), severity: 'destructive' }),
);
}
});
it('suspends all interruptible tool calls concurrently and emits multiple chunks (stream)', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
const { stream: fullStream } = await agent.stream(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls.',
);
const chunks = await collectStreamChunks(fullStream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
// With concurrent execution, ALL suspensions are emitted before finish
expect(suspendedChunks.length).toBeGreaterThanOrEqual(2);
// Each suspended chunk has a unique toolCallId
const toolCallIds = suspendedChunks.map((c) => c.toolCallId);
expect(new Set(toolCallIds).size).toBe(suspendedChunks.length);
// All share the same runId
const runIds = new Set(suspendedChunks.map((c) => c.runId));
expect(runIds.size).toBe(1);
// A single finish chunk follows the suspended chunks
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBe(1);
expect(finishChunks[0].finishReason).toBe('tool-calls');
});
it('resume resolves one tool at a time, carrying forward the rest (generate)', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
const first = await agent.generate(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls.',
);
expect(first.pendingSuspend!.length).toBeGreaterThanOrEqual(2);
const { runId } = first.pendingSuspend![0];
const firstToolCallId = first.pendingSuspend![0].toolCallId;
// Resume the first tool
const second = await agent.resume(
'generate',
{ approved: true },
{ runId, toolCallId: firstToolCallId },
);
// The remaining tool(s) should still be pending
expect(second.pendingSuspend).toBeDefined();
expect(second.pendingSuspend!.length).toBe(first.pendingSuspend!.length - 1);
// The resumed tool should NOT be in the remaining list
const remainingIds = second.pendingSuspend!.map((s) => s.toolCallId);
expect(remainingIds).not.toContain(firstToolCallId);
});
it('resumes all suspended tools one by one until the LLM loop continues (stream)', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
const { stream: fullStream } = await agent.stream(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls. After deleting all files, tell me if you succeeded.',
);
const chunks = await collectStreamChunks(fullStream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBeGreaterThanOrEqual(2);
// Resume each one until no suspensions remain
let pendingSuspensions = suspendedChunks as Array<
StreamChunk & { type: 'tool-call-suspended' }
>;
while (pendingSuspensions.length > 0) {
const next = pendingSuspensions[0];
const resumedStream = await agent.resume(
'stream',
{ approved: true },
{ runId: next.runId!, toolCallId: next.toolCallId! },
);
const resumedChunks = await collectStreamChunks(resumedStream.stream);
pendingSuspensions = chunksOfType(resumedChunks, 'tool-call-suspended');
// If there are no more suspensions, the LLM should have produced text
if (pendingSuspensions.length === 0) {
const errorChunks = resumedChunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(resumedChunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
expect(finishChunks[0].finishReason).not.toBe('error');
}
}
});
it('auto-executes non-interruptible tools concurrently while suspending interruptible ones', async () => {
const agent = createAgentWithConcurrentMixedTools('anthropic');
const { stream: fullStream } = await agent.stream(
'You must call both tools in parallel: call list_files with dir="/home" AND call delete_file with path="/home/readme.md". Do not skip either tool.',
);
const chunks = await collectStreamChunks(fullStream);
// list_files should auto-execute — its result should appear as a message chunk
const toolResultChunks = chunks.filter(
(c) =>
c.type === 'message' &&
isLlmMessage(c.message) &&
c.message.content.some((p) => p.type === 'tool-result'),
);
// delete_file should be suspended
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
const deleteSuspended = suspendedChunks.find((c) => c.toolName === 'delete_file');
expect(deleteSuspended).toBeDefined();
expect(toolResultChunks.length).toBeGreaterThan(0);
// If the LLM issued both tool calls in parallel:
if (deleteSuspended && toolResultChunks.length > 0) {
expect(deleteSuspended.toolName).toBe('delete_file');
expect(deleteSuspended.suspendPayload).toEqual(
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
expect.objectContaining({ message: expect.any(String) }),
);
// list_files result should be present even though delete_file suspended
const listResult = toolResultChunks.find(
(c) =>
c.type === 'message' &&
isLlmMessage(c.message) &&
c.message.content.some((p) => p.type === 'tool-result' && p.toolName === 'list_files'),
);
expect(listResult).toBeDefined();
}
});
it('generate: resumes all tools and receives a final text response', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
let result = await agent.generate(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls. After deleting, confirm success.',
);
// Iterate through all pending suspensions
while (result.pendingSuspend && result.pendingSuspend.length > 0) {
const { runId, toolCallId } = result.pendingSuspend[0];
result = await agent.resume('generate', { approved: true }, { runId, toolCallId });
}
// After all tools resumed, the agent should complete with a text response
expect(result.finishReason).toBe('stop');
expect(result.pendingSuspend).toBeUndefined();
const text = collectTextDeltas(
result.messages
.filter((m) => 'role' in m && m.role === 'assistant')
.flatMap((m) =>
'content' in m
? m.content
.filter((c) => c.type === 'text')
.map((c) => ({ type: 'text-delta' as const, delta: c.text }))
: [],
),
);
expect(text.length).toBeGreaterThan(0);
});
});

View file

@ -0,0 +1,92 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { describeIf, getModel } from './helpers';
import { Agent, Memory, Tool } from '../../index';
const describe = describeIf('anthropic');
describe('custom message survives suspend/resume', () => {
it('preserves custom tool message in stream after resume + complete', async () => {
const memory = new Memory().storage('memory').lastMessages(20);
const deleteTool = new Tool('delete_file')
.description('Delete a file at the given path')
.input(
z.object({
path: z.string().describe('File path to delete'),
}),
)
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?` });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
})
.toMessage((output) => ({
type: 'custom' as const,
data: {
dummy: `deleted:${(output as { path: string }).path}`,
},
}));
const agent = new Agent('custom-msg-suspend-resume-stream-test')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. When asked to delete files, use the delete_file tool. Be concise.',
)
.tool(deleteTool)
.memory(memory)
.checkpoint('memory');
const threadId = `test-custom-msg-stream-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
// Turn 1: stream, agent suspends
const result1 = await agent.stream('Delete the file /tmp/stream-test.txt', options);
const reader1 = result1.stream.getReader();
const chunks1: Array<{ type: string; [key: string]: unknown }> = [];
while (true) {
const { done, value } = await reader1.read();
if (done) break;
chunks1.push(value as { type: string; [key: string]: unknown });
}
const suspendedChunk = chunks1.find((c) => c.type === 'tool-call-suspended') as
| { type: 'tool-call-suspended'; runId: string; toolCallId: string }
| undefined;
expect(suspendedChunk).toBeDefined();
// Resume with approval and get the resumed stream
const result2 = await agent.resume(
'stream',
{ approved: true },
{ runId: suspendedChunk!.runId, toolCallId: suspendedChunk!.toolCallId },
);
const reader2 = result2.stream.getReader();
const chunks2: Array<{ type: string; [key: string]: unknown }> = [];
while (true) {
const { done, value } = await reader2.read();
if (done) break;
chunks2.push(value as { type: string; [key: string]: unknown });
}
// The custom message must appear in the resumed stream
const customChunk = chunks2.find(
(c) =>
c.type === 'message' &&
(c.message as { type?: string }).type === 'custom' &&
'data' in (c.message as object) &&
'dummy' in (c.message as { data: { dummy: string } }).data,
) as { type: 'message'; message: { type: 'custom'; data: { dummy: string } } } | undefined;
expect(customChunk).toBeDefined();
expect(customChunk!.message.data.dummy).toContain('deleted:');
expect(customChunk!.message.data.dummy).toContain('/tmp/stream-test.txt');
});
});

View file

@ -0,0 +1,378 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { createAgentWithInterruptibleTool, describeIf, getModel } from './helpers';
import { parseJudgeResponse } from '../../evals/parse-judge-response';
import { Agent, Tool, Eval, evaluate, evals } from '../../index';
/**
* Create a fruit-bowl agent with a tool that generates random fruit coordinates.
*/
function createFruitBowlAgent(provider: 'anthropic' | 'openai'): Agent {
const createFruitBowlTool = new Tool('create_fruit_bowl')
.description(
'Generate a fruit bowl with random 3D coordinates for fruits. Always use this tool when asked to create a fruit bowl.',
)
.input(
z.object({
num_apples: z.number().optional().describe('Number of apples (default: 3)'),
}),
)
.handler(async (input) => {
const numApples = input.num_apples ?? 3;
const fruits = Array.from({ length: numApples }, () => ({
type: 'apple',
x: Math.round((Math.random() * 20 - 10) * 10) / 10,
y: Math.round((Math.random() * 20 - 10) * 10) / 10,
z: Math.round((Math.random() * 20 - 10) * 10) / 10,
}));
return { fruits };
});
return new Agent('fruit-bowl-bot')
.model(getModel(provider))
.instructions(
'You are a fruit bowl generator. When asked to create a fruit bowl, use the create_fruit_bowl tool and then describe the contents including each fruit type and its x, y, z coordinates.',
)
.tool(createFruitBowlTool);
}
const describe = describeIf('anthropic');
describe('evaluate() integration', () => {
it('runs deterministic evals against a fruit bowl agent', async () => {
const mentionsFruit = new Eval('mentions-fruit')
.description('Check if response mentions apples with coordinates')
.check(({ output }) => {
const lower = output.toLowerCase();
const hasApple = lower.includes('apple');
const hasCoord = /\d+\.\d/.test(output);
return {
pass: hasApple && hasCoord,
reasoning:
hasApple && hasCoord
? 'Mentions apples with coordinates'
: hasApple
? 'Mentions apples but no coordinates'
: 'No mention of apples',
};
});
const usedTool = new Eval('used-tool')
.description('Check if create_fruit_bowl tool was called')
.check(({ toolCalls }) => {
const used = (toolCalls ?? []).some((tc) => tc.tool === 'create_fruit_bowl');
return {
pass: used,
reasoning: used ? 'Tool was called' : 'Tool was NOT called',
};
});
const agent = createFruitBowlAgent('anthropic');
const results = await evaluate(agent, {
dataset: [{ input: 'Create a fruit bowl with 3 apples' }],
evals: [mentionsFruit, usedTool],
});
expect(results.runs).toHaveLength(1);
const run = results.runs[0];
expect(run.output).toBeTruthy();
expect(run.scores['mentions-fruit'].pass).toBe(true);
expect(run.scores['used-tool'].pass).toBe(true);
expect(results.summary['mentions-fruit'].total).toBe(1);
expect(results.summary['used-tool'].passed).toBe(1);
});
it('runs multiple dataset rows in parallel', async () => {
const hasContent = new Eval('has-content')
.description('Check response is non-empty')
.check(({ output }) => ({
pass: output.length > 10,
reasoning: `Response length: ${output.length}`,
}));
const agent = createFruitBowlAgent('anthropic');
const results = await evaluate(agent, {
dataset: [
{ input: 'Create a fruit bowl with 2 apples' },
{ input: 'Create a fruit bowl with 5 apples' },
],
evals: [hasContent],
});
expect(results.runs).toHaveLength(2);
expect(results.summary['has-content'].total).toBe(2);
expect(results.summary['has-content'].passed).toBe(2);
});
it('runs built-in string similarity eval', async () => {
const agent = new Agent('echo-agent')
.model(getModel('anthropic'))
.instructions(
'When asked "What is the capital of France?", reply with exactly: "Paris". Nothing else.',
);
const similarity = evals.stringSimilarity();
const results = await evaluate(agent, {
dataset: [{ input: 'What is the capital of France?', expected: 'Paris' }],
evals: [similarity],
});
expect(results.runs).toHaveLength(1);
expect(results.runs[0].scores['string-similarity'].pass).toBe(true);
});
it('runs LLM-as-judge correctness eval', async () => {
const agent = new Agent('math-agent')
.model(getModel('anthropic'))
.instructions('Answer math questions with just the number. No explanation.');
const correctness = evals.correctness().model(getModel('anthropic'));
const results = await evaluate(agent, {
dataset: [{ input: 'What is 2 + 2?', expected: '4' }],
evals: [correctness],
});
expect(results.runs).toHaveLength(1);
expect(results.runs[0].scores['correctness'].pass).toBe(true);
expect(results.runs[0].scores['correctness'].reasoning).toBeTruthy();
});
it('runs LLM correctness eval on fruit bowl agent with expected output', async () => {
const agent = createFruitBowlAgent('anthropic');
const correctness = evals.correctness().model(getModel('anthropic'));
const domainHelpfulness = new Eval('domain-helpfulness')
.description('Judge helpfulness in the context of a fruit-picking robot simulation')
.model(getModel('anthropic'))
.judge(async ({ input, output, llm }) => {
const result = await llm(
[
'You are evaluating a response from a simple fruit-picking robot simulation tool.',
'This is a demo/toy agent. The robot generates fruit bowls with 3D coordinates.',
'Judge ONLY whether the response fulfills what the user asked for — not production quality.',
'',
`User request: ${input}`,
`Robot response: ${output}`,
'',
'Did the response deliver what was asked?',
'Respond with ONLY JSON (no markdown fences): {"pass": true/false, "reasoning": "<explanation>"}',
].join('\n'),
);
return parseJudgeResponse(result.text);
});
const results = await evaluate(agent, {
dataset: [
{
input: 'Create a fruit bowl',
expected: 'A fruit bowl with a number of apples and their coordinates',
},
],
evals: [correctness, domainHelpfulness],
});
expect(results.runs).toHaveLength(1);
const run = results.runs[0];
expect(run.output.toLowerCase()).toContain('apple');
expect(run.scores['correctness'].pass).toBe(true);
expect(run.scores['correctness'].reasoning).toBeTruthy();
expect(run.scores['domain-helpfulness'].pass).toBe(true);
expect(run.scores['domain-helpfulness'].reasoning).toBeTruthy();
});
it('auto-resumes interruptible tool calls during eval', async () => {
const { createAgentWithMixedTools } = await import('./helpers');
const agent = createAgentWithMixedTools('anthropic');
const usedTool = new Eval('used-list-tool')
.description('Check if list_files was called')
.check(({ toolCalls }) => {
const used = (toolCalls ?? []).some((tc) => tc.tool === 'list_files');
return {
pass: used,
reasoning: used ? 'Called list_files' : 'Did not call list_files',
};
});
const hasOutput = new Eval('has-output')
.description('Check response is non-empty')
.check(({ output }) => ({
pass: output.length > 5,
reasoning: `Output length: ${output.length}`,
}));
const results = await evaluate(agent, {
dataset: [{ input: 'List files in /home' }],
evals: [usedTool, hasOutput],
});
expect(results.runs).toHaveLength(1);
expect(results.runs[0].scores['used-list-tool'].pass).toBe(true);
expect(results.runs[0].scores['has-output'].pass).toBe(true);
});
it('provides tool call inputs and outputs as JSON objects, not strings', async () => {
const agent = createFruitBowlAgent('anthropic');
const toolTypesEval = new Eval('tool-types')
.description('Verify tool call inputs/outputs are JSON objects')
.check(({ toolCalls }) => {
if (!toolCalls || toolCalls.length === 0) {
return { pass: false, reasoning: 'No tool calls' };
}
for (const tc of toolCalls) {
if (typeof tc.input === 'string') {
return { pass: false, reasoning: `Tool "${tc.tool}" input is a string: ${tc.input}` };
}
if (typeof tc.output === 'string') {
return { pass: false, reasoning: `Tool "${tc.tool}" output is a string: ${tc.output}` };
}
}
return { pass: true, reasoning: 'All tool inputs/outputs are JSON objects' };
});
const results = await evaluate(agent, {
dataset: [{ input: 'Create a fruit bowl with 2 apples' }],
evals: [toolTypesEval],
});
expect(results.runs).toHaveLength(1);
expect(results.runs[0].scores['tool-types'].pass).toBe(true);
expect(results.runs[0].scores['tool-types'].reasoning).toContain('JSON objects');
});
it('resume("generate") result includes the resumed tool call in toolCalls', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
// First generate: agent suspends on delete_file
const first = await agent.generate('Delete the file /tmp/test.txt');
expect(first.pendingSuspend).toBeDefined();
const { runId, toolCallId } = first.pendingSuspend![0];
// Resume with approval
const resumed = await agent.resume('generate', { approved: true }, { runId, toolCallId });
// The resumed tool call must appear in toolCalls.
// Bug: toolCalls is undefined or empty because runGenerateLoop() starts
// with a fresh toolCallSummary and the resume-phase tool execution is
// never captured.
expect(resumed.toolCalls).toBeDefined();
expect(resumed.toolCalls!.length).toBeGreaterThan(0);
const deletedCall = resumed.toolCalls!.find((tc) => tc.tool === 'delete_file');
expect(deletedCall).toBeDefined();
expect(deletedCall!.output).toMatchObject({ deleted: true, path: '/tmp/test.txt' });
});
it('resume("generate") result includes the resumed tool call when denied', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
const first = await agent.generate('Delete the file /tmp/secret.txt');
expect(first.pendingSuspend).toBeDefined();
const { runId, toolCallId } = first.pendingSuspend![0];
const resumed = await agent.resume('generate', { approved: false }, { runId, toolCallId });
expect(resumed.toolCalls).toBeDefined();
const deletedCall = resumed.toolCalls!.find((tc) => tc.tool === 'delete_file');
expect(deletedCall).toBeDefined();
// denied: deleted should be false
expect(deletedCall!.output).toMatchObject({ deleted: false });
});
it('evaluate() includes HITL tool calls in toolCalls passed to eval scorers', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
const sawDeleteCall = new Eval('saw-delete-call')
.description('Check that delete_file tool call appears in toolCalls after auto-resume')
.check(({ toolCalls }) => {
const found = (toolCalls ?? []).some((tc) => tc.tool === 'delete_file');
return {
pass: found,
reasoning: found
? 'delete_file present in toolCalls'
: `delete_file missing — toolCalls: ${JSON.stringify(toolCalls ?? [])}`,
};
});
const results = await evaluate(agent, {
dataset: [
{
input: 'Delete the file /tmp/test.txt',
// auto-resume with approved: true (default) so the tool completes
},
],
evals: [sawDeleteCall],
});
expect(results.runs).toHaveLength(1);
// Bug: this fails because result.toolCalls is empty after resume,
// so the eval scorer receives toolCalls=[] and pass=false.
expect(results.runs[0].scores['saw-delete-call'].pass).toBe(true);
expect(results.runs[0].scores['saw-delete-call'].reasoning).toContain('present');
});
it('evaluate() output is non-empty when agent only uses an interruptible tool (no text response)', async () => {
// If the agent produces no text and only tool output, evaluate() uses
// toolCalls to build the composite output string. With the bug, toolCalls
// is empty after resume and output becomes "".
const silentAgent = new Agent('silent-tool-agent')
.model(getModel('anthropic'))
.instructions(
'When asked to delete a file, call delete_file and return ONLY the raw JSON tool result. Do not add any explanatory text — your entire response must be the tool result only.',
)
.tool(
new Tool('delete_file')
.description('Delete a file')
.input(z.object({ path: z.string() }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({
message: `Delete "${path}"?`,
severity: 'destructive',
});
}
return { deleted: ctx.resumeData.approved, path };
}),
)
.checkpoint('memory');
const hasOutput = new Eval('has-output')
.description('Composite output must be non-empty after HITL auto-resume')
.check(({ output, toolCalls }) => {
const pass = output.length > 0;
return {
pass,
reasoning: pass
? `output="${output}"`
: `output is empty; toolCalls=${JSON.stringify(toolCalls ?? [])}`,
};
});
const results = await evaluate(silentAgent, {
dataset: [{ input: 'Delete /tmp/test.txt' }],
evals: [hasOutput],
});
expect(results.runs).toHaveLength(1);
// Bug: output is "" because toolCalls is empty, so the fallback path in
// evaluate() that builds output from tool outputs is never triggered.
expect(results.runs[0].scores['has-output'].pass).toBe(true);
});
});

View file

@@ -0,0 +1,279 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { collectStreamChunks, describeIf, getModel } from './helpers';
import { Agent, AgentEvent, Tool, type AgentEventData } from '../../index';
const describe = describeIf('anthropic');
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
function createSimpleAgent(provider: 'openai' | 'anthropic' = 'anthropic'): Agent {
return new Agent('events-test-agent')
.model(getModel(provider))
.instructions('You are a concise assistant. Reply in one short sentence.');
}
function createAgentWithTool(provider: 'openai' | 'anthropic' = 'anthropic'): Agent {
const addTool = new Tool('add_numbers')
.description('Add two numbers together')
.input(z.object({ a: z.number(), b: z.number() }))
.handler(async ({ a, b }) => ({ result: a + b }));
return new Agent('events-tool-agent')
.model(getModel(provider))
.instructions('You are a calculator. Use the add_numbers tool when asked to add.')
.tool(addTool);
}
// ---------------------------------------------------------------------------
// Event system — generate path
// ---------------------------------------------------------------------------
describe('event system — generate', () => {
it('emits AgentStart and AgentEnd around a generate() call', async () => {
const agent = createSimpleAgent();
const fired: AgentEvent[] = [];
agent.on(AgentEvent.AgentStart, () => {
fired.push(AgentEvent.AgentStart);
});
agent.on(AgentEvent.AgentEnd, () => {
fired.push(AgentEvent.AgentEnd);
});
await agent.generate('Say hello');
expect(fired).toContain(AgentEvent.AgentStart);
expect(fired).toContain(AgentEvent.AgentEnd);
expect(fired.indexOf(AgentEvent.AgentStart)).toBeLessThan(fired.indexOf(AgentEvent.AgentEnd));
});
it('emits TurnStart and TurnEnd for each LLM call', async () => {
const agent = createSimpleAgent();
const fired: AgentEvent[] = [];
agent.on(AgentEvent.TurnStart, () => fired.push(AgentEvent.TurnStart));
agent.on(AgentEvent.TurnEnd, () => fired.push(AgentEvent.TurnEnd));
await agent.generate('Say hello');
expect(fired).toContain(AgentEvent.TurnStart);
expect(fired).toContain(AgentEvent.TurnEnd);
});
it('emits ToolExecutionStart and ToolExecutionEnd when a tool runs', async () => {
const agent = createAgentWithTool();
const toolEvents: AgentEventData[] = [];
agent.on(AgentEvent.ToolExecutionStart, (data) => toolEvents.push(data));
agent.on(AgentEvent.ToolExecutionEnd, (data) => toolEvents.push(data));
await agent.generate('What is 7 plus 3?');
const starts = toolEvents.filter((e) => e.type === AgentEvent.ToolExecutionStart);
const ends = toolEvents.filter((e) => e.type === AgentEvent.ToolExecutionEnd);
expect(starts.length).toBeGreaterThan(0);
expect(ends.length).toBeGreaterThan(0);
const start = starts[0] as AgentEventData & { type: AgentEvent.ToolExecutionStart };
expect(start.toolName).toBe('add_numbers');
const end = ends[0] as AgentEventData & { type: AgentEvent.ToolExecutionEnd };
expect(end.isError).toBe(false);
expect((end.result as { result: number }).result).toBe(10);
});
it('ToolExecutionEnd carries the correct toolCallId matching ToolExecutionStart', async () => {
const agent = createAgentWithTool();
const starts: Array<AgentEventData & { type: AgentEvent.ToolExecutionStart }> = [];
const ends: Array<AgentEventData & { type: AgentEvent.ToolExecutionEnd }> = [];
agent.on(AgentEvent.ToolExecutionStart, (data) => {
starts.push(data as AgentEventData & { type: AgentEvent.ToolExecutionStart });
});
agent.on(AgentEvent.ToolExecutionEnd, (data) => {
ends.push(data as AgentEventData & { type: AgentEvent.ToolExecutionEnd });
});
await agent.generate('What is 5 plus 5?');
expect(starts.length).toBeGreaterThan(0);
expect(ends.length).toBe(starts.length);
expect(ends[0].toolCallId).toBe(starts[0].toolCallId);
});
it('multiple handlers on the same event are all called', async () => {
const agent = createSimpleAgent();
const calls: number[] = [];
agent.on(AgentEvent.AgentEnd, () => calls.push(1));
agent.on(AgentEvent.AgentEnd, () => calls.push(2));
agent.on(AgentEvent.AgentEnd, () => calls.push(3));
await agent.generate('Say hello');
expect(calls).toEqual(expect.arrayContaining([1, 2, 3]));
});
it('AgentEnd data contains the response messages', async () => {
const agent = createSimpleAgent();
let capturedMessages: unknown[] = [];
agent.on(AgentEvent.AgentEnd, (data) => {
if (data.type === AgentEvent.AgentEnd) {
capturedMessages = data.messages;
}
});
await agent.generate('Say hello');
expect(capturedMessages.length).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// Event system — stream path
// ---------------------------------------------------------------------------
describe('event system — stream', () => {
it('emits AgentStart and AgentEnd around a stream() call', async () => {
const agent = createSimpleAgent();
const fired: AgentEvent[] = [];
agent.on(AgentEvent.AgentStart, () => fired.push(AgentEvent.AgentStart));
agent.on(AgentEvent.AgentEnd, () => fired.push(AgentEvent.AgentEnd));
const { stream } = await agent.stream('Say hello');
await collectStreamChunks(stream);
expect(fired).toContain(AgentEvent.AgentStart);
expect(fired).toContain(AgentEvent.AgentEnd);
expect(fired.indexOf(AgentEvent.AgentStart)).toBeLessThan(fired.indexOf(AgentEvent.AgentEnd));
});
it('emits ToolExecutionStart and ToolExecutionEnd during streaming', async () => {
const agent = createAgentWithTool();
const toolEvents: AgentEventData[] = [];
agent.on(AgentEvent.ToolExecutionStart, (data) => toolEvents.push(data));
agent.on(AgentEvent.ToolExecutionEnd, (data) => toolEvents.push(data));
const { stream } = await agent.stream('What is 4 plus 6?');
await collectStreamChunks(stream);
const starts = toolEvents.filter((e) => e.type === AgentEvent.ToolExecutionStart);
expect(starts.length).toBeGreaterThan(0);
const start = starts[0] as AgentEventData & { type: AgentEvent.ToolExecutionStart };
expect(start.toolName).toBe('add_numbers');
});
});
// ---------------------------------------------------------------------------
// getState()
// ---------------------------------------------------------------------------
describe('getState()', () => {
it('returns idle before first run', () => {
const agent = createSimpleAgent();
const state = agent.getState();
expect(state.status).toBe('idle');
expect(state.messageList.messages).toHaveLength(0);
});
it('returns success after a successful generate()', async () => {
const agent = createSimpleAgent();
await agent.generate('Say hello');
const state = agent.getState();
expect(state.status).toBe('success');
});
it('returns success after a completed stream()', async () => {
const agent = createSimpleAgent();
const { stream } = await agent.stream('Say hello');
await collectStreamChunks(stream);
const state = agent.getState();
expect(state.status).toBe('success');
});
it('state is running during the generate loop (observed via event)', async () => {
const agent = createSimpleAgent();
let stateWhileRunning: string | undefined;
agent.on(AgentEvent.TurnStart, () => {
stateWhileRunning = agent.getState().status;
});
await agent.generate('Say hello');
expect(stateWhileRunning).toBe('running');
});
it('reflects resourceId and threadId from RunOptions', async () => {
const agent = createSimpleAgent();
await agent.generate('Say hello', {
persistence: { resourceId: 'user-123', threadId: 'thread-abc' },
});
const state = agent.getState();
expect(state.persistence?.resourceId).toBe('user-123');
expect(state.persistence?.threadId).toBe('thread-abc');
});
});
// ---------------------------------------------------------------------------
// asTool()
// ---------------------------------------------------------------------------
describe('asTool()', () => {
it('wraps the agent as a BuiltTool with the correct name and description', () => {
const agent = createSimpleAgent();
const tool = agent.asTool('A helpful assistant tool');
expect(tool.name).toBe('events-test-agent');
expect(tool.description).toBe('A helpful assistant tool');
expect(tool.inputSchema).toBeDefined();
expect(typeof tool.handler).toBe('function');
});
it('asTool handler calls the agent and returns text result', async () => {
const agent = createSimpleAgent();
const tool = agent.asTool('A helpful assistant tool');
const result = await tool.handler!({ input: 'Say "pong"' }, {});
expect(result).toHaveProperty('result');
expect(typeof (result as { result: string }).result).toBe('string');
expect((result as { result: string }).result.length).toBeGreaterThan(0);
});
it('coordinator agent can use sub-agent via asTool', async () => {
const specialist = new Agent('specialist')
.model(getModel('anthropic'))
.instructions('You are a specialist. When asked, reply with exactly "SPECIALIST_RESPONSE".');
const coordinator = new Agent('coordinator')
.model(getModel('anthropic'))
.instructions(
'You coordinate tasks. Use the specialist tool to answer questions. Relay the exact response.',
)
.tool(specialist.asTool('A specialist agent'));
const result = await coordinator.generate(
'Ask the specialist for their response and tell me what they said.',
);
const text = result.messages
.filter((m) => 'role' in m && m.role === 'assistant')
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
expect(text.length).toBeGreaterThan(0);
});
});

View file

@@ -0,0 +1,441 @@
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { describe as _describe } from 'vitest';
import { z } from 'zod';
import {
Agent,
type ContentToolCall,
type ContentToolResult,
filterLlmMessages,
Tool,
type StreamChunk,
type AgentMessage,
} from '../../index';
import { SqliteMemory } from '../../storage/sqlite-memory';
export type { StreamChunk };
/**
* Returns `describe` or `describe.skip` depending on whether the API key is set.
*/
export function describeIf(provider: 'anthropic' | 'openai') {
const envVar = provider === 'anthropic' ? 'ANTHROPIC_API_KEY' : 'OPENAI_API_KEY';
return process.env[envVar] ? _describe : _describe.skip;
}
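// A minimal usage sketch (mirrors how the suites in this directory consume this helper;
// the suite name below is illustrative only):
//   const describe = describeIf('anthropic');
//   describe('my gated suite', () => {
//     // runs only when ANTHROPIC_API_KEY is set, otherwise the whole suite is skipped
//   });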
/**
* Read all chunks from a ReadableStream into an array.
*/
export async function collectStreamChunks(stream: ReadableStream<unknown>): Promise<StreamChunk[]> {
const chunks: StreamChunk[] = [];
const reader = stream.getReader();
while (true) {
const { done, value } = await reader.read();
if (done) break;
chunks.push(value as StreamChunk);
}
return chunks;
}
/**
* Filter chunks by type.
*/
export function chunksOfType<T extends StreamChunk['type']>(
chunks: StreamChunk[],
type: T,
): Array<StreamChunk & { type: T }> {
return chunks.filter((c) => c.type === type) as Array<StreamChunk & { type: T }>;
}
/**
* Get the default model for a provider.
*/
export function getModel(provider: 'anthropic' | 'openai'): string {
return provider === 'anthropic' ? 'anthropic/claude-haiku-4-5' : 'openai/gpt-4o-mini';
}
/**
* Create a simple agent with an add_numbers tool for testing.
*/
export function createAgentWithAddTool(provider: 'anthropic' | 'openai'): Agent {
const addTool = new Tool('add_numbers')
.description('Add two numbers together and return the result')
.input(
z.object({
a: z.number().describe('First number'),
b: z.number().describe('Second number'),
}),
)
.output(
z.object({
result: z.number().describe('The sum'),
}),
)
.handler(async ({ a, b }) => ({ result: a + b }));
return new Agent('test-agent')
.model(getModel(provider))
.instructions(
'You are a calculator. When asked to add numbers, use the add_numbers tool. Be concise.',
)
.tool(addTool);
}
/**
* Create an agent with a tool that can suspend (interrupt) for confirmation.
*/
export function createAgentWithInterruptibleTool(provider: 'anthropic' | 'openai'): Agent {
const deleteTool = new Tool('delete_file')
.description('Delete a file at the given path')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-interrupt-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. When asked to delete a file, use the delete_file tool. Be concise.',
)
.tool(deleteTool)
.checkpoint('memory');
}
/**
* Create an agent with two tools: one interruptible, one not.
*/
export function createAgentWithMixedTools(provider: 'anthropic' | 'openai'): Agent {
const listTool = new Tool('list_files')
.description('List files in a directory')
.input(z.object({ dir: z.string().describe('Directory path') }))
.handler(async ({ dir }) => ({
files: ['readme.md', 'index.ts', 'package.json'],
dir,
}));
const deleteTool = new Tool('delete_file')
.description('Delete a file at the given path — dangerous operation')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-mixed-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. Use list_files to list and delete_file to delete. Be concise.',
)
.tool(listTool)
.tool(deleteTool)
.checkpoint('memory');
}
/**
* Create an agent with a tool that uses `.toMessage()` to emit a custom message.
* The tool adds two numbers; `.toMessage()` produces a custom MessageContent visible to the
* user but never forwarded to the LLM.
*/
export function createAgentWithToContentTool(provider: 'anthropic' | 'openai'): Agent {
const calcTool = new Tool('add_numbers')
.description('Add two numbers together and return the result')
.input(
z.object({
a: z.number().describe('First number'),
b: z.number().describe('Second number'),
}),
)
.output(z.object({ result: z.number().describe('The sum') }))
.handler(async ({ a, b }) => ({ result: a + b }))
.toMessage((output) => ({
type: 'custom',
messageType: '___dummyCustomMessage',
data: {
dummy: `dummy message. Tool output ${output.result}`,
},
}));
return new Agent('test-to-content-agent')
.model(getModel(provider))
.instructions(
'You are a calculator. When asked to add numbers, use the add_numbers tool. Be concise.',
)
.tool(calcTool);
}
/**
* Create an agent with one interruptible tool designed for parallel-call
* scenarios. The tool only deletes one file at a time, and the instructions
* strongly encourage parallel tool calling.
*/
export function createAgentWithParallelInterruptibleCalls(provider: 'anthropic' | 'openai'): Agent {
const deleteTool = new Tool('delete_file')
.description('Delete a single file at the given path. Can only delete one file per call.')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-parallel-interrupt-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. When asked to delete multiple files, you MUST call delete_file for EACH file using parallel tool calls in the same turn. Never skip a file.',
)
.tool(deleteTool)
.checkpoint('memory');
}
/**
* Create an agent with concurrent tool execution and an interruptible tool.
* Uses `toolCallConcurrency(Infinity)` so all tool calls in a single LLM turn
* are executed concurrently. Suspensions do not block subsequent tool calls.
*/
export function createAgentWithConcurrentInterruptibleCalls(
provider: 'anthropic' | 'openai',
): Agent {
const deleteTool = new Tool('delete_file')
.description('Delete a single file at the given path. Can only delete one file per call.')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-concurrent-interrupt-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. When asked to delete multiple files, you MUST call delete_file for EACH file using parallel tool calls in the same turn. Never skip a file.',
)
.tool(deleteTool)
.toolCallConcurrency(Infinity)
.checkpoint('memory');
}
/**
* Create an agent with concurrent tool execution mixing interruptible and
* non-interruptible tools. `list_files` runs immediately; `delete_file` suspends.
*/
export function createAgentWithConcurrentMixedTools(provider: 'anthropic' | 'openai'): Agent {
const listTool = new Tool('list_files')
.description('List files in a directory')
.input(z.object({ dir: z.string().describe('Directory path') }))
.handler(async ({ dir }) => ({
files: ['readme.md', 'index.ts', 'package.json'],
dir,
}));
const deleteTool = new Tool('delete_file')
.description('Delete a file at the given path — dangerous operation')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-concurrent-mixed-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. Use list_files to list and delete_file to delete. Be concise.',
)
.tool(listTool)
.tool(deleteTool)
.toolCallConcurrency(Infinity)
.checkpoint('memory');
}
/**
* Create an agent with bounded concurrency and an interruptible tool.
* Uses `toolCallConcurrency(concurrency)` to control batching.
*/
export function createAgentWithBatchedInterruptibleCalls(
provider: 'anthropic' | 'openai',
concurrency: number,
): Agent {
const deleteTool = new Tool('delete_file')
.description('Delete a single file at the given path. Can only delete one file per call.')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-batched-interrupt-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. When asked to delete multiple files, you MUST call delete_file for EACH file using parallel tool calls in the same turn. Never skip a file.',
)
.tool(deleteTool)
.toolCallConcurrency(concurrency)
.checkpoint('memory');
}
/**
* Create an agent with bounded concurrency and a non-interruptible tool.
*/
export function createAgentWithBatchedNormalCalls(
provider: 'anthropic' | 'openai',
concurrency: number,
): Agent {
const checkTool = new Tool('check_file')
.description('Check if a file exists at the given path. Can only check one file per call.')
.input(z.object({ path: z.string().describe('File path to check') }))
.output(z.object({ exists: z.boolean(), path: z.string() }))
.handler(async ({ path }) => ({ exists: true, path }));
return new Agent('test-batched-normal-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. When asked to check multiple files, you MUST call check_file for EACH file using parallel tool calls in the same turn. Never skip a file. After checking, summarize the results concisely.',
)
.tool(checkTool)
.toolCallConcurrency(concurrency)
.checkpoint('memory');
}
/**
* Create an agent with a tool that always throws an error.
* Used to verify that tool errors surface as LLM-visible messages.
*/
export function createAgentWithAlwaysErrorTool(provider: 'anthropic' | 'openai'): Agent {
const brokenTool = new Tool('broken_tool')
.description('Fetch data from a remote service')
.input(z.object({ id: z.string().describe('Resource ID to fetch') }))
.handler(async () => {
throw new Error('Service unavailable: connection timeout');
});
return new Agent('test-error-agent')
.model(getModel(provider))
.instructions(
'You are a data fetcher. Use broken_tool to fetch data. ' +
'If the tool fails, acknowledge the error in your response and explain what happened. Be concise.',
)
.tool(brokenTool);
}
/**
* Create an agent with a tool that fails on the first call and succeeds on the second.
* Used to verify that the LLM can self-correct by retrying after seeing the error result.
*/
export function createAgentWithFlakyTool(provider: 'anthropic' | 'openai'): {
agent: Agent;
callCount: () => number;
} {
let calls = 0;
const flakyTool = new Tool('fetch_data')
.description('Fetch data. May fail on the first attempt — retry if it does.')
.input(z.object({ id: z.string().describe('Resource ID to fetch') }))
.output(z.object({ id: z.string(), value: z.number() }))
.handler(async ({ id }) => {
calls++;
if (calls === 1) throw new Error('Transient error: rate limit exceeded, please retry');
return { id, value: 42 };
});
const agent = new Agent('test-flaky-agent')
.model(getModel(provider))
.instructions(
'You are a data fetcher. Use fetch_data to fetch data. ' +
'If the tool fails with a transient error, retry the SAME call once. Be concise.',
)
.tool(flakyTool);
return { agent, callCount: () => calls };
}
export const findLastTextContent = (messages: AgentMessage[]): string | undefined => {
return filterLlmMessages(messages)
.reverse()
.find((m) => m.content.find((c) => c.type === 'text'))
?.content.find((c) => c.type === 'text')?.text;
};
export const findLastToolCallContent = (messages: AgentMessage[]): ContentToolCall | undefined => {
return filterLlmMessages(messages)
.reverse()
.find((m) => m.content.find((c) => c.type === 'tool-call'))
?.content.find((c) => c.type === 'tool-call');
};
export const findAllToolCalls = (messages: AgentMessage[]): ContentToolCall[] => {
return filterLlmMessages(messages)
.filter((m) => m.content.find((c) => c.type === 'tool-call'))
.map((m) => m.content.filter((c) => c.type === 'tool-call'))
.flat();
};
export const findAllToolResults = (messages: AgentMessage[]): ContentToolResult[] => {
return filterLlmMessages(messages)
.filter((m) => m.content.find((c) => c.type === 'tool-result'))
.map((m) => m.content.find((c) => c.type === 'tool-result') as ContentToolResult);
};
export const collectTextDeltas = (chunks: StreamChunk[]): string => {
return chunks
.filter((c) => c.type === 'text-delta')
.map((c) => c.delta)
.join('');
};
export function createSqliteMemory(): {
memory: SqliteMemory;
cleanup: () => void;
url: string;
} {
const dbPath = path.join(
os.tmpdir(),
`test-${Date.now()}-${Math.random().toString(36).slice(2)}.db`,
);
const url = `file:${dbPath}`;
const memory = new SqliteMemory({ url });
return {
memory,
url,
cleanup: () => {
try {
fs.unlinkSync(dbPath);
} catch {
// File may already be removed — ignore
}
},
};
}

View file

@@ -0,0 +1,126 @@
/**
* Integration tests for JSON Schema input validation on regular (non-MCP) tools.
*
* Covers: valid input passes through, type errors surface as tool-result errors,
* missing required properties surface as tool-result errors, and the LLM can
* self-correct after receiving a JSON Schema validation error.
*
* Tests that call agent.generate() are gated on ANTHROPIC_API_KEY.
*/
import { expect, it, vi } from 'vitest';
import { describeIf, findLastTextContent } from './helpers';
import { Agent, filterLlmMessages } from '../../index';
import type { BuiltTool } from '../../types/sdk/tool';
const describe = describeIf('anthropic');
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
* Build a BuiltTool whose inputSchema is a raw JSON Schema object (not Zod).
* This mimics the shape that MCP tools use and the scenario we want to test
* for first-party tools that expose a JSONSchema7 directly.
*/
function makeJsonSchemaTool(overrides: Partial<BuiltTool> = {}): BuiltTool {
return {
name: 'find_user',
description: 'Find a user by their numeric age (18-99 only).',
inputSchema: {
type: 'object',
properties: {
age: {
type: 'integer',
minimum: 18,
maximum: 99,
description: 'User age (18-99)',
},
},
required: ['age'],
},
handler: async (input) => {
const { age } = input as { age: number };
return { user: `User aged ${age}` };
},
...overrides,
};
}
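// Example (as exercised in the first test below): override only the handler while keeping
// the raw JSON Schema intact, so validation behaviour is unchanged:
//   const handler = vi.fn().mockResolvedValue({ user: 'User aged 25' });
//   const tool = makeJsonSchemaTool({ handler });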
// ---------------------------------------------------------------------------
// No-LLM tests: validation outcome is determined by the tool-result message
// ---------------------------------------------------------------------------
describe('JSON Schema validation — non-MCP tools with raw JSON Schema', () => {
it('passes valid input to the handler and returns a successful tool result', async () => {
const handler = vi.fn().mockResolvedValue({ user: 'User aged 25' });
const tool = makeJsonSchemaTool({ handler });
const result = await new Agent('test')
.model('anthropic/claude-haiku-4-5')
.instructions(
'You are a user directory. Use find_user to look up users. ' +
'Call the tool with age=25 and then summarise the result. Be concise.',
)
.tool(tool)
.generate('Find user aged 25.');
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
// The handler should have been called with valid data
expect(handler).toHaveBeenCalledWith(expect.objectContaining({ age: 25 }), expect.anything());
// No tool-result should carry an error flag
const allMessages = filterLlmMessages(result.messages);
const toolResults = allMessages.flatMap((m) =>
m.content.filter((c) => c.type === 'tool-result'),
);
expect(toolResults.every((r) => !r.isError)).toBe(true);
});
it('allows the LLM to self-correct after receiving a JSON Schema validation error', async () => {
let callCount = 0;
const handler = vi.fn().mockImplementation(async (input: unknown) => {
callCount++;
return { user: `User aged ${(input as { age: number }).age}` };
});
// The schema enforces age ≥ 18. The prompt asks for age 5 first, then
// instructs the LLM to retry with 25 if validation fails.
const result = await new Agent('age-self-correction')
.model('anthropic/claude-haiku-4-5')
.instructions(
'You are a user directory. Use find_user to look up users by age. ' +
'The age must be an integer between 18 and 99. ' +
'If validation fails, correct the age to 25 and retry. Be very concise.',
)
.tool(makeJsonSchemaTool({ handler }))
.generate(
'Find a user aged 5. If that age is invalid, use 25 instead and retry. ' +
'You MUST try age 5 first, and only then use 25.',
);
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
// There should be at least two tool-result messages: one error, one success
const allMessages = filterLlmMessages(result.messages);
const toolResultMessages = allMessages.filter((m) =>
m.content.some((c) => c.type === 'tool-result'),
);
expect(toolResultMessages.length).toBeGreaterThanOrEqual(2);
// The successful handler call should have received a valid age
expect(callCount).toBeGreaterThanOrEqual(1);
const validCallArgs = handler.mock.calls.find(
([input]) => (input as { age: number }).age === 25,
);
expect(validCallArgs).toBeDefined();
// The final LLM response should acknowledge finding a user
const text = findLastTextContent(result.messages);
expect(text).toBeTruthy();
});
});

View file

@@ -0,0 +1,252 @@
/**
* Unit-style tests for McpConnection.listTools() approval wrapping.
*
* These tests use a real in-process MCP SSE server but do NOT require an LLM.
* They verify that the `requireApproval` field on McpServerConfig (and the
* global `shouldRequireToolApproval` constructor flag) correctly wrap the
* appropriate tools with a suspend/resume approval gate.
*
* Tool names from the test server: echo, add, image (prefixed: tools_echo, tools_add, tools_image).
*/
import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from 'vitest';
import { startSseServer, type TestServer } from './mcp-server-helpers';
import { McpConnection } from '../../runtime/mcp-connection';
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/** Returns true when the tool has been wrapped with an approval gate (has a suspendSchema). */
function isApprovalWrapped(tool: { suspendSchema?: unknown }): boolean {
return tool.suspendSchema !== undefined;
}
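// Sketch of the config shapes exercised below (values are illustrative; see each test for the
// authoritative setup):
//   new McpConnection({ name: 'tools', url })                              // no approval gate
//   new McpConnection({ name: 'tools', url, requireApproval: true })       // wrap every tool
//   new McpConnection({ name: 'tools', url, requireApproval: ['echo'] })   // wrap only listed tools
//   new McpConnection({ name: 'tools', url }, true)                        // global shouldRequireToolApproval flag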
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('McpConnection.listTools() — requireApproval config', () => {
let server: TestServer;
let connection: McpConnection | undefined;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
afterEach(async () => {
if (connection) {
await connection.disconnect();
connection = undefined;
}
});
// -----------------------------------------------------------------------
// no approval
// -----------------------------------------------------------------------
it('does not wrap any tools when requireApproval is not set', async () => {
connection = new McpConnection({ name: 'tools', url: server.url });
await connection.connect();
const tools = await connection.listTools();
expect(tools.length).toBeGreaterThan(0);
expect(tools.every((t) => !isApprovalWrapped(t))).toBe(true);
});
it('does not wrap any tools when requireApproval is false', async () => {
connection = new McpConnection({ name: 'tools', url: server.url, requireApproval: false });
await connection.connect();
const tools = await connection.listTools();
expect(tools.every((t) => !isApprovalWrapped(t))).toBe(true);
});
it('does not wrap any tools when requireApproval is an empty array', async () => {
connection = new McpConnection({ name: 'tools', url: server.url, requireApproval: [] });
await connection.connect();
const tools = await connection.listTools();
expect(tools.every((t) => !isApprovalWrapped(t))).toBe(true);
});
// -----------------------------------------------------------------------
// requireApproval: true — all tools
// -----------------------------------------------------------------------
it('wraps all tools when requireApproval: true in server config', async () => {
connection = new McpConnection({ name: 'tools', url: server.url, requireApproval: true });
await connection.connect();
const tools = await connection.listTools();
expect(tools.length).toBeGreaterThan(0);
expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
});
// -----------------------------------------------------------------------
// requireApproval: string[] — selective tools
// -----------------------------------------------------------------------
it('wraps only the listed tool when requireApproval names a single tool', async () => {
connection = new McpConnection({
name: 'tools',
url: server.url,
requireApproval: ['echo'],
});
await connection.connect();
const tools = await connection.listTools();
const echo = tools.find((t) => t.name === 'tools_echo');
const add = tools.find((t) => t.name === 'tools_add');
const image = tools.find((t) => t.name === 'tools_image');
expect(echo).toBeDefined();
expect(add).toBeDefined();
expect(image).toBeDefined();
expect(isApprovalWrapped(echo!)).toBe(true);
expect(isApprovalWrapped(add!)).toBe(false);
expect(isApprovalWrapped(image!)).toBe(false);
});
it('wraps multiple listed tools when requireApproval names several tools', async () => {
connection = new McpConnection({
name: 'tools',
url: server.url,
requireApproval: ['echo', 'add'],
});
await connection.connect();
const tools = await connection.listTools();
const echo = tools.find((t) => t.name === 'tools_echo');
const add = tools.find((t) => t.name === 'tools_add');
const image = tools.find((t) => t.name === 'tools_image');
expect(isApprovalWrapped(echo!)).toBe(true);
expect(isApprovalWrapped(add!)).toBe(true);
expect(isApprovalWrapped(image!)).toBe(false);
});
it('does not wrap tools that are not in the requireApproval list', async () => {
connection = new McpConnection({
name: 'tools',
url: server.url,
requireApproval: ['image'],
});
await connection.connect();
const tools = await connection.listTools();
const echo = tools.find((t) => t.name === 'tools_echo');
const add = tools.find((t) => t.name === 'tools_add');
const image = tools.find((t) => t.name === 'tools_image');
expect(isApprovalWrapped(echo!)).toBe(false);
expect(isApprovalWrapped(add!)).toBe(false);
expect(isApprovalWrapped(image!)).toBe(true);
});
// -----------------------------------------------------------------------
// global shouldRequireToolApproval flag
// -----------------------------------------------------------------------
it('wraps all tools when global shouldRequireToolApproval flag is true', async () => {
connection = new McpConnection({ name: 'tools', url: server.url }, true);
await connection.connect();
const tools = await connection.listTools();
expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
});
// -----------------------------------------------------------------------
// global flag + config.requireApproval interaction
// -----------------------------------------------------------------------
it('wraps all tools when global flag is true even if config.requireApproval names only some tools', async () => {
connection = new McpConnection(
{ name: 'tools', url: server.url, requireApproval: ['echo'] },
true,
);
await connection.connect();
const tools = await connection.listTools();
expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
});
it('wraps all tools when config.requireApproval: true even if global flag is false', async () => {
connection = new McpConnection(
{ name: 'tools', url: server.url, requireApproval: true },
false,
);
await connection.connect();
const tools = await connection.listTools();
expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
});
// -----------------------------------------------------------------------
// prefix stripping — server name used as prefix
// -----------------------------------------------------------------------
it('matches tool names without prefix when requireApproval contains un-prefixed names', async () => {
// The server is named 'srv'; tools will be 'srv_echo', 'srv_add', 'srv_image'.
// requireApproval uses the un-prefixed original names.
connection = new McpConnection({ name: 'srv', url: server.url, requireApproval: ['echo'] });
await connection.connect();
const tools = await connection.listTools();
const echo = tools.find((t) => t.name === 'srv_echo');
const add = tools.find((t) => t.name === 'srv_add');
expect(isApprovalWrapped(echo!)).toBe(true);
expect(isApprovalWrapped(add!)).toBe(false);
});
});
// ---------------------------------------------------------------------------
// Disconnect idempotency
// ---------------------------------------------------------------------------
type McpConnectionInternals = { client: { close(): Promise<void> } };
describe('McpConnection.disconnect() — idempotency', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('does not throw and does not call client.close() again when disconnect is called on an already-closed connection', async () => {
const conn = new McpConnection({ name: 'tools', url: server.url });
await conn.connect();
const clientClose = vi
.spyOn((conn as unknown as McpConnectionInternals).client, 'close')
.mockResolvedValue(undefined);
await conn.disconnect();
await conn.disconnect();
expect(clientClose).toHaveBeenCalledTimes(1);
});
it('does not throw and calls client.close() exactly once when disconnect is called concurrently', async () => {
const conn = new McpConnection({ name: 'tools', url: server.url });
await conn.connect();
const clientClose = vi
.spyOn((conn as unknown as McpConnectionInternals).client, 'close')
.mockResolvedValue(undefined);
await Promise.all([conn.disconnect(), conn.disconnect()]);
expect(clientClose).toHaveBeenCalledTimes(1);
});
});

View file

@@ -0,0 +1,473 @@
/**
* Integration tests for MCP lifecycle via McpClient and the Agent builder.
* Covers: McpClient constructor validation, connect/listTools/close, tool merge,
* name collision, requireToolApproval, and rich content handling.
*
* Tests that don't require a real LLM run unconditionally.
* Tests that call agent.generate() / agent.stream() are gated on ANTHROPIC_API_KEY.
*/
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
import { z } from 'zod';
import {
describeIf,
getModel,
findLastTextContent,
collectStreamChunks,
chunksOfType,
} from './helpers';
import { startSseServer, type TestServer } from './mcp-server-helpers';
import { Agent, McpClient, Tool, isLlmMessage } from '../../index';
// ---------------------------------------------------------------------------
// McpClient constructor validation — no MCP server required
// ---------------------------------------------------------------------------
describe('McpClient constructor validation', () => {
it('throws if neither url nor command is provided', () => {
expect(() => new McpClient([{ name: 'bad' }])).toThrow(
'exactly one of "url" or "command" must be provided',
);
});
it('throws if both url and command are provided', () => {
expect(
() => new McpClient([{ name: 'bad', url: 'http://localhost', command: 'node' }]),
).toThrow('provide either "url" or "command", not both');
});
it('throws if a duplicate server name is registered', () => {
expect(
() =>
new McpClient([
{ name: 'browser', url: 'http://localhost:9999/sse' },
{ name: 'browser', url: 'http://localhost:9998/sse' },
]),
).toThrow('MCP server name "browser" is already registered');
});
it('accepts valid url-based config', () => {
expect(() => new McpClient([{ name: 'srv', url: 'http://localhost:9999/sse' }])).not.toThrow();
});
it('accepts valid command-based config', () => {
expect(
() => new McpClient([{ name: 'stdio-srv', command: 'node', args: ['server.mjs'] }]),
).not.toThrow();
});
it('accepts multiple servers with distinct names', () => {
expect(
() =>
new McpClient([
{ name: 'srv-a', url: 'http://localhost:9999/sse' },
{ name: 'srv-b', url: 'http://localhost:9998/sse' },
]),
).not.toThrow();
});
});
// ---------------------------------------------------------------------------
// McpClient.listTools() — needs in-process MCP server, no LLM
// ---------------------------------------------------------------------------
describe('McpClient.listTools()', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('connects and returns tools when server is reachable', async () => {
const client = new McpClient([{ name: 'tools', url: server.url }]);
const tools = await client.listTools();
expect(tools.length).toBe(3);
expect(tools.map((t) => t.name).sort()).toEqual(['tools_add', 'tools_echo', 'tools_image']);
await client.close();
});
it('returns cached tools on subsequent calls without reconnecting', async () => {
const client = new McpClient([{ name: 'tools', url: server.url }]);
const first = await client.listTools();
const second = await client.listTools();
expect(first).toBe(second);
await client.close();
});
it('returns empty array when no servers are configured', async () => {
const client = new McpClient([]);
const tools = await client.listTools();
expect(tools).toHaveLength(0);
});
it('throws and clears cache when server is unreachable', async () => {
const client = new McpClient([{ name: 'dead', url: 'http://127.0.0.1:1/sse' }]);
await expect(client.listTools()).rejects.toThrow();
});
it('reports per-server errors for partially-failing multi-server configs', async () => {
const client = new McpClient([
{ name: 'ok', url: server.url },
{ name: 'dead', url: 'http://127.0.0.1:1/sse' },
]);
await expect(client.listTools()).rejects.toThrow(/dead/);
});
});
// ---------------------------------------------------------------------------
// generate() with MCP tools — requires ANTHROPIC_API_KEY
// ---------------------------------------------------------------------------
const describe_llm = describeIf('anthropic');
describe_llm('agent generate() with MCP tool', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('calls an MCP tool during generation and returns the result', async () => {
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('mcp-agent')
.model(getModel('anthropic'))
.instructions(
'You are a helpful assistant. When asked to echo a message, use the tools_echo tool. Be concise.',
)
.mcp(client);
const result = await agent.generate(
'Echo the message "integration test passed" using the tools_echo tool.',
);
expect(result.finishReason).not.toBe('error');
const text = findLastTextContent(result.messages);
expect(text?.toLowerCase()).toContain('integration test passed');
await client.close();
});
it('merges static tools and MCP tools in the same agent', async () => {
const staticTool = new Tool('double')
.description('Double a number')
.input(z.object({ n: z.number().describe('The number to double') }))
.output(z.object({ result: z.number() }))
.handler(async ({ n }) => ({ result: n * 2 }));
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('mixed-tools-agent')
.model(getModel('anthropic'))
.instructions(
'You are a calculator. ' +
'Use the double tool to double numbers and the tools.add tool to add numbers. ' +
'Be concise.',
)
.tool(staticTool)
.mcp(client);
const result = await agent.generate('Use the tools.add tool to add 15 and 27.');
expect(result.finishReason).not.toBe('error');
const text = findLastTextContent(result.messages);
expect(text).toContain('42');
await client.close();
});
it('MCP connections persist across multiple generate() calls', async () => {
// Connections are kept alive by McpClient and reused across runs.
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('lifecycle-agent')
.model(getModel('anthropic'))
.instructions('Use tools.add to add numbers. Be concise.')
.mcp(client);
const result1 = await agent.generate('Use tools.add to add 1 and 2.');
const result2 = await agent.generate('Use tools.add to add 3 and 4.');
expect(result1.finishReason).not.toBe('error');
expect(result2.finishReason).not.toBe('error');
await client.close();
});
});
// ---------------------------------------------------------------------------
// stream() with MCP tools — requires ANTHROPIC_API_KEY
// ---------------------------------------------------------------------------
describe_llm('agent stream() with MCP tool', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('streams a response that includes an MCP tool call', async () => {
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('stream-mcp-agent')
.model(getModel('anthropic'))
.instructions('Use tools_echo to echo messages. Be concise.')
.mcp(client);
const { stream } = await agent.stream('Echo "stream works" using tools_echo.');
const chunks = await collectStreamChunks(stream);
const messageChunks = chunksOfType(chunks, 'message');
const messages = messageChunks.map((c) => c.message);
const hasToolCall = messages.some(
(m) => isLlmMessage(m) && m.content.some((c) => c.type === 'tool-call'),
);
expect(hasToolCall).toBe(true);
await client.close();
});
});
// ---------------------------------------------------------------------------
// generate() error cases — no LLM needed for the connection failure case
// ---------------------------------------------------------------------------
describe('generate() with unreachable MCP server', () => {
it('rejects when MCP server is unreachable', async () => {
const client = new McpClient([{ name: 'dead', url: 'http://127.0.0.1:1/sse' }]);
const agent = new Agent('bad-mcp-agent')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(client);
await expect(agent.generate('hello')).rejects.toThrow(/dead/i);
});
});
// ---------------------------------------------------------------------------
// MCP tool name collision detection — no LLM needed
// ---------------------------------------------------------------------------
describe('MCP tool name collision detection', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('throws when a static tool and an MCP tool share the same prefixed name', async () => {
const conflicting = new Tool('tools_echo')
.description('conflicts with MCP echo')
.input(z.object({ message: z.string() }))
.handler(async ({ message }) => ({ result: message }));
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('collision-agent')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.tool(conflicting)
.mcp(client);
try {
await expect(agent.generate('hello')).rejects.toThrow(/collision/i);
} finally {
await client.close();
}
});
});
// ---------------------------------------------------------------------------
// requireToolApproval with MCP tools — requires ANTHROPIC_API_KEY
// ---------------------------------------------------------------------------
describe_llm('requireToolApproval() with MCP tools', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('suspends the MCP tool call when requireToolApproval is enabled', async () => {
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('approval-mcp-agent')
.model(getModel('anthropic'))
.instructions('Use tools_echo to echo messages. Be concise.')
.mcp(client)
.requireToolApproval()
.checkpoint('memory');
const { stream } = await agent.stream('Echo "needs approval" using tools_echo.');
const chunks = await collectStreamChunks(stream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBeGreaterThanOrEqual(1);
expect(suspendedChunks[0].toolName).toBe('tools_echo');
await client.close();
});
});
// ---------------------------------------------------------------------------
// McpServerConfig.requireApproval — builder validation (no LLM needed)
// ---------------------------------------------------------------------------
describe('McpServerConfig.requireApproval — builder validation', () => {
it('throws when requireApproval: true is set without a checkpoint store', async () => {
const client = new McpClient([
{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: true },
]);
const agent = new Agent('no-checkpoint')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(client);
// build() is triggered by generate() — fails before attempting connection
await expect(agent.generate('test')).rejects.toThrow(/checkpoint/i);
});
it('throws when requireApproval: string[] is set without a checkpoint store', async () => {
const client = new McpClient([
{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: ['echo'] },
]);
const agent = new Agent('no-checkpoint-selective')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(client);
await expect(agent.generate('test')).rejects.toThrow(/checkpoint/i);
});
it('does not throw when requireApproval: true is set with a checkpoint store', () => {
expect(() =>
new Agent('with-checkpoint')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(
new McpClient([
{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: true },
]),
)
.checkpoint('memory'),
).not.toThrow();
});
it('does not throw when requireApproval: false is set without a checkpoint store', () => {
expect(() =>
new Agent('no-approval')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(
new McpClient([
{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: false },
]),
),
).not.toThrow();
});
it('does not throw when requireApproval is an empty array without a checkpoint store', () => {
expect(() =>
new Agent('empty-approval')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(
new McpClient([{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: [] }]),
),
).not.toThrow();
});
});
// ---------------------------------------------------------------------------
// McpServerConfig.requireApproval end-to-end — requires ANTHROPIC_API_KEY
// ---------------------------------------------------------------------------
describe_llm('McpServerConfig.requireApproval with MCP tools', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('suspends all MCP tools when config.requireApproval: true', async () => {
const client = new McpClient([{ name: 'tools', url: server.url, requireApproval: true }]);
const agent = new Agent('config-approval-all-agent')
.model(getModel('anthropic'))
.instructions('Use tools_echo to echo messages. Be concise.')
.mcp(client)
.checkpoint('memory');
const { stream } = await agent.stream('Echo "needs approval" using tools_echo.');
const chunks = await collectStreamChunks(stream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBeGreaterThanOrEqual(1);
expect(suspendedChunks[0].toolName).toBe('tools_echo');
await client.close();
});
it('suspends only the listed tool when config.requireApproval is a string array', async () => {
const client = new McpClient([{ name: 'tools', url: server.url, requireApproval: ['echo'] }]);
const agent = new Agent('config-approval-selective-agent')
.model(getModel('anthropic'))
.instructions('Use tools_echo to echo messages. Be concise.')
.mcp(client)
.checkpoint('memory');
const { stream } = await agent.stream('Echo "selective approval" using tools_echo.');
const chunks = await collectStreamChunks(stream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBeGreaterThanOrEqual(1);
expect(suspendedChunks[0].toolName).toBe('tools_echo');
await client.close();
});
it('does not suspend a tool not listed in config.requireApproval', async () => {
// Only 'echo' requires approval; 'add' should run to completion without suspension.
const client = new McpClient([{ name: 'tools', url: server.url, requireApproval: ['echo'] }]);
const agent = new Agent('config-approval-unlisted-agent')
.model(getModel('anthropic'))
.instructions('Use tools_add to add numbers. Do not use any other tool. Be concise.')
.mcp(client)
.checkpoint('memory');
const result = await agent.generate('Use tools_add to add 10 and 32.');
expect(result.finishReason).not.toBe('error');
const text = findLastTextContent(result.messages);
expect(text).toContain('42');
await client.close();
});
});

View file

@ -0,0 +1,164 @@
/**
* In-process MCP test server helpers.
* Creates real MCP servers (SSE and StreamableHTTP) bound to random localhost ports
* for use in integration tests. No mocking of SDK internals.
*/
import { Server as McpServer } from '@modelcontextprotocol/sdk/server/index.js';
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import http from 'http';
/** 1×1 transparent PNG in base64 (smallest valid PNG). Used for image tool tests. */
export const TINY_PNG =
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
export interface TestServer {
url: string;
close: () => Promise<void>;
}
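/*
 * Typical usage from a test file (sketch; assumes vitest's beforeAll/afterAll hooks,
 * as in the integration tests in this package):
 *
 *   let server: TestServer;
 *   beforeAll(async () => {
 *     server = await startSseServer(); // or startStreamableHttpServer()
 *   });
 *   afterAll(async () => {
 *     await server.close();
 *   });
 */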
/** Create an in-process MCP Server with three test tools: echo, add, and image. */
export function createTestMcpServer(): McpServer {
const server = new McpServer(
{ name: 'test-mcp-server', version: '1.0.0' },
{ capabilities: { tools: {} } },
);
server.setRequestHandler(ListToolsRequestSchema, async () => ({
tools: [
{
name: 'echo',
description: 'Echo the message back as-is',
inputSchema: {
type: 'object',
properties: { message: { type: 'string', description: 'Message to echo' } },
required: ['message'],
},
},
{
name: 'add',
description: 'Add two numbers together',
inputSchema: {
type: 'object',
properties: {
a: { type: 'number', description: 'First number' },
b: { type: 'number', description: 'Second number' },
},
required: ['a', 'b'],
},
},
{
name: 'image',
description: 'Return a small image with a caption',
inputSchema: {
type: 'object',
properties: { caption: { type: 'string', description: 'Image caption' } },
required: ['caption'],
},
},
],
}));
server.setRequestHandler(CallToolRequestSchema, async (request) => {
const { name, arguments: args = {} } = request.params;
if (name === 'echo') {
// eslint-disable-next-line @typescript-eslint/no-base-to-string
return { content: [{ type: 'text', text: String(args.message ?? '') }] };
}
if (name === 'add') {
const sum = Number(args.a ?? 0) + Number(args.b ?? 0);
return { content: [{ type: 'text', text: String(sum) }] };
}
if (name === 'image') {
return {
content: [
// eslint-disable-next-line @typescript-eslint/no-base-to-string
{ type: 'text', text: String(args.caption ?? '') },
{ type: 'image', data: TINY_PNG, mimeType: 'image/png' },
],
};
}
return {
isError: true,
content: [{ type: 'text', text: `Unknown tool: ${name}` }],
};
});
return server;
}
/** Start an SSE MCP server on a random port. Returns the SSE endpoint URL and a close function. */
export async function startSseServer(): Promise<TestServer> {
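// Active SSE transports keyed by sessionId so POST /message requests can be routed
// back to the client connection they belong to.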
const transports = new Map<string, SSEServerTransport>();
const httpServer = http.createServer(async (req, res) => {
try {
if (req.method === 'GET' && req.url === '/sse') {
// Create a fresh McpServer per client connection — the Server class holds
// a single active transport reference and rejects a second connect() call
// if the first transport hasn't been fully torn down yet.
const mcpServer = createTestMcpServer();
const transport = new SSEServerTransport('/message', res);
transports.set(transport.sessionId, transport);
await mcpServer.connect(transport);
} else if (req.method === 'POST' && req.url?.startsWith('/message')) {
const sessionId = new URL(req.url, 'http://localhost').searchParams.get('sessionId') ?? '';
const transport = transports.get(sessionId);
if (transport) {
await transport.handlePostMessage(req, res);
} else {
res.writeHead(404).end(`No transport for sessionId: ${sessionId}`);
}
} else {
res.writeHead(404).end('Not found');
}
} catch {
if (!res.headersSent) res.writeHead(500).end('Internal server error');
}
});
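// Listen on port 0 so the OS assigns a free ephemeral port; the actual port is read
// back from address() below.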
await new Promise<void>((resolve) => httpServer.listen(0, '127.0.0.1', resolve));
const { port } = httpServer.address() as { port: number };
return {
url: `http://127.0.0.1:${port}/sse`,
close: async () => {
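// Destroy any still-open connections (including long-lived SSE streams) so
// httpServer.close() can complete promptly.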
httpServer.closeAllConnections();
await new Promise<void>((resolve) => httpServer.close(() => resolve()));
},
};
}
/** Start a Streamable HTTP MCP server on a random port. Returns the endpoint URL and a close function. */
export async function startStreamableHttpServer(): Promise<TestServer> {
// In stateless mode (sessionIdGenerator: undefined) the SDK enforces that each
// transport instance handles exactly one HTTP request. A fresh McpServer + transport
// must therefore be created per-request, mirroring the SSE server pattern above.
const httpServer = http.createServer(async (req, res) => {
try {
const mcpServer = createTestMcpServer();
const transport = new StreamableHTTPServerTransport({ sessionIdGenerator: undefined });
await mcpServer.connect(transport);
await transport.handleRequest(req, res);
} catch {
if (!res.headersSent) res.writeHead(500).end('Internal server error');
}
});
await new Promise<void>((resolve) => httpServer.listen(0, '127.0.0.1', resolve));
const { port } = httpServer.address() as { port: number };
return {
url: `http://127.0.0.1:${port}/mcp`,
close: async () => {
httpServer.closeAllConnections();
await new Promise<void>((resolve) => httpServer.close(() => resolve()));
},
};
}

View file

@ -0,0 +1,201 @@
/**
* Integration tests for McpConnection with SSE transport.
* Uses a real in-process HTTP server implementing the MCP SSE protocol.
* No mocking of SDK internals or McpConnection.
*/
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
import { startSseServer, TINY_PNG, type TestServer } from './mcp-server-helpers';
import { McpConnection } from '../../runtime/mcp-connection';
import type { ContentFile, ContentText, Message } from '../../types/sdk/message';
import { isZodSchema } from '../../utils/zod';
describe('McpConnection — SSE transport', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('connects to an SSE server and lists tools', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const tools = await conn.listTools();
expect(tools).toHaveLength(3);
expect(tools.map((t) => t.name)).toEqual(
expect.arrayContaining(['test_echo', 'test_add', 'test_image']),
);
await conn.disconnect();
});
it('calls echo tool and returns text content', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const result = await conn.callTool('echo', { message: 'hello from sse' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
expect(result.content[0]).toEqual({ type: 'text', text: 'hello from sse' });
await conn.disconnect();
});
it('calls add tool and returns calculated result', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const result = await conn.callTool('add', { a: 7, b: 13 });
expect(result.isError).toBeFalsy();
expect(result.content[0]).toEqual({ type: 'text', text: '20' });
await conn.disconnect();
});
it('calls image tool and returns mixed text + image content', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const result = await conn.callTool('image', { caption: 'landscape' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(2);
expect(result.content[0]).toMatchObject({ type: 'text', text: 'landscape' });
expect(result.content[1]).toMatchObject({
type: 'image',
data: TINY_PNG,
mimeType: 'image/png',
});
await conn.disconnect();
});
it('disconnects cleanly without throwing', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
await expect(conn.disconnect()).resolves.toBeUndefined();
});
it('throws when listTools() is called without connecting first', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
// Do NOT call conn.connect()
await expect(conn.listTools()).rejects.toThrow();
});
it('throws when callTool() is called without connecting first', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await expect(conn.callTool('echo', { message: 'hi' })).rejects.toThrow();
});
it('is idempotent — calling connect() twice resolves without starting a second connection', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
await expect(conn.connect()).resolves.toBeUndefined();
await conn.disconnect();
});
it('deduplicates concurrent connect() calls — both resolve via the same promise', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
const [r1, r2] = await Promise.all([conn.connect(), conn.connect()]);
expect(r1).toBeUndefined();
expect(r2).toBeUndefined();
await conn.disconnect();
});
describe('listTools() resolved tools', () => {
it('prefixes tool names with the server name', async () => {
const conn = new McpConnection({ name: 'browser', url: server.url });
await conn.connect();
const builtTools = await conn.listTools();
expect(builtTools.every((t) => t.name.startsWith('browser_'))).toBe(true);
expect(builtTools.map((t) => t.name)).toEqual(
expect.arrayContaining(['browser_echo', 'browser_add', 'browser_image']),
);
await conn.disconnect();
});
it('sets inputSchema as raw JSON Schema (not Zod) and sets mcpTool flag', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const builtTools = await conn.listTools();
for (const t of builtTools) {
expect(t.inputSchema).toBeDefined();
expect(isZodSchema(t.inputSchema!)).toBe(false);
expect(t.mcpTool).toBe(true);
expect(t.mcpServerName).toBe('test');
}
await conn.disconnect();
});
it('handler calls the tool and returns the MCP result', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const builtTools = await conn.listTools();
const echoTool = builtTools.find((t) => t.name === 'test_echo')!;
const result = await echoTool.handler!({ message: 'from handler' }, {} as never);
const mcpResult = result as { content: Array<{ type: string; text: string }> };
expect(mcpResult.content[0]).toEqual({ type: 'text', text: 'from handler' });
await conn.disconnect();
});
it('toMessage returns undefined for text-only results', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const builtTools = await conn.listTools();
const echoTool = builtTools.find((t) => t.name === 'test_echo')!;
const mcpResult = await conn.callTool('echo', { message: 'text only' });
const message = echoTool.toMessage!(mcpResult);
expect(message).toBeUndefined();
await conn.disconnect();
});
it('toMessage returns an assistant message with file part for image results', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const builtTools = await conn.listTools();
const imageTool = builtTools.find((t) => t.name === 'test_image')!;
const mcpResult = await conn.callTool('image', { caption: 'my photo' });
const message = imageTool.toMessage!(mcpResult);
expect(message).toBeDefined();
const llmMessage = message as Message;
expect(llmMessage.role).toBe('assistant');
const content = llmMessage.content as Array<ContentText | ContentFile>;
const textPart = content.find((c): c is ContentText => c.type === 'text');
const filePart = content.find((c): c is ContentFile => c.type === 'file');
expect(textPart).toBeDefined();
expect(textPart!.text).toBe('my photo');
expect(filePart).toBeDefined();
expect(filePart!.mediaType).toBe('image/png');
expect(filePart!.data).toBe(TINY_PNG);
await conn.disconnect();
});
});
});

View file

@ -0,0 +1,118 @@
/**
* Integration tests for McpConnection with stdio transport.
* Spawns a real child process (mcp-stdio-server.mjs) and communicates via stdin/stdout.
* No mocking of SDK internals or McpConnection.
*/
import path from 'path';
import { describe, expect, it } from 'vitest';
import { TINY_PNG } from './mcp-server-helpers';
import { McpConnection } from '../../runtime/mcp-connection';
// vitest injects __dirname for TypeScript test files in the node environment.
const FIXTURE_PATH = path.resolve(__dirname, '../fixtures/mcp-stdio-server.mjs');
/** Config that spawns the stdio fixture server. */
function stdioConfig(name = 'test') {
return {
name,
command: 'node',
args: [FIXTURE_PATH],
};
}
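// The fixture exposes the same echo/add/image tools as the in-process servers,
// so the assertions below mirror the SSE and Streamable HTTP suites.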
describe('McpConnection — stdio transport', () => {
it('connects to a stdio server and lists tools', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
const tools = await conn.listTools();
expect(tools).toHaveLength(3);
expect(tools.map((t) => t.name)).toEqual(
expect.arrayContaining(['test_echo', 'test_add', 'test_image']),
);
await conn.disconnect();
});
it('calls echo tool and returns text content', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
const result = await conn.callTool('echo', { message: 'hello from stdio' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
expect(result.content[0]).toEqual({ type: 'text', text: 'hello from stdio' });
await conn.disconnect();
});
it('calls add tool and returns calculated result', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
const result = await conn.callTool('add', { a: 42, b: 58 });
expect(result.isError).toBeFalsy();
expect(result.content[0]).toEqual({ type: 'text', text: '100' });
await conn.disconnect();
});
it('calls image tool and returns mixed text + image content', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
const result = await conn.callTool('image', { caption: 'forest' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(2);
expect(result.content[0]).toMatchObject({ type: 'text', text: 'forest' });
expect(result.content[1]).toMatchObject({
type: 'image',
data: TINY_PNG,
mimeType: 'image/png',
});
await conn.disconnect();
});
it('disconnects cleanly, terminating the child process', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
await expect(conn.disconnect()).resolves.toBeUndefined();
});
describe('listTools() resolved tools', () => {
it('prefixes tool names with the server name', async () => {
const conn = new McpConnection(stdioConfig('fs'));
await conn.connect();
const builtTools = await conn.listTools();
expect(builtTools.every((t) => t.name.startsWith('fs_'))).toBe(true);
expect(builtTools.map((t) => t.name)).toEqual(
expect.arrayContaining(['fs_echo', 'fs_add', 'fs_image']),
);
await conn.disconnect();
});
it('handler invokes the child process tool and returns MCP result', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
const builtTools = await conn.listTools();
const addTool = builtTools.find((t) => t.name === 'test_add')!;
const result = await addTool.handler!({ a: 3, b: 4 }, {} as never);
const mcpResult = result as { content: Array<{ type: string; text: string }> };
expect(mcpResult.content[0]).toEqual({ type: 'text', text: '7' });
await conn.disconnect();
});
});
});

View file

@ -0,0 +1,131 @@
/**
* Integration tests for McpConnection with Streamable HTTP transport.
* Uses a real in-process HTTP server implementing the MCP Streamable HTTP protocol.
* No mocking of SDK internals or McpConnection.
*/
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
import { startStreamableHttpServer, TINY_PNG, type TestServer } from './mcp-server-helpers';
import { McpConnection } from '../../runtime/mcp-connection';
import type { ContentFile, ContentText, Message } from '../../types/sdk/message';
describe('McpConnection — Streamable HTTP transport', () => {
let server: TestServer;
beforeAll(async () => {
server = await startStreamableHttpServer();
});
afterAll(async () => {
await server.close();
});
it('connects to a Streamable HTTP server and lists tools', async () => {
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
await conn.connect();
const tools = await conn.listTools();
expect(tools).toHaveLength(3);
expect(tools.map((t) => t.name)).toEqual(
expect.arrayContaining(['test_echo', 'test_add', 'test_image']),
);
await conn.disconnect();
});
it('calls echo tool and returns text content', async () => {
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
await conn.connect();
const result = await conn.callTool('echo', { message: 'hello from streamable-http' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
expect(result.content[0]).toEqual({ type: 'text', text: 'hello from streamable-http' });
await conn.disconnect();
});
it('calls add tool and returns calculated result', async () => {
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
await conn.connect();
const result = await conn.callTool('add', { a: 100, b: 200 });
expect(result.isError).toBeFalsy();
expect(result.content[0]).toEqual({ type: 'text', text: '300' });
await conn.disconnect();
});
it('calls image tool and returns mixed text + image content', async () => {
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
await conn.connect();
const result = await conn.callTool('image', { caption: 'mountains' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(2);
expect(result.content[0]).toMatchObject({ type: 'text', text: 'mountains' });
expect(result.content[1]).toMatchObject({
type: 'image',
data: TINY_PNG,
mimeType: 'image/png',
});
await conn.disconnect();
});
it('disconnects cleanly without throwing', async () => {
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
await conn.connect();
await expect(conn.disconnect()).resolves.toBeUndefined();
});
describe('listTools() resolved tools', () => {
it('prefixes tool names with the server name', async () => {
const conn = new McpConnection({
name: 'devtools',
url: server.url,
transport: 'streamableHttp',
});
await conn.connect();
const builtTools = await conn.listTools();
expect(builtTools.every((t) => t.name.startsWith('devtools_'))).toBe(true);
expect(builtTools.map((t) => t.name)).toEqual(
expect.arrayContaining(['devtools_echo', 'devtools_add', 'devtools_image']),
);
await conn.disconnect();
});
it('toMessage returns an assistant message with file part for image results', async () => {
const conn = new McpConnection({
name: 'test',
url: server.url,
transport: 'streamableHttp',
});
await conn.connect();
const builtTools = await conn.listTools();
const imageTool = builtTools.find((t) => t.name === 'test_image')!;
const mcpResult = await conn.callTool('image', { caption: 'sunset' });
const message = imageTool.toMessage!(mcpResult);
expect(message).toBeDefined();
const llmMessage = message as Message;
expect(llmMessage.role).toBe('assistant');
const content = llmMessage.content as Array<ContentText | ContentFile>;
const filePart = content.find((c): c is ContentFile => c.type === 'file');
expect(filePart).toBeDefined();
expect(filePart!.mediaType).toBe('image/png');
await conn.disconnect();
});
});
});

View file

@ -0,0 +1,302 @@
/**
* Integration test: custom BuiltMemory backend.
*
* Proves that any object implementing the BuiltMemory interface works with the
 * agent runtime; no SDK-provided storage class needed. This is the contract
* that Redis, DynamoDB, TypeORM, or any other persistence layer must satisfy.
*/
import { expect, it, beforeEach } from 'vitest';
import { Agent, Memory, toDbMessage, type AgentDbMessage, type AgentMessage } from '../../../index';
import type { BuiltMemory, Thread } from '../../../types/sdk/memory';
import { describeIf, findLastTextContent, getModel } from '../helpers';
const describe = describeIf('anthropic');
// ---------------------------------------------------------------------------
// Custom in-memory BuiltMemory implementation (simulates Redis, DynamoDB, etc.)
// ---------------------------------------------------------------------------
class CustomMapMemory implements BuiltMemory {
readonly threads = new Map<string, Thread>();
readonly messages = new Map<string, AgentDbMessage[]>();
readonly workingMemory = new Map<string, string>();
// --- Thread management ---
async getThread(threadId: string): Promise<Thread | null> {
return this.threads.get(threadId) ?? null;
}
async saveThread(thread: Omit<Thread, 'createdAt' | 'updatedAt'>): Promise<Thread> {
const now = new Date();
const full: Thread = { ...thread, createdAt: now, updatedAt: now };
this.threads.set(thread.id, full);
return full;
}
async deleteThread(threadId: string): Promise<void> {
this.threads.delete(threadId);
this.messages.delete(threadId);
}
// --- Message persistence ---
async getMessages(
threadId: string,
opts?: { limit?: number; before?: Date },
): Promise<AgentDbMessage[]> {
let msgs = this.messages.get(threadId) ?? [];
if (opts?.before) {
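// Keep undated messages; otherwise keep only messages created strictly before the cutoff.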
msgs = msgs.filter((m) => {
const ts = 'createdAt' in m ? (m as Record<string, unknown>).createdAt : undefined;
return ts instanceof Date ? ts < opts.before! : true;
});
}
if (opts?.limit) {
msgs = msgs.slice(-opts.limit);
}
return msgs.map(toDbMessage);
}
async saveMessages(args: {
threadId: string;
resourceId?: string;
messages: AgentMessage[];
}): Promise<void> {
const existing = this.messages.get(args.threadId) ?? [];
this.messages.set(args.threadId, [...existing, ...args.messages.map(toDbMessage)]);
}
async deleteMessages(messageIds: string[]): Promise<void> {
const idSet = new Set(messageIds);
for (const [threadId, msgs] of this.messages) {
this.messages.set(
threadId,
msgs.filter((m) => !idSet.has(m.id)),
);
}
}
// --- Working memory (Tier 2) ---
async getWorkingMemory(params: {
threadId: string;
resourceId: string;
scope: 'resource' | 'thread';
}): Promise<string | null> {
return (
this.workingMemory.get(params.scope === 'resource' ? params.resourceId : params.threadId) ??
null
);
}
async saveWorkingMemory(
params: { threadId: string; resourceId: string; scope: 'resource' | 'thread' },
content: string,
): Promise<void> {
const id = params.scope === 'resource' ? params.resourceId : params.threadId;
this.workingMemory.set(id, content);
}
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('custom BuiltMemory backend', () => {
let store: CustomMapMemory;
beforeEach(() => {
store = new CustomMapMemory();
});
it('recalls previous messages across turns', async () => {
const memory = new Memory().storage(store).lastMessages(10);
const agent = new Agent('custom-mem-recall')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `custom-thread-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'user-1' } };
await agent.generate('My name is Valentina. Just acknowledge.', options);
const result = await agent.generate('What is my name?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('valentina');
// Verify the custom store actually received messages
const stored = store.messages.get(threadId);
expect(stored).toBeDefined();
expect(stored!.length).toBeGreaterThanOrEqual(2);
});
it('isolates threads in the custom backend', async () => {
const memory = new Memory().storage(store).lastMessages(10);
const agent = new Agent('custom-mem-isolation')
.model(getModel('anthropic'))
.instructions(
'You are a helpful assistant. Be concise. If you don\'t know something, say "I don\'t know".',
)
.memory(memory);
const thread1 = `custom-t1-${Date.now()}`;
const thread2 = `custom-t2-${Date.now()}`;
await agent.generate('The secret word is NEPTUNE. Just acknowledge.', {
persistence: { threadId: thread1, resourceId: 'user-1' },
});
const result = await agent.generate('What is the secret word?', {
persistence: { threadId: thread2, resourceId: 'user-1' },
});
expect(findLastTextContent(result.messages)?.toLowerCase()).not.toContain('neptune');
// Thread 1 should have messages, thread 2 should have its own
expect(store.messages.get(thread1)!.length).toBeGreaterThan(0);
expect(store.messages.get(thread2)!.length).toBeGreaterThan(0);
});
it('persists and retrieves resource-scoped working memory via custom backend', async () => {
const memory = new Memory()
.storage(store)
.lastMessages(10)
.scope('resource')
.freeform('# User Profile\n- **Name**:\n- **Favorite color**:');
const agent = new Agent('custom-mem-working')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
.memory(memory);
const threadId = `custom-wm-${Date.now()}`;
const resourceId = 'user-wm-1';
const options = { persistence: { threadId, resourceId } };
await agent.generate('My name is Kenji and my favorite color is teal.', options);
// Working memory should have been persisted keyed by resourceId
const wm = store.workingMemory.get(resourceId);
expect(wm).toBeDefined();
expect(wm!.toLowerCase()).toContain('kenji');
// New thread, same resourceId — resource-scoped working memory carries over
const thread2 = `custom-wm2-${Date.now()}`;
const result = await agent.generate('What is my name?', {
persistence: { threadId: thread2, resourceId },
});
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('kenji');
});
it('persists and retrieves thread-scoped working memory via custom backend', async () => {
const memory = new Memory()
.storage(store)
.lastMessages(10)
.scope('thread')
.freeform('# Conversation Notes\n- **Topic**:\n- **Key facts**:');
const agent = new Agent('custom-mem-thread-wm')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
.memory(memory);
const threadId = `custom-twm-${Date.now()}`;
const resourceId = 'user-twm-1';
await agent.generate('The project codename is AURORA. Just acknowledge.', {
persistence: { threadId, resourceId },
});
// Working memory should be stored keyed by threadId
const wmByThread = store.workingMemory.get(threadId);
expect(wmByThread).toBeDefined();
expect(wmByThread!.toLowerCase()).toContain('aurora');
// Different thread for same resource — should NOT see the previous working memory
const thread2 = `custom-twm2-${Date.now()}`;
const result = await agent.generate(
'What is the project codename? Answer "unknown" if you have no information.',
{ persistence: { threadId: thread2, resourceId } },
);
expect(findLastTextContent(result.messages)?.toLowerCase()).not.toContain('aurora');
// Thread 2 working memory should be independent
expect(store.workingMemory.get(thread2) ?? '').not.toContain('aurora');
});
it('thread-scoped working memory allows recall within the same thread when history is truncated', async () => {
// Use lastMessages: 1 so earlier turns are pushed out of the history window.
// The agent must rely on working memory — not chat history — to recall old facts.
const memory = new Memory()
.storage(store)
.lastMessages(1)
.scope('thread')
.freeform('# Key facts\n- **Secret word**:\n- **User name**:');
const agent = new Agent('custom-mem-thread-wm-recall')
.model(getModel('anthropic'))
.instructions(
'You are a helpful assistant. Be concise. ' +
'Always update your working memory with any important facts you learn.',
)
.memory(memory);
const threadId = `custom-twm-recall-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'user-twm-recall' } };
// Turn 1: share a fact — agent writes it into working memory
await agent.generate('The secret word is COBALT. Remember it. Just acknowledge.', options);
// Turn 2: filler turn — this pushes turn 1 out of the 1-message history window
await agent.generate('Just say "ok".', options);
// Turn 3: ask for the fact — only working memory can supply it now (turn 1 is truncated)
const result = await agent.generate('What was the secret word I told you earlier?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('cobalt');
});
it('works with stream() path', async () => {
const memory = new Memory().storage(store).lastMessages(10);
const agent = new Agent('custom-mem-stream')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `custom-stream-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'user-stream' } };
// Turn 1 via stream
const result1 = await agent.stream('The capital of France is Paris. Acknowledge.', options);
const reader = result1.stream.getReader();
while (true) {
const { done } = await reader.read();
if (done) break;
}
// Turn 2 via generate — should recall from custom store
const result = await agent.generate('What is the capital of France?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('paris');
expect(store.messages.get(threadId)!.length).toBeGreaterThanOrEqual(2);
});
it('works when passed directly to agent.memory() as bare BuiltMemory', async () => {
// Skip the Memory builder entirely — pass the raw BuiltMemory object
const agent = new Agent('custom-mem-bare')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(store);
const threadId = `custom-bare-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'user-bare' } };
await agent.generate('Remember: the answer is 42. Acknowledge.', options);
const result = await agent.generate('What is the answer?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('42');
});
});

View file

@ -0,0 +1,108 @@
import { expect, it, afterEach } from 'vitest';
import { Agent, Memory } from '../../../index';
import { SqliteMemory } from '../../../storage/sqlite-memory';
import { describeIf, findLastTextContent, getModel, createSqliteMemory } from '../helpers';
const describe = describeIf('anthropic');
const cleanups: Array<() => void> = [];
afterEach(() => {
cleanups.forEach((fn) => fn());
cleanups.length = 0;
});
describe('freeform working memory', () => {
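// Markdown template the model fills in as its freeform working memory across turns.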
const template = '# User Context\n- **Name**:\n- **City**:\n- **Pet**:';
it('agent recalls info via working memory across turns', async () => {
const memory = new Memory().storage('memory').lastMessages(10).freeform(template);
const agent = new Agent('freeform-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `freeform-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
await agent.generate('My name is Alice and I live in Berlin.', options);
const result = await agent.generate('What city do I live in?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('berlin');
});
it('working memory tags are stripped from visible response', async () => {
const memory = new Memory().storage('memory').lastMessages(10).freeform(template);
const agent = new Agent('strip-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `strip-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
const result = await agent.generate('My name is Bob.', options);
const allText = result.messages
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => (c as { text: string }).text)
.join(' ');
expect(allText).not.toContain('<working_memory>');
expect(allText).not.toContain('</working_memory>');
});
it('working memory persists across threads with same resourceId', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
const mem = new Memory().storage(memory).lastMessages(10).freeform(template);
const agent = new Agent('cross-thread-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem);
const resourceId = `user-${Date.now()}`;
await agent.generate('My name is Charlie and I have a dog named Rex.', {
persistence: { threadId: `thread-1-${Date.now()}`, resourceId },
});
const result = await agent.generate("What's my dog's name?", {
persistence: { threadId: `thread-2-${Date.now()}`, resourceId },
});
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('rex');
});
it('working memory survives SqliteMemory restart', async () => {
const { memory, cleanup, url } = createSqliteMemory();
cleanups.push(cleanup);
const mem = new Memory().storage(memory).lastMessages(10).freeform(template);
const agent1 = new Agent('restart-wm-1')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem);
const resourceId = `user-${Date.now()}`;
const threadId = `restart-wm-${Date.now()}`;
await agent1.generate('My name is Diana.', { persistence: { threadId, resourceId } });
const memory2 = new SqliteMemory({ url });
const mem2 = new Memory().storage(memory2).lastMessages(10).freeform(template);
const agent2 = new Agent('restart-wm-2')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem2);
const result = await agent2.generate('What is my name?', {
persistence: { threadId: `new-thread-${Date.now()}`, resourceId },
});
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('diana');
});
});

View file

@ -0,0 +1,627 @@
/**
* Integration test: PostgresMemory with pgvector semantic recall.
*
* Uses testcontainers to spin up a real Postgres instance with pgvector,
* then runs the agent against it to verify full end-to-end memory behavior.
*/
import { execSync } from 'node:child_process';
import { Pool } from 'pg';
import { GenericContainer, Wait, type StartedTestContainer } from 'testcontainers';
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
import { Agent, Memory, PostgresMemory } from '../../../index';
import { describeIf, findLastTextContent, getModel } from '../helpers';
const describeWithApi = describeIf('anthropic');
/**
* Check if Docker is available synchronously. testcontainers requires a running
* Docker daemon; skip the entire file in environments without it.
*/
function isDockerAvailable(): boolean {
try {
execSync('docker info', { stdio: 'ignore' });
return true;
} catch {
return false;
}
}
const hasDocker = isDockerAvailable();
let container: StartedTestContainer;
let connectionString: string;
beforeAll(async () => {
if (!hasDocker) return;
container = await new GenericContainer('pgvector/pgvector:pg17')
.withExposedPorts(5432)
.withEnvironment({
POSTGRES_USER: 'test',
POSTGRES_PASSWORD: 'test',
POSTGRES_DB: 'testdb',
})
// Postgres emits this message twice: once during initdb (temporary) and once when truly ready.
// Waiting for the second occurrence ensures we don't connect during the brief restart window.
.withWaitStrategy(Wait.forLogMessage('database system is ready to accept connections', 2))
.start();
const host = container.getHost();
const port = container.getMappedPort(5432);
connectionString = `postgresql://test:test@${host}:${port}/testdb`;
}, 60_000);
afterAll(async () => {
try {
if (container) await container.stop();
} catch (error) {
console.error('Error stopping container:', error);
}
}, 30_000);
/** describe that requires Docker — tests are no-ops without it. */
function describeWithDocker(name: string, fn: () => void) {
describe(name, () => {
if (!hasDocker) {
it('skipped — Docker not available', () => {});
return;
}
fn();
});
}
describeWithDocker('PostgresMemory saveThread upsert', () => {
it('preserves existing title and metadata when not provided', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'upsert_test' });
await mem.saveThread({
id: 'upsert-t1',
resourceId: 'user-1',
title: 'Original Title',
metadata: { key: 'value' },
});
// Upsert without title or metadata (simulates saveMessagesToThread)
await mem.saveThread({ id: 'upsert-t1', resourceId: 'user-1' });
const thread = await mem.getThread('upsert-t1');
expect(thread).not.toBeNull();
expect(thread!.title).toBe('Original Title');
expect(thread!.metadata).toEqual({ key: 'value' });
await mem.close();
});
it('overwrites title and metadata when explicitly provided', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'upsert_ow' });
await mem.saveThread({
id: 'upsert-t2',
resourceId: 'user-1',
title: 'Old Title',
metadata: { old: true },
});
await mem.saveThread({
id: 'upsert-t2',
resourceId: 'user-1',
title: 'New Title',
metadata: { new: true },
});
const thread = await mem.getThread('upsert-t2');
expect(thread!.title).toBe('New Title');
expect(thread!.metadata).toEqual({ new: true });
await mem.close();
});
});
describeWithDocker('PostgresMemory unit tests', () => {
it('creates tables on first use and round-trips a thread', async () => {
const mem = new PostgresMemory({ connection: connectionString });
const thread = await mem.saveThread({
id: 'thread-1',
resourceId: 'user-1',
title: 'Test Thread',
});
expect(thread.id).toBe('thread-1');
expect(thread.createdAt).toBeInstanceOf(Date);
const loaded = await mem.getThread('thread-1');
expect(loaded).not.toBeNull();
expect(loaded!.title).toBe('Test Thread');
expect(loaded!.resourceId).toBe('user-1');
await mem.close();
});
it('saves and retrieves messages with limit', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'msg_test' });
await mem.saveThread({ id: 't1', resourceId: 'u1' });
const messages = [
{ role: 'user' as const, content: [{ type: 'text' as const, text: 'Hello' }] },
{ role: 'assistant' as const, content: [{ type: 'text' as const, text: 'Hi there' }] },
{ role: 'user' as const, content: [{ type: 'text' as const, text: 'How are you?' }] },
];
await mem.saveMessages({ threadId: 't1', messages });
// Get last 2 messages
const last2 = await mem.getMessages('t1', { limit: 2 });
expect(last2).toHaveLength(2);
// Get all messages
const all = await mem.getMessages('t1');
expect(all).toHaveLength(3);
await mem.close();
});
it('saves and retrieves working memory keyed by resourceId', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_test' });
expect(
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' }),
).toBeNull();
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' },
'# Profile\n- Name: Alice',
);
expect(
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' }),
).toBe('# Profile\n- Name: Alice');
// Overwrite
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' },
'# Profile\n- Name: Alice\n- Role: Engineer',
);
expect(
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' }),
).toContain('Engineer');
await mem.close();
});
it('saves and retrieves working memory keyed by threadId (no resourceId)', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_thread_test' });
expect(
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' }),
).toBeNull();
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' },
'thread context',
);
expect(
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' }),
).toBe('thread context');
await mem.close();
});
it('isolates working memory by resourceId', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_iso_test' });
await mem.saveWorkingMemory(
{ threadId: 'thread-a', resourceId: 'user-a', scope: 'resource' },
'data for user-a',
);
await mem.saveWorkingMemory(
{ threadId: 'thread-b', resourceId: 'user-b', scope: 'resource' },
'data for user-b',
);
expect(
await mem.getWorkingMemory({ threadId: 'thread-a', resourceId: 'user-a', scope: 'resource' }),
).toBe('data for user-a');
expect(
await mem.getWorkingMemory({ threadId: 'thread-b', resourceId: 'user-b', scope: 'resource' }),
).toBe('data for user-b');
await mem.close();
});
it('stores scope=resource when resourceId is provided', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_scope_test' });
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'res-1', scope: 'resource' },
'resource content',
);
const pool = new Pool({ connectionString });
const result = await pool.query<{ scope: string }>(
'SELECT scope FROM wm_scope_test_working_memory WHERE key = $1',
['res-1'],
);
expect(result.rows[0].scope).toBe('resource');
await pool.end();
await mem.close();
});
it('stores scope=thread when only threadId is provided', async () => {
const mem = new PostgresMemory({
connection: connectionString,
namespace: 'wm_scope_thread_test',
});
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' },
'thread content',
);
const pool = new Pool({ connectionString });
const result = await pool.query<{ scope: string }>(
'SELECT scope FROM wm_scope_thread_test_working_memory WHERE key = $1',
['thread-1'],
);
expect(result.rows[0].scope).toBe('thread');
await pool.end();
await mem.close();
});
it('does not mix resource-scoped and thread-scoped entries with the same key value', async () => {
const mem = new PostgresMemory({
connection: connectionString,
namespace: 'wm_scope_iso_test',
});
const sharedKey = 'same-id';
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: sharedKey, scope: 'resource' },
'resource data',
);
await mem.saveWorkingMemory(
{ threadId: sharedKey, resourceId: sharedKey, scope: 'thread' },
'thread data',
);
expect(
await mem.getWorkingMemory({
threadId: 'thread-1',
resourceId: sharedKey,
scope: 'resource',
}),
).toBe('resource data');
expect(
await mem.getWorkingMemory({ threadId: sharedKey, resourceId: sharedKey, scope: 'thread' }),
).toBe('thread data');
await mem.close();
});
it('deletes thread and cascades to messages', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'del_test' });
await mem.saveThread({ id: 'del-t1', resourceId: 'u1' });
await mem.saveMessages({
threadId: 'del-t1',
messages: [{ role: 'user' as const, content: [{ type: 'text' as const, text: 'test' }] }],
});
await mem.deleteThread('del-t1');
expect(await mem.getThread('del-t1')).toBeNull();
expect(await mem.getMessages('del-t1')).toHaveLength(0);
await mem.close();
});
it('stores and queries embeddings with pgvector', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_test' });
await mem.saveThread({ id: 'vec-t1', resourceId: 'u1' });
// Save some embeddings (3-dimensional for simplicity)
await mem.saveEmbeddings({
threadId: 'vec-t1',
resourceId: 'u1',
entries: [
{ id: 'msg-1', vector: [1.0, 0.0, 0.0], text: 'About cats', model: 'test' },
{ id: 'msg-2', vector: [0.0, 1.0, 0.0], text: 'About dogs', model: 'test' },
{ id: 'msg-3', vector: [0.9, 0.1, 0.0], text: 'About kittens', model: 'test' },
],
});
// Query for vectors close to [1, 0, 0] — should return msg-1 and msg-3 first
const results = await mem.queryEmbeddings({
scope: 'resource',
resourceId: 'u1',
vector: [1.0, 0.0, 0.0],
topK: 2,
});
expect(results).toHaveLength(2);
expect(results[0].id).toBe('msg-1');
expect(results[0].score).toBeGreaterThan(0.9);
// msg-3 should be second (cosine similarity ~0.99 with [0.9, 0.1, 0])
expect(results[1].id).toBe('msg-3');
await mem.close();
});
it('filters embeddings by resourceId with scope=resource (default)', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_res' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'user-a',
entries: [{ id: 'msg-a1', vector: [1.0, 0.0, 0.0], text: 'User A thread 1', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'user-a',
entries: [{ id: 'msg-a2', vector: [0.9, 0.1, 0.0], text: 'User A thread 2', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't3',
resourceId: 'user-b',
entries: [{ id: 'msg-b1', vector: [1.0, 0.0, 0.0], text: 'User B thread 3', model: 'test' }],
});
// Default scope is 'resource' — should return both user-a embeddings across threads
const results = await mem.queryEmbeddings({
resourceId: 'user-a',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(2);
const ids = results.map((r) => r.id);
expect(ids).toContain('msg-a1');
expect(ids).toContain('msg-a2');
expect(ids).not.toContain('msg-b1');
await mem.close();
});
it('filters embeddings by threadId with scope=thread', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_thr' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'user-1',
entries: [
{ id: 'msg-t1a', vector: [1.0, 0.0, 0.0], text: 'Thread 1 A', model: 'test' },
{ id: 'msg-t1b', vector: [0.0, 1.0, 0.0], text: 'Thread 1 B', model: 'test' },
],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'user-1',
entries: [{ id: 'msg-t2', vector: [1.0, 0.0, 0.0], text: 'Thread 2', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'thread',
threadId: 't1',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(2);
const ids = results.map((r) => r.id);
expect(ids).toContain('msg-t1a');
expect(ids).toContain('msg-t1b');
expect(ids).not.toContain('msg-t2');
await mem.close();
});
it('resource scope excludes embeddings from other resources', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_iso' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'res-1',
entries: [{ id: 'msg-r1', vector: [1.0, 0.0, 0.0], text: 'Resource 1', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'res-2',
entries: [{ id: 'msg-r2', vector: [1.0, 0.0, 0.0], text: 'Resource 2', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'resource',
resourceId: 'res-1',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(1);
expect(results[0].id).toBe('msg-r1');
await mem.close();
});
it('stores resourceId in the embeddings table', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_col' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'my-resource',
entries: [
{ id: 'msg-check', vector: [1.0, 0.0, 0.0], text: 'Check resourceId', model: 'test' },
],
});
const pool = new Pool({ connectionString });
const result = await pool.query<{ resourceId: string }>(
'SELECT "resourceId" FROM vec_col_message_embeddings WHERE id = $1',
['msg-check'],
);
expect(result.rows[0].resourceId).toBe('my-resource');
await pool.end();
await mem.close();
});
it('isolates namespaces', async () => {
const mem1 = new PostgresMemory({ connection: connectionString, namespace: 'ns_a' });
const mem2 = new PostgresMemory({ connection: connectionString, namespace: 'ns_b' });
await mem1.saveThread({ id: 'shared-id', resourceId: 'u1', title: 'From A' });
await mem2.saveThread({ id: 'shared-id', resourceId: 'u1', title: 'From B' });
expect((await mem1.getThread('shared-id'))!.title).toBe('From A');
expect((await mem2.getThread('shared-id'))!.title).toBe('From B');
await mem1.close();
await mem2.close();
});
});
/** describe that requires both Docker and an Anthropic API key. */
function describeWithDockerAndApi(name: string, fn: () => void) {
const describeOrSkip = describeWithApi;
describeOrSkip(name, () => {
if (!hasDocker) {
it('skipped — Docker not available', () => {});
return;
}
fn();
});
}
describeWithDockerAndApi('PostgresMemory agent integration', () => {
it('recalls previous messages across turns', async () => {
const store = new PostgresMemory({ connection: connectionString, namespace: 'agent_recall' });
const memory = new Memory().storage(store).lastMessages(10);
const agent = new Agent('pg-recall-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `pg-thread-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'pg-user-1' } };
await agent.generate('My favorite planet is Saturn. Just acknowledge.', options);
const result = await agent.generate('What is my favorite planet?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('saturn');
await store.close();
});
it('persists resource-scoped working memory via Postgres backend', async () => {
const store = new PostgresMemory({ connection: connectionString, namespace: 'agent_wm' });
const memory = new Memory()
.storage(store)
.lastMessages(10)
.scope('resource')
.freeform('# User Profile\n- **Name**:\n- **Hobby**:');
const agent = new Agent('pg-wm-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
.memory(memory);
const threadId = `pg-wm-${Date.now()}`;
const resourceId = 'pg-wm-user';
await agent.generate('My name is Hiro and I enjoy cycling.', {
persistence: { threadId, resourceId },
});
// Working memory should be persisted in Postgres (keyed by resourceId)
const wm = await store.getWorkingMemory({ threadId, resourceId, scope: 'resource' });
expect(wm).toBeDefined();
expect(wm!.toLowerCase()).toContain('hiro');
// New thread, same resourceId — resource-scoped working memory carries over
const result = await agent.generate('What is my name?', {
persistence: { threadId: `pg-wm2-${Date.now()}`, resourceId },
});
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('hiro');
await store.close();
});
it('persists thread-scoped working memory via Postgres backend', async () => {
const store = new PostgresMemory({
connection: connectionString,
namespace: 'agent_thread_wm',
});
const memory = new Memory()
.storage(store)
.lastMessages(10)
.scope('thread')
.freeform('# Conversation Notes\n- **Topic**:\n- **Key facts**:');
const agent = new Agent('pg-thread-wm-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
.memory(memory);
const threadId = `pg-twm-${Date.now()}`;
const resourceId = 'pg-twm-user';
await agent.generate('The secret project name is HELIOS. Just acknowledge.', {
persistence: { threadId, resourceId },
});
// Working memory should be stored keyed by threadId
const wmByThread = await store.getWorkingMemory({ threadId, resourceId, scope: 'thread' });
expect(wmByThread).toBeDefined();
expect(wmByThread!.toLowerCase()).toContain('helios');
// resourceId key should be empty — nothing stored there
const wmByResource = await store.getWorkingMemory({ threadId, resourceId, scope: 'resource' });
expect(wmByResource).toBeNull();
// New thread for same resource — should NOT carry over thread-scoped working memory
const thread2 = `pg-twm2-${Date.now()}`;
const result = await agent.generate(
'What is the project name? Answer "unknown" if you have no information.',
{ persistence: { threadId: thread2, resourceId } },
);
expect(findLastTextContent(result.messages)?.toLowerCase()).not.toContain('helios');
await store.close();
});
it('works with stream() path', async () => {
const store = new PostgresMemory({ connection: connectionString, namespace: 'agent_stream' });
const memory = new Memory().storage(store).lastMessages(10);
const agent = new Agent('pg-stream-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `pg-stream-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'pg-stream-user' } };
// Turn 1 via stream
const { stream } = await agent.stream(
'The speed of light is approximately 300,000 km/s. Acknowledge.',
options,
);
const reader = stream.getReader();
while (true) {
const { done } = await reader.read();
if (done) break;
}
// Turn 2 via generate — should recall
const genResult = await agent.generate('What is the speed of light approximately?', options);
const text = findLastTextContent(genResult.messages);
expect(text).toBeTruthy();
expect(text!.toLowerCase()).toContain('300');
await store.close();
});
});

View file

@ -0,0 +1,94 @@
import { expect, it, afterEach, describe as _describe } from 'vitest';
import { Agent, Memory } from '../../../index';
import { findLastTextContent, getModel, createSqliteMemory } from '../helpers';
// Only run when both API keys are present
const describe =
process.env.ANTHROPIC_API_KEY && process.env.OPENAI_API_KEY ? _describe : _describe.skip;
const cleanups: Array<() => void> = [];
afterEach(() => {
cleanups.forEach((fn) => fn());
cleanups.length = 0;
});
describe('semantic recall', () => {
it('recalls relevant info beyond the lastMessages window', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
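// lastMessages(3) keeps only the most recent turns in the prompt; semanticRecall
// surfaces older, relevant messages via embedding similarity (topK matches).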
const mem = new Memory()
.storage(memory)
.lastMessages(3)
.semanticRecall({ topK: 3, embedder: 'openai/text-embedding-3-small' });
const agent = new Agent('semantic-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise. Answer from your context.')
.memory(mem);
const threadId = `semantic-${Date.now()}`;
const resourceId = 'test-user';
const options = { persistence: { threadId, resourceId } };
// Turn 1: unique fact that will be pushed out of the 3-message window
await agent.generate(
'The annual rainfall in Timbuktu is approximately 200mm. Just acknowledge.',
options,
);
// Filler turns to push turn 1 out of the lastMessages window
await agent.generate('What is 2 + 2?', options);
await agent.generate('Tell me a one-word synonym for happy.', options);
await agent.generate('What color is the sky?', options);
// Ask about the fact from turn 1 — should be recalled via semantic search
const result = await agent.generate('What is the annual rainfall in Timbuktu?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('200');
});
it('works combined with freeform working memory', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
const template = '# User Context\n- **Name**:\n- **Interest**:';
const mem = new Memory()
.storage(memory)
.lastMessages(3)
.freeform(template)
.semanticRecall({ topK: 3, embedder: 'openai/text-embedding-3-small' });
const agent = new Agent('combined-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem);
const threadId = `combined-${Date.now()}`;
const resourceId = `user-${Date.now()}`;
const options = { persistence: { threadId, resourceId } };
// Turn 1: name (working memory) + unique fact (semantic recall)
await agent.generate(
'My name is Frank. Also, the capital of Bhutan is Thimphu. Just acknowledge both.',
options,
);
// Filler turns
await agent.generate('What is 3 + 3?', options);
await agent.generate('Name a primary color.', options);
await agent.generate('What day comes after Monday?', options);
// Ask about both — name from working memory, fact from semantic recall
const result = await agent.generate(
'What is my name, and what is the capital of Bhutan?',
options,
);
const text = findLastTextContent(result.messages)?.toLowerCase() ?? '';
expect(text).toContain('frank');
expect(text).toContain('thimphu');
});
});
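
// --- Illustrative sketch (not part of the test file above) --------------------
// The semantic-recall setup these tests rely on: a small recency window plus
// vector-based recall of older messages. The embedder id mirrors the one used in
// the tests; the import specifier, database path, and model id are placeholder
// assumptions.
import { Agent, Memory, SqliteMemory } from '@n8n/agents'; // assumed public entry point

async function semanticRecallSketch() {
  const store = new SqliteMemory({ url: 'file:recall-demo.db' }); // placeholder path
  const memory = new Memory()
    .storage(store)
    .lastMessages(3) // only the 3 most recent messages are included verbatim
    .semanticRecall({ topK: 3, embedder: 'openai/text-embedding-3-small' });
  const agent = new Agent('recall-demo')
    .model('anthropic/claude-haiku-4-5')
    .instructions('Be concise. Answer from your context.')
    .memory(memory);
  const options = { persistence: { threadId: 'thread-1', resourceId: 'user-1' } };
  await agent.generate('The annual rainfall in Timbuktu is approximately 200mm.', options);
  // Even once the fact falls outside the lastMessages window, semantic recall
  // can surface it again on a related question.
  return await agent.generate('What is the annual rainfall in Timbuktu?', options);
}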

View file

@ -0,0 +1,105 @@
import { describe as _describe, expect, it, afterEach } from 'vitest';
import { Agent, Memory } from '../../../index';
import { SqliteMemory } from '../../../storage/sqlite-memory';
import { describeIf, findLastTextContent, getModel, createSqliteMemory } from '../helpers';
const describe = describeIf('anthropic');
const cleanups: Array<() => void> = [];
afterEach(() => {
cleanups.forEach((fn) => fn());
cleanups.length = 0;
});
_describe('SqliteMemory saveThread upsert', () => {
it('preserves existing title and metadata when not provided', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
await memory.saveThread({
id: 'upsert-t1',
resourceId: 'user-1',
title: 'Original Title',
metadata: { key: 'value' },
});
// Upsert without title or metadata (simulates saveMessagesToThread)
await memory.saveThread({ id: 'upsert-t1', resourceId: 'user-1' });
const thread = await memory.getThread('upsert-t1');
expect(thread).not.toBeNull();
expect(thread!.title).toBe('Original Title');
expect(thread!.metadata).toEqual({ key: 'value' });
});
it('overwrites title and metadata when explicitly provided', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
await memory.saveThread({
id: 'upsert-t2',
resourceId: 'user-1',
title: 'Old Title',
metadata: { old: true },
});
await memory.saveThread({
id: 'upsert-t2',
resourceId: 'user-1',
title: 'New Title',
metadata: { new: true },
});
const thread = await memory.getThread('upsert-t2');
expect(thread!.title).toBe('New Title');
expect(thread!.metadata).toEqual({ new: true });
});
});
describe('SQLite memory integration', () => {
it('agent recalls info from previous turn with SqliteMemory', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
const mem = new Memory().storage(memory).lastMessages(10);
const agent = new Agent('sqlite-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem);
const threadId = `sqlite-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
await agent.generate('My favorite number is 42. Just acknowledge.', options);
const result = await agent.generate('What is my favorite number?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('42');
});
it('data survives a fresh SqliteMemory instance', async () => {
const { memory, cleanup, url } = createSqliteMemory();
cleanups.push(cleanup);
const mem1 = new Memory().storage(memory).lastMessages(10);
const agent1 = new Agent('persist-test-1')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem1);
const threadId = `persist-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
await agent1.generate('My favorite animal is a dolphin. Just acknowledge.', options);
// New SqliteMemory instance, same file
const memory2 = new SqliteMemory({ url });
const mem2 = new Memory().storage(memory2).lastMessages(10);
const agent2 = new Agent('persist-test-2')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem2);
const result = await agent2.generate('What is my favorite animal?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('dolphin');
});
});
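
// --- Illustrative sketch (not part of the test file above) --------------------
// Because SqliteMemory is file-backed, a brand-new instance pointed at the same
// file sees earlier conversations, which is what the persistence test checks.
// The import specifier, file path, and model id are placeholder assumptions.
import { Agent, Memory, SqliteMemory } from '@n8n/agents'; // assumed public entry point

async function reopenSketch() {
  const options = { persistence: { threadId: 'thread-1', resourceId: 'user-1' } };
  const writer = new Agent('writer')
    .model('anthropic/claude-haiku-4-5')
    .instructions('Be concise.')
    .memory(new Memory().storage(new SqliteMemory({ url: 'file:agents.db' })).lastMessages(10));
  await writer.generate('My favorite animal is a dolphin. Just acknowledge.', options);
  // Later, e.g. after a process restart: a fresh instance over the same file.
  const reader = new Agent('reader')
    .model('anthropic/claude-haiku-4-5')
    .instructions('Be concise.')
    .memory(new Memory().storage(new SqliteMemory({ url: 'file:agents.db' })).lastMessages(10));
  return await reader.generate('What is my favorite animal?', options);
}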

View file

@ -0,0 +1,40 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { Agent, Memory } from '../../../index';
import { describeIf, findLastTextContent, getModel } from '../helpers';
const describe = describeIf('anthropic');
describe('structured working memory', () => {
const schema = z.object({
userName: z.string().optional().describe("The user's name"),
favoriteColor: z.string().optional().describe('Favorite color'),
location: z.string().optional().describe('Where the user lives'),
});
it('agent fills structured fields across turns', async () => {
const memory = new Memory().storage('memory').lastMessages(10).structured(schema);
const agent = new Agent('structured-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `structured-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
await agent.generate('My name is Eve and I love purple.', options);
const result = await agent.generate('What is my name and favorite color?', options);
const text = findLastTextContent(result.messages)?.toLowerCase() ?? '';
expect(text).toContain('eve');
expect(text).toContain('purple');
});
it('throws when both .structured() and .freeform() are used', () => {
expect(() => {
new Memory().storage('memory').structured(schema).freeform('# Template').build();
}).toThrow(/cannot use both/i);
});
});
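
// --- Illustrative sketch (not part of the test file above) --------------------
// Structured working memory takes a Zod schema instead of a freeform template;
// the two modes are mutually exclusive, which is what the "cannot use both" test
// checks. The import specifier and model id are placeholder assumptions.
import { z } from 'zod';
import { Agent, Memory } from '@n8n/agents'; // assumed public entry point

const profileSchema = z.object({
  userName: z.string().optional().describe("The user's name"),
  favoriteColor: z.string().optional().describe('Favorite color'),
});

const structuredMemory = new Memory()
  .storage('memory') // ephemeral in-process storage, as used in the test
  .lastMessages(10)
  .structured(profileSchema); // never combined with .freeform(...); that would throw

const profileAgent = new Agent('profile-bot')
  .model('anthropic/claude-haiku-4-5')
  .instructions('Be concise.')
  .memory(structuredMemory);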

View file

@ -0,0 +1,148 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { Agent, Memory, Tool } from '../../../index';
import { describeIf, findLastTextContent, findLastToolCallContent, getModel } from '../helpers';
const describe = describeIf('anthropic');
describe('memory integration', () => {
it('recalls previous messages within the same thread', async () => {
const memory = new Memory().storage('memory').lastMessages(10);
const agent = new Agent('memory-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `test-thread-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
const result1 = await agent.generate(
'My favorite color is purple. Just acknowledge this.',
options,
);
expect(findLastTextContent(result1.messages)).toBeTruthy();
const result2 = await agent.generate('What is my favorite color?', options);
expect(findLastTextContent(result2.messages)?.toLowerCase()).toContain('purple');
});
it('isolates separate threads', async () => {
const memory = new Memory().storage('memory').lastMessages(10);
const agent = new Agent('thread-isolation-test')
.model(getModel('anthropic'))
.instructions(
'You are a helpful assistant. Be concise. If you do not know something, say "I don\'t know".',
)
.memory(memory);
const thread1 = `test-thread-1-${Date.now()}`;
const thread2 = `test-thread-2-${Date.now()}`;
await agent.generate('Remember this secret code: ALPHA-7. Just acknowledge.', {
persistence: { threadId: thread1, resourceId: 'test-user' },
});
const result2 = await agent.generate('What is the secret code I told you?', {
persistence: { threadId: thread2, resourceId: 'test-user' },
});
expect(findLastTextContent(result2.messages)?.toLowerCase()).not.toContain('alpha-7');
});
it('recalls tool results with generate()', async () => {
const memory = new Memory().storage('memory').lastMessages(20);
const lookupTool = new Tool('lookup_inventory')
.description('Look up the current inventory count for a product')
.input(
z.object({
product: z.string().describe('Product name'),
}),
)
.handler(async ({ product }) => ({
product,
count: 42,
warehouse: 'Building-7',
}));
const agent = new Agent('store-results-run-test')
.model(getModel('anthropic'))
.instructions(
'You are an inventory assistant. Use the lookup_inventory tool when asked about stock. Be concise.',
)
.tool(lookupTool)
.memory(memory);
const threadId = `test-store-results-run-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
// Turn 1: trigger the tool via generate()
const result1 = await agent.generate('How many widgets do we have in stock?', options);
expect(findLastTextContent(result1.messages)).toBeTruthy();
expect(findLastToolCallContent(result1.messages)).toBeTruthy();
// Turn 2: ask about the tool result without re-triggering the tool
const result2 = await agent.generate(
'Which warehouse are the widgets stored in? Do NOT call any tools — answer from what you already know.',
options,
);
expect(findLastTextContent(result2.messages)?.toLowerCase()).toContain('building-7');
expect(findLastToolCallContent(result2.messages)).toBeUndefined();
});
it('recalls tool results with stream()', async () => {
const memory = new Memory().storage('memory').lastMessages(20);
const lookupTool = new Tool('lookup_inventory')
.description('Look up the current inventory count for a product')
.input(
z.object({
product: z.string().describe('Product name'),
}),
)
.handler(async ({ product }) => ({
product,
count: 42,
warehouse: 'Building-7',
}));
const agent = new Agent('store-results-stream-test')
.model(getModel('anthropic'))
.instructions(
'You are an inventory assistant. Use the lookup_inventory tool when asked about stock. Be concise.',
)
.tool(lookupTool)
.memory(memory);
const threadId = `test-store-results-stream-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
// Turn 1: trigger the tool via stream()
const { stream: stream1 } = await agent.stream(
'How many widgets do we have in stock?',
options,
);
// Must consume the stream fully so the tool results from the streamed turn are persisted to memory
const reader = stream1.getReader();
while (true) {
const { done } = await reader.read();
if (done) break;
}
const result1 = await agent.generate('How many widgets do we have in stock?', options);
expect(findLastToolCallContent(result1.messages)).toBeTruthy();
// Turn 2: ask about the tool result
const result2 = await agent.generate(
'Which warehouse are the widgets stored in? Do NOT call any tools — answer from what you already know.',
options,
);
expect(findLastTextContent(result2.messages)?.toLowerCase()).toContain('building-7');
expect(findLastToolCallContent(result2.messages)).toBeUndefined();
});
});

View file

@ -0,0 +1,141 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import {
describeIf,
collectStreamChunks,
getModel,
chunksOfType,
findAllToolResults,
collectTextDeltas,
} from './helpers';
import { Agent, Tool } from '../../index';
const describe = describeIf('anthropic');
describe('multi-tool-calls integration', () => {
it('correctly merges results when the same tool is called multiple times', async () => {
let callCount = 0;
const lookupTool = new Tool('lookup_price')
.description('Look up the price of a product by name')
.input(z.object({ product: z.string().describe('Product name') }))
.output(z.object({ product: z.string(), price: z.number() }))
.handler(async ({ product }) => {
callCount++;
const prices: Record<string, number> = {
apple: 1.5,
banana: 0.75,
cherry: 3.0,
};
return { product, price: prices[product.toLowerCase()] ?? 0 };
});
const agent = new Agent('multi-call-test')
.model(getModel('anthropic'))
.instructions(
'You are a price checker. When asked about prices, use the lookup_price tool for EACH product separately. Be concise.',
)
.tool(lookupTool);
const { stream: fullStream } = await agent.stream(
'What are the prices of apple, banana, and cherry? Look up each one.',
);
const chunks = await collectStreamChunks(fullStream);
const messageChunks = chunksOfType(chunks, 'message');
const toolCallResults = findAllToolResults(messageChunks.map((c) => c.message));
// Should have called the tool multiple times
const priceCalls = toolCallResults.filter((tc) => tc.toolName === 'lookup_price');
expect(priceCalls.length).toBeGreaterThanOrEqual(2);
// Each call should have its own correct output (not all pointing to the first result)
const outputs = priceCalls.map((tc) => tc.result as { product: string; price: number });
// Verify that different products got different prices (index-based merging works)
const uniquePrices = new Set(outputs.map((o) => o.price));
expect(uniquePrices.size).toBeGreaterThanOrEqual(2);
// The response should mention the prices
const text = collectTextDeltas(chunks);
expect(text).toBeTruthy();
expect(text).toMatch(/apple/i);
expect(text).toMatch(/banana/i);
expect(text).toMatch(/cherry/i);
expect(text).toMatch(/1\.5/i);
expect(text).toMatch(/0\.75/i);
expect(text).toMatch(/3\.0/i);
});
it('correctly merges results when different tools are called in sequence', async () => {
const addTool = new Tool('add')
.description('Add two numbers')
.input(z.object({ a: z.number(), b: z.number() }))
.handler(async ({ a, b }) => ({ result: a + b }));
const multiplyTool = new Tool('multiply')
.description('Multiply two numbers')
.input(z.object({ a: z.number(), b: z.number() }))
.handler(async ({ a, b }) => ({ result: a * b }));
const agent = new Agent('mixed-tools-test')
.model(getModel('anthropic'))
.instructions(
'You are a calculator. Use the add tool for addition and multiply tool for multiplication. Be concise.',
)
.tool(addTool)
.tool(multiplyTool);
const { stream: fullStream } = await agent.stream('What is 3 + 4 and also what is 5 * 6?');
const chunks = await collectStreamChunks(fullStream);
const messageChunks = chunksOfType(chunks, 'message');
const toolCallResults = findAllToolResults(messageChunks.map((c) => c.message));
const toolCalls = toolCallResults.filter(
(tc) => tc.toolName === 'add' || tc.toolName === 'multiply',
);
expect(toolCalls.length).toBeGreaterThanOrEqual(2);
const addCall = toolCallResults.find((tc) => tc.toolName === 'add');
const multiplyCall = toolCallResults.find((tc) => tc.toolName === 'multiply');
expect(addCall).toBeDefined();
expect(multiplyCall).toBeDefined();
expect((addCall!.result as { result: number }).result).toBe(7);
expect((multiplyCall!.result as { result: number }).result).toBe(30);
});
it('correctly merges results via the run() path', async () => {
const lookupTool = new Tool('get_length')
.description('Get the length of a string')
.input(z.object({ text: z.string() }))
.output(z.object({ text: z.string(), length: z.number() }))
.handler(async ({ text }) => ({ text, length: text.length }));
const agent = new Agent('multi-call-run-test')
.model(getModel('anthropic'))
.instructions(
'You are a string utility. When asked about string lengths, use the get_length tool for EACH string separately. Be concise.',
)
.tool(lookupTool);
const { stream: fullStream } = await agent.stream(
'What are the lengths of "hello" and "world"? Look up each one separately.',
);
const chunks = await collectStreamChunks(fullStream);
const messageChunks = chunksOfType(chunks, 'message');
const toolCallResults = findAllToolResults(messageChunks.map((c) => c.message));
const lengthCalls = toolCallResults.filter((tc) => tc.toolName === 'get_length');
expect(lengthCalls.length).toBeGreaterThanOrEqual(2);
// Each should have correct output
for (const call of lengthCalls) {
const output = call.result as { text: string; length: number };
expect(output.length).toBe(output.text.length);
}
});
});

View file

@ -0,0 +1,126 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
getModel,
findLastTextContent,
} from './helpers';
import { Agent } from '../../index';
import type { Message, StreamChunk } from '../../index';
const describe = describeIf('anthropic');
/** Convert a base64 string to Uint8Array for the AI SDK file part. */
function base64ToUint8Array(base64: string): Uint8Array {
return Uint8Array.from(Buffer.from(base64, 'base64'));
}
// Valid 1×1 red PNG pixel
const RED_PIXEL_BASE64 =
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGP4z8AAAAMBAQDJ/pLvAAAAAElFTkSuQmCC';
// Valid 1×1 blue PNG pixel
const BLUE_PIXEL_BASE64 =
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGNgYPgPAAEDAQAIicLsAAAAAElFTkSuQmCC';
describe('multimodal integration', () => {
it('accepts an image via binary data and references it in the response', async () => {
const messages: Message[] = [
{
role: 'user',
content: [
{
type: 'file',
mediaType: 'image/png',
data: base64ToUint8Array(RED_PIXEL_BASE64),
},
{
type: 'text',
text: 'What color is this image? Reply with just the color name, nothing else.',
},
],
},
];
const agent = new Agent('vision-test')
.model(getModel('anthropic'))
.instructions('You are a vision assistant. Describe images concisely.');
const { stream: fullStream } = await agent.stream(messages);
const chunks = await collectStreamChunks(fullStream);
const textChunks = chunksOfType(chunks, 'text-delta') as Array<
StreamChunk & { type: 'text-delta' }
>;
expect(textChunks.length).toBeGreaterThan(0);
const text = textChunks.map((c) => c.delta).join('');
expect(text).toBeTruthy();
expect(text).toMatch(/red/i);
});
it('accepts multiple content blocks (text + image) in a single message', async () => {
const messages: Message[] = [
{
role: 'user',
content: [
{ type: 'text', text: 'I have two questions.' },
{
type: 'file',
mediaType: 'image/png',
data: base64ToUint8Array(BLUE_PIXEL_BASE64),
},
{
type: 'text',
text: 'Question 1: Can you see an image above? Answer only YES or NO. Question 2: What is 2+2? Answer both briefly.',
},
],
},
];
const agent = new Agent('multi-content-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant with vision capabilities. Be concise.');
const { stream: fullStream } = await agent.stream(messages);
const chunks = await collectStreamChunks(fullStream);
const textChunks = chunksOfType(chunks, 'text-delta') as Array<
StreamChunk & { type: 'text-delta' }
>;
expect(textChunks.length).toBeGreaterThan(0);
const text = textChunks.map((c) => c.delta).join('');
expect(text).toBeTruthy();
expect(text).toMatch(/4/);
expect(text).toMatch(/yes/i);
});
it('passes image content through the run() path (non-streaming)', async () => {
const messages: Message[] = [
{
role: 'user',
content: [
{
type: 'file',
mediaType: 'image/png',
data: base64ToUint8Array(RED_PIXEL_BASE64),
},
{
type: 'text',
text: 'What color is this image? Reply with just the color name.',
},
],
},
];
const agent = new Agent('vision-run-test')
.model(getModel('anthropic'))
.instructions('You are a vision assistant. Be concise.');
const result = await agent.generate(messages);
const text = findLastTextContent(result.messages);
expect(text).toBeTruthy();
expect(text).toMatch(/red/i);
});
});

View file

@ -0,0 +1,164 @@
import { expect, it, afterEach } from 'vitest';
import { z } from 'zod';
import { describeIf, getModel, createSqliteMemory } from './helpers';
import { Agent, Memory, Tool } from '../../index';
import type { AgentMessage } from '../../index';
const describe = describeIf('anthropic');
describe('orphaned tool messages in memory', () => {
const cleanups: Array<() => void> = [];
afterEach(() => {
for (const fn of cleanups) fn();
cleanups.length = 0;
});
/**
* Build a dummy tool so the agent has a valid tool schema.
* The tool itself should never be called in these tests.
*/
function buildLookupTool() {
return new Tool('lookup')
.description('Look up data by id')
.input(z.object({ id: z.string() }))
.output(z.object({ count: z.number() }))
.handler(async () => ({ count: 99 }));
}
/**
* Seed memory with a conversation that has tool-call / tool-result pairs
* surrounded by plain user/assistant exchanges.
*
* Message layout (indices 0–7):
* 0: user "How many widgets?"
* 1: assistant text + tool-call(call_1)
* 2: tool tool-result(call_1)
* 3: assistant "There are 10 widgets"
* 4: user "What about gadgets?"
* 5: assistant text + tool-call(call_2)
* 6: tool tool-result(call_2)
* 7: assistant "There are 5 gadgets"
*/
function buildSeedMessages(): AgentMessage[] {
return [
{
role: 'user',
content: [{ type: 'text', text: 'How many widgets do we have?' }],
},
{
role: 'assistant',
content: [
{ type: 'text', text: 'Let me look that up.' },
{ type: 'tool-call', toolCallId: 'call_1', toolName: 'lookup', input: { id: 'widgets' } },
],
},
{
role: 'tool',
content: [
{ type: 'tool-result', toolCallId: 'call_1', toolName: 'lookup', result: { count: 10 } },
],
},
{
role: 'assistant',
content: [{ type: 'text', text: 'There are 10 widgets in stock.' }],
},
{
role: 'user',
content: [{ type: 'text', text: 'What about gadgets?' }],
},
{
role: 'assistant',
content: [
{ type: 'text', text: 'Let me check.' },
{ type: 'tool-call', toolCallId: 'call_2', toolName: 'lookup', input: { id: 'gadgets' } },
],
},
{
role: 'tool',
content: [
{ type: 'tool-result', toolCallId: 'call_2', toolName: 'lookup', result: { count: 5 } },
],
},
{
role: 'assistant',
content: [{ type: 'text', text: 'There are 5 gadgets in stock.' }],
},
];
}
it('handles orphaned tool results when tool-call message is truncated from history', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
const threadId = 'thread-orphan-result';
// Seed 8 messages into the thread
await memory.saveMessages({ threadId, messages: buildSeedMessages() });
// lastMessages=6 → loads messages 2–7
// Message at index 2 is a tool-result for call_1, but the matching
// assistant+tool-call (index 1) is truncated. This is an orphaned tool result.
const mem = new Memory().storage(memory).lastMessages(6);
const agent = new Agent('orphan-result-test')
.model(getModel('anthropic'))
.instructions('You are an inventory assistant. Use lookup to check stock. Be concise.')
.tool(buildLookupTool())
.memory(mem);
// This should NOT throw even though history contains an orphaned tool-result
const result = await agent.generate('Can you summarize what we discussed?', {
persistence: { threadId, resourceId: 'test' },
});
expect(result.finishReason).toBe('stop');
});
it('handles orphaned tool calls when tool-result message is truncated from history', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
const threadId = 'thread-orphan-call';
// Store a conversation where the last saved message is an assistant
// with a tool-call but the tool-result was never persisted (simulating
// a partial save / interrupted turn).
const messages: AgentMessage[] = [
{
role: 'user',
content: [{ type: 'text', text: 'How many widgets?' }],
},
{
role: 'assistant',
content: [
{ type: 'text', text: 'Checking inventory.' },
{
type: 'tool-call',
toolCallId: 'call_orphan',
toolName: 'lookup',
input: { id: 'widgets' },
},
],
},
];
await memory.saveMessages({ threadId, messages });
const mem = new Memory().storage(memory).lastMessages(10);
const agent = new Agent('orphan-call-test')
.model(getModel('anthropic'))
.instructions('You are an inventory assistant. Use lookup to check stock. Be concise.')
.tool(buildLookupTool())
.memory(mem);
// This should NOT throw even though history has a tool-call with no result
const result = await agent.generate('Actually, never mind. How are you?', {
persistence: { threadId, resourceId: 'test' },
});
expect(result.finishReason).toBe('stop');
});
});

View file

@ -0,0 +1,65 @@
import { expect, it } from 'vitest';
import { describeIf, collectStreamChunks, getModel, chunksOfType } from './helpers';
import { Agent } from '../../index';
const describe = describeIf('anthropic');
describe('provider metadata integration', () => {
it('includes finishReason in finish chunks', async () => {
const agent = new Agent('metadata-test')
.model(getModel('anthropic'))
.instructions('Reply with exactly: "OK". Nothing else.');
const { stream: fullStream } = await agent.stream('Acknowledge');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
for (const chunk of finishChunks) {
if (chunk.type === 'finish') {
expect(chunk.finishReason).toBeDefined();
expect(['stop', 'length', 'content-filter', 'tool-calls', 'error', 'other']).toContain(
chunk.finishReason,
);
}
}
});
it('finish reason is "stop" for a normal completion', async () => {
const agent = new Agent('stop-reason-test')
.model(getModel('anthropic'))
.instructions('Reply with exactly: "Done". Nothing else.');
const { stream: fullStream } = await agent.stream('Say done');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
// The last finish chunk should be 'stop'
const lastFinish = finishChunks[finishChunks.length - 1];
expect(lastFinish).toBeDefined();
if (lastFinish?.type === 'finish') {
expect(lastFinish.finishReason).toBe('stop');
}
});
it('result contains usage metadata from the provider', async () => {
const agent = new Agent('usage-metadata-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.');
const { stream: fullStream } = await agent.stream('What is 1+1?');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
const usage = finishChunks[0].usage;
expect(usage).toBeDefined();
expect(typeof usage!.promptTokens).toBe('number');
expect(typeof usage!.completionTokens).toBe('number');
expect(typeof usage!.totalTokens).toBe('number');
expect(usage!.totalTokens).toBeGreaterThan(0);
});
});

View file

@ -0,0 +1,225 @@
import { expect, it } from 'vitest';
import { collectStreamChunks, chunksOfType, describeIf } from './helpers';
import { Agent } from '../../index';
const describe = describeIf('anthropic');
/**
* Integration tests for provider options: prompt caching, deep merge with
* thinking, external abort signal, and model config object form.
*
* Prompt caching requires a minimum prompt length (1024 tokens for most Anthropic
* models, 2048 for Haiku), so we generate a long instruction string.
*/
// A system prompt long enough to be eligible for Anthropic prompt caching.
// Claude Haiku requires at least 2048 tokens for caching to activate.
const LONG_SYSTEM_PROMPT =
'You are a concise assistant. Reply in one short sentence. ' +
'Here is additional context to ensure the prompt is long enough for caching: ' +
Array.from(
{ length: 500 },
(_, i) => `Rule ${i + 1}: Always be helpful and accurate in your responses.`,
).join(' ');
// ---------------------------------------------------------------------------
// Prompt caching — instruction-level
// ---------------------------------------------------------------------------
describe('prompt caching via instruction providerOptions', () => {
it('second call with cached instructions reports cacheRead tokens', async () => {
const agent = new Agent('cache-instructions-test')
.model('anthropic/claude-haiku-4-5')
.instructions(LONG_SYSTEM_PROMPT, {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
// First call — creates the cache entry
const result1 = await agent.generate('Say hello', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
});
expect(result1.finishReason).toBe('stop');
// Second call — should read from cache
const result2 = await agent.generate('Say goodbye', {
persistence: { resourceId: 'user1', threadId: 'thread2' },
});
expect(result2.finishReason).toBe('stop');
// At least one of the two calls should show cache activity (write or read)
const write1 = result1.usage?.inputTokenDetails?.cacheWrite ?? 0;
const read2 = result2.usage?.inputTokenDetails?.cacheRead ?? 0;
expect(write1 + read2).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// Prompt caching — call-level providerOptions
// ---------------------------------------------------------------------------
describe('prompt caching via call-level providerOptions', () => {
it('accepts call-level cacheControl providerOptions without error', async () => {
// Call-level cacheControl applies to the API request, not individual messages.
// For Anthropic, prompt caching at call level needs instruction-level cacheControl
// to mark which content to cache. This test verifies call-level options don't error.
const agent = new Agent('cache-call-level-test')
.model('anthropic/claude-haiku-4-5')
.instructions(LONG_SYSTEM_PROMPT);
const result = await agent.generate('Say hello', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
expect(result.finishReason).toBe('stop');
expect(result.messages.length).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// Prompt caching — streaming path
// ---------------------------------------------------------------------------
describe('prompt caching via stream', () => {
it('second stream with cached instructions reports cacheRead tokens in finish chunk', async () => {
const agent = new Agent('cache-stream-test')
.model('anthropic/claude-haiku-4-5')
.instructions(LONG_SYSTEM_PROMPT, {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
// First call — creates the cache entry
const { stream: stream1 } = await agent.stream('Say hello', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
});
await collectStreamChunks(stream1);
// Second call — should read from cache
const { stream: stream2 } = await agent.stream('Say goodbye', {
persistence: { resourceId: 'user1', threadId: 'thread2' },
});
const chunks = await collectStreamChunks(stream2);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const usage = finishChunks[0].usage;
expect(usage).toBeDefined();
// At least one stream should show cache activity
const write = usage!.inputTokenDetails?.cacheWrite ?? 0;
const read = usage!.inputTokenDetails?.cacheRead ?? 0;
expect(write + read).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// Thinking + cacheControl coexistence (deep merge)
// ---------------------------------------------------------------------------
describe('thinking + cacheControl coexistence', () => {
it('both thinking and prompt caching work simultaneously', async () => {
const agent = new Agent('thinking-cache-test')
.model('anthropic', 'claude-sonnet-4-5')
.thinking('anthropic', { budgetTokens: 5000 })
.instructions(LONG_SYSTEM_PROMPT, {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
// First call — cache miss, but thinking should work
const { stream: stream1 } = await agent.stream('What is 7 * 8?', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
});
const chunks1 = await collectStreamChunks(stream1);
// Should have reasoning chunks (thinking is enabled)
const reasoningChunks = chunksOfType(chunks1, 'reasoning-delta');
expect(reasoningChunks.length).toBeGreaterThan(0);
// Second call — cache hit, thinking should still work
const { stream: stream2 } = await agent.stream('What is 12 * 13?', {
persistence: { resourceId: 'user1', threadId: 'thread2' },
});
const chunks2 = await collectStreamChunks(stream2);
// Should still have reasoning
const reasoning2 = chunksOfType(chunks2, 'reasoning-delta');
expect(reasoning2.length).toBeGreaterThan(0);
// At least one call should show cache activity
const finishChunks = chunksOfType(chunks2, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const usage = finishChunks[0].usage;
expect(usage).toBeDefined();
const write = usage!.inputTokenDetails?.cacheWrite ?? 0;
const read = usage!.inputTokenDetails?.cacheRead ?? 0;
expect(write + read).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// External abort signal
// ---------------------------------------------------------------------------
describe('external abort signal', () => {
it('cancels a generate() call via external AbortSignal', async () => {
const agent = new Agent('abort-signal-test')
.model('anthropic/claude-haiku-4-5')
.instructions('You are a helpful assistant. Tell me a very long story.');
const controller = new AbortController();
setTimeout(() => controller.abort(), 100);
const result = await agent.generate('Tell me a very long detailed story about a dragon', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
abortSignal: controller.signal,
});
expect(result.finishReason).toBe('error');
expect(agent.getState().status).toBe('cancelled');
});
it('cancels a stream() call via external AbortSignal', async () => {
const agent = new Agent('abort-stream-signal-test')
.model('anthropic/claude-haiku-4-5')
.instructions('You are a helpful assistant. Tell me a very long story.');
const controller = new AbortController();
setTimeout(() => controller.abort(), 100);
const { stream } = await agent.stream('Tell me a very long detailed story about a dragon', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
abortSignal: controller.signal,
});
const chunks = await collectStreamChunks(stream);
const errorChunks = chunks.filter((c) => c.type === 'error');
expect(errorChunks.length).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// Model config object form
// ---------------------------------------------------------------------------
describe('model config object form', () => {
it('generates with model config object', async () => {
const agent = new Agent('model-config-test')
.model({ id: 'anthropic/claude-haiku-4-5' })
.instructions('You are a concise assistant. Reply in one short sentence.');
const result = await agent.generate('Say hello', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
});
expect(result.finishReason).toBe('stop');
expect(result.messages.length).toBeGreaterThan(0);
});
});
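
// --- Illustrative sketch (not part of the test file above) --------------------
// Instruction-level providerOptions is how these tests mark the long system
// prompt as cacheable; cache activity then shows up in usage.inputTokenDetails.
// The import specifier and function shape are placeholder assumptions.
import { Agent } from '@n8n/agents'; // assumed public entry point

async function promptCachingSketch(longSystemPrompt: string) {
  const agent = new Agent('cached-assistant')
    .model('anthropic/claude-haiku-4-5')
    .instructions(longSystemPrompt, {
      providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } },
    });
  const first = await agent.generate('Say hello', {
    persistence: { resourceId: 'user1', threadId: 'thread1' },
  });
  const second = await agent.generate('Say goodbye', {
    persistence: { resourceId: 'user1', threadId: 'thread2' },
  });
  // The first call typically writes the cache entry; the second reads from it.
  const cacheWrite = first.usage?.inputTokenDetails?.cacheWrite ?? 0;
  const cacheRead = second.usage?.inputTokenDetails?.cacheRead ?? 0;
  return { cacheWrite, cacheRead };
}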

View file

@ -0,0 +1,132 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import {
describeIf,
collectStreamChunks,
getModel,
chunksOfType,
collectTextDeltas,
findAllToolCalls,
} from './helpers';
import { Agent, Tool, providerTools, type StreamChunk } from '../../index';
const describe = describeIf('anthropic');
/**
* Instructions that force the model to use web search before answering.
* Required because the model may otherwise answer from its training data.
*/
const WEB_SEARCH_INSTRUCTIONS =
'You MUST call the web_search tool before answering any question, even if you think you already know the answer. Never answer without searching first.';
describe('provider tools integration', () => {
it('generate: the model calls the web search provider tool', async () => {
const agent = new Agent('provider-tool-generate-test')
.model(getModel('anthropic'))
.instructions(WEB_SEARCH_INSTRUCTIONS)
.providerTool(providerTools.anthropicWebSearch());
const result = await agent.generate('What is the weather in Tokyo?');
expect(result.finishReason).toBe('stop');
expect(result.pendingSuspend).toBeUndefined();
const toolCalls = findAllToolCalls(result.messages);
const webSearchCall = toolCalls.find((tc) => tc.toolName.includes('web_search'));
expect(webSearchCall).toBeDefined();
});
it('stream: the model calls the web search provider tool without suspending', async () => {
const agent = new Agent('provider-tool-stream-test')
.model(getModel('anthropic'))
.instructions(WEB_SEARCH_INSTRUCTIONS)
.providerTool(providerTools.anthropicWebSearch());
const { stream } = await agent.stream('What is the weather in Tokyo?');
const chunks = await collectStreamChunks(stream);
// Provider tools must never cause a suspension
const suspendChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendChunks.length).toBe(0);
// Must finish cleanly
const finishChunks = chunksOfType(chunks, 'finish');
const lastFinish = finishChunks[finishChunks.length - 1];
expect(lastFinish?.type === 'finish' && lastFinish.finishReason).toBe('stop');
// Collect tool calls from message chunks
const messageChunks = chunksOfType(chunks, 'message');
const allMessages = messageChunks.map((c) => c.message);
const toolCalls = findAllToolCalls(allMessages);
const webSearchCall = toolCalls.find((tc) => tc.toolName.includes('web_search'));
expect(webSearchCall).toBeDefined();
// Must include a text response
const text = collectTextDeltas(chunks);
expect(text).toBeTruthy();
});
it('provider tool executes without interruption while a mixed-in interruptible tool suspends', async () => {
const saveToDbTool = new Tool('save_to_db')
.description('Save weather data to the database.')
.input(z.object({ data: z.string().describe('The data to save') }))
.output(z.object({ saved: z.boolean() }))
.suspend(z.object({ message: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ data }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Save "${data}" to the database?` });
}
return { saved: ctx.resumeData.approved };
});
const agent = new Agent('mixed-provider-hitl-test')
.model(getModel('anthropic'))
.instructions(
'When asked about weather: first search the web for current weather, then call save_to_db with the result. You MUST call both tools.',
)
.providerTool(providerTools.anthropicWebSearch())
.tool(saveToDbTool)
.checkpoint('memory');
const { stream } = await agent.stream(
'Get the current weather in London and save the result to the database.',
);
const chunks = await collectStreamChunks(stream);
// The web search provider tool must NOT cause a suspension
// Only save_to_db (the interruptible tool) should suspend
const suspendChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendChunks.length).toBe(1);
const suspended = suspendChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
expect(suspended.toolName).toBe('save_to_db');
expect(suspended.runId).toBeTruthy();
expect(suspended.toolCallId).toBeTruthy();
// The web search provider tool call should appear in the message history
const messageChunks = chunksOfType(chunks, 'message');
const toolCalls = findAllToolCalls(messageChunks.map((c) => c.message));
const webSearchCall = toolCalls.find((tc) => tc.toolName.includes('web_search'));
expect(webSearchCall).toBeDefined();
// Resume with approval — agent should complete cleanly
const resumeStream = await agent.resume(
'stream',
{ approved: true },
{
runId: suspended.runId!,
toolCallId: suspended.toolCallId!,
},
);
const resumeChunks = await collectStreamChunks(resumeStream.stream);
const errorChunks = resumeChunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(resumeChunks, 'finish');
const lastFinish = finishChunks[finishChunks.length - 1];
expect(lastFinish?.type === 'finish' && lastFinish.finishReason).toBe('stop');
});
});
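
// --- Illustrative sketch (not part of the test file above) --------------------
// Provider-executed tools such as Anthropic web search run inside the provider and
// never suspend; only locally defined tools with .suspend()/.resume() schemas do.
// The import specifier and model id are placeholder assumptions.
import { z } from 'zod';
import { Agent, Tool, providerTools } from '@n8n/agents'; // assumed public entry point

const saveTool = new Tool('save_to_db')
  .description('Save data to the database.')
  .input(z.object({ data: z.string() }))
  .output(z.object({ saved: z.boolean() }))
  .suspend(z.object({ message: z.string() }))
  .resume(z.object({ approved: z.boolean() }))
  .handler(async ({ data }, ctx) => {
    if (!ctx.resumeData) {
      return await ctx.suspend({ message: `Save "${data}" to the database?` });
    }
    return { saved: ctx.resumeData.approved };
  });

const researcher = new Agent('researcher')
  .model('anthropic/claude-haiku-4-5')
  .instructions('Search the web for the answer, then save it with save_to_db.')
  .providerTool(providerTools.anthropicWebSearch())
  .tool(saveTool)
  .checkpoint('memory'); // needed so the suspended run can be resumed later

// When a 'tool-call-suspended' chunk arrives, its runId/toolCallId feed into
// researcher.resume('stream', { approved: true }, { runId, toolCallId }).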

View file

@ -0,0 +1,221 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { collectStreamChunks, chunksOfType, describeIf, getModel } from './helpers';
import type { StreamChunk } from './helpers';
import { Agent, Tool } from '../../index';
import type { CheckpointStore, SerializableAgentState } from '../../types';
const describe = describeIf('anthropic');
/**
* A minimal CheckpointStore backed by a plain Map so it can be shared across
* agent instances to simulate durable external storage (database, Redis, etc.).
*/
class InMemoryCheckpointStore implements CheckpointStore {
private store = new Map<string, SerializableAgentState>();
async save(key: string, state: SerializableAgentState): Promise<void> {
this.store.set(key, structuredClone(state));
}
async load(key: string): Promise<SerializableAgentState | undefined> {
const state = this.store.get(key);
return state ? structuredClone(state) : undefined;
}
async delete(key: string): Promise<void> {
this.store.delete(key);
}
get size(): number {
return this.store.size;
}
}
/**
* Build an agent that has a delete_file tool that always suspends on the first
* call and resumes with approval/denial on the second.
*/
function buildDeleteAgent(checkpointStore: CheckpointStore): Agent {
const deleteTool = new Tool('delete_file')
.description('Delete a file at the given path')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('file-manager')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. When asked to delete a file, use the delete_file tool. After the tool result, confirm what happened concisely.',
)
.tool(deleteTool)
.checkpoint(checkpointStore);
}
describe('state restore after suspension', () => {
it('resumes with generate after agent instance is destroyed and recreated', async () => {
const checkpointStore = new InMemoryCheckpointStore();
// --- Agent 1: run until suspended ---
let suspendedRunId: string;
let suspendedToolCallId: string;
{
const agent1 = buildDeleteAgent(checkpointStore);
const result = await agent1.generate('Delete the file /tmp/important.log');
expect(result.finishReason).toBe('tool-calls');
expect(result.pendingSuspend).toBeDefined();
suspendedRunId = result.pendingSuspend![0].runId;
suspendedToolCallId = result.pendingSuspend![0].toolCallId;
expect(suspendedRunId).toBeTruthy();
expect(suspendedToolCallId).toBeTruthy();
// Checkpoint store now holds the suspended state
expect(checkpointStore.size).toBe(1);
// agent1 goes out of scope here — its in-flight Map is gone
}
// --- Agent 2: freshly created, loads state from the shared CheckpointStore ---
const agent2 = buildDeleteAgent(checkpointStore);
const result2 = await agent2.resume(
'generate',
{ approved: true },
{ runId: suspendedRunId, toolCallId: suspendedToolCallId },
);
expect(result2.finishReason).not.toBe('error');
expect(result2.finishReason).not.toBe('tool-calls');
// The resumed result should contain a text response from the assistant
const assistantMessages = result2.messages.filter((m) => 'role' in m && m.role === 'assistant');
expect(assistantMessages.length).toBeGreaterThan(0);
const hasText = assistantMessages.some(
(m) => 'content' in m && m.content.some((c) => c.type === 'text'),
);
expect(hasText).toBe(true);
// Checkpoint should have been cleaned up after successful resumption
expect(checkpointStore.size).toBe(0);
});
it('resumes with stream after agent instance is destroyed and recreated', async () => {
const checkpointStore = new InMemoryCheckpointStore();
let suspendedRunId: string;
let suspendedToolCallId: string;
{
const agent1 = buildDeleteAgent(checkpointStore);
const { stream } = await agent1.stream('Delete the file /tmp/data.csv');
const chunks = await collectStreamChunks(stream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBe(1);
const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
expect(suspended.toolName).toBe('delete_file');
suspendedRunId = suspended.runId!;
suspendedToolCallId = suspended.toolCallId!;
// State is persisted in the external store
expect(checkpointStore.size).toBe(1);
// agent1 is destroyed here
}
// --- Agent 2: new instance, same checkpoint store ---
const agent2 = buildDeleteAgent(checkpointStore);
const resumedStream = await agent2.resume(
'stream',
{ approved: true },
{ runId: suspendedRunId, toolCallId: suspendedToolCallId },
);
const resumedChunks = await collectStreamChunks(resumedStream.stream);
// No error chunks
const errorChunks = resumedChunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
// Stream must contain the tool result message
const toolResultChunks = resumedChunks.filter(
(c) =>
c.type === 'message' &&
'message' in c &&
'content' in (c.message as object) &&
(c.message as { content: Array<{ type: string }> }).content.some(
(part) => part.type === 'tool-result',
),
);
expect(toolResultChunks.length).toBeGreaterThan(0);
// Stream must end with a finish chunk (not error)
const finishChunks = chunksOfType(resumedChunks, 'finish') as Array<
StreamChunk & { type: 'finish' }
>;
expect(finishChunks.length).toBeGreaterThan(0);
expect(finishChunks[0].finishReason).not.toBe('error');
// At least one text-delta should arrive (the LLM's final response)
const textDeltas = chunksOfType(resumedChunks, 'text-delta');
expect(textDeltas.length).toBeGreaterThan(0);
});
it('correctly restores message history so the LLM has full context', async () => {
const checkpointStore = new InMemoryCheckpointStore();
let suspendedRunId: string;
let suspendedToolCallId: string;
let originalPath: string;
{
originalPath = '/tmp/critical-data.db';
const agent1 = buildDeleteAgent(checkpointStore);
const result = await agent1.generate(`Delete the file ${originalPath}`);
expect(result.pendingSuspend).toBeDefined();
suspendedRunId = result.pendingSuspend![0].runId;
suspendedToolCallId = result.pendingSuspend![0].toolCallId;
}
const agent2 = buildDeleteAgent(checkpointStore);
const result2 = await agent2.resume(
'generate',
{ approved: true },
{ runId: suspendedRunId, toolCallId: suspendedToolCallId },
);
expect(result2.finishReason).not.toBe('error');
// The assistant response should reference the original file path,
// proving the full conversation context was restored correctly
const textContent = result2.messages
.filter((m) => 'role' in m && m.role === 'assistant')
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
expect(textContent.length).toBeGreaterThan(0);
// The LLM should confirm what happened (mentioning the file or deletion)
expect(textContent.toLowerCase()).toMatch(/delete|delet|remov|file/);
});
});
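
// --- Illustrative sketch (not part of the test file above) --------------------
// Any object with save/load/delete can serve as the CheckpointStore, so a durable
// backend (database, Redis, ...) lets a freshly created agent resume a run that a
// previous instance suspended. The import path for the types and the builder
// callback are placeholder assumptions.
import type { Agent, CheckpointStore, SerializableAgentState } from '@n8n/agents';

class MapCheckpointStore implements CheckpointStore {
  private entries = new Map<string, SerializableAgentState>();
  async save(key: string, state: SerializableAgentState) {
    this.entries.set(key, structuredClone(state));
  }
  async load(key: string) {
    return this.entries.get(key);
  }
  async delete(key: string) {
    this.entries.delete(key);
  }
}

async function resumeElsewhereSketch(buildAgent: (store: CheckpointStore) => Agent) {
  const store = new MapCheckpointStore();
  const first = buildAgent(store);
  const result = await first.generate('Delete the file /tmp/example.log');
  const { runId, toolCallId } = result.pendingSuspend![0];
  // A completely new instance, e.g. in another request, resumes from the shared store.
  const second = buildAgent(store);
  return await second.resume('generate', { approved: true }, { runId, toolCallId });
}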

View file

@ -0,0 +1,72 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { describeIf, getModel } from './helpers';
import { Agent, Tool } from '../../index';
const describe = describeIf('anthropic');
describe('stream timing', () => {
it('tool-call-delta chunks arrive incrementally (not all buffered)', async () => {
const agent = new Agent('timing-test')
.model(getModel('anthropic'))
.instructions(
'When asked to write code, call the set_code tool with the code. Write at least 10 lines.',
)
.tool(
new Tool('set_code')
.description('Set code in the editor')
.input(
z.object({
code: z.string().describe('The complete source code'),
}),
)
.providerOptions({ anthropic: { eagerInputStreaming: true } })
.handler(async ({ code }) => ({ ok: true, length: code.length })),
);
const result = await agent.stream(
'Write a TypeScript function that implements bubble sort. Use the set_code tool.',
);
const reader = result.stream.getReader();
// Track timestamps of each reader.read() that returns a tool-call-delta
// This measures when the reader YIELDS each chunk, not when the agent enqueues it.
const deltaReadTimes: number[] = [];
const start = Date.now();
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = value;
if (chunk.type === 'tool-call-delta' && (chunk as { name?: string }).name === 'set_code') {
deltaReadTimes.push(Date.now() - start);
}
}
expect(deltaReadTimes.length).toBeGreaterThan(0);
console.log(`set_code delta reads: ${deltaReadTimes.length}`);
if (deltaReadTimes.length > 1) {
const first = deltaReadTimes[0];
const last = deltaReadTimes[deltaReadTimes.length - 1];
const spread = last - first;
console.log(`Time spread: ${spread}ms (first read: ${first}ms, last read: ${last}ms)`);
// Count how many distinct timestamps (ms resolution)
const uniqueTimes = new Set(deltaReadTimes).size;
console.log(`Unique timestamps: ${uniqueTimes} out of ${deltaReadTimes.length} reads`);
// If truly streaming: spread should be significant (>500ms for code generation)
// If buffered: spread will be near 0 and most reads share the same timestamp
const bufferingRatio = uniqueTimes / deltaReadTimes.length;
console.log(`Buffering ratio: ${(bufferingRatio * 100).toFixed(1)}% unique timestamps`);
console.log(
bufferingRatio < 0.1
? 'BUFFERED: The agent releases all deltas in one burst'
: 'STREAMING: Deltas arrive incrementally',
);
}
});
});
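
// --- Illustrative sketch (not part of the test file above) --------------------
// Tool-level providerOptions is how the timing test opts a single tool into eager
// input streaming, so tool-call-delta chunks arrive while the arguments are still
// being generated. The import specifier and model id are placeholder assumptions.
import { z } from 'zod';
import { Agent, Tool } from '@n8n/agents'; // assumed public entry point

const setCode = new Tool('set_code')
  .description('Set code in the editor')
  .input(z.object({ code: z.string().describe('The complete source code') }))
  .providerOptions({ anthropic: { eagerInputStreaming: true } })
  .handler(async ({ code }) => ({ ok: true, length: code.length }));

async function countToolDeltas() {
  const agent = new Agent('editor')
    .model('anthropic/claude-haiku-4-5')
    .instructions('When asked to write code, call the set_code tool with the code.')
    .tool(setCode);
  const { stream } = await agent.stream('Write a bubble sort in TypeScript. Use set_code.');
  const reader = stream.getReader();
  let deltas = 0;
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    if (value.type === 'tool-call-delta') deltas++; // arrives incrementally, not in one burst
  }
  return deltas;
}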

View file

@ -0,0 +1,223 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { describeIf, collectStreamChunks, chunksOfType, getModel } from './helpers';
import { Agent, Tool } from '../../index';
import type { StreamChunk } from '../../index';
const answerSchema = z.object({
city: z.string().describe('The name of the city'),
country: z.string().describe('The country the city is in'),
population_millions: z.number().describe('Approximate population in millions'),
});
function createStructuredAgent(provider: 'anthropic' | 'openai'): Agent {
return new Agent('structured-output-test')
.model(getModel(provider))
.instructions(
'You answer geography questions. Always respond with the structured output schema. Be precise and factual.',
)
.structuredOutput(answerSchema);
}
function createStructuredAgentWithTool(provider: 'anthropic' | 'openai'): Agent {
const lookupTool = new Tool('lookup_capital')
.description('Look up the capital city of a country')
.input(z.object({ country: z.string().describe('Country name') }))
.output(z.object({ capital: z.string(), population_millions: z.number() }))
.handler(async ({ country }) => {
const data: Record<string, { capital: string; population_millions: number }> = {
france: { capital: 'Paris', population_millions: 2.1 },
japan: { capital: 'Tokyo', population_millions: 13.9 },
brazil: { capital: 'Brasília', population_millions: 3.0 },
};
return data[country.toLowerCase()] ?? { capital: 'Unknown', population_millions: 0 };
});
return new Agent('structured-tool-test')
.model(getModel(provider))
.instructions(
'You answer geography questions. Use the lookup_capital tool when asked about capitals. Always respond with the structured output schema.',
)
.tool(lookupTool)
.structuredOutput(answerSchema);
}
function createStructuredAgentWithInterruptibleTool(provider: 'anthropic' | 'openai'): Agent {
const deleteTool = new Tool('delete_record')
.description('Delete a geographic record — requires confirmation')
.input(z.object({ city: z.string().describe('City to delete') }))
.output(z.object({ deleted: z.boolean(), city: z.string() }))
.suspend(z.object({ message: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ city }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete record for "${city}"?` });
}
return { deleted: ctx.resumeData.approved, city };
});
const resultSchema = z.object({
action: z.string().describe('The action that was performed'),
city: z.string().describe('The city affected'),
success: z.boolean().describe('Whether the action succeeded'),
});
return new Agent('structured-interrupt-test')
.model(getModel(provider))
.instructions(
'You manage geographic records. When asked to delete a record, use the delete_record tool. Always respond with the structured output schema.',
)
.tool(deleteTool)
.structuredOutput(resultSchema)
.checkpoint('memory');
}
const describe = describeIf('anthropic');
describe('structured output integration', () => {
it('returns parsed structuredOutput via generate()', async () => {
const agent = createStructuredAgent('anthropic');
const result = await agent.generate('What is the capital of France?');
expect(result.finishReason).toBe('stop');
expect(result.structuredOutput).toBeDefined();
const parsed = answerSchema.safeParse(result.structuredOutput);
expect(parsed.success).toBe(true);
if (parsed.success) {
expect(parsed.data.city.toLowerCase()).toContain('paris');
expect(parsed.data.country.toLowerCase()).toContain('france');
expect(parsed.data.population_millions).toBeGreaterThan(0);
}
});
it('returns parsed structuredOutput in stream finish chunk', async () => {
const agent = createStructuredAgent('anthropic');
const { stream } = await agent.stream('What is the capital of Japan?');
const chunks = await collectStreamChunks(stream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.structuredOutput).toBeDefined();
const parsed = answerSchema.safeParse(finish.structuredOutput);
expect(parsed.success).toBe(true);
if (parsed.success) {
expect(parsed.data.city.toLowerCase()).toContain('tokyo');
}
});
it('returns structuredOutput after tool use via generate()', async () => {
const agent = createStructuredAgentWithTool('anthropic');
const result = await agent.generate('What is the capital of France? Use the lookup tool.');
expect(result.finishReason).toBe('stop');
expect(result.structuredOutput).toBeDefined();
const parsed = answerSchema.safeParse(result.structuredOutput);
expect(parsed.success).toBe(true);
if (parsed.success) {
expect(parsed.data.city.toLowerCase()).toContain('paris');
}
});
it('returns structuredOutput after tool use via stream()', async () => {
const agent = createStructuredAgentWithTool('anthropic');
const { stream } = await agent.stream('What is the capital of Japan? Use the lookup tool.');
const chunks = await collectStreamChunks(stream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.structuredOutput).toBeDefined();
const parsed = answerSchema.safeParse(finish.structuredOutput);
expect(parsed.success).toBe(true);
});
it('returns structuredOutput after resume("generate")', async () => {
const agent = createStructuredAgentWithInterruptibleTool('anthropic');
const first = await agent.generate('Delete the record for Paris');
expect(first.pendingSuspend).toBeDefined();
const { runId, toolCallId } = first.pendingSuspend![0];
const resumed = await agent.resume('generate', { approved: true }, { runId, toolCallId });
expect(resumed.finishReason).toBe('stop');
expect(resumed.structuredOutput).toBeDefined();
const resultSchema = z.object({
action: z.string(),
city: z.string(),
success: z.boolean(),
});
const parsed = resultSchema.safeParse(resumed.structuredOutput);
expect(parsed.success).toBe(true);
});
it('returns structuredOutput after resume("stream")', async () => {
const agent = createStructuredAgentWithInterruptibleTool('anthropic');
const first = await agent.generate('Delete the record for Tokyo');
expect(first.pendingSuspend).toBeDefined();
const { runId, toolCallId } = first.pendingSuspend![0];
const resumedStream = await agent.resume('stream', { approved: true }, { runId, toolCallId });
const chunks = await collectStreamChunks(resumedStream.stream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.structuredOutput).toBeDefined();
const resultSchema = z.object({
action: z.string(),
city: z.string(),
success: z.boolean(),
});
const parsed = resultSchema.safeParse(finish.structuredOutput);
expect(parsed.success).toBe(true);
});
it('structuredOutput conforms to the schema', async () => {
const strictSchema = z.object({
name: z.string(),
is_capital: z.boolean(),
continent: z.enum([
'Africa',
'Antarctica',
'Asia',
'Europe',
'North America',
'Oceania',
'South America',
]),
});
const agent = new Agent('strict-schema-test')
.model(getModel('anthropic'))
.instructions('Answer geography questions using the structured output schema.')
.structuredOutput(strictSchema);
const result = await agent.generate('Tell me about Berlin');
expect(result.structuredOutput).toBeDefined();
const parsed = strictSchema.safeParse(result.structuredOutput);
expect(parsed.success).toBe(true);
if (parsed.success) {
expect(parsed.data.name.toLowerCase()).toContain('berlin');
expect(parsed.data.continent).toBe('Europe');
expect(typeof parsed.data.is_capital).toBe('boolean');
}
});
});

View file

@ -0,0 +1,96 @@
import { expect, it } from 'vitest';
import {
chunksOfType,
collectStreamChunks,
collectTextDeltas,
describeIf,
findAllToolResults,
getModel,
} from './helpers';
import type { StreamChunk } from '../../index';
import { Agent } from '../../index';
const describe = describeIf('anthropic');
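// asTool() exposes an agent as a regular tool: the orchestrator passes a prompt,
// the sub-agent runs its own model loop, and its reply comes back as the tool result
// (surfaced below through findAllToolResults on the streamed messages).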
describe('sub-agent (asTool) integration', () => {
it('orchestrator calls a sub-agent as a tool and gets its response', async () => {
const mathAgent = new Agent('math-specialist')
.model(getModel('anthropic'))
.instructions(
'You are a math specialist. When given a math problem, compute the answer and reply with just the number. No explanation.',
);
const orchestrator = new Agent('orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are a coordinator. When asked a math question, delegate to the math_specialist tool. ' +
'Pass the question as the prompt. Then relay the answer back.',
)
.tool(mathAgent.asTool('A math specialist that can solve math problems'));
const { stream: fullStream } = await orchestrator.stream('What is 15 * 4?');
const chunks = await collectStreamChunks(fullStream);
const text = collectTextDeltas(chunks);
const messageChunks = chunksOfType(chunks, 'message') as Array<
StreamChunk & { type: 'message' }
>;
const toolResults = findAllToolResults(messageChunks.map((c) => c.message));
// The orchestrator should have called the sub-agent tool
expect(toolResults.length).toBeGreaterThan(0);
const mathCall = toolResults.find((tc) => tc.toolName === 'math-specialist');
expect(mathCall).toBeDefined();
// The output should contain the sub-agent's response
expect(mathCall!.result).toBeDefined();
// The final text should reference 60
expect(text).toBeTruthy();
expect(text).toContain('60');
});
it('handles a chain of two sub-agents', async () => {
const translatorAgent = new Agent('translator')
.model(getModel('anthropic'))
.instructions(
'You are a translator. Translate the given text to French. Reply with only the French translation.',
);
const uppercaseAgent = new Agent('uppercaser')
.model(getModel('anthropic'))
.instructions(
'You convert text to uppercase. Reply with the input text in all uppercase letters. Nothing else.',
);
const orchestrator = new Agent('chain-orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are a coordinator with two tools. ' +
'When asked to translate and uppercase text: ' +
'1. First use the translator tool to translate to French. ' +
'2. Then use the uppercaser tool to convert the French text to uppercase. ' +
'Return the final uppercase French text.',
)
.tool(translatorAgent.asTool('Translates text to French'))
.tool(uppercaseAgent.asTool('Converts text to uppercase'));
const { stream: fullStream } = await orchestrator.stream(
'Translate "hello" to French and then make it uppercase.',
);
const chunks = await collectStreamChunks(fullStream);
const messageChunks = chunksOfType(chunks, 'message') as Array<
StreamChunk & { type: 'message' }
>;
const toolResults = findAllToolResults(messageChunks.map((c) => c.message));
// Should have called both tools
expect(toolResults.length).toBeGreaterThanOrEqual(2);
const text = collectTextDeltas(chunks);
expect(text).toBeTruthy();
// The result should contain the French for hello: BONJOUR (or SALUT)
expect(text).toMatch(/BONJOUR|SALUT/i);
});
});

View file

@ -0,0 +1,197 @@
/**
* Integration test: Telemetry LangSmith.
*
* Runs a real agent against a real model with LangSmith telemetry configured,
* uses a local HTTP server to capture the trace data that would be sent to
* LangSmith, and verifies the full pipeline works end-to-end.
*
* Pipeline under test:
 * Agent.generate() → AI SDK (generateText with experimental_telemetry)
 * → OTel spans with ai.operationId → LangSmithOTLPSpanProcessor
 * → LangSmithOTLPTraceExporter → HTTP POST captured by local server
*/
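// A minimal usage sketch of the builder exercised below (key and endpoint are
// placeholders, not real values); it mirrors the calls these tests make:
//
//   const built = await new LangSmithTelemetry({
//     apiKey: 'ls-...',
//     project: 'my-project',
//     url: 'https://<langsmith-host>/otel/v1/traces',
//   })
//     .functionId('my-agent')
//     .build();
//   const agent = new Agent('traced')
//     .model('anthropic/claude-haiku-4-5')
//     .telemetry(built as Telemetry);
//   await agent.generate('hello');
//   if (built.provider) await built.provider.forceFlush();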
import * as http from 'node:http';
import { afterAll, afterEach, beforeAll, expect, it } from 'vitest';
import { z } from 'zod';
import { describeIf, getModel } from './helpers';
import { Agent, LangSmithTelemetry, type Telemetry, type BuiltTelemetry, Tool } from '../../index';
const describe = describeIf('anthropic');
interface CapturedRequest {
url: string;
headers: http.IncomingHttpHeaders;
body: Buffer;
}
describe('Telemetry → LangSmith integration', () => {
let server: http.Server;
let serverPort: number;
let captured: CapturedRequest[];
let previousTracingV2: string | undefined;
beforeAll(async () => {
// LangSmith exporter requires this env var to be set, otherwise it silently drops spans
previousTracingV2 = process.env.LANGCHAIN_TRACING_V2;
process.env.LANGCHAIN_TRACING_V2 = 'true';
captured = [];
server = http.createServer((req, res) => {
const chunks: Buffer[] = [];
req.on('data', (c: Buffer) => chunks.push(c));
req.on('end', () => {
captured.push({
url: req.url ?? '',
headers: req.headers,
body: Buffer.concat(chunks),
});
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end('{}');
});
});
await new Promise<void>((resolve) => {
server.listen(0, () => {
const addr = server.address();
serverPort = typeof addr === 'object' && addr ? addr.port : 0;
resolve();
});
});
});
afterEach(() => {
captured = [];
});
afterAll(async () => {
if (previousTracingV2 === undefined) {
delete process.env.LANGCHAIN_TRACING_V2;
} else {
process.env.LANGCHAIN_TRACING_V2 = previousTracingV2;
}
await new Promise<void>((resolve) => {
server.close(() => resolve());
});
});
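// Shared fixture: an agent with a single `add` tool so the traced runs below exercise
// both plain text generation and tool calls.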
function createTestAgent(telemetry: Telemetry | BuiltTelemetry) {
return new Agent('langsmith-test')
.model(getModel('anthropic'))
.instructions('You are a calculator. Use the add tool when asked to add. Be concise.')
.telemetry(telemetry as Telemetry)
.tool(
new Tool('add')
.description('Add two numbers')
.input(z.object({ a: z.number(), b: z.number() }))
.handler(async ({ a, b }) => ({ result: a + b })),
);
}
it('sends trace data to LangSmith using LangSmithTelemetry', async () => {
const built = await new LangSmithTelemetry({
apiKey: 'ls-test-key-12345',
project: 'agents-test',
url: `http://localhost:${serverPort}/otel/v1/traces`,
})
.functionId('calc-agent')
.build();
const agent = createTestAgent(built);
const result = await agent.generate('What is 3 + 4?');
if (built.provider) await built.provider.forceFlush();
// Verify the agent produced a response
expect(result.messages.length).toBeGreaterThan(0);
// Verify LangSmith received trace data
expect(captured.length).toBeGreaterThan(0);
// Verify the request hit the OTLP traces endpoint
expect(captured.some((r) => r.url.includes('/otel/v1/traces'))).toBe(true);
// Verify the API key was sent in the header
expect(captured.some((r) => r.headers['x-api-key'] === 'ls-test-key-12345')).toBe(true);
// Verify the body is non-empty (actual protobuf trace data)
const totalBytes = captured.reduce((sum, r) => sum + r.body.length, 0);
expect(totalBytes).toBeGreaterThan(0);
if (built.provider) await built.provider.shutdown();
});
it('supports endpoint shorthand (auto-appends /otel/v1/traces)', async () => {
const built = await new LangSmithTelemetry({
apiKey: 'ls-endpoint-key',
project: 'agents-test',
endpoint: `http://localhost:${serverPort}`,
})
.functionId('endpoint-test')
.build();
const agent = createTestAgent(built);
const result = await agent.generate('What is 10 + 20?');
if (built.provider) await built.provider.forceFlush();
expect(result.messages.length).toBeGreaterThan(0);
expect(captured.length).toBeGreaterThan(0);
expect(captured.some((r) => r.headers['x-api-key'] === 'ls-endpoint-key')).toBe(true);
if (built.provider) await built.provider.shutdown();
});
it('includes tool call spans in the trace', async () => {
const built = await new LangSmithTelemetry({
apiKey: 'ls-tool-test',
project: 'agents-test',
url: `http://localhost:${serverPort}/otel/v1/traces`,
})
.functionId('tool-trace-test')
.build();
const agent = createTestAgent(built);
await agent.generate('What is 5 + 7?');
if (built.provider) await built.provider.forceFlush();
// Multiple spans exported as protobuf
expect(captured.length).toBeGreaterThan(0);
const totalBytes = captured.reduce((sum, r) => sum + r.body.length, 0);
expect(totalBytes).toBeGreaterThan(50);
if (built.provider) await built.provider.shutdown();
});
it('fires TelemetryIntegration hooks alongside LangSmith traces', async () => {
const hookEvents: string[] = [];
const built = await new LangSmithTelemetry({
apiKey: 'ls-hooks-test',
project: 'agents-test',
url: `http://localhost:${serverPort}/otel/v1/traces`,
})
.functionId('hooks-test')
.integration({
onStart: () => {
hookEvents.push('start');
},
onFinish: () => {
hookEvents.push('finish');
},
})
.build();
const agent = createTestAgent(built);
await agent.generate('What is 1 + 1?');
if (built.provider) await built.provider.forceFlush();
// Both LangSmith traces and integration hooks should fire
expect(captured.length).toBeGreaterThan(0);
expect(hookEvents).toContain('start');
expect(hookEvents).toContain('finish');
if (built.provider) await built.provider.shutdown();
});
});

View file

@ -0,0 +1,67 @@
import { expect, it } from 'vitest';
import { describeIf, collectStreamChunks, chunksOfType } from './helpers';
import { Agent } from '../../index';
/**
* Thinking / reasoning stream integration tests.
*
* These require models that support extended thinking:
 * - Anthropic: claude-sonnet-4-5 (not haiku, which doesn't support thinking)
* - OpenAI: o3-mini (reasoning model)
*/
const describeAnthropic = describeIf('anthropic');
describeAnthropic('thinking stream (Anthropic)', () => {
it('emits reasoning-delta chunks when thinking is enabled', async () => {
const agent = new Agent('thinking-test')
.model('anthropic', 'claude-sonnet-4-5')
.thinking('anthropic', { budgetTokens: 5000 })
.instructions('You are a helpful assistant. Think carefully before answering.');
const { stream: fullStream } = await agent.stream('What is 17 * 23?');
const chunks = await collectStreamChunks(fullStream);
const reasoningChunks = chunksOfType(chunks, 'reasoning-delta');
expect(reasoningChunks.length).toBeGreaterThan(0);
// Verify reasoning chunks have non-empty delta content
const deltas = reasoningChunks
.filter((c): c is typeof c & { delta: string } => 'delta' in c)
.map((c) => c.delta);
const fullReasoning = deltas.join('');
expect(fullReasoning.length).toBeGreaterThan(0);
// Should also have text-delta chunks (the actual answer)
const textChunks = chunksOfType(chunks, 'text-delta');
expect(textChunks.length).toBeGreaterThan(0);
});
});
const describeOpenAI = describeIf('openai');
describeOpenAI('thinking stream (OpenAI)', () => {
it('works with reasoning model and .thinking() enabled', async () => {
const agent = new Agent('openai-thinking-test')
.model('openai', 'o3-mini')
.thinking('openai', { reasoningEffort: 'medium' })
.instructions('You are a helpful assistant.');
const { stream: fullStream } = await agent.stream('What is 17 * 23?');
const chunks = await collectStreamChunks(fullStream);
// OpenAI reasoning models do internal reasoning but don't expose it
// as streamed chunks — verify the agent produces a text response.
const textChunks = chunksOfType(chunks, 'text-delta');
expect(textChunks.length).toBeGreaterThan(0);
const text = textChunks
.filter((c): c is typeof c & { delta: string } => 'delta' in c)
.map((c) => c.delta)
.join('');
expect(text).toContain('391');
});
});

View file

@ -0,0 +1,154 @@
import { expect, it, vi, afterEach, beforeEach } from 'vitest';
import { describeIf, getModel, collectStreamChunks, createSqliteMemory } from './helpers';
import { Agent, Memory } from '../../index';
const describe = describeIf('anthropic');
describe('title generation integration', () => {
let sqliteCtx: ReturnType<typeof createSqliteMemory>;
beforeEach(() => {
sqliteCtx = createSqliteMemory();
});
afterEach(async () => {
sqliteCtx.cleanup();
});
it('auto-generates a thread title after generate() on a new thread', async () => {
const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration(true);
const agent = new Agent('title-gen-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `title-test-${Date.now()}`;
const resourceId = 'test-user';
const threadBefore = await sqliteCtx.memory.getThread(threadId);
expect(threadBefore).toBeNull();
await agent.generate('Tell me about the history of Rome', {
persistence: { threadId, resourceId },
});
await vi.waitFor(
async () => {
const thread = await sqliteCtx.memory.getThread(threadId);
expect(thread).toBeDefined();
expect(thread!.title).toBeTruthy();
expect(thread!.title!.length).toBeGreaterThan(0);
expect(thread!.title!.length).toBeLessThanOrEqual(80);
},
{ timeout: 30_000, interval: 500 },
);
});
it('auto-generates a thread title after stream() on a new thread', async () => {
const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration(true);
const agent = new Agent('title-gen-stream-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `title-stream-test-${Date.now()}`;
const resourceId = 'test-user';
const { stream } = await agent.stream('Explain quantum computing basics', {
persistence: { threadId, resourceId },
});
await collectStreamChunks(stream);
await vi.waitFor(
async () => {
const thread = await sqliteCtx.memory.getThread(threadId);
expect(thread).toBeDefined();
expect(thread!.title).toBeTruthy();
expect(thread!.title!.length).toBeGreaterThan(0);
expect(thread!.title!.length).toBeLessThanOrEqual(80);
},
{ timeout: 30_000, interval: 500 },
);
});
it('does not generate a title when titleGeneration is not configured', async () => {
const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10);
const agent = new Agent('no-title-gen-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `no-title-test-${Date.now()}`;
await agent.generate('Hello, how are you?', {
persistence: { threadId, resourceId: 'test-user' },
});
await new Promise((r) => setTimeout(r, 3_000));
const thread = await sqliteCtx.memory.getThread(threadId);
expect(thread).toBeDefined();
expect(thread!.title).toBeFalsy();
});
it('does not overwrite a pre-existing thread title', async () => {
const existingTitle = 'My Pre-Existing Title';
await sqliteCtx.memory.saveThread({
id: 'pre-titled-thread',
resourceId: 'test-user',
title: existingTitle,
metadata: { custom: 'data' },
});
const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration(true);
const agent = new Agent('title-no-overwrite-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
await agent.generate('What is 2+2?', {
persistence: { threadId: 'pre-titled-thread', resourceId: 'test-user' },
});
// Allow fire-and-forget title generation to settle
await new Promise((r) => setTimeout(r, 5_000));
const thread = await sqliteCtx.memory.getThread('pre-titled-thread');
expect(thread!.title).toBe(existingTitle);
expect(thread!.metadata).toEqual({ custom: 'data' });
});
it('accepts a custom model for title generation', async () => {
const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration({
model: 'anthropic/claude-haiku-4-5',
});
const agent = new Agent('title-custom-model-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `title-custom-model-${Date.now()}`;
await agent.generate('What are the best practices for growing tomatoes?', {
persistence: { threadId, resourceId: 'test-user' },
});
await vi.waitFor(
async () => {
const thread = await sqliteCtx.memory.getThread(threadId);
expect(thread).toBeDefined();
expect(thread!.title).toBeTruthy();
expect(thread!.title!.length).toBeGreaterThan(0);
},
{ timeout: 30_000, interval: 500 },
);
});
});

View file

@ -0,0 +1,211 @@
import { expect, it, vi } from 'vitest';
import { z } from 'zod';
import {
describeIf,
getModel,
collectStreamChunks,
chunksOfType,
findAllToolResults,
collectTextDeltas,
} from './helpers';
import { Agent, filterLlmMessages, Tool } from '../../index';
const describe = describeIf('anthropic');
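/**
 * toModelOutput lets a tool hand the full payload back to the caller while sending a
 * reduced, transformed view to the LLM. The tests below check both sides:
 * GenerateResult.toolCalls keeps the raw handler output, and the tool-result message
 * (what the model actually sees) carries the transformed output.
 */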
describe('toModelOutput integration', () => {
it('sends the transformed output to the LLM while preserving raw output in toolCalls', async () => {
const handlerSpy = vi.fn();
const searchTool = new Tool('search_db')
.description('Search the database and return matching records')
.input(z.object({ query: z.string().describe('Search query') }))
.output(
z.object({
records: z.array(z.object({ id: z.number(), name: z.string(), data: z.string() })),
total: z.number(),
}),
)
.handler(async ({ query }) => {
handlerSpy(query);
return {
records: [
{ id: 1, name: 'Widget A', data: 'x'.repeat(200) },
{ id: 2, name: 'Widget B', data: 'y'.repeat(200) },
{ id: 3, name: 'Gadget C', data: 'z'.repeat(200) },
],
total: 3,
};
})
.toModelOutput((output) => ({
summary: `Found ${output.total} records: ${output.records.map((r) => r.name).join(', ')}`,
}));
const agent = new Agent('to-model-output-test')
.model(getModel('anthropic'))
.instructions(
'You are a database assistant. Use search_db to find records. Be concise in your response.',
)
.tool(searchTool);
const result = await agent.generate('Search for widgets in the database');
expect(handlerSpy).toHaveBeenCalled();
// toolCalls on GenerateResult stores the raw output
expect(result.toolCalls).toBeDefined();
const searchEntry = result.toolCalls!.find((tc) => tc.tool === 'search_db');
expect(searchEntry).toBeDefined();
const rawOutput = searchEntry!.output as {
records: Array<{ id: number; name: string; data: string }>;
total: number;
};
expect(rawOutput.total).toBe(3);
expect(rawOutput.records[0].data).toBe('x'.repeat(200));
// ContentToolResult in messages stores the transformed output (what the LLM saw)
const toolResults = findAllToolResults(result.messages);
const searchToolResult = toolResults.find((tr) => tr.toolName === 'search_db');
expect(searchToolResult).toBeDefined();
const modelOutput = searchToolResult!.result as { summary: string };
expect(modelOutput.summary).toContain('Found 3 records');
expect(modelOutput.summary).toContain('Widget A');
});
it('works with stream() — LLM receives transformed output', async () => {
const fetchTool = new Tool('fetch_report')
.description('Fetch a detailed report by ID')
.input(z.object({ reportId: z.string().describe('Report ID') }))
.output(
z.object({
id: z.string(),
title: z.string(),
body: z.string(),
metadata: z.object({ pages: z.number(), author: z.string() }),
}),
)
.handler(async ({ reportId }) => ({
id: reportId,
title: 'Q4 Sales Report',
body: 'Detailed analysis spanning multiple pages...'.repeat(10),
metadata: { pages: 42, author: 'Jane Doe' },
}))
.toModelOutput((output) => ({
id: output.id,
title: output.title,
pageCount: output.metadata.pages,
}));
const agent = new Agent('to-model-output-stream-test')
.model(getModel('anthropic'))
.instructions(
'You are a report assistant. Use fetch_report to retrieve reports. Mention the title and page count. Be concise.',
)
.tool(fetchTool);
const { stream } = await agent.stream('Get report RPT-001');
const chunks = await collectStreamChunks(stream);
// The tool result messages in the stream contain the transformed output
const messageChunks = chunksOfType(chunks, 'message');
const toolResults = findAllToolResults(messageChunks.map((c) => c.message));
const reportResult = toolResults.find((tr) => tr.toolName === 'fetch_report');
expect(reportResult).toBeDefined();
// The model output (transformed) should contain only the selected fields
const modelOutput = reportResult!.result as { id: string; title: string; pageCount: number };
expect(modelOutput.id).toBe('RPT-001');
expect(modelOutput.title).toBe('Q4 Sales Report');
expect(modelOutput.pageCount).toBe(42);
// The body should NOT be in the model output (it was stripped by toModelOutput)
expect((modelOutput as Record<string, unknown>).body).toBeUndefined();
const text = collectTextDeltas(chunks);
expect(text).toBeTruthy();
expect(text).toMatch(/Q4 Sales Report/i);
});
it('does not affect the LLM output when toModelOutput is not set', async () => {
const echoTool = new Tool('echo')
.description('Echo back the input message')
.input(z.object({ message: z.string().describe('Message to echo') }))
.output(z.object({ echoed: z.string() }))
.handler(async ({ message }) => ({ echoed: message }));
const agent = new Agent('no-to-model-output-test')
.model(getModel('anthropic'))
.instructions('You are a simple echo bot. Use echo tool and repeat the result. Be concise.')
.tool(echoTool);
const result = await agent.generate('Echo the message "hello world"');
// Without toModelOutput, tool result in messages should have the raw output
const toolResults = findAllToolResults(result.messages);
const echoResult = toolResults.find((tr) => tr.toolName === 'echo');
expect(echoResult).toBeDefined();
expect((echoResult!.result as { echoed: string }).echoed).toBe('hello world');
// And toolCalls should also have the same raw output
expect(result.toolCalls).toBeDefined();
const echoEntry = result.toolCalls!.find((tc) => tc.tool === 'echo');
expect(echoEntry).toBeDefined();
expect((echoEntry!.output as { echoed: string }).echoed).toBe('hello world');
});
it('works alongside toMessage — both transforms apply independently', async () => {
const calcTool = new Tool('multiply')
.description('Multiply two numbers')
.input(
z.object({
a: z.number().describe('First number'),
b: z.number().describe('Second number'),
}),
)
.output(z.object({ result: z.number() }))
.handler(async ({ a, b }) => ({ result: a * b }))
.toModelOutput((output) => ({
answer: output.result,
note: 'multiplication complete',
}))
.toMessage((output) => ({
type: 'custom',
data: {
dummy: `Product is ${output.result}`,
},
}));
const agent = new Agent('both-transforms-test')
.model(getModel('anthropic'))
.instructions('You are a calculator. Use multiply to multiply numbers. Be concise.')
.tool(calcTool);
const result = await agent.generate('What is 7 times 8?');
// Custom message from toMessage should be present (uses raw output)
const customMessages = result.messages.filter((m) => m.type === 'custom') as Array<{
type: 'custom';
data: { dummy: string };
}>;
expect(customMessages.length).toBeGreaterThan(0);
expect(customMessages[0].data.dummy).toBe('Product is 56');
// toolCalls stores the raw output
expect(result.toolCalls).toBeDefined();
const multiplyEntry = result.toolCalls!.find((tc) => tc.tool === 'multiply');
expect(multiplyEntry).toBeDefined();
expect((multiplyEntry!.output as { result: number }).result).toBe(56);
// Tool result in messages stores the transformed output for the LLM
const toolResults = findAllToolResults(result.messages);
const multiplyToolResult = toolResults.find((tr) => tr.toolName === 'multiply');
expect(multiplyToolResult).toBeDefined();
const modelOutput = multiplyToolResult!.result as { answer: number; note: string };
expect(modelOutput.answer).toBe(56);
expect(modelOutput.note).toBe('multiplication complete');
// The custom messages should be filtered out for the LLM
const llmMessages = filterLlmMessages(result.messages);
expect(llmMessages.length).toBeLessThan(result.messages.length);
});
});

View file

@ -0,0 +1,115 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
getModel,
createAgentWithAddTool,
} from './helpers';
import { Agent } from '../../index';
import type { StreamChunk } from '../../index';
const describe = describeIf('anthropic');
describe('token usage integration', () => {
it('reports token usage on a simple text response via streamText', async () => {
const agent = new Agent('token-test')
.model(getModel('anthropic'))
.instructions('Reply with exactly: "Hello". Nothing else.');
const { stream: fullStream } = await agent.stream('Say hello');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.usage).toBeDefined();
expect(finish.usage!.promptTokens).toBeGreaterThan(0);
expect(finish.usage!.completionTokens).toBeGreaterThan(0);
expect(finish.usage!.totalTokens).toBe(
finish.usage!.promptTokens + finish.usage!.completionTokens,
);
});
it('reports token usage on a simple text response via run()', async () => {
const agent = new Agent('token-run-test')
.model(getModel('anthropic'))
.instructions('Reply with exactly: "Hello". Nothing else.');
const result = await agent.generate('Say hello');
expect(result.usage).toBeDefined();
expect(result.usage!.promptTokens).toBeGreaterThan(0);
expect(result.usage!.completionTokens).toBeGreaterThan(0);
expect(result.usage!.totalTokens).toBe(
result.usage!.promptTokens + result.usage!.completionTokens,
);
});
it('reports token usage after a multi-step tool call', async () => {
const agent = createAgentWithAddTool('anthropic');
const { stream: fullStream } = await agent.stream('What is 7 + 13?');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.usage).toBeDefined();
// Multi-step should use more tokens than a simple response
expect(finish.usage!.promptTokens).toBeGreaterThan(0);
expect(finish.usage!.completionTokens).toBeGreaterThan(0);
expect(finish.usage!.totalTokens).toBe(
finish.usage!.promptTokens + finish.usage!.completionTokens,
);
});
it('emits finish chunks with token usage in the stream', async () => {
const agent = new Agent('finish-chunk-test')
.model(getModel('anthropic'))
.instructions('Reply with exactly: "OK". Nothing else.');
const { stream: fullStream } = await agent.stream('Acknowledge');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.finishReason).toBeDefined();
// Finish chunks should carry usage when available
if (finish.usage) {
expect(finish.usage.promptTokens).toBeGreaterThanOrEqual(0);
expect(finish.usage.completionTokens).toBeGreaterThanOrEqual(0);
}
});
it('accumulates higher token counts with more complex prompts', async () => {
const agent = new Agent('token-scale-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.');
// Short prompt
const { stream: short } = await agent.stream('Hi');
const chunks = await collectStreamChunks(short);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finishShort = finishChunks[0] as StreamChunk & { type: 'finish' };
// Longer prompt
const { stream: long } = await agent.stream(
'Explain the difference between TCP and UDP networking protocols. Include at least three key differences.',
);
const chunksLong = await collectStreamChunks(long);
const finishChunksLong = chunksOfType(chunksLong, 'finish');
expect(finishChunksLong.length).toBeGreaterThan(0);
const finishLong = finishChunksLong[0] as StreamChunk & { type: 'finish' };
// Longer prompt should use more completion tokens (longer response)
expect(finishLong.usage!.completionTokens).toBeGreaterThan(finishShort.usage!.completionTokens);
});
});

View file

@ -0,0 +1,104 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
collectTextDeltas,
findAllToolResults,
createAgentWithAlwaysErrorTool,
createAgentWithFlakyTool,
} from './helpers';
import type { StreamChunk } from './helpers';
const describe = describeIf('anthropic');
describe('tool error handling integration', () => {
it('does not crash when a tool throws — stream completes with a finish chunk', async () => {
const agent = createAgentWithAlwaysErrorTool('anthropic');
const { stream } = await agent.stream('Fetch the data for id "abc123".');
const chunks = await collectStreamChunks(stream);
// Stream must never emit an error chunk
const errorChunks = chunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
// Stream must close with a finish chunk whose reason is not 'error'
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.finishReason).not.toBe('error');
});
it('does not crash when a tool throws — generate returns finishReason stop', async () => {
const agent = createAgentWithAlwaysErrorTool('anthropic');
const result = await agent.generate('Fetch the data for id "abc123".');
expect(result.error).toBeUndefined();
expect(result.finishReason).toBe('stop');
});
it('LLM receives the error message and acknowledges it in the response', async () => {
const agent = createAgentWithAlwaysErrorTool('anthropic');
const { stream } = await agent.stream('Fetch the data for id "abc123".');
const chunks = await collectStreamChunks(stream);
// Verify there IS a text response (LLM acknowledged the error)
const text = collectTextDeltas(chunks);
expect(text.length).toBeGreaterThan(0);
// The response should mention the failure (error was visible to LLM)
const mentionsFailure = /error|fail|unavailable|timeout|unable|could not/i.test(text);
expect(mentionsFailure).toBe(true);
});
it('error tool-result appears in the message list', async () => {
const agent = createAgentWithAlwaysErrorTool('anthropic');
const { stream } = await agent.stream('Fetch the data for id "abc123".');
const chunks = await collectStreamChunks(stream);
// There should be a tool-result message in the stream
const messageChunks = chunksOfType(chunks, 'message');
const toolResults = findAllToolResults(messageChunks.map((c) => c.message));
// The tool should have been called and produced a result (even if it errored)
expect(toolResults.length).toBeGreaterThan(0);
const brokenResult = toolResults.find((r) => r.toolName === 'broken_tool');
expect(brokenResult).toBeDefined();
});
it('LLM can self-correct by retrying a flaky tool', async () => {
const { agent, callCount } = createAgentWithFlakyTool('anthropic');
const result = await agent.generate('Fetch the data for id "xyz".');
// Tool was called more than once — LLM retried after seeing the error
expect(callCount()).toBeGreaterThanOrEqual(2);
// Agent completed successfully
expect(result.error).toBeUndefined();
expect(result.finishReason).toBe('stop');
});
it('LLM self-correction: stream mode — flaky tool succeeds on retry', async () => {
const { agent, callCount } = createAgentWithFlakyTool('anthropic');
const { stream } = await agent.stream('Fetch the data for id "xyz".');
const chunks = await collectStreamChunks(stream);
// No error chunk in the stream
const errorChunks = chunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
// Tool was retried
expect(callCount()).toBeGreaterThanOrEqual(2);
// Response should mention success or the value
const text = collectTextDeltas(chunks);
expect(text.length).toBeGreaterThan(0);
});
});

View file

@ -0,0 +1,185 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
createAgentWithInterruptibleTool,
createAgentWithMixedTools,
createAgentWithParallelInterruptibleCalls,
} from './helpers';
import { isLlmMessage, type StreamChunk } from '../../index';
const describe = describeIf('anthropic');
describe('tool interrupt integration', () => {
it('pauses the stream when a tool suspends', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
const { stream: fullStream } = await agent.stream('Delete the file /tmp/test.txt');
const chunks = await collectStreamChunks(fullStream);
const chunkTypes = chunks.map((c) => c.type);
expect(chunkTypes).toContain('tool-call-suspended');
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBe(1);
const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
expect(suspended.toolName).toBe('delete_file');
expect(suspended.runId).toBeTruthy();
expect(suspended.toolCallId).toBeTruthy();
expect(suspended.suspendPayload).toEqual(
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
expect.objectContaining({ message: expect.any(String), severity: 'destructive' }),
);
// No tool-result should appear (tool is suspended)
const contentChunks = chunks.filter(
(c) =>
c.type === 'message' &&
isLlmMessage(c.message) &&
c.message.content.some((part) => part.type === 'tool-result'),
);
expect(contentChunks).toHaveLength(0);
});
it('resumes the stream after resume with approval', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
const { stream: fullStream } = await agent.stream('Delete the file /tmp/test.txt');
const chunks = await collectStreamChunks(fullStream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBe(1);
const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
const resumedStream = await agent.resume(
'stream',
{ approved: true },
{ runId: suspended.runId!, toolCallId: suspended.toolCallId! },
);
const resumedChunks = await collectStreamChunks(resumedStream.stream);
const resumedTypes = resumedChunks.map((c) => c.type);
// After approval, tool-result should appear as content chunk
const toolResultChunks = resumedChunks.filter(
(c) =>
c.type === 'message' &&
isLlmMessage(c.message) &&
c.message.content.some((c) => c.type === 'tool-result'),
);
expect(toolResultChunks.length).toBeGreaterThan(0);
expect(resumedTypes).toContain('text-delta');
});
it('resumes the stream after resume with denial', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
const { stream: fullStream } = await agent.stream('Delete the file /tmp/test.txt');
const chunks = await collectStreamChunks(fullStream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBe(1);
const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
const resumedStream = await agent.resume(
'stream',
{ approved: false },
{ runId: suspended.runId!, toolCallId: suspended.toolCallId! },
);
const resumedChunks = await collectStreamChunks(resumedStream.stream);
const resumedTypes = resumedChunks.map((c) => c.type);
expect(resumedTypes).toContain('text-delta');
});
it('resumes each pending tool call one by one when multiple tool calls are suspended', async () => {
const agent = createAgentWithParallelInterruptibleCalls('anthropic');
const { stream: fullStream } = await agent.stream(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls. After deleting all files, tell me whether you succeeded',
);
const chunks = await collectStreamChunks(fullStream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
// The first interruptible tool call suspends, halting the loop.
// Only 1 suspended chunk is emitted even though 2 tool calls were made.
expect(suspendedChunks.length).toBe(1);
const suspended1 = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
expect(suspended1.toolName).toBe('delete_file');
// Resume the first suspended tool call
const stream2 = await agent.resume(
'stream',
{ approved: true },
{ runId: suspended1.runId!, toolCallId: suspended1.toolCallId! },
);
const chunks2 = await collectStreamChunks(stream2.stream);
const suspendedChunks2 = chunksOfType(chunks2, 'tool-call-suspended');
// The second tool call should now be suspended (not an error)
expect(suspendedChunks2.length).toBe(1);
const suspended2 = suspendedChunks2[0] as StreamChunk & { type: 'tool-call-suspended' };
expect(suspended2.toolCallId).not.toBe(suspended1.toolCallId);
expect(suspended2.toolName).toBe('delete_file');
// Resume the second suspended tool call
const stream3 = await agent.resume(
'stream',
{ approved: true },
{ runId: suspended2.runId!, toolCallId: suspended2.toolCallId! },
);
const chunks3 = await collectStreamChunks(stream3.stream);
// After all original tool calls are resolved, the agent loop should
// continue without crashing (no AI_MissingToolResultsError).
// The LLM may respond with text or make additional tool calls.
const errorChunks = chunks3.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(chunks3, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.finishReason).not.toBe('error');
});
it('auto-executes non-interruptible tools while suspending interruptible ones', async () => {
const agent = createAgentWithMixedTools('anthropic');
const { stream: fullStream } = await agent.stream(
'You must call both tools: first call list_files with dir="/home", then call delete_file with path="/home/readme.md". Do not skip either tool.',
);
const chunks = await collectStreamChunks(fullStream);
// list_files should auto-execute — its result should appear as content
const toolResultChunks = chunks.filter(
(c) =>
c.type === 'message' &&
isLlmMessage(c.message) &&
c.message.content.some((c) => c.type === 'tool-result'),
);
expect(toolResultChunks.length).toBeGreaterThan(0);
// delete_file should be suspended
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
const deleteSuspended = suspendedChunks.find(
(c) => (c as StreamChunk & { type: 'tool-call-suspended' }).toolName === 'delete_file',
);
// If the LLM called delete_file, it should have been suspended
if (deleteSuspended) {
expect(deleteSuspended).toBeDefined();
}
});
});

View file

@ -0,0 +1,55 @@
import { expect, it } from 'vitest';
import {
chunksOfType,
collectStreamChunks,
createAgentWithToContentTool,
describeIf,
} from './helpers';
import { filterLlmMessages } from '../../index';
import type { AgentMessage, StreamChunk } from '../../index';
const describe = describeIf('anthropic');
describe('tool-result to message integration', () => {
it('adds a custom message to generate result that is visible to user but not to the LLM', async () => {
const agent = createAgentWithToContentTool('anthropic');
const result = await agent.generate('What is 3 + 4?');
// The custom message must appear in result.messages
const customMessages = result.messages.filter((m) => m.type === 'custom');
expect(customMessages.length).toBeGreaterThan(0);
const toolResultMsg = customMessages.find((m) => m.type === 'custom' && 'dummy' in m.data) as
| { type: 'custom'; data: { dummy: string } }
| undefined;
expect(toolResultMsg).toBeDefined();
expect(toolResultMsg!.data.dummy).toContain('dummy message. Tool output');
// filterLlmMessages must strip the custom message — the LLM never sees it.
// The filtered count must be less than total because custom messages were removed.
const llmMessages = filterLlmMessages(result.messages);
expect(llmMessages.length).toBeLessThan(result.messages.length);
});
it('emits toContent result as a content chunk in the stream', async () => {
const agent = createAgentWithToContentTool('anthropic');
const { stream } = await agent.stream('What is 5 + 6?');
const chunks = await collectStreamChunks(stream);
// Must contain at least one content chunk with the custom text from toContent
const messageChunks = chunksOfType(chunks, 'message') as Array<
StreamChunk & { type: 'message'; message: AgentMessage }
>;
const toContentChunk = messageChunks.find(
(c) => c.message.type === 'custom' && 'dummy' in c.message.data,
);
expect(toContentChunk).toBeDefined();
expect(
(toContentChunk!.message as { type: 'custom'; data: { dummy: string } }).data.dummy,
).toContain('dummy message. Tool output');
});
});

View file

@ -0,0 +1,170 @@
import { expect, it } from 'vitest';
import { describeIf, collectStreamChunks, chunksOfType, getModel } from './helpers';
import { Agent } from '../../index';
import type { StreamChunk } from '../../index';
const describeAnthropic = describeIf('anthropic');
describeAnthropic('usage and cost (Anthropic)', () => {
it('returns token usage on generate result', async () => {
const agent = new Agent('usage-test').model(getModel('anthropic')).instructions('Be concise.');
const result = await agent.generate('Say hello');
expect(result.usage).toBeDefined();
expect(result.usage!.promptTokens).toBeGreaterThan(0);
expect(result.usage!.completionTokens).toBeGreaterThan(0);
expect(result.usage!.totalTokens).toBe(
result.usage!.promptTokens + result.usage!.completionTokens,
);
});
it('returns token usage on stream finish chunk', async () => {
const agent = new Agent('usage-stream-test')
.model(getModel('anthropic'))
.instructions('Be concise.');
const { stream: fullStream } = await agent.stream('Say hello');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.usage).toBeDefined();
expect(finish.usage!.promptTokens).toBeGreaterThan(0);
expect(finish.usage!.completionTokens).toBeGreaterThan(0);
expect(finish.usage!.totalTokens).toBe(
finish.usage!.promptTokens + finish.usage!.completionTokens,
);
});
it('includes estimated cost from models.dev pricing', async () => {
const agent = new Agent('cost-test').model(getModel('anthropic')).instructions('Be concise.');
const result = await agent.generate('Say hello');
expect(result.usage).toBeDefined();
expect(result.usage!.cost).toBeDefined();
expect(result.usage!.cost).toBeGreaterThan(0);
// Sanity check: a simple "say hello" should cost less than $0.01
expect(result.usage!.cost!).toBeLessThan(0.01);
});
it('includes model ID in generate result', async () => {
const agent = new Agent('model-test').model(getModel('anthropic')).instructions('Be concise.');
const result = await agent.generate('Say hello');
expect(result.model).toBe(getModel('anthropic'));
});
it('includes cost in stream finish chunk', async () => {
const agent = new Agent('cost-stream-test')
.model(getModel('anthropic'))
.instructions('Be concise.');
const { stream: fullStream } = await agent.stream('Say hello');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.usage).toBeDefined();
expect(finish.usage!.cost).toBeDefined();
expect(finish.usage!.cost).toBeGreaterThan(0);
});
it('aggregates sub-agent usage when using asTool()', async () => {
const subAgent = new Agent('translator')
.model(getModel('anthropic'))
.instructions('Translate the input to French. Reply with only the translation.');
const parentAgent = new Agent('orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are an orchestrator. When asked to translate, use the translator tool. Be concise.',
)
.tool(subAgent.asTool('Translate text to French'));
const result = await parentAgent.generate('Translate "hello world" to French');
// Parent should have its own usage
expect(result.usage).toBeDefined();
expect(result.usage!.promptTokens).toBeGreaterThan(0);
expect(result.usage!.cost).toBeGreaterThan(0);
expect(result.model).toBe(getModel('anthropic'));
// Sub-agent usage should be captured
expect(result.subAgentUsage).toBeDefined();
expect(result.subAgentUsage!.length).toBeGreaterThan(0);
const translatorUsage = result.subAgentUsage!.find((s) => s.agent === 'translator');
expect(translatorUsage).toBeDefined();
expect(translatorUsage!.usage.promptTokens).toBeGreaterThan(0);
expect(translatorUsage!.usage.cost).toBeGreaterThan(0);
// Total cost should be parent + sub-agent
expect(result.totalCost).toBeDefined();
expect(result.totalCost!).toBeGreaterThan(result.usage!.cost!);
expect(result.totalCost!).toBeCloseTo(result.usage!.cost! + translatorUsage!.usage.cost!, 6);
});
it('aggregates sub-agent usage via stream()', async () => {
const subAgent = new Agent('stream-translator')
.model(getModel('anthropic'))
.instructions('Translate the input to French. Reply with only the translation.');
const parentAgent = new Agent('stream-orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are an orchestrator. When asked to translate, use the stream-translator tool. Be concise.',
)
.tool(subAgent.asTool('Translate text to French'));
const { stream: fullStream } = await parentAgent.stream('Translate "goodbye" to French');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[finishChunks.length - 1] as StreamChunk & { type: 'finish' };
// Should have usage with cost
expect(finish.usage).toBeDefined();
expect(finish.usage!.cost).toBeGreaterThan(0);
// Should include model
expect(finish.model).toBe(getModel('anthropic'));
// Should include sub-agent usage
expect(finish.subAgentUsage).toBeDefined();
expect(finish.subAgentUsage!.length).toBeGreaterThan(0);
const translatorUsage = finish.subAgentUsage!.find((s) => s.agent === 'stream-translator');
expect(translatorUsage).toBeDefined();
expect(translatorUsage!.usage.promptTokens).toBeGreaterThan(0);
expect(translatorUsage!.usage.cost).toBeGreaterThan(0);
// Total cost should include parent + sub-agent
expect(finish.totalCost).toBeDefined();
expect(finish.totalCost!).toBeGreaterThan(finish.usage!.cost!);
});
});
const describeOpenAI = describeIf('openai');
describeOpenAI('usage and cost (OpenAI)', () => {
it('returns token usage and cost on generate result', async () => {
const agent = new Agent('openai-usage-test')
.model(getModel('openai'))
.instructions('Be concise.');
const result = await agent.generate('Say hello');
expect(result.usage).toBeDefined();
expect(result.usage!.promptTokens).toBeGreaterThan(0);
expect(result.usage!.completionTokens).toBeGreaterThan(0);
expect(result.usage!.cost).toBeDefined();
expect(result.usage!.cost).toBeGreaterThan(0);
});
});

View file

@ -0,0 +1,240 @@
import { afterEach, beforeEach, expect, it } from 'vitest';
import { Agent } from '../../../sdk/agent';
import type { FileEntry } from '../../../workspace/types';
import { Workspace } from '../../../workspace/workspace';
import { InMemoryFilesystem, FakeProcessManager, FakeSandbox } from '../../workspace/test-utils';
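// The fake filesystem, process manager, and sandbox keep the workspace tools hermetic;
// only the model call itself is live in these integration tests.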
import {
chunksOfType,
collectStreamChunks,
collectTextDeltas,
describeIf,
findAllToolCalls,
findAllToolResults,
getModel,
} from '../helpers';
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
const describe = describeIf('anthropic');
describe('workspace agent integration', () => {
let memFs: InMemoryFilesystem;
let fakeProcessManager: FakeProcessManager;
let fakeSandbox: FakeSandbox;
let workspace: Workspace;
beforeEach(async () => {
memFs = new InMemoryFilesystem('agent-test-fs');
fakeProcessManager = new FakeProcessManager();
fakeSandbox = new FakeSandbox('agent-test', fakeProcessManager);
workspace = new Workspace({
id: 'agent-ws',
filesystem: memFs,
sandbox: fakeSandbox,
});
await workspace.init();
});
afterEach(async () => {
await workspace.destroy();
});
it('agent uses workspace_write_file and workspace_read_file tools', async () => {
const agent = new Agent('workspace-file-test')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. When asked to create a file, use workspace_write_file. ' +
'When asked to read a file, use workspace_read_file. Be concise.',
)
.workspace(workspace);
const result = await agent.generate(
'Write "Hello from n8n!" to /greeting.txt, then read it back and tell me the contents. You MUST call both tools',
);
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
const toolCalls = findAllToolCalls(result.messages);
const toolResults = findAllToolResults(result.messages);
const writeCall = toolCalls.find((tc) => tc.toolName === 'workspace_write_file');
expect(writeCall).toBeDefined();
const readCall = toolCalls.find((tc) => tc.toolName === 'workspace_read_file');
expect(readCall).toBeDefined();
const readResult = toolResults.find((tr) => tr.toolName === 'workspace_read_file');
expect(readResult).toBeDefined();
expect((readResult!.result as { content: string }).content).toContain('Hello from n8n!');
expect(memFs.getFileContent('/greeting.txt')).toBe('Hello from n8n!');
});
it('agent uses workspace_execute_command tool', async () => {
fakeProcessManager.commandHandler = (cmd) => {
if (cmd.includes('echo')) {
const match = cmd.match(/echo\s+"?([^"]*)"?/);
const text = match?.[1] ?? 'unknown';
return { stdout: `${text}\n`, stderr: '', exitCode: 0 };
}
return { stdout: `ran: ${cmd}\n`, stderr: '', exitCode: 0 };
};
const agent = new Agent('workspace-exec-test')
.model(getModel('anthropic'))
.instructions(
'You are a shell assistant. When asked to run a command, use workspace_execute_command. Be concise.',
)
.workspace(workspace);
const result = await agent.generate('Run the command: echo "n8n workspace test"');
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
const toolCalls = findAllToolCalls(result.messages);
const execCall = toolCalls.find((tc) => tc.toolName === 'workspace_execute_command');
expect(execCall).toBeDefined();
const toolResults = findAllToolResults(result.messages);
const execResult = toolResults.find((tr) => tr.toolName === 'workspace_execute_command');
expect(execResult).toBeDefined();
expect((execResult!.result as { success: boolean }).success).toBe(true);
});
it('agent uses workspace_mkdir and workspace_list_files together', async () => {
await memFs.mkdir('/project', { recursive: true });
await memFs.writeFile('/project/index.ts', 'console.log("hello")');
await memFs.writeFile('/project/README.md', '# Project');
const agent = new Agent('workspace-list-test')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. Use workspace_list_files to list files. Be concise and list the filenames you find.',
)
.workspace(workspace);
const result = await agent.generate('List the files in the /project directory.');
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
const toolCalls = findAllToolCalls(result.messages);
const listCall = toolCalls.find((tc) => tc.toolName === 'workspace_list_files');
expect(listCall).toBeDefined();
const toolResults = findAllToolResults(result.messages);
const listResult = toolResults.find((tr) => tr.toolName === 'workspace_list_files');
expect(listResult).toBeDefined();
const entries = (listResult!.result as unknown as { entries: FileEntry[] }).entries;
const names = entries.map((e) => e.name);
expect(names).toContain('index.ts');
expect(names).toContain('README.md');
});
it('workspace instructions are appended to agent instructions', () => {
new Agent('workspace-instructions-test')
.model(getModel('anthropic'))
.instructions('Base instructions.')
.workspace(workspace);
const tools = workspace.getTools();
expect(tools.length).toBe(13);
const instructions = workspace.getInstructions();
expect(instructions).toContain('Fake sandbox');
expect(instructions).toContain('In-memory filesystem');
});
it('stream: agent writes a file and streams the response', async () => {
const agent = new Agent('workspace-stream-test')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. When asked to create a file, use workspace_write_file. Be very concise.',
)
.workspace(workspace);
const { stream } = await agent.stream(
'Create a file at /hello.txt with the content "streaming works"',
);
const chunks = await collectStreamChunks(stream);
const errorChunks = chunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const lastFinish = finishChunks[finishChunks.length - 1] as {
type: 'finish';
finishReason: string;
};
expect(lastFinish.finishReason).toBe('stop');
const text = collectTextDeltas(chunks);
expect(text.length).toBeGreaterThan(0);
expect(memFs.getFileContent('/hello.txt')).toBe('streaming works');
});
it('agent uses workspace_file_stat to get file metadata', async () => {
await memFs.writeFile('/data.json', '{"key": "value", "count": 42}');
const agent = new Agent('workspace-stat-test')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. Use workspace_file_stat to get file info. Report the file size and type. Be concise.',
)
.workspace(workspace);
const result = await agent.generate('What is the size and type of /data.json?');
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
const toolCalls = findAllToolCalls(result.messages);
const statCall = toolCalls.find((tc) => tc.toolName === 'workspace_file_stat');
expect(statCall).toBeDefined();
const toolResults = findAllToolResults(result.messages);
const statResult = toolResults.find((tr) => tr.toolName === 'workspace_file_stat');
expect(statResult).toBeDefined();
const stat = statResult!.result as { type: string; size: number };
expect(stat.type).toBe('file');
expect(stat.size).toBe(29);
});
it('agent handles multi-step workflow: mkdir, write, list, read', async () => {
const agent = new Agent('workspace-workflow-test')
.model(getModel('anthropic'))
.instructions(
"You are a file manager. Follow the user's instructions step by step using workspace tools. " +
'Available: workspace_mkdir, workspace_write_file, workspace_list_files, workspace_read_file. Be concise.',
)
.workspace(workspace);
const result = await agent.generate(
'1. Create a directory /app\n' +
'2. Write "export default {}" to /app/config.ts\n' +
'3. List files in /app\n' +
'4. Read /app/config.ts and tell me its contents',
);
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
const toolResults = findAllToolResults(result.messages);
const resultToolNames = toolResults.map((tr) => tr.toolName);
expect(resultToolNames).toContain('workspace_write_file');
expect(resultToolNames).toContain('workspace_read_file');
const readResult = toolResults.find((tr) => tr.toolName === 'workspace_read_file');
expect(readResult).toBeDefined();
expect((readResult!.result as { content: string }).content).toContain('export default {}');
expect(memFs.getFileContent('/app/config.ts')).toBe('export default {}');
});
});

View file

@ -0,0 +1,59 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { describeIf, findLastTextContent } from './helpers';
import { Agent, Tool, filterLlmMessages } from '../../index';
const describe = describeIf('anthropic');
describe('Zod validation errors surface to LLM and allow self-correction', () => {
/**
* Verify that when the LLM receives a Zod error result, it shows up in the
* conversation as an actual tool-result message with an error payload (not a
* thrown exception), so the agent loop continues rather than aborting.
*/
it('includes the Zod error text in the tool-result visible to the LLM', async () => {
const strictTool = new Tool('find_user')
.description('Find a user by their numeric age (18-99 only).')
.input(
z.object({
age: z
.number()
.int()
.min(18, 'age must be at least 18')
.max(99, 'age must be at most 99')
.describe('User age (18-99)'),
}),
)
.output(z.object({ user: z.string() }))
.handler(async ({ age }) => ({ user: `User aged ${age}` }));
const agent = new Agent('age-correction-agent')
.model('anthropic/claude-haiku-4-5')
.instructions(
'You are a user directory. Use find_user to look up users by age. ' +
'The age must be between 18 and 99. ' +
'If validation fails, correct the age and retry. Be very concise.',
)
.tool(strictTool);
// "150" is out of range — should trigger a Zod error, then retry with a valid age
const result = await agent.generate(
'Find a user aged 150. If that age is invalid, use 25 instead and retry. You MUST find a user aged 150, and only then use 25',
);
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
// At least two tool-result messages: one error, one success
const allMessages = filterLlmMessages(result.messages);
const toolResultMessages = allMessages.filter((m) =>
m.content.some((c) => c.type === 'tool-result'),
);
expect(toolResultMessages.length).toBeGreaterThanOrEqual(2);
// The final response should mention a user (age 25 or similar)
const text = findLastTextContent(result.messages);
expect(text).toBeTruthy();
});
});

View file

@ -0,0 +1,66 @@
import type { LanguageModel } from 'ai';
import { createModel } from '../runtime/model-factory';
jest.mock('@ai-sdk/anthropic', () => ({
createAnthropic: (opts?: { apiKey?: string; baseURL?: string }) => (model: string) => ({
provider: 'anthropic',
modelId: model,
apiKey: opts?.apiKey,
baseURL: opts?.baseURL,
specificationVersion: 'v3',
}),
}));
jest.mock('@ai-sdk/openai', () => ({
createOpenAI: (opts?: { apiKey?: string; baseURL?: string }) => (model: string) => ({
provider: 'openai',
modelId: model,
apiKey: opts?.apiKey,
baseURL: opts?.baseURL,
specificationVersion: 'v3',
}),
}));
describe('createModel', () => {
it('should accept a string config', () => {
const model = createModel('anthropic/claude-sonnet-4-5') as unknown as Record<string, unknown>;
expect(model.provider).toBe('anthropic');
expect(model.modelId).toBe('claude-sonnet-4-5');
});
it('should accept an object config with url', () => {
const model = createModel({
id: 'openai/gpt-4o',
apiKey: 'sk-test',
url: 'https://custom.endpoint.com/v1',
}) as unknown as Record<string, unknown>;
expect(model.provider).toBe('openai');
expect(model.modelId).toBe('gpt-4o');
expect(model.apiKey).toBe('sk-test');
expect(model.baseURL).toBe('https://custom.endpoint.com/v1');
});
it('should pass through a prebuilt LanguageModel', () => {
const prebuilt = {
doGenerate: jest.fn(),
doStream: jest.fn(),
specificationVersion: 'v2' as const,
modelId: 'custom-model',
provider: 'custom',
defaultObjectGenerationMode: undefined,
} as unknown as LanguageModel;
const result = createModel(prebuilt);
expect(result).toBe(prebuilt);
});
it('should preserve complex model IDs after the provider prefix', () => {
const model = createModel('openai/ft:gpt-4o:my-org:custom:abc123') as unknown as Record<
string,
unknown
>;
expect(model.provider).toBe('openai');
expect(model.modelId).toBe('ft:gpt-4o:my-org:custom:abc123');
});
});
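
Read together, these cases pin down the string form of the config: the provider is whatever precedes the first `/`, and everything after it (separators included) is passed through as the model id. A rough illustration of that split, for orientation only, not the actual `model-factory` code:

```ts
// Illustration only; the real createModel also handles object configs and prebuilt models.
function splitModelString(id: string): { provider: string; modelId: string } {
	const [provider, ...rest] = id.split('/');
	return { provider, modelId: rest.join('/') };
}

splitModelString('anthropic/claude-sonnet-4-5');
// → { provider: 'anthropic', modelId: 'claude-sonnet-4-5' }
splitModelString('openai/ft:gpt-4o:my-org:custom:abc123');
// → { provider: 'openai', modelId: 'ft:gpt-4o:my-org:custom:abc123' }
```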

View file

@ -0,0 +1,553 @@
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { SqliteMemory } from '../storage/sqlite-memory';
import type { AgentMessage, Message } from '../types/sdk/message';
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
function makeTempDb(): string {
return path.join(os.tmpdir(), `test-${Date.now()}-${Math.random().toString(36).slice(2)}.db`);
}
function makeMsg(role: 'user' | 'assistant', text: string): Message {
return { role, content: [{ type: 'text', text }] };
}
function textOf(msg: AgentMessage): string {
const m = msg as Message;
return (m.content[0] as { text: string }).text;
}
function makeMemory(dbPath: string, namespace?: string): SqliteMemory {
return new SqliteMemory({ url: `file:${dbPath}`, namespace });
}
// ---------------------------------------------------------------------------
// Thread management
// ---------------------------------------------------------------------------
describe('SqliteMemory — threads', () => {
let dbPath: string;
beforeEach(() => {
dbPath = makeTempDb();
});
afterEach(() => {
try {
fs.unlinkSync(dbPath);
} catch {
/* ignore */
}
});
it('saves and retrieves a thread', async () => {
const mem = makeMemory(dbPath);
const saved = await mem.saveThread({
id: 't-1',
resourceId: 'user-1',
title: 'Hello',
metadata: { foo: 'bar' },
});
expect(saved.id).toBe('t-1');
expect(saved.resourceId).toBe('user-1');
expect(saved.title).toBe('Hello');
expect(saved.metadata).toEqual({ foo: 'bar' });
expect(saved.createdAt).toBeInstanceOf(Date);
expect(saved.updatedAt).toBeInstanceOf(Date);
const fetched = await mem.getThread('t-1');
expect(fetched).not.toBeNull();
expect(fetched!.id).toBe('t-1');
expect(fetched!.title).toBe('Hello');
expect(fetched!.metadata).toEqual({ foo: 'bar' });
});
it('returns null for an unknown thread', async () => {
const mem = makeMemory(dbPath);
const result = await mem.getThread('nonexistent');
expect(result).toBeNull();
});
it('deletes a thread and its messages', async () => {
const mem = makeMemory(dbPath);
await mem.saveThread({ id: 't-del', resourceId: 'user-1' });
await mem.saveMessages({ threadId: 't-del', messages: [makeMsg('user', 'hi')] });
await mem.deleteThread('t-del');
expect(await mem.getThread('t-del')).toBeNull();
expect(await mem.getMessages('t-del')).toEqual([]);
});
it('preserves createdAt on re-save, updates updatedAt', async () => {
const mem = makeMemory(dbPath);
const first = await mem.saveThread({ id: 't-resave', resourceId: 'user-1', title: 'v1' });
// Small delay to ensure updatedAt differs
await new Promise((r) => setTimeout(r, 20));
const second = await mem.saveThread({ id: 't-resave', resourceId: 'user-1', title: 'v2' });
expect(second.createdAt.getTime()).toBe(first.createdAt.getTime());
expect(second.updatedAt.getTime()).toBeGreaterThanOrEqual(first.updatedAt.getTime());
expect(second.title).toBe('v2');
});
});
// ---------------------------------------------------------------------------
// Message persistence
// ---------------------------------------------------------------------------
describe('SqliteMemory — messages', () => {
let dbPath: string;
beforeEach(() => {
dbPath = makeTempDb();
});
afterEach(() => {
try {
fs.unlinkSync(dbPath);
} catch {
/* ignore */
}
});
it('saves and retrieves messages in order', async () => {
const mem = makeMemory(dbPath);
await mem.saveMessages({
threadId: 't-1',
messages: [
makeMsg('user', 'first'),
makeMsg('assistant', 'second'),
makeMsg('user', 'third'),
],
});
const msgs = await mem.getMessages('t-1');
expect(msgs).toHaveLength(3);
expect(textOf(msgs[0])).toBe('first');
expect(textOf(msgs[1])).toBe('second');
expect(textOf(msgs[2])).toBe('third');
});
it('respects limit — returns last N messages', async () => {
const mem = makeMemory(dbPath);
// Save messages one at a time to guarantee distinct createdAt timestamps
await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('user', 'msg-1')] });
await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('assistant', 'msg-2')] });
await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('user', 'msg-3')] });
await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('assistant', 'msg-4')] });
const msgs = await mem.getMessages('t-1', { limit: 2 });
expect(msgs).toHaveLength(2);
expect(textOf(msgs[0])).toBe('msg-3');
expect(textOf(msgs[1])).toBe('msg-4');
});
it('isolates messages by thread', async () => {
const mem = makeMemory(dbPath);
await mem.saveMessages({ threadId: 't-a', messages: [makeMsg('user', 'thread-a')] });
await mem.saveMessages({ threadId: 't-b', messages: [makeMsg('user', 'thread-b')] });
const msgsA = await mem.getMessages('t-a');
expect(msgsA).toHaveLength(1);
expect(textOf(msgsA[0])).toBe('thread-a');
const msgsB = await mem.getMessages('t-b');
expect(msgsB).toHaveLength(1);
expect(textOf(msgsB[0])).toBe('thread-b');
});
it('assigns stable IDs — preserves existing, generates for missing', async () => {
const mem = makeMemory(dbPath);
const withId = { ...makeMsg('user', 'has-id'), id: 'custom-id-123' } as unknown as AgentMessage;
const withoutId = makeMsg('assistant', 'no-id');
await mem.saveMessages({ threadId: 't-1', messages: [withId, withoutId] });
const msgs = await mem.getMessages('t-1');
expect(msgs).toHaveLength(2);
// The message with a pre-existing id should keep it
const first = msgs[0] as unknown as { id: string };
expect(first.id).toBe('custom-id-123');
// The message without id should have gotten one assigned
const second = msgs[1] as unknown as { id: string };
expect(typeof second.id).toBe('string');
expect(second.id.length).toBeGreaterThan(0);
});
it('deletes specific messages', async () => {
const mem = makeMemory(dbPath);
const m1 = { ...makeMsg('user', 'keep'), id: 'keep-1' } as unknown as AgentMessage;
const m2 = { ...makeMsg('user', 'delete-me'), id: 'del-1' } as unknown as AgentMessage;
await mem.saveMessages({ threadId: 't-1', messages: [m1, m2] });
await mem.deleteMessages(['del-1']);
const msgs = await mem.getMessages('t-1');
expect(msgs).toHaveLength(1);
expect((msgs[0] as unknown as { id: string }).id).toBe('keep-1');
});
});
// ---------------------------------------------------------------------------
// Working memory
// ---------------------------------------------------------------------------
describe('SqliteMemory — working memory', () => {
let dbPath: string;
beforeEach(() => {
dbPath = makeTempDb();
});
afterEach(() => {
try {
fs.unlinkSync(dbPath);
} catch {
/* ignore */
}
});
it('returns null for an unknown key', async () => {
const mem = makeMemory(dbPath);
const result = await mem.getWorkingMemory({ threadId: 'thread-x', resourceId: 'unknown' });
expect(result).toBeNull();
});
it('saves and retrieves working memory keyed by resourceId', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'some context');
const result = await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' });
expect(result).toBe('some context');
});
it('overwrites working memory on re-save', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'v1');
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'v2');
const result = await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' });
expect(result).toBe('v2');
});
it('isolates working memory by resourceId', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-a', resourceId: 'res-a' }, 'content-a');
await mem.saveWorkingMemory({ threadId: 'thread-b', resourceId: 'res-b' }, 'content-b');
expect(await mem.getWorkingMemory({ threadId: 'thread-a', resourceId: 'res-a' })).toBe(
'content-a',
);
expect(await mem.getWorkingMemory({ threadId: 'thread-b', resourceId: 'res-b' })).toBe(
'content-b',
);
});
it('saves and retrieves working memory keyed by threadId (no resourceId)', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'thread context');
const result = await mem.getWorkingMemory({ threadId: 'thread-1' });
expect(result).toBe('thread context');
});
it('isolates working memory by threadId', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'data 1');
await mem.saveWorkingMemory({ threadId: 'thread-2' }, 'data 2');
expect(await mem.getWorkingMemory({ threadId: 'thread-1' })).toBe('data 1');
expect(await mem.getWorkingMemory({ threadId: 'thread-2' })).toBe('data 2');
});
it('stores scope=resource when resourceId is provided', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'resource content');
const { createClient } = await import('@libsql/client');
const db = createClient({ url: `file:${dbPath}` });
const result = await db.execute('SELECT scope FROM working_memory WHERE key = ?', ['res-1']);
expect(result.rows[0].scope).toBe('resource');
});
it('stores scope=thread when only threadId is provided', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'thread content');
const { createClient } = await import('@libsql/client');
const db = createClient({ url: `file:${dbPath}` });
const result = await db.execute('SELECT scope FROM working_memory WHERE key = ?', ['thread-1']);
expect(result.rows[0].scope).toBe('thread');
});
it('does not mix resource-scoped and thread-scoped entries with the same key value', async () => {
const mem = makeMemory(dbPath);
const sharedKey = 'same-id';
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: sharedKey }, 'resource data');
await mem.saveWorkingMemory({ threadId: sharedKey }, 'thread data');
expect(await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: sharedKey })).toBe(
'resource data',
);
expect(await mem.getWorkingMemory({ threadId: sharedKey })).toBe('thread data');
});
});
// ---------------------------------------------------------------------------
// Restart survival
// ---------------------------------------------------------------------------
describe('SqliteMemory — restart survival', () => {
let dbPath: string;
beforeEach(() => {
dbPath = makeTempDb();
});
afterEach(() => {
try {
fs.unlinkSync(dbPath);
} catch {
/* ignore */
}
});
it('data survives a new SqliteMemory instance on same file', async () => {
const mem1 = makeMemory(dbPath);
await mem1.saveThread({ id: 't-surv', resourceId: 'user-1', title: 'persistent' });
await mem1.saveMessages({ threadId: 't-surv', messages: [makeMsg('user', 'hello from past')] });
await mem1.saveWorkingMemory({ threadId: 't-surv', resourceId: 'user-1' }, 'wm-data');
// Create a brand new instance pointing at the same file
const mem2 = makeMemory(dbPath);
const thread = await mem2.getThread('t-surv');
expect(thread).not.toBeNull();
expect(thread!.title).toBe('persistent');
const msgs = await mem2.getMessages('t-surv');
expect(msgs).toHaveLength(1);
expect(textOf(msgs[0])).toBe('hello from past');
const wm = await mem2.getWorkingMemory({ threadId: 't-surv', resourceId: 'user-1' });
expect(wm).toBe('wm-data');
});
});
// ---------------------------------------------------------------------------
// Search
// ---------------------------------------------------------------------------
describe('SqliteMemory — queryEmbeddings', () => {
let dbPath: string;
let mem: SqliteMemory;
beforeEach(() => {
dbPath = makeTempDb();
mem = makeMemory(dbPath);
});
afterEach(() => {
try {
fs.unlinkSync(dbPath);
} catch {
/* ignore */
}
});
it('returns empty array when no embeddings stored', async () => {
const results = await mem.queryEmbeddings({
threadId: 't1',
vector: new Array<number>(3).fill(0),
topK: 5,
});
expect(results).toEqual([]);
});
it('returns nearest neighbours by cosine similarity', async () => {
await mem.saveThread({ id: 't1', resourceId: 'u1' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'u1',
entries: [
{ id: 'msg-cats', vector: [1.0, 0.0, 0.0], text: 'About cats', model: 'test' },
{ id: 'msg-dogs', vector: [0.0, 1.0, 0.0], text: 'About dogs', model: 'test' },
{ id: 'msg-kittens', vector: [0.9, 0.1, 0.0], text: 'About kittens', model: 'test' },
],
});
// Query close to [1,0,0] — should return cats first, then kittens
const results = await mem.queryEmbeddings({
scope: 'resource',
resourceId: 'u1',
vector: [1.0, 0.0, 0.0],
topK: 2,
});
expect(results).toHaveLength(2);
expect(results[0].id).toBe('msg-cats');
expect(results[0].score).toBeGreaterThan(0.9);
expect(results[1].id).toBe('msg-kittens');
});
it('filters by threadId with scope=thread', async () => {
await mem.saveThread({ id: 't1', resourceId: 'u1' });
await mem.saveThread({ id: 't2', resourceId: 'u1' });
await mem.saveEmbeddings({
threadId: 't1',
entries: [{ id: 'msg-t1', vector: [1.0, 0.0, 0.0], text: 'Thread 1', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
entries: [{ id: 'msg-t2', vector: [1.0, 0.0, 0.0], text: 'Thread 2', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'thread',
threadId: 't1',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(1);
expect(results[0].id).toBe('msg-t1');
});
it('filters by resourceId with scope=resource', async () => {
await mem.saveThread({ id: 't1', resourceId: 'user-a' });
await mem.saveThread({ id: 't2', resourceId: 'user-a' });
await mem.saveThread({ id: 't3', resourceId: 'user-b' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'user-a',
entries: [{ id: 'msg-1', vector: [1.0, 0.0, 0.0], text: 'User A thread 1', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'user-a',
entries: [{ id: 'msg-2', vector: [0.9, 0.1, 0.0], text: 'User A thread 2', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't3',
resourceId: 'user-b',
entries: [{ id: 'msg-3', vector: [1.0, 0.0, 0.0], text: 'User B thread 3', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'resource',
resourceId: 'user-a',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(2);
const ids = results.map((r) => r.id);
expect(ids).toContain('msg-1');
expect(ids).toContain('msg-2');
expect(ids).not.toContain('msg-3');
});
it('defaults to resource scope — returns all embeddings for a resourceId across threads', async () => {
await mem.saveThread({ id: 't1', resourceId: 'user-x' });
await mem.saveThread({ id: 't2', resourceId: 'user-x' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'user-x',
entries: [{ id: 'msg-a', vector: [1.0, 0.0, 0.0], text: 'Thread 1 msg', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'user-x',
entries: [{ id: 'msg-b', vector: [0.9, 0.1, 0.0], text: 'Thread 2 msg', model: 'test' }],
});
// No explicit scope — should default to 'resource'
const results = await mem.queryEmbeddings({
resourceId: 'user-x',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(2);
});
it('resource scope excludes embeddings from other resources', async () => {
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'res-1',
entries: [{ id: 'msg-r1', vector: [1.0, 0.0, 0.0], text: 'Resource 1', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'res-2',
entries: [{ id: 'msg-r2', vector: [1.0, 0.0, 0.0], text: 'Resource 2', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'resource',
resourceId: 'res-1',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(1);
expect(results[0].id).toBe('msg-r1');
});
it('thread scope only returns embeddings from the specified thread', async () => {
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'user-1',
entries: [
{ id: 'msg-t1a', vector: [1.0, 0.0, 0.0], text: 'Thread 1 A', model: 'test' },
{ id: 'msg-t1b', vector: [0.0, 1.0, 0.0], text: 'Thread 1 B', model: 'test' },
],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'user-1',
entries: [{ id: 'msg-t2', vector: [1.0, 0.0, 0.0], text: 'Thread 2', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'thread',
threadId: 't1',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(2);
const ids = results.map((r) => r.id);
expect(ids).toContain('msg-t1a');
expect(ids).toContain('msg-t1b');
expect(ids).not.toContain('msg-t2');
});
});
// ---------------------------------------------------------------------------
// Namespace validation
// ---------------------------------------------------------------------------
describe('SqliteMemory — namespace', () => {
it('rejects invalid namespace characters', () => {
expect(() => new SqliteMemory({ url: 'file::memory:', namespace: 'bad-ns!' })).toThrow(
/Invalid namespace/,
);
});
it('accepts valid namespace', () => {
expect(() => new SqliteMemory({ url: 'file::memory:', namespace: 'my_ns_01' })).not.toThrow();
});
});
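
For orientation, the surface these suites exercise reduces to a small usage sketch. The package import path is an assumption (the class lives under `storage/sqlite-memory`), and the file path, namespace, and ids are made up:

```ts
// Usage sketch based on the behaviour above; not copied from package docs.
import { SqliteMemory } from '@n8n/agents';

const memory = new SqliteMemory({ url: 'file:./agent-memory.db', namespace: 'demo_ns_01' });

await memory.saveThread({ id: 'thread-1', resourceId: 'user-42', title: 'Onboarding' });
await memory.saveMessages({
	threadId: 'thread-1',
	messages: [{ role: 'user', content: [{ type: 'text', text: 'Hi there' }] }],
});

const recent = await memory.getMessages('thread-1', { limit: 20 }); // last 20, oldest first
await memory.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'user-42' }, '# Name: Alice');
const wm = await memory.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-42' });
```

Data written this way survives a new `SqliteMemory` instance pointed at the same file, and deleting a thread removes its messages as well.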

View file

@ -0,0 +1,157 @@
import { stripOrphanedToolMessages } from '../runtime/strip-orphaned-tool-messages';
import { isLlmMessage, toDbMessage } from '../sdk/message';
import type { AgentDbMessage, AgentMessage, Message } from '../types/sdk/message';
function seed(messages: AgentMessage[]): AgentDbMessage[] {
return messages.map(toDbMessage);
}
describe('stripOrphanedToolMessages', () => {
it('returns messages unchanged when all tool pairs are complete', () => {
const messages = seed([
{ role: 'user', content: [{ type: 'text', text: 'Hello' }] },
{
role: 'assistant',
content: [
{ type: 'text', text: 'Looking up...' },
{ type: 'tool-call', toolCallId: 'c1', toolName: 'lookup', input: {} },
],
},
{
role: 'tool',
content: [{ type: 'tool-result', toolCallId: 'c1', toolName: 'lookup', result: 42 }],
},
{ role: 'assistant', content: [{ type: 'text', text: 'Done.' }] },
]);
const result = stripOrphanedToolMessages(messages);
expect(result).toBe(messages);
});
it('strips orphaned tool-result when matching tool-call is missing', () => {
const messages = seed([
{
role: 'tool',
content: [{ type: 'tool-result', toolCallId: 'c1', toolName: 'lookup', result: 42 }],
},
{ role: 'assistant', content: [{ type: 'text', text: 'There are 42.' }] },
{ role: 'user', content: [{ type: 'text', text: 'Thanks' }] },
]);
const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];
expect(result).toHaveLength(2);
expect(result[0].role).toBe('assistant');
expect(result[1].role).toBe('user');
});
it('strips orphaned tool-call when matching tool-result is missing', () => {
const messages = seed([
{ role: 'user', content: [{ type: 'text', text: 'Check it' }] },
{
role: 'assistant',
content: [
{ type: 'text', text: 'Checking...' },
{ type: 'tool-call', toolCallId: 'c1', toolName: 'lookup', input: {} },
],
},
]);
const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];
expect(result).toHaveLength(2);
const assistantMsg = result[1];
expect(assistantMsg.role).toBe('assistant');
expect(assistantMsg.content).toHaveLength(1);
expect(assistantMsg.content[0].type).toBe('text');
});
it('drops assistant message entirely if it only contained an orphaned tool-call', () => {
const messages = seed([
{ role: 'user', content: [{ type: 'text', text: 'Do it' }] },
{
role: 'assistant',
content: [{ type: 'tool-call', toolCallId: 'c1', toolName: 'action', input: {} }],
},
]);
const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];
expect(result).toHaveLength(1);
expect(result[0].role).toBe('user');
});
it('handles mixed scenario: one complete pair and one orphaned result', () => {
const messages = seed([
{
role: 'tool',
content: [
{ type: 'tool-result', toolCallId: 'orphan', toolName: 'lookup', result: 'stale' },
],
},
{ role: 'assistant', content: [{ type: 'text', text: 'Old result' }] },
{ role: 'user', content: [{ type: 'text', text: 'New question' }] },
{
role: 'assistant',
content: [
{ type: 'text', text: 'Looking up...' },
{ type: 'tool-call', toolCallId: 'c2', toolName: 'lookup', input: {} },
],
},
{
role: 'tool',
content: [{ type: 'tool-result', toolCallId: 'c2', toolName: 'lookup', result: 99 }],
},
{ role: 'assistant', content: [{ type: 'text', text: '99 items' }] },
]);
const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];
expect(result).toHaveLength(5);
expect(result[0].role).toBe('assistant');
expect(result[0].content[0]).toEqual(
expect.objectContaining({ type: 'text', text: 'Old result' }),
);
const toolCallMsg = result.find(
(m) => m.role === 'assistant' && m.content.some((c) => c.type === 'tool-call'),
);
expect(toolCallMsg).toBeDefined();
const toolResultMsg = result.find((m) => m.role === 'tool');
expect(toolResultMsg).toBeDefined();
});
it('preserves custom (non-LLM) messages', () => {
const customMsg: AgentDbMessage = {
id: 'custom-1',
type: 'custom',
messageType: 'notification',
data: { info: 'hello' },
} as unknown as AgentDbMessage;
const messages: AgentDbMessage[] = [
customMsg,
...seed([
{
role: 'tool',
content: [{ type: 'tool-result', toolCallId: 'orphan', toolName: 'x', result: null }],
},
]),
];
const result = stripOrphanedToolMessages(messages);
expect(result).toHaveLength(1);
expect(result[0]).toBe(customMsg);
});
it('returns same array reference when no orphans exist (no-op fast path)', () => {
const messages = seed([
{ role: 'user', content: [{ type: 'text', text: 'Hi' }] },
{ role: 'assistant', content: [{ type: 'text', text: 'Hello!' }] },
]);
const result = stripOrphanedToolMessages(messages);
expect(result).toBe(messages);
});
});
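
In practice this gives the runtime a cheap sanitising pass over persisted history before it is replayed to the model; because the no-orphan fast path returns the same array reference, callers can tell at a glance whether anything was dropped. A small sketch of that internal call pattern (runtime code, not public API; paths as seen from this test directory):

```ts
import { stripOrphanedToolMessages } from '../runtime/strip-orphaned-tool-messages';
import type { AgentDbMessage } from '../types/sdk/message';

// Internal runtime sketch: sanitise stored history before replaying it to the model.
function sanitizeHistory(dbMessages: AgentDbMessage[]): AgentDbMessage[] {
	const sanitized = stripOrphanedToolMessages(dbMessages);
	if (sanitized !== dbMessages) {
		// at least one unpaired tool-call / tool-result was stripped; the input array is untouched
	}
	return sanitized;
}
```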

View file

@ -0,0 +1,170 @@
import type { TelemetryIntegration } from 'ai';
import { Telemetry } from '../sdk/telemetry';
describe('Telemetry builder', () => {
it('builds with defaults', async () => {
const built = await new Telemetry().build();
expect(built.enabled).toBe(true);
expect(built.recordInputs).toBe(true);
expect(built.recordOutputs).toBe(true);
expect(built.functionId).toBeUndefined();
expect(built.metadata).toBeUndefined();
expect(built.integrations).toEqual([]);
expect(built.tracer).toBeUndefined();
expect(built.provider).toBeUndefined();
});
it('sets all scalar fields', async () => {
const built = await new Telemetry()
.enabled(false)
.functionId('my-agent')
.metadata({ team: 'platform', version: 2 })
.recordInputs(false)
.recordOutputs(false)
.build();
expect(built.enabled).toBe(false);
expect(built.functionId).toBe('my-agent');
expect(built.metadata).toEqual({ team: 'platform', version: 2 });
expect(built.recordInputs).toBe(false);
expect(built.recordOutputs).toBe(false);
});
it('accepts a pre-built tracer', async () => {
const fakeTracer = { startSpan: jest.fn() };
const built = await new Telemetry().tracer(fakeTracer).build();
expect(built.tracer).toBe(fakeTracer);
});
it('throws when both .tracer() and .otlpEndpoint() are set', async () => {
await expect(
new Telemetry()
.tracer({ startSpan: jest.fn() })
.otlpEndpoint('http://localhost:4318')
.build(),
).rejects.toThrow('Cannot set both .tracer() and .otlpEndpoint()');
});
it('collects multiple integrations', async () => {
const int1: TelemetryIntegration = { onStart: jest.fn() };
const int2: TelemetryIntegration = { onFinish: jest.fn() };
const built = await new Telemetry().integration(int1).integration(int2).build();
expect(built.integrations).toHaveLength(2);
});
});
describe('Telemetry — redaction wrapping', () => {
it('wraps integrations with redaction when .redact() is set', async () => {
const receivedEvents: unknown[] = [];
const integration: TelemetryIntegration = {
onStart: (event) => {
receivedEvents.push(event);
},
onFinish: (event) => {
receivedEvents.push(event);
},
};
const built = await new Telemetry()
.redact((data) => {
const filtered = { ...data };
delete filtered.secret;
return filtered;
})
.integration(integration)
.build();
// Call the wrapped onStart hook
const startEvent = { model: { modelId: 'test' }, messages: { secret: 'hidden', safe: 'ok' } };
built.integrations[0].onStart!(startEvent as never);
// The secret should be redacted from nested objects
const received = receivedEvents[0] as Record<string, unknown>;
const messages = received.messages as Record<string, unknown>;
expect(messages.secret).toBeUndefined();
expect(messages.safe).toBe('ok');
});
it('does not wrap integrations when .redact() is not set', async () => {
const integration: TelemetryIntegration = { onStart: jest.fn() };
const built = await new Telemetry().integration(integration).build();
// The integration object may be shallow-copied via spread, but its hooks are passed through unwrapped
expect(built.integrations[0].onStart).toBe(integration.onStart);
});
it('redacts top-level scalar fields via the redact callback', async () => {
const receivedEvents: unknown[] = [];
const integration: TelemetryIntegration = {
onStart: (event) => {
receivedEvents.push(event);
},
};
const built = await new Telemetry()
.redact((data) => {
const filtered = { ...data };
delete filtered.secret;
return filtered;
})
.integration(integration)
.build();
const startEvent = { secret: 'top-level-secret', safe: 'ok', nested: { a: 1 } };
built.integrations[0].onStart!(startEvent as never);
const received = receivedEvents[0] as Record<string, unknown>;
expect(received.secret).toBeUndefined();
expect(received.safe).toBe('ok');
});
it('redacts objects inside arrays', async () => {
const receivedEvents: unknown[] = [];
const integration: TelemetryIntegration = {
onStart: (event) => {
receivedEvents.push(event);
},
};
const built = await new Telemetry()
.redact((data) => {
const filtered = { ...data };
delete filtered.secret;
return filtered;
})
.integration(integration)
.build();
const startEvent = {
items: [
{ secret: 'hidden', safe: 'ok' },
{ secret: 'also-hidden', value: 42 },
],
};
built.integrations[0].onStart!(startEvent as never);
const received = receivedEvents[0] as Record<string, unknown>;
const items = received.items as Array<Record<string, unknown>>;
expect(items[0].secret).toBeUndefined();
expect(items[0].safe).toBe('ok');
expect(items[1].secret).toBeUndefined();
expect(items[1].value).toBe(42);
});
});
describe('Telemetry.shutdown()', () => {
it('calls provider.shutdown() when provider exists', async () => {
const shutdownMock = jest.fn().mockResolvedValue(undefined);
const built = await new Telemetry().build();
// Manually inject a mock provider
const withProvider = {
...built,
provider: { forceFlush: jest.fn(), shutdown: shutdownMock },
};
await Telemetry.shutdown(withProvider);
expect(shutdownMock).toHaveBeenCalled();
});
it('does nothing when no provider exists', async () => {
const built = await new Telemetry().build();
// Should not throw
await Telemetry.shutdown(built);
});
});
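
Put together, a redacting telemetry setup looks roughly like the sketch below. The redact callback is applied recursively, so keys removed from the returned object also disappear from nested objects and array elements; the package import path, `myIntegration`, and the exact wiring into an agent are assumptions here:

```ts
import type { TelemetryIntegration } from 'ai';
import { Telemetry } from '@n8n/agents';

// Placeholder for any TelemetryIntegration (e.g. a LangSmith adapter).
declare const myIntegration: TelemetryIntegration;

const telemetry = new Telemetry()
	.functionId('support-agent')
	.metadata({ team: 'platform', version: 2 })
	.redact((data) => {
		const filtered = { ...data };
		delete filtered.secret; // strip sensitive keys before any integration sees the event
		return filtered;
	})
	.integration(myIntegration);
```

When a tracer provider has been set up, `Telemetry.shutdown(built)` calls the provider's shutdown; with no provider it is a no-op.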

View file

@ -0,0 +1,191 @@
import type { JSONSchema7 } from 'json-schema';
import { z } from 'zod';
import { toAiSdkTools } from '../runtime/tool-adapter';
import type { BuiltTool } from '../types';
// ---------------------------------------------------------------------------
// Module mocks
// ---------------------------------------------------------------------------
// eslint-disable-next-line @typescript-eslint/consistent-type-imports
type AiImport = typeof import('ai');
const jsonSchemaMock = jest.fn((schema: JSONSchema7) => ({ __jsonSchema: schema }));
jest.mock('ai', () => {
const actual = jest.requireActual<AiImport>('ai');
return {
...actual,
tool: jest.fn((config: unknown) => config),
jsonSchema: (schema: JSONSchema7) => jsonSchemaMock(schema),
};
});
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
function makeJsonSchemaTool(
inputSchema: JSONSchema7,
overrides: Partial<BuiltTool> = {},
): BuiltTool {
return {
name: 'testTool',
description: 'A test tool',
inputSchema,
...overrides,
};
}
function makeZodSchemaTool(overrides: Partial<BuiltTool> = {}): BuiltTool {
return {
name: 'zodTool',
description: 'A zod schema tool',
inputSchema: z.object({ id: z.string() }),
...overrides,
};
}
// ---------------------------------------------------------------------------
// toAiSdkTools — empty / missing input
// ---------------------------------------------------------------------------
describe('toAiSdkTools — empty / missing input', () => {
it('returns an empty object when tools is undefined', () => {
expect(toAiSdkTools(undefined)).toEqual({});
});
it('returns an empty object when tools is an empty array', () => {
expect(toAiSdkTools([])).toEqual({});
});
it('skips tools that have no inputSchema', () => {
const tool: BuiltTool = { name: 'noSchema', description: 'no schema' };
const result = toAiSdkTools([tool]);
expect(result).toEqual({});
});
});
// ---------------------------------------------------------------------------
// toAiSdkTools — Zod schemas
// ---------------------------------------------------------------------------
describe('toAiSdkTools — Zod schemas', () => {
beforeEach(() => {
jsonSchemaMock.mockClear();
});
it('registers a tool keyed by its name', () => {
const result = toAiSdkTools([makeZodSchemaTool()]);
expect(result).toHaveProperty('zodTool');
});
it('does NOT call jsonSchema() for Zod schema tools', () => {
toAiSdkTools([makeZodSchemaTool()]);
expect(jsonSchemaMock).not.toHaveBeenCalled();
});
it('passes the Zod schema directly as inputSchema', () => {
const zodSchema = z.object({ query: z.string() });
const result = toAiSdkTools([
{ name: 'search', description: 'Search', inputSchema: zodSchema },
]);
expect((result['search'] as { inputSchema: unknown }).inputSchema).toBe(zodSchema);
});
});
// ---------------------------------------------------------------------------
// toAiSdkTools — JSON Schema (fixSchema behaviour)
// ---------------------------------------------------------------------------
describe('toAiSdkTools — JSON Schema / fixSchema', () => {
beforeEach(() => {
jsonSchemaMock.mockClear();
});
it('registers a tool keyed by its name', () => {
const result = toAiSdkTools([makeJsonSchemaTool({ properties: { id: { type: 'string' } } })]);
expect(result).toHaveProperty('testTool');
});
it('calls jsonSchema() for JSON Schema tools', () => {
toAiSdkTools([makeJsonSchemaTool({ type: 'object', properties: { id: { type: 'string' } } })]);
expect(jsonSchemaMock).toHaveBeenCalledTimes(1);
});
it('fixSchema: adds type "object" when properties is present but type is absent', () => {
const rawSchema: JSONSchema7 = {
properties: { name: { type: 'string' } },
};
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
expect(jsonSchemaMock).toHaveBeenCalledWith(
expect.objectContaining({ type: 'object', properties: { name: { type: 'string' } } }),
);
});
it('fixSchema: preserves existing type when type is already set alongside properties', () => {
const rawSchema: JSONSchema7 = {
type: 'object',
properties: { count: { type: 'number' } },
};
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
expect(jsonSchemaMock).toHaveBeenCalledWith(
expect.objectContaining({ type: 'object', properties: { count: { type: 'number' } } }),
);
// Confirm type was not altered from original
const received = jsonSchemaMock.mock.calls[0][0];
expect(received.type).toBe('object');
});
it('fixSchema: does not add type when properties is absent', () => {
const rawSchema: JSONSchema7 = { description: 'no properties' };
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
const received = jsonSchemaMock.mock.calls[0][0];
expect(received).not.toHaveProperty('type');
});
it('fixSchema: does not mutate the original schema object', () => {
const rawSchema: JSONSchema7 = { properties: { x: { type: 'string' } } };
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
expect(rawSchema).not.toHaveProperty('type');
});
it('handles multiple JSON Schema tools independently', () => {
const schemaWithProps: JSONSchema7 = { properties: { a: { type: 'string' } } };
const schemaWithType: JSONSchema7 = { type: 'object', properties: { b: { type: 'number' } } };
const result = toAiSdkTools([
makeJsonSchemaTool(schemaWithProps, { name: 'toolA' }),
makeJsonSchemaTool(schemaWithType, { name: 'toolB' }),
]);
expect(result).toHaveProperty('toolA');
expect(result).toHaveProperty('toolB');
expect(jsonSchemaMock).toHaveBeenCalledTimes(2);
const firstCall = jsonSchemaMock.mock.calls[0][0];
const secondCall = jsonSchemaMock.mock.calls[1][0];
expect(firstCall.type).toBe('object');
expect(secondCall.type).toBe('object');
});
});
// ---------------------------------------------------------------------------
// toAiSdkTools — description forwarding
// ---------------------------------------------------------------------------
describe('toAiSdkTools — description forwarding', () => {
it('forwards the tool description to the AI SDK tool config', () => {
const zodSchema = z.object({ q: z.string() });
const result = toAiSdkTools([
{ name: 'myTool', description: 'Does something useful', inputSchema: zodSchema },
]);
expect((result['myTool'] as { description: string }).description).toBe('Does something useful');
});
});
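
The `fixSchema` behaviour these cases describe amounts to a defensive normalisation: if a JSON Schema has `properties` but no `type`, a copy with `type: 'object'` is handed to `jsonSchema()`, while Zod schemas bypass the conversion entirely. An illustrative version of that rule (not the adapter's implementation):

```ts
import type { JSONSchema7 } from 'json-schema';

// Illustration of the normalisation described above; not the adapter's code.
function fixSchema(schema: JSONSchema7): JSONSchema7 {
	if (schema.properties !== undefined && schema.type === undefined) {
		return { ...schema, type: 'object' }; // copy, so the caller's schema object is not mutated
	}
	return schema;
}
```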

View file

@ -0,0 +1,289 @@
import { z } from 'zod';
import { Tool, wrapToolForApproval } from '../sdk/tool';
import type { BuiltTelemetry, BuiltTool, InterruptibleToolContext, ToolContext } from '../types';
// ---------------------------------------------------------------------------
// Test helpers
// ---------------------------------------------------------------------------
function makeBuiltTool(overrides: Partial<BuiltTool> = {}): BuiltTool {
return {
name: 'testTool',
description: 'A test tool',
inputSchema: z.object({ id: z.string() }),
handler: async (input) => {
return await Promise.resolve({ result: (input as { id: string }).id });
},
...overrides,
};
}
function makeCtx(resumeData?: unknown): { ctx: InterruptibleToolContext; suspendMock: jest.Mock } {
const suspendMock = jest.fn().mockImplementation(async (payload: unknown) => {
return await Promise.resolve({ __suspended: true, payload });
});
const ctx: InterruptibleToolContext = {
suspend: suspendMock as unknown as InterruptibleToolContext['suspend'],
resumeData,
};
return { ctx, suspendMock };
}
// ---------------------------------------------------------------------------
// Tool builder — .requireApproval()
// ---------------------------------------------------------------------------
describe('Tool builder — .requireApproval()', () => {
it('build() returns a tool with suspendSchema and resumeSchema when .requireApproval() is set', () => {
const tool = new Tool('delete')
.description('Delete a record')
.input(z.object({ id: z.string() }))
.requireApproval()
.handler(async ({ id }) => {
return await Promise.resolve({ deleted: id });
})
.build();
expect(tool.suspendSchema).toBeDefined();
expect(tool.resumeSchema).toBeDefined();
});
it('build() throws when .requireApproval() is combined with .suspend()/.resume()', () => {
expect(() => {
new Tool('delete')
.description('Delete a record')
.input(z.object({ id: z.string() }))
.requireApproval()
.suspend(z.object({ msg: z.string() }))
.resume(z.object({ ok: z.boolean() }))
.handler(async (_input, _ctx) => {
return await Promise.resolve({});
})
.build();
}).toThrow('cannot use both approval');
});
});
// ---------------------------------------------------------------------------
// Tool builder — .needsApprovalFn()
// ---------------------------------------------------------------------------
describe('Tool builder — .needsApprovalFn()', () => {
it('build() returns a tool with suspendSchema and resumeSchema when .needsApprovalFn() is set', () => {
const tool = new Tool('query')
.description('Run a query')
.input(z.object({ id: z.string() }))
.needsApprovalFn(async (args) => {
return await Promise.resolve((args as { id: string }).id === 'secret');
})
.handler(async ({ id }) => {
return await Promise.resolve({ result: id });
})
.build();
expect(tool.suspendSchema).toBeDefined();
expect(tool.resumeSchema).toBeDefined();
});
it('build() throws when .needsApprovalFn() is combined with .suspend()/.resume()', () => {
expect(() => {
new Tool('query')
.description('Run a query')
.input(z.object({ id: z.string() }))
.needsApprovalFn(async () => {
return await Promise.resolve(true);
})
.suspend(z.object({ msg: z.string() }))
.resume(z.object({ ok: z.boolean() }))
.handler(async (_input, _ctx) => {
return await Promise.resolve({});
})
.build();
}).toThrow('cannot use both approval');
});
});
// ---------------------------------------------------------------------------
// Tool builder — without approval
// ---------------------------------------------------------------------------
describe('Tool builder — without approval', () => {
it('build() returns a normal tool (no suspendSchema) when neither .requireApproval() nor .needsApprovalFn() is set', () => {
const tool = new Tool('fetch')
.description('Fetch data')
.input(z.object({ id: z.string() }))
.handler(async ({ id }) => {
return await Promise.resolve({ data: id });
})
.build();
expect(tool.suspendSchema).toBeUndefined();
expect(tool.resumeSchema).toBeUndefined();
});
});
// ---------------------------------------------------------------------------
// wrapToolForApproval — requireApproval: true
// ---------------------------------------------------------------------------
describe('wrapToolForApproval — requireApproval: true', () => {
it('suspends on first call when requireApproval is true', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined → first call
await wrapped.handler!({ id: '1' }, ctx);
expect(suspendMock).toHaveBeenCalledWith({
type: 'approval',
toolName: 'testTool',
args: { id: '1' },
});
});
it('executes original handler when approved on resume', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
const { ctx } = makeCtx({ approved: true });
const result = await wrapped.handler!({ id: 'abc' }, ctx);
expect(result).toEqual({ result: 'abc' });
});
it('returns declined message when not approved on resume', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
const { ctx } = makeCtx({ approved: false });
const result = await wrapped.handler!({ id: 'abc' }, ctx);
expect(result).toEqual({ declined: true, message: 'Tool "testTool" was not approved' });
});
});
// ---------------------------------------------------------------------------
// wrapToolForApproval — needsApprovalFn
// ---------------------------------------------------------------------------
describe('wrapToolForApproval — needsApprovalFn', () => {
it('does not suspend when needsApprovalFn returns false', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, {
needsApprovalFn: async () => {
return await Promise.resolve(false);
},
});
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined
const result = await wrapped.handler!({ id: 'safe' }, ctx);
expect(suspendMock).not.toHaveBeenCalled();
expect(result).toEqual({ result: 'safe' });
});
it('suspends when needsApprovalFn returns true', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, {
needsApprovalFn: async (args) => {
return await Promise.resolve((args as { id: string }).id === 'secret');
},
});
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined
await wrapped.handler!({ id: 'secret' }, ctx);
expect(suspendMock).toHaveBeenCalledWith({
type: 'approval',
toolName: 'testTool',
args: { id: 'secret' },
});
});
it('does not suspend when needsApprovalFn returns false for non-matching args', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, {
needsApprovalFn: async (args) => {
return await Promise.resolve((args as { id: string }).id === 'secret');
},
});
const { ctx, suspendMock } = makeCtx();
const result = await wrapped.handler!({ id: 'public' }, ctx);
expect(suspendMock).not.toHaveBeenCalled();
expect(result).toEqual({ result: 'public' });
});
});
// ---------------------------------------------------------------------------
// wrapToolForApproval — config: { requireApproval: true } (agent-level wrapping)
// ---------------------------------------------------------------------------
describe('wrapToolForApproval — config: { requireApproval: true } (agent-level wrapping)', () => {
it('always suspends regardless of original tool settings', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined
await wrapped.handler!({ id: 'any-id' }, ctx);
expect(suspendMock).toHaveBeenCalledWith({
type: 'approval',
toolName: 'testTool',
args: { id: 'any-id' },
});
});
});
// ---------------------------------------------------------------------------
// wrapToolForApproval — telemetry propagation
// ---------------------------------------------------------------------------
describe('wrapToolForApproval — telemetry propagation', () => {
const fakeTelemetry: BuiltTelemetry = {
enabled: true,
functionId: 'parent-agent',
recordInputs: true,
recordOutputs: true,
integrations: [],
tracer: { startSpan: jest.fn() },
};
it('forwards parentTelemetry to the original handler when approval is not needed', async () => {
let capturedCtx: ToolContext | undefined;
const baseTool = makeBuiltTool({
handler: async (_input, ctx) => {
capturedCtx = ctx as ToolContext;
return await Promise.resolve({ result: 'ok' });
},
});
const wrapped = wrapToolForApproval(baseTool, { requireApproval: false });
const { ctx } = makeCtx(); // no resumeData
ctx.parentTelemetry = fakeTelemetry;
await wrapped.handler!({ id: 'test' }, ctx);
expect(capturedCtx).toBeDefined();
expect(capturedCtx!.parentTelemetry).toBe(fakeTelemetry);
});
it('forwards parentTelemetry to the original handler after approval', async () => {
let capturedCtx: ToolContext | undefined;
const baseTool = makeBuiltTool({
handler: async (_input, ctx) => {
capturedCtx = ctx as ToolContext;
return await Promise.resolve({ result: 'ok' });
},
});
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
const { ctx } = makeCtx({ approved: true }); // resumeData = approved
ctx.parentTelemetry = fakeTelemetry;
await wrapped.handler!({ id: 'test' }, ctx);
expect(capturedCtx).toBeDefined();
expect(capturedCtx!.parentTelemetry).toBe(fakeTelemetry);
});
});
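
As a usage reminder, the approval flow exercised here starts from an ordinary tool builder. A minimal sketch, with the package import path assumed:

```ts
import { z } from 'zod';
import { Tool } from '@n8n/agents';

// Approval-gated tool: the first invocation suspends with
// { type: 'approval', toolName, args }; resuming with { approved: true } runs the
// handler, while { approved: false } yields a declined message instead.
const deleteRecord = new Tool('delete')
	.description('Delete a record')
	.input(z.object({ id: z.string() }))
	.requireApproval()
	.handler(async ({ id }) => ({ deleted: id }));
```

For conditional gating, `.needsApprovalFn()` takes an async predicate over the call arguments and only suspends when it returns true.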

View file

@ -0,0 +1,197 @@
import { z } from 'zod';
import {
parseWorkingMemory,
buildWorkingMemoryInstruction,
templateFromSchema,
WorkingMemoryStreamFilter,
} from '../runtime/working-memory';
import type { StreamChunk } from '../types';
describe('parseWorkingMemory', () => {
it('extracts content between tags at end of text', () => {
const text = 'Hello world.\n<working_memory>\n# Name: Alice\n</working_memory>';
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('Hello world.');
expect(result.workingMemory).toBe('# Name: Alice');
});
it('extracts content between tags in middle of text', () => {
const text = 'Before.\n<working_memory>\ndata\n</working_memory>\nAfter.';
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('Before.\nAfter.');
expect(result.workingMemory).toBe('data');
});
it('returns null when no tags present', () => {
const text = 'Just a normal response.';
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('Just a normal response.');
expect(result.workingMemory).toBeNull();
});
it('handles empty working memory', () => {
const text = 'Response.\n<working_memory>\n</working_memory>';
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('Response.');
expect(result.workingMemory).toBe('');
});
it('handles multiline content with markdown', () => {
const wm = '# User Context\n- **Name**: Alice\n- **City**: Berlin';
const text = `Response text.\n<working_memory>\n${wm}\n</working_memory>`;
const result = parseWorkingMemory(text);
expect(result.workingMemory).toBe(wm);
});
});
describe('buildWorkingMemoryInstruction', () => {
it('generates freeform instruction', () => {
const result = buildWorkingMemoryInstruction('# Context\n- Name:', false);
expect(result).toContain('<working_memory>');
expect(result).toContain('</working_memory>');
expect(result).toContain('# Context\n- Name:');
});
it('generates structured instruction mentioning JSON', () => {
const result = buildWorkingMemoryInstruction('{"userName": ""}', true);
expect(result).toContain('JSON');
expect(result).toContain('<working_memory>');
});
});
describe('templateFromSchema', () => {
it('converts Zod schema to JSON template', () => {
const schema = z.object({
userName: z.string().optional().describe("The user's name"),
favoriteColor: z.string().optional().describe('Favorite color'),
});
const result = templateFromSchema(schema);
expect(result).toContain('userName');
expect(result).toContain('favoriteColor');
// Should be valid JSON
let parsed: unknown;
try {
parsed = JSON.parse(result);
} catch {
parsed = undefined;
}
expect(parsed).toHaveProperty('userName');
});
});
/**
* Helper that feeds chunks through a WorkingMemoryStreamFilter and collects
* the output text and any persisted working memory content.
*/
async function runStreamFilter(
chunks: string[],
): Promise<{ outputText: string; persisted: string[] }> {
const persisted: string[] = [];
const stream = new TransformStream<StreamChunk>();
const writer = stream.writable.getWriter();
// eslint-disable-next-line @typescript-eslint/require-await
const filter = new WorkingMemoryStreamFilter(writer, async (content) => {
persisted.push(content);
});
// Read the readable side concurrently to avoid backpressure deadlock
const reader = stream.readable.getReader();
const readAll = (async () => {
let outputText = '';
while (true) {
const result = await reader.read();
if (result.done) break;
const chunk = result.value as StreamChunk;
if (chunk.type === 'text-delta') outputText += chunk.delta;
}
return outputText;
})();
for (const chunk of chunks) {
await filter.write({ type: 'text-delta', delta: chunk });
}
await filter.flush();
await writer.close();
const outputText = await readAll;
return { outputText, persisted };
}
describe('WorkingMemoryStreamFilter with tag split across multiple chunks', () => {
it('handles tag split mid-open-tag', async () => {
const { outputText, persisted } = await runStreamFilter([
'Hello <work',
'ing_memory>state</working_memory>',
]);
expect(outputText).toBe('Hello ');
expect(persisted).toEqual(['state']);
});
it('handles tag split mid-close-tag', async () => {
const { outputText, persisted } = await runStreamFilter([
'<working_memory>state</worki',
'ng_memory> after',
]);
expect(persisted).toEqual(['state']);
expect(outputText).toBe(' after');
});
it('handles tag spread across 3+ chunks', async () => {
const { outputText, persisted } = await runStreamFilter([
'<wor',
'king_mem',
'ory>data</working_memory>',
]);
expect(persisted).toEqual(['data']);
expect(outputText).toBe('');
});
it('handles partial < that is not a tag', async () => {
const { outputText, persisted } = await runStreamFilter(['Hello <', 'div>world']);
expect(outputText).toBe('Hello <div>world');
expect(persisted).toEqual([]);
});
});
describe('parseWorkingMemory with invalid structured content', () => {
it('strips tags and extracts content regardless of JSON validity', () => {
const invalidJson = '{not valid json!!!}';
const text = `Here is my response.\n<working_memory>\n${invalidJson}\n</working_memory>`;
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('Here is my response.');
expect(result.workingMemory).toBe(invalidJson);
});
it('strips tags with content that fails Zod schema validation', () => {
// Content is valid JSON but wrong shape for the schema
const wrongShape = '{"unexpected": true}';
const text = `Response text.\n<working_memory>\n${wrongShape}\n</working_memory>`;
const result = parseWorkingMemory(text);
// Tags are stripped from response regardless
expect(result.cleanText).toBe('Response text.');
// Raw content is returned — caller decides whether it passes validation
expect(result.workingMemory).toBe(wrongShape);
// Verify the content would indeed fail schema validation
expect(result.workingMemory).not.toBeNull();
let parsed: unknown;
try {
parsed = JSON.parse(result.workingMemory!);
} catch {
parsed = undefined;
}
expect(parsed).toBeDefined();
});
it('strips tags even when content is completely non-JSON', () => {
const text =
'My reply.\n<working_memory>\nthis is just plain text, not JSON at all\n</working_memory>';
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('My reply.');
expect(result.workingMemory).toBe('this is just plain text, not JSON at all');
});
});
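
Concretely, the shape the parser and stream filter look for is a reply with a `<working_memory>` block embedded in it: everything inside the tags is persisted and everything outside is what the user sees. A small sketch against the internal helper (paths as seen from this test directory):

```ts
import { parseWorkingMemory } from '../runtime/working-memory';

// Internal runtime helper; the expected outputs follow the cases tested above.
const raw = 'Sure, noted!\n<working_memory>\n# Name: Alice\n- City: Berlin\n</working_memory>';
const { cleanText, workingMemory } = parseWorkingMemory(raw);
// cleanText     → 'Sure, noted!'
// workingMemory → '# Name: Alice\n- City: Berlin'
```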

View file

@ -0,0 +1,271 @@
import { BaseFilesystem } from '../../workspace/filesystem/base-filesystem';
import type { BaseFilesystemOptions } from '../../workspace/filesystem/base-filesystem';
import type {
FileContent,
FileStat,
FileEntry,
ReadOptions,
WriteOptions,
ListOptions,
RemoveOptions,
CopyOptions,
ProviderStatus,
} from '../../workspace/types';
class TestFilesystem extends BaseFilesystem {
readonly id: string;
readonly name = 'TestFS';
readonly provider = 'test';
status: ProviderStatus = 'pending';
initFn = jest.fn().mockResolvedValue(undefined);
destroyFn = jest.fn().mockResolvedValue(undefined);
constructor(id: string, options?: BaseFilesystemOptions) {
super(options);
this.id = id;
}
override async init(): Promise<void> {
await this.initFn();
}
override async destroy(): Promise<void> {
await this.destroyFn();
}
async readFile(_path: string, _options?: ReadOptions): Promise<string | Buffer> {
await this.ensureReady();
return 'test content';
}
async writeFile(_path: string, _content: FileContent, _options?: WriteOptions): Promise<void> {
await this.ensureReady();
}
async appendFile(_path: string, _content: FileContent): Promise<void> {
await this.ensureReady();
}
async deleteFile(_path: string, _options?: RemoveOptions): Promise<void> {
await this.ensureReady();
}
async copyFile(_src: string, _dest: string, _options?: CopyOptions): Promise<void> {
await this.ensureReady();
}
async moveFile(_src: string, _dest: string, _options?: CopyOptions): Promise<void> {
await this.ensureReady();
}
async mkdir(_path: string, _options?: { recursive?: boolean }): Promise<void> {
await this.ensureReady();
}
async rmdir(_path: string, _options?: RemoveOptions): Promise<void> {
await this.ensureReady();
}
async readdir(_path: string, _options?: ListOptions): Promise<FileEntry[]> {
await this.ensureReady();
return [];
}
async exists(_path: string): Promise<boolean> {
await this.ensureReady();
return false;
}
async stat(_path: string): Promise<FileStat> {
await this.ensureReady();
return {
name: 'test',
path: _path,
type: 'file',
size: 0,
createdAt: new Date(),
modifiedAt: new Date(),
};
}
}
describe('BaseFilesystem', () => {
describe('lifecycle state transitions', () => {
it('starts in pending status', () => {
const fs = new TestFilesystem('1');
expect(fs.status).toBe('pending');
});
it('transitions pending → initializing → ready on _init', async () => {
const statuses: string[] = [];
const fs = new TestFilesystem('1');
fs.initFn.mockImplementation(() => {
statuses.push(fs.status);
});
await fs._init();
expect(statuses).toContain('initializing');
expect(fs.status).toBe('ready');
});
it('_init is idempotent when already ready', async () => {
const fs = new TestFilesystem('1');
await fs._init();
fs.initFn.mockClear();
await fs._init();
expect(fs.initFn).not.toHaveBeenCalled();
expect(fs.status).toBe('ready');
});
it('transitions to error on init failure', async () => {
const fs = new TestFilesystem('1');
fs.initFn.mockRejectedValue(new Error('init boom'));
await expect(fs._init()).rejects.toThrow('init boom');
expect(fs.status).toBe('error');
expect(fs.error).toBe('init boom');
});
it('transitions to destroyed on _destroy after ready', async () => {
const fs = new TestFilesystem('1');
await fs._init();
const statuses: string[] = [];
fs.destroyFn.mockImplementation(() => {
statuses.push(fs.status);
});
await fs._destroy();
expect(statuses).toContain('destroying');
expect(fs.status).toBe('destroyed');
});
it('_destroy from pending goes directly to destroyed', async () => {
const fs = new TestFilesystem('1');
await fs._destroy();
expect(fs.status).toBe('destroyed');
expect(fs.destroyFn).not.toHaveBeenCalled();
});
it('_destroy is idempotent when already destroyed', async () => {
const fs = new TestFilesystem('1');
await fs._init();
await fs._destroy();
fs.destroyFn.mockClear();
await fs._destroy();
expect(fs.destroyFn).not.toHaveBeenCalled();
});
it('transitions to error on destroy failure', async () => {
const fs = new TestFilesystem('1');
await fs._init();
fs.destroyFn.mockRejectedValue(new Error('destroy boom'));
await expect(fs._destroy()).rejects.toThrow('destroy boom');
expect(fs.status).toBe('error');
});
});
describe('lifecycle hooks', () => {
it('calls onInit hook after successful init', async () => {
const onInit = jest.fn();
const fs = new TestFilesystem('1', { onInit });
await fs._init();
expect(onInit).toHaveBeenCalledWith({ filesystem: fs });
});
it('does not fail when onInit hook throws', async () => {
const onInit = jest.fn().mockRejectedValue(new Error('hook err'));
const fs = new TestFilesystem('1', { onInit });
await fs._init();
expect(fs.status).toBe('ready');
});
it('calls onDestroy hook during destroy', async () => {
const onDestroy = jest.fn();
const fs = new TestFilesystem('1', { onDestroy });
await fs._init();
await fs._destroy();
expect(onDestroy).toHaveBeenCalledWith({ filesystem: fs });
});
});
describe('ensureReady', () => {
it('auto-initializes when calling a fs method from pending', async () => {
const fs = new TestFilesystem('1');
const content = await fs.readFile('/test');
expect(content).toBe('test content');
expect(fs.status).toBe('ready');
});
it('throws if init fails when auto-initializing', async () => {
const fs = new TestFilesystem('1');
fs.initFn.mockRejectedValue(new Error('init fail'));
await expect(fs.readFile('/test')).rejects.toThrow();
});
});
describe('concurrent lifecycle calls', () => {
it('deduplicates concurrent _init calls', async () => {
const fs = new TestFilesystem('1');
let resolveInit: () => void;
fs.initFn.mockImplementation(
async () =>
await new Promise<void>((r) => {
resolveInit = r;
}),
);
const p1 = fs._init();
const p2 = fs._init();
resolveInit!();
await Promise.all([p1, p2]);
expect(fs.initFn).toHaveBeenCalledTimes(1);
expect(fs.status).toBe('ready');
});
it('deduplicates concurrent _destroy calls', async () => {
const fs = new TestFilesystem('1');
await fs._init();
let resolveDestroy!: () => void;
fs.destroyFn.mockImplementation(
async () =>
await new Promise<void>((r) => {
resolveDestroy = r;
}),
);
const p1 = fs._destroy();
// Flush microtasks so executeDestroy reaches destroyFn
await Promise.resolve();
await Promise.resolve();
const p2 = fs._destroy();
resolveDestroy();
await Promise.all([p1, p2]);
expect(fs.destroyFn).toHaveBeenCalledTimes(1);
expect(fs.status).toBe('destroyed');
});
});
});

View file

@ -0,0 +1,369 @@
import { BaseSandbox } from '../../workspace/sandbox/base-sandbox';
import type {
CommandResult,
SandboxProcessManager,
BaseSandboxOptions,
} from '../../workspace/types';
import { ProcessHandle } from '../../workspace/types';
class StubProcessHandle extends ProcessHandle {
readonly pid: number;
private resolvedExitCode: number | undefined;
constructor(pid: number) {
super();
this.pid = pid;
}
get exitCode(): number | undefined {
return this.resolvedExitCode;
}
async kill(): Promise<boolean> {
this.resolvedExitCode = 137;
return await Promise.resolve(true);
}
async sendStdin(_data: string): Promise<void> {}
protected async _wait(): Promise<CommandResult> {
this.resolvedExitCode = 0;
this.emitStdout('ok\n');
return await Promise.resolve({
success: true,
exitCode: 0,
stdout: this.stdout,
stderr: this.stderr,
executionTimeMs: 1,
});
}
}
function makeStubProcessManager(): SandboxProcessManager & {
spawnMock: jest.Mock;
} {
const handle = new StubProcessHandle(1);
const spawnMock = jest.fn().mockResolvedValue(handle);
return {
spawn: spawnMock,
list: jest.fn().mockResolvedValue([]),
get: jest.fn().mockResolvedValue(undefined),
kill: jest.fn().mockResolvedValue(false),
spawnMock,
} as unknown as SandboxProcessManager & { spawnMock: jest.Mock };
}
class TestSandbox extends BaseSandbox {
readonly id: string;
readonly name: string;
readonly provider = 'test';
startFn = jest.fn().mockResolvedValue(undefined);
stopFn = jest.fn().mockResolvedValue(undefined);
destroyFn = jest.fn().mockResolvedValue(undefined);
constructor(id: string, options?: BaseSandboxOptions) {
super(options);
this.id = id;
this.name = `test-sandbox-${id}`;
}
async start(): Promise<void> {
await this.startFn();
}
async stop(): Promise<void> {
await this.stopFn();
}
async destroy(): Promise<void> {
await this.destroyFn();
}
}
describe('BaseSandbox', () => {
describe('lifecycle state transitions', () => {
it('starts in pending status', () => {
const sb = new TestSandbox('1');
expect(sb.status).toBe('pending');
});
it('transitions pending → starting → running on _start', async () => {
const statuses: string[] = [];
const sb = new TestSandbox('1');
sb.startFn.mockImplementation(() => {
statuses.push(sb.status);
});
await sb._start();
expect(statuses).toContain('starting');
expect(sb.status).toBe('running');
});
it('_start is idempotent when already running', async () => {
const sb = new TestSandbox('1');
await sb._start();
sb.startFn.mockClear();
await sb._start();
expect(sb.startFn).not.toHaveBeenCalled();
expect(sb.status).toBe('running');
});
it('transitions to error on start failure', async () => {
const sb = new TestSandbox('1');
sb.startFn.mockRejectedValue(new Error('start boom'));
await expect(sb._start()).rejects.toThrow('start boom');
expect(sb.status).toBe('error');
});
it('transitions running → stopping → stopped on _stop', async () => {
const sb = new TestSandbox('1');
await sb._start();
const statuses: string[] = [];
sb.stopFn.mockImplementation(() => {
statuses.push(sb.status);
});
await sb._stop();
expect(statuses).toContain('stopping');
expect(sb.status).toBe('stopped');
});
it('_stop is no-op when already stopped', async () => {
const sb = new TestSandbox('1');
await sb._start();
await sb._stop();
sb.stopFn.mockClear();
await sb._stop();
expect(sb.stopFn).not.toHaveBeenCalled();
});
it('_stop is no-op when pending', async () => {
const sb = new TestSandbox('1');
await sb._stop();
expect(sb.stopFn).not.toHaveBeenCalled();
});
it('transitions to error on stop failure', async () => {
const sb = new TestSandbox('1');
await sb._start();
sb.stopFn.mockRejectedValue(new Error('stop boom'));
await expect(sb._stop()).rejects.toThrow('stop boom');
expect(sb.status).toBe('error');
});
it('transitions running → destroying → destroyed on _destroy', async () => {
const sb = new TestSandbox('1');
await sb._start();
const statuses: string[] = [];
sb.destroyFn.mockImplementation(() => {
statuses.push(sb.status);
});
await sb._destroy();
expect(statuses).toContain('destroying');
expect(sb.status).toBe('destroyed');
});
it('_destroy from pending goes directly to destroyed', async () => {
const sb = new TestSandbox('1');
await sb._destroy();
expect(sb.status).toBe('destroyed');
expect(sb.destroyFn).not.toHaveBeenCalled();
});
it('_destroy is idempotent when already destroyed', async () => {
const sb = new TestSandbox('1');
await sb._start();
await sb._destroy();
sb.destroyFn.mockClear();
await sb._destroy();
expect(sb.destroyFn).not.toHaveBeenCalled();
});
it('throws when trying to _start a destroyed sandbox', async () => {
const sb = new TestSandbox('1');
await sb._start();
await sb._destroy();
await expect(sb._start()).rejects.toThrow('Cannot start a destroyed sandbox');
});
it('transitions to error on destroy failure', async () => {
const sb = new TestSandbox('1');
await sb._start();
sb.destroyFn.mockRejectedValue(new Error('destroy boom'));
await expect(sb._destroy()).rejects.toThrow('destroy boom');
expect(sb.status).toBe('error');
});
});
describe('lifecycle hooks', () => {
it('calls onStart hook after successful start', async () => {
const onStart = jest.fn();
const sb = new TestSandbox('1', { onStart });
await sb._start();
expect(onStart).toHaveBeenCalledWith({ sandbox: sb });
});
it('does not fail when onStart hook throws', async () => {
const onStart = jest.fn().mockRejectedValue(new Error('hook error'));
const sb = new TestSandbox('1', { onStart });
await sb._start();
expect(sb.status).toBe('running');
});
it('calls onStop hook before stopping', async () => {
const onStop = jest.fn();
const sb = new TestSandbox('1', { onStop });
await sb._start();
await sb._stop();
expect(onStop).toHaveBeenCalledWith({ sandbox: sb });
});
it('calls onDestroy hook before destroying', async () => {
const onDestroy = jest.fn();
const sb = new TestSandbox('1', { onDestroy });
await sb._start();
await sb._destroy();
expect(onDestroy).toHaveBeenCalledWith({ sandbox: sb });
});
});
describe('ensureRunning', () => {
it('starts the sandbox if not running', async () => {
const sb = new TestSandbox('1');
await sb.ensureRunning();
expect(sb.status).toBe('running');
expect(sb.startFn).toHaveBeenCalled();
});
it('does nothing if already running', async () => {
const sb = new TestSandbox('1');
await sb._start();
sb.startFn.mockClear();
await sb.ensureRunning();
expect(sb.startFn).not.toHaveBeenCalled();
});
it('throws if sandbox is destroyed', async () => {
const sb = new TestSandbox('1');
await sb._start();
await sb._destroy();
await expect(sb.ensureRunning()).rejects.toThrow('has been destroyed');
});
});
describe('executeCommand', () => {
it('spawns a process and returns results', async () => {
const pm = makeStubProcessManager();
const sb = new TestSandbox('1', { processes: pm });
await sb._start();
const result = await sb.executeCommand('echo', ['hello']);
expect(pm.spawnMock).toHaveBeenCalledTimes(1);
expect((pm.spawnMock.mock.calls as unknown as string[][])[0][0]).toBe('echo hello');
expect(result.success).toBe(true);
expect(result.stdout).toBe('ok\n');
});
it('auto-starts sandbox before executing', async () => {
const pm = makeStubProcessManager();
const sb = new TestSandbox('1', { processes: pm });
const result = await sb.executeCommand('ls');
expect(sb.status).toBe('running');
expect(result.success).toBe(true);
});
it('throws when no process manager is available', async () => {
const sb = new TestSandbox('1');
await sb._start();
await expect(sb.executeCommand('ls')).rejects.toThrow('no process manager');
});
});
describe('getInstructions', () => {
it('returns empty string by default', () => {
const sb = new TestSandbox('1');
expect(sb.getInstructions()).toBe('');
});
});
describe('concurrent lifecycle calls', () => {
it('deduplicates concurrent _start calls', async () => {
const sb = new TestSandbox('1');
let resolveStart: () => void;
sb.startFn.mockImplementation(
async () =>
await new Promise<void>((r) => {
resolveStart = r;
}),
);
const p1 = sb._start();
const p2 = sb._start();
resolveStart!();
await Promise.all([p1, p2]);
expect(sb.startFn).toHaveBeenCalledTimes(1);
expect(sb.status).toBe('running');
});
it('deduplicates concurrent _destroy calls', async () => {
const sb = new TestSandbox('1');
await sb._start();
let resolveDestroy!: () => void;
sb.destroyFn.mockImplementation(
async () =>
await new Promise<void>((r) => {
resolveDestroy = r;
}),
);
const p1 = sb._destroy();
// Flush microtasks so executeDestroy reaches destroyFn
await Promise.resolve();
await Promise.resolve();
const p2 = sb._destroy();
resolveDestroy();
await Promise.all([p1, p2]);
expect(sb.destroyFn).toHaveBeenCalledTimes(1);
expect(sb.status).toBe('destroyed');
});
});
});

View file

@ -0,0 +1,90 @@
import { callLifecycle } from '../../workspace/lifecycle';
describe('callLifecycle', () => {
it('calls _init when both _init and init exist', async () => {
const target = {
_init: jest.fn().mockResolvedValue(undefined),
init: jest.fn().mockResolvedValue(undefined),
};
await callLifecycle(target, 'init');
expect(target._init).toHaveBeenCalledTimes(1);
expect(target.init).not.toHaveBeenCalled();
});
it('falls back to init when _init is undefined', async () => {
const target = {
init: jest.fn().mockResolvedValue(undefined),
};
await callLifecycle(target, 'init');
expect(target.init).toHaveBeenCalledTimes(1);
});
it('calls _start when both _start and start exist', async () => {
const target = {
_start: jest.fn().mockResolvedValue(undefined),
start: jest.fn().mockResolvedValue(undefined),
};
await callLifecycle(target, 'start');
expect(target._start).toHaveBeenCalledTimes(1);
expect(target.start).not.toHaveBeenCalled();
});
it('calls _stop over stop', async () => {
const target = {
_stop: jest.fn().mockResolvedValue(undefined),
stop: jest.fn().mockResolvedValue(undefined),
};
await callLifecycle(target, 'stop');
expect(target._stop).toHaveBeenCalledTimes(1);
expect(target.stop).not.toHaveBeenCalled();
});
it('calls _destroy over destroy', async () => {
const target = {
_destroy: jest.fn().mockResolvedValue(undefined),
destroy: jest.fn().mockResolvedValue(undefined),
};
await callLifecycle(target, 'destroy');
expect(target._destroy).toHaveBeenCalledTimes(1);
expect(target.destroy).not.toHaveBeenCalled();
});
it('does nothing if neither underscore nor plain method exists', async () => {
const target = {};
await expect(callLifecycle(target, 'init')).resolves.toBeUndefined();
});
it('propagates errors from lifecycle methods', async () => {
const error = new Error('lifecycle failure');
const target = {
_start: jest.fn().mockRejectedValue(error),
};
await expect(callLifecycle(target, 'start')).rejects.toThrow('lifecycle failure');
});
it('binds correctly (calls with proper this)', async () => {
const target = {
value: 42,
// eslint-disable-next-line @typescript-eslint/require-await
_init: jest.fn(async function (this: { value: number }) {
expect(this.value).toBe(42);
}),
};
await callLifecycle(target, 'init');
expect(target._init).toHaveBeenCalled();
});
});

View file

@ -0,0 +1,338 @@
import { BaseFilesystem } from '../../workspace/filesystem/base-filesystem';
import { BaseSandbox } from '../../workspace/sandbox/base-sandbox';
import { ProcessHandle, SandboxProcessManager } from '../../workspace/types';
import type {
CommandResult,
FileContent,
FileEntry,
FileStat,
ListOptions,
MountConfig,
ProcessInfo,
ProviderStatus,
ReadOptions,
RemoveOptions,
SpawnProcessOptions,
WriteOptions,
} from '../../workspace/types';
// ---------------------------------------------------------------------------
// In-memory filesystem (fake)
// ---------------------------------------------------------------------------
export class InMemoryFilesystem extends BaseFilesystem {
readonly id: string;
readonly name = 'InMemoryFilesystem';
readonly provider = 'memory';
readonly basePath = '/mem';
status: ProviderStatus = 'pending';
private files = new Map<string, Buffer>();
private dirs = new Set<string>();
constructor(id = 'mem-fs') {
super();
this.id = id;
}
// eslint-disable-next-line @typescript-eslint/require-await
override async init(): Promise<void> {
this.dirs.add('/');
}
private normalizePath(p: string): string {
return p.startsWith('/') ? p : `/${p}`;
}
private parentDir(p: string): string {
const parts = p.split('/');
parts.pop();
return parts.join('/') || '/';
}
async readFile(filePath: string, options?: ReadOptions): Promise<string | Buffer> {
await this.ensureReady();
const p = this.normalizePath(filePath);
const buf = this.files.get(p);
if (!buf) throw new Error(`ENOENT: ${p}`);
if (options?.encoding) return buf.toString(options.encoding);
return buf;
}
async writeFile(filePath: string, content: FileContent, options?: WriteOptions): Promise<void> {
await this.ensureReady();
const p = this.normalizePath(filePath);
if (options?.recursive) {
this.mkdirRecursive(this.parentDir(p));
}
const parent = this.parentDir(p);
if (!this.dirs.has(parent))
throw new Error(`ENOENT: parent directory ${parent} does not exist`);
this.files.set(p, Buffer.from(content));
}
async appendFile(filePath: string, content: FileContent): Promise<void> {
await this.ensureReady();
const p = this.normalizePath(filePath);
const existing = this.files.get(p) ?? Buffer.alloc(0);
const append = typeof content === 'string' ? Buffer.from(content) : Buffer.from(content);
this.files.set(p, Buffer.concat([existing, append]));
}
async deleteFile(filePath: string): Promise<void> {
await this.ensureReady();
const p = this.normalizePath(filePath);
if (!this.files.has(p)) throw new Error(`ENOENT: ${p}`);
this.files.delete(p);
}
async copyFile(src: string, dest: string): Promise<void> {
await this.ensureReady();
const content = await this.readFile(src);
await this.writeFile(dest, content);
}
async moveFile(src: string, dest: string): Promise<void> {
await this.ensureReady();
await this.copyFile(src, dest);
await this.deleteFile(src);
}
async mkdir(dirPath: string, options?: { recursive?: boolean }): Promise<void> {
await this.ensureReady();
const p = this.normalizePath(dirPath);
if (options?.recursive) {
this.mkdirRecursive(p);
} else {
this.dirs.add(p);
}
}
async rmdir(dirPath: string, options?: RemoveOptions): Promise<void> {
await this.ensureReady();
const p = this.normalizePath(dirPath);
if (options?.recursive) {
for (const key of [...this.files.keys()]) {
if (key.startsWith(p + '/') || key === p) this.files.delete(key);
}
for (const d of [...this.dirs]) {
if (d.startsWith(p + '/') || d === p) this.dirs.delete(d);
}
} else {
this.dirs.delete(p);
}
}
async readdir(dirPath: string, options?: ListOptions): Promise<FileEntry[]> {
await this.ensureReady();
const p = this.normalizePath(dirPath);
const entries: FileEntry[] = [];
const seen = new Set<string>();
for (const d of this.dirs) {
if (d === p) continue;
if (!d.startsWith(p + '/')) continue;
const rel = d.slice(p.length + 1);
if (!rel) continue;
const isDirectChild = !rel.includes('/');
if (isDirectChild || options?.recursive) {
const name = rel.split('/').pop()!;
if (!seen.has(`dir:${name}`)) {
seen.add(`dir:${name}`);
entries.push({ name, type: 'directory' });
}
}
}
for (const [filePath] of this.files) {
if (!filePath.startsWith(p + '/')) continue;
const rel = filePath.slice(p.length + 1);
if (!rel) continue;
const isDirectChild = !rel.includes('/');
if (isDirectChild || options?.recursive) {
const name = filePath.split('/').pop()!;
if (options?.extension) {
const ext = options.extension.startsWith('.')
? options.extension
: `.${options.extension}`;
if (!name.endsWith(ext)) continue;
}
if (!seen.has(`file:${name}`)) {
seen.add(`file:${name}`);
entries.push({ name, type: 'file' });
}
}
}
return entries;
}
async exists(filePath: string): Promise<boolean> {
await this.ensureReady();
const p = this.normalizePath(filePath);
return this.files.has(p) || this.dirs.has(p);
}
async stat(filePath: string): Promise<FileStat> {
await this.ensureReady();
const p = this.normalizePath(filePath);
const now = new Date();
if (this.dirs.has(p)) {
return {
name: p.split('/').pop() ?? '/',
path: filePath,
type: 'directory',
size: 0,
createdAt: now,
modifiedAt: now,
};
}
const buf = this.files.get(p);
if (!buf) throw new Error(`ENOENT: ${p}`);
return {
name: p.split('/').pop()!,
path: filePath,
type: 'file',
size: buf.length,
createdAt: now,
modifiedAt: now,
};
}
getMountConfig(): MountConfig {
return { type: 'local', basePath: '/mem' };
}
getInstructions(): string {
return 'In-memory filesystem. All file paths are relative to /mem.';
}
getFileContent(filePath: string): string | undefined {
const p = this.normalizePath(filePath);
return this.files.get(p)?.toString('utf-8');
}
private mkdirRecursive(p: string): void {
const parts = p.split('/');
let current = '';
for (const part of parts) {
current += current === '/' ? part : `/${part}`;
if (!current) current = '/';
this.dirs.add(current);
}
}
}
export class FakeProcessHandle extends ProcessHandle {
readonly pid: number;
private resolvedExitCode: number | undefined;
private readonly outputFn: (command: string) => {
stdout: string;
stderr: string;
exitCode: number;
};
private readonly cmdString: string;
constructor(
pid: number,
command: string,
outputFn: (cmd: string) => { stdout: string; stderr: string; exitCode: number },
) {
super();
this.pid = pid;
this.cmdString = command;
this.command = command;
this.outputFn = outputFn;
}
get exitCode(): number | undefined {
return this.resolvedExitCode;
}
async kill(): Promise<boolean> {
this.resolvedExitCode = 137;
return await Promise.resolve(true);
}
async sendStdin(_data: string): Promise<void> {}
protected async _wait(): Promise<CommandResult> {
const result = this.outputFn(this.cmdString);
this.emitStdout(result.stdout);
if (result.stderr) this.emitStderr(result.stderr);
this.resolvedExitCode = result.exitCode;
return await Promise.resolve({
success: result.exitCode === 0,
exitCode: result.exitCode,
stdout: this.stdout,
stderr: this.stderr,
executionTimeMs: 1,
command: this.command,
});
}
}
// ---------------------------------------------------------------------------
// Fake process manager
// ---------------------------------------------------------------------------
export class FakeProcessManager extends SandboxProcessManager {
private nextPid = 1;
private tracked = new Map<number, FakeProcessHandle>();
commandHandler: (command: string) => { stdout: string; stderr: string; exitCode: number };
constructor() {
super();
this.commandHandler = (cmd) => ({ stdout: `executed: ${cmd}\n`, stderr: '', exitCode: 0 });
}
async spawn(command: string, _options?: SpawnProcessOptions): Promise<ProcessHandle> {
const pid = this.nextPid++;
const handle = new FakeProcessHandle(pid, command, this.commandHandler);
this.tracked.set(pid, handle);
return await Promise.resolve(handle);
}
async list(): Promise<ProcessInfo[]> {
return await Promise.resolve(
[...this.tracked.entries()].map(([pid, h]) => ({
pid,
command: h.command,
exitCode: h.exitCode,
})),
);
}
async get(pid: number): Promise<ProcessHandle | undefined> {
return await Promise.resolve(this.tracked.get(pid));
}
async kill(pid: number): Promise<boolean> {
const h = this.tracked.get(pid);
if (!h) return false;
const result = await h.kill();
this.tracked.delete(pid);
return result;
}
}
export class FakeSandbox extends BaseSandbox {
readonly id: string;
readonly name: string;
readonly provider = 'fake';
constructor(id: string, pm: FakeProcessManager) {
super({ processes: pm });
this.id = id;
this.name = `fake-sandbox-${id}`;
}
async start(): Promise<void> {}
async stop(): Promise<void> {}
async destroy(): Promise<void> {}
override getInstructions(): string {
return 'Fake sandbox for executing commands.';
}
}

View file

@ -0,0 +1,325 @@
import { InMemoryFilesystem, FakeProcessManager, FakeSandbox } from './test-utils';
import type { FileEntry } from '../../workspace/types';
import { Workspace } from '../../workspace/workspace';
// ---------------------------------------------------------------------------
// Integration tests
// ---------------------------------------------------------------------------
describe('Workspace integration with fakes', () => {
let memFs: InMemoryFilesystem;
let fakeProcessManager: FakeProcessManager;
let fakeSandbox: FakeSandbox;
let workspace: Workspace;
beforeEach(async () => {
memFs = new InMemoryFilesystem();
fakeProcessManager = new FakeProcessManager();
fakeSandbox = new FakeSandbox('test', fakeProcessManager);
workspace = new Workspace({
id: 'integration-test',
filesystem: memFs,
sandbox: fakeSandbox,
});
await workspace.init();
});
afterEach(async () => {
await workspace.destroy();
});
it('initializes all providers and reaches ready state', () => {
expect(workspace.status).toBe('ready');
expect(memFs.status).toBe('ready');
expect(fakeSandbox.status).toBe('running');
});
it('returns combined instructions', () => {
const instructions = workspace.getInstructions();
expect(instructions).toContain('Fake sandbox');
expect(instructions).toContain('In-memory filesystem');
});
it('exposes all expected tools', () => {
const tools = workspace.getTools();
const names = tools.map((t) => t.name);
expect(names).toContain('workspace_read_file');
expect(names).toContain('workspace_write_file');
expect(names).toContain('workspace_list_files');
expect(names).toContain('workspace_file_stat');
expect(names).toContain('workspace_mkdir');
expect(names).toContain('workspace_execute_command');
});
describe('filesystem tools end-to-end', () => {
it('write_file → read_file round-trip', async () => {
const tools = workspace.getTools();
const write = tools.find((t) => t.name === 'workspace_write_file')!;
const read = tools.find((t) => t.name === 'workspace_read_file')!;
await write.handler!(
{ path: '/hello.txt', content: 'Hello from integration test!' },
{} as never,
);
const result = await read.handler!({ path: '/hello.txt', encoding: 'utf-8' }, {} as never);
expect((result as { content: string }).content).toBe('Hello from integration test!');
});
it('mkdir → write → list round-trip', async () => {
const tools = workspace.getTools();
const mkdirTool = tools.find((t) => t.name === 'workspace_mkdir')!;
const write = tools.find((t) => t.name === 'workspace_write_file')!;
const list = tools.find((t) => t.name === 'workspace_list_files')!;
await mkdirTool.handler!({ path: '/project' }, {} as never);
await write.handler!({ path: '/project/index.ts', content: 'export {}' }, {} as never);
await write.handler!({ path: '/project/readme.md', content: '# Readme' }, {} as never);
const result = (await list.handler!({ path: '/project' }, {} as never)) as {
entries: FileEntry[];
};
expect(result.entries).toHaveLength(2);
const names = result.entries.map((e) => e.name);
expect(names).toContain('index.ts');
expect(names).toContain('readme.md');
});
it('write → stat returns metadata', async () => {
const tools = workspace.getTools();
const write = tools.find((t) => t.name === 'workspace_write_file')!;
const stat = tools.find((t) => t.name === 'workspace_file_stat')!;
await write.handler!({ path: '/data.json', content: '{"key": "value"}' }, {} as never);
const result = (await stat.handler!({ path: '/data.json' }, {} as never)) as {
name: string;
type: string;
size: number;
};
expect(result.name).toBe('data.json');
expect(result.type).toBe('file');
expect(result.size).toBe(16);
});
});
describe('sandbox tools end-to-end', () => {
it('executes a command through the tool', async () => {
fakeProcessManager.commandHandler = (cmd) => ({
stdout: `ran: ${cmd}\n`,
stderr: '',
exitCode: 0,
});
const tools = workspace.getTools();
const exec = tools.find((t) => t.name === 'workspace_execute_command')!;
const result = (await exec.handler!({ command: 'echo test' }, {} as never)) as {
success: boolean;
stdout: string;
exitCode: number;
};
expect(result.success).toBe(true);
expect(result.stdout).toBe('ran: echo test\n');
expect(result.exitCode).toBe(0);
});
it('reports command failure', async () => {
fakeProcessManager.commandHandler = () => ({
stdout: '',
stderr: 'command not found',
exitCode: 127,
});
const tools = workspace.getTools();
const exec = tools.find((t) => t.name === 'workspace_execute_command')!;
const result = (await exec.handler!({ command: 'invalid-cmd' }, {} as never)) as {
success: boolean;
stderr: string;
exitCode: number;
};
expect(result.success).toBe(false);
expect(result.exitCode).toBe(127);
expect(result.stderr).toBe('command not found');
});
});
describe('full lifecycle', () => {
it('init → use → destroy cycle', async () => {
const ws = new Workspace({
filesystem: new InMemoryFilesystem('lc-fs'),
sandbox: new FakeSandbox('lc-sb', new FakeProcessManager()),
});
expect(ws.status).toBe('pending');
await ws.init();
expect(ws.status).toBe('ready');
expect(ws.filesystem!.status).toBe('ready');
expect(ws.sandbox!.status).toBe('running');
const tools = ws.getTools();
expect(tools.length).toBeGreaterThan(0);
await ws.destroy();
expect(ws.status).toBe('destroyed');
expect(ws.sandbox!.status).toBe('destroyed');
expect(ws.filesystem!.status).toBe('destroyed');
});
it('workspace with only filesystem', async () => {
const ws = new Workspace({ filesystem: new InMemoryFilesystem('fs-only') });
await ws.init();
const tools = ws.getTools();
const names = tools.map((t) => t.name);
expect(names).not.toContain('workspace_execute_command');
expect(names).toContain('workspace_read_file');
await ws.destroy();
});
it('workspace with only sandbox', async () => {
const ws = new Workspace({
sandbox: new FakeSandbox('sb-only', new FakeProcessManager()),
});
await ws.init();
const tools = ws.getTools();
const names = tools.map((t) => t.name);
expect(names).toContain('workspace_execute_command');
expect(names).not.toContain('workspace_read_file');
await ws.destroy();
});
it('empty workspace lifecycle', async () => {
const ws = new Workspace({});
await ws.init();
expect(ws.status).toBe('ready');
expect(ws.getTools()).toEqual([]);
await ws.destroy();
expect(ws.status).toBe('destroyed');
});
});
describe('in-memory filesystem operations', () => {
it('supports append', async () => {
await memFs.writeFile('/log.txt', 'line1\n');
await memFs.appendFile('/log.txt', 'line2\n');
const content = await memFs.readFile('/log.txt', { encoding: 'utf-8' });
expect(content).toBe('line1\nline2\n');
});
it('supports copy and move', async () => {
await memFs.writeFile('/original.txt', 'original');
await memFs.copyFile('/original.txt', '/copy.txt');
expect(await memFs.readFile('/copy.txt', { encoding: 'utf-8' })).toBe('original');
await memFs.moveFile('/copy.txt', '/moved.txt');
expect(await memFs.exists('/copy.txt')).toBe(false);
expect(await memFs.readFile('/moved.txt', { encoding: 'utf-8' })).toBe('original');
});
it('supports rmdir recursive', async () => {
await memFs.mkdir('/deep/nested', { recursive: true });
await memFs.writeFile('/deep/nested/file.txt', 'data');
await memFs.rmdir('/deep', { recursive: true });
expect(await memFs.exists('/deep')).toBe(false);
expect(await memFs.exists('/deep/nested/file.txt')).toBe(false);
});
it('readFile throws on non-existent file', async () => {
await expect(memFs.readFile('/nonexistent')).rejects.toThrow('ENOENT');
});
it('deleteFile throws on non-existent file', async () => {
await expect(memFs.deleteFile('/nonexistent')).rejects.toThrow('ENOENT');
});
});
describe('fake process manager', () => {
it('tracks spawned processes', async () => {
const handle = await fakeProcessManager.spawn('echo hello');
const processes = await fakeProcessManager.list();
expect(processes).toHaveLength(1);
expect(processes[0].pid).toBe(handle.pid);
});
it('can retrieve a handle by pid', async () => {
const handle = await fakeProcessManager.spawn('ls');
const retrieved = await fakeProcessManager.get(handle.pid);
expect(retrieved).toBe(handle);
});
it('returns undefined for unknown pid', async () => {
expect(await fakeProcessManager.get(999)).toBeUndefined();
});
it('can kill a process', async () => {
const handle = await fakeProcessManager.spawn('sleep 100');
const killed = await fakeProcessManager.kill(handle.pid);
expect(killed).toBe(true);
expect(handle.exitCode).toBe(137);
});
it('kill returns false for unknown pid', async () => {
expect(await fakeProcessManager.kill(999)).toBe(false);
});
});
describe('ProcessHandle stdout/stderr buffering', () => {
it('buffers stdout and stderr', async () => {
fakeProcessManager.commandHandler = () => ({
stdout: 'output data',
stderr: 'error data',
exitCode: 0,
});
const handle = await fakeProcessManager.spawn('test');
const collected: string[] = [];
await handle.wait({
onStdout: (data) => collected.push(`out:${data}`),
onStderr: (data) => collected.push(`err:${data}`),
});
expect(handle.stdout).toBe('output data');
expect(handle.stderr).toBe('error data');
expect(collected).toContain('out:output data');
expect(collected).toContain('err:error data');
});
it('supports multiple stdout/stderr listeners', async () => {
fakeProcessManager.commandHandler = () => ({
stdout: 'hello',
stderr: '',
exitCode: 0,
});
const handle = await fakeProcessManager.spawn('test');
const listener1: string[] = [];
const listener2: string[] = [];
handle.addStdoutListener((d) => listener1.push(d));
handle.addStdoutListener((d) => listener2.push(d));
await handle.wait();
expect(listener1).toEqual(['hello']);
expect(listener2).toEqual(['hello']);
});
});
});

View file

@ -0,0 +1,268 @@
import { createWorkspaceTools } from '../../workspace/tools/workspace-tools';
import type { WorkspaceFilesystem, WorkspaceSandbox, CommandResult } from '../../workspace/types';
function makeFakeFilesystem(overrides: Partial<WorkspaceFilesystem> = {}): WorkspaceFilesystem {
return {
id: 'test-fs',
name: 'TestFS',
provider: 'test',
status: 'ready',
readFile: jest.fn().mockResolvedValue('file content'),
writeFile: jest.fn().mockResolvedValue(undefined),
appendFile: jest.fn().mockResolvedValue(undefined),
deleteFile: jest.fn().mockResolvedValue(undefined),
copyFile: jest.fn().mockResolvedValue(undefined),
moveFile: jest.fn().mockResolvedValue(undefined),
mkdir: jest.fn().mockResolvedValue(undefined),
rmdir: jest.fn().mockResolvedValue(undefined),
readdir: jest.fn().mockResolvedValue([
{ name: 'file1.txt', type: 'file' as const },
{ name: 'subdir', type: 'directory' as const },
]),
exists: jest.fn().mockResolvedValue(true),
stat: jest.fn().mockResolvedValue({
name: 'test.txt',
path: '/test.txt',
type: 'file' as const,
size: 100,
createdAt: new Date('2024-01-01'),
modifiedAt: new Date('2024-06-01'),
}),
...overrides,
};
}
function makeFakeSandbox(overrides: Partial<WorkspaceSandbox> = {}): WorkspaceSandbox {
const mockResult: CommandResult = {
success: true,
exitCode: 0,
stdout: 'hello world',
stderr: '',
executionTimeMs: 42,
};
return {
id: 'test-sandbox',
name: 'TestSandbox',
provider: 'test',
status: 'running',
executeCommand: jest.fn().mockResolvedValue(mockResult),
...overrides,
};
}
describe('createWorkspaceTools', () => {
it('returns no tools when workspace has no providers', () => {
const tools = createWorkspaceTools({});
expect(tools).toEqual([]);
});
it('returns filesystem tools when filesystem is set', () => {
const tools = createWorkspaceTools({ filesystem: makeFakeFilesystem() });
const names = tools.map((t) => t.name);
expect(names).toEqual([
'workspace_read_file',
'workspace_write_file',
'workspace_list_files',
'workspace_file_stat',
'workspace_mkdir',
'workspace_delete_file',
'workspace_append_file',
'workspace_copy_file',
'workspace_move_file',
'workspace_rmdir',
]);
});
it('returns execute_command when sandbox has executeCommand', () => {
const tools = createWorkspaceTools({ sandbox: makeFakeSandbox() });
const names = tools.map((t) => t.name);
expect(names).toEqual(['workspace_execute_command']);
});
it('does not return execute_command when sandbox lacks executeCommand', () => {
const tools = createWorkspaceTools({
sandbox: makeFakeSandbox({ executeCommand: undefined }),
});
expect(tools).toEqual([]);
});
it('returns all tools when both filesystem and sandbox are set', () => {
const tools = createWorkspaceTools({
filesystem: makeFakeFilesystem(),
sandbox: makeFakeSandbox(),
});
const names = tools.map((t) => t.name);
expect(names).toContain('workspace_read_file');
expect(names).toContain('workspace_execute_command');
expect(names).toHaveLength(11);
});
describe('tool handlers', () => {
it('read_file handler calls filesystem.readFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const readTool = tools.find((t) => t.name === 'workspace_read_file')!;
const result = await readTool.handler!({ path: '/test.txt', encoding: 'utf-8' }, {} as never);
expect(fs.readFile).toHaveBeenCalledWith('/test.txt', { encoding: 'utf-8' });
expect(result).toEqual({ content: 'file content' });
});
it('write_file handler calls filesystem.writeFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const writeTool = tools.find((t) => t.name === 'workspace_write_file')!;
const result = await writeTool.handler!(
{ path: '/out.txt', content: 'hello', recursive: true },
{} as never,
);
expect(fs.writeFile).toHaveBeenCalledWith('/out.txt', 'hello', { recursive: true });
expect(result).toEqual({ success: true });
});
it('list_files handler calls filesystem.readdir', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const listTool = tools.find((t) => t.name === 'workspace_list_files')!;
const result = await listTool.handler!({ path: '/', recursive: false }, {} as never);
expect(fs.readdir).toHaveBeenCalledWith('/', { recursive: false });
expect(result).toEqual({
entries: [
{ name: 'file1.txt', type: 'file' },
{ name: 'subdir', type: 'directory' },
],
});
});
it('file_stat handler calls filesystem.stat', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const statTool = tools.find((t) => t.name === 'workspace_file_stat')!;
const result = await statTool.handler!({ path: '/test.txt' }, {} as never);
expect(fs.stat).toHaveBeenCalledWith('/test.txt');
expect(result).toEqual({
name: 'test.txt',
path: '/test.txt',
type: 'file',
size: 100,
createdAt: '2024-01-01T00:00:00.000Z',
modifiedAt: '2024-06-01T00:00:00.000Z',
});
});
it('mkdir handler calls filesystem.mkdir', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const mkdirTool = tools.find((t) => t.name === 'workspace_mkdir')!;
const result = await mkdirTool.handler!({ path: '/new-dir', recursive: true }, {} as never);
expect(fs.mkdir).toHaveBeenCalledWith('/new-dir', { recursive: true });
expect(result).toEqual({ success: true });
});
it('delete_file handler calls filesystem.deleteFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const deleteTool = tools.find((t) => t.name === 'workspace_delete_file')!;
const result = await deleteTool.handler!(
{ path: '/old.txt', recursive: false, force: true },
{} as never,
);
expect(fs.deleteFile).toHaveBeenCalledWith('/old.txt', { recursive: false, force: true });
expect(result).toEqual({ success: true });
});
it('append_file handler calls filesystem.appendFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const appendTool = tools.find((t) => t.name === 'workspace_append_file')!;
const result = await appendTool.handler!(
{ path: '/log.txt', content: 'new line' },
{} as never,
);
expect(fs.appendFile).toHaveBeenCalledWith('/log.txt', 'new line');
expect(result).toEqual({ success: true });
});
it('copy_file handler calls filesystem.copyFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const copyTool = tools.find((t) => t.name === 'workspace_copy_file')!;
const result = await copyTool.handler!(
{ src: '/a.txt', dest: '/b.txt', overwrite: true },
{} as never,
);
expect(fs.copyFile).toHaveBeenCalledWith('/a.txt', '/b.txt', { overwrite: true });
expect(result).toEqual({ success: true });
});
it('move_file handler calls filesystem.moveFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const moveTool = tools.find((t) => t.name === 'workspace_move_file')!;
const result = await moveTool.handler!(
{ src: '/old.txt', dest: '/new.txt', overwrite: false },
{} as never,
);
expect(fs.moveFile).toHaveBeenCalledWith('/old.txt', '/new.txt', { overwrite: false });
expect(result).toEqual({ success: true });
});
it('rmdir handler calls filesystem.rmdir', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const rmdirTool = tools.find((t) => t.name === 'workspace_rmdir')!;
const result = await rmdirTool.handler!(
{ path: '/old-dir', recursive: true, force: false },
{} as never,
);
expect(fs.rmdir).toHaveBeenCalledWith('/old-dir', { recursive: true, force: false });
expect(result).toEqual({ success: true });
});
it('execute_command handler calls sandbox.executeCommand', async () => {
const sb = makeFakeSandbox();
const tools = createWorkspaceTools({ sandbox: sb });
const execTool = tools.find((t) => t.name === 'workspace_execute_command')!;
const result = await execTool.handler!(
{ command: 'echo hello', cwd: '/tmp', timeout: 5000 },
{} as never,
);
expect(sb.executeCommand).toHaveBeenCalledWith('echo hello', undefined, {
cwd: '/tmp',
timeout: 5000,
});
expect(result).toEqual({
success: true,
exitCode: 0,
stdout: 'hello world',
stderr: '',
executionTimeMs: 42,
});
});
});
});

View file

@ -0,0 +1,309 @@
import type { WorkspaceFilesystem, WorkspaceSandbox } from '../../workspace/types';
import { Workspace } from '../../workspace/workspace';
function makeFakeFilesystem(overrides: Partial<WorkspaceFilesystem> = {}): WorkspaceFilesystem {
return {
id: 'test-fs',
name: 'TestFS',
provider: 'test',
status: 'pending',
readFile: jest.fn(),
writeFile: jest.fn(),
appendFile: jest.fn(),
deleteFile: jest.fn(),
copyFile: jest.fn(),
moveFile: jest.fn(),
mkdir: jest.fn(),
rmdir: jest.fn(),
readdir: jest.fn(),
exists: jest.fn(),
stat: jest.fn(),
...overrides,
};
}
function makeFakeSandbox(overrides: Partial<WorkspaceSandbox> = {}): WorkspaceSandbox {
return {
id: 'test-sandbox',
name: 'TestSandbox',
provider: 'test',
status: 'pending',
...overrides,
};
}
describe('Workspace', () => {
describe('constructor', () => {
it('generates an id when none is provided', () => {
const ws = new Workspace({});
expect(ws.id).toMatch(/^workspace-[0-9a-f-]+$/);
});
it('uses a custom id when provided', () => {
const ws = new Workspace({ id: 'my-ws' });
expect(ws.id).toBe('my-ws');
});
it('generates a name from the id when none is provided', () => {
const ws = new Workspace({ id: 'abc' });
expect(ws.name).toBe('workspace-abc');
});
it('uses a custom name when provided', () => {
const ws = new Workspace({ id: 'abc', name: 'My Workspace' });
expect(ws.name).toBe('My Workspace');
});
it('starts with pending status', () => {
const ws = new Workspace({});
expect(ws.status).toBe('pending');
});
it('exposes filesystem and sandbox', () => {
const fs = makeFakeFilesystem();
const sb = makeFakeSandbox();
const ws = new Workspace({ filesystem: fs, sandbox: sb });
expect(ws.filesystem).toBe(fs);
expect(ws.sandbox).toBe(sb);
});
it('returns undefined for absent filesystem and sandbox', () => {
const ws = new Workspace({});
expect(ws.filesystem).toBeUndefined();
expect(ws.sandbox).toBeUndefined();
});
it('generates unique IDs using randomUUID', () => {
const ws1 = new Workspace({});
const ws2 = new Workspace({});
expect(ws1.id).not.toBe(ws2.id);
expect(ws1.id).toMatch(/^workspace-/);
});
});
describe('init', () => {
it('calls filesystem._init then sandbox._start', async () => {
const order: string[] = [];
const fs = makeFakeFilesystem({
_init: jest.fn(async () => {
await Promise.resolve();
order.push('fs-init');
}),
});
const sb = makeFakeSandbox({
_start: jest.fn(async () => {
await Promise.resolve();
order.push('sb-start');
}),
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
await ws.init();
expect(order).toEqual(['fs-init', 'sb-start']);
expect(ws.status).toBe('ready');
});
it('sets status to ready when no providers', async () => {
const ws = new Workspace({});
await ws.init();
expect(ws.status).toBe('ready');
});
it('initializes only filesystem when no sandbox', async () => {
const fs = makeFakeFilesystem({
_init: jest.fn().mockResolvedValue(undefined),
});
const ws = new Workspace({ filesystem: fs });
await ws.init();
expect(fs._init).toHaveBeenCalled();
expect(ws.status).toBe('ready');
});
it('starts only sandbox when no filesystem', async () => {
const sb = makeFakeSandbox({
_start: jest.fn().mockResolvedValue(undefined),
});
const ws = new Workspace({ sandbox: sb });
await ws.init();
expect(sb._start).toHaveBeenCalled();
expect(ws.status).toBe('ready');
});
it('destroys filesystem and sets error status when sandbox start fails', async () => {
const fs = makeFakeFilesystem({
_init: jest.fn().mockResolvedValue(undefined),
_destroy: jest.fn().mockResolvedValue(undefined),
});
const sb = makeFakeSandbox({
_start: jest.fn().mockRejectedValue(new Error('sandbox start failed')),
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
await expect(ws.init()).rejects.toThrow('sandbox start failed');
expect(fs._init).toHaveBeenCalled();
expect(fs._destroy).toHaveBeenCalled();
expect(ws.status).toBe('error');
});
it('is idempotent when already ready', async () => {
const fs = makeFakeFilesystem({
_init: jest.fn().mockResolvedValue(undefined),
});
const ws = new Workspace({ filesystem: fs });
await ws.init();
(fs._init as jest.Mock).mockClear();
await ws.init();
expect(fs._init).not.toHaveBeenCalled();
});
it('deduplicates concurrent init calls', async () => {
let resolveInit: () => void;
const fs = makeFakeFilesystem({
_init: jest.fn(
async () =>
await new Promise<void>((r) => {
resolveInit = r;
}),
),
});
const ws = new Workspace({ filesystem: fs });
const p1 = ws.init();
const p2 = ws.init();
resolveInit!();
await Promise.all([p1, p2]);
expect(fs._init).toHaveBeenCalledTimes(1);
expect(ws.status).toBe('ready');
});
});
describe('destroy', () => {
it('calls sandbox._destroy then filesystem._destroy', async () => {
const order: string[] = [];
const fs = makeFakeFilesystem({
_destroy: jest.fn(async () => {
await Promise.resolve();
order.push('fs-destroy');
}),
});
const sb = makeFakeSandbox({
_destroy: jest.fn(async () => {
await Promise.resolve();
order.push('sb-destroy');
}),
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
await ws.destroy();
expect(order).toEqual(['sb-destroy', 'fs-destroy']);
expect(ws.status).toBe('destroyed');
});
it('sets status to destroyed when no providers', async () => {
const ws = new Workspace({});
await ws.destroy();
expect(ws.status).toBe('destroyed');
});
it('transitions to error when sandbox destroy throws', async () => {
const fs = makeFakeFilesystem({
_destroy: jest.fn().mockResolvedValue(undefined),
});
const sb = makeFakeSandbox({
_destroy: jest.fn().mockRejectedValue(new Error('sandbox boom')),
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
await expect(ws.destroy()).rejects.toThrow('sandbox boom');
expect(fs._destroy).toHaveBeenCalled();
expect(ws.status).toBe('error');
});
});
describe('getInstructions', () => {
it('combines sandbox and filesystem instructions', () => {
const fs = makeFakeFilesystem({
getInstructions: () => 'FS instructions',
});
const sb = makeFakeSandbox({
getInstructions: () => 'SB instructions',
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
expect(ws.getInstructions()).toBe('SB instructions\n\nFS instructions');
});
it('returns empty string when no providers', () => {
const ws = new Workspace({});
expect(ws.getInstructions()).toBe('');
});
it('omits empty instruction strings', () => {
const fs = makeFakeFilesystem({
getInstructions: () => '',
});
const sb = makeFakeSandbox({
getInstructions: () => 'SB only',
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
expect(ws.getInstructions()).toBe('SB only');
});
});
describe('getTools', () => {
it('returns filesystem tools when filesystem is set', () => {
const fs = makeFakeFilesystem();
const ws = new Workspace({ filesystem: fs });
const tools = ws.getTools();
const names = tools.map((t) => t.name);
expect(names).toContain('workspace_read_file');
expect(names).toContain('workspace_write_file');
expect(names).toContain('workspace_list_files');
expect(names).toContain('workspace_file_stat');
expect(names).toContain('workspace_mkdir');
});
it('returns execute_command tool when sandbox has executeCommand', () => {
const sb = makeFakeSandbox({
executeCommand: jest.fn(),
});
const ws = new Workspace({ sandbox: sb });
const tools = ws.getTools();
const names = tools.map((t) => t.name);
expect(names).toContain('workspace_execute_command');
});
it('returns empty array when no providers', () => {
const ws = new Workspace({});
expect(ws.getTools()).toEqual([]);
});
it('does not include execute_command if sandbox has no executeCommand', () => {
const sb = makeFakeSandbox();
const ws = new Workspace({ sandbox: sb });
const tools = ws.getTools();
const names = tools.map((t) => t.name);
expect(names).not.toContain('workspace_execute_command');
});
});
});

View file

@ -0,0 +1,25 @@
import { Eval } from '../sdk/eval';
/** Deterministic categorization eval — checks if output matches the expected label. */
export function categorization(): Eval {
return new Eval('categorization')
.description('Checks if output matches the expected category label')
.check(({ output, expected }) => {
if (!expected) {
return { pass: false, reasoning: 'No expected category provided' };
}
const normalOutput = output.toLowerCase().trim();
const normalExpected = expected.toLowerCase().trim();
if (normalOutput === normalExpected) {
return { pass: true, reasoning: 'Exact match' };
}
if (normalOutput.includes(normalExpected)) {
return { pass: true, reasoning: `Output contains expected label "${expected}"` };
}
return { pass: false, reasoning: `Expected "${expected}", got "${output}"` };
});
}
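// Illustrative behavior (sample labels are hypothetical, traced against the check above):
//   output "Category: Billing", expected "billing" -> pass ('Output contains expected label "billing"')
//   output "Billing",           expected "billing" -> pass ('Exact match')
//   output "Refunds",           expected "billing" -> fail ('Expected "billing", got "Refunds"')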

View file

@ -0,0 +1,35 @@
import { Eval } from '../sdk/eval';
/**
* Deterministic keyword presence eval.
* Expects `expected` to be a comma-separated list of keywords.
* Passes only if ALL keywords are found in the output.
*/
export function containsKeywords(): Eval {
return new Eval('contains-keywords')
.description('Checks if output contains all expected keywords')
.check(({ output, expected }) => {
if (!expected) {
return { pass: false, reasoning: 'No expected keywords provided' };
}
const keywords = expected
.split(',')
.map((k) => k.trim().toLowerCase())
.filter(Boolean);
if (keywords.length === 0) {
return { pass: false, reasoning: 'No keywords to check' };
}
const normalOutput = output.toLowerCase();
const missing = keywords.filter((k) => !normalOutput.includes(k));
return {
pass: missing.length === 0,
reasoning:
missing.length === 0
? `All ${keywords.length} keywords found`
: `Missing ${missing.length}/${keywords.length} keywords: ${missing.join(', ')}`,
};
});
}
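// Illustrative behavior (sample strings are hypothetical, traced against the check above):
//   expected "refund, 30 days", output "You can request a refund within 30 days."
//     -> pass ('All 2 keywords found')
//   expected "refund, 30 days", output "Contact support for a refund."
//     -> fail ('Missing 1/2 keywords: 30 days')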

View file

@ -0,0 +1,30 @@
import { parseJudgeResponse } from './parse-judge-response';
import { Eval } from '../sdk/eval';
/**
* LLM-as-judge correctness eval. Returns an Eval pre-configured with a
 * judge handler; the caller must still set `.model()` and `.credential()`.
*/
export function correctness(): Eval {
return new Eval('correctness')
.description('Judges if the output is factually correct compared to the expected answer')
.judge(async ({ input, output, expected, llm }) => {
const prompt = [
'You are evaluating an AI assistant response for factual correctness.',
'',
`User question: ${input}`,
`Expected answer: ${expected ?? '(none provided)'}`,
`Actual answer: ${output}`,
'',
'Does the actual answer correctly address the question and match the expected answer?',
'Answer with pass or fail:',
'- pass = the answer is correct and addresses the question',
'- fail = the answer is incorrect, incomplete, or irrelevant',
'',
'Respond with ONLY a JSON object (no markdown fences): {"pass": true/false, "reasoning": "<explanation>"}',
].join('\n');
const result = await llm(prompt);
return parseJudgeResponse(result.text);
});
}
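// Usage sketch: `.model()` and `.credential()` are required per the doc comment above;
// the model id mirrors the example used elsewhere in this package, and the credential
// name 'anthropic' is a placeholder, not something this file defines.
//   const scorer = correctness()
//     .model('anthropic/claude-sonnet-4-5')
//     .credential('anthropic');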

View file

@ -0,0 +1,28 @@
import { parseJudgeResponse } from './parse-judge-response';
import { Eval } from '../sdk/eval';
/**
* LLM-as-judge helpfulness eval. Returns an Eval pre-configured with a
 * judge handler; the caller must still set `.model()` and `.credential()`.
*/
export function helpfulness(): Eval {
return new Eval('helpfulness')
.description('Judges whether the response is helpful for the user query')
.judge(async ({ input, output, llm }) => {
const prompt = [
'You are evaluating an AI assistant response for helpfulness.',
'',
`User question: ${input}`,
`Assistant response: ${output}`,
'',
'Is this response helpful to the user?',
'- pass = the response is helpful, addresses the question, and provides useful information',
'- fail = the response is unhelpful, off-topic, or lacks useful information',
'',
'Respond with ONLY a JSON object (no markdown fences): {"pass": true/false, "reasoning": "<explanation>"}',
].join('\n');
const result = await llm(prompt);
return parseJudgeResponse(result.text);
});
}
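// Usage sketch, mirroring the correctness scorer (credential name is a placeholder):
//   const scorer = helpfulness().model('anthropic/claude-sonnet-4-5').credential('anthropic');
// Unlike correctness, this judge prompt uses only the input/output pair and ignores `expected`.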

View file

@ -0,0 +1,7 @@
export { correctness } from './correctness';
export { helpfulness } from './helpfulness';
export { stringSimilarity } from './string-similarity';
export { categorization } from './categorization';
export { containsKeywords } from './contains-keywords';
export { jsonValidity } from './json-validity';
export { toolCallAccuracy } from './tool-call-accuracy';

View file

@ -0,0 +1,18 @@
import { Eval } from '../sdk/eval';
/** Deterministic JSON validity eval — checks if the output is parseable JSON. */
export function jsonValidity(): Eval {
return new Eval('json-validity')
.description('Checks if output is valid JSON')
.check(({ output }) => {
try {
JSON.parse(output);
return { pass: true, reasoning: 'Valid JSON' };
} catch (e) {
return {
pass: false,
reasoning: `Invalid JSON: ${e instanceof Error ? e.message : 'parse error'}`,
};
}
});
}
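// Illustrative behavior (sample outputs are hypothetical):
//   output '{"items": []}' -> pass ('Valid JSON')
//   output 'not json'      -> fail (reasoning includes the JSON.parse error message)
// Note that bare JSON scalars such as '42' also parse successfully and therefore pass.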

View file

@ -0,0 +1,32 @@
import type { EvalScore } from '../types';
/**
* Parse an LLM judge response into an EvalScore (pass/fail).
* Handles JSON wrapped in markdown fences, plain JSON, or raw text.
*/
export function parseJudgeResponse(text: string): EvalScore {
// Strip markdown code fences if present: ```json ... ``` or ``` ... ```
const stripped = text
.replace(/^```(?:json)?\s*\n?/i, '')
.replace(/\n?```\s*$/i, '')
.trim();
try {
const parsed = JSON.parse(stripped) as { pass?: boolean; score?: number; reasoning?: string };
// Support both { pass: true } and legacy { score: 0.8 } formats
const pass = parsed.pass ?? (parsed.score !== undefined ? parsed.score >= 0.7 : false);
return {
pass,
reasoning: parsed.reasoning ?? stripped,
};
} catch {
// Fallback: detect pass/fail from plain text or malformed JSON
const lowerText = stripped.toLowerCase();
const hasPassTrue = lowerText.includes('"pass": true') || lowerText.includes('"pass":true');
const hasFailFalse = lowerText.includes('"pass": false') || lowerText.includes('"pass":false');
// If no JSON-like pattern, check for plain-text "pass" or "fail" keywords
const pass =
hasPassTrue || (!hasFailFalse && /\bpass\b/i.test(stripped) && !/\bfail\b/i.test(stripped));
return { pass, reasoning: stripped };
}
}
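// Worked examples (sample inputs are illustrative, traced against the logic above):
//   parseJudgeResponse('```json\n{"pass": true, "reasoning": "ok"}\n```')
//     -> { pass: true, reasoning: 'ok' }        (fences stripped, JSON parsed)
//   parseJudgeResponse('{"score": 0.9, "reasoning": "close"}')
//     -> { pass: true, reasoning: 'close' }     (legacy score >= 0.7 counts as pass)
//   parseJudgeResponse('The answer is correct, so this is a pass.')
//     -> { pass: true, reasoning: <raw text> }  (plain-text fallback, "pass" without "fail")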

View file

@ -0,0 +1,46 @@
import { Eval } from '../sdk/eval';
/**
* Dice coefficient string similarity measures overlap of bigrams between
* two strings. Returns 0-1 where 1 is identical.
*/
function diceSimilarity(a: string, b: string): number {
const normalA = a.toLowerCase().trim();
const normalB = b.toLowerCase().trim();
if (normalA === normalB) return 1;
if (normalA.length < 2 || normalB.length < 2) return 0;
const bigrams = (s: string): Set<string> => {
const set = new Set<string>();
for (let i = 0; i < s.length - 1; i++) {
set.add(s.slice(i, i + 2));
}
return set;
};
const aBigrams = bigrams(normalA);
const bBigrams = bigrams(normalB);
let intersection = 0;
for (const bg of aBigrams) {
if (bBigrams.has(bg)) intersection++;
}
return (2 * intersection) / (aBigrams.size + bBigrams.size);
}
/** Deterministic string similarity eval using Dice coefficient. */
export function stringSimilarity(): Eval {
return new Eval('string-similarity')
.description('Measures string similarity between output and expected answer')
.check(({ output, expected }) => {
if (expected === undefined) {
return { pass: false, reasoning: 'No expected value provided' };
}
const similarity = diceSimilarity(output, expected);
return {
pass: similarity >= 0.7,
reasoning: `Dice similarity: ${(similarity * 100).toFixed(1)}%`,
};
});
}
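// Worked example of the Dice coefficient above (hand-computed):
//   "night" vs "nacht": bigrams {ni, ig, gh, ht} and {na, ac, ch, ht} share only "ht",
//   so similarity = 2 * 1 / (4 + 4) = 0.25, below the 0.7 threshold -> fail.
//   Identical strings short-circuit to 1 and pass.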

View file

@ -0,0 +1,35 @@
import { Eval } from '../sdk/eval';
/**
* Deterministic tool call accuracy eval.
* Expects `expected` to be a comma-separated list of tool names that should have been called.
* Passes only if ALL expected tools were called.
*/
export function toolCallAccuracy(): Eval {
return new Eval('tool-call-accuracy')
.description('Checks if the agent called all expected tools')
.check(({ expected, toolCalls }) => {
if (!expected) {
return { pass: false, reasoning: 'No expected tool names provided' };
}
const expectedTools = expected
.split(',')
.map((t) => t.trim().toLowerCase())
.filter(Boolean);
if (expectedTools.length === 0) {
return { pass: false, reasoning: 'No expected tools to check' };
}
const calledTools = new Set((toolCalls ?? []).map((tc) => tc.tool.toLowerCase()));
const missing = expectedTools.filter((t) => !calledTools.has(t));
return {
pass: missing.length === 0,
reasoning:
missing.length === 0
? `All ${expectedTools.length} expected tools were called`
: `Missing tools: ${missing.join(', ')}. Called: [${[...calledTools].join(', ') || 'none'}]`,
};
});
}
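// Illustrative behavior (tool names are hypothetical, traced against the check above):
//   expected "search,calculator", toolCalls [{ tool: 'Search' }]
//     -> fail ('Missing tools: calculator. Called: [search]')
//   expected "search,calculator", toolCalls [{ tool: 'search' }, { tool: 'Calculator' }]
//     -> pass ('All 2 expected tools were called')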

View file

@ -0,0 +1,129 @@
export type {
BuiltTool,
BuiltProviderTool,
BuiltAgent,
BuiltMemory,
BuiltGuardrail,
BuiltEval,
RunOptions,
AgentResult,
GenerateResult,
StreamResult,
EvalInput,
EvalScore,
EvalRunResult,
EvalResults,
ToolContext,
InterruptibleToolContext,
CheckpointStore,
StreamChunk,
SubAgentUsage,
Provider,
ThinkingConfig,
ThinkingConfigFor,
AnthropicThinkingConfig,
OpenAIThinkingConfig,
GoogleThinkingConfig,
XaiThinkingConfig,
SerializableAgentState,
AgentRunState,
MemoryConfig,
TitleGenerationConfig,
Thread,
SemanticRecallConfig,
ResumeOptions,
McpServerConfig,
McpVerifyResult,
ModelConfig,
ExecutionOptions,
PersistedExecutionOptions,
BuiltTelemetry,
AttributeValue,
} from './types';
export type { ProviderOptions } from '@ai-sdk/provider-utils';
export { AgentEvent } from './types';
export type { AgentEventData, AgentEventHandler } from './types';
export { Tool } from './sdk/tool';
export { Memory } from './sdk/memory';
export { Guardrail } from './sdk/guardrail';
export { Eval } from './sdk/eval';
export { evaluate } from './sdk/evaluate';
export type { DatasetRow, EvaluateConfig } from './sdk/evaluate';
export * as evals from './evals/index';
export { Telemetry } from './sdk/telemetry';
export { LangSmithTelemetry } from './integrations/langsmith';
export type { LangSmithTelemetryConfig } from './integrations/langsmith';
export { Agent } from './sdk/agent';
export { McpClient } from './sdk/mcp-client';
export { Network } from './sdk/network';
export { providerTools } from './sdk/provider-tools';
export { verify } from './sdk/verify';
export type { VerifyResult } from './sdk/verify';
export type {
ContentCitation,
ContentFile,
ContentMetadata,
ContentReasoning,
ContentText,
ContentToolCall,
ContentToolResult,
Message,
MessageContent,
MessageRole,
AgentMessage,
CustomAgentMessages,
AgentDbMessage,
} from './types/sdk/message';
export {
toDbMessage,
filterLlmMessages,
isLlmMessage,
} from './sdk/message';
export { fetchProviderCatalog } from './sdk/catalog';
export type {
ProviderCatalog,
ProviderInfo,
ModelInfo,
ModelCost,
ModelLimits,
} from './sdk/catalog';
export { SqliteMemory } from './storage/sqlite-memory';
export type { SqliteMemoryConfig } from './storage/sqlite-memory';
export { PostgresMemory } from './storage/postgres-memory';
export type { PostgresMemoryConfig } from './storage/postgres-memory';
export { Workspace } from './workspace';
export { BaseFilesystem } from './workspace';
export { BaseSandbox } from './workspace';
export { createWorkspaceTools } from './workspace';
export { SandboxProcessManager, ProcessHandle } from './workspace';
export type {
BaseFilesystemOptions,
FilesystemLifecycleHook,
WorkspaceFilesystem,
WorkspaceSandbox,
WorkspaceConfig,
CommandResult,
CommandOptions,
ExecuteCommandOptions,
FileContent,
FileStat,
FileEntry,
ReadOptions,
WriteOptions,
ListOptions,
RemoveOptions,
CopyOptions,
ProviderStatus,
SandboxInfo,
LocalFilesystemOptions,
LocalSandboxOptions,
DaytonaSandboxOptions,
BaseSandboxOptions,
MountConfig,
MountResult,
SpawnProcessOptions,
ProcessInfo,
} from './workspace';

View file

@ -0,0 +1,131 @@
import { Telemetry } from '../sdk/telemetry';
import type { BuiltTelemetry, OpaqueTracer, OpaqueTracerProvider } from '../types/telemetry';
export interface LangSmithTelemetryConfig {
/** LangSmith API key. If omitted, resolved via `.credential()` or LANGSMITH_API_KEY env var. */
apiKey?: string;
/** LangSmith project name. Falls back to LANGSMITH_PROJECT env var, then 'default'. */
project?: string;
/** LangSmith API base URL. Falls back to LANGSMITH_ENDPOINT env var. */
endpoint?: string;
/**
* Override the full OTLP traces URL. Normally derived from `endpoint`
* as `${endpoint}/otel/v1/traces`. Use this for custom collectors or testing.
*/
url?: string;
}
/**
* Create the LangSmith OTel tracer + provider from config.
* Dynamically imports langsmith and OTel packages so they remain
* optional peer dependencies.
*/
async function createLangSmithTracer(
config?: LangSmithTelemetryConfig,
resolvedApiKey?: string,
): Promise<{ tracer: OpaqueTracer; provider: OpaqueTracerProvider }> {
const { NodeTracerProvider } = (await import('@opentelemetry/sdk-trace-node')) as {
NodeTracerProvider: new (cfg?: {
spanProcessors?: unknown[];
}) => OpaqueTracerProvider & {
getTracer(name: string): OpaqueTracer;
};
};
const { LangSmithOTLPTraceExporter } = (await import('langsmith/experimental/otel/exporter')) as {
LangSmithOTLPTraceExporter: new (cfg?: {
apiKey?: string;
projectName?: string;
endpoint?: string;
}) => unknown;
};
const { LangSmithOTLPSpanProcessor } = (await import(
'langsmith/experimental/otel/processor'
)) as {
LangSmithOTLPSpanProcessor: new (exporter: unknown) => unknown;
};
// SECURITY: When the engine-resolved credential is the active key (i.e. no
// explicit config.apiKey overrides it), ignore user-provided url/endpoint to
// prevent redirecting the injected API key to an arbitrary host.
const apiKey = config?.apiKey ?? resolvedApiKey;
const usingResolvedKey = !config?.apiKey && resolvedApiKey !== undefined;
const url = usingResolvedKey
? undefined
: (config?.url ??
(config?.endpoint ? `${config.endpoint.replace(/\/$/, '')}/otel/v1/traces` : undefined));
const exporter = new LangSmithOTLPTraceExporter({
apiKey,
projectName: config?.project,
...(url ? { url } : {}),
});
const processor = new LangSmithOTLPSpanProcessor(exporter);
const provider = new NodeTracerProvider({
spanProcessors: [processor],
});
// Do NOT call provider.register() — avoid polluting the global tracer provider.
return { tracer: provider.getTracer('@n8n/agents'), provider };
}
/**
* Pre-built telemetry for LangSmith. Extends `Telemetry` so all builder
* methods (`.credential()`, `.functionId()`, `.recordOutputs()`, `.redact()`,
* etc.) are available.
*
* Requires `langsmith` and `@opentelemetry/sdk-trace-node` as peer dependencies.
*
* @example
* ```typescript
* import { Agent, LangSmithTelemetry } from '@n8n/agents';
*
* const telemetry = new LangSmithTelemetry({ project: 'my-project' })
* .credential('langsmith')
* .recordOutputs(false);
*
* const agent = new Agent('assistant')
* .model('anthropic/claude-sonnet-4-5')
* .telemetry(telemetry)
* .instructions('...');
* ```
*/
export class LangSmithTelemetry extends Telemetry {
private langsmithConfig?: LangSmithTelemetryConfig;
constructor(config?: LangSmithTelemetryConfig) {
super();
this.langsmithConfig = config;
}
/** @override Build telemetry config, creating the LangSmith tracer. */
override async build(): Promise<BuiltTelemetry> {
if (this.otlpEndpointValue !== undefined) {
throw new Error('LangSmithTelemetry creates its own tracer — do not use .otlpEndpoint().');
}
// Clear any tracer from a previous build() so the parent's
// .tracer()/.otlpEndpoint() mutual-exclusion check passes cleanly.
this.tracerValue = undefined;
// The LangSmith exporter silently drops all spans unless this is set.
// Auto-enable it so users don't have to remember a magic env var.
process.env.LANGCHAIN_TRACING_V2 ??= 'true';
const { tracer, provider } = await createLangSmithTracer(
this.langsmithConfig,
this.resolvedKey,
);
this.tracerValue = tracer;
// Call parent build() which handles integrations, redaction, etc.
const built = await super.build();
// Attach the provider for flush/shutdown (parent build sets it from
// otlpEndpoint but not from .tracer(), so we add it here).
return { ...built, provider };
}
}

File diff suppressed because it is too large

View file

@ -0,0 +1,82 @@
import { AgentEvent } from '../types/runtime/event';
import type { AgentEventData, AgentEventHandler } from '../types/runtime/event';
/**
* Internal event bus for agent lifecycle events.
*
* Shared between Agent (public API) and AgentRuntime (emitter).
* Handlers registered via `on()` are called synchronously when
* `emit()` is invoked from the agentic loop.
*
* Cancellation uses a standard `AbortController`. The signal is passed
* directly to the AI SDK's `generateText` / `streamText` calls so that
* in-flight HTTP requests are cancelled immediately when `abort()` is called,
* rather than waiting for the current LLM call to finish.
*
* A new controller is created for each run via `resetAbort()` so the same
* agent instance can be reused after cancellation.
*/
export class AgentEventBus {
private handlers = new Map<AgentEvent, Set<AgentEventHandler>>();
private controller = new AbortController();
private externalCleanup?: () => void;
on(event: AgentEvent, handler: AgentEventHandler): void {
let set = this.handlers.get(event);
if (!set) {
set = new Set();
this.handlers.set(event, set);
}
set.add(handler);
}
emit(data: AgentEventData): void {
const set = this.handlers.get(data.type);
if (!set) return;
for (const handler of set) {
handler(data);
}
}
abort(): void {
this.controller.abort();
}
/**
* Replace the AbortController with a fresh one.
* Called at the start of each generate() / stream() so the agent
* can be reused after a previous cancellation.
*
* When an external signal is provided, its abort is forwarded to the
* internal controller so that either `abort()` or the external signal
* can cancel the current run.
*/
resetAbort(externalSignal?: AbortSignal): void {
this.externalCleanup?.();
this.externalCleanup = undefined;
this.controller = new AbortController();
if (externalSignal) {
if (externalSignal.aborted) {
this.controller.abort(externalSignal.reason);
} else {
const onAbort = () => this.controller.abort(externalSignal.reason);
externalSignal.addEventListener('abort', onAbort, { once: true });
this.externalCleanup = () => externalSignal.removeEventListener('abort', onAbort);
}
}
}
/** The AbortSignal for the current run. Pass to generateText / streamText. */
get signal(): AbortSignal {
return this.controller.signal;
}
get isAborted(): boolean {
return this.controller.signal.aborted;
}
}
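// A minimal usage sketch: forwarding an external AbortSignal into the bus so that
// either side can cancel the current run. The event member name is a placeholder;
// substitute a real AgentEvent value.
//
//   const bus = new AgentEventBus();
//   bus.on(AgentEvent.ToolCall, (data) => console.log(data.type)); // placeholder member name
//   const external = new AbortController();
//   bus.resetAbort(external.signal);  // fresh controller, external abort forwarded
//   external.abort();                 // bus.isAborted === true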
export { AgentEvent };

View file

@ -0,0 +1,45 @@
/**
* Filtered logger that suppresses known noisy warnings from the runtime.
* All other messages are forwarded to console.
*/
const SUPPRESSED_PATTERNS = [
'No memory is configured but resourceId and threadId were passed in args',
];
function isSuppressed(message: string): boolean {
return SUPPRESSED_PATTERNS.some((pattern) => message.includes(pattern));
}
/**
* Creates a logger that drops messages matching known suppressed patterns
* and forwards everything else to console.
*/
export function createFilteredLogger() {
return {
debug(message: string, ...args: unknown[]) {
if (!isSuppressed(message)) console.debug(message, ...args);
},
info(message: string, ...args: unknown[]) {
if (!isSuppressed(message)) console.info(message, ...args);
},
warn(message: string, ...args: unknown[]) {
if (!isSuppressed(message)) console.warn(message, ...args);
},
error(message: string, ...args: unknown[]) {
if (!isSuppressed(message)) console.error(message, ...args);
},
trackException() {},
getTransports() {
return new Map();
},
// eslint-disable-next-line @typescript-eslint/require-await
async listLogs() {
return { logs: [] as unknown[], total: 0, page: 1, perPage: 100, hasMore: false };
},
// eslint-disable-next-line @typescript-eslint/require-await
async listLogsByRunId() {
return { logs: [] as unknown[], total: 0, page: 1, perPage: 100, hasMore: false };
},
};
}

View file

@ -0,0 +1,178 @@
/** Don't remove the .js extensions. That's how the @modelcontextprotocol/sdk is packaged. */
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
import { CallToolResultSchema, type CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import { McpToolResolver } from './mcp-tool-resolver';
import { wrapToolForApproval } from '../sdk/tool';
import type { McpServerConfig } from '../types/sdk/mcp';
import type { BuiltTool } from '../types/sdk/tool';
/** The raw result returned by an MCP tool call. */
export type McpCallToolResult = CallToolResult;
/** Wraps a single MCP SDK Client instance for one server. Not publicly exported. */
export class McpConnection {
private client: Client;
private config: McpServerConfig;
private readonly shouldRequireToolApproval: boolean;
private connectionPromise: Promise<void> | undefined = undefined;
private disconnectPromise: Promise<void> | undefined = undefined;
private closed = false;
constructor(config: McpServerConfig, requireToolApproval = false) {
this.config = config;
this.shouldRequireToolApproval = requireToolApproval;
this.client = new Client({ name: '@n8n/agents', version: '0.1.0' }, { capabilities: {} });
}
async connect(): Promise<void> {
if (this.connectionPromise !== undefined) {
return await this.connectionPromise;
}
this.connectionPromise = this.connectWithTransport(this.createTransport(this.config));
try {
await this.connectionPromise;
} catch (error) {
this.connectionPromise = undefined;
throw error;
}
}
private async connectWithTransport(
transport: SSEClientTransport | StreamableHTTPClientTransport | StdioClientTransport,
): Promise<void> {
const timeoutMs = this.config.connectionTimeoutMs;
if (timeoutMs === undefined) {
await this.client.connect(transport);
return;
}
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
throw new Error(
`MCP server "${this.config.name}": connectionTimeoutMs must be a positive finite number`,
);
}
let timeoutId: ReturnType<typeof setTimeout> | undefined;
try {
await Promise.race([
this.client.connect(transport),
new Promise<never>((_, reject) => {
timeoutId = setTimeout(() => {
reject(
new Error(
`MCP server "${this.config.name}": connection timed out after ${timeoutMs}ms`,
),
);
}, timeoutMs);
}),
]);
} catch (error) {
await this.client.close().catch(() => {});
throw error;
} finally {
if (timeoutId !== undefined) clearTimeout(timeoutId);
}
}
/** List tools from the server, resolving them into BuiltTool instances with prefixed names. */
async listTools(): Promise<BuiltTool[]> {
const result = await this.client.listTools();
const resolver = new McpToolResolver();
const tools = resolver.resolve(this, result.tools);
return tools.map((t) =>
t.suspendSchema || !this.needsApproval(t)
? t
: wrapToolForApproval(t, { requireApproval: true }),
);
}
/**
* Returns true when a resolved tool should be wrapped with an approval gate.
*
* A tool needs approval when any of the following is true:
* - the global `shouldRequireToolApproval` flag (set via Agent.requireToolApproval()) is true, OR
* - `config.requireApproval` is `true` (all tools on this server), OR
* - `config.requireApproval` is a string array that includes the tool's original (un-prefixed) name.
*/
private needsApproval(tool: BuiltTool): boolean {
if (this.shouldRequireToolApproval) return true;
const { requireApproval } = this.config;
if (requireApproval === true) return true;
if (Array.isArray(requireApproval) && requireApproval.length > 0) {
const prefix = `${this.config.name}_`;
const originalName = tool.name.startsWith(prefix)
? tool.name.slice(prefix.length)
: tool.name;
return requireApproval.includes(originalName);
}
return false;
}
async callTool(name: string, args: Record<string, unknown>): Promise<McpCallToolResult> {
const result = await this.client.callTool({ name, arguments: args }, CallToolResultSchema);
return result as McpCallToolResult;
}
async disconnect(): Promise<void> {
if (this.disconnectPromise) return await this.disconnectPromise;
const promise = this.doDisconnect();
this.disconnectPromise = promise;
return await promise.finally(() => {
if (this.disconnectPromise === promise) this.disconnectPromise = undefined;
});
}
private async doDisconnect(): Promise<void> {
if (this.closed) return;
await this.client.close();
this.connectionPromise = undefined;
this.closed = true;
}
get name(): string {
return this.config.name;
}
/**
* Returns true when this server's config declares per-server approval requirements
* without requiring a network connection.
*/
declaresApproval(): boolean {
const { requireApproval } = this.config;
return (
requireApproval === true || (Array.isArray(requireApproval) && requireApproval.length > 0)
);
}
private createTransport(
config: McpServerConfig,
): SSEClientTransport | StreamableHTTPClientTransport | StdioClientTransport {
if (config.command) {
return new StdioClientTransport({
command: config.command,
args: config.args,
env: config.env,
});
} else if (config.url) {
const url = new URL(config.url);
const requestInit: RequestInit | undefined = config.headers
? { headers: config.headers }
: undefined;
if (config.transport === 'streamableHttp') {
return new StreamableHTTPClientTransport(url, { requestInit });
}
return new SSEClientTransport(url, { requestInit });
}
throw new Error(`MCP server "${config.name}": provide either "url" or "command"`);
}
}
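// Illustrative config sketch for the approval rules above. The server name, URL and
// tool name are placeholders; `requireApproval` may be `true` (gate every tool) or an
// array of original, un-prefixed tool names.
//
//   const config: McpServerConfig = {
//     name: 'github',
//     url: 'https://example.com/mcp',
//     transport: 'streamableHttp',
//     requireApproval: ['create_issue'], // only "github_create_issue" gets the approval gate
//   };
//   const connection = new McpConnection(config);
//   await connection.connect();
//   const tools = await connection.listTools(); // tool names prefixed with "github_"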

View file

@ -0,0 +1,92 @@
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
import type { JSONSchema7 } from 'json-schema';
import type { McpCallToolResult, McpConnection } from './mcp-connection';
import type { AgentMessage, ContentFile, ContentText } from '../types/sdk/message';
import type { BuiltTool, InterruptibleToolContext, ToolContext } from '../types/sdk/tool';
type McpContentBlock = McpCallToolResult['content'][number];
/**
* Convert raw MCP tool definitions into BuiltTool instances.
* Tool names are prefixed with the server name to prevent collisions.
* Not publicly exported.
*/
export class McpToolResolver {
resolve(connection: McpConnection, tools: Tool[]): BuiltTool[] {
return tools.map((tool) => this.resolveTool(connection, tool));
}
private resolveTool(connection: McpConnection, tool: Tool): BuiltTool {
const prefixedName = `${connection.name}_${tool.name}`;
const originalName = tool.name;
const handler = async (
input: unknown,
_ctx: ToolContext | InterruptibleToolContext,
): Promise<unknown> => {
const args = (input ?? {}) as Record<string, unknown>;
return await connection.callTool(originalName, args);
};
const toMessage = (output: unknown): AgentMessage | undefined => {
return buildRichMessage(output as McpCallToolResult);
};
const builtTool: BuiltTool = {
name: prefixedName,
description: tool.description ?? '',
inputSchema: tool.inputSchema as JSONSchema7,
handler,
toMessage,
mcpTool: true,
mcpServerName: connection.name,
};
return builtTool;
}
}
/**
* Convert an MCP CallToolResult into a rich AgentMessage containing text and image content parts.
* Returns undefined if the result contains only text (the tool-result JSON is sufficient for the LLM).
* Returns an assistant Message with ContentFile parts for image blocks so multimodal models can process them.
*/
function buildRichMessage(result: McpCallToolResult): AgentMessage | undefined {
if (!result?.content) return undefined;
const hasImages = result.content.some((block) => block.type === 'image');
if (!hasImages) return undefined;
const contentParts: Array<ContentText | ContentFile> = [];
for (const block of result.content) {
const part = blockToContentPart(block);
if (part) contentParts.push(part);
}
if (contentParts.length === 0) return undefined;
return { role: 'assistant', content: contentParts };
}
function blockToContentPart(block: McpContentBlock): ContentText | ContentFile | undefined {
if (block.type === 'text' && block.text) {
return { type: 'text', text: block.text };
}
if (block.type === 'image' && block.data) {
return {
type: 'file',
data: block.data,
mediaType: block.mimeType ?? 'image/png',
};
}
if (block.type === 'resource' && block.resource) {
const text = 'text' in block.resource ? block.resource.text : block.resource.uri;
return { type: 'text', text };
}
return undefined;
}
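// Behaviour sketch for buildRichMessage: a text-only result yields undefined, while an
// image block produces an assistant message with a file part. The base64 data is a placeholder.
//
//   buildRichMessage({ content: [{ type: 'text', text: 'ok' }] });
//   // => undefined (the tool-result JSON already covers it)
//   buildRichMessage({ content: [{ type: 'image', data: 'iVBORw0...', mimeType: 'image/png' }] });
//   // => { role: 'assistant', content: [{ type: 'file', data: 'iVBORw0...', mediaType: 'image/png' }] }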

View file

@ -0,0 +1,122 @@
import { toDbMessage } from '../sdk/message';
import type { BuiltMemory, Thread } from '../types';
import type { AgentDbMessage, AgentMessage } from '../types/sdk/message';
interface StoredMessage {
message: AgentDbMessage;
createdAt: Date;
}
/**
* In-memory implementation of BuiltMemory.
* All data is lost on process restart, so it is suitable only for development and testing.
*
* Call `saveThread` before `saveMessages` so the thread record exists;
* `saveMessages` appends to the thread identified by its `threadId` argument.
*/
export class InMemoryMemory implements BuiltMemory {
private threads = new Map<string, Thread>();
private messagesByThread = new Map<string, StoredMessage[]>();
private workingMemoryByKey = new Map<string, string>();
// eslint-disable-next-line @typescript-eslint/require-await
async getWorkingMemory(params: { threadId: string; resourceId?: string }): Promise<
string | null
> {
return this.workingMemoryByKey.get(params.resourceId ?? params.threadId) ?? null;
}
// eslint-disable-next-line @typescript-eslint/require-await
async saveWorkingMemory(
params: { threadId: string; resourceId?: string },
content: string,
): Promise<void> {
this.workingMemoryByKey.set(params.resourceId ?? params.threadId, content);
}
// eslint-disable-next-line @typescript-eslint/require-await
async getThread(threadId: string): Promise<Thread | null> {
return this.threads.get(threadId) ?? null;
}
// eslint-disable-next-line @typescript-eslint/require-await
async saveThread(thread: Omit<Thread, 'createdAt' | 'updatedAt'>): Promise<Thread> {
const existing = this.threads.get(thread.id);
const now = new Date();
const saved: Thread = {
...thread,
title: thread.title ?? existing?.title,
metadata: thread.metadata ?? existing?.metadata,
createdAt: existing?.createdAt ?? now,
updatedAt: now,
};
this.threads.set(thread.id, saved);
return saved;
}
// eslint-disable-next-line @typescript-eslint/require-await
async deleteThread(threadId: string): Promise<void> {
this.threads.delete(threadId);
this.messagesByThread.delete(threadId);
}
// eslint-disable-next-line @typescript-eslint/require-await
async getMessages(
threadId: string,
opts?: { limit?: number; before?: Date },
): Promise<AgentDbMessage[]> {
let stored = this.messagesByThread.get(threadId) ?? [];
if (opts?.before) {
const cutoff = opts.before.getTime();
stored = stored.filter((s) => s.createdAt.getTime() < cutoff);
}
if (opts?.limit) stored = stored.slice(-opts.limit);
return stored.map((s) => s.message);
}
/**
* Save messages to the thread identified by `args.threadId`.
* Call `saveThread` first so the thread record exists before messages are appended.
*/
// eslint-disable-next-line @typescript-eslint/require-await
async saveMessages(args: {
threadId: string;
resourceId?: string;
messages: AgentMessage[];
}): Promise<void> {
const existing = this.messagesByThread.get(args.threadId) ?? [];
const now = new Date();
for (const msg of args.messages) {
existing.push({ message: toDbMessage(msg), createdAt: now });
}
this.messagesByThread.set(args.threadId, existing);
}
// eslint-disable-next-line @typescript-eslint/require-await
async deleteMessages(messageIds: string[]): Promise<void> {
const idSet = new Set(messageIds);
for (const [threadId, messages] of this.messagesByThread.entries()) {
this.messagesByThread.set(
threadId,
messages.filter((s) => !idSet.has(s.message.id)),
);
}
}
}
/**
* Save messages to a specific thread, ensuring the thread exists first.
* Always call this instead of `memory.saveMessages()` directly, as it
* creates the thread record first for implementations like InMemoryMemory.
*/
export async function saveMessagesToThread(
memory: BuiltMemory,
threadId: string,
resourceId: string,
messages: AgentMessage[],
): Promise<void> {
await memory.saveThread({ id: threadId, resourceId });
await memory.saveMessages({ threadId, resourceId, messages });
}
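// Usage sketch with the in-memory backend; the thread and resource ids are placeholders.
//
//   const memory = new InMemoryMemory();
//   await saveMessagesToThread(memory, 'thread-1', 'user-1', [
//     { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
//   ]);
//   const stored = await memory.getMessages('thread-1'); // one AgentDbMessage with a generated id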

View file

@ -0,0 +1,128 @@
import type { ProviderOptions } from '@ai-sdk/provider-utils';
import type { ModelMessage } from 'ai';
import { toAiMessages } from './messages';
import { stripOrphanedToolMessages } from './strip-orphaned-tool-messages';
import { buildWorkingMemoryInstruction } from './working-memory';
import { filterLlmMessages } from '../sdk/message';
import type { SerializedMessageList } from '../types/runtime/message-list';
import type { AgentDbMessage } from '../types/sdk/message';
export type { SerializedMessageList };
export interface WorkingMemoryContext {
template: string;
structured: boolean;
/** The current persisted state, or null if not yet loaded. Falls back to template. */
state: string | null;
}
/**
* Append-only message container with Set-based source tracking.
*
* Three named sources:
* - history: messages loaded from memory at the start of the turn.
*   Never included in turnDelta(); already persisted.
* - input: the caller's raw input for this turn (custom messages preserved).
* - response: LLM replies, tool results, and custom tool messages from this turn.
*
* Serialization stores the flat message array plus the IDs of each set so
* the full three-way source distinction survives a round-trip.
*/
export class AgentMessageList {
private all: AgentDbMessage[] = [];
private historySet = new Set<AgentDbMessage>();
private inputSet = new Set<AgentDbMessage>();
private responseSet = new Set<AgentDbMessage>();
/** Working memory context for this run. Set by buildMessageList / resume. */
workingMemory: WorkingMemoryContext | undefined;
addHistory(messages: AgentDbMessage[]): void {
for (const m of messages) {
this.all.push(m);
this.historySet.add(m);
}
}
addInput(messages: AgentDbMessage[]): void {
for (const m of messages) {
this.all.push(m);
this.inputSet.add(m);
}
}
addResponse(messages: AgentDbMessage[]): void {
for (const m of messages) {
this.all.push(m);
this.responseSet.add(m);
}
}
/**
* Full LLM context for a generateText / streamText call.
* Prepends the system prompt (with working memory appended if configured),
* strips custom messages via filterLlmMessages.
*/
forLlm(baseInstructions: string, instructionProviderOptions?: ProviderOptions): ModelMessage[] {
let systemPrompt = baseInstructions;
if (this.workingMemory) {
const wmInstruction = buildWorkingMemoryInstruction(
this.workingMemory.template,
this.workingMemory.structured,
);
const wmState = this.workingMemory.state ?? this.workingMemory.template;
systemPrompt +=
wmInstruction + '\n\nCurrent working memory state:\n```\n' + wmState + '\n```';
}
const systemMessage: ModelMessage = instructionProviderOptions
? { role: 'system', content: systemPrompt, providerOptions: instructionProviderOptions }
: { role: 'system', content: systemPrompt };
return [systemMessage, ...toAiMessages(filterLlmMessages(stripOrphanedToolMessages(this.all)))];
}
/**
* Current-turn delta for memory persistence (input + responses).
* Non-destructive; safe to call multiple times (e.g. on retry).
*/
turnDelta(): AgentDbMessage[] {
return this.all.filter((m) => this.inputSet.has(m) || this.responseSet.has(m));
}
/**
* Only the LLM-produced messages from this turn (responses + tool results).
* Used for GenerateResult.messages; callers should not see their own input echoed back.
*/
responseDelta(): AgentDbMessage[] {
return this.all.filter((m) => this.responseSet.has(m));
}
serialize(): SerializedMessageList {
const toIds = (set: Set<AgentDbMessage>) => Array.from(set).map((m) => m.id);
return {
messages: [...this.all],
historyIds: toIds(this.historySet),
inputIds: toIds(this.inputSet),
responseIds: toIds(this.responseSet),
};
}
static deserialize(data: SerializedMessageList): AgentMessageList {
const list = new AgentMessageList();
const historyIdSet = new Set(data.historyIds);
const inputIdSet = new Set(data.inputIds);
const responseIdSet = new Set(data.responseIds);
for (const m of data.messages) {
list.all.push(m);
if (historyIdSet.has(m.id)) list.historySet.add(m);
if (inputIdSet.has(m.id)) list.inputSet.add(m);
if (responseIdSet.has(m.id)) list.responseSet.add(m);
}
return list;
}
}
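// Usage sketch of the three-way split; the message arrays are stand-ins for real
// AgentDbMessage values loaded from memory or produced by the LLM.
//
//   const list = new AgentMessageList();
//   list.addHistory(historyMessages);    // loaded from memory, never re-persisted
//   list.addInput(inputMessages);        // this turn's caller input
//   list.addResponse(responseMessages);  // LLM replies and tool results
//   const toPersist = list.turnDelta();  // input + responses only
//   const restored = AgentMessageList.deserialize(list.serialize());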

View file

@ -0,0 +1,299 @@
import type {
FilePart,
ModelMessage,
TextPart,
ToolCallPart,
ToolResultPart,
ImagePart,
ToolApprovalRequest,
ToolApprovalResponse,
FinishReason as AiFinishReason,
} from 'ai';
import { toDbMessage } from '../sdk/message';
import type { FinishReason } from '../types';
import type {
AgentDbMessage,
AgentMessage,
ContentFile,
ContentReasoning,
ContentText,
ContentToolCall,
ContentToolResult,
Message,
MessageContent,
} from '../types/sdk/message';
import type { JSONValue } from '../types/utils/json';
/** Reasoning content part — mirrors @ai-sdk/provider-utils ReasoningPart (not re-exported by 'ai'). */
type ReasoningPart = { type: 'reasoning'; text: string };
type AiContentPart =
| TextPart
| FilePart
| ImagePart
| ReasoningPart
| ToolCallPart
| ToolResultPart
| ToolApprovalRequest
| ToolApprovalResponse;
// --- Type guards for MessageContent blocks ---
function isText(block: MessageContent): block is ContentText {
return block.type === 'text';
}
function isReasoning(block: MessageContent): block is ContentReasoning {
return block.type === 'reasoning';
}
function isFile(block: MessageContent): block is ContentFile {
return block.type === 'file';
}
function isToolCall(block: MessageContent): block is ContentToolCall {
return block.type === 'tool-call';
}
function isToolResult(block: MessageContent): block is ContentToolResult {
return block.type === 'tool-result';
}
/**
* Parse a JSONValue that may be a stringified JSON object back into
* its parsed form. Non-string values pass through unchanged.
*/
function parseJsonValue(value: JSONValue): unknown {
if (typeof value === 'string') {
try {
return JSON.parse(value);
} catch {
return value;
}
}
return value;
}
/** Convert a single n8n MessageContent block to an AI SDK content part. */
function toAiContent(block: MessageContent): AiContentPart | undefined {
let base: AiContentPart | undefined;
if (isText(block)) {
base = { type: 'text', text: block.text };
} else if (isFile(block)) {
base = {
type: 'file',
data: block.data,
mediaType: block.mediaType ?? 'application/octet-stream',
};
} else if (isToolCall(block)) {
base = {
type: 'tool-call',
toolCallId: block.toolCallId ?? '',
toolName: block.toolName,
input: parseJsonValue(block.input),
providerExecuted: block.providerExecuted,
};
}
if (isToolResult(block)) {
if (block.isError) {
if (typeof block.result === 'string') {
base = {
type: 'tool-result',
toolCallId: block.toolCallId,
toolName: block.toolName,
output: { type: 'error-text', value: block.result },
};
} else {
base = {
type: 'tool-result',
toolCallId: block.toolCallId,
toolName: block.toolName,
output: { type: 'error-json', value: block.result },
};
}
} else {
base = {
type: 'tool-result',
toolCallId: block.toolCallId,
toolName: block.toolName,
output: { type: 'json', value: block.result },
};
}
} else if (isReasoning(block)) {
base = { type: 'reasoning', text: block.text };
}
if (base && block.providerOptions) {
return { ...base, providerOptions: block.providerOptions } as AiContentPart;
}
return base;
}
/** Convert a single AI SDK content part to an n8n MessageContent block. */
function fromAiContent(part: AiContentPart): MessageContent | undefined {
const providerOptions = 'providerOptions' in part ? part.providerOptions : undefined;
let base: MessageContent | undefined;
switch (part.type) {
case 'text':
base = { type: 'text', text: part.text };
break;
case 'file': {
const data =
part.data instanceof URL ? part.data.toString() : (part.data as ContentFile['data']);
base = { type: 'file', data, mediaType: part.mediaType };
break;
}
case 'image': {
const data =
part.image instanceof URL ? part.image.toString() : (part.image as ContentFile['data']);
base = { type: 'file', data, mediaType: part.mediaType };
break;
}
case 'reasoning':
base = { type: 'reasoning', text: part.text };
break;
case 'tool-call':
base = {
type: 'tool-call',
toolCallId: part.toolCallId,
toolName: part.toolName,
input: part.input as JSONValue,
providerExecuted: part.providerExecuted,
};
break;
case 'tool-result': {
const { output } = part;
let result: JSONValue;
let isError: boolean | undefined;
if (output.type === 'json') {
result = output.value;
} else if (output.type === 'text') {
result = output.value;
} else if (output.type === 'error-json') {
result = output.value;
isError = true;
} else if (output.type === 'error-text') {
result = output.value;
isError = true;
} else {
result = null;
isError = true;
}
base = {
type: 'tool-result',
toolCallId: part.toolCallId,
toolName: part.toolName,
result,
isError,
};
break;
}
// Ignore these types, because HITL is handled by our runtime
case 'tool-approval-request':
case 'tool-approval-response':
default:
return undefined;
}
if (base && providerOptions) {
return { ...base, providerOptions };
}
return base;
}
/** Convert a single n8n Message to an AI SDK ModelMessage. */
export function toAiMessage(msg: Message): ModelMessage {
let base: ModelMessage;
switch (msg.role) {
case 'system': {
const text = msg.content
.filter(isText)
.map((b) => b.text)
.join('');
base = { role: 'system', content: text };
break;
}
case 'user': {
const parts = msg.content
.map(toAiContent)
.filter((p): p is TextPart | FilePart => p?.type === 'text' || p?.type === 'file');
base = { role: 'user', content: parts };
break;
}
case 'assistant': {
const parts = msg.content
.map(toAiContent)
.filter(
(p): p is TextPart | ReasoningPart | ToolCallPart | ToolResultPart | FilePart =>
p?.type === 'text' ||
p?.type === 'reasoning' ||
p?.type === 'tool-call' ||
p?.type === 'tool-result' ||
p?.type === 'file',
);
base = { role: 'assistant', content: parts };
break;
}
case 'tool': {
const parts = msg.content
.map(toAiContent)
.filter((p): p is ToolResultPart => p?.type === 'tool-result');
base = { role: 'tool', content: parts };
break;
}
default:
throw new Error(`Unknown role: ${msg.role as string}`);
}
if (msg.providerOptions) {
return { ...base, providerOptions: msg.providerOptions };
}
return base;
}
/** Convert n8n Messages to AI SDK ModelMessages for passing to stream/generateText. */
export function toAiMessages(messages: Message[]): ModelMessage[] {
return messages.map(toAiMessage);
}
/** Convert a single AI SDK ModelMessage to an n8n AgentDbMessage (with a generated id). */
export function fromAiMessage(msg: ModelMessage): AgentDbMessage {
const rawContent = msg.content;
const content: MessageContent[] =
typeof rawContent === 'string'
? [{ type: 'text', text: rawContent }]
: rawContent.map(fromAiContent).filter((p): p is MessageContent => p !== undefined);
const message: AgentMessage = { role: msg.role, content };
if ('providerOptions' in msg && msg.providerOptions) {
message.providerOptions = msg.providerOptions;
}
return toDbMessage(message);
}
/** Convert AI SDK ModelMessages to n8n AgentDbMessages (each with a generated id). */
export function fromAiMessages(messages: ModelMessage[]): AgentDbMessage[] {
return messages.map(fromAiMessage);
}
export function fromAiFinishReason(reason: AiFinishReason): FinishReason {
switch (reason) {
case 'stop':
return 'stop';
case 'length':
return 'length';
case 'content-filter':
return 'content-filter';
case 'tool-calls':
return 'tool-calls';
case 'error':
return 'error';
case 'other':
return 'other';
}
}
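// Round-trip sketch: a user Message converts to the AI SDK shape and back.
//
//   const msg: Message = { role: 'user', content: [{ type: 'text', text: 'Hi' }] };
//   const aiMsg = toAiMessage(msg);     // { role: 'user', content: [{ type: 'text', text: 'Hi' }] }
//   const dbMsg = fromAiMessage(aiMsg); // same content plus a generated id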

View file

@ -0,0 +1,116 @@
/* eslint-disable @typescript-eslint/no-require-imports */
import type { EmbeddingModel, LanguageModel } from 'ai';
import type { ModelConfig } from '../types/sdk/agent';
type CreateProviderFn = (opts?: {
apiKey?: string;
baseURL?: string;
}) => (model: string) => LanguageModel;
type CreateEmbeddingProviderFn = (opts?: { apiKey?: string }) => {
embeddingModel(model: string): EmbeddingModel;
};
function isLanguageModel(config: unknown): config is LanguageModel {
return typeof config === 'object' && config !== null && 'doGenerate' in config;
}
/**
* Provider packages are loaded dynamically via require() so only the
* provider needed at runtime must be installed.
*/
export function createModel(config: ModelConfig): LanguageModel {
if (isLanguageModel(config)) {
return config;
}
const stripEmpty = <T>(value: T | undefined): T | undefined => {
if (!value) return undefined;
if (typeof value === 'string' && value.trim() === '') return undefined;
return value;
};
const modelId = stripEmpty(typeof config === 'string' ? config : config.id);
const apiKey = stripEmpty(typeof config === 'string' ? undefined : config.apiKey);
const baseURL = stripEmpty(typeof config === 'string' ? undefined : config.url);
if (!modelId) {
throw new Error('Model ID is required');
}
const [provider, ...rest] = modelId.split('/');
const modelName = rest.join('/');
switch (provider) {
case 'anthropic': {
const { createAnthropic } = require('@ai-sdk/anthropic') as {
createAnthropic: CreateProviderFn;
};
return createAnthropic({ apiKey, baseURL })(modelName);
}
case 'openai': {
const { createOpenAI } = require('@ai-sdk/openai') as {
createOpenAI: CreateProviderFn;
};
return createOpenAI({ apiKey, baseURL })(modelName);
}
case 'google': {
const { createGoogleGenerativeAI } = require('@ai-sdk/google') as {
createGoogleGenerativeAI: CreateProviderFn;
};
return createGoogleGenerativeAI({ apiKey, baseURL })(modelName);
}
case 'xai': {
const { createXai } = require('@ai-sdk/xai') as {
createXai: CreateProviderFn;
};
return createXai({ apiKey, baseURL })(modelName);
}
default:
throw new Error(
`Unsupported provider: "${provider}". Supported: anthropic, openai, google, xai`,
);
}
}
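// Usage sketch: "provider/model" strings resolve to the matching @ai-sdk/* package,
// which must be installed at runtime. The model ids below are examples only.
//
//   const claude = createModel('anthropic/claude-sonnet-4-5');
//   const gpt = createModel({ id: 'openai/gpt-4.1', apiKey: process.env.OPENAI_API_KEY });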
/**
* Registry of embedding provider packages and their factory function names.
* Each AI SDK provider follows the same pattern:
* createProvider({ apiKey }).embeddingModel(modelName)
*
* To add a new provider, install its @ai-sdk/* package and add an entry here.
*/
const EMBEDDING_PROVIDERS = {
openai: { pkg: '@ai-sdk/openai', factory: 'createOpenAI' },
google: { pkg: '@ai-sdk/google', factory: 'createGoogleGenerativeAI' },
mistral: { pkg: '@ai-sdk/mistral', factory: 'createMistral' },
cohere: { pkg: '@ai-sdk/cohere', factory: 'createCohere' },
amazon: { pkg: '@ai-sdk/amazon-bedrock', factory: 'createAmazonBedrock' },
bedrock: { pkg: '@ai-sdk/amazon-bedrock', factory: 'createAmazonBedrock' },
} as const;
type EmbeddingProvider = keyof typeof EMBEDDING_PROVIDERS;
type EmbeddingModelId = `${EmbeddingProvider}/${string}`;
/**
* Create an embedding model from a "provider/model" string (e.g. "openai/text-embedding-3-small").
* Supports any AI SDK provider that exposes `.embeddingModel()`.
* The provider package must be installed at runtime.
*/
export function createEmbeddingModel(
embedderString: EmbeddingModelId | (string & {}),
apiKey?: string,
): EmbeddingModel {
const [provider, ...rest] = embedderString.split('/');
const modelName = rest.join('/');
const entry = EMBEDDING_PROVIDERS[provider as EmbeddingProvider];
if (!entry) {
const supported = Object.keys(EMBEDDING_PROVIDERS).join(', ');
throw new Error(`Unsupported embedding provider: "${provider}". Supported: ${supported}`);
}
const mod = require(entry.pkg) as Record<string, CreateEmbeddingProviderFn>;
const factory = mod[entry.factory];
return factory({ apiKey }).embeddingModel(modelName);
}

View file

@ -0,0 +1,68 @@
import type { CheckpointStore, SerializableAgentState } from '../types';
/**
* Default in-memory CheckpointStore implementation.
* Used when no external store is configured (storage: 'memory' or omitted).
*
* Note: Suspended runs that are never resumed accumulate indefinitely.
* For long-running processes a TTL-based eviction mechanism should be added
* to prevent unbounded memory growth.
*/
class MemoryCheckpointStore implements CheckpointStore {
private store = new Map<string, SerializableAgentState>();
async save(key: string, state: SerializableAgentState): Promise<void> {
await Promise.resolve(this.store.set(key, state));
}
async load(key: string): Promise<SerializableAgentState | undefined> {
return await Promise.resolve(this.store.get(key));
}
async delete(key: string): Promise<void> {
await Promise.resolve(this.store.delete(key));
}
}
/**
* Manages suspended agent run state for tool approval (HITL).
* Delegates all persistence to a CheckpointStore: either the provided
* external store or the default MemoryCheckpointStore.
*/
export class RunStateManager {
private store: CheckpointStore;
constructor(storage?: 'memory' | CheckpointStore) {
this.store = storage && storage !== 'memory' ? storage : new MemoryCheckpointStore();
}
/** Save a suspended run state. */
async suspend(runId: string, state: SerializableAgentState): Promise<void> {
await this.store.save(runId, { ...state, status: 'suspended' });
}
/** Load a suspended run state for resumption. The returned copy is marked running; the stored status is not updated. */
async resume(runId: string): Promise<SerializableAgentState | undefined> {
const state = await this.store.load(runId);
if (!state) return undefined;
if (state.status !== 'suspended') {
throw new Error(`Run ${runId} is not suspended. Cannot resume.`);
}
const newState: SerializableAgentState = { ...state, status: 'running' };
return newState;
}
/** Delete a finished run from storage. Called when a resumed run completes without re-suspending. */
async complete(runId: string): Promise<void> {
try {
await this.store.delete(runId);
} catch (deleteError: unknown) {
console.error(`[RunStateManager] Failed to delete checkpoint ${runId}:`, deleteError);
}
}
}
/** Generate a unique run ID. */
export function generateRunId(): string {
return `run_${crypto.randomUUID()}`;
}
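// Suspend/resume sketch for a HITL approval flow; `state` stands in for a full
// SerializableAgentState checkpoint captured by the runtime.
//
//   const runs = new RunStateManager();        // defaults to the in-memory store
//   const runId = generateRunId();
//   await runs.suspend(runId, state);          // persisted with status 'suspended'
//   const resumed = await runs.resume(runId);  // copy with status 'running'
//   await runs.complete(runId);                // drop the checkpoint once finished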

View file

@ -0,0 +1,145 @@
/**
* Pure utility functions used by AgentRuntime that require no class context.
* These are extracted here to keep agent-runtime.ts focused on orchestration logic.
*/
import { toDbMessage } from '../sdk/message';
import type { GenerateResult, StreamChunk, TokenUsage } from '../types';
import { toTokenUsage } from './stream';
import type { AgentDbMessage, AgentMessage, ContentToolResult } from '../types/sdk/message';
import type { JSONValue } from '../types/utils/json';
/** Normalize a string or AgentMessage[] input to an AgentDbMessage array, assigning ids where missing. */
export function normalizeInput(input: AgentMessage[] | string): AgentDbMessage[] {
if (typeof input === 'string') {
return [toDbMessage({ role: 'user', content: [{ type: 'text', text: input }] })];
}
return input.map(toDbMessage);
}
/** Build a tool-role AgentDbMessage for a tool execution result. */
export function makeToolResultMessage(
toolCallId: string,
toolName: string,
result: unknown,
): AgentDbMessage {
return toDbMessage({
role: 'tool',
content: [
{
type: 'tool-result',
toolCallId,
toolName,
result: result as JSONValue,
},
],
});
}
/**
* Build a tool-role AgentDbMessage for a tool execution error.
* The LLM receives this as a tool result so it can self-correct on the next iteration.
* The error is surfaced via the result JSON value so the LLM can read and reason about it.
*/
export function makeErrorToolResultMessage(
toolCallId: string,
toolName: string,
error: unknown,
): AgentDbMessage {
const message = error instanceof Error ? `${error.name}: ${error.message}` : String(error);
return toDbMessage({
role: 'tool',
content: [
{
type: 'tool-result',
toolCallId,
toolName,
result: { error: message } as JSONValue,
isError: true,
},
],
});
}
/** Extract all tool-result content parts from a flat list of agent messages. */
export function extractToolResults(messages: AgentDbMessage[]): ContentToolResult[] {
return messages
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c): c is ContentToolResult => c.type === 'tool-result');
}
/**
* Return a ReadableStream that immediately yields an error chunk followed by
* a finish chunk. Used when setup errors prevent the normal stream loop from
* starting, so callers always receive a well-formed stream.
*/
export function makeErrorStream(error: unknown): ReadableStream<StreamChunk> {
const { readable, writable } = new TransformStream<StreamChunk, StreamChunk>();
const writer = writable.getWriter();
writer.write({ type: 'error', error }).catch(() => {});
writer.write({ type: 'finish', finishReason: 'error' }).catch(() => {});
writer.close().catch(() => {});
return readable;
}
/** Accumulate token usage across two values, returning undefined if both are absent. */
export function mergeUsage(
current: TokenUsage | undefined,
next: TokenUsage | undefined,
): TokenUsage | undefined {
if (!next) return current;
if (!current) return next;
const merged: TokenUsage = {
promptTokens: current.promptTokens + next.promptTokens,
completionTokens: current.completionTokens + next.completionTokens,
totalTokens: current.totalTokens + next.totalTokens,
};
const cacheRead =
(current.inputTokenDetails?.cacheRead ?? 0) + (next.inputTokenDetails?.cacheRead ?? 0);
const cacheWrite =
(current.inputTokenDetails?.cacheWrite ?? 0) + (next.inputTokenDetails?.cacheWrite ?? 0);
if (cacheRead > 0 || cacheWrite > 0) {
merged.inputTokenDetails = {
...(cacheRead > 0 && { cacheRead }),
...(cacheWrite > 0 && { cacheWrite }),
};
}
const reasoning =
(current.outputTokenDetails?.reasoning ?? 0) + (next.outputTokenDetails?.reasoning ?? 0);
if (reasoning > 0) {
merged.outputTokenDetails = { reasoning };
}
return merged;
}
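// Arithmetic sketch: counts add field-wise and cache details only appear when non-zero.
//
//   mergeUsage(
//     { promptTokens: 100, completionTokens: 20, totalTokens: 120 },
//     { promptTokens: 50, completionTokens: 5, totalTokens: 55, inputTokenDetails: { cacheRead: 30 } },
//   );
//   // => { promptTokens: 150, completionTokens: 25, totalTokens: 175,
//   //      inputTokenDetails: { cacheRead: 30 } }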
/**
* Accumulate token usage across loop iterations.
* Wraps mergeUsage + toTokenUsage to keep call sites concise.
*/
export function accumulateUsage(
current: TokenUsage | undefined,
raw:
| {
inputTokens?: number | undefined;
outputTokens?: number | undefined;
totalTokens?: number | undefined;
inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number };
outputTokenDetails?: { reasoningTokens?: number };
}
| undefined,
): TokenUsage | undefined {
if (!raw) return current;
return mergeUsage(current, toTokenUsage(raw));
}
/** Compute totalCost from sub-agent usage already present on the result. */
export function applySubAgentUsage(result: GenerateResult): GenerateResult {
if (!result.subAgentUsage || result.subAgentUsage.length === 0) return result;
const parentCost = result.usage?.cost ?? 0;
const subCost = result.subAgentUsage.reduce((sum, s) => sum + (s.usage.cost ?? 0), 0);
return { ...result, totalCost: parentCost + subCost };
}

View file

@ -0,0 +1,120 @@
import type { TextStreamPart, ToolSet } from 'ai';
import type { FinishReason, StreamChunk, TokenUsage } from '../types';
import type { JSONValue } from '../types/utils/json';
/** Map AI SDK v6 LanguageModelUsage to our TokenUsage type. */
export function toTokenUsage(
usage:
| {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number };
outputTokenDetails?: { reasoningTokens?: number };
}
| undefined,
): TokenUsage | undefined {
if (!usage) return undefined;
const result: TokenUsage = {
promptTokens: usage.inputTokens ?? 0,
completionTokens: usage.outputTokens ?? 0,
totalTokens: usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0),
};
const cacheRead = usage.inputTokenDetails?.cacheReadTokens;
const cacheWrite = usage.inputTokenDetails?.cacheWriteTokens;
if (cacheRead || cacheWrite) {
result.inputTokenDetails = {
...(cacheRead && { cacheRead }),
...(cacheWrite && { cacheWrite }),
};
}
if (usage.outputTokenDetails?.reasoningTokens !== undefined) {
result.outputTokenDetails = { reasoning: usage.outputTokenDetails.reasoningTokens };
}
return result;
}
/** Convert a single AI SDK v6 fullStream chunk to an n8n StreamChunk (or undefined to skip). */
export function convertChunk(c: TextStreamPart<ToolSet>): StreamChunk | undefined {
switch (c.type) {
case 'text-delta':
return { type: 'text-delta', delta: c.text ?? '' };
case 'reasoning-delta':
return { type: 'reasoning-delta', delta: c.text ?? '' };
case 'tool-call':
return {
type: 'message',
message: {
role: 'tool',
content: [
{
type: 'tool-call',
toolCallId: c.toolCallId,
toolName: c.toolName ?? '',
input: c.input as JSONValue,
},
],
},
};
case 'tool-input-start':
return {
type: 'tool-call-delta',
name: c.toolName,
};
case 'tool-input-delta':
return {
type: 'tool-call-delta',
...(c.delta !== undefined && { argumentsDelta: c.delta }),
};
case 'tool-result':
return {
type: 'message',
message: {
role: 'tool',
content: [
{
type: 'tool-result',
toolCallId: c.toolCallId ?? '',
toolName: c.toolName ?? '',
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
result: c.output && 'value' in c.output ? (c.output.value as JSONValue) : null,
},
],
},
};
case 'error':
return { type: 'error', error: c.error };
case 'finish-step': {
const usage = toTokenUsage(c.usage);
return {
type: 'finish',
finishReason: (c.finishReason ?? 'stop') as FinishReason,
...(usage && { usage }),
};
}
case 'finish': {
const usage = toTokenUsage(c.totalUsage);
return {
type: 'finish',
finishReason: (c.finishReason ?? 'stop') as FinishReason,
...(usage && { usage }),
};
}
default:
return undefined;
}
}

View file

@ -0,0 +1,70 @@
import { isLlmMessage } from '../sdk/message';
import type { AgentDbMessage, MessageContent } from '../types/sdk/message';
/**
* Strip orphaned tool-call and tool-result content from a message list.
*
* When memory loads the last N messages, the window boundary can split
* tool-call / tool-result pairs, leaving one side without its counterpart.
* Sending these orphans to the LLM causes provider errors because tool
* calls and results must always be paired.
*
* This function:
* 1. Collects all toolCallIds present in tool-call and tool-result blocks.
* 2. Identifies orphans: calls without a matching result, and vice versa.
* 3. Strips orphaned content blocks from their messages.
* 4. Drops messages that become empty after stripping (e.g. a tool message
* whose only content was the orphaned result).
* 5. Preserves non-tool content (text, reasoning, files) in mixed messages.
*/
export function stripOrphanedToolMessages(messages: AgentDbMessage[]): AgentDbMessage[] {
const callIds = new Set<string>();
const resultIds = new Set<string>();
for (const msg of messages) {
if (!isLlmMessage(msg)) continue;
for (const block of msg.content) {
if (block.type === 'tool-call' && block.toolCallId) {
callIds.add(block.toolCallId);
} else if (block.type === 'tool-result' && block.toolCallId) {
resultIds.add(block.toolCallId);
}
}
}
const orphanedCallIds = new Set([...callIds].filter((id) => !resultIds.has(id)));
const orphanedResultIds = new Set([...resultIds].filter((id) => !callIds.has(id)));
if (orphanedCallIds.size === 0 && orphanedResultIds.size === 0) {
return messages;
}
const result: AgentDbMessage[] = [];
for (const msg of messages) {
if (!isLlmMessage(msg)) {
result.push(msg);
continue;
}
const filtered = msg.content.filter((block: MessageContent) => {
if (block.type === 'tool-call' && block.toolCallId && orphanedCallIds.has(block.toolCallId)) {
return false;
}
if (
block.type === 'tool-result' &&
block.toolCallId &&
orphanedResultIds.has(block.toolCallId)
) {
return false;
}
return true;
});
if (filtered.length === 0) continue;
result.push({ ...msg, content: filtered });
}
return result;
}
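// Behaviour sketch: a tool-result whose matching tool-call fell outside the memory
// window is stripped, and the now-empty tool message is dropped with it.
//
//   const windowed: AgentDbMessage[] = [
//     { id: '1', role: 'tool', content: [
//       { type: 'tool-result', toolCallId: 'call_a', toolName: 'search', result: 'stale' },
//     ] },
//     { id: '2', role: 'assistant', content: [{ type: 'text', text: 'Done.' }] },
//   ];
//   stripOrphanedToolMessages(windowed); // => only the assistant message remains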

View file

@ -0,0 +1,77 @@
import { generateText } from 'ai';
import type { BuiltMemory, TitleGenerationConfig } from '../types';
import { createFilteredLogger } from './logger';
import { createModel } from './model-factory';
import type { ModelConfig } from '../types/sdk/agent';
import type { AgentDbMessage } from '../types/sdk/message';
const logger = createFilteredLogger();
const DEFAULT_TITLE_INSTRUCTIONS = [
'- you will generate a short title based on the first message a user begins a conversation with',
'- ensure it is not more than 80 characters long',
"- the title should be a summary of the user's message",
'- do not use quotes or colons',
'- the entire text you return will be used as the title',
].join('\n');
/**
* Generate a title for a thread if it doesn't already have one.
*
* Designed to run fire-and-forget after the agent response is complete.
* All errors are caught and logged; title generation failures never
* block or break the agent response.
*/
export async function generateThreadTitle(opts: {
memory: BuiltMemory;
threadId: string;
resourceId: string;
titleConfig: TitleGenerationConfig;
/** The agent's own model, used as fallback when titleConfig.model is not set. */
agentModel: ModelConfig;
/** Messages from the current turn, used to find the first user message. */
turnDelta: AgentDbMessage[];
}): Promise<void> {
try {
const thread = await opts.memory.getThread(opts.threadId);
if (thread?.title) return;
const userMessage = opts.turnDelta.find((m) => 'role' in m && m.role === 'user');
if (!userMessage || !('content' in userMessage)) return;
const userText = (userMessage.content as Array<{ type: string; text?: string }>)
.filter((c) => c.type === 'text' && c.text)
.map((c) => c.text!)
.join(' ');
if (!userText) return;
const titleModelId = opts.titleConfig.model ?? opts.agentModel;
const titleModel = createModel(titleModelId);
const instructions = opts.titleConfig.instructions ?? DEFAULT_TITLE_INSTRUCTIONS;
const result = await generateText({
model: titleModel,
messages: [
{ role: 'system', content: instructions },
{ role: 'user', content: userText },
],
});
let title = result.text?.trim();
if (!title) return;
// Strip <think>...</think> blocks (e.g. from DeepSeek R1)
title = title.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
if (!title) return;
await opts.memory.saveThread({
id: opts.threadId,
resourceId: opts.resourceId,
title,
metadata: thread?.metadata,
});
} catch (error) {
logger.warn('Failed to generate thread title', { error });
}
}

View file

@ -0,0 +1,182 @@
import { tool, jsonSchema, type Tool as AiSdkTool } from 'ai';
import type { JSONSchema7 } from 'json-schema';
import { z } from 'zod';
import {
type BuiltProviderTool,
type BuiltTool,
type BuiltTelemetry,
type InterruptibleToolContext,
type ToolContext,
} from '../types';
import type { SubAgentUsage } from '../types/sdk/agent';
import { isZodSchema } from '../utils/zod';
type AiSdkProviderTool = AiSdkTool & {
type: 'provider';
};
/**
* Branded symbol used to tag the return value of `ctx.suspend(payload)`.
* The agent runtime checks for this brand on the tool's return value
* instead of catching a thrown error.
*/
const SUSPEND_BRAND = Symbol('SuspendBrand');
/**
* Branded symbol used to tag tool results from agent-as-tool calls.
* Carries sub-agent usage so the parent runtime can aggregate costs
* without any external state (WeakMap, mutable tool fields, etc.).
*/
const AGENT_TOOL_BRAND = Symbol('AgentToolBrand');
export interface SuspendedToolResult {
readonly [SUSPEND_BRAND]: true;
payload: unknown;
}
/** Type guard: returns true when a tool's return value is a suspend signal. */
export function isSuspendedToolResult(value: unknown): value is SuspendedToolResult {
return typeof value === 'object' && value !== null && SUSPEND_BRAND in value;
}
export interface AgentToolResult {
readonly [AGENT_TOOL_BRAND]: true;
/** The actual tool output (passed back to the LLM). */
readonly output: unknown;
/** Sub-agent usage entries to aggregate into the parent's result. */
readonly subAgentUsage: SubAgentUsage[];
}
/** Type guard: returns true when a tool result carries sub-agent usage. */
export function isAgentToolResult(value: unknown): value is AgentToolResult {
return typeof value === 'object' && value !== null && AGENT_TOOL_BRAND in value;
}
/**
* Create a branded agent-tool result that carries sub-agent usage alongside the output.
* The output properties are spread onto the object so it remains a valid tool output
* even when accessed directly (e.g. in tests). The runtime detects the brand via
* isAgentToolResult() and extracts the sub-agent usage.
* Typed as `never` so `return createAgentToolResult(...)` satisfies any handler return type
* (same pattern as ctx.suspend).
*/
export function createAgentToolResult(output: unknown, subAgentUsage: SubAgentUsage[]): never {
const base = typeof output === 'object' && output !== null ? output : {};
return { ...base, [AGENT_TOOL_BRAND]: true, output, subAgentUsage } as never;
}
/**
* Convert an array of BuiltProviderTools into a Record of AI SDK provider-defined tool objects.
* Provider tools are executed on the provider's infrastructure (e.g. Anthropic web search,
* OpenAI code interpreter); they are never executed locally by the agent loop.
*
* The cast to AiSdkTool is required because the AI SDK's ToolSet type demands `inputSchema`
* on every entry, but provider-defined tools have no input schema (the provider handles it).
* At runtime the AI SDK correctly recognises the `type: 'provider'` discriminant.
*/
export function toAiSdkProviderTools(tools?: BuiltProviderTool[]): Record<string, AiSdkTool> {
if (!tools || tools.length === 0) return {};
const result: Record<string, AiSdkTool> = {};
for (const t of tools) {
const providerTool: AiSdkProviderTool = {
type: 'provider',
id: t.name,
args: t.args,
inputSchema: t.inputSchema ?? z.any(),
};
result[t.name] = providerTool;
}
return result;
}
const fixSchema = (schema: JSONSchema7): JSONSchema7 => {
// Ensure 'type: object' is present when properties are present (required by some providers):
if (
typeof schema === 'object' &&
schema !== null &&
'properties' in schema &&
!('type' in schema)
) {
return { ...schema, type: 'object' as const };
}
return schema;
};
/**
* Convert an array of BuiltTools into a Record of AI SDK tool definitions.
* Tools are created WITHOUT `execute`; the agent loop handles execution manually.
* Supports both Zod schemas (SDK-defined tools) and raw JSON Schema (MCP tools).
*/
export function toAiSdkTools(tools?: BuiltTool[]): Record<string, AiSdkTool> {
if (!tools || tools.length === 0) return {};
const result: Record<string, AiSdkTool> = {};
for (const t of tools) {
if (t.inputSchema) {
if (isZodSchema(t.inputSchema)) {
result[t.name] = tool({
description: t.description,
inputSchema: t.inputSchema,
providerOptions: t.providerOptions,
});
} else {
result[t.name] = tool({
description: t.description,
inputSchema: jsonSchema(fixSchema(t.inputSchema)),
providerOptions: t.providerOptions,
});
}
}
}
return result;
}
/**
* Execute a tool call by finding its handler and running it.
* For tools with suspend/resume schemas, passes an InterruptibleToolContext
* that lets the handler call `suspend(payload)`.
*/
export async function executeTool(
args: unknown,
builtTool: BuiltTool,
resumeData?: unknown,
parentTelemetry?: BuiltTelemetry,
): Promise<unknown> {
if (!builtTool.handler) {
throw new Error(`No handler found for tool "${builtTool.name}"`);
}
if (builtTool.suspendSchema) {
const ctx: InterruptibleToolContext = {
suspend: async (payload: unknown): Promise<never> => {
return await Promise.resolve({ [SUSPEND_BRAND]: true, payload } as never);
},
resumeData,
parentTelemetry,
};
return await builtTool.handler(args, ctx);
}
const ctx: ToolContext = { parentTelemetry };
return await builtTool.handler(args, ctx);
}
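// Suspend-detection sketch: an interruptible handler returns the branded value from
// ctx.suspend() and the runtime checks the brand instead of catching a thrown error.
// The object below assumes only these BuiltTool fields are required.
//
//   const askUser = {
//     name: 'ask_user',
//     description: 'Ask the human for approval',
//     inputSchema: z.object({}),
//     suspendSchema: z.object({ question: z.string() }),
//     handler: async (_input: unknown, ctx: InterruptibleToolContext) =>
//       await ctx.suspend({ question: 'Approve this step?' }),
//   } as unknown as BuiltTool;
//   const result = await executeTool({}, askUser);
//   isSuspendedToolResult(result); // => true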
/**
* Check if a tool has suspend/resume schemas (i.e. is interruptible).
*/
export function isInterruptible(toolName: string, toolMap: Map<string, BuiltTool>): boolean {
const builtTool = toolMap.get(toolName);
return !!builtTool?.suspendSchema;
}
/** Build a Map from tool name to BuiltTool for quick lookups. */
export function buildToolMap(tools?: BuiltTool[]): Map<string, BuiltTool> {
const map = new Map<string, BuiltTool>();
if (tools) {
for (const t of tools) {
map.set(t.name, t);
}
}
return map;
}

View file

@ -0,0 +1,183 @@
import type { z } from 'zod';
import type { StreamChunk } from '../types';
import { createFilteredLogger } from './logger';
const logger = createFilteredLogger();
type ZodObjectSchema = z.ZodObject<z.ZodRawShape>;
const OPEN_TAG = '<working_memory>';
const CLOSE_TAG = '</working_memory>';
/**
* Extract working memory content from an LLM response.
* Returns the clean text (tags stripped) and the extracted working memory (or null).
*/
export function parseWorkingMemory(text: string): {
cleanText: string;
workingMemory: string | null;
} {
const openIdx = text.indexOf(OPEN_TAG);
if (openIdx === -1) return { cleanText: text, workingMemory: null };
const closeIdx = text.indexOf(CLOSE_TAG, openIdx);
if (closeIdx === -1) return { cleanText: text, workingMemory: null };
const contentStart = openIdx + OPEN_TAG.length;
const rawContent = text.slice(contentStart, closeIdx);
const workingMemory = rawContent.replace(/^\n/, '').replace(/\n$/, '');
const before = text.slice(0, openIdx).replace(/\n$/, '');
const after = text.slice(closeIdx + CLOSE_TAG.length).replace(/^\n/, '');
const cleanText = (before + (after ? '\n' + after : '')).trim();
return { cleanText, workingMemory };
}
/**
* Generate the system prompt instruction for working memory.
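*
* @example
* ```typescript
* // Free-form template (structured = false); the returned block is intended to be
* // appended to the agent's system prompt.
* const instruction = buildWorkingMemoryInstruction('# User\n- Name:\n- City:', false);
* ```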
*/
export function buildWorkingMemoryInstruction(template: string, structured: boolean): string {
const format = structured
? 'Emit the updated state as valid JSON matching the schema'
: 'Update the template with any new information learned';
return [
'',
'## Working Memory',
'',
'You have persistent working memory that survives across conversations.',
'The current state will be shown to you in a system message.',
'IMPORTANT: Always respond to the user first with your normal reply.',
`Then, at the very end of your response, emit your updated working memory inside ${OPEN_TAG}...${CLOSE_TAG} tags on a new line.`,
`${format}. If nothing changed, emit the current state unchanged.`,
'The working memory block must be the last thing in your response, after your reply to the user.',
'',
'Current template:',
'```',
template,
'```',
].join('\n');
}
/**
* Convert a Zod object schema to a JSON template string for structured working memory.
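*
* Field descriptions become placeholder values; fields without a description map to empty strings.
*
* @example
* ```typescript
* templateFromSchema(z.object({
*   name: z.string().describe('User name'),
*   city: z.string(),
* }));
* // => '{\n  "name": "User name",\n  "city": ""\n}'
* ```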
*/
export function templateFromSchema(schema: ZodObjectSchema): string {
const obj: Record<string, string> = {};
for (const [key, field] of Object.entries(schema.shape)) {
const desc = field.description;
obj[key] = desc ?? '';
}
return JSON.stringify(obj, null, 2);
}
type PersistFn = (content: string) => Promise<void>;
/**
* Wraps a stream writer to intercept <working_memory> tags from text-delta chunks.
* All non-text-delta chunks pass through unchanged.
* Text inside the tags is buffered and persisted when the closing tag is detected.
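*
* @example
* ```typescript
* // Minimal sketch; `writer` wraps the outgoing stream and `persistWorkingMemory` is a
* // hypothetical callback that stores the captured content.
* const filter = new WorkingMemoryStreamFilter(writer, persistWorkingMemory);
* for await (const chunk of llmChunks) await filter.write(chunk); // `llmChunks` is assumed
* await filter.flush(); // forwards any held-back text and resets the filter
* ```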
*/
export class WorkingMemoryStreamFilter {
private writer: WritableStreamDefaultWriter<StreamChunk>;
private persist: PersistFn;
private state: 'normal' | 'inside' = 'normal';
private buffer = '';
private pendingText = '';
constructor(writer: WritableStreamDefaultWriter<StreamChunk>, persist: PersistFn) {
this.writer = writer;
this.persist = persist;
}
async write(chunk: StreamChunk): Promise<void> {
if (chunk.type !== 'text-delta') {
await this.writer.write(chunk);
return;
}
this.pendingText += chunk.delta;
while (this.pendingText.length > 0) {
if (this.state === 'normal') {
const openIdx = this.pendingText.indexOf(OPEN_TAG);
if (openIdx === -1) {
// No full open tag found. Check if the tail is a valid prefix of OPEN_TAG.
const lastLt = this.pendingText.lastIndexOf('<');
if (
lastLt !== -1 &&
this.pendingText.length - lastLt < OPEN_TAG.length &&
OPEN_TAG.startsWith(this.pendingText.slice(lastLt))
) {
// Potential partial tag at end — forward everything before it, hold the rest
if (lastLt > 0) {
await this.writer.write({
type: 'text-delta',
delta: this.pendingText.slice(0, lastLt),
});
}
this.pendingText = this.pendingText.slice(lastLt);
} else {
// No partial tag concern — forward everything
await this.writer.write({ type: 'text-delta', delta: this.pendingText });
this.pendingText = '';
}
break;
}
// Forward text before the tag
if (openIdx > 0) {
await this.writer.write({
type: 'text-delta',
delta: this.pendingText.slice(0, openIdx),
});
}
this.state = 'inside';
this.pendingText = this.pendingText.slice(openIdx + OPEN_TAG.length);
this.buffer = '';
} else {
// Inside tag — look for closing tag
const closeIdx = this.pendingText.indexOf(CLOSE_TAG);
if (closeIdx === -1) {
// Check if the tail is a valid prefix of CLOSE_TAG — hold it back
const lastLt = this.pendingText.lastIndexOf('<');
if (
lastLt !== -1 &&
this.pendingText.length - lastLt < CLOSE_TAG.length &&
CLOSE_TAG.startsWith(this.pendingText.slice(lastLt))
) {
this.buffer += this.pendingText.slice(0, lastLt);
this.pendingText = this.pendingText.slice(lastLt);
} else {
this.buffer += this.pendingText;
this.pendingText = '';
}
break;
}
this.buffer += this.pendingText.slice(0, closeIdx);
this.pendingText = this.pendingText.slice(closeIdx + CLOSE_TAG.length);
this.state = 'normal';
const content = this.buffer.replace(/^\n/, '').replace(/\n$/, '');
this.persist(content).catch((error: unknown) => {
logger.warn('Failed to persist working memory', { error });
});
this.buffer = '';
}
}
}
async flush(): Promise<void> {
if (this.state === 'normal' && this.pendingText.length > 0) {
await this.writer.write({ type: 'text-delta', delta: this.pendingText });
}
// Reset all state so the filter is clean for reuse after abort/completion.
this.pendingText = '';
this.buffer = '';
this.state = 'normal';
}
}

View file

@ -0,0 +1,676 @@
import type { ProviderOptions } from '@ai-sdk/provider-utils';
import { z } from 'zod';
import type { Eval } from './eval';
import type { McpClient } from './mcp-client';
import { Memory } from './memory';
import { Telemetry } from './telemetry';
import { Tool, wrapToolForApproval } from './tool';
import { AgentRuntime } from '../runtime/agent-runtime';
import { AgentEventBus } from '../runtime/event-bus';
import { createAgentToolResult } from '../runtime/tool-adapter';
import type {
AgentEvent,
AgentEventHandler,
AgentMiddleware,
BuiltAgent,
BuiltEval,
BuiltGuardrail,
BuiltMemory,
BuiltProviderTool,
BuiltTool,
BuiltTelemetry,
CheckpointStore,
ExecutionOptions,
GenerateResult,
MemoryConfig,
ModelConfig,
Provider,
RunOptions,
SerializableAgentState,
StreamResult,
SubAgentUsage,
ThinkingConfig,
ThinkingConfigFor,
ResumeOptions,
} from '../types';
import type { AgentMessage } from '../types/sdk/message';
import type { Workspace } from '../workspace/workspace';
const DEFAULT_LAST_MESSAGES = 10;
type ToolParameter = BuiltTool | { build(): BuiltTool };
/**
* Builder for creating AI agents with a fluent API.
*
* Usage:
* ```typescript
* const agent = new Agent('assistant')
* .model('anthropic', 'claude-sonnet-4') // typed: Agent<'anthropic'>
* .credential('anthropic')
* .instructions('You are a helpful assistant.')
* .tool(searchTool);
*
* const result = await agent.generate('Hello!');
* ```
*/
export class Agent implements BuiltAgent {
readonly name: string;
private modelId?: string;
private modelConfigObj?: ModelConfig;
private instructionProviderOpts?: ProviderOptions;
private instructionsText?: string;
private tools: BuiltTool[] = [];
private providerTools: BuiltProviderTool[] = [];
private memoryConfig?: MemoryConfig;
// TODO: Guardrails are accepted by the builder API for forward
// compatibility but not yet wired to the runtime.
private inputGuardrails: BuiltGuardrail[] = [];
private outputGuardrails: BuiltGuardrail[] = [];
private agentEvals: BuiltEval[] = [];
private outputSchema?: z.ZodType;
private checkpointStore?: 'memory' | CheckpointStore;
private thinkingConfig?: ThinkingConfig;
private credentialName?: string;
private resolvedKey?: string;
private runtime?: AgentRuntime;
private concurrencyValue?: number;
private telemetryBuilder?: Telemetry;
private telemetryConfig?: BuiltTelemetry;
private middlewares: AgentMiddleware[] = [];
private requireToolApprovalValue = false;
private mcpClients: McpClient[] = [];
private buildPromise: Promise<AgentRuntime> | undefined;
private eventBus = new AgentEventBus();
private workspaceInstance?: Workspace;
constructor(name: string) {
this.name = name;
}
/**
* Set the model with provider type information.
*
* @example
* ```typescript
* // Typed form — enables provider-specific config on .thinking() etc.
* agent.model('anthropic', 'claude-sonnet-4-5')
*
* // Untyped form — backwards compatible
* agent.model('anthropic/claude-sonnet-4-5')
* ```
*/
model(providerOrIdOrConfig: string | ModelConfig, modelName?: string): this {
if (typeof providerOrIdOrConfig === 'string') {
this.modelId = modelName ? `${providerOrIdOrConfig}/${modelName}` : providerOrIdOrConfig;
this.modelConfigObj = undefined;
} else {
this.modelConfigObj = providerOrIdOrConfig;
this.modelId = undefined;
}
return this;
}
/** Set the system instructions for the agent. Required before building. */
instructions(text: string, options?: { providerOptions?: ProviderOptions }): this {
this.instructionsText = text;
this.instructionProviderOpts = options?.providerOptions;
return this;
}
/** Add a tool to the agent's capabilities. Accepts a built tool or a Tool builder (which will be built automatically). Can also accept an array of tools. */
tool(t: ToolParameter | ToolParameter[]): this {
if (Array.isArray(t)) {
for (const tool of t) {
this.tool(tool);
}
return this;
}
const built = 'build' in t ? t.build() : t;
this.tools.push(built);
return this;
}
/** Add a provider-defined tool (e.g. Anthropic web search, OpenAI code interpreter). */
providerTool(builtProviderTool: BuiltProviderTool): this {
this.providerTools.push(builtProviderTool);
return this;
}
/** Set the memory configuration. Accepts a MemoryConfig, Memory builder, or bare BuiltMemory. */
memory(m: MemoryConfig | Memory | BuiltMemory): this {
if (m instanceof Memory) {
// Memory builder — call build()
this.memoryConfig = m.build();
} else if ('memory' in m && 'lastMessages' in m) {
// MemoryConfig — use directly
this.memoryConfig = m;
} else {
// Bare BuiltMemory — wrap in minimal config
this.memoryConfig = { memory: m, lastMessages: DEFAULT_LAST_MESSAGES };
}
return this;
}
/** Add a middleware. */
middleware(m: AgentMiddleware): this {
this.middlewares.push(m);
return this;
}
// TODO: guardrails can be a middleware internally
/** Add an input guardrail. Accepts a built guardrail or a Guardrail builder. */
inputGuardrail(g: BuiltGuardrail | { build(): BuiltGuardrail }): this {
this.inputGuardrails.push('_config' in g ? g : g.build());
return this;
}
/** Add an output guardrail. Accepts a built guardrail or a Guardrail builder. */
outputGuardrail(g: BuiltGuardrail | { build(): BuiltGuardrail }): this {
this.outputGuardrails.push('_config' in g ? g : g.build());
return this;
}
/** Add an eval to run after each agent response. Accepts an Eval builder or BuiltEval. */
eval(e: Eval | BuiltEval | { ensureBuilt(): BuiltEval }): this {
const built = '_run' in e ? e : (e as Eval).ensureBuilt();
this.agentEvals.push(built);
return this;
}
/**
* Set the checkpoint storage for tool suspend/resume (human-in-the-loop).
* Required when any tool uses `.suspend()` / `.resume()`.
*
* - `'memory'`: in-process storage (lost on restart, fine for dev)
* - A storage provider instance (e.g. `new LibSQLStore(...)`, `new PgStore(...)`)
*
* @example
* ```typescript
* const agent = new Agent('assistant')
* .model('anthropic/claude-sonnet-4-5')
* .instructions('...')
* .tool(dangerousTool) // has .suspend() / .resume()
* .checkpoint('memory');
* ```
*/
checkpoint(storage: 'memory' | CheckpointStore): this {
this.checkpointStore = storage;
return this;
}
/**
* Declare a credential this agent requires. The execution engine resolves
* the credential name to an API key at build time and injects it into the
* model configuration; user code never handles raw keys.
*
* @example
* ```typescript
* const agent = new Agent('assistant')
* .model('anthropic/claude-sonnet-4-5')
* .credential('anthropic')
* .instructions('You are helpful.');
* ```
*/
credential(name: string): this {
this.credentialName = name;
return this;
}
/** @internal Read the declared credential name (used by the execution engine). */
protected get declaredCredential(): string | undefined {
return this.credentialName;
}
/** @internal Set the resolved API key (called by the execution engine before super.build()). */
protected set resolvedApiKey(key: string) {
this.resolvedKey = key;
}
/**
* Set a structured output schema. When set, the agent's response will be
* parsed into a typed object matching the schema, available as `result.output`.
*
* @example
* ```typescript
* const agent = new Agent('extractor')
* .model('anthropic/claude-sonnet-4-5')
* .instructions('Extract structured data.')
* .structuredOutput(z.object({
* code: z.string(),
* explanation: z.string(),
* }));
*
* const result = await agent.generate('...');
* console.log(result.structuredOutput); // { code: '...', explanation: '...' }
* ```
*/
structuredOutput(schema: z.ZodType): this {
this.outputSchema = schema;
return this;
}
/**
* Enable extended thinking / reasoning for the agent.
* The config type is inferred from the provider set via `.model()`.
*
* @example
* ```typescript
* // Anthropic — budgetTokens
* new Agent('thinker')
* .model('anthropic', 'claude-sonnet-4-5')
* .thinking({ budgetTokens: 10000 })
*
* // OpenAI — reasoningEffort
* new Agent('thinker')
* .model('openai', 'o3-mini')
* .thinking({ reasoningEffort: 'high' })
* ```
*/
thinking<P extends Provider>(_provider: P, config?: ThinkingConfigFor<P>): this {
this.thinkingConfig = config ?? {};
return this;
}
/** Set telemetry configuration for this agent. Accepts a Telemetry builder or pre-built config. */
telemetry(t: Telemetry | BuiltTelemetry): this {
if (t instanceof Telemetry) {
this.telemetryBuilder = t;
this.telemetryConfig = undefined;
} else {
this.telemetryBuilder = undefined;
this.telemetryConfig = t;
}
return this;
}
/** @internal Read the declared telemetry builder (used by the execution engine to resolve credentials). */
protected get declaredTelemetry(): Telemetry | undefined {
return this.telemetryBuilder;
}
/**
* Set the number of tool calls to execute concurrently within a single LLM turn.
*
* - `1` (default): sequential execution, fully backward-compatible.
* - `Infinity`: unlimited parallelism (all tool calls start at once).
* - Any number in between: bounded concurrency (e.g. `5` = at most 5 tools run simultaneously).
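*
* @example
* ```typescript
* agent.toolCallConcurrency(5); // at most 5 tool calls run simultaneously per turn
* ```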
*/
toolCallConcurrency(n: number): this {
if ((n !== Infinity && !Number.isInteger(n)) || n < 1) {
throw new Error('toolCallConcurrency must be a positive integer or Infinity');
}
this.concurrencyValue = n;
return this;
}
/**
* Require human approval before any tool executes.
* Tools that already have .suspend()/.resume() (suspendSchema) are skipped.
* Requires .checkpoint() to be set.
*/
requireToolApproval(): this {
this.requireToolApprovalValue = true;
return this;
}
/**
* Attach a workspace to this agent. Workspace tools and instructions
* are injected at build time.
*/
workspace(ws: Workspace): this {
this.workspaceInstance = ws;
return this;
}
/**
* Add an MCP client as a tool source for this agent.
* Tools from all servers in the client become available to the agent.
* Multiple clients can be added; tools are merged across all of them.
*
* @example
* ```typescript
* const client = new McpClient([
* { name: 'browser', url: 'http://localhost:9222/mcp', transport: 'streamableHttp' },
* { name: 'fs', command: 'npx', args: ['@anthropic/mcp-fs', '/tmp'] },
* ]);
*
* const agent = new Agent('assistant')
* .model('anthropic', 'claude-sonnet-4')
* .mcp(client)
* .instructions('You are a helpful assistant.');
* ```
*/
mcp(client: McpClient): this {
this.mcpClients.push(client);
return this;
}
/** Get the evals attached to this agent. */
get evaluations(): BuiltEval[] {
return [...this.agentEvals];
}
/**
* Register a handler for an agent lifecycle event.
* Handlers are called synchronously during the agentic loop.
*/
on(event: AgentEvent, handler: AgentEventHandler): void {
this.eventBus.on(event, handler);
}
/**
* Wrap this agent as a tool for use in multi-agent composition.
* The tool sends a text prompt to this agent and returns the text of the response.
*
* @example
* ```typescript
* const coordinatorAgent = new Agent('coordinator')
* .model('anthropic/claude-sonnet-4-5')
* .instructions('Route tasks to specialist agents.')
* .tool(writerAgent.asTool('Write content given a topic'));
* ```
*/
asTool(description: string): BuiltTool {
// eslint-disable-next-line @typescript-eslint/no-this-alias
const agent = this;
const tool = new Tool(this.name)
.description(description)
.input(
z.object({
input: z.string().describe('The input to send to the agent'),
}),
)
.output(
z.object({
result: z.string().describe('The result of the agent'),
}),
)
.handler(async (rawInput, ctx) => {
const { input } = rawInput as { input: string };
const result = await agent.generate(input, {
telemetry: ctx.parentTelemetry,
} as RunOptions & ExecutionOptions);
const text = result.messages
.filter((m) => 'role' in m && m.role === 'assistant')
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
// Collect sub-agent usage: this agent's own + any nested sub-agents
const subAgentUsage: SubAgentUsage[] = [];
if (result.usage) {
subAgentUsage.push({ agent: agent.name, model: result.model, usage: result.usage });
}
if (result.subAgentUsage) {
subAgentUsage.push(...result.subAgentUsage);
}
// Return branded result — the runtime unwraps it to extract sub-agent usage.
// createAgentToolResult returns `never`, same pattern as ctx.suspend().
if (subAgentUsage.length > 0) {
return createAgentToolResult({ result: text }, subAgentUsage);
}
return { result: text };
});
return tool.build();
}
/** Return the latest state snapshot of the agent. Returns `{ status: 'idle' }` before first run. */
getState(): SerializableAgentState {
if (!this.runtime) {
return {
persistence: undefined,
status: 'idle',
messageList: { messages: [], historyIds: [], inputIds: [], responseIds: [] },
pendingToolCalls: {},
};
}
return this.runtime.getState();
}
/**
* Cancel the currently running agent.
* Synchronous: sets an abort flag; the agentic loop checks it asynchronously.
*/
abort(): void {
this.eventBus.abort();
}
/** Generate a response (non-streaming). Lazy-builds on first call. */
async generate(
input: AgentMessage[] | string,
options?: RunOptions & ExecutionOptions,
): Promise<GenerateResult> {
const runtime = await this.ensureBuilt();
return await runtime.generate(this.toMessages(input), options);
}
/** Stream a response. Lazy-builds on first call. */
async stream(
input: AgentMessage[] | string,
options?: RunOptions & ExecutionOptions,
): Promise<StreamResult> {
const runtime = await this.ensureBuilt();
return await runtime.stream(this.toMessages(input), options);
}
/** Resume a suspended tool call with data. Lazy-builds on first call. */
async resume(
method: 'generate',
data: unknown,
options: ResumeOptions & ExecutionOptions,
): Promise<GenerateResult>;
async resume(
method: 'stream',
data: unknown,
options: ResumeOptions & ExecutionOptions,
): Promise<StreamResult>;
async resume(
method: 'generate' | 'stream',
data: unknown,
options: ResumeOptions & ExecutionOptions,
): Promise<GenerateResult | StreamResult> {
const runtime = await this.ensureBuilt();
if (method === 'generate') {
return await runtime.resume('generate', data, options);
}
return await runtime.resume('stream', data, options);
}
approve(method: 'generate', options: ResumeOptions & ExecutionOptions): Promise<GenerateResult>;
approve(method: 'stream', options: ResumeOptions & ExecutionOptions): Promise<StreamResult>;
async approve(
method: 'generate' | 'stream',
options: ResumeOptions & ExecutionOptions,
): Promise<GenerateResult | StreamResult> {
if (method === 'generate') {
return await this.resume('generate', { approved: true }, options);
}
return await this.resume('stream', { approved: true }, options);
}
deny(method: 'generate', options: ResumeOptions & ExecutionOptions): Promise<GenerateResult>;
deny(method: 'stream', options: ResumeOptions & ExecutionOptions): Promise<StreamResult>;
async deny(
method: 'generate' | 'stream',
options: ResumeOptions & ExecutionOptions,
): Promise<GenerateResult | StreamResult> {
if (method === 'generate') {
return await this.resume('generate', { approved: false }, options);
}
return await this.resume('stream', { approved: false }, options);
}
/**
* @internal Lazy-build the agent on first use. Stores the promise so
* concurrent callers share one build operation. On error the promise is
* cleared so the caller can retry.
*/
private async ensureBuilt(): Promise<AgentRuntime> {
if (!this.buildPromise) {
const p = this.build();
this.buildPromise = p;
p.catch(() => {
if (this.buildPromise === p) this.buildPromise = undefined;
});
}
return await this.buildPromise;
}
private toMessages(input: string | AgentMessage[]): AgentMessage[] {
if (Array.isArray(input)) return input;
return [{ role: 'user', content: [{ type: 'text', text: input }] }];
}
/** @internal Validate configuration and produce an AgentRuntime. Overridden by the execution engine. */
protected async build(): Promise<AgentRuntime> {
const hasModel = this.modelId ?? this.modelConfigObj;
if (!hasModel) {
throw new Error(`Agent "${this.name}" requires a model`);
}
if (!this.instructionsText) {
throw new Error(`Agent "${this.name}" requires instructions`);
}
const finalTools = [...this.tools];
if (this.workspaceInstance) {
const wsTools = this.workspaceInstance.getTools();
finalTools.push(...wsTools);
}
let finalStaticTools = finalTools;
if (this.requireToolApprovalValue) {
finalStaticTools = finalTools.map((t) =>
t.suspendSchema ? t : wrapToolForApproval(t, { requireApproval: true }),
);
}
// Validate checkpoint requirement from static tools and known MCP approval config
// before attempting any network connections (allows fast failure).
const staticNeedsCheckpoint = finalStaticTools.some((t) => t.suspendSchema);
const mcpNeedsCheckpoint =
(this.requireToolApprovalValue && this.mcpClients.length > 0) ||
this.mcpClients.some((c) => c.declaresApproval());
if ((staticNeedsCheckpoint || mcpNeedsCheckpoint) && !this.checkpointStore) {
throw new Error(
`Agent "${this.name}" has tools requiring approval or suspend/resume but no checkpoint storage. ` +
"Add .checkpoint('memory') for in-process storage, " +
'or pass a persistent store (e.g. LibSQLStore, PgStore).',
);
}
// Resolve tools from all MCP clients.
const mcpToolLists = await Promise.all(this.mcpClients.map(async (c) => await c.listTools()));
let mcpTools = mcpToolLists.flat();
// Apply global requireToolApproval to MCP tools (per-server approval is already
// handled inside McpClient/McpConnection.listTools()).
if (this.requireToolApprovalValue) {
mcpTools = mcpTools.map((t) =>
t.suspendSchema ? t : wrapToolForApproval(t, { requireApproval: true }),
);
}
// Detect collisions between MCP tools and static tools.
const staticNames = new Set(finalStaticTools.map((t) => t.name));
const collisions = mcpTools.filter((t) => staticNames.has(t.name)).map((t) => t.name);
if (collisions.length > 0) {
throw new Error(
`MCP tool name collision — the following tool names resolve to duplicates: ${collisions.join(', ')}`,
);
}
const allTools = [...finalStaticTools, ...mcpTools];
// Validate checkpoint again after discovering actual MCP tools
// (catches the case where MCP tools have suspendSchema after listing).
const allNeedCheckpoint = allTools.some((t) => t.suspendSchema);
if (allNeedCheckpoint && !this.checkpointStore) {
throw new Error(
`Agent "${this.name}" has tools requiring approval or suspend/resume but no checkpoint storage. ` +
"Add .checkpoint('memory') for in-process storage, " +
'or pass a persistent store (e.g. LibSQLStore, PgStore).',
);
}
let modelConfig: ModelConfig;
if (this.modelConfigObj) {
if (
this.resolvedKey &&
typeof this.modelConfigObj === 'object' &&
'id' in this.modelConfigObj
) {
modelConfig = { ...this.modelConfigObj, apiKey: this.resolvedKey };
} else {
modelConfig = this.modelConfigObj;
}
} else if (this.resolvedKey) {
modelConfig = { id: this.modelId!, apiKey: this.resolvedKey };
} else {
modelConfig = this.modelId!;
}
let instructions = this.instructionsText;
if (this.workspaceInstance) {
const wsInstructions = this.workspaceInstance.getInstructions();
if (wsInstructions) {
instructions = `${instructions}\n\n${wsInstructions}`;
}
}
this.runtime = new AgentRuntime({
name: this.name,
model: modelConfig,
instructions,
tools: allTools.length > 0 ? allTools : undefined,
instructionProviderOptions: this.instructionProviderOpts,
providerTools: this.providerTools.length > 0 ? this.providerTools : undefined,
memory: this.memoryConfig?.memory,
lastMessages: this.memoryConfig?.lastMessages,
workingMemory: this.memoryConfig?.workingMemory,
semanticRecall: this.memoryConfig?.semanticRecall,
structuredOutput: this.outputSchema,
checkpointStorage: this.checkpointStore,
thinking: this.thinkingConfig,
eventBus: this.eventBus,
toolCallConcurrency: this.concurrencyValue,
titleGeneration: this.memoryConfig?.titleGeneration,
telemetry: this.telemetryConfig ?? (await this.telemetryBuilder?.build()),
});
return this.runtime;
}
}

View file

@ -0,0 +1,186 @@
const MODELS_DEV_URL = 'https://models.dev/api.json';
/** Cost per million tokens. */
export interface ModelCost {
/** Cost per million input tokens (USD). */
input: number;
/** Cost per million output tokens (USD). */
output: number;
/** Cost per million cached input tokens (USD). */
cacheRead?: number;
/** Cost per million cache write tokens (USD). */
cacheWrite?: number;
}
/** Model context/output limits. */
export interface ModelLimits {
/** Maximum context window size in tokens. */
context?: number;
/** Maximum output tokens. */
output?: number;
}
/** Information about a single model. */
export interface ModelInfo {
/** Model ID (e.g. 'claude-sonnet-4-5'). */
id: string;
/** Human-readable name (e.g. 'Claude Sonnet 4.5'). */
name: string;
/** Whether the model supports reasoning / thinking. */
reasoning: boolean;
/** Whether the model supports tool calling. */
toolCall: boolean;
/** Cost per million tokens. */
cost?: ModelCost;
/** Token limits. */
limits?: ModelLimits;
}
/** Information about a provider. */
export interface ProviderInfo {
/** Provider ID (e.g. 'anthropic'). */
id: string;
/** Human-readable name (e.g. 'Anthropic'). */
name: string;
/** Available models keyed by model ID. */
models: Record<string, ModelInfo>;
}
/** The full catalog of providers and their models. */
export type ProviderCatalog = Record<string, ProviderInfo>;
interface ModelsDevModel {
id: string;
name: string;
reasoning?: boolean;
tool_call?: boolean;
cost?: { input?: number; output?: number; cache_read?: number; cache_write?: number };
limit?: { context?: number; output?: number };
}
interface ModelsDevProvider {
id: string;
name: string;
models?: Record<string, ModelsDevModel>;
}
/**
* Fetch the provider/model catalog from models.dev.
*
* Returns a map of provider ID to ProviderInfo with all available models.
* The catalog is fetched once and can be cached by the caller.
*
* @example
* ```typescript
* import { fetchProviderCatalog } from '@n8n/agents';
*
* const catalog = await fetchProviderCatalog();
* console.log(Object.keys(catalog)); // ['anthropic', 'openai', ...]
* console.log(catalog.anthropic.models['claude-sonnet-4-5'].reasoning); // true
* ```
*/
export async function fetchProviderCatalog(): Promise<ProviderCatalog> {
const response = await fetch(MODELS_DEV_URL);
if (!response.ok) {
throw new Error(`Failed to fetch provider catalog: ${response.statusText}`);
}
const data = (await response.json()) as Record<string, ModelsDevProvider>;
const catalog: ProviderCatalog = {};
for (const [key, provider] of Object.entries(data)) {
if (!provider.models || Object.keys(provider.models).length === 0) continue;
const models: Record<string, ModelInfo> = {};
for (const [modelId, model] of Object.entries(provider.models)) {
const info: ModelInfo = {
id: model.id,
name: model.name,
reasoning: model.reasoning ?? false,
toolCall: model.tool_call ?? false,
};
if (model.cost?.input !== undefined && model.cost?.output !== undefined) {
info.cost = {
input: model.cost.input,
output: model.cost.output,
...(model.cost.cache_read !== undefined && { cacheRead: model.cost.cache_read }),
...(model.cost.cache_write !== undefined && { cacheWrite: model.cost.cache_write }),
};
}
if (model.limit) {
info.limits = {
...(model.limit.context !== undefined && { context: model.limit.context }),
...(model.limit.output !== undefined && { output: model.limit.output }),
};
}
models[modelId] = info;
}
catalog[key] = {
id: provider.id,
name: provider.name,
models,
};
}
return catalog;
}
// --- Global cached catalog for internal use ---
let cachedCatalog: ProviderCatalog | undefined;
let catalogFetchPromise: Promise<ProviderCatalog | undefined> | undefined;
/**
* Get the cached catalog, fetching once if needed.
* Returns undefined if the fetch fails (offline, timeout, etc.).
* On failure, clears the in-flight promise so the next call retries.
* @internal
*/
export async function getCachedCatalog(): Promise<ProviderCatalog | undefined> {
if (cachedCatalog) return cachedCatalog;
catalogFetchPromise ??= fetchProviderCatalog()
.then((c) => {
cachedCatalog = c;
return c;
})
.catch((error: unknown) => {
// Clear so subsequent calls retry
catalogFetchPromise = undefined;
console.warn(
'[agents] Failed to fetch model catalog from models.dev — cost data will be unavailable:',
error instanceof Error ? error.message : error,
);
return undefined;
});
return await catalogFetchPromise;
}
/**
* Look up cost info for a model by its full ID (e.g. 'anthropic/claude-sonnet-4-5').
* Returns undefined if catalog is unavailable or model not found.
* @internal
*/
export async function getModelCost(modelId: string): Promise<ModelCost | undefined> {
const catalog = await getCachedCatalog();
if (!catalog) return undefined;
const [provider, ...rest] = modelId.split('/');
const modelName = rest.join('/');
return catalog[provider]?.models[modelName]?.cost;
}
/**
* Compute the cost in USD from token usage and per-million-token pricing.
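*
* @example
* ```typescript
* // 2,000 prompt tokens and 500 completion tokens at $3 / $15 per million tokens:
* computeCost({ promptTokens: 2_000, completionTokens: 500 }, { input: 3, output: 15 });
* // => (2000 / 1e6) * 3 + (500 / 1e6) * 15 = 0.0135 (USD)
* ```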
*/
export function computeCost(
usage: { promptTokens: number; completionTokens: number },
cost: ModelCost,
): number {
const inputCost = (usage.promptTokens / 1_000_000) * cost.input;
const outputCost = (usage.completionTokens / 1_000_000) * cost.output;
return inputCost + outputCost;
}

View file

@ -0,0 +1,183 @@
import { filterLlmMessages } from './message';
import { AgentRuntime } from '../runtime/agent-runtime';
import type { BuiltEval, CheckFn, EvalInput, EvalScore, JudgeFn, JudgeHandlerFn } from '../types';
import type { AgentMessage } from '../types/sdk/message';
/** Extract text content from LLM messages (custom messages are skipped). */
function extractText(messages: AgentMessage[]): string {
return filterLlmMessages(messages)
.flatMap((m) => m.content)
.filter((c) => c.type === 'text')
.map((c) => (c as { text: string }).text)
.join('');
}
/**
* Builder for creating evaluations with a fluent API.
*
* Two modes:
* - **Deterministic**: `.check(fn)` for pure-function scoring
* - **LLM-as-judge**: `.model()` + `.credential()` + `.judge(fn)` for LLM-powered scoring
*
* Usage:
* ```typescript
* // Deterministic
* const jsonCheck = new Eval('json-check')
* .description('Verify output is valid JSON')
* .check(({ output }) => {
* try { JSON.parse(output); return { score: 1, reasoning: 'Valid JSON' }; }
* catch { return { score: 0, reasoning: 'Invalid JSON' }; }
* });
*
* // LLM-as-judge
* const correctness = new Eval('correctness')
* .description('Judge factual correctness')
* .model('anthropic/claude-haiku-4-5')
* .credential('anthropic')
* .judge(async ({ input, output, expected, llm }) => {
* const result = await llm(`Is "${output}" correct for "${input}"? Expected: ${expected}`);
* const score = parseFloat(result.text.match(/[\d.]+/)?.[0] ?? '0');
* return { score: Math.min(1, Math.max(0, score)), reasoning: result.text };
* });
* ```
*/
export class Eval {
private evalName: string;
private desc?: string;
private checkFn?: CheckFn;
private judgeFn?: JudgeHandlerFn;
private modelId?: string;
private credentialName?: string;
private _resolvedApiKey?: string;
constructor(name: string) {
this.evalName = name;
}
/** Human-readable description of what this eval measures. */
description(desc: string): this {
this.desc = desc;
return this;
}
/** Set the judge model (LLM-as-judge mode). */
model(modelId: string): this {
this.modelId = modelId;
return this;
}
/** Declare a credential for the judge model. */
credential(name: string): this {
this.credentialName = name;
return this;
}
/** @internal Read the declared credential name (used by the execution engine). */
protected get declaredCredential(): string | undefined {
return this.credentialName;
}
/** @internal Set the resolved API key for the judge model. */
protected set resolvedApiKey(key: string) {
this._resolvedApiKey = key;
}
/**
* Set a deterministic check function.
* Mutually exclusive with `.judge()`.
*/
check(fn: CheckFn): this {
if (this.judgeFn) {
throw new Error(`Eval "${this.evalName}": cannot use both .check() and .judge()`);
}
this.checkFn = fn;
return this;
}
/**
* Set an LLM-as-judge handler. Requires `.model()` and `.credential()`.
* The handler receives `{ input, output, expected, llm }` where `llm`
* is a callable function bound to the judge model.
* Mutually exclusive with `.check()`.
*/
judge(fn: JudgeHandlerFn): this {
if (this.checkFn) {
throw new Error(`Eval "${this.evalName}": cannot use both .check() and .judge()`);
}
this.judgeFn = fn;
return this;
}
/** The eval name. */
get name(): string {
return this.evalName;
}
/** @internal Build the eval into a runnable form. */
protected build(): BuiltEval {
if (!this.checkFn && !this.judgeFn) {
throw new Error(`Eval "${this.evalName}" requires either .check() or .judge()`);
}
if (this.judgeFn && !this.modelId) {
throw new Error(`Eval "${this.evalName}" uses .judge() but no .model() was set`);
}
const name = this.evalName;
const desc = this.desc;
if (this.checkFn) {
const checkFn = this.checkFn;
return {
name,
description: desc,
_run: async (input: EvalInput) => await checkFn(input),
};
}
// LLM-as-judge mode
const judgeFn = this.judgeFn!;
const modelConfig: string | { id: `${string}/${string}`; apiKey: string } = this._resolvedApiKey
? { id: this.modelId! as `${string}/${string}`, apiKey: this._resolvedApiKey }
: this.modelId!;
const runtime = new AgentRuntime({
name: `${name}-judge`,
model: modelConfig,
instructions: 'You are an evaluation judge. Respond precisely as instructed.',
});
const llm: JudgeFn = async (prompt: string) => {
const result = await runtime.generate([
{ role: 'user', content: [{ type: 'text', text: prompt }] },
]);
return { text: extractText(result.messages) };
};
return {
name,
description: desc,
_run: async (input: EvalInput) => await judgeFn({ ...input, llm }),
};
}
/** @internal Ensure the eval is built (lazy). */
private _built?: BuiltEval;
/** @internal */
ensureBuilt(): BuiltEval {
this._built ??= this.build();
return this._built;
}
/** Run this eval against a single input. Lazy-builds on first call. */
async run(input: EvalInput): Promise<EvalScore> {
return await this.ensureBuilt()._run(input);
}
}

View file

@ -0,0 +1,159 @@
import type { Agent } from './agent';
import type { Eval } from './eval';
import { filterLlmMessages } from './message';
import type { EvalResults, EvalRunResult, EvalScore, GenerateResult } from '../types';
import type { AgentMessage } from '../types/sdk/message';
/** Extract text content from messages. */
function extractText(messages: AgentMessage[]): string {
return filterLlmMessages(messages)
.flatMap((m) => m.content)
.filter((c) => c.type === 'text')
.map((c) => (c as { text: string }).text)
.join('');
}
export interface DatasetRow {
/** The prompt to send to the agent. */
input: string;
/** Expected answer (used by evals like correctness/similarity). */
expected?: string;
/**
* Per-tool resume data overrides for evaluation. By default all suspended
* tools are auto-resumed with `{ approved: true }` during evaluations.
* Use this to test denial or custom resume scenarios.
*
* - `'deny'` is shorthand for `{ approved: false }`
* - An object value is passed as-is to `agent.resume()`
*/
resumeData?: Record<string, 'deny' | Record<string, unknown>>;
}
export interface EvaluateConfig {
/** Dataset of test cases to run through the agent. */
dataset: DatasetRow[];
/** Evals to run against each agent response. */
evals: Eval[];
}
/**
* Run a dataset through an agent and score the results with evals.
*
* All dataset rows and evals run in parallel for maximum throughput.
* Suspended tool calls are **auto-resumed with `{ approved: true }`** during
* evals. Use `resumeData` in dataset rows to override per tool.
*
* @example
* ```typescript
* const results = await evaluate(agent, {
* dataset: [
* { input: 'What is 2+2?', expected: '4' },
* { input: 'Delete temp files', resumeData: { delete_file: 'deny' } },
* { input: 'Book flight', resumeData: { book: { seat: '12A' } } },
* ],
* evals: [correctness, similarity],
* });
* ```
*/
export async function evaluate(agent: Agent, config: EvaluateConfig): Promise<EvalResults> {
const { dataset, evals } = config;
const runs: EvalRunResult[] = await Promise.all(
dataset.map(async (row) => {
const result = await runWithInterrupts(agent, row.input, row.resumeData);
const toolCalls = result.toolCalls ?? [];
// Build composite output: if the agent's text is empty but it made
// tool calls, include the tool outputs so evals have something to score.
let output = extractText(result.messages);
if (!output.trim() && toolCalls.length > 0) {
const toolOutputs = toolCalls
.filter((tc) => tc.output !== undefined)
.map((tc) => `[${tc.tool}] ${JSON.stringify(tc.output)}`);
if (toolOutputs.length > 0) {
output = toolOutputs.join('\n');
}
}
const scoreEntries = await Promise.all(
evals.map(async (ev): Promise<[string, EvalScore]> => {
const score = await ev.run({
input: row.input,
output,
expected: row.expected,
toolCalls,
});
return [ev.name, score];
}),
);
return {
input: row.input,
output,
expected: row.expected,
scores: Object.fromEntries(scoreEntries),
};
}),
);
const summary: EvalResults['summary'] = {};
for (const ev of evals) {
const results = runs
.map((r) => r.scores[ev.name]?.pass)
.filter((p): p is boolean => p !== undefined);
if (results.length > 0) {
const passed = results.filter(Boolean).length;
summary[ev.name] = {
passed,
failed: results.length - passed,
total: results.length,
};
}
}
return { runs, summary };
}
/**
* Run the agent with automatic interrupt handling.
* Uses generate() and loops: if the result has a pendingSuspend, resolves
* the resume data and calls agent.resume('generate', ...) to get a
* GenerateResult directly without needing to stream-and-re-generate.
*
* Tools are auto-resumed with `{ approved: true }` by default;
* use `resumeOverrides` to override per tool.
*/
async function runWithInterrupts(
agent: Agent,
input: string,
resumeOverrides?: Record<string, 'deny' | Record<string, unknown>>,
): Promise<GenerateResult> {
let result = await agent.generate(input);
const allToolCalls: Array<{ tool: string; input: unknown; output: unknown }> = [
...(result.toolCalls ?? []),
];
while (result.pendingSuspend && result.pendingSuspend.length > 0) {
const { runId, toolCallId, toolName } = result.pendingSuspend[0];
const override = toolName ? resumeOverrides?.[toolName] : undefined;
let data: Record<string, unknown>;
if (override === 'deny') {
data = { approved: false };
} else if (override && typeof override === 'object') {
data = override;
} else {
data = { approved: true };
}
result = await agent.resume('generate', data, { runId, toolCallId });
allToolCalls.push(...(result.toolCalls ?? []));
}
return {
...result,
...(allToolCalls.length > 0 ? { toolCalls: allToolCalls } : {}),
};
}

View file

@ -0,0 +1,52 @@
import type { BuiltGuardrail, GuardrailType, GuardrailStrategy, PiiDetectionType } from '../types';
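/**
* Builder for configuring guardrails with a fluent API.
*
* Note: guardrails are currently accepted by the Agent builder for forward
* compatibility but are not yet wired to the runtime.
*
* Usage sketch (the specific type, strategy, and detection values below are
* illustrative; see `GuardrailType`, `GuardrailStrategy`, and `PiiDetectionType`
* for the supported values):
* ```typescript
* const piiGuard = new Guardrail('pii')
*   .type('pii')
*   .strategy('redact')
*   .detect(['email', 'phone'])
*   .threshold(0.8);
*
* agent.inputGuardrail(piiGuard);
* ```
*/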
export class Guardrail {
private name: string;
private guardType?: GuardrailType;
private strategyType?: GuardrailStrategy;
private detectionTypes?: PiiDetectionType[];
private thresholdValue?: number;
constructor(name: string) {
this.name = name;
}
type(guardType: GuardrailType): this {
this.guardType = guardType;
return this;
}
strategy(strategy: GuardrailStrategy): this {
this.strategyType = strategy;
return this;
}
detect(types: PiiDetectionType[]): this {
this.detectionTypes = types;
return this;
}
threshold(value: number): this {
this.thresholdValue = value;
return this;
}
build(): BuiltGuardrail {
if (!this.guardType) throw new Error(`Guardrail "${this.name}" requires a type`);
if (!this.strategyType) throw new Error(`Guardrail "${this.name}" requires a strategy`);
return {
name: this.name,
guardType: this.guardType,
strategy: this.strategyType,
_config: {
detectionTypes: this.detectionTypes,
threshold: this.thresholdValue,
},
};
}
}

View file

@ -0,0 +1,231 @@
import { McpConnection } from '../runtime/mcp-connection';
import type { McpServerConfig, McpVerifyResult } from '../types/sdk/mcp';
import type { BuiltTool } from '../types/sdk/tool';
/**
* Manages connections to one or more MCP servers and exposes their tools
* as a flat list of BuiltTool instances.
*
* Connections are established lazily on the first `listTools()` call and
* kept alive until `close()` is called. Both operations deduplicate
* concurrent calls via stored promises, so calling `listTools()` from
* multiple concurrent `generate()` runs is safe.
*
* @example
* ```typescript
* const client = new McpClient([
* { name: 'browser', url: 'http://localhost:9222/mcp', transport: 'streamableHttp' },
* { name: 'fs', command: 'npx', args: ['@anthropic/mcp-fs', '/tmp'] },
* ]);
*
* const agent = new Agent('assistant')
* .model('anthropic/claude-sonnet-4-5')
* .instructions('You are a helpful assistant.')
* .mcp(client);
*
* const result = await agent.generate('List files in /tmp');
* await client.close();
* ```
*/
export class McpClient {
private readonly configs: McpServerConfig[];
private connections: McpConnection[];
private listToolsPromise: Promise<BuiltTool[]> | undefined;
private closePromise: Promise<void> | undefined;
/**
* @param configs - Server configurations. Each must have either `url` or `command`.
* Duplicate names within the list are rejected.
* @param requireToolApproval - When true, every tool from every server is wrapped
* with a human-approval gate (requires `.checkpoint()` on the Agent).
*/
constructor(configs: McpServerConfig[], requireToolApproval = false) {
for (const cfg of configs) {
if (!cfg.url && !cfg.command) {
throw new Error(
`MCP server "${cfg.name}": exactly one of "url" or "command" must be provided`,
);
}
if (cfg.url && cfg.command) {
throw new Error(`MCP server "${cfg.name}": provide either "url" or "command", not both`);
}
}
const seen = new Set<string>();
for (const cfg of configs) {
if (seen.has(cfg.name)) {
throw new Error(`MCP server name "${cfg.name}" is already registered`);
}
seen.add(cfg.name);
}
this.configs = configs;
this.connections = configs.map((cfg) => new McpConnection(cfg, requireToolApproval));
}
/**
* Explicitly connect to all servers without listing tools.
* Optional: `listTools()` connects implicitly.
*/
async connect(): Promise<void> {
await this.listTools();
}
/**
* Connect to all servers (if not already connected) and return the full
* flat list of tools. Subsequent calls return the cached list without
* additional network round-trips. On error the cache is cleared so the
* caller can retry.
*/
async listTools(): Promise<BuiltTool[]> {
if (!this.listToolsPromise) {
const p = this.doListTools();
this.listToolsPromise = p;
p.catch(() => {
if (this.listToolsPromise === p) this.listToolsPromise = undefined;
});
}
return await this.listToolsPromise;
}
/**
* Disconnect from all servers. Subsequent calls are no-ops.
* Best-effort: errors are logged but not thrown.
*/
async close(): Promise<void> {
this.closePromise ??= this.doClose();
return await this.closePromise;
}
/**
* Verify connectivity to all configured servers.
* A temporary connection is opened to each server, its tools are
* listed, and the connection is closed; this does NOT affect the
* long-lived connections used by `listTools()`.
*
* Never throws; returns a result object indicating success or per-server
* errors so callers can handle partial failures gracefully.
*
* @example
* ```typescript
* const result = await client.verify();
* if (!result.ok) {
* console.error('MCP connection failed:', result.errors);
* }
* ```
*/
async verify(): Promise<McpVerifyResult> {
if (this.configs.length === 0) {
return { ok: true, servers: [] };
}
const results = await Promise.allSettled(
this.configs.map(async (cfg) => {
const conn = new McpConnection(cfg);
try {
await conn.connect();
const tools = await conn.listTools();
return { name: cfg.name, tools: tools.length };
} finally {
await conn.disconnect().catch(() => {});
}
}),
);
const errors: Array<{ server: string; error: string }> = [];
const servers: Array<{ name: string; tools: number }> = [];
for (let i = 0; i < results.length; i++) {
const result = results[i];
if (result.status === 'rejected') {
errors.push({
server: this.configs[i].name,
error: result.reason instanceof Error ? result.reason.message : String(result.reason),
});
} else {
servers.push(result.value);
}
}
return errors.length > 0 ? { ok: false, errors } : { ok: true, servers };
}
/**
* Returns true when any configured server declares per-server approval
* requirements (`requireApproval: true` or a non-empty `requireApproval`
* string array). Does NOT require a network connection.
*
* Used by the Agent builder to validate checkpoint configuration before
* attempting to connect.
*/
declaresApproval(): boolean {
return this.connections.some((conn) => conn.declaresApproval());
}
private async doListTools(): Promise<BuiltTool[]> {
const connectedConnections: McpConnection[] = [];
const settled = await Promise.allSettled(
this.connections.map(async (conn) => {
await conn.connect();
connectedConnections.push(conn);
return await conn.listTools();
}),
);
const failed = settled
.map((r, i) => ({ result: r, name: this.connections[i].name }))
.filter((x) => x.result.status === 'rejected');
if (failed.length > 0) {
await Promise.allSettled(connectedConnections.map(async (c) => await c.disconnect()));
const details = failed
.map((x) => {
const reason =
x.result.status === 'rejected'
? x.result.reason instanceof Error
? x.result.reason.message
: String(x.result.reason)
: '';
return `${x.name}: ${reason}`;
})
.join('; ');
throw new Error(`MCP connection failed — ${details}`);
}
const tools = settled.flatMap((r) => (r.status === 'fulfilled' ? r.value : []));
const seen = new Set<string>();
const duplicates: string[] = [];
for (const tool of tools) {
if (seen.has(tool.name)) {
duplicates.push(tool.name);
}
seen.add(tool.name);
}
if (duplicates.length > 0) {
await Promise.allSettled(connectedConnections.map(async (c) => await c.disconnect()));
throw new Error(
`MCP tool name collision — the following tool names resolve to duplicates: ${duplicates.join(', ')}`,
);
}
return tools;
}
private async doClose(): Promise<void> {
await Promise.allSettled(
this.connections.map(async (conn) => {
try {
await conn.disconnect();
} catch (error) {
console.error(`MCP disconnect error for server "${conn.name}":`, error);
}
}),
);
}
}

View file

@ -0,0 +1,187 @@
import type { z } from 'zod';
import { InMemoryMemory } from '../runtime/memory-store';
import { templateFromSchema } from '../runtime/working-memory';
import type {
BuiltMemory,
MemoryConfig,
SemanticRecallConfig,
TitleGenerationConfig,
} from '../types';
type ZodObjectSchema = z.ZodObject<z.ZodRawShape>;
const DEFAULT_LAST_MESSAGES = 10;
/**
* Builder for configuring conversation memory.
*
* Usage:
* ```typescript
* const memory = new Memory()
* .storage('memory')
* .lastMessages(20)
* .freeform('# User Context\n- **Name**:\n- **City**:');
*
* agent.memory(memory);
* ```
*/
export class Memory {
private lastMessagesValue: number = DEFAULT_LAST_MESSAGES;
private semanticRecallConfig?: SemanticRecallConfig;
private workingMemorySchema?: ZodObjectSchema;
private workingMemoryTemplate?: string;
private workingMemoryScope: 'resource' | 'thread' = 'resource';
private memoryBackend?: BuiltMemory;
private titleGenerationConfig?: TitleGenerationConfig;
/** The configured number of recent messages to include. */
get lastMessageCount(): number {
return this.lastMessagesValue;
}
/**
* Set the storage backend for conversation history.
*
* - `'memory'`: in-process memory (default, lost on restart)
* - A `BuiltMemory` instance for persistent storage (e.g. SqliteMemory)
*/
storage(backend: 'memory' | BuiltMemory): this {
if (backend === 'memory') {
this.memoryBackend = undefined;
} else {
this.memoryBackend = backend;
}
return this;
}
/** Set the number of recent messages to include in context. */
lastMessages(count: number): this {
this.lastMessagesValue = count;
return this;
}
/** Enable semantic recall (RAG-based retrieval of relevant past messages). */
semanticRecall(config: SemanticRecallConfig): this {
this.semanticRecallConfig = config;
return this;
}
/**
* Enable structured working memory with a Zod schema.
* Mutually exclusive with `.freeform()`.
*/
structured(schema: ZodObjectSchema): this {
this.workingMemorySchema = schema;
return this;
}
/**
* Enable free-form working memory with a markdown/text template.
* Mutually exclusive with `.structured()`.
*/
freeform(template: string): this {
this.workingMemoryTemplate = template;
return this;
}
/**
* Set the working memory scope.
*
* - `'resource'` (default): working memory is shared across all threads for the same resource/user.
* - `'thread'`: working memory is scoped to a single conversation thread.
*/
scope(s: 'resource' | 'thread'): this {
this.workingMemoryScope = s;
return this;
}
/**
* Enable automatic title generation for new threads.
*
* - `true`: uses the agent's own model and default instructions.
* - `{ model, instructions }`: custom model and/or custom instructions.
*
* Titles are generated once per thread (only when the thread has no title)
* and run asynchronously so they never block the agent response.
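*
* @example
* ```typescript
* // Sketch: generate titles with a cheaper dedicated model (the model id is an assumption).
* new Memory().titleGeneration({ model: 'openai/gpt-4o-mini' });
* ```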
*/
titleGeneration(config: boolean | TitleGenerationConfig): this {
if (config === true) {
this.titleGenerationConfig = {};
} else if (config === false) {
this.titleGenerationConfig = undefined;
} else {
this.titleGenerationConfig = config;
}
return this;
}
/**
* Validate configuration and produce a `MemoryConfig`.
*
* @throws if both `.structured()` and `.freeform()` are used
* @throws if `.freeform()` template is empty
* @throws if `.semanticRecall()` is used with a backend that doesn't support search()
*/
build(): MemoryConfig {
if (this.workingMemorySchema && this.workingMemoryTemplate !== undefined) {
throw new Error(
'Working memory cannot use both .structured() and .freeform(). ' +
'Choose one: .structured(zodSchema) for typed state, or .freeform(template) for free-form text.',
);
}
if (this.workingMemoryTemplate !== undefined && this.workingMemoryTemplate.trim() === '') {
throw new Error(
'Free-form working memory template cannot be empty. ' +
'Provide a markdown template with slots for the agent to fill.',
);
}
const memory: BuiltMemory = this.memoryBackend ?? new InMemoryMemory();
if (this.semanticRecallConfig) {
if (!memory.queryEmbeddings && !memory.search) {
throw new Error(
'Semantic recall requires a storage backend with queryEmbeddings() or search() support.',
);
}
if (!memory.search && !this.semanticRecallConfig.embedder) {
throw new Error(
'Semantic recall requires an embedder when using queryEmbeddings(). Add embedder to your semanticRecall config: ' +
".semanticRecall({ topK: 5, embedder: 'openai/text-embedding-3-small' })",
);
}
}
let workingMemory: MemoryConfig['workingMemory'];
if (this.workingMemorySchema) {
workingMemory = {
template: templateFromSchema(this.workingMemorySchema),
structured: true,
schema: this.workingMemorySchema,
scope: this.workingMemoryScope,
};
} else if (this.workingMemoryTemplate !== undefined) {
workingMemory = {
template: this.workingMemoryTemplate,
structured: false,
scope: this.workingMemoryScope,
};
}
return {
memory,
lastMessages: this.lastMessagesValue,
workingMemory,
semanticRecall: this.semanticRecallConfig,
titleGeneration: this.titleGenerationConfig,
};
}
}

Some files were not shown because too many files have changed in this diff.