feat: Add @n8n/agents package (#27560)

yehorkardash 2026-03-26 13:32:46 +02:00 committed by GitHub
parent d3e45bc126
commit 58fbaf4a88
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
147 changed files with 24961 additions and 227 deletions

View file

@ -0,0 +1,2 @@
OPENAI_API_KEY=
ANTHROPIC_API_KEY=

View file

@ -0,0 +1,138 @@
# AGENTS.md
Conventions for the `@n8n/agents` package.
## Code Style
- **No `_` prefix on private properties** — use `private` access modifier
without underscore. Write `private name: string`, not `private _name: string`.
- **Builder pattern with lazy build** — all public primitives use a fluent
builder API. **User code never calls `.build()`**. Builders are passed
directly to the consuming method (e.g. `agent.tool(myTool)`) which calls
`.build()` internally. Agent and Network have `run()`/`stream()` directly
on the class, which lazy-build via `ensureBuilt()` on first call. `build()`
is `protected` on Agent and Network to keep it out of the public API.
- **Zod for schemas** — all input/output schemas use Zod.
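A minimal sketch of these conventions (hypothetical class for illustration; the real builders carry far more configuration):
```typescript
class ExampleAgent {
	private name: string; // no `_` prefix on private properties
	private instructionsText = '';
	private built?: { name: string; instructions: string };

	constructor(name: string) {
		this.name = name;
	}

	instructions(text: string): this {
		this.instructionsText = text; // fluent: every setter returns `this`
		return this;
	}

	// Kept out of the public API; user code never calls build() directly.
	protected build() {
		return { name: this.name, instructions: this.instructionsText };
	}

	private ensureBuilt() {
		this.built ??= this.build(); // lazy build on first use
		return this.built;
	}

	async generate(input: string) {
		const built = this.ensureBuilt();
		// ...run the agent loop with `built` and `input`
		return { built, input };
	}
}
```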
## Package Structure
```
src/
  index.ts                        # Public API barrel export
  types/                          # Public TypeScript types
    index.ts                      # Re-exports consumable types
    telemetry.ts
    sdk/                          # Types aligned with builders (agent, eval, guardrail, mcp, memory, message, provider, tool)
    runtime/                      # Serializable runtime shapes (events, message lists)
    utils/                        # JSON typing helpers re-exported with public types
  sdk/                            # Fluent builders and SDK entry points
    agent.ts                      # Agent builder
    catalog.ts                    # Provider catalog fetch
    eval.ts                       # Evaluation primitives
    evaluate.ts                   # Evaluation runner over agents + dataset
    guardrail.ts                  # Guardrail builder
    mcp-client.ts                 # MCP client integration
    memory.ts                     # Memory builder
    message.ts                    # LLM/DB message helpers
    network.ts                    # Network builder
    provider-tools.ts             # Provider-defined tool factories
    telemetry.ts                  # Telemetry builder (OTel, redaction)
    tool.ts                       # Tool builder
    verify.ts                     # Verification utilities
  evals/                          # Built-in eval scorers; exported as namespace `evals` from index
  runtime/                        # Internal — never exported from index.ts
    agent-runtime.ts              # Core agent execution engine (AI SDK)
    tool-adapter.ts               # Tool execution, branded suspend detection
    stream.ts                     # Streaming helpers
    model-factory.ts              # Model instantiation
    memory-store.ts               # Conversation / working-memory persistence hooks
    working-memory.ts             # In-run working memory
    message-list.ts               # Message list + serialization for agent loop
    messages.ts                   # Message normalization
    mcp-connection.ts             # MCP connection lifecycle
    mcp-tool-resolver.ts
    run-state.ts                  # Run / checkpoint state
    event-bus.ts                  # Internal agent events
    runtime-helpers.ts
    title-generation.ts
    strip-orphaned-tool-messages.ts
    logger.ts
  storage/                        # Optional persisted memory backends (exported)
    sqlite-memory.ts
    postgres-memory.ts
  workspace/                      # Workspace, sandbox, filesystem, built-in tools (exported)
  integrations/                   # Optional integrations (exported where applicable)
    langsmith.ts                  # LangSmith telemetry adapter (peer `langsmith`)
  utils/                          # Internal helpers (e.g. Zod utilities); not barrel-exported
examples/
  basic-agent.ts                  # Sample snippet; included in format/lint paths
docs/
  agent-runtime-architecture.md   # In-package runtime notes
```
The **`index.ts`** surface also exports `Workspace` / sandbox / filesystem types,
`SqliteMemory` / `PostgresMemory`, `LangSmithTelemetry`, and `evals` alongside the
core SDK builders.
Optional **peer dependencies** (telemetry): `langsmith`, `@opentelemetry/sdk-trace-node`,
`@opentelemetry/sdk-trace-base`, `@opentelemetry/exporter-trace-otlp-http` — install these
only when wiring that telemetry.
## Credential Pattern
Agents declare credential requirements via `.credential('name')`. The execution
engine resolves the name to an API key and injects it into the model config.
User code never touches raw API keys.
```typescript
const agent = new Agent('assistant')
	.model('anthropic/claude-sonnet-4-5')
	.credential('anthropic')
	.instructions('You are helpful.');
```
## Engine Injection (EngineAgent)
The execution engine extends `Agent` and overrides `protected build()` to
inject infrastructure (checkpoint storage, credentials) before calling
`super.build()`. This is the pattern for all engine-level concerns:
```typescript
class EngineAgent extends Agent {
	protected build() {
		this.checkpoint(store);
		const cred = this.declaredCredential;
		if (cred) this.resolvedApiKey = resolve(cred);
		return super.build();
	}
}
```
## Testing
- Unit tests live in `src/__tests__/`, integration tests in `src/__tests__/integration/`
- Unit tests use Jest (`pnpm test` / `pnpm test:unit`)
- Integration tests use Vitest (`pnpm test:integration`) with real LLM calls
- A `.env` file at the package root is loaded automatically by the vitest config.
Always assume it exists when running integration tests. Never commit it.
- Required keys:
- `ANTHROPIC_API_KEY` — all integration tests
- `OPENAI_API_KEY` — semantic recall tests (embeddings)
- Tests skip automatically when the required API key is not set
- Run from the package directory: `cd packages/@n8n/agents && pnpm test`
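The automatic skipping works roughly like the sketch below (a hypothetical simplification of the `describeIf` helper the integration tests import from their local `helpers` module):
```typescript
import { describe } from 'vitest';

// Skip a whole suite when the provider's API key is missing.
export function describeIf(provider: 'anthropic' | 'openai') {
	const key =
		provider === 'anthropic' ? process.env.ANTHROPIC_API_KEY : process.env.OPENAI_API_KEY;
	return key ? describe : describe.skip;
}
```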
## Documentation
- Runtime architecture notes: `docs/agent-runtime-architecture.md` (this package).
- Spec-driven work in the wider repo may use `.claude/specs/` (see repo
`.claude/skills/spec-driven-development`).
## Building
```bash
cd packages/@n8n/agents
pnpm build # rimraf dist && tsc -p tsconfig.build.json → dist/
pnpm typecheck # tsc --noEmit
pnpm test # jest (unit)
```

View file

@ -0,0 +1,451 @@
# Agent Runtime Architecture
This document describes the internal architecture of the `@n8n/agents` agent
runtime — the execution engine that drives a single agent turn from input to
final response.
---
## Overview
`AgentRuntime` (`src/runtime/agent-runtime.ts`) is the core execution engine
for a single agent turn. It uses the Vercel AI SDK directly (`generateText` /
`streamText`) and is responsible for:
- Building the LLM message context (memory history, semantic recall, working
memory in the system prompt, user input)
- Stripping orphaned tool-call/tool-result pairs before LLM calls
(`stripOrphanedToolMessages`)
- Running the agentic tool-call loop (default **20** iterations,
`MAX_LOOP_ITERATIONS`)
- **Configurable tool-call concurrency** — tools in one LLM turn run in batches
of `toolCallConcurrency` (default `1`; `Infinity` runs all executable calls
in parallel)
- Suspending and resuming runs for Human-in-the-Loop (HITL) **and** for tools
that return a branded suspend result (`suspendSchema` / `resumeSchema`)
- Persisting new messages to a memory store at the end of each completed turn,
optionally saving **embeddings** for semantic recall
- Extracting and persisting **working memory** from assistant output when
configured
- Optional **structured output** (`Output.object` + Zod), **thinking** /
reasoning provider options, **title generation**, and **telemetry** (AI SDK
`experimental_telemetry`)
- **Token usage and cost** (catalog pricing via `getModelCost` / `computeCost`)
- Emitting lifecycle events via `AgentEventBus`
- Tracking run state (`idle` → `running` → `success / failed / suspended / cancelled`)
There are two parallel execution paths — non-streaming (`generate`) and
streaming (`stream`) — that mirror each other in structure.
```mermaid
graph TD
A[User Input] --> B[normalizeInput]
B --> C[buildMessageList]
C --> D{generate or stream?}
D -->|generate| E[runGenerateLoop]
D -->|stream| F[startStreamLoop → runStreamLoop]
E --> G[saveToMemory]
F --> G
G --> H[Return Result]
```
---
## Public API — BuiltAgent
`Agent` implements `BuiltAgent`, which exposes the full public surface:
| Method | Description |
|--------|-------------|
| `generate(input, options?)` | Non-streaming run; returns `GenerateResult` (errors often surface as `finishReason: 'error'` and `error` instead of throwing) |
| `stream(input, options?)` | Streaming run; returns `StreamResult` with `runId` and `stream` |
| `resume(method, data, options)` | Resume a suspended tool with payload `data`; `options` must include `runId` and `toolCallId` |
| `approve(method, options)` | HITL approval — calls `resume` with `{ approved: true }` |
| `deny(method, options)` | HITL decline — calls `resume` with `{ approved: false }` |
| `on(event, handler)` | Register a lifecycle event handler |
| `abort()` | Cancel the currently running agent |
| `getState()` | Return the latest `SerializableAgentState` snapshot |
| `asTool(description)` | Wrap the agent as a `BuiltTool` for multi-agent composition |
`ExecutionOptions` includes `abortSignal?: AbortSignal`, forwarded into
`AgentEventBus.resetAbort()` so callers can cancel via an external signal as
well as `agent.abort()`.
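For example (a sketch; the model id is illustrative, and abort surfaces as a result rather than a throw):
```typescript
import { Agent } from '@n8n/agents';

const agent = new Agent('assistant')
	.model('anthropic/claude-sonnet-4-5')
	.instructions('You are helpful.');

const controller = new AbortController();

// The external signal is forwarded into resetAbort(), so aborting the
// controller cancels the in-flight run just like agent.abort() would.
const pending = agent.generate('Summarize the latest report', {
	abortSignal: controller.signal,
});
controller.abort(); // or: agent.abort()

const result = await pending;
console.log(result.finishReason); // 'error' rather than a thrown exception
```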
---
## Event system
### AgentEventBus
`AgentEventBus` (`src/runtime/event-bus.ts`) is the internal publish/subscribe
channel shared between `Agent` (registers handlers via `on()`) and
`AgentRuntime` (emits events during the loop). A single bus instance is created
when the SDK wires the runtime and passed in via `AgentRuntimeConfig`.
```mermaid
flowchart LR
UserCode -->|"agent.on(event, handler)"| AgentEventBus
AgentEventBus -->|"passed via config"| AgentRuntime
AgentRuntime -->|"bus.emit(data)"| AgentEventBus
AgentEventBus -->|"calls handlers synchronously"| UserCode
```
Handlers have the signature `(data: AgentEventData) => void` — there is **no**
separate “controls” object; cancellation is done with `agent.abort()` on the
same bus that holds the `AbortController`.
`AgentMiddleware` in `types/runtime/event.ts` is a small alias type
(`on` mirrors the agent) for future middleware-style composition.
### Event types
| Event | When emitted | Payload |
|-------|----------------|---------|
| `AgentStart` | Start of `initRun`, right after `status: running`; before `ensureModelCost` / `buildMessageList` | — |
| `AgentEnd` | Successful completion after persistence / cleanup; payload is assistant-facing messages (`finalized.messages` in `generate`, `list.responseDelta()` in `stream`) | `{ messages }` |
| `TurnStart` | Top of each loop iteration, before the LLM call | — |
| `TurnEnd` | After tool calls for the iteration are processed; requires an assistant message in the new messages | `{ message, toolResults }` |
| `ToolExecutionStart` | Before `processToolCall` runs the handler | `{ toolCallId, toolName, args }` |
| `ToolExecutionEnd` | After the tool returns, errors, or is satisfied from an existing AI SDK tool-result | `{ toolCallId, toolName, result, isError }` |
| `Error` | Unhandled failures (not user **abort**); also emitted on some stream failures | `{ message, error }` |
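Registering handlers looks like this (sketch; it assumes the `AgentEvent` enum from `types/runtime/event.ts` is re-exported from the package index):
```typescript
import { Agent, AgentEvent } from '@n8n/agents';

const agent = new Agent('assistant').model('anthropic/claude-sonnet-4-5');

// Handlers receive AgentEventData and are called synchronously on the bus.
agent.on(AgentEvent.ToolExecutionStart, (data) => {
	console.log('tool call starting:', data);
});
agent.on(AgentEvent.AgentEnd, (data) => {
	console.log('turn finished:', data);
});
```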
---
## abort()
`agent.abort()` synchronously aborts the internal `AbortController`. The
resulting signal is passed to `generateText` / `streamText` as `abortSignal`
so in-flight HTTP cancels promptly. The loop also checks `bus.isAborted` at
batch boundaries.
`AgentEventBus.resetAbort(externalSignal?)` runs at the start of each run: it
replaces the controller and, if `ExecutionOptions.abortSignal` is set, forwards
that signal's abort to the internal controller.
### Abort behaviour
| Mode | Behaviour on abort |
|------|-------------------|
| `generate` | Catches abort and returns `{ runId, messages, finishReason: 'error', ... }` without emitting `AgentEvent.Error` |
| `stream` | Writes `{ type: 'error', error }` then finishes / closes cleanly |
State becomes `cancelled`. `resetAbort()` supplies a fresh controller per run
so the same `Agent` instance can run again.
---
## getState()
`agent.getState()` returns a shallow copy of `SerializableAgentState`. Before
the first `generate()` / `stream()`, the `Agent` builder returns a minimal idle
state with an empty `messageList` (`messages`, `historyIds`, `inputIds`,
`responseIds` all empty).
### State machine
```mermaid
stateDiagram-v2
[*] --> idle: constructed
idle --> running: generate() / stream() / resume()
running --> success: loop completes normally
running --> failed: unhandled error
running --> suspended: tool suspends (HITL or suspend/resume)
running --> cancelled: abort() / external signal
suspended --> running: resume() / approve() / deny() loads checkpoint
```
### AgentRunState values
| Status | Meaning |
|--------|---------|
| `idle` | No run yet (or builder before first lazy build) |
| `running` | Loop in progress |
| `success` | Turn finished and checkpoint cleaned up when applicable |
| `failed` | Unrecoverable error path |
| `suspended` | Awaiting resume (checkpoint stored under `runId`) |
| `cancelled` | Aborted |
| `waiting` | Reserved |
### SerializableAgentState
Important fields (see `types/sdk/agent.ts`):
```typescript
interface SerializableAgentState {
persistence?: AgentPersistenceOptions; // threadId + resourceId when using memory
status: AgentRunState;
messageList: SerializedMessageList;
resumeData?: AgentResumeData;
pendingToolCalls: Record<string, PendingToolCall>;
finishReason?: FinishReason;
usage?: TokenUsage;
executionOptions?: PersistedExecutionOptions; // maxIterations only — persisted on suspend
}
```
`PendingToolCall` distinguishes tools already suspended (`suspended: true`,
`suspendPayload`, `resumeSchema`) from calls not yet executed (`suspended:
false`) when a batch stops at the first suspension.
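Reading the snapshot (sketch; field access follows the interface above):
```typescript
import { Agent } from '@n8n/agents';

const agent = new Agent('assistant').model('anthropic/claude-sonnet-4-5');

const state = agent.getState(); // minimal idle state before the first run
if (state.status === 'suspended') {
	for (const [toolCallId, pending] of Object.entries(state.pendingToolCalls)) {
		// Suspended entries carry suspendPayload / resumeSchema; the rest were never executed.
		console.log(toolCallId, pending.suspended ? pending.suspendPayload : 'not yet executed');
	}
}
```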
---
## asTool()
`agent.asTool(description)` wraps the agent as a `BuiltTool`. The handler calls
`agent.generate(input, { telemetry: ctx.parentTelemetry })`, collects assistant
text, and returns `{ result: string }`. When the sub-run produces usage,
results are wrapped so the parent runtime can merge **`SubAgentUsage`** and
**`totalCost`** into the parent `GenerateResult` / stream `finish` chunk.
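Sketch of the composition (model ids and prompts are illustrative):
```typescript
import { Agent } from '@n8n/agents';

const researcher = new Agent('researcher')
	.model('anthropic/claude-sonnet-4-5')
	.instructions('Research the topic and report structured findings.');

const orchestrator = new Agent('orchestrator')
	.model('anthropic/claude-sonnet-4-5')
	.instructions('Delegate research to the researcher tool, then summarize.')
	.tool(researcher.asTool('Delegate research tasks to the research specialist'));

// Sub-run usage is merged into the parent result as subAgentUsage / totalCost.
const result = await orchestrator.generate('Research RAG architectures and summarize them');
```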
---
## Message types
| Type | Definition | Purpose |
|------|------------|---------|
| `AgentMessage` | `Message \| CustomMessage` | Internal representation; custom messages are UI-facing |
| `ModelMessage` (AI SDK) | Roles wired to the provider | LLM-facing; custom messages never appear here |
**Custom messages** are stripped for the model via `filterLlmMessages()` before
`toAiMessages()`.
`messages.ts` provides `toAiMessages`, `fromAiMessages`, and consumers rely on
`filterLlmMessages` from `sdk/message.ts`.
**Tool results vs model:** optional `BuiltTool.toModelOutput` maps the stored /
event result before building the `tool-result` the LLM sees; `toMessage` still
uses the raw result for custom DB messages.
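Sketch of the two mappings on a tool (`toMessage` mirrors the builder usage in the integration tests; treating `toModelOutput` as a builder method is an assumption based on the `BuiltTool` field above):
```typescript
import { z } from 'zod';
import { Tool } from '@n8n/agents';

const deleteTool = new Tool('delete_file')
	.description('Delete a file at the given path')
	.input(z.object({ path: z.string() }))
	.output(z.object({ deleted: z.boolean(), path: z.string() }))
	.handler(async ({ path }) => ({ deleted: true, path }))
	// Assumed setter: shapes the tool-result part the LLM sees.
	.toModelOutput((output) => ({ summary: `deleted ${(output as { path: string }).path}` }))
	// Custom DB/UI message is still built from the raw result.
	.toMessage((output) => ({
		type: 'custom' as const,
		data: { deletedPath: (output as { path: string }).path },
	}));
```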
---
## AgentMessageList
`AgentMessageList` (`src/runtime/message-list.ts`) is the central structure for
one turn. It keeps a single append-only array and **three Sets** for
provenance: history, input, response.
### Sources
| Source | Added by | `turnDelta()` | `responseDelta()` | `forLlm()` |
|--------|----------|---------------|-------------------|------------|
| **history** | `addHistory()` | No | No | Yes (after filters) |
| **input** | `addInput()` | Yes | No | Yes (after filters) |
| **response** | `addResponse()` | Yes | Yes | Yes (after filters) |
### Key methods
```
forLlm(baseInstructions, instructionProviderOptions?)
	→ [system + working memory block, ...toAiMessages(filterLlm(stripOrphaned(all)))]
turnDelta()      → input + response messages (memory persistence)
responseDelta()  → response only (user-facing / GenerateResult.messages)
serialize()      → { messages, historyIds, inputIds, responseIds }
deserialize()    → restores all three sets via stable message ids
```
### Serialization
Serialized state stores **message id arrays** per set (`historyIds`,
`inputIds`, `responseIds`), not a single `historyCount`. After a round-trip,
history / input / response classification is fully restored — required for
correct `turnDelta()` after suspend/resume.
`stripOrphanedToolMessages` runs on loaded history and inside `forLlm()` so
incomplete tool pairs do not reach the model.
---
## Agentic loop
Both `runGenerateLoop` and `runStreamLoop` follow the same high-level pattern:
emit `TurnStart`, call the model with `list.forLlm(...)`, append assistant /
tool traffic via `addResponse`, process tool calls through
`iterateToolCallsConcurrent` (batched by `toolCallConcurrency`), handle
suspension / persistence, repeat until finish or max iterations.
### Tool execution and concurrency
- Executable tool calls (non-provider-executed) are processed in windows of size
`this.concurrency` (`toolCallConcurrency ?? 1`).
- Each window uses `Promise.allSettled` so all tools in the batch settle; a
suspension in the batch stops **subsequent** batches and records remaining
calls in `pending` without `suspendPayload`.
- **HITL** and **suspend/resume** flows share the same pending-map machinery;
`processToolCall` validates JSON Schema or Zod **input** schemas (Ajv / Zod)
before invoking the handler.
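The batching itself is conceptually simple; a simplified sketch (not the runtime's exact code):
```typescript
// Process tool calls in windows of `concurrency`; stop scheduling further
// windows once a suspension has been observed in the current one.
async function runInBatches<T>(
	calls: T[],
	concurrency: number,
	exec: (call: T) => Promise<{ suspended: boolean }>,
) {
	let suspended = false;
	for (let i = 0; i < calls.length && !suspended; i += concurrency) {
		const window = calls.slice(i, i + concurrency);
		// allSettled: every call in the window settles even if one suspends or throws.
		const settled = await Promise.allSettled(window.map(exec));
		suspended = settled.some((r) => r.status === 'fulfilled' && r.value.suspended);
	}
	return suspended;
}
```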
### Loop invariants
1. **Single list** — `addResponse` accumulates assistant, tool, and custom
messages for the turn.
2. **System prompt** — rebuilt each call via `forLlm`; working memory content
is injected there, not as separate list rows.
3. **Suspension preserves pending calls** — remaining calls in the batch and
later calls are recorded for resume.
4. **Max iterations** — default 20 (`MAX_LOOP_ITERATIONS`).
5. **Abort** — checked between batches; signal passed into AI SDK calls.
### Non-streaming vs streaming
| Aspect | `runGenerateLoop` | `runStreamLoop` |
|--------|-------------------|-----------------|
| AI SDK | `generateText()` | `streamText()` |
| Output | `GenerateResult` | `StreamChunk`s via `WritableStream` |
| Errors | Returned on `GenerateResult` (`error`, `finishReason: 'error'`) for many paths | Error chunks + `closeStreamWithError` |
| Suspension | `pendingSuspend` array on `GenerateResult` | `tool-call-suspended` chunks, then `finish` |
---
## HITL and suspend/resume
**HITL (approval):** tools can require approval (`requiresApproval` /
`needsApprovalFn`). The runtime treats approval outcomes like resume data:
`approve()` / `deny()` delegate to `resume()` with `{ approved: true | false }`.
**Programmatic suspend:** tools can return a branded suspend object; the runtime
requires `resumeSchema` (Zod → JSON Schema for clients) and validates
`suspendPayload` when `suspendSchema` is set.
```mermaid
sequenceDiagram
participant Caller
participant AgentRuntime
participant CheckpointStore
participant LLM
Caller->>AgentRuntime: generate/stream(input)
AgentRuntime->>LLM: generateText/streamText
LLM-->>AgentRuntime: tool calls
Note over AgentRuntime: Suspension: persist pendingToolCalls + messageList
AgentRuntime->>CheckpointStore: suspend(runId, state)
AgentRuntime-->>Caller: pendingSuspend / tool-call-suspended chunks
Caller->>AgentRuntime: resume/approve/deny(method, …)
AgentRuntime->>CheckpointStore: resume(runId) — load only
AgentRuntime->>AgentRuntime: processToolCall / iteratePendingToolCallsConcurrent
AgentRuntime->>LLM: Continue loop if needed
AgentRuntime->>CheckpointStore: complete(runId) when finished
```
With **concurrency > 1**, multiple tools may suspend in the same turn; the
stream can emit **multiple** `tool-call-suspended` chunks, and `GenerateResult`
can carry **`pendingSuspend`** with multiple entries.
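From the caller's side the whole cycle looks like this (sketch mirroring the integration tests; it assumes `BuiltAgent` is exported and `agent` was built with a tool that can suspend):
```typescript
import type { BuiltAgent } from '@n8n/agents';

declare const agent: BuiltAgent; // built elsewhere with a suspending (e.g. approval) tool

let result = await agent.generate('Delete /tmp/a.txt and /tmp/b.txt');

// Resume suspended tools one at a time until none remain.
while (result.pendingSuspend && result.pendingSuspend.length > 0) {
	const { runId, toolCallId } = result.pendingSuspend[0];
	result = await agent.resume('generate', { approved: true }, { runId, toolCallId });
}

console.log(result.finishReason); // 'stop' once every suspension has been resolved
```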
---
## RunStateManager
`RunStateManager` (`src/runtime/run-state.ts`) persists suspended runs through
a **`CheckpointStore`**:
- Default: in-memory `MemoryCheckpointStore` when `checkpointStorage` is
`'memory'` or omitted.
- Custom: pass a `CheckpointStore` implementation for durability.
`suspend(runId, state)` writes the state. `resume(runId)` **loads** the state
and returns it with `status: 'running'`; it does **not** delete the key.
`complete(runId)` deletes the checkpoint when the run finishes without remaining
suspensions.
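Configuration happens on the builder (sketch; a custom store must implement `CheckpointStore`):
```typescript
import { Agent } from '@n8n/agents';

// Default: in-memory checkpoints, fine for tests and short-lived processes.
const writer = new Agent('writer')
	.model('anthropic/claude-sonnet-4-5')
	.checkpoint('memory');

// Durable checkpoints: pass your own CheckpointStore implementation instead
// (hypothetical `myStore`):
// const durableWriter = new Agent('writer').checkpoint(myStore);
```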
### Known limitations
In-memory checkpoints grow until `complete()` runs. Production stores should
implement TTL or eviction as needed.
---
## Memory persistence
At end of turn, `saveToMemory()` uses `list.turnDelta()` and
`saveMessagesToThread`. If **semantic recall** is configured with an embedder
and `memory.saveEmbeddings`, new messages are embedded and stored.
**Working memory:** when configured, the runtime parses
`<working_memory>…</working_memory>` regions from assistant text, validates structured JSON if a
schema exists, strips the tags from the visible message, and asynchronously
persists via `memory.saveWorkingMemory`.
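A sketch of the extraction step (hypothetical simplification; the real logic lives in `src/runtime/working-memory.ts` and also validates structured JSON against the schema):
```typescript
const WORKING_MEMORY_RE = /<working_memory>([\s\S]*?)<\/working_memory>/g;

// Pull working-memory blocks out of assistant text and strip the tags
// from the text the user sees.
function extractWorkingMemory(assistantText: string) {
	const blocks = [...assistantText.matchAll(WORKING_MEMORY_RE)].map((m) => m[1].trim());
	const visibleText = assistantText.replace(WORKING_MEMORY_RE, '').trim();
	return { blocks, visibleText };
}
```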
**Thread titles:** `titleGeneration` triggers `generateThreadTitle` (fire-and-forget)
after a successful save when persistence and memory are present.
---
## Stream architecture
The streaming path uses a `TransformStream`: `startStreamLoop` returns the
readable side immediately; the loop writes chunks in the background.
**`convertChunk`** (in `stream.ts`) maps AI SDK v6 `TextStreamPart` values to our
`StreamChunk` union, including `finish-step` / `finish` consolidation.
### StreamChunk types (representative)
| Type | Content |
|------|---------|
| `text-delta` | Incremental text |
| `reasoning-delta` | Thinking / reasoning text |
| `tool-call-delta` | Streaming tool name / arguments |
| `message` | Full assistant or tool message |
| `tool-call-suspended` | Suspension: `runId`, `toolCallId`, tool metadata, optional `resumeSchema`, `suspendPayload` |
| `finish` | `finishReason`, `usage` (with optional **cost**), `model`, optional **`structuredOutput`**, **`subAgentUsage`**, **`totalCost`** |
| `error` | Failure or abort |
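Consuming the stream (sketch; chunk type names follow the table above, and the exact field names on each chunk should be treated as illustrative):
```typescript
import { Agent } from '@n8n/agents';

const agent = new Agent('assistant').model('anthropic/claude-sonnet-4-5');

const { stream } = await agent.stream('Summarize the latest report');
const reader = stream.getReader();

while (true) {
	const { done, value } = await reader.read();
	if (done) break;
	if (value.type === 'text-delta') process.stdout.write(value.delta);
	if (value.type === 'finish') console.log('\nusage:', value.usage);
}
```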
---
## File map
```
src/
  runtime/
    agent-runtime.ts              — AgentRuntime (generate/stream/resume loops, HITL, state)
    event-bus.ts                  — AgentEventBus + AbortController
    message-list.ts               — AgentMessageList
    run-state.ts                  — RunStateManager, generateRunId
    memory-store.ts               — saveMessagesToThread helper
    messages.ts                   — AI SDK message conversion
    model-factory.ts              — createModel / createEmbeddingModel
    tool-adapter.ts               — buildToolMap, executeTool, toAiSdkTools, suspend / agent-result guards
    stream.ts                     — convertChunk, toTokenUsage
    runtime-helpers.ts            — normalizeInput, usage merge, stream error helpers, …
    working-memory.ts             — instruction text, parse/filter for working_memory tags
    strip-orphaned-tool-messages.ts
    title-generation.ts
    logger.ts
  types/
    sdk/agent.ts                  — BuiltAgent, GenerateResult, StreamChunk, SerializableAgentState, …
    sdk/tool.ts, sdk/memory.ts, … — Public SDK contracts
    runtime/event.ts              — AgentEvent enum + AgentEventData
    runtime/message-list.ts       — SerializedMessageList
    telemetry.ts                  — BuiltTelemetry shape
```
---
## Design decisions (selected)
### Set-based message list + id serialization
Three Sets plus stable **`id` on each message** allow `turnDelta()` /
`responseDelta()` without losing custom tool messages, and checkpointed runs
restore history vs turn data correctly after resume.
### `responseDelta()` vs `turnDelta()`
User input must not appear in `GenerateResult.messages`; memory persistence
must store the full turn including input — hence two views over the same list.
### Concurrency preserves suspension semantics
Batches run in parallel when configured, but the first suspension still
captures **unexecuted** tool calls in `pending` so nothing is dropped. Approval
tools and programmatic suspends use the same pending-map format.
### Why one event bus per agent
The bus is shared between `Agent` and `AgentRuntime` so `on()` registrations and
`abort()` always target the controller used by the active loop.
### Why `AbortSignal`
Signals cancel HTTP immediately in the AI SDK and compose with caller-provided
`abortSignal` via `resetAbort`.

View file

@ -0,0 +1,23 @@
import { defineConfig } from 'eslint/config';
import { nodeConfig } from '@n8n/eslint-config/node';
export default defineConfig(
{ ignores: ['examples/**', 'vitest.integration.config.*', 'src/__tests__/fixtures/**'] },
nodeConfig,
{
rules: {
'unicorn/filename-case': ['error', { case: 'kebabCase' }],
'@typescript-eslint/naming-convention': ['error', {
'selector': 'enumMember',
'format': ['UPPER_CASE', 'PascalCase'],
}]
},
},
{
files: ['src/__tests__/integration/**/*.ts'],
rules: {
'@typescript-eslint/require-await': 'off',
'n8n-local-rules/no-uncaught-json-parse': 'off',
},
},
);

View file

@ -0,0 +1,153 @@
/**
* @n8n/agents Full API Demonstration
*
* This example demonstrates the complete builder-pattern API for creating
* and running AI agents. It shows: tools, agents, memory, guardrails,
* scorers, multi-agent patterns (agent-as-tool), and tool interrupts.
*
* To run with real LLM calls, set ANTHROPIC_API_KEY.
* Without keys, the runtime will throw on actual LLM calls.
*/
import { z } from 'zod';
import { Agent, Guardrail, Memory, Tool } from '../src';
// ---------------------------------------------------------------------------
// Tools
// ---------------------------------------------------------------------------
const searchTool = new Tool('web-search')
.description('Search the web for information on a topic')
.input(
z.object({
query: z.string().describe('The search query'),
maxResults: z.number().default(3).describe('Maximum results to return'),
}),
)
.output(
z.object({
results: z.array(
z.object({
title: z.string(),
snippet: z.string(),
}),
),
}),
)
.handler(async ({ query, maxResults }) => ({
results: Array.from({ length: maxResults }, (_, i) => ({
title: `Result ${i + 1} for "${query}"`,
snippet: `This is a mock search result about ${query}.`,
})),
}));
const writeFileTool = new Tool('write-file')
.description('Write content to a file (suspends for confirmation)')
.input(
z.object({
path: z.string().describe('File path to write to'),
content: z.string().describe('Content to write'),
}),
)
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path, content }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Write to "${path}"?`, severity: 'warning' });
}
if (!ctx.resumeData.approved) return { written: false };
console.log(` [Mock] Would write ${content.length} chars to ${path}`);
return { written: true };
});
// ---------------------------------------------------------------------------
// Memory
// ---------------------------------------------------------------------------
const memory = new Memory()
.lastMessages(20)
.semanticRecall({ topK: 4, messageRange: { before: 1, after: 1 } });
// ---------------------------------------------------------------------------
// Agents
// ---------------------------------------------------------------------------
const researcher = new Agent('researcher')
.model('anthropic/claude-sonnet-4')
.instructions(
'You are a research assistant. Search for information and return structured findings.',
)
.tool(searchTool)
.memory(memory)
.inputGuardrail(
new Guardrail('injection-detector').type('prompt-injection').strategy('block').threshold(0.8),
);
const writer = new Agent('writer')
.model('anthropic/claude-sonnet-4')
.instructions('You write clear, engaging content based on research provided to you.')
.tool(writeFileTool)
.checkpoint('memory');
// ---------------------------------------------------------------------------
// Multi-Agent: Agent as Tool
// ---------------------------------------------------------------------------
const orchestrator = new Agent('orchestrator')
.model('anthropic/claude-sonnet-4')
.instructions(
'You coordinate research and writing. Delegate research to the researcher and writing to the writer.',
)
.tool(researcher.asTool('Delegate research tasks to the research specialist'))
.tool(writer.asTool('Delegate writing tasks to the content writer'));
// ---------------------------------------------------------------------------
// Execution
// ---------------------------------------------------------------------------
async function main() {
console.log('=== @n8n/agents ===\n');
// --- 1. Single agent generate ---
console.log('1. Single agent generate:');
try {
const result = await researcher.generate('Find information about RAG architectures', {
persistence: {
resourceId: 'user-123',
threadId: 'session-1',
},
});
const text = result.messages
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
console.log(` Result: ${text.slice(0, 100)}...`);
console.log(
` Usage: ${result.usage?.promptTokens} in, ${result.usage?.completionTokens} out`,
);
} catch (error) {
console.log(` (Expected) Error: ${(error as Error).message}`);
console.log(' (Set ANTHROPIC_API_KEY to run with real LLM calls)');
}
// --- 2. Orchestrator (agent-as-tool pattern) ---
console.log('\n2. Orchestrator (agent-as-tool pattern):');
try {
const orchResult = await orchestrator.generate(
'Research RAG architectures and write a summary',
);
const text = orchResult.messages
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
console.log(` Result: ${text.slice(0, 100)}...`);
} catch (error) {
console.log(` (Expected) Error: ${(error as Error).message}`);
}
console.log('\n=== Complete ===');
}
main().catch(console.error);

View file

@ -0,0 +1,7 @@
/** @type {import('jest').Config} */
const base = require('../../../jest.config');
module.exports = {
...base,
testPathIgnorePatterns: [...(base.testPathIgnorePatterns || []), '/integration/'],
};

View file

@ -0,0 +1,65 @@
{
"name": "@n8n/agents",
"version": "0.1.0",
"description": "AI agent SDK for n8n's code-first execution engine",
"main": "dist/index.js",
"module": "dist/index.js",
"types": "dist/index.d.ts",
"files": [
"dist/**/*"
],
"scripts": {
"clean": "rimraf dist .turbo",
"dev": "pnpm watch",
"typecheck": "tsc --noEmit",
"build": "rimraf dist && tsc -p tsconfig.build.json",
"format": "biome format --write src examples",
"format:check": "biome ci src examples",
"lint": "eslint . --quiet",
"lint:fix": "eslint . --fix",
"watch": "tsc -p tsconfig.build.json --watch",
"test": "jest",
"test:unit": "jest",
"test:dev": "jest --watch",
"test:integration": "vitest run --config vitest.integration.config.mjs"
},
"dependencies": {
"@ai-sdk/anthropic": "^3.0.58",
"@ai-sdk/google": "^3.0.43",
"@ai-sdk/openai": "^3.0.41",
"@ai-sdk/xai": "^3.0.67",
"@ai-sdk/provider-utils": "^4.0.21",
"@modelcontextprotocol/sdk": "1.26.0",
"ajv": "^8.18.0",
"@libsql/client": "^0.17.0",
"ai": "^6.0.116",
"pg": "catalog:",
"zod": "catalog:"
},
"peerDependencies": {
"langsmith": ">=0.3.0",
"@opentelemetry/sdk-trace-node": ">=1.0.0",
"@opentelemetry/sdk-trace-base": ">=1.0.0",
"@opentelemetry/exporter-trace-otlp-http": ">=0.50.0"
},
"peerDependenciesMeta": {
"langsmith": {
"optional": true
},
"@opentelemetry/sdk-trace-node": {
"optional": true
},
"@opentelemetry/sdk-trace-base": {
"optional": true
},
"@opentelemetry/exporter-trace-otlp-http": {
"optional": true
}
},
"devDependencies": {
"@n8n/typescript-config": "workspace:*",
"@types/json-schema": "^7.0.15",
"@types/pg": "^8.15.6",
"testcontainers": "11.11.0"
}
}

File diff suppressed because it is too large

View file

@ -0,0 +1,51 @@
import { AgentEventBus } from '../runtime/event-bus';
describe('AgentEventBus', () => {
describe('resetAbort', () => {
it('should create a fresh signal on reset', () => {
const bus = new AgentEventBus();
bus.resetAbort();
expect(bus.isAborted).toBe(false);
expect(bus.signal.aborted).toBe(false);
});
it('should respect agent.abort()', () => {
const bus = new AgentEventBus();
bus.resetAbort();
bus.abort();
expect(bus.isAborted).toBe(true);
expect(bus.signal.aborted).toBe(true);
});
it('should respect external abort signal', () => {
const bus = new AgentEventBus();
const external = new AbortController();
bus.resetAbort(external.signal);
expect(bus.isAborted).toBe(false);
external.abort();
expect(bus.isAborted).toBe(true);
expect(bus.signal.aborted).toBe(true);
});
it('should abort when either internal or external signal fires', () => {
const bus = new AgentEventBus();
const external = new AbortController();
bus.resetAbort(external.signal);
bus.abort();
expect(bus.isAborted).toBe(true);
expect(external.signal.aborted).toBe(false);
});
it('should allow reuse after reset', () => {
const bus = new AgentEventBus();
bus.resetAbort();
bus.abort();
expect(bus.isAborted).toBe(true);
bus.resetAbort();
expect(bus.isAborted).toBe(false);
});
});
});

View file

@ -0,0 +1,82 @@
/**
* Minimal MCP server for stdio transport integration tests.
* Spawned as a child process by mcp-stdio-transport.test.ts.
* Run with: node mcp-stdio-server.mjs
*/
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprotocol/sdk/types.js';
// 1×1 transparent PNG in base64 (smallest valid PNG)
const TINY_PNG =
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
const server = new Server(
{ name: 'test-stdio-server', version: '1.0.0' },
{ capabilities: { tools: {} } },
);
server.setRequestHandler(ListToolsRequestSchema, async () => ({
tools: [
{
name: 'echo',
description: 'Echo the message back as-is',
inputSchema: {
type: 'object',
properties: { message: { type: 'string', description: 'Message to echo' } },
required: ['message'],
},
},
{
name: 'add',
description: 'Add two numbers together',
inputSchema: {
type: 'object',
properties: {
a: { type: 'number', description: 'First number' },
b: { type: 'number', description: 'Second number' },
},
required: ['a', 'b'],
},
},
{
name: 'image',
description: 'Return a small image with a caption',
inputSchema: {
type: 'object',
properties: { caption: { type: 'string', description: 'Image caption' } },
required: ['caption'],
},
},
],
}));
server.setRequestHandler(CallToolRequestSchema, async (request) => {
const { name, arguments: args = {} } = request.params;
if (name === 'echo') {
return { content: [{ type: 'text', text: String(args.message ?? '') }] };
}
if (name === 'add') {
const sum = Number(args.a ?? 0) + Number(args.b ?? 0);
return { content: [{ type: 'text', text: String(sum) }] };
}
if (name === 'image') {
return {
content: [
{ type: 'text', text: String(args.caption ?? '') },
{ type: 'image', data: TINY_PNG, mimeType: 'image/png' },
],
};
}
return {
isError: true,
content: [{ type: 'text', text: `Unknown tool: ${name}` }],
};
});
const transport = new StdioServerTransport();
await server.connect(transport);

View file

@ -0,0 +1,57 @@
import { InMemoryMemory } from '../runtime/memory-store';
describe('InMemoryMemory working memory', () => {
it('returns null for unknown key', async () => {
const mem = new InMemoryMemory();
expect(await mem.getWorkingMemory({ threadId: 'thread-x', resourceId: 'unknown' })).toBeNull();
});
it('saves and retrieves working memory keyed by resourceId', async () => {
const mem = new InMemoryMemory();
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'user-1' },
'# Context\n- Name: Alice',
);
expect(await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' })).toBe(
'# Context\n- Name: Alice',
);
});
it('overwrites on subsequent save', async () => {
const mem = new InMemoryMemory();
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' }, 'v1');
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' }, 'v2');
expect(await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' })).toBe('v2');
});
it('isolates by resourceId (resource scope)', async () => {
const mem = new InMemoryMemory();
await mem.saveWorkingMemory({ threadId: 'thread-a', resourceId: 'user-1' }, 'Alice data');
await mem.saveWorkingMemory({ threadId: 'thread-b', resourceId: 'user-2' }, 'Bob data');
expect(await mem.getWorkingMemory({ threadId: 'thread-a', resourceId: 'user-1' })).toBe(
'Alice data',
);
expect(await mem.getWorkingMemory({ threadId: 'thread-b', resourceId: 'user-2' })).toBe(
'Bob data',
);
});
it('returns null for unknown threadId (thread scope)', async () => {
const mem = new InMemoryMemory();
expect(await mem.getWorkingMemory({ threadId: 'unknown' })).toBeNull();
});
it('saves and retrieves working memory keyed by threadId', async () => {
const mem = new InMemoryMemory();
await mem.saveWorkingMemory({ threadId: 'thread-1' }, '# Thread Notes');
expect(await mem.getWorkingMemory({ threadId: 'thread-1' })).toBe('# Thread Notes');
});
it('isolates by threadId (thread scope)', async () => {
const mem = new InMemoryMemory();
await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'data for thread 1');
await mem.saveWorkingMemory({ threadId: 'thread-2' }, 'data for thread 2');
expect(await mem.getWorkingMemory({ threadId: 'thread-1' })).toBe('data for thread 1');
expect(await mem.getWorkingMemory({ threadId: 'thread-2' })).toBe('data for thread 2');
});
});

View file

@ -0,0 +1,134 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
createAgentWithBatchedInterruptibleCalls,
createAgentWithBatchedNormalCalls,
} from './helpers';
import type { StreamChunk } from '../../index';
const describe = describeIf('anthropic');
describe('batched tool execution integration', () => {
it('normal tools with bounded concurrency complete without errors (generate)', async () => {
const agent = createAgentWithBatchedNormalCalls('anthropic', 2);
const result = await agent.generate(
'Check if these three files exist: /home/a.txt, /home/b.txt, /home/c.txt. You MUST call check_file for each file using parallel tool calls in the same turn.',
);
expect(result.finishReason).toBe('stop');
expect(result.pendingSuspend).toBeUndefined();
expect(result.toolCalls).toBeDefined();
expect(result.toolCalls!.length).toBeGreaterThanOrEqual(3);
for (const tc of result.toolCalls!) {
expect(tc.tool).toBe('check_file');
expect(tc.output).toEqual(expect.objectContaining({ exists: true }));
}
});
it('normal tools with bounded concurrency complete without errors (stream)', async () => {
const agent = createAgentWithBatchedNormalCalls('anthropic', 2);
const { stream: fullStream } = await agent.stream(
'Check if these three files exist: /home/a.txt, /home/b.txt, /home/c.txt. You MUST call check_file for each file using parallel tool calls in the same turn.',
);
const chunks = await collectStreamChunks(fullStream);
const errorChunks = chunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBe(1);
expect(finishChunks[0].finishReason).toBe('stop');
expect(chunks.filter((c) => c.type === 'tool-call-suspended')).toHaveLength(0);
});
it('bounded concurrency suspends first batch and saves unexecuted tools, then resumes all (generate)', async () => {
const agent = createAgentWithBatchedInterruptibleCalls('anthropic', 2);
const first = await agent.generate(
'Delete these three files: /tmp/a.txt, /tmp/b.txt, /tmp/c.txt. You MUST call delete_file for each file using parallel tool calls in the same turn. After deleting, confirm success.',
);
expect(first.finishReason).toBe('tool-calls');
expect(first.pendingSuspend).toBeDefined();
// With concurrency=2 and 3 tools: batch 1 runs 2 tools (both suspend),
// batch 2 (1 tool) is skipped. So we get 2 suspended + 1 unexecuted.
expect(first.pendingSuspend!.length).toBe(2);
// Resume each suspension one at a time until the LLM loop continues.
// The unexecuted tools from later batches should run during resume
// and suspend in turn, so we expect multiple resume cycles.
let result = first;
let resumeCount = 0;
while (result.pendingSuspend && result.pendingSuspend.length > 0) {
const { runId, toolCallId } = result.pendingSuspend[0];
result = await agent.resume('generate', { approved: true }, { runId, toolCallId });
resumeCount++;
if (resumeCount > 10) {
throw new Error('Too many resume cycles — likely an infinite loop');
}
}
// All tools should eventually be resolved
expect(result.finishReason).toBe('stop');
expect(result.pendingSuspend).toBeUndefined();
expect(resumeCount).toBeGreaterThanOrEqual(2);
});
it('bounded concurrency suspends first batch and saves unexecuted tools, then resumes all (stream)', async () => {
const agent = createAgentWithBatchedInterruptibleCalls('anthropic', 2);
const { stream: fullStream } = await agent.stream(
'Delete these three files: /tmp/a.txt, /tmp/b.txt, /tmp/c.txt. You MUST call delete_file for each file using parallel tool calls in the same turn. After deleting, tell me if you succeeded.',
);
const chunks = await collectStreamChunks(fullStream);
let pendingSuspensions = chunksOfType(chunks, 'tool-call-suspended') as Array<
StreamChunk & { type: 'tool-call-suspended' }
>;
expect(pendingSuspensions.length).toBe(2);
let resumeCount = 0;
while (pendingSuspensions.length > 0) {
const next = pendingSuspensions[0];
const resumedStream = await agent.resume(
'stream',
{ approved: true },
{ runId: next.runId!, toolCallId: next.toolCallId! },
);
const resumedChunks = await collectStreamChunks(resumedStream.stream);
pendingSuspensions = chunksOfType(resumedChunks, 'tool-call-suspended') as Array<
StreamChunk & { type: 'tool-call-suspended' }
>;
resumeCount++;
if (pendingSuspensions.length === 0) {
const errorChunks = resumedChunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(resumedChunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
expect(finishChunks[0].finishReason).not.toBe('error');
}
if (resumeCount > 10) {
throw new Error('Too many resume cycles — likely an infinite loop');
}
}
expect(resumeCount).toBeGreaterThanOrEqual(2);
});
});

View file

@ -0,0 +1,213 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
createAgentWithConcurrentInterruptibleCalls,
createAgentWithConcurrentMixedTools,
collectTextDeltas,
} from './helpers';
import { isLlmMessage, type StreamChunk } from '../../index';
const describe = describeIf('anthropic');
describe('concurrent tool execution integration', () => {
it('suspends all interruptible tool calls concurrently and returns them as an array (generate)', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
const result = await agent.generate(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls.',
);
expect(result.finishReason).toBe('tool-calls');
expect(result.pendingSuspend).toBeDefined();
// With concurrent execution, ALL interruptible tool calls suspend at once
expect(result.pendingSuspend!.length).toBeGreaterThanOrEqual(2);
const toolNames = result.pendingSuspend!.map((s) => s.toolName);
expect(toolNames.every((n) => n === 'delete_file')).toBe(true);
// All entries share the same runId
const runIds = new Set(result.pendingSuspend!.map((s) => s.runId));
expect(runIds.size).toBe(1);
// Each entry has a unique toolCallId and a suspendPayload
const toolCallIds = result.pendingSuspend!.map((s) => s.toolCallId);
expect(new Set(toolCallIds).size).toBe(result.pendingSuspend!.length);
for (const s of result.pendingSuspend!) {
expect(s.suspendPayload).toEqual(
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
expect.objectContaining({ message: expect.any(String), severity: 'destructive' }),
);
}
});
it('suspends all interruptible tool calls concurrently and emits multiple chunks (stream)', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
const { stream: fullStream } = await agent.stream(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls.',
);
const chunks = await collectStreamChunks(fullStream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
// With concurrent execution, ALL suspensions are emitted before finish
expect(suspendedChunks.length).toBeGreaterThanOrEqual(2);
// Each suspended chunk has a unique toolCallId
const toolCallIds = suspendedChunks.map((c) => c.toolCallId);
expect(new Set(toolCallIds).size).toBe(suspendedChunks.length);
// All share the same runId
const runIds = new Set(suspendedChunks.map((c) => c.runId));
expect(runIds.size).toBe(1);
// A single finish chunk follows the suspended chunks
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBe(1);
expect(finishChunks[0].finishReason).toBe('tool-calls');
});
it('resume resolves one tool at a time, carrying forward the rest (generate)', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
const first = await agent.generate(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls.',
);
expect(first.pendingSuspend!.length).toBeGreaterThanOrEqual(2);
const { runId } = first.pendingSuspend![0];
const firstToolCallId = first.pendingSuspend![0].toolCallId;
// Resume the first tool
const second = await agent.resume(
'generate',
{ approved: true },
{ runId, toolCallId: firstToolCallId },
);
// The remaining tool(s) should still be pending
expect(second.pendingSuspend).toBeDefined();
expect(second.pendingSuspend!.length).toBe(first.pendingSuspend!.length - 1);
// The resumed tool should NOT be in the remaining list
const remainingIds = second.pendingSuspend!.map((s) => s.toolCallId);
expect(remainingIds).not.toContain(firstToolCallId);
});
it('resumes all suspended tools one by one until the LLM loop continues (stream)', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
const { stream: fullStream } = await agent.stream(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls. After deleting all files, tell me if you succeeded.',
);
const chunks = await collectStreamChunks(fullStream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBeGreaterThanOrEqual(2);
// Resume each one until no suspensions remain
let pendingSuspensions = suspendedChunks as Array<
StreamChunk & { type: 'tool-call-suspended' }
>;
while (pendingSuspensions.length > 0) {
const next = pendingSuspensions[0];
const resumedStream = await agent.resume(
'stream',
{ approved: true },
{ runId: next.runId!, toolCallId: next.toolCallId! },
);
const resumedChunks = await collectStreamChunks(resumedStream.stream);
pendingSuspensions = chunksOfType(resumedChunks, 'tool-call-suspended');
// If there are no more suspensions, the LLM should have produced text
if (pendingSuspensions.length === 0) {
const errorChunks = resumedChunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(resumedChunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
expect(finishChunks[0].finishReason).not.toBe('error');
}
}
});
it('auto-executes non-interruptible tools concurrently while suspending interruptible ones', async () => {
const agent = createAgentWithConcurrentMixedTools('anthropic');
const { stream: fullStream } = await agent.stream(
'You must call both tools in parallel: call list_files with dir="/home" AND call delete_file with path="/home/readme.md". Do not skip either tool.',
);
const chunks = await collectStreamChunks(fullStream);
// list_files should auto-execute — its result should appear as a message chunk
const toolResultChunks = chunks.filter(
(c) =>
c.type === 'message' &&
isLlmMessage(c.message) &&
c.message.content.some((p) => p.type === 'tool-result'),
);
// delete_file should be suspended
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
const deleteSuspended = suspendedChunks.find((c) => c.toolName === 'delete_file');
expect(deleteSuspended).toBeDefined();
expect(toolResultChunks.length).toBeGreaterThan(0);
// If the LLM issued both tool calls in parallel:
if (deleteSuspended && toolResultChunks.length > 0) {
expect(deleteSuspended.toolName).toBe('delete_file');
expect(deleteSuspended.suspendPayload).toEqual(
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
expect.objectContaining({ message: expect.any(String) }),
);
// list_files result should be present even though delete_file suspended
const listResult = toolResultChunks.find(
(c) =>
c.type === 'message' &&
isLlmMessage(c.message) &&
c.message.content.some((p) => p.type === 'tool-result' && p.toolName === 'list_files'),
);
expect(listResult).toBeDefined();
}
});
it('generate: resumes all tools and receives a final text response', async () => {
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
let result = await agent.generate(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls. After deleting, confirm success.',
);
// Iterate through all pending suspensions
while (result.pendingSuspend && result.pendingSuspend.length > 0) {
const { runId, toolCallId } = result.pendingSuspend[0];
result = await agent.resume('generate', { approved: true }, { runId, toolCallId });
}
// After all tools resumed, the agent should complete with a text response
expect(result.finishReason).toBe('stop');
expect(result.pendingSuspend).toBeUndefined();
const text = collectTextDeltas(
result.messages
.filter((m) => 'role' in m && m.role === 'assistant')
.flatMap((m) =>
'content' in m
? m.content
.filter((c) => c.type === 'text')
.map((c) => ({ type: 'text-delta' as const, delta: c.text }))
: [],
),
);
expect(text.length).toBeGreaterThan(0);
});
});

View file

@ -0,0 +1,92 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { describeIf, getModel } from './helpers';
import { Agent, Memory, Tool } from '../../index';
const describe = describeIf('anthropic');
describe('custom message survives suspend/resume', () => {
it('preserves custom tool message in stream after resume + complete', async () => {
const memory = new Memory().storage('memory').lastMessages(20);
const deleteTool = new Tool('delete_file')
.description('Delete a file at the given path')
.input(
z.object({
path: z.string().describe('File path to delete'),
}),
)
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?` });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
})
.toMessage((output) => ({
type: 'custom' as const,
data: {
dummy: `deleted:${(output as { path: string }).path}`,
},
}));
const agent = new Agent('custom-msg-suspend-resume-stream-test')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. When asked to delete files, use the delete_file tool. Be concise.',
)
.tool(deleteTool)
.memory(memory)
.checkpoint('memory');
const threadId = `test-custom-msg-stream-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
// Turn 1: stream, agent suspends
const result1 = await agent.stream('Delete the file /tmp/stream-test.txt', options);
const reader1 = result1.stream.getReader();
const chunks1: Array<{ type: string; [key: string]: unknown }> = [];
while (true) {
const { done, value } = await reader1.read();
if (done) break;
chunks1.push(value as { type: string; [key: string]: unknown });
}
const suspendedChunk = chunks1.find((c) => c.type === 'tool-call-suspended') as
| { type: 'tool-call-suspended'; runId: string; toolCallId: string }
| undefined;
expect(suspendedChunk).toBeDefined();
// Resume with approval and get the resumed stream
const result2 = await agent.resume(
'stream',
{ approved: true },
{ runId: suspendedChunk!.runId, toolCallId: suspendedChunk!.toolCallId },
);
const reader2 = result2.stream.getReader();
const chunks2: Array<{ type: string; [key: string]: unknown }> = [];
while (true) {
const { done, value } = await reader2.read();
if (done) break;
chunks2.push(value as { type: string; [key: string]: unknown });
}
// The custom message must appear in the resumed stream
const customChunk = chunks2.find(
(c) =>
c.type === 'message' &&
(c.message as { type?: string }).type === 'custom' &&
'data' in (c.message as object) &&
'dummy' in (c.message as { data: { dummy: string } }).data,
) as { type: 'message'; message: { type: 'custom'; data: { dummy: string } } } | undefined;
expect(customChunk).toBeDefined();
expect(customChunk!.message.data.dummy).toContain('deleted:');
expect(customChunk!.message.data.dummy).toContain('/tmp/stream-test.txt');
});
});

View file

@ -0,0 +1,378 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { createAgentWithInterruptibleTool, describeIf, getModel } from './helpers';
import { parseJudgeResponse } from '../../evals/parse-judge-response';
import { Agent, Tool, Eval, evaluate, evals } from '../../index';
/**
* Create a fruit-bowl agent with a tool that generates random fruit coordinates.
*/
function createFruitBowlAgent(provider: 'anthropic' | 'openai'): Agent {
const createFruitBowlTool = new Tool('create_fruit_bowl')
.description(
'Generate a fruit bowl with random 3D coordinates for fruits. Always use this tool when asked to create a fruit bowl.',
)
.input(
z.object({
num_apples: z.number().optional().describe('Number of apples (default: 3)'),
}),
)
.handler(async (input) => {
const numApples = input.num_apples ?? 3;
const fruits = Array.from({ length: numApples }, () => ({
type: 'apple',
x: Math.round((Math.random() * 20 - 10) * 10) / 10,
y: Math.round((Math.random() * 20 - 10) * 10) / 10,
z: Math.round((Math.random() * 20 - 10) * 10) / 10,
}));
return { fruits };
});
return new Agent('fruit-bowl-bot')
.model(getModel(provider))
.instructions(
'You are a fruit bowl generator. When asked to create a fruit bowl, use the create_fruit_bowl tool and then describe the contents including each fruit type and its x, y, z coordinates.',
)
.tool(createFruitBowlTool);
}
const describe = describeIf('anthropic');
describe('evaluate() integration', () => {
it('runs deterministic evals against a fruit bowl agent', async () => {
const mentionsFruit = new Eval('mentions-fruit')
.description('Check if response mentions apples with coordinates')
.check(({ output }) => {
const lower = output.toLowerCase();
const hasApple = lower.includes('apple');
const hasCoord = /\d+\.\d/.test(output);
return {
pass: hasApple && hasCoord,
reasoning:
hasApple && hasCoord
? 'Mentions apples with coordinates'
: hasApple
? 'Mentions apples but no coordinates'
: 'No mention of apples',
};
});
const usedTool = new Eval('used-tool')
.description('Check if create_fruit_bowl tool was called')
.check(({ toolCalls }) => {
const used = (toolCalls ?? []).some((tc) => tc.tool === 'create_fruit_bowl');
return {
pass: used,
reasoning: used ? 'Tool was called' : 'Tool was NOT called',
};
});
const agent = createFruitBowlAgent('anthropic');
const results = await evaluate(agent, {
dataset: [{ input: 'Create a fruit bowl with 3 apples' }],
evals: [mentionsFruit, usedTool],
});
expect(results.runs).toHaveLength(1);
const run = results.runs[0];
expect(run.output).toBeTruthy();
expect(run.scores['mentions-fruit'].pass).toBe(true);
expect(run.scores['used-tool'].pass).toBe(true);
expect(results.summary['mentions-fruit'].total).toBe(1);
expect(results.summary['used-tool'].passed).toBe(1);
});
it('runs multiple dataset rows in parallel', async () => {
const hasContent = new Eval('has-content')
.description('Check response is non-empty')
.check(({ output }) => ({
pass: output.length > 10,
reasoning: `Response length: ${output.length}`,
}));
const agent = createFruitBowlAgent('anthropic');
const results = await evaluate(agent, {
dataset: [
{ input: 'Create a fruit bowl with 2 apples' },
{ input: 'Create a fruit bowl with 5 apples' },
],
evals: [hasContent],
});
expect(results.runs).toHaveLength(2);
expect(results.summary['has-content'].total).toBe(2);
expect(results.summary['has-content'].passed).toBe(2);
});
it('runs built-in string similarity eval', async () => {
const agent = new Agent('echo-agent')
.model(getModel('anthropic'))
.instructions(
'When asked "What is the capital of France?", reply with exactly: "Paris". Nothing else.',
);
const similarity = evals.stringSimilarity();
const results = await evaluate(agent, {
dataset: [{ input: 'What is the capital of France?', expected: 'Paris' }],
evals: [similarity],
});
expect(results.runs).toHaveLength(1);
expect(results.runs[0].scores['string-similarity'].pass).toBe(true);
});
it('runs LLM-as-judge correctness eval', async () => {
const agent = new Agent('math-agent')
.model(getModel('anthropic'))
.instructions('Answer math questions with just the number. No explanation.');
const correctness = evals.correctness().model(getModel('anthropic'));
const results = await evaluate(agent, {
dataset: [{ input: 'What is 2 + 2?', expected: '4' }],
evals: [correctness],
});
expect(results.runs).toHaveLength(1);
expect(results.runs[0].scores['correctness'].pass).toBe(true);
expect(results.runs[0].scores['correctness'].reasoning).toBeTruthy();
});
it('runs LLM correctness eval on fruit bowl agent with expected output', async () => {
const agent = createFruitBowlAgent('anthropic');
const correctness = evals.correctness().model(getModel('anthropic'));
const domainHelpfulness = new Eval('domain-helpfulness')
.description('Judge helpfulness in the context of a fruit-picking robot simulation')
.model(getModel('anthropic'))
.judge(async ({ input, output, llm }) => {
const result = await llm(
[
'You are evaluating a response from a simple fruit-picking robot simulation tool.',
'This is a demo/toy agent. The robot generates fruit bowls with 3D coordinates.',
'Judge ONLY whether the response fulfills what the user asked for — not production quality.',
'',
`User request: ${input}`,
`Robot response: ${output}`,
'',
'Did the response deliver what was asked?',
'Respond with ONLY JSON (no markdown fences): {"pass": true/false, "reasoning": "<explanation>"}',
].join('\n'),
);
return parseJudgeResponse(result.text);
});
const results = await evaluate(agent, {
dataset: [
{
input: 'Create a fruit bowl',
expected: 'A fruit bowl with a number of apples and their coordinates',
},
],
evals: [correctness, domainHelpfulness],
});
expect(results.runs).toHaveLength(1);
const run = results.runs[0];
expect(run.output.toLowerCase()).toContain('apple');
expect(run.scores['correctness'].pass).toBe(true);
expect(run.scores['correctness'].reasoning).toBeTruthy();
expect(run.scores['domain-helpfulness'].pass).toBe(true);
expect(run.scores['domain-helpfulness'].reasoning).toBeTruthy();
});
it('auto-resumes interruptible tool calls during eval', async () => {
const { createAgentWithMixedTools } = await import('./helpers');
const agent = createAgentWithMixedTools('anthropic');
const usedTool = new Eval('used-list-tool')
.description('Check if list_files was called')
.check(({ toolCalls }) => {
const used = (toolCalls ?? []).some((tc) => tc.tool === 'list_files');
return {
pass: used,
reasoning: used ? 'Called list_files' : 'Did not call list_files',
};
});
const hasOutput = new Eval('has-output')
.description('Check response is non-empty')
.check(({ output }) => ({
pass: output.length > 5,
reasoning: `Output length: ${output.length}`,
}));
const results = await evaluate(agent, {
dataset: [{ input: 'List files in /home' }],
evals: [usedTool, hasOutput],
});
expect(results.runs).toHaveLength(1);
expect(results.runs[0].scores['used-list-tool'].pass).toBe(true);
expect(results.runs[0].scores['has-output'].pass).toBe(true);
});
it('provides tool call inputs and outputs as JSON objects, not strings', async () => {
const agent = createFruitBowlAgent('anthropic');
const toolTypesEval = new Eval('tool-types')
.description('Verify tool call inputs/outputs are JSON objects')
.check(({ toolCalls }) => {
if (!toolCalls || toolCalls.length === 0) {
return { pass: false, reasoning: 'No tool calls' };
}
for (const tc of toolCalls) {
if (typeof tc.input === 'string') {
return { pass: false, reasoning: `Tool "${tc.tool}" input is a string: ${tc.input}` };
}
if (typeof tc.output === 'string') {
return { pass: false, reasoning: `Tool "${tc.tool}" output is a string: ${tc.output}` };
}
}
return { pass: true, reasoning: 'All tool inputs/outputs are JSON objects' };
});
const results = await evaluate(agent, {
dataset: [{ input: 'Create a fruit bowl with 2 apples' }],
evals: [toolTypesEval],
});
expect(results.runs).toHaveLength(1);
expect(results.runs[0].scores['tool-types'].pass).toBe(true);
expect(results.runs[0].scores['tool-types'].reasoning).toContain('JSON objects');
});
it('resume("generate") result includes the resumed tool call in toolCalls', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
// First generate: agent suspends on delete_file
const first = await agent.generate('Delete the file /tmp/test.txt');
expect(first.pendingSuspend).toBeDefined();
const { runId, toolCallId } = first.pendingSuspend![0];
// Resume with approval
const resumed = await agent.resume('generate', { approved: true }, { runId, toolCallId });
// The resumed tool call must appear in toolCalls.
// Bug: toolCalls is undefined or empty because runGenerateLoop() starts
// with a fresh toolCallSummary and the resume-phase tool execution is
// never captured.
expect(resumed.toolCalls).toBeDefined();
expect(resumed.toolCalls!.length).toBeGreaterThan(0);
const deletedCall = resumed.toolCalls!.find((tc) => tc.tool === 'delete_file');
expect(deletedCall).toBeDefined();
expect(deletedCall!.output).toMatchObject({ deleted: true, path: '/tmp/test.txt' });
});
it('resume("generate") result includes the resumed tool call when denied', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
const first = await agent.generate('Delete the file /tmp/secret.txt');
expect(first.pendingSuspend).toBeDefined();
const { runId, toolCallId } = first.pendingSuspend![0];
const resumed = await agent.resume('generate', { approved: false }, { runId, toolCallId });
expect(resumed.toolCalls).toBeDefined();
const deletedCall = resumed.toolCalls!.find((tc) => tc.tool === 'delete_file');
expect(deletedCall).toBeDefined();
// denied: deleted should be false
expect(deletedCall!.output).toMatchObject({ deleted: false });
});
it('evaluate() includes HITL tool calls in toolCalls passed to eval scorers', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
const sawDeleteCall = new Eval('saw-delete-call')
.description('Check that delete_file tool call appears in toolCalls after auto-resume')
.check(({ toolCalls }) => {
const found = (toolCalls ?? []).some((tc) => tc.tool === 'delete_file');
return {
pass: found,
reasoning: found
? 'delete_file present in toolCalls'
: `delete_file missing — toolCalls: ${JSON.stringify(toolCalls ?? [])}`,
};
});
const results = await evaluate(agent, {
dataset: [
{
input: 'Delete the file /tmp/test.txt',
// auto-resume with approved: true (default) so the tool completes
},
],
evals: [sawDeleteCall],
});
expect(results.runs).toHaveLength(1);
// Bug: this fails because result.toolCalls is empty after resume,
// so the eval scorer receives toolCalls=[] and pass=false.
expect(results.runs[0].scores['saw-delete-call'].pass).toBe(true);
expect(results.runs[0].scores['saw-delete-call'].reasoning).toContain('present');
});
it('evaluate() output is non-empty when agent only uses an interruptible tool (no text response)', async () => {
// If the agent produces no text and only tool output, evaluate() uses
// toolCalls to build the composite output string. With the bug, toolCalls
// is empty after resume and output becomes "".
const silentAgent = new Agent('silent-tool-agent')
.model(getModel('anthropic'))
.instructions(
'When asked to delete a file, call delete_file and return ONLY the raw JSON tool result. Do not add any explanatory text — your entire response must be the tool result only.',
)
.tool(
new Tool('delete_file')
.description('Delete a file')
.input(z.object({ path: z.string() }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({
message: `Delete "${path}"?`,
severity: 'destructive',
});
}
return { deleted: ctx.resumeData.approved, path };
}),
)
.checkpoint('memory');
const hasOutput = new Eval('has-output')
.description('Composite output must be non-empty after HITL auto-resume')
.check(({ output, toolCalls }) => {
const pass = output.length > 0;
return {
pass,
reasoning: pass
? `output="${output}"`
: `output is empty; toolCalls=${JSON.stringify(toolCalls ?? [])}`,
};
});
const results = await evaluate(silentAgent, {
dataset: [{ input: 'Delete /tmp/test.txt' }],
evals: [hasOutput],
});
expect(results.runs).toHaveLength(1);
// Bug: output is "" because toolCalls is empty, so the fallback path in
// evaluate() that builds output from tool outputs is never triggered.
expect(results.runs[0].scores['has-output'].pass).toBe(true);
});
});

View file

@@ -0,0 +1,279 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { collectStreamChunks, describeIf, getModel } from './helpers';
import { Agent, AgentEvent, Tool, type AgentEventData } from '../../index';
const describe = describeIf('anthropic');
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
function createSimpleAgent(provider: 'openai' | 'anthropic' = 'anthropic'): Agent {
return new Agent('events-test-agent')
.model(getModel(provider))
.instructions('You are a concise assistant. Reply in one short sentence.');
}
function createAgentWithTool(provider: 'openai' | 'anthropic' = 'anthropic'): Agent {
const addTool = new Tool('add_numbers')
.description('Add two numbers together')
.input(z.object({ a: z.number(), b: z.number() }))
.handler(async ({ a, b }) => ({ result: a + b }));
return new Agent('events-tool-agent')
.model(getModel(provider))
.instructions('You are a calculator. Use the add_numbers tool when asked to add.')
.tool(addTool);
}
// ---------------------------------------------------------------------------
// Event system — generate path
// ---------------------------------------------------------------------------
describe('event system — generate', () => {
it('emits AgentStart and AgentEnd around a generate() call', async () => {
const agent = createSimpleAgent();
const fired: AgentEvent[] = [];
agent.on(AgentEvent.AgentStart, () => {
fired.push(AgentEvent.AgentStart);
});
agent.on(AgentEvent.AgentEnd, () => {
fired.push(AgentEvent.AgentEnd);
});
await agent.generate('Say hello');
expect(fired).toContain(AgentEvent.AgentStart);
expect(fired).toContain(AgentEvent.AgentEnd);
expect(fired.indexOf(AgentEvent.AgentStart)).toBeLessThan(fired.indexOf(AgentEvent.AgentEnd));
});
it('emits TurnStart and TurnEnd for each LLM call', async () => {
const agent = createSimpleAgent();
const fired: AgentEvent[] = [];
agent.on(AgentEvent.TurnStart, () => fired.push(AgentEvent.TurnStart));
agent.on(AgentEvent.TurnEnd, () => fired.push(AgentEvent.TurnEnd));
await agent.generate('Say hello');
expect(fired).toContain(AgentEvent.TurnStart);
expect(fired).toContain(AgentEvent.TurnEnd);
});
it('emits ToolExecutionStart and ToolExecutionEnd when a tool runs', async () => {
const agent = createAgentWithTool();
const toolEvents: AgentEventData[] = [];
agent.on(AgentEvent.ToolExecutionStart, (data) => toolEvents.push(data));
agent.on(AgentEvent.ToolExecutionEnd, (data) => toolEvents.push(data));
await agent.generate('What is 7 plus 3?');
const starts = toolEvents.filter((e) => e.type === AgentEvent.ToolExecutionStart);
const ends = toolEvents.filter((e) => e.type === AgentEvent.ToolExecutionEnd);
expect(starts.length).toBeGreaterThan(0);
expect(ends.length).toBeGreaterThan(0);
const start = starts[0] as AgentEventData & { type: AgentEvent.ToolExecutionStart };
expect(start.toolName).toBe('add_numbers');
const end = ends[0] as AgentEventData & { type: AgentEvent.ToolExecutionEnd };
expect(end.isError).toBe(false);
expect((end.result as { result: number }).result).toBe(10);
});
it('ToolExecutionEnd carries the correct toolCallId matching ToolExecutionStart', async () => {
const agent = createAgentWithTool();
const starts: Array<AgentEventData & { type: AgentEvent.ToolExecutionStart }> = [];
const ends: Array<AgentEventData & { type: AgentEvent.ToolExecutionEnd }> = [];
agent.on(AgentEvent.ToolExecutionStart, (data) => {
starts.push(data as AgentEventData & { type: AgentEvent.ToolExecutionStart });
});
agent.on(AgentEvent.ToolExecutionEnd, (data) => {
ends.push(data as AgentEventData & { type: AgentEvent.ToolExecutionEnd });
});
await agent.generate('What is 5 plus 5?');
expect(starts.length).toBeGreaterThan(0);
expect(ends.length).toBe(starts.length);
expect(ends[0].toolCallId).toBe(starts[0].toolCallId);
});
it('multiple handlers on the same event are all called', async () => {
const agent = createSimpleAgent();
const calls: number[] = [];
agent.on(AgentEvent.AgentEnd, () => calls.push(1));
agent.on(AgentEvent.AgentEnd, () => calls.push(2));
agent.on(AgentEvent.AgentEnd, () => calls.push(3));
await agent.generate('Say hello');
expect(calls).toEqual(expect.arrayContaining([1, 2, 3]));
});
it('AgentEnd data contains the response messages', async () => {
const agent = createSimpleAgent();
let capturedMessages: unknown[] = [];
agent.on(AgentEvent.AgentEnd, (data) => {
if (data.type === AgentEvent.AgentEnd) {
capturedMessages = data.messages;
}
});
await agent.generate('Say hello');
expect(capturedMessages.length).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// Event system — stream path
// ---------------------------------------------------------------------------
describe('event system — stream', () => {
it('emits AgentStart and AgentEnd around a stream() call', async () => {
const agent = createSimpleAgent();
const fired: AgentEvent[] = [];
agent.on(AgentEvent.AgentStart, () => fired.push(AgentEvent.AgentStart));
agent.on(AgentEvent.AgentEnd, () => fired.push(AgentEvent.AgentEnd));
const { stream } = await agent.stream('Say hello');
await collectStreamChunks(stream);
expect(fired).toContain(AgentEvent.AgentStart);
expect(fired).toContain(AgentEvent.AgentEnd);
expect(fired.indexOf(AgentEvent.AgentStart)).toBeLessThan(fired.indexOf(AgentEvent.AgentEnd));
});
it('emits ToolExecutionStart and ToolExecutionEnd during streaming', async () => {
const agent = createAgentWithTool();
const toolEvents: AgentEventData[] = [];
agent.on(AgentEvent.ToolExecutionStart, (data) => toolEvents.push(data));
agent.on(AgentEvent.ToolExecutionEnd, (data) => toolEvents.push(data));
const { stream } = await agent.stream('What is 4 plus 6?');
await collectStreamChunks(stream);
const starts = toolEvents.filter((e) => e.type === AgentEvent.ToolExecutionStart);
expect(starts.length).toBeGreaterThan(0);
const start = starts[0] as AgentEventData & { type: AgentEvent.ToolExecutionStart };
expect(start.toolName).toBe('add_numbers');
});
});
// ---------------------------------------------------------------------------
// getState()
// ---------------------------------------------------------------------------
describe('getState()', () => {
it('returns idle before first run', () => {
const agent = createSimpleAgent();
const state = agent.getState();
expect(state.status).toBe('idle');
expect(state.messageList.messages).toHaveLength(0);
});
it('returns success after a successful generate()', async () => {
const agent = createSimpleAgent();
await agent.generate('Say hello');
const state = agent.getState();
expect(state.status).toBe('success');
});
it('returns success after a completed stream()', async () => {
const agent = createSimpleAgent();
const { stream } = await agent.stream('Say hello');
await collectStreamChunks(stream);
const state = agent.getState();
expect(state.status).toBe('success');
});
it('state is running during the generate loop (observed via event)', async () => {
const agent = createSimpleAgent();
let stateWhileRunning: string | undefined;
agent.on(AgentEvent.TurnStart, () => {
stateWhileRunning = agent.getState().status;
});
await agent.generate('Say hello');
expect(stateWhileRunning).toBe('running');
});
it('reflects resourceId and threadId from RunOptions', async () => {
const agent = createSimpleAgent();
await agent.generate('Say hello', {
persistence: { resourceId: 'user-123', threadId: 'thread-abc' },
});
const state = agent.getState();
expect(state.persistence?.resourceId).toBe('user-123');
expect(state.persistence?.threadId).toBe('thread-abc');
});
});
// ---------------------------------------------------------------------------
// asTool()
// ---------------------------------------------------------------------------
describe('asTool()', () => {
it('wraps the agent as a BuiltTool with the correct name and description', () => {
const agent = createSimpleAgent();
const tool = agent.asTool('A helpful assistant tool');
expect(tool.name).toBe('events-test-agent');
expect(tool.description).toBe('A helpful assistant tool');
expect(tool.inputSchema).toBeDefined();
expect(typeof tool.handler).toBe('function');
});
it('asTool handler calls the agent and returns text result', async () => {
const agent = createSimpleAgent();
const tool = agent.asTool('A helpful assistant tool');
const result = await tool.handler!({ input: 'Say "pong"' }, {});
expect(result).toHaveProperty('result');
expect(typeof (result as { result: string }).result).toBe('string');
expect((result as { result: string }).result.length).toBeGreaterThan(0);
});
it('coordinator agent can use sub-agent via asTool', async () => {
const specialist = new Agent('specialist')
.model(getModel('anthropic'))
.instructions('You are a specialist. When asked, reply with exactly "SPECIALIST_RESPONSE".');
const coordinator = new Agent('coordinator')
.model(getModel('anthropic'))
.instructions(
'You coordinate tasks. Use the specialist tool to answer questions. Relay the exact response.',
)
.tool(specialist.asTool('A specialist agent'));
const result = await coordinator.generate(
'Ask the specialist for their response and tell me what they said.',
);
const text = result.messages
.filter((m) => 'role' in m && m.role === 'assistant')
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
expect(text.length).toBeGreaterThan(0);
});
});

View file

@@ -0,0 +1,441 @@
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { describe as _describe } from 'vitest';
import { z } from 'zod';
import {
Agent,
type ContentToolCall,
type ContentToolResult,
filterLlmMessages,
Tool,
type StreamChunk,
type AgentMessage,
} from '../../index';
import { SqliteMemory } from '../../storage/sqlite-memory';
export type { StreamChunk };
/**
* Returns `describe` or `describe.skip` depending on whether the API key is set.
*/
export function describeIf(provider: 'anthropic' | 'openai') {
const envVar = provider === 'anthropic' ? 'ANTHROPIC_API_KEY' : 'OPENAI_API_KEY';
return process.env[envVar] ? _describe : _describe.skip;
}
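// A minimal usage sketch (mirrors how the suites in this directory consume this helper;
// the suite name below is illustrative only):
//   const describe = describeIf('anthropic');
//   describe('my gated suite', () => {
//     // runs only when ANTHROPIC_API_KEY is set, otherwise the whole suite is skipped
//   });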
/**
* Read all chunks from a ReadableStream into an array.
*/
export async function collectStreamChunks(stream: ReadableStream<unknown>): Promise<StreamChunk[]> {
const chunks: StreamChunk[] = [];
const reader = stream.getReader();
while (true) {
const { done, value } = await reader.read();
if (done) break;
chunks.push(value as StreamChunk);
}
return chunks;
}
/**
* Filter chunks by type.
*/
export function chunksOfType<T extends StreamChunk['type']>(
chunks: StreamChunk[],
type: T,
): Array<StreamChunk & { type: T }> {
return chunks.filter((c) => c.type === type) as Array<StreamChunk & { type: T }>;
}
/**
* Get the default model for a provider.
*/
export function getModel(provider: 'anthropic' | 'openai'): string {
return provider === 'anthropic' ? 'anthropic/claude-haiku-4-5' : 'openai/gpt-4o-mini';
}
/**
* Create a simple agent with an add_numbers tool for testing.
*/
export function createAgentWithAddTool(provider: 'anthropic' | 'openai'): Agent {
const addTool = new Tool('add_numbers')
.description('Add two numbers together and return the result')
.input(
z.object({
a: z.number().describe('First number'),
b: z.number().describe('Second number'),
}),
)
.output(
z.object({
result: z.number().describe('The sum'),
}),
)
.handler(async ({ a, b }) => ({ result: a + b }));
return new Agent('test-agent')
.model(getModel(provider))
.instructions(
'You are a calculator. When asked to add numbers, use the add_numbers tool. Be concise.',
)
.tool(addTool);
}
/**
* Create an agent with a tool that can suspend (interrupt) for confirmation.
*/
export function createAgentWithInterruptibleTool(provider: 'anthropic' | 'openai'): Agent {
const deleteTool = new Tool('delete_file')
.description('Delete a file at the given path')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-interrupt-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. When asked to delete a file, use the delete_file tool. Be concise.',
)
.tool(deleteTool)
.checkpoint('memory');
}
/**
* Create an agent with two tools: one interruptible, one not.
*/
export function createAgentWithMixedTools(provider: 'anthropic' | 'openai'): Agent {
const listTool = new Tool('list_files')
.description('List files in a directory')
.input(z.object({ dir: z.string().describe('Directory path') }))
.handler(async ({ dir }) => ({
files: ['readme.md', 'index.ts', 'package.json'],
dir,
}));
const deleteTool = new Tool('delete_file')
.description('Delete a file at the given path — dangerous operation')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-mixed-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. Use list_files to list and delete_file to delete. Be concise.',
)
.tool(listTool)
.tool(deleteTool)
.checkpoint('memory');
}
/**
* Create an agent with a tool that uses `.toMessage()` to emit a custom message.
* The tool adds two numbers; `.toMessage()` produces a custom MessageContent visible to the
* user but never forwarded to the LLM.
*/
export function createAgentWithToContentTool(provider: 'anthropic' | 'openai'): Agent {
const calcTool = new Tool('add_numbers')
.description('Add two numbers together and return the result')
.input(
z.object({
a: z.number().describe('First number'),
b: z.number().describe('Second number'),
}),
)
.output(z.object({ result: z.number().describe('The sum') }))
.handler(async ({ a, b }) => ({ result: a + b }))
.toMessage((output) => ({
type: 'custom',
messageType: '___dummyCustomMessage',
data: {
dummy: `dummy message. Tool output ${output.result}`,
},
}));
return new Agent('test-to-content-agent')
.model(getModel(provider))
.instructions(
'You are a calculator. When asked to add numbers, use the add_numbers tool. Be concise.',
)
.tool(calcTool);
}
/**
* Create an agent with one interruptible tool designed for parallel-call
* scenarios. The tool only deletes one file at a time, and the instructions
* strongly encourage parallel tool calling.
*/
export function createAgentWithParallelInterruptibleCalls(provider: 'anthropic' | 'openai'): Agent {
const deleteTool = new Tool('delete_file')
.description('Delete a single file at the given path. Can only delete one file per call.')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-parallel-interrupt-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. When asked to delete multiple files, you MUST call delete_file for EACH file using parallel tool calls in the same turn. Never skip a file.',
)
.tool(deleteTool)
.checkpoint('memory');
}
/**
* Create an agent with concurrent tool execution and an interruptible tool.
* Uses `toolCallConcurrency(Infinity)` so all tool calls in a single LLM turn
* are executed concurrently. Suspensions do not block subsequent tool calls.
*/
export function createAgentWithConcurrentInterruptibleCalls(
provider: 'anthropic' | 'openai',
): Agent {
const deleteTool = new Tool('delete_file')
.description('Delete a single file at the given path. Can only delete one file per call.')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-concurrent-interrupt-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. When asked to delete multiple files, you MUST call delete_file for EACH file using parallel tool calls in the same turn. Never skip a file.',
)
.tool(deleteTool)
.toolCallConcurrency(Infinity)
.checkpoint('memory');
}
/**
* Create an agent with concurrent tool execution mixing interruptible and
* non-interruptible tools. `list_files` runs immediately; `delete_file` suspends.
*/
export function createAgentWithConcurrentMixedTools(provider: 'anthropic' | 'openai'): Agent {
const listTool = new Tool('list_files')
.description('List files in a directory')
.input(z.object({ dir: z.string().describe('Directory path') }))
.handler(async ({ dir }) => ({
files: ['readme.md', 'index.ts', 'package.json'],
dir,
}));
const deleteTool = new Tool('delete_file')
.description('Delete a file at the given path — dangerous operation')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-concurrent-mixed-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. Use list_files to list and delete_file to delete. Be concise.',
)
.tool(listTool)
.tool(deleteTool)
.toolCallConcurrency(Infinity)
.checkpoint('memory');
}
/**
* Create an agent with bounded concurrency and an interruptible tool.
* Uses `toolCallConcurrency(concurrency)` to control batching.
*/
export function createAgentWithBatchedInterruptibleCalls(
provider: 'anthropic' | 'openai',
concurrency: number,
): Agent {
const deleteTool = new Tool('delete_file')
.description('Delete a single file at the given path. Can only delete one file per call.')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('test-batched-interrupt-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. When asked to delete multiple files, you MUST call delete_file for EACH file using parallel tool calls in the same turn. Never skip a file.',
)
.tool(deleteTool)
.toolCallConcurrency(concurrency)
.checkpoint('memory');
}
/**
* Create an agent with bounded concurrency and a non-interruptible tool.
*/
export function createAgentWithBatchedNormalCalls(
provider: 'anthropic' | 'openai',
concurrency: number,
): Agent {
const checkTool = new Tool('check_file')
.description('Check if a file exists at the given path. Can only check one file per call.')
.input(z.object({ path: z.string().describe('File path to check') }))
.output(z.object({ exists: z.boolean(), path: z.string() }))
.handler(async ({ path }) => ({ exists: true, path }));
return new Agent('test-batched-normal-agent')
.model(getModel(provider))
.instructions(
'You are a file manager. When asked to check multiple files, you MUST call check_file for EACH file using parallel tool calls in the same turn. Never skip a file. After checking, summarize the results concisely.',
)
.tool(checkTool)
.toolCallConcurrency(concurrency)
.checkpoint('memory');
}
/**
* Create an agent with a tool that always throws an error.
* Used to verify that tool errors surface as LLM-visible messages.
*/
export function createAgentWithAlwaysErrorTool(provider: 'anthropic' | 'openai'): Agent {
const brokenTool = new Tool('broken_tool')
.description('Fetch data from a remote service')
.input(z.object({ id: z.string().describe('Resource ID to fetch') }))
.handler(async () => {
throw new Error('Service unavailable: connection timeout');
});
return new Agent('test-error-agent')
.model(getModel(provider))
.instructions(
'You are a data fetcher. Use broken_tool to fetch data. ' +
'If the tool fails, acknowledge the error in your response and explain what happened. Be concise.',
)
.tool(brokenTool);
}
/**
* Create an agent with a tool that fails on the first call and succeeds on the second.
* Used to verify that the LLM can self-correct by retrying after seeing the error result.
*/
export function createAgentWithFlakyTool(provider: 'anthropic' | 'openai'): {
agent: Agent;
callCount: () => number;
} {
let calls = 0;
const flakyTool = new Tool('fetch_data')
.description('Fetch data. May fail on the first attempt — retry if it does.')
.input(z.object({ id: z.string().describe('Resource ID to fetch') }))
.output(z.object({ id: z.string(), value: z.number() }))
.handler(async ({ id }) => {
calls++;
if (calls === 1) throw new Error('Transient error: rate limit exceeded, please retry');
return { id, value: 42 };
});
const agent = new Agent('test-flaky-agent')
.model(getModel(provider))
.instructions(
'You are a data fetcher. Use fetch_data to fetch data. ' +
'If the tool fails with a transient error, retry the SAME call once. Be concise.',
)
.tool(flakyTool);
return { agent, callCount: () => calls };
}
export const findLastTextContent = (messages: AgentMessage[]): string | undefined => {
return filterLlmMessages(messages)
.reverse()
.find((m) => m.content.find((c) => c.type === 'text'))
?.content.find((c) => c.type === 'text')?.text;
};
export const findLastToolCallContent = (messages: AgentMessage[]): ContentToolCall | undefined => {
return filterLlmMessages(messages)
.reverse()
.find((m) => m.content.find((c) => c.type === 'tool-call'))
?.content.find((c) => c.type === 'tool-call');
};
export const findAllToolCalls = (messages: AgentMessage[]): ContentToolCall[] => {
return filterLlmMessages(messages)
.filter((m) => m.content.find((c) => c.type === 'tool-call'))
.map((m) => m.content.filter((c) => c.type === 'tool-call'))
.flat();
};
export const findAllToolResults = (messages: AgentMessage[]): ContentToolResult[] => {
return filterLlmMessages(messages)
.filter((m) => m.content.find((c) => c.type === 'tool-result'))
.map((m) => m.content.find((c) => c.type === 'tool-result') as ContentToolResult);
};
export const collectTextDeltas = (chunks: StreamChunk[]): string => {
return chunks
.filter((c) => c.type === 'text-delta')
.map((c) => c.delta)
.join('');
};
export function createSqliteMemory(): {
memory: SqliteMemory;
cleanup: () => void;
url: string;
} {
const dbPath = path.join(
os.tmpdir(),
`test-${Date.now()}-${Math.random().toString(36).slice(2)}.db`,
);
const url = `file:${dbPath}`;
const memory = new SqliteMemory({ url });
return {
memory,
url,
cleanup: () => {
try {
fs.unlinkSync(dbPath);
} catch {
// File may already be removed — ignore
}
},
};
}

View file

@@ -0,0 +1,126 @@
/**
* Integration tests for JSON Schema input validation on regular (non-MCP) tools.
*
* Covers: valid input passes through, type errors surface as tool-result errors,
* missing required properties surface as tool-result errors, and the LLM can
* self-correct after receiving a JSON Schema validation error.
*
* Tests that call agent.generate() are gated on ANTHROPIC_API_KEY.
*/
import { expect, it, vi } from 'vitest';
import { describeIf, findLastTextContent } from './helpers';
import { Agent, filterLlmMessages } from '../../index';
import type { BuiltTool } from '../../types/sdk/tool';
const describe = describeIf('anthropic');
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
* Build a BuiltTool whose inputSchema is a raw JSON Schema object (not Zod).
* This mimics the shape that MCP tools use and the scenario we want to test
* for first-party tools that expose a JSONSchema7 directly.
*/
function makeJsonSchemaTool(overrides: Partial<BuiltTool> = {}): BuiltTool {
return {
name: 'find_user',
description: 'Find a user by their numeric age (18-99 only).',
inputSchema: {
type: 'object',
properties: {
age: {
type: 'integer',
minimum: 18,
maximum: 99,
description: 'User age (18-99)',
},
},
required: ['age'],
},
handler: async (input) => {
const { age } = input as { age: number };
return { user: `User aged ${age}` };
},
...overrides,
};
}
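// Example (as exercised in the first test below): override only the handler while keeping
// the raw JSON Schema intact, so validation behaviour is unchanged:
//   const handler = vi.fn().mockResolvedValue({ user: 'User aged 25' });
//   const tool = makeJsonSchemaTool({ handler });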
// ---------------------------------------------------------------------------
// No-LLM tests: validation outcome is determined by the tool-result message
// ---------------------------------------------------------------------------
describe('JSON Schema validation — non-MCP tools with raw JSON Schema', () => {
it('passes valid input to the handler and returns a successful tool result', async () => {
const handler = vi.fn().mockResolvedValue({ user: 'User aged 25' });
const tool = makeJsonSchemaTool({ handler });
const result = await new Agent('test')
.model('anthropic/claude-haiku-4-5')
.instructions(
'You are a user directory. Use find_user to look up users. ' +
'Call the tool with age=25 and then summarise the result. Be concise.',
)
.tool(tool)
.generate('Find user aged 25.');
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
// The handler should have been called with valid data
expect(handler).toHaveBeenCalledWith(expect.objectContaining({ age: 25 }), expect.anything());
// No tool-result should carry an error flag
const allMessages = filterLlmMessages(result.messages);
const toolResults = allMessages.flatMap((m) =>
m.content.filter((c) => c.type === 'tool-result'),
);
expect(toolResults.every((r) => !r.isError)).toBe(true);
});
it('allows the LLM to self-correct after receiving a JSON Schema validation error', async () => {
let callCount = 0;
const handler = vi.fn().mockImplementation(async (input: unknown) => {
callCount++;
return { user: `User aged ${(input as { age: number }).age}` };
});
// The schema enforces age ≥ 18. The prompt asks for age 5 first, then
// instructs the LLM to retry with 25 if validation fails.
const result = await new Agent('age-self-correction')
.model('anthropic/claude-haiku-4-5')
.instructions(
'You are a user directory. Use find_user to look up users by age. ' +
'The age must be an integer between 18 and 99. ' +
'If validation fails, correct the age to 25 and retry. Be very concise.',
)
.tool(makeJsonSchemaTool({ handler }))
.generate(
'Find a user aged 5. If that age is invalid, use 25 instead and retry. ' +
'You MUST try age 5 first, and only then use 25.',
);
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
// There should be at least two tool-result messages: one error, one success
const allMessages = filterLlmMessages(result.messages);
const toolResultMessages = allMessages.filter((m) =>
m.content.some((c) => c.type === 'tool-result'),
);
expect(toolResultMessages.length).toBeGreaterThanOrEqual(2);
// The successful handler call should have received a valid age
expect(callCount).toBeGreaterThanOrEqual(1);
const validCallArgs = handler.mock.calls.find(
([input]) => (input as { age: number }).age === 25,
);
expect(validCallArgs).toBeDefined();
// The final LLM response should acknowledge finding a user
const text = findLastTextContent(result.messages);
expect(text).toBeTruthy();
});
});

View file

@@ -0,0 +1,252 @@
/**
* Unit-style tests for McpConnection.listTools() approval wrapping.
*
* These tests use a real in-process MCP SSE server but do NOT require an LLM.
* They verify that the `requireApproval` field on McpServerConfig (and the
* global `shouldRequireToolApproval` constructor flag) correctly wrap the
* appropriate tools with a suspend/resume approval gate.
*
* Tool names from the test server: echo, add, image (prefixed: tools_echo, tools_add, tools_image).
*/
import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from 'vitest';
import { startSseServer, type TestServer } from './mcp-server-helpers';
import { McpConnection } from '../../runtime/mcp-connection';
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/** Returns true when the tool has been wrapped with an approval gate (has a suspendSchema). */
function isApprovalWrapped(tool: { suspendSchema?: unknown }): boolean {
return tool.suspendSchema !== undefined;
}
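// Sketch of the config shapes exercised below (values are illustrative; see each test for the
// authoritative setup):
//   new McpConnection({ name: 'tools', url })                              // no approval gate
//   new McpConnection({ name: 'tools', url, requireApproval: true })       // wrap every tool
//   new McpConnection({ name: 'tools', url, requireApproval: ['echo'] })   // wrap only listed tools
//   new McpConnection({ name: 'tools', url }, true)                        // global shouldRequireToolApproval flag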
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('McpConnection.listTools() — requireApproval config', () => {
let server: TestServer;
let connection: McpConnection | undefined;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
afterEach(async () => {
if (connection) {
await connection.disconnect();
connection = undefined;
}
});
// -----------------------------------------------------------------------
// no approval
// -----------------------------------------------------------------------
it('does not wrap any tools when requireApproval is not set', async () => {
connection = new McpConnection({ name: 'tools', url: server.url });
await connection.connect();
const tools = await connection.listTools();
expect(tools.length).toBeGreaterThan(0);
expect(tools.every((t) => !isApprovalWrapped(t))).toBe(true);
});
it('does not wrap any tools when requireApproval is false', async () => {
connection = new McpConnection({ name: 'tools', url: server.url, requireApproval: false });
await connection.connect();
const tools = await connection.listTools();
expect(tools.every((t) => !isApprovalWrapped(t))).toBe(true);
});
it('does not wrap any tools when requireApproval is an empty array', async () => {
connection = new McpConnection({ name: 'tools', url: server.url, requireApproval: [] });
await connection.connect();
const tools = await connection.listTools();
expect(tools.every((t) => !isApprovalWrapped(t))).toBe(true);
});
// -----------------------------------------------------------------------
// requireApproval: true — all tools
// -----------------------------------------------------------------------
it('wraps all tools when requireApproval: true in server config', async () => {
connection = new McpConnection({ name: 'tools', url: server.url, requireApproval: true });
await connection.connect();
const tools = await connection.listTools();
expect(tools.length).toBeGreaterThan(0);
expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
});
// -----------------------------------------------------------------------
// requireApproval: string[] — selective tools
// -----------------------------------------------------------------------
it('wraps only the listed tool when requireApproval names a single tool', async () => {
connection = new McpConnection({
name: 'tools',
url: server.url,
requireApproval: ['echo'],
});
await connection.connect();
const tools = await connection.listTools();
const echo = tools.find((t) => t.name === 'tools_echo');
const add = tools.find((t) => t.name === 'tools_add');
const image = tools.find((t) => t.name === 'tools_image');
expect(echo).toBeDefined();
expect(add).toBeDefined();
expect(image).toBeDefined();
expect(isApprovalWrapped(echo!)).toBe(true);
expect(isApprovalWrapped(add!)).toBe(false);
expect(isApprovalWrapped(image!)).toBe(false);
});
it('wraps multiple listed tools when requireApproval names several tools', async () => {
connection = new McpConnection({
name: 'tools',
url: server.url,
requireApproval: ['echo', 'add'],
});
await connection.connect();
const tools = await connection.listTools();
const echo = tools.find((t) => t.name === 'tools_echo');
const add = tools.find((t) => t.name === 'tools_add');
const image = tools.find((t) => t.name === 'tools_image');
expect(isApprovalWrapped(echo!)).toBe(true);
expect(isApprovalWrapped(add!)).toBe(true);
expect(isApprovalWrapped(image!)).toBe(false);
});
it('does not wrap tools that are not in the requireApproval list', async () => {
connection = new McpConnection({
name: 'tools',
url: server.url,
requireApproval: ['image'],
});
await connection.connect();
const tools = await connection.listTools();
const echo = tools.find((t) => t.name === 'tools_echo');
const add = tools.find((t) => t.name === 'tools_add');
const image = tools.find((t) => t.name === 'tools_image');
expect(isApprovalWrapped(echo!)).toBe(false);
expect(isApprovalWrapped(add!)).toBe(false);
expect(isApprovalWrapped(image!)).toBe(true);
});
// -----------------------------------------------------------------------
// global shouldRequireToolApproval flag
// -----------------------------------------------------------------------
it('wraps all tools when global shouldRequireToolApproval flag is true', async () => {
connection = new McpConnection({ name: 'tools', url: server.url }, true);
await connection.connect();
const tools = await connection.listTools();
expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
});
// -----------------------------------------------------------------------
// global flag + config.requireApproval interaction
// -----------------------------------------------------------------------
it('wraps all tools when global flag is true even if config.requireApproval names only some tools', async () => {
connection = new McpConnection(
{ name: 'tools', url: server.url, requireApproval: ['echo'] },
true,
);
await connection.connect();
const tools = await connection.listTools();
expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
});
it('wraps all tools when config.requireApproval: true even if global flag is false', async () => {
connection = new McpConnection(
{ name: 'tools', url: server.url, requireApproval: true },
false,
);
await connection.connect();
const tools = await connection.listTools();
expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
});
// -----------------------------------------------------------------------
// prefix stripping — server name used as prefix
// -----------------------------------------------------------------------
it('matches tool names without prefix when requireApproval contains un-prefixed names', async () => {
// The server is named 'srv'; tools will be 'srv_echo', 'srv_add', 'srv_image'.
// requireApproval uses the un-prefixed original names.
connection = new McpConnection({ name: 'srv', url: server.url, requireApproval: ['echo'] });
await connection.connect();
const tools = await connection.listTools();
const echo = tools.find((t) => t.name === 'srv_echo');
const add = tools.find((t) => t.name === 'srv_add');
expect(isApprovalWrapped(echo!)).toBe(true);
expect(isApprovalWrapped(add!)).toBe(false);
});
});
// ---------------------------------------------------------------------------
// Disconnect idempotency
// ---------------------------------------------------------------------------
type McpConnectionInternals = { client: { close(): Promise<void> } };
describe('McpConnection.disconnect() — idempotency', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('does not throw and does not call client.close() again when disconnect is called on an already-closed connection', async () => {
const conn = new McpConnection({ name: 'tools', url: server.url });
await conn.connect();
const clientClose = vi
.spyOn((conn as unknown as McpConnectionInternals).client, 'close')
.mockResolvedValue(undefined);
await conn.disconnect();
await conn.disconnect();
expect(clientClose).toHaveBeenCalledTimes(1);
});
it('does not throw and calls client.close() exactly once when disconnect is called concurrently', async () => {
const conn = new McpConnection({ name: 'tools', url: server.url });
await conn.connect();
const clientClose = vi
.spyOn((conn as unknown as McpConnectionInternals).client, 'close')
.mockResolvedValue(undefined);
await Promise.all([conn.disconnect(), conn.disconnect()]);
expect(clientClose).toHaveBeenCalledTimes(1);
});
});

View file

@@ -0,0 +1,473 @@
/**
* Integration tests for MCP lifecycle via McpClient and the Agent builder.
* Covers: McpClient constructor validation, connect/listTools/close, tool merge,
* name collision, requireToolApproval, and rich content handling.
*
* Tests that don't require a real LLM run unconditionally.
* Tests that call agent.generate() / agent.stream() are gated on ANTHROPIC_API_KEY.
*/
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
import { z } from 'zod';
import {
describeIf,
getModel,
findLastTextContent,
collectStreamChunks,
chunksOfType,
} from './helpers';
import { startSseServer, type TestServer } from './mcp-server-helpers';
import { Agent, McpClient, Tool, isLlmMessage } from '../../index';
// ---------------------------------------------------------------------------
// McpClient constructor validation — no MCP server required
// ---------------------------------------------------------------------------
describe('McpClient constructor validation', () => {
it('throws if neither url nor command is provided', () => {
expect(() => new McpClient([{ name: 'bad' }])).toThrow(
'exactly one of "url" or "command" must be provided',
);
});
it('throws if both url and command are provided', () => {
expect(
() => new McpClient([{ name: 'bad', url: 'http://localhost', command: 'node' }]),
).toThrow('provide either "url" or "command", not both');
});
it('throws if a duplicate server name is registered', () => {
expect(
() =>
new McpClient([
{ name: 'browser', url: 'http://localhost:9999/sse' },
{ name: 'browser', url: 'http://localhost:9998/sse' },
]),
).toThrow('MCP server name "browser" is already registered');
});
it('accepts valid url-based config', () => {
expect(() => new McpClient([{ name: 'srv', url: 'http://localhost:9999/sse' }])).not.toThrow();
});
it('accepts valid command-based config', () => {
expect(
() => new McpClient([{ name: 'stdio-srv', command: 'node', args: ['server.mjs'] }]),
).not.toThrow();
});
it('accepts multiple servers with distinct names', () => {
expect(
() =>
new McpClient([
{ name: 'srv-a', url: 'http://localhost:9999/sse' },
{ name: 'srv-b', url: 'http://localhost:9998/sse' },
]),
).not.toThrow();
});
});
// ---------------------------------------------------------------------------
// McpClient.listTools() — needs in-process MCP server, no LLM
// ---------------------------------------------------------------------------
describe('McpClient.listTools()', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('connects and returns tools when server is reachable', async () => {
const client = new McpClient([{ name: 'tools', url: server.url }]);
const tools = await client.listTools();
expect(tools.length).toBe(3);
expect(tools.map((t) => t.name).sort()).toEqual(['tools_add', 'tools_echo', 'tools_image']);
await client.close();
});
it('returns cached tools on subsequent calls without reconnecting', async () => {
const client = new McpClient([{ name: 'tools', url: server.url }]);
const first = await client.listTools();
const second = await client.listTools();
expect(first).toBe(second);
await client.close();
});
it('returns empty array when no servers are configured', async () => {
const client = new McpClient([]);
const tools = await client.listTools();
expect(tools).toHaveLength(0);
});
it('throws and clears cache when server is unreachable', async () => {
const client = new McpClient([{ name: 'dead', url: 'http://127.0.0.1:1/sse' }]);
await expect(client.listTools()).rejects.toThrow();
});
it('reports per-server errors for partially-failing multi-server configs', async () => {
const client = new McpClient([
{ name: 'ok', url: server.url },
{ name: 'dead', url: 'http://127.0.0.1:1/sse' },
]);
await expect(client.listTools()).rejects.toThrow(/dead/);
});
});
// ---------------------------------------------------------------------------
// generate() with MCP tools — requires ANTHROPIC_API_KEY
// ---------------------------------------------------------------------------
const describe_llm = describeIf('anthropic');
describe_llm('agent generate() with MCP tool', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('calls an MCP tool during generation and returns the result', async () => {
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('mcp-agent')
.model(getModel('anthropic'))
.instructions(
'You are a helpful assistant. When asked to echo a message, use the tools_echo tool. Be concise.',
)
.mcp(client);
const result = await agent.generate(
'Echo the message "integration test passed" using the tools_echo tool.',
);
expect(result.finishReason).not.toBe('error');
const text = findLastTextContent(result.messages);
expect(text?.toLowerCase()).toContain('integration test passed');
await client.close();
});
it('merges static tools and MCP tools in the same agent', async () => {
const staticTool = new Tool('double')
.description('Double a number')
.input(z.object({ n: z.number().describe('The number to double') }))
.output(z.object({ result: z.number() }))
.handler(async ({ n }) => ({ result: n * 2 }));
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('mixed-tools-agent')
.model(getModel('anthropic'))
.instructions(
'You are a calculator. ' +
'Use the double tool to double numbers and the tools.add tool to add numbers. ' +
'Be concise.',
)
.tool(staticTool)
.mcp(client);
const result = await agent.generate('Use the tools.add tool to add 15 and 27.');
expect(result.finishReason).not.toBe('error');
const text = findLastTextContent(result.messages);
expect(text).toContain('42');
await client.close();
});
it('MCP connections persist across multiple generate() calls', async () => {
// Connections are kept alive by McpClient and reused across runs.
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('lifecycle-agent')
.model(getModel('anthropic'))
.instructions('Use tools.add to add numbers. Be concise.')
.mcp(client);
const result1 = await agent.generate('Use tools.add to add 1 and 2.');
const result2 = await agent.generate('Use tools.add to add 3 and 4.');
expect(result1.finishReason).not.toBe('error');
expect(result2.finishReason).not.toBe('error');
await client.close();
});
});
// ---------------------------------------------------------------------------
// stream() with MCP tools — requires ANTHROPIC_API_KEY
// ---------------------------------------------------------------------------
describe_llm('agent stream() with MCP tool', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('streams a response that includes an MCP tool call', async () => {
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('stream-mcp-agent')
.model(getModel('anthropic'))
.instructions('Use tools_echo to echo messages. Be concise.')
.mcp(client);
const { stream } = await agent.stream('Echo "stream works" using tools_echo.');
const chunks = await collectStreamChunks(stream);
const messageChunks = chunksOfType(chunks, 'message');
const messages = messageChunks.map((c) => c.message);
const hasToolCall = messages.some(
(m) => isLlmMessage(m) && m.content.some((c) => c.type === 'tool-call'),
);
expect(hasToolCall).toBe(true);
await client.close();
});
});
// ---------------------------------------------------------------------------
// generate() error cases — no LLM needed for the connection failure case
// ---------------------------------------------------------------------------
describe('generate() with unreachable MCP server', () => {
it('rejects when MCP server is unreachable', async () => {
const client = new McpClient([{ name: 'dead', url: 'http://127.0.0.1:1/sse' }]);
const agent = new Agent('bad-mcp-agent')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(client);
await expect(agent.generate('hello')).rejects.toThrow(/dead/i);
});
});
// ---------------------------------------------------------------------------
// MCP tool name collision detection — no LLM needed
// ---------------------------------------------------------------------------
describe('MCP tool name collision detection', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('throws when a static tool and an MCP tool share the same prefixed name', async () => {
const conflicting = new Tool('tools_echo')
.description('conflicts with MCP echo')
.input(z.object({ message: z.string() }))
.handler(async ({ message }) => ({ result: message }));
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('collision-agent')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.tool(conflicting)
.mcp(client);
try {
await expect(agent.generate('hello')).rejects.toThrow(/collision/i);
} finally {
await client.close();
}
});
});
// ---------------------------------------------------------------------------
// requireToolApproval with MCP tools — requires ANTHROPIC_API_KEY
// ---------------------------------------------------------------------------
describe_llm('requireToolApproval() with MCP tools', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('suspends the MCP tool call when requireToolApproval is enabled', async () => {
const client = new McpClient([{ name: 'tools', url: server.url }]);
const agent = new Agent('approval-mcp-agent')
.model(getModel('anthropic'))
.instructions('Use tools_echo to echo messages. Be concise.')
.mcp(client)
.requireToolApproval()
.checkpoint('memory');
const { stream } = await agent.stream('Echo "needs approval" using tools_echo.');
const chunks = await collectStreamChunks(stream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBeGreaterThanOrEqual(1);
expect(suspendedChunks[0].toolName).toBe('tools_echo');
await client.close();
});
});
// ---------------------------------------------------------------------------
// McpServerConfig.requireApproval — builder validation (no LLM needed)
// ---------------------------------------------------------------------------
describe('McpServerConfig.requireApproval — builder validation', () => {
it('throws when requireApproval: true is set without a checkpoint store', async () => {
const client = new McpClient([
{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: true },
]);
const agent = new Agent('no-checkpoint')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(client);
// build() is triggered by generate() — fails before attempting connection
await expect(agent.generate('test')).rejects.toThrow(/checkpoint/i);
});
it('throws when requireApproval: string[] is set without a checkpoint store', async () => {
const client = new McpClient([
{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: ['echo'] },
]);
const agent = new Agent('no-checkpoint-selective')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(client);
await expect(agent.generate('test')).rejects.toThrow(/checkpoint/i);
});
it('does not throw when requireApproval: true is set with a checkpoint store', () => {
expect(() =>
new Agent('with-checkpoint')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(
new McpClient([
{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: true },
]),
)
.checkpoint('memory'),
).not.toThrow();
});
it('does not throw when requireApproval: false is set without a checkpoint store', () => {
expect(() =>
new Agent('no-approval')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(
new McpClient([
{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: false },
]),
),
).not.toThrow();
});
it('does not throw when requireApproval is an empty array without a checkpoint store', () => {
expect(() =>
new Agent('empty-approval')
.model('anthropic/claude-haiku-4-5')
.instructions('test')
.mcp(
new McpClient([{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: [] }]),
),
).not.toThrow();
});
});
// ---------------------------------------------------------------------------
// McpServerConfig.requireApproval end-to-end — requires ANTHROPIC_API_KEY
// ---------------------------------------------------------------------------
describe_llm('McpServerConfig.requireApproval with MCP tools', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('suspends all MCP tools when config.requireApproval: true', async () => {
const client = new McpClient([{ name: 'tools', url: server.url, requireApproval: true }]);
const agent = new Agent('config-approval-all-agent')
.model(getModel('anthropic'))
.instructions('Use tools_echo to echo messages. Be concise.')
.mcp(client)
.checkpoint('memory');
const { stream } = await agent.stream('Echo "needs approval" using tools_echo.');
const chunks = await collectStreamChunks(stream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBeGreaterThanOrEqual(1);
expect(suspendedChunks[0].toolName).toBe('tools_echo');
await client.close();
});
it('suspends only the listed tool when config.requireApproval is a string array', async () => {
const client = new McpClient([{ name: 'tools', url: server.url, requireApproval: ['echo'] }]);
const agent = new Agent('config-approval-selective-agent')
.model(getModel('anthropic'))
.instructions('Use tools_echo to echo messages. Be concise.')
.mcp(client)
.checkpoint('memory');
const { stream } = await agent.stream('Echo "selective approval" using tools_echo.');
const chunks = await collectStreamChunks(stream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBeGreaterThanOrEqual(1);
expect(suspendedChunks[0].toolName).toBe('tools_echo');
await client.close();
});
it('does not suspend a tool not listed in config.requireApproval', async () => {
// Only 'echo' requires approval; 'add' should run to completion without suspension.
const client = new McpClient([{ name: 'tools', url: server.url, requireApproval: ['echo'] }]);
const agent = new Agent('config-approval-unlisted-agent')
.model(getModel('anthropic'))
.instructions('Use tools_add to add numbers. Do not use any other tool. Be concise.')
.mcp(client)
.checkpoint('memory');
const result = await agent.generate('Use tools_add to add 10 and 32.');
expect(result.finishReason).not.toBe('error');
const text = findLastTextContent(result.messages);
expect(text).toContain('42');
await client.close();
});
});

View file

@ -0,0 +1,164 @@
/**
* In-process MCP test server helpers.
* Creates real MCP servers (SSE and StreamableHTTP) bound to random localhost ports
* for use in integration tests. No mocking of SDK internals.
*/
import { Server as McpServer } from '@modelcontextprotocol/sdk/server/index.js';
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
import http from 'http';
/** 1×1 transparent PNG in base64 (smallest valid PNG). Used for image tool tests. */
export const TINY_PNG =
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
export interface TestServer {
url: string;
close: () => Promise<void>;
}
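/*
 * Typical usage from a test file (sketch; assumes vitest's beforeAll/afterAll hooks,
 * as in the integration tests in this package):
 *
 *   let server: TestServer;
 *   beforeAll(async () => {
 *     server = await startSseServer(); // or startStreamableHttpServer()
 *   });
 *   afterAll(async () => {
 *     await server.close();
 *   });
 */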
/** Create an in-process MCP Server with three test tools: echo, add, and image. */
export function createTestMcpServer(): McpServer {
const server = new McpServer(
{ name: 'test-mcp-server', version: '1.0.0' },
{ capabilities: { tools: {} } },
);
server.setRequestHandler(ListToolsRequestSchema, async () => ({
tools: [
{
name: 'echo',
description: 'Echo the message back as-is',
inputSchema: {
type: 'object',
properties: { message: { type: 'string', description: 'Message to echo' } },
required: ['message'],
},
},
{
name: 'add',
description: 'Add two numbers together',
inputSchema: {
type: 'object',
properties: {
a: { type: 'number', description: 'First number' },
b: { type: 'number', description: 'Second number' },
},
required: ['a', 'b'],
},
},
{
name: 'image',
description: 'Return a small image with a caption',
inputSchema: {
type: 'object',
properties: { caption: { type: 'string', description: 'Image caption' } },
required: ['caption'],
},
},
],
}));
server.setRequestHandler(CallToolRequestSchema, async (request) => {
const { name, arguments: args = {} } = request.params;
if (name === 'echo') {
// eslint-disable-next-line @typescript-eslint/no-base-to-string
return { content: [{ type: 'text', text: String(args.message ?? '') }] };
}
if (name === 'add') {
const sum = Number(args.a ?? 0) + Number(args.b ?? 0);
return { content: [{ type: 'text', text: String(sum) }] };
}
if (name === 'image') {
return {
content: [
// eslint-disable-next-line @typescript-eslint/no-base-to-string
{ type: 'text', text: String(args.caption ?? '') },
{ type: 'image', data: TINY_PNG, mimeType: 'image/png' },
],
};
}
return {
isError: true,
content: [{ type: 'text', text: `Unknown tool: ${name}` }],
};
});
return server;
}
/** Start an SSE MCP server on a random port. Returns the SSE endpoint URL and a close function. */
export async function startSseServer(): Promise<TestServer> {
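// Active SSE transports keyed by sessionId so POST /message requests can be routed
// back to the client connection they belong to.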
const transports = new Map<string, SSEServerTransport>();
const httpServer = http.createServer(async (req, res) => {
try {
if (req.method === 'GET' && req.url === '/sse') {
// Create a fresh McpServer per client connection — the Server class holds
// a single active transport reference and rejects a second connect() call
// if the first transport hasn't been fully torn down yet.
const mcpServer = createTestMcpServer();
const transport = new SSEServerTransport('/message', res);
transports.set(transport.sessionId, transport);
await mcpServer.connect(transport);
} else if (req.method === 'POST' && req.url?.startsWith('/message')) {
const sessionId = new URL(req.url, 'http://localhost').searchParams.get('sessionId') ?? '';
const transport = transports.get(sessionId);
if (transport) {
await transport.handlePostMessage(req, res);
} else {
res.writeHead(404).end(`No transport for sessionId: ${sessionId}`);
}
} else {
res.writeHead(404).end('Not found');
}
} catch {
if (!res.headersSent) res.writeHead(500).end('Internal server error');
}
});
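// Listen on port 0 so the OS assigns a free ephemeral port; the actual port is read
// back from address() below.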
await new Promise<void>((resolve) => httpServer.listen(0, '127.0.0.1', resolve));
const { port } = httpServer.address() as { port: number };
return {
url: `http://127.0.0.1:${port}/sse`,
close: async () => {
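// Destroy any still-open connections (including long-lived SSE streams) so
// httpServer.close() can complete promptly.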
httpServer.closeAllConnections();
await new Promise<void>((resolve) => httpServer.close(() => resolve()));
},
};
}
/** Start a Streamable HTTP MCP server on a random port. Returns the endpoint URL and a close function. */
export async function startStreamableHttpServer(): Promise<TestServer> {
// In stateless mode (sessionIdGenerator: undefined) the SDK enforces that each
// transport instance handles exactly one HTTP request. A fresh McpServer + transport
// must therefore be created per-request, mirroring the SSE server pattern above.
const httpServer = http.createServer(async (req, res) => {
try {
const mcpServer = createTestMcpServer();
const transport = new StreamableHTTPServerTransport({ sessionIdGenerator: undefined });
await mcpServer.connect(transport);
await transport.handleRequest(req, res);
} catch {
if (!res.headersSent) res.writeHead(500).end('Internal server error');
}
});
await new Promise<void>((resolve) => httpServer.listen(0, '127.0.0.1', resolve));
const { port } = httpServer.address() as { port: number };
return {
url: `http://127.0.0.1:${port}/mcp`,
close: async () => {
httpServer.closeAllConnections();
await new Promise<void>((resolve) => httpServer.close(() => resolve()));
},
};
}

View file

@ -0,0 +1,201 @@
/**
* Integration tests for McpConnection with SSE transport.
* Uses a real in-process HTTP server implementing the MCP SSE protocol.
* No mocking of SDK internals or McpConnection.
*/
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
import { startSseServer, TINY_PNG, type TestServer } from './mcp-server-helpers';
import { McpConnection } from '../../runtime/mcp-connection';
import type { ContentFile, ContentText, Message } from '../../types/sdk/message';
import { isZodSchema } from '../../utils/zod';
describe('McpConnection — SSE transport', () => {
let server: TestServer;
beforeAll(async () => {
server = await startSseServer();
});
afterAll(async () => {
await server.close();
});
it('connects to an SSE server and lists tools', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const tools = await conn.listTools();
expect(tools).toHaveLength(3);
expect(tools.map((t) => t.name)).toEqual(
expect.arrayContaining(['test_echo', 'test_add', 'test_image']),
);
await conn.disconnect();
});
it('calls echo tool and returns text content', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const result = await conn.callTool('echo', { message: 'hello from sse' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
expect(result.content[0]).toEqual({ type: 'text', text: 'hello from sse' });
await conn.disconnect();
});
it('calls add tool and returns calculated result', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const result = await conn.callTool('add', { a: 7, b: 13 });
expect(result.isError).toBeFalsy();
expect(result.content[0]).toEqual({ type: 'text', text: '20' });
await conn.disconnect();
});
it('calls image tool and returns mixed text + image content', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const result = await conn.callTool('image', { caption: 'landscape' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(2);
expect(result.content[0]).toMatchObject({ type: 'text', text: 'landscape' });
expect(result.content[1]).toMatchObject({
type: 'image',
data: TINY_PNG,
mimeType: 'image/png',
});
await conn.disconnect();
});
it('disconnects cleanly without throwing', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
await expect(conn.disconnect()).resolves.toBeUndefined();
});
it('throws when listTools() is called without connecting first', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
// Do NOT call conn.connect()
await expect(conn.listTools()).rejects.toThrow();
});
it('throws when callTool() is called without connecting first', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await expect(conn.callTool('echo', { message: 'hi' })).rejects.toThrow();
});
it('is idempotent — calling connect() twice resolves without starting a second connection', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
await expect(conn.connect()).resolves.toBeUndefined();
await conn.disconnect();
});
it('deduplicates concurrent connect() calls — both resolve via the same promise', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
const [r1, r2] = await Promise.all([conn.connect(), conn.connect()]);
expect(r1).toBeUndefined();
expect(r2).toBeUndefined();
await conn.disconnect();
});
describe('listTools() resolved tools', () => {
it('prefixes tool names with the server name', async () => {
const conn = new McpConnection({ name: 'browser', url: server.url });
await conn.connect();
const builtTools = await conn.listTools();
expect(builtTools.every((t) => t.name.startsWith('browser_'))).toBe(true);
expect(builtTools.map((t) => t.name)).toEqual(
expect.arrayContaining(['browser_echo', 'browser_add', 'browser_image']),
);
await conn.disconnect();
});
it('sets inputSchema as raw JSON Schema (not Zod) and sets mcpTool flag', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const builtTools = await conn.listTools();
for (const t of builtTools) {
expect(t.inputSchema).toBeDefined();
expect(isZodSchema(t.inputSchema!)).toBe(false);
expect(t.mcpTool).toBe(true);
expect(t.mcpServerName).toBe('test');
}
await conn.disconnect();
});
it('handler calls the tool and returns the MCP result', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const builtTools = await conn.listTools();
const echoTool = builtTools.find((t) => t.name === 'test_echo')!;
const result = await echoTool.handler!({ message: 'from handler' }, {} as never);
const mcpResult = result as { content: Array<{ type: string; text: string }> };
expect(mcpResult.content[0]).toEqual({ type: 'text', text: 'from handler' });
await conn.disconnect();
});
it('toMessage returns undefined for text-only results', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const builtTools = await conn.listTools();
const echoTool = builtTools.find((t) => t.name === 'test_echo')!;
const mcpResult = await conn.callTool('echo', { message: 'text only' });
const message = echoTool.toMessage!(mcpResult);
expect(message).toBeUndefined();
await conn.disconnect();
});
it('toMessage returns an assistant message with file part for image results', async () => {
const conn = new McpConnection({ name: 'test', url: server.url });
await conn.connect();
const builtTools = await conn.listTools();
const imageTool = builtTools.find((t) => t.name === 'test_image')!;
const mcpResult = await conn.callTool('image', { caption: 'my photo' });
const message = imageTool.toMessage!(mcpResult);
expect(message).toBeDefined();
const llmMessage = message as Message;
expect(llmMessage.role).toBe('assistant');
const content = llmMessage.content as Array<ContentText | ContentFile>;
const textPart = content.find((c): c is ContentText => c.type === 'text');
const filePart = content.find((c): c is ContentFile => c.type === 'file');
expect(textPart).toBeDefined();
expect(textPart!.text).toBe('my photo');
expect(filePart).toBeDefined();
expect(filePart!.mediaType).toBe('image/png');
expect(filePart!.data).toBe(TINY_PNG);
await conn.disconnect();
});
});
});

View file

@ -0,0 +1,118 @@
/**
* Integration tests for McpConnection with stdio transport.
* Spawns a real child process (mcp-stdio-server.mjs) and communicates via stdin/stdout.
* No mocking of SDK internals or McpConnection.
*/
import path from 'path';
import { describe, expect, it } from 'vitest';
import { TINY_PNG } from './mcp-server-helpers';
import { McpConnection } from '../../runtime/mcp-connection';
// vitest injects __dirname for TypeScript test files in the node environment.
const FIXTURE_PATH = path.resolve(__dirname, '../fixtures/mcp-stdio-server.mjs');
/** Config that spawns the stdio fixture server. */
function stdioConfig(name = 'test') {
return {
name,
command: 'node',
args: [FIXTURE_PATH],
};
}
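// The fixture exposes the same echo/add/image tools as the in-process servers,
// so the assertions below mirror the SSE and Streamable HTTP suites.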
describe('McpConnection — stdio transport', () => {
it('connects to a stdio server and lists tools', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
const tools = await conn.listTools();
expect(tools).toHaveLength(3);
expect(tools.map((t) => t.name)).toEqual(
expect.arrayContaining(['test_echo', 'test_add', 'test_image']),
);
await conn.disconnect();
});
it('calls echo tool and returns text content', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
const result = await conn.callTool('echo', { message: 'hello from stdio' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
expect(result.content[0]).toEqual({ type: 'text', text: 'hello from stdio' });
await conn.disconnect();
});
it('calls add tool and returns calculated result', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
const result = await conn.callTool('add', { a: 42, b: 58 });
expect(result.isError).toBeFalsy();
expect(result.content[0]).toEqual({ type: 'text', text: '100' });
await conn.disconnect();
});
it('calls image tool and returns mixed text + image content', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
const result = await conn.callTool('image', { caption: 'forest' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(2);
expect(result.content[0]).toMatchObject({ type: 'text', text: 'forest' });
expect(result.content[1]).toMatchObject({
type: 'image',
data: TINY_PNG,
mimeType: 'image/png',
});
await conn.disconnect();
});
it('disconnects cleanly, terminating the child process', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
await expect(conn.disconnect()).resolves.toBeUndefined();
});
describe('listTools() resolved tools', () => {
it('prefixes tool names with the server name', async () => {
const conn = new McpConnection(stdioConfig('fs'));
await conn.connect();
const builtTools = await conn.listTools();
expect(builtTools.every((t) => t.name.startsWith('fs_'))).toBe(true);
expect(builtTools.map((t) => t.name)).toEqual(
expect.arrayContaining(['fs_echo', 'fs_add', 'fs_image']),
);
await conn.disconnect();
});
it('handler invokes the child process tool and returns MCP result', async () => {
const conn = new McpConnection(stdioConfig());
await conn.connect();
const builtTools = await conn.listTools();
const addTool = builtTools.find((t) => t.name === 'test_add')!;
const result = await addTool.handler!({ a: 3, b: 4 }, {} as never);
const mcpResult = result as { content: Array<{ type: string; text: string }> };
expect(mcpResult.content[0]).toEqual({ type: 'text', text: '7' });
await conn.disconnect();
});
});
});

View file

@ -0,0 +1,131 @@
/**
* Integration tests for McpConnection with Streamable HTTP transport.
* Uses a real in-process HTTP server implementing the MCP Streamable HTTP protocol.
* No mocking of SDK internals or McpConnection.
*/
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
import { startStreamableHttpServer, TINY_PNG, type TestServer } from './mcp-server-helpers';
import { McpConnection } from '../../runtime/mcp-connection';
import type { ContentFile, ContentText, Message } from '../../types/sdk/message';
describe('McpConnection — Streamable HTTP transport', () => {
let server: TestServer;
beforeAll(async () => {
server = await startStreamableHttpServer();
});
afterAll(async () => {
await server.close();
});
it('connects to a Streamable HTTP server and lists tools', async () => {
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
await conn.connect();
const tools = await conn.listTools();
expect(tools).toHaveLength(3);
expect(tools.map((t) => t.name)).toEqual(
expect.arrayContaining(['test_echo', 'test_add', 'test_image']),
);
await conn.disconnect();
});
it('calls echo tool and returns text content', async () => {
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
await conn.connect();
const result = await conn.callTool('echo', { message: 'hello from streamable-http' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(1);
expect(result.content[0]).toEqual({ type: 'text', text: 'hello from streamable-http' });
await conn.disconnect();
});
it('calls add tool and returns calculated result', async () => {
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
await conn.connect();
const result = await conn.callTool('add', { a: 100, b: 200 });
expect(result.isError).toBeFalsy();
expect(result.content[0]).toEqual({ type: 'text', text: '300' });
await conn.disconnect();
});
it('calls image tool and returns mixed text + image content', async () => {
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
await conn.connect();
const result = await conn.callTool('image', { caption: 'mountains' });
expect(result.isError).toBeFalsy();
expect(result.content).toHaveLength(2);
expect(result.content[0]).toMatchObject({ type: 'text', text: 'mountains' });
expect(result.content[1]).toMatchObject({
type: 'image',
data: TINY_PNG,
mimeType: 'image/png',
});
await conn.disconnect();
});
it('disconnects cleanly without throwing', async () => {
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
await conn.connect();
await expect(conn.disconnect()).resolves.toBeUndefined();
});
describe('listTools() resolved tools', () => {
it('prefixes tool names with the server name', async () => {
const conn = new McpConnection({
name: 'devtools',
url: server.url,
transport: 'streamableHttp',
});
await conn.connect();
const builtTools = await conn.listTools();
expect(builtTools.every((t) => t.name.startsWith('devtools_'))).toBe(true);
expect(builtTools.map((t) => t.name)).toEqual(
expect.arrayContaining(['devtools_echo', 'devtools_add', 'devtools_image']),
);
await conn.disconnect();
});
it('toMessage returns an assistant message with file part for image results', async () => {
const conn = new McpConnection({
name: 'test',
url: server.url,
transport: 'streamableHttp',
});
await conn.connect();
const builtTools = await conn.listTools();
const imageTool = builtTools.find((t) => t.name === 'test_image')!;
const mcpResult = await conn.callTool('image', { caption: 'sunset' });
const message = imageTool.toMessage!(mcpResult);
expect(message).toBeDefined();
const llmMessage = message as Message;
expect(llmMessage.role).toBe('assistant');
const content = llmMessage.content as Array<ContentText | ContentFile>;
const filePart = content.find((c): c is ContentFile => c.type === 'file');
expect(filePart).toBeDefined();
expect(filePart!.mediaType).toBe('image/png');
await conn.disconnect();
});
});
});

View file

@ -0,0 +1,302 @@
/**
* Integration test: custom BuiltMemory backend.
*
* Proves that any object implementing the BuiltMemory interface works with the
 * agent runtime; no SDK-provided storage class needed. This is the contract
* that Redis, DynamoDB, TypeORM, or any other persistence layer must satisfy.
*/
import { expect, it, beforeEach } from 'vitest';
import { Agent, Memory, toDbMessage, type AgentDbMessage, type AgentMessage } from '../../../index';
import type { BuiltMemory, Thread } from '../../../types/sdk/memory';
import { describeIf, findLastTextContent, getModel } from '../helpers';
const describe = describeIf('anthropic');
// ---------------------------------------------------------------------------
// Custom in-memory BuiltMemory implementation (simulates Redis, DynamoDB, etc.)
// ---------------------------------------------------------------------------
class CustomMapMemory implements BuiltMemory {
readonly threads = new Map<string, Thread>();
readonly messages = new Map<string, AgentDbMessage[]>();
readonly workingMemory = new Map<string, string>();
// --- Thread management ---
async getThread(threadId: string): Promise<Thread | null> {
return this.threads.get(threadId) ?? null;
}
async saveThread(thread: Omit<Thread, 'createdAt' | 'updatedAt'>): Promise<Thread> {
const now = new Date();
const full: Thread = { ...thread, createdAt: now, updatedAt: now };
this.threads.set(thread.id, full);
return full;
}
async deleteThread(threadId: string): Promise<void> {
this.threads.delete(threadId);
this.messages.delete(threadId);
}
// --- Message persistence ---
async getMessages(
threadId: string,
opts?: { limit?: number; before?: Date },
): Promise<AgentDbMessage[]> {
let msgs = this.messages.get(threadId) ?? [];
if (opts?.before) {
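// Keep undated messages; otherwise keep only messages created strictly before the cutoff.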
msgs = msgs.filter((m) => {
const ts = 'createdAt' in m ? (m as Record<string, unknown>).createdAt : undefined;
return ts instanceof Date ? ts < opts.before! : true;
});
}
if (opts?.limit) {
msgs = msgs.slice(-opts.limit);
}
return msgs.map(toDbMessage);
}
async saveMessages(args: {
threadId: string;
resourceId?: string;
messages: AgentMessage[];
}): Promise<void> {
const existing = this.messages.get(args.threadId) ?? [];
this.messages.set(args.threadId, [...existing, ...args.messages.map(toDbMessage)]);
}
async deleteMessages(messageIds: string[]): Promise<void> {
const idSet = new Set(messageIds);
for (const [threadId, msgs] of this.messages) {
this.messages.set(
threadId,
msgs.filter((m) => !idSet.has(m.id)),
);
}
}
// --- Working memory (Tier 2) ---
async getWorkingMemory(params: {
threadId: string;
resourceId: string;
scope: 'resource' | 'thread';
}): Promise<string | null> {
return (
this.workingMemory.get(params.scope === 'resource' ? params.resourceId : params.threadId) ??
null
);
}
async saveWorkingMemory(
params: { threadId: string; resourceId: string; scope: 'resource' | 'thread' },
content: string,
): Promise<void> {
const id = params.scope === 'resource' ? params.resourceId : params.threadId;
this.workingMemory.set(id, content);
}
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('custom BuiltMemory backend', () => {
let store: CustomMapMemory;
beforeEach(() => {
store = new CustomMapMemory();
});
it('recalls previous messages across turns', async () => {
const memory = new Memory().storage(store).lastMessages(10);
const agent = new Agent('custom-mem-recall')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `custom-thread-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'user-1' } };
await agent.generate('My name is Valentina. Just acknowledge.', options);
const result = await agent.generate('What is my name?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('valentina');
// Verify the custom store actually received messages
const stored = store.messages.get(threadId);
expect(stored).toBeDefined();
expect(stored!.length).toBeGreaterThanOrEqual(2);
});
it('isolates threads in the custom backend', async () => {
const memory = new Memory().storage(store).lastMessages(10);
const agent = new Agent('custom-mem-isolation')
.model(getModel('anthropic'))
.instructions(
'You are a helpful assistant. Be concise. If you don\'t know something, say "I don\'t know".',
)
.memory(memory);
const thread1 = `custom-t1-${Date.now()}`;
const thread2 = `custom-t2-${Date.now()}`;
await agent.generate('The secret word is NEPTUNE. Just acknowledge.', {
persistence: { threadId: thread1, resourceId: 'user-1' },
});
const result = await agent.generate('What is the secret word?', {
persistence: { threadId: thread2, resourceId: 'user-1' },
});
expect(findLastTextContent(result.messages)?.toLowerCase()).not.toContain('neptune');
// Thread 1 should have messages, thread 2 should have its own
expect(store.messages.get(thread1)!.length).toBeGreaterThan(0);
expect(store.messages.get(thread2)!.length).toBeGreaterThan(0);
});
it('persists and retrieves resource-scoped working memory via custom backend', async () => {
const memory = new Memory()
.storage(store)
.lastMessages(10)
.scope('resource')
.freeform('# User Profile\n- **Name**:\n- **Favorite color**:');
const agent = new Agent('custom-mem-working')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
.memory(memory);
const threadId = `custom-wm-${Date.now()}`;
const resourceId = 'user-wm-1';
const options = { persistence: { threadId, resourceId } };
await agent.generate('My name is Kenji and my favorite color is teal.', options);
// Working memory should have been persisted keyed by resourceId
const wm = store.workingMemory.get(resourceId);
expect(wm).toBeDefined();
expect(wm!.toLowerCase()).toContain('kenji');
// New thread, same resourceId — resource-scoped working memory carries over
const thread2 = `custom-wm2-${Date.now()}`;
const result = await agent.generate('What is my name?', {
persistence: { threadId: thread2, resourceId },
});
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('kenji');
});
it('persists and retrieves thread-scoped working memory via custom backend', async () => {
const memory = new Memory()
.storage(store)
.lastMessages(10)
.scope('thread')
.freeform('# Conversation Notes\n- **Topic**:\n- **Key facts**:');
const agent = new Agent('custom-mem-thread-wm')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
.memory(memory);
const threadId = `custom-twm-${Date.now()}`;
const resourceId = 'user-twm-1';
await agent.generate('The project codename is AURORA. Just acknowledge.', {
persistence: { threadId, resourceId },
});
// Working memory should be stored keyed by threadId
const wmByThread = store.workingMemory.get(threadId);
expect(wmByThread).toBeDefined();
expect(wmByThread!.toLowerCase()).toContain('aurora');
// Different thread for same resource — should NOT see the previous working memory
const thread2 = `custom-twm2-${Date.now()}`;
const result = await agent.generate(
'What is the project codename? Answer "unknown" if you have no information.',
{ persistence: { threadId: thread2, resourceId } },
);
expect(findLastTextContent(result.messages)?.toLowerCase()).not.toContain('aurora');
// Thread 2 working memory should be independent
expect(store.workingMemory.get(thread2) ?? '').not.toContain('aurora');
});
it('thread-scoped working memory allows recall within the same thread when history is truncated', async () => {
// Use lastMessages: 1 so earlier turns are pushed out of the history window.
// The agent must rely on working memory — not chat history — to recall old facts.
const memory = new Memory()
.storage(store)
.lastMessages(1)
.scope('thread')
.freeform('# Key facts\n- **Secret word**:\n- **User name**:');
const agent = new Agent('custom-mem-thread-wm-recall')
.model(getModel('anthropic'))
.instructions(
'You are a helpful assistant. Be concise. ' +
'Always update your working memory with any important facts you learn.',
)
.memory(memory);
const threadId = `custom-twm-recall-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'user-twm-recall' } };
// Turn 1: share a fact — agent writes it into working memory
await agent.generate('The secret word is COBALT. Remember it. Just acknowledge.', options);
// Turn 2: filler turn — this pushes turn 1 out of the 1-message history window
await agent.generate('Just say "ok".', options);
// Turn 3: ask for the fact — only working memory can supply it now (turn 1 is truncated)
const result = await agent.generate('What was the secret word I told you earlier?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('cobalt');
});
it('works with stream() path', async () => {
const memory = new Memory().storage(store).lastMessages(10);
const agent = new Agent('custom-mem-stream')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `custom-stream-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'user-stream' } };
// Turn 1 via stream
const result1 = await agent.stream('The capital of France is Paris. Acknowledge.', options);
const reader = result1.stream.getReader();
while (true) {
const { done } = await reader.read();
if (done) break;
}
// Turn 2 via generate — should recall from custom store
const result = await agent.generate('What is the capital of France?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('paris');
expect(store.messages.get(threadId)!.length).toBeGreaterThanOrEqual(2);
});
it('works when passed directly to agent.memory() as bare BuiltMemory', async () => {
// Skip the Memory builder entirely — pass the raw BuiltMemory object
const agent = new Agent('custom-mem-bare')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(store);
const threadId = `custom-bare-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'user-bare' } };
await agent.generate('Remember: the answer is 42. Acknowledge.', options);
const result = await agent.generate('What is the answer?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('42');
});
});

View file

@ -0,0 +1,108 @@
import { expect, it, afterEach } from 'vitest';
import { Agent, Memory } from '../../../index';
import { SqliteMemory } from '../../../storage/sqlite-memory';
import { describeIf, findLastTextContent, getModel, createSqliteMemory } from '../helpers';
const describe = describeIf('anthropic');
const cleanups: Array<() => void> = [];
afterEach(() => {
cleanups.forEach((fn) => fn());
cleanups.length = 0;
});
describe('freeform working memory', () => {
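// Markdown template the model fills in as its freeform working memory across turns.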
const template = '# User Context\n- **Name**:\n- **City**:\n- **Pet**:';
it('agent recalls info via working memory across turns', async () => {
const memory = new Memory().storage('memory').lastMessages(10).freeform(template);
const agent = new Agent('freeform-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `freeform-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
await agent.generate('My name is Alice and I live in Berlin.', options);
const result = await agent.generate('What city do I live in?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('berlin');
});
it('working memory tags are stripped from visible response', async () => {
const memory = new Memory().storage('memory').lastMessages(10).freeform(template);
const agent = new Agent('strip-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `strip-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
const result = await agent.generate('My name is Bob.', options);
const allText = result.messages
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => (c as { text: string }).text)
.join(' ');
expect(allText).not.toContain('<working_memory>');
expect(allText).not.toContain('</working_memory>');
});
it('working memory persists across threads with same resourceId', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
const mem = new Memory().storage(memory).lastMessages(10).freeform(template);
const agent = new Agent('cross-thread-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem);
const resourceId = `user-${Date.now()}`;
await agent.generate('My name is Charlie and I have a dog named Rex.', {
persistence: { threadId: `thread-1-${Date.now()}`, resourceId },
});
const result = await agent.generate("What's my dog's name?", {
persistence: { threadId: `thread-2-${Date.now()}`, resourceId },
});
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('rex');
});
it('working memory survives SqliteMemory restart', async () => {
const { memory, cleanup, url } = createSqliteMemory();
cleanups.push(cleanup);
const mem = new Memory().storage(memory).lastMessages(10).freeform(template);
const agent1 = new Agent('restart-wm-1')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem);
const resourceId = `user-${Date.now()}`;
const threadId = `restart-wm-${Date.now()}`;
await agent1.generate('My name is Diana.', { persistence: { threadId, resourceId } });
const memory2 = new SqliteMemory({ url });
const mem2 = new Memory().storage(memory2).lastMessages(10).freeform(template);
const agent2 = new Agent('restart-wm-2')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem2);
const result = await agent2.generate('What is my name?', {
persistence: { threadId: `new-thread-${Date.now()}`, resourceId },
});
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('diana');
});
});

View file

@ -0,0 +1,627 @@
/**
* Integration test: PostgresMemory with pgvector semantic recall.
*
* Uses testcontainers to spin up a real Postgres instance with pgvector,
* then runs the agent against it to verify full end-to-end memory behavior.
*/
import { execSync } from 'node:child_process';
import { Pool } from 'pg';
import { GenericContainer, Wait, type StartedTestContainer } from 'testcontainers';
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
import { Agent, Memory, PostgresMemory } from '../../../index';
import { describeIf, findLastTextContent, getModel } from '../helpers';
const describeWithApi = describeIf('anthropic');
/**
* Check if Docker is available synchronously. testcontainers requires a running
* Docker daemon; skip the entire file in environments without it.
*/
function isDockerAvailable(): boolean {
try {
execSync('docker info', { stdio: 'ignore' });
return true;
} catch {
return false;
}
}
const hasDocker = isDockerAvailable();
let container: StartedTestContainer;
let connectionString: string;
beforeAll(async () => {
if (!hasDocker) return;
container = await new GenericContainer('pgvector/pgvector:pg17')
.withExposedPorts(5432)
.withEnvironment({
POSTGRES_USER: 'test',
POSTGRES_PASSWORD: 'test',
POSTGRES_DB: 'testdb',
})
// Postgres emits this message twice: once during initdb (temporary) and once when truly ready.
// Waiting for the second occurrence ensures we don't connect during the brief restart window.
.withWaitStrategy(Wait.forLogMessage('database system is ready to accept connections', 2))
.start();
const host = container.getHost();
const port = container.getMappedPort(5432);
connectionString = `postgresql://test:test@${host}:${port}/testdb`;
}, 60_000);
afterAll(async () => {
try {
if (container) await container.stop();
} catch (error) {
console.error('Error stopping container:', error);
}
}, 30_000);
/** describe that requires Docker — tests are no-ops without it. */
function describeWithDocker(name: string, fn: () => void) {
describe(name, () => {
if (!hasDocker) {
it('skipped — Docker not available', () => {});
return;
}
fn();
});
}
describeWithDocker('PostgresMemory saveThread upsert', () => {
it('preserves existing title and metadata when not provided', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'upsert_test' });
await mem.saveThread({
id: 'upsert-t1',
resourceId: 'user-1',
title: 'Original Title',
metadata: { key: 'value' },
});
// Upsert without title or metadata (simulates saveMessagesToThread)
await mem.saveThread({ id: 'upsert-t1', resourceId: 'user-1' });
const thread = await mem.getThread('upsert-t1');
expect(thread).not.toBeNull();
expect(thread!.title).toBe('Original Title');
expect(thread!.metadata).toEqual({ key: 'value' });
await mem.close();
});
it('overwrites title and metadata when explicitly provided', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'upsert_ow' });
await mem.saveThread({
id: 'upsert-t2',
resourceId: 'user-1',
title: 'Old Title',
metadata: { old: true },
});
await mem.saveThread({
id: 'upsert-t2',
resourceId: 'user-1',
title: 'New Title',
metadata: { new: true },
});
const thread = await mem.getThread('upsert-t2');
expect(thread!.title).toBe('New Title');
expect(thread!.metadata).toEqual({ new: true });
await mem.close();
});
});
describeWithDocker('PostgresMemory unit tests', () => {
it('creates tables on first use and round-trips a thread', async () => {
const mem = new PostgresMemory({ connection: connectionString });
const thread = await mem.saveThread({
id: 'thread-1',
resourceId: 'user-1',
title: 'Test Thread',
});
expect(thread.id).toBe('thread-1');
expect(thread.createdAt).toBeInstanceOf(Date);
const loaded = await mem.getThread('thread-1');
expect(loaded).not.toBeNull();
expect(loaded!.title).toBe('Test Thread');
expect(loaded!.resourceId).toBe('user-1');
await mem.close();
});
it('saves and retrieves messages with limit', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'msg_test' });
await mem.saveThread({ id: 't1', resourceId: 'u1' });
const messages = [
{ role: 'user' as const, content: [{ type: 'text' as const, text: 'Hello' }] },
{ role: 'assistant' as const, content: [{ type: 'text' as const, text: 'Hi there' }] },
{ role: 'user' as const, content: [{ type: 'text' as const, text: 'How are you?' }] },
];
await mem.saveMessages({ threadId: 't1', messages });
// Get last 2 messages
const last2 = await mem.getMessages('t1', { limit: 2 });
expect(last2).toHaveLength(2);
// Get all messages
const all = await mem.getMessages('t1');
expect(all).toHaveLength(3);
await mem.close();
});
it('saves and retrieves working memory keyed by resourceId', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_test' });
expect(
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' }),
).toBeNull();
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' },
'# Profile\n- Name: Alice',
);
expect(
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' }),
).toBe('# Profile\n- Name: Alice');
// Overwrite
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' },
'# Profile\n- Name: Alice\n- Role: Engineer',
);
expect(
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' }),
).toContain('Engineer');
await mem.close();
});
it('saves and retrieves working memory keyed by threadId (no resourceId)', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_thread_test' });
expect(
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' }),
).toBeNull();
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' },
'thread context',
);
expect(
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' }),
).toBe('thread context');
await mem.close();
});
it('isolates working memory by resourceId', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_iso_test' });
await mem.saveWorkingMemory(
{ threadId: 'thread-a', resourceId: 'user-a', scope: 'resource' },
'data for user-a',
);
await mem.saveWorkingMemory(
{ threadId: 'thread-b', resourceId: 'user-b', scope: 'resource' },
'data for user-b',
);
expect(
await mem.getWorkingMemory({ threadId: 'thread-a', resourceId: 'user-a', scope: 'resource' }),
).toBe('data for user-a');
expect(
await mem.getWorkingMemory({ threadId: 'thread-b', resourceId: 'user-b', scope: 'resource' }),
).toBe('data for user-b');
await mem.close();
});
it('stores scope=resource when resourceId is provided', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_scope_test' });
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'res-1', scope: 'resource' },
'resource content',
);
const pool = new Pool({ connectionString });
const result = await pool.query<{ scope: string }>(
'SELECT scope FROM wm_scope_test_working_memory WHERE key = $1',
['res-1'],
);
expect(result.rows[0].scope).toBe('resource');
await pool.end();
await mem.close();
});
it('stores scope=thread when only threadId is provided', async () => {
const mem = new PostgresMemory({
connection: connectionString,
namespace: 'wm_scope_thread_test',
});
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' },
'thread content',
);
const pool = new Pool({ connectionString });
const result = await pool.query<{ scope: string }>(
'SELECT scope FROM wm_scope_thread_test_working_memory WHERE key = $1',
['thread-1'],
);
expect(result.rows[0].scope).toBe('thread');
await pool.end();
await mem.close();
});
it('does not mix resource-scoped and thread-scoped entries with the same key value', async () => {
const mem = new PostgresMemory({
connection: connectionString,
namespace: 'wm_scope_iso_test',
});
const sharedKey = 'same-id';
await mem.saveWorkingMemory(
{ threadId: 'thread-1', resourceId: sharedKey, scope: 'resource' },
'resource data',
);
await mem.saveWorkingMemory(
{ threadId: sharedKey, resourceId: sharedKey, scope: 'thread' },
'thread data',
);
expect(
await mem.getWorkingMemory({
threadId: 'thread-1',
resourceId: sharedKey,
scope: 'resource',
}),
).toBe('resource data');
expect(
await mem.getWorkingMemory({ threadId: sharedKey, resourceId: sharedKey, scope: 'thread' }),
).toBe('thread data');
await mem.close();
});
it('deletes thread and cascades to messages', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'del_test' });
await mem.saveThread({ id: 'del-t1', resourceId: 'u1' });
await mem.saveMessages({
threadId: 'del-t1',
messages: [{ role: 'user' as const, content: [{ type: 'text' as const, text: 'test' }] }],
});
await mem.deleteThread('del-t1');
expect(await mem.getThread('del-t1')).toBeNull();
expect(await mem.getMessages('del-t1')).toHaveLength(0);
await mem.close();
});
it('stores and queries embeddings with pgvector', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_test' });
await mem.saveThread({ id: 'vec-t1', resourceId: 'u1' });
// Save some embeddings (3-dimensional for simplicity)
await mem.saveEmbeddings({
threadId: 'vec-t1',
resourceId: 'u1',
entries: [
{ id: 'msg-1', vector: [1.0, 0.0, 0.0], text: 'About cats', model: 'test' },
{ id: 'msg-2', vector: [0.0, 1.0, 0.0], text: 'About dogs', model: 'test' },
{ id: 'msg-3', vector: [0.9, 0.1, 0.0], text: 'About kittens', model: 'test' },
],
});
// Query for vectors close to [1, 0, 0] — should return msg-1 and msg-3 first
const results = await mem.queryEmbeddings({
scope: 'resource',
resourceId: 'u1',
vector: [1.0, 0.0, 0.0],
topK: 2,
});
expect(results).toHaveLength(2);
expect(results[0].id).toBe('msg-1');
expect(results[0].score).toBeGreaterThan(0.9);
// msg-3 should be second (cosine similarity ~0.99 with [0.9, 0.1, 0])
expect(results[1].id).toBe('msg-3');
await mem.close();
});
it('filters embeddings by resourceId with scope=resource (default)', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_res' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'user-a',
entries: [{ id: 'msg-a1', vector: [1.0, 0.0, 0.0], text: 'User A thread 1', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'user-a',
entries: [{ id: 'msg-a2', vector: [0.9, 0.1, 0.0], text: 'User A thread 2', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't3',
resourceId: 'user-b',
entries: [{ id: 'msg-b1', vector: [1.0, 0.0, 0.0], text: 'User B thread 3', model: 'test' }],
});
// Default scope is 'resource' — should return both user-a embeddings across threads
const results = await mem.queryEmbeddings({
resourceId: 'user-a',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(2);
const ids = results.map((r) => r.id);
expect(ids).toContain('msg-a1');
expect(ids).toContain('msg-a2');
expect(ids).not.toContain('msg-b1');
await mem.close();
});
it('filters embeddings by threadId with scope=thread', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_thr' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'user-1',
entries: [
{ id: 'msg-t1a', vector: [1.0, 0.0, 0.0], text: 'Thread 1 A', model: 'test' },
{ id: 'msg-t1b', vector: [0.0, 1.0, 0.0], text: 'Thread 1 B', model: 'test' },
],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'user-1',
entries: [{ id: 'msg-t2', vector: [1.0, 0.0, 0.0], text: 'Thread 2', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'thread',
threadId: 't1',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(2);
const ids = results.map((r) => r.id);
expect(ids).toContain('msg-t1a');
expect(ids).toContain('msg-t1b');
expect(ids).not.toContain('msg-t2');
await mem.close();
});
it('resource scope excludes embeddings from other resources', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_iso' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'res-1',
entries: [{ id: 'msg-r1', vector: [1.0, 0.0, 0.0], text: 'Resource 1', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'res-2',
entries: [{ id: 'msg-r2', vector: [1.0, 0.0, 0.0], text: 'Resource 2', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'resource',
resourceId: 'res-1',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(1);
expect(results[0].id).toBe('msg-r1');
await mem.close();
});
it('stores resourceId in the embeddings table', async () => {
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_col' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'my-resource',
entries: [
{ id: 'msg-check', vector: [1.0, 0.0, 0.0], text: 'Check resourceId', model: 'test' },
],
});
const pool = new Pool({ connectionString });
const result = await pool.query<{ resourceId: string }>(
'SELECT "resourceId" FROM vec_col_message_embeddings WHERE id = $1',
['msg-check'],
);
expect(result.rows[0].resourceId).toBe('my-resource');
await pool.end();
await mem.close();
});
it('isolates namespaces', async () => {
const mem1 = new PostgresMemory({ connection: connectionString, namespace: 'ns_a' });
const mem2 = new PostgresMemory({ connection: connectionString, namespace: 'ns_b' });
await mem1.saveThread({ id: 'shared-id', resourceId: 'u1', title: 'From A' });
await mem2.saveThread({ id: 'shared-id', resourceId: 'u1', title: 'From B' });
expect((await mem1.getThread('shared-id'))!.title).toBe('From A');
expect((await mem2.getThread('shared-id'))!.title).toBe('From B');
await mem1.close();
await mem2.close();
});
});
/** describe that requires both Docker and an Anthropic API key. */
function describeWithDockerAndApi(name: string, fn: () => void) {
const describeOrSkip = describeWithApi;
describeOrSkip(name, () => {
if (!hasDocker) {
it('skipped — Docker not available', () => {});
return;
}
fn();
});
}
describeWithDockerAndApi('PostgresMemory agent integration', () => {
it('recalls previous messages across turns', async () => {
const store = new PostgresMemory({ connection: connectionString, namespace: 'agent_recall' });
const memory = new Memory().storage(store).lastMessages(10);
const agent = new Agent('pg-recall-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `pg-thread-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'pg-user-1' } };
await agent.generate('My favorite planet is Saturn. Just acknowledge.', options);
const result = await agent.generate('What is my favorite planet?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('saturn');
await store.close();
});
it('persists resource-scoped working memory via Postgres backend', async () => {
const store = new PostgresMemory({ connection: connectionString, namespace: 'agent_wm' });
const memory = new Memory()
.storage(store)
.lastMessages(10)
.scope('resource')
.freeform('# User Profile\n- **Name**:\n- **Hobby**:');
const agent = new Agent('pg-wm-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
.memory(memory);
const threadId = `pg-wm-${Date.now()}`;
const resourceId = 'pg-wm-user';
await agent.generate('My name is Hiro and I enjoy cycling.', {
persistence: { threadId, resourceId },
});
// Working memory should be persisted in Postgres (keyed by resourceId)
const wm = await store.getWorkingMemory({ threadId, resourceId, scope: 'resource' });
expect(wm).toBeDefined();
expect(wm!.toLowerCase()).toContain('hiro');
// New thread, same resourceId — resource-scoped working memory carries over
const result = await agent.generate('What is my name?', {
persistence: { threadId: `pg-wm2-${Date.now()}`, resourceId },
});
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('hiro');
await store.close();
});
it('persists thread-scoped working memory via Postgres backend', async () => {
const store = new PostgresMemory({
connection: connectionString,
namespace: 'agent_thread_wm',
});
const memory = new Memory()
.storage(store)
.lastMessages(10)
.scope('thread')
.freeform('# Conversation Notes\n- **Topic**:\n- **Key facts**:');
const agent = new Agent('pg-thread-wm-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
.memory(memory);
const threadId = `pg-twm-${Date.now()}`;
const resourceId = 'pg-twm-user';
await agent.generate('The secret project name is HELIOS. Just acknowledge.', {
persistence: { threadId, resourceId },
});
// Working memory should be stored keyed by threadId
const wmByThread = await store.getWorkingMemory({ threadId, resourceId, scope: 'thread' });
expect(wmByThread).toBeDefined();
expect(wmByThread!.toLowerCase()).toContain('helios');
// resourceId key should be empty — nothing stored there
const wmByResource = await store.getWorkingMemory({ threadId, resourceId, scope: 'resource' });
expect(wmByResource).toBeNull();
// New thread for same resource — should NOT carry over thread-scoped working memory
const thread2 = `pg-twm2-${Date.now()}`;
const result = await agent.generate(
'What is the project name? Answer "unknown" if you have no information.',
{ persistence: { threadId: thread2, resourceId } },
);
expect(findLastTextContent(result.messages)?.toLowerCase()).not.toContain('helios');
await store.close();
});
it('works with stream() path', async () => {
const store = new PostgresMemory({ connection: connectionString, namespace: 'agent_stream' });
const memory = new Memory().storage(store).lastMessages(10);
const agent = new Agent('pg-stream-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `pg-stream-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'pg-stream-user' } };
// Turn 1 via stream
const { stream } = await agent.stream(
'The speed of light is approximately 300,000 km/s. Acknowledge.',
options,
);
const reader = stream.getReader();
while (true) {
const { done } = await reader.read();
if (done) break;
}
// Turn 2 via generate — should recall
const genResult = await agent.generate('What is the speed of light approximately?', options);
const text = findLastTextContent(genResult.messages);
expect(text).toBeTruthy();
expect(text!.toLowerCase()).toContain('300');
await store.close();
});
});

View file

@ -0,0 +1,94 @@
import { expect, it, afterEach, describe as _describe } from 'vitest';
import { Agent, Memory } from '../../../index';
import { findLastTextContent, getModel, createSqliteMemory } from '../helpers';
// Only run when both API keys are present
const describe =
process.env.ANTHROPIC_API_KEY && process.env.OPENAI_API_KEY ? _describe : _describe.skip;
const cleanups: Array<() => void> = [];
afterEach(() => {
cleanups.forEach((fn) => fn());
cleanups.length = 0;
});
describe('semantic recall', () => {
it('recalls relevant info beyond the lastMessages window', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
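// lastMessages(3) keeps only the most recent turns in the prompt; semanticRecall
// surfaces older, relevant messages via embedding similarity (topK matches).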
const mem = new Memory()
.storage(memory)
.lastMessages(3)
.semanticRecall({ topK: 3, embedder: 'openai/text-embedding-3-small' });
const agent = new Agent('semantic-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise. Answer from your context.')
.memory(mem);
const threadId = `semantic-${Date.now()}`;
const resourceId = 'test-user';
const options = { persistence: { threadId, resourceId } };
// Turn 1: unique fact that will be pushed out of the 3-message window
await agent.generate(
'The annual rainfall in Timbuktu is approximately 200mm. Just acknowledge.',
options,
);
// Filler turns to push turn 1 out of the lastMessages window
await agent.generate('What is 2 + 2?', options);
await agent.generate('Tell me a one-word synonym for happy.', options);
await agent.generate('What color is the sky?', options);
// Ask about the fact from turn 1 — should be recalled via semantic search
const result = await agent.generate('What is the annual rainfall in Timbuktu?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('200');
});
it('works combined with freeform working memory', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
const template = '# User Context\n- **Name**:\n- **Interest**:';
const mem = new Memory()
.storage(memory)
.lastMessages(3)
.freeform(template)
.semanticRecall({ topK: 3, embedder: 'openai/text-embedding-3-small' });
const agent = new Agent('combined-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem);
const threadId = `combined-${Date.now()}`;
const resourceId = `user-${Date.now()}`;
const options = { persistence: { threadId, resourceId } };
// Turn 1: name (working memory) + unique fact (semantic recall)
await agent.generate(
'My name is Frank. Also, the capital of Bhutan is Thimphu. Just acknowledge both.',
options,
);
// Filler turns
await agent.generate('What is 3 + 3?', options);
await agent.generate('Name a primary color.', options);
await agent.generate('What day comes after Monday?', options);
// Ask about both — name from working memory, fact from semantic recall
const result = await agent.generate(
'What is my name, and what is the capital of Bhutan?',
options,
);
const text = findLastTextContent(result.messages)?.toLowerCase() ?? '';
expect(text).toContain('frank');
expect(text).toContain('thimphu');
});
});
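
// --- Illustrative sketch (not part of the test file above) --------------------
// The semantic-recall setup these tests rely on: a small recency window plus
// vector-based recall of older messages. The embedder id mirrors the one used in
// the tests; the import specifier, database path, and model id are placeholder
// assumptions.
import { Agent, Memory, SqliteMemory } from '@n8n/agents'; // assumed public entry point

async function semanticRecallSketch() {
  const store = new SqliteMemory({ url: 'file:recall-demo.db' }); // placeholder path
  const memory = new Memory()
    .storage(store)
    .lastMessages(3) // only the 3 most recent messages are included verbatim
    .semanticRecall({ topK: 3, embedder: 'openai/text-embedding-3-small' });
  const agent = new Agent('recall-demo')
    .model('anthropic/claude-haiku-4-5')
    .instructions('Be concise. Answer from your context.')
    .memory(memory);
  const options = { persistence: { threadId: 'thread-1', resourceId: 'user-1' } };
  await agent.generate('The annual rainfall in Timbuktu is approximately 200mm.', options);
  // Even once the fact falls outside the lastMessages window, semantic recall
  // can surface it again on a related question.
  return await agent.generate('What is the annual rainfall in Timbuktu?', options);
}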

View file

@ -0,0 +1,105 @@
import { describe as _describe, expect, it, afterEach } from 'vitest';
import { Agent, Memory } from '../../../index';
import { SqliteMemory } from '../../../storage/sqlite-memory';
import { describeIf, findLastTextContent, getModel, createSqliteMemory } from '../helpers';
const describe = describeIf('anthropic');
const cleanups: Array<() => void> = [];
afterEach(() => {
cleanups.forEach((fn) => fn());
cleanups.length = 0;
});
_describe('SqliteMemory saveThread upsert', () => {
it('preserves existing title and metadata when not provided', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
await memory.saveThread({
id: 'upsert-t1',
resourceId: 'user-1',
title: 'Original Title',
metadata: { key: 'value' },
});
// Upsert without title or metadata (simulates saveMessagesToThread)
await memory.saveThread({ id: 'upsert-t1', resourceId: 'user-1' });
const thread = await memory.getThread('upsert-t1');
expect(thread).not.toBeNull();
expect(thread!.title).toBe('Original Title');
expect(thread!.metadata).toEqual({ key: 'value' });
});
it('overwrites title and metadata when explicitly provided', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
await memory.saveThread({
id: 'upsert-t2',
resourceId: 'user-1',
title: 'Old Title',
metadata: { old: true },
});
await memory.saveThread({
id: 'upsert-t2',
resourceId: 'user-1',
title: 'New Title',
metadata: { new: true },
});
const thread = await memory.getThread('upsert-t2');
expect(thread!.title).toBe('New Title');
expect(thread!.metadata).toEqual({ new: true });
});
});
describe('SQLite memory integration', () => {
it('agent recalls info from previous turn with SqliteMemory', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
const mem = new Memory().storage(memory).lastMessages(10);
const agent = new Agent('sqlite-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem);
const threadId = `sqlite-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
await agent.generate('My favorite number is 42. Just acknowledge.', options);
const result = await agent.generate('What is my favorite number?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('42');
});
it('data survives a fresh SqliteMemory instance', async () => {
const { memory, cleanup, url } = createSqliteMemory();
cleanups.push(cleanup);
const mem1 = new Memory().storage(memory).lastMessages(10);
const agent1 = new Agent('persist-test-1')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem1);
const threadId = `persist-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
await agent1.generate('My favorite animal is a dolphin. Just acknowledge.', options);
// New SqliteMemory instance, same file
const memory2 = new SqliteMemory({ url });
const mem2 = new Memory().storage(memory2).lastMessages(10);
const agent2 = new Agent('persist-test-2')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(mem2);
const result = await agent2.generate('What is my favorite animal?', options);
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('dolphin');
});
});
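
// --- Illustrative sketch (not part of the test file above) --------------------
// Because SqliteMemory is file-backed, a brand-new instance pointed at the same
// file sees earlier conversations, which is what the persistence test checks.
// The import specifier, file path, and model id are placeholder assumptions.
import { Agent, Memory, SqliteMemory } from '@n8n/agents'; // assumed public entry point

async function reopenSketch() {
  const options = { persistence: { threadId: 'thread-1', resourceId: 'user-1' } };
  const writer = new Agent('writer')
    .model('anthropic/claude-haiku-4-5')
    .instructions('Be concise.')
    .memory(new Memory().storage(new SqliteMemory({ url: 'file:agents.db' })).lastMessages(10));
  await writer.generate('My favorite animal is a dolphin. Just acknowledge.', options);
  // Later, e.g. after a process restart: a fresh instance over the same file.
  const reader = new Agent('reader')
    .model('anthropic/claude-haiku-4-5')
    .instructions('Be concise.')
    .memory(new Memory().storage(new SqliteMemory({ url: 'file:agents.db' })).lastMessages(10));
  return await reader.generate('What is my favorite animal?', options);
}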

View file

@ -0,0 +1,40 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { Agent, Memory } from '../../../index';
import { describeIf, findLastTextContent, getModel } from '../helpers';
const describe = describeIf('anthropic');
describe('structured working memory', () => {
const schema = z.object({
userName: z.string().optional().describe("The user's name"),
favoriteColor: z.string().optional().describe('Favorite color'),
location: z.string().optional().describe('Where the user lives'),
});
it('agent fills structured fields across turns', async () => {
const memory = new Memory().storage('memory').lastMessages(10).structured(schema);
const agent = new Agent('structured-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `structured-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
await agent.generate('My name is Eve and I love purple.', options);
const result = await agent.generate('What is my name and favorite color?', options);
const text = findLastTextContent(result.messages)?.toLowerCase() ?? '';
expect(text).toContain('eve');
expect(text).toContain('purple');
});
it('throws when both .structured() and .freeform() are used', () => {
expect(() => {
new Memory().storage('memory').structured(schema).freeform('# Template').build();
}).toThrow(/cannot use both/i);
});
});
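
// --- Illustrative sketch (not part of the test file above) --------------------
// Structured working memory takes a Zod schema instead of a freeform template;
// the two modes are mutually exclusive, which is what the "cannot use both" test
// checks. The import specifier and model id are placeholder assumptions.
import { z } from 'zod';
import { Agent, Memory } from '@n8n/agents'; // assumed public entry point

const profileSchema = z.object({
  userName: z.string().optional().describe("The user's name"),
  favoriteColor: z.string().optional().describe('Favorite color'),
});

const structuredMemory = new Memory()
  .storage('memory') // ephemeral in-process storage, as used in the test
  .lastMessages(10)
  .structured(profileSchema); // never combined with .freeform(...); that would throw

const profileAgent = new Agent('profile-bot')
  .model('anthropic/claude-haiku-4-5')
  .instructions('Be concise.')
  .memory(structuredMemory);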

View file

@ -0,0 +1,148 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { Agent, Memory, Tool } from '../../../index';
import { describeIf, findLastTextContent, findLastToolCallContent, getModel } from '../helpers';
const describe = describeIf('anthropic');
describe('memory integration', () => {
it('recalls previous messages within the same thread', async () => {
const memory = new Memory().storage('memory').lastMessages(10);
const agent = new Agent('memory-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `test-thread-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
const result1 = await agent.generate(
'My favorite color is purple. Just acknowledge this.',
options,
);
expect(findLastTextContent(result1.messages)).toBeTruthy();
const result2 = await agent.generate('What is my favorite color?', options);
expect(findLastTextContent(result2.messages)?.toLowerCase()).toContain('purple');
});
it('isolates separate threads', async () => {
const memory = new Memory().storage('memory').lastMessages(10);
const agent = new Agent('thread-isolation-test')
.model(getModel('anthropic'))
.instructions(
'You are a helpful assistant. Be concise. If you do not know something, say "I don\'t know".',
)
.memory(memory);
const thread1 = `test-thread-1-${Date.now()}`;
const thread2 = `test-thread-2-${Date.now()}`;
await agent.generate('Remember this secret code: ALPHA-7. Just acknowledge.', {
persistence: { threadId: thread1, resourceId: 'test-user' },
});
const result2 = await agent.generate('What is the secret code I told you?', {
persistence: { threadId: thread2, resourceId: 'test-user' },
});
expect(findLastTextContent(result2.messages)?.toLowerCase()).not.toContain('alpha-7');
});
it('recalls tool results with generate()', async () => {
const memory = new Memory().storage('memory').lastMessages(20);
const lookupTool = new Tool('lookup_inventory')
.description('Look up the current inventory count for a product')
.input(
z.object({
product: z.string().describe('Product name'),
}),
)
.handler(async ({ product }) => ({
product,
count: 42,
warehouse: 'Building-7',
}));
const agent = new Agent('store-results-run-test')
.model(getModel('anthropic'))
.instructions(
'You are an inventory assistant. Use the lookup_inventory tool when asked about stock. Be concise.',
)
.tool(lookupTool)
.memory(memory);
const threadId = `test-store-results-run-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
// Turn 1: trigger the tool via generate()
const result1 = await agent.generate('How many widgets do we have in stock?', options);
expect(findLastTextContent(result1.messages)).toBeTruthy();
expect(findLastToolCallContent(result1.messages)).toBeTruthy();
// Turn 2: ask about the tool result without re-triggering the tool
const result2 = await agent.generate(
'Which warehouse are the widgets stored in? Do NOT call any tools — answer from what you already know.',
options,
);
expect(findLastTextContent(result2.messages)?.toLowerCase()).toContain('building-7');
expect(findLastToolCallContent(result2.messages)).toBeUndefined();
});
it('recalls tool results with stream()', async () => {
const memory = new Memory().storage('memory').lastMessages(20);
const lookupTool = new Tool('lookup_inventory')
.description('Look up the current inventory count for a product')
.input(
z.object({
product: z.string().describe('Product name'),
}),
)
.handler(async ({ product }) => ({
product,
count: 42,
warehouse: 'Building-7',
}));
const agent = new Agent('store-results-stream-test')
.model(getModel('anthropic'))
.instructions(
'You are an inventory assistant. Use the lookup_inventory tool when asked about stock. Be concise.',
)
.tool(lookupTool)
.memory(memory);
const threadId = `test-store-results-stream-${Date.now()}`;
const options = { persistence: { threadId, resourceId: 'test-user' } };
// Turn 1: trigger the tool via stream()
const { stream: stream1 } = await agent.stream(
'How many widgets do we have in stock?',
options,
);
// Must consume the stream fully so the tool results from the streamed turn are persisted to memory
const reader = stream1.getReader();
while (true) {
const { done } = await reader.read();
if (done) break;
}
const result1 = await agent.generate('How many widgets do we have in stock?', options);
expect(findLastToolCallContent(result1.messages)).toBeTruthy();
// Turn 2: ask about the tool result
const result2 = await agent.generate(
'Which warehouse are the widgets stored in? Do NOT call any tools — answer from what you already know.',
options,
);
expect(findLastTextContent(result2.messages)?.toLowerCase()).toContain('building-7');
expect(findLastToolCallContent(result2.messages)).toBeUndefined();
});
});

View file

@ -0,0 +1,141 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import {
describeIf,
collectStreamChunks,
getModel,
chunksOfType,
findAllToolResults,
collectTextDeltas,
} from './helpers';
import { Agent, Tool } from '../../index';
const describe = describeIf('anthropic');
describe('multi-tool-calls integration', () => {
it('correctly merges results when the same tool is called multiple times', async () => {
let callCount = 0;
const lookupTool = new Tool('lookup_price')
.description('Look up the price of a product by name')
.input(z.object({ product: z.string().describe('Product name') }))
.output(z.object({ product: z.string(), price: z.number() }))
.handler(async ({ product }) => {
callCount++;
const prices: Record<string, number> = {
apple: 1.5,
banana: 0.75,
cherry: 3.0,
};
return { product, price: prices[product.toLowerCase()] ?? 0 };
});
const agent = new Agent('multi-call-test')
.model(getModel('anthropic'))
.instructions(
'You are a price checker. When asked about prices, use the lookup_price tool for EACH product separately. Be concise.',
)
.tool(lookupTool);
const { stream: fullStream } = await agent.stream(
'What are the prices of apple, banana, and cherry? Look up each one.',
);
const chunks = await collectStreamChunks(fullStream);
const messageChunks = chunksOfType(chunks, 'message');
const toolCallResults = findAllToolResults(messageChunks.map((c) => c.message));
// Should have called the tool multiple times
const priceCalls = toolCallResults.filter((tc) => tc.toolName === 'lookup_price');
expect(priceCalls.length).toBeGreaterThanOrEqual(2);
// Each call should have its own correct output (not all pointing to the first result)
const outputs = priceCalls.map((tc) => tc.result as { product: string; price: number });
// Verify that different products got different prices (index-based merging works)
const uniquePrices = new Set(outputs.map((o) => o.price));
expect(uniquePrices.size).toBeGreaterThanOrEqual(2);
// The response should mention the prices
const text = collectTextDeltas(chunks);
expect(text).toBeTruthy();
expect(text).toMatch(/apple/i);
expect(text).toMatch(/banana/i);
expect(text).toMatch(/cherry/i);
expect(text).toMatch(/1\.5/i);
expect(text).toMatch(/0\.75/i);
expect(text).toMatch(/3\.0/i);
});
it('correctly merges results when different tools are called in sequence', async () => {
const addTool = new Tool('add')
.description('Add two numbers')
.input(z.object({ a: z.number(), b: z.number() }))
.handler(async ({ a, b }) => ({ result: a + b }));
const multiplyTool = new Tool('multiply')
.description('Multiply two numbers')
.input(z.object({ a: z.number(), b: z.number() }))
.handler(async ({ a, b }) => ({ result: a * b }));
const agent = new Agent('mixed-tools-test')
.model(getModel('anthropic'))
.instructions(
'You are a calculator. Use the add tool for addition and multiply tool for multiplication. Be concise.',
)
.tool(addTool)
.tool(multiplyTool);
const { stream: fullStream } = await agent.stream('What is 3 + 4 and also what is 5 * 6?');
const chunks = await collectStreamChunks(fullStream);
const messageChunks = chunksOfType(chunks, 'message');
const toolCallResults = findAllToolResults(messageChunks.map((c) => c.message));
const toolCalls = toolCallResults.filter(
(tc) => tc.toolName === 'add' || tc.toolName === 'multiply',
);
expect(toolCalls.length).toBeGreaterThanOrEqual(2);
const addCall = toolCallResults.find((tc) => tc.toolName === 'add');
const multiplyCall = toolCallResults.find((tc) => tc.toolName === 'multiply');
expect(addCall).toBeDefined();
expect(multiplyCall).toBeDefined();
expect((addCall!.result as { result: number }).result).toBe(7);
expect((multiplyCall!.result as { result: number }).result).toBe(30);
});
it('correctly merges results via the run() path', async () => {
const lookupTool = new Tool('get_length')
.description('Get the length of a string')
.input(z.object({ text: z.string() }))
.output(z.object({ text: z.string(), length: z.number() }))
.handler(async ({ text }) => ({ text, length: text.length }));
const agent = new Agent('multi-call-run-test')
.model(getModel('anthropic'))
.instructions(
'You are a string utility. When asked about string lengths, use the get_length tool for EACH string separately. Be concise.',
)
.tool(lookupTool);
const { stream: fullStream } = await agent.stream(
'What are the lengths of "hello" and "world"? Look up each one separately.',
);
const chunks = await collectStreamChunks(fullStream);
const messageChunks = chunksOfType(chunks, 'message');
const toolCallResults = findAllToolResults(messageChunks.map((c) => c.message));
const lengthCalls = toolCallResults.filter((tc) => tc.toolName === 'get_length');
expect(lengthCalls.length).toBeGreaterThanOrEqual(2);
// Each should have correct output
for (const call of lengthCalls) {
const output = call.result as { text: string; length: number };
expect(output.length).toBe(output.text.length);
}
});
});

View file

@ -0,0 +1,126 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
getModel,
findLastTextContent,
} from './helpers';
import { Agent } from '../../index';
import type { Message, StreamChunk } from '../../index';
const describe = describeIf('anthropic');
/** Convert a base64 string to Uint8Array for the AI SDK file part. */
function base64ToUint8Array(base64: string): Uint8Array {
return Uint8Array.from(Buffer.from(base64, 'base64'));
}
// Valid 1×1 red PNG pixel
const RED_PIXEL_BASE64 =
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGP4z8AAAAMBAQDJ/pLvAAAAAElFTkSuQmCC';
// Valid 1×1 blue PNG pixel
const BLUE_PIXEL_BASE64 =
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGNgYPgPAAEDAQAIicLsAAAAAElFTkSuQmCC';
describe('multimodal integration', () => {
it('accepts an image via binary data and references it in the response', async () => {
const messages: Message[] = [
{
role: 'user',
content: [
{
type: 'file',
mediaType: 'image/png',
data: base64ToUint8Array(RED_PIXEL_BASE64),
},
{
type: 'text',
text: 'What color is this image? Reply with just the color name, nothing else.',
},
],
},
];
const agent = new Agent('vision-test')
.model(getModel('anthropic'))
.instructions('You are a vision assistant. Describe images concisely.');
const { stream: fullStream } = await agent.stream(messages);
const chunks = await collectStreamChunks(fullStream);
const textChunks = chunksOfType(chunks, 'text-delta') as Array<
StreamChunk & { type: 'text-delta' }
>;
expect(textChunks.length).toBeGreaterThan(0);
const text = textChunks.map((c) => c.delta).join('');
expect(text).toBeTruthy();
expect(text).toMatch(/red/i);
});
it('accepts multiple content blocks (text + image) in a single message', async () => {
const messages: Message[] = [
{
role: 'user',
content: [
{ type: 'text', text: 'I have two questions.' },
{
type: 'file',
mediaType: 'image/png',
data: base64ToUint8Array(BLUE_PIXEL_BASE64),
},
{
type: 'text',
text: 'Question 1: Can you see an image above? Answer only YES or NO. Question 2: What is 2+2? Answer both briefly.',
},
],
},
];
const agent = new Agent('multi-content-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant with vision capabilities. Be concise.');
const { stream: fullStream } = await agent.stream(messages);
const chunks = await collectStreamChunks(fullStream);
const textChunks = chunksOfType(chunks, 'text-delta') as Array<
StreamChunk & { type: 'text-delta' }
>;
expect(textChunks.length).toBeGreaterThan(0);
const text = textChunks.map((c) => c.delta).join('');
expect(text).toBeTruthy();
expect(text).toMatch(/4/);
expect(text).toMatch(/yes/i);
});
it('passes image content through the run() path (non-streaming)', async () => {
const messages: Message[] = [
{
role: 'user',
content: [
{
type: 'file',
mediaType: 'image/png',
data: base64ToUint8Array(RED_PIXEL_BASE64),
},
{
type: 'text',
text: 'What color is this image? Reply with just the color name.',
},
],
},
];
const agent = new Agent('vision-run-test')
.model(getModel('anthropic'))
.instructions('You are a vision assistant. Be concise.');
const result = await agent.generate(messages);
const text = findLastTextContent(result.messages);
expect(text).toBeTruthy();
expect(text).toMatch(/red/i);
});
});

View file

@ -0,0 +1,164 @@
import { expect, it, afterEach } from 'vitest';
import { z } from 'zod';
import { describeIf, getModel, createSqliteMemory } from './helpers';
import { Agent, Memory, Tool } from '../../index';
import type { AgentMessage } from '../../index';
const describe = describeIf('anthropic');
describe('orphaned tool messages in memory', () => {
const cleanups: Array<() => void> = [];
afterEach(() => {
for (const fn of cleanups) fn();
cleanups.length = 0;
});
/**
* Build a dummy tool so the agent has a valid tool schema.
* The tool itself should never be called in these tests.
*/
function buildLookupTool() {
return new Tool('lookup')
.description('Look up data by id')
.input(z.object({ id: z.string() }))
.output(z.object({ count: z.number() }))
.handler(async () => ({ count: 99 }));
}
/**
* Seed memory with a conversation that has tool-call / tool-result pairs
* surrounded by plain user/assistant exchanges.
*
* Message layout (indices 0–7):
* 0: user "How many widgets?"
* 1: assistant text + tool-call(call_1)
* 2: tool tool-result(call_1)
* 3: assistant "There are 10 widgets"
* 4: user "What about gadgets?"
* 5: assistant text + tool-call(call_2)
* 6: tool tool-result(call_2)
* 7: assistant "There are 5 gadgets"
*/
function buildSeedMessages(): AgentMessage[] {
return [
{
role: 'user',
content: [{ type: 'text', text: 'How many widgets do we have?' }],
},
{
role: 'assistant',
content: [
{ type: 'text', text: 'Let me look that up.' },
{ type: 'tool-call', toolCallId: 'call_1', toolName: 'lookup', input: { id: 'widgets' } },
],
},
{
role: 'tool',
content: [
{ type: 'tool-result', toolCallId: 'call_1', toolName: 'lookup', result: { count: 10 } },
],
},
{
role: 'assistant',
content: [{ type: 'text', text: 'There are 10 widgets in stock.' }],
},
{
role: 'user',
content: [{ type: 'text', text: 'What about gadgets?' }],
},
{
role: 'assistant',
content: [
{ type: 'text', text: 'Let me check.' },
{ type: 'tool-call', toolCallId: 'call_2', toolName: 'lookup', input: { id: 'gadgets' } },
],
},
{
role: 'tool',
content: [
{ type: 'tool-result', toolCallId: 'call_2', toolName: 'lookup', result: { count: 5 } },
],
},
{
role: 'assistant',
content: [{ type: 'text', text: 'There are 5 gadgets in stock.' }],
},
];
}
it('handles orphaned tool results when tool-call message is truncated from history', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
const threadId = 'thread-orphan-result';
// Seed 8 messages into the thread
await memory.saveMessages({ threadId, messages: buildSeedMessages() });
// lastMessages=6 → loads messages 2–7
// Message at index 2 is a tool-result for call_1, but the matching
// assistant+tool-call (index 1) is truncated. This is an orphaned tool result.
const mem = new Memory().storage(memory).lastMessages(6);
const agent = new Agent('orphan-result-test')
.model(getModel('anthropic'))
.instructions('You are an inventory assistant. Use lookup to check stock. Be concise.')
.tool(buildLookupTool())
.memory(mem);
// This should NOT throw even though history contains an orphaned tool-result
const result = await agent.generate('Can you summarize what we discussed?', {
persistence: { threadId, resourceId: 'test' },
});
expect(result.finishReason).toBe('stop');
});
it('handles orphaned tool calls when tool-result message is truncated from history', async () => {
const { memory, cleanup } = createSqliteMemory();
cleanups.push(cleanup);
const threadId = 'thread-orphan-call';
// Store a conversation where the last saved message is an assistant
// with a tool-call but the tool-result was never persisted (simulating
// a partial save / interrupted turn).
const messages: AgentMessage[] = [
{
role: 'user',
content: [{ type: 'text', text: 'How many widgets?' }],
},
{
role: 'assistant',
content: [
{ type: 'text', text: 'Checking inventory.' },
{
type: 'tool-call',
toolCallId: 'call_orphan',
toolName: 'lookup',
input: { id: 'widgets' },
},
],
},
];
await memory.saveMessages({ threadId, messages });
const mem = new Memory().storage(memory).lastMessages(10);
const agent = new Agent('orphan-call-test')
.model(getModel('anthropic'))
.instructions('You are an inventory assistant. Use lookup to check stock. Be concise.')
.tool(buildLookupTool())
.memory(mem);
// This should NOT throw even though history has a tool-call with no result
const result = await agent.generate('Actually, never mind. How are you?', {
persistence: { threadId, resourceId: 'test' },
});
expect(result.finishReason).toBe('stop');
});
});

View file

@ -0,0 +1,65 @@
import { expect, it } from 'vitest';
import { describeIf, collectStreamChunks, getModel, chunksOfType } from './helpers';
import { Agent } from '../../index';
const describe = describeIf('anthropic');
describe('provider metadata integration', () => {
it('includes finishReason in finish chunks', async () => {
const agent = new Agent('metadata-test')
.model(getModel('anthropic'))
.instructions('Reply with exactly: "OK". Nothing else.');
const { stream: fullStream } = await agent.stream('Acknowledge');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
for (const chunk of finishChunks) {
if (chunk.type === 'finish') {
expect(chunk.finishReason).toBeDefined();
expect(['stop', 'length', 'content-filter', 'tool-calls', 'error', 'other']).toContain(
chunk.finishReason,
);
}
}
});
it('finish reason is "stop" for a normal completion', async () => {
const agent = new Agent('stop-reason-test')
.model(getModel('anthropic'))
.instructions('Reply with exactly: "Done". Nothing else.');
const { stream: fullStream } = await agent.stream('Say done');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
// The last finish chunk should be 'stop'
const lastFinish = finishChunks[finishChunks.length - 1];
expect(lastFinish).toBeDefined();
if (lastFinish?.type === 'finish') {
expect(lastFinish.finishReason).toBe('stop');
}
});
it('result contains usage metadata from the provider', async () => {
const agent = new Agent('usage-metadata-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.');
const { stream: fullStream } = await agent.stream('What is 1+1?');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
const usage = finishChunks[0].usage;
expect(usage).toBeDefined();
expect(typeof usage!.promptTokens).toBe('number');
expect(typeof usage!.completionTokens).toBe('number');
expect(typeof usage!.totalTokens).toBe('number');
expect(usage!.totalTokens).toBeGreaterThan(0);
});
});

View file

@ -0,0 +1,225 @@
import { expect, it } from 'vitest';
import { collectStreamChunks, chunksOfType, describeIf } from './helpers';
import { Agent } from '../../index';
const describe = describeIf('anthropic');
/**
* Integration tests for provider options: prompt caching, deep merge with
* thinking, external abort signal, and model config object form.
*
* Prompt caching requires a minimum prompt length (1024 tokens for most Anthropic
* models, 2048 for Haiku), so we generate a long instruction string.
*/
// A system prompt long enough to be eligible for Anthropic prompt caching.
// Claude Haiku requires at least 2048 tokens for caching to activate.
const LONG_SYSTEM_PROMPT =
'You are a concise assistant. Reply in one short sentence. ' +
'Here is additional context to ensure the prompt is long enough for caching: ' +
Array.from(
{ length: 500 },
(_, i) => `Rule ${i + 1}: Always be helpful and accurate in your responses.`,
).join(' ');
// ---------------------------------------------------------------------------
// Prompt caching — instruction-level
// ---------------------------------------------------------------------------
describe('prompt caching via instruction providerOptions', () => {
it('second call with cached instructions reports cacheRead tokens', async () => {
const agent = new Agent('cache-instructions-test')
.model('anthropic/claude-haiku-4-5')
.instructions(LONG_SYSTEM_PROMPT, {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
// First call — creates the cache entry
const result1 = await agent.generate('Say hello', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
});
expect(result1.finishReason).toBe('stop');
// Second call — should read from cache
const result2 = await agent.generate('Say goodbye', {
persistence: { resourceId: 'user1', threadId: 'thread2' },
});
expect(result2.finishReason).toBe('stop');
// At least one of the two calls should show cache activity (write or read)
const write1 = result1.usage?.inputTokenDetails?.cacheWrite ?? 0;
const read2 = result2.usage?.inputTokenDetails?.cacheRead ?? 0;
expect(write1 + read2).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// Prompt caching — call-level providerOptions
// ---------------------------------------------------------------------------
describe('prompt caching via call-level providerOptions', () => {
it('accepts call-level cacheControl providerOptions without error', async () => {
// Call-level cacheControl applies to the API request, not individual messages.
// For Anthropic, prompt caching at call level needs instruction-level cacheControl
// to mark which content to cache. This test verifies call-level options don't error.
const agent = new Agent('cache-call-level-test')
.model('anthropic/claude-haiku-4-5')
.instructions(LONG_SYSTEM_PROMPT);
const result = await agent.generate('Say hello', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
expect(result.finishReason).toBe('stop');
expect(result.messages.length).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// Prompt caching — streaming path
// ---------------------------------------------------------------------------
describe('prompt caching via stream', () => {
it('second stream with cached instructions reports cacheRead tokens in finish chunk', async () => {
const agent = new Agent('cache-stream-test')
.model('anthropic/claude-haiku-4-5')
.instructions(LONG_SYSTEM_PROMPT, {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
// First call — creates the cache entry
const { stream: stream1 } = await agent.stream('Say hello', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
});
await collectStreamChunks(stream1);
// Second call — should read from cache
const { stream: stream2 } = await agent.stream('Say goodbye', {
persistence: { resourceId: 'user1', threadId: 'thread2' },
});
const chunks = await collectStreamChunks(stream2);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const usage = finishChunks[0].usage;
expect(usage).toBeDefined();
// At least one stream should show cache activity
const write = usage!.inputTokenDetails?.cacheWrite ?? 0;
const read = usage!.inputTokenDetails?.cacheRead ?? 0;
expect(write + read).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// Thinking + cacheControl coexistence (deep merge)
// ---------------------------------------------------------------------------
describe('thinking + cacheControl coexistence', () => {
it('both thinking and prompt caching work simultaneously', async () => {
const agent = new Agent('thinking-cache-test')
.model('anthropic', 'claude-sonnet-4-5')
.thinking('anthropic', { budgetTokens: 5000 })
.instructions(LONG_SYSTEM_PROMPT, {
providerOptions: {
anthropic: { cacheControl: { type: 'ephemeral' } },
},
});
// First call — cache miss, but thinking should work
const { stream: stream1 } = await agent.stream('What is 7 * 8?', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
});
const chunks1 = await collectStreamChunks(stream1);
// Should have reasoning chunks (thinking is enabled)
const reasoningChunks = chunksOfType(chunks1, 'reasoning-delta');
expect(reasoningChunks.length).toBeGreaterThan(0);
// Second call — cache hit, thinking should still work
const { stream: stream2 } = await agent.stream('What is 12 * 13?', {
persistence: { resourceId: 'user1', threadId: 'thread2' },
});
const chunks2 = await collectStreamChunks(stream2);
// Should still have reasoning
const reasoning2 = chunksOfType(chunks2, 'reasoning-delta');
expect(reasoning2.length).toBeGreaterThan(0);
// At least one call should show cache activity
const finishChunks = chunksOfType(chunks2, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const usage = finishChunks[0].usage;
expect(usage).toBeDefined();
const write = usage!.inputTokenDetails?.cacheWrite ?? 0;
const read = usage!.inputTokenDetails?.cacheRead ?? 0;
expect(write + read).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// External abort signal
// ---------------------------------------------------------------------------
describe('external abort signal', () => {
it('cancels a generate() call via external AbortSignal', async () => {
const agent = new Agent('abort-signal-test')
.model('anthropic/claude-haiku-4-5')
.instructions('You are a helpful assistant. Tell me a very long story.');
const controller = new AbortController();
setTimeout(() => controller.abort(), 100);
const result = await agent.generate('Tell me a very long detailed story about a dragon', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
abortSignal: controller.signal,
});
expect(result.finishReason).toBe('error');
expect(agent.getState().status).toBe('cancelled');
});
it('cancels a stream() call via external AbortSignal', async () => {
const agent = new Agent('abort-stream-signal-test')
.model('anthropic/claude-haiku-4-5')
.instructions('You are a helpful assistant. Tell me a very long story.');
const controller = new AbortController();
setTimeout(() => controller.abort(), 100);
const { stream } = await agent.stream('Tell me a very long detailed story about a dragon', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
abortSignal: controller.signal,
});
const chunks = await collectStreamChunks(stream);
const errorChunks = chunks.filter((c) => c.type === 'error');
expect(errorChunks.length).toBeGreaterThan(0);
});
});
// ---------------------------------------------------------------------------
// Model config object form
// ---------------------------------------------------------------------------
describe('model config object form', () => {
it('generates with model config object', async () => {
const agent = new Agent('model-config-test')
.model({ id: 'anthropic/claude-haiku-4-5' })
.instructions('You are a concise assistant. Reply in one short sentence.');
const result = await agent.generate('Say hello', {
persistence: { resourceId: 'user1', threadId: 'thread1' },
});
expect(result.finishReason).toBe('stop');
expect(result.messages.length).toBeGreaterThan(0);
});
});
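
// --- Illustrative sketch (not part of the test file above) --------------------
// Instruction-level providerOptions is how these tests mark the long system
// prompt as cacheable; cache activity then shows up in usage.inputTokenDetails.
// The import specifier and function shape are placeholder assumptions.
import { Agent } from '@n8n/agents'; // assumed public entry point

async function promptCachingSketch(longSystemPrompt: string) {
  const agent = new Agent('cached-assistant')
    .model('anthropic/claude-haiku-4-5')
    .instructions(longSystemPrompt, {
      providerOptions: { anthropic: { cacheControl: { type: 'ephemeral' } } },
    });
  const first = await agent.generate('Say hello', {
    persistence: { resourceId: 'user1', threadId: 'thread1' },
  });
  const second = await agent.generate('Say goodbye', {
    persistence: { resourceId: 'user1', threadId: 'thread2' },
  });
  // The first call typically writes the cache entry; the second reads from it.
  const cacheWrite = first.usage?.inputTokenDetails?.cacheWrite ?? 0;
  const cacheRead = second.usage?.inputTokenDetails?.cacheRead ?? 0;
  return { cacheWrite, cacheRead };
}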

View file

@ -0,0 +1,132 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import {
describeIf,
collectStreamChunks,
getModel,
chunksOfType,
collectTextDeltas,
findAllToolCalls,
} from './helpers';
import { Agent, Tool, providerTools, type StreamChunk } from '../../index';
const describe = describeIf('anthropic');
/**
* Instructions that force the model to use web search before answering.
* Required because the model may otherwise answer from its training data.
*/
const WEB_SEARCH_INSTRUCTIONS =
'You MUST call the web_search tool before answering any question, even if you think you already know the answer. Never answer without searching first.';
describe('provider tools integration', () => {
it('generate: the model calls the web search provider tool', async () => {
const agent = new Agent('provider-tool-generate-test')
.model(getModel('anthropic'))
.instructions(WEB_SEARCH_INSTRUCTIONS)
.providerTool(providerTools.anthropicWebSearch());
const result = await agent.generate('What is the weather in Tokyo?');
expect(result.finishReason).toBe('stop');
expect(result.pendingSuspend).toBeUndefined();
const toolCalls = findAllToolCalls(result.messages);
const webSearchCall = toolCalls.find((tc) => tc.toolName.includes('web_search'));
expect(webSearchCall).toBeDefined();
});
it('stream: the model calls the web search provider tool without suspending', async () => {
const agent = new Agent('provider-tool-stream-test')
.model(getModel('anthropic'))
.instructions(WEB_SEARCH_INSTRUCTIONS)
.providerTool(providerTools.anthropicWebSearch());
const { stream } = await agent.stream('What is the weather in Tokyo?');
const chunks = await collectStreamChunks(stream);
// Provider tools must never cause a suspension
const suspendChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendChunks.length).toBe(0);
// Must finish cleanly
const finishChunks = chunksOfType(chunks, 'finish');
const lastFinish = finishChunks[finishChunks.length - 1];
expect(lastFinish?.type === 'finish' && lastFinish.finishReason).toBe('stop');
// Collect tool calls from message chunks
const messageChunks = chunksOfType(chunks, 'message');
const allMessages = messageChunks.map((c) => c.message);
const toolCalls = findAllToolCalls(allMessages);
const webSearchCall = toolCalls.find((tc) => tc.toolName.includes('web_search'));
expect(webSearchCall).toBeDefined();
// Must include a text response
const text = collectTextDeltas(chunks);
expect(text).toBeTruthy();
});
it('provider tool executes without interruption while a mixed-in interruptible tool suspends', async () => {
const saveToDbTool = new Tool('save_to_db')
.description('Save weather data to the database.')
.input(z.object({ data: z.string().describe('The data to save') }))
.output(z.object({ saved: z.boolean() }))
.suspend(z.object({ message: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ data }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Save "${data}" to the database?` });
}
return { saved: ctx.resumeData.approved };
});
const agent = new Agent('mixed-provider-hitl-test')
.model(getModel('anthropic'))
.instructions(
'When asked about weather: first search the web for current weather, then call save_to_db with the result. You MUST call both tools.',
)
.providerTool(providerTools.anthropicWebSearch())
.tool(saveToDbTool)
.checkpoint('memory');
const { stream } = await agent.stream(
'Get the current weather in London and save the result to the database.',
);
const chunks = await collectStreamChunks(stream);
// The web search provider tool must NOT cause a suspension
// Only save_to_db (the interruptible tool) should suspend
const suspendChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendChunks.length).toBe(1);
const suspended = suspendChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
expect(suspended.toolName).toBe('save_to_db');
expect(suspended.runId).toBeTruthy();
expect(suspended.toolCallId).toBeTruthy();
// The web search provider tool call should appear in the message history
const messageChunks = chunksOfType(chunks, 'message');
const toolCalls = findAllToolCalls(messageChunks.map((c) => c.message));
const webSearchCall = toolCalls.find((tc) => tc.toolName.includes('web_search'));
expect(webSearchCall).toBeDefined();
// Resume with approval — agent should complete cleanly
const resumeStream = await agent.resume(
'stream',
{ approved: true },
{
runId: suspended.runId!,
toolCallId: suspended.toolCallId!,
},
);
const resumeChunks = await collectStreamChunks(resumeStream.stream);
const errorChunks = resumeChunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(resumeChunks, 'finish');
const lastFinish = finishChunks[finishChunks.length - 1];
expect(lastFinish?.type === 'finish' && lastFinish.finishReason).toBe('stop');
});
});
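
// --- Illustrative sketch (not part of the test file above) --------------------
// Provider-executed tools such as Anthropic web search run inside the provider and
// never suspend; only locally defined tools with .suspend()/.resume() schemas do.
// The import specifier and model id are placeholder assumptions.
import { z } from 'zod';
import { Agent, Tool, providerTools } from '@n8n/agents'; // assumed public entry point

const saveTool = new Tool('save_to_db')
  .description('Save data to the database.')
  .input(z.object({ data: z.string() }))
  .output(z.object({ saved: z.boolean() }))
  .suspend(z.object({ message: z.string() }))
  .resume(z.object({ approved: z.boolean() }))
  .handler(async ({ data }, ctx) => {
    if (!ctx.resumeData) {
      return await ctx.suspend({ message: `Save "${data}" to the database?` });
    }
    return { saved: ctx.resumeData.approved };
  });

const researcher = new Agent('researcher')
  .model('anthropic/claude-haiku-4-5')
  .instructions('Search the web for the answer, then save it with save_to_db.')
  .providerTool(providerTools.anthropicWebSearch())
  .tool(saveTool)
  .checkpoint('memory'); // needed so the suspended run can be resumed later

// When a 'tool-call-suspended' chunk arrives, its runId/toolCallId feed into
// researcher.resume('stream', { approved: true }, { runId, toolCallId }).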

View file

@ -0,0 +1,221 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { collectStreamChunks, chunksOfType, describeIf, getModel } from './helpers';
import type { StreamChunk } from './helpers';
import { Agent, Tool } from '../../index';
import type { CheckpointStore, SerializableAgentState } from '../../types';
const describe = describeIf('anthropic');
/**
* A minimal CheckpointStore backed by a plain Map so it can be shared across
* agent instances to simulate durable external storage (database, Redis, etc.).
*/
class InMemoryCheckpointStore implements CheckpointStore {
private store = new Map<string, SerializableAgentState>();
async save(key: string, state: SerializableAgentState): Promise<void> {
this.store.set(key, structuredClone(state));
}
async load(key: string): Promise<SerializableAgentState | undefined> {
const state = this.store.get(key);
return state ? structuredClone(state) : undefined;
}
async delete(key: string): Promise<void> {
this.store.delete(key);
}
get size(): number {
return this.store.size;
}
}
/**
* Build an agent that has a delete_file tool that always suspends on the first
* call and resumes with approval/denial on the second.
*/
function buildDeleteAgent(checkpointStore: CheckpointStore): Agent {
const deleteTool = new Tool('delete_file')
.description('Delete a file at the given path')
.input(z.object({ path: z.string().describe('File path to delete') }))
.output(z.object({ deleted: z.boolean(), path: z.string() }))
.suspend(z.object({ message: z.string(), severity: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ path }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
}
if (!ctx.resumeData.approved) return { deleted: false, path };
return { deleted: true, path };
});
return new Agent('file-manager')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. When asked to delete a file, use the delete_file tool. After the tool result, confirm what happened concisely.',
)
.tool(deleteTool)
.checkpoint(checkpointStore);
}
describe('state restore after suspension', () => {
it('resumes with generate after agent instance is destroyed and recreated', async () => {
const checkpointStore = new InMemoryCheckpointStore();
// --- Agent 1: run until suspended ---
let suspendedRunId: string;
let suspendedToolCallId: string;
{
const agent1 = buildDeleteAgent(checkpointStore);
const result = await agent1.generate('Delete the file /tmp/important.log');
expect(result.finishReason).toBe('tool-calls');
expect(result.pendingSuspend).toBeDefined();
suspendedRunId = result.pendingSuspend![0].runId;
suspendedToolCallId = result.pendingSuspend![0].toolCallId;
expect(suspendedRunId).toBeTruthy();
expect(suspendedToolCallId).toBeTruthy();
// Checkpoint store now holds the suspended state
expect(checkpointStore.size).toBe(1);
// agent1 goes out of scope here — its in-flight Map is gone
}
// --- Agent 2: freshly created, loads state from the shared CheckpointStore ---
const agent2 = buildDeleteAgent(checkpointStore);
const result2 = await agent2.resume(
'generate',
{ approved: true },
{ runId: suspendedRunId, toolCallId: suspendedToolCallId },
);
expect(result2.finishReason).not.toBe('error');
expect(result2.finishReason).not.toBe('tool-calls');
// The resumed result should contain a text response from the assistant
const assistantMessages = result2.messages.filter((m) => 'role' in m && m.role === 'assistant');
expect(assistantMessages.length).toBeGreaterThan(0);
const hasText = assistantMessages.some(
(m) => 'content' in m && m.content.some((c) => c.type === 'text'),
);
expect(hasText).toBe(true);
// Checkpoint should have been cleaned up after successful resumption
expect(checkpointStore.size).toBe(0);
});
it('resumes with stream after agent instance is destroyed and recreated', async () => {
const checkpointStore = new InMemoryCheckpointStore();
let suspendedRunId: string;
let suspendedToolCallId: string;
{
const agent1 = buildDeleteAgent(checkpointStore);
const { stream } = await agent1.stream('Delete the file /tmp/data.csv');
const chunks = await collectStreamChunks(stream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBe(1);
const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
expect(suspended.toolName).toBe('delete_file');
suspendedRunId = suspended.runId!;
suspendedToolCallId = suspended.toolCallId!;
// State is persisted in the external store
expect(checkpointStore.size).toBe(1);
// agent1 is destroyed here
}
// --- Agent 2: new instance, same checkpoint store ---
const agent2 = buildDeleteAgent(checkpointStore);
const resumedStream = await agent2.resume(
'stream',
{ approved: true },
{ runId: suspendedRunId, toolCallId: suspendedToolCallId },
);
const resumedChunks = await collectStreamChunks(resumedStream.stream);
// No error chunks
const errorChunks = resumedChunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
// Stream must contain the tool result message
const toolResultChunks = resumedChunks.filter(
(c) =>
c.type === 'message' &&
'message' in c &&
'content' in (c.message as object) &&
(c.message as { content: Array<{ type: string }> }).content.some(
(part) => part.type === 'tool-result',
),
);
expect(toolResultChunks.length).toBeGreaterThan(0);
// Stream must end with a finish chunk (not error)
const finishChunks = chunksOfType(resumedChunks, 'finish') as Array<
StreamChunk & { type: 'finish' }
>;
expect(finishChunks.length).toBeGreaterThan(0);
expect(finishChunks[0].finishReason).not.toBe('error');
// At least one text-delta should arrive (the LLM's final response)
const textDeltas = chunksOfType(resumedChunks, 'text-delta');
expect(textDeltas.length).toBeGreaterThan(0);
});
it('correctly restores message history so the LLM has full context', async () => {
const checkpointStore = new InMemoryCheckpointStore();
let suspendedRunId: string;
let suspendedToolCallId: string;
let originalPath: string;
{
originalPath = '/tmp/critical-data.db';
const agent1 = buildDeleteAgent(checkpointStore);
const result = await agent1.generate(`Delete the file ${originalPath}`);
expect(result.pendingSuspend).toBeDefined();
suspendedRunId = result.pendingSuspend![0].runId;
suspendedToolCallId = result.pendingSuspend![0].toolCallId;
}
const agent2 = buildDeleteAgent(checkpointStore);
const result2 = await agent2.resume(
'generate',
{ approved: true },
{ runId: suspendedRunId, toolCallId: suspendedToolCallId },
);
expect(result2.finishReason).not.toBe('error');
// The assistant response should reference the original file path,
// proving the full conversation context was restored correctly
const textContent = result2.messages
.filter((m) => 'role' in m && m.role === 'assistant')
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
expect(textContent.length).toBeGreaterThan(0);
// The LLM should confirm what happened (mentioning the file or deletion)
expect(textContent.toLowerCase()).toMatch(/delete|delet|remov|file/);
});
});
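
// --- Illustrative sketch (not part of the test file above) --------------------
// Any object with save/load/delete can serve as the CheckpointStore, so a durable
// backend (database, Redis, ...) lets a freshly created agent resume a run that a
// previous instance suspended. The import path for the types and the builder
// callback are placeholder assumptions.
import type { Agent, CheckpointStore, SerializableAgentState } from '@n8n/agents';

class MapCheckpointStore implements CheckpointStore {
  private entries = new Map<string, SerializableAgentState>();
  async save(key: string, state: SerializableAgentState) {
    this.entries.set(key, structuredClone(state));
  }
  async load(key: string) {
    return this.entries.get(key);
  }
  async delete(key: string) {
    this.entries.delete(key);
  }
}

async function resumeElsewhereSketch(buildAgent: (store: CheckpointStore) => Agent) {
  const store = new MapCheckpointStore();
  const first = buildAgent(store);
  const result = await first.generate('Delete the file /tmp/example.log');
  const { runId, toolCallId } = result.pendingSuspend![0];
  // A completely new instance, e.g. in another request, resumes from the shared store.
  const second = buildAgent(store);
  return await second.resume('generate', { approved: true }, { runId, toolCallId });
}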

View file

@ -0,0 +1,72 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { describeIf, getModel } from './helpers';
import { Agent, Tool } from '../../index';
const describe = describeIf('anthropic');
describe('stream timing', () => {
it('tool-call-delta chunks arrive incrementally (not all buffered)', async () => {
const agent = new Agent('timing-test')
.model(getModel('anthropic'))
.instructions(
'When asked to write code, call the set_code tool with the code. Write at least 10 lines.',
)
.tool(
new Tool('set_code')
.description('Set code in the editor')
.input(
z.object({
code: z.string().describe('The complete source code'),
}),
)
.providerOptions({ anthropic: { eagerInputStreaming: true } })
.handler(async ({ code }) => ({ ok: true, length: code.length })),
);
const result = await agent.stream(
'Write a TypeScript function that implements bubble sort. Use the set_code tool.',
);
const reader = result.stream.getReader();
// Track timestamps of each reader.read() that returns a tool-call-delta
// This measures when the reader YIELDS each chunk, not when the agent enqueues it.
const deltaReadTimes: number[] = [];
const start = Date.now();
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = value;
if (chunk.type === 'tool-call-delta' && (chunk as { name?: string }).name === 'set_code') {
deltaReadTimes.push(Date.now() - start);
}
}
expect(deltaReadTimes.length).toBeGreaterThan(0);
console.log(`set_code delta reads: ${deltaReadTimes.length}`);
if (deltaReadTimes.length > 1) {
const first = deltaReadTimes[0];
const last = deltaReadTimes[deltaReadTimes.length - 1];
const spread = last - first;
console.log(`Time spread: ${spread}ms (first read: ${first}ms, last read: ${last}ms)`);
// Count how many distinct timestamps (ms resolution)
const uniqueTimes = new Set(deltaReadTimes).size;
console.log(`Unique timestamps: ${uniqueTimes} out of ${deltaReadTimes.length} reads`);
// If truly streaming: spread should be significant (>500ms for code generation)
// If buffered: spread will be near 0 and most reads share the same timestamp
const bufferingRatio = uniqueTimes / deltaReadTimes.length;
console.log(`Buffering ratio: ${(bufferingRatio * 100).toFixed(1)}% unique timestamps`);
console.log(
bufferingRatio < 0.1
? 'BUFFERED: The agent releases all deltas in one burst'
: 'STREAMING: Deltas arrive incrementally',
);
}
});
});
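
// --- Illustrative sketch (not part of the test file above) --------------------
// Tool-level providerOptions is how the timing test opts a single tool into eager
// input streaming, so tool-call-delta chunks arrive while the arguments are still
// being generated. The import specifier and model id are placeholder assumptions.
import { z } from 'zod';
import { Agent, Tool } from '@n8n/agents'; // assumed public entry point

const setCode = new Tool('set_code')
  .description('Set code in the editor')
  .input(z.object({ code: z.string().describe('The complete source code') }))
  .providerOptions({ anthropic: { eagerInputStreaming: true } })
  .handler(async ({ code }) => ({ ok: true, length: code.length }));

async function countToolDeltas() {
  const agent = new Agent('editor')
    .model('anthropic/claude-haiku-4-5')
    .instructions('When asked to write code, call the set_code tool with the code.')
    .tool(setCode);
  const { stream } = await agent.stream('Write a bubble sort in TypeScript. Use set_code.');
  const reader = stream.getReader();
  let deltas = 0;
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    if (value.type === 'tool-call-delta') deltas++; // arrives incrementally, not in one burst
  }
  return deltas;
}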

View file

@ -0,0 +1,223 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { describeIf, collectStreamChunks, chunksOfType, getModel } from './helpers';
import { Agent, Tool } from '../../index';
import type { StreamChunk } from '../../index';
const answerSchema = z.object({
city: z.string().describe('The name of the city'),
country: z.string().describe('The country the city is in'),
population_millions: z.number().describe('Approximate population in millions'),
});
function createStructuredAgent(provider: 'anthropic' | 'openai'): Agent {
return new Agent('structured-output-test')
.model(getModel(provider))
.instructions(
'You answer geography questions. Always respond with the structured output schema. Be precise and factual.',
)
.structuredOutput(answerSchema);
}
function createStructuredAgentWithTool(provider: 'anthropic' | 'openai'): Agent {
const lookupTool = new Tool('lookup_capital')
.description('Look up the capital city of a country')
.input(z.object({ country: z.string().describe('Country name') }))
.output(z.object({ capital: z.string(), population_millions: z.number() }))
.handler(async ({ country }) => {
const data: Record<string, { capital: string; population_millions: number }> = {
france: { capital: 'Paris', population_millions: 2.1 },
japan: { capital: 'Tokyo', population_millions: 13.9 },
brazil: { capital: 'Brasília', population_millions: 3.0 },
};
return data[country.toLowerCase()] ?? { capital: 'Unknown', population_millions: 0 };
});
return new Agent('structured-tool-test')
.model(getModel(provider))
.instructions(
'You answer geography questions. Use the lookup_capital tool when asked about capitals. Always respond with the structured output schema.',
)
.tool(lookupTool)
.structuredOutput(answerSchema);
}
function createStructuredAgentWithInterruptibleTool(provider: 'anthropic' | 'openai'): Agent {
const deleteTool = new Tool('delete_record')
.description('Delete a geographic record — requires confirmation')
.input(z.object({ city: z.string().describe('City to delete') }))
.output(z.object({ deleted: z.boolean(), city: z.string() }))
.suspend(z.object({ message: z.string() }))
.resume(z.object({ approved: z.boolean() }))
.handler(async ({ city }, ctx) => {
if (!ctx.resumeData) {
return await ctx.suspend({ message: `Delete record for "${city}"?` });
}
return { deleted: ctx.resumeData.approved, city };
});
const resultSchema = z.object({
action: z.string().describe('The action that was performed'),
city: z.string().describe('The city affected'),
success: z.boolean().describe('Whether the action succeeded'),
});
return new Agent('structured-interrupt-test')
.model(getModel(provider))
.instructions(
'You manage geographic records. When asked to delete a record, use the delete_record tool. Always respond with the structured output schema.',
)
.tool(deleteTool)
.structuredOutput(resultSchema)
.checkpoint('memory');
}
const describe = describeIf('anthropic');
describe('structured output integration', () => {
it('returns parsed structuredOutput via generate()', async () => {
const agent = createStructuredAgent('anthropic');
const result = await agent.generate('What is the capital of France?');
expect(result.finishReason).toBe('stop');
expect(result.structuredOutput).toBeDefined();
const parsed = answerSchema.safeParse(result.structuredOutput);
expect(parsed.success).toBe(true);
if (parsed.success) {
expect(parsed.data.city.toLowerCase()).toContain('paris');
expect(parsed.data.country.toLowerCase()).toContain('france');
expect(parsed.data.population_millions).toBeGreaterThan(0);
}
});
it('returns parsed structuredOutput in stream finish chunk', async () => {
const agent = createStructuredAgent('anthropic');
const { stream } = await agent.stream('What is the capital of Japan?');
const chunks = await collectStreamChunks(stream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.structuredOutput).toBeDefined();
const parsed = answerSchema.safeParse(finish.structuredOutput);
expect(parsed.success).toBe(true);
if (parsed.success) {
expect(parsed.data.city.toLowerCase()).toContain('tokyo');
}
});
it('returns structuredOutput after tool use via generate()', async () => {
const agent = createStructuredAgentWithTool('anthropic');
const result = await agent.generate('What is the capital of France? Use the lookup tool.');
expect(result.finishReason).toBe('stop');
expect(result.structuredOutput).toBeDefined();
const parsed = answerSchema.safeParse(result.structuredOutput);
expect(parsed.success).toBe(true);
if (parsed.success) {
expect(parsed.data.city.toLowerCase()).toContain('paris');
}
});
it('returns structuredOutput after tool use via stream()', async () => {
const agent = createStructuredAgentWithTool('anthropic');
const { stream } = await agent.stream('What is the capital of Japan? Use the lookup tool.');
const chunks = await collectStreamChunks(stream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.structuredOutput).toBeDefined();
const parsed = answerSchema.safeParse(finish.structuredOutput);
expect(parsed.success).toBe(true);
});
it('returns structuredOutput after resume("generate")', async () => {
const agent = createStructuredAgentWithInterruptibleTool('anthropic');
const first = await agent.generate('Delete the record for Paris');
expect(first.pendingSuspend).toBeDefined();
const { runId, toolCallId } = first.pendingSuspend![0];
const resumed = await agent.resume('generate', { approved: true }, { runId, toolCallId });
expect(resumed.finishReason).toBe('stop');
expect(resumed.structuredOutput).toBeDefined();
const resultSchema = z.object({
action: z.string(),
city: z.string(),
success: z.boolean(),
});
const parsed = resultSchema.safeParse(resumed.structuredOutput);
expect(parsed.success).toBe(true);
});
it('returns structuredOutput after resume("stream")', async () => {
const agent = createStructuredAgentWithInterruptibleTool('anthropic');
const first = await agent.generate('Delete the record for Tokyo');
expect(first.pendingSuspend).toBeDefined();
const { runId, toolCallId } = first.pendingSuspend![0];
const resumedStream = await agent.resume('stream', { approved: true }, { runId, toolCallId });
const chunks = await collectStreamChunks(resumedStream.stream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.structuredOutput).toBeDefined();
const resultSchema = z.object({
action: z.string(),
city: z.string(),
success: z.boolean(),
});
const parsed = resultSchema.safeParse(finish.structuredOutput);
expect(parsed.success).toBe(true);
});
it('structuredOutput conforms to the schema', async () => {
const strictSchema = z.object({
name: z.string(),
is_capital: z.boolean(),
continent: z.enum([
'Africa',
'Antarctica',
'Asia',
'Europe',
'North America',
'Oceania',
'South America',
]),
});
const agent = new Agent('strict-schema-test')
.model(getModel('anthropic'))
.instructions('Answer geography questions using the structured output schema.')
.structuredOutput(strictSchema);
const result = await agent.generate('Tell me about Berlin');
expect(result.structuredOutput).toBeDefined();
const parsed = strictSchema.safeParse(result.structuredOutput);
expect(parsed.success).toBe(true);
if (parsed.success) {
expect(parsed.data.name.toLowerCase()).toContain('berlin');
expect(parsed.data.continent).toBe('Europe');
expect(typeof parsed.data.is_capital).toBe('boolean');
}
});
});

View file

@ -0,0 +1,96 @@
import { expect, it } from 'vitest';
import {
chunksOfType,
collectStreamChunks,
collectTextDeltas,
describeIf,
findAllToolResults,
getModel,
} from './helpers';
import type { StreamChunk } from '../../index';
import { Agent } from '../../index';
const describe = describeIf('anthropic');
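// asTool() exposes an agent as a regular tool: the orchestrator passes a prompt,
// the sub-agent runs its own model loop, and its reply comes back as the tool result
// (surfaced below through findAllToolResults on the streamed messages).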
describe('sub-agent (asTool) integration', () => {
it('orchestrator calls a sub-agent as a tool and gets its response', async () => {
const mathAgent = new Agent('math-specialist')
.model(getModel('anthropic'))
.instructions(
'You are a math specialist. When given a math problem, compute the answer and reply with just the number. No explanation.',
);
const orchestrator = new Agent('orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are a coordinator. When asked a math question, delegate to the math_specialist tool. ' +
'Pass the question as the prompt. Then relay the answer back.',
)
.tool(mathAgent.asTool('A math specialist that can solve math problems'));
const { stream: fullStream } = await orchestrator.stream('What is 15 * 4?');
const chunks = await collectStreamChunks(fullStream);
const text = collectTextDeltas(chunks);
const messageChunks = chunksOfType(chunks, 'message') as Array<
StreamChunk & { type: 'message' }
>;
const toolResults = findAllToolResults(messageChunks.map((c) => c.message));
// The orchestrator should have called the sub-agent tool
expect(toolResults.length).toBeGreaterThan(0);
const mathCall = toolResults.find((tc) => tc.toolName === 'math-specialist');
expect(mathCall).toBeDefined();
// The output should contain the sub-agent's response
expect(mathCall!.result).toBeDefined();
// The final text should reference 60
expect(text).toBeTruthy();
expect(text).toContain('60');
});
it('handles a chain of two sub-agents', async () => {
const translatorAgent = new Agent('translator')
.model(getModel('anthropic'))
.instructions(
'You are a translator. Translate the given text to French. Reply with only the French translation.',
);
const uppercaseAgent = new Agent('uppercaser')
.model(getModel('anthropic'))
.instructions(
'You convert text to uppercase. Reply with the input text in all uppercase letters. Nothing else.',
);
const orchestrator = new Agent('chain-orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are a coordinator with two tools. ' +
'When asked to translate and uppercase text: ' +
'1. First use the translator tool to translate to French. ' +
'2. Then use the uppercaser tool to convert the French text to uppercase. ' +
'Return the final uppercase French text.',
)
.tool(translatorAgent.asTool('Translates text to French'))
.tool(uppercaseAgent.asTool('Converts text to uppercase'));
const { stream: fullStream } = await orchestrator.stream(
'Translate "hello" to French and then make it uppercase.',
);
const chunks = await collectStreamChunks(fullStream);
const messageChunks = chunksOfType(chunks, 'message') as Array<
StreamChunk & { type: 'message' }
>;
const toolResults = findAllToolResults(messageChunks.map((c) => c.message));
// Should have called both tools
expect(toolResults.length).toBeGreaterThanOrEqual(2);
const text = collectTextDeltas(chunks);
expect(text).toBeTruthy();
// The result should contain the French for hello: BONJOUR (or SALUT)
expect(text).toMatch(/BONJOUR|SALUT/i);
});
});

View file

@ -0,0 +1,197 @@
/**
* Integration test: Telemetry LangSmith.
*
* Runs a real agent against a real model with LangSmith telemetry configured,
* uses a local HTTP server to capture the trace data that would be sent to
* LangSmith, and verifies the full pipeline works end-to-end.
*
* Pipeline under test:
 * Agent.generate() → AI SDK (generateText with experimental_telemetry)
 * → OTel spans with ai.operationId → LangSmithOTLPSpanProcessor
 * → LangSmithOTLPTraceExporter → HTTP POST captured by local server
*/
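// A minimal usage sketch of the builder exercised below (key and endpoint are
// placeholders, not real values); it mirrors the calls these tests make:
//
//   const built = await new LangSmithTelemetry({
//     apiKey: 'ls-...',
//     project: 'my-project',
//     url: 'https://<langsmith-host>/otel/v1/traces',
//   })
//     .functionId('my-agent')
//     .build();
//   const agent = new Agent('traced')
//     .model('anthropic/claude-haiku-4-5')
//     .telemetry(built as Telemetry);
//   await agent.generate('hello');
//   if (built.provider) await built.provider.forceFlush();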
import * as http from 'node:http';
import { afterAll, afterEach, beforeAll, expect, it } from 'vitest';
import { z } from 'zod';
import { describeIf, getModel } from './helpers';
import { Agent, LangSmithTelemetry, type Telemetry, type BuiltTelemetry, Tool } from '../../index';
const describe = describeIf('anthropic');
interface CapturedRequest {
url: string;
headers: http.IncomingHttpHeaders;
body: Buffer;
}
describe('Telemetry → LangSmith integration', () => {
let server: http.Server;
let serverPort: number;
let captured: CapturedRequest[];
let previousTracingV2: string | undefined;
beforeAll(async () => {
// LangSmith exporter requires this env var to be set, otherwise it silently drops spans
previousTracingV2 = process.env.LANGCHAIN_TRACING_V2;
process.env.LANGCHAIN_TRACING_V2 = 'true';
captured = [];
server = http.createServer((req, res) => {
const chunks: Buffer[] = [];
req.on('data', (c: Buffer) => chunks.push(c));
req.on('end', () => {
captured.push({
url: req.url ?? '',
headers: req.headers,
body: Buffer.concat(chunks),
});
res.writeHead(200, { 'Content-Type': 'application/json' });
res.end('{}');
});
});
await new Promise<void>((resolve) => {
server.listen(0, () => {
const addr = server.address();
serverPort = typeof addr === 'object' && addr ? addr.port : 0;
resolve();
});
});
});
afterEach(() => {
captured = [];
});
afterAll(async () => {
if (previousTracingV2 === undefined) {
delete process.env.LANGCHAIN_TRACING_V2;
} else {
process.env.LANGCHAIN_TRACING_V2 = previousTracingV2;
}
await new Promise<void>((resolve) => {
server.close(() => resolve());
});
});
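// Shared fixture: an agent with a single `add` tool so the traced runs below exercise
// both plain text generation and tool calls.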
function createTestAgent(telemetry: Telemetry | BuiltTelemetry) {
return new Agent('langsmith-test')
.model(getModel('anthropic'))
.instructions('You are a calculator. Use the add tool when asked to add. Be concise.')
.telemetry(telemetry as Telemetry)
.tool(
new Tool('add')
.description('Add two numbers')
.input(z.object({ a: z.number(), b: z.number() }))
.handler(async ({ a, b }) => ({ result: a + b })),
);
}
it('sends trace data to LangSmith using LangSmithTelemetry', async () => {
const built = await new LangSmithTelemetry({
apiKey: 'ls-test-key-12345',
project: 'agents-test',
url: `http://localhost:${serverPort}/otel/v1/traces`,
})
.functionId('calc-agent')
.build();
const agent = createTestAgent(built);
const result = await agent.generate('What is 3 + 4?');
if (built.provider) await built.provider.forceFlush();
// Verify the agent produced a response
expect(result.messages.length).toBeGreaterThan(0);
// Verify LangSmith received trace data
expect(captured.length).toBeGreaterThan(0);
// Verify the request hit the OTLP traces endpoint
expect(captured.some((r) => r.url.includes('/otel/v1/traces'))).toBe(true);
// Verify the API key was sent in the header
expect(captured.some((r) => r.headers['x-api-key'] === 'ls-test-key-12345')).toBe(true);
// Verify the body is non-empty (actual protobuf trace data)
const totalBytes = captured.reduce((sum, r) => sum + r.body.length, 0);
expect(totalBytes).toBeGreaterThan(0);
if (built.provider) await built.provider.shutdown();
});
it('supports endpoint shorthand (auto-appends /otel/v1/traces)', async () => {
const built = await new LangSmithTelemetry({
apiKey: 'ls-endpoint-key',
project: 'agents-test',
endpoint: `http://localhost:${serverPort}`,
})
.functionId('endpoint-test')
.build();
const agent = createTestAgent(built);
const result = await agent.generate('What is 10 + 20?');
if (built.provider) await built.provider.forceFlush();
expect(result.messages.length).toBeGreaterThan(0);
expect(captured.length).toBeGreaterThan(0);
expect(captured.some((r) => r.headers['x-api-key'] === 'ls-endpoint-key')).toBe(true);
if (built.provider) await built.provider.shutdown();
});
it('includes tool call spans in the trace', async () => {
const built = await new LangSmithTelemetry({
apiKey: 'ls-tool-test',
project: 'agents-test',
url: `http://localhost:${serverPort}/otel/v1/traces`,
})
.functionId('tool-trace-test')
.build();
const agent = createTestAgent(built);
await agent.generate('What is 5 + 7?');
if (built.provider) await built.provider.forceFlush();
// Multiple spans exported as protobuf
expect(captured.length).toBeGreaterThan(0);
const totalBytes = captured.reduce((sum, r) => sum + r.body.length, 0);
expect(totalBytes).toBeGreaterThan(50);
if (built.provider) await built.provider.shutdown();
});
it('fires TelemetryIntegration hooks alongside LangSmith traces', async () => {
const hookEvents: string[] = [];
const built = await new LangSmithTelemetry({
apiKey: 'ls-hooks-test',
project: 'agents-test',
url: `http://localhost:${serverPort}/otel/v1/traces`,
})
.functionId('hooks-test')
.integration({
onStart: () => {
hookEvents.push('start');
},
onFinish: () => {
hookEvents.push('finish');
},
})
.build();
const agent = createTestAgent(built);
await agent.generate('What is 1 + 1?');
if (built.provider) await built.provider.forceFlush();
// Both LangSmith traces and integration hooks should fire
expect(captured.length).toBeGreaterThan(0);
expect(hookEvents).toContain('start');
expect(hookEvents).toContain('finish');
if (built.provider) await built.provider.shutdown();
});
});

View file

@ -0,0 +1,67 @@
import { expect, it } from 'vitest';
import { describeIf, collectStreamChunks, chunksOfType } from './helpers';
import { Agent } from '../../index';
/**
* Thinking / reasoning stream integration tests.
*
* These require models that support extended thinking:
 * - Anthropic: claude-sonnet-4-5 (not haiku, which doesn't support thinking)
* - OpenAI: o3-mini (reasoning model)
*/
const describeAnthropic = describeIf('anthropic');
describeAnthropic('thinking stream (Anthropic)', () => {
it('emits reasoning-delta chunks when thinking is enabled', async () => {
const agent = new Agent('thinking-test')
.model('anthropic', 'claude-sonnet-4-5')
.thinking('anthropic', { budgetTokens: 5000 })
.instructions('You are a helpful assistant. Think carefully before answering.');
const { stream: fullStream } = await agent.stream('What is 17 * 23?');
const chunks = await collectStreamChunks(fullStream);
const reasoningChunks = chunksOfType(chunks, 'reasoning-delta');
expect(reasoningChunks.length).toBeGreaterThan(0);
// Verify reasoning chunks have non-empty delta content
const deltas = reasoningChunks
.filter((c): c is typeof c & { delta: string } => 'delta' in c)
.map((c) => c.delta);
const fullReasoning = deltas.join('');
expect(fullReasoning.length).toBeGreaterThan(0);
// Should also have text-delta chunks (the actual answer)
const textChunks = chunksOfType(chunks, 'text-delta');
expect(textChunks.length).toBeGreaterThan(0);
});
});
const describeOpenAI = describeIf('openai');
describeOpenAI('thinking stream (OpenAI)', () => {
it('works with reasoning model and .thinking() enabled', async () => {
const agent = new Agent('openai-thinking-test')
.model('openai', 'o3-mini')
.thinking('openai', { reasoningEffort: 'medium' })
.instructions('You are a helpful assistant.');
const { stream: fullStream } = await agent.stream('What is 17 * 23?');
const chunks = await collectStreamChunks(fullStream);
// OpenAI reasoning models do internal reasoning but don't expose it
// as streamed chunks — verify the agent produces a text response.
const textChunks = chunksOfType(chunks, 'text-delta');
expect(textChunks.length).toBeGreaterThan(0);
const text = textChunks
.filter((c): c is typeof c & { delta: string } => 'delta' in c)
.map((c) => c.delta)
.join('');
expect(text).toContain('391');
});
});

View file

@ -0,0 +1,154 @@
import { expect, it, vi, afterEach, beforeEach } from 'vitest';
import { describeIf, getModel, collectStreamChunks, createSqliteMemory } from './helpers';
import { Agent, Memory } from '../../index';
const describe = describeIf('anthropic');
describe('title generation integration', () => {
let sqliteCtx: ReturnType<typeof createSqliteMemory>;
beforeEach(() => {
sqliteCtx = createSqliteMemory();
});
afterEach(async () => {
sqliteCtx.cleanup();
});
it('auto-generates a thread title after generate() on a new thread', async () => {
const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration(true);
const agent = new Agent('title-gen-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `title-test-${Date.now()}`;
const resourceId = 'test-user';
const threadBefore = await sqliteCtx.memory.getThread(threadId);
expect(threadBefore).toBeNull();
await agent.generate('Tell me about the history of Rome', {
persistence: { threadId, resourceId },
});
await vi.waitFor(
async () => {
const thread = await sqliteCtx.memory.getThread(threadId);
expect(thread).toBeDefined();
expect(thread!.title).toBeTruthy();
expect(thread!.title!.length).toBeGreaterThan(0);
expect(thread!.title!.length).toBeLessThanOrEqual(80);
},
{ timeout: 30_000, interval: 500 },
);
});
it('auto-generates a thread title after stream() on a new thread', async () => {
const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration(true);
const agent = new Agent('title-gen-stream-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `title-stream-test-${Date.now()}`;
const resourceId = 'test-user';
const { stream } = await agent.stream('Explain quantum computing basics', {
persistence: { threadId, resourceId },
});
await collectStreamChunks(stream);
await vi.waitFor(
async () => {
const thread = await sqliteCtx.memory.getThread(threadId);
expect(thread).toBeDefined();
expect(thread!.title).toBeTruthy();
expect(thread!.title!.length).toBeGreaterThan(0);
expect(thread!.title!.length).toBeLessThanOrEqual(80);
},
{ timeout: 30_000, interval: 500 },
);
});
it('does not generate a title when titleGeneration is not configured', async () => {
const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10);
const agent = new Agent('no-title-gen-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `no-title-test-${Date.now()}`;
await agent.generate('Hello, how are you?', {
persistence: { threadId, resourceId: 'test-user' },
});
await new Promise((r) => setTimeout(r, 3_000));
const thread = await sqliteCtx.memory.getThread(threadId);
expect(thread).toBeDefined();
expect(thread!.title).toBeFalsy();
});
it('does not overwrite a pre-existing thread title', async () => {
const existingTitle = 'My Pre-Existing Title';
await sqliteCtx.memory.saveThread({
id: 'pre-titled-thread',
resourceId: 'test-user',
title: existingTitle,
metadata: { custom: 'data' },
});
const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration(true);
const agent = new Agent('title-no-overwrite-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
await agent.generate('What is 2+2?', {
persistence: { threadId: 'pre-titled-thread', resourceId: 'test-user' },
});
// Allow fire-and-forget title generation to settle
await new Promise((r) => setTimeout(r, 5_000));
const thread = await sqliteCtx.memory.getThread('pre-titled-thread');
expect(thread!.title).toBe(existingTitle);
expect(thread!.metadata).toEqual({ custom: 'data' });
});
it('accepts a custom model for title generation', async () => {
const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration({
model: 'anthropic/claude-haiku-4-5',
});
const agent = new Agent('title-custom-model-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.')
.memory(memory);
const threadId = `title-custom-model-${Date.now()}`;
await agent.generate('What are the best practices for growing tomatoes?', {
persistence: { threadId, resourceId: 'test-user' },
});
await vi.waitFor(
async () => {
const thread = await sqliteCtx.memory.getThread(threadId);
expect(thread).toBeDefined();
expect(thread!.title).toBeTruthy();
expect(thread!.title!.length).toBeGreaterThan(0);
},
{ timeout: 30_000, interval: 500 },
);
});
});

View file

@ -0,0 +1,211 @@
import { expect, it, vi } from 'vitest';
import { z } from 'zod';
import {
describeIf,
getModel,
collectStreamChunks,
chunksOfType,
findAllToolResults,
collectTextDeltas,
} from './helpers';
import { Agent, filterLlmMessages, Tool } from '../../index';
const describe = describeIf('anthropic');
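/**
 * toModelOutput lets a tool hand the full payload back to the caller while sending a
 * reduced, transformed view to the LLM. The tests below check both sides:
 * GenerateResult.toolCalls keeps the raw handler output, and the tool-result message
 * (what the model actually sees) carries the transformed output.
 */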
describe('toModelOutput integration', () => {
it('sends the transformed output to the LLM while preserving raw output in toolCalls', async () => {
const handlerSpy = vi.fn();
const searchTool = new Tool('search_db')
.description('Search the database and return matching records')
.input(z.object({ query: z.string().describe('Search query') }))
.output(
z.object({
records: z.array(z.object({ id: z.number(), name: z.string(), data: z.string() })),
total: z.number(),
}),
)
.handler(async ({ query }) => {
handlerSpy(query);
return {
records: [
{ id: 1, name: 'Widget A', data: 'x'.repeat(200) },
{ id: 2, name: 'Widget B', data: 'y'.repeat(200) },
{ id: 3, name: 'Gadget C', data: 'z'.repeat(200) },
],
total: 3,
};
})
.toModelOutput((output) => ({
summary: `Found ${output.total} records: ${output.records.map((r) => r.name).join(', ')}`,
}));
const agent = new Agent('to-model-output-test')
.model(getModel('anthropic'))
.instructions(
'You are a database assistant. Use search_db to find records. Be concise in your response.',
)
.tool(searchTool);
const result = await agent.generate('Search for widgets in the database');
expect(handlerSpy).toHaveBeenCalled();
// toolCalls on GenerateResult stores the raw output
expect(result.toolCalls).toBeDefined();
const searchEntry = result.toolCalls!.find((tc) => tc.tool === 'search_db');
expect(searchEntry).toBeDefined();
const rawOutput = searchEntry!.output as {
records: Array<{ id: number; name: string; data: string }>;
total: number;
};
expect(rawOutput.total).toBe(3);
expect(rawOutput.records[0].data).toBe('x'.repeat(200));
// ContentToolResult in messages stores the transformed output (what the LLM saw)
const toolResults = findAllToolResults(result.messages);
const searchToolResult = toolResults.find((tr) => tr.toolName === 'search_db');
expect(searchToolResult).toBeDefined();
const modelOutput = searchToolResult!.result as { summary: string };
expect(modelOutput.summary).toContain('Found 3 records');
expect(modelOutput.summary).toContain('Widget A');
});
it('works with stream() — LLM receives transformed output', async () => {
const fetchTool = new Tool('fetch_report')
.description('Fetch a detailed report by ID')
.input(z.object({ reportId: z.string().describe('Report ID') }))
.output(
z.object({
id: z.string(),
title: z.string(),
body: z.string(),
metadata: z.object({ pages: z.number(), author: z.string() }),
}),
)
.handler(async ({ reportId }) => ({
id: reportId,
title: 'Q4 Sales Report',
body: 'Detailed analysis spanning multiple pages...'.repeat(10),
metadata: { pages: 42, author: 'Jane Doe' },
}))
.toModelOutput((output) => ({
id: output.id,
title: output.title,
pageCount: output.metadata.pages,
}));
const agent = new Agent('to-model-output-stream-test')
.model(getModel('anthropic'))
.instructions(
'You are a report assistant. Use fetch_report to retrieve reports. Mention the title and page count. Be concise.',
)
.tool(fetchTool);
const { stream } = await agent.stream('Get report RPT-001');
const chunks = await collectStreamChunks(stream);
// The tool result messages in the stream contain the transformed output
const messageChunks = chunksOfType(chunks, 'message');
const toolResults = findAllToolResults(messageChunks.map((c) => c.message));
const reportResult = toolResults.find((tr) => tr.toolName === 'fetch_report');
expect(reportResult).toBeDefined();
// The model output (transformed) should contain only the selected fields
const modelOutput = reportResult!.result as { id: string; title: string; pageCount: number };
expect(modelOutput.id).toBe('RPT-001');
expect(modelOutput.title).toBe('Q4 Sales Report');
expect(modelOutput.pageCount).toBe(42);
// The body should NOT be in the model output (it was stripped by toModelOutput)
expect((modelOutput as Record<string, unknown>).body).toBeUndefined();
const text = collectTextDeltas(chunks);
expect(text).toBeTruthy();
expect(text).toMatch(/Q4 Sales Report/i);
});
it('does not affect the LLM output when toModelOutput is not set', async () => {
const echoTool = new Tool('echo')
.description('Echo back the input message')
.input(z.object({ message: z.string().describe('Message to echo') }))
.output(z.object({ echoed: z.string() }))
.handler(async ({ message }) => ({ echoed: message }));
const agent = new Agent('no-to-model-output-test')
.model(getModel('anthropic'))
.instructions('You are a simple echo bot. Use echo tool and repeat the result. Be concise.')
.tool(echoTool);
const result = await agent.generate('Echo the message "hello world"');
// Without toModelOutput, tool result in messages should have the raw output
const toolResults = findAllToolResults(result.messages);
const echoResult = toolResults.find((tr) => tr.toolName === 'echo');
expect(echoResult).toBeDefined();
expect((echoResult!.result as { echoed: string }).echoed).toBe('hello world');
// And toolCalls should also have the same raw output
expect(result.toolCalls).toBeDefined();
const echoEntry = result.toolCalls!.find((tc) => tc.tool === 'echo');
expect(echoEntry).toBeDefined();
expect((echoEntry!.output as { echoed: string }).echoed).toBe('hello world');
});
it('works alongside toMessage — both transforms apply independently', async () => {
const calcTool = new Tool('multiply')
.description('Multiply two numbers')
.input(
z.object({
a: z.number().describe('First number'),
b: z.number().describe('Second number'),
}),
)
.output(z.object({ result: z.number() }))
.handler(async ({ a, b }) => ({ result: a * b }))
.toModelOutput((output) => ({
answer: output.result,
note: 'multiplication complete',
}))
.toMessage((output) => ({
type: 'custom',
data: {
dummy: `Product is ${output.result}`,
},
}));
const agent = new Agent('both-transforms-test')
.model(getModel('anthropic'))
.instructions('You are a calculator. Use multiply to multiply numbers. Be concise.')
.tool(calcTool);
const result = await agent.generate('What is 7 times 8?');
// Custom message from toMessage should be present (uses raw output)
const customMessages = result.messages.filter((m) => m.type === 'custom') as Array<{
type: 'custom';
data: { dummy: string };
}>;
expect(customMessages.length).toBeGreaterThan(0);
expect(customMessages[0].data.dummy).toBe('Product is 56');
// toolCalls stores the raw output
expect(result.toolCalls).toBeDefined();
const multiplyEntry = result.toolCalls!.find((tc) => tc.tool === 'multiply');
expect(multiplyEntry).toBeDefined();
expect((multiplyEntry!.output as { result: number }).result).toBe(56);
// Tool result in messages stores the transformed output for the LLM
const toolResults = findAllToolResults(result.messages);
const multiplyToolResult = toolResults.find((tr) => tr.toolName === 'multiply');
expect(multiplyToolResult).toBeDefined();
const modelOutput = multiplyToolResult!.result as { answer: number; note: string };
expect(modelOutput.answer).toBe(56);
expect(modelOutput.note).toBe('multiplication complete');
// The custom messages should be filtered out for the LLM
const llmMessages = filterLlmMessages(result.messages);
expect(llmMessages.length).toBeLessThan(result.messages.length);
});
});

View file

@ -0,0 +1,115 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
getModel,
createAgentWithAddTool,
} from './helpers';
import { Agent } from '../../index';
import type { StreamChunk } from '../../index';
const describe = describeIf('anthropic');
describe('token usage integration', () => {
it('reports token usage on a simple text response via streamText', async () => {
const agent = new Agent('token-test')
.model(getModel('anthropic'))
.instructions('Reply with exactly: "Hello". Nothing else.');
const { stream: fullStream } = await agent.stream('Say hello');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.usage).toBeDefined();
expect(finish.usage!.promptTokens).toBeGreaterThan(0);
expect(finish.usage!.completionTokens).toBeGreaterThan(0);
expect(finish.usage!.totalTokens).toBe(
finish.usage!.promptTokens + finish.usage!.completionTokens,
);
});
it('reports token usage on a simple text response via run()', async () => {
const agent = new Agent('token-run-test')
.model(getModel('anthropic'))
.instructions('Reply with exactly: "Hello". Nothing else.');
const result = await agent.generate('Say hello');
expect(result.usage).toBeDefined();
expect(result.usage!.promptTokens).toBeGreaterThan(0);
expect(result.usage!.completionTokens).toBeGreaterThan(0);
expect(result.usage!.totalTokens).toBe(
result.usage!.promptTokens + result.usage!.completionTokens,
);
});
it('reports token usage after a multi-step tool call', async () => {
const agent = createAgentWithAddTool('anthropic');
const { stream: fullStream } = await agent.stream('What is 7 + 13?');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.usage).toBeDefined();
// Multi-step should use more tokens than a simple response
expect(finish.usage!.promptTokens).toBeGreaterThan(0);
expect(finish.usage!.completionTokens).toBeGreaterThan(0);
expect(finish.usage!.totalTokens).toBe(
finish.usage!.promptTokens + finish.usage!.completionTokens,
);
});
it('emits finish chunks with token usage in the stream', async () => {
const agent = new Agent('finish-chunk-test')
.model(getModel('anthropic'))
.instructions('Reply with exactly: "OK". Nothing else.');
const { stream: fullStream } = await agent.stream('Acknowledge');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.finishReason).toBeDefined();
// Finish chunks should carry usage when available
if (finish.usage) {
expect(finish.usage.promptTokens).toBeGreaterThanOrEqual(0);
expect(finish.usage.completionTokens).toBeGreaterThanOrEqual(0);
}
});
it('accumulates higher token counts with more complex prompts', async () => {
const agent = new Agent('token-scale-test')
.model(getModel('anthropic'))
.instructions('You are a helpful assistant. Be concise.');
// Short prompt
const { stream: short } = await agent.stream('Hi');
const chunks = await collectStreamChunks(short);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finishShort = finishChunks[0] as StreamChunk & { type: 'finish' };
// Longer prompt
const { stream: long } = await agent.stream(
'Explain the difference between TCP and UDP networking protocols. Include at least three key differences.',
);
const chunksLong = await collectStreamChunks(long);
const finishChunksLong = chunksOfType(chunksLong, 'finish');
expect(finishChunksLong.length).toBeGreaterThan(0);
const finishLong = finishChunksLong[0] as StreamChunk & { type: 'finish' };
// Longer prompt should use more completion tokens (longer response)
expect(finishLong.usage!.completionTokens).toBeGreaterThan(finishShort.usage!.completionTokens);
});
});

View file

@ -0,0 +1,104 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
collectTextDeltas,
findAllToolResults,
createAgentWithAlwaysErrorTool,
createAgentWithFlakyTool,
} from './helpers';
import type { StreamChunk } from './helpers';
const describe = describeIf('anthropic');
describe('tool error handling integration', () => {
it('does not crash when a tool throws — stream completes with a finish chunk', async () => {
const agent = createAgentWithAlwaysErrorTool('anthropic');
const { stream } = await agent.stream('Fetch the data for id "abc123".');
const chunks = await collectStreamChunks(stream);
// Stream must never emit an error chunk
const errorChunks = chunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
// Stream must close with a finish chunk whose reason is not 'error'
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.finishReason).not.toBe('error');
});
it('does not crash when a tool throws — generate returns finishReason stop', async () => {
const agent = createAgentWithAlwaysErrorTool('anthropic');
const result = await agent.generate('Fetch the data for id "abc123".');
expect(result.error).toBeUndefined();
expect(result.finishReason).toBe('stop');
});
it('LLM receives the error message and acknowledges it in the response', async () => {
const agent = createAgentWithAlwaysErrorTool('anthropic');
const { stream } = await agent.stream('Fetch the data for id "abc123".');
const chunks = await collectStreamChunks(stream);
// Verify there IS a text response (LLM acknowledged the error)
const text = collectTextDeltas(chunks);
expect(text.length).toBeGreaterThan(0);
// The response should mention the failure (error was visible to LLM)
const mentionsFailure = /error|fail|unavailable|timeout|unable|could not/i.test(text);
expect(mentionsFailure).toBe(true);
});
it('error tool-result appears in the message list', async () => {
const agent = createAgentWithAlwaysErrorTool('anthropic');
const { stream } = await agent.stream('Fetch the data for id "abc123".');
const chunks = await collectStreamChunks(stream);
// There should be a tool-result message in the stream
const messageChunks = chunksOfType(chunks, 'message');
const toolResults = findAllToolResults(messageChunks.map((c) => c.message));
// The tool should have been called and produced a result (even if it errored)
expect(toolResults.length).toBeGreaterThan(0);
const brokenResult = toolResults.find((r) => r.toolName === 'broken_tool');
expect(brokenResult).toBeDefined();
});
it('LLM can self-correct by retrying a flaky tool', async () => {
const { agent, callCount } = createAgentWithFlakyTool('anthropic');
const result = await agent.generate('Fetch the data for id "xyz".');
// Tool was called more than once — LLM retried after seeing the error
expect(callCount()).toBeGreaterThanOrEqual(2);
// Agent completed successfully
expect(result.error).toBeUndefined();
expect(result.finishReason).toBe('stop');
});
it('LLM self-correction: stream mode — flaky tool succeeds on retry', async () => {
const { agent, callCount } = createAgentWithFlakyTool('anthropic');
const { stream } = await agent.stream('Fetch the data for id "xyz".');
const chunks = await collectStreamChunks(stream);
// No error chunk in the stream
const errorChunks = chunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
// Tool was retried
expect(callCount()).toBeGreaterThanOrEqual(2);
// Response should mention success or the value
const text = collectTextDeltas(chunks);
expect(text.length).toBeGreaterThan(0);
});
});

View file

@ -0,0 +1,185 @@
import { expect, it } from 'vitest';
import {
describeIf,
collectStreamChunks,
chunksOfType,
createAgentWithInterruptibleTool,
createAgentWithMixedTools,
createAgentWithParallelInterruptibleCalls,
} from './helpers';
import { isLlmMessage, type StreamChunk } from '../../index';
const describe = describeIf('anthropic');
describe('tool interrupt integration', () => {
it('pauses the stream when a tool suspends', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
const { stream: fullStream } = await agent.stream('Delete the file /tmp/test.txt');
const chunks = await collectStreamChunks(fullStream);
const chunkTypes = chunks.map((c) => c.type);
expect(chunkTypes).toContain('tool-call-suspended');
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBe(1);
const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
expect(suspended.toolName).toBe('delete_file');
expect(suspended.runId).toBeTruthy();
expect(suspended.toolCallId).toBeTruthy();
expect(suspended.suspendPayload).toEqual(
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
expect.objectContaining({ message: expect.any(String), severity: 'destructive' }),
);
// No tool-result should appear (tool is suspended)
const contentChunks = chunks.filter(
(c) =>
c.type === 'message' &&
isLlmMessage(c.message) &&
c.message.content.some((part) => part.type === 'tool-result'),
);
expect(contentChunks).toHaveLength(0);
});
it('resumes the stream after resume with approval', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
const { stream: fullStream } = await agent.stream('Delete the file /tmp/test.txt');
const chunks = await collectStreamChunks(fullStream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBe(1);
const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
const resumedStream = await agent.resume(
'stream',
{ approved: true },
{ runId: suspended.runId!, toolCallId: suspended.toolCallId! },
);
const resumedChunks = await collectStreamChunks(resumedStream.stream);
const resumedTypes = resumedChunks.map((c) => c.type);
// After approval, tool-result should appear as content chunk
const toolResultChunks = resumedChunks.filter(
(c) =>
c.type === 'message' &&
isLlmMessage(c.message) &&
c.message.content.some((c) => c.type === 'tool-result'),
);
expect(toolResultChunks.length).toBeGreaterThan(0);
expect(resumedTypes).toContain('text-delta');
});
it('resumes the stream after resume with denial', async () => {
const agent = createAgentWithInterruptibleTool('anthropic');
const { stream: fullStream } = await agent.stream('Delete the file /tmp/test.txt');
const chunks = await collectStreamChunks(fullStream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
expect(suspendedChunks.length).toBe(1);
const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
const resumedStream = await agent.resume(
'stream',
{ approved: false },
{ runId: suspended.runId!, toolCallId: suspended.toolCallId! },
);
const resumedChunks = await collectStreamChunks(resumedStream.stream);
const resumedTypes = resumedChunks.map((c) => c.type);
expect(resumedTypes).toContain('text-delta');
});
it('resumes each pending tool call one by one when multiple tool calls are suspended', async () => {
const agent = createAgentWithParallelInterruptibleCalls('anthropic');
const { stream: fullStream } = await agent.stream(
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls. After deleting all files, tell me whether you succeeded',
);
const chunks = await collectStreamChunks(fullStream);
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
// The first interruptible tool call suspends, halting the loop.
// Only 1 suspended chunk is emitted even though 2 tool calls were made.
expect(suspendedChunks.length).toBe(1);
const suspended1 = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
expect(suspended1.toolName).toBe('delete_file');
// Resume the first suspended tool call
const stream2 = await agent.resume(
'stream',
{ approved: true },
{ runId: suspended1.runId!, toolCallId: suspended1.toolCallId! },
);
const chunks2 = await collectStreamChunks(stream2.stream);
const suspendedChunks2 = chunksOfType(chunks2, 'tool-call-suspended');
// The second tool call should now be suspended (not an error)
expect(suspendedChunks2.length).toBe(1);
const suspended2 = suspendedChunks2[0] as StreamChunk & { type: 'tool-call-suspended' };
expect(suspended2.toolCallId).not.toBe(suspended1.toolCallId);
expect(suspended2.toolName).toBe('delete_file');
// Resume the second suspended tool call
const stream3 = await agent.resume(
'stream',
{ approved: true },
{ runId: suspended2.runId!, toolCallId: suspended2.toolCallId! },
);
const chunks3 = await collectStreamChunks(stream3.stream);
// After all original tool calls are resolved, the agent loop should
// continue without crashing (no AI_MissingToolResultsError).
// The LLM may respond with text or make additional tool calls.
const errorChunks = chunks3.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(chunks3, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.finishReason).not.toBe('error');
});
it('auto-executes non-interruptible tools while suspending interruptible ones', async () => {
const agent = createAgentWithMixedTools('anthropic');
const { stream: fullStream } = await agent.stream(
'You must call both tools: first call list_files with dir="/home", then call delete_file with path="/home/readme.md". Do not skip either tool.',
);
const chunks = await collectStreamChunks(fullStream);
// list_files should auto-execute — its result should appear as content
const toolResultChunks = chunks.filter(
(c) =>
c.type === 'message' &&
isLlmMessage(c.message) &&
c.message.content.some((c) => c.type === 'tool-result'),
);
expect(toolResultChunks.length).toBeGreaterThan(0);
// delete_file should be suspended
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
const deleteSuspended = suspendedChunks.find(
(c) => (c as StreamChunk & { type: 'tool-call-suspended' }).toolName === 'delete_file',
);
// If the LLM called delete_file, it should have been suspended
if (deleteSuspended) {
expect(deleteSuspended).toBeDefined();
}
});
});

View file

@ -0,0 +1,55 @@
import { expect, it } from 'vitest';
import {
chunksOfType,
collectStreamChunks,
createAgentWithToContentTool,
describeIf,
} from './helpers';
import { filterLlmMessages } from '../../index';
import type { AgentMessage, StreamChunk } from '../../index';
const describe = describeIf('anthropic');
describe('tool-result to message integration', () => {
it('adds a custom message to generate result that is visible to user but not to the LLM', async () => {
const agent = createAgentWithToContentTool('anthropic');
const result = await agent.generate('What is 3 + 4?');
// The custom message must appear in result.messages
const customMessages = result.messages.filter((m) => m.type === 'custom');
expect(customMessages.length).toBeGreaterThan(0);
const toolResultMsg = customMessages.find((m) => m.type === 'custom' && 'dummy' in m.data) as
| { type: 'custom'; data: { dummy: string } }
| undefined;
expect(toolResultMsg).toBeDefined();
expect(toolResultMsg!.data.dummy).toContain('dummy message. Tool output');
// filterLlmMessages must strip the custom message — the LLM never sees it.
// The filtered count must be less than total because custom messages were removed.
const llmMessages = filterLlmMessages(result.messages);
expect(llmMessages.length).toBeLessThan(result.messages.length);
});
it('emits toContent result as a content chunk in the stream', async () => {
const agent = createAgentWithToContentTool('anthropic');
const { stream } = await agent.stream('What is 5 + 6?');
const chunks = await collectStreamChunks(stream);
// Must contain at least one content chunk with the custom text from toContent
const messageChunks = chunksOfType(chunks, 'message') as Array<
StreamChunk & { type: 'message'; message: AgentMessage }
>;
const toContentChunk = messageChunks.find(
(c) => c.message.type === 'custom' && 'dummy' in c.message.data,
);
expect(toContentChunk).toBeDefined();
expect(
(toContentChunk!.message as { type: 'custom'; data: { dummy: string } }).data.dummy,
).toContain('dummy message. Tool output');
});
});

View file

@ -0,0 +1,170 @@
import { expect, it } from 'vitest';
import { describeIf, collectStreamChunks, chunksOfType, getModel } from './helpers';
import { Agent } from '../../index';
import type { StreamChunk } from '../../index';
const describeAnthropic = describeIf('anthropic');
describeAnthropic('usage and cost (Anthropic)', () => {
it('returns token usage on generate result', async () => {
const agent = new Agent('usage-test').model(getModel('anthropic')).instructions('Be concise.');
const result = await agent.generate('Say hello');
expect(result.usage).toBeDefined();
expect(result.usage!.promptTokens).toBeGreaterThan(0);
expect(result.usage!.completionTokens).toBeGreaterThan(0);
expect(result.usage!.totalTokens).toBe(
result.usage!.promptTokens + result.usage!.completionTokens,
);
});
it('returns token usage on stream finish chunk', async () => {
const agent = new Agent('usage-stream-test')
.model(getModel('anthropic'))
.instructions('Be concise.');
const { stream: fullStream } = await agent.stream('Say hello');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.usage).toBeDefined();
expect(finish.usage!.promptTokens).toBeGreaterThan(0);
expect(finish.usage!.completionTokens).toBeGreaterThan(0);
expect(finish.usage!.totalTokens).toBe(
finish.usage!.promptTokens + finish.usage!.completionTokens,
);
});
it('includes estimated cost from models.dev pricing', async () => {
const agent = new Agent('cost-test').model(getModel('anthropic')).instructions('Be concise.');
const result = await agent.generate('Say hello');
expect(result.usage).toBeDefined();
expect(result.usage!.cost).toBeDefined();
expect(result.usage!.cost).toBeGreaterThan(0);
// Sanity check: a simple "say hello" should cost less than $0.01
expect(result.usage!.cost!).toBeLessThan(0.01);
});
it('includes model ID in generate result', async () => {
const agent = new Agent('model-test').model(getModel('anthropic')).instructions('Be concise.');
const result = await agent.generate('Say hello');
expect(result.model).toBe(getModel('anthropic'));
});
it('includes cost in stream finish chunk', async () => {
const agent = new Agent('cost-stream-test')
.model(getModel('anthropic'))
.instructions('Be concise.');
const { stream: fullStream } = await agent.stream('Say hello');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
expect(finish.usage).toBeDefined();
expect(finish.usage!.cost).toBeDefined();
expect(finish.usage!.cost).toBeGreaterThan(0);
});
it('aggregates sub-agent usage when using asTool()', async () => {
const subAgent = new Agent('translator')
.model(getModel('anthropic'))
.instructions('Translate the input to French. Reply with only the translation.');
const parentAgent = new Agent('orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are an orchestrator. When asked to translate, use the translator tool. Be concise.',
)
.tool(subAgent.asTool('Translate text to French'));
const result = await parentAgent.generate('Translate "hello world" to French');
// Parent should have its own usage
expect(result.usage).toBeDefined();
expect(result.usage!.promptTokens).toBeGreaterThan(0);
expect(result.usage!.cost).toBeGreaterThan(0);
expect(result.model).toBe(getModel('anthropic'));
// Sub-agent usage should be captured
expect(result.subAgentUsage).toBeDefined();
expect(result.subAgentUsage!.length).toBeGreaterThan(0);
const translatorUsage = result.subAgentUsage!.find((s) => s.agent === 'translator');
expect(translatorUsage).toBeDefined();
expect(translatorUsage!.usage.promptTokens).toBeGreaterThan(0);
expect(translatorUsage!.usage.cost).toBeGreaterThan(0);
// Total cost should be parent + sub-agent
expect(result.totalCost).toBeDefined();
expect(result.totalCost!).toBeGreaterThan(result.usage!.cost!);
expect(result.totalCost!).toBeCloseTo(result.usage!.cost! + translatorUsage!.usage.cost!, 6);
});
it('aggregates sub-agent usage via stream()', async () => {
const subAgent = new Agent('stream-translator')
.model(getModel('anthropic'))
.instructions('Translate the input to French. Reply with only the translation.');
const parentAgent = new Agent('stream-orchestrator')
.model(getModel('anthropic'))
.instructions(
'You are an orchestrator. When asked to translate, use the stream-translator tool. Be concise.',
)
.tool(subAgent.asTool('Translate text to French'));
const { stream: fullStream } = await parentAgent.stream('Translate "goodbye" to French');
const chunks = await collectStreamChunks(fullStream);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const finish = finishChunks[finishChunks.length - 1] as StreamChunk & { type: 'finish' };
// Should have usage with cost
expect(finish.usage).toBeDefined();
expect(finish.usage!.cost).toBeGreaterThan(0);
// Should include model
expect(finish.model).toBe(getModel('anthropic'));
// Should include sub-agent usage
expect(finish.subAgentUsage).toBeDefined();
expect(finish.subAgentUsage!.length).toBeGreaterThan(0);
const translatorUsage = finish.subAgentUsage!.find((s) => s.agent === 'stream-translator');
expect(translatorUsage).toBeDefined();
expect(translatorUsage!.usage.promptTokens).toBeGreaterThan(0);
expect(translatorUsage!.usage.cost).toBeGreaterThan(0);
// Total cost should include parent + sub-agent
expect(finish.totalCost).toBeDefined();
expect(finish.totalCost!).toBeGreaterThan(finish.usage!.cost!);
});
});
const describeOpenAI = describeIf('openai');
describeOpenAI('usage and cost (OpenAI)', () => {
it('returns token usage and cost on generate result', async () => {
const agent = new Agent('openai-usage-test')
.model(getModel('openai'))
.instructions('Be concise.');
const result = await agent.generate('Say hello');
expect(result.usage).toBeDefined();
expect(result.usage!.promptTokens).toBeGreaterThan(0);
expect(result.usage!.completionTokens).toBeGreaterThan(0);
expect(result.usage!.cost).toBeDefined();
expect(result.usage!.cost).toBeGreaterThan(0);
});
});

View file

@ -0,0 +1,240 @@
import { afterEach, beforeEach, expect, it } from 'vitest';
import { Agent } from '../../../sdk/agent';
import type { FileEntry } from '../../../workspace/types';
import { Workspace } from '../../../workspace/workspace';
import { InMemoryFilesystem, FakeProcessManager, FakeSandbox } from '../../workspace/test-utils';
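// The fake filesystem, process manager, and sandbox keep the workspace tools hermetic;
// only the model call itself is live in these integration tests.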
import {
chunksOfType,
collectStreamChunks,
collectTextDeltas,
describeIf,
findAllToolCalls,
findAllToolResults,
getModel,
} from '../helpers';
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
const describe = describeIf('anthropic');
describe('workspace agent integration', () => {
let memFs: InMemoryFilesystem;
let fakeProcessManager: FakeProcessManager;
let fakeSandbox: FakeSandbox;
let workspace: Workspace;
beforeEach(async () => {
memFs = new InMemoryFilesystem('agent-test-fs');
fakeProcessManager = new FakeProcessManager();
fakeSandbox = new FakeSandbox('agent-test', fakeProcessManager);
workspace = new Workspace({
id: 'agent-ws',
filesystem: memFs,
sandbox: fakeSandbox,
});
await workspace.init();
});
afterEach(async () => {
await workspace.destroy();
});
it('agent uses workspace_write_file and workspace_read_file tools', async () => {
const agent = new Agent('workspace-file-test')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. When asked to create a file, use workspace_write_file. ' +
'When asked to read a file, use workspace_read_file. Be concise.',
)
.workspace(workspace);
const result = await agent.generate(
'Write "Hello from n8n!" to /greeting.txt, then read it back and tell me the contents. You MUST call both tools',
);
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
const toolCalls = findAllToolCalls(result.messages);
const toolResults = findAllToolResults(result.messages);
const writeCall = toolCalls.find((tc) => tc.toolName === 'workspace_write_file');
expect(writeCall).toBeDefined();
const readCall = toolCalls.find((tc) => tc.toolName === 'workspace_read_file');
expect(readCall).toBeDefined();
const readResult = toolResults.find((tr) => tr.toolName === 'workspace_read_file');
expect(readResult).toBeDefined();
expect((readResult!.result as { content: string }).content).toContain('Hello from n8n!');
expect(memFs.getFileContent('/greeting.txt')).toBe('Hello from n8n!');
});
it('agent uses workspace_execute_command tool', async () => {
fakeProcessManager.commandHandler = (cmd) => {
if (cmd.includes('echo')) {
const match = cmd.match(/echo\s+"?([^"]*)"?/);
const text = match?.[1] ?? 'unknown';
return { stdout: `${text}\n`, stderr: '', exitCode: 0 };
}
return { stdout: `ran: ${cmd}\n`, stderr: '', exitCode: 0 };
};
const agent = new Agent('workspace-exec-test')
.model(getModel('anthropic'))
.instructions(
'You are a shell assistant. When asked to run a command, use workspace_execute_command. Be concise.',
)
.workspace(workspace);
const result = await agent.generate('Run the command: echo "n8n workspace test"');
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
const toolCalls = findAllToolCalls(result.messages);
const execCall = toolCalls.find((tc) => tc.toolName === 'workspace_execute_command');
expect(execCall).toBeDefined();
const toolResults = findAllToolResults(result.messages);
const execResult = toolResults.find((tr) => tr.toolName === 'workspace_execute_command');
expect(execResult).toBeDefined();
expect((execResult!.result as { success: boolean }).success).toBe(true);
});
it('agent uses workspace_mkdir and workspace_list_files together', async () => {
await memFs.mkdir('/project', { recursive: true });
await memFs.writeFile('/project/index.ts', 'console.log("hello")');
await memFs.writeFile('/project/README.md', '# Project');
const agent = new Agent('workspace-list-test')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. Use workspace_list_files to list files. Be concise and list the filenames you find.',
)
.workspace(workspace);
const result = await agent.generate('List the files in the /project directory.');
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
const toolCalls = findAllToolCalls(result.messages);
const listCall = toolCalls.find((tc) => tc.toolName === 'workspace_list_files');
expect(listCall).toBeDefined();
const toolResults = findAllToolResults(result.messages);
const listResult = toolResults.find((tr) => tr.toolName === 'workspace_list_files');
expect(listResult).toBeDefined();
const entries = (listResult!.result as unknown as { entries: FileEntry[] }).entries;
const names = entries.map((e) => e.name);
expect(names).toContain('index.ts');
expect(names).toContain('README.md');
});
it('workspace instructions are appended to agent instructions', () => {
new Agent('workspace-instructions-test')
.model(getModel('anthropic'))
.instructions('Base instructions.')
.workspace(workspace);
const tools = workspace.getTools();
expect(tools.length).toBe(13);
const instructions = workspace.getInstructions();
expect(instructions).toContain('Fake sandbox');
expect(instructions).toContain('In-memory filesystem');
});
it('stream: agent writes a file and streams the response', async () => {
const agent = new Agent('workspace-stream-test')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. When asked to create a file, use workspace_write_file. Be very concise.',
)
.workspace(workspace);
const { stream } = await agent.stream(
'Create a file at /hello.txt with the content "streaming works"',
);
const chunks = await collectStreamChunks(stream);
const errorChunks = chunks.filter((c) => c.type === 'error');
expect(errorChunks).toHaveLength(0);
const finishChunks = chunksOfType(chunks, 'finish');
expect(finishChunks.length).toBeGreaterThan(0);
const lastFinish = finishChunks[finishChunks.length - 1] as {
type: 'finish';
finishReason: string;
};
expect(lastFinish.finishReason).toBe('stop');
const text = collectTextDeltas(chunks);
expect(text.length).toBeGreaterThan(0);
expect(memFs.getFileContent('/hello.txt')).toBe('streaming works');
});
it('agent uses workspace_file_stat to get file metadata', async () => {
await memFs.writeFile('/data.json', '{"key": "value", "count": 42}');
const agent = new Agent('workspace-stat-test')
.model(getModel('anthropic'))
.instructions(
'You are a file manager. Use workspace_file_stat to get file info. Report the file size and type. Be concise.',
)
.workspace(workspace);
const result = await agent.generate('What is the size and type of /data.json?');
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
const toolCalls = findAllToolCalls(result.messages);
const statCall = toolCalls.find((tc) => tc.toolName === 'workspace_file_stat');
expect(statCall).toBeDefined();
const toolResults = findAllToolResults(result.messages);
const statResult = toolResults.find((tr) => tr.toolName === 'workspace_file_stat');
expect(statResult).toBeDefined();
const stat = statResult!.result as { type: string; size: number };
expect(stat.type).toBe('file');
expect(stat.size).toBe(29);
});
it('agent handles multi-step workflow: mkdir, write, list, read', async () => {
const agent = new Agent('workspace-workflow-test')
.model(getModel('anthropic'))
.instructions(
"You are a file manager. Follow the user's instructions step by step using workspace tools. " +
'Available: workspace_mkdir, workspace_write_file, workspace_list_files, workspace_read_file. Be concise.',
)
.workspace(workspace);
const result = await agent.generate(
'1. Create a directory /app\n' +
'2. Write "export default {}" to /app/config.ts\n' +
'3. List files in /app\n' +
'4. Read /app/config.ts and tell me its contents',
);
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
const toolResults = findAllToolResults(result.messages);
const resultToolNames = toolResults.map((tr) => tr.toolName);
expect(resultToolNames).toContain('workspace_write_file');
expect(resultToolNames).toContain('workspace_read_file');
const readResult = toolResults.find((tr) => tr.toolName === 'workspace_read_file');
expect(readResult).toBeDefined();
expect((readResult!.result as { content: string }).content).toContain('export default {}');
expect(memFs.getFileContent('/app/config.ts')).toBe('export default {}');
});
});

View file

@ -0,0 +1,59 @@
import { expect, it } from 'vitest';
import { z } from 'zod';
import { describeIf, findLastTextContent } from './helpers';
import { Agent, Tool, filterLlmMessages } from '../../index';
const describe = describeIf('anthropic');
describe('Zod validation errors surface to LLM and allow self-correction', () => {
/**
* Verify that when the LLM receives a Zod error result, it shows up in the
* conversation as an actual tool-result message with an error payload (not a
* thrown exception), so the agent loop continues rather than aborting.
*/
it('includes the Zod error text in the tool-result visible to the LLM', async () => {
const strictTool = new Tool('find_user')
.description('Find a user by their numeric age (18-99 only).')
.input(
z.object({
age: z
.number()
.int()
.min(18, 'age must be at least 18')
.max(99, 'age must be at most 99')
.describe('User age (18-99)'),
}),
)
.output(z.object({ user: z.string() }))
.handler(async ({ age }) => ({ user: `User aged ${age}` }));
const agent = new Agent('age-correction-agent')
.model('anthropic/claude-haiku-4-5')
.instructions(
'You are a user directory. Use find_user to look up users by age. ' +
'The age must be between 18 and 99. ' +
'If validation fails, correct the age and retry. Be very concise.',
)
.tool(strictTool);
// "150" is out of range — should trigger a Zod error, then retry with a valid age
const result = await agent.generate(
'Find a user aged 150. If that age is invalid, use 25 instead and retry. You MUST find a user aged 150, and only then use 25',
);
expect(result.finishReason).toBe('stop');
expect(result.error).toBeUndefined();
// At least two tool-result messages: one error, one success
const allMessages = filterLlmMessages(result.messages);
const toolResultMessages = allMessages.filter((m) =>
m.content.some((c) => c.type === 'tool-result'),
);
expect(toolResultMessages.length).toBeGreaterThanOrEqual(2);
// The final response should mention a user (age 25 or similar)
const text = findLastTextContent(result.messages);
expect(text).toBeTruthy();
});
});

View file

@ -0,0 +1,66 @@
import type { LanguageModel } from 'ai';
import { createModel } from '../runtime/model-factory';
jest.mock('@ai-sdk/anthropic', () => ({
createAnthropic: (opts?: { apiKey?: string; baseURL?: string }) => (model: string) => ({
provider: 'anthropic',
modelId: model,
apiKey: opts?.apiKey,
baseURL: opts?.baseURL,
specificationVersion: 'v3',
}),
}));
jest.mock('@ai-sdk/openai', () => ({
createOpenAI: (opts?: { apiKey?: string; baseURL?: string }) => (model: string) => ({
provider: 'openai',
modelId: model,
apiKey: opts?.apiKey,
baseURL: opts?.baseURL,
specificationVersion: 'v3',
}),
}));
describe('createModel', () => {
it('should accept a string config', () => {
const model = createModel('anthropic/claude-sonnet-4-5') as unknown as Record<string, unknown>;
expect(model.provider).toBe('anthropic');
expect(model.modelId).toBe('claude-sonnet-4-5');
});
it('should accept an object config with url', () => {
const model = createModel({
id: 'openai/gpt-4o',
apiKey: 'sk-test',
url: 'https://custom.endpoint.com/v1',
}) as unknown as Record<string, unknown>;
expect(model.provider).toBe('openai');
expect(model.modelId).toBe('gpt-4o');
expect(model.apiKey).toBe('sk-test');
expect(model.baseURL).toBe('https://custom.endpoint.com/v1');
});
it('should pass through a prebuilt LanguageModel', () => {
const prebuilt = {
doGenerate: jest.fn(),
doStream: jest.fn(),
specificationVersion: 'v2' as const,
modelId: 'custom-model',
provider: 'custom',
defaultObjectGenerationMode: undefined,
} as unknown as LanguageModel;
const result = createModel(prebuilt);
expect(result).toBe(prebuilt);
});
it('should preserve complex model IDs after the provider prefix', () => {
const model = createModel('openai/ft:gpt-4o:my-org:custom:abc123') as unknown as Record<
string,
unknown
>;
expect(model.provider).toBe('openai');
expect(model.modelId).toBe('ft:gpt-4o:my-org:custom:abc123');
});
});
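
Read together, these cases pin down the string form of the config: the provider is whatever precedes the first `/`, and everything after it (separators included) is passed through as the model id. A rough illustration of that split, for orientation only, not the actual `model-factory` code:

```ts
// Illustration only; the real createModel also handles object configs and prebuilt models.
function splitModelString(id: string): { provider: string; modelId: string } {
	const [provider, ...rest] = id.split('/');
	return { provider, modelId: rest.join('/') };
}

splitModelString('anthropic/claude-sonnet-4-5');
// → { provider: 'anthropic', modelId: 'claude-sonnet-4-5' }
splitModelString('openai/ft:gpt-4o:my-org:custom:abc123');
// → { provider: 'openai', modelId: 'ft:gpt-4o:my-org:custom:abc123' }
```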

View file

@ -0,0 +1,553 @@
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import { SqliteMemory } from '../storage/sqlite-memory';
import type { AgentMessage, Message } from '../types/sdk/message';
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
function makeTempDb(): string {
return path.join(os.tmpdir(), `test-${Date.now()}-${Math.random().toString(36).slice(2)}.db`);
}
function makeMsg(role: 'user' | 'assistant', text: string): Message {
return { role, content: [{ type: 'text', text }] };
}
function textOf(msg: AgentMessage): string {
const m = msg as Message;
return (m.content[0] as { text: string }).text;
}
function makeMemory(dbPath: string, namespace?: string): SqliteMemory {
return new SqliteMemory({ url: `file:${dbPath}`, namespace });
}
// ---------------------------------------------------------------------------
// Thread management
// ---------------------------------------------------------------------------
describe('SqliteMemory — threads', () => {
let dbPath: string;
beforeEach(() => {
dbPath = makeTempDb();
});
afterEach(() => {
try {
fs.unlinkSync(dbPath);
} catch {
/* ignore */
}
});
it('saves and retrieves a thread', async () => {
const mem = makeMemory(dbPath);
const saved = await mem.saveThread({
id: 't-1',
resourceId: 'user-1',
title: 'Hello',
metadata: { foo: 'bar' },
});
expect(saved.id).toBe('t-1');
expect(saved.resourceId).toBe('user-1');
expect(saved.title).toBe('Hello');
expect(saved.metadata).toEqual({ foo: 'bar' });
expect(saved.createdAt).toBeInstanceOf(Date);
expect(saved.updatedAt).toBeInstanceOf(Date);
const fetched = await mem.getThread('t-1');
expect(fetched).not.toBeNull();
expect(fetched!.id).toBe('t-1');
expect(fetched!.title).toBe('Hello');
expect(fetched!.metadata).toEqual({ foo: 'bar' });
});
it('returns null for an unknown thread', async () => {
const mem = makeMemory(dbPath);
const result = await mem.getThread('nonexistent');
expect(result).toBeNull();
});
it('deletes a thread and its messages', async () => {
const mem = makeMemory(dbPath);
await mem.saveThread({ id: 't-del', resourceId: 'user-1' });
await mem.saveMessages({ threadId: 't-del', messages: [makeMsg('user', 'hi')] });
await mem.deleteThread('t-del');
expect(await mem.getThread('t-del')).toBeNull();
expect(await mem.getMessages('t-del')).toEqual([]);
});
it('preserves createdAt on re-save, updates updatedAt', async () => {
const mem = makeMemory(dbPath);
const first = await mem.saveThread({ id: 't-resave', resourceId: 'user-1', title: 'v1' });
// Small delay to ensure updatedAt differs
await new Promise((r) => setTimeout(r, 20));
const second = await mem.saveThread({ id: 't-resave', resourceId: 'user-1', title: 'v2' });
expect(second.createdAt.getTime()).toBe(first.createdAt.getTime());
expect(second.updatedAt.getTime()).toBeGreaterThanOrEqual(first.updatedAt.getTime());
expect(second.title).toBe('v2');
});
});
// ---------------------------------------------------------------------------
// Message persistence
// ---------------------------------------------------------------------------
describe('SqliteMemory — messages', () => {
let dbPath: string;
beforeEach(() => {
dbPath = makeTempDb();
});
afterEach(() => {
try {
fs.unlinkSync(dbPath);
} catch {
/* ignore */
}
});
it('saves and retrieves messages in order', async () => {
const mem = makeMemory(dbPath);
await mem.saveMessages({
threadId: 't-1',
messages: [
makeMsg('user', 'first'),
makeMsg('assistant', 'second'),
makeMsg('user', 'third'),
],
});
const msgs = await mem.getMessages('t-1');
expect(msgs).toHaveLength(3);
expect(textOf(msgs[0])).toBe('first');
expect(textOf(msgs[1])).toBe('second');
expect(textOf(msgs[2])).toBe('third');
});
it('respects limit — returns last N messages', async () => {
const mem = makeMemory(dbPath);
// Save messages one at a time to guarantee distinct createdAt timestamps
await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('user', 'msg-1')] });
await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('assistant', 'msg-2')] });
await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('user', 'msg-3')] });
await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('assistant', 'msg-4')] });
const msgs = await mem.getMessages('t-1', { limit: 2 });
expect(msgs).toHaveLength(2);
expect(textOf(msgs[0])).toBe('msg-3');
expect(textOf(msgs[1])).toBe('msg-4');
});
it('isolates messages by thread', async () => {
const mem = makeMemory(dbPath);
await mem.saveMessages({ threadId: 't-a', messages: [makeMsg('user', 'thread-a')] });
await mem.saveMessages({ threadId: 't-b', messages: [makeMsg('user', 'thread-b')] });
const msgsA = await mem.getMessages('t-a');
expect(msgsA).toHaveLength(1);
expect(textOf(msgsA[0])).toBe('thread-a');
const msgsB = await mem.getMessages('t-b');
expect(msgsB).toHaveLength(1);
expect(textOf(msgsB[0])).toBe('thread-b');
});
it('assigns stable IDs — preserves existing, generates for missing', async () => {
const mem = makeMemory(dbPath);
const withId = { ...makeMsg('user', 'has-id'), id: 'custom-id-123' } as unknown as AgentMessage;
const withoutId = makeMsg('assistant', 'no-id');
await mem.saveMessages({ threadId: 't-1', messages: [withId, withoutId] });
const msgs = await mem.getMessages('t-1');
expect(msgs).toHaveLength(2);
// The message with a pre-existing id should keep it
const first = msgs[0] as unknown as { id: string };
expect(first.id).toBe('custom-id-123');
// The message without id should have gotten one assigned
const second = msgs[1] as unknown as { id: string };
expect(typeof second.id).toBe('string');
expect(second.id.length).toBeGreaterThan(0);
});
it('deletes specific messages', async () => {
const mem = makeMemory(dbPath);
const m1 = { ...makeMsg('user', 'keep'), id: 'keep-1' } as unknown as AgentMessage;
const m2 = { ...makeMsg('user', 'delete-me'), id: 'del-1' } as unknown as AgentMessage;
await mem.saveMessages({ threadId: 't-1', messages: [m1, m2] });
await mem.deleteMessages(['del-1']);
const msgs = await mem.getMessages('t-1');
expect(msgs).toHaveLength(1);
expect((msgs[0] as unknown as { id: string }).id).toBe('keep-1');
});
});
// ---------------------------------------------------------------------------
// Working memory
// ---------------------------------------------------------------------------
describe('SqliteMemory — working memory', () => {
let dbPath: string;
beforeEach(() => {
dbPath = makeTempDb();
});
afterEach(() => {
try {
fs.unlinkSync(dbPath);
} catch {
/* ignore */
}
});
it('returns null for an unknown key', async () => {
const mem = makeMemory(dbPath);
const result = await mem.getWorkingMemory({ threadId: 'thread-x', resourceId: 'unknown' });
expect(result).toBeNull();
});
it('saves and retrieves working memory keyed by resourceId', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'some context');
const result = await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' });
expect(result).toBe('some context');
});
it('overwrites working memory on re-save', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'v1');
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'v2');
const result = await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' });
expect(result).toBe('v2');
});
it('isolates working memory by resourceId', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-a', resourceId: 'res-a' }, 'content-a');
await mem.saveWorkingMemory({ threadId: 'thread-b', resourceId: 'res-b' }, 'content-b');
expect(await mem.getWorkingMemory({ threadId: 'thread-a', resourceId: 'res-a' })).toBe(
'content-a',
);
expect(await mem.getWorkingMemory({ threadId: 'thread-b', resourceId: 'res-b' })).toBe(
'content-b',
);
});
it('saves and retrieves working memory keyed by threadId (no resourceId)', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'thread context');
const result = await mem.getWorkingMemory({ threadId: 'thread-1' });
expect(result).toBe('thread context');
});
it('isolates working memory by threadId', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'data 1');
await mem.saveWorkingMemory({ threadId: 'thread-2' }, 'data 2');
expect(await mem.getWorkingMemory({ threadId: 'thread-1' })).toBe('data 1');
expect(await mem.getWorkingMemory({ threadId: 'thread-2' })).toBe('data 2');
});
it('stores scope=resource when resourceId is provided', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'resource content');
const { createClient } = await import('@libsql/client');
const db = createClient({ url: `file:${dbPath}` });
const result = await db.execute('SELECT scope FROM working_memory WHERE key = ?', ['res-1']);
expect(result.rows[0].scope).toBe('resource');
});
it('stores scope=thread when only threadId is provided', async () => {
const mem = makeMemory(dbPath);
await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'thread content');
const { createClient } = await import('@libsql/client');
const db = createClient({ url: `file:${dbPath}` });
const result = await db.execute('SELECT scope FROM working_memory WHERE key = ?', ['thread-1']);
expect(result.rows[0].scope).toBe('thread');
});
it('does not mix resource-scoped and thread-scoped entries with the same key value', async () => {
const mem = makeMemory(dbPath);
const sharedKey = 'same-id';
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: sharedKey }, 'resource data');
await mem.saveWorkingMemory({ threadId: sharedKey }, 'thread data');
expect(await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: sharedKey })).toBe(
'resource data',
);
expect(await mem.getWorkingMemory({ threadId: sharedKey })).toBe('thread data');
});
});
// ---------------------------------------------------------------------------
// Restart survival
// ---------------------------------------------------------------------------
describe('SqliteMemory — restart survival', () => {
let dbPath: string;
beforeEach(() => {
dbPath = makeTempDb();
});
afterEach(() => {
try {
fs.unlinkSync(dbPath);
} catch {
/* ignore */
}
});
it('data survives a new SqliteMemory instance on same file', async () => {
const mem1 = makeMemory(dbPath);
await mem1.saveThread({ id: 't-surv', resourceId: 'user-1', title: 'persistent' });
await mem1.saveMessages({ threadId: 't-surv', messages: [makeMsg('user', 'hello from past')] });
await mem1.saveWorkingMemory({ threadId: 't-surv', resourceId: 'user-1' }, 'wm-data');
// Create a brand new instance pointing at the same file
const mem2 = makeMemory(dbPath);
const thread = await mem2.getThread('t-surv');
expect(thread).not.toBeNull();
expect(thread!.title).toBe('persistent');
const msgs = await mem2.getMessages('t-surv');
expect(msgs).toHaveLength(1);
expect(textOf(msgs[0])).toBe('hello from past');
const wm = await mem2.getWorkingMemory({ threadId: 't-surv', resourceId: 'user-1' });
expect(wm).toBe('wm-data');
});
});
// ---------------------------------------------------------------------------
// Search
// ---------------------------------------------------------------------------
describe('SqliteMemory — queryEmbeddings', () => {
let dbPath: string;
let mem: SqliteMemory;
beforeEach(() => {
dbPath = makeTempDb();
mem = makeMemory(dbPath);
});
afterEach(() => {
try {
fs.unlinkSync(dbPath);
} catch {
/* ignore */
}
});
it('returns empty array when no embeddings stored', async () => {
const results = await mem.queryEmbeddings({
threadId: 't1',
vector: new Array<number>(3).fill(0),
topK: 5,
});
expect(results).toEqual([]);
});
it('returns nearest neighbours by cosine similarity', async () => {
await mem.saveThread({ id: 't1', resourceId: 'u1' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'u1',
entries: [
{ id: 'msg-cats', vector: [1.0, 0.0, 0.0], text: 'About cats', model: 'test' },
{ id: 'msg-dogs', vector: [0.0, 1.0, 0.0], text: 'About dogs', model: 'test' },
{ id: 'msg-kittens', vector: [0.9, 0.1, 0.0], text: 'About kittens', model: 'test' },
],
});
// Query close to [1,0,0] — should return cats first, then kittens
const results = await mem.queryEmbeddings({
scope: 'resource',
resourceId: 'u1',
vector: [1.0, 0.0, 0.0],
topK: 2,
});
expect(results).toHaveLength(2);
expect(results[0].id).toBe('msg-cats');
expect(results[0].score).toBeGreaterThan(0.9);
expect(results[1].id).toBe('msg-kittens');
});
it('filters by threadId with scope=thread', async () => {
await mem.saveThread({ id: 't1', resourceId: 'u1' });
await mem.saveThread({ id: 't2', resourceId: 'u1' });
await mem.saveEmbeddings({
threadId: 't1',
entries: [{ id: 'msg-t1', vector: [1.0, 0.0, 0.0], text: 'Thread 1', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
entries: [{ id: 'msg-t2', vector: [1.0, 0.0, 0.0], text: 'Thread 2', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'thread',
threadId: 't1',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(1);
expect(results[0].id).toBe('msg-t1');
});
it('filters by resourceId with scope=resource', async () => {
await mem.saveThread({ id: 't1', resourceId: 'user-a' });
await mem.saveThread({ id: 't2', resourceId: 'user-a' });
await mem.saveThread({ id: 't3', resourceId: 'user-b' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'user-a',
entries: [{ id: 'msg-1', vector: [1.0, 0.0, 0.0], text: 'User A thread 1', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'user-a',
entries: [{ id: 'msg-2', vector: [0.9, 0.1, 0.0], text: 'User A thread 2', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't3',
resourceId: 'user-b',
entries: [{ id: 'msg-3', vector: [1.0, 0.0, 0.0], text: 'User B thread 3', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'resource',
resourceId: 'user-a',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(2);
const ids = results.map((r) => r.id);
expect(ids).toContain('msg-1');
expect(ids).toContain('msg-2');
expect(ids).not.toContain('msg-3');
});
it('defaults to resource scope — returns all embeddings for a resourceId across threads', async () => {
await mem.saveThread({ id: 't1', resourceId: 'user-x' });
await mem.saveThread({ id: 't2', resourceId: 'user-x' });
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'user-x',
entries: [{ id: 'msg-a', vector: [1.0, 0.0, 0.0], text: 'Thread 1 msg', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'user-x',
entries: [{ id: 'msg-b', vector: [0.9, 0.1, 0.0], text: 'Thread 2 msg', model: 'test' }],
});
// No explicit scope — should default to 'resource'
const results = await mem.queryEmbeddings({
resourceId: 'user-x',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(2);
});
it('resource scope excludes embeddings from other resources', async () => {
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'res-1',
entries: [{ id: 'msg-r1', vector: [1.0, 0.0, 0.0], text: 'Resource 1', model: 'test' }],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'res-2',
entries: [{ id: 'msg-r2', vector: [1.0, 0.0, 0.0], text: 'Resource 2', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'resource',
resourceId: 'res-1',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(1);
expect(results[0].id).toBe('msg-r1');
});
it('thread scope only returns embeddings from the specified thread', async () => {
await mem.saveEmbeddings({
threadId: 't1',
resourceId: 'user-1',
entries: [
{ id: 'msg-t1a', vector: [1.0, 0.0, 0.0], text: 'Thread 1 A', model: 'test' },
{ id: 'msg-t1b', vector: [0.0, 1.0, 0.0], text: 'Thread 1 B', model: 'test' },
],
});
await mem.saveEmbeddings({
threadId: 't2',
resourceId: 'user-1',
entries: [{ id: 'msg-t2', vector: [1.0, 0.0, 0.0], text: 'Thread 2', model: 'test' }],
});
const results = await mem.queryEmbeddings({
scope: 'thread',
threadId: 't1',
vector: [1.0, 0.0, 0.0],
topK: 10,
});
expect(results).toHaveLength(2);
const ids = results.map((r) => r.id);
expect(ids).toContain('msg-t1a');
expect(ids).toContain('msg-t1b');
expect(ids).not.toContain('msg-t2');
});
});
// ---------------------------------------------------------------------------
// Namespace validation
// ---------------------------------------------------------------------------
describe('SqliteMemory — namespace', () => {
it('rejects invalid namespace characters', () => {
expect(() => new SqliteMemory({ url: 'file::memory:', namespace: 'bad-ns!' })).toThrow(
/Invalid namespace/,
);
});
it('accepts valid namespace', () => {
expect(() => new SqliteMemory({ url: 'file::memory:', namespace: 'my_ns_01' })).not.toThrow();
});
});
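
For orientation, the surface these suites exercise reduces to a small usage sketch. The package import path is an assumption (the class lives under `storage/sqlite-memory`), and the file path, namespace, and ids are made up:

```ts
// Usage sketch based on the behaviour above; not copied from package docs.
import { SqliteMemory } from '@n8n/agents';

const memory = new SqliteMemory({ url: 'file:./agent-memory.db', namespace: 'demo_ns_01' });

await memory.saveThread({ id: 'thread-1', resourceId: 'user-42', title: 'Onboarding' });
await memory.saveMessages({
	threadId: 'thread-1',
	messages: [{ role: 'user', content: [{ type: 'text', text: 'Hi there' }] }],
});

const recent = await memory.getMessages('thread-1', { limit: 20 }); // last 20, oldest first
await memory.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'user-42' }, '# Name: Alice');
const wm = await memory.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-42' });
```

Data written this way survives a new `SqliteMemory` instance pointed at the same file, and deleting a thread removes its messages as well.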

View file

@ -0,0 +1,157 @@
import { stripOrphanedToolMessages } from '../runtime/strip-orphaned-tool-messages';
import { isLlmMessage, toDbMessage } from '../sdk/message';
import type { AgentDbMessage, AgentMessage, Message } from '../types/sdk/message';
function seed(messages: AgentMessage[]): AgentDbMessage[] {
return messages.map(toDbMessage);
}
describe('stripOrphanedToolMessages', () => {
it('returns messages unchanged when all tool pairs are complete', () => {
const messages = seed([
{ role: 'user', content: [{ type: 'text', text: 'Hello' }] },
{
role: 'assistant',
content: [
{ type: 'text', text: 'Looking up...' },
{ type: 'tool-call', toolCallId: 'c1', toolName: 'lookup', input: {} },
],
},
{
role: 'tool',
content: [{ type: 'tool-result', toolCallId: 'c1', toolName: 'lookup', result: 42 }],
},
{ role: 'assistant', content: [{ type: 'text', text: 'Done.' }] },
]);
const result = stripOrphanedToolMessages(messages);
expect(result).toBe(messages);
});
it('strips orphaned tool-result when matching tool-call is missing', () => {
const messages = seed([
{
role: 'tool',
content: [{ type: 'tool-result', toolCallId: 'c1', toolName: 'lookup', result: 42 }],
},
{ role: 'assistant', content: [{ type: 'text', text: 'There are 42.' }] },
{ role: 'user', content: [{ type: 'text', text: 'Thanks' }] },
]);
const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];
expect(result).toHaveLength(2);
expect(result[0].role).toBe('assistant');
expect(result[1].role).toBe('user');
});
it('strips orphaned tool-call when matching tool-result is missing', () => {
const messages = seed([
{ role: 'user', content: [{ type: 'text', text: 'Check it' }] },
{
role: 'assistant',
content: [
{ type: 'text', text: 'Checking...' },
{ type: 'tool-call', toolCallId: 'c1', toolName: 'lookup', input: {} },
],
},
]);
const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];
expect(result).toHaveLength(2);
const assistantMsg = result[1];
expect(assistantMsg.role).toBe('assistant');
expect(assistantMsg.content).toHaveLength(1);
expect(assistantMsg.content[0].type).toBe('text');
});
it('drops assistant message entirely if it only contained an orphaned tool-call', () => {
const messages = seed([
{ role: 'user', content: [{ type: 'text', text: 'Do it' }] },
{
role: 'assistant',
content: [{ type: 'tool-call', toolCallId: 'c1', toolName: 'action', input: {} }],
},
]);
const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];
expect(result).toHaveLength(1);
expect(result[0].role).toBe('user');
});
it('handles mixed scenario: one complete pair and one orphaned result', () => {
const messages = seed([
{
role: 'tool',
content: [
{ type: 'tool-result', toolCallId: 'orphan', toolName: 'lookup', result: 'stale' },
],
},
{ role: 'assistant', content: [{ type: 'text', text: 'Old result' }] },
{ role: 'user', content: [{ type: 'text', text: 'New question' }] },
{
role: 'assistant',
content: [
{ type: 'text', text: 'Looking up...' },
{ type: 'tool-call', toolCallId: 'c2', toolName: 'lookup', input: {} },
],
},
{
role: 'tool',
content: [{ type: 'tool-result', toolCallId: 'c2', toolName: 'lookup', result: 99 }],
},
{ role: 'assistant', content: [{ type: 'text', text: '99 items' }] },
]);
const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];
expect(result).toHaveLength(5);
expect(result[0].role).toBe('assistant');
expect(result[0].content[0]).toEqual(
expect.objectContaining({ type: 'text', text: 'Old result' }),
);
const toolCallMsg = result.find(
(m) => m.role === 'assistant' && m.content.some((c) => c.type === 'tool-call'),
);
expect(toolCallMsg).toBeDefined();
const toolResultMsg = result.find((m) => m.role === 'tool');
expect(toolResultMsg).toBeDefined();
});
it('preserves custom (non-LLM) messages', () => {
const customMsg: AgentDbMessage = {
id: 'custom-1',
type: 'custom',
messageType: 'notification',
data: { info: 'hello' },
} as unknown as AgentDbMessage;
const messages: AgentDbMessage[] = [
customMsg,
...seed([
{
role: 'tool',
content: [{ type: 'tool-result', toolCallId: 'orphan', toolName: 'x', result: null }],
},
]),
];
const result = stripOrphanedToolMessages(messages);
expect(result).toHaveLength(1);
expect(result[0]).toBe(customMsg);
});
it('returns same array reference when no orphans exist (no-op fast path)', () => {
const messages = seed([
{ role: 'user', content: [{ type: 'text', text: 'Hi' }] },
{ role: 'assistant', content: [{ type: 'text', text: 'Hello!' }] },
]);
const result = stripOrphanedToolMessages(messages);
expect(result).toBe(messages);
});
});
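
In practice this gives the runtime a cheap sanitising pass over persisted history before it is replayed to the model; because the no-orphan fast path returns the same array reference, callers can tell at a glance whether anything was dropped. A small sketch of that internal call pattern (runtime code, not public API; paths as seen from this test directory):

```ts
import { stripOrphanedToolMessages } from '../runtime/strip-orphaned-tool-messages';
import type { AgentDbMessage } from '../types/sdk/message';

// Internal runtime sketch: sanitise stored history before replaying it to the model.
function sanitizeHistory(dbMessages: AgentDbMessage[]): AgentDbMessage[] {
	const sanitized = stripOrphanedToolMessages(dbMessages);
	if (sanitized !== dbMessages) {
		// at least one unpaired tool-call / tool-result was stripped; the input array is untouched
	}
	return sanitized;
}
```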

View file

@ -0,0 +1,170 @@
import type { TelemetryIntegration } from 'ai';
import { Telemetry } from '../sdk/telemetry';
describe('Telemetry builder', () => {
it('builds with defaults', async () => {
const built = await new Telemetry().build();
expect(built.enabled).toBe(true);
expect(built.recordInputs).toBe(true);
expect(built.recordOutputs).toBe(true);
expect(built.functionId).toBeUndefined();
expect(built.metadata).toBeUndefined();
expect(built.integrations).toEqual([]);
expect(built.tracer).toBeUndefined();
expect(built.provider).toBeUndefined();
});
it('sets all scalar fields', async () => {
const built = await new Telemetry()
.enabled(false)
.functionId('my-agent')
.metadata({ team: 'platform', version: 2 })
.recordInputs(false)
.recordOutputs(false)
.build();
expect(built.enabled).toBe(false);
expect(built.functionId).toBe('my-agent');
expect(built.metadata).toEqual({ team: 'platform', version: 2 });
expect(built.recordInputs).toBe(false);
expect(built.recordOutputs).toBe(false);
});
it('accepts a pre-built tracer', async () => {
const fakeTracer = { startSpan: jest.fn() };
const built = await new Telemetry().tracer(fakeTracer).build();
expect(built.tracer).toBe(fakeTracer);
});
it('throws when both .tracer() and .otlpEndpoint() are set', async () => {
await expect(
new Telemetry()
.tracer({ startSpan: jest.fn() })
.otlpEndpoint('http://localhost:4318')
.build(),
).rejects.toThrow('Cannot set both .tracer() and .otlpEndpoint()');
});
it('collects multiple integrations', async () => {
const int1: TelemetryIntegration = { onStart: jest.fn() };
const int2: TelemetryIntegration = { onFinish: jest.fn() };
const built = await new Telemetry().integration(int1).integration(int2).build();
expect(built.integrations).toHaveLength(2);
});
});
describe('Telemetry — redaction wrapping', () => {
it('wraps integrations with redaction when .redact() is set', async () => {
const receivedEvents: unknown[] = [];
const integration: TelemetryIntegration = {
onStart: (event) => {
receivedEvents.push(event);
},
onFinish: (event) => {
receivedEvents.push(event);
},
};
const built = await new Telemetry()
.redact((data) => {
const filtered = { ...data };
delete filtered.secret;
return filtered;
})
.integration(integration)
.build();
// Call the wrapped onStart hook
const startEvent = { model: { modelId: 'test' }, messages: { secret: 'hidden', safe: 'ok' } };
built.integrations[0].onStart!(startEvent as never);
// The secret should be redacted from nested objects
const received = receivedEvents[0] as Record<string, unknown>;
const messages = received.messages as Record<string, unknown>;
expect(messages.secret).toBeUndefined();
expect(messages.safe).toBe('ok');
});
it('does not wrap integrations when .redact() is not set', async () => {
const integration: TelemetryIntegration = { onStart: jest.fn() };
const built = await new Telemetry().integration(integration).build();
// The integration object may be shallow-copied via spread, but its hooks are passed through unwrapped
expect(built.integrations[0].onStart).toBe(integration.onStart);
});
it('redacts top-level scalar fields via the redact callback', async () => {
const receivedEvents: unknown[] = [];
const integration: TelemetryIntegration = {
onStart: (event) => {
receivedEvents.push(event);
},
};
const built = await new Telemetry()
.redact((data) => {
const filtered = { ...data };
delete filtered.secret;
return filtered;
})
.integration(integration)
.build();
const startEvent = { secret: 'top-level-secret', safe: 'ok', nested: { a: 1 } };
built.integrations[0].onStart!(startEvent as never);
const received = receivedEvents[0] as Record<string, unknown>;
expect(received.secret).toBeUndefined();
expect(received.safe).toBe('ok');
});
it('redacts objects inside arrays', async () => {
const receivedEvents: unknown[] = [];
const integration: TelemetryIntegration = {
onStart: (event) => {
receivedEvents.push(event);
},
};
const built = await new Telemetry()
.redact((data) => {
const filtered = { ...data };
delete filtered.secret;
return filtered;
})
.integration(integration)
.build();
const startEvent = {
items: [
{ secret: 'hidden', safe: 'ok' },
{ secret: 'also-hidden', value: 42 },
],
};
built.integrations[0].onStart!(startEvent as never);
const received = receivedEvents[0] as Record<string, unknown>;
const items = received.items as Array<Record<string, unknown>>;
expect(items[0].secret).toBeUndefined();
expect(items[0].safe).toBe('ok');
expect(items[1].secret).toBeUndefined();
expect(items[1].value).toBe(42);
});
});
describe('Telemetry.shutdown()', () => {
it('calls provider.shutdown() when provider exists', async () => {
const shutdownMock = jest.fn().mockResolvedValue(undefined);
const built = await new Telemetry().build();
// Manually inject a mock provider
const withProvider = {
...built,
provider: { forceFlush: jest.fn(), shutdown: shutdownMock },
};
await Telemetry.shutdown(withProvider);
expect(shutdownMock).toHaveBeenCalled();
});
it('does nothing when no provider exists', async () => {
const built = await new Telemetry().build();
// Should not throw
await Telemetry.shutdown(built);
});
});
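
Put together, a redacting telemetry setup looks roughly like the sketch below. The redact callback is applied recursively, so keys removed from the returned object also disappear from nested objects and array elements; the package import path, `myIntegration`, and the exact wiring into an agent are assumptions here:

```ts
import type { TelemetryIntegration } from 'ai';
import { Telemetry } from '@n8n/agents';

// Placeholder for any TelemetryIntegration (e.g. a LangSmith adapter).
declare const myIntegration: TelemetryIntegration;

const telemetry = new Telemetry()
	.functionId('support-agent')
	.metadata({ team: 'platform', version: 2 })
	.redact((data) => {
		const filtered = { ...data };
		delete filtered.secret; // strip sensitive keys before any integration sees the event
		return filtered;
	})
	.integration(myIntegration);
```

When a tracer provider has been set up, `Telemetry.shutdown(built)` calls the provider's shutdown; with no provider it is a no-op.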

View file

@ -0,0 +1,191 @@
import type { JSONSchema7 } from 'json-schema';
import { z } from 'zod';
import { toAiSdkTools } from '../runtime/tool-adapter';
import type { BuiltTool } from '../types';
// ---------------------------------------------------------------------------
// Module mocks
// ---------------------------------------------------------------------------
// eslint-disable-next-line @typescript-eslint/consistent-type-imports
type AiImport = typeof import('ai');
const jsonSchemaMock = jest.fn((schema: JSONSchema7) => ({ __jsonSchema: schema }));
jest.mock('ai', () => {
const actual = jest.requireActual<AiImport>('ai');
return {
...actual,
tool: jest.fn((config: unknown) => config),
jsonSchema: (schema: JSONSchema7) => jsonSchemaMock(schema),
};
});
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
function makeJsonSchemaTool(
inputSchema: JSONSchema7,
overrides: Partial<BuiltTool> = {},
): BuiltTool {
return {
name: 'testTool',
description: 'A test tool',
inputSchema,
...overrides,
};
}
function makeZodSchemaTool(overrides: Partial<BuiltTool> = {}): BuiltTool {
return {
name: 'zodTool',
description: 'A zod schema tool',
inputSchema: z.object({ id: z.string() }),
...overrides,
};
}
// ---------------------------------------------------------------------------
// toAiSdkTools — empty / missing input
// ---------------------------------------------------------------------------
describe('toAiSdkTools — empty / missing input', () => {
it('returns an empty object when tools is undefined', () => {
expect(toAiSdkTools(undefined)).toEqual({});
});
it('returns an empty object when tools is an empty array', () => {
expect(toAiSdkTools([])).toEqual({});
});
it('skips tools that have no inputSchema', () => {
const tool: BuiltTool = { name: 'noSchema', description: 'no schema' };
const result = toAiSdkTools([tool]);
expect(result).toEqual({});
});
});
// ---------------------------------------------------------------------------
// toAiSdkTools — Zod schemas
// ---------------------------------------------------------------------------
describe('toAiSdkTools — Zod schemas', () => {
beforeEach(() => {
jsonSchemaMock.mockClear();
});
it('registers a tool keyed by its name', () => {
const result = toAiSdkTools([makeZodSchemaTool()]);
expect(result).toHaveProperty('zodTool');
});
it('does NOT call jsonSchema() for Zod schema tools', () => {
toAiSdkTools([makeZodSchemaTool()]);
expect(jsonSchemaMock).not.toHaveBeenCalled();
});
it('passes the Zod schema directly as inputSchema', () => {
const zodSchema = z.object({ query: z.string() });
const result = toAiSdkTools([
{ name: 'search', description: 'Search', inputSchema: zodSchema },
]);
expect((result['search'] as { inputSchema: unknown }).inputSchema).toBe(zodSchema);
});
});
// ---------------------------------------------------------------------------
// toAiSdkTools — JSON Schema (fixSchema behaviour)
// ---------------------------------------------------------------------------
describe('toAiSdkTools — JSON Schema / fixSchema', () => {
beforeEach(() => {
jsonSchemaMock.mockClear();
});
it('registers a tool keyed by its name', () => {
const result = toAiSdkTools([makeJsonSchemaTool({ properties: { id: { type: 'string' } } })]);
expect(result).toHaveProperty('testTool');
});
it('calls jsonSchema() for JSON Schema tools', () => {
toAiSdkTools([makeJsonSchemaTool({ type: 'object', properties: { id: { type: 'string' } } })]);
expect(jsonSchemaMock).toHaveBeenCalledTimes(1);
});
it('fixSchema: adds type "object" when properties is present but type is absent', () => {
const rawSchema: JSONSchema7 = {
properties: { name: { type: 'string' } },
};
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
expect(jsonSchemaMock).toHaveBeenCalledWith(
expect.objectContaining({ type: 'object', properties: { name: { type: 'string' } } }),
);
});
it('fixSchema: preserves existing type when type is already set alongside properties', () => {
const rawSchema: JSONSchema7 = {
type: 'object',
properties: { count: { type: 'number' } },
};
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
expect(jsonSchemaMock).toHaveBeenCalledWith(
expect.objectContaining({ type: 'object', properties: { count: { type: 'number' } } }),
);
// Confirm type was not altered from original
const received = jsonSchemaMock.mock.calls[0][0];
expect(received.type).toBe('object');
});
it('fixSchema: does not add type when properties is absent', () => {
const rawSchema: JSONSchema7 = { description: 'no properties' };
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
const received = jsonSchemaMock.mock.calls[0][0];
expect(received).not.toHaveProperty('type');
});
it('fixSchema: does not mutate the original schema object', () => {
const rawSchema: JSONSchema7 = { properties: { x: { type: 'string' } } };
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
expect(rawSchema).not.toHaveProperty('type');
});
it('handles multiple JSON Schema tools independently', () => {
const schemaWithProps: JSONSchema7 = { properties: { a: { type: 'string' } } };
const schemaWithType: JSONSchema7 = { type: 'object', properties: { b: { type: 'number' } } };
const result = toAiSdkTools([
makeJsonSchemaTool(schemaWithProps, { name: 'toolA' }),
makeJsonSchemaTool(schemaWithType, { name: 'toolB' }),
]);
expect(result).toHaveProperty('toolA');
expect(result).toHaveProperty('toolB');
expect(jsonSchemaMock).toHaveBeenCalledTimes(2);
const firstCall = jsonSchemaMock.mock.calls[0][0];
const secondCall = jsonSchemaMock.mock.calls[1][0];
expect(firstCall.type).toBe('object');
expect(secondCall.type).toBe('object');
});
});
// ---------------------------------------------------------------------------
// toAiSdkTools — description forwarding
// ---------------------------------------------------------------------------
describe('toAiSdkTools — description forwarding', () => {
it('forwards the tool description to the AI SDK tool config', () => {
const zodSchema = z.object({ q: z.string() });
const result = toAiSdkTools([
{ name: 'myTool', description: 'Does something useful', inputSchema: zodSchema },
]);
expect((result['myTool'] as { description: string }).description).toBe('Does something useful');
});
});
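
The `fixSchema` behaviour these cases describe amounts to a defensive normalisation: if a JSON Schema has `properties` but no `type`, a copy with `type: 'object'` is handed to `jsonSchema()`, while Zod schemas bypass the conversion entirely. An illustrative version of that rule (not the adapter's implementation):

```ts
import type { JSONSchema7 } from 'json-schema';

// Illustration of the normalisation described above; not the adapter's code.
function fixSchema(schema: JSONSchema7): JSONSchema7 {
	if (schema.properties !== undefined && schema.type === undefined) {
		return { ...schema, type: 'object' }; // copy, so the caller's schema object is not mutated
	}
	return schema;
}
```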

View file

@ -0,0 +1,289 @@
import { z } from 'zod';
import { Tool, wrapToolForApproval } from '../sdk/tool';
import type { BuiltTelemetry, BuiltTool, InterruptibleToolContext, ToolContext } from '../types';
// ---------------------------------------------------------------------------
// Test helpers
// ---------------------------------------------------------------------------
function makeBuiltTool(overrides: Partial<BuiltTool> = {}): BuiltTool {
return {
name: 'testTool',
description: 'A test tool',
inputSchema: z.object({ id: z.string() }),
handler: async (input) => {
return await Promise.resolve({ result: (input as { id: string }).id });
},
...overrides,
};
}
function makeCtx(resumeData?: unknown): { ctx: InterruptibleToolContext; suspendMock: jest.Mock } {
const suspendMock = jest.fn().mockImplementation(async (payload: unknown) => {
return await Promise.resolve({ __suspended: true, payload });
});
const ctx: InterruptibleToolContext = {
suspend: suspendMock as unknown as InterruptibleToolContext['suspend'],
resumeData,
};
return { ctx, suspendMock };
}
// ---------------------------------------------------------------------------
// Tool builder — .requireApproval()
// ---------------------------------------------------------------------------
describe('Tool builder — .requireApproval()', () => {
it('build() returns a tool with suspendSchema and resumeSchema when .requireApproval() is set', () => {
const tool = new Tool('delete')
.description('Delete a record')
.input(z.object({ id: z.string() }))
.requireApproval()
.handler(async ({ id }) => {
return await Promise.resolve({ deleted: id });
})
.build();
expect(tool.suspendSchema).toBeDefined();
expect(tool.resumeSchema).toBeDefined();
});
it('build() throws when .requireApproval() is combined with .suspend()/.resume()', () => {
expect(() => {
new Tool('delete')
.description('Delete a record')
.input(z.object({ id: z.string() }))
.requireApproval()
.suspend(z.object({ msg: z.string() }))
.resume(z.object({ ok: z.boolean() }))
.handler(async (_input, _ctx) => {
return await Promise.resolve({});
})
.build();
}).toThrow('cannot use both approval');
});
});
// ---------------------------------------------------------------------------
// Tool builder — .needsApprovalFn()
// ---------------------------------------------------------------------------
describe('Tool builder — .needsApprovalFn()', () => {
it('build() returns a tool with suspendSchema and resumeSchema when .needsApprovalFn() is set', () => {
const tool = new Tool('query')
.description('Run a query')
.input(z.object({ id: z.string() }))
.needsApprovalFn(async (args) => {
return await Promise.resolve((args as { id: string }).id === 'secret');
})
.handler(async ({ id }) => {
return await Promise.resolve({ result: id });
})
.build();
expect(tool.suspendSchema).toBeDefined();
expect(tool.resumeSchema).toBeDefined();
});
it('build() throws when .needsApprovalFn() is combined with .suspend()/.resume()', () => {
expect(() => {
new Tool('query')
.description('Run a query')
.input(z.object({ id: z.string() }))
.needsApprovalFn(async () => {
return await Promise.resolve(true);
})
.suspend(z.object({ msg: z.string() }))
.resume(z.object({ ok: z.boolean() }))
.handler(async (_input, _ctx) => {
return await Promise.resolve({});
})
.build();
}).toThrow('cannot use both approval');
});
});
// ---------------------------------------------------------------------------
// Tool builder — without approval
// ---------------------------------------------------------------------------
describe('Tool builder — without approval', () => {
it('build() returns a normal tool (no suspendSchema) when neither .requireApproval() nor .needsApprovalFn() is set', () => {
const tool = new Tool('fetch')
.description('Fetch data')
.input(z.object({ id: z.string() }))
.handler(async ({ id }) => {
return await Promise.resolve({ data: id });
})
.build();
expect(tool.suspendSchema).toBeUndefined();
expect(tool.resumeSchema).toBeUndefined();
});
});
// ---------------------------------------------------------------------------
// wrapToolForApproval — requireApproval: true
// ---------------------------------------------------------------------------
describe('wrapToolForApproval — requireApproval: true', () => {
it('suspends on first call when requireApproval is true', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined → first call
await wrapped.handler!({ id: '1' }, ctx);
expect(suspendMock).toHaveBeenCalledWith({
type: 'approval',
toolName: 'testTool',
args: { id: '1' },
});
});
it('executes original handler when approved on resume', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
const { ctx } = makeCtx({ approved: true });
const result = await wrapped.handler!({ id: 'abc' }, ctx);
expect(result).toEqual({ result: 'abc' });
});
it('returns declined message when not approved on resume', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
const { ctx } = makeCtx({ approved: false });
const result = await wrapped.handler!({ id: 'abc' }, ctx);
expect(result).toEqual({ declined: true, message: 'Tool "testTool" was not approved' });
});
});
// ---------------------------------------------------------------------------
// wrapToolForApproval — needsApprovalFn
// ---------------------------------------------------------------------------
describe('wrapToolForApproval — needsApprovalFn', () => {
it('does not suspend when needsApprovalFn returns false', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, {
needsApprovalFn: async () => {
return await Promise.resolve(false);
},
});
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined
const result = await wrapped.handler!({ id: 'safe' }, ctx);
expect(suspendMock).not.toHaveBeenCalled();
expect(result).toEqual({ result: 'safe' });
});
it('suspends when needsApprovalFn returns true', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, {
needsApprovalFn: async (args) => {
return await Promise.resolve((args as { id: string }).id === 'secret');
},
});
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined
await wrapped.handler!({ id: 'secret' }, ctx);
expect(suspendMock).toHaveBeenCalledWith({
type: 'approval',
toolName: 'testTool',
args: { id: 'secret' },
});
});
it('does not suspend when needsApprovalFn returns false for non-matching args', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, {
needsApprovalFn: async (args) => {
return await Promise.resolve((args as { id: string }).id === 'secret');
},
});
const { ctx, suspendMock } = makeCtx();
const result = await wrapped.handler!({ id: 'public' }, ctx);
expect(suspendMock).not.toHaveBeenCalled();
expect(result).toEqual({ result: 'public' });
});
});
// ---------------------------------------------------------------------------
// wrapToolForApproval — config: { requireApproval: true } (agent-level wrapping)
// ---------------------------------------------------------------------------
describe('wrapToolForApproval — config: { requireApproval: true } (agent-level wrapping)', () => {
it('always suspends regardless of original tool settings', async () => {
const baseTool = makeBuiltTool();
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined
await wrapped.handler!({ id: 'any-id' }, ctx);
expect(suspendMock).toHaveBeenCalledWith({
type: 'approval',
toolName: 'testTool',
args: { id: 'any-id' },
});
});
});
// ---------------------------------------------------------------------------
// wrapToolForApproval — telemetry propagation
// ---------------------------------------------------------------------------
describe('wrapToolForApproval — telemetry propagation', () => {
const fakeTelemetry: BuiltTelemetry = {
enabled: true,
functionId: 'parent-agent',
recordInputs: true,
recordOutputs: true,
integrations: [],
tracer: { startSpan: jest.fn() },
};
it('forwards parentTelemetry to the original handler when approval is not needed', async () => {
let capturedCtx: ToolContext | undefined;
const baseTool = makeBuiltTool({
handler: async (_input, ctx) => {
capturedCtx = ctx as ToolContext;
return await Promise.resolve({ result: 'ok' });
},
});
const wrapped = wrapToolForApproval(baseTool, { requireApproval: false });
const { ctx } = makeCtx(); // no resumeData
ctx.parentTelemetry = fakeTelemetry;
await wrapped.handler!({ id: 'test' }, ctx);
expect(capturedCtx).toBeDefined();
expect(capturedCtx!.parentTelemetry).toBe(fakeTelemetry);
});
it('forwards parentTelemetry to the original handler after approval', async () => {
let capturedCtx: ToolContext | undefined;
const baseTool = makeBuiltTool({
handler: async (_input, ctx) => {
capturedCtx = ctx as ToolContext;
return await Promise.resolve({ result: 'ok' });
},
});
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
const { ctx } = makeCtx({ approved: true }); // resumeData = approved
ctx.parentTelemetry = fakeTelemetry;
await wrapped.handler!({ id: 'test' }, ctx);
expect(capturedCtx).toBeDefined();
expect(capturedCtx!.parentTelemetry).toBe(fakeTelemetry);
});
});
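
As a usage reminder, the approval flow exercised here starts from an ordinary tool builder. A minimal sketch, with the package import path assumed:

```ts
import { z } from 'zod';
import { Tool } from '@n8n/agents';

// Approval-gated tool: the first invocation suspends with
// { type: 'approval', toolName, args }; resuming with { approved: true } runs the
// handler, while { approved: false } yields a declined message instead.
const deleteRecord = new Tool('delete')
	.description('Delete a record')
	.input(z.object({ id: z.string() }))
	.requireApproval()
	.handler(async ({ id }) => ({ deleted: id }));
```

For conditional gating, `.needsApprovalFn()` takes an async predicate over the call arguments and only suspends when it returns true.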

View file

@ -0,0 +1,197 @@
import { z } from 'zod';
import {
parseWorkingMemory,
buildWorkingMemoryInstruction,
templateFromSchema,
WorkingMemoryStreamFilter,
} from '../runtime/working-memory';
import type { StreamChunk } from '../types';
describe('parseWorkingMemory', () => {
it('extracts content between tags at end of text', () => {
const text = 'Hello world.\n<working_memory>\n# Name: Alice\n</working_memory>';
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('Hello world.');
expect(result.workingMemory).toBe('# Name: Alice');
});
it('extracts content between tags in middle of text', () => {
const text = 'Before.\n<working_memory>\ndata\n</working_memory>\nAfter.';
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('Before.\nAfter.');
expect(result.workingMemory).toBe('data');
});
it('returns null when no tags present', () => {
const text = 'Just a normal response.';
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('Just a normal response.');
expect(result.workingMemory).toBeNull();
});
it('handles empty working memory', () => {
const text = 'Response.\n<working_memory>\n</working_memory>';
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('Response.');
expect(result.workingMemory).toBe('');
});
it('handles multiline content with markdown', () => {
const wm = '# User Context\n- **Name**: Alice\n- **City**: Berlin';
const text = `Response text.\n<working_memory>\n${wm}\n</working_memory>`;
const result = parseWorkingMemory(text);
expect(result.workingMemory).toBe(wm);
});
});
describe('buildWorkingMemoryInstruction', () => {
it('generates freeform instruction', () => {
const result = buildWorkingMemoryInstruction('# Context\n- Name:', false);
expect(result).toContain('<working_memory>');
expect(result).toContain('</working_memory>');
expect(result).toContain('# Context\n- Name:');
});
it('generates structured instruction mentioning JSON', () => {
const result = buildWorkingMemoryInstruction('{"userName": ""}', true);
expect(result).toContain('JSON');
expect(result).toContain('<working_memory>');
});
});
describe('templateFromSchema', () => {
it('converts Zod schema to JSON template', () => {
const schema = z.object({
userName: z.string().optional().describe("The user's name"),
favoriteColor: z.string().optional().describe('Favorite color'),
});
const result = templateFromSchema(schema);
expect(result).toContain('userName');
expect(result).toContain('favoriteColor');
// Should be valid JSON
let parsed: unknown;
try {
parsed = JSON.parse(result);
} catch {
parsed = undefined;
}
expect(parsed).toHaveProperty('userName');
});
});
/**
* Helper that feeds chunks through a WorkingMemoryStreamFilter and collects
* the output text and any persisted working memory content.
*/
async function runStreamFilter(
chunks: string[],
): Promise<{ outputText: string; persisted: string[] }> {
const persisted: string[] = [];
const stream = new TransformStream<StreamChunk>();
const writer = stream.writable.getWriter();
// eslint-disable-next-line @typescript-eslint/require-await
const filter = new WorkingMemoryStreamFilter(writer, async (content) => {
persisted.push(content);
});
// Read the readable side concurrently to avoid backpressure deadlock
const reader = stream.readable.getReader();
const readAll = (async () => {
let outputText = '';
while (true) {
const result = await reader.read();
if (result.done) break;
const chunk = result.value as StreamChunk;
if (chunk.type === 'text-delta') outputText += chunk.delta;
}
return outputText;
})();
for (const chunk of chunks) {
await filter.write({ type: 'text-delta', delta: chunk });
}
await filter.flush();
await writer.close();
const outputText = await readAll;
return { outputText, persisted };
}
describe('WorkingMemoryStreamFilter with tag split across multiple chunks', () => {
it('handles tag split mid-open-tag', async () => {
const { outputText, persisted } = await runStreamFilter([
'Hello <work',
'ing_memory>state</working_memory>',
]);
expect(outputText).toBe('Hello ');
expect(persisted).toEqual(['state']);
});
it('handles tag split mid-close-tag', async () => {
const { outputText, persisted } = await runStreamFilter([
'<working_memory>state</worki',
'ng_memory> after',
]);
expect(persisted).toEqual(['state']);
expect(outputText).toBe(' after');
});
it('handles tag spread across 3+ chunks', async () => {
const { outputText, persisted } = await runStreamFilter([
'<wor',
'king_mem',
'ory>data</working_memory>',
]);
expect(persisted).toEqual(['data']);
expect(outputText).toBe('');
});
it('handles partial < that is not a tag', async () => {
const { outputText, persisted } = await runStreamFilter(['Hello <', 'div>world']);
expect(outputText).toBe('Hello <div>world');
expect(persisted).toEqual([]);
});
});
describe('parseWorkingMemory with invalid structured content', () => {
it('strips tags and extracts content regardless of JSON validity', () => {
const invalidJson = '{not valid json!!!}';
const text = `Here is my response.\n<working_memory>\n${invalidJson}\n</working_memory>`;
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('Here is my response.');
expect(result.workingMemory).toBe(invalidJson);
});
it('strips tags with content that fails Zod schema validation', () => {
// Content is valid JSON but wrong shape for the schema
const wrongShape = '{"unexpected": true}';
const text = `Response text.\n<working_memory>\n${wrongShape}\n</working_memory>`;
const result = parseWorkingMemory(text);
// Tags are stripped from response regardless
expect(result.cleanText).toBe('Response text.');
// Raw content is returned — caller decides whether it passes validation
expect(result.workingMemory).toBe(wrongShape);
// Verify the content would indeed fail schema validation
expect(result.workingMemory).not.toBeNull();
let parsed: unknown;
try {
parsed = JSON.parse(result.workingMemory!);
} catch {
parsed = undefined;
}
expect(parsed).toBeDefined();
});
it('strips tags even when content is completely non-JSON', () => {
const text =
'My reply.\n<working_memory>\nthis is just plain text, not JSON at all\n</working_memory>';
const result = parseWorkingMemory(text);
expect(result.cleanText).toBe('My reply.');
expect(result.workingMemory).toBe('this is just plain text, not JSON at all');
});
});
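
Concretely, the shape the parser and stream filter look for is a reply with a `<working_memory>` block embedded in it: everything inside the tags is persisted and everything outside is what the user sees. A small sketch against the internal helper (paths as seen from this test directory):

```ts
import { parseWorkingMemory } from '../runtime/working-memory';

// Internal runtime helper; the expected outputs follow the cases tested above.
const raw = 'Sure, noted!\n<working_memory>\n# Name: Alice\n- City: Berlin\n</working_memory>';
const { cleanText, workingMemory } = parseWorkingMemory(raw);
// cleanText     → 'Sure, noted!'
// workingMemory → '# Name: Alice\n- City: Berlin'
```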

View file

@ -0,0 +1,271 @@
import { BaseFilesystem } from '../../workspace/filesystem/base-filesystem';
import type { BaseFilesystemOptions } from '../../workspace/filesystem/base-filesystem';
import type {
FileContent,
FileStat,
FileEntry,
ReadOptions,
WriteOptions,
ListOptions,
RemoveOptions,
CopyOptions,
ProviderStatus,
} from '../../workspace/types';
class TestFilesystem extends BaseFilesystem {
readonly id: string;
readonly name = 'TestFS';
readonly provider = 'test';
status: ProviderStatus = 'pending';
initFn = jest.fn().mockResolvedValue(undefined);
destroyFn = jest.fn().mockResolvedValue(undefined);
constructor(id: string, options?: BaseFilesystemOptions) {
super(options);
this.id = id;
}
override async init(): Promise<void> {
await this.initFn();
}
override async destroy(): Promise<void> {
await this.destroyFn();
}
async readFile(_path: string, _options?: ReadOptions): Promise<string | Buffer> {
await this.ensureReady();
return 'test content';
}
async writeFile(_path: string, _content: FileContent, _options?: WriteOptions): Promise<void> {
await this.ensureReady();
}
async appendFile(_path: string, _content: FileContent): Promise<void> {
await this.ensureReady();
}
async deleteFile(_path: string, _options?: RemoveOptions): Promise<void> {
await this.ensureReady();
}
async copyFile(_src: string, _dest: string, _options?: CopyOptions): Promise<void> {
await this.ensureReady();
}
async moveFile(_src: string, _dest: string, _options?: CopyOptions): Promise<void> {
await this.ensureReady();
}
async mkdir(_path: string, _options?: { recursive?: boolean }): Promise<void> {
await this.ensureReady();
}
async rmdir(_path: string, _options?: RemoveOptions): Promise<void> {
await this.ensureReady();
}
async readdir(_path: string, _options?: ListOptions): Promise<FileEntry[]> {
await this.ensureReady();
return [];
}
async exists(_path: string): Promise<boolean> {
await this.ensureReady();
return false;
}
async stat(_path: string): Promise<FileStat> {
await this.ensureReady();
return {
name: 'test',
path: _path,
type: 'file',
size: 0,
createdAt: new Date(),
modifiedAt: new Date(),
};
}
}
describe('BaseFilesystem', () => {
describe('lifecycle state transitions', () => {
it('starts in pending status', () => {
const fs = new TestFilesystem('1');
expect(fs.status).toBe('pending');
});
it('transitions pending → initializing → ready on _init', async () => {
const statuses: string[] = [];
const fs = new TestFilesystem('1');
fs.initFn.mockImplementation(() => {
statuses.push(fs.status);
});
await fs._init();
expect(statuses).toContain('initializing');
expect(fs.status).toBe('ready');
});
it('_init is idempotent when already ready', async () => {
const fs = new TestFilesystem('1');
await fs._init();
fs.initFn.mockClear();
await fs._init();
expect(fs.initFn).not.toHaveBeenCalled();
expect(fs.status).toBe('ready');
});
it('transitions to error on init failure', async () => {
const fs = new TestFilesystem('1');
fs.initFn.mockRejectedValue(new Error('init boom'));
await expect(fs._init()).rejects.toThrow('init boom');
expect(fs.status).toBe('error');
expect(fs.error).toBe('init boom');
});
it('transitions to destroyed on _destroy after ready', async () => {
const fs = new TestFilesystem('1');
await fs._init();
const statuses: string[] = [];
fs.destroyFn.mockImplementation(() => {
statuses.push(fs.status);
});
await fs._destroy();
expect(statuses).toContain('destroying');
expect(fs.status).toBe('destroyed');
});
it('_destroy from pending goes directly to destroyed', async () => {
const fs = new TestFilesystem('1');
await fs._destroy();
expect(fs.status).toBe('destroyed');
expect(fs.destroyFn).not.toHaveBeenCalled();
});
it('_destroy is idempotent when already destroyed', async () => {
const fs = new TestFilesystem('1');
await fs._init();
await fs._destroy();
fs.destroyFn.mockClear();
await fs._destroy();
expect(fs.destroyFn).not.toHaveBeenCalled();
});
it('transitions to error on destroy failure', async () => {
const fs = new TestFilesystem('1');
await fs._init();
fs.destroyFn.mockRejectedValue(new Error('destroy boom'));
await expect(fs._destroy()).rejects.toThrow('destroy boom');
expect(fs.status).toBe('error');
});
});
describe('lifecycle hooks', () => {
it('calls onInit hook after successful init', async () => {
const onInit = jest.fn();
const fs = new TestFilesystem('1', { onInit });
await fs._init();
expect(onInit).toHaveBeenCalledWith({ filesystem: fs });
});
it('does not fail when onInit hook throws', async () => {
const onInit = jest.fn().mockRejectedValue(new Error('hook err'));
const fs = new TestFilesystem('1', { onInit });
await fs._init();
expect(fs.status).toBe('ready');
});
it('calls onDestroy hook during destroy', async () => {
const onDestroy = jest.fn();
const fs = new TestFilesystem('1', { onDestroy });
await fs._init();
await fs._destroy();
expect(onDestroy).toHaveBeenCalledWith({ filesystem: fs });
});
});
describe('ensureReady', () => {
it('auto-initializes when calling a fs method from pending', async () => {
const fs = new TestFilesystem('1');
const content = await fs.readFile('/test');
expect(content).toBe('test content');
expect(fs.status).toBe('ready');
});
it('throws if init fails when auto-initializing', async () => {
const fs = new TestFilesystem('1');
fs.initFn.mockRejectedValue(new Error('init fail'));
await expect(fs.readFile('/test')).rejects.toThrow();
});
});
describe('concurrent lifecycle calls', () => {
it('deduplicates concurrent _init calls', async () => {
const fs = new TestFilesystem('1');
let resolveInit: () => void;
fs.initFn.mockImplementation(
async () =>
await new Promise<void>((r) => {
resolveInit = r;
}),
);
const p1 = fs._init();
const p2 = fs._init();
resolveInit!();
await Promise.all([p1, p2]);
expect(fs.initFn).toHaveBeenCalledTimes(1);
expect(fs.status).toBe('ready');
});
it('deduplicates concurrent _destroy calls', async () => {
const fs = new TestFilesystem('1');
await fs._init();
let resolveDestroy!: () => void;
fs.destroyFn.mockImplementation(
async () =>
await new Promise<void>((r) => {
resolveDestroy = r;
}),
);
const p1 = fs._destroy();
// Flush microtasks so executeDestroy reaches destroyFn
await Promise.resolve();
await Promise.resolve();
const p2 = fs._destroy();
resolveDestroy();
await Promise.all([p1, p2]);
expect(fs.destroyFn).toHaveBeenCalledTimes(1);
expect(fs.status).toBe('destroyed');
});
});
});

View file

@ -0,0 +1,369 @@
import { BaseSandbox } from '../../workspace/sandbox/base-sandbox';
import type {
CommandResult,
SandboxProcessManager,
BaseSandboxOptions,
} from '../../workspace/types';
import { ProcessHandle } from '../../workspace/types';
class StubProcessHandle extends ProcessHandle {
readonly pid: number;
private resolvedExitCode: number | undefined;
constructor(pid: number) {
super();
this.pid = pid;
}
get exitCode(): number | undefined {
return this.resolvedExitCode;
}
async kill(): Promise<boolean> {
this.resolvedExitCode = 137;
return await Promise.resolve(true);
}
async sendStdin(_data: string): Promise<void> {}
protected async _wait(): Promise<CommandResult> {
this.resolvedExitCode = 0;
this.emitStdout('ok\n');
return await Promise.resolve({
success: true,
exitCode: 0,
stdout: this.stdout,
stderr: this.stderr,
executionTimeMs: 1,
});
}
}
function makeStubProcessManager(): SandboxProcessManager & {
spawnMock: jest.Mock;
} {
const handle = new StubProcessHandle(1);
const spawnMock = jest.fn().mockResolvedValue(handle);
return {
spawn: spawnMock,
list: jest.fn().mockResolvedValue([]),
get: jest.fn().mockResolvedValue(undefined),
kill: jest.fn().mockResolvedValue(false),
spawnMock,
} as unknown as SandboxProcessManager & { spawnMock: jest.Mock };
}
class TestSandbox extends BaseSandbox {
readonly id: string;
readonly name: string;
readonly provider = 'test';
startFn = jest.fn().mockResolvedValue(undefined);
stopFn = jest.fn().mockResolvedValue(undefined);
destroyFn = jest.fn().mockResolvedValue(undefined);
constructor(id: string, options?: BaseSandboxOptions) {
super(options);
this.id = id;
this.name = `test-sandbox-${id}`;
}
async start(): Promise<void> {
await this.startFn();
}
async stop(): Promise<void> {
await this.stopFn();
}
async destroy(): Promise<void> {
await this.destroyFn();
}
}
describe('BaseSandbox', () => {
describe('lifecycle state transitions', () => {
it('starts in pending status', () => {
const sb = new TestSandbox('1');
expect(sb.status).toBe('pending');
});
it('transitions pending → starting → running on _start', async () => {
const statuses: string[] = [];
const sb = new TestSandbox('1');
sb.startFn.mockImplementation(() => {
statuses.push(sb.status);
});
await sb._start();
expect(statuses).toContain('starting');
expect(sb.status).toBe('running');
});
it('_start is idempotent when already running', async () => {
const sb = new TestSandbox('1');
await sb._start();
sb.startFn.mockClear();
await sb._start();
expect(sb.startFn).not.toHaveBeenCalled();
expect(sb.status).toBe('running');
});
it('transitions to error on start failure', async () => {
const sb = new TestSandbox('1');
sb.startFn.mockRejectedValue(new Error('start boom'));
await expect(sb._start()).rejects.toThrow('start boom');
expect(sb.status).toBe('error');
});
it('transitions running → stopping → stopped on _stop', async () => {
const sb = new TestSandbox('1');
await sb._start();
const statuses: string[] = [];
sb.stopFn.mockImplementation(() => {
statuses.push(sb.status);
});
await sb._stop();
expect(statuses).toContain('stopping');
expect(sb.status).toBe('stopped');
});
it('_stop is no-op when already stopped', async () => {
const sb = new TestSandbox('1');
await sb._start();
await sb._stop();
sb.stopFn.mockClear();
await sb._stop();
expect(sb.stopFn).not.toHaveBeenCalled();
});
it('_stop is no-op when pending', async () => {
const sb = new TestSandbox('1');
await sb._stop();
expect(sb.stopFn).not.toHaveBeenCalled();
});
it('transitions to error on stop failure', async () => {
const sb = new TestSandbox('1');
await sb._start();
sb.stopFn.mockRejectedValue(new Error('stop boom'));
await expect(sb._stop()).rejects.toThrow('stop boom');
expect(sb.status).toBe('error');
});
it('transitions running → destroying → destroyed on _destroy', async () => {
const sb = new TestSandbox('1');
await sb._start();
const statuses: string[] = [];
sb.destroyFn.mockImplementation(() => {
statuses.push(sb.status);
});
await sb._destroy();
expect(statuses).toContain('destroying');
expect(sb.status).toBe('destroyed');
});
it('_destroy from pending goes directly to destroyed', async () => {
const sb = new TestSandbox('1');
await sb._destroy();
expect(sb.status).toBe('destroyed');
expect(sb.destroyFn).not.toHaveBeenCalled();
});
it('_destroy is idempotent when already destroyed', async () => {
const sb = new TestSandbox('1');
await sb._start();
await sb._destroy();
sb.destroyFn.mockClear();
await sb._destroy();
expect(sb.destroyFn).not.toHaveBeenCalled();
});
it('throws when trying to _start a destroyed sandbox', async () => {
const sb = new TestSandbox('1');
await sb._start();
await sb._destroy();
await expect(sb._start()).rejects.toThrow('Cannot start a destroyed sandbox');
});
it('transitions to error on destroy failure', async () => {
const sb = new TestSandbox('1');
await sb._start();
sb.destroyFn.mockRejectedValue(new Error('destroy boom'));
await expect(sb._destroy()).rejects.toThrow('destroy boom');
expect(sb.status).toBe('error');
});
});
describe('lifecycle hooks', () => {
it('calls onStart hook after successful start', async () => {
const onStart = jest.fn();
const sb = new TestSandbox('1', { onStart });
await sb._start();
expect(onStart).toHaveBeenCalledWith({ sandbox: sb });
});
it('does not fail when onStart hook throws', async () => {
const onStart = jest.fn().mockRejectedValue(new Error('hook error'));
const sb = new TestSandbox('1', { onStart });
await sb._start();
expect(sb.status).toBe('running');
});
it('calls onStop hook before stopping', async () => {
const onStop = jest.fn();
const sb = new TestSandbox('1', { onStop });
await sb._start();
await sb._stop();
expect(onStop).toHaveBeenCalledWith({ sandbox: sb });
});
it('calls onDestroy hook before destroying', async () => {
const onDestroy = jest.fn();
const sb = new TestSandbox('1', { onDestroy });
await sb._start();
await sb._destroy();
expect(onDestroy).toHaveBeenCalledWith({ sandbox: sb });
});
});
describe('ensureRunning', () => {
it('starts the sandbox if not running', async () => {
const sb = new TestSandbox('1');
await sb.ensureRunning();
expect(sb.status).toBe('running');
expect(sb.startFn).toHaveBeenCalled();
});
it('does nothing if already running', async () => {
const sb = new TestSandbox('1');
await sb._start();
sb.startFn.mockClear();
await sb.ensureRunning();
expect(sb.startFn).not.toHaveBeenCalled();
});
it('throws if sandbox is destroyed', async () => {
const sb = new TestSandbox('1');
await sb._start();
await sb._destroy();
await expect(sb.ensureRunning()).rejects.toThrow('has been destroyed');
});
});
describe('executeCommand', () => {
it('spawns a process and returns results', async () => {
const pm = makeStubProcessManager();
const sb = new TestSandbox('1', { processes: pm });
await sb._start();
const result = await sb.executeCommand('echo', ['hello']);
expect(pm.spawnMock).toHaveBeenCalledTimes(1);
expect((pm.spawnMock.mock.calls as unknown as string[][])[0][0]).toBe('echo hello');
expect(result.success).toBe(true);
expect(result.stdout).toBe('ok\n');
});
it('auto-starts sandbox before executing', async () => {
const pm = makeStubProcessManager();
const sb = new TestSandbox('1', { processes: pm });
const result = await sb.executeCommand('ls');
expect(sb.status).toBe('running');
expect(result.success).toBe(true);
});
it('throws when no process manager is available', async () => {
const sb = new TestSandbox('1');
await sb._start();
await expect(sb.executeCommand('ls')).rejects.toThrow('no process manager');
});
});
describe('getInstructions', () => {
it('returns empty string by default', () => {
const sb = new TestSandbox('1');
expect(sb.getInstructions()).toBe('');
});
});
describe('concurrent lifecycle calls', () => {
it('deduplicates concurrent _start calls', async () => {
const sb = new TestSandbox('1');
let resolveStart: () => void;
sb.startFn.mockImplementation(
async () =>
await new Promise<void>((r) => {
resolveStart = r;
}),
);
const p1 = sb._start();
const p2 = sb._start();
resolveStart!();
await Promise.all([p1, p2]);
expect(sb.startFn).toHaveBeenCalledTimes(1);
expect(sb.status).toBe('running');
});
it('deduplicates concurrent _destroy calls', async () => {
const sb = new TestSandbox('1');
await sb._start();
let resolveDestroy!: () => void;
sb.destroyFn.mockImplementation(
async () =>
await new Promise<void>((r) => {
resolveDestroy = r;
}),
);
const p1 = sb._destroy();
// Flush microtasks so executeDestroy reaches destroyFn
await Promise.resolve();
await Promise.resolve();
const p2 = sb._destroy();
resolveDestroy();
await Promise.all([p1, p2]);
expect(sb.destroyFn).toHaveBeenCalledTimes(1);
expect(sb.status).toBe('destroyed');
});
});
});

View file

@ -0,0 +1,90 @@
import { callLifecycle } from '../../workspace/lifecycle';
describe('callLifecycle', () => {
it('calls _init when both _init and init exist', async () => {
const target = {
_init: jest.fn().mockResolvedValue(undefined),
init: jest.fn().mockResolvedValue(undefined),
};
await callLifecycle(target, 'init');
expect(target._init).toHaveBeenCalledTimes(1);
expect(target.init).not.toHaveBeenCalled();
});
it('falls back to init when _init is undefined', async () => {
const target = {
init: jest.fn().mockResolvedValue(undefined),
};
await callLifecycle(target, 'init');
expect(target.init).toHaveBeenCalledTimes(1);
});
it('calls _start when both _start and start exist', async () => {
const target = {
_start: jest.fn().mockResolvedValue(undefined),
start: jest.fn().mockResolvedValue(undefined),
};
await callLifecycle(target, 'start');
expect(target._start).toHaveBeenCalledTimes(1);
expect(target.start).not.toHaveBeenCalled();
});
it('calls _stop over stop', async () => {
const target = {
_stop: jest.fn().mockResolvedValue(undefined),
stop: jest.fn().mockResolvedValue(undefined),
};
await callLifecycle(target, 'stop');
expect(target._stop).toHaveBeenCalledTimes(1);
expect(target.stop).not.toHaveBeenCalled();
});
it('calls _destroy over destroy', async () => {
const target = {
_destroy: jest.fn().mockResolvedValue(undefined),
destroy: jest.fn().mockResolvedValue(undefined),
};
await callLifecycle(target, 'destroy');
expect(target._destroy).toHaveBeenCalledTimes(1);
expect(target.destroy).not.toHaveBeenCalled();
});
it('does nothing if neither underscore nor plain method exists', async () => {
const target = {};
await expect(callLifecycle(target, 'init')).resolves.toBeUndefined();
});
it('propagates errors from lifecycle methods', async () => {
const error = new Error('lifecycle failure');
const target = {
_start: jest.fn().mockRejectedValue(error),
};
await expect(callLifecycle(target, 'start')).rejects.toThrow('lifecycle failure');
});
it('binds correctly (calls with proper this)', async () => {
const target = {
value: 42,
// eslint-disable-next-line @typescript-eslint/require-await
_init: jest.fn(async function (this: { value: number }) {
expect(this.value).toBe(42);
}),
};
await callLifecycle(target, 'init');
expect(target._init).toHaveBeenCalled();
});
});

View file

@ -0,0 +1,338 @@
import { BaseFilesystem } from '../../workspace/filesystem/base-filesystem';
import { BaseSandbox } from '../../workspace/sandbox/base-sandbox';
import { ProcessHandle, SandboxProcessManager } from '../../workspace/types';
import type {
CommandResult,
FileContent,
FileEntry,
FileStat,
ListOptions,
MountConfig,
ProcessInfo,
ProviderStatus,
ReadOptions,
RemoveOptions,
SpawnProcessOptions,
WriteOptions,
} from '../../workspace/types';
// ---------------------------------------------------------------------------
// In-memory filesystem (fake)
// ---------------------------------------------------------------------------
export class InMemoryFilesystem extends BaseFilesystem {
readonly id: string;
readonly name = 'InMemoryFilesystem';
readonly provider = 'memory';
readonly basePath = '/mem';
status: ProviderStatus = 'pending';
private files = new Map<string, Buffer>();
private dirs = new Set<string>();
constructor(id = 'mem-fs') {
super();
this.id = id;
}
// eslint-disable-next-line @typescript-eslint/require-await
override async init(): Promise<void> {
this.dirs.add('/');
}
private normalizePath(p: string): string {
return p.startsWith('/') ? p : `/${p}`;
}
private parentDir(p: string): string {
const parts = p.split('/');
parts.pop();
return parts.join('/') || '/';
}
async readFile(filePath: string, options?: ReadOptions): Promise<string | Buffer> {
await this.ensureReady();
const p = this.normalizePath(filePath);
const buf = this.files.get(p);
if (!buf) throw new Error(`ENOENT: ${p}`);
if (options?.encoding) return buf.toString(options.encoding);
return buf;
}
async writeFile(filePath: string, content: FileContent, options?: WriteOptions): Promise<void> {
await this.ensureReady();
const p = this.normalizePath(filePath);
if (options?.recursive) {
this.mkdirRecursive(this.parentDir(p));
}
const parent = this.parentDir(p);
if (!this.dirs.has(parent))
throw new Error(`ENOENT: parent directory ${parent} does not exist`);
this.files.set(p, Buffer.from(content));
}
async appendFile(filePath: string, content: FileContent): Promise<void> {
await this.ensureReady();
const p = this.normalizePath(filePath);
const existing = this.files.get(p) ?? Buffer.alloc(0);
const append = typeof content === 'string' ? Buffer.from(content) : Buffer.from(content);
this.files.set(p, Buffer.concat([existing, append]));
}
async deleteFile(filePath: string): Promise<void> {
await this.ensureReady();
const p = this.normalizePath(filePath);
if (!this.files.has(p)) throw new Error(`ENOENT: ${p}`);
this.files.delete(p);
}
async copyFile(src: string, dest: string): Promise<void> {
await this.ensureReady();
const content = await this.readFile(src);
await this.writeFile(dest, content);
}
async moveFile(src: string, dest: string): Promise<void> {
await this.ensureReady();
await this.copyFile(src, dest);
await this.deleteFile(src);
}
async mkdir(dirPath: string, options?: { recursive?: boolean }): Promise<void> {
await this.ensureReady();
const p = this.normalizePath(dirPath);
if (options?.recursive) {
this.mkdirRecursive(p);
} else {
this.dirs.add(p);
}
}
async rmdir(dirPath: string, options?: RemoveOptions): Promise<void> {
await this.ensureReady();
const p = this.normalizePath(dirPath);
if (options?.recursive) {
for (const key of [...this.files.keys()]) {
if (key.startsWith(p + '/') || key === p) this.files.delete(key);
}
for (const d of [...this.dirs]) {
if (d.startsWith(p + '/') || d === p) this.dirs.delete(d);
}
} else {
this.dirs.delete(p);
}
}
async readdir(dirPath: string, options?: ListOptions): Promise<FileEntry[]> {
await this.ensureReady();
const p = this.normalizePath(dirPath);
const entries: FileEntry[] = [];
const seen = new Set<string>();
for (const d of this.dirs) {
if (d === p) continue;
if (!d.startsWith(p + '/')) continue;
const rel = d.slice(p.length + 1);
if (!rel) continue;
const isDirectChild = !rel.includes('/');
if (isDirectChild || options?.recursive) {
const name = rel.split('/').pop()!;
if (!seen.has(`dir:${name}`)) {
seen.add(`dir:${name}`);
entries.push({ name, type: 'directory' });
}
}
}
for (const [filePath] of this.files) {
if (!filePath.startsWith(p + '/')) continue;
const rel = filePath.slice(p.length + 1);
if (!rel) continue;
const isDirectChild = !rel.includes('/');
if (isDirectChild || options?.recursive) {
const name = filePath.split('/').pop()!;
if (options?.extension) {
const ext = options.extension.startsWith('.')
? options.extension
: `.${options.extension}`;
if (!name.endsWith(ext)) continue;
}
if (!seen.has(`file:${name}`)) {
seen.add(`file:${name}`);
entries.push({ name, type: 'file' });
}
}
}
return entries;
}
async exists(filePath: string): Promise<boolean> {
await this.ensureReady();
const p = this.normalizePath(filePath);
return this.files.has(p) || this.dirs.has(p);
}
async stat(filePath: string): Promise<FileStat> {
await this.ensureReady();
const p = this.normalizePath(filePath);
const now = new Date();
if (this.dirs.has(p)) {
return {
name: p.split('/').pop() ?? '/',
path: filePath,
type: 'directory',
size: 0,
createdAt: now,
modifiedAt: now,
};
}
const buf = this.files.get(p);
if (!buf) throw new Error(`ENOENT: ${p}`);
return {
name: p.split('/').pop()!,
path: filePath,
type: 'file',
size: buf.length,
createdAt: now,
modifiedAt: now,
};
}
getMountConfig(): MountConfig {
return { type: 'local', basePath: '/mem' };
}
getInstructions(): string {
return 'In-memory filesystem. All file paths are relative to /mem.';
}
getFileContent(filePath: string): string | undefined {
const p = this.normalizePath(filePath);
return this.files.get(p)?.toString('utf-8');
}
private mkdirRecursive(p: string): void {
const parts = p.split('/');
let current = '';
for (const part of parts) {
current += current === '/' ? part : `/${part}`;
if (!current) current = '/';
this.dirs.add(current);
}
}
}
export class FakeProcessHandle extends ProcessHandle {
readonly pid: number;
private resolvedExitCode: number | undefined;
private readonly outputFn: (command: string) => {
stdout: string;
stderr: string;
exitCode: number;
};
private readonly cmdString: string;
constructor(
pid: number,
command: string,
outputFn: (cmd: string) => { stdout: string; stderr: string; exitCode: number },
) {
super();
this.pid = pid;
this.cmdString = command;
this.command = command;
this.outputFn = outputFn;
}
get exitCode(): number | undefined {
return this.resolvedExitCode;
}
async kill(): Promise<boolean> {
this.resolvedExitCode = 137;
return await Promise.resolve(true);
}
async sendStdin(_data: string): Promise<void> {}
protected async _wait(): Promise<CommandResult> {
const result = this.outputFn(this.cmdString);
this.emitStdout(result.stdout);
if (result.stderr) this.emitStderr(result.stderr);
this.resolvedExitCode = result.exitCode;
return await Promise.resolve({
success: result.exitCode === 0,
exitCode: result.exitCode,
stdout: this.stdout,
stderr: this.stderr,
executionTimeMs: 1,
command: this.command,
});
}
}
// ---------------------------------------------------------------------------
// Fake process manager
// ---------------------------------------------------------------------------
export class FakeProcessManager extends SandboxProcessManager {
private nextPid = 1;
private tracked = new Map<number, FakeProcessHandle>();
commandHandler: (command: string) => { stdout: string; stderr: string; exitCode: number };
constructor() {
super();
this.commandHandler = (cmd) => ({ stdout: `executed: ${cmd}\n`, stderr: '', exitCode: 0 });
}
async spawn(command: string, _options?: SpawnProcessOptions): Promise<ProcessHandle> {
const pid = this.nextPid++;
const handle = new FakeProcessHandle(pid, command, this.commandHandler);
this.tracked.set(pid, handle);
return await Promise.resolve(handle);
}
async list(): Promise<ProcessInfo[]> {
return await Promise.resolve(
[...this.tracked.entries()].map(([pid, h]) => ({
pid,
command: h.command,
exitCode: h.exitCode,
})),
);
}
async get(pid: number): Promise<ProcessHandle | undefined> {
return await Promise.resolve(this.tracked.get(pid));
}
async kill(pid: number): Promise<boolean> {
const h = this.tracked.get(pid);
if (!h) return false;
const result = await h.kill();
this.tracked.delete(pid);
return result;
}
}
export class FakeSandbox extends BaseSandbox {
readonly id: string;
readonly name: string;
readonly provider = 'fake';
constructor(id: string, pm: FakeProcessManager) {
super({ processes: pm });
this.id = id;
this.name = `fake-sandbox-${id}`;
}
async start(): Promise<void> {}
async stop(): Promise<void> {}
async destroy(): Promise<void> {}
override getInstructions(): string {
return 'Fake sandbox for executing commands.';
}
}

View file

@ -0,0 +1,325 @@
import { InMemoryFilesystem, FakeProcessManager, FakeSandbox } from './test-utils';
import type { FileEntry } from '../../workspace/types';
import { Workspace } from '../../workspace/workspace';
// ---------------------------------------------------------------------------
// Integration tests
// ---------------------------------------------------------------------------
describe('Workspace integration with fakes', () => {
let memFs: InMemoryFilesystem;
let fakeProcessManager: FakeProcessManager;
let fakeSandbox: FakeSandbox;
let workspace: Workspace;
beforeEach(async () => {
memFs = new InMemoryFilesystem();
fakeProcessManager = new FakeProcessManager();
fakeSandbox = new FakeSandbox('test', fakeProcessManager);
workspace = new Workspace({
id: 'integration-test',
filesystem: memFs,
sandbox: fakeSandbox,
});
await workspace.init();
});
afterEach(async () => {
await workspace.destroy();
});
it('initializes all providers and reaches ready state', () => {
expect(workspace.status).toBe('ready');
expect(memFs.status).toBe('ready');
expect(fakeSandbox.status).toBe('running');
});
it('returns combined instructions', () => {
const instructions = workspace.getInstructions();
expect(instructions).toContain('Fake sandbox');
expect(instructions).toContain('In-memory filesystem');
});
it('exposes all expected tools', () => {
const tools = workspace.getTools();
const names = tools.map((t) => t.name);
expect(names).toContain('workspace_read_file');
expect(names).toContain('workspace_write_file');
expect(names).toContain('workspace_list_files');
expect(names).toContain('workspace_file_stat');
expect(names).toContain('workspace_mkdir');
expect(names).toContain('workspace_execute_command');
});
describe('filesystem tools end-to-end', () => {
it('write_file → read_file round-trip', async () => {
const tools = workspace.getTools();
const write = tools.find((t) => t.name === 'workspace_write_file')!;
const read = tools.find((t) => t.name === 'workspace_read_file')!;
await write.handler!(
{ path: '/hello.txt', content: 'Hello from integration test!' },
{} as never,
);
const result = await read.handler!({ path: '/hello.txt', encoding: 'utf-8' }, {} as never);
expect((result as { content: string }).content).toBe('Hello from integration test!');
});
it('mkdir → write → list round-trip', async () => {
const tools = workspace.getTools();
const mkdirTool = tools.find((t) => t.name === 'workspace_mkdir')!;
const write = tools.find((t) => t.name === 'workspace_write_file')!;
const list = tools.find((t) => t.name === 'workspace_list_files')!;
await mkdirTool.handler!({ path: '/project' }, {} as never);
await write.handler!({ path: '/project/index.ts', content: 'export {}' }, {} as never);
await write.handler!({ path: '/project/readme.md', content: '# Readme' }, {} as never);
const result = (await list.handler!({ path: '/project' }, {} as never)) as {
entries: FileEntry[];
};
expect(result.entries).toHaveLength(2);
const names = result.entries.map((e) => e.name);
expect(names).toContain('index.ts');
expect(names).toContain('readme.md');
});
it('write → stat returns metadata', async () => {
const tools = workspace.getTools();
const write = tools.find((t) => t.name === 'workspace_write_file')!;
const stat = tools.find((t) => t.name === 'workspace_file_stat')!;
await write.handler!({ path: '/data.json', content: '{"key": "value"}' }, {} as never);
const result = (await stat.handler!({ path: '/data.json' }, {} as never)) as {
name: string;
type: string;
size: number;
};
expect(result.name).toBe('data.json');
expect(result.type).toBe('file');
expect(result.size).toBe(16);
});
});
describe('sandbox tools end-to-end', () => {
it('executes a command through the tool', async () => {
fakeProcessManager.commandHandler = (cmd) => ({
stdout: `ran: ${cmd}\n`,
stderr: '',
exitCode: 0,
});
const tools = workspace.getTools();
const exec = tools.find((t) => t.name === 'workspace_execute_command')!;
const result = (await exec.handler!({ command: 'echo test' }, {} as never)) as {
success: boolean;
stdout: string;
exitCode: number;
};
expect(result.success).toBe(true);
expect(result.stdout).toBe('ran: echo test\n');
expect(result.exitCode).toBe(0);
});
it('reports command failure', async () => {
fakeProcessManager.commandHandler = () => ({
stdout: '',
stderr: 'command not found',
exitCode: 127,
});
const tools = workspace.getTools();
const exec = tools.find((t) => t.name === 'workspace_execute_command')!;
const result = (await exec.handler!({ command: 'invalid-cmd' }, {} as never)) as {
success: boolean;
stderr: string;
exitCode: number;
};
expect(result.success).toBe(false);
expect(result.exitCode).toBe(127);
expect(result.stderr).toBe('command not found');
});
});
describe('full lifecycle', () => {
it('init → use → destroy cycle', async () => {
const ws = new Workspace({
filesystem: new InMemoryFilesystem('lc-fs'),
sandbox: new FakeSandbox('lc-sb', new FakeProcessManager()),
});
expect(ws.status).toBe('pending');
await ws.init();
expect(ws.status).toBe('ready');
expect(ws.filesystem!.status).toBe('ready');
expect(ws.sandbox!.status).toBe('running');
const tools = ws.getTools();
expect(tools.length).toBeGreaterThan(0);
await ws.destroy();
expect(ws.status).toBe('destroyed');
expect(ws.sandbox!.status).toBe('destroyed');
expect(ws.filesystem!.status).toBe('destroyed');
});
it('workspace with only filesystem', async () => {
const ws = new Workspace({ filesystem: new InMemoryFilesystem('fs-only') });
await ws.init();
const tools = ws.getTools();
const names = tools.map((t) => t.name);
expect(names).not.toContain('workspace_execute_command');
expect(names).toContain('workspace_read_file');
await ws.destroy();
});
it('workspace with only sandbox', async () => {
const ws = new Workspace({
sandbox: new FakeSandbox('sb-only', new FakeProcessManager()),
});
await ws.init();
const tools = ws.getTools();
const names = tools.map((t) => t.name);
expect(names).toContain('workspace_execute_command');
expect(names).not.toContain('workspace_read_file');
await ws.destroy();
});
it('empty workspace lifecycle', async () => {
const ws = new Workspace({});
await ws.init();
expect(ws.status).toBe('ready');
expect(ws.getTools()).toEqual([]);
await ws.destroy();
expect(ws.status).toBe('destroyed');
});
});
describe('in-memory filesystem operations', () => {
it('supports append', async () => {
await memFs.writeFile('/log.txt', 'line1\n');
await memFs.appendFile('/log.txt', 'line2\n');
const content = await memFs.readFile('/log.txt', { encoding: 'utf-8' });
expect(content).toBe('line1\nline2\n');
});
it('supports copy and move', async () => {
await memFs.writeFile('/original.txt', 'original');
await memFs.copyFile('/original.txt', '/copy.txt');
expect(await memFs.readFile('/copy.txt', { encoding: 'utf-8' })).toBe('original');
await memFs.moveFile('/copy.txt', '/moved.txt');
expect(await memFs.exists('/copy.txt')).toBe(false);
expect(await memFs.readFile('/moved.txt', { encoding: 'utf-8' })).toBe('original');
});
it('supports rmdir recursive', async () => {
await memFs.mkdir('/deep/nested', { recursive: true });
await memFs.writeFile('/deep/nested/file.txt', 'data');
await memFs.rmdir('/deep', { recursive: true });
expect(await memFs.exists('/deep')).toBe(false);
expect(await memFs.exists('/deep/nested/file.txt')).toBe(false);
});
it('readFile throws on non-existent file', async () => {
await expect(memFs.readFile('/nonexistent')).rejects.toThrow('ENOENT');
});
it('deleteFile throws on non-existent file', async () => {
await expect(memFs.deleteFile('/nonexistent')).rejects.toThrow('ENOENT');
});
});
describe('fake process manager', () => {
it('tracks spawned processes', async () => {
const handle = await fakeProcessManager.spawn('echo hello');
const processes = await fakeProcessManager.list();
expect(processes).toHaveLength(1);
expect(processes[0].pid).toBe(handle.pid);
});
it('can retrieve a handle by pid', async () => {
const handle = await fakeProcessManager.spawn('ls');
const retrieved = await fakeProcessManager.get(handle.pid);
expect(retrieved).toBe(handle);
});
it('returns undefined for unknown pid', async () => {
expect(await fakeProcessManager.get(999)).toBeUndefined();
});
it('can kill a process', async () => {
const handle = await fakeProcessManager.spawn('sleep 100');
const killed = await fakeProcessManager.kill(handle.pid);
expect(killed).toBe(true);
expect(handle.exitCode).toBe(137);
});
it('kill returns false for unknown pid', async () => {
expect(await fakeProcessManager.kill(999)).toBe(false);
});
});
describe('ProcessHandle stdout/stderr buffering', () => {
it('buffers stdout and stderr', async () => {
fakeProcessManager.commandHandler = () => ({
stdout: 'output data',
stderr: 'error data',
exitCode: 0,
});
const handle = await fakeProcessManager.spawn('test');
const collected: string[] = [];
await handle.wait({
onStdout: (data) => collected.push(`out:${data}`),
onStderr: (data) => collected.push(`err:${data}`),
});
expect(handle.stdout).toBe('output data');
expect(handle.stderr).toBe('error data');
expect(collected).toContain('out:output data');
expect(collected).toContain('err:error data');
});
it('supports multiple stdout/stderr listeners', async () => {
fakeProcessManager.commandHandler = () => ({
stdout: 'hello',
stderr: '',
exitCode: 0,
});
const handle = await fakeProcessManager.spawn('test');
const listener1: string[] = [];
const listener2: string[] = [];
handle.addStdoutListener((d) => listener1.push(d));
handle.addStdoutListener((d) => listener2.push(d));
await handle.wait();
expect(listener1).toEqual(['hello']);
expect(listener2).toEqual(['hello']);
});
});
});

View file

@ -0,0 +1,268 @@
import { createWorkspaceTools } from '../../workspace/tools/workspace-tools';
import type { WorkspaceFilesystem, WorkspaceSandbox, CommandResult } from '../../workspace/types';
function makeFakeFilesystem(overrides: Partial<WorkspaceFilesystem> = {}): WorkspaceFilesystem {
return {
id: 'test-fs',
name: 'TestFS',
provider: 'test',
status: 'ready',
readFile: jest.fn().mockResolvedValue('file content'),
writeFile: jest.fn().mockResolvedValue(undefined),
appendFile: jest.fn().mockResolvedValue(undefined),
deleteFile: jest.fn().mockResolvedValue(undefined),
copyFile: jest.fn().mockResolvedValue(undefined),
moveFile: jest.fn().mockResolvedValue(undefined),
mkdir: jest.fn().mockResolvedValue(undefined),
rmdir: jest.fn().mockResolvedValue(undefined),
readdir: jest.fn().mockResolvedValue([
{ name: 'file1.txt', type: 'file' as const },
{ name: 'subdir', type: 'directory' as const },
]),
exists: jest.fn().mockResolvedValue(true),
stat: jest.fn().mockResolvedValue({
name: 'test.txt',
path: '/test.txt',
type: 'file' as const,
size: 100,
createdAt: new Date('2024-01-01'),
modifiedAt: new Date('2024-06-01'),
}),
...overrides,
};
}
function makeFakeSandbox(overrides: Partial<WorkspaceSandbox> = {}): WorkspaceSandbox {
const mockResult: CommandResult = {
success: true,
exitCode: 0,
stdout: 'hello world',
stderr: '',
executionTimeMs: 42,
};
return {
id: 'test-sandbox',
name: 'TestSandbox',
provider: 'test',
status: 'running',
executeCommand: jest.fn().mockResolvedValue(mockResult),
...overrides,
};
}
describe('createWorkspaceTools', () => {
it('returns no tools when workspace has no providers', () => {
const tools = createWorkspaceTools({});
expect(tools).toEqual([]);
});
it('returns filesystem tools when filesystem is set', () => {
const tools = createWorkspaceTools({ filesystem: makeFakeFilesystem() });
const names = tools.map((t) => t.name);
expect(names).toEqual([
'workspace_read_file',
'workspace_write_file',
'workspace_list_files',
'workspace_file_stat',
'workspace_mkdir',
'workspace_delete_file',
'workspace_append_file',
'workspace_copy_file',
'workspace_move_file',
'workspace_rmdir',
]);
});
it('returns execute_command when sandbox has executeCommand', () => {
const tools = createWorkspaceTools({ sandbox: makeFakeSandbox() });
const names = tools.map((t) => t.name);
expect(names).toEqual(['workspace_execute_command']);
});
it('does not return execute_command when sandbox lacks executeCommand', () => {
const tools = createWorkspaceTools({
sandbox: makeFakeSandbox({ executeCommand: undefined }),
});
expect(tools).toEqual([]);
});
it('returns all tools when both filesystem and sandbox are set', () => {
const tools = createWorkspaceTools({
filesystem: makeFakeFilesystem(),
sandbox: makeFakeSandbox(),
});
const names = tools.map((t) => t.name);
expect(names).toContain('workspace_read_file');
expect(names).toContain('workspace_execute_command');
expect(names).toHaveLength(11);
});
describe('tool handlers', () => {
it('read_file handler calls filesystem.readFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const readTool = tools.find((t) => t.name === 'workspace_read_file')!;
const result = await readTool.handler!({ path: '/test.txt', encoding: 'utf-8' }, {} as never);
expect(fs.readFile).toHaveBeenCalledWith('/test.txt', { encoding: 'utf-8' });
expect(result).toEqual({ content: 'file content' });
});
it('write_file handler calls filesystem.writeFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const writeTool = tools.find((t) => t.name === 'workspace_write_file')!;
const result = await writeTool.handler!(
{ path: '/out.txt', content: 'hello', recursive: true },
{} as never,
);
expect(fs.writeFile).toHaveBeenCalledWith('/out.txt', 'hello', { recursive: true });
expect(result).toEqual({ success: true });
});
it('list_files handler calls filesystem.readdir', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const listTool = tools.find((t) => t.name === 'workspace_list_files')!;
const result = await listTool.handler!({ path: '/', recursive: false }, {} as never);
expect(fs.readdir).toHaveBeenCalledWith('/', { recursive: false });
expect(result).toEqual({
entries: [
{ name: 'file1.txt', type: 'file' },
{ name: 'subdir', type: 'directory' },
],
});
});
it('file_stat handler calls filesystem.stat', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const statTool = tools.find((t) => t.name === 'workspace_file_stat')!;
const result = await statTool.handler!({ path: '/test.txt' }, {} as never);
expect(fs.stat).toHaveBeenCalledWith('/test.txt');
expect(result).toEqual({
name: 'test.txt',
path: '/test.txt',
type: 'file',
size: 100,
createdAt: '2024-01-01T00:00:00.000Z',
modifiedAt: '2024-06-01T00:00:00.000Z',
});
});
it('mkdir handler calls filesystem.mkdir', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const mkdirTool = tools.find((t) => t.name === 'workspace_mkdir')!;
const result = await mkdirTool.handler!({ path: '/new-dir', recursive: true }, {} as never);
expect(fs.mkdir).toHaveBeenCalledWith('/new-dir', { recursive: true });
expect(result).toEqual({ success: true });
});
it('delete_file handler calls filesystem.deleteFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const deleteTool = tools.find((t) => t.name === 'workspace_delete_file')!;
const result = await deleteTool.handler!(
{ path: '/old.txt', recursive: false, force: true },
{} as never,
);
expect(fs.deleteFile).toHaveBeenCalledWith('/old.txt', { recursive: false, force: true });
expect(result).toEqual({ success: true });
});
it('append_file handler calls filesystem.appendFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const appendTool = tools.find((t) => t.name === 'workspace_append_file')!;
const result = await appendTool.handler!(
{ path: '/log.txt', content: 'new line' },
{} as never,
);
expect(fs.appendFile).toHaveBeenCalledWith('/log.txt', 'new line');
expect(result).toEqual({ success: true });
});
it('copy_file handler calls filesystem.copyFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const copyTool = tools.find((t) => t.name === 'workspace_copy_file')!;
const result = await copyTool.handler!(
{ src: '/a.txt', dest: '/b.txt', overwrite: true },
{} as never,
);
expect(fs.copyFile).toHaveBeenCalledWith('/a.txt', '/b.txt', { overwrite: true });
expect(result).toEqual({ success: true });
});
it('move_file handler calls filesystem.moveFile', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const moveTool = tools.find((t) => t.name === 'workspace_move_file')!;
const result = await moveTool.handler!(
{ src: '/old.txt', dest: '/new.txt', overwrite: false },
{} as never,
);
expect(fs.moveFile).toHaveBeenCalledWith('/old.txt', '/new.txt', { overwrite: false });
expect(result).toEqual({ success: true });
});
it('rmdir handler calls filesystem.rmdir', async () => {
const fs = makeFakeFilesystem();
const tools = createWorkspaceTools({ filesystem: fs });
const rmdirTool = tools.find((t) => t.name === 'workspace_rmdir')!;
const result = await rmdirTool.handler!(
{ path: '/old-dir', recursive: true, force: false },
{} as never,
);
expect(fs.rmdir).toHaveBeenCalledWith('/old-dir', { recursive: true, force: false });
expect(result).toEqual({ success: true });
});
it('execute_command handler calls sandbox.executeCommand', async () => {
const sb = makeFakeSandbox();
const tools = createWorkspaceTools({ sandbox: sb });
const execTool = tools.find((t) => t.name === 'workspace_execute_command')!;
const result = await execTool.handler!(
{ command: 'echo hello', cwd: '/tmp', timeout: 5000 },
{} as never,
);
expect(sb.executeCommand).toHaveBeenCalledWith('echo hello', undefined, {
cwd: '/tmp',
timeout: 5000,
});
expect(result).toEqual({
success: true,
exitCode: 0,
stdout: 'hello world',
stderr: '',
executionTimeMs: 42,
});
});
});
});

View file

@ -0,0 +1,309 @@
import type { WorkspaceFilesystem, WorkspaceSandbox } from '../../workspace/types';
import { Workspace } from '../../workspace/workspace';
function makeFakeFilesystem(overrides: Partial<WorkspaceFilesystem> = {}): WorkspaceFilesystem {
return {
id: 'test-fs',
name: 'TestFS',
provider: 'test',
status: 'pending',
readFile: jest.fn(),
writeFile: jest.fn(),
appendFile: jest.fn(),
deleteFile: jest.fn(),
copyFile: jest.fn(),
moveFile: jest.fn(),
mkdir: jest.fn(),
rmdir: jest.fn(),
readdir: jest.fn(),
exists: jest.fn(),
stat: jest.fn(),
...overrides,
};
}
function makeFakeSandbox(overrides: Partial<WorkspaceSandbox> = {}): WorkspaceSandbox {
return {
id: 'test-sandbox',
name: 'TestSandbox',
provider: 'test',
status: 'pending',
...overrides,
};
}
describe('Workspace', () => {
describe('constructor', () => {
it('generates an id when none is provided', () => {
const ws = new Workspace({});
expect(ws.id).toMatch(/^workspace-[0-9a-f-]+$/);
});
it('uses a custom id when provided', () => {
const ws = new Workspace({ id: 'my-ws' });
expect(ws.id).toBe('my-ws');
});
it('generates a name from the id when none is provided', () => {
const ws = new Workspace({ id: 'abc' });
expect(ws.name).toBe('workspace-abc');
});
it('uses a custom name when provided', () => {
const ws = new Workspace({ id: 'abc', name: 'My Workspace' });
expect(ws.name).toBe('My Workspace');
});
it('starts with pending status', () => {
const ws = new Workspace({});
expect(ws.status).toBe('pending');
});
it('exposes filesystem and sandbox', () => {
const fs = makeFakeFilesystem();
const sb = makeFakeSandbox();
const ws = new Workspace({ filesystem: fs, sandbox: sb });
expect(ws.filesystem).toBe(fs);
expect(ws.sandbox).toBe(sb);
});
it('returns undefined for absent filesystem and sandbox', () => {
const ws = new Workspace({});
expect(ws.filesystem).toBeUndefined();
expect(ws.sandbox).toBeUndefined();
});
it('generates unique IDs using randomUUID', () => {
const ws1 = new Workspace({});
const ws2 = new Workspace({});
expect(ws1.id).not.toBe(ws2.id);
expect(ws1.id).toMatch(/^workspace-/);
});
});
describe('init', () => {
it('calls filesystem._init then sandbox._start', async () => {
const order: string[] = [];
const fs = makeFakeFilesystem({
_init: jest.fn(async () => {
await Promise.resolve();
order.push('fs-init');
}),
});
const sb = makeFakeSandbox({
_start: jest.fn(async () => {
await Promise.resolve();
order.push('sb-start');
}),
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
await ws.init();
expect(order).toEqual(['fs-init', 'sb-start']);
expect(ws.status).toBe('ready');
});
it('sets status to ready when no providers', async () => {
const ws = new Workspace({});
await ws.init();
expect(ws.status).toBe('ready');
});
it('initializes only filesystem when no sandbox', async () => {
const fs = makeFakeFilesystem({
_init: jest.fn().mockResolvedValue(undefined),
});
const ws = new Workspace({ filesystem: fs });
await ws.init();
expect(fs._init).toHaveBeenCalled();
expect(ws.status).toBe('ready');
});
it('starts only sandbox when no filesystem', async () => {
const sb = makeFakeSandbox({
_start: jest.fn().mockResolvedValue(undefined),
});
const ws = new Workspace({ sandbox: sb });
await ws.init();
expect(sb._start).toHaveBeenCalled();
expect(ws.status).toBe('ready');
});
it('destroys filesystem and sets error status when sandbox start fails', async () => {
const fs = makeFakeFilesystem({
_init: jest.fn().mockResolvedValue(undefined),
_destroy: jest.fn().mockResolvedValue(undefined),
});
const sb = makeFakeSandbox({
_start: jest.fn().mockRejectedValue(new Error('sandbox start failed')),
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
await expect(ws.init()).rejects.toThrow('sandbox start failed');
expect(fs._init).toHaveBeenCalled();
expect(fs._destroy).toHaveBeenCalled();
expect(ws.status).toBe('error');
});
it('is idempotent when already ready', async () => {
const fs = makeFakeFilesystem({
_init: jest.fn().mockResolvedValue(undefined),
});
const ws = new Workspace({ filesystem: fs });
await ws.init();
(fs._init as jest.Mock).mockClear();
await ws.init();
expect(fs._init).not.toHaveBeenCalled();
});
it('deduplicates concurrent init calls', async () => {
let resolveInit: () => void;
const fs = makeFakeFilesystem({
_init: jest.fn(
async () =>
await new Promise<void>((r) => {
resolveInit = r;
}),
),
});
const ws = new Workspace({ filesystem: fs });
const p1 = ws.init();
const p2 = ws.init();
resolveInit!();
await Promise.all([p1, p2]);
expect(fs._init).toHaveBeenCalledTimes(1);
expect(ws.status).toBe('ready');
});
});
describe('destroy', () => {
it('calls sandbox._destroy then filesystem._destroy', async () => {
const order: string[] = [];
const fs = makeFakeFilesystem({
_destroy: jest.fn(async () => {
await Promise.resolve();
order.push('fs-destroy');
}),
});
const sb = makeFakeSandbox({
_destroy: jest.fn(async () => {
await Promise.resolve();
order.push('sb-destroy');
}),
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
await ws.destroy();
expect(order).toEqual(['sb-destroy', 'fs-destroy']);
expect(ws.status).toBe('destroyed');
});
it('sets status to destroyed when no providers', async () => {
const ws = new Workspace({});
await ws.destroy();
expect(ws.status).toBe('destroyed');
});
it('transitions to error when sandbox destroy throws', async () => {
const fs = makeFakeFilesystem({
_destroy: jest.fn().mockResolvedValue(undefined),
});
const sb = makeFakeSandbox({
_destroy: jest.fn().mockRejectedValue(new Error('sandbox boom')),
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
await expect(ws.destroy()).rejects.toThrow('sandbox boom');
expect(fs._destroy).toHaveBeenCalled();
expect(ws.status).toBe('error');
});
});
describe('getInstructions', () => {
it('combines sandbox and filesystem instructions', () => {
const fs = makeFakeFilesystem({
getInstructions: () => 'FS instructions',
});
const sb = makeFakeSandbox({
getInstructions: () => 'SB instructions',
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
expect(ws.getInstructions()).toBe('SB instructions\n\nFS instructions');
});
it('returns empty string when no providers', () => {
const ws = new Workspace({});
expect(ws.getInstructions()).toBe('');
});
it('omits empty instruction strings', () => {
const fs = makeFakeFilesystem({
getInstructions: () => '',
});
const sb = makeFakeSandbox({
getInstructions: () => 'SB only',
});
const ws = new Workspace({ filesystem: fs, sandbox: sb });
expect(ws.getInstructions()).toBe('SB only');
});
});
describe('getTools', () => {
it('returns filesystem tools when filesystem is set', () => {
const fs = makeFakeFilesystem();
const ws = new Workspace({ filesystem: fs });
const tools = ws.getTools();
const names = tools.map((t) => t.name);
expect(names).toContain('workspace_read_file');
expect(names).toContain('workspace_write_file');
expect(names).toContain('workspace_list_files');
expect(names).toContain('workspace_file_stat');
expect(names).toContain('workspace_mkdir');
});
it('returns execute_command tool when sandbox has executeCommand', () => {
const sb = makeFakeSandbox({
executeCommand: jest.fn(),
});
const ws = new Workspace({ sandbox: sb });
const tools = ws.getTools();
const names = tools.map((t) => t.name);
expect(names).toContain('workspace_execute_command');
});
it('returns empty array when no providers', () => {
const ws = new Workspace({});
expect(ws.getTools()).toEqual([]);
});
it('does not include execute_command if sandbox has no executeCommand', () => {
const sb = makeFakeSandbox();
const ws = new Workspace({ sandbox: sb });
const tools = ws.getTools();
const names = tools.map((t) => t.name);
expect(names).not.toContain('workspace_execute_command');
});
});
});

View file

@ -0,0 +1,25 @@
import { Eval } from '../sdk/eval';
/** Deterministic categorization eval — checks if output matches the expected label. */
export function categorization(): Eval {
return new Eval('categorization')
.description('Checks if output matches the expected category label')
.check(({ output, expected }) => {
if (!expected) {
return { pass: false, reasoning: 'No expected category provided' };
}
const normalOutput = output.toLowerCase().trim();
const normalExpected = expected.toLowerCase().trim();
if (normalOutput === normalExpected) {
return { pass: true, reasoning: 'Exact match' };
}
if (normalOutput.includes(normalExpected)) {
return { pass: true, reasoning: `Output contains expected label "${expected}"` };
}
return { pass: false, reasoning: `Expected "${expected}", got "${output}"` };
});
}
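// Illustrative behavior (sample labels are hypothetical, traced against the check above):
//   output "Category: Billing", expected "billing" -> pass ('Output contains expected label "billing"')
//   output "Billing",           expected "billing" -> pass ('Exact match')
//   output "Refunds",           expected "billing" -> fail ('Expected "billing", got "Refunds"')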

View file

@ -0,0 +1,35 @@
import { Eval } from '../sdk/eval';
/**
* Deterministic keyword presence eval.
* Expects `expected` to be a comma-separated list of keywords.
* Passes only if ALL keywords are found in the output.
*/
export function containsKeywords(): Eval {
return new Eval('contains-keywords')
.description('Checks if output contains all expected keywords')
.check(({ output, expected }) => {
if (!expected) {
return { pass: false, reasoning: 'No expected keywords provided' };
}
const keywords = expected
.split(',')
.map((k) => k.trim().toLowerCase())
.filter(Boolean);
if (keywords.length === 0) {
return { pass: false, reasoning: 'No keywords to check' };
}
const normalOutput = output.toLowerCase();
const missing = keywords.filter((k) => !normalOutput.includes(k));
return {
pass: missing.length === 0,
reasoning:
missing.length === 0
? `All ${keywords.length} keywords found`
: `Missing ${missing.length}/${keywords.length} keywords: ${missing.join(', ')}`,
};
});
}
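// Illustrative behavior (sample strings are hypothetical, traced against the check above):
//   expected "refund, 30 days", output "You can request a refund within 30 days."
//     -> pass ('All 2 keywords found')
//   expected "refund, 30 days", output "Contact support for a refund."
//     -> fail ('Missing 1/2 keywords: 30 days')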

View file

@ -0,0 +1,30 @@
import { parseJudgeResponse } from './parse-judge-response';
import { Eval } from '../sdk/eval';
/**
* LLM-as-judge correctness eval. Returns an Eval pre-configured with a
 * judge handler; the caller must still set `.model()` and `.credential()`.
*/
export function correctness(): Eval {
return new Eval('correctness')
.description('Judges if the output is factually correct compared to the expected answer')
.judge(async ({ input, output, expected, llm }) => {
const prompt = [
'You are evaluating an AI assistant response for factual correctness.',
'',
`User question: ${input}`,
`Expected answer: ${expected ?? '(none provided)'}`,
`Actual answer: ${output}`,
'',
'Does the actual answer correctly address the question and match the expected answer?',
'Answer with pass or fail:',
'- pass = the answer is correct and addresses the question',
'- fail = the answer is incorrect, incomplete, or irrelevant',
'',
'Respond with ONLY a JSON object (no markdown fences): {"pass": true/false, "reasoning": "<explanation>"}',
].join('\n');
const result = await llm(prompt);
return parseJudgeResponse(result.text);
});
}
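// Usage sketch: `.model()` and `.credential()` are required per the doc comment above;
// the model id mirrors the example used elsewhere in this package, and the credential
// name 'anthropic' is a placeholder, not something this file defines.
//   const scorer = correctness()
//     .model('anthropic/claude-sonnet-4-5')
//     .credential('anthropic');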

View file

@ -0,0 +1,28 @@
import { parseJudgeResponse } from './parse-judge-response';
import { Eval } from '../sdk/eval';
/**
* LLM-as-judge helpfulness eval. Returns an Eval pre-configured with a
 * judge handler; the caller must still set `.model()` and `.credential()`.
*/
export function helpfulness(): Eval {
return new Eval('helpfulness')
.description('Judges whether the response is helpful for the user query')
.judge(async ({ input, output, llm }) => {
const prompt = [
'You are evaluating an AI assistant response for helpfulness.',
'',
`User question: ${input}`,
`Assistant response: ${output}`,
'',
'Is this response helpful to the user?',
'- pass = the response is helpful, addresses the question, and provides useful information',
'- fail = the response is unhelpful, off-topic, or lacks useful information',
'',
'Respond with ONLY a JSON object (no markdown fences): {"pass": true/false, "reasoning": "<explanation>"}',
].join('\n');
const result = await llm(prompt);
return parseJudgeResponse(result.text);
});
}
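// Usage sketch, mirroring the correctness scorer (credential name is a placeholder):
//   const scorer = helpfulness().model('anthropic/claude-sonnet-4-5').credential('anthropic');
// Unlike correctness, this judge prompt uses only the input/output pair and ignores `expected`.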

View file

@ -0,0 +1,7 @@
export { correctness } from './correctness';
export { helpfulness } from './helpfulness';
export { stringSimilarity } from './string-similarity';
export { categorization } from './categorization';
export { containsKeywords } from './contains-keywords';
export { jsonValidity } from './json-validity';
export { toolCallAccuracy } from './tool-call-accuracy';

View file

@ -0,0 +1,18 @@
import { Eval } from '../sdk/eval';
/** Deterministic JSON validity eval — checks if the output is parseable JSON. */
export function jsonValidity(): Eval {
return new Eval('json-validity')
.description('Checks if output is valid JSON')
.check(({ output }) => {
try {
JSON.parse(output);
return { pass: true, reasoning: 'Valid JSON' };
} catch (e) {
return {
pass: false,
reasoning: `Invalid JSON: ${e instanceof Error ? e.message : 'parse error'}`,
};
}
});
}
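// Illustrative behavior (sample outputs are hypothetical):
//   output '{"items": []}' -> pass ('Valid JSON')
//   output 'not json'      -> fail (reasoning includes the JSON.parse error message)
// Note that bare JSON scalars such as '42' also parse successfully and therefore pass.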

View file

@ -0,0 +1,32 @@
import type { EvalScore } from '../types';
/**
* Parse an LLM judge response into an EvalScore (pass/fail).
* Handles JSON wrapped in markdown fences, plain JSON, or raw text.
*/
export function parseJudgeResponse(text: string): EvalScore {
// Strip markdown code fences if present: ```json ... ``` or ``` ... ```
const stripped = text
.replace(/^```(?:json)?\s*\n?/i, '')
.replace(/\n?```\s*$/i, '')
.trim();
try {
const parsed = JSON.parse(stripped) as { pass?: boolean; score?: number; reasoning?: string };
// Support both { pass: true } and legacy { score: 0.8 } formats
const pass = parsed.pass ?? (parsed.score !== undefined ? parsed.score >= 0.7 : false);
return {
pass,
reasoning: parsed.reasoning ?? stripped,
};
} catch {
// Fallback: detect pass/fail from plain text or malformed JSON
const lowerText = stripped.toLowerCase();
const hasPassTrue = lowerText.includes('"pass": true') || lowerText.includes('"pass":true');
const hasFailFalse = lowerText.includes('"pass": false') || lowerText.includes('"pass":false');
// If no JSON-like pattern, check for plain-text "pass" or "fail" keywords
const pass =
hasPassTrue || (!hasFailFalse && /\bpass\b/i.test(stripped) && !/\bfail\b/i.test(stripped));
return { pass, reasoning: stripped };
}
}
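// Worked examples (sample inputs are illustrative, traced against the logic above):
//   parseJudgeResponse('```json\n{"pass": true, "reasoning": "ok"}\n```')
//     -> { pass: true, reasoning: 'ok' }        (fences stripped, JSON parsed)
//   parseJudgeResponse('{"score": 0.9, "reasoning": "close"}')
//     -> { pass: true, reasoning: 'close' }     (legacy score >= 0.7 counts as pass)
//   parseJudgeResponse('The answer is correct, so this is a pass.')
//     -> { pass: true, reasoning: <raw text> }  (plain-text fallback, "pass" without "fail")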

View file

@ -0,0 +1,46 @@
import { Eval } from '../sdk/eval';
/**
* Dice coefficient string similarity measures overlap of bigrams between
* two strings. Returns 0-1 where 1 is identical.
*/
function diceSimilarity(a: string, b: string): number {
const normalA = a.toLowerCase().trim();
const normalB = b.toLowerCase().trim();
if (normalA === normalB) return 1;
if (normalA.length < 2 || normalB.length < 2) return 0;
const bigrams = (s: string): Set<string> => {
const set = new Set<string>();
for (let i = 0; i < s.length - 1; i++) {
set.add(s.slice(i, i + 2));
}
return set;
};
const aBigrams = bigrams(normalA);
const bBigrams = bigrams(normalB);
let intersection = 0;
for (const bg of aBigrams) {
if (bBigrams.has(bg)) intersection++;
}
return (2 * intersection) / (aBigrams.size + bBigrams.size);
}
/** Deterministic string similarity eval using Dice coefficient. */
export function stringSimilarity(): Eval {
return new Eval('string-similarity')
.description('Measures string similarity between output and expected answer')
.check(({ output, expected }) => {
if (expected === undefined) {
return { pass: false, reasoning: 'No expected value provided' };
}
const similarity = diceSimilarity(output, expected);
return {
pass: similarity >= 0.7,
reasoning: `Dice similarity: ${(similarity * 100).toFixed(1)}%`,
};
});
}
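// Worked example of the Dice coefficient above (hand-computed):
//   "night" vs "nacht": bigrams {ni, ig, gh, ht} and {na, ac, ch, ht} share only "ht",
//   so similarity = 2 * 1 / (4 + 4) = 0.25, below the 0.7 threshold -> fail.
//   Identical strings short-circuit to 1 and pass.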

View file

@ -0,0 +1,35 @@
import { Eval } from '../sdk/eval';
/**
* Deterministic tool call accuracy eval.
* Expects `expected` to be a comma-separated list of tool names that should have been called.
* Passes only if ALL expected tools were called.
*/
export function toolCallAccuracy(): Eval {
return new Eval('tool-call-accuracy')
.description('Checks if the agent called all expected tools')
.check(({ expected, toolCalls }) => {
if (!expected) {
return { pass: false, reasoning: 'No expected tool names provided' };
}
const expectedTools = expected
.split(',')
.map((t) => t.trim().toLowerCase())
.filter(Boolean);
if (expectedTools.length === 0) {
return { pass: false, reasoning: 'No expected tools to check' };
}
const calledTools = new Set((toolCalls ?? []).map((tc) => tc.tool.toLowerCase()));
const missing = expectedTools.filter((t) => !calledTools.has(t));
return {
pass: missing.length === 0,
reasoning:
missing.length === 0
? `All ${expectedTools.length} expected tools were called`
: `Missing tools: ${missing.join(', ')}. Called: [${[...calledTools].join(', ') || 'none'}]`,
};
});
}
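// Illustrative behavior (tool names are hypothetical, traced against the check above):
//   expected "search,calculator", toolCalls [{ tool: 'Search' }]
//     -> fail ('Missing tools: calculator. Called: [search]')
//   expected "search,calculator", toolCalls [{ tool: 'search' }, { tool: 'Calculator' }]
//     -> pass ('All 2 expected tools were called')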

View file

@ -0,0 +1,129 @@
export type {
BuiltTool,
BuiltProviderTool,
BuiltAgent,
BuiltMemory,
BuiltGuardrail,
BuiltEval,
RunOptions,
AgentResult,
GenerateResult,
StreamResult,
EvalInput,
EvalScore,
EvalRunResult,
EvalResults,
ToolContext,
InterruptibleToolContext,
CheckpointStore,
StreamChunk,
SubAgentUsage,
Provider,
ThinkingConfig,
ThinkingConfigFor,
AnthropicThinkingConfig,
OpenAIThinkingConfig,
GoogleThinkingConfig,
XaiThinkingConfig,
SerializableAgentState,
AgentRunState,
MemoryConfig,
TitleGenerationConfig,
Thread,
SemanticRecallConfig,
ResumeOptions,
McpServerConfig,
McpVerifyResult,
ModelConfig,
ExecutionOptions,
PersistedExecutionOptions,
BuiltTelemetry,
AttributeValue,
} from './types';
export type { ProviderOptions } from '@ai-sdk/provider-utils';
export { AgentEvent } from './types';
export type { AgentEventData, AgentEventHandler } from './types';
export { Tool } from './sdk/tool';
export { Memory } from './sdk/memory';
export { Guardrail } from './sdk/guardrail';
export { Eval } from './sdk/eval';
export { evaluate } from './sdk/evaluate';
export type { DatasetRow, EvaluateConfig } from './sdk/evaluate';
export * as evals from './evals/index';
export { Telemetry } from './sdk/telemetry';
export { LangSmithTelemetry } from './integrations/langsmith';
export type { LangSmithTelemetryConfig } from './integrations/langsmith';
export { Agent } from './sdk/agent';
export { McpClient } from './sdk/mcp-client';
export { Network } from './sdk/network';
export { providerTools } from './sdk/provider-tools';
export { verify } from './sdk/verify';
export type { VerifyResult } from './sdk/verify';
export type {
ContentCitation,
ContentFile,
ContentMetadata,
ContentReasoning,
ContentText,
ContentToolCall,
ContentToolResult,
Message,
MessageContent,
MessageRole,
AgentMessage,
CustomAgentMessages,
AgentDbMessage,
} from './types/sdk/message';
export {
toDbMessage,
filterLlmMessages,
isLlmMessage,
} from './sdk/message';
export { fetchProviderCatalog } from './sdk/catalog';
export type {
ProviderCatalog,
ProviderInfo,
ModelInfo,
ModelCost,
ModelLimits,
} from './sdk/catalog';
export { SqliteMemory } from './storage/sqlite-memory';
export type { SqliteMemoryConfig } from './storage/sqlite-memory';
export { PostgresMemory } from './storage/postgres-memory';
export type { PostgresMemoryConfig } from './storage/postgres-memory';
export { Workspace } from './workspace';
export { BaseFilesystem } from './workspace';
export { BaseSandbox } from './workspace';
export { createWorkspaceTools } from './workspace';
export { SandboxProcessManager, ProcessHandle } from './workspace';
export type {
BaseFilesystemOptions,
FilesystemLifecycleHook,
WorkspaceFilesystem,
WorkspaceSandbox,
WorkspaceConfig,
CommandResult,
CommandOptions,
ExecuteCommandOptions,
FileContent,
FileStat,
FileEntry,
ReadOptions,
WriteOptions,
ListOptions,
RemoveOptions,
CopyOptions,
ProviderStatus,
SandboxInfo,
LocalFilesystemOptions,
LocalSandboxOptions,
DaytonaSandboxOptions,
BaseSandboxOptions,
MountConfig,
MountResult,
SpawnProcessOptions,
ProcessInfo,
} from './workspace';

View file

@ -0,0 +1,131 @@
import { Telemetry } from '../sdk/telemetry';
import type { BuiltTelemetry, OpaqueTracer, OpaqueTracerProvider } from '../types/telemetry';
export interface LangSmithTelemetryConfig {
/** LangSmith API key. If omitted, resolved via `.credential()` or LANGSMITH_API_KEY env var. */
apiKey?: string;
/** LangSmith project name. Falls back to LANGSMITH_PROJECT env var, then 'default'. */
project?: string;
/** LangSmith API base URL. Falls back to LANGSMITH_ENDPOINT env var. */
endpoint?: string;
/**
* Override the full OTLP traces URL. Normally derived from `endpoint`
* as `${endpoint}/otel/v1/traces`. Use this for custom collectors or testing.
*/
url?: string;
}
/**
* Create the LangSmith OTel tracer + provider from config.
* Dynamically imports langsmith and OTel packages so they remain
* optional peer dependencies.
*/
async function createLangSmithTracer(
config?: LangSmithTelemetryConfig,
resolvedApiKey?: string,
): Promise<{ tracer: OpaqueTracer; provider: OpaqueTracerProvider }> {
const { NodeTracerProvider } = (await import('@opentelemetry/sdk-trace-node')) as {
NodeTracerProvider: new (cfg?: {
spanProcessors?: unknown[];
}) => OpaqueTracerProvider & {
getTracer(name: string): OpaqueTracer;
};
};
const { LangSmithOTLPTraceExporter } = (await import('langsmith/experimental/otel/exporter')) as {
LangSmithOTLPTraceExporter: new (cfg?: {
apiKey?: string;
projectName?: string;
endpoint?: string;
}) => unknown;
};
const { LangSmithOTLPSpanProcessor } = (await import(
'langsmith/experimental/otel/processor'
)) as {
LangSmithOTLPSpanProcessor: new (exporter: unknown) => unknown;
};
// SECURITY: When the engine-resolved credential is the active key (i.e. no
// explicit config.apiKey overrides it), ignore user-provided url/endpoint to
// prevent redirecting the injected API key to an arbitrary host.
const apiKey = config?.apiKey ?? resolvedApiKey;
const usingResolvedKey = !config?.apiKey && resolvedApiKey !== undefined;
const url = usingResolvedKey
? undefined
: (config?.url ??
(config?.endpoint ? `${config.endpoint.replace(/\/$/, '')}/otel/v1/traces` : undefined));
const exporter = new LangSmithOTLPTraceExporter({
apiKey,
projectName: config?.project,
...(url ? { url } : {}),
});
const processor = new LangSmithOTLPSpanProcessor(exporter);
const provider = new NodeTracerProvider({
spanProcessors: [processor],
});
// Do NOT call provider.register() — avoid polluting the global tracer provider.
return { tracer: provider.getTracer('@n8n/agents'), provider };
}
/**
* Pre-built telemetry for LangSmith. Extends `Telemetry` so all builder
* methods (`.credential()`, `.functionId()`, `.recordOutputs()`, `.redact()`,
* etc.) are available.
*
* Requires `langsmith` and `@opentelemetry/sdk-trace-node` as peer dependencies.
*
* @example
* ```typescript
* import { Agent, LangSmithTelemetry } from '@n8n/agents';
*
* const telemetry = new LangSmithTelemetry({ project: 'my-project' })
* .credential('langsmith')
* .recordOutputs(false);
*
* const agent = new Agent('assistant')
* .model('anthropic/claude-sonnet-4-5')
* .telemetry(telemetry)
* .instructions('...');
* ```
*/
export class LangSmithTelemetry extends Telemetry {
private langsmithConfig?: LangSmithTelemetryConfig;
constructor(config?: LangSmithTelemetryConfig) {
super();
this.langsmithConfig = config;
}
/** @override Build telemetry config, creating the LangSmith tracer. */
override async build(): Promise<BuiltTelemetry> {
if (this.otlpEndpointValue !== undefined) {
throw new Error('LangSmithTelemetry creates its own tracer — do not use .otlpEndpoint().');
}
// Clear any tracer from a previous build() so the parent's
// .tracer()/.otlpEndpoint() mutual-exclusion check passes cleanly.
this.tracerValue = undefined;
// The LangSmith exporter silently drops all spans unless this is set.
// Auto-enable it so users don't have to remember a magic env var.
process.env.LANGCHAIN_TRACING_V2 ??= 'true';
const { tracer, provider } = await createLangSmithTracer(
this.langsmithConfig,
this.resolvedKey,
);
this.tracerValue = tracer;
// Call parent build() which handles integrations, redaction, etc.
const built = await super.build();
// Attach the provider for flush/shutdown (parent build sets it from
// otlpEndpoint but not from .tracer(), so we add it here).
return { ...built, provider };
}
}

File diff suppressed because it is too large

View file

@ -0,0 +1,82 @@
import { AgentEvent } from '../types/runtime/event';
import type { AgentEventData, AgentEventHandler } from '../types/runtime/event';
/**
* Internal event bus for agent lifecycle events.
*
* Shared between Agent (public API) and AgentRuntime (emitter).
* Handlers registered via `on()` are called synchronously when
* `emit()` is invoked from the agentic loop.
*
* Cancellation uses a standard `AbortController`. The signal is passed
* directly to the AI SDK's `generateText` / `streamText` calls so that
* in-flight HTTP requests are cancelled immediately when `abort()` is called,
* rather than waiting for the current LLM call to finish.
*
* A new controller is created for each run via `resetAbort()` so the same
* agent instance can be reused after cancellation.
*/
export class AgentEventBus {
private handlers = new Map<AgentEvent, Set<AgentEventHandler>>();
private controller = new AbortController();
private externalCleanup?: () => void;
on(event: AgentEvent, handler: AgentEventHandler): void {
let set = this.handlers.get(event);
if (!set) {
set = new Set();
this.handlers.set(event, set);
}
set.add(handler);
}
emit(data: AgentEventData): void {
const set = this.handlers.get(data.type);
if (!set) return;
for (const handler of set) {
handler(data);
}
}
abort(): void {
this.controller.abort();
}
/**
* Replace the AbortController with a fresh one.
* Called at the start of each generate() / stream() so the agent
* can be reused after a previous cancellation.
*
* When an external signal is provided, its abort is forwarded to the
* internal controller so that either `abort()` or the external signal
* can cancel the current run.
*/
resetAbort(externalSignal?: AbortSignal): void {
this.externalCleanup?.();
this.externalCleanup = undefined;
this.controller = new AbortController();
if (externalSignal) {
if (externalSignal.aborted) {
this.controller.abort(externalSignal.reason);
} else {
const onAbort = () => this.controller.abort(externalSignal.reason);
externalSignal.addEventListener('abort', onAbort, { once: true });
this.externalCleanup = () => externalSignal.removeEventListener('abort', onAbort);
}
}
}
/** The AbortSignal for the current run. Pass to generateText / streamText. */
get signal(): AbortSignal {
return this.controller.signal;
}
get isAborted(): boolean {
return this.controller.signal.aborted;
}
}
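// A minimal usage sketch: forwarding an external AbortSignal into the bus so that
// either side can cancel the current run. The event member name is a placeholder;
// substitute a real AgentEvent value.
//
//   const bus = new AgentEventBus();
//   bus.on(AgentEvent.ToolCall, (data) => console.log(data.type)); // placeholder member name
//   const external = new AbortController();
//   bus.resetAbort(external.signal);  // fresh controller, external abort forwarded
//   external.abort();                 // bus.isAborted === true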
export { AgentEvent };

View file

@ -0,0 +1,45 @@
/**
* Filtered logger that suppresses known noisy warnings from the runtime.
* All other messages are forwarded to console.
*/
const SUPPRESSED_PATTERNS = [
'No memory is configured but resourceId and threadId were passed in args',
];
function isSuppressed(message: string): boolean {
return SUPPRESSED_PATTERNS.some((pattern) => message.includes(pattern));
}
/**
* Creates a logger that drops messages matching known suppressed patterns
* and forwards everything else to console.
*/
export function createFilteredLogger() {
return {
debug(message: string, ...args: unknown[]) {
if (!isSuppressed(message)) console.debug(message, ...args);
},
info(message: string, ...args: unknown[]) {
if (!isSuppressed(message)) console.info(message, ...args);
},
warn(message: string, ...args: unknown[]) {
if (!isSuppressed(message)) console.warn(message, ...args);
},
error(message: string, ...args: unknown[]) {
if (!isSuppressed(message)) console.error(message, ...args);
},
trackException() {},
getTransports() {
return new Map();
},
// eslint-disable-next-line @typescript-eslint/require-await
async listLogs() {
return { logs: [] as unknown[], total: 0, page: 1, perPage: 100, hasMore: false };
},
// eslint-disable-next-line @typescript-eslint/require-await
async listLogsByRunId() {
return { logs: [] as unknown[], total: 0, page: 1, perPage: 100, hasMore: false };
},
};
}

View file

@ -0,0 +1,178 @@
/** Don't remove the .js extensions. That's how the @modelcontextprotocol/sdk is packaged. */
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
import { CallToolResultSchema, type CallToolResult } from '@modelcontextprotocol/sdk/types.js';
import { McpToolResolver } from './mcp-tool-resolver';
import { wrapToolForApproval } from '../sdk/tool';
import type { McpServerConfig } from '../types/sdk/mcp';
import type { BuiltTool } from '../types/sdk/tool';
/** The raw result returned by an MCP tool call. */
export type McpCallToolResult = CallToolResult;
/** Wraps a single MCP SDK Client instance for one server. Not publicly exported. */
export class McpConnection {
private client: Client;
private config: McpServerConfig;
private readonly shouldRequireToolApproval: boolean;
private connectionPromise: Promise<void> | undefined = undefined;
private disconnectPromise: Promise<void> | undefined = undefined;
private closed = false;
constructor(config: McpServerConfig, requireToolApproval = false) {
this.config = config;
this.shouldRequireToolApproval = requireToolApproval;
this.client = new Client({ name: '@n8n/agents', version: '0.1.0' }, { capabilities: {} });
}
async connect(): Promise<void> {
if (this.connectionPromise !== undefined) {
return await this.connectionPromise;
}
this.connectionPromise = this.connectWithTransport(this.createTransport(this.config));
try {
await this.connectionPromise;
} catch (error) {
this.connectionPromise = undefined;
throw error;
}
}
private async connectWithTransport(
transport: SSEClientTransport | StreamableHTTPClientTransport | StdioClientTransport,
): Promise<void> {
const timeoutMs = this.config.connectionTimeoutMs;
if (timeoutMs === undefined) {
await this.client.connect(transport);
return;
}
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
throw new Error(
`MCP server "${this.config.name}": connectionTimeoutMs must be a positive finite number`,
);
}
let timeoutId: ReturnType<typeof setTimeout> | undefined;
try {
await Promise.race([
this.client.connect(transport),
new Promise<never>((_, reject) => {
timeoutId = setTimeout(() => {
reject(
new Error(
`MCP server "${this.config.name}": connection timed out after ${timeoutMs}ms`,
),
);
}, timeoutMs);
}),
]);
} catch (error) {
await this.client.close().catch(() => {});
throw error;
} finally {
if (timeoutId !== undefined) clearTimeout(timeoutId);
}
}
/** List tools from the server, resolving them into BuiltTool instances with prefixed names. */
async listTools(): Promise<BuiltTool[]> {
const result = await this.client.listTools();
const resolver = new McpToolResolver();
const tools = resolver.resolve(this, result.tools);
return tools.map((t) =>
t.suspendSchema || !this.needsApproval(t)
? t
: wrapToolForApproval(t, { requireApproval: true }),
);
}
/**
* Returns true when a resolved tool should be wrapped with an approval gate.
*
* A tool needs approval when any of the following is true:
* - the global `shouldRequireToolApproval` flag (set via Agent.requireToolApproval()) is true, OR
* - `config.requireApproval` is `true` (all tools on this server), OR
* - `config.requireApproval` is a string array that includes the tool's original (un-prefixed) name.
*/
private needsApproval(tool: BuiltTool): boolean {
if (this.shouldRequireToolApproval) return true;
const { requireApproval } = this.config;
if (requireApproval === true) return true;
if (Array.isArray(requireApproval) && requireApproval.length > 0) {
const prefix = `${this.config.name}_`;
const originalName = tool.name.startsWith(prefix)
? tool.name.slice(prefix.length)
: tool.name;
return requireApproval.includes(originalName);
}
return false;
}
async callTool(name: string, args: Record<string, unknown>): Promise<McpCallToolResult> {
const result = await this.client.callTool({ name, arguments: args }, CallToolResultSchema);
return result as McpCallToolResult;
}
async disconnect(): Promise<void> {
if (this.disconnectPromise) return await this.disconnectPromise;
const promise = this.doDisconnect();
this.disconnectPromise = promise;
return await promise.finally(() => {
if (this.disconnectPromise === promise) this.disconnectPromise = undefined;
});
}
private async doDisconnect(): Promise<void> {
if (this.closed) return;
await this.client.close();
this.connectionPromise = undefined;
this.closed = true;
}
get name(): string {
return this.config.name;
}
/**
* Returns true when this server's config declares per-server approval requirements
* without requiring a network connection.
*/
declaresApproval(): boolean {
const { requireApproval } = this.config;
return (
requireApproval === true || (Array.isArray(requireApproval) && requireApproval.length > 0)
);
}
private createTransport(
config: McpServerConfig,
): SSEClientTransport | StreamableHTTPClientTransport | StdioClientTransport {
if (config.command) {
return new StdioClientTransport({
command: config.command,
args: config.args,
env: config.env,
});
} else if (config.url) {
const url = new URL(config.url);
const requestInit: RequestInit | undefined = config.headers
? { headers: config.headers }
: undefined;
if (config.transport === 'streamableHttp') {
return new StreamableHTTPClientTransport(url, { requestInit });
}
return new SSEClientTransport(url, { requestInit });
}
throw new Error(`MCP server "${config.name}": provide either "url" or "command"`);
}
}
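// Illustrative config sketch for the approval rules above. The server name, URL and
// tool name are placeholders; `requireApproval` may be `true` (gate every tool) or an
// array of original, un-prefixed tool names.
//
//   const config: McpServerConfig = {
//     name: 'github',
//     url: 'https://example.com/mcp',
//     transport: 'streamableHttp',
//     requireApproval: ['create_issue'], // only "github_create_issue" gets the approval gate
//   };
//   const connection = new McpConnection(config);
//   await connection.connect();
//   const tools = await connection.listTools(); // tool names prefixed with "github_"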

View file

@ -0,0 +1,92 @@
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
import type { JSONSchema7 } from 'json-schema';
import type { McpCallToolResult, McpConnection } from './mcp-connection';
import type { AgentMessage, ContentFile, ContentText } from '../types/sdk/message';
import type { BuiltTool, InterruptibleToolContext, ToolContext } from '../types/sdk/tool';
type McpContentBlock = McpCallToolResult['content'][number];
/**
* Convert raw MCP tool definitions into BuiltTool instances.
* Tool names are prefixed with the server name to prevent collisions.
* Not publicly exported.
*/
export class McpToolResolver {
resolve(connection: McpConnection, tools: Tool[]): BuiltTool[] {
return tools.map((tool) => this.resolveTool(connection, tool));
}
private resolveTool(connection: McpConnection, tool: Tool): BuiltTool {
const prefixedName = `${connection.name}_${tool.name}`;
const originalName = tool.name;
const handler = async (
input: unknown,
_ctx: ToolContext | InterruptibleToolContext,
): Promise<unknown> => {
const args = (input ?? {}) as Record<string, unknown>;
return await connection.callTool(originalName, args);
};
const toMessage = (output: unknown): AgentMessage | undefined => {
return buildRichMessage(output as McpCallToolResult);
};
const builtTool: BuiltTool = {
name: prefixedName,
description: tool.description ?? '',
inputSchema: tool.inputSchema as JSONSchema7,
handler,
toMessage,
mcpTool: true,
mcpServerName: connection.name,
};
return builtTool;
}
}
/**
* Convert an MCP CallToolResult into a rich AgentMessage containing text and image content parts.
* Returns undefined if the result contains only text (the tool-result JSON is sufficient for the LLM).
* Returns an assistant Message with ContentFile parts for image blocks so multimodal models can process them.
*/
function buildRichMessage(result: McpCallToolResult): AgentMessage | undefined {
if (!result?.content) return undefined;
const hasImages = result.content.some((block) => block.type === 'image');
if (!hasImages) return undefined;
const contentParts: Array<ContentText | ContentFile> = [];
for (const block of result.content) {
const part = blockToContentPart(block);
if (part) contentParts.push(part);
}
if (contentParts.length === 0) return undefined;
return { role: 'assistant', content: contentParts };
}
function blockToContentPart(block: McpContentBlock): ContentText | ContentFile | undefined {
if (block.type === 'text' && block.text) {
return { type: 'text', text: block.text };
}
if (block.type === 'image' && block.data) {
return {
type: 'file',
data: block.data,
mediaType: block.mimeType ?? 'image/png',
};
}
if (block.type === 'resource' && block.resource) {
const text = 'text' in block.resource ? block.resource.text : block.resource.uri;
return { type: 'text', text };
}
return undefined;
}
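// Behaviour sketch for buildRichMessage: a text-only result yields undefined, while an
// image block produces an assistant message with a file part. The base64 data is a placeholder.
//
//   buildRichMessage({ content: [{ type: 'text', text: 'ok' }] });
//   // => undefined (the tool-result JSON already covers it)
//   buildRichMessage({ content: [{ type: 'image', data: 'iVBORw0...', mimeType: 'image/png' }] });
//   // => { role: 'assistant', content: [{ type: 'file', data: 'iVBORw0...', mediaType: 'image/png' }] }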

View file

@ -0,0 +1,122 @@
import { toDbMessage } from '../sdk/message';
import type { BuiltMemory, Thread } from '../types';
import type { AgentDbMessage, AgentMessage } from '../types/sdk/message';
interface StoredMessage {
message: AgentDbMessage;
createdAt: Date;
}
/**
* In-memory implementation of BuiltMemory.
* All data is lost on process restart, so it is suitable only for development and testing.
*
* Call `saveThread` before `saveMessages` so the thread record exists;
* `saveMessages` appends to the thread identified by its `threadId` argument.
*/
export class InMemoryMemory implements BuiltMemory {
private threads = new Map<string, Thread>();
private messagesByThread = new Map<string, StoredMessage[]>();
private workingMemoryByKey = new Map<string, string>();
// eslint-disable-next-line @typescript-eslint/require-await
async getWorkingMemory(params: { threadId: string; resourceId?: string }): Promise<
string | null
> {
return this.workingMemoryByKey.get(params.resourceId ?? params.threadId) ?? null;
}
// eslint-disable-next-line @typescript-eslint/require-await
async saveWorkingMemory(
params: { threadId: string; resourceId?: string },
content: string,
): Promise<void> {
this.workingMemoryByKey.set(params.resourceId ?? params.threadId, content);
}
// eslint-disable-next-line @typescript-eslint/require-await
async getThread(threadId: string): Promise<Thread | null> {
return this.threads.get(threadId) ?? null;
}
// eslint-disable-next-line @typescript-eslint/require-await
async saveThread(thread: Omit<Thread, 'createdAt' | 'updatedAt'>): Promise<Thread> {
const existing = this.threads.get(thread.id);
const now = new Date();
const saved: Thread = {
...thread,
title: thread.title ?? existing?.title,
metadata: thread.metadata ?? existing?.metadata,
createdAt: existing?.createdAt ?? now,
updatedAt: now,
};
this.threads.set(thread.id, saved);
return saved;
}
// eslint-disable-next-line @typescript-eslint/require-await
async deleteThread(threadId: string): Promise<void> {
this.threads.delete(threadId);
this.messagesByThread.delete(threadId);
}
// eslint-disable-next-line @typescript-eslint/require-await
async getMessages(
threadId: string,
opts?: { limit?: number; before?: Date },
): Promise<AgentDbMessage[]> {
let stored = this.messagesByThread.get(threadId) ?? [];
if (opts?.before) {
const cutoff = opts.before.getTime();
stored = stored.filter((s) => s.createdAt.getTime() < cutoff);
}
if (opts?.limit) stored = stored.slice(-opts.limit);
return stored.map((s) => s.message);
}
/**
* Save messages to the thread identified by `args.threadId`.
* Call `saveThread` first so the thread record exists before messages are appended.
*/
// eslint-disable-next-line @typescript-eslint/require-await
async saveMessages(args: {
threadId: string;
resourceId?: string;
messages: AgentMessage[];
}): Promise<void> {
const existing = this.messagesByThread.get(args.threadId) ?? [];
const now = new Date();
for (const msg of args.messages) {
existing.push({ message: toDbMessage(msg), createdAt: now });
}
this.messagesByThread.set(args.threadId, existing);
}
// eslint-disable-next-line @typescript-eslint/require-await
async deleteMessages(messageIds: string[]): Promise<void> {
const idSet = new Set(messageIds);
for (const [threadId, messages] of this.messagesByThread.entries()) {
this.messagesByThread.set(
threadId,
messages.filter((s) => !idSet.has(s.message.id)),
);
}
}
}
/**
* Save messages to a specific thread, ensuring the thread exists first.
* Always call this instead of `memory.saveMessages()` directly, as it
* creates the thread record first for implementations like InMemoryMemory.
*/
export async function saveMessagesToThread(
memory: BuiltMemory,
threadId: string,
resourceId: string,
messages: AgentMessage[],
): Promise<void> {
await memory.saveThread({ id: threadId, resourceId });
await memory.saveMessages({ threadId, resourceId, messages });
}
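// Usage sketch with the in-memory backend; the thread and resource ids are placeholders.
//
//   const memory = new InMemoryMemory();
//   await saveMessagesToThread(memory, 'thread-1', 'user-1', [
//     { role: 'user', content: [{ type: 'text', text: 'Hello' }] },
//   ]);
//   const stored = await memory.getMessages('thread-1'); // one AgentDbMessage with a generated id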

View file

@ -0,0 +1,128 @@
import type { ProviderOptions } from '@ai-sdk/provider-utils';
import type { ModelMessage } from 'ai';
import { toAiMessages } from './messages';
import { stripOrphanedToolMessages } from './strip-orphaned-tool-messages';
import { buildWorkingMemoryInstruction } from './working-memory';
import { filterLlmMessages } from '../sdk/message';
import type { SerializedMessageList } from '../types/runtime/message-list';
import type { AgentDbMessage } from '../types/sdk/message';
export type { SerializedMessageList };
export interface WorkingMemoryContext {
template: string;
structured: boolean;
/** The current persisted state, or null if not yet loaded. Falls back to template. */
state: string | null;
}
/**
* Append-only message container with Set-based source tracking.
*
* Three named sources:
* - history: messages loaded from memory at the start of the turn.
*   Never included in turnDelta(); already persisted.
* - input: the caller's raw input for this turn (custom messages preserved).
* - response: LLM replies, tool results, and custom tool messages from this turn.
*
* Serialization stores the flat message array plus the IDs of each set so
* the full three-way source distinction survives a round-trip.
*/
export class AgentMessageList {
private all: AgentDbMessage[] = [];
private historySet = new Set<AgentDbMessage>();
private inputSet = new Set<AgentDbMessage>();
private responseSet = new Set<AgentDbMessage>();
/** Working memory context for this run. Set by buildMessageList / resume. */
workingMemory: WorkingMemoryContext | undefined;
addHistory(messages: AgentDbMessage[]): void {
for (const m of messages) {
this.all.push(m);
this.historySet.add(m);
}
}
addInput(messages: AgentDbMessage[]): void {
for (const m of messages) {
this.all.push(m);
this.inputSet.add(m);
}
}
addResponse(messages: AgentDbMessage[]): void {
for (const m of messages) {
this.all.push(m);
this.responseSet.add(m);
}
}
/**
* Full LLM context for a generateText / streamText call.
* Prepends the system prompt (with working memory appended if configured),
* strips custom messages via filterLlmMessages.
*/
forLlm(baseInstructions: string, instructionProviderOptions?: ProviderOptions): ModelMessage[] {
let systemPrompt = baseInstructions;
if (this.workingMemory) {
const wmInstruction = buildWorkingMemoryInstruction(
this.workingMemory.template,
this.workingMemory.structured,
);
const wmState = this.workingMemory.state ?? this.workingMemory.template;
systemPrompt +=
wmInstruction + '\n\nCurrent working memory state:\n```\n' + wmState + '\n```';
}
const systemMessage: ModelMessage = instructionProviderOptions
? { role: 'system', content: systemPrompt, providerOptions: instructionProviderOptions }
: { role: 'system', content: systemPrompt };
return [systemMessage, ...toAiMessages(filterLlmMessages(stripOrphanedToolMessages(this.all)))];
}
/**
* Current-turn delta for memory persistence (input + responses).
* Non-destructive; safe to call multiple times (e.g. on retry).
*/
turnDelta(): AgentDbMessage[] {
return this.all.filter((m) => this.inputSet.has(m) || this.responseSet.has(m));
}
/**
* Only the LLM-produced messages from this turn (responses + tool results).
* Used for GenerateResult.messages; callers should not see their own input echoed back.
*/
responseDelta(): AgentDbMessage[] {
return this.all.filter((m) => this.responseSet.has(m));
}
serialize(): SerializedMessageList {
const toIds = (set: Set<AgentDbMessage>) => Array.from(set).map((m) => m.id);
return {
messages: [...this.all],
historyIds: toIds(this.historySet),
inputIds: toIds(this.inputSet),
responseIds: toIds(this.responseSet),
};
}
static deserialize(data: SerializedMessageList): AgentMessageList {
const list = new AgentMessageList();
const historyIdSet = new Set(data.historyIds);
const inputIdSet = new Set(data.inputIds);
const responseIdSet = new Set(data.responseIds);
for (const m of data.messages) {
list.all.push(m);
if (historyIdSet.has(m.id)) list.historySet.add(m);
if (inputIdSet.has(m.id)) list.inputSet.add(m);
if (responseIdSet.has(m.id)) list.responseSet.add(m);
}
return list;
}
}
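// Usage sketch of the three-way split; the message arrays are stand-ins for real
// AgentDbMessage values loaded from memory or produced by the LLM.
//
//   const list = new AgentMessageList();
//   list.addHistory(historyMessages);    // loaded from memory, never re-persisted
//   list.addInput(inputMessages);        // this turn's caller input
//   list.addResponse(responseMessages);  // LLM replies and tool results
//   const toPersist = list.turnDelta();  // input + responses only
//   const restored = AgentMessageList.deserialize(list.serialize());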

View file

@ -0,0 +1,299 @@
import type {
FilePart,
ModelMessage,
TextPart,
ToolCallPart,
ToolResultPart,
ImagePart,
ToolApprovalRequest,
ToolApprovalResponse,
FinishReason as AiFinishReason,
} from 'ai';
import { toDbMessage } from '../sdk/message';
import type { FinishReason } from '../types';
import type {
AgentDbMessage,
AgentMessage,
ContentFile,
ContentReasoning,
ContentText,
ContentToolCall,
ContentToolResult,
Message,
MessageContent,
} from '../types/sdk/message';
import type { JSONValue } from '../types/utils/json';
/** Reasoning content part — mirrors @ai-sdk/provider-utils ReasoningPart (not re-exported by 'ai'). */
type ReasoningPart = { type: 'reasoning'; text: string };
type AiContentPart =
| TextPart
| FilePart
| ImagePart
| ReasoningPart
| ToolCallPart
| ToolResultPart
| ToolApprovalRequest
| ToolApprovalResponse;
// --- Type guards for MessageContent blocks ---
function isText(block: MessageContent): block is ContentText {
return block.type === 'text';
}
function isReasoning(block: MessageContent): block is ContentReasoning {
return block.type === 'reasoning';
}
function isFile(block: MessageContent): block is ContentFile {
return block.type === 'file';
}
function isToolCall(block: MessageContent): block is ContentToolCall {
return block.type === 'tool-call';
}
function isToolResult(block: MessageContent): block is ContentToolResult {
return block.type === 'tool-result';
}
/**
* Parse a JSONValue that may be a stringified JSON object back into
* its parsed form. Non-string values pass through unchanged.
*/
function parseJsonValue(value: JSONValue): unknown {
if (typeof value === 'string') {
try {
return JSON.parse(value);
} catch {
return value;
}
}
return value;
}
/** Convert a single n8n MessageContent block to an AI SDK content part. */
function toAiContent(block: MessageContent): AiContentPart | undefined {
let base: AiContentPart | undefined;
if (isText(block)) {
base = { type: 'text', text: block.text };
} else if (isFile(block)) {
base = {
type: 'file',
data: block.data,
mediaType: block.mediaType ?? 'application/octet-stream',
};
} else if (isToolCall(block)) {
base = {
type: 'tool-call',
toolCallId: block.toolCallId ?? '',
toolName: block.toolName,
input: parseJsonValue(block.input),
providerExecuted: block.providerExecuted,
};
}
if (isToolResult(block)) {
if (block.isError) {
if (typeof block.result === 'string') {
base = {
type: 'tool-result',
toolCallId: block.toolCallId,
toolName: block.toolName,
output: { type: 'error-text', value: block.result },
};
} else {
base = {
type: 'tool-result',
toolCallId: block.toolCallId,
toolName: block.toolName,
output: { type: 'error-json', value: block.result },
};
}
} else {
base = {
type: 'tool-result',
toolCallId: block.toolCallId,
toolName: block.toolName,
output: { type: 'json', value: block.result },
};
}
} else if (isReasoning(block)) {
base = { type: 'reasoning', text: block.text };
}
if (base && block.providerOptions) {
return { ...base, providerOptions: block.providerOptions } as AiContentPart;
}
return base;
}
/** Convert a single AI SDK content part to an n8n MessageContent block. */
function fromAiContent(part: AiContentPart): MessageContent | undefined {
const providerOptions = 'providerOptions' in part ? part.providerOptions : undefined;
let base: MessageContent | undefined;
switch (part.type) {
case 'text':
base = { type: 'text', text: part.text };
break;
case 'file': {
const data =
part.data instanceof URL ? part.data.toString() : (part.data as ContentFile['data']);
base = { type: 'file', data, mediaType: part.mediaType };
break;
}
case 'image': {
const data =
part.image instanceof URL ? part.image.toString() : (part.image as ContentFile['data']);
base = { type: 'file', data, mediaType: part.mediaType };
break;
}
case 'reasoning':
base = { type: 'reasoning', text: part.text };
break;
case 'tool-call':
base = {
type: 'tool-call',
toolCallId: part.toolCallId,
toolName: part.toolName,
input: part.input as JSONValue,
providerExecuted: part.providerExecuted,
};
break;
case 'tool-result': {
const { output } = part;
let result: JSONValue;
let isError: boolean | undefined;
if (output.type === 'json') {
result = output.value;
} else if (output.type === 'text') {
result = output.value;
} else if (output.type === 'error-json') {
result = output.value;
isError = true;
} else if (output.type === 'error-text') {
result = output.value;
isError = true;
} else {
result = null;
isError = true;
}
base = {
type: 'tool-result',
toolCallId: part.toolCallId,
toolName: part.toolName,
result,
isError,
};
break;
}
// Ignore these types, because HITL is handled by our runtime
case 'tool-approval-request':
case 'tool-approval-response':
default:
return undefined;
}
if (base && providerOptions) {
return { ...base, providerOptions };
}
return base;
}
/** Convert a single n8n Message to an AI SDK ModelMessage. */
export function toAiMessage(msg: Message): ModelMessage {
let base: ModelMessage;
switch (msg.role) {
case 'system': {
const text = msg.content
.filter(isText)
.map((b) => b.text)
.join('');
base = { role: 'system', content: text };
break;
}
case 'user': {
const parts = msg.content
.map(toAiContent)
.filter((p): p is TextPart | FilePart => p?.type === 'text' || p?.type === 'file');
base = { role: 'user', content: parts };
break;
}
case 'assistant': {
const parts = msg.content
.map(toAiContent)
.filter(
(p): p is TextPart | ReasoningPart | ToolCallPart | ToolResultPart | FilePart =>
p?.type === 'text' ||
p?.type === 'reasoning' ||
p?.type === 'tool-call' ||
p?.type === 'tool-result' ||
p?.type === 'file',
);
base = { role: 'assistant', content: parts };
break;
}
case 'tool': {
const parts = msg.content
.map(toAiContent)
.filter((p): p is ToolResultPart => p?.type === 'tool-result');
base = { role: 'tool', content: parts };
break;
}
default:
throw new Error(`Unknown role: ${msg.role as string}`);
}
if (msg.providerOptions) {
return { ...base, providerOptions: msg.providerOptions };
}
return base;
}
/** Convert n8n Messages to AI SDK ModelMessages for passing to stream/generateText. */
export function toAiMessages(messages: Message[]): ModelMessage[] {
return messages.map(toAiMessage);
}
/** Convert a single AI SDK ModelMessage to an n8n AgentDbMessage (with a generated id). */
export function fromAiMessage(msg: ModelMessage): AgentDbMessage {
const rawContent = msg.content;
const content: MessageContent[] =
typeof rawContent === 'string'
? [{ type: 'text', text: rawContent }]
: rawContent.map(fromAiContent).filter((p): p is MessageContent => p !== undefined);
const message: AgentMessage = { role: msg.role, content };
if ('providerOptions' in msg && msg.providerOptions) {
message.providerOptions = msg.providerOptions;
}
return toDbMessage(message);
}
/** Convert AI SDK ModelMessages to n8n AgentDbMessages (each with a generated id). */
export function fromAiMessages(messages: ModelMessage[]): AgentDbMessage[] {
return messages.map(fromAiMessage);
}
export function fromAiFinishReason(reason: AiFinishReason): FinishReason {
switch (reason) {
case 'stop':
return 'stop';
case 'length':
return 'length';
case 'content-filter':
return 'content-filter';
case 'tool-calls':
return 'tool-calls';
case 'error':
return 'error';
case 'other':
return 'other';
}
}
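// Round-trip sketch: a user Message converts to the AI SDK shape and back.
//
//   const msg: Message = { role: 'user', content: [{ type: 'text', text: 'Hi' }] };
//   const aiMsg = toAiMessage(msg);     // { role: 'user', content: [{ type: 'text', text: 'Hi' }] }
//   const dbMsg = fromAiMessage(aiMsg); // same content plus a generated id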

View file

@ -0,0 +1,116 @@
/* eslint-disable @typescript-eslint/no-require-imports */
import type { EmbeddingModel, LanguageModel } from 'ai';
import type { ModelConfig } from '../types/sdk/agent';
type CreateProviderFn = (opts?: {
apiKey?: string;
baseURL?: string;
}) => (model: string) => LanguageModel;
type CreateEmbeddingProviderFn = (opts?: { apiKey?: string }) => {
embeddingModel(model: string): EmbeddingModel;
};
function isLanguageModel(config: unknown): config is LanguageModel {
return typeof config === 'object' && config !== null && 'doGenerate' in config;
}
/**
* Provider packages are loaded dynamically via require() so only the
* provider needed at runtime must be installed.
*/
export function createModel(config: ModelConfig): LanguageModel {
if (isLanguageModel(config)) {
return config;
}
const stripEmpty = <T>(value: T | undefined): T | undefined => {
if (!value) return undefined;
if (typeof value === 'string' && value.trim() === '') return undefined;
return value;
};
const modelId = stripEmpty(typeof config === 'string' ? config : config.id);
const apiKey = stripEmpty(typeof config === 'string' ? undefined : config.apiKey);
const baseURL = stripEmpty(typeof config === 'string' ? undefined : config.url);
if (!modelId) {
throw new Error('Model ID is required');
}
const [provider, ...rest] = modelId.split('/');
const modelName = rest.join('/');
switch (provider) {
case 'anthropic': {
const { createAnthropic } = require('@ai-sdk/anthropic') as {
createAnthropic: CreateProviderFn;
};
return createAnthropic({ apiKey, baseURL })(modelName);
}
case 'openai': {
const { createOpenAI } = require('@ai-sdk/openai') as {
createOpenAI: CreateProviderFn;
};
return createOpenAI({ apiKey, baseURL })(modelName);
}
case 'google': {
const { createGoogleGenerativeAI } = require('@ai-sdk/google') as {
createGoogleGenerativeAI: CreateProviderFn;
};
return createGoogleGenerativeAI({ apiKey, baseURL })(modelName);
}
case 'xai': {
const { createXai } = require('@ai-sdk/xai') as {
createXai: CreateProviderFn;
};
return createXai({ apiKey, baseURL })(modelName);
}
default:
throw new Error(
`Unsupported provider: "${provider}". Supported: anthropic, openai, google, xai`,
);
}
}
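// Usage sketch: "provider/model" strings resolve to the matching @ai-sdk/* package,
// which must be installed at runtime. The model ids below are examples only.
//
//   const claude = createModel('anthropic/claude-sonnet-4-5');
//   const gpt = createModel({ id: 'openai/gpt-4.1', apiKey: process.env.OPENAI_API_KEY });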
/**
* Registry of embedding provider packages and their factory function names.
* Each AI SDK provider follows the same pattern:
* createProvider({ apiKey }).embeddingModel(modelName)
*
* To add a new provider, install its @ai-sdk/* package and add an entry here.
*/
const EMBEDDING_PROVIDERS = {
openai: { pkg: '@ai-sdk/openai', factory: 'createOpenAI' },
google: { pkg: '@ai-sdk/google', factory: 'createGoogleGenerativeAI' },
mistral: { pkg: '@ai-sdk/mistral', factory: 'createMistral' },
cohere: { pkg: '@ai-sdk/cohere', factory: 'createCohere' },
amazon: { pkg: '@ai-sdk/amazon-bedrock', factory: 'createAmazonBedrock' },
bedrock: { pkg: '@ai-sdk/amazon-bedrock', factory: 'createAmazonBedrock' },
} as const;
type EmbeddingProvider = keyof typeof EMBEDDING_PROVIDERS;
type EmbeddingModelId = `${EmbeddingProvider}/${string}`;
/**
* Create an embedding model from a "provider/model" string (e.g. "openai/text-embedding-3-small").
* Supports any AI SDK provider that exposes `.embeddingModel()`.
* The provider package must be installed at runtime.
*/
export function createEmbeddingModel(
embedderString: EmbeddingModelId | (string & {}),
apiKey?: string,
): EmbeddingModel {
const [provider, ...rest] = embedderString.split('/');
const modelName = rest.join('/');
const entry = EMBEDDING_PROVIDERS[provider as EmbeddingProvider];
if (!entry) {
const supported = Object.keys(EMBEDDING_PROVIDERS).join(', ');
throw new Error(`Unsupported embedding provider: "${provider}". Supported: ${supported}`);
}
const mod = require(entry.pkg) as Record<string, CreateEmbeddingProviderFn>;
const factory = mod[entry.factory];
return factory({ apiKey }).embeddingModel(modelName);
}

View file

@ -0,0 +1,68 @@
import type { CheckpointStore, SerializableAgentState } from '../types';
/**
* Default in-memory CheckpointStore implementation.
* Used when no external store is configured (storage: 'memory' or omitted).
*
* Note: Suspended runs that are never resumed accumulate indefinitely.
* For long-running processes a TTL-based eviction mechanism should be added
* to prevent unbounded memory growth.
*/
class MemoryCheckpointStore implements CheckpointStore {
private store = new Map<string, SerializableAgentState>();
async save(key: string, state: SerializableAgentState): Promise<void> {
await Promise.resolve(this.store.set(key, state));
}
async load(key: string): Promise<SerializableAgentState | undefined> {
return await Promise.resolve(this.store.get(key));
}
async delete(key: string): Promise<void> {
await Promise.resolve(this.store.delete(key));
}
}
/**
* Manages suspended agent run state for tool approval (HITL).
* Delegates all persistence to a CheckpointStore: either the provided
* external store or the default MemoryCheckpointStore.
*/
export class RunStateManager {
private store: CheckpointStore;
constructor(storage?: 'memory' | CheckpointStore) {
this.store = storage && storage !== 'memory' ? storage : new MemoryCheckpointStore();
}
/** Save a suspended run state. */
async suspend(runId: string, state: SerializableAgentState): Promise<void> {
await this.store.save(runId, { ...state, status: 'suspended' });
}
/** Load a suspended run state for resumption. The returned copy is marked running; the stored status is not updated. */
async resume(runId: string): Promise<SerializableAgentState | undefined> {
const state = await this.store.load(runId);
if (!state) return undefined;
if (state.status !== 'suspended') {
throw new Error(`Run ${runId} is not suspended. Cannot resume.`);
}
const newState: SerializableAgentState = { ...state, status: 'running' };
return newState;
}
/** Delete a finished run from storage. Called when a resumed run completes without re-suspending. */
async complete(runId: string): Promise<void> {
try {
await this.store.delete(runId);
} catch (deleteError: unknown) {
console.error(`[RunStateManager] Failed to delete checkpoint ${runId}:`, deleteError);
}
}
}
/** Generate a unique run ID. */
export function generateRunId(): string {
return `run_${crypto.randomUUID()}`;
}
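// Suspend/resume sketch for a HITL approval flow; `state` stands in for a full
// SerializableAgentState checkpoint captured by the runtime.
//
//   const runs = new RunStateManager();        // defaults to the in-memory store
//   const runId = generateRunId();
//   await runs.suspend(runId, state);          // persisted with status 'suspended'
//   const resumed = await runs.resume(runId);  // copy with status 'running'
//   await runs.complete(runId);                // drop the checkpoint once finished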

View file

@ -0,0 +1,145 @@
/**
* Pure utility functions used by AgentRuntime that require no class context.
* These are extracted here to keep agent-runtime.ts focused on orchestration logic.
*/
import { toDbMessage } from '../sdk/message';
import type { GenerateResult, StreamChunk, TokenUsage } from '../types';
import { toTokenUsage } from './stream';
import type { AgentDbMessage, AgentMessage, ContentToolResult } from '../types/sdk/message';
import type { JSONValue } from '../types/utils/json';
/** Normalize a string or AgentMessage[] input to an AgentDbMessage array, assigning ids where missing. */
export function normalizeInput(input: AgentMessage[] | string): AgentDbMessage[] {
if (typeof input === 'string') {
return [toDbMessage({ role: 'user', content: [{ type: 'text', text: input }] })];
}
return input.map(toDbMessage);
}
/** Build a tool-role AgentDbMessage for a tool execution result. */
export function makeToolResultMessage(
toolCallId: string,
toolName: string,
result: unknown,
): AgentDbMessage {
return toDbMessage({
role: 'tool',
content: [
{
type: 'tool-result',
toolCallId,
toolName,
result: result as JSONValue,
},
],
});
}
/**
* Build a tool-role AgentDbMessage for a tool execution error.
* The LLM receives this as a tool result so it can self-correct on the next iteration.
* The error is surfaced via the result JSON value so the LLM can read and reason about it.
*/
export function makeErrorToolResultMessage(
toolCallId: string,
toolName: string,
error: unknown,
): AgentDbMessage {
const message = error instanceof Error ? `${error.name}: ${error.message}` : String(error);
return toDbMessage({
role: 'tool',
content: [
{
type: 'tool-result',
toolCallId,
toolName,
result: { error: message } as JSONValue,
isError: true,
},
],
});
}
/** Extract all tool-result content parts from a flat list of agent messages. */
export function extractToolResults(messages: AgentDbMessage[]): ContentToolResult[] {
return messages
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c): c is ContentToolResult => c.type === 'tool-result');
}
/**
* Return a ReadableStream that immediately yields an error chunk followed by
* a finish chunk. Used when setup errors prevent the normal stream loop from
* starting, so callers always receive a well-formed stream.
*/
export function makeErrorStream(error: unknown): ReadableStream<StreamChunk> {
const { readable, writable } = new TransformStream<StreamChunk, StreamChunk>();
const writer = writable.getWriter();
writer.write({ type: 'error', error }).catch(() => {});
writer.write({ type: 'finish', finishReason: 'error' }).catch(() => {});
writer.close().catch(() => {});
return readable;
}
/** Accumulate token usage across two values, returning undefined if both are absent. */
export function mergeUsage(
current: TokenUsage | undefined,
next: TokenUsage | undefined,
): TokenUsage | undefined {
if (!next) return current;
if (!current) return next;
const merged: TokenUsage = {
promptTokens: current.promptTokens + next.promptTokens,
completionTokens: current.completionTokens + next.completionTokens,
totalTokens: current.totalTokens + next.totalTokens,
};
const cacheRead =
(current.inputTokenDetails?.cacheRead ?? 0) + (next.inputTokenDetails?.cacheRead ?? 0);
const cacheWrite =
(current.inputTokenDetails?.cacheWrite ?? 0) + (next.inputTokenDetails?.cacheWrite ?? 0);
if (cacheRead > 0 || cacheWrite > 0) {
merged.inputTokenDetails = {
...(cacheRead > 0 && { cacheRead }),
...(cacheWrite > 0 && { cacheWrite }),
};
}
const reasoning =
(current.outputTokenDetails?.reasoning ?? 0) + (next.outputTokenDetails?.reasoning ?? 0);
if (reasoning > 0) {
merged.outputTokenDetails = { reasoning };
}
return merged;
}
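// Arithmetic sketch: counts add field-wise and cache details only appear when non-zero.
//
//   mergeUsage(
//     { promptTokens: 100, completionTokens: 20, totalTokens: 120 },
//     { promptTokens: 50, completionTokens: 5, totalTokens: 55, inputTokenDetails: { cacheRead: 30 } },
//   );
//   // => { promptTokens: 150, completionTokens: 25, totalTokens: 175,
//   //      inputTokenDetails: { cacheRead: 30 } }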
/**
* Accumulate token usage across loop iterations.
* Wraps mergeUsage + toTokenUsage to keep call sites concise.
*/
export function accumulateUsage(
current: TokenUsage | undefined,
raw:
| {
inputTokens?: number | undefined;
outputTokens?: number | undefined;
totalTokens?: number | undefined;
inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number };
outputTokenDetails?: { reasoningTokens?: number };
}
| undefined,
): TokenUsage | undefined {
if (!raw) return current;
return mergeUsage(current, toTokenUsage(raw));
}
/** Compute totalCost from sub-agent usage already present on the result. */
export function applySubAgentUsage(result: GenerateResult): GenerateResult {
if (!result.subAgentUsage || result.subAgentUsage.length === 0) return result;
const parentCost = result.usage?.cost ?? 0;
const subCost = result.subAgentUsage.reduce((sum, s) => sum + (s.usage.cost ?? 0), 0);
return { ...result, totalCost: parentCost + subCost };
}

View file

@ -0,0 +1,120 @@
import type { TextStreamPart, ToolSet } from 'ai';
import type { FinishReason, StreamChunk, TokenUsage } from '../types';
import type { JSONValue } from '../types/utils/json';
/** Map AI SDK v6 LanguageModelUsage to our TokenUsage type. */
export function toTokenUsage(
usage:
| {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number };
outputTokenDetails?: { reasoningTokens?: number };
}
| undefined,
): TokenUsage | undefined {
if (!usage) return undefined;
const result: TokenUsage = {
promptTokens: usage.inputTokens ?? 0,
completionTokens: usage.outputTokens ?? 0,
totalTokens: usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0),
};
const cacheRead = usage.inputTokenDetails?.cacheReadTokens;
const cacheWrite = usage.inputTokenDetails?.cacheWriteTokens;
if (cacheRead || cacheWrite) {
result.inputTokenDetails = {
...(cacheRead && { cacheRead }),
...(cacheWrite && { cacheWrite }),
};
}
if (usage.outputTokenDetails?.reasoningTokens !== undefined) {
result.outputTokenDetails = { reasoning: usage.outputTokenDetails.reasoningTokens };
}
return result;
}
/** Convert a single AI SDK v6 fullStream chunk to an n8n StreamChunk (or undefined to skip). */
export function convertChunk(c: TextStreamPart<ToolSet>): StreamChunk | undefined {
switch (c.type) {
case 'text-delta':
return { type: 'text-delta', delta: c.text ?? '' };
case 'reasoning-delta':
return { type: 'reasoning-delta', delta: c.text ?? '' };
case 'tool-call':
return {
type: 'message',
message: {
role: 'tool',
content: [
{
type: 'tool-call',
toolCallId: c.toolCallId,
toolName: c.toolName ?? '',
input: c.input as JSONValue,
},
],
},
};
case 'tool-input-start':
return {
type: 'tool-call-delta',
name: c.toolName,
};
case 'tool-input-delta':
return {
type: 'tool-call-delta',
...(c.delta !== undefined && { argumentsDelta: c.delta }),
};
case 'tool-result':
return {
type: 'message',
message: {
role: 'tool',
content: [
{
type: 'tool-result',
toolCallId: c.toolCallId ?? '',
toolName: c.toolName ?? '',
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
result: c.output && 'value' in c.output ? (c.output.value as JSONValue) : null,
},
],
},
};
case 'error':
return { type: 'error', error: c.error };
case 'finish-step': {
const usage = toTokenUsage(c.usage);
return {
type: 'finish',
finishReason: (c.finishReason ?? 'stop') as FinishReason,
...(usage && { usage }),
};
}
case 'finish': {
const usage = toTokenUsage(c.totalUsage);
return {
type: 'finish',
finishReason: (c.finishReason ?? 'stop') as FinishReason,
...(usage && { usage }),
};
}
default:
return undefined;
}
}

View file

@ -0,0 +1,70 @@
import { isLlmMessage } from '../sdk/message';
import type { AgentDbMessage, MessageContent } from '../types/sdk/message';
/**
* Strip orphaned tool-call and tool-result content from a message list.
*
* When memory loads the last N messages, the window boundary can split
* tool-call / tool-result pairs, leaving one side without its counterpart.
* Sending these orphans to the LLM causes provider errors because tool
* calls and results must always be paired.
*
* This function:
* 1. Collects all toolCallIds present in tool-call and tool-result blocks.
* 2. Identifies orphans: calls without a matching result, and vice versa.
* 3. Strips orphaned content blocks from their messages.
* 4. Drops messages that become empty after stripping (e.g. a tool message
* whose only content was the orphaned result).
* 5. Preserves non-tool content (text, reasoning, files) in mixed messages.
*/
export function stripOrphanedToolMessages(messages: AgentDbMessage[]): AgentDbMessage[] {
const callIds = new Set<string>();
const resultIds = new Set<string>();
for (const msg of messages) {
if (!isLlmMessage(msg)) continue;
for (const block of msg.content) {
if (block.type === 'tool-call' && block.toolCallId) {
callIds.add(block.toolCallId);
} else if (block.type === 'tool-result' && block.toolCallId) {
resultIds.add(block.toolCallId);
}
}
}
const orphanedCallIds = new Set([...callIds].filter((id) => !resultIds.has(id)));
const orphanedResultIds = new Set([...resultIds].filter((id) => !callIds.has(id)));
if (orphanedCallIds.size === 0 && orphanedResultIds.size === 0) {
return messages;
}
const result: AgentDbMessage[] = [];
for (const msg of messages) {
if (!isLlmMessage(msg)) {
result.push(msg);
continue;
}
const filtered = msg.content.filter((block: MessageContent) => {
if (block.type === 'tool-call' && block.toolCallId && orphanedCallIds.has(block.toolCallId)) {
return false;
}
if (
block.type === 'tool-result' &&
block.toolCallId &&
orphanedResultIds.has(block.toolCallId)
) {
return false;
}
return true;
});
if (filtered.length === 0) continue;
result.push({ ...msg, content: filtered });
}
return result;
}
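// Behaviour sketch: a tool-result whose matching tool-call fell outside the memory
// window is stripped, and the now-empty tool message is dropped with it.
//
//   const windowed: AgentDbMessage[] = [
//     { id: '1', role: 'tool', content: [
//       { type: 'tool-result', toolCallId: 'call_a', toolName: 'search', result: 'stale' },
//     ] },
//     { id: '2', role: 'assistant', content: [{ type: 'text', text: 'Done.' }] },
//   ];
//   stripOrphanedToolMessages(windowed); // => only the assistant message remains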

View file

@ -0,0 +1,77 @@
import { generateText } from 'ai';
import type { BuiltMemory, TitleGenerationConfig } from '../types';
import { createFilteredLogger } from './logger';
import { createModel } from './model-factory';
import type { ModelConfig } from '../types/sdk/agent';
import type { AgentDbMessage } from '../types/sdk/message';
const logger = createFilteredLogger();
const DEFAULT_TITLE_INSTRUCTIONS = [
'- you will generate a short title based on the first message a user begins a conversation with',
'- ensure it is not more than 80 characters long',
"- the title should be a summary of the user's message",
'- do not use quotes or colons',
'- the entire text you return will be used as the title',
].join('\n');
/**
* Generate a title for a thread if it doesn't already have one.
*
* Designed to run fire-and-forget after the agent response is complete.
* All errors are caught and logged; title generation failures never
* block or break the agent response.
*/
export async function generateThreadTitle(opts: {
memory: BuiltMemory;
threadId: string;
resourceId: string;
titleConfig: TitleGenerationConfig;
/** The agent's own model, used as fallback when titleConfig.model is not set. */
agentModel: ModelConfig;
/** Messages from the current turn, used to find the first user message. */
turnDelta: AgentDbMessage[];
}): Promise<void> {
try {
const thread = await opts.memory.getThread(opts.threadId);
if (thread?.title) return;
const userMessage = opts.turnDelta.find((m) => 'role' in m && m.role === 'user');
if (!userMessage || !('content' in userMessage)) return;
const userText = (userMessage.content as Array<{ type: string; text?: string }>)
.filter((c) => c.type === 'text' && c.text)
.map((c) => c.text!)
.join(' ');
if (!userText) return;
const titleModelId = opts.titleConfig.model ?? opts.agentModel;
const titleModel = createModel(titleModelId);
const instructions = opts.titleConfig.instructions ?? DEFAULT_TITLE_INSTRUCTIONS;
const result = await generateText({
model: titleModel,
messages: [
{ role: 'system', content: instructions },
{ role: 'user', content: userText },
],
});
let title = result.text?.trim();
if (!title) return;
// Strip <think>...</think> blocks (e.g. from DeepSeek R1)
title = title.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
if (!title) return;
await opts.memory.saveThread({
id: opts.threadId,
resourceId: opts.resourceId,
title,
metadata: thread?.metadata,
});
} catch (error) {
logger.warn('Failed to generate thread title', { error });
}
}

View file

@ -0,0 +1,182 @@
import { tool, jsonSchema, type Tool as AiSdkTool } from 'ai';
import type { JSONSchema7 } from 'json-schema';
import { z } from 'zod';
import {
type BuiltProviderTool,
type BuiltTool,
type BuiltTelemetry,
type InterruptibleToolContext,
type ToolContext,
} from '../types';
import type { SubAgentUsage } from '../types/sdk/agent';
import { isZodSchema } from '../utils/zod';
type AiSdkProviderTool = AiSdkTool & {
type: 'provider';
};
/**
* Branded symbol used to tag the return value of `ctx.suspend(payload)`.
* The agent runtime checks for this brand on the tool's return value
* instead of catching a thrown error.
*/
const SUSPEND_BRAND = Symbol('SuspendBrand');
/**
* Branded symbol used to tag tool results from agent-as-tool calls.
* Carries sub-agent usage so the parent runtime can aggregate costs
* without any external state (WeakMap, mutable tool fields, etc.).
*/
const AGENT_TOOL_BRAND = Symbol('AgentToolBrand');
export interface SuspendedToolResult {
readonly [SUSPEND_BRAND]: true;
payload: unknown;
}
/** Type guard: returns true when a tool's return value is a suspend signal. */
export function isSuspendedToolResult(value: unknown): value is SuspendedToolResult {
return typeof value === 'object' && value !== null && SUSPEND_BRAND in value;
}
export interface AgentToolResult {
readonly [AGENT_TOOL_BRAND]: true;
/** The actual tool output (passed back to the LLM). */
readonly output: unknown;
/** Sub-agent usage entries to aggregate into the parent's result. */
readonly subAgentUsage: SubAgentUsage[];
}
/** Type guard: returns true when a tool result carries sub-agent usage. */
export function isAgentToolResult(value: unknown): value is AgentToolResult {
return typeof value === 'object' && value !== null && AGENT_TOOL_BRAND in value;
}
/**
* Create a branded agent-tool result that carries sub-agent usage alongside the output.
* The output properties are spread onto the object so it remains a valid tool output
* even when accessed directly (e.g. in tests). The runtime detects the brand via
* isAgentToolResult() and extracts the sub-agent usage.
* Typed as `never` so `return createAgentToolResult(...)` satisfies any handler return type
* (same pattern as ctx.suspend).
*/
export function createAgentToolResult(output: unknown, subAgentUsage: SubAgentUsage[]): never {
const base = typeof output === 'object' && output !== null ? output : {};
return { ...base, [AGENT_TOOL_BRAND]: true, output, subAgentUsage } as never;
}
/**
* Convert an array of BuiltProviderTools into a Record of AI SDK provider-defined tool objects.
* Provider tools are executed on the provider's infrastructure (e.g. Anthropic web search,
* OpenAI code interpreter); they are never executed locally by the agent loop.
*
* The cast to AiSdkTool is required because the AI SDK's ToolSet type demands `inputSchema`
* on every entry, but provider-defined tools have no input schema (the provider handles it).
* At runtime the AI SDK correctly recognises the `type: 'provider'` discriminant.
*/
export function toAiSdkProviderTools(tools?: BuiltProviderTool[]): Record<string, AiSdkTool> {
if (!tools || tools.length === 0) return {};
const result: Record<string, AiSdkTool> = {};
for (const t of tools) {
const providerTool: AiSdkProviderTool = {
type: 'provider',
id: t.name,
args: t.args,
inputSchema: t.inputSchema ?? z.any(),
};
result[t.name] = providerTool;
}
return result;
}
const fixSchema = (schema: JSONSchema7): JSONSchema7 => {
// Ensure 'type: object' is present when properties are present (required by some providers):
if (
typeof schema === 'object' &&
schema !== null &&
'properties' in schema &&
!('type' in schema)
) {
return { ...schema, type: 'object' as const };
}
return schema;
};
/**
* Convert an array of BuiltTools into a Record of AI SDK tool definitions.
* Tools are created WITHOUT `execute`; the agent loop handles execution manually.
* Supports both Zod schemas (SDK-defined tools) and raw JSON Schema (MCP tools).
*/
export function toAiSdkTools(tools?: BuiltTool[]): Record<string, AiSdkTool> {
if (!tools || tools.length === 0) return {};
const result: Record<string, AiSdkTool> = {};
for (const t of tools) {
if (t.inputSchema) {
if (isZodSchema(t.inputSchema)) {
result[t.name] = tool({
description: t.description,
inputSchema: t.inputSchema,
providerOptions: t.providerOptions,
});
} else {
result[t.name] = tool({
description: t.description,
inputSchema: jsonSchema(fixSchema(t.inputSchema)),
providerOptions: t.providerOptions,
});
}
}
}
return result;
}
/**
* Execute a tool call by finding its handler and running it.
* For tools with suspend/resume schemas, passes an InterruptibleToolContext
* that lets the handler call `suspend(payload)`.
*/
export async function executeTool(
args: unknown,
builtTool: BuiltTool,
resumeData?: unknown,
parentTelemetry?: BuiltTelemetry,
): Promise<unknown> {
if (!builtTool.handler) {
throw new Error(`No handler found for tool "${builtTool.name}"`);
}
if (builtTool.suspendSchema) {
const ctx: InterruptibleToolContext = {
suspend: async (payload: unknown): Promise<never> => {
return await Promise.resolve({ [SUSPEND_BRAND]: true, payload } as never);
},
resumeData,
parentTelemetry,
};
return await builtTool.handler(args, ctx);
}
const ctx: ToolContext = { parentTelemetry };
return await builtTool.handler(args, ctx);
}
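// Suspend-detection sketch: an interruptible handler returns the branded value from
// ctx.suspend() and the runtime checks the brand instead of catching a thrown error.
// The object below assumes only these BuiltTool fields are required.
//
//   const askUser = {
//     name: 'ask_user',
//     description: 'Ask the human for approval',
//     inputSchema: z.object({}),
//     suspendSchema: z.object({ question: z.string() }),
//     handler: async (_input: unknown, ctx: InterruptibleToolContext) =>
//       await ctx.suspend({ question: 'Approve this step?' }),
//   } as unknown as BuiltTool;
//   const result = await executeTool({}, askUser);
//   isSuspendedToolResult(result); // => true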
/**
* Check if a tool has suspend/resume schemas (i.e. is interruptible).
*/
export function isInterruptible(toolName: string, toolMap: Map<string, BuiltTool>): boolean {
const builtTool = toolMap.get(toolName);
return !!builtTool?.suspendSchema;
}
/** Build a Map from tool name to BuiltTool for quick lookups. */
export function buildToolMap(tools?: BuiltTool[]): Map<string, BuiltTool> {
const map = new Map<string, BuiltTool>();
if (tools) {
for (const t of tools) {
map.set(t.name, t);
}
}
return map;
}

View file

@ -0,0 +1,183 @@
import type { z } from 'zod';
import type { StreamChunk } from '../types';
import { createFilteredLogger } from './logger';
const logger = createFilteredLogger();
type ZodObjectSchema = z.ZodObject<z.ZodRawShape>;
const OPEN_TAG = '<working_memory>';
const CLOSE_TAG = '</working_memory>';
/**
* Extract working memory content from an LLM response.
* Returns the clean text (tags stripped) and the extracted working memory (or null).
*/
export function parseWorkingMemory(text: string): {
cleanText: string;
workingMemory: string | null;
} {
const openIdx = text.indexOf(OPEN_TAG);
if (openIdx === -1) return { cleanText: text, workingMemory: null };
const closeIdx = text.indexOf(CLOSE_TAG, openIdx);
if (closeIdx === -1) return { cleanText: text, workingMemory: null };
const contentStart = openIdx + OPEN_TAG.length;
const rawContent = text.slice(contentStart, closeIdx);
const workingMemory = rawContent.replace(/^\n/, '').replace(/\n$/, '');
const before = text.slice(0, openIdx).replace(/\n$/, '');
const after = text.slice(closeIdx + CLOSE_TAG.length).replace(/^\n/, '');
const cleanText = (before + (after ? '\n' + after : '')).trim();
return { cleanText, workingMemory };
}
/**
* Generate the system prompt instruction for working memory.
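*
* @example
* ```typescript
* // Free-form template (structured = false); the returned block is intended to be
* // appended to the agent's system prompt.
* const instruction = buildWorkingMemoryInstruction('# User\n- Name:\n- City:', false);
* ```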
*/
export function buildWorkingMemoryInstruction(template: string, structured: boolean): string {
const format = structured
? 'Emit the updated state as valid JSON matching the schema'
: 'Update the template with any new information learned';
return [
'',
'## Working Memory',
'',
'You have persistent working memory that survives across conversations.',
'The current state will be shown to you in a system message.',
'IMPORTANT: Always respond to the user first with your normal reply.',
`Then, at the very end of your response, emit your updated working memory inside ${OPEN_TAG}...${CLOSE_TAG} tags on a new line.`,
`${format}. If nothing changed, emit the current state unchanged.`,
'The working memory block must be the last thing in your response, after your reply to the user.',
'',
'Current template:',
'```',
template,
'```',
].join('\n');
}
/**
* Convert a Zod object schema to a JSON template string for structured working memory.
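*
* Field descriptions become placeholder values; fields without a description map to empty strings.
*
* @example
* ```typescript
* templateFromSchema(z.object({
*   name: z.string().describe('User name'),
*   city: z.string(),
* }));
* // => '{\n  "name": "User name",\n  "city": ""\n}'
* ```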
*/
export function templateFromSchema(schema: ZodObjectSchema): string {
const obj: Record<string, string> = {};
for (const [key, field] of Object.entries(schema.shape)) {
const desc = field.description;
obj[key] = desc ?? '';
}
return JSON.stringify(obj, null, 2);
}
type PersistFn = (content: string) => Promise<void>;
/**
* Wraps a stream writer to intercept <working_memory> tags from text-delta chunks.
* All non-text-delta chunks pass through unchanged.
* Text inside the tags is buffered and persisted when the closing tag is detected.
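*
* @example
* ```typescript
* // Minimal sketch; `writer` wraps the outgoing stream and `persistWorkingMemory` is a
* // hypothetical callback that stores the captured content.
* const filter = new WorkingMemoryStreamFilter(writer, persistWorkingMemory);
* for await (const chunk of llmChunks) await filter.write(chunk); // `llmChunks` is assumed
* await filter.flush(); // forwards any held-back text and resets the filter
* ```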
*/
export class WorkingMemoryStreamFilter {
private writer: WritableStreamDefaultWriter<StreamChunk>;
private persist: PersistFn;
private state: 'normal' | 'inside' = 'normal';
private buffer = '';
private pendingText = '';
constructor(writer: WritableStreamDefaultWriter<StreamChunk>, persist: PersistFn) {
this.writer = writer;
this.persist = persist;
}
async write(chunk: StreamChunk): Promise<void> {
if (chunk.type !== 'text-delta') {
await this.writer.write(chunk);
return;
}
this.pendingText += chunk.delta;
while (this.pendingText.length > 0) {
if (this.state === 'normal') {
const openIdx = this.pendingText.indexOf(OPEN_TAG);
if (openIdx === -1) {
// No full open tag found. Check if the tail is a valid prefix of OPEN_TAG.
const lastLt = this.pendingText.lastIndexOf('<');
if (
lastLt !== -1 &&
this.pendingText.length - lastLt < OPEN_TAG.length &&
OPEN_TAG.startsWith(this.pendingText.slice(lastLt))
) {
// Potential partial tag at end — forward everything before it, hold the rest
if (lastLt > 0) {
await this.writer.write({
type: 'text-delta',
delta: this.pendingText.slice(0, lastLt),
});
}
this.pendingText = this.pendingText.slice(lastLt);
} else {
// No partial tag concern — forward everything
await this.writer.write({ type: 'text-delta', delta: this.pendingText });
this.pendingText = '';
}
break;
}
// Forward text before the tag
if (openIdx > 0) {
await this.writer.write({
type: 'text-delta',
delta: this.pendingText.slice(0, openIdx),
});
}
this.state = 'inside';
this.pendingText = this.pendingText.slice(openIdx + OPEN_TAG.length);
this.buffer = '';
} else {
// Inside tag — look for closing tag
const closeIdx = this.pendingText.indexOf(CLOSE_TAG);
if (closeIdx === -1) {
// Check if the tail is a valid prefix of CLOSE_TAG — hold it back
const lastLt = this.pendingText.lastIndexOf('<');
if (
lastLt !== -1 &&
this.pendingText.length - lastLt < CLOSE_TAG.length &&
CLOSE_TAG.startsWith(this.pendingText.slice(lastLt))
) {
this.buffer += this.pendingText.slice(0, lastLt);
this.pendingText = this.pendingText.slice(lastLt);
} else {
this.buffer += this.pendingText;
this.pendingText = '';
}
break;
}
this.buffer += this.pendingText.slice(0, closeIdx);
this.pendingText = this.pendingText.slice(closeIdx + CLOSE_TAG.length);
this.state = 'normal';
const content = this.buffer.replace(/^\n/, '').replace(/\n$/, '');
this.persist(content).catch((error: unknown) => {
logger.warn('Failed to persist working memory', { error });
});
this.buffer = '';
}
}
}
async flush(): Promise<void> {
if (this.state === 'normal' && this.pendingText.length > 0) {
await this.writer.write({ type: 'text-delta', delta: this.pendingText });
}
// Reset all state so the filter is clean for reuse after abort/completion.
this.pendingText = '';
this.buffer = '';
this.state = 'normal';
}
}

View file

@ -0,0 +1,676 @@
import type { ProviderOptions } from '@ai-sdk/provider-utils';
import { z } from 'zod';
import type { Eval } from './eval';
import type { McpClient } from './mcp-client';
import { Memory } from './memory';
import { Telemetry } from './telemetry';
import { Tool, wrapToolForApproval } from './tool';
import { AgentRuntime } from '../runtime/agent-runtime';
import { AgentEventBus } from '../runtime/event-bus';
import { createAgentToolResult } from '../runtime/tool-adapter';
import type {
AgentEvent,
AgentEventHandler,
AgentMiddleware,
BuiltAgent,
BuiltEval,
BuiltGuardrail,
BuiltMemory,
BuiltProviderTool,
BuiltTool,
BuiltTelemetry,
CheckpointStore,
ExecutionOptions,
GenerateResult,
MemoryConfig,
ModelConfig,
Provider,
RunOptions,
SerializableAgentState,
StreamResult,
SubAgentUsage,
ThinkingConfig,
ThinkingConfigFor,
ResumeOptions,
} from '../types';
import type { AgentMessage } from '../types/sdk/message';
import type { Workspace } from '../workspace/workspace';
const DEFAULT_LAST_MESSAGES = 10;
type ToolParameter = BuiltTool | { build(): BuiltTool };
/**
* Builder for creating AI agents with a fluent API.
*
* Usage:
* ```typescript
* const agent = new Agent('assistant')
* .model('anthropic', 'claude-sonnet-4') // typed: Agent<'anthropic'>
* .credential('anthropic')
* .instructions('You are a helpful assistant.')
* .tool(searchTool);
*
* const result = await agent.generate('Hello!');
* ```
*/
export class Agent implements BuiltAgent {
readonly name: string;
private modelId?: string;
private modelConfigObj?: ModelConfig;
private instructionProviderOpts?: ProviderOptions;
private instructionsText?: string;
private tools: BuiltTool[] = [];
private providerTools: BuiltProviderTool[] = [];
private memoryConfig?: MemoryConfig;
// TODO: Guardrails are accepted by the builder API for forward
// compatibility but not yet wired to the runtime.
private inputGuardrails: BuiltGuardrail[] = [];
private outputGuardrails: BuiltGuardrail[] = [];
private agentEvals: BuiltEval[] = [];
private outputSchema?: z.ZodType;
private checkpointStore?: 'memory' | CheckpointStore;
private thinkingConfig?: ThinkingConfig;
private credentialName?: string;
private resolvedKey?: string;
private runtime?: AgentRuntime;
private concurrencyValue?: number;
private telemetryBuilder?: Telemetry;
private telemetryConfig?: BuiltTelemetry;
private middlewares: AgentMiddleware[] = [];
private requireToolApprovalValue = false;
private mcpClients: McpClient[] = [];
private buildPromise: Promise<AgentRuntime> | undefined;
private eventBus = new AgentEventBus();
private workspaceInstance?: Workspace;
constructor(name: string) {
this.name = name;
}
/**
* Set the model with provider type information.
*
* @example
* ```typescript
* // Typed form — enables provider-specific config on .thinking() etc.
* agent.model('anthropic', 'claude-sonnet-4-5')
*
* // Untyped form — backwards compatible
* agent.model('anthropic/claude-sonnet-4-5')
* ```
*/
model(providerOrIdOrConfig: string | ModelConfig, modelName?: string): this {
if (typeof providerOrIdOrConfig === 'string') {
this.modelId = modelName ? `${providerOrIdOrConfig}/${modelName}` : providerOrIdOrConfig;
this.modelConfigObj = undefined;
} else {
this.modelConfigObj = providerOrIdOrConfig;
this.modelId = undefined;
}
return this;
}
/** Set the system instructions for the agent. Required before building. */
instructions(text: string, options?: { providerOptions?: ProviderOptions }): this {
this.instructionsText = text;
this.instructionProviderOpts = options?.providerOptions;
return this;
}
/** Add a tool to the agent's capabilities. Accepts a built tool or a Tool builder (which will be built automatically). Can also accept an array of tools. */
tool(t: ToolParameter | ToolParameter[]): this {
if (Array.isArray(t)) {
for (const tool of t) {
this.tool(tool);
}
return this;
}
const built = 'build' in t ? t.build() : t;
this.tools.push(built);
return this;
}
/** Add a provider-defined tool (e.g. Anthropic web search, OpenAI code interpreter). */
providerTool(builtProviderTool: BuiltProviderTool): this {
this.providerTools.push(builtProviderTool);
return this;
}
/** Set the memory configuration. Accepts a MemoryConfig, Memory builder, or bare BuiltMemory. */
memory(m: MemoryConfig | Memory | BuiltMemory): this {
if (m instanceof Memory) {
// Memory builder — call build()
this.memoryConfig = m.build();
} else if ('memory' in m && 'lastMessages' in m) {
// MemoryConfig — use directly
this.memoryConfig = m;
} else {
// Bare BuiltMemory — wrap in minimal config
this.memoryConfig = { memory: m, lastMessages: DEFAULT_LAST_MESSAGES };
}
return this;
}
/** Add a middleware. */
middleware(m: AgentMiddleware): this {
this.middlewares.push(m);
return this;
}
// TODO: guardrails can be a middleware internally
/** Add an input guardrail. Accepts a built guardrail or a Guardrail builder. */
inputGuardrail(g: BuiltGuardrail | { build(): BuiltGuardrail }): this {
this.inputGuardrails.push('_config' in g ? g : g.build());
return this;
}
/** Add an output guardrail. Accepts a built guardrail or a Guardrail builder. */
outputGuardrail(g: BuiltGuardrail | { build(): BuiltGuardrail }): this {
this.outputGuardrails.push('_config' in g ? g : g.build());
return this;
}
/** Add an eval to run after each agent response. Accepts an Eval builder or BuiltEval. */
eval(e: Eval | BuiltEval | { ensureBuilt(): BuiltEval }): this {
const built = '_run' in e ? e : (e as Eval).ensureBuilt();
this.agentEvals.push(built);
return this;
}
/**
* Set the checkpoint storage for tool suspend/resume (human-in-the-loop).
* Required when any tool uses `.suspend()` / `.resume()`.
*
* - `'memory'`: in-process storage (lost on restart, fine for dev)
* - A storage provider instance (e.g. `new LibSQLStore(...)`, `new PgStore(...)`)
*
* @example
* ```typescript
* const agent = new Agent('assistant')
* .model('anthropic/claude-sonnet-4-5')
* .instructions('...')
* .tool(dangerousTool) // has .suspend() / .resume()
* .checkpoint('memory');
* ```
*/
checkpoint(storage: 'memory' | CheckpointStore): this {
this.checkpointStore = storage;
return this;
}
/**
* Declare a credential this agent requires. The execution engine resolves
* the credential name to an API key at build time and injects it into the
* model configuration; user code never handles raw keys.
*
* @example
* ```typescript
* const agent = new Agent('assistant')
* .model('anthropic/claude-sonnet-4-5')
* .credential('anthropic')
* .instructions('You are helpful.');
* ```
*/
credential(name: string): this {
this.credentialName = name;
return this;
}
/** @internal Read the declared credential name (used by the execution engine). */
protected get declaredCredential(): string | undefined {
return this.credentialName;
}
/** @internal Set the resolved API key (called by the execution engine before super.build()). */
protected set resolvedApiKey(key: string) {
this.resolvedKey = key;
}
/**
* Set a structured output schema. When set, the agent's response will be
* parsed into a typed object matching the schema, available as `result.output`.
*
* @example
* ```typescript
* const agent = new Agent('extractor')
* .model('anthropic/claude-sonnet-4-5')
* .instructions('Extract structured data.')
* .structuredOutput(z.object({
* code: z.string(),
* explanation: z.string(),
* }));
*
* const result = await agent.generate('...');
* console.log(result.structuredOutput); // { code: '...', explanation: '...' }
* ```
*/
structuredOutput(schema: z.ZodType): this {
this.outputSchema = schema;
return this;
}
/**
* Enable extended thinking / reasoning for the agent.
* The config type is inferred from the provider set via `.model()`.
*
* @example
* ```typescript
* // Anthropic — budgetTokens
* new Agent('thinker')
* .model('anthropic', 'claude-sonnet-4-5')
* .thinking({ budgetTokens: 10000 })
*
* // OpenAI — reasoningEffort
* new Agent('thinker')
* .model('openai', 'o3-mini')
* .thinking({ reasoningEffort: 'high' })
* ```
*/
thinking<P extends Provider>(_provider: P, config?: ThinkingConfigFor<P>): this {
this.thinkingConfig = config ?? {};
return this;
}
/** Set telemetry configuration for this agent. Accepts a Telemetry builder or pre-built config. */
telemetry(t: Telemetry | BuiltTelemetry): this {
if (t instanceof Telemetry) {
this.telemetryBuilder = t;
this.telemetryConfig = undefined;
} else {
this.telemetryBuilder = undefined;
this.telemetryConfig = t;
}
return this;
}
/** @internal Read the declared telemetry builder (used by the execution engine to resolve credentials). */
protected get declaredTelemetry(): Telemetry | undefined {
return this.telemetryBuilder;
}
/**
* Set the number of tool calls to execute concurrently within a single LLM turn.
*
* - `1` (default): sequential execution, fully backward-compatible.
* - `Infinity`: unlimited parallelism (all tool calls start at once).
* - Any number in between: bounded concurrency (e.g. `5` = at most 5 tools run simultaneously).
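*
* @example
* ```typescript
* agent.toolCallConcurrency(5); // at most 5 tool calls run simultaneously per turn
* ```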
*/
toolCallConcurrency(n: number): this {
if ((n !== Infinity && !Number.isInteger(n)) || n < 1) {
throw new Error('toolCallConcurrency must be a positive integer or Infinity');
}
this.concurrencyValue = n;
return this;
}
/**
* Require human approval before any tool executes.
* Tools that already have .suspend()/.resume() (suspendSchema) are skipped.
* Requires .checkpoint() to be set.
*/
requireToolApproval(): this {
this.requireToolApprovalValue = true;
return this;
}
/**
* Attach a workspace to this agent. Workspace tools and instructions
* are injected at build time.
*/
workspace(ws: Workspace): this {
this.workspaceInstance = ws;
return this;
}
/**
* Add an MCP client as a tool source for this agent.
* Tools from all servers in the client become available to the agent.
* Multiple clients can be added; tools are merged across all of them.
*
* @example
* ```typescript
* const client = new McpClient([
* { name: 'browser', url: 'http://localhost:9222/mcp', transport: 'streamableHttp' },
* { name: 'fs', command: 'npx', args: ['@anthropic/mcp-fs', '/tmp'] },
* ]);
*
* const agent = new Agent('assistant')
* .model('anthropic', 'claude-sonnet-4')
* .mcp(client)
* .instructions('You are a helpful assistant.');
* ```
*/
mcp(client: McpClient): this {
this.mcpClients.push(client);
return this;
}
/** Get the evals attached to this agent. */
get evaluations(): BuiltEval[] {
return [...this.agentEvals];
}
/**
* Register a handler for an agent lifecycle event.
* Handlers are called synchronously during the agentic loop.
*/
on(event: AgentEvent, handler: AgentEventHandler): void {
this.eventBus.on(event, handler);
}
/**
* Wrap this agent as a tool for use in multi-agent composition.
* The tool sends a text prompt to this agent and returns the text of the response.
*
* @example
* ```typescript
* const coordinatorAgent = new Agent('coordinator')
* .model('anthropic/claude-sonnet-4-5')
* .instructions('Route tasks to specialist agents.')
* .tool(writerAgent.asTool('Write content given a topic'));
* ```
*/
asTool(description: string): BuiltTool {
// eslint-disable-next-line @typescript-eslint/no-this-alias
const agent = this;
const tool = new Tool(this.name)
.description(description)
.input(
z.object({
input: z.string().describe('The input to send to the agent'),
}),
)
.output(
z.object({
result: z.string().describe('The result of the agent'),
}),
)
.handler(async (rawInput, ctx) => {
const { input } = rawInput as { input: string };
const result = await agent.generate(input, {
telemetry: ctx.parentTelemetry,
} as RunOptions & ExecutionOptions);
const text = result.messages
.filter((m) => 'role' in m && m.role === 'assistant')
.flatMap((m) => ('content' in m ? m.content : []))
.filter((c) => c.type === 'text')
.map((c) => ('text' in c ? c.text : ''))
.join('');
// Collect sub-agent usage: this agent's own + any nested sub-agents
const subAgentUsage: SubAgentUsage[] = [];
if (result.usage) {
subAgentUsage.push({ agent: agent.name, model: result.model, usage: result.usage });
}
if (result.subAgentUsage) {
subAgentUsage.push(...result.subAgentUsage);
}
// Return branded result — the runtime unwraps it to extract sub-agent usage.
// createAgentToolResult returns `never`, same pattern as ctx.suspend().
if (subAgentUsage.length > 0) {
return createAgentToolResult({ result: text }, subAgentUsage);
}
return { result: text };
});
return tool.build();
}
/** Return the latest state snapshot of the agent. Returns `{ status: 'idle' }` before first run. */
getState(): SerializableAgentState {
if (!this.runtime) {
return {
persistence: undefined,
status: 'idle',
messageList: { messages: [], historyIds: [], inputIds: [], responseIds: [] },
pendingToolCalls: {},
};
}
return this.runtime.getState();
}
/**
* Cancel the currently running agent.
* Synchronous: sets an abort flag; the agentic loop checks it asynchronously.
*/
abort(): void {
this.eventBus.abort();
}
/** Generate a response (non-streaming). Lazy-builds on first call. */
async generate(
input: AgentMessage[] | string,
options?: RunOptions & ExecutionOptions,
): Promise<GenerateResult> {
const runtime = await this.ensureBuilt();
return await runtime.generate(this.toMessages(input), options);
}
/** Stream a response. Lazy-builds on first call. */
async stream(
input: AgentMessage[] | string,
options?: RunOptions & ExecutionOptions,
): Promise<StreamResult> {
const runtime = await this.ensureBuilt();
return await runtime.stream(this.toMessages(input), options);
}
/** Resume a suspended tool call with data. Lazy-builds on first call. */
async resume(
method: 'generate',
data: unknown,
options: ResumeOptions & ExecutionOptions,
): Promise<GenerateResult>;
async resume(
method: 'stream',
data: unknown,
options: ResumeOptions & ExecutionOptions,
): Promise<StreamResult>;
async resume(
method: 'generate' | 'stream',
data: unknown,
options: ResumeOptions & ExecutionOptions,
): Promise<GenerateResult | StreamResult> {
const runtime = await this.ensureBuilt();
if (method === 'generate') {
return await runtime.resume('generate', data, options);
}
return await runtime.resume('stream', data, options);
}
approve(method: 'generate', options: ResumeOptions & ExecutionOptions): Promise<GenerateResult>;
approve(method: 'stream', options: ResumeOptions & ExecutionOptions): Promise<StreamResult>;
async approve(
method: 'generate' | 'stream',
options: ResumeOptions & ExecutionOptions,
): Promise<GenerateResult | StreamResult> {
if (method === 'generate') {
return await this.resume('generate', { approved: true }, options);
}
return await this.resume('stream', { approved: true }, options);
}
deny(method: 'generate', options: ResumeOptions & ExecutionOptions): Promise<GenerateResult>;
deny(method: 'stream', options: ResumeOptions & ExecutionOptions): Promise<StreamResult>;
async deny(
method: 'generate' | 'stream',
options: ResumeOptions & ExecutionOptions,
): Promise<GenerateResult | StreamResult> {
if (method === 'generate') {
return await this.resume('generate', { approved: false }, options);
}
return await this.resume('stream', { approved: false }, options);
}
/**
* @internal Lazy-build the agent on first use. Stores the promise so
* concurrent callers share one build operation. On error the promise is
* cleared so the caller can retry.
*/
private async ensureBuilt(): Promise<AgentRuntime> {
if (!this.buildPromise) {
const p = this.build();
this.buildPromise = p;
p.catch(() => {
if (this.buildPromise === p) this.buildPromise = undefined;
});
}
return await this.buildPromise;
}
private toMessages(input: string | AgentMessage[]): AgentMessage[] {
if (Array.isArray(input)) return input;
return [{ role: 'user', content: [{ type: 'text', text: input }] }];
}
/** @internal Validate configuration and produce an AgentRuntime. Overridden by the execution engine. */
protected async build(): Promise<AgentRuntime> {
const hasModel = this.modelId ?? this.modelConfigObj;
if (!hasModel) {
throw new Error(`Agent "${this.name}" requires a model`);
}
if (!this.instructionsText) {
throw new Error(`Agent "${this.name}" requires instructions`);
}
const finalTools = [...this.tools];
if (this.workspaceInstance) {
const wsTools = this.workspaceInstance.getTools();
finalTools.push(...wsTools);
}
let finalStaticTools = finalTools;
if (this.requireToolApprovalValue) {
finalStaticTools = finalTools.map((t) =>
t.suspendSchema ? t : wrapToolForApproval(t, { requireApproval: true }),
);
}
// Validate checkpoint requirement from static tools and known MCP approval config
// before attempting any network connections (allows fast failure).
const staticNeedsCheckpoint = finalStaticTools.some((t) => t.suspendSchema);
const mcpNeedsCheckpoint =
(this.requireToolApprovalValue && this.mcpClients.length > 0) ||
this.mcpClients.some((c) => c.declaresApproval());
if ((staticNeedsCheckpoint || mcpNeedsCheckpoint) && !this.checkpointStore) {
throw new Error(
`Agent "${this.name}" has tools requiring approval or suspend/resume but no checkpoint storage. ` +
"Add .checkpoint('memory') for in-process storage, " +
'or pass a persistent store (e.g. LibSQLStore, PgStore).',
);
}
// Resolve tools from all MCP clients.
const mcpToolLists = await Promise.all(this.mcpClients.map(async (c) => await c.listTools()));
let mcpTools = mcpToolLists.flat();
// Apply global requireToolApproval to MCP tools (per-server approval is already
// handled inside McpClient/McpConnection.listTools()).
if (this.requireToolApprovalValue) {
mcpTools = mcpTools.map((t) =>
t.suspendSchema ? t : wrapToolForApproval(t, { requireApproval: true }),
);
}
// Detect collisions between MCP tools and static tools.
const staticNames = new Set(finalStaticTools.map((t) => t.name));
const collisions = mcpTools.filter((t) => staticNames.has(t.name)).map((t) => t.name);
if (collisions.length > 0) {
throw new Error(
`MCP tool name collision — the following tool names resolve to duplicates: ${collisions.join(', ')}`,
);
}
const allTools = [...finalStaticTools, ...mcpTools];
// Validate checkpoint again after discovering actual MCP tools
// (catches the case where MCP tools have suspendSchema after listing).
const allNeedCheckpoint = allTools.some((t) => t.suspendSchema);
if (allNeedCheckpoint && !this.checkpointStore) {
throw new Error(
`Agent "${this.name}" has tools requiring approval or suspend/resume but no checkpoint storage. ` +
"Add .checkpoint('memory') for in-process storage, " +
'or pass a persistent store (e.g. LibSQLStore, PgStore).',
);
}
let modelConfig: ModelConfig;
if (this.modelConfigObj) {
if (
this.resolvedKey &&
typeof this.modelConfigObj === 'object' &&
'id' in this.modelConfigObj
) {
modelConfig = { ...this.modelConfigObj, apiKey: this.resolvedKey };
} else {
modelConfig = this.modelConfigObj;
}
} else if (this.resolvedKey) {
modelConfig = { id: this.modelId!, apiKey: this.resolvedKey };
} else {
modelConfig = this.modelId!;
}
let instructions = this.instructionsText;
if (this.workspaceInstance) {
const wsInstructions = this.workspaceInstance.getInstructions();
if (wsInstructions) {
instructions = `${instructions}\n\n${wsInstructions}`;
}
}
this.runtime = new AgentRuntime({
name: this.name,
model: modelConfig,
instructions,
tools: allTools.length > 0 ? allTools : undefined,
instructionProviderOptions: this.instructionProviderOpts,
providerTools: this.providerTools.length > 0 ? this.providerTools : undefined,
memory: this.memoryConfig?.memory,
lastMessages: this.memoryConfig?.lastMessages,
workingMemory: this.memoryConfig?.workingMemory,
semanticRecall: this.memoryConfig?.semanticRecall,
structuredOutput: this.outputSchema,
checkpointStorage: this.checkpointStore,
thinking: this.thinkingConfig,
eventBus: this.eventBus,
toolCallConcurrency: this.concurrencyValue,
titleGeneration: this.memoryConfig?.titleGeneration,
telemetry: this.telemetryConfig ?? (await this.telemetryBuilder?.build()),
});
return this.runtime;
}
}

View file

@ -0,0 +1,186 @@
const MODELS_DEV_URL = 'https://models.dev/api.json';
/** Cost per million tokens. */
export interface ModelCost {
/** Cost per million input tokens (USD). */
input: number;
/** Cost per million output tokens (USD). */
output: number;
/** Cost per million cached input tokens (USD). */
cacheRead?: number;
/** Cost per million cache write tokens (USD). */
cacheWrite?: number;
}
/** Model context/output limits. */
export interface ModelLimits {
/** Maximum context window size in tokens. */
context?: number;
/** Maximum output tokens. */
output?: number;
}
/** Information about a single model. */
export interface ModelInfo {
/** Model ID (e.g. 'claude-sonnet-4-5'). */
id: string;
/** Human-readable name (e.g. 'Claude Sonnet 4.5'). */
name: string;
/** Whether the model supports reasoning / thinking. */
reasoning: boolean;
/** Whether the model supports tool calling. */
toolCall: boolean;
/** Cost per million tokens. */
cost?: ModelCost;
/** Token limits. */
limits?: ModelLimits;
}
/** Information about a provider. */
export interface ProviderInfo {
/** Provider ID (e.g. 'anthropic'). */
id: string;
/** Human-readable name (e.g. 'Anthropic'). */
name: string;
/** Available models keyed by model ID. */
models: Record<string, ModelInfo>;
}
/** The full catalog of providers and their models. */
export type ProviderCatalog = Record<string, ProviderInfo>;
interface ModelsDevModel {
id: string;
name: string;
reasoning?: boolean;
tool_call?: boolean;
cost?: { input?: number; output?: number; cache_read?: number; cache_write?: number };
limit?: { context?: number; output?: number };
}
interface ModelsDevProvider {
id: string;
name: string;
models?: Record<string, ModelsDevModel>;
}
/**
* Fetch the provider/model catalog from models.dev.
*
* Returns a map of provider ID to ProviderInfo with all available models.
* The catalog is fetched once and can be cached by the caller.
*
* @example
* ```typescript
* import { fetchProviderCatalog } from '@n8n/agents';
*
* const catalog = await fetchProviderCatalog();
* console.log(Object.keys(catalog)); // ['anthropic', 'openai', ...]
* console.log(catalog.anthropic.models['claude-sonnet-4-5'].reasoning); // true
* ```
*/
export async function fetchProviderCatalog(): Promise<ProviderCatalog> {
const response = await fetch(MODELS_DEV_URL);
if (!response.ok) {
throw new Error(`Failed to fetch provider catalog: ${response.statusText}`);
}
const data = (await response.json()) as Record<string, ModelsDevProvider>;
const catalog: ProviderCatalog = {};
for (const [key, provider] of Object.entries(data)) {
if (!provider.models || Object.keys(provider.models).length === 0) continue;
const models: Record<string, ModelInfo> = {};
for (const [modelId, model] of Object.entries(provider.models)) {
const info: ModelInfo = {
id: model.id,
name: model.name,
reasoning: model.reasoning ?? false,
toolCall: model.tool_call ?? false,
};
if (model.cost?.input !== undefined && model.cost?.output !== undefined) {
info.cost = {
input: model.cost.input,
output: model.cost.output,
...(model.cost.cache_read !== undefined && { cacheRead: model.cost.cache_read }),
...(model.cost.cache_write !== undefined && { cacheWrite: model.cost.cache_write }),
};
}
if (model.limit) {
info.limits = {
...(model.limit.context !== undefined && { context: model.limit.context }),
...(model.limit.output !== undefined && { output: model.limit.output }),
};
}
models[modelId] = info;
}
catalog[key] = {
id: provider.id,
name: provider.name,
models,
};
}
return catalog;
}
// --- Global cached catalog for internal use ---
let cachedCatalog: ProviderCatalog | undefined;
let catalogFetchPromise: Promise<ProviderCatalog | undefined> | undefined;
/**
* Get the cached catalog, fetching once if needed.
* Returns undefined if the fetch fails (offline, timeout, etc.).
* On failure, clears the in-flight promise so the next call retries.
* @internal
*/
export async function getCachedCatalog(): Promise<ProviderCatalog | undefined> {
if (cachedCatalog) return cachedCatalog;
catalogFetchPromise ??= fetchProviderCatalog()
.then((c) => {
cachedCatalog = c;
return c;
})
.catch((error: unknown) => {
// Clear so subsequent calls retry
catalogFetchPromise = undefined;
console.warn(
'[agents] Failed to fetch model catalog from models.dev — cost data will be unavailable:',
error instanceof Error ? error.message : error,
);
return undefined;
});
return await catalogFetchPromise;
}
/**
* Look up cost info for a model by its full ID (e.g. 'anthropic/claude-sonnet-4-5').
* Returns undefined if catalog is unavailable or model not found.
* @internal
*/
export async function getModelCost(modelId: string): Promise<ModelCost | undefined> {
const catalog = await getCachedCatalog();
if (!catalog) return undefined;
const [provider, ...rest] = modelId.split('/');
const modelName = rest.join('/');
return catalog[provider]?.models[modelName]?.cost;
}
/**
* Compute the cost in USD from token usage and per-million-token pricing.
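*
* @example
* ```typescript
* // 2,000 prompt tokens and 500 completion tokens at $3 / $15 per million tokens:
* computeCost({ promptTokens: 2_000, completionTokens: 500 }, { input: 3, output: 15 });
* // => (2000 / 1e6) * 3 + (500 / 1e6) * 15 = 0.0135 (USD)
* ```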
*/
export function computeCost(
usage: { promptTokens: number; completionTokens: number },
cost: ModelCost,
): number {
const inputCost = (usage.promptTokens / 1_000_000) * cost.input;
const outputCost = (usage.completionTokens / 1_000_000) * cost.output;
return inputCost + outputCost;
}

View file

@ -0,0 +1,183 @@
import { filterLlmMessages } from './message';
import { AgentRuntime } from '../runtime/agent-runtime';
import type { BuiltEval, CheckFn, EvalInput, EvalScore, JudgeFn, JudgeHandlerFn } from '../types';
import type { AgentMessage } from '../types/sdk/message';
/** Extract text content from LLM messages (custom messages are skipped). */
function extractText(messages: AgentMessage[]): string {
return filterLlmMessages(messages)
.flatMap((m) => m.content)
.filter((c) => c.type === 'text')
.map((c) => (c as { text: string }).text)
.join('');
}
/**
* Builder for creating evaluations with a fluent API.
*
* Two modes:
* - **Deterministic**: `.check(fn)` for pure-function scoring
* - **LLM-as-judge**: `.model()` + `.credential()` + `.judge(fn)` for LLM-powered scoring
*
* Usage:
* ```typescript
* // Deterministic
* const jsonCheck = new Eval('json-check')
* .description('Verify output is valid JSON')
* .check(({ output }) => {
* try { JSON.parse(output); return { score: 1, reasoning: 'Valid JSON' }; }
* catch { return { score: 0, reasoning: 'Invalid JSON' }; }
* });
*
* // LLM-as-judge
* const correctness = new Eval('correctness')
* .description('Judge factual correctness')
* .model('anthropic/claude-haiku-4-5')
* .credential('anthropic')
* .judge(async ({ input, output, expected, llm }) => {
* const result = await llm(`Is "${output}" correct for "${input}"? Expected: ${expected}`);
* const score = parseFloat(result.text.match(/[\d.]+/)?.[0] ?? '0');
* return { score: Math.min(1, Math.max(0, score)), reasoning: result.text };
* });
* ```
*/
export class Eval {
private evalName: string;
private desc?: string;
private checkFn?: CheckFn;
private judgeFn?: JudgeHandlerFn;
private modelId?: string;
private credentialName?: string;
private _resolvedApiKey?: string;
constructor(name: string) {
this.evalName = name;
}
/** Human-readable description of what this eval measures. */
description(desc: string): this {
this.desc = desc;
return this;
}
/** Set the judge model (LLM-as-judge mode). */
model(modelId: string): this {
this.modelId = modelId;
return this;
}
/** Declare a credential for the judge model. */
credential(name: string): this {
this.credentialName = name;
return this;
}
/** @internal Read the declared credential name (used by the execution engine). */
protected get declaredCredential(): string | undefined {
return this.credentialName;
}
/** @internal Set the resolved API key for the judge model. */
protected set resolvedApiKey(key: string) {
this._resolvedApiKey = key;
}
/**
* Set a deterministic check function.
* Mutually exclusive with `.judge()`.
*/
check(fn: CheckFn): this {
if (this.judgeFn) {
throw new Error(`Eval "${this.evalName}": cannot use both .check() and .judge()`);
}
this.checkFn = fn;
return this;
}
/**
* Set an LLM-as-judge handler. Requires `.model()` and `.credential()`.
* The handler receives `{ input, output, expected, llm }` where `llm`
* is a callable function bound to the judge model.
* Mutually exclusive with `.check()`.
*/
judge(fn: JudgeHandlerFn): this {
if (this.checkFn) {
throw new Error(`Eval "${this.evalName}": cannot use both .check() and .judge()`);
}
this.judgeFn = fn;
return this;
}
/** The eval name. */
get name(): string {
return this.evalName;
}
/** @internal Build the eval into a runnable form. */
protected build(): BuiltEval {
if (!this.checkFn && !this.judgeFn) {
throw new Error(`Eval "${this.evalName}" requires either .check() or .judge()`);
}
if (this.judgeFn && !this.modelId) {
throw new Error(`Eval "${this.evalName}" uses .judge() but no .model() was set`);
}
const name = this.evalName;
const desc = this.desc;
if (this.checkFn) {
const checkFn = this.checkFn;
return {
name,
description: desc,
_run: async (input: EvalInput) => await checkFn(input),
};
}
// LLM-as-judge mode
const judgeFn = this.judgeFn!;
const modelConfig: string | { id: `${string}/${string}`; apiKey: string } = this._resolvedApiKey
? { id: this.modelId! as `${string}/${string}`, apiKey: this._resolvedApiKey }
: this.modelId!;
const runtime = new AgentRuntime({
name: `${name}-judge`,
model: modelConfig,
instructions: 'You are an evaluation judge. Respond precisely as instructed.',
});
const llm: JudgeFn = async (prompt: string) => {
const result = await runtime.generate([
{ role: 'user', content: [{ type: 'text', text: prompt }] },
]);
return { text: extractText(result.messages) };
};
return {
name,
description: desc,
_run: async (input: EvalInput) => await judgeFn({ ...input, llm }),
};
}
/** @internal Ensure the eval is built (lazy). */
private _built?: BuiltEval;
/** @internal */
ensureBuilt(): BuiltEval {
this._built ??= this.build();
return this._built;
}
/** Run this eval against a single input. Lazy-builds on first call. */
async run(input: EvalInput): Promise<EvalScore> {
return await this.ensureBuilt()._run(input);
}
}

View file

@ -0,0 +1,159 @@
import type { Agent } from './agent';
import type { Eval } from './eval';
import { filterLlmMessages } from './message';
import type { EvalResults, EvalRunResult, EvalScore, GenerateResult } from '../types';
import type { AgentMessage } from '../types/sdk/message';
/** Extract text content from messages. */
function extractText(messages: AgentMessage[]): string {
return filterLlmMessages(messages)
.flatMap((m) => m.content)
.filter((c) => c.type === 'text')
.map((c) => (c as { text: string }).text)
.join('');
}
export interface DatasetRow {
/** The prompt to send to the agent. */
input: string;
/** Expected answer (used by evals like correctness/similarity). */
expected?: string;
/**
* Per-tool resume data overrides for evaluation. By default all suspended
* tools are auto-resumed with `{ approved: true }` during evaluations.
* Use this to test denial or custom resume scenarios.
*
* - `'deny'` is shorthand for `{ approved: false }`
* - An object value is passed as-is to `agent.resume()`
*/
resumeData?: Record<string, 'deny' | Record<string, unknown>>;
}
export interface EvaluateConfig {
/** Dataset of test cases to run through the agent. */
dataset: DatasetRow[];
/** Evals to run against each agent response. */
evals: Eval[];
}
/**
* Run a dataset through an agent and score the results with evals.
*
* All dataset rows and evals run in parallel for maximum throughput.
* Suspended tool calls are **auto-resumed with `{ approved: true }`** during
* evals. Use `resumeData` in dataset rows to override per tool.
*
* @example
* ```typescript
* const results = await evaluate(agent, {
* dataset: [
* { input: 'What is 2+2?', expected: '4' },
* { input: 'Delete temp files', resumeData: { delete_file: 'deny' } },
* { input: 'Book flight', resumeData: { book: { seat: '12A' } } },
* ],
* evals: [correctness, similarity],
* });
* ```
*/
export async function evaluate(agent: Agent, config: EvaluateConfig): Promise<EvalResults> {
const { dataset, evals } = config;
const runs: EvalRunResult[] = await Promise.all(
dataset.map(async (row) => {
const result = await runWithInterrupts(agent, row.input, row.resumeData);
const toolCalls = result.toolCalls ?? [];
// Build composite output: if the agent's text is empty but it made
// tool calls, include the tool outputs so evals have something to score.
let output = extractText(result.messages);
if (!output.trim() && toolCalls.length > 0) {
const toolOutputs = toolCalls
.filter((tc) => tc.output !== undefined)
.map((tc) => `[${tc.tool}] ${JSON.stringify(tc.output)}`);
if (toolOutputs.length > 0) {
output = toolOutputs.join('\n');
}
}
const scoreEntries = await Promise.all(
evals.map(async (ev): Promise<[string, EvalScore]> => {
const score = await ev.run({
input: row.input,
output,
expected: row.expected,
toolCalls,
});
return [ev.name, score];
}),
);
return {
input: row.input,
output,
expected: row.expected,
scores: Object.fromEntries(scoreEntries),
};
}),
);
const summary: EvalResults['summary'] = {};
for (const ev of evals) {
const results = runs
.map((r) => r.scores[ev.name]?.pass)
.filter((p): p is boolean => p !== undefined);
if (results.length > 0) {
const passed = results.filter(Boolean).length;
summary[ev.name] = {
passed,
failed: results.length - passed,
total: results.length,
};
}
}
return { runs, summary };
}
/**
* Run the agent with automatic interrupt handling.
* Uses generate() and loops: if the result has a pendingSuspend, resolves
* the resume data and calls agent.resume('generate', ...) to get a
* GenerateResult directly without needing to stream-and-re-generate.
*
* Tools are auto-resumed with `{ approved: true }` by default;
* use `resumeOverrides` to override per tool.
*/
async function runWithInterrupts(
agent: Agent,
input: string,
resumeOverrides?: Record<string, 'deny' | Record<string, unknown>>,
): Promise<GenerateResult> {
let result = await agent.generate(input);
const allToolCalls: Array<{ tool: string; input: unknown; output: unknown }> = [
...(result.toolCalls ?? []),
];
while (result.pendingSuspend && result.pendingSuspend.length > 0) {
const { runId, toolCallId, toolName } = result.pendingSuspend[0];
const override = toolName ? resumeOverrides?.[toolName] : undefined;
let data: Record<string, unknown>;
if (override === 'deny') {
data = { approved: false };
} else if (override && typeof override === 'object') {
data = override;
} else {
data = { approved: true };
}
result = await agent.resume('generate', data, { runId, toolCallId });
allToolCalls.push(...(result.toolCalls ?? []));
}
return {
...result,
...(allToolCalls.length > 0 ? { toolCalls: allToolCalls } : {}),
};
}

View file

@ -0,0 +1,52 @@
import type { BuiltGuardrail, GuardrailType, GuardrailStrategy, PiiDetectionType } from '../types';
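/**
* Builder for configuring guardrails with a fluent API.
*
* Note: guardrails are currently accepted by the Agent builder for forward
* compatibility but are not yet wired to the runtime.
*
* Usage sketch (the specific type, strategy, and detection values below are
* illustrative; see `GuardrailType`, `GuardrailStrategy`, and `PiiDetectionType`
* for the supported values):
* ```typescript
* const piiGuard = new Guardrail('pii')
*   .type('pii')
*   .strategy('redact')
*   .detect(['email', 'phone'])
*   .threshold(0.8);
*
* agent.inputGuardrail(piiGuard);
* ```
*/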
export class Guardrail {
private name: string;
private guardType?: GuardrailType;
private strategyType?: GuardrailStrategy;
private detectionTypes?: PiiDetectionType[];
private thresholdValue?: number;
constructor(name: string) {
this.name = name;
}
type(guardType: GuardrailType): this {
this.guardType = guardType;
return this;
}
strategy(strategy: GuardrailStrategy): this {
this.strategyType = strategy;
return this;
}
detect(types: PiiDetectionType[]): this {
this.detectionTypes = types;
return this;
}
threshold(value: number): this {
this.thresholdValue = value;
return this;
}
build(): BuiltGuardrail {
if (!this.guardType) throw new Error(`Guardrail "${this.name}" requires a type`);
if (!this.strategyType) throw new Error(`Guardrail "${this.name}" requires a strategy`);
return {
name: this.name,
guardType: this.guardType,
strategy: this.strategyType,
_config: {
detectionTypes: this.detectionTypes,
threshold: this.thresholdValue,
},
};
}
}

View file

@ -0,0 +1,231 @@
import { McpConnection } from '../runtime/mcp-connection';
import type { McpServerConfig, McpVerifyResult } from '../types/sdk/mcp';
import type { BuiltTool } from '../types/sdk/tool';
/**
* Manages connections to one or more MCP servers and exposes their tools
* as a flat list of BuiltTool instances.
*
* Connections are established lazily on the first `listTools()` call and
* kept alive until `close()` is called. Both operations deduplicate
* concurrent calls via stored promises, so calling `listTools()` from
* multiple concurrent `generate()` runs is safe.
*
* @example
* ```typescript
* const client = new McpClient([
* { name: 'browser', url: 'http://localhost:9222/mcp', transport: 'streamableHttp' },
* { name: 'fs', command: 'npx', args: ['@anthropic/mcp-fs', '/tmp'] },
* ]);
*
* const agent = new Agent('assistant')
* .model('anthropic/claude-sonnet-4-5')
* .instructions('You are a helpful assistant.')
* .mcp(client);
*
* const result = await agent.generate('List files in /tmp');
* await client.close();
* ```
*/
export class McpClient {
private readonly configs: McpServerConfig[];
private connections: McpConnection[];
private listToolsPromise: Promise<BuiltTool[]> | undefined;
private closePromise: Promise<void> | undefined;
/**
* @param configs - Server configurations. Each must have either `url` or `command`.
* Duplicate names within the list are rejected.
* @param requireToolApproval - When true, every tool from every server is wrapped
* with a human-approval gate (requires `.checkpoint()` on the Agent).
*/
constructor(configs: McpServerConfig[], requireToolApproval = false) {
for (const cfg of configs) {
if (!cfg.url && !cfg.command) {
throw new Error(
`MCP server "${cfg.name}": exactly one of "url" or "command" must be provided`,
);
}
if (cfg.url && cfg.command) {
throw new Error(`MCP server "${cfg.name}": provide either "url" or "command", not both`);
}
}
const seen = new Set<string>();
for (const cfg of configs) {
if (seen.has(cfg.name)) {
throw new Error(`MCP server name "${cfg.name}" is already registered`);
}
seen.add(cfg.name);
}
this.configs = configs;
this.connections = configs.map((cfg) => new McpConnection(cfg, requireToolApproval));
}
/**
* Explicitly connect to all servers without listing tools.
* Optional: `listTools()` connects implicitly.
*/
async connect(): Promise<void> {
await this.listTools();
}
/**
* Connect to all servers (if not already connected) and return the full
* flat list of tools. Subsequent calls return the cached list without
* additional network round-trips. On error the cache is cleared so the
* caller can retry.
*/
async listTools(): Promise<BuiltTool[]> {
if (!this.listToolsPromise) {
const p = this.doListTools();
this.listToolsPromise = p;
p.catch(() => {
if (this.listToolsPromise === p) this.listToolsPromise = undefined;
});
}
return await this.listToolsPromise;
}
/**
* Disconnect from all servers. Subsequent calls are no-ops.
* Best-effort: errors are logged but not thrown.
*/
async close(): Promise<void> {
this.closePromise ??= this.doClose();
return await this.closePromise;
}
/**
* Verify connectivity to all configured servers.
* A temporary connection is opened to each server, its tools are
* listed, and the connection is closed; this does NOT affect the
* long-lived connections used by `listTools()`.
*
* Never throws; returns a result object indicating success or per-server
* errors so callers can handle partial failures gracefully.
*
* @example
* ```typescript
* const result = await client.verify();
* if (!result.ok) {
* console.error('MCP connection failed:', result.errors);
* }
* ```
*/
async verify(): Promise<McpVerifyResult> {
if (this.configs.length === 0) {
return { ok: true, servers: [] };
}
const results = await Promise.allSettled(
this.configs.map(async (cfg) => {
const conn = new McpConnection(cfg);
try {
await conn.connect();
const tools = await conn.listTools();
return { name: cfg.name, tools: tools.length };
} finally {
await conn.disconnect().catch(() => {});
}
}),
);
const errors: Array<{ server: string; error: string }> = [];
const servers: Array<{ name: string; tools: number }> = [];
for (let i = 0; i < results.length; i++) {
const result = results[i];
if (result.status === 'rejected') {
errors.push({
server: this.configs[i].name,
error: result.reason instanceof Error ? result.reason.message : String(result.reason),
});
} else {
servers.push(result.value);
}
}
return errors.length > 0 ? { ok: false, errors } : { ok: true, servers };
}
/**
* Returns true when any configured server declares per-server approval
* requirements (`requireApproval: true` or a non-empty `requireApproval`
* string array). Does NOT require a network connection.
*
* Used by the Agent builder to validate checkpoint configuration before
* attempting to connect.
*/
declaresApproval(): boolean {
return this.connections.some((conn) => conn.declaresApproval());
}
private async doListTools(): Promise<BuiltTool[]> {
const connectedConnections: McpConnection[] = [];
const settled = await Promise.allSettled(
this.connections.map(async (conn) => {
await conn.connect();
connectedConnections.push(conn);
return await conn.listTools();
}),
);
const failed = settled
.map((r, i) => ({ result: r, name: this.connections[i].name }))
.filter((x) => x.result.status === 'rejected');
if (failed.length > 0) {
await Promise.allSettled(connectedConnections.map(async (c) => await c.disconnect()));
const details = failed
.map((x) => {
const reason =
x.result.status === 'rejected'
? x.result.reason instanceof Error
? x.result.reason.message
: String(x.result.reason)
: '';
return `${x.name}: ${reason}`;
})
.join('; ');
throw new Error(`MCP connection failed — ${details}`);
}
const tools = settled.flatMap((r) => (r.status === 'fulfilled' ? r.value : []));
const seen = new Set<string>();
const duplicates: string[] = [];
for (const tool of tools) {
if (seen.has(tool.name)) {
duplicates.push(tool.name);
}
seen.add(tool.name);
}
if (duplicates.length > 0) {
await Promise.allSettled(connectedConnections.map(async (c) => await c.disconnect()));
throw new Error(
`MCP tool name collision — the following tool names resolve to duplicates: ${duplicates.join(', ')}`,
);
}
return tools;
}
private async doClose(): Promise<void> {
await Promise.allSettled(
this.connections.map(async (conn) => {
try {
await conn.disconnect();
} catch (error) {
console.error(`MCP disconnect error for server "${conn.name}":`, error);
}
}),
);
}
}

View file

@ -0,0 +1,187 @@
import type { z } from 'zod';
import { InMemoryMemory } from '../runtime/memory-store';
import { templateFromSchema } from '../runtime/working-memory';
import type {
BuiltMemory,
MemoryConfig,
SemanticRecallConfig,
TitleGenerationConfig,
} from '../types';
type ZodObjectSchema = z.ZodObject<z.ZodRawShape>;
const DEFAULT_LAST_MESSAGES = 10;
/**
* Builder for configuring conversation memory.
*
* Usage:
* ```typescript
* const memory = new Memory()
* .storage('memory')
* .lastMessages(20)
* .freeform('# User Context\n- **Name**:\n- **City**:');
*
* agent.memory(memory);
* ```
*/
export class Memory {
private lastMessagesValue: number = DEFAULT_LAST_MESSAGES;
private semanticRecallConfig?: SemanticRecallConfig;
private workingMemorySchema?: ZodObjectSchema;
private workingMemoryTemplate?: string;
private workingMemoryScope: 'resource' | 'thread' = 'resource';
private memoryBackend?: BuiltMemory;
private titleGenerationConfig?: TitleGenerationConfig;
/** The configured number of recent messages to include. */
get lastMessageCount(): number {
return this.lastMessagesValue;
}
/**
* Set the storage backend for conversation history.
*
* - `'memory'`: in-process memory (default, lost on restart)
* - A `BuiltMemory` instance for persistent storage (e.g. SqliteMemory)
*/
storage(backend: 'memory' | BuiltMemory): this {
if (backend === 'memory') {
this.memoryBackend = undefined;
} else {
this.memoryBackend = backend;
}
return this;
}
/** Set the number of recent messages to include in context. */
lastMessages(count: number): this {
this.lastMessagesValue = count;
return this;
}
/** Enable semantic recall (RAG-based retrieval of relevant past messages). */
semanticRecall(config: SemanticRecallConfig): this {
this.semanticRecallConfig = config;
return this;
}
/**
* Enable structured working memory with a Zod schema.
* Mutually exclusive with `.freeform()`.
*/
structured(schema: ZodObjectSchema): this {
this.workingMemorySchema = schema;
return this;
}
/**
* Enable free-form working memory with a markdown/text template.
* Mutually exclusive with `.structured()`.
*/
freeform(template: string): this {
this.workingMemoryTemplate = template;
return this;
}
/**
* Set the working memory scope.
*
* - `'resource'` (default): working memory is shared across all threads for the same resource/user.
* - `'thread'`: working memory is scoped to a single conversation thread.
*/
scope(s: 'resource' | 'thread'): this {
this.workingMemoryScope = s;
return this;
}
/**
* Enable automatic title generation for new threads.
*
* - `true`: uses the agent's own model and default instructions.
* - `{ model, instructions }`: custom model and/or custom instructions.
*
* Titles are generated once per thread (only when the thread has no title)
* and run asynchronously so they never block the agent response.
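*
* @example
* ```typescript
* // Sketch: generate titles with a cheaper dedicated model (the model id is an assumption).
* new Memory().titleGeneration({ model: 'openai/gpt-4o-mini' });
* ```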
*/
titleGeneration(config: boolean | TitleGenerationConfig): this {
if (config === true) {
this.titleGenerationConfig = {};
} else if (config === false) {
this.titleGenerationConfig = undefined;
} else {
this.titleGenerationConfig = config;
}
return this;
}
/**
* Validate configuration and produce a `MemoryConfig`.
*
* @throws if both `.structured()` and `.freeform()` are used
* @throws if `.freeform()` template is empty
* @throws if `.semanticRecall()` is used with a backend that doesn't support search()
*/
build(): MemoryConfig {
if (this.workingMemorySchema && this.workingMemoryTemplate !== undefined) {
throw new Error(
'Working memory cannot use both .structured() and .freeform(). ' +
'Choose one: .structured(zodSchema) for typed state, or .freeform(template) for free-form text.',
);
}
if (this.workingMemoryTemplate !== undefined && this.workingMemoryTemplate.trim() === '') {
throw new Error(
'Free-form working memory template cannot be empty. ' +
'Provide a markdown template with slots for the agent to fill.',
);
}
const memory: BuiltMemory = this.memoryBackend ?? new InMemoryMemory();
if (this.semanticRecallConfig) {
if (!memory.queryEmbeddings && !memory.search) {
throw new Error(
'Semantic recall requires a storage backend with queryEmbeddings() or search() support.',
);
}
if (!memory.search && !this.semanticRecallConfig.embedder) {
throw new Error(
'Semantic recall requires an embedder when using queryEmbeddings(). Add embedder to your semanticRecall config: ' +
".semanticRecall({ topK: 5, embedder: 'openai/text-embedding-3-small' })",
);
}
}
let workingMemory: MemoryConfig['workingMemory'];
if (this.workingMemorySchema) {
workingMemory = {
template: templateFromSchema(this.workingMemorySchema),
structured: true,
schema: this.workingMemorySchema,
scope: this.workingMemoryScope,
};
} else if (this.workingMemoryTemplate !== undefined) {
workingMemory = {
template: this.workingMemoryTemplate,
structured: false,
scope: this.workingMemoryScope,
};
}
return {
memory,
lastMessages: this.lastMessagesValue,
workingMemory,
semanticRecall: this.semanticRecallConfig,
titleGeneration: this.titleGenerationConfig,
};
}
}

Some files were not shown because too many files have changed in this diff.