mirror of
https://github.com/n8n-io/n8n
synced 2026-04-21 15:47:20 +00:00
feat: Add @n8n/agents package (#27560)
This commit is contained in:
parent
d3e45bc126
commit
58fbaf4a88
147 changed files with 24961 additions and 227 deletions
2
packages/@n8n/agents/.env.example
Normal file
2
packages/@n8n/agents/.env.example
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
OPENAI_API_KEY=
|
||||
ANTHROPIC_API_KEY=
|
||||
138
packages/@n8n/agents/AGENTS.md
Normal file
138
packages/@n8n/agents/AGENTS.md
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
# AGENTS.md
|
||||
|
||||
Conventions for the `@n8n/agents` package.
|
||||
|
||||
## Code Style
|
||||
|
||||
- **No `_` prefix on private properties** — use `private` access modifier
|
||||
without underscore. Write `private name: string`, not `private _name: string`.
|
||||
- **Builder pattern with lazy build** — all public primitives use a fluent
|
||||
builder API. **User code never calls `.build()`**. Builders are passed
|
||||
directly to the consuming method (e.g. `agent.tool(myTool)`) which calls
|
||||
`.build()` internally. Agent and Network have `run()`/`stream()` directly
|
||||
on the class, which lazy-build via `ensureBuilt()` on first call. `build()`
|
||||
is `protected` on Agent and Network to keep it out of the public API.
|
||||
- **Zod for schemas** — all input/output schemas use Zod.
|
||||
|
||||
## Package Structure
|
||||
|
||||
```
|
||||
src/
|
||||
index.ts # Public API barrel export
|
||||
types/ # Public TypeScript types
|
||||
index.ts # Re-exports consumable types
|
||||
telemetry.ts
|
||||
sdk/ # Types aligned with builders (agent, eval, guardrail, mcp, memory, message, provider, tool)
|
||||
runtime/ # Serializable runtime shapes (events, message lists)
|
||||
utils/ # JSON typing helpers re-exported with public types
|
||||
sdk/ # Fluent builders and SDK entry points
|
||||
agent.ts # Agent builder
|
||||
catalog.ts # Provider catalog fetch
|
||||
eval.ts # Evaluation primitives
|
||||
evaluate.ts # Evaluation runner over agents + dataset
|
||||
guardrail.ts # Guardrail builder
|
||||
mcp-client.ts # MCP client integration
|
||||
memory.ts # Memory builder
|
||||
message.ts # LLM/DB message helpers
|
||||
network.ts # Network builder
|
||||
provider-tools.ts # Provider-defined tool factories
|
||||
telemetry.ts # Telemetry builder (OTel, redaction)
|
||||
tool.ts # Tool builder
|
||||
verify.ts # Verification utilities
|
||||
evals/ # Built-in eval scorers; exported as namespace `evals` from index
|
||||
runtime/ # Internal — never exported from index.ts
|
||||
agent-runtime.ts # Core agent execution engine (AI SDK)
|
||||
tool-adapter.ts # Tool execution, branded suspend detection
|
||||
stream.ts # Streaming helpers
|
||||
model-factory.ts # Model instantiation
|
||||
memory-store.ts # Conversation / working-memory persistence hooks
|
||||
working-memory.ts # In-run working memory
|
||||
message-list.ts # Message list + serialization for agent loop
|
||||
messages.ts # Message normalization
|
||||
mcp-connection.ts # MCP connection lifecycle
|
||||
mcp-tool-resolver.ts
|
||||
run-state.ts # Run / checkpoint state
|
||||
event-bus.ts # Internal agent events
|
||||
runtime-helpers.ts
|
||||
title-generation.ts
|
||||
strip-orphaned-tool-messages.ts
|
||||
logger.ts
|
||||
storage/ # Optional persisted memory backends (exported)
|
||||
sqlite-memory.ts
|
||||
postgres-memory.ts
|
||||
workspace/ # Workspace, sandbox, filesystem, built-in tools (exported)
|
||||
integrations/ # Optional integrations (exported where applicable)
|
||||
langsmith.ts # LangSmith telemetry adapter (peer `langsmith`)
|
||||
utils/ # Internal helpers (e.g. Zod utilities); not barrel-exported
|
||||
examples/
|
||||
basic-agent.ts # Sample snippet; included in format/lint paths
|
||||
docs/
|
||||
agent-runtime-architecture.md # In-package runtime notes
|
||||
```
|
||||
|
||||
The **`index.ts`** surface also exports `Workspace` / sandbox / filesystem types,
|
||||
`SqliteMemory` / `PostgresMemory`, `LangSmithTelemetry`, and `evals` alongside the
|
||||
core SDK builders.
|
||||
|
||||
Optional **peer dependencies** (telemetry): `langsmith`, `@opentelemetry/sdk-trace-node`,
|
||||
`@opentelemetry/sdk-trace-base`, `@opentelemetry/exporter-trace-otlp-http` — all
|
||||
optional; install only when wiring that telemetry.
|
||||
|
||||
## Credential Pattern
|
||||
|
||||
Agents declare credential requirements via `.credential('name')`. The execution
|
||||
engine resolves the name to an API key and injects it into the model config.
|
||||
User code never touches raw API keys.
|
||||
|
||||
```typescript
|
||||
const agent = new Agent('assistant')
|
||||
.model('anthropic/claude-sonnet-4-5')
|
||||
.credential('anthropic')
|
||||
.instructions('You are helpful.');
|
||||
```
|
||||
|
||||
## Engine Injection (EngineAgent)
|
||||
|
||||
The execution engine extends `Agent` and overrides `protected build()` to
|
||||
inject infrastructure (checkpoint storage, credentials) before calling
|
||||
`super.build()`. This is the pattern for all engine-level concerns:
|
||||
|
||||
```typescript
|
||||
class EngineAgent extends Agent {
|
||||
build() {
|
||||
this.checkpoint(store);
|
||||
const cred = this.declaredCredential;
|
||||
if (cred) this.resolvedApiKey = resolve(cred);
|
||||
return super.build();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
## Testing
|
||||
|
||||
- Unit tests live in `src/__tests__/`, integration tests in `src/__tests__/integration/`
|
||||
- Unit tests use Jest (`pnpm test` / `pnpm test:unit`)
|
||||
- Integration tests use Vitest (`pnpm test:integration`) with real LLM calls
|
||||
- A `.env` file at the package root is loaded automatically by the vitest config.
|
||||
Always assume it exists when running integration tests. Never commit it.
|
||||
- Required keys:
|
||||
- `ANTHROPIC_API_KEY` — all integration tests
|
||||
- `OPENAI_API_KEY` — semantic recall tests (embeddings)
|
||||
- Tests skip automatically when the required API key is not set
|
||||
- Run from the package directory: `cd packages/@n8n/agents && pnpm test`
|
||||
|
||||
## Documentation
|
||||
|
||||
- Runtime architecture notes: `docs/agent-runtime-architecture.md` (this package).
|
||||
- Spec-driven work in the wider repo may use `.claude/specs/` (see repo
|
||||
`.claude/skills/spec-driven-development`).
|
||||
|
||||
## Building
|
||||
|
||||
```bash
|
||||
cd packages/@n8n/agents
|
||||
pnpm build # rimraf dist && tsc -p tsconfig.build.json → dist/
|
||||
pnpm typecheck # tsc --noEmit
|
||||
pnpm test # jest (unit)
|
||||
```
|
||||
451
packages/@n8n/agents/docs/agent-runtime-architecture.md
Normal file
451
packages/@n8n/agents/docs/agent-runtime-architecture.md
Normal file
|
|
@ -0,0 +1,451 @@
|
|||
# Agent Runtime Architecture
|
||||
|
||||
This document describes the internal architecture of the `@n8n/agents` agent
|
||||
runtime — the execution engine that drives a single agent turn from input to
|
||||
final response.
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
`AgentRuntime` (`src/runtime/agent-runtime.ts`) is the core execution engine
|
||||
for a single agent turn. It uses the Vercel AI SDK directly (`generateText` /
|
||||
`streamText`) and is responsible for:
|
||||
|
||||
- Building the LLM message context (memory history, semantic recall, working
|
||||
memory in the system prompt, user input)
|
||||
- Stripping orphaned tool-call/tool-result pairs before LLM calls
|
||||
(`stripOrphanedToolMessages`)
|
||||
- Running the agentic tool-call loop (default **20** iterations,
|
||||
`MAX_LOOP_ITERATIONS`)
|
||||
- **Configurable tool-call concurrency** — tools in one LLM turn run in batches
|
||||
of `toolCallConcurrency` (default `1`; `Infinity` runs all executable calls
|
||||
in parallel)
|
||||
- Suspending and resuming runs for Human-in-the-Loop (HITL) **and** for tools
|
||||
that return a branded suspend result (`suspendSchema` / `resumeSchema`)
|
||||
- Persisting new messages to a memory store at the end of each completed turn,
|
||||
optionally saving **embeddings** for semantic recall
|
||||
- Extracting and persisting **working memory** from assistant output when
|
||||
configured
|
||||
- Optional **structured output** (`Output.object` + Zod), **thinking** /
|
||||
reasoning provider options, **title generation**, and **telemetry** (AI SDK
|
||||
`experimental_telemetry`)
|
||||
- **Token usage and cost** (catalog pricing via `getModelCost` / `computeCost`)
|
||||
- Emitting lifecycle events via `AgentEventBus`
|
||||
- Tracking run state (`idle` → `running` → `success / failed / suspended / cancelled`)
|
||||
|
||||
There are two parallel execution paths — non-streaming (`generate`) and
|
||||
streaming (`stream`) — that mirror each other in structure.
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
A[User Input] --> B[normalizeInput]
|
||||
B --> C[buildMessageList]
|
||||
C --> D{generate or stream?}
|
||||
D -->|generate| E[runGenerateLoop]
|
||||
D -->|stream| F[startStreamLoop → runStreamLoop]
|
||||
E --> G[saveToMemory]
|
||||
F --> G
|
||||
G --> H[Return Result]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Public API — BuiltAgent
|
||||
|
||||
`Agent` implements `BuiltAgent`, which exposes the full public surface:
|
||||
|
||||
| Method | Description |
|
||||
|--------|-------------|
|
||||
| `generate(input, options?)` | Non-streaming run; returns `GenerateResult` (errors often surface as `finishReason: 'error'` and `error` instead of throwing) |
|
||||
| `stream(input, options?)` | Streaming run; returns `StreamResult` with `runId` and `stream` |
|
||||
| `resume(method, data, options)` | Resume a suspended tool with payload `data`; `options` must include `runId` and `toolCallId` |
|
||||
| `approve(method, options)` | HITL approval — calls `resume` with `{ approved: true }` |
|
||||
| `deny(method, options)` | HITL decline — calls `resume` with `{ approved: false }` |
|
||||
| `on(event, handler)` | Register a lifecycle event handler |
|
||||
| `abort()` | Cancel the currently running agent |
|
||||
| `getState()` | Return the latest `SerializableAgentState` snapshot |
|
||||
| `asTool(description)` | Wrap the agent as a `BuiltTool` for multi-agent composition |
|
||||
|
||||
`ExecutionOptions` includes `abortSignal?: AbortSignal`, forwarded into
|
||||
`AgentEventBus.resetAbort()` so callers can cancel via an external signal as
|
||||
well as `agent.abort()`.
|
||||
|
||||
---
|
||||
|
||||
## Event system
|
||||
|
||||
### AgentEventBus
|
||||
|
||||
`AgentEventBus` (`src/runtime/event-bus.ts`) is the internal publish/subscribe
|
||||
channel shared between `Agent` (registers handlers via `on()`) and
|
||||
`AgentRuntime` (emits events during the loop). A single bus instance is created
|
||||
when the SDK wires the runtime and passed in via `AgentRuntimeConfig`.
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
UserCode -->|"agent.on(event, handler)"| AgentEventBus
|
||||
AgentEventBus -->|"passed via config"| AgentRuntime
|
||||
AgentRuntime -->|"bus.emit(data)"| AgentEventBus
|
||||
AgentEventBus -->|"calls handlers synchronously"| UserCode
|
||||
```
|
||||
|
||||
Handlers have the signature `(data: AgentEventData) => void` — there is **no**
|
||||
separate “controls” object; cancellation is done with `agent.abort()` on the
|
||||
same bus that holds the `AbortController`.
|
||||
|
||||
`AgentMiddleware` in `types/runtime/event.ts` is a small alias type
|
||||
(`on` mirrors the agent) for future middleware-style composition.
|
||||
|
||||
### Event types
|
||||
|
||||
| Event | When emitted | Payload |
|
||||
|-------|----------------|---------|
|
||||
| `AgentStart` | Start of `initRun`, right after `status: running`; before `ensureModelCost` / `buildMessageList` | — |
|
||||
| `AgentEnd` | Successful completion after persistence / cleanup; payload is assistant-facing messages (`finalized.messages` in `generate`, `list.responseDelta()` in `stream`) | `{ messages }` |
|
||||
| `TurnStart` | Top of each loop iteration, before the LLM call | — |
|
||||
| `TurnEnd` | After tool calls for the iteration are processed; requires an assistant message in the new messages | `{ message, toolResults }` |
|
||||
| `ToolExecutionStart` | Before `processToolCall` runs the handler | `{ toolCallId, toolName, args }` |
|
||||
| `ToolExecutionEnd` | After the tool returns, errors, or is satisfied from an existing AI SDK tool-result | `{ toolCallId, toolName, result, isError }` |
|
||||
| `Error` | Unhandled failures (not user **abort**); also emitted on some stream failures | `{ message, error }` |
|
||||
|
||||
---
|
||||
|
||||
## abort()
|
||||
|
||||
`agent.abort()` synchronously aborts the internal `AbortController`. The
|
||||
resulting signal is passed to `generateText` / `streamText` as `abortSignal`
|
||||
so in-flight HTTP cancels promptly. The loop also checks `bus.isAborted` at
|
||||
batch boundaries.
|
||||
|
||||
`AgentEventBus.resetAbort(externalSignal?)` runs at the start of each run: it
|
||||
replaces the controller and, if `ExecutionOptions.abortSignal` is set, forwards
|
||||
that signal’s abort to the internal controller.
|
||||
|
||||
### Abort behaviour
|
||||
|
||||
| Mode | Behaviour on abort |
|
||||
|------|-------------------|
|
||||
| `generate` | Catches abort and returns `{ runId, messages, finishReason: 'error', ... }` without emitting `AgentEvent.Error` |
|
||||
| `stream` | Writes `{ type: 'error', error }` then finishes / closes cleanly |
|
||||
|
||||
State becomes `cancelled`. `resetAbort()` supplies a fresh controller per run
|
||||
so the same `Agent` instance can run again.
|
||||
|
||||
---
|
||||
|
||||
## getState()
|
||||
|
||||
`agent.getState()` returns a shallow copy of `SerializableAgentState`. Before
|
||||
the first `generate()` / `stream()`, the `Agent` builder returns a minimal idle
|
||||
state with an empty `messageList` (`messages`, `historyIds`, `inputIds`,
|
||||
`responseIds` all empty).
|
||||
|
||||
### State machine
|
||||
|
||||
```mermaid
|
||||
stateDiagram-v2
|
||||
[*] --> idle: constructed
|
||||
idle --> running: generate() / stream() / resume()
|
||||
running --> success: loop completes normally
|
||||
running --> failed: unhandled error
|
||||
running --> suspended: tool suspends (HITL or suspend/resume)
|
||||
running --> cancelled: abort() / external signal
|
||||
suspended --> running: resume() / approve() / deny() loads checkpoint
|
||||
```
|
||||
|
||||
### AgentRunState values
|
||||
|
||||
| Status | Meaning |
|
||||
|--------|---------|
|
||||
| `idle` | No run yet (or builder before first lazy build) |
|
||||
| `running` | Loop in progress |
|
||||
| `success` | Turn finished and checkpoint cleaned up when applicable |
|
||||
| `failed` | Unrecoverable error path |
|
||||
| `suspended` | Awaiting resume (checkpoint stored under `runId`) |
|
||||
| `cancelled` | Aborted |
|
||||
| `waiting` | Reserved |
|
||||
|
||||
### SerializableAgentState
|
||||
|
||||
Important fields (see `types/sdk/agent.ts`):
|
||||
|
||||
```typescript
|
||||
interface SerializableAgentState {
|
||||
persistence?: AgentPersistenceOptions; // threadId + resourceId when using memory
|
||||
status: AgentRunState;
|
||||
messageList: SerializedMessageList;
|
||||
resumeData?: AgentResumeData;
|
||||
pendingToolCalls: Record<string, PendingToolCall>;
|
||||
finishReason?: FinishReason;
|
||||
usage?: TokenUsage;
|
||||
executionOptions?: PersistedExecutionOptions; // maxIterations only — persisted on suspend
|
||||
}
|
||||
```
|
||||
|
||||
`PendingToolCall` distinguishes tools already suspended (`suspended: true`,
|
||||
`suspendPayload`, `resumeSchema`) from calls not yet executed (`suspended:
|
||||
false`) when a batch stops at the first suspension.
|
||||
|
||||
---
|
||||
|
||||
## asTool()
|
||||
|
||||
`agent.asTool(description)` wraps the agent as a `BuiltTool`. The handler calls
|
||||
`agent.generate(input, { telemetry: ctx.parentTelemetry })`, collects assistant
|
||||
text, and returns `{ result: string }`. When the sub-run produces usage,
|
||||
results are wrapped so the parent runtime can merge **`SubAgentUsage`** and
|
||||
**`totalCost`** into the parent `GenerateResult` / stream `finish` chunk.
|
||||
|
||||
---
|
||||
|
||||
## Message types
|
||||
|
||||
| Type | Definition | Purpose |
|
||||
|------|------------|---------|
|
||||
| `AgentMessage` | `Message \| CustomMessage` | Internal representation; custom messages are UI-facing |
|
||||
| `ModelMessage` (AI SDK) | Roles wired to the provider | LLM-facing; custom messages never appear here |
|
||||
|
||||
**Custom messages** are stripped for the model via `filterLlmMessages()` before
|
||||
`toAiMessages()`.
|
||||
|
||||
`messages.ts` provides `toAiMessages`, `fromAiMessages`, and consumers rely on
|
||||
`filterLlmMessages` from `sdk/message.ts`.
|
||||
|
||||
**Tool results vs model:** optional `BuiltTool.toModelOutput` maps the stored /
|
||||
event result before building the `tool-result` the LLM sees; `toMessage` still
|
||||
uses the raw result for custom DB messages.
|
||||
|
||||
---
|
||||
|
||||
## AgentMessageList
|
||||
|
||||
`AgentMessageList` (`src/runtime/message-list.ts`) is the central structure for
|
||||
one turn. It keeps a single append-only array and **three Sets** for
|
||||
provenance: history, input, response.
|
||||
|
||||
### Sources
|
||||
|
||||
| Source | Added by | `turnDelta()` | `responseDelta()` | `forLlm()` |
|
||||
|--------|----------|---------------|-------------------|------------|
|
||||
| **history** | `addHistory()` | No | No | Yes (after filters) |
|
||||
| **input** | `addInput()` | Yes | No | Yes (after filters) |
|
||||
| **response** | `addResponse()` | Yes | Yes | Yes (after filters) |
|
||||
|
||||
### Key methods
|
||||
|
||||
```
|
||||
forLlm(baseInstructions, instructionProviderOptions?)
|
||||
→ [system + working memory block, ...toAiMessages(filterLlm(stripOrphaned(all)))]
|
||||
turnDelta() → input ∪ response messages (memory persistence)
|
||||
responseDelta() → response only (user-facing / GenerateResult.messages)
|
||||
serialize() → { messages, historyIds, inputIds, responseIds }
|
||||
deserialize() → restores all three sets via stable message ids
|
||||
```
|
||||
|
||||
### Serialization
|
||||
|
||||
Serialized state stores **message id arrays** per set (`historyIds`,
|
||||
`inputIds`, `responseIds`), not a single `historyCount`. After a round-trip,
|
||||
history / input / response classification is fully restored — required for
|
||||
correct `turnDelta()` after suspend/resume.
|
||||
|
||||
`stripOrphanedToolMessages` runs on loaded history and inside `forLlm()` so
|
||||
incomplete tool pairs do not reach the model.
|
||||
|
||||
---
|
||||
|
||||
## Agentic loop
|
||||
|
||||
Both `runGenerateLoop` and `runStreamLoop` follow the same high-level pattern:
|
||||
emit `TurnStart`, call the model with `list.forLlm(...)`, append assistant /
|
||||
tool traffic via `addResponse`, process tool calls through
|
||||
`iterateToolCallsConcurrent` (batched by `toolCallConcurrency`), handle
|
||||
suspension / persistence, repeat until finish or max iterations.
|
||||
|
||||
### Tool execution and concurrency
|
||||
|
||||
- Executable tool calls (non–provider-executed) are processed in windows of size
|
||||
`this.concurrency` (`toolCallConcurrency ?? 1`).
|
||||
- Each window uses `Promise.allSettled` so all tools in the batch settle; a
|
||||
suspension in the batch stops **subsequent** batches and records remaining
|
||||
calls in `pending` without `suspendPayload`.
|
||||
- **HITL** and **suspend/resume** flows share the same pending-map machinery;
|
||||
`processToolCall` validates JSON Schema or Zod **input** schemas (Ajv / Zod)
|
||||
before invoking the handler.
|
||||
|
||||
### Loop invariants
|
||||
|
||||
1. **Single list** — `addResponse` accumulates assistant, tool, and custom
|
||||
messages for the turn.
|
||||
2. **System prompt** — rebuilt each call via `forLlm`; working memory content
|
||||
is injected there, not as separate list rows.
|
||||
3. **Suspension preserves pending calls** — remaining calls in the batch and
|
||||
later calls are recorded for resume.
|
||||
4. **Max iterations** — default 20 (`MAX_LOOP_ITERATIONS`).
|
||||
5. **Abort** — checked between batches; signal passed into AI SDK calls.
|
||||
|
||||
### Non-streaming vs streaming
|
||||
|
||||
| Aspect | `runGenerateLoop` | `runStreamLoop` |
|
||||
|--------|-------------------|-----------------|
|
||||
| AI SDK | `generateText()` | `streamText()` |
|
||||
| Output | `GenerateResult` | `StreamChunk`s via `WritableStream` |
|
||||
| Errors | Returned on `GenerateResult` (`error`, `finishReason: 'error'`) for many paths | Error chunks + `closeStreamWithError` |
|
||||
| Suspension | `pendingSuspend` array on `GenerateResult` | `tool-call-suspended` chunks, then `finish` |
|
||||
|
||||
---
|
||||
|
||||
## HITL and suspend/resume
|
||||
|
||||
**HITL (approval):** tools can require approval (`requiresApproval` /
|
||||
`needsApprovalFn`). The runtime treats approval outcomes like resume data:
|
||||
`approve()` / `deny()` delegate to `resume()` with `{ approved: true | false }`.
|
||||
|
||||
**Programmatic suspend:** tools can return a branded suspend object; the runtime
|
||||
requires `resumeSchema` (Zod → JSON Schema for clients) and validates
|
||||
`suspendPayload` when `suspendSchema` is set.
|
||||
|
||||
```mermaid
|
||||
sequenceDiagram
|
||||
participant Caller
|
||||
participant AgentRuntime
|
||||
participant CheckpointStore
|
||||
participant LLM
|
||||
|
||||
Caller->>AgentRuntime: generate/stream(input)
|
||||
AgentRuntime->>LLM: generateText/streamText
|
||||
LLM-->>AgentRuntime: tool calls
|
||||
Note over AgentRuntime: Suspension: persist pendingToolCalls + messageList
|
||||
AgentRuntime->>CheckpointStore: suspend(runId, state)
|
||||
AgentRuntime-->>Caller: pendingSuspend / tool-call-suspended chunks
|
||||
|
||||
Caller->>AgentRuntime: resume/approve/deny(method, …)
|
||||
AgentRuntime->>CheckpointStore: resume(runId) — load only
|
||||
AgentRuntime->>AgentRuntime: processToolCall / iteratePendingToolCallsConcurrent
|
||||
AgentRuntime->>LLM: Continue loop if needed
|
||||
AgentRuntime->>CheckpointStore: complete(runId) when finished
|
||||
```
|
||||
|
||||
With **concurrency > 1**, multiple tools may suspend in the same turn; the
|
||||
stream can emit **multiple** `tool-call-suspended` chunks, and `GenerateResult`
|
||||
can carry **`pendingSuspend`** with multiple entries.
|
||||
|
||||
---
|
||||
|
||||
## RunStateManager
|
||||
|
||||
`RunStateManager` (`src/runtime/run-state.ts`) persists suspended runs through
|
||||
a **`CheckpointStore`**:
|
||||
|
||||
- Default: in-memory `MemoryCheckpointStore` when `checkpointStorage` is
|
||||
`'memory'` or omitted.
|
||||
- Custom: pass a `CheckpointStore` implementation for durability.
|
||||
|
||||
`suspend(runId, state)` writes the state. `resume(runId)` **loads** the state
|
||||
and returns it with `status: 'running'`; it does **not** delete the key.
|
||||
`complete(runId)` deletes the checkpoint when the run finishes without remaining
|
||||
suspensions.
|
||||
|
||||
### Known limitations
|
||||
|
||||
In-memory checkpoints grow until `complete()` runs. Production stores should
|
||||
implement TTL or eviction as needed.
|
||||
|
||||
---
|
||||
|
||||
## Memory persistence
|
||||
|
||||
At end of turn, `saveToMemory()` uses `list.turnDelta()` and
|
||||
`saveMessagesToThread`. If **semantic recall** is configured with an embedder
|
||||
and `memory.saveEmbeddings`, new messages are embedded and stored.
|
||||
|
||||
**Working memory:** when configured, the runtime parses `<working_memory>` …
|
||||
`</working_memory>` regions from assistant text, validates structured JSON if a
|
||||
schema exists, strips the tags from the visible message, and asynchronously
|
||||
persists via `memory.saveWorkingMemory`.
|
||||
|
||||
**Thread titles:** `titleGeneration` triggers `generateThreadTitle` (fire-and-forget)
|
||||
after a successful save when persistence and memory are present.
|
||||
|
||||
---
|
||||
|
||||
## Stream architecture
|
||||
|
||||
The streaming path uses a `TransformStream`: `startStreamLoop` returns the
|
||||
readable side immediately; the loop writes chunks in the background.
|
||||
|
||||
`stream.ts` **`convertChunk`** maps AI SDK v6 `TextStreamPart` values to our
|
||||
`StreamChunk` union (including `finish-step` / `finish` consolidation).
|
||||
|
||||
### StreamChunk types (representative)
|
||||
|
||||
| Type | Content |
|
||||
|------|---------|
|
||||
| `text-delta` | Incremental text |
|
||||
| `reasoning-delta` | Thinking / reasoning text |
|
||||
| `tool-call-delta` | Streaming tool name / arguments |
|
||||
| `message` | Full assistant or tool message |
|
||||
| `tool-call-suspended` | Suspension: `runId`, `toolCallId`, tool metadata, optional `resumeSchema`, `suspendPayload` |
|
||||
| `finish` | `finishReason`, `usage` (with optional **cost**), `model`, optional **`structuredOutput`**, **`subAgentUsage`**, **`totalCost`** |
|
||||
| `error` | Failure or abort |
|
||||
|
||||
---
|
||||
|
||||
## File map
|
||||
|
||||
```
|
||||
src/
|
||||
runtime/
|
||||
agent-runtime.ts — AgentRuntime (generate/stream/resume loops, HITL, state)
|
||||
event-bus.ts — AgentEventBus + AbortController
|
||||
message-list.ts — AgentMessageList
|
||||
run-state.ts — RunStateManager, generateRunId
|
||||
memory-store.ts — saveMessagesToThread helper
|
||||
messages.ts — AI SDK message conversion
|
||||
model-factory.ts — createModel / createEmbeddingModel
|
||||
tool-adapter.ts — buildToolMap, executeTool, toAiSdkTools, suspend / agent-result guards
|
||||
stream.ts — convertChunk, toTokenUsage
|
||||
runtime-helpers.ts — normalizeInput, usage merge, stream error helpers, …
|
||||
working-memory.ts — instruction text, parse/filter for working_memory tags
|
||||
strip-orphaned-tool-messages.ts
|
||||
title-generation.ts
|
||||
logger.ts
|
||||
types/
|
||||
sdk/agent.ts — BuiltAgent, GenerateResult, StreamChunk, SerializableAgentState, …
|
||||
sdk/tool.ts, sdk/memory.ts, … — Public SDK contracts
|
||||
runtime/event.ts — AgentEvent enum + AgentEventData
|
||||
runtime/message-list.ts — SerializedMessageList
|
||||
telemetry.ts — BuiltTelemetry shape
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Design decisions (selected)
|
||||
|
||||
### Set-based message list + id serialization
|
||||
|
||||
Three Sets plus stable **`id` on each message** allow `turnDelta()` /
|
||||
`responseDelta()` without losing custom tool messages, and checkpointed runs
|
||||
restore history vs turn data correctly after resume.
|
||||
|
||||
### `responseDelta()` vs `turnDelta()`
|
||||
|
||||
User input must not appear in `GenerateResult.messages`; memory persistence
|
||||
must store the full turn including input — hence two views over the same list.
|
||||
|
||||
### Concurrency preserves suspension semantics
|
||||
|
||||
Batches run in parallel when configured, but the first suspension still
|
||||
captures **unexecuted** tool calls in `pending` so nothing is dropped. Approval
|
||||
tools and programmatic suspends use the same pending-map format.
|
||||
|
||||
### Why one event bus per agent
|
||||
|
||||
The bus is shared between `Agent` and `AgentRuntime` so `on()` registrations and
|
||||
`abort()` always target the controller used by the active loop.
|
||||
|
||||
### Why `AbortSignal`
|
||||
|
||||
Signals cancel HTTP immediately in the AI SDK and compose with caller-provided
|
||||
`abortSignal` via `resetAbort`.
|
||||
23
packages/@n8n/agents/eslint.config.mjs
Normal file
23
packages/@n8n/agents/eslint.config.mjs
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import { defineConfig } from 'eslint/config';
|
||||
import { nodeConfig } from '@n8n/eslint-config/node';
|
||||
|
||||
export default defineConfig(
|
||||
{ ignores: ['examples/**', 'vitest.integration.config.*', 'src/__tests__/fixtures/**'] },
|
||||
nodeConfig,
|
||||
{
|
||||
rules: {
|
||||
'unicorn/filename-case': ['error', { case: 'kebabCase' }],
|
||||
'@typescript-eslint/naming-convention': ['error', {
|
||||
'selector': 'enumMember',
|
||||
'format': ['UPPER_CASE', 'PascalCase'],
|
||||
}]
|
||||
},
|
||||
},
|
||||
{
|
||||
files: ['src/__tests__/integration/**/*.ts'],
|
||||
rules: {
|
||||
'@typescript-eslint/require-await': 'off',
|
||||
'n8n-local-rules/no-uncaught-json-parse': 'off',
|
||||
},
|
||||
},
|
||||
);
|
||||
153
packages/@n8n/agents/examples/basic-agent.ts
Normal file
153
packages/@n8n/agents/examples/basic-agent.ts
Normal file
|
|
@ -0,0 +1,153 @@
|
|||
/**
|
||||
* @n8n/agents — Full API Demonstration
|
||||
*
|
||||
* This example demonstrates the complete builder-pattern API for creating
|
||||
* and running AI agents. It shows: tools, agents, memory, guardrails,
|
||||
* scorers, multi-agent patterns (agent-as-tool), and tool interrupts.
|
||||
*
|
||||
* To run with real LLM calls, set ANTHROPIC_API_KEY.
|
||||
* Without keys, the runtime will throw on actual LLM calls.
|
||||
*/
|
||||
import { z } from 'zod';
|
||||
|
||||
import { Agent, Guardrail, Memory, Tool } from '../src';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tools
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const searchTool = new Tool('web-search')
|
||||
.description('Search the web for information on a topic')
|
||||
.input(
|
||||
z.object({
|
||||
query: z.string().describe('The search query'),
|
||||
maxResults: z.number().default(3).describe('Maximum results to return'),
|
||||
}),
|
||||
)
|
||||
.output(
|
||||
z.object({
|
||||
results: z.array(
|
||||
z.object({
|
||||
title: z.string(),
|
||||
snippet: z.string(),
|
||||
}),
|
||||
),
|
||||
}),
|
||||
)
|
||||
.handler(async ({ query, maxResults }) => ({
|
||||
results: Array.from({ length: maxResults }, (_, i) => ({
|
||||
title: `Result ${i + 1} for "${query}"`,
|
||||
snippet: `This is a mock search result about ${query}.`,
|
||||
})),
|
||||
}));
|
||||
|
||||
const writeFileTool = new Tool('write-file')
|
||||
.description('Write content to a file (suspends for confirmation)')
|
||||
.input(
|
||||
z.object({
|
||||
path: z.string().describe('File path to write to'),
|
||||
content: z.string().describe('Content to write'),
|
||||
}),
|
||||
)
|
||||
.suspend(z.object({ message: z.string(), severity: z.string() }))
|
||||
.resume(z.object({ approved: z.boolean() }))
|
||||
.handler(async ({ path, content }, ctx) => {
|
||||
if (!ctx.resumeData) {
|
||||
return await ctx.suspend({ message: `Write to "${path}"?`, severity: 'warning' });
|
||||
}
|
||||
if (!ctx.resumeData.approved) return { written: false };
|
||||
console.log(` [Mock] Would write ${content.length} chars to ${path}`);
|
||||
return { written: true };
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Memory
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const memory = new Memory()
|
||||
.lastMessages(20)
|
||||
.semanticRecall({ topK: 4, messageRange: { before: 1, after: 1 } });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agents
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const researcher = new Agent('researcher')
|
||||
.model('anthropic/claude-sonnet-4')
|
||||
.instructions(
|
||||
'You are a research assistant. Search for information and return structured findings.',
|
||||
)
|
||||
.tool(searchTool)
|
||||
.memory(memory)
|
||||
.inputGuardrail(
|
||||
new Guardrail('injection-detector').type('prompt-injection').strategy('block').threshold(0.8),
|
||||
);
|
||||
|
||||
const writer = new Agent('writer')
|
||||
.model('anthropic/claude-sonnet-4')
|
||||
.instructions('You write clear, engaging content based on research provided to you.')
|
||||
.tool(writeFileTool)
|
||||
.checkpoint('memory');
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Multi-Agent: Agent as Tool
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const orchestrator = new Agent('orchestrator')
|
||||
.model('anthropic/claude-sonnet-4')
|
||||
.instructions(
|
||||
'You coordinate research and writing. Delegate research to the researcher and writing to the writer.',
|
||||
)
|
||||
.tool(researcher.asTool('Delegate research tasks to the research specialist'))
|
||||
.tool(writer.asTool('Delegate writing tasks to the content writer'));
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Execution
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
async function main() {
|
||||
console.log('=== @n8n/agents ===\n');
|
||||
|
||||
// --- 1. Single agent generate ---
|
||||
console.log('1. Single agent generate:');
|
||||
try {
|
||||
const result = await researcher.generate('Find information about RAG architectures', {
|
||||
persistence: {
|
||||
resourceId: 'user-123',
|
||||
threadId: 'session-1',
|
||||
},
|
||||
});
|
||||
const text = result.messages
|
||||
.flatMap((m) => ('content' in m ? m.content : []))
|
||||
.filter((c) => c.type === 'text')
|
||||
.map((c) => ('text' in c ? c.text : ''))
|
||||
.join('');
|
||||
console.log(` Result: ${text.slice(0, 100)}...`);
|
||||
console.log(
|
||||
` Usage: ${result.usage?.promptTokens} in, ${result.usage?.completionTokens} out`,
|
||||
);
|
||||
} catch (error) {
|
||||
console.log(` (Expected) Error: ${(error as Error).message}`);
|
||||
console.log(' (Set ANTHROPIC_API_KEY to run with real LLM calls)');
|
||||
}
|
||||
|
||||
// --- 2. Orchestrator (agent-as-tool pattern) ---
|
||||
console.log('\n2. Orchestrator (agent-as-tool pattern):');
|
||||
try {
|
||||
const orchResult = await orchestrator.generate(
|
||||
'Research RAG architectures and write a summary',
|
||||
);
|
||||
const text = orchResult.messages
|
||||
.flatMap((m) => ('content' in m ? m.content : []))
|
||||
.filter((c) => c.type === 'text')
|
||||
.map((c) => ('text' in c ? c.text : ''))
|
||||
.join('');
|
||||
console.log(` Result: ${text.slice(0, 100)}...`);
|
||||
} catch (error) {
|
||||
console.log(` (Expected) Error: ${(error as Error).message}`);
|
||||
}
|
||||
|
||||
console.log('\n=== Complete ===');
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
7
packages/@n8n/agents/jest.config.js
Normal file
7
packages/@n8n/agents/jest.config.js
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
/** @type {import('jest').Config} */
|
||||
const base = require('../../../jest.config');
|
||||
|
||||
module.exports = {
|
||||
...base,
|
||||
testPathIgnorePatterns: [...(base.testPathIgnorePatterns || []), '/integration/'],
|
||||
};
|
||||
65
packages/@n8n/agents/package.json
Normal file
65
packages/@n8n/agents/package.json
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
{
|
||||
"name": "@n8n/agents",
|
||||
"version": "0.1.0",
|
||||
"description": "AI agent SDK for n8n's code-first execution engine",
|
||||
"main": "dist/index.js",
|
||||
"module": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
"files": [
|
||||
"dist/**/*"
|
||||
],
|
||||
"scripts": {
|
||||
"clean": "rimraf dist .turbo",
|
||||
"dev": "pnpm watch",
|
||||
"typecheck": "tsc --noEmit",
|
||||
"build": "rimraf dist && tsc -p tsconfig.build.json",
|
||||
"format": "biome format --write src examples",
|
||||
"format:check": "biome ci src examples",
|
||||
"lint": "eslint . --quiet",
|
||||
"lint:fix": "eslint . --fix",
|
||||
"watch": "tsc -p tsconfig.build.json --watch",
|
||||
"test": "jest",
|
||||
"test:unit": "jest",
|
||||
"test:dev": "jest --watch",
|
||||
"test:integration": "vitest run --config vitest.integration.config.mjs"
|
||||
},
|
||||
"dependencies": {
|
||||
"@ai-sdk/anthropic": "^3.0.58",
|
||||
"@ai-sdk/google": "^3.0.43",
|
||||
"@ai-sdk/openai": "^3.0.41",
|
||||
"@ai-sdk/xai": "^3.0.67",
|
||||
"@ai-sdk/provider-utils": "^4.0.21",
|
||||
"@modelcontextprotocol/sdk": "1.26.0",
|
||||
"ajv": "^8.18.0",
|
||||
"@libsql/client": "^0.17.0",
|
||||
"ai": "^6.0.116",
|
||||
"pg": "catalog:",
|
||||
"zod": "catalog:"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"langsmith": ">=0.3.0",
|
||||
"@opentelemetry/sdk-trace-node": ">=1.0.0",
|
||||
"@opentelemetry/sdk-trace-base": ">=1.0.0",
|
||||
"@opentelemetry/exporter-trace-otlp-http": ">=0.50.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"langsmith": {
|
||||
"optional": true
|
||||
},
|
||||
"@opentelemetry/sdk-trace-node": {
|
||||
"optional": true
|
||||
},
|
||||
"@opentelemetry/sdk-trace-base": {
|
||||
"optional": true
|
||||
},
|
||||
"@opentelemetry/exporter-trace-otlp-http": {
|
||||
"optional": true
|
||||
}
|
||||
},
|
||||
"devDependencies": {
|
||||
"@n8n/typescript-config": "workspace:*",
|
||||
"@types/json-schema": "^7.0.15",
|
||||
"@types/pg": "^8.15.6",
|
||||
"testcontainers": "11.11.0"
|
||||
}
|
||||
}
|
||||
2163
packages/@n8n/agents/src/__tests__/agent-runtime.test.ts
Normal file
2163
packages/@n8n/agents/src/__tests__/agent-runtime.test.ts
Normal file
File diff suppressed because it is too large
Load diff
51
packages/@n8n/agents/src/__tests__/event-bus.test.ts
Normal file
51
packages/@n8n/agents/src/__tests__/event-bus.test.ts
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
import { AgentEventBus } from '../runtime/event-bus';
|
||||
|
||||
describe('AgentEventBus', () => {
|
||||
describe('resetAbort', () => {
|
||||
it('should create a fresh signal on reset', () => {
|
||||
const bus = new AgentEventBus();
|
||||
bus.resetAbort();
|
||||
expect(bus.isAborted).toBe(false);
|
||||
expect(bus.signal.aborted).toBe(false);
|
||||
});
|
||||
|
||||
it('should respect agent.abort()', () => {
|
||||
const bus = new AgentEventBus();
|
||||
bus.resetAbort();
|
||||
bus.abort();
|
||||
expect(bus.isAborted).toBe(true);
|
||||
expect(bus.signal.aborted).toBe(true);
|
||||
});
|
||||
|
||||
it('should respect external abort signal', () => {
|
||||
const bus = new AgentEventBus();
|
||||
const external = new AbortController();
|
||||
bus.resetAbort(external.signal);
|
||||
|
||||
expect(bus.isAborted).toBe(false);
|
||||
external.abort();
|
||||
expect(bus.isAborted).toBe(true);
|
||||
expect(bus.signal.aborted).toBe(true);
|
||||
});
|
||||
|
||||
it('should abort when either internal or external signal fires', () => {
|
||||
const bus = new AgentEventBus();
|
||||
const external = new AbortController();
|
||||
bus.resetAbort(external.signal);
|
||||
|
||||
bus.abort();
|
||||
expect(bus.isAborted).toBe(true);
|
||||
expect(external.signal.aborted).toBe(false);
|
||||
});
|
||||
|
||||
it('should allow reuse after reset', () => {
|
||||
const bus = new AgentEventBus();
|
||||
bus.resetAbort();
|
||||
bus.abort();
|
||||
expect(bus.isAborted).toBe(true);
|
||||
|
||||
bus.resetAbort();
|
||||
expect(bus.isAborted).toBe(false);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
/**
|
||||
* Minimal MCP server for stdio transport integration tests.
|
||||
* Spawned as a child process by mcp-stdio-transport.test.ts.
|
||||
* Run with: node mcp-stdio-server.mjs
|
||||
*/
|
||||
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
||||
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
||||
import { ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprotocol/sdk/types.js';
|
||||
|
||||
// 1×1 transparent PNG in base64 (smallest valid PNG)
|
||||
const TINY_PNG =
|
||||
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
|
||||
|
||||
const server = new Server(
|
||||
{ name: 'test-stdio-server', version: '1.0.0' },
|
||||
{ capabilities: { tools: {} } },
|
||||
);
|
||||
|
||||
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
||||
tools: [
|
||||
{
|
||||
name: 'echo',
|
||||
description: 'Echo the message back as-is',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: { message: { type: 'string', description: 'Message to echo' } },
|
||||
required: ['message'],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'add',
|
||||
description: 'Add two numbers together',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
a: { type: 'number', description: 'First number' },
|
||||
b: { type: 'number', description: 'Second number' },
|
||||
},
|
||||
required: ['a', 'b'],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'image',
|
||||
description: 'Return a small image with a caption',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: { caption: { type: 'string', description: 'Image caption' } },
|
||||
required: ['caption'],
|
||||
},
|
||||
},
|
||||
],
|
||||
}));
|
||||
|
||||
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
||||
const { name, arguments: args = {} } = request.params;
|
||||
|
||||
if (name === 'echo') {
|
||||
return { content: [{ type: 'text', text: String(args.message ?? '') }] };
|
||||
}
|
||||
|
||||
if (name === 'add') {
|
||||
const sum = Number(args.a ?? 0) + Number(args.b ?? 0);
|
||||
return { content: [{ type: 'text', text: String(sum) }] };
|
||||
}
|
||||
|
||||
if (name === 'image') {
|
||||
return {
|
||||
content: [
|
||||
{ type: 'text', text: String(args.caption ?? '') },
|
||||
{ type: 'image', data: TINY_PNG, mimeType: 'image/png' },
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
isError: true,
|
||||
content: [{ type: 'text', text: `Unknown tool: ${name}` }],
|
||||
};
|
||||
});
|
||||
|
||||
const transport = new StdioServerTransport();
|
||||
await server.connect(transport);
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
import { InMemoryMemory } from '../runtime/memory-store';
|
||||
|
||||
describe('InMemoryMemory working memory', () => {
|
||||
it('returns null for unknown key', async () => {
|
||||
const mem = new InMemoryMemory();
|
||||
expect(await mem.getWorkingMemory({ threadId: 'thread-x', resourceId: 'unknown' })).toBeNull();
|
||||
});
|
||||
|
||||
it('saves and retrieves working memory keyed by resourceId', async () => {
|
||||
const mem = new InMemoryMemory();
|
||||
await mem.saveWorkingMemory(
|
||||
{ threadId: 'thread-1', resourceId: 'user-1' },
|
||||
'# Context\n- Name: Alice',
|
||||
);
|
||||
expect(await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' })).toBe(
|
||||
'# Context\n- Name: Alice',
|
||||
);
|
||||
});
|
||||
|
||||
it('overwrites on subsequent save', async () => {
|
||||
const mem = new InMemoryMemory();
|
||||
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' }, 'v1');
|
||||
await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' }, 'v2');
|
||||
expect(await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1' })).toBe('v2');
|
||||
});
|
||||
|
||||
it('isolates by resourceId (resource scope)', async () => {
|
||||
const mem = new InMemoryMemory();
|
||||
await mem.saveWorkingMemory({ threadId: 'thread-a', resourceId: 'user-1' }, 'Alice data');
|
||||
await mem.saveWorkingMemory({ threadId: 'thread-b', resourceId: 'user-2' }, 'Bob data');
|
||||
expect(await mem.getWorkingMemory({ threadId: 'thread-a', resourceId: 'user-1' })).toBe(
|
||||
'Alice data',
|
||||
);
|
||||
expect(await mem.getWorkingMemory({ threadId: 'thread-b', resourceId: 'user-2' })).toBe(
|
||||
'Bob data',
|
||||
);
|
||||
});
|
||||
|
||||
it('returns null for unknown threadId (thread scope)', async () => {
|
||||
const mem = new InMemoryMemory();
|
||||
expect(await mem.getWorkingMemory({ threadId: 'unknown' })).toBeNull();
|
||||
});
|
||||
|
||||
it('saves and retrieves working memory keyed by threadId', async () => {
|
||||
const mem = new InMemoryMemory();
|
||||
await mem.saveWorkingMemory({ threadId: 'thread-1' }, '# Thread Notes');
|
||||
expect(await mem.getWorkingMemory({ threadId: 'thread-1' })).toBe('# Thread Notes');
|
||||
});
|
||||
|
||||
it('isolates by threadId (thread scope)', async () => {
|
||||
const mem = new InMemoryMemory();
|
||||
await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'data for thread 1');
|
||||
await mem.saveWorkingMemory({ threadId: 'thread-2' }, 'data for thread 2');
|
||||
expect(await mem.getWorkingMemory({ threadId: 'thread-1' })).toBe('data for thread 1');
|
||||
expect(await mem.getWorkingMemory({ threadId: 'thread-2' })).toBe('data for thread 2');
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
import { expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
describeIf,
|
||||
collectStreamChunks,
|
||||
chunksOfType,
|
||||
createAgentWithBatchedInterruptibleCalls,
|
||||
createAgentWithBatchedNormalCalls,
|
||||
} from './helpers';
|
||||
import type { StreamChunk } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
describe('batched tool execution integration', () => {
|
||||
it('normal tools with bounded concurrency complete without errors (generate)', async () => {
|
||||
const agent = createAgentWithBatchedNormalCalls('anthropic', 2);
|
||||
|
||||
const result = await agent.generate(
|
||||
'Check if these three files exist: /home/a.txt, /home/b.txt, /home/c.txt. You MUST call check_file for each file using parallel tool calls in the same turn.',
|
||||
);
|
||||
|
||||
expect(result.finishReason).toBe('stop');
|
||||
expect(result.pendingSuspend).toBeUndefined();
|
||||
expect(result.toolCalls).toBeDefined();
|
||||
expect(result.toolCalls!.length).toBeGreaterThanOrEqual(3);
|
||||
|
||||
for (const tc of result.toolCalls!) {
|
||||
expect(tc.tool).toBe('check_file');
|
||||
expect(tc.output).toEqual(expect.objectContaining({ exists: true }));
|
||||
}
|
||||
});
|
||||
|
||||
it('normal tools with bounded concurrency complete without errors (stream)', async () => {
|
||||
const agent = createAgentWithBatchedNormalCalls('anthropic', 2);
|
||||
|
||||
const { stream: fullStream } = await agent.stream(
|
||||
'Check if these three files exist: /home/a.txt, /home/b.txt, /home/c.txt. You MUST call check_file for each file using parallel tool calls in the same turn.',
|
||||
);
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
|
||||
const errorChunks = chunks.filter((c) => c.type === 'error');
|
||||
expect(errorChunks).toHaveLength(0);
|
||||
|
||||
const finishChunks = chunksOfType(chunks, 'finish');
|
||||
expect(finishChunks.length).toBe(1);
|
||||
expect(finishChunks[0].finishReason).toBe('stop');
|
||||
|
||||
expect(chunks.filter((c) => c.type === 'tool-call-suspended')).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('bounded concurrency suspends first batch and saves unexecuted tools, then resumes all (generate)', async () => {
|
||||
const agent = createAgentWithBatchedInterruptibleCalls('anthropic', 2);
|
||||
|
||||
const first = await agent.generate(
|
||||
'Delete these three files: /tmp/a.txt, /tmp/b.txt, /tmp/c.txt. You MUST call delete_file for each file using parallel tool calls in the same turn. After deleting, confirm success.',
|
||||
);
|
||||
|
||||
expect(first.finishReason).toBe('tool-calls');
|
||||
expect(first.pendingSuspend).toBeDefined();
|
||||
|
||||
// With concurrency=2 and 3 tools: batch 1 runs 2 tools (both suspend),
|
||||
// batch 2 (1 tool) is skipped. So we get 2 suspended + 1 unexecuted.
|
||||
expect(first.pendingSuspend!.length).toBe(2);
|
||||
|
||||
// Resume each suspension one at a time until the LLM loop continues.
|
||||
// The unexecuted tools from later batches should run during resume
|
||||
// and suspend in turn, so we expect multiple resume cycles.
|
||||
let result = first;
|
||||
let resumeCount = 0;
|
||||
|
||||
while (result.pendingSuspend && result.pendingSuspend.length > 0) {
|
||||
const { runId, toolCallId } = result.pendingSuspend[0];
|
||||
result = await agent.resume('generate', { approved: true }, { runId, toolCallId });
|
||||
resumeCount++;
|
||||
|
||||
if (resumeCount > 10) {
|
||||
throw new Error('Too many resume cycles — likely an infinite loop');
|
||||
}
|
||||
}
|
||||
|
||||
// All tools should eventually be resolved
|
||||
expect(result.finishReason).toBe('stop');
|
||||
expect(result.pendingSuspend).toBeUndefined();
|
||||
expect(resumeCount).toBeGreaterThanOrEqual(2);
|
||||
});
|
||||
|
||||
it('bounded concurrency suspends first batch and saves unexecuted tools, then resumes all (stream)', async () => {
|
||||
const agent = createAgentWithBatchedInterruptibleCalls('anthropic', 2);
|
||||
|
||||
const { stream: fullStream } = await agent.stream(
|
||||
'Delete these three files: /tmp/a.txt, /tmp/b.txt, /tmp/c.txt. You MUST call delete_file for each file using parallel tool calls in the same turn. After deleting, tell me if you succeeded.',
|
||||
);
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
let pendingSuspensions = chunksOfType(chunks, 'tool-call-suspended') as Array<
|
||||
StreamChunk & { type: 'tool-call-suspended' }
|
||||
>;
|
||||
|
||||
expect(pendingSuspensions.length).toBe(2);
|
||||
|
||||
let resumeCount = 0;
|
||||
|
||||
while (pendingSuspensions.length > 0) {
|
||||
const next = pendingSuspensions[0];
|
||||
const resumedStream = await agent.resume(
|
||||
'stream',
|
||||
{ approved: true },
|
||||
{ runId: next.runId!, toolCallId: next.toolCallId! },
|
||||
);
|
||||
|
||||
const resumedChunks = await collectStreamChunks(resumedStream.stream);
|
||||
pendingSuspensions = chunksOfType(resumedChunks, 'tool-call-suspended') as Array<
|
||||
StreamChunk & { type: 'tool-call-suspended' }
|
||||
>;
|
||||
resumeCount++;
|
||||
|
||||
if (pendingSuspensions.length === 0) {
|
||||
const errorChunks = resumedChunks.filter((c) => c.type === 'error');
|
||||
expect(errorChunks).toHaveLength(0);
|
||||
|
||||
const finishChunks = chunksOfType(resumedChunks, 'finish');
|
||||
expect(finishChunks.length).toBeGreaterThan(0);
|
||||
expect(finishChunks[0].finishReason).not.toBe('error');
|
||||
}
|
||||
|
||||
if (resumeCount > 10) {
|
||||
throw new Error('Too many resume cycles — likely an infinite loop');
|
||||
}
|
||||
}
|
||||
|
||||
expect(resumeCount).toBeGreaterThanOrEqual(2);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,213 @@
|
|||
import { expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
describeIf,
|
||||
collectStreamChunks,
|
||||
chunksOfType,
|
||||
createAgentWithConcurrentInterruptibleCalls,
|
||||
createAgentWithConcurrentMixedTools,
|
||||
collectTextDeltas,
|
||||
} from './helpers';
|
||||
import { isLlmMessage, type StreamChunk } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
describe('concurrent tool execution integration', () => {
|
||||
it('suspends all interruptible tool calls concurrently and returns them as an array (generate)', async () => {
|
||||
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
|
||||
|
||||
const result = await agent.generate(
|
||||
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls.',
|
||||
);
|
||||
|
||||
expect(result.finishReason).toBe('tool-calls');
|
||||
expect(result.pendingSuspend).toBeDefined();
|
||||
// With concurrent execution, ALL interruptible tool calls suspend at once
|
||||
expect(result.pendingSuspend!.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
const toolNames = result.pendingSuspend!.map((s) => s.toolName);
|
||||
expect(toolNames.every((n) => n === 'delete_file')).toBe(true);
|
||||
|
||||
// All entries share the same runId
|
||||
const runIds = new Set(result.pendingSuspend!.map((s) => s.runId));
|
||||
expect(runIds.size).toBe(1);
|
||||
|
||||
// Each entry has a unique toolCallId and a suspendPayload
|
||||
const toolCallIds = result.pendingSuspend!.map((s) => s.toolCallId);
|
||||
expect(new Set(toolCallIds).size).toBe(result.pendingSuspend!.length);
|
||||
|
||||
for (const s of result.pendingSuspend!) {
|
||||
expect(s.suspendPayload).toEqual(
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
|
||||
expect.objectContaining({ message: expect.any(String), severity: 'destructive' }),
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
it('suspends all interruptible tool calls concurrently and emits multiple chunks (stream)', async () => {
|
||||
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
|
||||
|
||||
const { stream: fullStream } = await agent.stream(
|
||||
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls.',
|
||||
);
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
|
||||
|
||||
// With concurrent execution, ALL suspensions are emitted before finish
|
||||
expect(suspendedChunks.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
// Each suspended chunk has a unique toolCallId
|
||||
const toolCallIds = suspendedChunks.map((c) => c.toolCallId);
|
||||
expect(new Set(toolCallIds).size).toBe(suspendedChunks.length);
|
||||
|
||||
// All share the same runId
|
||||
const runIds = new Set(suspendedChunks.map((c) => c.runId));
|
||||
expect(runIds.size).toBe(1);
|
||||
|
||||
// A single finish chunk follows the suspended chunks
|
||||
const finishChunks = chunksOfType(chunks, 'finish');
|
||||
expect(finishChunks.length).toBe(1);
|
||||
expect(finishChunks[0].finishReason).toBe('tool-calls');
|
||||
});
|
||||
|
||||
it('resume resolves one tool at a time, carrying forward the rest (generate)', async () => {
|
||||
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
|
||||
|
||||
const first = await agent.generate(
|
||||
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls.',
|
||||
);
|
||||
|
||||
expect(first.pendingSuspend!.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
const { runId } = first.pendingSuspend![0];
|
||||
const firstToolCallId = first.pendingSuspend![0].toolCallId;
|
||||
|
||||
// Resume the first tool
|
||||
const second = await agent.resume(
|
||||
'generate',
|
||||
{ approved: true },
|
||||
{ runId, toolCallId: firstToolCallId },
|
||||
);
|
||||
|
||||
// The remaining tool(s) should still be pending
|
||||
expect(second.pendingSuspend).toBeDefined();
|
||||
expect(second.pendingSuspend!.length).toBe(first.pendingSuspend!.length - 1);
|
||||
|
||||
// The resumed tool should NOT be in the remaining list
|
||||
const remainingIds = second.pendingSuspend!.map((s) => s.toolCallId);
|
||||
expect(remainingIds).not.toContain(firstToolCallId);
|
||||
});
|
||||
|
||||
it('resumes all suspended tools one by one until the LLM loop continues (stream)', async () => {
|
||||
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
|
||||
|
||||
const { stream: fullStream } = await agent.stream(
|
||||
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls. After deleting all files, tell me if you succeeded.',
|
||||
);
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
|
||||
expect(suspendedChunks.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
// Resume each one until no suspensions remain
|
||||
let pendingSuspensions = suspendedChunks as Array<
|
||||
StreamChunk & { type: 'tool-call-suspended' }
|
||||
>;
|
||||
|
||||
while (pendingSuspensions.length > 0) {
|
||||
const next = pendingSuspensions[0];
|
||||
const resumedStream = await agent.resume(
|
||||
'stream',
|
||||
{ approved: true },
|
||||
{ runId: next.runId!, toolCallId: next.toolCallId! },
|
||||
);
|
||||
|
||||
const resumedChunks = await collectStreamChunks(resumedStream.stream);
|
||||
pendingSuspensions = chunksOfType(resumedChunks, 'tool-call-suspended');
|
||||
|
||||
// If there are no more suspensions, the LLM should have produced text
|
||||
if (pendingSuspensions.length === 0) {
|
||||
const errorChunks = resumedChunks.filter((c) => c.type === 'error');
|
||||
expect(errorChunks).toHaveLength(0);
|
||||
|
||||
const finishChunks = chunksOfType(resumedChunks, 'finish');
|
||||
expect(finishChunks.length).toBeGreaterThan(0);
|
||||
expect(finishChunks[0].finishReason).not.toBe('error');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it('auto-executes non-interruptible tools concurrently while suspending interruptible ones', async () => {
|
||||
const agent = createAgentWithConcurrentMixedTools('anthropic');
|
||||
|
||||
const { stream: fullStream } = await agent.stream(
|
||||
'You must call both tools in parallel: call list_files with dir="/home" AND call delete_file with path="/home/readme.md". Do not skip either tool.',
|
||||
);
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
|
||||
// list_files should auto-execute — its result should appear as a message chunk
|
||||
const toolResultChunks = chunks.filter(
|
||||
(c) =>
|
||||
c.type === 'message' &&
|
||||
isLlmMessage(c.message) &&
|
||||
c.message.content.some((p) => p.type === 'tool-result'),
|
||||
);
|
||||
|
||||
// delete_file should be suspended
|
||||
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
|
||||
const deleteSuspended = suspendedChunks.find((c) => c.toolName === 'delete_file');
|
||||
|
||||
expect(deleteSuspended).toBeDefined();
|
||||
expect(toolResultChunks.length).toBeGreaterThan(0);
|
||||
// If the LLM issued both tool calls in parallel:
|
||||
if (deleteSuspended && toolResultChunks.length > 0) {
|
||||
expect(deleteSuspended.toolName).toBe('delete_file');
|
||||
expect(deleteSuspended.suspendPayload).toEqual(
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
|
||||
expect.objectContaining({ message: expect.any(String) }),
|
||||
);
|
||||
|
||||
// list_files result should be present even though delete_file suspended
|
||||
const listResult = toolResultChunks.find(
|
||||
(c) =>
|
||||
c.type === 'message' &&
|
||||
isLlmMessage(c.message) &&
|
||||
c.message.content.some((p) => p.type === 'tool-result' && p.toolName === 'list_files'),
|
||||
);
|
||||
expect(listResult).toBeDefined();
|
||||
}
|
||||
});
|
||||
|
||||
it('generate: resumes all tools and receives a final text response', async () => {
|
||||
const agent = createAgentWithConcurrentInterruptibleCalls('anthropic');
|
||||
|
||||
let result = await agent.generate(
|
||||
'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls. After deleting, confirm success.',
|
||||
);
|
||||
|
||||
// Iterate through all pending suspensions
|
||||
while (result.pendingSuspend && result.pendingSuspend.length > 0) {
|
||||
const { runId, toolCallId } = result.pendingSuspend[0];
|
||||
result = await agent.resume('generate', { approved: true }, { runId, toolCallId });
|
||||
}
|
||||
|
||||
// After all tools resumed, the agent should complete with a text response
|
||||
expect(result.finishReason).toBe('stop');
|
||||
expect(result.pendingSuspend).toBeUndefined();
|
||||
|
||||
const text = collectTextDeltas(
|
||||
result.messages
|
||||
.filter((m) => 'role' in m && m.role === 'assistant')
|
||||
.flatMap((m) =>
|
||||
'content' in m
|
||||
? m.content
|
||||
.filter((c) => c.type === 'text')
|
||||
.map((c) => ({ type: 'text-delta' as const, delta: c.text }))
|
||||
: [],
|
||||
),
|
||||
);
|
||||
expect(text.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
import { expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { describeIf, getModel } from './helpers';
|
||||
import { Agent, Memory, Tool } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
describe('custom message survives suspend/resume', () => {
|
||||
it('preserves custom tool message in stream after resume + complete', async () => {
|
||||
const memory = new Memory().storage('memory').lastMessages(20);
|
||||
|
||||
const deleteTool = new Tool('delete_file')
|
||||
.description('Delete a file at the given path')
|
||||
.input(
|
||||
z.object({
|
||||
path: z.string().describe('File path to delete'),
|
||||
}),
|
||||
)
|
||||
.output(z.object({ deleted: z.boolean(), path: z.string() }))
|
||||
.suspend(z.object({ message: z.string() }))
|
||||
.resume(z.object({ approved: z.boolean() }))
|
||||
.handler(async ({ path }, ctx) => {
|
||||
if (!ctx.resumeData) {
|
||||
return await ctx.suspend({ message: `Delete "${path}"?` });
|
||||
}
|
||||
if (!ctx.resumeData.approved) return { deleted: false, path };
|
||||
return { deleted: true, path };
|
||||
})
|
||||
.toMessage((output) => ({
|
||||
type: 'custom' as const,
|
||||
data: {
|
||||
dummy: `deleted:${(output as { path: string }).path}`,
|
||||
},
|
||||
}));
|
||||
|
||||
const agent = new Agent('custom-msg-suspend-resume-stream-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are a file manager. When asked to delete files, use the delete_file tool. Be concise.',
|
||||
)
|
||||
.tool(deleteTool)
|
||||
.memory(memory)
|
||||
.checkpoint('memory');
|
||||
|
||||
const threadId = `test-custom-msg-stream-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'test-user' } };
|
||||
|
||||
// Turn 1: stream, agent suspends
|
||||
const result1 = await agent.stream('Delete the file /tmp/stream-test.txt', options);
|
||||
const reader1 = result1.stream.getReader();
|
||||
const chunks1: Array<{ type: string; [key: string]: unknown }> = [];
|
||||
while (true) {
|
||||
const { done, value } = await reader1.read();
|
||||
if (done) break;
|
||||
chunks1.push(value as { type: string; [key: string]: unknown });
|
||||
}
|
||||
|
||||
const suspendedChunk = chunks1.find((c) => c.type === 'tool-call-suspended') as
|
||||
| { type: 'tool-call-suspended'; runId: string; toolCallId: string }
|
||||
| undefined;
|
||||
expect(suspendedChunk).toBeDefined();
|
||||
|
||||
// Resume with approval and get the resumed stream
|
||||
const result2 = await agent.resume(
|
||||
'stream',
|
||||
{ approved: true },
|
||||
{ runId: suspendedChunk!.runId, toolCallId: suspendedChunk!.toolCallId },
|
||||
);
|
||||
|
||||
const reader2 = result2.stream.getReader();
|
||||
const chunks2: Array<{ type: string; [key: string]: unknown }> = [];
|
||||
while (true) {
|
||||
const { done, value } = await reader2.read();
|
||||
if (done) break;
|
||||
chunks2.push(value as { type: string; [key: string]: unknown });
|
||||
}
|
||||
|
||||
// The custom message must appear in the resumed stream
|
||||
const customChunk = chunks2.find(
|
||||
(c) =>
|
||||
c.type === 'message' &&
|
||||
(c.message as { type?: string }).type === 'custom' &&
|
||||
'data' in (c.message as object) &&
|
||||
'dummy' in (c.message as { data: { dummy: string } }).data,
|
||||
) as { type: 'message'; message: { type: 'custom'; data: { dummy: string } } } | undefined;
|
||||
|
||||
expect(customChunk).toBeDefined();
|
||||
expect(customChunk!.message.data.dummy).toContain('deleted:');
|
||||
expect(customChunk!.message.data.dummy).toContain('/tmp/stream-test.txt');
|
||||
});
|
||||
});
|
||||
378
packages/@n8n/agents/src/__tests__/integration/evaluate.test.ts
Normal file
378
packages/@n8n/agents/src/__tests__/integration/evaluate.test.ts
Normal file
|
|
@ -0,0 +1,378 @@
|
|||
import { expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { createAgentWithInterruptibleTool, describeIf, getModel } from './helpers';
|
||||
import { parseJudgeResponse } from '../../evals/parse-judge-response';
|
||||
import { Agent, Tool, Eval, evaluate, evals } from '../../index';
|
||||
|
||||
/**
|
||||
* Create a fruit-bowl agent with a tool that generates random fruit coordinates.
|
||||
*/
|
||||
function createFruitBowlAgent(provider: 'anthropic' | 'openai'): Agent {
|
||||
const createFruitBowlTool = new Tool('create_fruit_bowl')
|
||||
.description(
|
||||
'Generate a fruit bowl with random 3D coordinates for fruits. Always use this tool when asked to create a fruit bowl.',
|
||||
)
|
||||
.input(
|
||||
z.object({
|
||||
num_apples: z.number().optional().describe('Number of apples (default: 3)'),
|
||||
}),
|
||||
)
|
||||
.handler(async (input) => {
|
||||
const numApples = input.num_apples ?? 3;
|
||||
const fruits = Array.from({ length: numApples }, () => ({
|
||||
type: 'apple',
|
||||
x: Math.round((Math.random() * 20 - 10) * 10) / 10,
|
||||
y: Math.round((Math.random() * 20 - 10) * 10) / 10,
|
||||
z: Math.round((Math.random() * 20 - 10) * 10) / 10,
|
||||
}));
|
||||
return { fruits };
|
||||
});
|
||||
|
||||
return new Agent('fruit-bowl-bot')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a fruit bowl generator. When asked to create a fruit bowl, use the create_fruit_bowl tool and then describe the contents including each fruit type and its x, y, z coordinates.',
|
||||
)
|
||||
.tool(createFruitBowlTool);
|
||||
}
|
||||
|
||||
// Run this suite only when ANTHROPIC_API_KEY is set (live-API tests); skipped otherwise.
const describe = describeIf('anthropic');
|
||||
|
||||
// Integration suite for evaluate(): runs real agents against small datasets and
// scores each run with deterministic `.check()` evals and LLM-as-judge evals.
// NOTE(review): these tests hit a live model endpoint, so assertions are kept
// tolerant (length/containment checks) where output wording varies.
describe('evaluate() integration', () => {
	it('runs deterministic evals against a fruit bowl agent', async () => {
		// Deterministic eval: pass iff the response names apples AND includes a
		// decimal number (taken as evidence of a coordinate).
		const mentionsFruit = new Eval('mentions-fruit')
			.description('Check if response mentions apples with coordinates')
			.check(({ output }) => {
				const lower = output.toLowerCase();
				const hasApple = lower.includes('apple');
				const hasCoord = /\d+\.\d/.test(output);
				return {
					pass: hasApple && hasCoord,
					reasoning:
						hasApple && hasCoord
							? 'Mentions apples with coordinates'
							: hasApple
								? 'Mentions apples but no coordinates'
								: 'No mention of apples',
				};
			});

		// Deterministic eval: pass iff the create_fruit_bowl tool shows up in toolCalls.
		const usedTool = new Eval('used-tool')
			.description('Check if create_fruit_bowl tool was called')
			.check(({ toolCalls }) => {
				const used = (toolCalls ?? []).some((tc) => tc.tool === 'create_fruit_bowl');
				return {
					pass: used,
					reasoning: used ? 'Tool was called' : 'Tool was NOT called',
				};
			});

		const agent = createFruitBowlAgent('anthropic');

		const results = await evaluate(agent, {
			dataset: [{ input: 'Create a fruit bowl with 3 apples' }],
			evals: [mentionsFruit, usedTool],
		});

		expect(results.runs).toHaveLength(1);

		const run = results.runs[0];
		expect(run.output).toBeTruthy();

		expect(run.scores['mentions-fruit'].pass).toBe(true);
		expect(run.scores['used-tool'].pass).toBe(true);

		// summary aggregates per-eval totals across all runs.
		expect(results.summary['mentions-fruit'].total).toBe(1);
		expect(results.summary['used-tool'].passed).toBe(1);
	});

	it('runs multiple dataset rows in parallel', async () => {
		const hasContent = new Eval('has-content')
			.description('Check response is non-empty')
			.check(({ output }) => ({
				pass: output.length > 10,
				reasoning: `Response length: ${output.length}`,
			}));

		const agent = createFruitBowlAgent('anthropic');

		// Two dataset rows -> two runs; evaluate() handles them concurrently.
		const results = await evaluate(agent, {
			dataset: [
				{ input: 'Create a fruit bowl with 2 apples' },
				{ input: 'Create a fruit bowl with 5 apples' },
			],
			evals: [hasContent],
		});

		expect(results.runs).toHaveLength(2);
		expect(results.summary['has-content'].total).toBe(2);
		expect(results.summary['has-content'].passed).toBe(2);
	});

	it('runs built-in string similarity eval', async () => {
		// Agent is instructed to echo a fixed answer so the built-in
		// string-similarity eval can compare against `expected`.
		const agent = new Agent('echo-agent')
			.model(getModel('anthropic'))
			.instructions(
				'When asked "What is the capital of France?", reply with exactly: "Paris". Nothing else.',
			);

		const similarity = evals.stringSimilarity();

		const results = await evaluate(agent, {
			dataset: [{ input: 'What is the capital of France?', expected: 'Paris' }],
			evals: [similarity],
		});

		expect(results.runs).toHaveLength(1);
		expect(results.runs[0].scores['string-similarity'].pass).toBe(true);
	});

	it('runs LLM-as-judge correctness eval', async () => {
		const agent = new Agent('math-agent')
			.model(getModel('anthropic'))
			.instructions('Answer math questions with just the number. No explanation.');

		// Built-in correctness eval uses a second LLM call to judge the answer.
		const correctness = evals.correctness().model(getModel('anthropic'));

		const results = await evaluate(agent, {
			dataset: [{ input: 'What is 2 + 2?', expected: '4' }],
			evals: [correctness],
		});

		expect(results.runs).toHaveLength(1);
		expect(results.runs[0].scores['correctness'].pass).toBe(true);
		expect(results.runs[0].scores['correctness'].reasoning).toBeTruthy();
	});

	it('runs LLM correctness eval on fruit bowl agent with expected output', async () => {
		const agent = createFruitBowlAgent('anthropic');

		const correctness = evals.correctness().model(getModel('anthropic'));

		// Custom judge eval: builds its own prompt and parses the judge's JSON
		// verdict via parseJudgeResponse().
		const domainHelpfulness = new Eval('domain-helpfulness')
			.description('Judge helpfulness in the context of a fruit-picking robot simulation')
			.model(getModel('anthropic'))
			.judge(async ({ input, output, llm }) => {
				const result = await llm(
					[
						'You are evaluating a response from a simple fruit-picking robot simulation tool.',
						'This is a demo/toy agent. The robot generates fruit bowls with 3D coordinates.',
						'Judge ONLY whether the response fulfills what the user asked for — not production quality.',
						'',
						`User request: ${input}`,
						`Robot response: ${output}`,
						'',
						'Did the response deliver what was asked?',
						'Respond with ONLY JSON (no markdown fences): {"pass": true/false, "reasoning": "<explanation>"}',
					].join('\n'),
				);
				return parseJudgeResponse(result.text);
			});

		const results = await evaluate(agent, {
			dataset: [
				{
					input: 'Create a fruit bowl',
					expected: 'A fruit bowl with a number of apples and their coordinates',
				},
			],
			evals: [correctness, domainHelpfulness],
		});

		expect(results.runs).toHaveLength(1);

		const run = results.runs[0];
		expect(run.output.toLowerCase()).toContain('apple');

		expect(run.scores['correctness'].pass).toBe(true);
		expect(run.scores['correctness'].reasoning).toBeTruthy();

		expect(run.scores['domain-helpfulness'].pass).toBe(true);
		expect(run.scores['domain-helpfulness'].reasoning).toBeTruthy();
	});

	it('auto-resumes interruptible tool calls during eval', async () => {
		// NOTE(review): dynamic import here while sibling helpers are imported
		// statically at the top — presumably intentional; consider unifying.
		const { createAgentWithMixedTools } = await import('./helpers');
		const agent = createAgentWithMixedTools('anthropic');

		const usedTool = new Eval('used-list-tool')
			.description('Check if list_files was called')
			.check(({ toolCalls }) => {
				const used = (toolCalls ?? []).some((tc) => tc.tool === 'list_files');
				return {
					pass: used,
					reasoning: used ? 'Called list_files' : 'Did not call list_files',
				};
			});

		const hasOutput = new Eval('has-output')
			.description('Check response is non-empty')
			.check(({ output }) => ({
				pass: output.length > 5,
				reasoning: `Output length: ${output.length}`,
			}));

		const results = await evaluate(agent, {
			dataset: [{ input: 'List files in /home' }],
			evals: [usedTool, hasOutput],
		});

		expect(results.runs).toHaveLength(1);
		expect(results.runs[0].scores['used-list-tool'].pass).toBe(true);
		expect(results.runs[0].scores['has-output'].pass).toBe(true);
	});

	it('provides tool call inputs and outputs as JSON objects, not strings', async () => {
		const agent = createFruitBowlAgent('anthropic');

		// Structural eval: every recorded tool call must carry parsed JSON
		// values, not serialized strings.
		const toolTypesEval = new Eval('tool-types')
			.description('Verify tool call inputs/outputs are JSON objects')
			.check(({ toolCalls }) => {
				if (!toolCalls || toolCalls.length === 0) {
					return { pass: false, reasoning: 'No tool calls' };
				}
				for (const tc of toolCalls) {
					if (typeof tc.input === 'string') {
						return { pass: false, reasoning: `Tool "${tc.tool}" input is a string: ${tc.input}` };
					}
					if (typeof tc.output === 'string') {
						return { pass: false, reasoning: `Tool "${tc.tool}" output is a string: ${tc.output}` };
					}
				}
				return { pass: true, reasoning: 'All tool inputs/outputs are JSON objects' };
			});

		const results = await evaluate(agent, {
			dataset: [{ input: 'Create a fruit bowl with 2 apples' }],
			evals: [toolTypesEval],
		});

		expect(results.runs).toHaveLength(1);
		expect(results.runs[0].scores['tool-types'].pass).toBe(true);
		expect(results.runs[0].scores['tool-types'].reasoning).toContain('JSON objects');
	});

	it('resume("generate") result includes the resumed tool call in toolCalls', async () => {
		const agent = createAgentWithInterruptibleTool('anthropic');

		// First generate: agent suspends on delete_file
		const first = await agent.generate('Delete the file /tmp/test.txt');

		expect(first.pendingSuspend).toBeDefined();
		const { runId, toolCallId } = first.pendingSuspend![0];

		// Resume with approval
		const resumed = await agent.resume('generate', { approved: true }, { runId, toolCallId });

		// The resumed tool call must appear in toolCalls.
		// Bug: toolCalls is undefined or empty because runGenerateLoop() starts
		// with a fresh toolCallSummary and the resume-phase tool execution is
		// never captured.
		expect(resumed.toolCalls).toBeDefined();
		expect(resumed.toolCalls!.length).toBeGreaterThan(0);

		const deletedCall = resumed.toolCalls!.find((tc) => tc.tool === 'delete_file');
		expect(deletedCall).toBeDefined();
		expect(deletedCall!.output).toMatchObject({ deleted: true, path: '/tmp/test.txt' });
	});

	it('resume("generate") result includes the resumed tool call when denied', async () => {
		const agent = createAgentWithInterruptibleTool('anthropic');

		const first = await agent.generate('Delete the file /tmp/secret.txt');
		expect(first.pendingSuspend).toBeDefined();
		const { runId, toolCallId } = first.pendingSuspend![0];

		const resumed = await agent.resume('generate', { approved: false }, { runId, toolCallId });

		expect(resumed.toolCalls).toBeDefined();
		const deletedCall = resumed.toolCalls!.find((tc) => tc.tool === 'delete_file');
		expect(deletedCall).toBeDefined();
		// denied: deleted should be false
		expect(deletedCall!.output).toMatchObject({ deleted: false });
	});

	it('evaluate() includes HITL tool calls in toolCalls passed to eval scorers', async () => {
		const agent = createAgentWithInterruptibleTool('anthropic');

		const sawDeleteCall = new Eval('saw-delete-call')
			.description('Check that delete_file tool call appears in toolCalls after auto-resume')
			.check(({ toolCalls }) => {
				const found = (toolCalls ?? []).some((tc) => tc.tool === 'delete_file');
				return {
					pass: found,
					reasoning: found
						? 'delete_file present in toolCalls'
						: `delete_file missing — toolCalls: ${JSON.stringify(toolCalls ?? [])}`,
				};
			});

		const results = await evaluate(agent, {
			dataset: [
				{
					input: 'Delete the file /tmp/test.txt',
					// auto-resume with approved: true (default) so the tool completes
				},
			],
			evals: [sawDeleteCall],
		});

		expect(results.runs).toHaveLength(1);
		// Bug: this fails because result.toolCalls is empty after resume,
		// so the eval scorer receives toolCalls=[] and pass=false.
		expect(results.runs[0].scores['saw-delete-call'].pass).toBe(true);
		expect(results.runs[0].scores['saw-delete-call'].reasoning).toContain('present');
	});

	it('evaluate() output is non-empty when agent only uses an interruptible tool (no text response)', async () => {
		// If the agent produces no text and only tool output, evaluate() uses
		// toolCalls to build the composite output string. With the bug, toolCalls
		// is empty after resume and output becomes "".
		const silentAgent = new Agent('silent-tool-agent')
			.model(getModel('anthropic'))
			.instructions(
				'When asked to delete a file, call delete_file and return ONLY the raw JSON tool result. Do not add any explanatory text — your entire response must be the tool result only.',
			)
			.tool(
				new Tool('delete_file')
					.description('Delete a file')
					.input(z.object({ path: z.string() }))
					.output(z.object({ deleted: z.boolean(), path: z.string() }))
					.suspend(z.object({ message: z.string(), severity: z.string() }))
					.resume(z.object({ approved: z.boolean() }))
					.handler(async ({ path }, ctx) => {
						if (!ctx.resumeData) {
							return await ctx.suspend({
								message: `Delete "${path}"?`,
								severity: 'destructive',
							});
						}
						return { deleted: ctx.resumeData.approved, path };
					}),
			)
			.checkpoint('memory');

		const hasOutput = new Eval('has-output')
			.description('Composite output must be non-empty after HITL auto-resume')
			.check(({ output, toolCalls }) => {
				const pass = output.length > 0;
				return {
					pass,
					reasoning: pass
						? `output="${output}"`
						: `output is empty; toolCalls=${JSON.stringify(toolCalls ?? [])}`,
				};
			});

		const results = await evaluate(silentAgent, {
			dataset: [{ input: 'Delete /tmp/test.txt' }],
			evals: [hasOutput],
		});

		expect(results.runs).toHaveLength(1);
		// Bug: output is "" because toolCalls is empty, so the fallback path in
		// evaluate() that builds output from tool outputs is never triggered.
		expect(results.runs[0].scores['has-output'].pass).toBe(true);
	});
});
|
||||
|
|
@ -0,0 +1,279 @@
|
|||
import { expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { collectStreamChunks, describeIf, getModel } from './helpers';
|
||||
import { Agent, AgentEvent, Tool, type AgentEventData } from '../../index';
|
||||
|
||||
// Run this suite only when ANTHROPIC_API_KEY is set (live-API tests); skipped otherwise.
const describe = describeIf('anthropic');
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function createSimpleAgent(provider: 'openai' | 'anthropic' = 'anthropic'): Agent {
|
||||
return new Agent('events-test-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions('You are a concise assistant. Reply in one short sentence.');
|
||||
}
|
||||
|
||||
function createAgentWithTool(provider: 'openai' | 'anthropic' = 'anthropic'): Agent {
|
||||
const addTool = new Tool('add_numbers')
|
||||
.description('Add two numbers together')
|
||||
.input(z.object({ a: z.number(), b: z.number() }))
|
||||
.handler(async ({ a, b }) => ({ result: a + b }));
|
||||
|
||||
return new Agent('events-tool-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions('You are a calculator. Use the add_numbers tool when asked to add.')
|
||||
.tool(addTool);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Event system — generate path
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Lifecycle events on the generate() path: Agent/Turn/ToolExecution start+end
// hooks registered via agent.on(). Tests observe ordering, payloads, and
// multi-handler fan-out against a live model.
describe('event system — generate', () => {
	it('emits AgentStart and AgentEnd around a generate() call', async () => {
		const agent = createSimpleAgent();

		const fired: AgentEvent[] = [];
		agent.on(AgentEvent.AgentStart, () => {
			fired.push(AgentEvent.AgentStart);
		});
		agent.on(AgentEvent.AgentEnd, () => {
			fired.push(AgentEvent.AgentEnd);
		});

		await agent.generate('Say hello');

		// Both events fire, and start strictly precedes end.
		expect(fired).toContain(AgentEvent.AgentStart);
		expect(fired).toContain(AgentEvent.AgentEnd);
		expect(fired.indexOf(AgentEvent.AgentStart)).toBeLessThan(fired.indexOf(AgentEvent.AgentEnd));
	});

	it('emits TurnStart and TurnEnd for each LLM call', async () => {
		const agent = createSimpleAgent();

		const fired: AgentEvent[] = [];
		agent.on(AgentEvent.TurnStart, () => fired.push(AgentEvent.TurnStart));
		agent.on(AgentEvent.TurnEnd, () => fired.push(AgentEvent.TurnEnd));

		await agent.generate('Say hello');

		expect(fired).toContain(AgentEvent.TurnStart);
		expect(fired).toContain(AgentEvent.TurnEnd);
	});

	it('emits ToolExecutionStart and ToolExecutionEnd when a tool runs', async () => {
		const agent = createAgentWithTool();

		const toolEvents: AgentEventData[] = [];
		agent.on(AgentEvent.ToolExecutionStart, (data) => toolEvents.push(data));
		agent.on(AgentEvent.ToolExecutionEnd, (data) => toolEvents.push(data));

		await agent.generate('What is 7 plus 3?');

		const starts = toolEvents.filter((e) => e.type === AgentEvent.ToolExecutionStart);
		const ends = toolEvents.filter((e) => e.type === AgentEvent.ToolExecutionEnd);

		expect(starts.length).toBeGreaterThan(0);
		expect(ends.length).toBeGreaterThan(0);

		const start = starts[0] as AgentEventData & { type: AgentEvent.ToolExecutionStart };
		expect(start.toolName).toBe('add_numbers');

		// End payload carries the tool's actual result (7 + 3 = 10).
		const end = ends[0] as AgentEventData & { type: AgentEvent.ToolExecutionEnd };
		expect(end.isError).toBe(false);
		expect((end.result as { result: number }).result).toBe(10);
	});

	it('ToolExecutionEnd carries the correct toolCallId matching ToolExecutionStart', async () => {
		const agent = createAgentWithTool();

		const starts: Array<AgentEventData & { type: AgentEvent.ToolExecutionStart }> = [];
		const ends: Array<AgentEventData & { type: AgentEvent.ToolExecutionEnd }> = [];

		agent.on(AgentEvent.ToolExecutionStart, (data) => {
			starts.push(data as AgentEventData & { type: AgentEvent.ToolExecutionStart });
		});
		agent.on(AgentEvent.ToolExecutionEnd, (data) => {
			ends.push(data as AgentEventData & { type: AgentEvent.ToolExecutionEnd });
		});

		await agent.generate('What is 5 plus 5?');

		// Start/end events are paired one-to-one by toolCallId.
		expect(starts.length).toBeGreaterThan(0);
		expect(ends.length).toBe(starts.length);
		expect(ends[0].toolCallId).toBe(starts[0].toolCallId);
	});

	it('multiple handlers on the same event are all called', async () => {
		const agent = createSimpleAgent();

		const calls: number[] = [];
		agent.on(AgentEvent.AgentEnd, () => calls.push(1));
		agent.on(AgentEvent.AgentEnd, () => calls.push(2));
		agent.on(AgentEvent.AgentEnd, () => calls.push(3));

		await agent.generate('Say hello');

		expect(calls).toEqual(expect.arrayContaining([1, 2, 3]));
	});

	it('AgentEnd data contains the response messages', async () => {
		const agent = createSimpleAgent();

		let capturedMessages: unknown[] = [];
		agent.on(AgentEvent.AgentEnd, (data) => {
			// Narrow on the discriminant before reading the event payload.
			if (data.type === AgentEvent.AgentEnd) {
				capturedMessages = data.messages;
			}
		});

		await agent.generate('Say hello');

		expect(capturedMessages.length).toBeGreaterThan(0);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Event system — stream path
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Same lifecycle events, exercised through the stream() path; the stream must
// be fully drained before asserting, since events fire as chunks are produced.
describe('event system — stream', () => {
	it('emits AgentStart and AgentEnd around a stream() call', async () => {
		const agent = createSimpleAgent();

		const fired: AgentEvent[] = [];
		agent.on(AgentEvent.AgentStart, () => fired.push(AgentEvent.AgentStart));
		agent.on(AgentEvent.AgentEnd, () => fired.push(AgentEvent.AgentEnd));

		const { stream } = await agent.stream('Say hello');
		await collectStreamChunks(stream);

		expect(fired).toContain(AgentEvent.AgentStart);
		expect(fired).toContain(AgentEvent.AgentEnd);
		expect(fired.indexOf(AgentEvent.AgentStart)).toBeLessThan(fired.indexOf(AgentEvent.AgentEnd));
	});

	it('emits ToolExecutionStart and ToolExecutionEnd during streaming', async () => {
		const agent = createAgentWithTool();

		const toolEvents: AgentEventData[] = [];
		agent.on(AgentEvent.ToolExecutionStart, (data) => toolEvents.push(data));
		agent.on(AgentEvent.ToolExecutionEnd, (data) => toolEvents.push(data));

		const { stream } = await agent.stream('What is 4 plus 6?');
		await collectStreamChunks(stream);

		const starts = toolEvents.filter((e) => e.type === AgentEvent.ToolExecutionStart);
		expect(starts.length).toBeGreaterThan(0);

		const start = starts[0] as AgentEventData & { type: AgentEvent.ToolExecutionStart };
		expect(start.toolName).toBe('add_numbers');
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// getState()
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// agent.getState() snapshots: status transitions (idle -> running -> success)
// and persistence identifiers propagated from RunOptions.
describe('getState()', () => {
	it('returns idle before first run', () => {
		const agent = createSimpleAgent();
		const state = agent.getState();
		expect(state.status).toBe('idle');
		expect(state.messageList.messages).toHaveLength(0);
	});

	it('returns success after a successful generate()', async () => {
		const agent = createSimpleAgent();
		await agent.generate('Say hello');
		const state = agent.getState();
		expect(state.status).toBe('success');
	});

	it('returns success after a completed stream()', async () => {
		const agent = createSimpleAgent();
		const { stream } = await agent.stream('Say hello');
		await collectStreamChunks(stream);
		const state = agent.getState();
		expect(state.status).toBe('success');
	});

	it('state is running during the generate loop (observed via event)', async () => {
		const agent = createSimpleAgent();

		// Sample the status from inside a TurnStart handler, i.e. mid-run.
		let stateWhileRunning: string | undefined;
		agent.on(AgentEvent.TurnStart, () => {
			stateWhileRunning = agent.getState().status;
		});

		await agent.generate('Say hello');

		expect(stateWhileRunning).toBe('running');
	});

	it('reflects resourceId and threadId from RunOptions', async () => {
		const agent = createSimpleAgent();
		await agent.generate('Say hello', {
			persistence: { resourceId: 'user-123', threadId: 'thread-abc' },
		});
		const state = agent.getState();
		expect(state.persistence?.resourceId).toBe('user-123');
		expect(state.persistence?.threadId).toBe('thread-abc');
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// asTool()
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// agent.asTool(): wrapping an agent as a callable tool — shape of the built
// tool, direct handler invocation, and use as a sub-agent from a coordinator.
describe('asTool()', () => {
	it('wraps the agent as a BuiltTool with the correct name and description', () => {
		const agent = createSimpleAgent();
		const tool = agent.asTool('A helpful assistant tool');

		// Tool name is taken from the agent name; description from the argument.
		expect(tool.name).toBe('events-test-agent');
		expect(tool.description).toBe('A helpful assistant tool');
		expect(tool.inputSchema).toBeDefined();
		expect(typeof tool.handler).toBe('function');
	});

	it('asTool handler calls the agent and returns text result', async () => {
		const agent = createSimpleAgent();
		const tool = agent.asTool('A helpful assistant tool');

		const result = await tool.handler!({ input: 'Say "pong"' }, {});

		expect(result).toHaveProperty('result');
		expect(typeof (result as { result: string }).result).toBe('string');
		expect((result as { result: string }).result.length).toBeGreaterThan(0);
	});

	it('coordinator agent can use sub-agent via asTool', async () => {
		const specialist = new Agent('specialist')
			.model(getModel('anthropic'))
			.instructions('You are a specialist. When asked, reply with exactly "SPECIALIST_RESPONSE".');

		const coordinator = new Agent('coordinator')
			.model(getModel('anthropic'))
			.instructions(
				'You coordinate tasks. Use the specialist tool to answer questions. Relay the exact response.',
			)
			.tool(specialist.asTool('A specialist agent'));

		const result = await coordinator.generate(
			'Ask the specialist for their response and tell me what they said.',
		);

		// Concatenate all assistant text content from the returned messages.
		const text = result.messages
			.filter((m) => 'role' in m && m.role === 'assistant')
			.flatMap((m) => ('content' in m ? m.content : []))
			.filter((c) => c.type === 'text')
			.map((c) => ('text' in c ? c.text : ''))
			.join('');

		expect(text.length).toBeGreaterThan(0);
	});
});
|
||||
441
packages/@n8n/agents/src/__tests__/integration/helpers.ts
Normal file
441
packages/@n8n/agents/src/__tests__/integration/helpers.ts
Normal file
|
|
@ -0,0 +1,441 @@
|
|||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
import { describe as _describe } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import {
|
||||
Agent,
|
||||
type ContentToolCall,
|
||||
type ContentToolResult,
|
||||
filterLlmMessages,
|
||||
Tool,
|
||||
type StreamChunk,
|
||||
type AgentMessage,
|
||||
} from '../../index';
|
||||
import { SqliteMemory } from '../../storage/sqlite-memory';
|
||||
|
||||
export type { StreamChunk };
|
||||
|
||||
/**
|
||||
* Returns `describe` or `describe.skip` depending on whether the API key is set.
|
||||
*/
|
||||
export function describeIf(provider: 'anthropic' | 'openai') {
|
||||
const envVar = provider === 'anthropic' ? 'ANTHROPIC_API_KEY' : 'OPENAI_API_KEY';
|
||||
return process.env[envVar] ? _describe : _describe.skip;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read all chunks from a ReadableStream into an array.
|
||||
*/
|
||||
export async function collectStreamChunks(stream: ReadableStream<unknown>): Promise<StreamChunk[]> {
|
||||
const chunks: StreamChunk[] = [];
|
||||
const reader = stream.getReader();
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
chunks.push(value as StreamChunk);
|
||||
}
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter chunks by type.
|
||||
*/
|
||||
export function chunksOfType<T extends StreamChunk['type']>(
|
||||
chunks: StreamChunk[],
|
||||
type: T,
|
||||
): Array<StreamChunk & { type: T }> {
|
||||
return chunks.filter((c) => c.type === type) as Array<StreamChunk & { type: T }>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the default model for a provider.
|
||||
*/
|
||||
export function getModel(provider: 'anthropic' | 'openai'): string {
|
||||
return provider === 'anthropic' ? 'anthropic/claude-haiku-4-5' : 'openai/gpt-4o-mini';
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a simple agent with an add_numbers tool for testing.
|
||||
*/
|
||||
export function createAgentWithAddTool(provider: 'anthropic' | 'openai'): Agent {
|
||||
const addTool = new Tool('add_numbers')
|
||||
.description('Add two numbers together and return the result')
|
||||
.input(
|
||||
z.object({
|
||||
a: z.number().describe('First number'),
|
||||
b: z.number().describe('Second number'),
|
||||
}),
|
||||
)
|
||||
.output(
|
||||
z.object({
|
||||
result: z.number().describe('The sum'),
|
||||
}),
|
||||
)
|
||||
.handler(async ({ a, b }) => ({ result: a + b }));
|
||||
|
||||
return new Agent('test-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a calculator. When asked to add numbers, use the add_numbers tool. Be concise.',
|
||||
)
|
||||
.tool(addTool);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an agent with a tool that can suspend (interrupt) for confirmation.
|
||||
*/
|
||||
export function createAgentWithInterruptibleTool(provider: 'anthropic' | 'openai'): Agent {
|
||||
const deleteTool = new Tool('delete_file')
|
||||
.description('Delete a file at the given path')
|
||||
.input(z.object({ path: z.string().describe('File path to delete') }))
|
||||
.output(z.object({ deleted: z.boolean(), path: z.string() }))
|
||||
.suspend(z.object({ message: z.string(), severity: z.string() }))
|
||||
.resume(z.object({ approved: z.boolean() }))
|
||||
.handler(async ({ path }, ctx) => {
|
||||
if (!ctx.resumeData) {
|
||||
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
|
||||
}
|
||||
if (!ctx.resumeData.approved) return { deleted: false, path };
|
||||
return { deleted: true, path };
|
||||
});
|
||||
|
||||
return new Agent('test-interrupt-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a file manager. When asked to delete a file, use the delete_file tool. Be concise.',
|
||||
)
|
||||
.tool(deleteTool)
|
||||
.checkpoint('memory');
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an agent with two tools — one interruptible, one not.
|
||||
*/
|
||||
export function createAgentWithMixedTools(provider: 'anthropic' | 'openai'): Agent {
|
||||
const listTool = new Tool('list_files')
|
||||
.description('List files in a directory')
|
||||
.input(z.object({ dir: z.string().describe('Directory path') }))
|
||||
.handler(async ({ dir }) => ({
|
||||
files: ['readme.md', 'index.ts', 'package.json'],
|
||||
dir,
|
||||
}));
|
||||
|
||||
const deleteTool = new Tool('delete_file')
|
||||
.description('Delete a file at the given path — dangerous operation')
|
||||
.input(z.object({ path: z.string().describe('File path to delete') }))
|
||||
.output(z.object({ deleted: z.boolean(), path: z.string() }))
|
||||
.suspend(z.object({ message: z.string(), severity: z.string() }))
|
||||
.resume(z.object({ approved: z.boolean() }))
|
||||
.handler(async ({ path }, ctx) => {
|
||||
if (!ctx.resumeData) {
|
||||
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
|
||||
}
|
||||
if (!ctx.resumeData.approved) return { deleted: false, path };
|
||||
return { deleted: true, path };
|
||||
});
|
||||
|
||||
return new Agent('test-mixed-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a file manager. Use list_files to list and delete_file to delete. Be concise.',
|
||||
)
|
||||
.tool(listTool)
|
||||
.tool(deleteTool)
|
||||
.checkpoint('memory');
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an agent with a tool that uses `.toContent()` to emit a custom message.
|
||||
* The tool adds two numbers; toContent produces a text MessageContent visible to the
|
||||
* user but never forwarded to the LLM.
|
||||
*/
|
||||
export function createAgentWithToContentTool(provider: 'anthropic' | 'openai'): Agent {
|
||||
const calcTool = new Tool('add_numbers')
|
||||
.description('Add two numbers together and return the result')
|
||||
.input(
|
||||
z.object({
|
||||
a: z.number().describe('First number'),
|
||||
b: z.number().describe('Second number'),
|
||||
}),
|
||||
)
|
||||
.output(z.object({ result: z.number().describe('The sum') }))
|
||||
.handler(async ({ a, b }) => ({ result: a + b }))
|
||||
.toMessage((output) => ({
|
||||
type: 'custom',
|
||||
messageType: '___dummyCustomMessage',
|
||||
data: {
|
||||
dummy: `dummy message. Tool output ${output.result}`,
|
||||
},
|
||||
}));
|
||||
|
||||
return new Agent('test-to-content-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a calculator. When asked to add numbers, use the add_numbers tool. Be concise.',
|
||||
)
|
||||
.tool(calcTool);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an agent with one interruptible tool designed for parallel-call
|
||||
* scenarios. The tool only deletes one file at a time, and the instructions
|
||||
* strongly encourage parallel tool calling.
|
||||
*/
|
||||
export function createAgentWithParallelInterruptibleCalls(provider: 'anthropic' | 'openai'): Agent {
|
||||
const deleteTool = new Tool('delete_file')
|
||||
.description('Delete a single file at the given path. Can only delete one file per call.')
|
||||
.input(z.object({ path: z.string().describe('File path to delete') }))
|
||||
.output(z.object({ deleted: z.boolean(), path: z.string() }))
|
||||
.suspend(z.object({ message: z.string(), severity: z.string() }))
|
||||
.resume(z.object({ approved: z.boolean() }))
|
||||
.handler(async ({ path }, ctx) => {
|
||||
if (!ctx.resumeData) {
|
||||
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
|
||||
}
|
||||
if (!ctx.resumeData.approved) return { deleted: false, path };
|
||||
return { deleted: true, path };
|
||||
});
|
||||
|
||||
return new Agent('test-parallel-interrupt-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a file manager. When asked to delete multiple files, you MUST call delete_file for EACH file using parallel tool calls in the same turn. Never skip a file.',
|
||||
)
|
||||
.tool(deleteTool)
|
||||
.checkpoint('memory');
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an agent with concurrent tool execution and an interruptible tool.
|
||||
* Uses `toolCallConcurrency(Infinity)` so all tool calls in a single LLM turn
|
||||
* are executed concurrently. Suspensions do not block subsequent tool calls.
|
||||
*/
|
||||
export function createAgentWithConcurrentInterruptibleCalls(
|
||||
provider: 'anthropic' | 'openai',
|
||||
): Agent {
|
||||
const deleteTool = new Tool('delete_file')
|
||||
.description('Delete a single file at the given path. Can only delete one file per call.')
|
||||
.input(z.object({ path: z.string().describe('File path to delete') }))
|
||||
.output(z.object({ deleted: z.boolean(), path: z.string() }))
|
||||
.suspend(z.object({ message: z.string(), severity: z.string() }))
|
||||
.resume(z.object({ approved: z.boolean() }))
|
||||
.handler(async ({ path }, ctx) => {
|
||||
if (!ctx.resumeData) {
|
||||
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
|
||||
}
|
||||
if (!ctx.resumeData.approved) return { deleted: false, path };
|
||||
return { deleted: true, path };
|
||||
});
|
||||
|
||||
return new Agent('test-concurrent-interrupt-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a file manager. When asked to delete multiple files, you MUST call delete_file for EACH file using parallel tool calls in the same turn. Never skip a file.',
|
||||
)
|
||||
.tool(deleteTool)
|
||||
.toolCallConcurrency(Infinity)
|
||||
.checkpoint('memory');
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an agent with concurrent tool execution mixing interruptible and
|
||||
* non-interruptible tools. `list_files` runs immediately; `delete_file` suspends.
|
||||
*/
|
||||
export function createAgentWithConcurrentMixedTools(provider: 'anthropic' | 'openai'): Agent {
|
||||
const listTool = new Tool('list_files')
|
||||
.description('List files in a directory')
|
||||
.input(z.object({ dir: z.string().describe('Directory path') }))
|
||||
.handler(async ({ dir }) => ({
|
||||
files: ['readme.md', 'index.ts', 'package.json'],
|
||||
dir,
|
||||
}));
|
||||
|
||||
const deleteTool = new Tool('delete_file')
|
||||
.description('Delete a file at the given path — dangerous operation')
|
||||
.input(z.object({ path: z.string().describe('File path to delete') }))
|
||||
.output(z.object({ deleted: z.boolean(), path: z.string() }))
|
||||
.suspend(z.object({ message: z.string(), severity: z.string() }))
|
||||
.resume(z.object({ approved: z.boolean() }))
|
||||
.handler(async ({ path }, ctx) => {
|
||||
if (!ctx.resumeData) {
|
||||
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
|
||||
}
|
||||
if (!ctx.resumeData.approved) return { deleted: false, path };
|
||||
return { deleted: true, path };
|
||||
});
|
||||
|
||||
return new Agent('test-concurrent-mixed-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a file manager. Use list_files to list and delete_file to delete. Be concise.',
|
||||
)
|
||||
.tool(listTool)
|
||||
.tool(deleteTool)
|
||||
.toolCallConcurrency(Infinity)
|
||||
.checkpoint('memory');
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an agent with bounded concurrency and an interruptible tool.
|
||||
* Uses `toolCallConcurrency(concurrency)` to control batching.
|
||||
*/
|
||||
export function createAgentWithBatchedInterruptibleCalls(
|
||||
provider: 'anthropic' | 'openai',
|
||||
concurrency: number,
|
||||
): Agent {
|
||||
const deleteTool = new Tool('delete_file')
|
||||
.description('Delete a single file at the given path. Can only delete one file per call.')
|
||||
.input(z.object({ path: z.string().describe('File path to delete') }))
|
||||
.output(z.object({ deleted: z.boolean(), path: z.string() }))
|
||||
.suspend(z.object({ message: z.string(), severity: z.string() }))
|
||||
.resume(z.object({ approved: z.boolean() }))
|
||||
.handler(async ({ path }, ctx) => {
|
||||
if (!ctx.resumeData) {
|
||||
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
|
||||
}
|
||||
if (!ctx.resumeData.approved) return { deleted: false, path };
|
||||
return { deleted: true, path };
|
||||
});
|
||||
|
||||
return new Agent('test-batched-interrupt-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a file manager. When asked to delete multiple files, you MUST call delete_file for EACH file using parallel tool calls in the same turn. Never skip a file.',
|
||||
)
|
||||
.tool(deleteTool)
|
||||
.toolCallConcurrency(concurrency)
|
||||
.checkpoint('memory');
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an agent with bounded concurrency and a non-interruptible tool.
|
||||
*/
|
||||
export function createAgentWithBatchedNormalCalls(
|
||||
provider: 'anthropic' | 'openai',
|
||||
concurrency: number,
|
||||
): Agent {
|
||||
const checkTool = new Tool('check_file')
|
||||
.description('Check if a file exists at the given path. Can only check one file per call.')
|
||||
.input(z.object({ path: z.string().describe('File path to check') }))
|
||||
.output(z.object({ exists: z.boolean(), path: z.string() }))
|
||||
.handler(async ({ path }) => ({ exists: true, path }));
|
||||
|
||||
return new Agent('test-batched-normal-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a file manager. When asked to check multiple files, you MUST call check_file for EACH file using parallel tool calls in the same turn. Never skip a file. After checking, summarize the results concisely.',
|
||||
)
|
||||
.tool(checkTool)
|
||||
.toolCallConcurrency(concurrency)
|
||||
.checkpoint('memory');
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an agent with a tool that always throws an error.
|
||||
* Used to verify that tool errors surface as LLM-visible messages.
|
||||
*/
|
||||
export function createAgentWithAlwaysErrorTool(provider: 'anthropic' | 'openai'): Agent {
|
||||
const brokenTool = new Tool('broken_tool')
|
||||
.description('Fetch data from a remote service')
|
||||
.input(z.object({ id: z.string().describe('Resource ID to fetch') }))
|
||||
.handler(async () => {
|
||||
throw new Error('Service unavailable: connection timeout');
|
||||
});
|
||||
|
||||
return new Agent('test-error-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a data fetcher. Use broken_tool to fetch data. ' +
|
||||
'If the tool fails, acknowledge the error in your response and explain what happened. Be concise.',
|
||||
)
|
||||
.tool(brokenTool);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an agent with a tool that fails on the first call and succeeds on the second.
|
||||
* Used to verify that the LLM can self-correct by retrying after seeing the error result.
|
||||
*/
|
||||
export function createAgentWithFlakyTool(provider: 'anthropic' | 'openai'): {
|
||||
agent: Agent;
|
||||
callCount: () => number;
|
||||
} {
|
||||
let calls = 0;
|
||||
|
||||
const flakyTool = new Tool('fetch_data')
|
||||
.description('Fetch data. May fail on the first attempt — retry if it does.')
|
||||
.input(z.object({ id: z.string().describe('Resource ID to fetch') }))
|
||||
.output(z.object({ id: z.string(), value: z.number() }))
|
||||
.handler(async ({ id }) => {
|
||||
calls++;
|
||||
if (calls === 1) throw new Error('Transient error: rate limit exceeded, please retry');
|
||||
return { id, value: 42 };
|
||||
});
|
||||
|
||||
const agent = new Agent('test-flaky-agent')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You are a data fetcher. Use fetch_data to fetch data. ' +
|
||||
'If the tool fails with a transient error, retry the SAME call once. Be concise.',
|
||||
)
|
||||
.tool(flakyTool);
|
||||
|
||||
return { agent, callCount: () => calls };
|
||||
}
|
||||
|
||||
export const findLastTextContent = (messages: AgentMessage[]): string | undefined => {
|
||||
return filterLlmMessages(messages)
|
||||
.reverse()
|
||||
.find((m) => m.content.find((c) => c.type === 'text'))
|
||||
?.content.find((c) => c.type === 'text')?.text;
|
||||
};
|
||||
|
||||
export const findLastToolCallContent = (messages: AgentMessage[]): ContentToolCall | undefined => {
|
||||
return filterLlmMessages(messages)
|
||||
.reverse()
|
||||
.find((m) => m.content.find((c) => c.type === 'tool-call'))
|
||||
?.content.find((c) => c.type === 'tool-call');
|
||||
};
|
||||
|
||||
export const findAllToolCalls = (messages: AgentMessage[]): ContentToolCall[] => {
|
||||
return filterLlmMessages(messages)
|
||||
.filter((m) => m.content.find((c) => c.type === 'tool-call'))
|
||||
.map((m) => m.content.filter((c) => c.type === 'tool-call'))
|
||||
.flat();
|
||||
};
|
||||
export const findAllToolResults = (messages: AgentMessage[]): ContentToolResult[] => {
|
||||
return filterLlmMessages(messages)
|
||||
.filter((m) => m.content.find((c) => c.type === 'tool-result'))
|
||||
.map((m) => m.content.find((c) => c.type === 'tool-result') as ContentToolResult);
|
||||
};
|
||||
export const collectTextDeltas = (chunks: StreamChunk[]): string => {
|
||||
return chunks
|
||||
.filter((c) => c.type === 'text-delta')
|
||||
.map((c) => c.delta)
|
||||
.join('');
|
||||
};
|
||||
|
||||
export function createSqliteMemory(): {
|
||||
memory: SqliteMemory;
|
||||
cleanup: () => void;
|
||||
url: string;
|
||||
} {
|
||||
const dbPath = path.join(
|
||||
os.tmpdir(),
|
||||
`test-${Date.now()}-${Math.random().toString(36).slice(2)}.db`,
|
||||
);
|
||||
const url = `file:${dbPath}`;
|
||||
const memory = new SqliteMemory({ url });
|
||||
return {
|
||||
memory,
|
||||
url,
|
||||
cleanup: () => {
|
||||
try {
|
||||
fs.unlinkSync(dbPath);
|
||||
} catch {
|
||||
// File may already be removed — ignore
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
|
|
@ -0,0 +1,126 @@
|
|||
/**
|
||||
* Integration tests for JSON Schema input validation on regular (non-MCP) tools.
|
||||
*
|
||||
* Covers: valid input passes through, type errors surface as tool-result errors,
|
||||
* missing required properties surface as tool-result errors, and the LLM can
|
||||
* self-correct after receiving a JSON Schema validation error.
|
||||
*
|
||||
* Tests that call agent.generate() are gated on ANTHROPIC_API_KEY.
|
||||
*/
|
||||
import { expect, it, vi } from 'vitest';
|
||||
|
||||
import { describeIf, findLastTextContent } from './helpers';
|
||||
import { Agent, filterLlmMessages } from '../../index';
|
||||
import type { BuiltTool } from '../../types/sdk/tool';
|
||||
|
||||
// Suite gate: per the file header these tests require ANTHROPIC_API_KEY;
// describeIf presumably returns a skipping describe when the key is missing —
// see ./helpers to confirm.
const describe = describeIf('anthropic');
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Build a BuiltTool whose inputSchema is a raw JSON Schema object (not Zod).
|
||||
* This mimics the shape that MCP tools use — and the scenario we want to test
|
||||
* for first-party tools that expose a JSONSchema7 directly.
|
||||
*/
|
||||
function makeJsonSchemaTool(overrides: Partial<BuiltTool> = {}): BuiltTool {
|
||||
return {
|
||||
name: 'find_user',
|
||||
description: 'Find a user by their numeric age (18–99 only).',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
age: {
|
||||
type: 'integer',
|
||||
minimum: 18,
|
||||
maximum: 99,
|
||||
description: 'User age (18–99)',
|
||||
},
|
||||
},
|
||||
required: ['age'],
|
||||
},
|
||||
handler: async (input) => {
|
||||
const { age } = input as { age: number };
|
||||
return { user: `User aged ${age}` };
|
||||
},
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// No-LLM tests: validation outcome is determined by the tool-result message
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Live-LLM suite: exercises the validation path end-to-end against Anthropic.
describe('JSON Schema validation — non-MCP tools with raw JSON Schema', () => {
	it('passes valid input to the handler and returns a successful tool result', async () => {
		// Stub the handler so we can assert on the exact arguments it receives.
		const handler = vi.fn().mockResolvedValue({ user: 'User aged 25' });
		const tool = makeJsonSchemaTool({ handler });

		const result = await new Agent('test')
			.model('anthropic/claude-haiku-4-5')
			.instructions(
				'You are a user directory. Use find_user to look up users. ' +
					'Call the tool with age=25 and then summarise the result. Be concise.',
			)
			.tool(tool)
			.generate('Find user aged 25.');

		expect(result.finishReason).toBe('stop');
		expect(result.error).toBeUndefined();

		// The handler should have been called with valid data
		expect(handler).toHaveBeenCalledWith(expect.objectContaining({ age: 25 }), expect.anything());

		// No tool-result should carry an error flag
		const allMessages = filterLlmMessages(result.messages);
		const toolResults = allMessages.flatMap((m) =>
			m.content.filter((c) => c.type === 'tool-result'),
		);
		expect(toolResults.every((r) => !r.isError)).toBe(true);
	});

	it('allows the LLM to self-correct after receiving a JSON Schema validation error', async () => {
		let callCount = 0;
		const handler = vi.fn().mockImplementation(async (input: unknown) => {
			callCount++;
			return { user: `User aged ${(input as { age: number }).age}` };
		});

		// The schema enforces age ≥ 18. The prompt asks for age 5 first, then
		// instructs the LLM to retry with 25 if validation fails.
		const result = await new Agent('age-self-correction')
			.model('anthropic/claude-haiku-4-5')
			.instructions(
				'You are a user directory. Use find_user to look up users by age. ' +
					'The age must be an integer between 18 and 99. ' +
					'If validation fails, correct the age to 25 and retry. Be very concise.',
			)
			.tool(makeJsonSchemaTool({ handler }))
			.generate(
				'Find a user aged 5. If that age is invalid, use 25 instead and retry. ' +
					'You MUST try age 5 first, and only then use 25.',
			);

		expect(result.finishReason).toBe('stop');
		expect(result.error).toBeUndefined();

		// There should be at least two tool-result messages: one error, one success
		const allMessages = filterLlmMessages(result.messages);
		const toolResultMessages = allMessages.filter((m) =>
			m.content.some((c) => c.type === 'tool-result'),
		);
		expect(toolResultMessages.length).toBeGreaterThanOrEqual(2);

		// The successful handler call should have received a valid age
		// (the age-5 attempt is rejected by validation before reaching the handler,
		// so callCount only counts calls that passed the schema).
		expect(callCount).toBeGreaterThanOrEqual(1);
		const validCallArgs = handler.mock.calls.find(
			([input]) => (input as { age: number }).age === 25,
		);
		expect(validCallArgs).toBeDefined();

		// The final LLM response should acknowledge finding a user
		const text = findLastTextContent(result.messages);
		expect(text).toBeTruthy();
	});
});
|
||||
|
|
@ -0,0 +1,252 @@
|
|||
/**
|
||||
* Unit-style tests for McpConnection.listTools() approval wrapping.
|
||||
*
|
||||
* These tests use a real in-process MCP SSE server but do NOT require an LLM.
|
||||
* They verify that the `requireApproval` field on McpServerConfig (and the
|
||||
* global `shouldRequireToolApproval` constructor flag) correctly wrap the
|
||||
* appropriate tools with a suspend/resume approval gate.
|
||||
*
|
||||
* Tool names from the test server: echo, add, image (prefixed: tools_echo, tools_add, tools_image).
|
||||
*/
|
||||
import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
import { startSseServer, type TestServer } from './mcp-server-helpers';
|
||||
import { McpConnection } from '../../runtime/mcp-connection';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Returns true when the tool has been wrapped with an approval gate (has a suspendSchema). */
|
||||
function isApprovalWrapped(tool: { suspendSchema?: unknown }): boolean {
|
||||
return tool.suspendSchema !== undefined;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('McpConnection.listTools() — requireApproval config', () => {
	// One SSE server shared by the whole suite; each test creates its own
	// connection, torn down in afterEach so a failing test cannot leak it.
	let server: TestServer;
	let connection: McpConnection | undefined;

	beforeAll(async () => {
		server = await startSseServer();
	});

	afterAll(async () => {
		await server.close();
	});

	afterEach(async () => {
		if (connection) {
			await connection.disconnect();
			connection = undefined;
		}
	});

	// -----------------------------------------------------------------------
	// no approval
	// -----------------------------------------------------------------------

	it('does not wrap any tools when requireApproval is not set', async () => {
		connection = new McpConnection({ name: 'tools', url: server.url });
		await connection.connect();
		const tools = await connection.listTools();

		expect(tools.length).toBeGreaterThan(0);
		expect(tools.every((t) => !isApprovalWrapped(t))).toBe(true);
	});

	it('does not wrap any tools when requireApproval is false', async () => {
		connection = new McpConnection({ name: 'tools', url: server.url, requireApproval: false });
		await connection.connect();
		const tools = await connection.listTools();

		expect(tools.every((t) => !isApprovalWrapped(t))).toBe(true);
	});

	it('does not wrap any tools when requireApproval is an empty array', async () => {
		connection = new McpConnection({ name: 'tools', url: server.url, requireApproval: [] });
		await connection.connect();
		const tools = await connection.listTools();

		expect(tools.every((t) => !isApprovalWrapped(t))).toBe(true);
	});

	// -----------------------------------------------------------------------
	// requireApproval: true — all tools
	// -----------------------------------------------------------------------

	it('wraps all tools when requireApproval: true in server config', async () => {
		connection = new McpConnection({ name: 'tools', url: server.url, requireApproval: true });
		await connection.connect();
		const tools = await connection.listTools();

		expect(tools.length).toBeGreaterThan(0);
		expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
	});

	// -----------------------------------------------------------------------
	// requireApproval: string[] — selective tools
	// -----------------------------------------------------------------------

	it('wraps only the listed tool when requireApproval names a single tool', async () => {
		connection = new McpConnection({
			name: 'tools',
			url: server.url,
			requireApproval: ['echo'],
		});
		await connection.connect();
		const tools = await connection.listTools();

		// Tool names are exposed with the server name as prefix ('tools_…').
		const echo = tools.find((t) => t.name === 'tools_echo');
		const add = tools.find((t) => t.name === 'tools_add');
		const image = tools.find((t) => t.name === 'tools_image');

		expect(echo).toBeDefined();
		expect(add).toBeDefined();
		expect(image).toBeDefined();

		expect(isApprovalWrapped(echo!)).toBe(true);
		expect(isApprovalWrapped(add!)).toBe(false);
		expect(isApprovalWrapped(image!)).toBe(false);
	});

	it('wraps multiple listed tools when requireApproval names several tools', async () => {
		connection = new McpConnection({
			name: 'tools',
			url: server.url,
			requireApproval: ['echo', 'add'],
		});
		await connection.connect();
		const tools = await connection.listTools();

		const echo = tools.find((t) => t.name === 'tools_echo');
		const add = tools.find((t) => t.name === 'tools_add');
		const image = tools.find((t) => t.name === 'tools_image');

		expect(isApprovalWrapped(echo!)).toBe(true);
		expect(isApprovalWrapped(add!)).toBe(true);
		expect(isApprovalWrapped(image!)).toBe(false);
	});

	it('does not wrap tools that are not in the requireApproval list', async () => {
		connection = new McpConnection({
			name: 'tools',
			url: server.url,
			requireApproval: ['image'],
		});
		await connection.connect();
		const tools = await connection.listTools();

		const echo = tools.find((t) => t.name === 'tools_echo');
		const add = tools.find((t) => t.name === 'tools_add');
		const image = tools.find((t) => t.name === 'tools_image');

		expect(isApprovalWrapped(echo!)).toBe(false);
		expect(isApprovalWrapped(add!)).toBe(false);
		expect(isApprovalWrapped(image!)).toBe(true);
	});

	// -----------------------------------------------------------------------
	// global shouldRequireToolApproval flag
	// -----------------------------------------------------------------------

	it('wraps all tools when global shouldRequireToolApproval flag is true', async () => {
		// Second constructor argument is the global shouldRequireToolApproval flag.
		connection = new McpConnection({ name: 'tools', url: server.url }, true);
		await connection.connect();
		const tools = await connection.listTools();

		expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
	});

	// -----------------------------------------------------------------------
	// global flag + config.requireApproval interaction
	// -----------------------------------------------------------------------

	it('wraps all tools when global flag is true even if config.requireApproval names only some tools', async () => {
		connection = new McpConnection(
			{ name: 'tools', url: server.url, requireApproval: ['echo'] },
			true,
		);
		await connection.connect();
		const tools = await connection.listTools();

		expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
	});

	it('wraps all tools when config.requireApproval: true even if global flag is false', async () => {
		connection = new McpConnection(
			{ name: 'tools', url: server.url, requireApproval: true },
			false,
		);
		await connection.connect();
		const tools = await connection.listTools();

		expect(tools.every((t) => isApprovalWrapped(t))).toBe(true);
	});

	// -----------------------------------------------------------------------
	// prefix stripping — server name used as prefix
	// -----------------------------------------------------------------------

	it('matches tool names without prefix when requireApproval contains un-prefixed names', async () => {
		// The server is named 'srv'; tools will be 'srv_echo', 'srv_add', 'srv_image'.
		// requireApproval uses the un-prefixed original names.
		connection = new McpConnection({ name: 'srv', url: server.url, requireApproval: ['echo'] });
		await connection.connect();
		const tools = await connection.listTools();

		const echo = tools.find((t) => t.name === 'srv_echo');
		const add = tools.find((t) => t.name === 'srv_add');

		expect(isApprovalWrapped(echo!)).toBe(true);
		expect(isApprovalWrapped(add!)).toBe(false);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Disconnect idempotency
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Structural view of McpConnection's private `client` field, used only to spy
// on close() without widening the class's public API.
type McpConnectionInternals = { client: { close(): Promise<void> } };

describe('McpConnection.disconnect() — idempotency', () => {
	let server: TestServer;

	beforeAll(async () => {
		server = await startSseServer();
	});

	afterAll(async () => {
		await server.close();
	});

	it('does not throw and does not call client.close() again when disconnect is called on an already-closed connection', async () => {
		const conn = new McpConnection({ name: 'tools', url: server.url });
		await conn.connect();

		// Spy on the underlying MCP client's close() to count real teardowns.
		const clientClose = vi
			.spyOn((conn as unknown as McpConnectionInternals).client, 'close')
			.mockResolvedValue(undefined);

		await conn.disconnect();
		await conn.disconnect();

		expect(clientClose).toHaveBeenCalledTimes(1);
	});

	it('does not throw and calls client.close() exactly once when disconnect is called concurrently', async () => {
		const conn = new McpConnection({ name: 'tools', url: server.url });
		await conn.connect();

		const clientClose = vi
			.spyOn((conn as unknown as McpConnectionInternals).client, 'close')
			.mockResolvedValue(undefined);

		// Both calls race; the implementation must coalesce them into one close().
		await Promise.all([conn.disconnect(), conn.disconnect()]);

		expect(clientClose).toHaveBeenCalledTimes(1);
	});
});
|
||||
|
|
@ -0,0 +1,473 @@
|
|||
/**
|
||||
* Integration tests for MCP lifecycle via McpClient and the Agent builder.
|
||||
* Covers: McpClient constructor validation, connect/listTools/close, tool merge,
|
||||
* name collision, requireToolApproval, and rich content handling.
|
||||
*
|
||||
* Tests that don't require a real LLM run unconditionally.
|
||||
* Tests that call agent.generate() / agent.stream() are gated on ANTHROPIC_API_KEY.
|
||||
*/
|
||||
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import {
|
||||
describeIf,
|
||||
getModel,
|
||||
findLastTextContent,
|
||||
collectStreamChunks,
|
||||
chunksOfType,
|
||||
} from './helpers';
|
||||
import { startSseServer, type TestServer } from './mcp-server-helpers';
|
||||
import { Agent, McpClient, Tool, isLlmMessage } from '../../index';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// McpClient constructor validation — no MCP server required
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('McpClient constructor validation', () => {
|
||||
it('throws if neither url nor command is provided', () => {
|
||||
expect(() => new McpClient([{ name: 'bad' }])).toThrow(
|
||||
'exactly one of "url" or "command" must be provided',
|
||||
);
|
||||
});
|
||||
|
||||
it('throws if both url and command are provided', () => {
|
||||
expect(
|
||||
() => new McpClient([{ name: 'bad', url: 'http://localhost', command: 'node' }]),
|
||||
).toThrow('provide either "url" or "command", not both');
|
||||
});
|
||||
|
||||
it('throws if a duplicate server name is registered', () => {
|
||||
expect(
|
||||
() =>
|
||||
new McpClient([
|
||||
{ name: 'browser', url: 'http://localhost:9999/sse' },
|
||||
{ name: 'browser', url: 'http://localhost:9998/sse' },
|
||||
]),
|
||||
).toThrow('MCP server name "browser" is already registered');
|
||||
});
|
||||
|
||||
it('accepts valid url-based config', () => {
|
||||
expect(() => new McpClient([{ name: 'srv', url: 'http://localhost:9999/sse' }])).not.toThrow();
|
||||
});
|
||||
|
||||
it('accepts valid command-based config', () => {
|
||||
expect(
|
||||
() => new McpClient([{ name: 'stdio-srv', command: 'node', args: ['server.mjs'] }]),
|
||||
).not.toThrow();
|
||||
});
|
||||
|
||||
it('accepts multiple servers with distinct names', () => {
|
||||
expect(
|
||||
() =>
|
||||
new McpClient([
|
||||
{ name: 'srv-a', url: 'http://localhost:9999/sse' },
|
||||
{ name: 'srv-b', url: 'http://localhost:9998/sse' },
|
||||
]),
|
||||
).not.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// McpClient.listTools() — needs in-process MCP server, no LLM
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('McpClient.listTools()', () => {
	// Single SSE server shared by every test in this suite.
	let server: TestServer;

	beforeAll(async () => {
		server = await startSseServer();
	});

	afterAll(async () => {
		await server.close();
	});

	it('connects and returns tools when server is reachable', async () => {
		const client = new McpClient([{ name: 'tools', url: server.url }]);
		const tools = await client.listTools();

		// The test server exposes exactly echo/add/image, prefixed with the server name.
		expect(tools.length).toBe(3);
		expect(tools.map((t) => t.name).sort()).toEqual(['tools_add', 'tools_echo', 'tools_image']);

		await client.close();
	});

	it('returns cached tools on subsequent calls without reconnecting', async () => {
		const client = new McpClient([{ name: 'tools', url: server.url }]);

		const first = await client.listTools();
		const second = await client.listTools();

		// Same array identity proves the second call served the cache.
		expect(first).toBe(second);

		await client.close();
	});

	it('returns empty array when no servers are configured', async () => {
		const client = new McpClient([]);
		const tools = await client.listTools();

		expect(tools).toHaveLength(0);
	});

	it('throws and clears cache when server is unreachable', async () => {
		// Port 1 is never listening — connection must fail fast.
		const client = new McpClient([{ name: 'dead', url: 'http://127.0.0.1:1/sse' }]);

		await expect(client.listTools()).rejects.toThrow();
	});

	it('reports per-server errors for partially-failing multi-server configs', async () => {
		const client = new McpClient([
			{ name: 'ok', url: server.url },
			{ name: 'dead', url: 'http://127.0.0.1:1/sse' },
		]);

		// The error message should name the failing server.
		await expect(client.listTools()).rejects.toThrow(/dead/);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// generate() with MCP tools — requires ANTHROPIC_API_KEY
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Suites declared with describe_llm hit a real LLM; describeIf presumably skips
// them when the ANTHROPIC_API_KEY environment variable is absent — see describeIf helper.
const describe_llm = describeIf('anthropic');
|
||||
|
||||
describe_llm('agent generate() with MCP tool', () => {
|
||||
let server: TestServer;
|
||||
|
||||
beforeAll(async () => {
|
||||
server = await startSseServer();
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await server.close();
|
||||
});
|
||||
|
||||
it('calls an MCP tool during generation and returns the result', async () => {
|
||||
const client = new McpClient([{ name: 'tools', url: server.url }]);
|
||||
const agent = new Agent('mcp-agent')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are a helpful assistant. When asked to echo a message, use the tools_echo tool. Be concise.',
|
||||
)
|
||||
.mcp(client);
|
||||
|
||||
const result = await agent.generate(
|
||||
'Echo the message "integration test passed" using the tools_echo tool.',
|
||||
);
|
||||
|
||||
expect(result.finishReason).not.toBe('error');
|
||||
|
||||
const text = findLastTextContent(result.messages);
|
||||
expect(text?.toLowerCase()).toContain('integration test passed');
|
||||
|
||||
await client.close();
|
||||
});
|
||||
|
||||
it('merges static tools and MCP tools in the same agent', async () => {
|
||||
const staticTool = new Tool('double')
|
||||
.description('Double a number')
|
||||
.input(z.object({ n: z.number().describe('The number to double') }))
|
||||
.output(z.object({ result: z.number() }))
|
||||
.handler(async ({ n }) => ({ result: n * 2 }));
|
||||
|
||||
const client = new McpClient([{ name: 'tools', url: server.url }]);
|
||||
const agent = new Agent('mixed-tools-agent')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are a calculator. ' +
|
||||
'Use the double tool to double numbers and the tools.add tool to add numbers. ' +
|
||||
'Be concise.',
|
||||
)
|
||||
.tool(staticTool)
|
||||
.mcp(client);
|
||||
|
||||
const result = await agent.generate('Use the tools.add tool to add 15 and 27.');
|
||||
|
||||
expect(result.finishReason).not.toBe('error');
|
||||
const text = findLastTextContent(result.messages);
|
||||
expect(text).toContain('42');
|
||||
|
||||
await client.close();
|
||||
});
|
||||
|
||||
it('MCP connections persist across multiple generate() calls', async () => {
|
||||
// Connections are kept alive by McpClient and reused across runs.
|
||||
const client = new McpClient([{ name: 'tools', url: server.url }]);
|
||||
const agent = new Agent('lifecycle-agent')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('Use tools.add to add numbers. Be concise.')
|
||||
.mcp(client);
|
||||
|
||||
const result1 = await agent.generate('Use tools.add to add 1 and 2.');
|
||||
const result2 = await agent.generate('Use tools.add to add 3 and 4.');
|
||||
|
||||
expect(result1.finishReason).not.toBe('error');
|
||||
expect(result2.finishReason).not.toBe('error');
|
||||
|
||||
await client.close();
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// stream() with MCP tools — requires ANTHROPIC_API_KEY
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Streaming counterpart of the generate() suite: verifies the tool call surfaces
// in the streamed message chunks. Requires ANTHROPIC_API_KEY (gated by describe_llm).
describe_llm('agent stream() with MCP tool', () => {
	let server: TestServer;

	beforeAll(async () => {
		server = await startSseServer();
	});

	afterAll(async () => {
		await server.close();
	});

	it('streams a response that includes an MCP tool call', async () => {
		const client = new McpClient([{ name: 'tools', url: server.url }]);
		const agent = new Agent('stream-mcp-agent')
			.model(getModel('anthropic'))
			.instructions('Use tools_echo to echo messages. Be concise.')
			.mcp(client);

		const { stream } = await agent.stream('Echo "stream works" using tools_echo.');

		const chunks = await collectStreamChunks(stream);
		const messageChunks = chunksOfType(chunks, 'message');
		const messages = messageChunks.map((c) => c.message);

		// At least one streamed LLM message must carry a tool-call content part.
		const hasToolCall = messages.some(
			(m) => isLlmMessage(m) && m.content.some((c) => c.type === 'tool-call'),
		);
		expect(hasToolCall).toBe(true);

		await client.close();
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// generate() error cases — no LLM needed for the connection failure case
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Connection failure happens during build/connect, before any LLM request,
// so no API key is needed for this suite.
describe('generate() with unreachable MCP server', () => {
	it('rejects when MCP server is unreachable', async () => {
		// Port 1 on loopback is never listening, so the connection attempt fails fast.
		const client = new McpClient([{ name: 'dead', url: 'http://127.0.0.1:1/sse' }]);
		const agent = new Agent('bad-mcp-agent')
			.model('anthropic/claude-haiku-4-5')
			.instructions('test')
			.mcp(client);

		// The error should identify the failing server by its configured name.
		await expect(agent.generate('hello')).rejects.toThrow(/dead/i);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// MCP tool name collision detection — no LLM needed
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Verifies that a static tool whose name matches an MCP tool's prefixed name
// is rejected at build time. No LLM call is needed because the failure occurs
// before generation starts.
describe('MCP tool name collision detection', () => {
	let server: TestServer;

	beforeAll(async () => {
		server = await startSseServer();
	});

	afterAll(async () => {
		await server.close();
	});

	it('throws when a static tool and an MCP tool share the same prefixed name', async () => {
		// 'tools_echo' is exactly the name the MCP echo tool receives after
		// server-name prefixing, so this static tool collides with it.
		const conflicting = new Tool('tools_echo')
			.description('conflicts with MCP echo')
			.input(z.object({ message: z.string() }))
			.handler(async ({ message }) => ({ result: message }));

		const client = new McpClient([{ name: 'tools', url: server.url }]);
		const agent = new Agent('collision-agent')
			.model('anthropic/claude-haiku-4-5')
			.instructions('test')
			.tool(conflicting)
			.mcp(client);

		try {
			await expect(agent.generate('hello')).rejects.toThrow(/collision/i);
		} finally {
			// Close in finally so a failed expectation doesn't leak the connection.
			await client.close();
		}
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// requireToolApproval with MCP tools — requires ANTHROPIC_API_KEY
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Agent-level requireToolApproval() must suspend MCP tool calls just like static
// tool calls. Requires ANTHROPIC_API_KEY (gated by describe_llm).
describe_llm('requireToolApproval() with MCP tools', () => {
	let server: TestServer;

	beforeAll(async () => {
		server = await startSseServer();
	});

	afterAll(async () => {
		await server.close();
	});

	it('suspends the MCP tool call when requireToolApproval is enabled', async () => {
		const client = new McpClient([{ name: 'tools', url: server.url }]);
		// A checkpoint store is required so the suspended run can later be resumed.
		const agent = new Agent('approval-mcp-agent')
			.model(getModel('anthropic'))
			.instructions('Use tools_echo to echo messages. Be concise.')
			.mcp(client)
			.requireToolApproval()
			.checkpoint('memory');

		const { stream } = await agent.stream('Echo "needs approval" using tools_echo.');
		const chunks = await collectStreamChunks(stream);

		// The stream should emit a suspension chunk instead of executing the tool.
		const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
		expect(suspendedChunks.length).toBeGreaterThanOrEqual(1);
		expect(suspendedChunks[0].toolName).toBe('tools_echo');

		await client.close();
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// McpServerConfig.requireApproval — builder validation (no LLM needed)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Builder-time validation: requireApproval on a server config is only legal when
// the agent also has a checkpoint store. These tests never reach the network —
// validation fails (or passes) before any connection attempt.
describe('McpServerConfig.requireApproval — builder validation', () => {
	it('throws when requireApproval: true is set without a checkpoint store', async () => {
		const client = new McpClient([
			{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: true },
		]);
		const agent = new Agent('no-checkpoint')
			.model('anthropic/claude-haiku-4-5')
			.instructions('test')
			.mcp(client);

		// build() is triggered by generate() — fails before attempting connection
		await expect(agent.generate('test')).rejects.toThrow(/checkpoint/i);
	});

	it('throws when requireApproval: string[] is set without a checkpoint store', async () => {
		const client = new McpClient([
			{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: ['echo'] },
		]);
		const agent = new Agent('no-checkpoint-selective')
			.model('anthropic/claude-haiku-4-5')
			.instructions('test')
			.mcp(client);

		await expect(agent.generate('test')).rejects.toThrow(/checkpoint/i);
	});

	it('does not throw when requireApproval: true is set with a checkpoint store', () => {
		// Builder chain alone should not throw; validation is satisfied by .checkpoint().
		expect(() =>
			new Agent('with-checkpoint')
				.model('anthropic/claude-haiku-4-5')
				.instructions('test')
				.mcp(
					new McpClient([
						{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: true },
					]),
				)
				.checkpoint('memory'),
		).not.toThrow();
	});

	it('does not throw when requireApproval: false is set without a checkpoint store', () => {
		expect(() =>
			new Agent('no-approval')
				.model('anthropic/claude-haiku-4-5')
				.instructions('test')
				.mcp(
					new McpClient([
						{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: false },
					]),
				),
		).not.toThrow();
	});

	it('does not throw when requireApproval is an empty array without a checkpoint store', () => {
		// An empty approval list means no tool ever needs approval, so no store is required.
		expect(() =>
			new Agent('empty-approval')
				.model('anthropic/claude-haiku-4-5')
				.instructions('test')
				.mcp(
					new McpClient([{ name: 'tools', url: 'http://localhost:9999/sse', requireApproval: [] }]),
				),
		).not.toThrow();
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// McpServerConfig.requireApproval end-to-end — requires ANTHROPIC_API_KEY
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe_llm('McpServerConfig.requireApproval with MCP tools', () => {
|
||||
let server: TestServer;
|
||||
|
||||
beforeAll(async () => {
|
||||
server = await startSseServer();
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await server.close();
|
||||
});
|
||||
|
||||
it('suspends all MCP tools when config.requireApproval: true', async () => {
|
||||
const client = new McpClient([{ name: 'tools', url: server.url, requireApproval: true }]);
|
||||
const agent = new Agent('config-approval-all-agent')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('Use tools_echo to echo messages. Be concise.')
|
||||
.mcp(client)
|
||||
.checkpoint('memory');
|
||||
|
||||
const { stream } = await agent.stream('Echo "needs approval" using tools_echo.');
|
||||
const chunks = await collectStreamChunks(stream);
|
||||
|
||||
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
|
||||
expect(suspendedChunks.length).toBeGreaterThanOrEqual(1);
|
||||
expect(suspendedChunks[0].toolName).toBe('tools_echo');
|
||||
|
||||
await client.close();
|
||||
});
|
||||
|
||||
it('suspends only the listed tool when config.requireApproval is a string array', async () => {
|
||||
const client = new McpClient([{ name: 'tools', url: server.url, requireApproval: ['echo'] }]);
|
||||
const agent = new Agent('config-approval-selective-agent')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('Use tools_echo to echo messages. Be concise.')
|
||||
.mcp(client)
|
||||
.checkpoint('memory');
|
||||
|
||||
const { stream } = await agent.stream('Echo "selective approval" using tools_echo.');
|
||||
const chunks = await collectStreamChunks(stream);
|
||||
|
||||
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
|
||||
expect(suspendedChunks.length).toBeGreaterThanOrEqual(1);
|
||||
expect(suspendedChunks[0].toolName).toBe('tools_echo');
|
||||
|
||||
await client.close();
|
||||
});
|
||||
|
||||
it('does not suspend a tool not listed in config.requireApproval', async () => {
|
||||
// Only 'echo' requires approval; 'add' should run to completion without suspension.
|
||||
const client = new McpClient([{ name: 'tools', url: server.url, requireApproval: ['echo'] }]);
|
||||
const agent = new Agent('config-approval-unlisted-agent')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('Use tools.add to add numbers. Do not use any other tool. Be concise.')
|
||||
.mcp(client)
|
||||
.checkpoint('memory');
|
||||
|
||||
const result = await agent.generate('Use tools.add to add 10 and 32.');
|
||||
|
||||
expect(result.finishReason).not.toBe('error');
|
||||
const text = findLastTextContent(result.messages);
|
||||
expect(text).toContain('42');
|
||||
|
||||
await client.close();
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,164 @@
|
|||
/**
|
||||
* In-process MCP test server helpers.
|
||||
* Creates real MCP servers (SSE and StreamableHTTP) bound to random localhost ports
|
||||
* for use in integration tests. No mocking of SDK internals.
|
||||
*/
|
||||
|
||||
import { Server as McpServer } from '@modelcontextprotocol/sdk/server/index.js';
|
||||
import { SSEServerTransport } from '@modelcontextprotocol/sdk/server/sse.js';
|
||||
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
|
||||
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
|
||||
import http from 'http';
|
||||
|
||||
/**
 * 1×1 transparent PNG in base64 (smallest valid PNG). Used for image tool tests
 * so assertions can compare the exact bytes round-tripped through MCP.
 */
export const TINY_PNG =
	'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==';
|
||||
|
||||
/** Handle to a running in-process MCP test server. */
export interface TestServer {
	/** Endpoint URL clients should connect to (SSE or Streamable HTTP). */
	url: string;
	/** Shut the server down and release its port. */
	close: () => Promise<void>;
}
|
||||
|
||||
/** Create an in-process MCP Server with three test tools: echo, add, and image. */
|
||||
export function createTestMcpServer(): McpServer {
|
||||
const server = new McpServer(
|
||||
{ name: 'test-mcp-server', version: '1.0.0' },
|
||||
{ capabilities: { tools: {} } },
|
||||
);
|
||||
|
||||
server.setRequestHandler(ListToolsRequestSchema, async () => ({
|
||||
tools: [
|
||||
{
|
||||
name: 'echo',
|
||||
description: 'Echo the message back as-is',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: { message: { type: 'string', description: 'Message to echo' } },
|
||||
required: ['message'],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'add',
|
||||
description: 'Add two numbers together',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
a: { type: 'number', description: 'First number' },
|
||||
b: { type: 'number', description: 'Second number' },
|
||||
},
|
||||
required: ['a', 'b'],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: 'image',
|
||||
description: 'Return a small image with a caption',
|
||||
inputSchema: {
|
||||
type: 'object',
|
||||
properties: { caption: { type: 'string', description: 'Image caption' } },
|
||||
required: ['caption'],
|
||||
},
|
||||
},
|
||||
],
|
||||
}));
|
||||
|
||||
server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
||||
const { name, arguments: args = {} } = request.params;
|
||||
|
||||
if (name === 'echo') {
|
||||
// eslint-disable-next-line @typescript-eslint/no-base-to-string
|
||||
return { content: [{ type: 'text', text: String(args.message ?? '') }] };
|
||||
}
|
||||
|
||||
if (name === 'add') {
|
||||
const sum = Number(args.a ?? 0) + Number(args.b ?? 0);
|
||||
return { content: [{ type: 'text', text: String(sum) }] };
|
||||
}
|
||||
|
||||
if (name === 'image') {
|
||||
return {
|
||||
content: [
|
||||
// eslint-disable-next-line @typescript-eslint/no-base-to-string
|
||||
{ type: 'text', text: String(args.caption ?? '') },
|
||||
{ type: 'image', data: TINY_PNG, mimeType: 'image/png' },
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
isError: true,
|
||||
content: [{ type: 'text', text: `Unknown tool: ${name}` }],
|
||||
};
|
||||
});
|
||||
|
||||
return server;
|
||||
}
|
||||
|
||||
/** Start an SSE MCP server on a random port. Returns the SSE endpoint URL and a close function. */
|
||||
export async function startSseServer(): Promise<TestServer> {
|
||||
const transports = new Map<string, SSEServerTransport>();
|
||||
|
||||
const httpServer = http.createServer(async (req, res) => {
|
||||
try {
|
||||
if (req.method === 'GET' && req.url === '/sse') {
|
||||
// Create a fresh McpServer per client connection — the Server class holds
|
||||
// a single active transport reference and rejects a second connect() call
|
||||
// if the first transport hasn't been fully torn down yet.
|
||||
const mcpServer = createTestMcpServer();
|
||||
const transport = new SSEServerTransport('/message', res);
|
||||
transports.set(transport.sessionId, transport);
|
||||
await mcpServer.connect(transport);
|
||||
} else if (req.method === 'POST' && req.url?.startsWith('/message')) {
|
||||
const sessionId = new URL(req.url, 'http://localhost').searchParams.get('sessionId') ?? '';
|
||||
const transport = transports.get(sessionId);
|
||||
if (transport) {
|
||||
await transport.handlePostMessage(req, res);
|
||||
} else {
|
||||
res.writeHead(404).end(`No transport for sessionId: ${sessionId}`);
|
||||
}
|
||||
} else {
|
||||
res.writeHead(404).end('Not found');
|
||||
}
|
||||
} catch {
|
||||
if (!res.headersSent) res.writeHead(500).end('Internal server error');
|
||||
}
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve) => httpServer.listen(0, '127.0.0.1', resolve));
|
||||
const { port } = httpServer.address() as { port: number };
|
||||
|
||||
return {
|
||||
url: `http://127.0.0.1:${port}/sse`,
|
||||
close: async () => {
|
||||
httpServer.closeAllConnections();
|
||||
await new Promise<void>((resolve) => httpServer.close(() => resolve()));
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Start a Streamable HTTP MCP server on a random loopback port.
 *
 * @returns The endpoint URL and a close function that tears down the server.
 */
export async function startStreamableHttpServer(): Promise<TestServer> {
	// In stateless mode (sessionIdGenerator: undefined) the SDK enforces that each
	// transport instance handles exactly one HTTP request. A fresh McpServer + transport
	// must therefore be created per-request, mirroring the SSE server pattern above.
	const httpServer = http.createServer(async (req, res) => {
		try {
			const mcpServer = createTestMcpServer();
			const transport = new StreamableHTTPServerTransport({ sessionIdGenerator: undefined });
			await mcpServer.connect(transport);
			await transport.handleRequest(req, res);
		} catch {
			// Best-effort error response; never let a handler error crash the test server.
			if (!res.headersSent) res.writeHead(500).end('Internal server error');
		}
	});

	// Port 0 asks the OS for any free port; bind to loopback only.
	await new Promise<void>((resolve) => httpServer.listen(0, '127.0.0.1', resolve));
	const { port } = httpServer.address() as { port: number };

	return {
		url: `http://127.0.0.1:${port}/mcp`,
		close: async () => {
			httpServer.closeAllConnections();
			await new Promise<void>((resolve) => httpServer.close(() => resolve()));
		},
	};
}
|
||||
|
|
@ -0,0 +1,201 @@
|
|||
/**
|
||||
* Integration tests for McpConnection with SSE transport.
|
||||
* Uses a real in-process HTTP server implementing the MCP SSE protocol.
|
||||
* No mocking of SDK internals or McpConnection.
|
||||
*/
|
||||
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
|
||||
|
||||
import { startSseServer, TINY_PNG, type TestServer } from './mcp-server-helpers';
|
||||
import { McpConnection } from '../../runtime/mcp-connection';
|
||||
import type { ContentFile, ContentText, Message } from '../../types/sdk/message';
|
||||
import { isZodSchema } from '../../utils/zod';
|
||||
|
||||
describe('McpConnection — SSE transport', () => {
|
||||
let server: TestServer;
|
||||
|
||||
beforeAll(async () => {
|
||||
server = await startSseServer();
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await server.close();
|
||||
});
|
||||
|
||||
it('connects to an SSE server and lists tools', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
await conn.connect();
|
||||
|
||||
const tools = await conn.listTools();
|
||||
|
||||
expect(tools).toHaveLength(3);
|
||||
expect(tools.map((t) => t.name)).toEqual(
|
||||
expect.arrayContaining(['test_echo', 'test_add', 'test_image']),
|
||||
);
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('calls echo tool and returns text content', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
await conn.connect();
|
||||
|
||||
const result = await conn.callTool('echo', { message: 'hello from sse' });
|
||||
|
||||
expect(result.isError).toBeFalsy();
|
||||
expect(result.content).toHaveLength(1);
|
||||
expect(result.content[0]).toEqual({ type: 'text', text: 'hello from sse' });
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('calls add tool and returns calculated result', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
await conn.connect();
|
||||
|
||||
const result = await conn.callTool('add', { a: 7, b: 13 });
|
||||
|
||||
expect(result.isError).toBeFalsy();
|
||||
expect(result.content[0]).toEqual({ type: 'text', text: '20' });
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('calls image tool and returns mixed text + image content', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
await conn.connect();
|
||||
|
||||
const result = await conn.callTool('image', { caption: 'landscape' });
|
||||
|
||||
expect(result.isError).toBeFalsy();
|
||||
expect(result.content).toHaveLength(2);
|
||||
expect(result.content[0]).toMatchObject({ type: 'text', text: 'landscape' });
|
||||
expect(result.content[1]).toMatchObject({
|
||||
type: 'image',
|
||||
data: TINY_PNG,
|
||||
mimeType: 'image/png',
|
||||
});
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('disconnects cleanly without throwing', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
await conn.connect();
|
||||
await expect(conn.disconnect()).resolves.toBeUndefined();
|
||||
});
|
||||
|
||||
it('throws when listTools() is called without connecting first', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
// Do NOT call conn.connect()
|
||||
await expect(conn.listTools()).rejects.toThrow();
|
||||
});
|
||||
|
||||
it('throws when callTool() is called without connecting first', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
await expect(conn.callTool('echo', { message: 'hi' })).rejects.toThrow();
|
||||
});
|
||||
|
||||
it('is idempotent — calling connect() twice resolves without starting a second connection', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
await conn.connect();
|
||||
await expect(conn.connect()).resolves.toBeUndefined();
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('deduplicates concurrent connect() calls — both resolve via the same promise', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
const [r1, r2] = await Promise.all([conn.connect(), conn.connect()]);
|
||||
expect(r1).toBeUndefined();
|
||||
expect(r2).toBeUndefined();
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
describe('listTools() resolved tools', () => {
|
||||
it('prefixes tool names with the server name', async () => {
|
||||
const conn = new McpConnection({ name: 'browser', url: server.url });
|
||||
await conn.connect();
|
||||
|
||||
const builtTools = await conn.listTools();
|
||||
|
||||
expect(builtTools.every((t) => t.name.startsWith('browser_'))).toBe(true);
|
||||
expect(builtTools.map((t) => t.name)).toEqual(
|
||||
expect.arrayContaining(['browser_echo', 'browser_add', 'browser_image']),
|
||||
);
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('sets inputSchema as raw JSON Schema (not Zod) and sets mcpTool flag', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
await conn.connect();
|
||||
|
||||
const builtTools = await conn.listTools();
|
||||
|
||||
for (const t of builtTools) {
|
||||
expect(t.inputSchema).toBeDefined();
|
||||
expect(isZodSchema(t.inputSchema!)).toBe(false);
|
||||
expect(t.mcpTool).toBe(true);
|
||||
expect(t.mcpServerName).toBe('test');
|
||||
}
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('handler calls the tool and returns the MCP result', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
await conn.connect();
|
||||
|
||||
const builtTools = await conn.listTools();
|
||||
const echoTool = builtTools.find((t) => t.name === 'test_echo')!;
|
||||
|
||||
const result = await echoTool.handler!({ message: 'from handler' }, {} as never);
|
||||
const mcpResult = result as { content: Array<{ type: string; text: string }> };
|
||||
|
||||
expect(mcpResult.content[0]).toEqual({ type: 'text', text: 'from handler' });
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('toMessage returns undefined for text-only results', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
await conn.connect();
|
||||
|
||||
const builtTools = await conn.listTools();
|
||||
const echoTool = builtTools.find((t) => t.name === 'test_echo')!;
|
||||
|
||||
const mcpResult = await conn.callTool('echo', { message: 'text only' });
|
||||
const message = echoTool.toMessage!(mcpResult);
|
||||
|
||||
expect(message).toBeUndefined();
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('toMessage returns a user message with file part for image results', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url });
|
||||
await conn.connect();
|
||||
|
||||
const builtTools = await conn.listTools();
|
||||
const imageTool = builtTools.find((t) => t.name === 'test_image')!;
|
||||
|
||||
const mcpResult = await conn.callTool('image', { caption: 'my photo' });
|
||||
const message = imageTool.toMessage!(mcpResult);
|
||||
|
||||
expect(message).toBeDefined();
|
||||
const llmMessage = message as Message;
|
||||
expect(llmMessage.role).toBe('assistant');
|
||||
|
||||
const content = llmMessage.content as Array<ContentText | ContentFile>;
|
||||
const textPart = content.find((c): c is ContentText => c.type === 'text');
|
||||
const filePart = content.find((c): c is ContentFile => c.type === 'file');
|
||||
|
||||
expect(textPart).toBeDefined();
|
||||
expect(textPart!.text).toBe('my photo');
|
||||
expect(filePart).toBeDefined();
|
||||
expect(filePart!.mediaType).toBe('image/png');
|
||||
expect(filePart!.data).toBe(TINY_PNG);
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,118 @@
|
|||
/**
|
||||
* Integration tests for McpConnection with stdio transport.
|
||||
* Spawns a real child process (mcp-stdio-server.mjs) and communicates via stdin/stdout.
|
||||
* No mocking of SDK internals or McpConnection.
|
||||
*/
|
||||
import path from 'path';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { TINY_PNG } from './mcp-server-helpers';
|
||||
import { McpConnection } from '../../runtime/mcp-connection';
|
||||
|
||||
// vitest injects __dirname for TypeScript test files in the node environment.
const FIXTURE_PATH = path.resolve(__dirname, '../fixtures/mcp-stdio-server.mjs');

/**
 * Config that spawns the stdio fixture server as a node child process.
 *
 * @param name - MCP server name; also becomes the prefix on resolved tool names.
 */
function stdioConfig(name = 'test') {
	return {
		name,
		command: 'node',
		args: [FIXTURE_PATH],
	};
}
|
||||
|
||||
// Integration tests for McpConnection over stdio: each test spawns a real child
// process (the fixture server) and talks to it via stdin/stdout.
describe('McpConnection — stdio transport', () => {
	it('connects to a stdio server and lists tools', async () => {
		const conn = new McpConnection(stdioConfig());
		await conn.connect();

		const tools = await conn.listTools();

		expect(tools).toHaveLength(3);
		expect(tools.map((t) => t.name)).toEqual(
			expect.arrayContaining(['test_echo', 'test_add', 'test_image']),
		);

		await conn.disconnect();
	});

	it('calls echo tool and returns text content', async () => {
		const conn = new McpConnection(stdioConfig());
		await conn.connect();

		// callTool uses the unprefixed tool name ('echo'), not the resolved 'test_echo'.
		const result = await conn.callTool('echo', { message: 'hello from stdio' });

		expect(result.isError).toBeFalsy();
		expect(result.content).toHaveLength(1);
		expect(result.content[0]).toEqual({ type: 'text', text: 'hello from stdio' });

		await conn.disconnect();
	});

	it('calls add tool and returns calculated result', async () => {
		const conn = new McpConnection(stdioConfig());
		await conn.connect();

		const result = await conn.callTool('add', { a: 42, b: 58 });

		expect(result.isError).toBeFalsy();
		expect(result.content[0]).toEqual({ type: 'text', text: '100' });

		await conn.disconnect();
	});

	it('calls image tool and returns mixed text + image content', async () => {
		const conn = new McpConnection(stdioConfig());
		await conn.connect();

		const result = await conn.callTool('image', { caption: 'forest' });

		expect(result.isError).toBeFalsy();
		expect(result.content).toHaveLength(2);
		expect(result.content[0]).toMatchObject({ type: 'text', text: 'forest' });
		expect(result.content[1]).toMatchObject({
			type: 'image',
			data: TINY_PNG,
			mimeType: 'image/png',
		});

		await conn.disconnect();
	});

	it('disconnects cleanly, terminating the child process', async () => {
		const conn = new McpConnection(stdioConfig());
		await conn.connect();
		await expect(conn.disconnect()).resolves.toBeUndefined();
	});

	describe('listTools() resolved tools', () => {
		it('prefixes tool names with the server name', async () => {
			const conn = new McpConnection(stdioConfig('fs'));
			await conn.connect();

			const builtTools = await conn.listTools();

			expect(builtTools.every((t) => t.name.startsWith('fs_'))).toBe(true);
			expect(builtTools.map((t) => t.name)).toEqual(
				expect.arrayContaining(['fs_echo', 'fs_add', 'fs_image']),
			);

			await conn.disconnect();
		});

		it('handler invokes the child process tool and returns MCP result', async () => {
			const conn = new McpConnection(stdioConfig());
			await conn.connect();

			const builtTools = await conn.listTools();
			const addTool = builtTools.find((t) => t.name === 'test_add')!;

			const result = await addTool.handler!({ a: 3, b: 4 }, {} as never);
			const mcpResult = result as { content: Array<{ type: string; text: string }> };

			expect(mcpResult.content[0]).toEqual({ type: 'text', text: '7' });

			await conn.disconnect();
		});
	});
});
|
||||
|
|
@ -0,0 +1,131 @@
|
|||
/**
|
||||
* Integration tests for McpConnection with Streamable HTTP transport.
|
||||
* Uses a real in-process HTTP server implementing the MCP Streamable HTTP protocol.
|
||||
* No mocking of SDK internals or McpConnection.
|
||||
*/
|
||||
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
|
||||
|
||||
import { startStreamableHttpServer, TINY_PNG, type TestServer } from './mcp-server-helpers';
|
||||
import { McpConnection } from '../../runtime/mcp-connection';
|
||||
import type { ContentFile, ContentText, Message } from '../../types/sdk/message';
|
||||
|
||||
describe('McpConnection — Streamable HTTP transport', () => {
|
||||
let server: TestServer;
|
||||
|
||||
beforeAll(async () => {
|
||||
server = await startStreamableHttpServer();
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await server.close();
|
||||
});
|
||||
|
||||
it('connects to a Streamable HTTP server and lists tools', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
|
||||
await conn.connect();
|
||||
|
||||
const tools = await conn.listTools();
|
||||
|
||||
expect(tools).toHaveLength(3);
|
||||
expect(tools.map((t) => t.name)).toEqual(
|
||||
expect.arrayContaining(['test_echo', 'test_add', 'test_image']),
|
||||
);
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('calls echo tool and returns text content', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
|
||||
await conn.connect();
|
||||
|
||||
const result = await conn.callTool('echo', { message: 'hello from streamable-http' });
|
||||
|
||||
expect(result.isError).toBeFalsy();
|
||||
expect(result.content).toHaveLength(1);
|
||||
expect(result.content[0]).toEqual({ type: 'text', text: 'hello from streamable-http' });
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('calls add tool and returns calculated result', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
|
||||
await conn.connect();
|
||||
|
||||
const result = await conn.callTool('add', { a: 100, b: 200 });
|
||||
|
||||
expect(result.isError).toBeFalsy();
|
||||
expect(result.content[0]).toEqual({ type: 'text', text: '300' });
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('calls image tool and returns mixed text + image content', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
|
||||
await conn.connect();
|
||||
|
||||
const result = await conn.callTool('image', { caption: 'mountains' });
|
||||
|
||||
expect(result.isError).toBeFalsy();
|
||||
expect(result.content).toHaveLength(2);
|
||||
expect(result.content[0]).toMatchObject({ type: 'text', text: 'mountains' });
|
||||
expect(result.content[1]).toMatchObject({
|
||||
type: 'image',
|
||||
data: TINY_PNG,
|
||||
mimeType: 'image/png',
|
||||
});
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('disconnects cleanly without throwing', async () => {
|
||||
const conn = new McpConnection({ name: 'test', url: server.url, transport: 'streamableHttp' });
|
||||
await conn.connect();
|
||||
await expect(conn.disconnect()).resolves.toBeUndefined();
|
||||
});
|
||||
|
||||
describe('listTools() resolved tools', () => {
|
||||
it('prefixes tool names with the server name', async () => {
|
||||
const conn = new McpConnection({
|
||||
name: 'devtools',
|
||||
url: server.url,
|
||||
transport: 'streamableHttp',
|
||||
});
|
||||
await conn.connect();
|
||||
|
||||
const builtTools = await conn.listTools();
|
||||
|
||||
expect(builtTools.every((t) => t.name.startsWith('devtools_'))).toBe(true);
|
||||
expect(builtTools.map((t) => t.name)).toEqual(
|
||||
expect.arrayContaining(['devtools_echo', 'devtools_add', 'devtools_image']),
|
||||
);
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
|
||||
it('toMessage returns a user message with file part for image results', async () => {
|
||||
const conn = new McpConnection({
|
||||
name: 'test',
|
||||
url: server.url,
|
||||
transport: 'streamableHttp',
|
||||
});
|
||||
await conn.connect();
|
||||
|
||||
const builtTools = await conn.listTools();
|
||||
const imageTool = builtTools.find((t) => t.name === 'test_image')!;
|
||||
|
||||
const mcpResult = await conn.callTool('image', { caption: 'sunset' });
|
||||
const message = imageTool.toMessage!(mcpResult);
|
||||
|
||||
expect(message).toBeDefined();
|
||||
const llmMessage = message as Message;
|
||||
expect(llmMessage.role).toBe('assistant');
|
||||
|
||||
const content = llmMessage.content as Array<ContentText | ContentFile>;
|
||||
const filePart = content.find((c): c is ContentFile => c.type === 'file');
|
||||
expect(filePart).toBeDefined();
|
||||
expect(filePart!.mediaType).toBe('image/png');
|
||||
|
||||
await conn.disconnect();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,302 @@
|
|||
/**
|
||||
* Integration test: custom BuiltMemory backend.
|
||||
*
|
||||
* Proves that any object implementing the BuiltMemory interface works with the
|
||||
* agent runtime — no SDK-provided storage class needed. This is the contract
|
||||
* that Redis, DynamoDB, TypeORM, or any other persistence layer must satisfy.
|
||||
*/
|
||||
import { expect, it, beforeEach } from 'vitest';
|
||||
|
||||
import { Agent, Memory, toDbMessage, type AgentDbMessage, type AgentMessage } from '../../../index';
|
||||
import type { BuiltMemory, Thread } from '../../../types/sdk/memory';
|
||||
import { describeIf, findLastTextContent, getModel } from '../helpers';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Custom in-memory BuiltMemory implementation (simulates Redis, DynamoDB, etc.)
|
||||
// ---------------------------------------------------------------------------
|
||||
class CustomMapMemory implements BuiltMemory {
|
||||
readonly threads = new Map<string, Thread>();
|
||||
readonly messages = new Map<string, AgentDbMessage[]>();
|
||||
readonly workingMemory = new Map<string, string>();
|
||||
|
||||
// --- Thread management ---
|
||||
|
||||
async getThread(threadId: string): Promise<Thread | null> {
|
||||
return this.threads.get(threadId) ?? null;
|
||||
}
|
||||
|
||||
async saveThread(thread: Omit<Thread, 'createdAt' | 'updatedAt'>): Promise<Thread> {
|
||||
const now = new Date();
|
||||
const full: Thread = { ...thread, createdAt: now, updatedAt: now };
|
||||
this.threads.set(thread.id, full);
|
||||
return full;
|
||||
}
|
||||
|
||||
async deleteThread(threadId: string): Promise<void> {
|
||||
this.threads.delete(threadId);
|
||||
this.messages.delete(threadId);
|
||||
}
|
||||
|
||||
// --- Message persistence ---
|
||||
|
||||
async getMessages(
|
||||
threadId: string,
|
||||
opts?: { limit?: number; before?: Date },
|
||||
): Promise<AgentDbMessage[]> {
|
||||
let msgs = this.messages.get(threadId) ?? [];
|
||||
if (opts?.before) {
|
||||
msgs = msgs.filter((m) => {
|
||||
const ts = 'createdAt' in m ? (m as Record<string, unknown>).createdAt : undefined;
|
||||
return ts instanceof Date ? ts < opts.before! : true;
|
||||
});
|
||||
}
|
||||
if (opts?.limit) {
|
||||
msgs = msgs.slice(-opts.limit);
|
||||
}
|
||||
return msgs.map(toDbMessage);
|
||||
}
|
||||
|
||||
async saveMessages(args: {
|
||||
threadId: string;
|
||||
resourceId?: string;
|
||||
messages: AgentMessage[];
|
||||
}): Promise<void> {
|
||||
const existing = this.messages.get(args.threadId) ?? [];
|
||||
this.messages.set(args.threadId, [...existing, ...args.messages.map(toDbMessage)]);
|
||||
}
|
||||
|
||||
async deleteMessages(messageIds: string[]): Promise<void> {
|
||||
for (const [threadId, msgs] of this.messages) {
|
||||
const idSet = new Set(messageIds);
|
||||
this.messages.set(
|
||||
threadId,
|
||||
msgs.filter((m) => !idSet.has(m.id)),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// --- Working memory (Tier 2) ---
|
||||
|
||||
async getWorkingMemory(params: {
|
||||
threadId: string;
|
||||
resourceId: string;
|
||||
scope: 'resource' | 'thread';
|
||||
}): Promise<string | null> {
|
||||
return (
|
||||
this.workingMemory.get(params.scope === 'resource' ? params.resourceId : params.threadId) ??
|
||||
null
|
||||
);
|
||||
}
|
||||
|
||||
async saveWorkingMemory(
|
||||
params: { threadId: string; resourceId: string; scope: 'resource' | 'thread' },
|
||||
content: string,
|
||||
): Promise<void> {
|
||||
const id = params.scope === 'resource' ? params.resourceId : params.threadId;
|
||||
this.workingMemory.set(id, content);
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('custom BuiltMemory backend', () => {
|
||||
let store: CustomMapMemory;
|
||||
|
||||
beforeEach(() => {
|
||||
store = new CustomMapMemory();
|
||||
});
|
||||
|
||||
it('recalls previous messages across turns', async () => {
|
||||
const memory = new Memory().storage(store).lastMessages(10);
|
||||
|
||||
const agent = new Agent('custom-mem-recall')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `custom-thread-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'user-1' } };
|
||||
|
||||
await agent.generate('My name is Valentina. Just acknowledge.', options);
|
||||
|
||||
const result = await agent.generate('What is my name?', options);
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('valentina');
|
||||
|
||||
// Verify the custom store actually received messages
|
||||
const stored = store.messages.get(threadId);
|
||||
expect(stored).toBeDefined();
|
||||
expect(stored!.length).toBeGreaterThanOrEqual(2);
|
||||
});
|
||||
|
||||
it('isolates threads in the custom backend', async () => {
|
||||
const memory = new Memory().storage(store).lastMessages(10);
|
||||
|
||||
const agent = new Agent('custom-mem-isolation')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are a helpful assistant. Be concise. If you don\'t know something, say "I don\'t know".',
|
||||
)
|
||||
.memory(memory);
|
||||
|
||||
const thread1 = `custom-t1-${Date.now()}`;
|
||||
const thread2 = `custom-t2-${Date.now()}`;
|
||||
|
||||
await agent.generate('The secret word is NEPTUNE. Just acknowledge.', {
|
||||
persistence: { threadId: thread1, resourceId: 'user-1' },
|
||||
});
|
||||
|
||||
const result = await agent.generate('What is the secret word?', {
|
||||
persistence: { threadId: thread2, resourceId: 'user-1' },
|
||||
});
|
||||
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).not.toContain('neptune');
|
||||
|
||||
// Thread 1 should have messages, thread 2 should have its own
|
||||
expect(store.messages.get(thread1)!.length).toBeGreaterThan(0);
|
||||
expect(store.messages.get(thread2)!.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it('persists and retrieves resource-scoped working memory via custom backend', async () => {
|
||||
const memory = new Memory()
|
||||
.storage(store)
|
||||
.lastMessages(10)
|
||||
.scope('resource')
|
||||
.freeform('# User Profile\n- **Name**:\n- **Favorite color**:');
|
||||
|
||||
const agent = new Agent('custom-mem-working')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `custom-wm-${Date.now()}`;
|
||||
const resourceId = 'user-wm-1';
|
||||
const options = { persistence: { threadId, resourceId } };
|
||||
|
||||
await agent.generate('My name is Kenji and my favorite color is teal.', options);
|
||||
|
||||
// Working memory should have been persisted keyed by resourceId
|
||||
const wm = store.workingMemory.get(resourceId);
|
||||
expect(wm).toBeDefined();
|
||||
expect(wm!.toLowerCase()).toContain('kenji');
|
||||
|
||||
// New thread, same resourceId — resource-scoped working memory carries over
|
||||
const thread2 = `custom-wm2-${Date.now()}`;
|
||||
const result = await agent.generate('What is my name?', {
|
||||
persistence: { threadId: thread2, resourceId },
|
||||
});
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('kenji');
|
||||
});
|
||||
|
||||
it('persists and retrieves thread-scoped working memory via custom backend', async () => {
|
||||
const memory = new Memory()
|
||||
.storage(store)
|
||||
.lastMessages(10)
|
||||
.scope('thread')
|
||||
.freeform('# Conversation Notes\n- **Topic**:\n- **Key facts**:');
|
||||
|
||||
const agent = new Agent('custom-mem-thread-wm')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `custom-twm-${Date.now()}`;
|
||||
const resourceId = 'user-twm-1';
|
||||
|
||||
await agent.generate('The project codename is AURORA. Just acknowledge.', {
|
||||
persistence: { threadId, resourceId },
|
||||
});
|
||||
|
||||
// Working memory should be stored keyed by threadId
|
||||
const wmByThread = store.workingMemory.get(threadId);
|
||||
expect(wmByThread).toBeDefined();
|
||||
expect(wmByThread!.toLowerCase()).toContain('aurora');
|
||||
|
||||
// Different thread for same resource — should NOT see the previous working memory
|
||||
const thread2 = `custom-twm2-${Date.now()}`;
|
||||
const result = await agent.generate(
|
||||
'What is the project codename? Answer "unknown" if you have no information.',
|
||||
{ persistence: { threadId: thread2, resourceId } },
|
||||
);
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).not.toContain('aurora');
|
||||
|
||||
// Thread 2 working memory should be independent
|
||||
expect(store.workingMemory.get(thread2)).not.toContain('aurora');
|
||||
});
|
||||
|
||||
it('thread-scoped working memory allows recall within the same thread when history is truncated', async () => {
|
||||
// Use lastMessages: 1 so earlier turns are pushed out of the history window.
|
||||
// The agent must rely on working memory — not chat history — to recall old facts.
|
||||
const memory = new Memory()
|
||||
.storage(store)
|
||||
.lastMessages(1)
|
||||
.scope('thread')
|
||||
.freeform('# Key facts\n- **Secret word**:\n- **User name**:');
|
||||
|
||||
const agent = new Agent('custom-mem-thread-wm-recall')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are a helpful assistant. Be concise. ' +
|
||||
'Always update your working memory with any important facts you learn.',
|
||||
)
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `custom-twm-recall-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'user-twm-recall' } };
|
||||
|
||||
// Turn 1: share a fact — agent writes it into working memory
|
||||
await agent.generate('The secret word is COBALT. Remember it. Just acknowledge.', options);
|
||||
|
||||
// Turn 2: filler turn — this pushes turn 1 out of the 1-message history window
|
||||
await agent.generate('Just say "ok".', options);
|
||||
|
||||
// Turn 3: ask for the fact — only working memory can supply it now (turn 1 is truncated)
|
||||
const result = await agent.generate('What was the secret word I told you earlier?', options);
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('cobalt');
|
||||
});
|
||||
|
||||
it('works with stream() path', async () => {
|
||||
const memory = new Memory().storage(store).lastMessages(10);
|
||||
|
||||
const agent = new Agent('custom-mem-stream')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `custom-stream-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'user-stream' } };
|
||||
|
||||
// Turn 1 via stream
|
||||
const result1 = await agent.stream('The capital of France is Paris. Acknowledge.', options);
|
||||
const reader = result1.stream.getReader();
|
||||
while (true) {
|
||||
const { done } = await reader.read();
|
||||
if (done) break;
|
||||
}
|
||||
|
||||
// Turn 2 via generate — should recall from custom store
|
||||
const result = await agent.generate('What is the capital of France?', options);
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('paris');
|
||||
|
||||
expect(store.messages.get(threadId)!.length).toBeGreaterThanOrEqual(2);
|
||||
});
|
||||
|
||||
it('works when passed directly to agent.memory() as bare BuiltMemory', async () => {
|
||||
// Skip the Memory builder entirely — pass the raw BuiltMemory object
|
||||
const agent = new Agent('custom-mem-bare')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(store);
|
||||
|
||||
const threadId = `custom-bare-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'user-bare' } };
|
||||
|
||||
await agent.generate('Remember: the answer is 42. Acknowledge.', options);
|
||||
|
||||
const result = await agent.generate('What is the answer?', options);
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('42');
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
import { expect, it, afterEach } from 'vitest';
|
||||
|
||||
import { Agent, Memory } from '../../../index';
|
||||
import { SqliteMemory } from '../../../storage/sqlite-memory';
|
||||
import { describeIf, findLastTextContent, getModel, createSqliteMemory } from '../helpers';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
const cleanups: Array<() => void> = [];
|
||||
afterEach(() => {
|
||||
cleanups.forEach((fn) => fn());
|
||||
cleanups.length = 0;
|
||||
});
|
||||
|
||||
describe('freeform working memory', () => {
|
||||
const template = '# User Context\n- **Name**:\n- **City**:\n- **Pet**:';
|
||||
|
||||
it('agent recalls info via working memory across turns', async () => {
|
||||
const memory = new Memory().storage('memory').lastMessages(10).freeform(template);
|
||||
|
||||
const agent = new Agent('freeform-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `freeform-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'test-user' } };
|
||||
|
||||
await agent.generate('My name is Alice and I live in Berlin.', options);
|
||||
const result = await agent.generate('What city do I live in?', options);
|
||||
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('berlin');
|
||||
});
|
||||
|
||||
it('working memory tags are stripped from visible response', async () => {
|
||||
const memory = new Memory().storage('memory').lastMessages(10).freeform(template);
|
||||
|
||||
const agent = new Agent('strip-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `strip-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'test-user' } };
|
||||
|
||||
const result = await agent.generate('My name is Bob.', options);
|
||||
|
||||
const allText = result.messages
|
||||
.flatMap((m) => ('content' in m ? m.content : []))
|
||||
.filter((c) => c.type === 'text')
|
||||
.map((c) => (c as { text: string }).text)
|
||||
.join(' ');
|
||||
expect(allText).not.toContain('<working_memory>');
|
||||
expect(allText).not.toContain('</working_memory>');
|
||||
});
|
||||
|
||||
it('working memory persists across threads with same resourceId', async () => {
|
||||
const { memory, cleanup } = createSqliteMemory();
|
||||
cleanups.push(cleanup);
|
||||
|
||||
const mem = new Memory().storage(memory).lastMessages(10).freeform(template);
|
||||
const agent = new Agent('cross-thread-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(mem);
|
||||
|
||||
const resourceId = `user-${Date.now()}`;
|
||||
|
||||
await agent.generate('My name is Charlie and I have a dog named Rex.', {
|
||||
persistence: { threadId: `thread-1-${Date.now()}`, resourceId },
|
||||
});
|
||||
|
||||
const result = await agent.generate("What's my dog's name?", {
|
||||
persistence: { threadId: `thread-2-${Date.now()}`, resourceId },
|
||||
});
|
||||
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('rex');
|
||||
});
|
||||
|
||||
it('working memory survives SqliteMemory restart', async () => {
|
||||
const { memory, cleanup, url } = createSqliteMemory();
|
||||
cleanups.push(cleanup);
|
||||
|
||||
const mem = new Memory().storage(memory).lastMessages(10).freeform(template);
|
||||
const agent1 = new Agent('restart-wm-1')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(mem);
|
||||
|
||||
const resourceId = `user-${Date.now()}`;
|
||||
const threadId = `restart-wm-${Date.now()}`;
|
||||
|
||||
await agent1.generate('My name is Diana.', { persistence: { threadId, resourceId } });
|
||||
|
||||
const memory2 = new SqliteMemory({ url });
|
||||
const mem2 = new Memory().storage(memory2).lastMessages(10).freeform(template);
|
||||
const agent2 = new Agent('restart-wm-2')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(mem2);
|
||||
|
||||
const result = await agent2.generate('What is my name?', {
|
||||
persistence: { threadId: `new-thread-${Date.now()}`, resourceId },
|
||||
});
|
||||
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('diana');
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,627 @@
|
|||
/**
|
||||
* Integration test: PostgresMemory with pgvector semantic recall.
|
||||
*
|
||||
* Uses testcontainers to spin up a real Postgres instance with pgvector,
|
||||
* then runs the agent against it to verify full end-to-end memory behavior.
|
||||
*/
|
||||
import { execSync } from 'node:child_process';
|
||||
import { Pool } from 'pg';
|
||||
import { GenericContainer, Wait, type StartedTestContainer } from 'testcontainers';
|
||||
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
|
||||
|
||||
import { Agent, Memory, PostgresMemory } from '../../../index';
|
||||
import { describeIf, findLastTextContent, getModel } from '../helpers';
|
||||
|
||||
const describeWithApi = describeIf('anthropic');
|
||||
|
||||
/**
|
||||
* Check if Docker is available synchronously. testcontainers requires a running
|
||||
* Docker daemon; skip the entire file in environments without it.
|
||||
*/
|
||||
function isDockerAvailable(): boolean {
|
||||
try {
|
||||
execSync('docker info', { stdio: 'ignore' });
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const hasDocker = isDockerAvailable();
|
||||
|
||||
let container: StartedTestContainer;
|
||||
let connectionString: string;
|
||||
|
||||
beforeAll(async () => {
|
||||
if (!hasDocker) return;
|
||||
|
||||
container = await new GenericContainer('pgvector/pgvector:pg17')
|
||||
.withExposedPorts(5432)
|
||||
.withEnvironment({
|
||||
POSTGRES_USER: 'test',
|
||||
POSTGRES_PASSWORD: 'test',
|
||||
POSTGRES_DB: 'testdb',
|
||||
})
|
||||
// Postgres emits this message twice: once during initdb (temporary) and once when truly ready.
|
||||
// Waiting for the second occurrence ensures we don't connect during the brief restart window.
|
||||
.withWaitStrategy(Wait.forLogMessage('database system is ready to accept connections', 2))
|
||||
.start();
|
||||
|
||||
const host = container.getHost();
|
||||
const port = container.getMappedPort(5432);
|
||||
connectionString = `postgresql://test:test@${host}:${port}/testdb`;
|
||||
}, 60_000);
|
||||
|
||||
afterAll(async () => {
|
||||
try {
|
||||
if (container) await container.stop();
|
||||
} catch (error) {
|
||||
console.error('Error stopping container:', error);
|
||||
}
|
||||
}, 30_000);
|
||||
|
||||
/** describe that requires Docker — tests are no-ops without it. */
|
||||
function describeWithDocker(name: string, fn: () => void) {
|
||||
describe(name, () => {
|
||||
if (!hasDocker) {
|
||||
it('skipped — Docker not available', () => {});
|
||||
return;
|
||||
}
|
||||
fn();
|
||||
});
|
||||
}
|
||||
|
||||
describeWithDocker('PostgresMemory saveThread upsert', () => {
|
||||
it('preserves existing title and metadata when not provided', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'upsert_test' });
|
||||
|
||||
await mem.saveThread({
|
||||
id: 'upsert-t1',
|
||||
resourceId: 'user-1',
|
||||
title: 'Original Title',
|
||||
metadata: { key: 'value' },
|
||||
});
|
||||
|
||||
// Upsert without title or metadata (simulates saveMessagesToThread)
|
||||
await mem.saveThread({ id: 'upsert-t1', resourceId: 'user-1' });
|
||||
|
||||
const thread = await mem.getThread('upsert-t1');
|
||||
expect(thread).not.toBeNull();
|
||||
expect(thread!.title).toBe('Original Title');
|
||||
expect(thread!.metadata).toEqual({ key: 'value' });
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('overwrites title and metadata when explicitly provided', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'upsert_ow' });
|
||||
|
||||
await mem.saveThread({
|
||||
id: 'upsert-t2',
|
||||
resourceId: 'user-1',
|
||||
title: 'Old Title',
|
||||
metadata: { old: true },
|
||||
});
|
||||
|
||||
await mem.saveThread({
|
||||
id: 'upsert-t2',
|
||||
resourceId: 'user-1',
|
||||
title: 'New Title',
|
||||
metadata: { new: true },
|
||||
});
|
||||
|
||||
const thread = await mem.getThread('upsert-t2');
|
||||
expect(thread!.title).toBe('New Title');
|
||||
expect(thread!.metadata).toEqual({ new: true });
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
});
|
||||
|
||||
describeWithDocker('PostgresMemory unit tests', () => {
|
||||
it('creates tables on first use and round-trips a thread', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString });
|
||||
|
||||
const thread = await mem.saveThread({
|
||||
id: 'thread-1',
|
||||
resourceId: 'user-1',
|
||||
title: 'Test Thread',
|
||||
});
|
||||
|
||||
expect(thread.id).toBe('thread-1');
|
||||
expect(thread.createdAt).toBeInstanceOf(Date);
|
||||
|
||||
const loaded = await mem.getThread('thread-1');
|
||||
expect(loaded).not.toBeNull();
|
||||
expect(loaded!.title).toBe('Test Thread');
|
||||
expect(loaded!.resourceId).toBe('user-1');
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('saves and retrieves messages with limit', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'msg_test' });
|
||||
|
||||
await mem.saveThread({ id: 't1', resourceId: 'u1' });
|
||||
|
||||
const messages = [
|
||||
{ role: 'user' as const, content: [{ type: 'text' as const, text: 'Hello' }] },
|
||||
{ role: 'assistant' as const, content: [{ type: 'text' as const, text: 'Hi there' }] },
|
||||
{ role: 'user' as const, content: [{ type: 'text' as const, text: 'How are you?' }] },
|
||||
];
|
||||
|
||||
await mem.saveMessages({ threadId: 't1', messages });
|
||||
|
||||
// Get last 2 messages
|
||||
const last2 = await mem.getMessages('t1', { limit: 2 });
|
||||
expect(last2).toHaveLength(2);
|
||||
|
||||
// Get all messages
|
||||
const all = await mem.getMessages('t1');
|
||||
expect(all).toHaveLength(3);
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('saves and retrieves working memory keyed by resourceId', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_test' });
|
||||
|
||||
expect(
|
||||
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' }),
|
||||
).toBeNull();
|
||||
|
||||
await mem.saveWorkingMemory(
|
||||
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' },
|
||||
'# Profile\n- Name: Alice',
|
||||
);
|
||||
expect(
|
||||
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' }),
|
||||
).toBe('# Profile\n- Name: Alice');
|
||||
|
||||
// Overwrite
|
||||
await mem.saveWorkingMemory(
|
||||
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' },
|
||||
'# Profile\n- Name: Alice\n- Role: Engineer',
|
||||
);
|
||||
expect(
|
||||
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'resource' }),
|
||||
).toContain('Engineer');
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('saves and retrieves working memory keyed by threadId (no resourceId)', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_thread_test' });
|
||||
|
||||
expect(
|
||||
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' }),
|
||||
).toBeNull();
|
||||
|
||||
await mem.saveWorkingMemory(
|
||||
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' },
|
||||
'thread context',
|
||||
);
|
||||
expect(
|
||||
await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' }),
|
||||
).toBe('thread context');
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('isolates working memory by resourceId', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_iso_test' });
|
||||
|
||||
await mem.saveWorkingMemory(
|
||||
{ threadId: 'thread-a', resourceId: 'user-a', scope: 'resource' },
|
||||
'data for user-a',
|
||||
);
|
||||
await mem.saveWorkingMemory(
|
||||
{ threadId: 'thread-b', resourceId: 'user-b', scope: 'resource' },
|
||||
'data for user-b',
|
||||
);
|
||||
|
||||
expect(
|
||||
await mem.getWorkingMemory({ threadId: 'thread-a', resourceId: 'user-a', scope: 'resource' }),
|
||||
).toBe('data for user-a');
|
||||
expect(
|
||||
await mem.getWorkingMemory({ threadId: 'thread-b', resourceId: 'user-b', scope: 'resource' }),
|
||||
).toBe('data for user-b');
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('stores scope=resource when resourceId is provided', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'wm_scope_test' });
|
||||
|
||||
await mem.saveWorkingMemory(
|
||||
{ threadId: 'thread-1', resourceId: 'res-1', scope: 'resource' },
|
||||
'resource content',
|
||||
);
|
||||
|
||||
const pool = new Pool({ connectionString });
|
||||
const result = await pool.query<{ scope: string }>(
|
||||
'SELECT scope FROM wm_scope_test_working_memory WHERE key = $1',
|
||||
['res-1'],
|
||||
);
|
||||
expect(result.rows[0].scope).toBe('resource');
|
||||
await pool.end();
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('stores scope=thread when only threadId is provided', async () => {
|
||||
const mem = new PostgresMemory({
|
||||
connection: connectionString,
|
||||
namespace: 'wm_scope_thread_test',
|
||||
});
|
||||
|
||||
await mem.saveWorkingMemory(
|
||||
{ threadId: 'thread-1', resourceId: 'user-1', scope: 'thread' },
|
||||
'thread content',
|
||||
);
|
||||
|
||||
const pool = new Pool({ connectionString });
|
||||
const result = await pool.query<{ scope: string }>(
|
||||
'SELECT scope FROM wm_scope_thread_test_working_memory WHERE key = $1',
|
||||
['thread-1'],
|
||||
);
|
||||
expect(result.rows[0].scope).toBe('thread');
|
||||
await pool.end();
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('does not mix resource-scoped and thread-scoped entries with the same key value', async () => {
|
||||
const mem = new PostgresMemory({
|
||||
connection: connectionString,
|
||||
namespace: 'wm_scope_iso_test',
|
||||
});
|
||||
const sharedKey = 'same-id';
|
||||
|
||||
await mem.saveWorkingMemory(
|
||||
{ threadId: 'thread-1', resourceId: sharedKey, scope: 'resource' },
|
||||
'resource data',
|
||||
);
|
||||
await mem.saveWorkingMemory(
|
||||
{ threadId: sharedKey, resourceId: sharedKey, scope: 'thread' },
|
||||
'thread data',
|
||||
);
|
||||
|
||||
expect(
|
||||
await mem.getWorkingMemory({
|
||||
threadId: 'thread-1',
|
||||
resourceId: sharedKey,
|
||||
scope: 'resource',
|
||||
}),
|
||||
).toBe('resource data');
|
||||
expect(
|
||||
await mem.getWorkingMemory({ threadId: sharedKey, resourceId: sharedKey, scope: 'thread' }),
|
||||
).toBe('thread data');
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('deletes thread and cascades to messages', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'del_test' });
|
||||
|
||||
await mem.saveThread({ id: 'del-t1', resourceId: 'u1' });
|
||||
await mem.saveMessages({
|
||||
threadId: 'del-t1',
|
||||
messages: [{ role: 'user' as const, content: [{ type: 'text' as const, text: 'test' }] }],
|
||||
});
|
||||
|
||||
await mem.deleteThread('del-t1');
|
||||
|
||||
expect(await mem.getThread('del-t1')).toBeNull();
|
||||
expect(await mem.getMessages('del-t1')).toHaveLength(0);
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('stores and queries embeddings with pgvector', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_test' });
|
||||
|
||||
await mem.saveThread({ id: 'vec-t1', resourceId: 'u1' });
|
||||
|
||||
// Save some embeddings (3-dimensional for simplicity)
|
||||
await mem.saveEmbeddings({
|
||||
threadId: 'vec-t1',
|
||||
resourceId: 'u1',
|
||||
entries: [
|
||||
{ id: 'msg-1', vector: [1.0, 0.0, 0.0], text: 'About cats', model: 'test' },
|
||||
{ id: 'msg-2', vector: [0.0, 1.0, 0.0], text: 'About dogs', model: 'test' },
|
||||
{ id: 'msg-3', vector: [0.9, 0.1, 0.0], text: 'About kittens', model: 'test' },
|
||||
],
|
||||
});
|
||||
|
||||
// Query for vectors close to [1, 0, 0] — should return msg-1 and msg-3 first
|
||||
const results = await mem.queryEmbeddings({
|
||||
scope: 'resource',
|
||||
resourceId: 'u1',
|
||||
vector: [1.0, 0.0, 0.0],
|
||||
topK: 2,
|
||||
});
|
||||
|
||||
expect(results).toHaveLength(2);
|
||||
expect(results[0].id).toBe('msg-1');
|
||||
expect(results[0].score).toBeGreaterThan(0.9);
|
||||
// msg-3 should be second (cosine similarity ~0.99 with [0.9, 0.1, 0])
|
||||
expect(results[1].id).toBe('msg-3');
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('filters embeddings by resourceId with scope=resource (default)', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_res' });
|
||||
|
||||
await mem.saveEmbeddings({
|
||||
threadId: 't1',
|
||||
resourceId: 'user-a',
|
||||
entries: [{ id: 'msg-a1', vector: [1.0, 0.0, 0.0], text: 'User A thread 1', model: 'test' }],
|
||||
});
|
||||
await mem.saveEmbeddings({
|
||||
threadId: 't2',
|
||||
resourceId: 'user-a',
|
||||
entries: [{ id: 'msg-a2', vector: [0.9, 0.1, 0.0], text: 'User A thread 2', model: 'test' }],
|
||||
});
|
||||
await mem.saveEmbeddings({
|
||||
threadId: 't3',
|
||||
resourceId: 'user-b',
|
||||
entries: [{ id: 'msg-b1', vector: [1.0, 0.0, 0.0], text: 'User B thread 3', model: 'test' }],
|
||||
});
|
||||
|
||||
// Default scope is 'resource' — should return both user-a embeddings across threads
|
||||
const results = await mem.queryEmbeddings({
|
||||
resourceId: 'user-a',
|
||||
vector: [1.0, 0.0, 0.0],
|
||||
topK: 10,
|
||||
});
|
||||
|
||||
expect(results).toHaveLength(2);
|
||||
const ids = results.map((r) => r.id);
|
||||
expect(ids).toContain('msg-a1');
|
||||
expect(ids).toContain('msg-a2');
|
||||
expect(ids).not.toContain('msg-b1');
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('filters embeddings by threadId with scope=thread', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_thr' });
|
||||
|
||||
await mem.saveEmbeddings({
|
||||
threadId: 't1',
|
||||
resourceId: 'user-1',
|
||||
entries: [
|
||||
{ id: 'msg-t1a', vector: [1.0, 0.0, 0.0], text: 'Thread 1 A', model: 'test' },
|
||||
{ id: 'msg-t1b', vector: [0.0, 1.0, 0.0], text: 'Thread 1 B', model: 'test' },
|
||||
],
|
||||
});
|
||||
await mem.saveEmbeddings({
|
||||
threadId: 't2',
|
||||
resourceId: 'user-1',
|
||||
entries: [{ id: 'msg-t2', vector: [1.0, 0.0, 0.0], text: 'Thread 2', model: 'test' }],
|
||||
});
|
||||
|
||||
const results = await mem.queryEmbeddings({
|
||||
scope: 'thread',
|
||||
threadId: 't1',
|
||||
vector: [1.0, 0.0, 0.0],
|
||||
topK: 10,
|
||||
});
|
||||
|
||||
expect(results).toHaveLength(2);
|
||||
const ids = results.map((r) => r.id);
|
||||
expect(ids).toContain('msg-t1a');
|
||||
expect(ids).toContain('msg-t1b');
|
||||
expect(ids).not.toContain('msg-t2');
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('resource scope excludes embeddings from other resources', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_iso' });
|
||||
|
||||
await mem.saveEmbeddings({
|
||||
threadId: 't1',
|
||||
resourceId: 'res-1',
|
||||
entries: [{ id: 'msg-r1', vector: [1.0, 0.0, 0.0], text: 'Resource 1', model: 'test' }],
|
||||
});
|
||||
await mem.saveEmbeddings({
|
||||
threadId: 't2',
|
||||
resourceId: 'res-2',
|
||||
entries: [{ id: 'msg-r2', vector: [1.0, 0.0, 0.0], text: 'Resource 2', model: 'test' }],
|
||||
});
|
||||
|
||||
const results = await mem.queryEmbeddings({
|
||||
scope: 'resource',
|
||||
resourceId: 'res-1',
|
||||
vector: [1.0, 0.0, 0.0],
|
||||
topK: 10,
|
||||
});
|
||||
|
||||
expect(results).toHaveLength(1);
|
||||
expect(results[0].id).toBe('msg-r1');
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('stores resourceId in the embeddings table', async () => {
|
||||
const mem = new PostgresMemory({ connection: connectionString, namespace: 'vec_col' });
|
||||
|
||||
await mem.saveEmbeddings({
|
||||
threadId: 't1',
|
||||
resourceId: 'my-resource',
|
||||
entries: [
|
||||
{ id: 'msg-check', vector: [1.0, 0.0, 0.0], text: 'Check resourceId', model: 'test' },
|
||||
],
|
||||
});
|
||||
|
||||
const pool = new Pool({ connectionString });
|
||||
const result = await pool.query<{ resourceId: string }>(
|
||||
'SELECT "resourceId" FROM vec_col_message_embeddings WHERE id = $1',
|
||||
['msg-check'],
|
||||
);
|
||||
expect(result.rows[0].resourceId).toBe('my-resource');
|
||||
await pool.end();
|
||||
|
||||
await mem.close();
|
||||
});
|
||||
|
||||
it('isolates namespaces', async () => {
|
||||
const mem1 = new PostgresMemory({ connection: connectionString, namespace: 'ns_a' });
|
||||
const mem2 = new PostgresMemory({ connection: connectionString, namespace: 'ns_b' });
|
||||
|
||||
await mem1.saveThread({ id: 'shared-id', resourceId: 'u1', title: 'From A' });
|
||||
await mem2.saveThread({ id: 'shared-id', resourceId: 'u1', title: 'From B' });
|
||||
|
||||
expect((await mem1.getThread('shared-id'))!.title).toBe('From A');
|
||||
expect((await mem2.getThread('shared-id'))!.title).toBe('From B');
|
||||
|
||||
await mem1.close();
|
||||
await mem2.close();
|
||||
});
|
||||
});
|
||||
|
||||
/** describe that requires both Docker and an Anthropic API key. */
|
||||
function describeWithDockerAndApi(name: string, fn: () => void) {
|
||||
const describeOrSkip = describeWithApi;
|
||||
describeOrSkip(name, () => {
|
||||
if (!hasDocker) {
|
||||
it('skipped — Docker not available', () => {});
|
||||
return;
|
||||
}
|
||||
fn();
|
||||
});
|
||||
}
|
||||
|
||||
describeWithDockerAndApi('PostgresMemory agent integration', () => {
|
||||
it('recalls previous messages across turns', async () => {
|
||||
const store = new PostgresMemory({ connection: connectionString, namespace: 'agent_recall' });
|
||||
const memory = new Memory().storage(store).lastMessages(10);
|
||||
|
||||
const agent = new Agent('pg-recall-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `pg-thread-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'pg-user-1' } };
|
||||
|
||||
await agent.generate('My favorite planet is Saturn. Just acknowledge.', options);
|
||||
const result = await agent.generate('What is my favorite planet?', options);
|
||||
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('saturn');
|
||||
|
||||
await store.close();
|
||||
});
|
||||
|
||||
it('persists resource-scoped working memory via Postgres backend', async () => {
|
||||
const store = new PostgresMemory({ connection: connectionString, namespace: 'agent_wm' });
|
||||
const memory = new Memory()
|
||||
.storage(store)
|
||||
.lastMessages(10)
|
||||
.scope('resource')
|
||||
.freeform('# User Profile\n- **Name**:\n- **Hobby**:');
|
||||
|
||||
const agent = new Agent('pg-wm-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `pg-wm-${Date.now()}`;
|
||||
const resourceId = 'pg-wm-user';
|
||||
|
||||
await agent.generate('My name is Hiro and I enjoy cycling.', {
|
||||
persistence: { threadId, resourceId },
|
||||
});
|
||||
|
||||
// Working memory should be persisted in Postgres (keyed by resourceId)
|
||||
const wm = await store.getWorkingMemory({ threadId, resourceId, scope: 'resource' });
|
||||
expect(wm).toBeDefined();
|
||||
expect(wm!.toLowerCase()).toContain('hiro');
|
||||
|
||||
// New thread, same resourceId — resource-scoped working memory carries over
|
||||
const result = await agent.generate('What is my name?', {
|
||||
persistence: { threadId: `pg-wm2-${Date.now()}`, resourceId },
|
||||
});
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('hiro');
|
||||
|
||||
await store.close();
|
||||
});
|
||||
|
||||
it('persists thread-scoped working memory via Postgres backend', async () => {
|
||||
const store = new PostgresMemory({
|
||||
connection: connectionString,
|
||||
namespace: 'agent_thread_wm',
|
||||
});
|
||||
const memory = new Memory()
|
||||
.storage(store)
|
||||
.lastMessages(10)
|
||||
.scope('thread')
|
||||
.freeform('# Conversation Notes\n- **Topic**:\n- **Key facts**:');
|
||||
|
||||
const agent = new Agent('pg-thread-wm-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise. Always update your working memory.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `pg-twm-${Date.now()}`;
|
||||
const resourceId = 'pg-twm-user';
|
||||
|
||||
await agent.generate('The secret project name is HELIOS. Just acknowledge.', {
|
||||
persistence: { threadId, resourceId },
|
||||
});
|
||||
|
||||
// Working memory should be stored keyed by threadId
|
||||
const wmByThread = await store.getWorkingMemory({ threadId, resourceId, scope: 'thread' });
|
||||
expect(wmByThread).toBeDefined();
|
||||
expect(wmByThread!.toLowerCase()).toContain('helios');
|
||||
|
||||
// resourceId key should be empty — nothing stored there
|
||||
const wmByResource = await store.getWorkingMemory({ threadId, resourceId, scope: 'resource' });
|
||||
expect(wmByResource).toBeNull();
|
||||
|
||||
// New thread for same resource — should NOT carry over thread-scoped working memory
|
||||
const thread2 = `pg-twm2-${Date.now()}`;
|
||||
const result = await agent.generate(
|
||||
'What is the project name? Answer "unknown" if you have no information.',
|
||||
{ persistence: { threadId: thread2, resourceId } },
|
||||
);
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).not.toContain('helios');
|
||||
|
||||
await store.close();
|
||||
});
|
||||
|
||||
it('works with stream() path', async () => {
|
||||
const store = new PostgresMemory({ connection: connectionString, namespace: 'agent_stream' });
|
||||
const memory = new Memory().storage(store).lastMessages(10);
|
||||
|
||||
const agent = new Agent('pg-stream-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `pg-stream-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'pg-stream-user' } };
|
||||
|
||||
// Turn 1 via stream
|
||||
const { stream } = await agent.stream(
|
||||
'The speed of light is approximately 300,000 km/s. Acknowledge.',
|
||||
options,
|
||||
);
|
||||
const reader = stream.getReader();
|
||||
while (true) {
|
||||
const { done } = await reader.read();
|
||||
if (done) break;
|
||||
}
|
||||
|
||||
// Turn 2 via generate — should recall
|
||||
const genResult = await agent.generate('What is the speed of light approximately?', options);
|
||||
const text = findLastTextContent(genResult.messages);
|
||||
expect(text).toBeTruthy();
|
||||
expect(text!.toLowerCase()).toContain('300');
|
||||
|
||||
await store.close();
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
import { expect, it, afterEach, describe as _describe } from 'vitest';
|
||||
|
||||
import { Agent, Memory } from '../../../index';
|
||||
import { findLastTextContent, getModel, createSqliteMemory } from '../helpers';
|
||||
|
||||
// Only run when both API keys are present
|
||||
const describe =
|
||||
process.env.ANTHROPIC_API_KEY && process.env.OPENAI_API_KEY ? _describe : _describe.skip;
|
||||
|
||||
const cleanups: Array<() => void> = [];
|
||||
afterEach(() => {
|
||||
cleanups.forEach((fn) => fn());
|
||||
cleanups.length = 0;
|
||||
});
|
||||
|
||||
describe('semantic recall', () => {
|
||||
it('recalls relevant info beyond the lastMessages window', async () => {
|
||||
const { memory, cleanup } = createSqliteMemory();
|
||||
cleanups.push(cleanup);
|
||||
|
||||
const mem = new Memory()
|
||||
.storage(memory)
|
||||
.lastMessages(3)
|
||||
.semanticRecall({ topK: 3, embedder: 'openai/text-embedding-3-small' });
|
||||
|
||||
const agent = new Agent('semantic-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise. Answer from your context.')
|
||||
.memory(mem);
|
||||
|
||||
const threadId = `semantic-${Date.now()}`;
|
||||
const resourceId = 'test-user';
|
||||
const options = { persistence: { threadId, resourceId } };
|
||||
|
||||
// Turn 1: unique fact that will be pushed out of the 3-message window
|
||||
await agent.generate(
|
||||
'The annual rainfall in Timbuktu is approximately 200mm. Just acknowledge.',
|
||||
options,
|
||||
);
|
||||
|
||||
// Filler turns to push turn 1 out of the lastMessages window
|
||||
await agent.generate('What is 2 + 2?', options);
|
||||
await agent.generate('Tell me a one-word synonym for happy.', options);
|
||||
await agent.generate('What color is the sky?', options);
|
||||
|
||||
// Ask about the fact from turn 1 — should be recalled via semantic search
|
||||
const result = await agent.generate('What is the annual rainfall in Timbuktu?', options);
|
||||
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('200');
|
||||
});
|
||||
|
||||
it('works combined with freeform working memory', async () => {
|
||||
const { memory, cleanup } = createSqliteMemory();
|
||||
cleanups.push(cleanup);
|
||||
|
||||
const template = '# User Context\n- **Name**:\n- **Interest**:';
|
||||
|
||||
const mem = new Memory()
|
||||
.storage(memory)
|
||||
.lastMessages(3)
|
||||
.freeform(template)
|
||||
.semanticRecall({ topK: 3, embedder: 'openai/text-embedding-3-small' });
|
||||
|
||||
const agent = new Agent('combined-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(mem);
|
||||
|
||||
const threadId = `combined-${Date.now()}`;
|
||||
const resourceId = `user-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId } };
|
||||
|
||||
// Turn 1: name (working memory) + unique fact (semantic recall)
|
||||
await agent.generate(
|
||||
'My name is Frank. Also, the capital of Bhutan is Thimphu. Just acknowledge both.',
|
||||
options,
|
||||
);
|
||||
|
||||
// Filler turns
|
||||
await agent.generate('What is 3 + 3?', options);
|
||||
await agent.generate('Name a primary color.', options);
|
||||
await agent.generate('What day comes after Monday?', options);
|
||||
|
||||
// Ask about both — name from working memory, fact from semantic recall
|
||||
const result = await agent.generate(
|
||||
'What is my name, and what is the capital of Bhutan?',
|
||||
options,
|
||||
);
|
||||
|
||||
const text = findLastTextContent(result.messages)?.toLowerCase() ?? '';
|
||||
expect(text).toContain('frank');
|
||||
expect(text).toContain('thimphu');
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
import { describe as _describe, expect, it, afterEach } from 'vitest';
|
||||
|
||||
import { Agent, Memory } from '../../../index';
|
||||
import { SqliteMemory } from '../../../storage/sqlite-memory';
|
||||
import { describeIf, findLastTextContent, getModel, createSqliteMemory } from '../helpers';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
const cleanups: Array<() => void> = [];
|
||||
afterEach(() => {
|
||||
cleanups.forEach((fn) => fn());
|
||||
cleanups.length = 0;
|
||||
});
|
||||
|
||||
_describe('SqliteMemory saveThread upsert', () => {
|
||||
it('preserves existing title and metadata when not provided', async () => {
|
||||
const { memory, cleanup } = createSqliteMemory();
|
||||
cleanups.push(cleanup);
|
||||
|
||||
await memory.saveThread({
|
||||
id: 'upsert-t1',
|
||||
resourceId: 'user-1',
|
||||
title: 'Original Title',
|
||||
metadata: { key: 'value' },
|
||||
});
|
||||
|
||||
// Upsert without title or metadata (simulates saveMessagesToThread)
|
||||
await memory.saveThread({ id: 'upsert-t1', resourceId: 'user-1' });
|
||||
|
||||
const thread = await memory.getThread('upsert-t1');
|
||||
expect(thread).not.toBeNull();
|
||||
expect(thread!.title).toBe('Original Title');
|
||||
expect(thread!.metadata).toEqual({ key: 'value' });
|
||||
});
|
||||
|
||||
it('overwrites title and metadata when explicitly provided', async () => {
|
||||
const { memory, cleanup } = createSqliteMemory();
|
||||
cleanups.push(cleanup);
|
||||
|
||||
await memory.saveThread({
|
||||
id: 'upsert-t2',
|
||||
resourceId: 'user-1',
|
||||
title: 'Old Title',
|
||||
metadata: { old: true },
|
||||
});
|
||||
|
||||
await memory.saveThread({
|
||||
id: 'upsert-t2',
|
||||
resourceId: 'user-1',
|
||||
title: 'New Title',
|
||||
metadata: { new: true },
|
||||
});
|
||||
|
||||
const thread = await memory.getThread('upsert-t2');
|
||||
expect(thread!.title).toBe('New Title');
|
||||
expect(thread!.metadata).toEqual({ new: true });
|
||||
});
|
||||
});
|
||||
|
||||
describe('SQLite memory integration', () => {
|
||||
it('agent recalls info from previous turn with SqliteMemory', async () => {
|
||||
const { memory, cleanup } = createSqliteMemory();
|
||||
cleanups.push(cleanup);
|
||||
|
||||
const mem = new Memory().storage(memory).lastMessages(10);
|
||||
const agent = new Agent('sqlite-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(mem);
|
||||
|
||||
const threadId = `sqlite-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'test-user' } };
|
||||
|
||||
await agent.generate('My favorite number is 42. Just acknowledge.', options);
|
||||
const result = await agent.generate('What is my favorite number?', options);
|
||||
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('42');
|
||||
});
|
||||
|
||||
it('data survives a fresh SqliteMemory instance', async () => {
|
||||
const { memory, cleanup, url } = createSqliteMemory();
|
||||
cleanups.push(cleanup);
|
||||
|
||||
const mem1 = new Memory().storage(memory).lastMessages(10);
|
||||
const agent1 = new Agent('persist-test-1')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(mem1);
|
||||
|
||||
const threadId = `persist-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'test-user' } };
|
||||
await agent1.generate('My favorite animal is a dolphin. Just acknowledge.', options);
|
||||
|
||||
// New SqliteMemory instance, same file
|
||||
const memory2 = new SqliteMemory({ url });
|
||||
const mem2 = new Memory().storage(memory2).lastMessages(10);
|
||||
const agent2 = new Agent('persist-test-2')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(mem2);
|
||||
|
||||
const result = await agent2.generate('What is my favorite animal?', options);
|
||||
expect(findLastTextContent(result.messages)?.toLowerCase()).toContain('dolphin');
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
import { expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { Agent, Memory } from '../../../index';
|
||||
import { describeIf, findLastTextContent, getModel } from '../helpers';
|
||||
|
||||
// Run this suite only when an Anthropic API key is configured; skipped otherwise.
const describe = describeIf('anthropic');
|
||||
|
||||
describe('structured working memory', () => {
|
||||
const schema = z.object({
|
||||
userName: z.string().optional().describe("The user's name"),
|
||||
favoriteColor: z.string().optional().describe('Favorite color'),
|
||||
location: z.string().optional().describe('Where the user lives'),
|
||||
});
|
||||
|
||||
it('agent fills structured fields across turns', async () => {
|
||||
const memory = new Memory().storage('memory').lastMessages(10).structured(schema);
|
||||
|
||||
const agent = new Agent('structured-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `structured-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'test-user' } };
|
||||
|
||||
await agent.generate('My name is Eve and I love purple.', options);
|
||||
const result = await agent.generate('What is my name and favorite color?', options);
|
||||
|
||||
const text = findLastTextContent(result.messages)?.toLowerCase() ?? '';
|
||||
expect(text).toContain('eve');
|
||||
expect(text).toContain('purple');
|
||||
});
|
||||
|
||||
it('throws when both .structured() and .freeform() are used', () => {
|
||||
expect(() => {
|
||||
new Memory().storage('memory').structured(schema).freeform('# Template').build();
|
||||
}).toThrow(/cannot use both/i);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,148 @@
|
|||
import { expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { Agent, Memory, Tool } from '../../../index';
|
||||
import { describeIf, findLastTextContent, findLastToolCallContent, getModel } from '../helpers';
|
||||
|
||||
// Run this suite only when an Anthropic API key is configured; skipped otherwise.
const describe = describeIf('anthropic');
|
||||
|
||||
describe('memory integration', () => {
|
||||
it('recalls previous messages within the same thread', async () => {
|
||||
const memory = new Memory().storage('memory').lastMessages(10);
|
||||
|
||||
const agent = new Agent('memory-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.')
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `test-thread-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'test-user' } };
|
||||
|
||||
const result1 = await agent.generate(
|
||||
'My favorite color is purple. Just acknowledge this.',
|
||||
options,
|
||||
);
|
||||
expect(findLastTextContent(result1.messages)).toBeTruthy();
|
||||
|
||||
const result2 = await agent.generate('What is my favorite color?', options);
|
||||
|
||||
expect(findLastTextContent(result2.messages)?.toLowerCase()).toContain('purple');
|
||||
});
|
||||
|
||||
it('isolates separate threads', async () => {
|
||||
const memory = new Memory().storage('memory').lastMessages(10);
|
||||
|
||||
const agent = new Agent('thread-isolation-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are a helpful assistant. Be concise. If you do not know something, say "I don\'t know".',
|
||||
)
|
||||
.memory(memory);
|
||||
|
||||
const thread1 = `test-thread-1-${Date.now()}`;
|
||||
const thread2 = `test-thread-2-${Date.now()}`;
|
||||
|
||||
await agent.generate('Remember this secret code: ALPHA-7. Just acknowledge.', {
|
||||
persistence: { threadId: thread1, resourceId: 'test-user' },
|
||||
});
|
||||
|
||||
const result2 = await agent.generate('What is the secret code I told you?', {
|
||||
persistence: { threadId: thread2, resourceId: 'test-user' },
|
||||
});
|
||||
|
||||
expect(findLastTextContent(result2.messages)?.toLowerCase()).not.toContain('alpha-7');
|
||||
});
|
||||
|
||||
it('recalls tool results with generate()', async () => {
|
||||
const memory = new Memory().storage('memory').lastMessages(20);
|
||||
|
||||
const lookupTool = new Tool('lookup_inventory')
|
||||
.description('Look up the current inventory count for a product')
|
||||
.input(
|
||||
z.object({
|
||||
product: z.string().describe('Product name'),
|
||||
}),
|
||||
)
|
||||
.handler(async ({ product }) => ({
|
||||
product,
|
||||
count: 42,
|
||||
warehouse: 'Building-7',
|
||||
}));
|
||||
|
||||
const agent = new Agent('store-results-run-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are an inventory assistant. Use the lookup_inventory tool when asked about stock. Be concise.',
|
||||
)
|
||||
.tool(lookupTool)
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `test-store-results-run-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'test-user' } };
|
||||
|
||||
// Turn 1: trigger the tool via generate()
|
||||
const result1 = await agent.generate('How many widgets do we have in stock?', options);
|
||||
expect(findLastTextContent(result1.messages)).toBeTruthy();
|
||||
expect(findLastToolCallContent(result1.messages)).toBeTruthy();
|
||||
|
||||
// Turn 2: ask about the tool result without re-triggering the tool
|
||||
const result2 = await agent.generate(
|
||||
'Which warehouse are the widgets stored in? Do NOT call any tools — answer from what you already know.',
|
||||
options,
|
||||
);
|
||||
|
||||
expect(findLastTextContent(result2.messages)?.toLowerCase()).toContain('building-7');
|
||||
expect(findLastToolCallContent(result2.messages)).toBeUndefined();
|
||||
});
|
||||
|
||||
it('recalls tool results with stream()', async () => {
|
||||
const memory = new Memory().storage('memory').lastMessages(20);
|
||||
|
||||
const lookupTool = new Tool('lookup_inventory')
|
||||
.description('Look up the current inventory count for a product')
|
||||
.input(
|
||||
z.object({
|
||||
product: z.string().describe('Product name'),
|
||||
}),
|
||||
)
|
||||
.handler(async ({ product }) => ({
|
||||
product,
|
||||
count: 42,
|
||||
warehouse: 'Building-7',
|
||||
}));
|
||||
|
||||
const agent = new Agent('store-results-stream-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are an inventory assistant. Use the lookup_inventory tool when asked about stock. Be concise.',
|
||||
)
|
||||
.tool(lookupTool)
|
||||
.memory(memory);
|
||||
|
||||
const threadId = `test-store-results-stream-${Date.now()}`;
|
||||
const options = { persistence: { threadId, resourceId: 'test-user' } };
|
||||
|
||||
// Turn 1: trigger the tool via stream()
|
||||
const { stream: stream1 } = await agent.stream(
|
||||
'How many widgets do we have in stock?',
|
||||
options,
|
||||
);
|
||||
// Must consume the stream AND call getResult() to trigger saveToolResultsToMemory
|
||||
const reader = stream1.getReader();
|
||||
while (true) {
|
||||
const { done } = await reader.read();
|
||||
if (done) break;
|
||||
}
|
||||
const result1 = await agent.generate('How many widgets do we have in stock?', options);
|
||||
expect(findLastToolCallContent(result1.messages)).toBeTruthy();
|
||||
|
||||
// Turn 2: ask about the tool result
|
||||
const result2 = await agent.generate(
|
||||
'Which warehouse are the widgets stored in? Do NOT call any tools — answer from what you already know.',
|
||||
options,
|
||||
);
|
||||
|
||||
expect(findLastTextContent(result2.messages)?.toLowerCase()).toContain('building-7');
|
||||
expect(findLastToolCallContent(result2.messages)).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,141 @@
|
|||
import { expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import {
|
||||
describeIf,
|
||||
collectStreamChunks,
|
||||
getModel,
|
||||
chunksOfType,
|
||||
findAllToolResults,
|
||||
collectTextDeltas,
|
||||
} from './helpers';
|
||||
import { Agent, Tool } from '../../index';
|
||||
|
||||
// Run this suite only when an Anthropic API key is configured; skipped otherwise.
const describe = describeIf('anthropic');
|
||||
|
||||
describe('multi-tool-calls integration', () => {
|
||||
it('correctly merges results when the same tool is called multiple times', async () => {
|
||||
let callCount = 0;
|
||||
|
||||
const lookupTool = new Tool('lookup_price')
|
||||
.description('Look up the price of a product by name')
|
||||
.input(z.object({ product: z.string().describe('Product name') }))
|
||||
.output(z.object({ product: z.string(), price: z.number() }))
|
||||
.handler(async ({ product }) => {
|
||||
callCount++;
|
||||
const prices: Record<string, number> = {
|
||||
apple: 1.5,
|
||||
banana: 0.75,
|
||||
cherry: 3.0,
|
||||
};
|
||||
return { product, price: prices[product.toLowerCase()] ?? 0 };
|
||||
});
|
||||
|
||||
const agent = new Agent('multi-call-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are a price checker. When asked about prices, use the lookup_price tool for EACH product separately. Be concise.',
|
||||
)
|
||||
.tool(lookupTool);
|
||||
|
||||
const { stream: fullStream } = await agent.stream(
|
||||
'What are the prices of apple, banana, and cherry? Look up each one.',
|
||||
);
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const messageChunks = chunksOfType(chunks, 'message');
|
||||
const toolCallResults = findAllToolResults(messageChunks.map((c) => c.message));
|
||||
|
||||
// Should have called the tool multiple times
|
||||
const priceCalls = toolCallResults.filter((tc) => tc.toolName === 'lookup_price');
|
||||
expect(priceCalls.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
// Each call should have its own correct output (not all pointing to the first result)
|
||||
const outputs = priceCalls.map((tc) => tc.result as { product: string; price: number });
|
||||
|
||||
// Verify that different products got different prices (index-based merging works)
|
||||
const uniquePrices = new Set(outputs.map((o) => o.price));
|
||||
expect(uniquePrices.size).toBeGreaterThanOrEqual(2);
|
||||
|
||||
// The response should mention the prices
|
||||
const text = collectTextDeltas(chunks);
|
||||
expect(text).toBeTruthy();
|
||||
expect(text).toMatch(/apple/i);
|
||||
expect(text).toMatch(/banana/i);
|
||||
expect(text).toMatch(/cherry/i);
|
||||
expect(text).toMatch(/1\.5/i);
|
||||
expect(text).toMatch(/0\.75/i);
|
||||
expect(text).toMatch(/3\.0/i);
|
||||
});
|
||||
|
||||
it('correctly merges results when different tools are called in sequence', async () => {
|
||||
const addTool = new Tool('add')
|
||||
.description('Add two numbers')
|
||||
.input(z.object({ a: z.number(), b: z.number() }))
|
||||
.handler(async ({ a, b }) => ({ result: a + b }));
|
||||
|
||||
const multiplyTool = new Tool('multiply')
|
||||
.description('Multiply two numbers')
|
||||
.input(z.object({ a: z.number(), b: z.number() }))
|
||||
.handler(async ({ a, b }) => ({ result: a * b }));
|
||||
|
||||
const agent = new Agent('mixed-tools-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are a calculator. Use the add tool for addition and multiply tool for multiplication. Be concise.',
|
||||
)
|
||||
.tool(addTool)
|
||||
.tool(multiplyTool);
|
||||
|
||||
const { stream: fullStream } = await agent.stream('What is 3 + 4 and also what is 5 * 6?');
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const messageChunks = chunksOfType(chunks, 'message');
|
||||
const toolCallResults = findAllToolResults(messageChunks.map((c) => c.message));
|
||||
|
||||
const toolCalls = toolCallResults.filter(
|
||||
(tc) => tc.toolName === 'add' || tc.toolName === 'multiply',
|
||||
);
|
||||
expect(toolCalls.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
const addCall = toolCallResults.find((tc) => tc.toolName === 'add');
|
||||
const multiplyCall = toolCallResults.find((tc) => tc.toolName === 'multiply');
|
||||
|
||||
expect(addCall).toBeDefined();
|
||||
expect(multiplyCall).toBeDefined();
|
||||
|
||||
expect((addCall!.result as { result: number }).result).toBe(7);
|
||||
expect((multiplyCall!.result as { result: number }).result).toBe(30);
|
||||
});
|
||||
|
||||
it('correctly merges results via the run() path', async () => {
|
||||
const lookupTool = new Tool('get_length')
|
||||
.description('Get the length of a string')
|
||||
.input(z.object({ text: z.string() }))
|
||||
.output(z.object({ text: z.string(), length: z.number() }))
|
||||
.handler(async ({ text }) => ({ text, length: text.length }));
|
||||
|
||||
const agent = new Agent('multi-call-run-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are a string utility. When asked about string lengths, use the get_length tool for EACH string separately. Be concise.',
|
||||
)
|
||||
.tool(lookupTool);
|
||||
|
||||
const { stream: fullStream } = await agent.stream(
|
||||
'What are the lengths of "hello" and "world"? Look up each one separately.',
|
||||
);
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const messageChunks = chunksOfType(chunks, 'message');
|
||||
const toolCallResults = findAllToolResults(messageChunks.map((c) => c.message));
|
||||
|
||||
const lengthCalls = toolCallResults.filter((tc) => tc.toolName === 'get_length');
|
||||
expect(lengthCalls.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
// Each should have correct output
|
||||
for (const call of lengthCalls) {
|
||||
const output = call.result as { text: string; length: number };
|
||||
expect(output.length).toBe(output.text.length);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,126 @@
|
|||
import { expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
describeIf,
|
||||
collectStreamChunks,
|
||||
chunksOfType,
|
||||
getModel,
|
||||
findLastTextContent,
|
||||
} from './helpers';
|
||||
import { Agent } from '../../index';
|
||||
import type { Message, StreamChunk } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
/** Convert a base64 string to Uint8Array for the AI SDK file part. */
|
||||
function base64ToUint8Array(base64: string): Uint8Array {
|
||||
return Uint8Array.from(Buffer.from(base64, 'base64'));
|
||||
}
|
||||
|
||||
// Valid 1×1 red PNG pixel
|
||||
const RED_PIXEL_BASE64 =
|
||||
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGP4z8AAAAMBAQDJ/pLvAAAAAElFTkSuQmCC';
|
||||
// Valid 1×1 blue PNG pixel
|
||||
const BLUE_PIXEL_BASE64 =
|
||||
'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR4nGNgYPgPAAEDAQAIicLsAAAAAElFTkSuQmCC';
|
||||
|
||||
describe('multimodal integration', () => {
	it('accepts an image via binary data and references it in the response', async () => {
		// Single user message: a binary PNG file part followed by a text question.
		const messages: Message[] = [
			{
				role: 'user',
				content: [
					{
						type: 'file',
						mediaType: 'image/png',
						data: base64ToUint8Array(RED_PIXEL_BASE64),
					},
					{
						type: 'text',
						text: 'What color is this image? Reply with just the color name, nothing else.',
					},
				],
			},
		];

		const agent = new Agent('vision-test')
			.model(getModel('anthropic'))
			.instructions('You are a vision assistant. Describe images concisely.');

		const { stream: fullStream } = await agent.stream(messages);

		const chunks = await collectStreamChunks(fullStream);
		const textChunks = chunksOfType(chunks, 'text-delta') as Array<
			StreamChunk & { type: 'text-delta' }
		>;
		expect(textChunks.length).toBeGreaterThan(0);

		// The model can only know the color by actually reading the image bytes.
		const text = textChunks.map((c) => c.delta).join('');
		expect(text).toBeTruthy();
		expect(text).toMatch(/red/i);
	});

	it('accepts multiple content blocks (text + image) in a single message', async () => {
		// Interleaved content: text, then an image, then more text in ONE message.
		const messages: Message[] = [
			{
				role: 'user',
				content: [
					{ type: 'text', text: 'I have two questions.' },
					{
						type: 'file',
						mediaType: 'image/png',
						data: base64ToUint8Array(BLUE_PIXEL_BASE64),
					},
					{
						type: 'text',
						text: 'Question 1: Can you see an image above? Answer only YES or NO. Question 2: What is 2+2? Answer both briefly.',
					},
				],
			},
		];

		const agent = new Agent('multi-content-test')
			.model(getModel('anthropic'))
			.instructions('You are a helpful assistant with vision capabilities. Be concise.');

		const { stream: fullStream } = await agent.stream(messages);
		const chunks = await collectStreamChunks(fullStream);
		const textChunks = chunksOfType(chunks, 'text-delta') as Array<
			StreamChunk & { type: 'text-delta' }
		>;
		expect(textChunks.length).toBeGreaterThan(0);

		// Answering both questions proves every content block reached the model.
		const text = textChunks.map((c) => c.delta).join('');
		expect(text).toBeTruthy();
		expect(text).toMatch(/4/);
		expect(text).toMatch(/yes/i);
	});

	it('passes image content through the run() path (non-streaming)', async () => {
		const messages: Message[] = [
			{
				role: 'user',
				content: [
					{
						type: 'file',
						mediaType: 'image/png',
						data: base64ToUint8Array(RED_PIXEL_BASE64),
					},
					{
						type: 'text',
						text: 'What color is this image? Reply with just the color name.',
					},
				],
			},
		];

		const agent = new Agent('vision-run-test')
			.model(getModel('anthropic'))
			.instructions('You are a vision assistant. Be concise.');

		// generate() exercises the non-streaming path; the image must survive it too.
		const result = await agent.generate(messages);
		const text = findLastTextContent(result.messages);
		expect(text).toBeTruthy();
		expect(text).toMatch(/red/i);
	});
});
|
||||
|
|
@ -0,0 +1,164 @@
|
|||
import { expect, it, afterEach } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { describeIf, getModel, createSqliteMemory } from './helpers';
|
||||
import { Agent, Memory, Tool } from '../../index';
|
||||
import type { AgentMessage } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
describe('orphaned tool messages in memory', () => {
	// Collected teardown callbacks (e.g. sqlite file removal) run after each test.
	const cleanups: Array<() => void> = [];

	afterEach(() => {
		for (const fn of cleanups) fn();
		cleanups.length = 0;
	});

	/**
	 * Build a dummy tool so the agent has a valid tool schema.
	 * The tool itself should never be called in these tests.
	 */
	function buildLookupTool() {
		return new Tool('lookup')
			.description('Look up data by id')
			.input(z.object({ id: z.string() }))
			.output(z.object({ count: z.number() }))
			.handler(async () => ({ count: 99 }));
	}

	/**
	 * Seed memory with a conversation that has tool-call / tool-result pairs
	 * surrounded by plain user/assistant exchanges.
	 *
	 * Message layout (indices 0–7):
	 * 0: user "How many widgets?"
	 * 1: assistant text + tool-call(call_1)
	 * 2: tool tool-result(call_1)
	 * 3: assistant "There are 10 widgets"
	 * 4: user "What about gadgets?"
	 * 5: assistant text + tool-call(call_2)
	 * 6: tool tool-result(call_2)
	 * 7: assistant "There are 5 gadgets"
	 */
	function buildSeedMessages(): AgentMessage[] {
		return [
			{
				role: 'user',
				content: [{ type: 'text', text: 'How many widgets do we have?' }],
			},
			{
				role: 'assistant',
				content: [
					{ type: 'text', text: 'Let me look that up.' },
					{ type: 'tool-call', toolCallId: 'call_1', toolName: 'lookup', input: { id: 'widgets' } },
				],
			},
			{
				role: 'tool',
				content: [
					{ type: 'tool-result', toolCallId: 'call_1', toolName: 'lookup', result: { count: 10 } },
				],
			},
			{
				role: 'assistant',
				content: [{ type: 'text', text: 'There are 10 widgets in stock.' }],
			},
			{
				role: 'user',
				content: [{ type: 'text', text: 'What about gadgets?' }],
			},
			{
				role: 'assistant',
				content: [
					{ type: 'text', text: 'Let me check.' },
					{ type: 'tool-call', toolCallId: 'call_2', toolName: 'lookup', input: { id: 'gadgets' } },
				],
			},
			{
				role: 'tool',
				content: [
					{ type: 'tool-result', toolCallId: 'call_2', toolName: 'lookup', result: { count: 5 } },
				],
			},
			{
				role: 'assistant',
				content: [{ type: 'text', text: 'There are 5 gadgets in stock.' }],
			},
		];
	}

	it('handles orphaned tool results when tool-call message is truncated from history', async () => {
		const { memory, cleanup } = createSqliteMemory();
		cleanups.push(cleanup);

		const threadId = 'thread-orphan-result';

		// Seed 8 messages into the thread
		await memory.saveMessages({ threadId, messages: buildSeedMessages() });

		// lastMessages=6 → loads messages 2–7
		// Message at index 2 is a tool-result for call_1, but the matching
		// assistant+tool-call (index 1) is truncated. This is an orphaned tool result.
		const mem = new Memory().storage(memory).lastMessages(6);

		const agent = new Agent('orphan-result-test')
			.model(getModel('anthropic'))
			.instructions('You are an inventory assistant. Use lookup to check stock. Be concise.')
			.tool(buildLookupTool())
			.memory(mem);

		// This should NOT throw even though history contains an orphaned tool-result
		const result = await agent.generate('Can you summarize what we discussed?', {
			persistence: { threadId, resourceId: 'test' },
		});

		expect(result.finishReason).toBe('stop');
	});

	it('handles orphaned tool calls when tool-result message is truncated from history', async () => {
		const { memory, cleanup } = createSqliteMemory();
		cleanups.push(cleanup);

		const threadId = 'thread-orphan-call';

		// Store a conversation where the last saved message is an assistant
		// with a tool-call but the tool-result was never persisted (simulating
		// a partial save / interrupted turn).
		const messages: AgentMessage[] = [
			{
				role: 'user',
				content: [{ type: 'text', text: 'How many widgets?' }],
			},
			{
				role: 'assistant',
				content: [
					{ type: 'text', text: 'Checking inventory.' },
					{
						type: 'tool-call',
						toolCallId: 'call_orphan',
						toolName: 'lookup',
						input: { id: 'widgets' },
					},
				],
			},
		];

		await memory.saveMessages({ threadId, messages });

		// lastMessages=10 covers the whole (short) history, including the orphan.
		const mem = new Memory().storage(memory).lastMessages(10);

		const agent = new Agent('orphan-call-test')
			.model(getModel('anthropic'))
			.instructions('You are an inventory assistant. Use lookup to check stock. Be concise.')
			.tool(buildLookupTool())
			.memory(mem);

		// This should NOT throw even though history has a tool-call with no result
		const result = await agent.generate('Actually, never mind. How are you?', {
			persistence: { threadId, resourceId: 'test' },
		});

		expect(result.finishReason).toBe('stop');
	});
});
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
import { expect, it } from 'vitest';
|
||||
|
||||
import { describeIf, collectStreamChunks, getModel, chunksOfType } from './helpers';
|
||||
import { Agent } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
describe('provider metadata integration', () => {
|
||||
it('includes finishReason in finish chunks', async () => {
|
||||
const agent = new Agent('metadata-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('Reply with exactly: "OK". Nothing else.');
|
||||
|
||||
const { stream: fullStream } = await agent.stream('Acknowledge');
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const finishChunks = chunksOfType(chunks, 'finish');
|
||||
|
||||
expect(finishChunks.length).toBeGreaterThan(0);
|
||||
|
||||
for (const chunk of finishChunks) {
|
||||
if (chunk.type === 'finish') {
|
||||
expect(chunk.finishReason).toBeDefined();
|
||||
expect(['stop', 'length', 'content-filter', 'tool-calls', 'error', 'other']).toContain(
|
||||
chunk.finishReason,
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it('finish reason is "stop" for a normal completion', async () => {
|
||||
const agent = new Agent('stop-reason-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('Reply with exactly: "Done". Nothing else.');
|
||||
|
||||
const { stream: fullStream } = await agent.stream('Say done');
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const finishChunks = chunksOfType(chunks, 'finish');
|
||||
|
||||
// The last finish chunk should be 'stop'
|
||||
const lastFinish = finishChunks[finishChunks.length - 1];
|
||||
expect(lastFinish).toBeDefined();
|
||||
if (lastFinish?.type === 'finish') {
|
||||
expect(lastFinish.finishReason).toBe('stop');
|
||||
}
|
||||
});
|
||||
|
||||
it('result contains usage metadata from the provider', async () => {
|
||||
const agent = new Agent('usage-metadata-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.');
|
||||
|
||||
const { stream: fullStream } = await agent.stream('What is 1+1?');
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const finishChunks = chunksOfType(chunks, 'finish');
|
||||
const usage = finishChunks[0].usage;
|
||||
|
||||
expect(usage).toBeDefined();
|
||||
expect(typeof usage!.promptTokens).toBe('number');
|
||||
expect(typeof usage!.completionTokens).toBe('number');
|
||||
expect(typeof usage!.totalTokens).toBe('number');
|
||||
expect(usage!.totalTokens).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,225 @@
|
|||
import { expect, it } from 'vitest';
|
||||
|
||||
import { collectStreamChunks, chunksOfType, describeIf } from './helpers';
|
||||
import { Agent } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
/**
|
||||
* Integration tests for provider options: prompt caching, deep merge with
|
||||
* thinking, external abort signal, and model config object form.
|
||||
*
|
||||
* Prompt caching requires a system prompt of at least 1024 tokens for
|
||||
* Anthropic, so we generate a long instruction string.
|
||||
*/
|
||||
|
||||
// A system prompt long enough to be eligible for Anthropic prompt caching.
|
||||
// Claude Haiku requires at least 2048 tokens for caching to activate.
|
||||
const LONG_SYSTEM_PROMPT =
|
||||
'You are a concise assistant. Reply in one short sentence. ' +
|
||||
'Here is additional context to ensure the prompt is long enough for caching: ' +
|
||||
Array.from(
|
||||
{ length: 500 },
|
||||
(_, i) => `Rule ${i + 1}: Always be helpful and accurate in your responses.`,
|
||||
).join(' ');
|
||||
|
||||
// ---------------------------------------------------------------------------
// Prompt caching — instruction-level
// ---------------------------------------------------------------------------

describe('prompt caching via instruction providerOptions', () => {
	it('second call with cached instructions reports cacheRead tokens', async () => {
		// cacheControl attached to the instructions marks the (long) system prompt
		// as the cacheable segment for Anthropic.
		const agent = new Agent('cache-instructions-test')
			.model('anthropic/claude-haiku-4-5')
			.instructions(LONG_SYSTEM_PROMPT, {
				providerOptions: {
					anthropic: { cacheControl: { type: 'ephemeral' } },
				},
			});

		// First call — creates the cache entry
		const result1 = await agent.generate('Say hello', {
			persistence: { resourceId: 'user1', threadId: 'thread1' },
		});
		expect(result1.finishReason).toBe('stop');

		// Second call — should read from cache
		const result2 = await agent.generate('Say goodbye', {
			persistence: { resourceId: 'user1', threadId: 'thread2' },
		});
		expect(result2.finishReason).toBe('stop');

		// At least one of the two calls should show cache activity (write or read)
		const write1 = result1.usage?.inputTokenDetails?.cacheWrite ?? 0;
		const read2 = result2.usage?.inputTokenDetails?.cacheRead ?? 0;
		expect(write1 + read2).toBeGreaterThan(0);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
// Prompt caching — call-level providerOptions
// ---------------------------------------------------------------------------

describe('prompt caching via call-level providerOptions', () => {
	it('second call with call-level cacheControl reports cacheRead tokens', async () => {
		// Call-level cacheControl applies to the API request, not individual messages.
		// For Anthropic, prompt caching at call level needs instruction-level cacheControl
		// to mark which content to cache. This test verifies call-level options don't error.
		const agent = new Agent('cache-call-level-test')
			.model('anthropic/claude-haiku-4-5')
			.instructions(LONG_SYSTEM_PROMPT);

		const result = await agent.generate('Say hello', {
			persistence: { resourceId: 'user1', threadId: 'thread1' },
			providerOptions: {
				anthropic: { cacheControl: { type: 'ephemeral' } },
			},
		});
		// We only assert a clean completion here, not cache activity — see note above.
		expect(result.finishReason).toBe('stop');
		expect(result.messages.length).toBeGreaterThan(0);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
// Prompt caching — streaming path
// ---------------------------------------------------------------------------

describe('prompt caching via stream', () => {
	it('second stream with cached instructions reports cacheRead tokens in finish chunk', async () => {
		const agent = new Agent('cache-stream-test')
			.model('anthropic/claude-haiku-4-5')
			.instructions(LONG_SYSTEM_PROMPT, {
				providerOptions: {
					anthropic: { cacheControl: { type: 'ephemeral' } },
				},
			});

		// First call — creates the cache entry
		const { stream: stream1 } = await agent.stream('Say hello', {
			persistence: { resourceId: 'user1', threadId: 'thread1' },
		});
		await collectStreamChunks(stream1);

		// Second call — should read from cache
		const { stream: stream2 } = await agent.stream('Say goodbye', {
			persistence: { resourceId: 'user1', threadId: 'thread2' },
		});
		const chunks = await collectStreamChunks(stream2);
		const finishChunks = chunksOfType(chunks, 'finish');

		expect(finishChunks.length).toBeGreaterThan(0);
		const usage = finishChunks[0].usage;
		expect(usage).toBeDefined();

		// At least one stream should show cache activity
		const write = usage!.inputTokenDetails?.cacheWrite ?? 0;
		const read = usage!.inputTokenDetails?.cacheRead ?? 0;
		expect(write + read).toBeGreaterThan(0);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
// Thinking + cacheControl coexistence (deep merge)
// ---------------------------------------------------------------------------

describe('thinking + cacheControl coexistence', () => {
	it('both thinking and prompt caching work simultaneously', async () => {
		// Thinking and cacheControl both live under providerOptions.anthropic;
		// this exercises the deep merge of the two option sources.
		const agent = new Agent('thinking-cache-test')
			.model('anthropic', 'claude-sonnet-4-5')
			.thinking('anthropic', { budgetTokens: 5000 })
			.instructions(LONG_SYSTEM_PROMPT, {
				providerOptions: {
					anthropic: { cacheControl: { type: 'ephemeral' } },
				},
			});

		// First call — cache miss, but thinking should work
		const { stream: stream1 } = await agent.stream('What is 7 * 8?', {
			persistence: { resourceId: 'user1', threadId: 'thread1' },
		});
		const chunks1 = await collectStreamChunks(stream1);

		// Should have reasoning chunks (thinking is enabled)
		const reasoningChunks = chunksOfType(chunks1, 'reasoning-delta');
		expect(reasoningChunks.length).toBeGreaterThan(0);

		// Second call — cache hit, thinking should still work
		const { stream: stream2 } = await agent.stream('What is 12 * 13?', {
			persistence: { resourceId: 'user1', threadId: 'thread2' },
		});
		const chunks2 = await collectStreamChunks(stream2);

		// Should still have reasoning
		const reasoning2 = chunksOfType(chunks2, 'reasoning-delta');
		expect(reasoning2.length).toBeGreaterThan(0);

		// At least one call should show cache activity
		const finishChunks = chunksOfType(chunks2, 'finish');
		expect(finishChunks.length).toBeGreaterThan(0);
		const usage = finishChunks[0].usage;
		expect(usage).toBeDefined();
		const write = usage!.inputTokenDetails?.cacheWrite ?? 0;
		const read = usage!.inputTokenDetails?.cacheRead ?? 0;
		expect(write + read).toBeGreaterThan(0);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
// External abort signal
// ---------------------------------------------------------------------------

describe('external abort signal', () => {
	it('cancels a generate() call via external AbortSignal', async () => {
		const agent = new Agent('abort-signal-test')
			.model('anthropic/claude-haiku-4-5')
			.instructions('You are a helpful assistant. Tell me a very long story.');

		// Abort shortly after the request starts; the long-story prompt ensures
		// generation is still in flight when the signal fires.
		const controller = new AbortController();
		setTimeout(() => controller.abort(), 100);

		const result = await agent.generate('Tell me a very long detailed story about a dragon', {
			persistence: { resourceId: 'user1', threadId: 'thread1' },
			abortSignal: controller.signal,
		});

		// Abort surfaces as an 'error' finish reason, not a thrown exception.
		expect(result.finishReason).toBe('error');
		expect(agent.getState().status).toBe('cancelled');
	});

	it('cancels a stream() call via external AbortSignal', async () => {
		const agent = new Agent('abort-stream-signal-test')
			.model('anthropic/claude-haiku-4-5')
			.instructions('You are a helpful assistant. Tell me a very long story.');

		const controller = new AbortController();
		setTimeout(() => controller.abort(), 100);

		const { stream } = await agent.stream('Tell me a very long detailed story about a dragon', {
			persistence: { resourceId: 'user1', threadId: 'thread1' },
			abortSignal: controller.signal,
		});

		// On the streaming path the abort shows up as an error chunk in the stream.
		const chunks = await collectStreamChunks(stream);
		const errorChunks = chunks.filter((c) => c.type === 'error');
		expect(errorChunks.length).toBeGreaterThan(0);
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
// Model config object form
// ---------------------------------------------------------------------------

describe('model config object form', () => {
	it('generates with model config object', async () => {
		// .model() also accepts a config object ({ id }) instead of the
		// 'provider/model' string shorthand used elsewhere in these tests.
		const agent = new Agent('model-config-test')
			.model({ id: 'anthropic/claude-haiku-4-5' })
			.instructions('You are a concise assistant. Reply in one short sentence.');

		const result = await agent.generate('Say hello', {
			persistence: { resourceId: 'user1', threadId: 'thread1' },
		});

		expect(result.finishReason).toBe('stop');
		expect(result.messages.length).toBeGreaterThan(0);
	});
});
|
||||
|
|
@ -0,0 +1,132 @@
|
|||
import { expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import {
|
||||
describeIf,
|
||||
collectStreamChunks,
|
||||
getModel,
|
||||
chunksOfType,
|
||||
collectTextDeltas,
|
||||
findAllToolCalls,
|
||||
} from './helpers';
|
||||
import { Agent, Tool, providerTools, type StreamChunk } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
/**
 * Instructions that force the model to use web search before answering.
 * Required because the model may otherwise answer from its training data.
 */
const WEB_SEARCH_INSTRUCTIONS =
	'You MUST call the web_search tool before answering any question, even if you think you already know the answer. Never answer without searching first.';

describe('provider tools integration', () => {
	it('generate: the model calls the web search provider tool', async () => {
		const agent = new Agent('provider-tool-generate-test')
			.model(getModel('anthropic'))
			.instructions(WEB_SEARCH_INSTRUCTIONS)
			.providerTool(providerTools.anthropicWebSearch());

		const result = await agent.generate('What is the weather in Tokyo?');

		// Provider-executed tools run server-side: no HITL suspension expected.
		expect(result.finishReason).toBe('stop');
		expect(result.pendingSuspend).toBeUndefined();

		const toolCalls = findAllToolCalls(result.messages);
		// Match by substring: the provider may namespace the tool name.
		const webSearchCall = toolCalls.find((tc) => tc.toolName.includes('web_search'));
		expect(webSearchCall).toBeDefined();
	});

	it('stream: the model calls the web search provider tool without suspending', async () => {
		const agent = new Agent('provider-tool-stream-test')
			.model(getModel('anthropic'))
			.instructions(WEB_SEARCH_INSTRUCTIONS)
			.providerTool(providerTools.anthropicWebSearch());

		const { stream } = await agent.stream('What is the weather in Tokyo?');
		const chunks = await collectStreamChunks(stream);

		// Provider tools must never cause a suspension
		const suspendChunks = chunksOfType(chunks, 'tool-call-suspended');
		expect(suspendChunks.length).toBe(0);

		// Must finish cleanly
		const finishChunks = chunksOfType(chunks, 'finish');
		const lastFinish = finishChunks[finishChunks.length - 1];
		expect(lastFinish?.type === 'finish' && lastFinish.finishReason).toBe('stop');

		// Collect tool calls from message chunks
		const messageChunks = chunksOfType(chunks, 'message');
		const allMessages = messageChunks.map((c) => c.message);
		const toolCalls = findAllToolCalls(allMessages);
		const webSearchCall = toolCalls.find((tc) => tc.toolName.includes('web_search'));
		expect(webSearchCall).toBeDefined();

		// Must include a text response
		const text = collectTextDeltas(chunks);
		expect(text).toBeTruthy();
	});

	it('provider tool executes without interruption while a mixed-in interruptible tool suspends', async () => {
		// An HITL tool: suspends on first invocation, completes once resumed
		// with { approved } data.
		const saveToDbTool = new Tool('save_to_db')
			.description('Save weather data to the database.')
			.input(z.object({ data: z.string().describe('The data to save') }))
			.output(z.object({ saved: z.boolean() }))
			.suspend(z.object({ message: z.string() }))
			.resume(z.object({ approved: z.boolean() }))
			.handler(async ({ data }, ctx) => {
				if (!ctx.resumeData) {
					return await ctx.suspend({ message: `Save "${data}" to the database?` });
				}
				return { saved: ctx.resumeData.approved };
			});

		const agent = new Agent('mixed-provider-hitl-test')
			.model(getModel('anthropic'))
			.instructions(
				'When asked about weather: first search the web for current weather, then call save_to_db with the result. You MUST call both tools.',
			)
			.providerTool(providerTools.anthropicWebSearch())
			.tool(saveToDbTool)
			.checkpoint('memory');

		const { stream } = await agent.stream(
			'Get the current weather in London and save the result to the database.',
		);
		const chunks = await collectStreamChunks(stream);
		// The web search provider tool must NOT cause a suspension
		// Only save_to_db (the interruptible tool) should suspend
		const suspendChunks = chunksOfType(chunks, 'tool-call-suspended');
		expect(suspendChunks.length).toBe(1);

		const suspended = suspendChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
		expect(suspended.toolName).toBe('save_to_db');
		expect(suspended.runId).toBeTruthy();
		expect(suspended.toolCallId).toBeTruthy();

		// The web search provider tool call should appear in the message history
		const messageChunks = chunksOfType(chunks, 'message');
		const toolCalls = findAllToolCalls(messageChunks.map((c) => c.message));
		const webSearchCall = toolCalls.find((tc) => tc.toolName.includes('web_search'));
		expect(webSearchCall).toBeDefined();

		// Resume with approval — agent should complete cleanly
		const resumeStream = await agent.resume(
			'stream',
			{ approved: true },
			{
				runId: suspended.runId!,
				toolCallId: suspended.toolCallId!,
			},
		);
		const resumeChunks = await collectStreamChunks(resumeStream.stream);

		// console.log('Second', JSON.stringify(resumeChunks, null, 2));
		const errorChunks = resumeChunks.filter((c) => c.type === 'error');
		expect(errorChunks).toHaveLength(0);

		const finishChunks = chunksOfType(resumeChunks, 'finish');
		const lastFinish = finishChunks[finishChunks.length - 1];
		expect(lastFinish?.type === 'finish' && lastFinish.finishReason).toBe('stop');
	});
});
|
||||
|
|
@ -0,0 +1,221 @@
|
|||
import { expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { collectStreamChunks, chunksOfType, describeIf, getModel } from './helpers';
|
||||
import type { StreamChunk } from './helpers';
|
||||
import { Agent, Tool } from '../../index';
|
||||
import type { CheckpointStore, SerializableAgentState } from '../../types';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
/**
|
||||
* A minimal CheckpointStore backed by a plain Map so it can be shared across
|
||||
* agent instances to simulate durable external storage (database, Redis, etc.).
|
||||
*/
|
||||
class InMemoryCheckpointStore implements CheckpointStore {
|
||||
private store = new Map<string, SerializableAgentState>();
|
||||
|
||||
async save(key: string, state: SerializableAgentState): Promise<void> {
|
||||
this.store.set(key, structuredClone(state));
|
||||
}
|
||||
|
||||
async load(key: string): Promise<SerializableAgentState | undefined> {
|
||||
const state = this.store.get(key);
|
||||
return state ? structuredClone(state) : undefined;
|
||||
}
|
||||
|
||||
async delete(key: string): Promise<void> {
|
||||
this.store.delete(key);
|
||||
}
|
||||
|
||||
get size(): number {
|
||||
return this.store.size;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an agent that has a delete_file tool that always suspends on the first
|
||||
* call and resumes with approval/denial on the second.
|
||||
*/
|
||||
function buildDeleteAgent(checkpointStore: CheckpointStore): Agent {
|
||||
const deleteTool = new Tool('delete_file')
|
||||
.description('Delete a file at the given path')
|
||||
.input(z.object({ path: z.string().describe('File path to delete') }))
|
||||
.output(z.object({ deleted: z.boolean(), path: z.string() }))
|
||||
.suspend(z.object({ message: z.string(), severity: z.string() }))
|
||||
.resume(z.object({ approved: z.boolean() }))
|
||||
.handler(async ({ path }, ctx) => {
|
||||
if (!ctx.resumeData) {
|
||||
return await ctx.suspend({ message: `Delete "${path}"?`, severity: 'destructive' });
|
||||
}
|
||||
if (!ctx.resumeData.approved) return { deleted: false, path };
|
||||
return { deleted: true, path };
|
||||
});
|
||||
|
||||
return new Agent('file-manager')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions(
|
||||
'You are a file manager. When asked to delete a file, use the delete_file tool. After the tool result, confirm what happened concisely.',
|
||||
)
|
||||
.tool(deleteTool)
|
||||
.checkpoint(checkpointStore);
|
||||
}
|
||||
|
||||
// End-to-end check that a suspended run survives the death of the Agent
// instance, as long as both instances share the same CheckpointStore.
describe('state restore after suspension', () => {
	it('resumes with generate after agent instance is destroyed and recreated', async () => {
		const checkpointStore = new InMemoryCheckpointStore();

		// --- Agent 1: run until suspended ---
		let suspendedRunId: string;
		let suspendedToolCallId: string;

		{
			const agent1 = buildDeleteAgent(checkpointStore);

			const result = await agent1.generate('Delete the file /tmp/important.log');

			// The delete_file tool always suspends on first call.
			expect(result.finishReason).toBe('tool-calls');
			expect(result.pendingSuspend).toBeDefined();

			suspendedRunId = result.pendingSuspend![0].runId;
			suspendedToolCallId = result.pendingSuspend![0].toolCallId;
			expect(suspendedRunId).toBeTruthy();
			expect(suspendedToolCallId).toBeTruthy();

			// Checkpoint store now holds the suspended state
			expect(checkpointStore.size).toBe(1);

			// agent1 goes out of scope here — its in-flight Map is gone
		}

		// --- Agent 2: freshly created, loads state from the shared CheckpointStore ---
		const agent2 = buildDeleteAgent(checkpointStore);

		const result2 = await agent2.resume(
			'generate',
			{ approved: true },
			{ runId: suspendedRunId, toolCallId: suspendedToolCallId },
		);

		expect(result2.finishReason).not.toBe('error');
		expect(result2.finishReason).not.toBe('tool-calls');

		// The resumed result should contain a text response from the assistant
		const assistantMessages = result2.messages.filter((m) => 'role' in m && m.role === 'assistant');
		expect(assistantMessages.length).toBeGreaterThan(0);

		const hasText = assistantMessages.some(
			(m) => 'content' in m && m.content.some((c) => c.type === 'text'),
		);
		expect(hasText).toBe(true);

		// Checkpoint should have been cleaned up after successful resumption
		expect(checkpointStore.size).toBe(0);
	});

	it('resumes with stream after agent instance is destroyed and recreated', async () => {
		const checkpointStore = new InMemoryCheckpointStore();

		let suspendedRunId: string;
		let suspendedToolCallId: string;

		{
			const agent1 = buildDeleteAgent(checkpointStore);

			const { stream } = await agent1.stream('Delete the file /tmp/data.csv');
			const chunks = await collectStreamChunks(stream);

			const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
			expect(suspendedChunks.length).toBe(1);

			const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
			expect(suspended.toolName).toBe('delete_file');

			suspendedRunId = suspended.runId!;
			suspendedToolCallId = suspended.toolCallId!;

			// State is persisted in the external store
			expect(checkpointStore.size).toBe(1);

			// agent1 is destroyed here
		}

		// --- Agent 2: new instance, same checkpoint store ---
		const agent2 = buildDeleteAgent(checkpointStore);

		const resumedStream = await agent2.resume(
			'stream',
			{ approved: true },
			{ runId: suspendedRunId, toolCallId: suspendedToolCallId },
		);

		const resumedChunks = await collectStreamChunks(resumedStream.stream);

		// No error chunks
		const errorChunks = resumedChunks.filter((c) => c.type === 'error');
		expect(errorChunks).toHaveLength(0);

		// Stream must contain the tool result message
		const toolResultChunks = resumedChunks.filter(
			(c) =>
				c.type === 'message' &&
				'message' in c &&
				'content' in (c.message as object) &&
				(c.message as { content: Array<{ type: string }> }).content.some(
					(part) => part.type === 'tool-result',
				),
		);
		expect(toolResultChunks.length).toBeGreaterThan(0);

		// Stream must end with a finish chunk (not error)
		const finishChunks = chunksOfType(resumedChunks, 'finish') as Array<
			StreamChunk & { type: 'finish' }
		>;
		expect(finishChunks.length).toBeGreaterThan(0);
		expect(finishChunks[0].finishReason).not.toBe('error');

		// At least one text-delta should arrive (the LLM's final response)
		const textDeltas = chunksOfType(resumedChunks, 'text-delta');
		expect(textDeltas.length).toBeGreaterThan(0);
	});

	it('correctly restores message history so the LLM has full context', async () => {
		const checkpointStore = new InMemoryCheckpointStore();

		let suspendedRunId: string;
		let suspendedToolCallId: string;
		let originalPath: string;

		{
			originalPath = '/tmp/critical-data.db';
			const agent1 = buildDeleteAgent(checkpointStore);
			const result = await agent1.generate(`Delete the file ${originalPath}`);

			expect(result.pendingSuspend).toBeDefined();
			suspendedRunId = result.pendingSuspend![0].runId;
			suspendedToolCallId = result.pendingSuspend![0].toolCallId;
		}

		const agent2 = buildDeleteAgent(checkpointStore);
		const result2 = await agent2.resume(
			'generate',
			{ approved: true },
			{ runId: suspendedRunId, toolCallId: suspendedToolCallId },
		);

		expect(result2.finishReason).not.toBe('error');

		// The assistant response should reference the original file path,
		// proving the full conversation context was restored correctly
		const textContent = result2.messages
			.filter((m) => 'role' in m && m.role === 'assistant')
			.flatMap((m) => ('content' in m ? m.content : []))
			.filter((c) => c.type === 'text')
			.map((c) => ('text' in c ? c.text : ''))
			.join('');

		expect(textContent.length).toBeGreaterThan(0);
		// The LLM should confirm what happened (mentioning the file or deletion)
		expect(textContent.toLowerCase()).toMatch(/delete|delet|remov|file/);
	});
});
|
||||
|
|
@ -0,0 +1,72 @@
|
|||
import { expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { describeIf, getModel } from './helpers';
|
||||
import { Agent, Tool } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');

// Diagnostic test for incremental tool-call-delta delivery.
// NOTE(review): the only hard assertion is that at least one delta arrived;
// the buffered-vs-streaming diagnosis below is logged, not asserted.
describe('stream timing', () => {
	it('tool-call-delta chunks arrive incrementally (not all buffered)', async () => {
		const agent = new Agent('timing-test')
			.model(getModel('anthropic'))
			.instructions(
				'When asked to write code, call the set_code tool with the code. Write at least 10 lines.',
			)
			.tool(
				new Tool('set_code')
					.description('Set code in the editor')
					.input(
						z.object({
							code: z.string().describe('The complete source code'),
						}),
					)
					// Opt into Anthropic's eager input streaming so argument deltas
					// are forwarded as they are generated.
					.providerOptions({ anthropic: { eagerInputStreaming: true } })
					.handler(async ({ code }) => ({ ok: true, length: code.length })),
			);

		const result = await agent.stream(
			'Write a TypeScript function that implements bubble sort. Use the set_code tool.',
		);

		const reader = result.stream.getReader();

		// Track timestamps of each reader.read() that returns a tool-call-delta
		// This measures when the reader YIELDS each chunk, not when the agent enqueues it.
		const deltaReadTimes: number[] = [];
		const start = Date.now();

		while (true) {
			const { done, value } = await reader.read();
			if (done) break;
			const chunk = value;
			// NOTE(review): assumes tool-call-delta chunks carry the tool name
			// under `name` — confirm against the StreamChunk type.
			if (chunk.type === 'tool-call-delta' && (chunk as { name?: string }).name === 'set_code') {
				deltaReadTimes.push(Date.now() - start);
			}
		}

		expect(deltaReadTimes.length).toBeGreaterThan(0);

		console.log(`set_code delta reads: ${deltaReadTimes.length}`);
		if (deltaReadTimes.length > 1) {
			const first = deltaReadTimes[0];
			const last = deltaReadTimes[deltaReadTimes.length - 1];
			const spread = last - first;
			console.log(`Time spread: ${spread}ms (first read: ${first}ms, last read: ${last}ms)`);

			// Count how many distinct timestamps (ms resolution)
			const uniqueTimes = new Set(deltaReadTimes).size;
			console.log(`Unique timestamps: ${uniqueTimes} out of ${deltaReadTimes.length} reads`);

			// If truly streaming: spread should be significant (>500ms for code generation)
			// If buffered: spread will be near 0 and most reads share the same timestamp
			const bufferingRatio = uniqueTimes / deltaReadTimes.length;
			console.log(`Buffering ratio: ${(bufferingRatio * 100).toFixed(1)}% unique timestamps`);
			console.log(
				bufferingRatio < 0.1
					? 'BUFFERED: The agent releases all deltas in one burst'
					: 'STREAMING: Deltas arrive incrementally',
			);
		}
	});
});
|
||||
|
|
@ -0,0 +1,223 @@
|
|||
import { expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { describeIf, collectStreamChunks, chunksOfType, getModel } from './helpers';
|
||||
import { Agent, Tool } from '../../index';
|
||||
import type { StreamChunk } from '../../index';
|
||||
|
||||
// Shared structured-output schema: most tests in this file assert that the
// model's final answer parses against these three fields.
const answerSchema = z.object({
	city: z.string().describe('The name of the city'),
	country: z.string().describe('The country the city is in'),
	population_millions: z.number().describe('Approximate population in millions'),
});
|
||||
|
||||
function createStructuredAgent(provider: 'anthropic' | 'openai'): Agent {
|
||||
return new Agent('structured-output-test')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You answer geography questions. Always respond with the structured output schema. Be precise and factual.',
|
||||
)
|
||||
.structuredOutput(answerSchema);
|
||||
}
|
||||
|
||||
function createStructuredAgentWithTool(provider: 'anthropic' | 'openai'): Agent {
|
||||
const lookupTool = new Tool('lookup_capital')
|
||||
.description('Look up the capital city of a country')
|
||||
.input(z.object({ country: z.string().describe('Country name') }))
|
||||
.output(z.object({ capital: z.string(), population_millions: z.number() }))
|
||||
.handler(async ({ country }) => {
|
||||
const data: Record<string, { capital: string; population_millions: number }> = {
|
||||
france: { capital: 'Paris', population_millions: 2.1 },
|
||||
japan: { capital: 'Tokyo', population_millions: 13.9 },
|
||||
brazil: { capital: 'Brasília', population_millions: 3.0 },
|
||||
};
|
||||
return data[country.toLowerCase()] ?? { capital: 'Unknown', population_millions: 0 };
|
||||
});
|
||||
|
||||
return new Agent('structured-tool-test')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You answer geography questions. Use the lookup_capital tool when asked about capitals. Always respond with the structured output schema.',
|
||||
)
|
||||
.tool(lookupTool)
|
||||
.structuredOutput(answerSchema);
|
||||
}
|
||||
|
||||
function createStructuredAgentWithInterruptibleTool(provider: 'anthropic' | 'openai'): Agent {
|
||||
const deleteTool = new Tool('delete_record')
|
||||
.description('Delete a geographic record — requires confirmation')
|
||||
.input(z.object({ city: z.string().describe('City to delete') }))
|
||||
.output(z.object({ deleted: z.boolean(), city: z.string() }))
|
||||
.suspend(z.object({ message: z.string() }))
|
||||
.resume(z.object({ approved: z.boolean() }))
|
||||
.handler(async ({ city }, ctx) => {
|
||||
if (!ctx.resumeData) {
|
||||
return await ctx.suspend({ message: `Delete record for "${city}"?` });
|
||||
}
|
||||
return { deleted: ctx.resumeData.approved, city };
|
||||
});
|
||||
|
||||
const resultSchema = z.object({
|
||||
action: z.string().describe('The action that was performed'),
|
||||
city: z.string().describe('The city affected'),
|
||||
success: z.boolean().describe('Whether the action succeeded'),
|
||||
});
|
||||
|
||||
return new Agent('structured-interrupt-test')
|
||||
.model(getModel(provider))
|
||||
.instructions(
|
||||
'You manage geographic records. When asked to delete a record, use the delete_record tool. Always respond with the structured output schema.',
|
||||
)
|
||||
.tool(deleteTool)
|
||||
.structuredOutput(resultSchema)
|
||||
.checkpoint('memory');
|
||||
}
|
||||
|
||||
const describe = describeIf('anthropic');

// Structured output across every execution path: generate, stream, after a
// tool round-trip, and after resuming a suspended run.
describe('structured output integration', () => {
	it('returns parsed structuredOutput via generate()', async () => {
		const agent = createStructuredAgent('anthropic');

		const result = await agent.generate('What is the capital of France?');

		expect(result.finishReason).toBe('stop');
		expect(result.structuredOutput).toBeDefined();

		const parsed = answerSchema.safeParse(result.structuredOutput);
		expect(parsed.success).toBe(true);
		if (parsed.success) {
			expect(parsed.data.city.toLowerCase()).toContain('paris');
			expect(parsed.data.country.toLowerCase()).toContain('france');
			expect(parsed.data.population_millions).toBeGreaterThan(0);
		}
	});

	it('returns parsed structuredOutput in stream finish chunk', async () => {
		const agent = createStructuredAgent('anthropic');

		const { stream } = await agent.stream('What is the capital of Japan?');
		const chunks = await collectStreamChunks(stream);

		const finishChunks = chunksOfType(chunks, 'finish');
		expect(finishChunks.length).toBeGreaterThan(0);

		const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
		expect(finish.structuredOutput).toBeDefined();

		const parsed = answerSchema.safeParse(finish.structuredOutput);
		expect(parsed.success).toBe(true);
		if (parsed.success) {
			expect(parsed.data.city.toLowerCase()).toContain('tokyo');
		}
	});

	it('returns structuredOutput after tool use via generate()', async () => {
		const agent = createStructuredAgentWithTool('anthropic');

		const result = await agent.generate('What is the capital of France? Use the lookup tool.');

		expect(result.finishReason).toBe('stop');
		expect(result.structuredOutput).toBeDefined();

		const parsed = answerSchema.safeParse(result.structuredOutput);
		expect(parsed.success).toBe(true);
		if (parsed.success) {
			expect(parsed.data.city.toLowerCase()).toContain('paris');
		}
	});

	it('returns structuredOutput after tool use via stream()', async () => {
		const agent = createStructuredAgentWithTool('anthropic');

		const { stream } = await agent.stream('What is the capital of Japan? Use the lookup tool.');
		const chunks = await collectStreamChunks(stream);

		const finishChunks = chunksOfType(chunks, 'finish');
		expect(finishChunks.length).toBeGreaterThan(0);

		const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
		expect(finish.structuredOutput).toBeDefined();

		const parsed = answerSchema.safeParse(finish.structuredOutput);
		expect(parsed.success).toBe(true);
	});

	it('returns structuredOutput after resume("generate")', async () => {
		const agent = createStructuredAgentWithInterruptibleTool('anthropic');

		// First run suspends on the delete_record tool.
		const first = await agent.generate('Delete the record for Paris');
		expect(first.pendingSuspend).toBeDefined();
		const { runId, toolCallId } = first.pendingSuspend![0];

		const resumed = await agent.resume('generate', { approved: true }, { runId, toolCallId });

		expect(resumed.finishReason).toBe('stop');
		expect(resumed.structuredOutput).toBeDefined();

		// Local schema mirrors the one inside createStructuredAgentWithInterruptibleTool.
		const resultSchema = z.object({
			action: z.string(),
			city: z.string(),
			success: z.boolean(),
		});
		const parsed = resultSchema.safeParse(resumed.structuredOutput);
		expect(parsed.success).toBe(true);
	});

	it('returns structuredOutput after resume("stream")', async () => {
		const agent = createStructuredAgentWithInterruptibleTool('anthropic');

		const first = await agent.generate('Delete the record for Tokyo');
		expect(first.pendingSuspend).toBeDefined();
		const { runId, toolCallId } = first.pendingSuspend![0];

		const resumedStream = await agent.resume('stream', { approved: true }, { runId, toolCallId });

		const chunks = await collectStreamChunks(resumedStream.stream);
		const finishChunks = chunksOfType(chunks, 'finish');
		expect(finishChunks.length).toBeGreaterThan(0);

		const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
		expect(finish.structuredOutput).toBeDefined();

		// Local schema mirrors the one inside createStructuredAgentWithInterruptibleTool.
		const resultSchema = z.object({
			action: z.string(),
			city: z.string(),
			success: z.boolean(),
		});
		const parsed = resultSchema.safeParse(finish.structuredOutput);
		expect(parsed.success).toBe(true);
	});

	it('structuredOutput conforms to the schema', async () => {
		const strictSchema = z.object({
			name: z.string(),
			is_capital: z.boolean(),
			continent: z.enum([
				'Africa',
				'Antarctica',
				'Asia',
				'Europe',
				'North America',
				'Oceania',
				'South America',
			]),
		});

		const agent = new Agent('strict-schema-test')
			.model(getModel('anthropic'))
			.instructions('Answer geography questions using the structured output schema.')
			.structuredOutput(strictSchema);

		const result = await agent.generate('Tell me about Berlin');

		expect(result.structuredOutput).toBeDefined();
		const parsed = strictSchema.safeParse(result.structuredOutput);
		expect(parsed.success).toBe(true);
		if (parsed.success) {
			expect(parsed.data.name.toLowerCase()).toContain('berlin');
			expect(parsed.data.continent).toBe('Europe');
			expect(typeof parsed.data.is_capital).toBe('boolean');
		}
	});
});
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
import { expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
chunksOfType,
|
||||
collectStreamChunks,
|
||||
collectTextDeltas,
|
||||
describeIf,
|
||||
findAllToolResults,
|
||||
getModel,
|
||||
} from './helpers';
|
||||
import type { StreamChunk } from '../../index';
|
||||
import { Agent } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');

// Agents exposed as tools via `.asTool(description)` — single delegation and a
// two-tool chain orchestrated by a coordinator agent.
describe('sub-agent (asTool) integration', () => {
	it('orchestrator calls a sub-agent as a tool and gets its response', async () => {
		const mathAgent = new Agent('math-specialist')
			.model(getModel('anthropic'))
			.instructions(
				'You are a math specialist. When given a math problem, compute the answer and reply with just the number. No explanation.',
			);

		// NOTE(review): the instructions mention `math_specialist` (underscore) but
		// the tool name asserted below is the hyphenated agent name
		// 'math-specialist' — confirm the intended registered tool name.
		const orchestrator = new Agent('orchestrator')
			.model(getModel('anthropic'))
			.instructions(
				'You are a coordinator. When asked a math question, delegate to the math_specialist tool. ' +
					'Pass the question as the prompt. Then relay the answer back.',
			)
			.tool(mathAgent.asTool('A math specialist that can solve math problems'));

		const { stream: fullStream } = await orchestrator.stream('What is 15 * 4?');

		const chunks = await collectStreamChunks(fullStream);
		const text = collectTextDeltas(chunks);
		const messageChunks = chunksOfType(chunks, 'message') as Array<
			StreamChunk & { type: 'message' }
		>;
		const toolResults = findAllToolResults(messageChunks.map((c) => c.message));

		// The orchestrator should have called the sub-agent tool
		expect(toolResults.length).toBeGreaterThan(0);
		const mathCall = toolResults.find((tc) => tc.toolName === 'math-specialist');
		expect(mathCall).toBeDefined();

		// The output should contain the sub-agent's response
		expect(mathCall!.result).toBeDefined();

		// The final text should reference 60
		expect(text).toBeTruthy();
		expect(text).toContain('60');
	});

	it('handles a chain of two sub-agents', async () => {
		const translatorAgent = new Agent('translator')
			.model(getModel('anthropic'))
			.instructions(
				'You are a translator. Translate the given text to French. Reply with only the French translation.',
			);

		const uppercaseAgent = new Agent('uppercaser')
			.model(getModel('anthropic'))
			.instructions(
				'You convert text to uppercase. Reply with the input text in all uppercase letters. Nothing else.',
			);

		const orchestrator = new Agent('chain-orchestrator')
			.model(getModel('anthropic'))
			.instructions(
				'You are a coordinator with two tools. ' +
					'When asked to translate and uppercase text: ' +
					'1. First use the translator tool to translate to French. ' +
					'2. Then use the uppercaser tool to convert the French text to uppercase. ' +
					'Return the final uppercase French text.',
			)
			.tool(translatorAgent.asTool('Translates text to French'))
			.tool(uppercaseAgent.asTool('Converts text to uppercase'));

		const { stream: fullStream } = await orchestrator.stream(
			'Translate "hello" to French and then make it uppercase.',
		);
		const chunks = await collectStreamChunks(fullStream);
		const messageChunks = chunksOfType(chunks, 'message') as Array<
			StreamChunk & { type: 'message' }
		>;
		const toolResults = findAllToolResults(messageChunks.map((c) => c.message));

		// Should have called both tools
		expect(toolResults.length).toBeGreaterThanOrEqual(2);

		const text = collectTextDeltas(chunks);
		expect(text).toBeTruthy();
		// The result should contain BONJOUR (or SALUT) — uppercase French for hello
		// (the regex is case-insensitive, so it only checks the word, not the casing).
		expect(text).toMatch(/BONJOUR/i);
	});
});
|
||||
|
|
@ -0,0 +1,197 @@
|
|||
/**
|
||||
* Integration test: Telemetry → LangSmith.
|
||||
*
|
||||
* Runs a real agent against a real model with LangSmith telemetry configured,
|
||||
* uses a local HTTP server to capture the trace data that would be sent to
|
||||
* LangSmith, and verifies the full pipeline works end-to-end.
|
||||
*
|
||||
* Pipeline under test:
|
||||
* Agent.generate() → AI SDK (generateText with experimental_telemetry)
|
||||
* → OTel spans with ai.operationId → LangSmithOTLPSpanProcessor
|
||||
* → LangSmithOTLPTraceExporter → HTTP POST → captured by local server
|
||||
*/
|
||||
import * as http from 'node:http';
|
||||
import { afterAll, afterEach, beforeAll, expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { describeIf, getModel } from './helpers';
|
||||
import { Agent, LangSmithTelemetry, type Telemetry, type BuiltTelemetry, Tool } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');

// One HTTP request captured by the local stand-in LangSmith server.
interface CapturedRequest {
	// Request path, e.g. '/otel/v1/traces'.
	url: string;
	headers: http.IncomingHttpHeaders;
	// Raw (protobuf) request body.
	body: Buffer;
}
|
||||
|
||||
describe('Telemetry → LangSmith integration', () => {
	let server: http.Server;
	let serverPort: number;
	let captured: CapturedRequest[];
	// Saved so afterAll can restore whatever value the environment had before.
	let previousTracingV2: string | undefined;

	beforeAll(async () => {
		// LangSmith exporter requires this env var to be set, otherwise it silently drops spans
		previousTracingV2 = process.env.LANGCHAIN_TRACING_V2;
		process.env.LANGCHAIN_TRACING_V2 = 'true';
		captured = [];
		// Local HTTP server that records every request it receives and answers 200,
		// standing in for the LangSmith OTLP endpoint.
		server = http.createServer((req, res) => {
			const chunks: Buffer[] = [];
			req.on('data', (c: Buffer) => chunks.push(c));
			req.on('end', () => {
				captured.push({
					url: req.url ?? '',
					headers: req.headers,
					body: Buffer.concat(chunks),
				});
				res.writeHead(200, { 'Content-Type': 'application/json' });
				res.end('{}');
			});
		});

		// listen(0) asks the OS for an ephemeral port; read it back from address().
		await new Promise<void>((resolve) => {
			server.listen(0, () => {
				const addr = server.address();
				serverPort = typeof addr === 'object' && addr ? addr.port : 0;
				resolve();
			});
		});
	});

	afterEach(() => {
		captured = [];
	});

	afterAll(async () => {
		// Restore the original env var state (deleted vs. previous value).
		if (previousTracingV2 === undefined) {
			delete process.env.LANGCHAIN_TRACING_V2;
		} else {
			process.env.LANGCHAIN_TRACING_V2 = previousTracingV2;
		}
		await new Promise<void>((resolve) => {
			server.close(() => resolve());
		});
	});

	// NOTE(review): the `as Telemetry` cast papers over the
	// Telemetry | BuiltTelemetry union — confirm `.telemetry()` is meant to
	// accept pre-built instances.
	function createTestAgent(telemetry: Telemetry | BuiltTelemetry) {
		return new Agent('langsmith-test')
			.model(getModel('anthropic'))
			.instructions('You are a calculator. Use the add tool when asked to add. Be concise.')
			.telemetry(telemetry as Telemetry)
			.tool(
				new Tool('add')
					.description('Add two numbers')
					.input(z.object({ a: z.number(), b: z.number() }))
					.handler(async ({ a, b }) => ({ result: a + b })),
			);
	}

	it('sends trace data to LangSmith using LangSmithTelemetry', async () => {
		const built = await new LangSmithTelemetry({
			apiKey: 'ls-test-key-12345',
			project: 'agents-test',
			url: `http://localhost:${serverPort}/otel/v1/traces`,
		})
			.functionId('calc-agent')
			.build();

		const agent = createTestAgent(built);
		const result = await agent.generate('What is 3 + 4?');

		// Flush pending spans before inspecting what the server captured.
		if (built.provider) await built.provider.forceFlush();

		// Verify the agent produced a response
		expect(result.messages.length).toBeGreaterThan(0);

		// Verify LangSmith received trace data
		expect(captured.length).toBeGreaterThan(0);

		// Verify the request hit the OTLP traces endpoint
		expect(captured.some((r) => r.url.includes('/otel/v1/traces'))).toBe(true);

		// Verify the API key was sent in the header
		expect(captured.some((r) => r.headers['x-api-key'] === 'ls-test-key-12345')).toBe(true);

		// Verify the body is non-empty (actual protobuf trace data)
		const totalBytes = captured.reduce((sum, r) => sum + r.body.length, 0);
		expect(totalBytes).toBeGreaterThan(0);

		if (built.provider) await built.provider.shutdown();
	});

	it('supports endpoint shorthand (auto-appends /otel/v1/traces)', async () => {
		const built = await new LangSmithTelemetry({
			apiKey: 'ls-endpoint-key',
			project: 'agents-test',
			endpoint: `http://localhost:${serverPort}`,
		})
			.functionId('endpoint-test')
			.build();

		const agent = createTestAgent(built);
		const result = await agent.generate('What is 10 + 20?');

		if (built.provider) await built.provider.forceFlush();

		expect(result.messages.length).toBeGreaterThan(0);
		expect(captured.length).toBeGreaterThan(0);
		expect(captured.some((r) => r.headers['x-api-key'] === 'ls-endpoint-key')).toBe(true);

		if (built.provider) await built.provider.shutdown();
	});

	it('includes tool call spans in the trace', async () => {
		const built = await new LangSmithTelemetry({
			apiKey: 'ls-tool-test',
			project: 'agents-test',
			url: `http://localhost:${serverPort}/otel/v1/traces`,
		})
			.functionId('tool-trace-test')
			.build();

		const agent = createTestAgent(built);
		await agent.generate('What is 5 + 7?');

		if (built.provider) await built.provider.forceFlush();

		// Multiple spans exported as protobuf
		// (only the total payload size is checked, not the decoded span contents).
		expect(captured.length).toBeGreaterThan(0);
		const totalBytes = captured.reduce((sum, r) => sum + r.body.length, 0);
		expect(totalBytes).toBeGreaterThan(50);

		if (built.provider) await built.provider.shutdown();
	});

	it('fires TelemetryIntegration hooks alongside LangSmith traces', async () => {
		const hookEvents: string[] = [];

		const built = await new LangSmithTelemetry({
			apiKey: 'ls-hooks-test',
			project: 'agents-test',
			url: `http://localhost:${serverPort}/otel/v1/traces`,
		})
			.functionId('hooks-test')
			.integration({
				onStart: () => {
					hookEvents.push('start');
				},
				onFinish: () => {
					hookEvents.push('finish');
				},
			})
			.build();

		const agent = createTestAgent(built);
		await agent.generate('What is 1 + 1?');

		if (built.provider) await built.provider.forceFlush();

		// Both LangSmith traces and integration hooks should fire
		expect(captured.length).toBeGreaterThan(0);
		expect(hookEvents).toContain('start');
		expect(hookEvents).toContain('finish');

		if (built.provider) await built.provider.shutdown();
	});
});
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
import { expect, it } from 'vitest';
|
||||
|
||||
import { describeIf, collectStreamChunks, chunksOfType } from './helpers';
|
||||
import { Agent } from '../../index';
|
||||
|
||||
/**
|
||||
* Thinking / reasoning stream integration tests.
|
||||
*
|
||||
* These require models that support extended thinking:
|
||||
* - Anthropic: claude-sonnet-4-5 (not haiku — it doesn't support thinking)
|
||||
* - OpenAI: o3-mini (reasoning model)
|
||||
*/
|
||||
|
||||
const describeAnthropic = describeIf('anthropic');
|
||||
|
||||
describeAnthropic('thinking stream (Anthropic)', () => {
|
||||
it('emits reasoning-delta chunks when thinking is enabled', async () => {
|
||||
const agent = new Agent('thinking-test')
|
||||
.model('anthropic', 'claude-sonnet-4-5')
|
||||
.thinking('anthropic', { budgetTokens: 5000 })
|
||||
.instructions('You are a helpful assistant. Think carefully before answering.');
|
||||
|
||||
const { stream: fullStream } = await agent.stream('What is 17 * 23?');
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const reasoningChunks = chunksOfType(chunks, 'reasoning-delta');
|
||||
|
||||
expect(reasoningChunks.length).toBeGreaterThan(0);
|
||||
|
||||
// Verify reasoning chunks have non-empty delta content
|
||||
const deltas = reasoningChunks
|
||||
.filter((c): c is typeof c & { delta: string } => 'delta' in c)
|
||||
.map((c) => c.delta);
|
||||
const fullReasoning = deltas.join('');
|
||||
expect(fullReasoning.length).toBeGreaterThan(0);
|
||||
|
||||
// Should also have text-delta chunks (the actual answer)
|
||||
const textChunks = chunksOfType(chunks, 'text-delta');
|
||||
expect(textChunks.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
const describeOpenAI = describeIf('openai');
|
||||
|
||||
describeOpenAI('thinking stream (OpenAI)', () => {
|
||||
it('works with reasoning model and .thinking() enabled', async () => {
|
||||
const agent = new Agent('openai-thinking-test')
|
||||
.model('openai', 'o3-mini')
|
||||
.thinking('openai', { reasoningEffort: 'medium' })
|
||||
.instructions('You are a helpful assistant.');
|
||||
|
||||
const { stream: fullStream } = await agent.stream('What is 17 * 23?');
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
|
||||
// OpenAI reasoning models do internal reasoning but don't expose it
|
||||
// as streamed chunks — verify the agent produces a text response.
|
||||
const textChunks = chunksOfType(chunks, 'text-delta');
|
||||
expect(textChunks.length).toBeGreaterThan(0);
|
||||
|
||||
const text = textChunks
|
||||
.filter((c): c is typeof c & { delta: string } => 'delta' in c)
|
||||
.map((c) => c.delta)
|
||||
.join('');
|
||||
expect(text).toContain('391');
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,154 @@
|
|||
import { expect, it, vi, afterEach, beforeEach } from 'vitest';
|
||||
|
||||
import { describeIf, getModel, collectStreamChunks, createSqliteMemory } from './helpers';
|
||||
import { Agent, Memory } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');

describe('title generation integration', () => {
	// Fresh sqlite-backed memory per test so thread state never leaks between cases.
	let sqliteCtx: ReturnType<typeof createSqliteMemory>;

	beforeEach(() => {
		sqliteCtx = createSqliteMemory();
	});

	afterEach(async () => {
		sqliteCtx.cleanup();
	});

	it('auto-generates a thread title after generate() on a new thread', async () => {
		const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration(true);

		const agent = new Agent('title-gen-test')
			.model(getModel('anthropic'))
			.instructions('You are a helpful assistant. Be concise.')
			.memory(memory);

		const threadId = `title-test-${Date.now()}`;
		const resourceId = 'test-user';

		// Sanity check: the thread must not exist before the first generate().
		const threadBefore = await sqliteCtx.memory.getThread(threadId);
		expect(threadBefore).toBeNull();

		await agent.generate('Tell me about the history of Rome', {
			persistence: { threadId, resourceId },
		});

		// Title generation runs in the background, so poll until the title appears.
		await vi.waitFor(
			async () => {
				const thread = await sqliteCtx.memory.getThread(threadId);
				expect(thread).toBeDefined();
				expect(thread!.title).toBeTruthy();
				expect(thread!.title!.length).toBeGreaterThan(0);
				expect(thread!.title!.length).toBeLessThanOrEqual(80);
			},
			{ timeout: 30_000, interval: 500 },
		);
	});

	it('auto-generates a thread title after stream() on a new thread', async () => {
		const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration(true);

		const agent = new Agent('title-gen-stream-test')
			.model(getModel('anthropic'))
			.instructions('You are a helpful assistant. Be concise.')
			.memory(memory);

		const threadId = `title-stream-test-${Date.now()}`;
		const resourceId = 'test-user';

		const { stream } = await agent.stream('Explain quantum computing basics', {
			persistence: { threadId, resourceId },
		});

		// Drain the stream fully — persistence happens as part of stream completion.
		await collectStreamChunks(stream);

		await vi.waitFor(
			async () => {
				const thread = await sqliteCtx.memory.getThread(threadId);
				expect(thread).toBeDefined();
				expect(thread!.title).toBeTruthy();
				expect(thread!.title!.length).toBeGreaterThan(0);
				expect(thread!.title!.length).toBeLessThanOrEqual(80);
			},
			{ timeout: 30_000, interval: 500 },
		);
	});

	it('does not generate a title when titleGeneration is not configured', async () => {
		const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10);

		const agent = new Agent('no-title-gen-test')
			.model(getModel('anthropic'))
			.instructions('You are a helpful assistant. Be concise.')
			.memory(memory);

		const threadId = `no-title-test-${Date.now()}`;

		await agent.generate('Hello, how are you?', {
			persistence: { threadId, resourceId: 'test-user' },
		});

		// Cannot wait for something that should never happen — give any stray
		// background title generation a grace period before asserting absence.
		await new Promise((r) => setTimeout(r, 3_000));

		const thread = await sqliteCtx.memory.getThread(threadId);
		expect(thread).toBeDefined();
		expect(thread!.title).toBeFalsy();
	});

	it('does not overwrite a pre-existing thread title', async () => {
		const existingTitle = 'My Pre-Existing Title';

		// Seed a thread that already carries a title and metadata.
		await sqliteCtx.memory.saveThread({
			id: 'pre-titled-thread',
			resourceId: 'test-user',
			title: existingTitle,
			metadata: { custom: 'data' },
		});

		const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration(true);

		const agent = new Agent('title-no-overwrite-test')
			.model(getModel('anthropic'))
			.instructions('You are a helpful assistant. Be concise.')
			.memory(memory);

		await agent.generate('What is 2+2?', {
			persistence: { threadId: 'pre-titled-thread', resourceId: 'test-user' },
		});

		// Allow fire-and-forget title generation to settle
		await new Promise((r) => setTimeout(r, 5_000));

		// Title and metadata must survive untouched.
		const thread = await sqliteCtx.memory.getThread('pre-titled-thread');
		expect(thread!.title).toBe(existingTitle);
		expect(thread!.metadata).toEqual({ custom: 'data' });
	});

	it('accepts a custom model for title generation', async () => {
		// Title generation can use a different (cheaper) model than the agent itself.
		const memory = new Memory().storage(sqliteCtx.memory).lastMessages(10).titleGeneration({
			model: 'anthropic/claude-haiku-4-5',
		});

		const agent = new Agent('title-custom-model-test')
			.model(getModel('anthropic'))
			.instructions('You are a helpful assistant. Be concise.')
			.memory(memory);

		const threadId = `title-custom-model-${Date.now()}`;

		await agent.generate('What are the best practices for growing tomatoes?', {
			persistence: { threadId, resourceId: 'test-user' },
		});

		await vi.waitFor(
			async () => {
				const thread = await sqliteCtx.memory.getThread(threadId);
				expect(thread).toBeDefined();
				expect(thread!.title).toBeTruthy();
				expect(thread!.title!.length).toBeGreaterThan(0);
			},
			{ timeout: 30_000, interval: 500 },
		);
	});
});
|
||||
|
|
@ -0,0 +1,211 @@
|
|||
import { expect, it, vi } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import {
|
||||
describeIf,
|
||||
getModel,
|
||||
collectStreamChunks,
|
||||
chunksOfType,
|
||||
findAllToolResults,
|
||||
collectTextDeltas,
|
||||
} from './helpers';
|
||||
import { Agent, filterLlmMessages, Tool } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');

describe('toModelOutput integration', () => {
	it('sends the transformed output to the LLM while preserving raw output in toolCalls', async () => {
		const handlerSpy = vi.fn();

		// Tool returns a deliberately bulky payload; toModelOutput shrinks it for the LLM.
		const searchTool = new Tool('search_db')
			.description('Search the database and return matching records')
			.input(z.object({ query: z.string().describe('Search query') }))
			.output(
				z.object({
					records: z.array(z.object({ id: z.number(), name: z.string(), data: z.string() })),
					total: z.number(),
				}),
			)
			.handler(async ({ query }) => {
				handlerSpy(query);
				return {
					records: [
						{ id: 1, name: 'Widget A', data: 'x'.repeat(200) },
						{ id: 2, name: 'Widget B', data: 'y'.repeat(200) },
						{ id: 3, name: 'Gadget C', data: 'z'.repeat(200) },
					],
					total: 3,
				};
			})
			.toModelOutput((output) => ({
				summary: `Found ${output.total} records: ${output.records.map((r) => r.name).join(', ')}`,
			}));

		const agent = new Agent('to-model-output-test')
			.model(getModel('anthropic'))
			.instructions(
				'You are a database assistant. Use search_db to find records. Be concise in your response.',
			)
			.tool(searchTool);

		const result = await agent.generate('Search for widgets in the database');

		expect(handlerSpy).toHaveBeenCalled();

		// toolCalls on GenerateResult stores the raw output
		expect(result.toolCalls).toBeDefined();
		const searchEntry = result.toolCalls!.find((tc) => tc.tool === 'search_db');
		expect(searchEntry).toBeDefined();
		const rawOutput = searchEntry!.output as {
			records: Array<{ id: number; name: string; data: string }>;
			total: number;
		};
		expect(rawOutput.total).toBe(3);
		expect(rawOutput.records[0].data).toBe('x'.repeat(200));

		// ContentToolResult in messages stores the transformed output (what the LLM saw)
		const toolResults = findAllToolResults(result.messages);
		const searchToolResult = toolResults.find((tr) => tr.toolName === 'search_db');
		expect(searchToolResult).toBeDefined();
		const modelOutput = searchToolResult!.result as { summary: string };
		expect(modelOutput.summary).toContain('Found 3 records');
		expect(modelOutput.summary).toContain('Widget A');
	});

	it('works with stream() — LLM receives transformed output', async () => {
		const fetchTool = new Tool('fetch_report')
			.description('Fetch a detailed report by ID')
			.input(z.object({ reportId: z.string().describe('Report ID') }))
			.output(
				z.object({
					id: z.string(),
					title: z.string(),
					body: z.string(),
					metadata: z.object({ pages: z.number(), author: z.string() }),
				}),
			)
			.handler(async ({ reportId }) => ({
				id: reportId,
				title: 'Q4 Sales Report',
				body: 'Detailed analysis spanning multiple pages...'.repeat(10),
				metadata: { pages: 42, author: 'Jane Doe' },
			}))
			.toModelOutput((output) => ({
				id: output.id,
				title: output.title,
				pageCount: output.metadata.pages,
			}));

		const agent = new Agent('to-model-output-stream-test')
			.model(getModel('anthropic'))
			.instructions(
				'You are a report assistant. Use fetch_report to retrieve reports. Mention the title and page count. Be concise.',
			)
			.tool(fetchTool);

		const { stream } = await agent.stream('Get report RPT-001');
		const chunks = await collectStreamChunks(stream);

		// The tool result messages in the stream contain the transformed output
		const messageChunks = chunksOfType(chunks, 'message');
		const toolResults = findAllToolResults(messageChunks.map((c) => c.message));

		const reportResult = toolResults.find((tr) => tr.toolName === 'fetch_report');
		expect(reportResult).toBeDefined();

		// The model output (transformed) should have the truncated fields
		const modelOutput = reportResult!.result as { id: string; title: string; pageCount: number };
		expect(modelOutput.id).toBe('RPT-001');
		expect(modelOutput.title).toBe('Q4 Sales Report');
		expect(modelOutput.pageCount).toBe(42);
		// The body should NOT be in the model output (it was stripped by toModelOutput)
		expect((modelOutput as Record<string, unknown>).body).toBeUndefined();

		const text = collectTextDeltas(chunks);
		expect(text).toBeTruthy();
		expect(text).toMatch(/Q4 Sales Report/i);
	});

	it('does not affect the LLM output when toModelOutput is not set', async () => {
		const echoTool = new Tool('echo')
			.description('Echo back the input message')
			.input(z.object({ message: z.string().describe('Message to echo') }))
			.output(z.object({ echoed: z.string() }))
			.handler(async ({ message }) => ({ echoed: message }));

		const agent = new Agent('no-to-model-output-test')
			.model(getModel('anthropic'))
			.instructions('You are a simple echo bot. Use echo tool and repeat the result. Be concise.')
			.tool(echoTool);

		const result = await agent.generate('Echo the message "hello world"');

		// Without toModelOutput, tool result in messages should have the raw output
		const toolResults = findAllToolResults(result.messages);
		const echoResult = toolResults.find((tr) => tr.toolName === 'echo');
		expect(echoResult).toBeDefined();
		expect((echoResult!.result as { echoed: string }).echoed).toBe('hello world');

		// And toolCalls should also have the same raw output
		expect(result.toolCalls).toBeDefined();
		const echoEntry = result.toolCalls!.find((tc) => tc.tool === 'echo');
		expect(echoEntry).toBeDefined();
		expect((echoEntry!.output as { echoed: string }).echoed).toBe('hello world');
	});

	it('works alongside toMessage — both transforms apply independently', async () => {
		// toModelOutput shapes what the LLM sees; toMessage emits an extra custom message.
		const calcTool = new Tool('multiply')
			.description('Multiply two numbers')
			.input(
				z.object({
					a: z.number().describe('First number'),
					b: z.number().describe('Second number'),
				}),
			)
			.output(z.object({ result: z.number() }))
			.handler(async ({ a, b }) => ({ result: a * b }))
			.toModelOutput((output) => ({
				answer: output.result,
				note: 'multiplication complete',
			}))
			.toMessage((output) => ({
				type: 'custom',
				data: {
					dummy: `Product is ${output.result}`,
				},
			}));

		const agent = new Agent('both-transforms-test')
			.model(getModel('anthropic'))
			.instructions('You are a calculator. Use multiply to multiply numbers. Be concise.')
			.tool(calcTool);

		const result = await agent.generate('What is 7 times 8?');

		// Custom message from toMessage should be present (uses raw output)
		const customMessages = result.messages.filter((m) => m.type === 'custom') as Array<{
			type: 'custom';
			data: { dummy: string };
		}>;
		expect(customMessages.length).toBeGreaterThan(0);
		expect(customMessages[0].data.dummy).toBe('Product is 56');

		// toolCalls stores the raw output
		expect(result.toolCalls).toBeDefined();
		const multiplyEntry = result.toolCalls!.find((tc) => tc.tool === 'multiply');
		expect(multiplyEntry).toBeDefined();
		expect((multiplyEntry!.output as { result: number }).result).toBe(56);

		// Tool result in messages stores the transformed output for the LLM
		const toolResults = findAllToolResults(result.messages);
		const multiplyToolResult = toolResults.find((tr) => tr.toolName === 'multiply');
		expect(multiplyToolResult).toBeDefined();
		const modelOutput = multiplyToolResult!.result as { answer: number; note: string };
		expect(modelOutput.answer).toBe(56);
		expect(modelOutput.note).toBe('multiplication complete');

		// The custom messages should be filtered out for the LLM
		const llmMessages = filterLlmMessages(result.messages);
		expect(llmMessages.length).toBeLessThan(result.messages.length);
	});
});
|
||||
|
|
@ -0,0 +1,115 @@
|
|||
import { expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
describeIf,
|
||||
collectStreamChunks,
|
||||
chunksOfType,
|
||||
getModel,
|
||||
createAgentWithAddTool,
|
||||
} from './helpers';
|
||||
import { Agent } from '../../index';
|
||||
import type { StreamChunk } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
describe('token usage integration', () => {
|
||||
it('reports token usage on a simple text response via streamText', async () => {
|
||||
const agent = new Agent('token-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('Reply with exactly: "Hello". Nothing else.');
|
||||
|
||||
const { stream: fullStream } = await agent.stream('Say hello');
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const finishChunks = chunksOfType(chunks, 'finish');
|
||||
expect(finishChunks.length).toBeGreaterThan(0);
|
||||
|
||||
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
|
||||
expect(finish.usage).toBeDefined();
|
||||
expect(finish.usage!.promptTokens).toBeGreaterThan(0);
|
||||
expect(finish.usage!.completionTokens).toBeGreaterThan(0);
|
||||
expect(finish.usage!.totalTokens).toBe(
|
||||
finish.usage!.promptTokens + finish.usage!.completionTokens,
|
||||
);
|
||||
});
|
||||
|
||||
it('reports token usage on a simple text response via run()', async () => {
|
||||
const agent = new Agent('token-run-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('Reply with exactly: "Hello". Nothing else.');
|
||||
|
||||
const result = await agent.generate('Say hello');
|
||||
expect(result.usage).toBeDefined();
|
||||
expect(result.usage!.promptTokens).toBeGreaterThan(0);
|
||||
expect(result.usage!.completionTokens).toBeGreaterThan(0);
|
||||
expect(result.usage!.totalTokens).toBe(
|
||||
result.usage!.promptTokens + result.usage!.completionTokens,
|
||||
);
|
||||
});
|
||||
|
||||
it('reports token usage after a multi-step tool call', async () => {
|
||||
const agent = createAgentWithAddTool('anthropic');
|
||||
|
||||
const { stream: fullStream } = await agent.stream('What is 7 + 13?');
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const finishChunks = chunksOfType(chunks, 'finish');
|
||||
expect(finishChunks.length).toBeGreaterThan(0);
|
||||
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
|
||||
|
||||
expect(finish.usage).toBeDefined();
|
||||
// Multi-step should use more tokens than a simple response
|
||||
expect(finish.usage!.promptTokens).toBeGreaterThan(0);
|
||||
expect(finish.usage!.completionTokens).toBeGreaterThan(0);
|
||||
expect(finish.usage!.totalTokens).toBe(
|
||||
finish.usage!.promptTokens + finish.usage!.completionTokens,
|
||||
);
|
||||
});
|
||||
|
||||
it('emits finish chunks with token usage in the stream', async () => {
|
||||
const agent = new Agent('finish-chunk-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('Reply with exactly: "OK". Nothing else.');
|
||||
|
||||
const { stream: fullStream } = await agent.stream('Acknowledge');
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const finishChunks = chunksOfType(chunks, 'finish');
|
||||
|
||||
expect(finishChunks.length).toBeGreaterThan(0);
|
||||
|
||||
const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
|
||||
expect(finish.finishReason).toBeDefined();
|
||||
|
||||
// Finish chunks should carry usage when available
|
||||
if (finish.usage) {
|
||||
expect(finish.usage.promptTokens).toBeGreaterThanOrEqual(0);
|
||||
expect(finish.usage.completionTokens).toBeGreaterThanOrEqual(0);
|
||||
}
|
||||
});
|
||||
|
||||
it('accumulates higher token counts with more complex prompts', async () => {
|
||||
const agent = new Agent('token-scale-test')
|
||||
.model(getModel('anthropic'))
|
||||
.instructions('You are a helpful assistant. Be concise.');
|
||||
|
||||
// Short prompt
|
||||
const { stream: short } = await agent.stream('Hi');
|
||||
const chunks = await collectStreamChunks(short);
|
||||
const finishChunks = chunksOfType(chunks, 'finish');
|
||||
expect(finishChunks.length).toBeGreaterThan(0);
|
||||
const finishShort = finishChunks[0] as StreamChunk & { type: 'finish' };
|
||||
|
||||
// Longer prompt
|
||||
const { stream: long } = await agent.stream(
|
||||
'Explain the difference between TCP and UDP networking protocols. Include at least three key differences.',
|
||||
);
|
||||
const chunksLong = await collectStreamChunks(long);
|
||||
const finishChunksLong = chunksOfType(chunksLong, 'finish');
|
||||
expect(finishChunksLong.length).toBeGreaterThan(0);
|
||||
const finishLong = finishChunksLong[0] as StreamChunk & { type: 'finish' };
|
||||
|
||||
// Longer prompt should use more completion tokens (longer response)
|
||||
expect(finishLong.usage!.completionTokens).toBeGreaterThan(finishShort.usage!.completionTokens);
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
import { expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
describeIf,
|
||||
collectStreamChunks,
|
||||
chunksOfType,
|
||||
collectTextDeltas,
|
||||
findAllToolResults,
|
||||
createAgentWithAlwaysErrorTool,
|
||||
createAgentWithFlakyTool,
|
||||
} from './helpers';
|
||||
import type { StreamChunk } from './helpers';
|
||||
|
||||
const describe = describeIf('anthropic');

// Verifies that tool failures are surfaced to the LLM as tool results rather
// than crashing the agent loop or the stream.
describe('tool error handling integration', () => {
	it('does not crash when a tool throws — stream completes with a finish chunk', async () => {
		const agent = createAgentWithAlwaysErrorTool('anthropic');

		const { stream } = await agent.stream('Fetch the data for id "abc123".');
		const chunks = await collectStreamChunks(stream);

		// Stream must never emit an error chunk
		const errorChunks = chunks.filter((c) => c.type === 'error');
		expect(errorChunks).toHaveLength(0);

		// Stream must close with a finish chunk whose reason is not 'error'
		const finishChunks = chunksOfType(chunks, 'finish');
		expect(finishChunks.length).toBeGreaterThan(0);
		const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
		expect(finish.finishReason).not.toBe('error');
	});

	it('does not crash when a tool throws — generate returns finishReason stop', async () => {
		const agent = createAgentWithAlwaysErrorTool('anthropic');

		const result = await agent.generate('Fetch the data for id "abc123".');

		expect(result.error).toBeUndefined();
		expect(result.finishReason).toBe('stop');
	});

	it('LLM receives the error message and acknowledges it in the response', async () => {
		const agent = createAgentWithAlwaysErrorTool('anthropic');

		const { stream } = await agent.stream('Fetch the data for id "abc123".');
		const chunks = await collectStreamChunks(stream);

		// Verify there IS a text response (LLM acknowledged the error)
		const text = collectTextDeltas(chunks);
		expect(text.length).toBeGreaterThan(0);

		// The response should mention the failure (error was visible to LLM)
		const mentionsFailure = /error|fail|unavailable|timeout|unable|could not/i.test(text);
		expect(mentionsFailure).toBe(true);
	});

	it('error tool-result appears in the message list', async () => {
		const agent = createAgentWithAlwaysErrorTool('anthropic');

		const { stream } = await agent.stream('Fetch the data for id "abc123".');
		const chunks = await collectStreamChunks(stream);

		// There should be a tool-result message in the stream
		const messageChunks = chunksOfType(chunks, 'message');
		const toolResults = findAllToolResults(messageChunks.map((c) => c.message));

		// The tool should have been called and produced a result (even if it errored)
		expect(toolResults.length).toBeGreaterThan(0);
		const brokenResult = toolResults.find((r) => r.toolName === 'broken_tool');
		expect(brokenResult).toBeDefined();
	});

	it('LLM can self-correct by retrying a flaky tool', async () => {
		// Flaky tool fails first, succeeds on retry; callCount() reports invocations.
		const { agent, callCount } = createAgentWithFlakyTool('anthropic');

		const result = await agent.generate('Fetch the data for id "xyz".');

		// Tool was called more than once — LLM retried after seeing the error
		expect(callCount()).toBeGreaterThanOrEqual(2);

		// Agent completed successfully
		expect(result.error).toBeUndefined();
		expect(result.finishReason).toBe('stop');
	});

	it('LLM self-correction: stream mode — flaky tool succeeds on retry', async () => {
		const { agent, callCount } = createAgentWithFlakyTool('anthropic');

		const { stream } = await agent.stream('Fetch the data for id "xyz".');
		const chunks = await collectStreamChunks(stream);

		// No error chunk in the stream
		const errorChunks = chunks.filter((c) => c.type === 'error');
		expect(errorChunks).toHaveLength(0);

		// Tool was retried
		expect(callCount()).toBeGreaterThanOrEqual(2);

		// Response should mention success or the value
		const text = collectTextDeltas(chunks);
		expect(text.length).toBeGreaterThan(0);
	});
});
|
||||
|
|
@ -0,0 +1,185 @@
|
|||
import { expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
describeIf,
|
||||
collectStreamChunks,
|
||||
chunksOfType,
|
||||
createAgentWithInterruptibleTool,
|
||||
createAgentWithMixedTools,
|
||||
createAgentWithParallelInterruptibleCalls,
|
||||
} from './helpers';
|
||||
import { isLlmMessage, type StreamChunk } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
describe('tool interrupt integration', () => {
|
||||
it('pauses the stream when a tool suspends', async () => {
|
||||
const agent = createAgentWithInterruptibleTool('anthropic');
|
||||
|
||||
const { stream: fullStream } = await agent.stream('Delete the file /tmp/test.txt');
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const chunkTypes = chunks.map((c) => c.type);
|
||||
|
||||
expect(chunkTypes).toContain('tool-call-suspended');
|
||||
|
||||
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
|
||||
expect(suspendedChunks.length).toBe(1);
|
||||
|
||||
const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
|
||||
expect(suspended.toolName).toBe('delete_file');
|
||||
expect(suspended.runId).toBeTruthy();
|
||||
expect(suspended.toolCallId).toBeTruthy();
|
||||
expect(suspended.suspendPayload).toEqual(
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
|
||||
expect.objectContaining({ message: expect.any(String), severity: 'destructive' }),
|
||||
);
|
||||
|
||||
// No tool-result should appear (tool is suspended)
|
||||
const contentChunks = chunks.filter(
|
||||
(c) =>
|
||||
c.type === 'message' &&
|
||||
'content' in c &&
|
||||
(c.content as { type: string }).type === 'tool-result',
|
||||
);
|
||||
expect(contentChunks).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('resumes the stream after resume with approval', async () => {
|
||||
const agent = createAgentWithInterruptibleTool('anthropic');
|
||||
|
||||
const { stream: fullStream } = await agent.stream('Delete the file /tmp/test.txt');
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
|
||||
expect(suspendedChunks.length).toBe(1);
|
||||
|
||||
const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
|
||||
const resumedStream = await agent.resume(
|
||||
'stream',
|
||||
{ approved: true },
|
||||
{ runId: suspended.runId!, toolCallId: suspended.toolCallId! },
|
||||
);
|
||||
|
||||
const resumedChunks = await collectStreamChunks(resumedStream.stream);
|
||||
const resumedTypes = resumedChunks.map((c) => c.type);
|
||||
|
||||
// After approval, tool-result should appear as content chunk
|
||||
const toolResultChunks = resumedChunks.filter(
|
||||
(c) =>
|
||||
c.type === 'message' &&
|
||||
isLlmMessage(c.message) &&
|
||||
c.message.content.some((c) => c.type === 'tool-result'),
|
||||
);
|
||||
expect(toolResultChunks.length).toBeGreaterThan(0);
|
||||
|
||||
expect(resumedTypes).toContain('text-delta');
|
||||
});
|
||||
|
||||
it('resumes the stream after resume with denial', async () => {
|
||||
const agent = createAgentWithInterruptibleTool('anthropic');
|
||||
|
||||
const { stream: fullStream } = await agent.stream('Delete the file /tmp/test.txt');
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
|
||||
expect(suspendedChunks.length).toBe(1);
|
||||
|
||||
const suspended = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
|
||||
const resumedStream = await agent.resume(
|
||||
'stream',
|
||||
{ approved: false },
|
||||
{ runId: suspended.runId!, toolCallId: suspended.toolCallId! },
|
||||
);
|
||||
|
||||
const resumedChunks = await collectStreamChunks(resumedStream.stream);
|
||||
const resumedTypes = resumedChunks.map((c) => c.type);
|
||||
|
||||
expect(resumedTypes).toContain('text-delta');
|
||||
});
|
||||
|
||||
	it('resumes each pending tool call one by one when multiple tool calls are suspended', async () => {
		const agent = createAgentWithParallelInterruptibleCalls('anthropic');

		const { stream: fullStream } = await agent.stream(
			'Delete these two files: /tmp/a.txt and /tmp/b.txt. You MUST call delete_file for each file in a single turn using parallel tool calls. After deleting all files, tell if you succeeded',
		);

		const chunks = await collectStreamChunks(fullStream);
		const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');

		// The first interruptible tool call suspends, halting the loop.
		// Only 1 suspended chunk is emitted even though 2 tool calls were made.
		expect(suspendedChunks.length).toBe(1);

		const suspended1 = suspendedChunks[0] as StreamChunk & { type: 'tool-call-suspended' };
		expect(suspended1.toolName).toBe('delete_file');

		// Resume the first suspended tool call
		const stream2 = await agent.resume(
			'stream',
			{ approved: true },
			{ runId: suspended1.runId!, toolCallId: suspended1.toolCallId! },
		);

		const chunks2 = await collectStreamChunks(stream2.stream);
		const suspendedChunks2 = chunksOfType(chunks2, 'tool-call-suspended');

		// The second tool call should now be suspended (not an error)
		expect(suspendedChunks2.length).toBe(1);

		// Must be a distinct pending call, not the one just approved.
		const suspended2 = suspendedChunks2[0] as StreamChunk & { type: 'tool-call-suspended' };
		expect(suspended2.toolCallId).not.toBe(suspended1.toolCallId);
		expect(suspended2.toolName).toBe('delete_file');

		// Resume the second suspended tool call
		const stream3 = await agent.resume(
			'stream',
			{ approved: true },
			{ runId: suspended2.runId!, toolCallId: suspended2.toolCallId! },
		);

		const chunks3 = await collectStreamChunks(stream3.stream);

		// After all original tool calls are resolved, the agent loop should
		// continue without crashing (no AI_MissingToolResultsError).
		// The LLM may respond with text or make additional tool calls.
		const errorChunks = chunks3.filter((c) => c.type === 'error');
		expect(errorChunks).toHaveLength(0);

		const finishChunks = chunksOfType(chunks3, 'finish');
		expect(finishChunks.length).toBeGreaterThan(0);
		const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
		expect(finish.finishReason).not.toBe('error');
	});
|
||||
|
||||
it('auto-executes non-interruptible tools while suspending interruptible ones', async () => {
|
||||
const agent = createAgentWithMixedTools('anthropic');
|
||||
|
||||
const { stream: fullStream } = await agent.stream(
|
||||
'You must call both tools: first call list_files with dir="/home", then call delete_file with path="/home/readme.md". Do not skip either tool.',
|
||||
);
|
||||
|
||||
const chunks = await collectStreamChunks(fullStream);
|
||||
|
||||
// list_files should auto-execute — its result should appear as content
|
||||
const toolResultChunks = chunks.filter(
|
||||
(c) =>
|
||||
c.type === 'message' &&
|
||||
isLlmMessage(c.message) &&
|
||||
c.message.content.some((c) => c.type === 'tool-result'),
|
||||
);
|
||||
expect(toolResultChunks.length).toBeGreaterThan(0);
|
||||
|
||||
// delete_file should be suspended
|
||||
const suspendedChunks = chunksOfType(chunks, 'tool-call-suspended');
|
||||
const deleteSuspended = suspendedChunks.find(
|
||||
(c) => (c as StreamChunk & { type: 'tool-call-suspended' }).toolName === 'delete_file',
|
||||
);
|
||||
|
||||
// If the LLM called delete_file, it should have been suspended
|
||||
if (deleteSuspended) {
|
||||
expect(deleteSuspended).toBeDefined();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
import { expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
chunksOfType,
|
||||
collectStreamChunks,
|
||||
createAgentWithToContentTool,
|
||||
describeIf,
|
||||
} from './helpers';
|
||||
import { filterLlmMessages } from '../../index';
|
||||
import type { AgentMessage, StreamChunk } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
describe('tool-result to message integration', () => {
|
||||
it('adds a custom message to generate result that is visible to user but not to the LLM', async () => {
|
||||
const agent = createAgentWithToContentTool('anthropic');
|
||||
const result = await agent.generate('What is 3 + 4?');
|
||||
|
||||
// The custom message must appear in result.messages
|
||||
const customMessages = result.messages.filter((m) => m.type === 'custom');
|
||||
expect(customMessages.length).toBeGreaterThan(0);
|
||||
|
||||
const toolResultMsg = customMessages.find((m) => m.type === 'custom' && 'dummy' in m.data) as
|
||||
| { type: 'custom'; data: { dummy: string } }
|
||||
| undefined;
|
||||
|
||||
expect(toolResultMsg).toBeDefined();
|
||||
expect(toolResultMsg!.data.dummy).toContain('dummy message. Tool output');
|
||||
|
||||
// filterLlmMessages must strip the custom message — the LLM never sees it.
|
||||
// The filtered count must be less than total because custom messages were removed.
|
||||
const llmMessages = filterLlmMessages(result.messages);
|
||||
expect(llmMessages.length).toBeLessThan(result.messages.length);
|
||||
});
|
||||
|
||||
it('emits toContent result as a content chunk in the stream', async () => {
|
||||
const agent = createAgentWithToContentTool('anthropic');
|
||||
const { stream } = await agent.stream('What is 5 + 6?');
|
||||
|
||||
const chunks = await collectStreamChunks(stream);
|
||||
// Must contain at least one content chunk with the custom text from toContent
|
||||
const messageChunks = chunksOfType(chunks, 'message') as Array<
|
||||
StreamChunk & { type: 'message'; message: AgentMessage }
|
||||
>;
|
||||
|
||||
const toContentChunk = messageChunks.find(
|
||||
(c) => c.message.type === 'custom' && 'dummy' in c.message.data,
|
||||
);
|
||||
|
||||
expect(toContentChunk).toBeDefined();
|
||||
expect(
|
||||
(toContentChunk!.message as { type: 'custom'; data: { dummy: string } }).data.dummy,
|
||||
).toContain('dummy message. Tool output');
|
||||
});
|
||||
});
|
||||
170
packages/@n8n/agents/src/__tests__/integration/usage.test.ts
Normal file
170
packages/@n8n/agents/src/__tests__/integration/usage.test.ts
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
import { expect, it } from 'vitest';

import { describeIf, collectStreamChunks, chunksOfType, getModel } from './helpers';
import { Agent } from '../../index';
import type { StreamChunk } from '../../index';

const describeAnthropic = describeIf('anthropic');

// Integration tests for token usage accounting, models.dev-based cost
// estimation, and sub-agent usage aggregation. These hit a live provider,
// so assertions are bounds ("> 0", "< $0.01") rather than exact values.
describeAnthropic('usage and cost (Anthropic)', () => {
	it('returns token usage on generate result', async () => {
		const agent = new Agent('usage-test').model(getModel('anthropic')).instructions('Be concise.');

		const result = await agent.generate('Say hello');

		expect(result.usage).toBeDefined();
		expect(result.usage!.promptTokens).toBeGreaterThan(0);
		expect(result.usage!.completionTokens).toBeGreaterThan(0);
		// totalTokens must be the exact sum of prompt + completion tokens.
		expect(result.usage!.totalTokens).toBe(
			result.usage!.promptTokens + result.usage!.completionTokens,
		);
	});

	it('returns token usage on stream finish chunk', async () => {
		const agent = new Agent('usage-stream-test')
			.model(getModel('anthropic'))
			.instructions('Be concise.');

		const { stream: fullStream } = await agent.stream('Say hello');
		const chunks = await collectStreamChunks(fullStream);
		const finishChunks = chunksOfType(chunks, 'finish');

		// The finish chunk carries the same usage shape as the generate result.
		expect(finishChunks.length).toBeGreaterThan(0);
		const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
		expect(finish.usage).toBeDefined();
		expect(finish.usage!.promptTokens).toBeGreaterThan(0);
		expect(finish.usage!.completionTokens).toBeGreaterThan(0);
		expect(finish.usage!.totalTokens).toBe(
			finish.usage!.promptTokens + finish.usage!.completionTokens,
		);
	});

	it('includes estimated cost from models.dev pricing', async () => {
		const agent = new Agent('cost-test').model(getModel('anthropic')).instructions('Be concise.');

		const result = await agent.generate('Say hello');

		expect(result.usage).toBeDefined();
		expect(result.usage!.cost).toBeDefined();
		expect(result.usage!.cost).toBeGreaterThan(0);

		// Sanity check: a simple "say hello" should cost less than $0.01
		expect(result.usage!.cost!).toBeLessThan(0.01);
	});

	it('includes model ID in generate result', async () => {
		const agent = new Agent('model-test').model(getModel('anthropic')).instructions('Be concise.');

		const result = await agent.generate('Say hello');
		expect(result.model).toBe(getModel('anthropic'));
	});

	it('includes cost in stream finish chunk', async () => {
		const agent = new Agent('cost-stream-test')
			.model(getModel('anthropic'))
			.instructions('Be concise.');

		const { stream: fullStream } = await agent.stream('Say hello');
		const chunks = await collectStreamChunks(fullStream);
		const finishChunks = chunksOfType(chunks, 'finish');

		expect(finishChunks.length).toBeGreaterThan(0);
		const finish = finishChunks[0] as StreamChunk & { type: 'finish' };
		expect(finish.usage).toBeDefined();
		expect(finish.usage!.cost).toBeDefined();
		expect(finish.usage!.cost).toBeGreaterThan(0);
	});

	it('aggregates sub-agent usage when using asTool()', async () => {
		// A sub-agent exposed via asTool() must report its own usage back to
		// the parent, and totalCost must combine both.
		const subAgent = new Agent('translator')
			.model(getModel('anthropic'))
			.instructions('Translate the input to French. Reply with only the translation.');

		const parentAgent = new Agent('orchestrator')
			.model(getModel('anthropic'))
			.instructions(
				'You are an orchestrator. When asked to translate, use the translator tool. Be concise.',
			)
			.tool(subAgent.asTool('Translate text to French'));

		const result = await parentAgent.generate('Translate "hello world" to French');

		// Parent should have its own usage
		expect(result.usage).toBeDefined();
		expect(result.usage!.promptTokens).toBeGreaterThan(0);
		expect(result.usage!.cost).toBeGreaterThan(0);
		expect(result.model).toBe(getModel('anthropic'));

		// Sub-agent usage should be captured
		expect(result.subAgentUsage).toBeDefined();
		expect(result.subAgentUsage!.length).toBeGreaterThan(0);

		const translatorUsage = result.subAgentUsage!.find((s) => s.agent === 'translator');
		expect(translatorUsage).toBeDefined();
		expect(translatorUsage!.usage.promptTokens).toBeGreaterThan(0);
		expect(translatorUsage!.usage.cost).toBeGreaterThan(0);

		// Total cost should be parent + sub-agent
		expect(result.totalCost).toBeDefined();
		expect(result.totalCost!).toBeGreaterThan(result.usage!.cost!);
		expect(result.totalCost!).toBeCloseTo(result.usage!.cost! + translatorUsage!.usage.cost!, 6);
	});

	it('aggregates sub-agent usage via stream()', async () => {
		// Same aggregation contract as asTool() above, but surfaced on the
		// stream's finish chunk instead of the generate result.
		const subAgent = new Agent('stream-translator')
			.model(getModel('anthropic'))
			.instructions('Translate the input to French. Reply with only the translation.');

		const parentAgent = new Agent('stream-orchestrator')
			.model(getModel('anthropic'))
			.instructions(
				'You are an orchestrator. When asked to translate, use the stream-translator tool. Be concise.',
			)
			.tool(subAgent.asTool('Translate text to French'));

		const { stream: fullStream } = await parentAgent.stream('Translate "goodbye" to French');
		const chunks = await collectStreamChunks(fullStream);
		const finishChunks = chunksOfType(chunks, 'finish');

		expect(finishChunks.length).toBeGreaterThan(0);
		const finish = finishChunks[finishChunks.length - 1] as StreamChunk & { type: 'finish' };

		// Should have usage with cost
		expect(finish.usage).toBeDefined();
		expect(finish.usage!.cost).toBeGreaterThan(0);

		// Should include model
		expect(finish.model).toBe(getModel('anthropic'));

		// Should include sub-agent usage
		expect(finish.subAgentUsage).toBeDefined();
		expect(finish.subAgentUsage!.length).toBeGreaterThan(0);

		const translatorUsage = finish.subAgentUsage!.find((s) => s.agent === 'stream-translator');
		expect(translatorUsage).toBeDefined();
		expect(translatorUsage!.usage.promptTokens).toBeGreaterThan(0);
		expect(translatorUsage!.usage.cost).toBeGreaterThan(0);

		// Total cost should include parent + sub-agent
		expect(finish.totalCost).toBeDefined();
		expect(finish.totalCost!).toBeGreaterThan(finish.usage!.cost!);
	});
});

const describeOpenAI = describeIf('openai');

// Minimal cross-provider check: the same usage/cost fields must be populated
// when running against OpenAI.
describeOpenAI('usage and cost (OpenAI)', () => {
	it('returns token usage and cost on generate result', async () => {
		const agent = new Agent('openai-usage-test')
			.model(getModel('openai'))
			.instructions('Be concise.');

		const result = await agent.generate('Say hello');

		expect(result.usage).toBeDefined();
		expect(result.usage!.promptTokens).toBeGreaterThan(0);
		expect(result.usage!.completionTokens).toBeGreaterThan(0);
		expect(result.usage!.cost).toBeDefined();
		expect(result.usage!.cost).toBeGreaterThan(0);
	});
});
|
||||
|
|
@ -0,0 +1,240 @@
|
|||
import { afterEach, beforeEach, expect, it } from 'vitest';

import { Agent } from '../../../sdk/agent';
import type { FileEntry } from '../../../workspace/types';
import { Workspace } from '../../../workspace/workspace';
import { InMemoryFilesystem, FakeProcessManager, FakeSandbox } from '../../workspace/test-utils';
import {
	chunksOfType,
	collectStreamChunks,
	collectTextDeltas,
	describeIf,
	findAllToolCalls,
	findAllToolResults,
	getModel,
} from '../helpers';

// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

const describe = describeIf('anthropic');

// Integration tests for Agent + Workspace: a live LLM drives the workspace_*
// tools against an in-memory filesystem and a fake sandbox, so file contents
// and command executions can be asserted exactly without touching disk.
describe('workspace agent integration', () => {
	let memFs: InMemoryFilesystem;
	let fakeProcessManager: FakeProcessManager;
	let fakeSandbox: FakeSandbox;
	let workspace: Workspace;

	// Fresh workspace per test: in-memory FS + fake sandbox, initialized eagerly.
	beforeEach(async () => {
		memFs = new InMemoryFilesystem('agent-test-fs');
		fakeProcessManager = new FakeProcessManager();
		fakeSandbox = new FakeSandbox('agent-test', fakeProcessManager);
		workspace = new Workspace({
			id: 'agent-ws',
			filesystem: memFs,
			sandbox: fakeSandbox,
		});
		await workspace.init();
	});

	afterEach(async () => {
		await workspace.destroy();
	});

	it('agent uses workspace_write_file and workspace_read_file tools', async () => {
		const agent = new Agent('workspace-file-test')
			.model(getModel('anthropic'))
			.instructions(
				'You are a file manager. When asked to create a file, use workspace_write_file. ' +
					'When asked to read a file, use workspace_read_file. Be concise.',
			)
			.workspace(workspace);

		const result = await agent.generate(
			'Write "Hello from n8n!" to /greeting.txt, then read it back and tell me the contents. You MUST call both tools',
		);

		expect(result.finishReason).toBe('stop');
		expect(result.error).toBeUndefined();

		const toolCalls = findAllToolCalls(result.messages);
		const toolResults = findAllToolResults(result.messages);

		const writeCall = toolCalls.find((tc) => tc.toolName === 'workspace_write_file');
		expect(writeCall).toBeDefined();

		const readCall = toolCalls.find((tc) => tc.toolName === 'workspace_read_file');
		expect(readCall).toBeDefined();

		const readResult = toolResults.find((tr) => tr.toolName === 'workspace_read_file');
		expect(readResult).toBeDefined();
		expect((readResult!.result as { content: string }).content).toContain('Hello from n8n!');

		// The write must have landed in the backing in-memory filesystem.
		expect(memFs.getFileContent('/greeting.txt')).toBe('Hello from n8n!');
	});

	it('agent uses workspace_execute_command tool', async () => {
		// Stub command execution: echo commands return their argument; anything
		// else gets a generic "ran: <cmd>" stdout with exit code 0.
		fakeProcessManager.commandHandler = (cmd) => {
			if (cmd.includes('echo')) {
				const match = cmd.match(/echo\s+"?([^"]*)"?/);
				const text = match?.[1] ?? 'unknown';
				return { stdout: `${text}\n`, stderr: '', exitCode: 0 };
			}
			return { stdout: `ran: ${cmd}\n`, stderr: '', exitCode: 0 };
		};

		const agent = new Agent('workspace-exec-test')
			.model(getModel('anthropic'))
			.instructions(
				'You are a shell assistant. When asked to run a command, use workspace_execute_command. Be concise.',
			)
			.workspace(workspace);

		const result = await agent.generate('Run the command: echo "n8n workspace test"');

		expect(result.finishReason).toBe('stop');
		expect(result.error).toBeUndefined();

		const toolCalls = findAllToolCalls(result.messages);
		const execCall = toolCalls.find((tc) => tc.toolName === 'workspace_execute_command');
		expect(execCall).toBeDefined();

		const toolResults = findAllToolResults(result.messages);
		const execResult = toolResults.find((tr) => tr.toolName === 'workspace_execute_command');
		expect(execResult).toBeDefined();
		expect((execResult!.result as { success: boolean }).success).toBe(true);
	});

	it('agent uses workspace_mkdir and workspace_list_files together', async () => {
		// Pre-seed the filesystem so listing has deterministic contents.
		await memFs.mkdir('/project', { recursive: true });
		await memFs.writeFile('/project/index.ts', 'console.log("hello")');
		await memFs.writeFile('/project/README.md', '# Project');

		const agent = new Agent('workspace-list-test')
			.model(getModel('anthropic'))
			.instructions(
				'You are a file manager. Use workspace_list_files to list files. Be concise and list the filenames you find.',
			)
			.workspace(workspace);

		const result = await agent.generate('List the files in the /project directory.');

		expect(result.finishReason).toBe('stop');
		expect(result.error).toBeUndefined();

		const toolCalls = findAllToolCalls(result.messages);
		const listCall = toolCalls.find((tc) => tc.toolName === 'workspace_list_files');
		expect(listCall).toBeDefined();

		const toolResults = findAllToolResults(result.messages);
		const listResult = toolResults.find((tr) => tr.toolName === 'workspace_list_files');
		expect(listResult).toBeDefined();
		const entries = (listResult!.result as unknown as { entries: FileEntry[] }).entries;
		const names = entries.map((e) => e.name);
		expect(names).toContain('index.ts');
		expect(names).toContain('README.md');
	});

	it('workspace instructions are appended to agent instructions', () => {
		new Agent('workspace-instructions-test')
			.model(getModel('anthropic'))
			.instructions('Base instructions.')
			.workspace(workspace);
		// 13 is the full workspace tool set exposed by Workspace.getTools().
		const tools = workspace.getTools();
		expect(tools.length).toBe(13);

		// Instructions should describe the fake backends wired in beforeEach.
		const instructions = workspace.getInstructions();
		expect(instructions).toContain('Fake sandbox');
		expect(instructions).toContain('In-memory filesystem');
	});

	it('stream: agent writes a file and streams the response', async () => {
		const agent = new Agent('workspace-stream-test')
			.model(getModel('anthropic'))
			.instructions(
				'You are a file manager. When asked to create a file, use workspace_write_file. Be very concise.',
			)
			.workspace(workspace);

		const { stream } = await agent.stream(
			'Create a file at /hello.txt with the content "streaming works"',
		);
		const chunks = await collectStreamChunks(stream);

		const errorChunks = chunks.filter((c) => c.type === 'error');
		expect(errorChunks).toHaveLength(0);

		const finishChunks = chunksOfType(chunks, 'finish');
		expect(finishChunks.length).toBeGreaterThan(0);
		const lastFinish = finishChunks[finishChunks.length - 1] as {
			type: 'finish';
			finishReason: string;
		};
		expect(lastFinish.finishReason).toBe('stop');

		// The agent must still produce streamed text alongside the tool call.
		const text = collectTextDeltas(chunks);
		expect(text.length).toBeGreaterThan(0);

		expect(memFs.getFileContent('/hello.txt')).toBe('streaming works');
	});

	it('agent uses workspace_file_stat to get file metadata', async () => {
		// 29 bytes — asserted exactly against workspace_file_stat below.
		await memFs.writeFile('/data.json', '{"key": "value", "count": 42}');

		const agent = new Agent('workspace-stat-test')
			.model(getModel('anthropic'))
			.instructions(
				'You are a file manager. Use workspace_file_stat to get file info. Report the file size and type. Be concise.',
			)
			.workspace(workspace);

		const result = await agent.generate('What is the size and type of /data.json?');

		expect(result.finishReason).toBe('stop');
		expect(result.error).toBeUndefined();

		const toolCalls = findAllToolCalls(result.messages);
		const statCall = toolCalls.find((tc) => tc.toolName === 'workspace_file_stat');
		expect(statCall).toBeDefined();

		const toolResults = findAllToolResults(result.messages);
		const statResult = toolResults.find((tr) => tr.toolName === 'workspace_file_stat');
		expect(statResult).toBeDefined();
		const stat = statResult!.result as { type: string; size: number };
		expect(stat.type).toBe('file');
		expect(stat.size).toBe(29);
	});

	it('agent handles multi-step workflow: mkdir, write, list, read', async () => {
		const agent = new Agent('workspace-workflow-test')
			.model(getModel('anthropic'))
			.instructions(
				"You are a file manager. Follow the user's instructions step by step using workspace tools. " +
					'Available: workspace_mkdir, workspace_write_file, workspace_list_files, workspace_read_file. Be concise.',
			)
			.workspace(workspace);

		const result = await agent.generate(
			'1. Create a directory /app\n' +
				'2. Write "export default {}" to /app/config.ts\n' +
				'3. List files in /app\n' +
				'4. Read /app/config.ts and tell me its contents',
		);

		expect(result.finishReason).toBe('stop');
		expect(result.error).toBeUndefined();

		const toolResults = findAllToolResults(result.messages);
		const resultToolNames = toolResults.map((tr) => tr.toolName);

		// Only write/read are required assertions — the LLM may or may not use
		// mkdir/list explicitly depending on how it plans the steps.
		expect(resultToolNames).toContain('workspace_write_file');
		expect(resultToolNames).toContain('workspace_read_file');

		const readResult = toolResults.find((tr) => tr.toolName === 'workspace_read_file');
		expect(readResult).toBeDefined();
		expect((readResult!.result as { content: string }).content).toContain('export default {}');

		expect(memFs.getFileContent('/app/config.ts')).toBe('export default {}');
	});
});
|
||||
|
|
@ -0,0 +1,59 @@
|
|||
import { expect, it } from 'vitest';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { describeIf, findLastTextContent } from './helpers';
|
||||
import { Agent, Tool, filterLlmMessages } from '../../index';
|
||||
|
||||
const describe = describeIf('anthropic');
|
||||
|
||||
describe('Zod validation errors surface to LLM and allow self-correction', () => {
|
||||
/**
|
||||
* Verify that when the LLM receives a Zod error result, it shows up in the
|
||||
* conversation as an actual tool-result message with an error payload (not a
|
||||
* thrown exception), so the agent loop continues rather than aborting.
|
||||
*/
|
||||
it('includes the Zod error text in the tool-result visible to the LLM', async () => {
|
||||
const strictTool = new Tool('find_user')
|
||||
.description('Find a user by their numeric age (18–99 only).')
|
||||
.input(
|
||||
z.object({
|
||||
age: z
|
||||
.number()
|
||||
.int()
|
||||
.min(18, 'age must be at least 18')
|
||||
.max(99, 'age must be at most 99')
|
||||
.describe('User age (18–99)'),
|
||||
}),
|
||||
)
|
||||
.output(z.object({ user: z.string() }))
|
||||
.handler(async ({ age }) => ({ user: `User aged ${age}` }));
|
||||
|
||||
const agent = new Agent('age-correction-agent')
|
||||
.model('anthropic/claude-haiku-4-5')
|
||||
.instructions(
|
||||
'You are a user directory. Use find_user to look up users by age. ' +
|
||||
'The age must be between 18 and 99. ' +
|
||||
'If validation fails, correct the age and retry. Be very concise.',
|
||||
)
|
||||
.tool(strictTool);
|
||||
|
||||
// "150" is out of range — should trigger a Zod error, then retry with a valid age
|
||||
const result = await agent.generate(
|
||||
'Find a user aged 150. If that age is invalid, use 25 instead and retry. You MUST find a user aged 150, and only then use 25',
|
||||
);
|
||||
|
||||
expect(result.finishReason).toBe('stop');
|
||||
expect(result.error).toBeUndefined();
|
||||
|
||||
// At least two tool-result messages: one error, one success
|
||||
const allMessages = filterLlmMessages(result.messages);
|
||||
const toolResultMessages = allMessages.filter((m) =>
|
||||
m.content.some((c) => c.type === 'tool-result'),
|
||||
);
|
||||
expect(toolResultMessages.length).toBeGreaterThanOrEqual(2);
|
||||
|
||||
// The final response should mention a user (age 25 or similar)
|
||||
const text = findLastTextContent(result.messages);
|
||||
expect(text).toBeTruthy();
|
||||
});
|
||||
});
|
||||
66
packages/@n8n/agents/src/__tests__/model-factory.test.ts
Normal file
66
packages/@n8n/agents/src/__tests__/model-factory.test.ts
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
import type { LanguageModel } from 'ai';

import { createModel } from '../runtime/model-factory';

// Mock both provider SDKs so createModel can be exercised without network
// access: each mocked factory returns a stub "model" object that simply
// echoes the provider name, model id, and the options it was created with.
jest.mock('@ai-sdk/anthropic', () => ({
	createAnthropic: (opts?: { apiKey?: string; baseURL?: string }) => (model: string) => ({
		provider: 'anthropic',
		modelId: model,
		apiKey: opts?.apiKey,
		baseURL: opts?.baseURL,
		specificationVersion: 'v3',
	}),
}));

jest.mock('@ai-sdk/openai', () => ({
	createOpenAI: (opts?: { apiKey?: string; baseURL?: string }) => (model: string) => ({
		provider: 'openai',
		modelId: model,
		apiKey: opts?.apiKey,
		baseURL: opts?.baseURL,
		specificationVersion: 'v3',
	}),
}));

describe('createModel', () => {
	it('should accept a string config', () => {
		// 'provider/model' strings are split into provider + model id.
		const model = createModel('anthropic/claude-sonnet-4-5') as unknown as Record<string, unknown>;
		expect(model.provider).toBe('anthropic');
		expect(model.modelId).toBe('claude-sonnet-4-5');
	});

	it('should accept an object config with url', () => {
		// The object form forwards apiKey and a custom base URL to the provider factory.
		const model = createModel({
			id: 'openai/gpt-4o',
			apiKey: 'sk-test',
			url: 'https://custom.endpoint.com/v1',
		}) as unknown as Record<string, unknown>;
		expect(model.provider).toBe('openai');
		expect(model.modelId).toBe('gpt-4o');
		expect(model.apiKey).toBe('sk-test');
		expect(model.baseURL).toBe('https://custom.endpoint.com/v1');
	});

	it('should pass through a prebuilt LanguageModel', () => {
		// An already-constructed model must be returned unchanged (same reference).
		const prebuilt = {
			doGenerate: jest.fn(),
			doStream: jest.fn(),
			specificationVersion: 'v2' as const,
			modelId: 'custom-model',
			provider: 'custom',
			defaultObjectGenerationMode: undefined,
		} as unknown as LanguageModel;

		const result = createModel(prebuilt);
		expect(result).toBe(prebuilt);
	});

	it('should handle model IDs with multiple slashes', () => {
		// Only the first slash separates the provider; everything after it is
		// part of the model id (e.g. fine-tuned OpenAI model names).
		const model = createModel('openai/ft:gpt-4o:my-org:custom:abc123') as unknown as Record<
			string,
			unknown
		>;
		expect(model.provider).toBe('openai');
		expect(model.modelId).toBe('ft:gpt-4o:my-org:custom:abc123');
	});
});
|
||||
553
packages/@n8n/agents/src/__tests__/sqlite-memory.test.ts
Normal file
553
packages/@n8n/agents/src/__tests__/sqlite-memory.test.ts
Normal file
|
|
@ -0,0 +1,553 @@
|
|||
import * as fs from 'fs';
|
||||
import * as os from 'os';
|
||||
import * as path from 'path';
|
||||
|
||||
import { SqliteMemory } from '../storage/sqlite-memory';
|
||||
import type { AgentMessage, Message } from '../types/sdk/message';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function makeTempDb(): string {
|
||||
return path.join(os.tmpdir(), `test-${Date.now()}-${Math.random().toString(36).slice(2)}.db`);
|
||||
}
|
||||
|
||||
function makeMsg(role: 'user' | 'assistant', text: string): Message {
|
||||
return { role, content: [{ type: 'text', text }] };
|
||||
}
|
||||
|
||||
function textOf(msg: AgentMessage): string {
|
||||
const m = msg as Message;
|
||||
return (m.content[0] as { text: string }).text;
|
||||
}
|
||||
|
||||
function makeMemory(dbPath: string, namespace?: string): SqliteMemory {
|
||||
return new SqliteMemory({ url: `file:${dbPath}`, namespace });
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Thread management
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Thread CRUD against a real on-disk SQLite file (one fresh file per test).
describe('SqliteMemory — threads', () => {
	let dbPath: string;

	beforeEach(() => {
		dbPath = makeTempDb();
	});

	// Best-effort cleanup of the temp database file.
	afterEach(() => {
		try {
			fs.unlinkSync(dbPath);
		} catch {
			/* ignore */
		}
	});

	it('saves and retrieves a thread', async () => {
		const mem = makeMemory(dbPath);
		const saved = await mem.saveThread({
			id: 't-1',
			resourceId: 'user-1',
			title: 'Hello',
			metadata: { foo: 'bar' },
		});

		// saveThread echoes the persisted row, including generated timestamps.
		expect(saved.id).toBe('t-1');
		expect(saved.resourceId).toBe('user-1');
		expect(saved.title).toBe('Hello');
		expect(saved.metadata).toEqual({ foo: 'bar' });
		expect(saved.createdAt).toBeInstanceOf(Date);
		expect(saved.updatedAt).toBeInstanceOf(Date);

		// A round-trip read must return the same fields.
		const fetched = await mem.getThread('t-1');
		expect(fetched).not.toBeNull();
		expect(fetched!.id).toBe('t-1');
		expect(fetched!.title).toBe('Hello');
		expect(fetched!.metadata).toEqual({ foo: 'bar' });
	});

	it('returns null for an unknown thread', async () => {
		const mem = makeMemory(dbPath);
		const result = await mem.getThread('nonexistent');
		expect(result).toBeNull();
	});

	it('deletes a thread and its messages', async () => {
		const mem = makeMemory(dbPath);
		await mem.saveThread({ id: 't-del', resourceId: 'user-1' });
		await mem.saveMessages({ threadId: 't-del', messages: [makeMsg('user', 'hi')] });

		await mem.deleteThread('t-del');

		// Deleting the thread must cascade to its messages as well.
		expect(await mem.getThread('t-del')).toBeNull();
		expect(await mem.getMessages('t-del')).toEqual([]);
	});

	it('preserves createdAt on re-save, updates updatedAt', async () => {
		const mem = makeMemory(dbPath);
		const first = await mem.saveThread({ id: 't-resave', resourceId: 'user-1', title: 'v1' });

		// Small delay to ensure updatedAt differs
		await new Promise((r) => setTimeout(r, 20));

		const second = await mem.saveThread({ id: 't-resave', resourceId: 'user-1', title: 'v2' });

		// Re-saving an existing thread is an upsert: createdAt is stable,
		// updatedAt advances, and mutable fields take the new values.
		expect(second.createdAt.getTime()).toBe(first.createdAt.getTime());
		expect(second.updatedAt.getTime()).toBeGreaterThanOrEqual(first.updatedAt.getTime());
		expect(second.title).toBe('v2');
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Message persistence
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Message persistence: ordering, limit handling, per-thread isolation,
// message-ID assignment, and targeted deletion. Each test gets its own
// temporary database file, removed again in afterEach.
describe('SqliteMemory — messages', () => {
	let dbPath: string;

	beforeEach(() => {
		// Fresh database file per test so state cannot leak between tests.
		dbPath = makeTempDb();
	});

	afterEach(() => {
		try {
			fs.unlinkSync(dbPath);
		} catch {
			/* ignore */
		}
	});

	it('saves and retrieves messages in order', async () => {
		const mem = makeMemory(dbPath);
		await mem.saveMessages({
			threadId: 't-1',
			messages: [
				makeMsg('user', 'first'),
				makeMsg('assistant', 'second'),
				makeMsg('user', 'third'),
			],
		});

		const msgs = await mem.getMessages('t-1');
		expect(msgs).toHaveLength(3);
		expect(textOf(msgs[0])).toBe('first');
		expect(textOf(msgs[1])).toBe('second');
		expect(textOf(msgs[2])).toBe('third');
	});

	it('respects limit — returns last N messages', async () => {
		const mem = makeMemory(dbPath);
		// Save messages one at a time to guarantee distinct createdAt timestamps
		await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('user', 'msg-1')] });
		await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('assistant', 'msg-2')] });
		await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('user', 'msg-3')] });
		await mem.saveMessages({ threadId: 't-1', messages: [makeMsg('assistant', 'msg-4')] });

		// limit selects the most recent N, still returned in chronological order.
		const msgs = await mem.getMessages('t-1', { limit: 2 });
		expect(msgs).toHaveLength(2);
		expect(textOf(msgs[0])).toBe('msg-3');
		expect(textOf(msgs[1])).toBe('msg-4');
	});

	it('isolates messages by thread', async () => {
		const mem = makeMemory(dbPath);
		await mem.saveMessages({ threadId: 't-a', messages: [makeMsg('user', 'thread-a')] });
		await mem.saveMessages({ threadId: 't-b', messages: [makeMsg('user', 'thread-b')] });

		const msgsA = await mem.getMessages('t-a');
		expect(msgsA).toHaveLength(1);
		expect(textOf(msgsA[0])).toBe('thread-a');

		const msgsB = await mem.getMessages('t-b');
		expect(msgsB).toHaveLength(1);
		expect(textOf(msgsB[0])).toBe('thread-b');
	});

	it('assigns stable IDs — preserves existing, generates for missing', async () => {
		const mem = makeMemory(dbPath);
		const withId = { ...makeMsg('user', 'has-id'), id: 'custom-id-123' } as unknown as AgentMessage;
		const withoutId = makeMsg('assistant', 'no-id');

		await mem.saveMessages({ threadId: 't-1', messages: [withId, withoutId] });

		const msgs = await mem.getMessages('t-1');
		expect(msgs).toHaveLength(2);

		// The message with a pre-existing id should keep it
		const first = msgs[0] as unknown as { id: string };
		expect(first.id).toBe('custom-id-123');

		// The message without id should have gotten one assigned
		const second = msgs[1] as unknown as { id: string };
		expect(typeof second.id).toBe('string');
		expect(second.id.length).toBeGreaterThan(0);
	});

	it('deletes specific messages', async () => {
		const mem = makeMemory(dbPath);
		const m1 = { ...makeMsg('user', 'keep'), id: 'keep-1' } as unknown as AgentMessage;
		const m2 = { ...makeMsg('user', 'delete-me'), id: 'del-1' } as unknown as AgentMessage;
		await mem.saveMessages({ threadId: 't-1', messages: [m1, m2] });

		await mem.deleteMessages(['del-1']);

		const msgs = await mem.getMessages('t-1');
		expect(msgs).toHaveLength(1);
		expect((msgs[0] as unknown as { id: string }).id).toBe('keep-1');
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Working memory
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Working-memory storage keyed either by resourceId (scope=resource) or by
// threadId alone (scope=thread), including overwrite and isolation semantics.
describe('SqliteMemory — working memory', () => {
	let dbPath: string;

	beforeEach(() => {
		// Fresh database file per test so state cannot leak between tests.
		dbPath = makeTempDb();
	});

	afterEach(() => {
		try {
			fs.unlinkSync(dbPath);
		} catch {
			/* ignore */
		}
	});

	it('returns null for an unknown key', async () => {
		const mem = makeMemory(dbPath);
		const result = await mem.getWorkingMemory({ threadId: 'thread-x', resourceId: 'unknown' });
		expect(result).toBeNull();
	});

	it('saves and retrieves working memory keyed by resourceId', async () => {
		const mem = makeMemory(dbPath);
		await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'some context');
		const result = await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' });
		expect(result).toBe('some context');
	});

	it('overwrites working memory on re-save', async () => {
		const mem = makeMemory(dbPath);
		await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'v1');
		await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'v2');
		const result = await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' });
		expect(result).toBe('v2');
	});

	it('isolates working memory by resourceId', async () => {
		const mem = makeMemory(dbPath);
		await mem.saveWorkingMemory({ threadId: 'thread-a', resourceId: 'res-a' }, 'content-a');
		await mem.saveWorkingMemory({ threadId: 'thread-b', resourceId: 'res-b' }, 'content-b');

		expect(await mem.getWorkingMemory({ threadId: 'thread-a', resourceId: 'res-a' })).toBe(
			'content-a',
		);
		expect(await mem.getWorkingMemory({ threadId: 'thread-b', resourceId: 'res-b' })).toBe(
			'content-b',
		);
	});

	it('saves and retrieves working memory keyed by threadId (no resourceId)', async () => {
		const mem = makeMemory(dbPath);
		await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'thread context');
		const result = await mem.getWorkingMemory({ threadId: 'thread-1' });
		expect(result).toBe('thread context');
	});

	it('isolates working memory by threadId', async () => {
		const mem = makeMemory(dbPath);
		await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'data 1');
		await mem.saveWorkingMemory({ threadId: 'thread-2' }, 'data 2');

		expect(await mem.getWorkingMemory({ threadId: 'thread-1' })).toBe('data 1');
		expect(await mem.getWorkingMemory({ threadId: 'thread-2' })).toBe('data 2');
	});

	it('stores scope=resource when resourceId is provided', async () => {
		const mem = makeMemory(dbPath);
		await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: 'res-1' }, 'resource content');

		// Inspect the raw row directly to verify the persisted scope column.
		const { createClient } = await import('@libsql/client');
		const db = createClient({ url: `file:${dbPath}` });
		const result = await db.execute('SELECT scope FROM working_memory WHERE key = ?', ['res-1']);
		expect(result.rows[0].scope).toBe('resource');
	});

	it('stores scope=thread when only threadId is provided', async () => {
		const mem = makeMemory(dbPath);
		await mem.saveWorkingMemory({ threadId: 'thread-1' }, 'thread content');

		// Inspect the raw row directly to verify the persisted scope column.
		const { createClient } = await import('@libsql/client');
		const db = createClient({ url: `file:${dbPath}` });
		const result = await db.execute('SELECT scope FROM working_memory WHERE key = ?', ['thread-1']);
		expect(result.rows[0].scope).toBe('thread');
	});

	it('does not mix resource-scoped and thread-scoped entries with the same key value', async () => {
		const mem = makeMemory(dbPath);
		// Same string used as a resourceId and as a threadId — entries must not collide.
		const sharedKey = 'same-id';
		await mem.saveWorkingMemory({ threadId: 'thread-1', resourceId: sharedKey }, 'resource data');
		await mem.saveWorkingMemory({ threadId: sharedKey }, 'thread data');

		expect(await mem.getWorkingMemory({ threadId: 'thread-1', resourceId: sharedKey })).toBe(
			'resource data',
		);
		expect(await mem.getWorkingMemory({ threadId: sharedKey })).toBe('thread data');
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Restart survival
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Durability: data written through one SqliteMemory instance must be readable
// through a second instance opened on the same database file (simulated restart).
describe('SqliteMemory — restart survival', () => {
	let dbPath: string;

	beforeEach(() => {
		// Fresh database file per test so state cannot leak between tests.
		dbPath = makeTempDb();
	});

	afterEach(() => {
		try {
			fs.unlinkSync(dbPath);
		} catch {
			/* ignore */
		}
	});

	it('data survives a new SqliteMemory instance on same file', async () => {
		const mem1 = makeMemory(dbPath);
		await mem1.saveThread({ id: 't-surv', resourceId: 'user-1', title: 'persistent' });
		await mem1.saveMessages({ threadId: 't-surv', messages: [makeMsg('user', 'hello from past')] });
		await mem1.saveWorkingMemory({ threadId: 't-surv', resourceId: 'user-1' }, 'wm-data');

		// Create a brand new instance pointing at the same file
		const mem2 = makeMemory(dbPath);

		const thread = await mem2.getThread('t-surv');
		expect(thread).not.toBeNull();
		expect(thread!.title).toBe('persistent');

		const msgs = await mem2.getMessages('t-surv');
		expect(msgs).toHaveLength(1);
		expect(textOf(msgs[0])).toBe('hello from past');

		const wm = await mem2.getWorkingMemory({ threadId: 't-surv', resourceId: 'user-1' });
		expect(wm).toBe('wm-data');
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Search
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Vector search: nearest-neighbour ordering by cosine similarity, topK,
// scope filtering ('thread' vs 'resource'), and the default scope.
describe('SqliteMemory — queryEmbeddings', () => {
	let dbPath: string;
	let mem: SqliteMemory;

	beforeEach(() => {
		// Fresh database file and memory instance per test.
		dbPath = makeTempDb();
		mem = makeMemory(dbPath);
	});

	afterEach(() => {
		try {
			fs.unlinkSync(dbPath);
		} catch {
			/* ignore */
		}
	});

	it('returns empty array when no embeddings stored', async () => {
		const results = await mem.queryEmbeddings({
			threadId: 't1',
			vector: new Array<number>(3).fill(0),
			topK: 5,
		});
		expect(results).toEqual([]);
	});

	it('returns nearest neighbours by cosine similarity', async () => {
		await mem.saveThread({ id: 't1', resourceId: 'u1' });

		// Three near-orthogonal vectors; 'cats' and 'kittens' point the same way.
		await mem.saveEmbeddings({
			threadId: 't1',
			resourceId: 'u1',
			entries: [
				{ id: 'msg-cats', vector: [1.0, 0.0, 0.0], text: 'About cats', model: 'test' },
				{ id: 'msg-dogs', vector: [0.0, 1.0, 0.0], text: 'About dogs', model: 'test' },
				{ id: 'msg-kittens', vector: [0.9, 0.1, 0.0], text: 'About kittens', model: 'test' },
			],
		});

		// Query close to [1,0,0] — should return cats first, then kittens
		const results = await mem.queryEmbeddings({
			scope: 'resource',
			resourceId: 'u1',
			vector: [1.0, 0.0, 0.0],
			topK: 2,
		});

		expect(results).toHaveLength(2);
		expect(results[0].id).toBe('msg-cats');
		expect(results[0].score).toBeGreaterThan(0.9);
		expect(results[1].id).toBe('msg-kittens');
	});

	it('filters by threadId with scope=thread', async () => {
		await mem.saveThread({ id: 't1', resourceId: 'u1' });
		await mem.saveThread({ id: 't2', resourceId: 'u1' });

		await mem.saveEmbeddings({
			threadId: 't1',
			entries: [{ id: 'msg-t1', vector: [1.0, 0.0, 0.0], text: 'Thread 1', model: 'test' }],
		});
		await mem.saveEmbeddings({
			threadId: 't2',
			entries: [{ id: 'msg-t2', vector: [1.0, 0.0, 0.0], text: 'Thread 2', model: 'test' }],
		});

		const results = await mem.queryEmbeddings({
			scope: 'thread',
			threadId: 't1',
			vector: [1.0, 0.0, 0.0],
			topK: 10,
		});

		expect(results).toHaveLength(1);
		expect(results[0].id).toBe('msg-t1');
	});

	it('filters by resourceId with scope=resource', async () => {
		await mem.saveThread({ id: 't1', resourceId: 'user-a' });
		await mem.saveThread({ id: 't2', resourceId: 'user-a' });
		await mem.saveThread({ id: 't3', resourceId: 'user-b' });

		await mem.saveEmbeddings({
			threadId: 't1',
			resourceId: 'user-a',
			entries: [{ id: 'msg-1', vector: [1.0, 0.0, 0.0], text: 'User A thread 1', model: 'test' }],
		});
		await mem.saveEmbeddings({
			threadId: 't2',
			resourceId: 'user-a',
			entries: [{ id: 'msg-2', vector: [0.9, 0.1, 0.0], text: 'User A thread 2', model: 'test' }],
		});
		await mem.saveEmbeddings({
			threadId: 't3',
			resourceId: 'user-b',
			entries: [{ id: 'msg-3', vector: [1.0, 0.0, 0.0], text: 'User B thread 3', model: 'test' }],
		});

		// Resource scope spans threads t1 and t2 but must exclude user-b's t3.
		const results = await mem.queryEmbeddings({
			scope: 'resource',
			resourceId: 'user-a',
			vector: [1.0, 0.0, 0.0],
			topK: 10,
		});

		expect(results).toHaveLength(2);
		const ids = results.map((r) => r.id);
		expect(ids).toContain('msg-1');
		expect(ids).toContain('msg-2');
		expect(ids).not.toContain('msg-3');
	});

	it('defaults to resource scope — returns all embeddings for a resourceId across threads', async () => {
		await mem.saveThread({ id: 't1', resourceId: 'user-x' });
		await mem.saveThread({ id: 't2', resourceId: 'user-x' });

		await mem.saveEmbeddings({
			threadId: 't1',
			resourceId: 'user-x',
			entries: [{ id: 'msg-a', vector: [1.0, 0.0, 0.0], text: 'Thread 1 msg', model: 'test' }],
		});
		await mem.saveEmbeddings({
			threadId: 't2',
			resourceId: 'user-x',
			entries: [{ id: 'msg-b', vector: [0.9, 0.1, 0.0], text: 'Thread 2 msg', model: 'test' }],
		});

		// No explicit scope — should default to 'resource'
		const results = await mem.queryEmbeddings({
			resourceId: 'user-x',
			vector: [1.0, 0.0, 0.0],
			topK: 10,
		});

		expect(results).toHaveLength(2);
	});

	it('resource scope excludes embeddings from other resources', async () => {
		await mem.saveEmbeddings({
			threadId: 't1',
			resourceId: 'res-1',
			entries: [{ id: 'msg-r1', vector: [1.0, 0.0, 0.0], text: 'Resource 1', model: 'test' }],
		});
		await mem.saveEmbeddings({
			threadId: 't2',
			resourceId: 'res-2',
			entries: [{ id: 'msg-r2', vector: [1.0, 0.0, 0.0], text: 'Resource 2', model: 'test' }],
		});

		const results = await mem.queryEmbeddings({
			scope: 'resource',
			resourceId: 'res-1',
			vector: [1.0, 0.0, 0.0],
			topK: 10,
		});

		expect(results).toHaveLength(1);
		expect(results[0].id).toBe('msg-r1');
	});

	it('thread scope only returns embeddings from the specified thread', async () => {
		await mem.saveEmbeddings({
			threadId: 't1',
			resourceId: 'user-1',
			entries: [
				{ id: 'msg-t1a', vector: [1.0, 0.0, 0.0], text: 'Thread 1 A', model: 'test' },
				{ id: 'msg-t1b', vector: [0.0, 1.0, 0.0], text: 'Thread 1 B', model: 'test' },
			],
		});
		await mem.saveEmbeddings({
			threadId: 't2',
			resourceId: 'user-1',
			entries: [{ id: 'msg-t2', vector: [1.0, 0.0, 0.0], text: 'Thread 2', model: 'test' }],
		});

		const results = await mem.queryEmbeddings({
			scope: 'thread',
			threadId: 't1',
			vector: [1.0, 0.0, 0.0],
			topK: 10,
		});

		expect(results).toHaveLength(2);
		const ids = results.map((r) => r.id);
		expect(ids).toContain('msg-t1a');
		expect(ids).toContain('msg-t1b');
		expect(ids).not.toContain('msg-t2');
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Namespace validation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Constructor-level namespace validation: disallowed characters throw,
// alphanumeric/underscore names are accepted.
describe('SqliteMemory — namespace', () => {
	it('rejects invalid namespace characters', () => {
		expect(() => new SqliteMemory({ url: 'file::memory:', namespace: 'bad-ns!' })).toThrow(
			/Invalid namespace/,
		);
	});

	it('accepts valid namespace', () => {
		expect(() => new SqliteMemory({ url: 'file::memory:', namespace: 'my_ns_01' })).not.toThrow();
	});
});
|
||||
|
|
@ -0,0 +1,157 @@
|
|||
import { stripOrphanedToolMessages } from '../runtime/strip-orphaned-tool-messages';
|
||||
import { isLlmMessage, toDbMessage } from '../sdk/message';
|
||||
import type { AgentDbMessage, AgentMessage, Message } from '../types/sdk/message';
|
||||
|
||||
function seed(messages: AgentMessage[]): AgentDbMessage[] {
|
||||
return messages.map(toDbMessage);
|
||||
}
|
||||
|
||||
// stripOrphanedToolMessages must remove unpaired tool-call / tool-result
// content while leaving complete pairs, plain messages, and custom (non-LLM)
// messages intact — returning the same array reference when nothing changes.
describe('stripOrphanedToolMessages', () => {
	it('returns messages unchanged when all tool pairs are complete', () => {
		const messages = seed([
			{ role: 'user', content: [{ type: 'text', text: 'Hello' }] },
			{
				role: 'assistant',
				content: [
					{ type: 'text', text: 'Looking up...' },
					{ type: 'tool-call', toolCallId: 'c1', toolName: 'lookup', input: {} },
				],
			},
			{
				role: 'tool',
				content: [{ type: 'tool-result', toolCallId: 'c1', toolName: 'lookup', result: 42 }],
			},
			{ role: 'assistant', content: [{ type: 'text', text: 'Done.' }] },
		]);

		// Complete pair — identity (same reference) expected.
		const result = stripOrphanedToolMessages(messages);
		expect(result).toBe(messages);
	});

	it('strips orphaned tool-result when matching tool-call is missing', () => {
		const messages = seed([
			{
				role: 'tool',
				content: [{ type: 'tool-result', toolCallId: 'c1', toolName: 'lookup', result: 42 }],
			},
			{ role: 'assistant', content: [{ type: 'text', text: 'There are 42.' }] },
			{ role: 'user', content: [{ type: 'text', text: 'Thanks' }] },
		]);

		const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];

		expect(result).toHaveLength(2);
		expect(result[0].role).toBe('assistant');
		expect(result[1].role).toBe('user');
	});

	it('strips orphaned tool-call when matching tool-result is missing', () => {
		const messages = seed([
			{ role: 'user', content: [{ type: 'text', text: 'Check it' }] },
			{
				role: 'assistant',
				content: [
					{ type: 'text', text: 'Checking...' },
					{ type: 'tool-call', toolCallId: 'c1', toolName: 'lookup', input: {} },
				],
			},
		]);

		const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];

		// The assistant message survives, minus its orphaned tool-call part.
		expect(result).toHaveLength(2);
		const assistantMsg = result[1];
		expect(assistantMsg.role).toBe('assistant');
		expect(assistantMsg.content).toHaveLength(1);
		expect(assistantMsg.content[0].type).toBe('text');
	});

	it('drops assistant message entirely if it only contained an orphaned tool-call', () => {
		const messages = seed([
			{ role: 'user', content: [{ type: 'text', text: 'Do it' }] },
			{
				role: 'assistant',
				content: [{ type: 'tool-call', toolCallId: 'c1', toolName: 'action', input: {} }],
			},
		]);

		const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];

		expect(result).toHaveLength(1);
		expect(result[0].role).toBe('user');
	});

	it('handles mixed scenario: one complete pair and one orphaned result', () => {
		const messages = seed([
			{
				role: 'tool',
				content: [
					{ type: 'tool-result', toolCallId: 'orphan', toolName: 'lookup', result: 'stale' },
				],
			},
			{ role: 'assistant', content: [{ type: 'text', text: 'Old result' }] },
			{ role: 'user', content: [{ type: 'text', text: 'New question' }] },
			{
				role: 'assistant',
				content: [
					{ type: 'text', text: 'Looking up...' },
					{ type: 'tool-call', toolCallId: 'c2', toolName: 'lookup', input: {} },
				],
			},
			{
				role: 'tool',
				content: [{ type: 'tool-result', toolCallId: 'c2', toolName: 'lookup', result: 99 }],
			},
			{ role: 'assistant', content: [{ type: 'text', text: '99 items' }] },
		]);

		const result = stripOrphanedToolMessages(messages).filter(isLlmMessage) as Message[];

		// Only the orphaned 'stale' tool-result is dropped; the c2 pair survives.
		expect(result).toHaveLength(5);
		expect(result[0].role).toBe('assistant');
		expect(result[0].content[0]).toEqual(
			expect.objectContaining({ type: 'text', text: 'Old result' }),
		);

		const toolCallMsg = result.find(
			(m) => m.role === 'assistant' && m.content.some((c) => c.type === 'tool-call'),
		);
		expect(toolCallMsg).toBeDefined();
		const toolResultMsg = result.find((m) => m.role === 'tool');
		expect(toolResultMsg).toBeDefined();
	});

	it('preserves custom (non-LLM) messages', () => {
		const customMsg: AgentDbMessage = {
			id: 'custom-1',
			type: 'custom',
			messageType: 'notification',
			data: { info: 'hello' },
		} as unknown as AgentDbMessage;

		const messages: AgentDbMessage[] = [
			customMsg,
			...seed([
				{
					role: 'tool',
					content: [{ type: 'tool-result', toolCallId: 'orphan', toolName: 'x', result: null }],
				},
			]),
		];

		const result = stripOrphanedToolMessages(messages);

		// Orphaned tool-result removed; the custom message passes through untouched.
		expect(result).toHaveLength(1);
		expect(result[0]).toBe(customMsg);
	});

	it('returns same array reference when no orphans exist (no-op fast path)', () => {
		const messages = seed([
			{ role: 'user', content: [{ type: 'text', text: 'Hi' }] },
			{ role: 'assistant', content: [{ type: 'text', text: 'Hello!' }] },
		]);

		const result = stripOrphanedToolMessages(messages);
		expect(result).toBe(messages);
	});
});
|
||||
170
packages/@n8n/agents/src/__tests__/telemetry.test.ts
Normal file
170
packages/@n8n/agents/src/__tests__/telemetry.test.ts
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
import type { TelemetryIntegration } from 'ai';
|
||||
|
||||
import { Telemetry } from '../sdk/telemetry';
|
||||
|
||||
// Telemetry fluent-builder surface: default values, scalar setters,
// pre-built tracer passthrough, tracer/otlpEndpoint conflict, and
// integration accumulation.
describe('Telemetry builder', () => {
	it('builds with defaults', async () => {
		const built = await new Telemetry().build();
		expect(built.enabled).toBe(true);
		expect(built.recordInputs).toBe(true);
		expect(built.recordOutputs).toBe(true);
		expect(built.functionId).toBeUndefined();
		expect(built.metadata).toBeUndefined();
		expect(built.integrations).toEqual([]);
		expect(built.tracer).toBeUndefined();
		expect(built.provider).toBeUndefined();
	});

	it('sets all scalar fields', async () => {
		const built = await new Telemetry()
			.enabled(false)
			.functionId('my-agent')
			.metadata({ team: 'platform', version: 2 })
			.recordInputs(false)
			.recordOutputs(false)
			.build();

		expect(built.enabled).toBe(false);
		expect(built.functionId).toBe('my-agent');
		expect(built.metadata).toEqual({ team: 'platform', version: 2 });
		expect(built.recordInputs).toBe(false);
		expect(built.recordOutputs).toBe(false);
	});

	it('accepts a pre-built tracer', async () => {
		const fakeTracer = { startSpan: jest.fn() };
		const built = await new Telemetry().tracer(fakeTracer).build();
		expect(built.tracer).toBe(fakeTracer);
	});

	it('throws when both .tracer() and .otlpEndpoint() are set', async () => {
		// The two options are mutually exclusive; build() must reject.
		await expect(
			new Telemetry()
				.tracer({ startSpan: jest.fn() })
				.otlpEndpoint('http://localhost:4318')
				.build(),
		).rejects.toThrow('Cannot set both .tracer() and .otlpEndpoint()');
	});

	it('collects multiple integrations', async () => {
		const int1: TelemetryIntegration = { onStart: jest.fn() };
		const int2: TelemetryIntegration = { onFinish: jest.fn() };
		const built = await new Telemetry().integration(int1).integration(int2).build();
		expect(built.integrations).toHaveLength(2);
	});
});
|
||||
|
||||
// .redact(): built integrations must be wrapped so that the redact callback
// is applied recursively (top-level fields, nested objects, and objects
// inside arrays) before each hook sees the event — and left unwrapped
// when no redact callback is configured.
describe('Telemetry — redaction wrapping', () => {
	it('wraps integrations with redaction when .redact() is set', async () => {
		const receivedEvents: unknown[] = [];
		const integration: TelemetryIntegration = {
			onStart: (event) => {
				receivedEvents.push(event);
			},
			onFinish: (event) => {
				receivedEvents.push(event);
			},
		};

		const built = await new Telemetry()
			.redact((data) => {
				// Drop the 'secret' key from each object the redactor visits.
				const filtered = { ...data };
				delete filtered.secret;
				return filtered;
			})
			.integration(integration)
			.build();

		// Call the wrapped onStart hook
		const startEvent = { model: { modelId: 'test' }, messages: { secret: 'hidden', safe: 'ok' } };
		built.integrations[0].onStart!(startEvent as never);
		// The secret should be redacted from nested objects
		const received = receivedEvents[0] as Record<string, unknown>;
		const messages = received.messages as Record<string, unknown>;
		expect(messages.secret).toBeUndefined();
		expect(messages.safe).toBe('ok');
	});

	it('does not wrap integrations when .redact() is not set', async () => {
		const integration: TelemetryIntegration = { onStart: jest.fn() };
		const built = await new Telemetry().integration(integration).build();
		// The integration should be a copy (not the same reference due to spread) but functionally identical
		expect(built.integrations[0].onStart).toBe(integration.onStart);
	});

	it('redacts top-level scalar fields via the redact callback', async () => {
		const receivedEvents: unknown[] = [];
		const integration: TelemetryIntegration = {
			onStart: (event) => {
				receivedEvents.push(event);
			},
		};

		const built = await new Telemetry()
			.redact((data) => {
				const filtered = { ...data };
				delete filtered.secret;
				return filtered;
			})
			.integration(integration)
			.build();

		const startEvent = { secret: 'top-level-secret', safe: 'ok', nested: { a: 1 } };
		built.integrations[0].onStart!(startEvent as never);
		const received = receivedEvents[0] as Record<string, unknown>;
		expect(received.secret).toBeUndefined();
		expect(received.safe).toBe('ok');
	});

	it('redacts objects inside arrays', async () => {
		const receivedEvents: unknown[] = [];
		const integration: TelemetryIntegration = {
			onStart: (event) => {
				receivedEvents.push(event);
			},
		};

		const built = await new Telemetry()
			.redact((data) => {
				const filtered = { ...data };
				delete filtered.secret;
				return filtered;
			})
			.integration(integration)
			.build();

		const startEvent = {
			items: [
				{ secret: 'hidden', safe: 'ok' },
				{ secret: 'also-hidden', value: 42 },
			],
		};
		built.integrations[0].onStart!(startEvent as never);
		const received = receivedEvents[0] as Record<string, unknown>;
		const items = received.items as Array<Record<string, unknown>>;
		expect(items[0].secret).toBeUndefined();
		expect(items[0].safe).toBe('ok');
		expect(items[1].secret).toBeUndefined();
		expect(items[1].value).toBe(42);
	});
});
|
||||
|
||||
// Telemetry.shutdown(): delegates to provider.shutdown() when a provider
// is present, and is a safe no-op when there is none.
describe('Telemetry.shutdown()', () => {
	it('calls provider.shutdown() when provider exists', async () => {
		const shutdownMock = jest.fn().mockResolvedValue(undefined);
		const built = await new Telemetry().build();
		// Manually inject a mock provider
		const withProvider = {
			...built,
			provider: { forceFlush: jest.fn(), shutdown: shutdownMock },
		};
		await Telemetry.shutdown(withProvider);
		expect(shutdownMock).toHaveBeenCalled();
	});

	it('does nothing when no provider exists', async () => {
		const built = await new Telemetry().build();
		// Should not throw
		await Telemetry.shutdown(built);
	});
});
|
||||
191
packages/@n8n/agents/src/__tests__/tool-adapter.test.ts
Normal file
191
packages/@n8n/agents/src/__tests__/tool-adapter.test.ts
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
import type { JSONSchema7 } from 'json-schema';
|
||||
import { z } from 'zod';
|
||||
|
||||
import { toAiSdkTools } from '../runtime/tool-adapter';
|
||||
import type { BuiltTool } from '../types';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Module mocks
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/consistent-type-imports
type AiImport = typeof import('ai');

// Spy standing in for the 'ai' package's jsonSchema(); tests count its calls
// to tell JSON Schema tools apart from Zod-schema tools.
const jsonSchemaMock = jest.fn((schema: JSONSchema7) => ({ __jsonSchema: schema }));

// Partial mock of 'ai': keep the real module, but make `tool` an identity
// spy and route `jsonSchema` through the observable mock above.
jest.mock('ai', () => {
	const actual = jest.requireActual<AiImport>('ai');
	return {
		...actual,
		tool: jest.fn((config: unknown) => config),
		jsonSchema: (schema: JSONSchema7) => jsonSchemaMock(schema),
	};
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function makeJsonSchemaTool(
|
||||
inputSchema: JSONSchema7,
|
||||
overrides: Partial<BuiltTool> = {},
|
||||
): BuiltTool {
|
||||
return {
|
||||
name: 'testTool',
|
||||
description: 'A test tool',
|
||||
inputSchema,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
function makeZodSchemaTool(overrides: Partial<BuiltTool> = {}): BuiltTool {
|
||||
return {
|
||||
name: 'zodTool',
|
||||
description: 'A zod schema tool',
|
||||
inputSchema: z.object({ id: z.string() }),
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// toAiSdkTools — empty / missing input
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Degenerate inputs: undefined, empty array, and tools lacking an
// inputSchema all yield an empty tool map.
describe('toAiSdkTools — empty / missing input', () => {
	it('returns an empty object when tools is undefined', () => {
		expect(toAiSdkTools(undefined)).toEqual({});
	});

	it('returns an empty object when tools is an empty array', () => {
		expect(toAiSdkTools([])).toEqual({});
	});

	it('skips tools that have no inputSchema', () => {
		const tool: BuiltTool = { name: 'noSchema', description: 'no schema' };
		const result = toAiSdkTools([tool]);
		expect(result).toEqual({});
	});
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// toAiSdkTools — Zod schemas
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('toAiSdkTools — Zod schemas', () => {
|
||||
beforeEach(() => {
|
||||
jsonSchemaMock.mockClear();
|
||||
});
|
||||
|
||||
it('registers a tool keyed by its name', () => {
|
||||
const result = toAiSdkTools([makeZodSchemaTool()]);
|
||||
expect(result).toHaveProperty('zodTool');
|
||||
});
|
||||
|
||||
it('does NOT call jsonSchema() for Zod schema tools', () => {
|
||||
toAiSdkTools([makeZodSchemaTool()]);
|
||||
expect(jsonSchemaMock).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('passes the Zod schema directly as inputSchema', () => {
|
||||
const zodSchema = z.object({ query: z.string() });
|
||||
const result = toAiSdkTools([
|
||||
{ name: 'search', description: 'Search', inputSchema: zodSchema },
|
||||
]);
|
||||
expect((result['search'] as { inputSchema: unknown }).inputSchema).toBe(zodSchema);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// toAiSdkTools — JSON Schema (fixSchema behaviour)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('toAiSdkTools — JSON Schema / fixSchema', () => {
|
||||
beforeEach(() => {
|
||||
jsonSchemaMock.mockClear();
|
||||
});
|
||||
|
||||
it('registers a tool keyed by its name', () => {
|
||||
const result = toAiSdkTools([makeJsonSchemaTool({ properties: { id: { type: 'string' } } })]);
|
||||
expect(result).toHaveProperty('testTool');
|
||||
});
|
||||
|
||||
it('calls jsonSchema() for JSON Schema tools', () => {
|
||||
toAiSdkTools([makeJsonSchemaTool({ type: 'object', properties: { id: { type: 'string' } } })]);
|
||||
expect(jsonSchemaMock).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('fixSchema: adds type "object" when properties is present but type is absent', () => {
|
||||
const rawSchema: JSONSchema7 = {
|
||||
properties: { name: { type: 'string' } },
|
||||
};
|
||||
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
|
||||
|
||||
expect(jsonSchemaMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ type: 'object', properties: { name: { type: 'string' } } }),
|
||||
);
|
||||
});
|
||||
|
||||
it('fixSchema: preserves existing type when type is already set alongside properties', () => {
|
||||
const rawSchema: JSONSchema7 = {
|
||||
type: 'object',
|
||||
properties: { count: { type: 'number' } },
|
||||
};
|
||||
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
|
||||
|
||||
expect(jsonSchemaMock).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ type: 'object', properties: { count: { type: 'number' } } }),
|
||||
);
|
||||
// Confirm type was not altered from original
|
||||
const received = jsonSchemaMock.mock.calls[0][0];
|
||||
expect(received.type).toBe('object');
|
||||
});
|
||||
|
||||
it('fixSchema: does not add type when properties is absent', () => {
|
||||
const rawSchema: JSONSchema7 = { description: 'no properties' };
|
||||
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
|
||||
|
||||
const received = jsonSchemaMock.mock.calls[0][0];
|
||||
expect(received).not.toHaveProperty('type');
|
||||
});
|
||||
|
||||
it('fixSchema: does not mutate the original schema object', () => {
|
||||
const rawSchema: JSONSchema7 = { properties: { x: { type: 'string' } } };
|
||||
toAiSdkTools([makeJsonSchemaTool(rawSchema)]);
|
||||
|
||||
expect(rawSchema).not.toHaveProperty('type');
|
||||
});
|
||||
|
||||
it('handles multiple JSON Schema tools independently', () => {
|
||||
const schemaWithProps: JSONSchema7 = { properties: { a: { type: 'string' } } };
|
||||
const schemaWithType: JSONSchema7 = { type: 'object', properties: { b: { type: 'number' } } };
|
||||
|
||||
const result = toAiSdkTools([
|
||||
makeJsonSchemaTool(schemaWithProps, { name: 'toolA' }),
|
||||
makeJsonSchemaTool(schemaWithType, { name: 'toolB' }),
|
||||
]);
|
||||
|
||||
expect(result).toHaveProperty('toolA');
|
||||
expect(result).toHaveProperty('toolB');
|
||||
expect(jsonSchemaMock).toHaveBeenCalledTimes(2);
|
||||
|
||||
const firstCall = jsonSchemaMock.mock.calls[0][0];
|
||||
const secondCall = jsonSchemaMock.mock.calls[1][0];
|
||||
expect(firstCall.type).toBe('object');
|
||||
expect(secondCall.type).toBe('object');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// toAiSdkTools — description forwarding
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('toAiSdkTools — description forwarding', () => {
|
||||
it('forwards the tool description to the AI SDK tool config', () => {
|
||||
const zodSchema = z.object({ q: z.string() });
|
||||
const result = toAiSdkTools([
|
||||
{ name: 'myTool', description: 'Does something useful', inputSchema: zodSchema },
|
||||
]);
|
||||
|
||||
expect((result['myTool'] as { description: string }).description).toBe('Does something useful');
|
||||
});
|
||||
});
|
||||
289
packages/@n8n/agents/src/__tests__/tool.test.ts
Normal file
289
packages/@n8n/agents/src/__tests__/tool.test.ts
Normal file
|
|
@ -0,0 +1,289 @@
|
|||
import { z } from 'zod';
|
||||
|
||||
import { Tool, wrapToolForApproval } from '../sdk/tool';
|
||||
import type { BuiltTelemetry, BuiltTool, InterruptibleToolContext, ToolContext } from '../types';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function makeBuiltTool(overrides: Partial<BuiltTool> = {}): BuiltTool {
|
||||
return {
|
||||
name: 'testTool',
|
||||
description: 'A test tool',
|
||||
inputSchema: z.object({ id: z.string() }),
|
||||
handler: async (input) => {
|
||||
return await Promise.resolve({ result: (input as { id: string }).id });
|
||||
},
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
function makeCtx(resumeData?: unknown): { ctx: InterruptibleToolContext; suspendMock: jest.Mock } {
|
||||
const suspendMock = jest.fn().mockImplementation(async (payload: unknown) => {
|
||||
return await Promise.resolve({ __suspended: true, payload });
|
||||
});
|
||||
const ctx: InterruptibleToolContext = {
|
||||
suspend: suspendMock as unknown as InterruptibleToolContext['suspend'],
|
||||
resumeData,
|
||||
};
|
||||
return { ctx, suspendMock };
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tool builder — .requireApproval()
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Tool builder — .requireApproval()', () => {
|
||||
it('build() returns a tool with suspendSchema and resumeSchema when .requireApproval() is set', () => {
|
||||
const tool = new Tool('delete')
|
||||
.description('Delete a record')
|
||||
.input(z.object({ id: z.string() }))
|
||||
.requireApproval()
|
||||
.handler(async ({ id }) => {
|
||||
return await Promise.resolve({ deleted: id });
|
||||
})
|
||||
.build();
|
||||
|
||||
expect(tool.suspendSchema).toBeDefined();
|
||||
expect(tool.resumeSchema).toBeDefined();
|
||||
});
|
||||
|
||||
it('build() throws when .requireApproval() is combined with .suspend()/.resume()', () => {
|
||||
expect(() => {
|
||||
new Tool('delete')
|
||||
.description('Delete a record')
|
||||
.input(z.object({ id: z.string() }))
|
||||
.requireApproval()
|
||||
.suspend(z.object({ msg: z.string() }))
|
||||
.resume(z.object({ ok: z.boolean() }))
|
||||
.handler(async (_input, _ctx) => {
|
||||
return await Promise.resolve({});
|
||||
})
|
||||
.build();
|
||||
}).toThrow('cannot use both approval');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tool builder — .needsApprovalFn()
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Tool builder — .needsApprovalFn()', () => {
|
||||
it('build() returns a tool with suspendSchema and resumeSchema when .needsApprovalFn() is set', () => {
|
||||
const tool = new Tool('query')
|
||||
.description('Run a query')
|
||||
.input(z.object({ id: z.string() }))
|
||||
.needsApprovalFn(async (args) => {
|
||||
return await Promise.resolve((args as { id: string }).id === 'secret');
|
||||
})
|
||||
.handler(async ({ id }) => {
|
||||
return await Promise.resolve({ result: id });
|
||||
})
|
||||
.build();
|
||||
|
||||
expect(tool.suspendSchema).toBeDefined();
|
||||
expect(tool.resumeSchema).toBeDefined();
|
||||
});
|
||||
|
||||
it('build() throws when .needsApprovalFn() is combined with .suspend()/.resume()', () => {
|
||||
expect(() => {
|
||||
new Tool('query')
|
||||
.description('Run a query')
|
||||
.input(z.object({ id: z.string() }))
|
||||
.needsApprovalFn(async () => {
|
||||
return await Promise.resolve(true);
|
||||
})
|
||||
.suspend(z.object({ msg: z.string() }))
|
||||
.resume(z.object({ ok: z.boolean() }))
|
||||
.handler(async (_input, _ctx) => {
|
||||
return await Promise.resolve({});
|
||||
})
|
||||
.build();
|
||||
}).toThrow('cannot use both approval');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tool builder — without approval
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Tool builder — without approval', () => {
|
||||
it('build() returns a normal tool (no suspendSchema) when neither .requireApproval() nor .needsApprovalFn() is set', () => {
|
||||
const tool = new Tool('fetch')
|
||||
.description('Fetch data')
|
||||
.input(z.object({ id: z.string() }))
|
||||
.handler(async ({ id }) => {
|
||||
return await Promise.resolve({ data: id });
|
||||
})
|
||||
.build();
|
||||
|
||||
expect(tool.suspendSchema).toBeUndefined();
|
||||
expect(tool.resumeSchema).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// wrapToolForApproval — requireApproval: true
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('wrapToolForApproval — requireApproval: true', () => {
|
||||
it('suspends on first call when requireApproval is true', async () => {
|
||||
const baseTool = makeBuiltTool();
|
||||
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
|
||||
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined → first call
|
||||
|
||||
await wrapped.handler!({ id: '1' }, ctx);
|
||||
|
||||
expect(suspendMock).toHaveBeenCalledWith({
|
||||
type: 'approval',
|
||||
toolName: 'testTool',
|
||||
args: { id: '1' },
|
||||
});
|
||||
});
|
||||
|
||||
it('executes original handler when approved on resume', async () => {
|
||||
const baseTool = makeBuiltTool();
|
||||
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
|
||||
const { ctx } = makeCtx({ approved: true });
|
||||
|
||||
const result = await wrapped.handler!({ id: 'abc' }, ctx);
|
||||
|
||||
expect(result).toEqual({ result: 'abc' });
|
||||
});
|
||||
|
||||
it('returns declined message when not approved on resume', async () => {
|
||||
const baseTool = makeBuiltTool();
|
||||
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
|
||||
const { ctx } = makeCtx({ approved: false });
|
||||
|
||||
const result = await wrapped.handler!({ id: 'abc' }, ctx);
|
||||
|
||||
expect(result).toEqual({ declined: true, message: 'Tool "testTool" was not approved' });
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// wrapToolForApproval — needsApprovalFn
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('wrapToolForApproval — needsApprovalFn', () => {
|
||||
it('does not suspend when needsApprovalFn returns false', async () => {
|
||||
const baseTool = makeBuiltTool();
|
||||
const wrapped = wrapToolForApproval(baseTool, {
|
||||
needsApprovalFn: async () => {
|
||||
return await Promise.resolve(false);
|
||||
},
|
||||
});
|
||||
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined
|
||||
|
||||
const result = await wrapped.handler!({ id: 'safe' }, ctx);
|
||||
|
||||
expect(suspendMock).not.toHaveBeenCalled();
|
||||
expect(result).toEqual({ result: 'safe' });
|
||||
});
|
||||
|
||||
it('suspends when needsApprovalFn returns true', async () => {
|
||||
const baseTool = makeBuiltTool();
|
||||
const wrapped = wrapToolForApproval(baseTool, {
|
||||
needsApprovalFn: async (args) => {
|
||||
return await Promise.resolve((args as { id: string }).id === 'secret');
|
||||
},
|
||||
});
|
||||
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined
|
||||
|
||||
await wrapped.handler!({ id: 'secret' }, ctx);
|
||||
|
||||
expect(suspendMock).toHaveBeenCalledWith({
|
||||
type: 'approval',
|
||||
toolName: 'testTool',
|
||||
args: { id: 'secret' },
|
||||
});
|
||||
});
|
||||
|
||||
it('does not suspend when needsApprovalFn returns false for non-matching args', async () => {
|
||||
const baseTool = makeBuiltTool();
|
||||
const wrapped = wrapToolForApproval(baseTool, {
|
||||
needsApprovalFn: async (args) => {
|
||||
return await Promise.resolve((args as { id: string }).id === 'secret');
|
||||
},
|
||||
});
|
||||
const { ctx, suspendMock } = makeCtx();
|
||||
|
||||
const result = await wrapped.handler!({ id: 'public' }, ctx);
|
||||
|
||||
expect(suspendMock).not.toHaveBeenCalled();
|
||||
expect(result).toEqual({ result: 'public' });
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// wrapToolForApproval — config: { requireApproval: true } (agent-level wrapping)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('wrapToolForApproval — config: { requireApproval: true } (agent-level wrapping)', () => {
|
||||
it('always suspends regardless of original tool settings', async () => {
|
||||
const baseTool = makeBuiltTool();
|
||||
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
|
||||
const { ctx, suspendMock } = makeCtx(); // resumeData = undefined
|
||||
|
||||
await wrapped.handler!({ id: 'any-id' }, ctx);
|
||||
|
||||
expect(suspendMock).toHaveBeenCalledWith({
|
||||
type: 'approval',
|
||||
toolName: 'testTool',
|
||||
args: { id: 'any-id' },
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// wrapToolForApproval — telemetry propagation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('wrapToolForApproval — telemetry propagation', () => {
|
||||
const fakeTelemetry: BuiltTelemetry = {
|
||||
enabled: true,
|
||||
functionId: 'parent-agent',
|
||||
recordInputs: true,
|
||||
recordOutputs: true,
|
||||
integrations: [],
|
||||
tracer: { startSpan: jest.fn() },
|
||||
};
|
||||
|
||||
it('forwards parentTelemetry to the original handler when approval is not needed', async () => {
|
||||
let capturedCtx: ToolContext | undefined;
|
||||
const baseTool = makeBuiltTool({
|
||||
handler: async (_input, ctx) => {
|
||||
capturedCtx = ctx as ToolContext;
|
||||
return await Promise.resolve({ result: 'ok' });
|
||||
},
|
||||
});
|
||||
const wrapped = wrapToolForApproval(baseTool, { requireApproval: false });
|
||||
const { ctx } = makeCtx(); // no resumeData
|
||||
ctx.parentTelemetry = fakeTelemetry;
|
||||
|
||||
await wrapped.handler!({ id: 'test' }, ctx);
|
||||
|
||||
expect(capturedCtx).toBeDefined();
|
||||
expect(capturedCtx!.parentTelemetry).toBe(fakeTelemetry);
|
||||
});
|
||||
|
||||
it('forwards parentTelemetry to the original handler after approval', async () => {
|
||||
let capturedCtx: ToolContext | undefined;
|
||||
const baseTool = makeBuiltTool({
|
||||
handler: async (_input, ctx) => {
|
||||
capturedCtx = ctx as ToolContext;
|
||||
return await Promise.resolve({ result: 'ok' });
|
||||
},
|
||||
});
|
||||
const wrapped = wrapToolForApproval(baseTool, { requireApproval: true });
|
||||
const { ctx } = makeCtx({ approved: true }); // resumeData = approved
|
||||
ctx.parentTelemetry = fakeTelemetry;
|
||||
|
||||
await wrapped.handler!({ id: 'test' }, ctx);
|
||||
|
||||
expect(capturedCtx).toBeDefined();
|
||||
expect(capturedCtx!.parentTelemetry).toBe(fakeTelemetry);
|
||||
});
|
||||
});
|
||||
197
packages/@n8n/agents/src/__tests__/working-memory.test.ts
Normal file
197
packages/@n8n/agents/src/__tests__/working-memory.test.ts
Normal file
|
|
@ -0,0 +1,197 @@
|
|||
import { z } from 'zod';
|
||||
|
||||
import {
|
||||
parseWorkingMemory,
|
||||
buildWorkingMemoryInstruction,
|
||||
templateFromSchema,
|
||||
WorkingMemoryStreamFilter,
|
||||
} from '../runtime/working-memory';
|
||||
import type { StreamChunk } from '../types';
|
||||
|
||||
describe('parseWorkingMemory', () => {
|
||||
it('extracts content between tags at end of text', () => {
|
||||
const text = 'Hello world.\n<working_memory>\n# Name: Alice\n</working_memory>';
|
||||
const result = parseWorkingMemory(text);
|
||||
expect(result.cleanText).toBe('Hello world.');
|
||||
expect(result.workingMemory).toBe('# Name: Alice');
|
||||
});
|
||||
|
||||
it('extracts content between tags in middle of text', () => {
|
||||
const text = 'Before.\n<working_memory>\ndata\n</working_memory>\nAfter.';
|
||||
const result = parseWorkingMemory(text);
|
||||
expect(result.cleanText).toBe('Before.\nAfter.');
|
||||
expect(result.workingMemory).toBe('data');
|
||||
});
|
||||
|
||||
it('returns null when no tags present', () => {
|
||||
const text = 'Just a normal response.';
|
||||
const result = parseWorkingMemory(text);
|
||||
expect(result.cleanText).toBe('Just a normal response.');
|
||||
expect(result.workingMemory).toBeNull();
|
||||
});
|
||||
|
||||
it('handles empty working memory', () => {
|
||||
const text = 'Response.\n<working_memory>\n</working_memory>';
|
||||
const result = parseWorkingMemory(text);
|
||||
expect(result.cleanText).toBe('Response.');
|
||||
expect(result.workingMemory).toBe('');
|
||||
});
|
||||
|
||||
it('handles multiline content with markdown', () => {
|
||||
const wm = '# User Context\n- **Name**: Alice\n- **City**: Berlin';
|
||||
const text = `Response text.\n<working_memory>\n${wm}\n</working_memory>`;
|
||||
const result = parseWorkingMemory(text);
|
||||
expect(result.workingMemory).toBe(wm);
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildWorkingMemoryInstruction', () => {
|
||||
it('generates freeform instruction', () => {
|
||||
const result = buildWorkingMemoryInstruction('# Context\n- Name:', false);
|
||||
expect(result).toContain('<working_memory>');
|
||||
expect(result).toContain('</working_memory>');
|
||||
expect(result).toContain('# Context\n- Name:');
|
||||
});
|
||||
|
||||
it('generates structured instruction mentioning JSON', () => {
|
||||
const result = buildWorkingMemoryInstruction('{"userName": ""}', true);
|
||||
expect(result).toContain('JSON');
|
||||
expect(result).toContain('<working_memory>');
|
||||
});
|
||||
});
|
||||
|
||||
describe('templateFromSchema', () => {
|
||||
it('converts Zod schema to JSON template', () => {
|
||||
const schema = z.object({
|
||||
userName: z.string().optional().describe("The user's name"),
|
||||
favoriteColor: z.string().optional().describe('Favorite color'),
|
||||
});
|
||||
const result = templateFromSchema(schema);
|
||||
expect(result).toContain('userName');
|
||||
expect(result).toContain('favoriteColor');
|
||||
// Should be valid JSON
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = JSON.parse(result);
|
||||
} catch {
|
||||
parsed = undefined;
|
||||
}
|
||||
expect(parsed).toHaveProperty('userName');
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* Helper that feeds chunks through a WorkingMemoryStreamFilter and collects
|
||||
* the output text and any persisted working memory content.
|
||||
*/
|
||||
async function runStreamFilter(
|
||||
chunks: string[],
|
||||
): Promise<{ outputText: string; persisted: string[] }> {
|
||||
const persisted: string[] = [];
|
||||
const stream = new TransformStream<StreamChunk>();
|
||||
const writer = stream.writable.getWriter();
|
||||
// eslint-disable-next-line @typescript-eslint/require-await
|
||||
const filter = new WorkingMemoryStreamFilter(writer, async (content) => {
|
||||
persisted.push(content);
|
||||
});
|
||||
|
||||
// Read the readable side concurrently to avoid backpressure deadlock
|
||||
const reader = stream.readable.getReader();
|
||||
const readAll = (async () => {
|
||||
let outputText = '';
|
||||
while (true) {
|
||||
const result = await reader.read();
|
||||
if (result.done) break;
|
||||
const chunk = result.value as StreamChunk;
|
||||
if (chunk.type === 'text-delta') outputText += chunk.delta;
|
||||
}
|
||||
return outputText;
|
||||
})();
|
||||
|
||||
for (const chunk of chunks) {
|
||||
await filter.write({ type: 'text-delta', delta: chunk });
|
||||
}
|
||||
await filter.flush();
|
||||
await writer.close();
|
||||
|
||||
const outputText = await readAll;
|
||||
return { outputText, persisted };
|
||||
}
|
||||
|
||||
describe('WorkingMemoryStreamFilter with tag split across multiple chunks', () => {
|
||||
it('handles tag split mid-open-tag', async () => {
|
||||
const { outputText, persisted } = await runStreamFilter([
|
||||
'Hello <work',
|
||||
'ing_memory>state</working_memory>',
|
||||
]);
|
||||
expect(outputText).toBe('Hello ');
|
||||
expect(persisted).toEqual(['state']);
|
||||
});
|
||||
|
||||
it('handles tag split mid-close-tag', async () => {
|
||||
const { outputText, persisted } = await runStreamFilter([
|
||||
'<working_memory>state</worki',
|
||||
'ng_memory> after',
|
||||
]);
|
||||
expect(persisted).toEqual(['state']);
|
||||
expect(outputText).toBe(' after');
|
||||
});
|
||||
|
||||
it('handles tag spread across 3+ chunks', async () => {
|
||||
const { outputText, persisted } = await runStreamFilter([
|
||||
'<wor',
|
||||
'king_mem',
|
||||
'ory>data</working_memory>',
|
||||
]);
|
||||
expect(persisted).toEqual(['data']);
|
||||
expect(outputText).toBe('');
|
||||
});
|
||||
|
||||
it('handles partial < that is not a tag', async () => {
|
||||
const { outputText, persisted } = await runStreamFilter(['Hello <', 'div>world']);
|
||||
expect(outputText).toBe('Hello <div>world');
|
||||
expect(persisted).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('parseWorkingMemory with invalid structured content', () => {
|
||||
it('strips tags and extracts content regardless of JSON validity', () => {
|
||||
const invalidJson = '{not valid json!!!}';
|
||||
const text = `Here is my response.\n<working_memory>\n${invalidJson}\n</working_memory>`;
|
||||
const result = parseWorkingMemory(text);
|
||||
|
||||
expect(result.cleanText).toBe('Here is my response.');
|
||||
expect(result.workingMemory).toBe(invalidJson);
|
||||
});
|
||||
|
||||
it('strips tags with content that fails Zod schema validation', () => {
|
||||
// Content is valid JSON but wrong shape for the schema
|
||||
const wrongShape = '{"unexpected": true}';
|
||||
const text = `Response text.\n<working_memory>\n${wrongShape}\n</working_memory>`;
|
||||
const result = parseWorkingMemory(text);
|
||||
|
||||
// Tags are stripped from response regardless
|
||||
expect(result.cleanText).toBe('Response text.');
|
||||
// Raw content is returned — caller decides whether it passes validation
|
||||
expect(result.workingMemory).toBe(wrongShape);
|
||||
|
||||
// Verify the content would indeed fail schema validation
|
||||
expect(result.workingMemory).not.toBeNull();
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = JSON.parse(result.workingMemory!);
|
||||
} catch {
|
||||
parsed = undefined;
|
||||
}
|
||||
expect(parsed).toBeDefined();
|
||||
});
|
||||
|
||||
it('strips tags even when content is completely non-JSON', () => {
|
||||
const text =
|
||||
'My reply.\n<working_memory>\nthis is just plain text, not JSON at all\n</working_memory>';
|
||||
const result = parseWorkingMemory(text);
|
||||
|
||||
expect(result.cleanText).toBe('My reply.');
|
||||
expect(result.workingMemory).toBe('this is just plain text, not JSON at all');
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,271 @@
|
|||
import { BaseFilesystem } from '../../workspace/filesystem/base-filesystem';
|
||||
import type { BaseFilesystemOptions } from '../../workspace/filesystem/base-filesystem';
|
||||
import type {
|
||||
FileContent,
|
||||
FileStat,
|
||||
FileEntry,
|
||||
ReadOptions,
|
||||
WriteOptions,
|
||||
ListOptions,
|
||||
RemoveOptions,
|
||||
CopyOptions,
|
||||
ProviderStatus,
|
||||
} from '../../workspace/types';
|
||||
|
||||
class TestFilesystem extends BaseFilesystem {
|
||||
readonly id: string;
|
||||
readonly name = 'TestFS';
|
||||
readonly provider = 'test';
|
||||
status: ProviderStatus = 'pending';
|
||||
|
||||
initFn = jest.fn().mockResolvedValue(undefined);
|
||||
destroyFn = jest.fn().mockResolvedValue(undefined);
|
||||
|
||||
constructor(id: string, options?: BaseFilesystemOptions) {
|
||||
super(options);
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
override async init(): Promise<void> {
|
||||
await this.initFn();
|
||||
}
|
||||
|
||||
override async destroy(): Promise<void> {
|
||||
await this.destroyFn();
|
||||
}
|
||||
|
||||
async readFile(_path: string, _options?: ReadOptions): Promise<string | Buffer> {
|
||||
await this.ensureReady();
|
||||
return 'test content';
|
||||
}
|
||||
|
||||
async writeFile(_path: string, _content: FileContent, _options?: WriteOptions): Promise<void> {
|
||||
await this.ensureReady();
|
||||
}
|
||||
|
||||
async appendFile(_path: string, _content: FileContent): Promise<void> {
|
||||
await this.ensureReady();
|
||||
}
|
||||
|
||||
async deleteFile(_path: string, _options?: RemoveOptions): Promise<void> {
|
||||
await this.ensureReady();
|
||||
}
|
||||
|
||||
async copyFile(_src: string, _dest: string, _options?: CopyOptions): Promise<void> {
|
||||
await this.ensureReady();
|
||||
}
|
||||
|
||||
async moveFile(_src: string, _dest: string, _options?: CopyOptions): Promise<void> {
|
||||
await this.ensureReady();
|
||||
}
|
||||
|
||||
async mkdir(_path: string, _options?: { recursive?: boolean }): Promise<void> {
|
||||
await this.ensureReady();
|
||||
}
|
||||
|
||||
async rmdir(_path: string, _options?: RemoveOptions): Promise<void> {
|
||||
await this.ensureReady();
|
||||
}
|
||||
|
||||
async readdir(_path: string, _options?: ListOptions): Promise<FileEntry[]> {
|
||||
await this.ensureReady();
|
||||
return [];
|
||||
}
|
||||
|
||||
async exists(_path: string): Promise<boolean> {
|
||||
await this.ensureReady();
|
||||
return false;
|
||||
}
|
||||
|
||||
async stat(_path: string): Promise<FileStat> {
|
||||
await this.ensureReady();
|
||||
return {
|
||||
name: 'test',
|
||||
path: _path,
|
||||
type: 'file',
|
||||
size: 0,
|
||||
createdAt: new Date(),
|
||||
modifiedAt: new Date(),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
describe('BaseFilesystem', () => {
|
||||
describe('lifecycle state transitions', () => {
|
||||
it('starts in pending status', () => {
|
||||
const fs = new TestFilesystem('1');
|
||||
expect(fs.status).toBe('pending');
|
||||
});
|
||||
|
||||
it('transitions pending → initializing → ready on _init', async () => {
|
||||
const statuses: string[] = [];
|
||||
const fs = new TestFilesystem('1');
|
||||
fs.initFn.mockImplementation(() => {
|
||||
statuses.push(fs.status);
|
||||
});
|
||||
|
||||
await fs._init();
|
||||
|
||||
expect(statuses).toContain('initializing');
|
||||
expect(fs.status).toBe('ready');
|
||||
});
|
||||
|
||||
it('_init is idempotent when already ready', async () => {
|
||||
const fs = new TestFilesystem('1');
|
||||
await fs._init();
|
||||
fs.initFn.mockClear();
|
||||
|
||||
await fs._init();
|
||||
|
||||
expect(fs.initFn).not.toHaveBeenCalled();
|
||||
expect(fs.status).toBe('ready');
|
||||
});
|
||||
|
||||
it('transitions to error on init failure', async () => {
|
||||
const fs = new TestFilesystem('1');
|
||||
fs.initFn.mockRejectedValue(new Error('init boom'));
|
||||
|
||||
await expect(fs._init()).rejects.toThrow('init boom');
|
||||
expect(fs.status).toBe('error');
|
||||
expect(fs.error).toBe('init boom');
|
||||
});
|
||||
|
||||
it('transitions to destroyed on _destroy after ready', async () => {
|
||||
const fs = new TestFilesystem('1');
|
||||
await fs._init();
|
||||
|
||||
const statuses: string[] = [];
|
||||
fs.destroyFn.mockImplementation(() => {
|
||||
statuses.push(fs.status);
|
||||
});
|
||||
|
||||
await fs._destroy();
|
||||
|
||||
expect(statuses).toContain('destroying');
|
||||
expect(fs.status).toBe('destroyed');
|
||||
});
|
||||
|
||||
it('_destroy from pending goes directly to destroyed', async () => {
|
||||
const fs = new TestFilesystem('1');
|
||||
await fs._destroy();
|
||||
|
||||
expect(fs.status).toBe('destroyed');
|
||||
expect(fs.destroyFn).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('_destroy is idempotent when already destroyed', async () => {
|
||||
const fs = new TestFilesystem('1');
|
||||
await fs._init();
|
||||
await fs._destroy();
|
||||
fs.destroyFn.mockClear();
|
||||
|
||||
await fs._destroy();
|
||||
|
||||
expect(fs.destroyFn).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('transitions to error on destroy failure', async () => {
|
||||
const fs = new TestFilesystem('1');
|
||||
await fs._init();
|
||||
fs.destroyFn.mockRejectedValue(new Error('destroy boom'));
|
||||
|
||||
await expect(fs._destroy()).rejects.toThrow('destroy boom');
|
||||
expect(fs.status).toBe('error');
|
||||
});
|
||||
});
|
||||
|
||||
describe('lifecycle hooks', () => {
|
||||
it('calls onInit hook after successful init', async () => {
|
||||
const onInit = jest.fn();
|
||||
const fs = new TestFilesystem('1', { onInit });
|
||||
|
||||
await fs._init();
|
||||
|
||||
expect(onInit).toHaveBeenCalledWith({ filesystem: fs });
|
||||
});
|
||||
|
||||
it('does not fail when onInit hook throws', async () => {
|
||||
const onInit = jest.fn().mockRejectedValue(new Error('hook err'));
|
||||
const fs = new TestFilesystem('1', { onInit });
|
||||
|
||||
await fs._init();
|
||||
|
||||
expect(fs.status).toBe('ready');
|
||||
});
|
||||
|
||||
it('calls onDestroy hook during destroy', async () => {
|
||||
const onDestroy = jest.fn();
|
||||
const fs = new TestFilesystem('1', { onDestroy });
|
||||
await fs._init();
|
||||
|
||||
await fs._destroy();
|
||||
|
||||
expect(onDestroy).toHaveBeenCalledWith({ filesystem: fs });
|
||||
});
|
||||
});
|
||||
|
||||
describe('ensureReady', () => {
|
||||
it('auto-initializes when calling a fs method from pending', async () => {
|
||||
const fs = new TestFilesystem('1');
|
||||
|
||||
const content = await fs.readFile('/test');
|
||||
|
||||
expect(content).toBe('test content');
|
||||
expect(fs.status).toBe('ready');
|
||||
});
|
||||
|
||||
it('throws if init fails when auto-initializing', async () => {
|
||||
const fs = new TestFilesystem('1');
|
||||
fs.initFn.mockRejectedValue(new Error('init fail'));
|
||||
|
||||
await expect(fs.readFile('/test')).rejects.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe('concurrent lifecycle calls', () => {
|
||||
it('deduplicates concurrent _init calls', async () => {
|
||||
const fs = new TestFilesystem('1');
|
||||
let resolveInit: () => void;
|
||||
fs.initFn.mockImplementation(
|
||||
async () =>
|
||||
await new Promise<void>((r) => {
|
||||
resolveInit = r;
|
||||
}),
|
||||
);
|
||||
|
||||
const p1 = fs._init();
|
||||
const p2 = fs._init();
|
||||
|
||||
resolveInit!();
|
||||
await Promise.all([p1, p2]);
|
||||
|
||||
expect(fs.initFn).toHaveBeenCalledTimes(1);
|
||||
expect(fs.status).toBe('ready');
|
||||
});
|
||||
|
||||
it('deduplicates concurrent _destroy calls', async () => {
|
||||
const fs = new TestFilesystem('1');
|
||||
await fs._init();
|
||||
|
||||
let resolveDestroy!: () => void;
|
||||
fs.destroyFn.mockImplementation(
|
||||
async () =>
|
||||
await new Promise<void>((r) => {
|
||||
resolveDestroy = r;
|
||||
}),
|
||||
);
|
||||
|
||||
const p1 = fs._destroy();
|
||||
// Flush microtasks so executeDestroy reaches destroyFn
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
const p2 = fs._destroy();
|
||||
|
||||
resolveDestroy();
|
||||
await Promise.all([p1, p2]);
|
||||
|
||||
expect(fs.destroyFn).toHaveBeenCalledTimes(1);
|
||||
expect(fs.status).toBe('destroyed');
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
@ -0,0 +1,369 @@
|
|||
import { BaseSandbox } from '../../workspace/sandbox/base-sandbox';
|
||||
import type {
|
||||
CommandResult,
|
||||
SandboxProcessManager,
|
||||
BaseSandboxOptions,
|
||||
} from '../../workspace/types';
|
||||
import { ProcessHandle } from '../../workspace/types';
|
||||
|
||||
class StubProcessHandle extends ProcessHandle {
|
||||
readonly pid: number;
|
||||
private resolvedExitCode: number | undefined;
|
||||
|
||||
constructor(pid: number) {
|
||||
super();
|
||||
this.pid = pid;
|
||||
}
|
||||
|
||||
get exitCode(): number | undefined {
|
||||
return this.resolvedExitCode;
|
||||
}
|
||||
|
||||
async kill(): Promise<boolean> {
|
||||
this.resolvedExitCode = 137;
|
||||
return await Promise.resolve(true);
|
||||
}
|
||||
|
||||
async sendStdin(_data: string): Promise<void> {}
|
||||
|
||||
protected async _wait(): Promise<CommandResult> {
|
||||
this.resolvedExitCode = 0;
|
||||
this.emitStdout('ok\n');
|
||||
return await Promise.resolve({
|
||||
success: true,
|
||||
exitCode: 0,
|
||||
stdout: this.stdout,
|
||||
stderr: this.stderr,
|
||||
executionTimeMs: 1,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function makeStubProcessManager(): SandboxProcessManager & {
|
||||
spawnMock: jest.Mock;
|
||||
} {
|
||||
const handle = new StubProcessHandle(1);
|
||||
const spawnMock = jest.fn().mockResolvedValue(handle);
|
||||
return {
|
||||
spawn: spawnMock,
|
||||
list: jest.fn().mockResolvedValue([]),
|
||||
get: jest.fn().mockResolvedValue(undefined),
|
||||
kill: jest.fn().mockResolvedValue(false),
|
||||
spawnMock,
|
||||
} as unknown as SandboxProcessManager & { spawnMock: jest.Mock };
|
||||
}
|
||||
|
||||
/**
 * Concrete BaseSandbox used to exercise the base-class lifecycle machinery.
 * Each lifecycle method delegates to a jest mock (`startFn`, `stopFn`,
 * `destroyFn`) so tests can override behavior per case and assert call counts.
 */
class TestSandbox extends BaseSandbox {
	readonly id: string;
	readonly name: string;
	readonly provider = 'test';

	// Jest mocks backing the lifecycle methods; tests re-mock these as needed.
	startFn = jest.fn().mockResolvedValue(undefined);
	stopFn = jest.fn().mockResolvedValue(undefined);
	destroyFn = jest.fn().mockResolvedValue(undefined);

	constructor(id: string, options?: BaseSandboxOptions) {
		super(options);
		this.id = id;
		this.name = `test-sandbox-${id}`;
	}

	async start(): Promise<void> {
		await this.startFn();
	}

	async stop(): Promise<void> {
		await this.stopFn();
	}

	async destroy(): Promise<void> {
		await this.destroyFn();
	}
}
|
||||
|
||||
describe('BaseSandbox', () => {
	// Exercises the protected lifecycle wrappers (_start/_stop/_destroy)
	// inherited from BaseSandbox via the TestSandbox fixture above.
	describe('lifecycle state transitions', () => {
		it('starts in pending status', () => {
			const sb = new TestSandbox('1');
			expect(sb.status).toBe('pending');
		});

		it('transitions pending → starting → running on _start', async () => {
			const statuses: string[] = [];
			const sb = new TestSandbox('1');
			// Capture the status observed while startFn is executing.
			sb.startFn.mockImplementation(() => {
				statuses.push(sb.status);
			});

			await sb._start();

			expect(statuses).toContain('starting');
			expect(sb.status).toBe('running');
		});

		it('_start is idempotent when already running', async () => {
			const sb = new TestSandbox('1');
			await sb._start();
			sb.startFn.mockClear();

			await sb._start();

			expect(sb.startFn).not.toHaveBeenCalled();
			expect(sb.status).toBe('running');
		});

		it('transitions to error on start failure', async () => {
			const sb = new TestSandbox('1');
			sb.startFn.mockRejectedValue(new Error('start boom'));

			await expect(sb._start()).rejects.toThrow('start boom');
			expect(sb.status).toBe('error');
		});

		it('transitions running → stopping → stopped on _stop', async () => {
			const sb = new TestSandbox('1');
			await sb._start();

			const statuses: string[] = [];
			sb.stopFn.mockImplementation(() => {
				statuses.push(sb.status);
			});

			await sb._stop();

			expect(statuses).toContain('stopping');
			expect(sb.status).toBe('stopped');
		});

		it('_stop is no-op when already stopped', async () => {
			const sb = new TestSandbox('1');
			await sb._start();
			await sb._stop();
			sb.stopFn.mockClear();

			await sb._stop();

			expect(sb.stopFn).not.toHaveBeenCalled();
		});

		it('_stop is no-op when pending', async () => {
			const sb = new TestSandbox('1');
			await sb._stop();
			expect(sb.stopFn).not.toHaveBeenCalled();
		});

		it('transitions to error on stop failure', async () => {
			const sb = new TestSandbox('1');
			await sb._start();
			sb.stopFn.mockRejectedValue(new Error('stop boom'));

			await expect(sb._stop()).rejects.toThrow('stop boom');
			expect(sb.status).toBe('error');
		});

		it('transitions running → destroying → destroyed on _destroy', async () => {
			const sb = new TestSandbox('1');
			await sb._start();

			const statuses: string[] = [];
			sb.destroyFn.mockImplementation(() => {
				statuses.push(sb.status);
			});

			await sb._destroy();

			expect(statuses).toContain('destroying');
			expect(sb.status).toBe('destroyed');
		});

		it('_destroy from pending goes directly to destroyed', async () => {
			const sb = new TestSandbox('1');
			await sb._destroy();

			// Never started, so the provider destroy hook is skipped entirely.
			expect(sb.status).toBe('destroyed');
			expect(sb.destroyFn).not.toHaveBeenCalled();
		});

		it('_destroy is idempotent when already destroyed', async () => {
			const sb = new TestSandbox('1');
			await sb._start();
			await sb._destroy();
			sb.destroyFn.mockClear();

			await sb._destroy();

			expect(sb.destroyFn).not.toHaveBeenCalled();
		});

		it('throws when trying to _start a destroyed sandbox', async () => {
			const sb = new TestSandbox('1');
			await sb._start();
			await sb._destroy();

			await expect(sb._start()).rejects.toThrow('Cannot start a destroyed sandbox');
		});

		it('transitions to error on destroy failure', async () => {
			const sb = new TestSandbox('1');
			await sb._start();
			sb.destroyFn.mockRejectedValue(new Error('destroy boom'));

			await expect(sb._destroy()).rejects.toThrow('destroy boom');
			expect(sb.status).toBe('error');
		});
	});

	// Hooks are supplied via BaseSandboxOptions and receive { sandbox }.
	describe('lifecycle hooks', () => {
		it('calls onStart hook after successful start', async () => {
			const onStart = jest.fn();
			const sb = new TestSandbox('1', { onStart });

			await sb._start();

			expect(onStart).toHaveBeenCalledWith({ sandbox: sb });
		});

		it('does not fail when onStart hook throws', async () => {
			const onStart = jest.fn().mockRejectedValue(new Error('hook error'));
			const sb = new TestSandbox('1', { onStart });

			await sb._start();

			// A rejecting hook must not break the lifecycle transition.
			expect(sb.status).toBe('running');
		});

		it('calls onStop hook before stopping', async () => {
			const onStop = jest.fn();
			const sb = new TestSandbox('1', { onStop });
			await sb._start();

			await sb._stop();

			expect(onStop).toHaveBeenCalledWith({ sandbox: sb });
		});

		it('calls onDestroy hook before destroying', async () => {
			const onDestroy = jest.fn();
			const sb = new TestSandbox('1', { onDestroy });
			await sb._start();

			await sb._destroy();

			expect(onDestroy).toHaveBeenCalledWith({ sandbox: sb });
		});
	});

	describe('ensureRunning', () => {
		it('starts the sandbox if not running', async () => {
			const sb = new TestSandbox('1');
			await sb.ensureRunning();

			expect(sb.status).toBe('running');
			expect(sb.startFn).toHaveBeenCalled();
		});

		it('does nothing if already running', async () => {
			const sb = new TestSandbox('1');
			await sb._start();
			sb.startFn.mockClear();

			await sb.ensureRunning();

			expect(sb.startFn).not.toHaveBeenCalled();
		});

		it('throws if sandbox is destroyed', async () => {
			const sb = new TestSandbox('1');
			await sb._start();
			await sb._destroy();

			await expect(sb.ensureRunning()).rejects.toThrow('has been destroyed');
		});
	});

	describe('executeCommand', () => {
		it('spawns a process and returns results', async () => {
			const pm = makeStubProcessManager();
			const sb = new TestSandbox('1', { processes: pm });

			await sb._start();
			const result = await sb.executeCommand('echo', ['hello']);

			// Command and args are joined into a single spawn string.
			expect(pm.spawnMock).toHaveBeenCalledTimes(1);
			expect((pm.spawnMock.mock.calls as unknown as string[][])[0][0]).toBe('echo hello');
			expect(result.success).toBe(true);
			expect(result.stdout).toBe('ok\n');
		});

		it('auto-starts sandbox before executing', async () => {
			const pm = makeStubProcessManager();
			const sb = new TestSandbox('1', { processes: pm });

			const result = await sb.executeCommand('ls');

			expect(sb.status).toBe('running');
			expect(result.success).toBe(true);
		});

		it('throws when no process manager is available', async () => {
			const sb = new TestSandbox('1');
			await sb._start();

			await expect(sb.executeCommand('ls')).rejects.toThrow('no process manager');
		});
	});

	describe('getInstructions', () => {
		it('returns empty string by default', () => {
			const sb = new TestSandbox('1');
			expect(sb.getInstructions()).toBe('');
		});
	});

	// Both _start and _destroy must coalesce overlapping in-flight calls.
	describe('concurrent lifecycle calls', () => {
		it('deduplicates concurrent _start calls', async () => {
			const sb = new TestSandbox('1');
			let resolveStart: () => void;
			sb.startFn.mockImplementation(
				async () =>
					await new Promise<void>((r) => {
						resolveStart = r;
					}),
			);

			const p1 = sb._start();
			const p2 = sb._start();

			resolveStart!();
			await Promise.all([p1, p2]);

			expect(sb.startFn).toHaveBeenCalledTimes(1);
			expect(sb.status).toBe('running');
		});

		it('deduplicates concurrent _destroy calls', async () => {
			const sb = new TestSandbox('1');
			await sb._start();

			let resolveDestroy!: () => void;
			sb.destroyFn.mockImplementation(
				async () =>
					await new Promise<void>((r) => {
						resolveDestroy = r;
					}),
			);

			const p1 = sb._destroy();
			// Flush microtasks so executeDestroy reaches destroyFn
			await Promise.resolve();
			await Promise.resolve();
			const p2 = sb._destroy();

			resolveDestroy();
			await Promise.all([p1, p2]);

			expect(sb.destroyFn).toHaveBeenCalledTimes(1);
			expect(sb.status).toBe('destroyed');
		});
	});
});
|
||||
|
|
@ -0,0 +1,90 @@
|
|||
import { callLifecycle } from '../../workspace/lifecycle';
|
||||
|
||||
describe('callLifecycle', () => {
|
||||
it('calls _init when both _init and init exist', async () => {
|
||||
const target = {
|
||||
_init: jest.fn().mockResolvedValue(undefined),
|
||||
init: jest.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
|
||||
await callLifecycle(target, 'init');
|
||||
|
||||
expect(target._init).toHaveBeenCalledTimes(1);
|
||||
expect(target.init).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('falls back to init when _init is undefined', async () => {
|
||||
const target = {
|
||||
init: jest.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
|
||||
await callLifecycle(target, 'init');
|
||||
|
||||
expect(target.init).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('calls _start when both _start and start exist', async () => {
|
||||
const target = {
|
||||
_start: jest.fn().mockResolvedValue(undefined),
|
||||
start: jest.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
|
||||
await callLifecycle(target, 'start');
|
||||
|
||||
expect(target._start).toHaveBeenCalledTimes(1);
|
||||
expect(target.start).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('calls _stop over stop', async () => {
|
||||
const target = {
|
||||
_stop: jest.fn().mockResolvedValue(undefined),
|
||||
stop: jest.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
|
||||
await callLifecycle(target, 'stop');
|
||||
|
||||
expect(target._stop).toHaveBeenCalledTimes(1);
|
||||
expect(target.stop).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('calls _destroy over destroy', async () => {
|
||||
const target = {
|
||||
_destroy: jest.fn().mockResolvedValue(undefined),
|
||||
destroy: jest.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
|
||||
await callLifecycle(target, 'destroy');
|
||||
|
||||
expect(target._destroy).toHaveBeenCalledTimes(1);
|
||||
expect(target.destroy).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('does nothing if neither underscore nor plain method exists', async () => {
|
||||
const target = {};
|
||||
|
||||
await expect(callLifecycle(target, 'init')).resolves.toBeUndefined();
|
||||
});
|
||||
|
||||
it('propagates errors from lifecycle methods', async () => {
|
||||
const error = new Error('lifecycle failure');
|
||||
const target = {
|
||||
_start: jest.fn().mockRejectedValue(error),
|
||||
};
|
||||
|
||||
await expect(callLifecycle(target, 'start')).rejects.toThrow('lifecycle failure');
|
||||
});
|
||||
|
||||
it('binds correctly (calls with proper this)', async () => {
|
||||
const target = {
|
||||
value: 42,
|
||||
// eslint-disable-next-line @typescript-eslint/require-await
|
||||
_init: jest.fn(async function (this: { value: number }) {
|
||||
expect(this.value).toBe(42);
|
||||
}),
|
||||
};
|
||||
|
||||
await callLifecycle(target, 'init');
|
||||
|
||||
expect(target._init).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
338
packages/@n8n/agents/src/__tests__/workspace/test-utils.ts
Normal file
338
packages/@n8n/agents/src/__tests__/workspace/test-utils.ts
Normal file
|
|
@ -0,0 +1,338 @@
|
|||
import { BaseFilesystem } from '../../workspace/filesystem/base-filesystem';
|
||||
import { BaseSandbox } from '../../workspace/sandbox/base-sandbox';
|
||||
import { ProcessHandle, SandboxProcessManager } from '../../workspace/types';
|
||||
import type {
|
||||
CommandResult,
|
||||
FileContent,
|
||||
FileEntry,
|
||||
FileStat,
|
||||
ListOptions,
|
||||
MountConfig,
|
||||
ProcessInfo,
|
||||
ProviderStatus,
|
||||
ReadOptions,
|
||||
RemoveOptions,
|
||||
SpawnProcessOptions,
|
||||
WriteOptions,
|
||||
} from '../../workspace/types';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// In-memory filesystem (fake)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class InMemoryFilesystem extends BaseFilesystem {
|
||||
readonly id: string;
|
||||
readonly name = 'InMemoryFilesystem';
|
||||
readonly provider = 'memory';
|
||||
readonly basePath = '/mem';
|
||||
status: ProviderStatus = 'pending';
|
||||
|
||||
private files = new Map<string, Buffer>();
|
||||
private dirs = new Set<string>();
|
||||
|
||||
constructor(id = 'mem-fs') {
|
||||
super();
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/require-await
|
||||
override async init(): Promise<void> {
|
||||
this.dirs.add('/');
|
||||
}
|
||||
|
||||
private normalizePath(p: string): string {
|
||||
return p.startsWith('/') ? p : `/${p}`;
|
||||
}
|
||||
|
||||
private parentDir(p: string): string {
|
||||
const parts = p.split('/');
|
||||
parts.pop();
|
||||
return parts.join('/') || '/';
|
||||
}
|
||||
|
||||
async readFile(filePath: string, options?: ReadOptions): Promise<string | Buffer> {
|
||||
await this.ensureReady();
|
||||
const p = this.normalizePath(filePath);
|
||||
const buf = this.files.get(p);
|
||||
if (!buf) throw new Error(`ENOENT: ${p}`);
|
||||
if (options?.encoding) return buf.toString(options.encoding);
|
||||
return buf;
|
||||
}
|
||||
|
||||
async writeFile(filePath: string, content: FileContent, options?: WriteOptions): Promise<void> {
|
||||
await this.ensureReady();
|
||||
const p = this.normalizePath(filePath);
|
||||
if (options?.recursive) {
|
||||
this.mkdirRecursive(this.parentDir(p));
|
||||
}
|
||||
const parent = this.parentDir(p);
|
||||
if (!this.dirs.has(parent))
|
||||
throw new Error(`ENOENT: parent directory ${parent} does not exist`);
|
||||
this.files.set(p, Buffer.from(content));
|
||||
}
|
||||
|
||||
async appendFile(filePath: string, content: FileContent): Promise<void> {
|
||||
await this.ensureReady();
|
||||
const p = this.normalizePath(filePath);
|
||||
const existing = this.files.get(p) ?? Buffer.alloc(0);
|
||||
const append = typeof content === 'string' ? Buffer.from(content) : Buffer.from(content);
|
||||
this.files.set(p, Buffer.concat([existing, append]));
|
||||
}
|
||||
|
||||
async deleteFile(filePath: string): Promise<void> {
|
||||
await this.ensureReady();
|
||||
const p = this.normalizePath(filePath);
|
||||
if (!this.files.has(p)) throw new Error(`ENOENT: ${p}`);
|
||||
this.files.delete(p);
|
||||
}
|
||||
|
||||
async copyFile(src: string, dest: string): Promise<void> {
|
||||
await this.ensureReady();
|
||||
const content = await this.readFile(src);
|
||||
await this.writeFile(dest, content);
|
||||
}
|
||||
|
||||
async moveFile(src: string, dest: string): Promise<void> {
|
||||
await this.ensureReady();
|
||||
await this.copyFile(src, dest);
|
||||
await this.deleteFile(src);
|
||||
}
|
||||
|
||||
async mkdir(dirPath: string, options?: { recursive?: boolean }): Promise<void> {
|
||||
await this.ensureReady();
|
||||
const p = this.normalizePath(dirPath);
|
||||
if (options?.recursive) {
|
||||
this.mkdirRecursive(p);
|
||||
} else {
|
||||
this.dirs.add(p);
|
||||
}
|
||||
}
|
||||
|
||||
async rmdir(dirPath: string, options?: RemoveOptions): Promise<void> {
|
||||
await this.ensureReady();
|
||||
const p = this.normalizePath(dirPath);
|
||||
if (options?.recursive) {
|
||||
for (const key of [...this.files.keys()]) {
|
||||
if (key.startsWith(p + '/') || key === p) this.files.delete(key);
|
||||
}
|
||||
for (const d of [...this.dirs]) {
|
||||
if (d.startsWith(p + '/') || d === p) this.dirs.delete(d);
|
||||
}
|
||||
} else {
|
||||
this.dirs.delete(p);
|
||||
}
|
||||
}
|
||||
|
||||
async readdir(dirPath: string, options?: ListOptions): Promise<FileEntry[]> {
|
||||
await this.ensureReady();
|
||||
const p = this.normalizePath(dirPath);
|
||||
const entries: FileEntry[] = [];
|
||||
const seen = new Set<string>();
|
||||
|
||||
for (const d of this.dirs) {
|
||||
if (d === p) continue;
|
||||
if (!d.startsWith(p + '/')) continue;
|
||||
const rel = d.slice(p.length + 1);
|
||||
if (!rel) continue;
|
||||
const isDirectChild = !rel.includes('/');
|
||||
if (isDirectChild || options?.recursive) {
|
||||
const name = rel.split('/').pop()!;
|
||||
if (!seen.has(`dir:${name}`)) {
|
||||
seen.add(`dir:${name}`);
|
||||
entries.push({ name, type: 'directory' });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const [filePath] of this.files) {
|
||||
if (!filePath.startsWith(p + '/')) continue;
|
||||
const rel = filePath.slice(p.length + 1);
|
||||
if (!rel) continue;
|
||||
const isDirectChild = !rel.includes('/');
|
||||
if (isDirectChild || options?.recursive) {
|
||||
const name = filePath.split('/').pop()!;
|
||||
if (options?.extension) {
|
||||
const ext = options.extension.startsWith('.')
|
||||
? options.extension
|
||||
: `.${options.extension}`;
|
||||
if (!name.endsWith(ext)) continue;
|
||||
}
|
||||
if (!seen.has(`file:${name}`)) {
|
||||
seen.add(`file:${name}`);
|
||||
entries.push({ name, type: 'file' });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return entries;
|
||||
}
|
||||
|
||||
async exists(filePath: string): Promise<boolean> {
|
||||
await this.ensureReady();
|
||||
const p = this.normalizePath(filePath);
|
||||
return this.files.has(p) || this.dirs.has(p);
|
||||
}
|
||||
|
||||
async stat(filePath: string): Promise<FileStat> {
|
||||
await this.ensureReady();
|
||||
const p = this.normalizePath(filePath);
|
||||
const now = new Date();
|
||||
if (this.dirs.has(p)) {
|
||||
return {
|
||||
name: p.split('/').pop() ?? '/',
|
||||
path: filePath,
|
||||
type: 'directory',
|
||||
size: 0,
|
||||
createdAt: now,
|
||||
modifiedAt: now,
|
||||
};
|
||||
}
|
||||
const buf = this.files.get(p);
|
||||
if (!buf) throw new Error(`ENOENT: ${p}`);
|
||||
return {
|
||||
name: p.split('/').pop()!,
|
||||
path: filePath,
|
||||
type: 'file',
|
||||
size: buf.length,
|
||||
createdAt: now,
|
||||
modifiedAt: now,
|
||||
};
|
||||
}
|
||||
|
||||
getMountConfig(): MountConfig {
|
||||
return { type: 'local', basePath: '/mem' };
|
||||
}
|
||||
|
||||
getInstructions(): string {
|
||||
return 'In-memory filesystem. All file paths are relative to /mem.';
|
||||
}
|
||||
|
||||
getFileContent(filePath: string): string | undefined {
|
||||
const p = this.normalizePath(filePath);
|
||||
return this.files.get(p)?.toString('utf-8');
|
||||
}
|
||||
|
||||
private mkdirRecursive(p: string): void {
|
||||
const parts = p.split('/');
|
||||
let current = '';
|
||||
for (const part of parts) {
|
||||
current += current === '/' ? part : `/${part}`;
|
||||
if (!current) current = '/';
|
||||
this.dirs.add(current);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class FakeProcessHandle extends ProcessHandle {
|
||||
readonly pid: number;
|
||||
private resolvedExitCode: number | undefined;
|
||||
private readonly outputFn: (command: string) => {
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
exitCode: number;
|
||||
};
|
||||
private readonly cmdString: string;
|
||||
|
||||
constructor(
|
||||
pid: number,
|
||||
command: string,
|
||||
outputFn: (cmd: string) => { stdout: string; stderr: string; exitCode: number },
|
||||
) {
|
||||
super();
|
||||
this.pid = pid;
|
||||
this.cmdString = command;
|
||||
this.command = command;
|
||||
this.outputFn = outputFn;
|
||||
}
|
||||
|
||||
get exitCode(): number | undefined {
|
||||
return this.resolvedExitCode;
|
||||
}
|
||||
|
||||
async kill(): Promise<boolean> {
|
||||
this.resolvedExitCode = 137;
|
||||
return await Promise.resolve(true);
|
||||
}
|
||||
|
||||
async sendStdin(_data: string): Promise<void> {}
|
||||
|
||||
protected async _wait(): Promise<CommandResult> {
|
||||
const result = this.outputFn(this.cmdString);
|
||||
this.emitStdout(result.stdout);
|
||||
if (result.stderr) this.emitStderr(result.stderr);
|
||||
this.resolvedExitCode = result.exitCode;
|
||||
|
||||
return await Promise.resolve({
|
||||
success: result.exitCode === 0,
|
||||
exitCode: result.exitCode,
|
||||
stdout: this.stdout,
|
||||
stderr: this.stderr,
|
||||
executionTimeMs: 1,
|
||||
command: this.command,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fake process manager
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class FakeProcessManager extends SandboxProcessManager {
|
||||
private nextPid = 1;
|
||||
private tracked = new Map<number, FakeProcessHandle>();
|
||||
commandHandler: (command: string) => { stdout: string; stderr: string; exitCode: number };
|
||||
|
||||
constructor() {
|
||||
super();
|
||||
this.commandHandler = (cmd) => ({ stdout: `executed: ${cmd}\n`, stderr: '', exitCode: 0 });
|
||||
}
|
||||
|
||||
async spawn(command: string, _options?: SpawnProcessOptions): Promise<ProcessHandle> {
|
||||
const pid = this.nextPid++;
|
||||
const handle = new FakeProcessHandle(pid, command, this.commandHandler);
|
||||
this.tracked.set(pid, handle);
|
||||
return await Promise.resolve(handle);
|
||||
}
|
||||
|
||||
async list(): Promise<ProcessInfo[]> {
|
||||
return await Promise.resolve(
|
||||
[...this.tracked.entries()].map(([pid, h]) => ({
|
||||
pid,
|
||||
command: h.command,
|
||||
exitCode: h.exitCode,
|
||||
})),
|
||||
);
|
||||
}
|
||||
|
||||
async get(pid: number): Promise<ProcessHandle | undefined> {
|
||||
return await Promise.resolve(this.tracked.get(pid));
|
||||
}
|
||||
|
||||
async kill(pid: number): Promise<boolean> {
|
||||
const h = this.tracked.get(pid);
|
||||
if (!h) return false;
|
||||
const result = await h.kill();
|
||||
this.tracked.delete(pid);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Sandbox fake whose lifecycle methods are no-ops; command execution is
 * delegated entirely to the FakeProcessManager passed at construction.
 */
export class FakeSandbox extends BaseSandbox {
	readonly id: string;
	readonly name: string;
	readonly provider = 'fake';

	constructor(id: string, pm: FakeProcessManager) {
		super({ processes: pm });
		this.id = id;
		this.name = `fake-sandbox-${id}`;
	}

	// Lifecycle transitions require no real work for the fake.
	async start(): Promise<void> {}
	async stop(): Promise<void> {}
	async destroy(): Promise<void> {}

	override getInstructions(): string {
		return 'Fake sandbox for executing commands.';
	}
}
|
||||
|
|
@ -0,0 +1,325 @@
|
|||
import { InMemoryFilesystem, FakeProcessManager, FakeSandbox } from './test-utils';
|
||||
import type { FileEntry } from '../../workspace/types';
|
||||
import { Workspace } from '../../workspace/workspace';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Integration tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('Workspace integration with fakes', () => {
|
||||
// Shared fixtures: wired together in beforeEach, torn down in afterEach.
let memFs: InMemoryFilesystem;
let fakeProcessManager: FakeProcessManager;
let fakeSandbox: FakeSandbox;
let workspace: Workspace;

beforeEach(async () => {
	memFs = new InMemoryFilesystem();
	fakeProcessManager = new FakeProcessManager();
	fakeSandbox = new FakeSandbox('test', fakeProcessManager);
	workspace = new Workspace({
		id: 'integration-test',
		filesystem: memFs,
		sandbox: fakeSandbox,
	});
	await workspace.init();
});

afterEach(async () => {
	await workspace.destroy();
});

it('initializes all providers and reaches ready state', () => {
	expect(workspace.status).toBe('ready');
	expect(memFs.status).toBe('ready');
	expect(fakeSandbox.status).toBe('running');
});

it('returns combined instructions', () => {
	// Instructions from both providers are merged into one string.
	const instructions = workspace.getInstructions();
	expect(instructions).toContain('Fake sandbox');
	expect(instructions).toContain('In-memory filesystem');
});

it('exposes all expected tools', () => {
	const tools = workspace.getTools();
	const names = tools.map((t) => t.name);

	expect(names).toContain('workspace_read_file');
	expect(names).toContain('workspace_write_file');
	expect(names).toContain('workspace_list_files');
	expect(names).toContain('workspace_file_stat');
	expect(names).toContain('workspace_mkdir');
	expect(names).toContain('workspace_execute_command');
});
|
||||
|
||||
describe('filesystem tools end-to-end', () => {
|
||||
it('write_file → read_file round-trip', async () => {
|
||||
const tools = workspace.getTools();
|
||||
const write = tools.find((t) => t.name === 'workspace_write_file')!;
|
||||
const read = tools.find((t) => t.name === 'workspace_read_file')!;
|
||||
|
||||
await write.handler!(
|
||||
{ path: '/hello.txt', content: 'Hello from integration test!' },
|
||||
{} as never,
|
||||
);
|
||||
const result = await read.handler!({ path: '/hello.txt', encoding: 'utf-8' }, {} as never);
|
||||
|
||||
expect((result as { content: string }).content).toBe('Hello from integration test!');
|
||||
});
|
||||
|
||||
it('mkdir → write → list round-trip', async () => {
|
||||
const tools = workspace.getTools();
|
||||
const mkdirTool = tools.find((t) => t.name === 'workspace_mkdir')!;
|
||||
const write = tools.find((t) => t.name === 'workspace_write_file')!;
|
||||
const list = tools.find((t) => t.name === 'workspace_list_files')!;
|
||||
|
||||
await mkdirTool.handler!({ path: '/project' }, {} as never);
|
||||
await write.handler!({ path: '/project/index.ts', content: 'export {}' }, {} as never);
|
||||
await write.handler!({ path: '/project/readme.md', content: '# Readme' }, {} as never);
|
||||
|
||||
const result = (await list.handler!({ path: '/project' }, {} as never)) as {
|
||||
entries: FileEntry[];
|
||||
};
|
||||
|
||||
expect(result.entries).toHaveLength(2);
|
||||
const names = result.entries.map((e) => e.name);
|
||||
expect(names).toContain('index.ts');
|
||||
expect(names).toContain('readme.md');
|
||||
});
|
||||
|
||||
it('write → stat returns metadata', async () => {
|
||||
const tools = workspace.getTools();
|
||||
const write = tools.find((t) => t.name === 'workspace_write_file')!;
|
||||
const stat = tools.find((t) => t.name === 'workspace_file_stat')!;
|
||||
|
||||
await write.handler!({ path: '/data.json', content: '{"key": "value"}' }, {} as never);
|
||||
const result = (await stat.handler!({ path: '/data.json' }, {} as never)) as {
|
||||
name: string;
|
||||
type: string;
|
||||
size: number;
|
||||
};
|
||||
|
||||
expect(result.name).toBe('data.json');
|
||||
expect(result.type).toBe('file');
|
||||
expect(result.size).toBe(16);
|
||||
});
|
||||
});
|
||||
|
||||
// End-to-end coverage of the sandbox-backed execute-command tool, driven
// through the fake process manager's programmable commandHandler.
describe('sandbox tools end-to-end', () => {
	it('executes a command through the tool', async () => {
		// Echo the command back so the tool output is predictable.
		fakeProcessManager.commandHandler = (cmd) => ({
			stdout: `ran: ${cmd}\n`,
			stderr: '',
			exitCode: 0,
		});

		const tools = workspace.getTools();
		const exec = tools.find((t) => t.name === 'workspace_execute_command')!;

		const result = (await exec.handler!({ command: 'echo test' }, {} as never)) as {
			success: boolean;
			stdout: string;
			exitCode: number;
		};

		expect(result.success).toBe(true);
		expect(result.stdout).toBe('ran: echo test\n');
		expect(result.exitCode).toBe(0);
	});

	it('reports command failure', async () => {
		// 127 is the conventional shell "command not found" exit code.
		fakeProcessManager.commandHandler = () => ({
			stdout: '',
			stderr: 'command not found',
			exitCode: 127,
		});

		const tools = workspace.getTools();
		const exec = tools.find((t) => t.name === 'workspace_execute_command')!;

		const result = (await exec.handler!({ command: 'invalid-cmd' }, {} as never)) as {
			success: boolean;
			stderr: string;
			exitCode: number;
		};

		expect(result.success).toBe(false);
		expect(result.exitCode).toBe(127);
		expect(result.stderr).toBe('command not found');
	});
});
|
||||
|
||||
// Exercises the complete workspace lifecycle (pending → ready → destroyed)
// and the tool sets exposed by partially-configured workspaces.
describe('full lifecycle', () => {
	it('init → use → destroy cycle', async () => {
		const ws = new Workspace({
			filesystem: new InMemoryFilesystem('lc-fs'),
			sandbox: new FakeSandbox('lc-sb', new FakeProcessManager()),
		});

		expect(ws.status).toBe('pending');

		await ws.init();
		expect(ws.status).toBe('ready');
		expect(ws.filesystem!.status).toBe('ready');
		expect(ws.sandbox!.status).toBe('running');

		const tools = ws.getTools();
		expect(tools.length).toBeGreaterThan(0);

		await ws.destroy();
		expect(ws.status).toBe('destroyed');
		expect(ws.sandbox!.status).toBe('destroyed');
		expect(ws.filesystem!.status).toBe('destroyed');
	});

	it('workspace with only filesystem', async () => {
		const ws = new Workspace({ filesystem: new InMemoryFilesystem('fs-only') });
		await ws.init();

		// No sandbox → no command tool, but file tools are present.
		const tools = ws.getTools();
		const names = tools.map((t) => t.name);
		expect(names).not.toContain('workspace_execute_command');
		expect(names).toContain('workspace_read_file');

		await ws.destroy();
	});

	it('workspace with only sandbox', async () => {
		const ws = new Workspace({
			sandbox: new FakeSandbox('sb-only', new FakeProcessManager()),
		});
		await ws.init();

		// No filesystem → no file tools, but command execution is present.
		const tools = ws.getTools();
		const names = tools.map((t) => t.name);
		expect(names).toContain('workspace_execute_command');
		expect(names).not.toContain('workspace_read_file');

		await ws.destroy();
	});

	it('empty workspace lifecycle', async () => {
		const ws = new Workspace({});
		await ws.init();
		expect(ws.status).toBe('ready');
		expect(ws.getTools()).toEqual([]);
		await ws.destroy();
		expect(ws.status).toBe('destroyed');
	});
});
|
||||
|
||||
describe('in-memory filesystem operations', () => {
|
||||
it('supports append', async () => {
|
||||
await memFs.writeFile('/log.txt', 'line1\n');
|
||||
await memFs.appendFile('/log.txt', 'line2\n');
|
||||
|
||||
const content = await memFs.readFile('/log.txt', { encoding: 'utf-8' });
|
||||
expect(content).toBe('line1\nline2\n');
|
||||
});
|
||||
|
||||
it('supports copy and move', async () => {
|
||||
await memFs.writeFile('/original.txt', 'original');
|
||||
await memFs.copyFile('/original.txt', '/copy.txt');
|
||||
|
||||
expect(await memFs.readFile('/copy.txt', { encoding: 'utf-8' })).toBe('original');
|
||||
|
||||
await memFs.moveFile('/copy.txt', '/moved.txt');
|
||||
expect(await memFs.exists('/copy.txt')).toBe(false);
|
||||
expect(await memFs.readFile('/moved.txt', { encoding: 'utf-8' })).toBe('original');
|
||||
});
|
||||
|
||||
it('supports rmdir recursive', async () => {
|
||||
await memFs.mkdir('/deep/nested', { recursive: true });
|
||||
await memFs.writeFile('/deep/nested/file.txt', 'data');
|
||||
|
||||
await memFs.rmdir('/deep', { recursive: true });
|
||||
|
||||
expect(await memFs.exists('/deep')).toBe(false);
|
||||
expect(await memFs.exists('/deep/nested/file.txt')).toBe(false);
|
||||
});
|
||||
|
||||
it('readFile throws on non-existent file', async () => {
|
||||
await expect(memFs.readFile('/nonexistent')).rejects.toThrow('ENOENT');
|
||||
});
|
||||
|
||||
it('deleteFile throws on non-existent file', async () => {
|
||||
await expect(memFs.deleteFile('/nonexistent')).rejects.toThrow('ENOENT');
|
||||
});
|
||||
});
|
||||
|
||||
// Verifies the FakeProcessManager test double itself: spawn tracking,
// handle lookup by pid, and kill semantics.
describe('fake process manager', () => {
	it('tracks spawned processes', async () => {
		const handle = await fakeProcessManager.spawn('echo hello');
		const processes = await fakeProcessManager.list();

		expect(processes).toHaveLength(1);
		expect(processes[0].pid).toBe(handle.pid);
	});

	it('can retrieve a handle by pid', async () => {
		const handle = await fakeProcessManager.spawn('ls');
		const retrieved = await fakeProcessManager.get(handle.pid);

		// get() must return the same handle object, not a copy.
		expect(retrieved).toBe(handle);
	});

	it('returns undefined for unknown pid', async () => {
		expect(await fakeProcessManager.get(999)).toBeUndefined();
	});

	it('can kill a process', async () => {
		const handle = await fakeProcessManager.spawn('sleep 100');
		const killed = await fakeProcessManager.kill(handle.pid);

		expect(killed).toBe(true);
		// 137 = 128 + SIGKILL(9), the conventional "killed" exit code.
		expect(handle.exitCode).toBe(137);
	});

	it('kill returns false for unknown pid', async () => {
		expect(await fakeProcessManager.kill(999)).toBe(false);
	});
});
|
||||
|
||||
// Covers stdout/stderr buffering and listener fan-out on ProcessHandle.
describe('ProcessHandle stdout/stderr buffering', () => {
	it('buffers stdout and stderr', async () => {
		fakeProcessManager.commandHandler = () => ({
			stdout: 'output data',
			stderr: 'error data',
			exitCode: 0,
		});

		const handle = await fakeProcessManager.spawn('test');
		const collected: string[] = [];

		// wait() should both invoke the callbacks and retain the full buffers
		// on the handle itself.
		await handle.wait({
			onStdout: (data) => collected.push(`out:${data}`),
			onStderr: (data) => collected.push(`err:${data}`),
		});

		expect(handle.stdout).toBe('output data');
		expect(handle.stderr).toBe('error data');
		expect(collected).toContain('out:output data');
		expect(collected).toContain('err:error data');
	});

	it('supports multiple stdout/stderr listeners', async () => {
		fakeProcessManager.commandHandler = () => ({
			stdout: 'hello',
			stderr: '',
			exitCode: 0,
		});

		const handle = await fakeProcessManager.spawn('test');
		const listener1: string[] = [];
		const listener2: string[] = [];

		handle.addStdoutListener((d) => listener1.push(d));
		handle.addStdoutListener((d) => listener2.push(d));

		await handle.wait();

		// Every registered listener receives each chunk.
		expect(listener1).toEqual(['hello']);
		expect(listener2).toEqual(['hello']);
	});
});
|
||||
});
|
||||
|
|
@ -0,0 +1,268 @@
|
|||
import { createWorkspaceTools } from '../../workspace/tools/workspace-tools';
|
||||
import type { WorkspaceFilesystem, WorkspaceSandbox, CommandResult } from '../../workspace/types';
|
||||
|
||||
// Builds a WorkspaceFilesystem test double where every operation is a jest
// mock with a canned resolved value. Individual members can be replaced
// per-test via `overrides`.
function makeFakeFilesystem(overrides: Partial<WorkspaceFilesystem> = {}): WorkspaceFilesystem {
	return {
		id: 'test-fs',
		name: 'TestFS',
		provider: 'test',
		status: 'ready',
		readFile: jest.fn().mockResolvedValue('file content'),
		writeFile: jest.fn().mockResolvedValue(undefined),
		appendFile: jest.fn().mockResolvedValue(undefined),
		deleteFile: jest.fn().mockResolvedValue(undefined),
		copyFile: jest.fn().mockResolvedValue(undefined),
		moveFile: jest.fn().mockResolvedValue(undefined),
		mkdir: jest.fn().mockResolvedValue(undefined),
		rmdir: jest.fn().mockResolvedValue(undefined),
		readdir: jest.fn().mockResolvedValue([
			{ name: 'file1.txt', type: 'file' as const },
			{ name: 'subdir', type: 'directory' as const },
		]),
		exists: jest.fn().mockResolvedValue(true),
		// Dates here are asserted as ISO strings in the file_stat handler test.
		stat: jest.fn().mockResolvedValue({
			name: 'test.txt',
			path: '/test.txt',
			type: 'file' as const,
			size: 100,
			createdAt: new Date('2024-01-01'),
			modifiedAt: new Date('2024-06-01'),
		}),
		...overrides,
	};
}
|
||||
|
||||
function makeFakeSandbox(overrides: Partial<WorkspaceSandbox> = {}): WorkspaceSandbox {
|
||||
const mockResult: CommandResult = {
|
||||
success: true,
|
||||
exitCode: 0,
|
||||
stdout: 'hello world',
|
||||
stderr: '',
|
||||
executionTimeMs: 42,
|
||||
};
|
||||
return {
|
||||
id: 'test-sandbox',
|
||||
name: 'TestSandbox',
|
||||
provider: 'test',
|
||||
status: 'running',
|
||||
executeCommand: jest.fn().mockResolvedValue(mockResult),
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe('createWorkspaceTools', () => {
|
||||
// Tool-set composition depends on which providers the workspace exposes.
it('returns no tools when workspace has no providers', () => {
	const tools = createWorkspaceTools({});
	expect(tools).toEqual([]);
});

it('returns filesystem tools when filesystem is set', () => {
	const tools = createWorkspaceTools({ filesystem: makeFakeFilesystem() });
	const names = tools.map((t) => t.name);

	// toEqual pins the exact registration order of the filesystem tools.
	expect(names).toEqual([
		'workspace_read_file',
		'workspace_write_file',
		'workspace_list_files',
		'workspace_file_stat',
		'workspace_mkdir',
		'workspace_delete_file',
		'workspace_append_file',
		'workspace_copy_file',
		'workspace_move_file',
		'workspace_rmdir',
	]);
});

it('returns execute_command when sandbox has executeCommand', () => {
	const tools = createWorkspaceTools({ sandbox: makeFakeSandbox() });
	const names = tools.map((t) => t.name);

	expect(names).toEqual(['workspace_execute_command']);
});

it('does not return execute_command when sandbox lacks executeCommand', () => {
	const tools = createWorkspaceTools({
		sandbox: makeFakeSandbox({ executeCommand: undefined }),
	});

	expect(tools).toEqual([]);
});

it('returns all tools when both filesystem and sandbox are set', () => {
	const tools = createWorkspaceTools({
		filesystem: makeFakeFilesystem(),
		sandbox: makeFakeSandbox(),
	});
	const names = tools.map((t) => t.name);

	// 10 filesystem tools + 1 sandbox tool = 11 total.
	expect(names).toContain('workspace_read_file');
	expect(names).toContain('workspace_execute_command');
	expect(names).toHaveLength(11);
});
|
||||
|
||||
// Verifies each generated tool's handler delegates to the matching provider
// method with correctly mapped arguments and result shape.
describe('tool handlers', () => {
	it('read_file handler calls filesystem.readFile', async () => {
		const fs = makeFakeFilesystem();
		const tools = createWorkspaceTools({ filesystem: fs });
		const readTool = tools.find((t) => t.name === 'workspace_read_file')!;

		const result = await readTool.handler!({ path: '/test.txt', encoding: 'utf-8' }, {} as never);

		expect(fs.readFile).toHaveBeenCalledWith('/test.txt', { encoding: 'utf-8' });
		expect(result).toEqual({ content: 'file content' });
	});

	it('write_file handler calls filesystem.writeFile', async () => {
		const fs = makeFakeFilesystem();
		const tools = createWorkspaceTools({ filesystem: fs });
		const writeTool = tools.find((t) => t.name === 'workspace_write_file')!;

		const result = await writeTool.handler!(
			{ path: '/out.txt', content: 'hello', recursive: true },
			{} as never,
		);

		expect(fs.writeFile).toHaveBeenCalledWith('/out.txt', 'hello', { recursive: true });
		expect(result).toEqual({ success: true });
	});

	it('list_files handler calls filesystem.readdir', async () => {
		const fs = makeFakeFilesystem();
		const tools = createWorkspaceTools({ filesystem: fs });
		const listTool = tools.find((t) => t.name === 'workspace_list_files')!;

		const result = await listTool.handler!({ path: '/', recursive: false }, {} as never);

		expect(fs.readdir).toHaveBeenCalledWith('/', { recursive: false });
		expect(result).toEqual({
			entries: [
				{ name: 'file1.txt', type: 'file' },
				{ name: 'subdir', type: 'directory' },
			],
		});
	});

	it('file_stat handler calls filesystem.stat', async () => {
		const fs = makeFakeFilesystem();
		const tools = createWorkspaceTools({ filesystem: fs });
		const statTool = tools.find((t) => t.name === 'workspace_file_stat')!;

		const result = await statTool.handler!({ path: '/test.txt' }, {} as never);

		expect(fs.stat).toHaveBeenCalledWith('/test.txt');
		// The Date fields from the mock stat are serialized to ISO strings.
		expect(result).toEqual({
			name: 'test.txt',
			path: '/test.txt',
			type: 'file',
			size: 100,
			createdAt: '2024-01-01T00:00:00.000Z',
			modifiedAt: '2024-06-01T00:00:00.000Z',
		});
	});

	it('mkdir handler calls filesystem.mkdir', async () => {
		const fs = makeFakeFilesystem();
		const tools = createWorkspaceTools({ filesystem: fs });
		const mkdirTool = tools.find((t) => t.name === 'workspace_mkdir')!;

		const result = await mkdirTool.handler!({ path: '/new-dir', recursive: true }, {} as never);

		expect(fs.mkdir).toHaveBeenCalledWith('/new-dir', { recursive: true });
		expect(result).toEqual({ success: true });
	});

	it('delete_file handler calls filesystem.deleteFile', async () => {
		const fs = makeFakeFilesystem();
		const tools = createWorkspaceTools({ filesystem: fs });
		const deleteTool = tools.find((t) => t.name === 'workspace_delete_file')!;

		const result = await deleteTool.handler!(
			{ path: '/old.txt', recursive: false, force: true },
			{} as never,
		);

		expect(fs.deleteFile).toHaveBeenCalledWith('/old.txt', { recursive: false, force: true });
		expect(result).toEqual({ success: true });
	});

	it('append_file handler calls filesystem.appendFile', async () => {
		const fs = makeFakeFilesystem();
		const tools = createWorkspaceTools({ filesystem: fs });
		const appendTool = tools.find((t) => t.name === 'workspace_append_file')!;

		const result = await appendTool.handler!(
			{ path: '/log.txt', content: 'new line' },
			{} as never,
		);

		// appendFile takes no options object — only path and content.
		expect(fs.appendFile).toHaveBeenCalledWith('/log.txt', 'new line');
		expect(result).toEqual({ success: true });
	});

	it('copy_file handler calls filesystem.copyFile', async () => {
		const fs = makeFakeFilesystem();
		const tools = createWorkspaceTools({ filesystem: fs });
		const copyTool = tools.find((t) => t.name === 'workspace_copy_file')!;

		const result = await copyTool.handler!(
			{ src: '/a.txt', dest: '/b.txt', overwrite: true },
			{} as never,
		);

		expect(fs.copyFile).toHaveBeenCalledWith('/a.txt', '/b.txt', { overwrite: true });
		expect(result).toEqual({ success: true });
	});

	it('move_file handler calls filesystem.moveFile', async () => {
		const fs = makeFakeFilesystem();
		const tools = createWorkspaceTools({ filesystem: fs });
		const moveTool = tools.find((t) => t.name === 'workspace_move_file')!;

		const result = await moveTool.handler!(
			{ src: '/old.txt', dest: '/new.txt', overwrite: false },
			{} as never,
		);

		expect(fs.moveFile).toHaveBeenCalledWith('/old.txt', '/new.txt', { overwrite: false });
		expect(result).toEqual({ success: true });
	});

	it('rmdir handler calls filesystem.rmdir', async () => {
		const fs = makeFakeFilesystem();
		const tools = createWorkspaceTools({ filesystem: fs });
		const rmdirTool = tools.find((t) => t.name === 'workspace_rmdir')!;

		const result = await rmdirTool.handler!(
			{ path: '/old-dir', recursive: true, force: false },
			{} as never,
		);

		expect(fs.rmdir).toHaveBeenCalledWith('/old-dir', { recursive: true, force: false });
		expect(result).toEqual({ success: true });
	});

	it('execute_command handler calls sandbox.executeCommand', async () => {
		const sb = makeFakeSandbox();
		const tools = createWorkspaceTools({ sandbox: sb });
		const execTool = tools.find((t) => t.name === 'workspace_execute_command')!;

		const result = await execTool.handler!(
			{ command: 'echo hello', cwd: '/tmp', timeout: 5000 },
			{} as never,
		);

		// The handler passes undefined as the second positional argument and
		// forwards cwd/timeout in the options object.
		expect(sb.executeCommand).toHaveBeenCalledWith('echo hello', undefined, {
			cwd: '/tmp',
			timeout: 5000,
		});
		expect(result).toEqual({
			success: true,
			exitCode: 0,
			stdout: 'hello world',
			stderr: '',
			executionTimeMs: 42,
		});
	});
});
|
||||
});
|
||||
309
packages/@n8n/agents/src/__tests__/workspace/workspace.test.ts
Normal file
309
packages/@n8n/agents/src/__tests__/workspace/workspace.test.ts
Normal file
|
|
@ -0,0 +1,309 @@
|
|||
import type { WorkspaceFilesystem, WorkspaceSandbox } from '../../workspace/types';
|
||||
import { Workspace } from '../../workspace/workspace';
|
||||
|
||||
// WorkspaceFilesystem double for lifecycle tests: every operation is a bare
// jest mock (no canned return values needed here). Tests supply lifecycle
// hooks such as _init/_destroy through `overrides`.
function makeFakeFilesystem(overrides: Partial<WorkspaceFilesystem> = {}): WorkspaceFilesystem {
	return {
		id: 'test-fs',
		name: 'TestFS',
		provider: 'test',
		status: 'pending',
		readFile: jest.fn(),
		writeFile: jest.fn(),
		appendFile: jest.fn(),
		deleteFile: jest.fn(),
		copyFile: jest.fn(),
		moveFile: jest.fn(),
		mkdir: jest.fn(),
		rmdir: jest.fn(),
		readdir: jest.fn(),
		exists: jest.fn(),
		stat: jest.fn(),
		...overrides,
	};
}
|
||||
|
||||
function makeFakeSandbox(overrides: Partial<WorkspaceSandbox> = {}): WorkspaceSandbox {
|
||||
return {
|
||||
id: 'test-sandbox',
|
||||
name: 'TestSandbox',
|
||||
provider: 'test',
|
||||
status: 'pending',
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe('Workspace', () => {
|
||||
// Construction defaults: generated ids/names, initial status, and provider
// accessors.
describe('constructor', () => {
	it('generates an id when none is provided', () => {
		const ws = new Workspace({});
		expect(ws.id).toMatch(/^workspace-[0-9a-f-]+$/);
	});

	it('uses a custom id when provided', () => {
		const ws = new Workspace({ id: 'my-ws' });
		expect(ws.id).toBe('my-ws');
	});

	it('generates a name from the id when none is provided', () => {
		const ws = new Workspace({ id: 'abc' });
		expect(ws.name).toBe('workspace-abc');
	});

	it('uses a custom name when provided', () => {
		const ws = new Workspace({ id: 'abc', name: 'My Workspace' });
		expect(ws.name).toBe('My Workspace');
	});

	it('starts with pending status', () => {
		const ws = new Workspace({});
		expect(ws.status).toBe('pending');
	});

	it('exposes filesystem and sandbox', () => {
		const fs = makeFakeFilesystem();
		const sb = makeFakeSandbox();
		const ws = new Workspace({ filesystem: fs, sandbox: sb });

		// Accessors must return the same provider instances, not copies.
		expect(ws.filesystem).toBe(fs);
		expect(ws.sandbox).toBe(sb);
	});

	it('returns undefined for absent filesystem and sandbox', () => {
		const ws = new Workspace({});

		expect(ws.filesystem).toBeUndefined();
		expect(ws.sandbox).toBeUndefined();
	});

	it('generates unique IDs using randomUUID', () => {
		const ws1 = new Workspace({});
		const ws2 = new Workspace({});
		expect(ws1.id).not.toBe(ws2.id);
		expect(ws1.id).toMatch(/^workspace-/);
	});
});
|
||||
|
||||
// init() contract: filesystem initializes before the sandbox starts,
// failures roll back the filesystem, and init is both idempotent and safe
// under concurrent calls.
describe('init', () => {
	it('calls filesystem._init then sandbox._start', async () => {
		const order: string[] = [];
		const fs = makeFakeFilesystem({
			_init: jest.fn(async () => {
				await Promise.resolve();
				order.push('fs-init');
			}),
		});
		const sb = makeFakeSandbox({
			_start: jest.fn(async () => {
				await Promise.resolve();
				order.push('sb-start');
			}),
		});
		const ws = new Workspace({ filesystem: fs, sandbox: sb });

		await ws.init();

		expect(order).toEqual(['fs-init', 'sb-start']);
		expect(ws.status).toBe('ready');
	});

	it('sets status to ready when no providers', async () => {
		const ws = new Workspace({});
		await ws.init();
		expect(ws.status).toBe('ready');
	});

	it('initializes only filesystem when no sandbox', async () => {
		const fs = makeFakeFilesystem({
			_init: jest.fn().mockResolvedValue(undefined),
		});
		const ws = new Workspace({ filesystem: fs });

		await ws.init();

		expect(fs._init).toHaveBeenCalled();
		expect(ws.status).toBe('ready');
	});

	it('starts only sandbox when no filesystem', async () => {
		const sb = makeFakeSandbox({
			_start: jest.fn().mockResolvedValue(undefined),
		});
		const ws = new Workspace({ sandbox: sb });

		await ws.init();

		expect(sb._start).toHaveBeenCalled();
		expect(ws.status).toBe('ready');
	});

	it('destroys filesystem and sets error status when sandbox start fails', async () => {
		const fs = makeFakeFilesystem({
			_init: jest.fn().mockResolvedValue(undefined),
			_destroy: jest.fn().mockResolvedValue(undefined),
		});
		const sb = makeFakeSandbox({
			_start: jest.fn().mockRejectedValue(new Error('sandbox start failed')),
		});
		const ws = new Workspace({ filesystem: fs, sandbox: sb });

		await expect(ws.init()).rejects.toThrow('sandbox start failed');

		// The already-initialized filesystem must be cleaned up on failure.
		expect(fs._init).toHaveBeenCalled();
		expect(fs._destroy).toHaveBeenCalled();
		expect(ws.status).toBe('error');
	});

	it('is idempotent when already ready', async () => {
		const fs = makeFakeFilesystem({
			_init: jest.fn().mockResolvedValue(undefined),
		});
		const ws = new Workspace({ filesystem: fs });

		await ws.init();
		(fs._init as jest.Mock).mockClear();

		// Second init must be a no-op once the workspace is ready.
		await ws.init();

		expect(fs._init).not.toHaveBeenCalled();
	});

	it('deduplicates concurrent init calls', async () => {
		let resolveInit: () => void;
		// _init stays pending until the test explicitly resolves it, keeping
		// both init() calls in flight simultaneously.
		const fs = makeFakeFilesystem({
			_init: jest.fn(
				async () =>
					await new Promise<void>((r) => {
						resolveInit = r;
					}),
			),
		});
		const ws = new Workspace({ filesystem: fs });

		const p1 = ws.init();
		const p2 = ws.init();

		resolveInit!();
		await Promise.all([p1, p2]);

		expect(fs._init).toHaveBeenCalledTimes(1);
		expect(ws.status).toBe('ready');
	});
});
|
||||
|
||||
// destroy() contract: teardown runs in reverse order of init (sandbox first,
// then filesystem) and surfaces failures via error status.
describe('destroy', () => {
	it('calls sandbox._destroy then filesystem._destroy', async () => {
		const order: string[] = [];
		const fs = makeFakeFilesystem({
			_destroy: jest.fn(async () => {
				await Promise.resolve();
				order.push('fs-destroy');
			}),
		});
		const sb = makeFakeSandbox({
			_destroy: jest.fn(async () => {
				await Promise.resolve();
				order.push('sb-destroy');
			}),
		});
		const ws = new Workspace({ filesystem: fs, sandbox: sb });

		await ws.destroy();

		expect(order).toEqual(['sb-destroy', 'fs-destroy']);
		expect(ws.status).toBe('destroyed');
	});

	it('sets status to destroyed when no providers', async () => {
		const ws = new Workspace({});
		await ws.destroy();
		expect(ws.status).toBe('destroyed');
	});

	it('transitions to error when sandbox destroy throws', async () => {
		const fs = makeFakeFilesystem({
			_destroy: jest.fn().mockResolvedValue(undefined),
		});
		const sb = makeFakeSandbox({
			_destroy: jest.fn().mockRejectedValue(new Error('sandbox boom')),
		});
		const ws = new Workspace({ filesystem: fs, sandbox: sb });

		await expect(ws.destroy()).rejects.toThrow('sandbox boom');

		// Filesystem teardown still runs even though the sandbox failed.
		expect(fs._destroy).toHaveBeenCalled();
		expect(ws.status).toBe('error');
	});
});
|
||||
|
||||
// getInstructions() concatenates sandbox instructions first, then filesystem,
// joined by a blank line; empty strings are dropped.
describe('getInstructions', () => {
	it('combines sandbox and filesystem instructions', () => {
		const fs = makeFakeFilesystem({
			getInstructions: () => 'FS instructions',
		});
		const sb = makeFakeSandbox({
			getInstructions: () => 'SB instructions',
		});
		const ws = new Workspace({ filesystem: fs, sandbox: sb });

		expect(ws.getInstructions()).toBe('SB instructions\n\nFS instructions');
	});

	it('returns empty string when no providers', () => {
		const ws = new Workspace({});
		expect(ws.getInstructions()).toBe('');
	});

	it('omits empty instruction strings', () => {
		const fs = makeFakeFilesystem({
			getInstructions: () => '',
		});
		const sb = makeFakeSandbox({
			getInstructions: () => 'SB only',
		});
		const ws = new Workspace({ filesystem: fs, sandbox: sb });

		expect(ws.getInstructions()).toBe('SB only');
	});
});
|
||||
|
||||
// getTools() mirrors createWorkspaceTools: file tools come from the
// filesystem provider; the command tool requires sandbox.executeCommand.
describe('getTools', () => {
	it('returns filesystem tools when filesystem is set', () => {
		const fs = makeFakeFilesystem();
		const ws = new Workspace({ filesystem: fs });

		const tools = ws.getTools();

		const names = tools.map((t) => t.name);
		expect(names).toContain('workspace_read_file');
		expect(names).toContain('workspace_write_file');
		expect(names).toContain('workspace_list_files');
		expect(names).toContain('workspace_file_stat');
		expect(names).toContain('workspace_mkdir');
	});

	it('returns execute_command tool when sandbox has executeCommand', () => {
		const sb = makeFakeSandbox({
			executeCommand: jest.fn(),
		});
		const ws = new Workspace({ sandbox: sb });

		const tools = ws.getTools();
		const names = tools.map((t) => t.name);
		expect(names).toContain('workspace_execute_command');
	});

	it('returns empty array when no providers', () => {
		const ws = new Workspace({});
		expect(ws.getTools()).toEqual([]);
	});

	it('does not include execute_command if sandbox has no executeCommand', () => {
		// The base fake sandbox has no executeCommand member.
		const sb = makeFakeSandbox();
		const ws = new Workspace({ sandbox: sb });

		const tools = ws.getTools();
		const names = tools.map((t) => t.name);
		expect(names).not.toContain('workspace_execute_command');
	});
});
|
||||
});
|
||||
25
packages/@n8n/agents/src/evals/categorization.ts
Normal file
25
packages/@n8n/agents/src/evals/categorization.ts
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import { Eval } from '../sdk/eval';
|
||||
|
||||
/** Deterministic categorization eval — checks if output matches the expected label. */
|
||||
export function categorization(): Eval {
|
||||
return new Eval('categorization')
|
||||
.description('Checks if output matches the expected category label')
|
||||
.check(({ output, expected }) => {
|
||||
if (!expected) {
|
||||
return { pass: false, reasoning: 'No expected category provided' };
|
||||
}
|
||||
|
||||
const normalOutput = output.toLowerCase().trim();
|
||||
const normalExpected = expected.toLowerCase().trim();
|
||||
|
||||
if (normalOutput === normalExpected) {
|
||||
return { pass: true, reasoning: 'Exact match' };
|
||||
}
|
||||
|
||||
if (normalOutput.includes(normalExpected)) {
|
||||
return { pass: true, reasoning: `Output contains expected label "${expected}"` };
|
||||
}
|
||||
|
||||
return { pass: false, reasoning: `Expected "${expected}", got "${output}"` };
|
||||
});
|
||||
}
|
||||
35
packages/@n8n/agents/src/evals/contains-keywords.ts
Normal file
35
packages/@n8n/agents/src/evals/contains-keywords.ts
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
import { Eval } from '../sdk/eval';
|
||||
|
||||
/**
|
||||
* Deterministic keyword presence eval.
|
||||
* Expects `expected` to be a comma-separated list of keywords.
|
||||
* Passes only if ALL keywords are found in the output.
|
||||
*/
|
||||
export function containsKeywords(): Eval {
|
||||
return new Eval('contains-keywords')
|
||||
.description('Checks if output contains all expected keywords')
|
||||
.check(({ output, expected }) => {
|
||||
if (!expected) {
|
||||
return { pass: false, reasoning: 'No expected keywords provided' };
|
||||
}
|
||||
|
||||
const keywords = expected
|
||||
.split(',')
|
||||
.map((k) => k.trim().toLowerCase())
|
||||
.filter(Boolean);
|
||||
if (keywords.length === 0) {
|
||||
return { pass: false, reasoning: 'No keywords to check' };
|
||||
}
|
||||
|
||||
const normalOutput = output.toLowerCase();
|
||||
const missing = keywords.filter((k) => !normalOutput.includes(k));
|
||||
|
||||
return {
|
||||
pass: missing.length === 0,
|
||||
reasoning:
|
||||
missing.length === 0
|
||||
? `All ${keywords.length} keywords found`
|
||||
: `Missing ${missing.length}/${keywords.length} keywords: ${missing.join(', ')}`,
|
||||
};
|
||||
});
|
||||
}
|
||||
30
packages/@n8n/agents/src/evals/correctness.ts
Normal file
30
packages/@n8n/agents/src/evals/correctness.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import { parseJudgeResponse } from './parse-judge-response';
|
||||
import { Eval } from '../sdk/eval';
|
||||
|
||||
/**
 * LLM-as-judge correctness eval. Returns an Eval pre-configured with a
 * judge handler — caller must still set `.model()` and `.credential()`.
 */
export function correctness(): Eval {
	return new Eval('correctness')
		.description('Judges if the output is factually correct compared to the expected answer')
		.judge(async ({ input, output, expected, llm }) => {
			// Single-shot pass/fail judging prompt; the judge model is asked for
			// bare JSON which parseJudgeResponse can also unwrap from fences.
			const prompt = [
				'You are evaluating an AI assistant response for factual correctness.',
				'',
				`User question: ${input}`,
				`Expected answer: ${expected ?? '(none provided)'}`,
				`Actual answer: ${output}`,
				'',
				'Does the actual answer correctly address the question and match the expected answer?',
				'Answer with pass or fail:',
				'- pass = the answer is correct and addresses the question',
				'- fail = the answer is incorrect, incomplete, or irrelevant',
				'',
				'Respond with ONLY a JSON object (no markdown fences): {"pass": true/false, "reasoning": "<explanation>"}',
			].join('\n');

			const result = await llm(prompt);
			return parseJudgeResponse(result.text);
		});
}
|
||||
28
packages/@n8n/agents/src/evals/helpfulness.ts
Normal file
28
packages/@n8n/agents/src/evals/helpfulness.ts
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
import { parseJudgeResponse } from './parse-judge-response';
|
||||
import { Eval } from '../sdk/eval';
|
||||
|
||||
/**
 * LLM-as-judge helpfulness eval. Returns an Eval pre-configured with a
 * judge handler — caller must still set `.model()` and `.credential()`.
 */
export function helpfulness(): Eval {
	return new Eval('helpfulness')
		.description('Judges whether the response is helpful for the user query')
		.judge(async ({ input, output, llm }) => {
			// Mirrors the correctness() prompt shape: pass/fail criteria plus a
			// bare-JSON response contract handled by parseJudgeResponse.
			const prompt = [
				'You are evaluating an AI assistant response for helpfulness.',
				'',
				`User question: ${input}`,
				`Assistant response: ${output}`,
				'',
				'Is this response helpful to the user?',
				'- pass = the response is helpful, addresses the question, and provides useful information',
				'- fail = the response is unhelpful, off-topic, or lacks useful information',
				'',
				'Respond with ONLY a JSON object (no markdown fences): {"pass": true/false, "reasoning": "<explanation>"}',
			].join('\n');

			const result = await llm(prompt);
			return parseJudgeResponse(result.text);
		});
}
|
||||
7
packages/@n8n/agents/src/evals/index.ts
Normal file
7
packages/@n8n/agents/src/evals/index.ts
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
export { correctness } from './correctness';
|
||||
export { helpfulness } from './helpfulness';
|
||||
export { stringSimilarity } from './string-similarity';
|
||||
export { categorization } from './categorization';
|
||||
export { containsKeywords } from './contains-keywords';
|
||||
export { jsonValidity } from './json-validity';
|
||||
export { toolCallAccuracy } from './tool-call-accuracy';
|
||||
18
packages/@n8n/agents/src/evals/json-validity.ts
Normal file
18
packages/@n8n/agents/src/evals/json-validity.ts
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import { Eval } from '../sdk/eval';
|
||||
|
||||
/** Deterministic JSON validity eval — checks if the output is parseable JSON. */
|
||||
export function jsonValidity(): Eval {
|
||||
return new Eval('json-validity')
|
||||
.description('Checks if output is valid JSON')
|
||||
.check(({ output }) => {
|
||||
try {
|
||||
JSON.parse(output);
|
||||
return { pass: true, reasoning: 'Valid JSON' };
|
||||
} catch (e) {
|
||||
return {
|
||||
pass: false,
|
||||
reasoning: `Invalid JSON: ${e instanceof Error ? e.message : 'parse error'}`,
|
||||
};
|
||||
}
|
||||
});
|
||||
}
|
||||
32
packages/@n8n/agents/src/evals/parse-judge-response.ts
Normal file
32
packages/@n8n/agents/src/evals/parse-judge-response.ts
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
import type { EvalScore } from '../types';
|
||||
|
||||
/**
|
||||
* Parse an LLM judge response into an EvalScore (pass/fail).
|
||||
* Handles JSON wrapped in markdown fences, plain JSON, or raw text.
|
||||
*/
|
||||
export function parseJudgeResponse(text: string): EvalScore {
|
||||
// Strip markdown code fences if present: ```json ... ``` or ``` ... ```
|
||||
const stripped = text
|
||||
.replace(/^```(?:json)?\s*\n?/i, '')
|
||||
.replace(/\n?```\s*$/i, '')
|
||||
.trim();
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(stripped) as { pass?: boolean; score?: number; reasoning?: string };
|
||||
// Support both { pass: true } and legacy { score: 0.8 } formats
|
||||
const pass = parsed.pass ?? (parsed.score !== undefined ? parsed.score >= 0.7 : false);
|
||||
return {
|
||||
pass,
|
||||
reasoning: parsed.reasoning ?? stripped,
|
||||
};
|
||||
} catch {
|
||||
// Fallback: detect pass/fail from plain text or malformed JSON
|
||||
const lowerText = stripped.toLowerCase();
|
||||
const hasPassTrue = lowerText.includes('"pass": true') || lowerText.includes('"pass":true');
|
||||
const hasFailFalse = lowerText.includes('"pass": false') || lowerText.includes('"pass":false');
|
||||
// If no JSON-like pattern, check for plain-text "pass" or "fail" keywords
|
||||
const pass =
|
||||
hasPassTrue || (!hasFailFalse && /\bpass\b/i.test(stripped) && !/\bfail\b/i.test(stripped));
|
||||
return { pass, reasoning: stripped };
|
||||
}
|
||||
}
|
||||
46
packages/@n8n/agents/src/evals/string-similarity.ts
Normal file
46
packages/@n8n/agents/src/evals/string-similarity.ts
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
import { Eval } from '../sdk/eval';
|
||||
|
||||
/**
|
||||
* Dice coefficient string similarity — measures overlap of bigrams between
|
||||
* two strings. Returns 0-1 where 1 is identical.
|
||||
*/
|
||||
function diceSimilarity(a: string, b: string): number {
|
||||
const normalA = a.toLowerCase().trim();
|
||||
const normalB = b.toLowerCase().trim();
|
||||
|
||||
if (normalA === normalB) return 1;
|
||||
if (normalA.length < 2 || normalB.length < 2) return 0;
|
||||
|
||||
const bigrams = (s: string): Set<string> => {
|
||||
const set = new Set<string>();
|
||||
for (let i = 0; i < s.length - 1; i++) {
|
||||
set.add(s.slice(i, i + 2));
|
||||
}
|
||||
return set;
|
||||
};
|
||||
|
||||
const aBigrams = bigrams(normalA);
|
||||
const bBigrams = bigrams(normalB);
|
||||
let intersection = 0;
|
||||
for (const bg of aBigrams) {
|
||||
if (bBigrams.has(bg)) intersection++;
|
||||
}
|
||||
|
||||
return (2 * intersection) / (aBigrams.size + bBigrams.size);
|
||||
}
|
||||
|
||||
/** Deterministic string similarity eval using Dice coefficient. */
|
||||
export function stringSimilarity(): Eval {
|
||||
return new Eval('string-similarity')
|
||||
.description('Measures string similarity between output and expected answer')
|
||||
.check(({ output, expected }) => {
|
||||
if (expected === undefined) {
|
||||
return { pass: false, reasoning: 'No expected value provided' };
|
||||
}
|
||||
const similarity = diceSimilarity(output, expected);
|
||||
return {
|
||||
pass: similarity >= 0.7,
|
||||
reasoning: `Dice similarity: ${(similarity * 100).toFixed(1)}%`,
|
||||
};
|
||||
});
|
||||
}
|
||||
35
packages/@n8n/agents/src/evals/tool-call-accuracy.ts
Normal file
35
packages/@n8n/agents/src/evals/tool-call-accuracy.ts
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
import { Eval } from '../sdk/eval';
|
||||
|
||||
/**
|
||||
* Deterministic tool call accuracy eval.
|
||||
* Expects `expected` to be a comma-separated list of tool names that should have been called.
|
||||
* Passes only if ALL expected tools were called.
|
||||
*/
|
||||
export function toolCallAccuracy(): Eval {
|
||||
return new Eval('tool-call-accuracy')
|
||||
.description('Checks if the agent called all expected tools')
|
||||
.check(({ expected, toolCalls }) => {
|
||||
if (!expected) {
|
||||
return { pass: false, reasoning: 'No expected tool names provided' };
|
||||
}
|
||||
|
||||
const expectedTools = expected
|
||||
.split(',')
|
||||
.map((t) => t.trim().toLowerCase())
|
||||
.filter(Boolean);
|
||||
if (expectedTools.length === 0) {
|
||||
return { pass: false, reasoning: 'No expected tools to check' };
|
||||
}
|
||||
|
||||
const calledTools = new Set((toolCalls ?? []).map((tc) => tc.tool.toLowerCase()));
|
||||
const missing = expectedTools.filter((t) => !calledTools.has(t));
|
||||
|
||||
return {
|
||||
pass: missing.length === 0,
|
||||
reasoning:
|
||||
missing.length === 0
|
||||
? `All ${expectedTools.length} expected tools were called`
|
||||
: `Missing tools: ${missing.join(', ')}. Called: [${[...calledTools].join(', ') || 'none'}]`,
|
||||
};
|
||||
});
|
||||
}
|
||||
129
packages/@n8n/agents/src/index.ts
Normal file
129
packages/@n8n/agents/src/index.ts
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
// Core SDK types: builder outputs, run/eval results, tool contexts, provider config.
export type {
	BuiltTool,
	BuiltProviderTool,
	BuiltAgent,
	BuiltMemory,
	BuiltGuardrail,
	BuiltEval,
	RunOptions,
	AgentResult,
	GenerateResult,
	StreamResult,
	EvalInput,
	EvalScore,
	EvalRunResult,
	EvalResults,
	ToolContext,
	InterruptibleToolContext,
	CheckpointStore,
	StreamChunk,
	SubAgentUsage,
	Provider,
	ThinkingConfig,
	ThinkingConfigFor,
	AnthropicThinkingConfig,
	OpenAIThinkingConfig,
	GoogleThinkingConfig,
	XaiThinkingConfig,
	SerializableAgentState,
	AgentRunState,
	MemoryConfig,
	TitleGenerationConfig,
	Thread,
	SemanticRecallConfig,
	ResumeOptions,
	McpServerConfig,
	McpVerifyResult,
	ModelConfig,
	ExecutionOptions,
	PersistedExecutionOptions,
	BuiltTelemetry,
	AttributeValue,
} from './types';
// Re-exported from the AI SDK so consumers don't need a direct dependency.
export type { ProviderOptions } from '@ai-sdk/provider-utils';
// Agent lifecycle events (enum plus payload/handler types).
export { AgentEvent } from './types';
export type { AgentEventData, AgentEventHandler } from './types';

// Fluent builder primitives and SDK entry points.
export { Tool } from './sdk/tool';
export { Memory } from './sdk/memory';
export { Guardrail } from './sdk/guardrail';
export { Eval } from './sdk/eval';
export { evaluate } from './sdk/evaluate';
export type { DatasetRow, EvaluateConfig } from './sdk/evaluate';
// Pre-built evals, namespaced (e.g. `evals.correctness()`).
export * as evals from './evals/index';
export { Telemetry } from './sdk/telemetry';
export { LangSmithTelemetry } from './integrations/langsmith';
export type { LangSmithTelemetryConfig } from './integrations/langsmith';
export { Agent } from './sdk/agent';
export { McpClient } from './sdk/mcp-client';
export { Network } from './sdk/network';
export { providerTools } from './sdk/provider-tools';
export { verify } from './sdk/verify';
export type { VerifyResult } from './sdk/verify';
// Message content shapes and message helpers.
export type {
	ContentCitation,
	ContentFile,
	ContentMetadata,
	ContentReasoning,
	ContentText,
	ContentToolCall,
	ContentToolResult,
	Message,
	MessageContent,
	MessageRole,
	AgentMessage,
	CustomAgentMessages,
	AgentDbMessage,
} from './types/sdk/message';
export {
	toDbMessage,
	filterLlmMessages,
	isLlmMessage,
} from './sdk/message';
// Provider/model catalog (remote fetch + shapes).
export { fetchProviderCatalog } from './sdk/catalog';
export type {
	ProviderCatalog,
	ProviderInfo,
	ModelInfo,
	ModelCost,
	ModelLimits,
} from './sdk/catalog';
// Persistent memory storage backends.
export { SqliteMemory } from './storage/sqlite-memory';
export type { SqliteMemoryConfig } from './storage/sqlite-memory';
export { PostgresMemory } from './storage/postgres-memory';
export type { PostgresMemoryConfig } from './storage/postgres-memory';

// Workspace primitives (filesystem + sandbox + process management).
export { Workspace } from './workspace';
export { BaseFilesystem } from './workspace';
export { BaseSandbox } from './workspace';
export { createWorkspaceTools } from './workspace';
export { SandboxProcessManager, ProcessHandle } from './workspace';

export type {
	BaseFilesystemOptions,
	FilesystemLifecycleHook,
	WorkspaceFilesystem,
	WorkspaceSandbox,
	WorkspaceConfig,
	CommandResult,
	CommandOptions,
	ExecuteCommandOptions,
	FileContent,
	FileStat,
	FileEntry,
	ReadOptions,
	WriteOptions,
	ListOptions,
	RemoveOptions,
	CopyOptions,
	ProviderStatus,
	SandboxInfo,
	LocalFilesystemOptions,
	LocalSandboxOptions,
	DaytonaSandboxOptions,
	BaseSandboxOptions,
	MountConfig,
	MountResult,
	SpawnProcessOptions,
	ProcessInfo,
} from './workspace';
|
||||
131
packages/@n8n/agents/src/integrations/langsmith.ts
Normal file
131
packages/@n8n/agents/src/integrations/langsmith.ts
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
import { Telemetry } from '../sdk/telemetry';
|
||||
import type { BuiltTelemetry, OpaqueTracer, OpaqueTracerProvider } from '../types/telemetry';
|
||||
|
||||
/** Configuration for {@link LangSmithTelemetry}. All fields are optional. */
export interface LangSmithTelemetryConfig {
	/** LangSmith API key. If omitted, resolved via `.credential()` or LANGSMITH_API_KEY env var. */
	apiKey?: string;
	/** LangSmith project name. Falls back to LANGSMITH_PROJECT env var, then 'default'. */
	project?: string;
	/** LangSmith API base URL. Falls back to LANGSMITH_ENDPOINT env var. */
	endpoint?: string;
	/**
	 * Override the full OTLP traces URL. Normally derived from `endpoint`
	 * as `${endpoint}/otel/v1/traces`. Use this for custom collectors or testing.
	 */
	url?: string;
}
|
||||
|
||||
/**
|
||||
* Create the LangSmith OTel tracer + provider from config.
|
||||
* Dynamically imports langsmith and OTel packages so they remain
|
||||
* optional peer dependencies.
|
||||
*/
|
||||
async function createLangSmithTracer(
|
||||
config?: LangSmithTelemetryConfig,
|
||||
resolvedApiKey?: string,
|
||||
): Promise<{ tracer: OpaqueTracer; provider: OpaqueTracerProvider }> {
|
||||
const { NodeTracerProvider } = (await import('@opentelemetry/sdk-trace-node')) as {
|
||||
NodeTracerProvider: new (cfg?: {
|
||||
spanProcessors?: unknown[];
|
||||
}) => OpaqueTracerProvider & {
|
||||
getTracer(name: string): OpaqueTracer;
|
||||
};
|
||||
};
|
||||
|
||||
const { LangSmithOTLPTraceExporter } = (await import('langsmith/experimental/otel/exporter')) as {
|
||||
LangSmithOTLPTraceExporter: new (cfg?: {
|
||||
apiKey?: string;
|
||||
projectName?: string;
|
||||
endpoint?: string;
|
||||
}) => unknown;
|
||||
};
|
||||
|
||||
const { LangSmithOTLPSpanProcessor } = (await import(
|
||||
'langsmith/experimental/otel/processor'
|
||||
)) as {
|
||||
LangSmithOTLPSpanProcessor: new (exporter: unknown) => unknown;
|
||||
};
|
||||
|
||||
// SECURITY: When the engine-resolved credential is the active key (i.e. no
|
||||
// explicit config.apiKey overrides it), ignore user-provided url/endpoint to
|
||||
// prevent redirecting the injected API key to an arbitrary host.
|
||||
const apiKey = config?.apiKey ?? resolvedApiKey;
|
||||
const usingResolvedKey = !config?.apiKey && resolvedApiKey !== undefined;
|
||||
const url = usingResolvedKey
|
||||
? undefined
|
||||
: (config?.url ??
|
||||
(config?.endpoint ? `${config.endpoint.replace(/\/$/, '')}/otel/v1/traces` : undefined));
|
||||
|
||||
const exporter = new LangSmithOTLPTraceExporter({
|
||||
apiKey,
|
||||
projectName: config?.project,
|
||||
...(url ? { url } : {}),
|
||||
});
|
||||
|
||||
const processor = new LangSmithOTLPSpanProcessor(exporter);
|
||||
|
||||
const provider = new NodeTracerProvider({
|
||||
spanProcessors: [processor],
|
||||
});
|
||||
// Do NOT call provider.register() — avoid polluting the global tracer provider.
|
||||
|
||||
return { tracer: provider.getTracer('@n8n/agents'), provider };
|
||||
}
|
||||
|
||||
/**
 * Pre-built telemetry for LangSmith. Extends `Telemetry` so all builder
 * methods (`.credential()`, `.functionId()`, `.recordOutputs()`, `.redact()`,
 * etc.) are available.
 *
 * Requires `langsmith` and `@opentelemetry/sdk-trace-node` as peer dependencies.
 *
 * @example
 * ```typescript
 * import { Agent, LangSmithTelemetry } from '@n8n/agents';
 *
 * const telemetry = new LangSmithTelemetry({ project: 'my-project' })
 *   .credential('langsmith')
 *   .recordOutputs(false);
 *
 * const agent = new Agent('assistant')
 *   .model('anthropic/claude-sonnet-4-5')
 *   .telemetry(telemetry)
 *   .instructions('...');
 * ```
 */
export class LangSmithTelemetry extends Telemetry {
	// LangSmith-specific settings; builder state lives on the parent Telemetry.
	private langsmithConfig?: LangSmithTelemetryConfig;

	/** @param config Optional LangSmith settings (api key, project, endpoint/url). */
	constructor(config?: LangSmithTelemetryConfig) {
		super();
		this.langsmithConfig = config;
	}

	/**
	 * @override Build telemetry config, creating the LangSmith tracer.
	 * Safe to call more than once: the previous tracer is cleared first.
	 * @throws Error if `.otlpEndpoint()` was set — it conflicts with the
	 *   tracer this class creates itself.
	 */
	override async build(): Promise<BuiltTelemetry> {
		if (this.otlpEndpointValue !== undefined) {
			throw new Error('LangSmithTelemetry creates its own tracer — do not use .otlpEndpoint().');
		}

		// Clear any tracer from a previous build() so the parent's
		// .tracer()/.otlpEndpoint() mutual-exclusion check passes cleanly.
		this.tracerValue = undefined;

		// The LangSmith exporter silently drops all spans unless this is set.
		// Auto-enable it so users don't have to remember a magic env var.
		// Must happen before the tracer is created below.
		process.env.LANGCHAIN_TRACING_V2 ??= 'true';

		const { tracer, provider } = await createLangSmithTracer(
			this.langsmithConfig,
			this.resolvedKey,
		);
		this.tracerValue = tracer;

		// Call parent build() which handles integrations, redaction, etc.
		const built = await super.build();

		// Attach the provider for flush/shutdown (parent build sets it from
		// otlpEndpoint but not from .tracer(), so we add it here).
		return { ...built, provider };
	}
}
|
||||
1841
packages/@n8n/agents/src/runtime/agent-runtime.ts
Normal file
1841
packages/@n8n/agents/src/runtime/agent-runtime.ts
Normal file
File diff suppressed because it is too large
Load diff
82
packages/@n8n/agents/src/runtime/event-bus.ts
Normal file
82
packages/@n8n/agents/src/runtime/event-bus.ts
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
import { AgentEvent } from '../types/runtime/event';
|
||||
import type { AgentEventData, AgentEventHandler } from '../types/runtime/event';
|
||||
|
||||
/**
|
||||
* Internal event bus for agent lifecycle events.
|
||||
*
|
||||
* Shared between Agent (public API) and AgentRuntime (emitter).
|
||||
* Handlers registered via `on()` are called synchronously when
|
||||
* `emit()` is invoked from the agentic loop.
|
||||
*
|
||||
* Cancellation uses a standard `AbortController`. The signal is passed
|
||||
* directly to the AI SDK's `generateText` / `streamText` calls so that
|
||||
* in-flight HTTP requests are cancelled immediately when `abort()` is called,
|
||||
* rather than waiting for the current LLM call to finish.
|
||||
*
|
||||
* A new controller is created for each run via `resetAbort()` so the same
|
||||
* agent instance can be reused after cancellation.
|
||||
*/
|
||||
export class AgentEventBus {
|
||||
private handlers = new Map<AgentEvent, Set<AgentEventHandler>>();
|
||||
|
||||
private controller = new AbortController();
|
||||
|
||||
private externalCleanup?: () => void;
|
||||
|
||||
on(event: AgentEvent, handler: AgentEventHandler): void {
|
||||
let set = this.handlers.get(event);
|
||||
if (!set) {
|
||||
set = new Set();
|
||||
this.handlers.set(event, set);
|
||||
}
|
||||
set.add(handler);
|
||||
}
|
||||
|
||||
emit(data: AgentEventData): void {
|
||||
const set = this.handlers.get(data.type);
|
||||
if (!set) return;
|
||||
for (const handler of set) {
|
||||
handler(data);
|
||||
}
|
||||
}
|
||||
|
||||
abort(): void {
|
||||
this.controller.abort();
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace the AbortController with a fresh one.
|
||||
* Called at the start of each generate() / stream() so the agent
|
||||
* can be reused after a previous cancellation.
|
||||
*
|
||||
* When an external signal is provided, its abort is forwarded to the
|
||||
* internal controller so that either `abort()` or the external signal
|
||||
* can cancel the current run.
|
||||
*/
|
||||
resetAbort(externalSignal?: AbortSignal): void {
|
||||
this.externalCleanup?.();
|
||||
this.externalCleanup = undefined;
|
||||
this.controller = new AbortController();
|
||||
|
||||
if (externalSignal) {
|
||||
if (externalSignal.aborted) {
|
||||
this.controller.abort(externalSignal.reason);
|
||||
} else {
|
||||
const onAbort = () => this.controller.abort(externalSignal.reason);
|
||||
externalSignal.addEventListener('abort', onAbort, { once: true });
|
||||
this.externalCleanup = () => externalSignal.removeEventListener('abort', onAbort);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** The AbortSignal for the current run. Pass to generateText / streamText. */
|
||||
get signal(): AbortSignal {
|
||||
return this.controller.signal;
|
||||
}
|
||||
|
||||
get isAborted(): boolean {
|
||||
return this.controller.signal.aborted;
|
||||
}
|
||||
}
|
||||
|
||||
export { AgentEvent };
|
||||
45
packages/@n8n/agents/src/runtime/logger.ts
Normal file
45
packages/@n8n/agents/src/runtime/logger.ts
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
/**
|
||||
* Filtered logger that suppresses known noisy warnings from the runtime.
|
||||
* All other messages are forwarded to console.
|
||||
*/
|
||||
|
||||
const SUPPRESSED_PATTERNS = [
|
||||
'No memory is configured but resourceId and threadId were passed in args',
|
||||
];
|
||||
|
||||
function isSuppressed(message: string): boolean {
|
||||
return SUPPRESSED_PATTERNS.some((pattern) => message.includes(pattern));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a logger that drops messages matching known suppressed patterns
|
||||
* and forwards everything else to console.
|
||||
*/
|
||||
export function createFilteredLogger() {
|
||||
return {
|
||||
debug(message: string, ...args: unknown[]) {
|
||||
if (!isSuppressed(message)) console.debug(message, ...args);
|
||||
},
|
||||
info(message: string, ...args: unknown[]) {
|
||||
if (!isSuppressed(message)) console.info(message, ...args);
|
||||
},
|
||||
warn(message: string, ...args: unknown[]) {
|
||||
if (!isSuppressed(message)) console.warn(message, ...args);
|
||||
},
|
||||
error(message: string, ...args: unknown[]) {
|
||||
if (!isSuppressed(message)) console.error(message, ...args);
|
||||
},
|
||||
trackException() {},
|
||||
getTransports() {
|
||||
return new Map();
|
||||
},
|
||||
// eslint-disable-next-line @typescript-eslint/require-await
|
||||
async listLogs() {
|
||||
return { logs: [] as unknown[], total: 0, page: 1, perPage: 100, hasMore: false };
|
||||
},
|
||||
// eslint-disable-next-line @typescript-eslint/require-await
|
||||
async listLogsByRunId() {
|
||||
return { logs: [] as unknown[], total: 0, page: 1, perPage: 100, hasMore: false };
|
||||
},
|
||||
};
|
||||
}
|
||||
178
packages/@n8n/agents/src/runtime/mcp-connection.ts
Normal file
178
packages/@n8n/agents/src/runtime/mcp-connection.ts
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
/** Don't remove the .js extensions. That's how the @modelcontextprotocol/sdk is packaged. */
|
||||
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
||||
import { SSEClientTransport } from '@modelcontextprotocol/sdk/client/sse.js';
|
||||
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
|
||||
import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
|
||||
import { CallToolResultSchema, type CallToolResult } from '@modelcontextprotocol/sdk/types.js';
|
||||
|
||||
import { McpToolResolver } from './mcp-tool-resolver';
|
||||
import { wrapToolForApproval } from '../sdk/tool';
|
||||
import type { McpServerConfig } from '../types/sdk/mcp';
|
||||
import type { BuiltTool } from '../types/sdk/tool';
|
||||
|
||||
/** The raw result returned by an MCP tool call. */
export type McpCallToolResult = CallToolResult;

/**
 * Wraps a single MCP SDK Client instance for one server. Not publicly exported.
 *
 * Lifecycle: `connect()` caches the in-flight/successful attempt so concurrent
 * callers share one connection; `disconnect()` de-duplicates concurrent closes.
 * NOTE(review): after `disconnect()` the Client is closed and `closed` stays
 * true — reconnecting the same instance does not look supported; confirm
 * before reusing a disconnected connection.
 */
export class McpConnection {
	// Underlying MCP SDK client (one per server).
	private client: Client;

	// Server configuration: transport selection, approval rules, timeout.
	private config: McpServerConfig;

	// Global flag (from Agent.requireToolApproval()) forcing approval on every tool.
	private readonly shouldRequireToolApproval: boolean;

	// Cached connection attempt; cleared on failure so connect() can retry.
	private connectionPromise: Promise<void> | undefined = undefined;
	// De-duplicates concurrent disconnect() calls.
	private disconnectPromise: Promise<void> | undefined = undefined;
	// Set once the client has been closed via disconnect().
	private closed = false;

	constructor(config: McpServerConfig, requireToolApproval = false) {
		this.config = config;
		this.shouldRequireToolApproval = requireToolApproval;
		this.client = new Client({ name: '@n8n/agents', version: '0.1.0' }, { capabilities: {} });
	}

	/**
	 * Connect to the server. Concurrent callers share a single attempt; a
	 * failed attempt clears the cached promise so the next call retries.
	 */
	async connect(): Promise<void> {
		if (this.connectionPromise !== undefined) {
			return await this.connectionPromise;
		}
		this.connectionPromise = this.connectWithTransport(this.createTransport(this.config));
		try {
			await this.connectionPromise;
		} catch (error) {
			this.connectionPromise = undefined;
			throw error;
		}
	}

	/**
	 * Connect via the given transport, honoring `config.connectionTimeoutMs`
	 * when set. On timeout or error the client is closed (best-effort) so a
	 * half-open transport does not leak.
	 * @throws Error if the timeout config is not a positive finite number,
	 *   or if the connection fails / times out.
	 */
	private async connectWithTransport(
		transport: SSEClientTransport | StreamableHTTPClientTransport | StdioClientTransport,
	): Promise<void> {
		const timeoutMs = this.config.connectionTimeoutMs;
		if (timeoutMs === undefined) {
			await this.client.connect(transport);
			return;
		}
		if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
			throw new Error(
				`MCP server "${this.config.name}": connectionTimeoutMs must be a positive finite number`,
			);
		}
		let timeoutId: ReturnType<typeof setTimeout> | undefined;
		try {
			// Race the SDK connect against a timer; whichever settles first wins.
			await Promise.race([
				this.client.connect(transport),
				new Promise<never>((_, reject) => {
					timeoutId = setTimeout(() => {
						reject(
							new Error(
								`MCP server "${this.config.name}": connection timed out after ${timeoutMs}ms`,
							),
						);
					}, timeoutMs);
				}),
			]);
		} catch (error) {
			// Best-effort close; the original error is what callers should see.
			await this.client.close().catch(() => {});
			throw error;
		} finally {
			// Always clear the timer, including on the success path.
			if (timeoutId !== undefined) clearTimeout(timeoutId);
		}
	}

	/** List tools from the server, resolving them into BuiltTool instances with prefixed names. */
	async listTools(): Promise<BuiltTool[]> {
		const result = await this.client.listTools();
		const resolver = new McpToolResolver();
		const tools = resolver.resolve(this, result.tools);
		// Tools with their own suspendSchema manage interrupts themselves;
		// everything else gets wrapped when approval is required.
		return tools.map((t) =>
			t.suspendSchema || !this.needsApproval(t)
				? t
				: wrapToolForApproval(t, { requireApproval: true }),
		);
	}

	/**
	 * Returns true when a resolved tool should be wrapped with an approval gate.
	 *
	 * A tool needs approval when either:
	 * - the global `shouldRequireToolApproval` flag (set via Agent.requireToolApproval()) is true, OR
	 * - `config.requireApproval` is `true` (all tools on this server), OR
	 * - `config.requireApproval` is a string array that includes the tool's original (un-prefixed) name.
	 */
	private needsApproval(tool: BuiltTool): boolean {
		if (this.shouldRequireToolApproval) return true;

		const { requireApproval } = this.config;
		if (requireApproval === true) return true;

		if (Array.isArray(requireApproval) && requireApproval.length > 0) {
			// Strip the "<server>_" prefix added by the resolver so we compare
			// against the names the user listed in config.
			const prefix = `${this.config.name}_`;
			const originalName = tool.name.startsWith(prefix)
				? tool.name.slice(prefix.length)
				: tool.name;
			return requireApproval.includes(originalName);
		}

		return false;
	}

	/** Invoke a tool on the server by its original (un-prefixed) name. */
	async callTool(name: string, args: Record<string, unknown>): Promise<McpCallToolResult> {
		const result = await this.client.callTool({ name, arguments: args }, CallToolResultSchema);
		return result as McpCallToolResult;
	}

	/** Close the connection. Concurrent callers share one close; repeat calls are no-ops. */
	async disconnect(): Promise<void> {
		if (this.disconnectPromise) return await this.disconnectPromise;
		const promise = this.doDisconnect();
		this.disconnectPromise = promise;
		return await promise.finally(() => {
			// Only clear our own promise — a later disconnect may have replaced it.
			if (this.disconnectPromise === promise) this.disconnectPromise = undefined;
		});
	}

	private async doDisconnect(): Promise<void> {
		if (this.closed) return;
		await this.client.close();
		this.connectionPromise = undefined;
		this.closed = true;
	}

	/** The server name from config; used as the tool-name prefix. */
	get name(): string {
		return this.config.name;
	}

	/**
	 * Returns true when this server's config declares per-server approval requirements
	 * without requiring a network connection.
	 */
	declaresApproval(): boolean {
		const { requireApproval } = this.config;
		return (
			requireApproval === true || (Array.isArray(requireApproval) && requireApproval.length > 0)
		);
	}

	/** Build the transport from config: stdio when `command` is set, otherwise HTTP/SSE via `url`. */
	private createTransport(
		config: McpServerConfig,
	): SSEClientTransport | StreamableHTTPClientTransport | StdioClientTransport {
		if (config.command) {
			return new StdioClientTransport({
				command: config.command,
				args: config.args,
				env: config.env,
			});
		} else if (config.url) {
			const url = new URL(config.url);
			const requestInit: RequestInit | undefined = config.headers
				? { headers: config.headers }
				: undefined;

			if (config.transport === 'streamableHttp') {
				return new StreamableHTTPClientTransport(url, { requestInit });
			}

			// Default to SSE for URL-based servers without an explicit transport.
			return new SSEClientTransport(url, { requestInit });
		}
		throw new Error(`MCP server "${config.name}": provide either "url" or "command"`);
	}
}
|
||||
92
packages/@n8n/agents/src/runtime/mcp-tool-resolver.ts
Normal file
92
packages/@n8n/agents/src/runtime/mcp-tool-resolver.ts
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
|
||||
import type { JSONSchema7 } from 'json-schema';
|
||||
|
||||
import type { McpCallToolResult, McpConnection } from './mcp-connection';
|
||||
import type { AgentMessage, ContentFile, ContentText } from '../types/sdk/message';
|
||||
import type { BuiltTool, InterruptibleToolContext, ToolContext } from '../types/sdk/tool';
|
||||
|
||||
type McpContentBlock = McpCallToolResult['content'][number];
|
||||
|
||||
/**
|
||||
* Convert raw MCP tool definitions into BuiltTool instances.
|
||||
* Tool names are prefixed with the server name to prevent collisions.
|
||||
* Not publicly exported.
|
||||
*/
|
||||
export class McpToolResolver {
|
||||
resolve(connection: McpConnection, tools: Tool[]): BuiltTool[] {
|
||||
return tools.map((tool) => this.resolveTool(connection, tool));
|
||||
}
|
||||
|
||||
private resolveTool(connection: McpConnection, tool: Tool): BuiltTool {
|
||||
const prefixedName = `${connection.name}_${tool.name}`;
|
||||
const originalName = tool.name;
|
||||
|
||||
const handler = async (
|
||||
input: unknown,
|
||||
_ctx: ToolContext | InterruptibleToolContext,
|
||||
): Promise<unknown> => {
|
||||
const args = (input ?? {}) as Record<string, unknown>;
|
||||
return await connection.callTool(originalName, args);
|
||||
};
|
||||
|
||||
const toMessage = (output: unknown): AgentMessage | undefined => {
|
||||
return buildRichMessage(output as McpCallToolResult);
|
||||
};
|
||||
|
||||
const builtTool: BuiltTool = {
|
||||
name: prefixedName,
|
||||
description: tool.description ?? '',
|
||||
inputSchema: tool.inputSchema as JSONSchema7,
|
||||
handler,
|
||||
toMessage,
|
||||
mcpTool: true,
|
||||
mcpServerName: connection.name,
|
||||
};
|
||||
|
||||
return builtTool;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an MCP CallToolResult into a rich AgentMessage containing text and image content parts.
|
||||
* Returns undefined if the result contains only text (the tool-result JSON is sufficient for the LLM).
|
||||
* Returns an assistant Message with ContentFile parts for image blocks so multimodal models can process them.
|
||||
*/
|
||||
function buildRichMessage(result: McpCallToolResult): AgentMessage | undefined {
|
||||
if (!result?.content) return undefined;
|
||||
|
||||
const hasImages = result.content.some((block) => block.type === 'image');
|
||||
if (!hasImages) return undefined;
|
||||
|
||||
const contentParts: Array<ContentText | ContentFile> = [];
|
||||
|
||||
for (const block of result.content) {
|
||||
const part = blockToContentPart(block);
|
||||
if (part) contentParts.push(part);
|
||||
}
|
||||
|
||||
if (contentParts.length === 0) return undefined;
|
||||
|
||||
return { role: 'assistant', content: contentParts };
|
||||
}
|
||||
|
||||
function blockToContentPart(block: McpContentBlock): ContentText | ContentFile | undefined {
|
||||
if (block.type === 'text' && block.text) {
|
||||
return { type: 'text', text: block.text };
|
||||
}
|
||||
|
||||
if (block.type === 'image' && block.data) {
|
||||
return {
|
||||
type: 'file',
|
||||
data: block.data,
|
||||
mediaType: block.mimeType ?? 'image/png',
|
||||
};
|
||||
}
|
||||
|
||||
if (block.type === 'resource' && block.resource) {
|
||||
const text = 'text' in block.resource ? block.resource.text : block.resource.uri;
|
||||
return { type: 'text', text };
|
||||
}
|
||||
|
||||
return undefined;
|
||||
}
|
||||
122
packages/@n8n/agents/src/runtime/memory-store.ts
Normal file
122
packages/@n8n/agents/src/runtime/memory-store.ts
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
import { toDbMessage } from '../sdk/message';
|
||||
import type { BuiltMemory, Thread } from '../types';
|
||||
import type { AgentDbMessage, AgentMessage } from '../types/sdk/message';
|
||||
|
||||
/** A message persisted for a thread, paired with its insertion timestamp. */
interface StoredMessage {
	// The message in its DB-serialized shape.
	message: AgentDbMessage;
	// Timestamp recorded when the message was stored — presumably used for the
	// `before` filter in getMessages; confirm against the full implementation.
	createdAt: Date;
}
|
||||
|
||||
/**
 * In-memory implementation of BuiltMemory.
 * All data is lost on process restart — suitable for development and testing.
 *
 * Threads, messages, and working memory live in independent Maps. Messages
 * are keyed by the `threadId` passed to `saveMessages`, so saving messages
 * does not strictly require a prior `saveThread` call here — but callers
 * should still use `saveMessagesToThread` so the thread record exists.
 */
export class InMemoryMemory implements BuiltMemory {
	private threads = new Map<string, Thread>();

	private messagesByThread = new Map<string, StoredMessage[]>();

	// Working memory keyed by resourceId when provided, else by threadId.
	private workingMemoryByKey = new Map<string, string>();

	// eslint-disable-next-line @typescript-eslint/require-await
	async getWorkingMemory(params: { threadId: string; resourceId?: string }): Promise<
		string | null
	> {
		return this.workingMemoryByKey.get(params.resourceId ?? params.threadId) ?? null;
	}

	// eslint-disable-next-line @typescript-eslint/require-await
	async saveWorkingMemory(
		params: { threadId: string; resourceId?: string },
		content: string,
	): Promise<void> {
		this.workingMemoryByKey.set(params.resourceId ?? params.threadId, content);
	}

	// eslint-disable-next-line @typescript-eslint/require-await
	async getThread(threadId: string): Promise<Thread | null> {
		return this.threads.get(threadId) ?? null;
	}

	/** Upsert a thread: preserves createdAt/title/metadata of an existing record, bumps updatedAt. */
	// eslint-disable-next-line @typescript-eslint/require-await
	async saveThread(thread: Omit<Thread, 'createdAt' | 'updatedAt'>): Promise<Thread> {
		const existing = this.threads.get(thread.id);
		const now = new Date();
		const saved: Thread = {
			...thread,
			title: thread.title ?? existing?.title,
			metadata: thread.metadata ?? existing?.metadata,
			createdAt: existing?.createdAt ?? now,
			updatedAt: now,
		};
		this.threads.set(thread.id, saved);
		return saved;
	}

	/** Delete a thread record and its entire message log. */
	// eslint-disable-next-line @typescript-eslint/require-await
	async deleteThread(threadId: string): Promise<void> {
		this.threads.delete(threadId);
		this.messagesByThread.delete(threadId);
	}

	/**
	 * Fetch messages in insertion order. `before` filters by stored timestamp;
	 * `limit` keeps the most recent N (a limit of 0 is treated as "no limit").
	 */
	// eslint-disable-next-line @typescript-eslint/require-await
	async getMessages(
		threadId: string,
		opts?: { limit?: number; before?: Date },
	): Promise<AgentDbMessage[]> {
		let stored = this.messagesByThread.get(threadId) ?? [];
		if (opts?.before) {
			const cutoff = opts.before.getTime();
			stored = stored.filter((s) => s.createdAt.getTime() < cutoff);
		}
		if (opts?.limit) stored = stored.slice(-opts.limit);
		return stored.map((s) => s.message);
	}

	/**
	 * Append messages to the log of `args.threadId`, stamping all of them with
	 * the same timestamp. The thread id comes from the argument directly — no
	 * prior `saveThread` call is consulted.
	 */
	// eslint-disable-next-line @typescript-eslint/require-await
	async saveMessages(args: {
		threadId: string;
		resourceId?: string;
		messages: AgentMessage[];
	}): Promise<void> {
		const existing = this.messagesByThread.get(args.threadId) ?? [];
		const now = new Date();
		for (const msg of args.messages) {
			existing.push({ message: toDbMessage(msg), createdAt: now });
		}
		this.messagesByThread.set(args.threadId, existing);
	}

	/** Remove messages by id across all threads. */
	// eslint-disable-next-line @typescript-eslint/require-await
	async deleteMessages(messageIds: string[]): Promise<void> {
		const idSet = new Set(messageIds);
		for (const [threadId, messages] of this.messagesByThread.entries()) {
			this.messagesByThread.set(
				threadId,
				messages.filter((s) => !idSet.has(s.message.id)),
			);
		}
	}
}
|
||||
|
||||
/**
|
||||
* Save messages to a specific thread, ensuring the thread exists first.
|
||||
* Always call this instead of `memory.saveMessages()` directly, as it
|
||||
* establishes the thread context required by implementations like InMemoryMemory.
|
||||
*/
|
||||
export async function saveMessagesToThread(
|
||||
memory: BuiltMemory,
|
||||
threadId: string,
|
||||
resourceId: string,
|
||||
messages: AgentMessage[],
|
||||
): Promise<void> {
|
||||
await memory.saveThread({ id: threadId, resourceId });
|
||||
await memory.saveMessages({ threadId, resourceId, messages });
|
||||
}
|
||||
128
packages/@n8n/agents/src/runtime/message-list.ts
Normal file
128
packages/@n8n/agents/src/runtime/message-list.ts
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
import type { ProviderOptions } from '@ai-sdk/provider-utils';
|
||||
import type { ModelMessage } from 'ai';
|
||||
|
||||
import { toAiMessages } from './messages';
|
||||
import { stripOrphanedToolMessages } from './strip-orphaned-tool-messages';
|
||||
import { buildWorkingMemoryInstruction } from './working-memory';
|
||||
import { filterLlmMessages } from '../sdk/message';
|
||||
import type { SerializedMessageList } from '../types/runtime/message-list';
|
||||
import type { AgentDbMessage } from '../types/sdk/message';
|
||||
|
||||
export type { SerializedMessageList };
|
||||
|
||||
/** Working-memory configuration plus its current persisted state for one run. */
export interface WorkingMemoryContext {
	/** Template text; used as the state shown to the LLM until real state is persisted. */
	template: string;
	/** Forwarded to buildWorkingMemoryInstruction — presumably selects structured-format guidance; confirm in working-memory.ts. */
	structured: boolean;
	/** The current persisted state, or null if not yet loaded. Falls back to template. */
	state: string | null;
}
|
||||
|
||||
/**
 * Append-only message container with Set-based source tracking.
 *
 * Three named sources:
 *   history  — messages loaded from memory at the start of the turn.
 *              Never included in turnDelta(); already persisted.
 *   input    — the caller's raw input for this turn (custom messages preserved).
 *   response — LLM replies, tool results, and custom tool messages from this turn.
 *
 * Source sets track membership by object identity; deserialize() rebuilds them
 * by message id so the three-way source distinction survives a round-trip.
 */
export class AgentMessageList {
	// Flat, ordered list of every message added this run.
	private all: AgentDbMessage[] = [];

	private historySet = new Set<AgentDbMessage>();

	private inputSet = new Set<AgentDbMessage>();

	private responseSet = new Set<AgentDbMessage>();

	/** Working memory context for this run. Set by buildMessageList / resume. */
	workingMemory: WorkingMemoryContext | undefined;

	/** Append already-persisted messages loaded from memory. */
	addHistory(messages: AgentDbMessage[]): void {
		for (const m of messages) {
			this.all.push(m);
			this.historySet.add(m);
		}
	}

	/** Append the caller's input messages for this turn. */
	addInput(messages: AgentDbMessage[]): void {
		for (const m of messages) {
			this.all.push(m);
			this.inputSet.add(m);
		}
	}

	/** Append LLM replies and tool results produced during this turn. */
	addResponse(messages: AgentDbMessage[]): void {
		for (const m of messages) {
			this.all.push(m);
			this.responseSet.add(m);
		}
	}

	/**
	 * Full LLM context for a generateText / streamText call.
	 * Prepends the system prompt (with working memory appended if configured),
	 * strips custom messages via filterLlmMessages.
	 */
	forLlm(baseInstructions: string, instructionProviderOptions?: ProviderOptions): ModelMessage[] {
		let systemPrompt = baseInstructions;

		if (this.workingMemory) {
			const wmInstruction = buildWorkingMemoryInstruction(
				this.workingMemory.template,
				this.workingMemory.structured,
			);
			// Show the current state (or the empty template) inside a fenced block.
			const wmState = this.workingMemory.state ?? this.workingMemory.template;
			systemPrompt +=
				wmInstruction + '\n\nCurrent working memory state:\n```\n' + wmState + '\n```';
		}

		const systemMessage: ModelMessage = instructionProviderOptions
			? { role: 'system', content: systemPrompt, providerOptions: instructionProviderOptions }
			: { role: 'system', content: systemPrompt };
		return [systemMessage, ...toAiMessages(filterLlmMessages(stripOrphanedToolMessages(this.all)))];
	}

	/**
	 * Current-turn delta for memory persistence (input + responses).
	 * Non-destructive — safe to call multiple times (e.g. on retry).
	 */
	turnDelta(): AgentDbMessage[] {
		return this.all.filter((m) => this.inputSet.has(m) || this.responseSet.has(m));
	}

	/**
	 * Only the LLM-produced messages from this turn (responses + tool results).
	 * Used for GenerateResult.messages — callers should not see their own input echoed back.
	 */
	responseDelta(): AgentDbMessage[] {
		return this.all.filter((m) => this.responseSet.has(m));
	}

	/** Snapshot the flat message array plus per-source id lists for persistence. */
	serialize(): SerializedMessageList {
		const toIds = (set: Set<AgentDbMessage>) => Array.from(set).map((m) => m.id);
		return {
			messages: [...this.all],
			historyIds: toIds(this.historySet),
			inputIds: toIds(this.inputSet),
			responseIds: toIds(this.responseSet),
		};
	}

	/** Rebuild a list from a serialized snapshot, restoring source membership by id. */
	static deserialize(data: SerializedMessageList): AgentMessageList {
		const list = new AgentMessageList();
		const historyIdSet = new Set(data.historyIds);
		const inputIdSet = new Set(data.inputIds);
		const responseIdSet = new Set(data.responseIds);
		for (const m of data.messages) {
			list.all.push(m);
			if (historyIdSet.has(m.id)) list.historySet.add(m);
			if (inputIdSet.has(m.id)) list.inputSet.add(m);
			if (responseIdSet.has(m.id)) list.responseSet.add(m);
		}
		return list;
	}
}
|
||||
299
packages/@n8n/agents/src/runtime/messages.ts
Normal file
299
packages/@n8n/agents/src/runtime/messages.ts
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
import type {
|
||||
FilePart,
|
||||
ModelMessage,
|
||||
TextPart,
|
||||
ToolCallPart,
|
||||
ToolResultPart,
|
||||
ImagePart,
|
||||
ToolApprovalRequest,
|
||||
ToolApprovalResponse,
|
||||
FinishReason as AiFinishReason,
|
||||
} from 'ai';
|
||||
|
||||
import { toDbMessage } from '../sdk/message';
|
||||
import type { FinishReason } from '../types';
|
||||
import type {
|
||||
AgentDbMessage,
|
||||
AgentMessage,
|
||||
ContentFile,
|
||||
ContentReasoning,
|
||||
ContentText,
|
||||
ContentToolCall,
|
||||
ContentToolResult,
|
||||
Message,
|
||||
MessageContent,
|
||||
} from '../types/sdk/message';
|
||||
import type { JSONValue } from '../types/utils/json';
|
||||
|
||||
/** Reasoning content part — mirrors @ai-sdk/provider-utils ReasoningPart (not re-exported by 'ai'). */
|
||||
type ReasoningPart = { type: 'reasoning'; text: string };
|
||||
|
||||
type AiContentPart =
|
||||
| TextPart
|
||||
| FilePart
|
||||
| ImagePart
|
||||
| ReasoningPart
|
||||
| ToolCallPart
|
||||
| ToolResultPart
|
||||
| ToolApprovalRequest
|
||||
| ToolApprovalResponse;
|
||||
|
||||
// --- Type guards for MessageContent blocks ---
|
||||
|
||||
function isText(block: MessageContent): block is ContentText {
|
||||
return block.type === 'text';
|
||||
}
|
||||
|
||||
function isReasoning(block: MessageContent): block is ContentReasoning {
|
||||
return block.type === 'reasoning';
|
||||
}
|
||||
|
||||
function isFile(block: MessageContent): block is ContentFile {
|
||||
return block.type === 'file';
|
||||
}
|
||||
|
||||
function isToolCall(block: MessageContent): block is ContentToolCall {
|
||||
return block.type === 'tool-call';
|
||||
}
|
||||
|
||||
function isToolResult(block: MessageContent): block is ContentToolResult {
|
||||
return block.type === 'tool-result';
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a JSONValue that may be a stringified JSON object back into
|
||||
* its parsed form. Non-string values pass through unchanged.
|
||||
*/
|
||||
function parseJsonValue(value: JSONValue): unknown {
|
||||
if (typeof value === 'string') {
|
||||
try {
|
||||
return JSON.parse(value);
|
||||
} catch {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
/** Convert a single n8n MessageContent block to an AI SDK content part. */
|
||||
function toAiContent(block: MessageContent): AiContentPart | undefined {
|
||||
let base: AiContentPart | undefined;
|
||||
if (isText(block)) {
|
||||
base = { type: 'text', text: block.text };
|
||||
} else if (isFile(block)) {
|
||||
base = {
|
||||
type: 'file',
|
||||
data: block.data,
|
||||
mediaType: block.mediaType ?? 'application/octet-stream',
|
||||
};
|
||||
} else if (isToolCall(block)) {
|
||||
base = {
|
||||
type: 'tool-call',
|
||||
toolCallId: block.toolCallId ?? '',
|
||||
toolName: block.toolName,
|
||||
input: parseJsonValue(block.input),
|
||||
providerExecuted: block.providerExecuted,
|
||||
};
|
||||
}
|
||||
if (isToolResult(block)) {
|
||||
if (block.isError) {
|
||||
if (typeof block.result === 'string') {
|
||||
base = {
|
||||
type: 'tool-result',
|
||||
toolCallId: block.toolCallId,
|
||||
toolName: block.toolName,
|
||||
output: { type: 'error-text', value: block.result },
|
||||
};
|
||||
} else {
|
||||
base = {
|
||||
type: 'tool-result',
|
||||
toolCallId: block.toolCallId,
|
||||
toolName: block.toolName,
|
||||
output: { type: 'error-json', value: block.result },
|
||||
};
|
||||
}
|
||||
} else {
|
||||
base = {
|
||||
type: 'tool-result',
|
||||
toolCallId: block.toolCallId,
|
||||
toolName: block.toolName,
|
||||
output: { type: 'json', value: block.result },
|
||||
};
|
||||
}
|
||||
} else if (isReasoning(block)) {
|
||||
base = { type: 'reasoning', text: block.text };
|
||||
}
|
||||
|
||||
if (base && block.providerOptions) {
|
||||
return { ...base, providerOptions: block.providerOptions } as AiContentPart;
|
||||
}
|
||||
return base;
|
||||
}
|
||||
|
||||
/** Convert a single AI SDK content part to an n8n MessageContent block. */
|
||||
function fromAiContent(part: AiContentPart): MessageContent | undefined {
|
||||
const providerOptions = 'providerOptions' in part ? part.providerOptions : undefined;
|
||||
|
||||
let base: MessageContent | undefined;
|
||||
switch (part.type) {
|
||||
case 'text':
|
||||
base = { type: 'text', text: part.text };
|
||||
break;
|
||||
case 'file': {
|
||||
const data =
|
||||
part.data instanceof URL ? part.data.toString() : (part.data as ContentFile['data']);
|
||||
base = { type: 'file', data, mediaType: part.mediaType };
|
||||
break;
|
||||
}
|
||||
case 'image': {
|
||||
const data =
|
||||
part.image instanceof URL ? part.image.toString() : (part.image as ContentFile['data']);
|
||||
base = { type: 'file', data, mediaType: part.mediaType };
|
||||
break;
|
||||
}
|
||||
case 'reasoning':
|
||||
base = { type: 'reasoning', text: part.text };
|
||||
break;
|
||||
case 'tool-call':
|
||||
base = {
|
||||
type: 'tool-call',
|
||||
toolCallId: part.toolCallId,
|
||||
toolName: part.toolName,
|
||||
input: part.input as JSONValue,
|
||||
providerExecuted: part.providerExecuted,
|
||||
};
|
||||
break;
|
||||
case 'tool-result': {
|
||||
const { output } = part;
|
||||
let result: JSONValue;
|
||||
let isError: boolean | undefined;
|
||||
if (output.type === 'json') {
|
||||
result = output.value;
|
||||
} else if (output.type === 'text') {
|
||||
result = output.value;
|
||||
} else if (output.type === 'error-json') {
|
||||
result = output.value;
|
||||
isError = true;
|
||||
} else if (output.type === 'error-text') {
|
||||
result = output.value;
|
||||
isError = true;
|
||||
} else {
|
||||
result = null;
|
||||
isError = true;
|
||||
}
|
||||
base = {
|
||||
type: 'tool-result',
|
||||
toolCallId: part.toolCallId,
|
||||
toolName: part.toolName,
|
||||
result,
|
||||
isError,
|
||||
};
|
||||
break;
|
||||
}
|
||||
// Ignore these types, because HITL is handled by our runtime
|
||||
case 'tool-approval-request':
|
||||
case 'tool-approval-response':
|
||||
default:
|
||||
return undefined;
|
||||
}
|
||||
|
||||
if (base && providerOptions) {
|
||||
return { ...base, providerOptions };
|
||||
}
|
||||
return base;
|
||||
}
|
||||
|
||||
/**
 * Convert a single n8n Message to an AI SDK ModelMessage.
 *
 * Content parts that are invalid for the target role are silently dropped
 * (e.g. tool-calls inside a user message); system content is flattened to a
 * plain string. Throws for unknown roles.
 */
export function toAiMessage(msg: Message): ModelMessage {
	let base: ModelMessage;
	switch (msg.role) {
		case 'system': {
			// System messages are plain strings in the AI SDK — concatenate text blocks.
			const text = msg.content
				.filter(isText)
				.map((b) => b.text)
				.join('');
			base = { role: 'system', content: text };
			break;
		}

		case 'user': {
			// User messages may only carry text and file parts.
			const parts = msg.content
				.map(toAiContent)
				.filter((p): p is TextPart | FilePart => p?.type === 'text' || p?.type === 'file');
			base = { role: 'user', content: parts };
			break;
		}

		case 'assistant': {
			// Assistant messages may carry text, reasoning, tool-call, tool-result, and file parts.
			const parts = msg.content
				.map(toAiContent)
				.filter(
					(p): p is TextPart | ReasoningPart | ToolCallPart | ToolResultPart | FilePart =>
						p?.type === 'text' ||
						p?.type === 'reasoning' ||
						p?.type === 'tool-call' ||
						p?.type === 'tool-result' ||
						p?.type === 'file',
				);
			base = { role: 'assistant', content: parts };
			break;
		}

		case 'tool': {
			// Tool messages carry only tool-result parts.
			const parts = msg.content
				.map(toAiContent)
				.filter((p): p is ToolResultPart => p?.type === 'tool-result');
			base = { role: 'tool', content: parts };
			break;
		}

		default:
			throw new Error(`Unknown role: ${msg.role as string}`);
	}

	// Message-level providerOptions are preserved alongside part-level ones.
	if (msg.providerOptions) {
		return { ...base, providerOptions: msg.providerOptions };
	}
	return base;
}
|
||||
|
||||
/** Convert n8n Messages to AI SDK ModelMessages for passing to stream/generateText. */
|
||||
export function toAiMessages(messages: Message[]): ModelMessage[] {
|
||||
return messages.map(toAiMessage);
|
||||
}
|
||||
|
||||
/** Convert a single AI SDK ModelMessage to an n8n AgentDbMessage (with a generated id). */
|
||||
export function fromAiMessage(msg: ModelMessage): AgentDbMessage {
|
||||
const rawContent = msg.content;
|
||||
const content: MessageContent[] =
|
||||
typeof rawContent === 'string'
|
||||
? [{ type: 'text', text: rawContent }]
|
||||
: rawContent.map(fromAiContent).filter((p): p is MessageContent => p !== undefined);
|
||||
const message: AgentMessage = { role: msg.role, content };
|
||||
if ('providerOptions' in msg && msg.providerOptions) {
|
||||
message.providerOptions = msg.providerOptions;
|
||||
}
|
||||
return toDbMessage(message);
|
||||
}
|
||||
|
||||
/** Convert AI SDK ModelMessages to n8n AgentDbMessages (each with a generated id). */
|
||||
export function fromAiMessages(messages: ModelMessage[]): AgentDbMessage[] {
|
||||
return messages.map(fromAiMessage);
|
||||
}
|
||||
|
||||
export function fromAiFinishReason(reason: AiFinishReason): FinishReason {
|
||||
switch (reason) {
|
||||
case 'stop':
|
||||
return 'stop';
|
||||
case 'length':
|
||||
return 'length';
|
||||
case 'content-filter':
|
||||
return 'content-filter';
|
||||
case 'tool-calls':
|
||||
return 'tool-calls';
|
||||
case 'error':
|
||||
return 'error';
|
||||
case 'other':
|
||||
return 'other';
|
||||
}
|
||||
}
|
||||
116
packages/@n8n/agents/src/runtime/model-factory.ts
Normal file
116
packages/@n8n/agents/src/runtime/model-factory.ts
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
import type { EmbeddingModel, LanguageModel } from 'ai';
|
||||
|
||||
import type { ModelConfig } from '../types/sdk/agent';
|
||||
|
||||
type CreateProviderFn = (opts?: {
|
||||
apiKey?: string;
|
||||
baseURL?: string;
|
||||
}) => (model: string) => LanguageModel;
|
||||
type CreateEmbeddingProviderFn = (opts?: { apiKey?: string }) => {
|
||||
embeddingModel(model: string): EmbeddingModel;
|
||||
};
|
||||
|
||||
function isLanguageModel(config: unknown): config is LanguageModel {
|
||||
return typeof config === 'object' && config !== null && 'doGenerate' in config;
|
||||
}
|
||||
|
||||
/**
 * Create an AI SDK LanguageModel from a ModelConfig.
 *
 * Accepts either an already-constructed LanguageModel (returned as-is) or a
 * "provider/model" id — as a bare string or a config object with optional
 * apiKey/url overrides. Provider packages are loaded dynamically via
 * require() so only the provider needed at runtime must be installed.
 *
 * @throws when the model id is empty or the provider prefix is unsupported.
 */
export function createModel(config: ModelConfig): LanguageModel {
	if (isLanguageModel(config)) {
		return config;
	}

	// Treat empty / whitespace-only strings the same as "not provided".
	const stripEmpty = <T>(value: T | undefined): T | undefined => {
		if (!value) return undefined;
		if (typeof value === 'string' && value.trim() === '') return undefined;
		return value;
	};

	const modelId = stripEmpty(typeof config === 'string' ? config : config.id);
	const apiKey = stripEmpty(typeof config === 'string' ? undefined : config.apiKey);
	const baseURL = stripEmpty(typeof config === 'string' ? undefined : config.url);

	if (!modelId) {
		throw new Error('Model ID is required');
	}

	// Split "provider/model"; the model name itself may contain slashes.
	const [provider, ...rest] = modelId.split('/');
	const modelName = rest.join('/');

	switch (provider) {
		case 'anthropic': {
			const { createAnthropic } = require('@ai-sdk/anthropic') as {
				createAnthropic: CreateProviderFn;
			};
			return createAnthropic({ apiKey, baseURL })(modelName);
		}
		case 'openai': {
			const { createOpenAI } = require('@ai-sdk/openai') as {
				createOpenAI: CreateProviderFn;
			};
			return createOpenAI({ apiKey, baseURL })(modelName);
		}
		case 'google': {
			const { createGoogleGenerativeAI } = require('@ai-sdk/google') as {
				createGoogleGenerativeAI: CreateProviderFn;
			};
			return createGoogleGenerativeAI({ apiKey, baseURL })(modelName);
		}
		case 'xai': {
			const { createXai } = require('@ai-sdk/xai') as {
				createXai: CreateProviderFn;
			};
			return createXai({ apiKey, baseURL })(modelName);
		}
		default:
			throw new Error(
				`Unsupported provider: "${provider}". Supported: anthropic, openai, google, xai`,
			);
	}
}
|
||||
|
||||
/**
 * Registry of embedding provider packages and their factory function names.
 * Each AI SDK provider follows the same pattern:
 *   createProvider({ apiKey }).embeddingModel(modelName)
 *
 * To add a new provider, install its @ai-sdk/* package and add an entry here.
 */
const EMBEDDING_PROVIDERS = {
	openai: { pkg: '@ai-sdk/openai', factory: 'createOpenAI' },
	google: { pkg: '@ai-sdk/google', factory: 'createGoogleGenerativeAI' },
	mistral: { pkg: '@ai-sdk/mistral', factory: 'createMistral' },
	cohere: { pkg: '@ai-sdk/cohere', factory: 'createCohere' },
	// "amazon" and "bedrock" are aliases for the same Bedrock package.
	amazon: { pkg: '@ai-sdk/amazon-bedrock', factory: 'createAmazonBedrock' },
	bedrock: { pkg: '@ai-sdk/amazon-bedrock', factory: 'createAmazonBedrock' },
} as const;

// Provider prefixes accepted by createEmbeddingModel.
type EmbeddingProvider = keyof typeof EMBEDDING_PROVIDERS;
// "provider/model" template — (string & {}) in the signature keeps arbitrary strings allowed.
type EmbeddingModelId = `${EmbeddingProvider}/${string}`;
||||
|
||||
/**
 * Create an embedding model from a "provider/model" string (e.g. "openai/text-embedding-3-small").
 * Supports any AI SDK provider that exposes `.embeddingModel()`.
 * The provider package must be installed at runtime.
 *
 * @throws when the provider prefix is not listed in EMBEDDING_PROVIDERS.
 */
export function createEmbeddingModel(
	embedderString: EmbeddingModelId | (string & {}),
	apiKey?: string,
): EmbeddingModel {
	// Only the first segment is the provider; model names may contain slashes.
	const [provider, ...rest] = embedderString.split('/');
	const modelName = rest.join('/');

	const entry = EMBEDDING_PROVIDERS[provider as EmbeddingProvider];
	if (!entry) {
		const supported = Object.keys(EMBEDDING_PROVIDERS).join(', ');
		throw new Error(`Unsupported embedding provider: "${provider}". Supported: ${supported}`);
	}

	// Lazy-load the provider package so only the one actually used must be installed.
	const mod = require(entry.pkg) as Record<string, CreateEmbeddingProviderFn>;
	const factory = mod[entry.factory];
	return factory({ apiKey }).embeddingModel(modelName);
}
|
||||
68
packages/@n8n/agents/src/runtime/run-state.ts
Normal file
68
packages/@n8n/agents/src/runtime/run-state.ts
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
import type { CheckpointStore, SerializableAgentState } from '../types';
|
||||
|
||||
/**
|
||||
* Default in-memory CheckpointStore implementation.
|
||||
* Used when no external store is configured (storage: 'memory' or omitted).
|
||||
*
|
||||
* Note: Suspended runs that are never resumed accumulate indefinitely.
|
||||
* For long-running processes a TTL-based eviction mechanism should be added
|
||||
* to prevent unbounded memory growth.
|
||||
*/
|
||||
class MemoryCheckpointStore implements CheckpointStore {
|
||||
private store = new Map<string, SerializableAgentState>();
|
||||
|
||||
async save(key: string, state: SerializableAgentState): Promise<void> {
|
||||
await Promise.resolve(this.store.set(key, state));
|
||||
}
|
||||
|
||||
async load(key: string): Promise<SerializableAgentState | undefined> {
|
||||
return await Promise.resolve(this.store.get(key));
|
||||
}
|
||||
|
||||
async delete(key: string): Promise<void> {
|
||||
await Promise.resolve(this.store.delete(key));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Manages suspended agent run state for tool approval (HITL).
|
||||
* Delegates all persistence to a CheckpointStore — either the provided
|
||||
* external store or the default MemoryCheckpointStore.
|
||||
*/
|
||||
export class RunStateManager {
|
||||
private store: CheckpointStore;
|
||||
|
||||
constructor(storage?: 'memory' | CheckpointStore) {
|
||||
this.store = storage && storage !== 'memory' ? storage : new MemoryCheckpointStore();
|
||||
}
|
||||
|
||||
/** Save a suspended run state. */
|
||||
async suspend(runId: string, state: SerializableAgentState): Promise<void> {
|
||||
await this.store.save(runId, { ...state, status: 'suspended' });
|
||||
}
|
||||
|
||||
/** Load a suspended run state for resumption and mark it running. Status is not updated in the store. */
|
||||
async resume(runId: string): Promise<SerializableAgentState | undefined> {
|
||||
const state = await this.store.load(runId);
|
||||
if (!state) return undefined;
|
||||
if (state.status !== 'suspended') {
|
||||
throw new Error(`Run ${runId} is not suspended. Cannot resume.`);
|
||||
}
|
||||
const newState: SerializableAgentState = { ...state, status: 'running' };
|
||||
return newState;
|
||||
}
|
||||
|
||||
/** Delete a finished run from storage. Called when a resumed run completes without re-suspending. */
|
||||
async complete(runId: string): Promise<void> {
|
||||
try {
|
||||
await this.store.delete(runId);
|
||||
} catch (deleteError: unknown) {
|
||||
console.error(`[RunStateManager] Failed to delete checkpoint ${runId}:`, deleteError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Generate a unique run ID. */
|
||||
export function generateRunId(): string {
|
||||
return `run_${crypto.randomUUID()}`;
|
||||
}
|
||||
145
packages/@n8n/agents/src/runtime/runtime-helpers.ts
Normal file
145
packages/@n8n/agents/src/runtime/runtime-helpers.ts
Normal file
|
|
@ -0,0 +1,145 @@
|
|||
/**
|
||||
* Pure utility functions used by AgentRuntime that require no class context.
|
||||
* These are extracted here to keep agent-runtime.ts focused on orchestration logic.
|
||||
*/
|
||||
import { toDbMessage } from '../sdk/message';
|
||||
import type { GenerateResult, StreamChunk, TokenUsage } from '../types';
|
||||
import { toTokenUsage } from './stream';
|
||||
import type { AgentDbMessage, AgentMessage, ContentToolResult } from '../types/sdk/message';
|
||||
import type { JSONValue } from '../types/utils/json';
|
||||
|
||||
/** Normalize a string input to an AgentDbMessage array, assigning ids where missing. */
|
||||
export function normalizeInput(input: AgentMessage[] | string): AgentDbMessage[] {
|
||||
if (typeof input === 'string') {
|
||||
return [toDbMessage({ role: 'user', content: [{ type: 'text', text: input }] })];
|
||||
}
|
||||
return input.map(toDbMessage);
|
||||
}
|
||||
|
||||
/** Build an AI SDK tool ModelMessage for a tool execution result. */
|
||||
export function makeToolResultMessage(
|
||||
toolCallId: string,
|
||||
toolName: string,
|
||||
result: unknown,
|
||||
): AgentDbMessage {
|
||||
return toDbMessage({
|
||||
role: 'tool',
|
||||
content: [
|
||||
{
|
||||
type: 'tool-result',
|
||||
toolCallId,
|
||||
toolName,
|
||||
result: result as JSONValue,
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an AI SDK tool ModelMessage for a tool execution error.
|
||||
* The LLM receives this as a tool result so it can self-correct on the next iteration.
|
||||
* The error is surfaced via the output json value so the LLM can read and reason about it.
|
||||
*/
|
||||
export function makeErrorToolResultMessage(
|
||||
toolCallId: string,
|
||||
toolName: string,
|
||||
error: unknown,
|
||||
): AgentDbMessage {
|
||||
const message = error instanceof Error ? `${error.name}: ${error.message}` : String(error);
|
||||
return toDbMessage({
|
||||
role: 'tool',
|
||||
content: [
|
||||
{
|
||||
type: 'tool-result',
|
||||
toolCallId,
|
||||
toolName,
|
||||
result: { error: message } as JSONValue,
|
||||
isError: true,
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
|
||||
/** Extract all tool-result content parts from a flat list of agent messages. */
|
||||
export function extractToolResults(messages: AgentDbMessage[]): ContentToolResult[] {
|
||||
return messages
|
||||
.flatMap((m) => ('content' in m ? m.content : []))
|
||||
.filter((c): c is ContentToolResult => c.type === 'tool-result');
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a ReadableStream that immediately yields an error chunk followed by
|
||||
* a finish chunk. Used when setup errors prevent the normal stream loop from
|
||||
* starting, so callers always receive a well-formed stream.
|
||||
*/
|
||||
export function makeErrorStream(error: unknown): ReadableStream<StreamChunk> {
|
||||
const { readable, writable } = new TransformStream<StreamChunk, StreamChunk>();
|
||||
const writer = writable.getWriter();
|
||||
writer.write({ type: 'error', error }).catch(() => {});
|
||||
writer.write({ type: 'finish', finishReason: 'error' }).catch(() => {});
|
||||
writer.close().catch(() => {});
|
||||
return readable;
|
||||
}
|
||||
|
||||
/** Accumulate token usage across two values, returning undefined if both are absent. */
|
||||
export function mergeUsage(
|
||||
current: TokenUsage | undefined,
|
||||
next: TokenUsage | undefined,
|
||||
): TokenUsage | undefined {
|
||||
if (!next) return current;
|
||||
if (!current) return next;
|
||||
const merged: TokenUsage = {
|
||||
promptTokens: current.promptTokens + next.promptTokens,
|
||||
completionTokens: current.completionTokens + next.completionTokens,
|
||||
totalTokens: current.totalTokens + next.totalTokens,
|
||||
};
|
||||
|
||||
const cacheRead =
|
||||
(current.inputTokenDetails?.cacheRead ?? 0) + (next.inputTokenDetails?.cacheRead ?? 0);
|
||||
const cacheWrite =
|
||||
(current.inputTokenDetails?.cacheWrite ?? 0) + (next.inputTokenDetails?.cacheWrite ?? 0);
|
||||
if (cacheRead > 0 || cacheWrite > 0) {
|
||||
merged.inputTokenDetails = {
|
||||
...(cacheRead > 0 && { cacheRead }),
|
||||
...(cacheWrite > 0 && { cacheWrite }),
|
||||
};
|
||||
}
|
||||
|
||||
const reasoning =
|
||||
(current.outputTokenDetails?.reasoning ?? 0) + (next.outputTokenDetails?.reasoning ?? 0);
|
||||
if (reasoning > 0) {
|
||||
merged.outputTokenDetails = { reasoning };
|
||||
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
/**
|
||||
* Accumulate token usage across loop iterations.
|
||||
* Wraps mergeUsage + toTokenUsage to keep call sites concise.
|
||||
*/
|
||||
export function accumulateUsage(
|
||||
current: TokenUsage | undefined,
|
||||
raw:
|
||||
| {
|
||||
inputTokens?: number | undefined;
|
||||
outputTokens?: number | undefined;
|
||||
totalTokens?: number | undefined;
|
||||
inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number };
|
||||
outputTokenDetails?: { reasoningTokens?: number };
|
||||
}
|
||||
| undefined,
|
||||
): TokenUsage | undefined {
|
||||
if (!raw) return current;
|
||||
return mergeUsage(current, toTokenUsage(raw));
|
||||
}
|
||||
|
||||
/** Compute totalCost from sub-agent usage already present on the result. */
|
||||
export function applySubAgentUsage(result: GenerateResult): GenerateResult {
|
||||
if (!result.subAgentUsage || result.subAgentUsage.length === 0) return result;
|
||||
|
||||
const parentCost = result.usage?.cost ?? 0;
|
||||
const subCost = result.subAgentUsage.reduce((sum, s) => sum + (s.usage.cost ?? 0), 0);
|
||||
|
||||
return { ...result, totalCost: parentCost + subCost };
|
||||
}
|
||||
120
packages/@n8n/agents/src/runtime/stream.ts
Normal file
120
packages/@n8n/agents/src/runtime/stream.ts
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
import type { TextStreamPart, ToolSet } from 'ai';
|
||||
|
||||
import type { FinishReason, StreamChunk, TokenUsage } from '../types';
|
||||
import type { JSONValue } from '../types/utils/json';
|
||||
|
||||
/** Map AI SDK v6 LanguageModelUsage to our TokenUsage type. */
|
||||
export function toTokenUsage(
|
||||
usage:
|
||||
| {
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
totalTokens?: number;
|
||||
inputTokenDetails?: { cacheReadTokens?: number; cacheWriteTokens?: number };
|
||||
outputTokenDetails?: { reasoningTokens?: number };
|
||||
}
|
||||
| undefined,
|
||||
): TokenUsage | undefined {
|
||||
if (!usage) return undefined;
|
||||
|
||||
const result: TokenUsage = {
|
||||
promptTokens: usage.inputTokens ?? 0,
|
||||
completionTokens: usage.outputTokens ?? 0,
|
||||
totalTokens: usage.totalTokens ?? (usage.inputTokens ?? 0) + (usage.outputTokens ?? 0),
|
||||
};
|
||||
|
||||
const cacheRead = usage.inputTokenDetails?.cacheReadTokens;
|
||||
const cacheWrite = usage.inputTokenDetails?.cacheWriteTokens;
|
||||
if (cacheRead || cacheWrite) {
|
||||
result.inputTokenDetails = {
|
||||
...(cacheRead && { cacheRead }),
|
||||
...(cacheWrite && { cacheWrite }),
|
||||
};
|
||||
}
|
||||
|
||||
if (usage.outputTokenDetails?.reasoningTokens !== undefined) {
|
||||
result.outputTokenDetails = { reasoning: usage.outputTokenDetails.reasoningTokens };
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/** Convert a single AI SDK v6 fullStream chunk to an n8n StreamChunk (or undefined to skip). */
|
||||
export function convertChunk(c: TextStreamPart<ToolSet>): StreamChunk | undefined {
|
||||
switch (c.type) {
|
||||
case 'text-delta':
|
||||
return { type: 'text-delta', delta: c.text ?? '' };
|
||||
|
||||
case 'reasoning-delta':
|
||||
return { type: 'reasoning-delta', delta: c.text ?? '' };
|
||||
|
||||
case 'tool-call':
|
||||
return {
|
||||
type: 'message',
|
||||
message: {
|
||||
role: 'tool',
|
||||
content: [
|
||||
{
|
||||
type: 'tool-call',
|
||||
toolCallId: c.toolCallId,
|
||||
toolName: c.toolName ?? '',
|
||||
input: c.input as JSONValue,
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
case 'tool-input-start':
|
||||
return {
|
||||
type: 'tool-call-delta',
|
||||
name: c.toolName,
|
||||
};
|
||||
|
||||
case 'tool-input-delta':
|
||||
return {
|
||||
type: 'tool-call-delta',
|
||||
...(c.delta !== undefined && { argumentsDelta: c.delta }),
|
||||
};
|
||||
|
||||
case 'tool-result':
|
||||
return {
|
||||
type: 'message',
|
||||
message: {
|
||||
role: 'tool',
|
||||
content: [
|
||||
{
|
||||
type: 'tool-result',
|
||||
toolCallId: c.toolCallId ?? '',
|
||||
toolName: c.toolName ?? '',
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
|
||||
result: c.output && 'value' in c.output ? (c.output.value as JSONValue) : null,
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
case 'error':
|
||||
return { type: 'error', error: c.error };
|
||||
|
||||
case 'finish-step': {
|
||||
const usage = toTokenUsage(c.usage);
|
||||
return {
|
||||
type: 'finish',
|
||||
finishReason: (c.finishReason ?? 'stop') as FinishReason,
|
||||
...(usage && { usage }),
|
||||
};
|
||||
}
|
||||
|
||||
case 'finish': {
|
||||
const usage = toTokenUsage(c.totalUsage);
|
||||
return {
|
||||
type: 'finish',
|
||||
finishReason: (c.finishReason ?? 'stop') as FinishReason,
|
||||
...(usage && { usage }),
|
||||
};
|
||||
}
|
||||
|
||||
default:
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
import { isLlmMessage } from '../sdk/message';
|
||||
import type { AgentDbMessage, MessageContent } from '../types/sdk/message';
|
||||
|
||||
/**
|
||||
* Strip orphaned tool-call and tool-result content from a message list.
|
||||
*
|
||||
* When memory loads the last N messages, the window boundary can split
|
||||
* tool-call / tool-result pairs, leaving one side without its counterpart.
|
||||
* Sending these orphans to the LLM causes provider errors because tool
|
||||
* calls and results must always be paired.
|
||||
*
|
||||
* This function:
|
||||
* 1. Collects all toolCallIds present in tool-call and tool-result blocks.
|
||||
* 2. Identifies orphans — calls without a matching result and vice-versa.
|
||||
* 3. Strips orphaned content blocks from their messages.
|
||||
* 4. Drops messages that become empty after stripping (e.g. a tool message
|
||||
* whose only content was the orphaned result).
|
||||
* 5. Preserves non-tool content (text, reasoning, files) in mixed messages.
|
||||
*/
|
||||
export function stripOrphanedToolMessages(messages: AgentDbMessage[]): AgentDbMessage[] {
|
||||
const callIds = new Set<string>();
|
||||
const resultIds = new Set<string>();
|
||||
|
||||
for (const msg of messages) {
|
||||
if (!isLlmMessage(msg)) continue;
|
||||
for (const block of msg.content) {
|
||||
if (block.type === 'tool-call' && block.toolCallId) {
|
||||
callIds.add(block.toolCallId);
|
||||
} else if (block.type === 'tool-result' && block.toolCallId) {
|
||||
resultIds.add(block.toolCallId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const orphanedCallIds = new Set([...callIds].filter((id) => !resultIds.has(id)));
|
||||
const orphanedResultIds = new Set([...resultIds].filter((id) => !callIds.has(id)));
|
||||
|
||||
if (orphanedCallIds.size === 0 && orphanedResultIds.size === 0) {
|
||||
return messages;
|
||||
}
|
||||
|
||||
const result: AgentDbMessage[] = [];
|
||||
|
||||
for (const msg of messages) {
|
||||
if (!isLlmMessage(msg)) {
|
||||
result.push(msg);
|
||||
continue;
|
||||
}
|
||||
|
||||
const filtered = msg.content.filter((block: MessageContent) => {
|
||||
if (block.type === 'tool-call' && block.toolCallId && orphanedCallIds.has(block.toolCallId)) {
|
||||
return false;
|
||||
}
|
||||
if (
|
||||
block.type === 'tool-result' &&
|
||||
block.toolCallId &&
|
||||
orphanedResultIds.has(block.toolCallId)
|
||||
) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
if (filtered.length === 0) continue;
|
||||
|
||||
result.push({ ...msg, content: filtered });
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
77
packages/@n8n/agents/src/runtime/title-generation.ts
Normal file
77
packages/@n8n/agents/src/runtime/title-generation.ts
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
import { generateText } from 'ai';
|
||||
|
||||
import type { BuiltMemory, TitleGenerationConfig } from '../types';
|
||||
import { createFilteredLogger } from './logger';
|
||||
import { createModel } from './model-factory';
|
||||
import type { ModelConfig } from '../types/sdk/agent';
|
||||
import type { AgentDbMessage } from '../types/sdk/message';
|
||||
|
||||
const logger = createFilteredLogger();
|
||||
|
||||
// Default system prompt for the title-generation model, used when the caller
// does not supply TitleGenerationConfig.instructions.
const DEFAULT_TITLE_INSTRUCTIONS = [
	'- you will generate a short title based on the first message a user begins a conversation with',
	'- ensure it is not more than 80 characters long',
	"- the title should be a summary of the user's message",
	'- do not use quotes or colons',
	'- the entire text you return will be used as the title',
].join('\n');
|
||||
|
||||
/**
|
||||
* Generate a title for a thread if it doesn't already have one.
|
||||
*
|
||||
* Designed to run fire-and-forget after the agent response is complete.
|
||||
* All errors are caught and logged — title generation failures never
|
||||
* block or break the agent response.
|
||||
*/
|
||||
export async function generateThreadTitle(opts: {
|
||||
memory: BuiltMemory;
|
||||
threadId: string;
|
||||
resourceId: string;
|
||||
titleConfig: TitleGenerationConfig;
|
||||
/** The agent's own model, used as fallback when titleConfig.model is not set. */
|
||||
agentModel: ModelConfig;
|
||||
/** Messages from the current turn, used to find the first user message. */
|
||||
turnDelta: AgentDbMessage[];
|
||||
}): Promise<void> {
|
||||
try {
|
||||
const thread = await opts.memory.getThread(opts.threadId);
|
||||
if (thread?.title) return;
|
||||
|
||||
const userMessage = opts.turnDelta.find((m) => 'role' in m && m.role === 'user');
|
||||
if (!userMessage || !('content' in userMessage)) return;
|
||||
|
||||
const userText = (userMessage.content as Array<{ type: string; text?: string }>)
|
||||
.filter((c) => c.type === 'text' && c.text)
|
||||
.map((c) => c.text!)
|
||||
.join(' ');
|
||||
if (!userText) return;
|
||||
|
||||
const titleModelId = opts.titleConfig.model ?? opts.agentModel;
|
||||
const titleModel = createModel(titleModelId);
|
||||
const instructions = opts.titleConfig.instructions ?? DEFAULT_TITLE_INSTRUCTIONS;
|
||||
|
||||
const result = await generateText({
|
||||
model: titleModel,
|
||||
messages: [
|
||||
{ role: 'system', content: instructions },
|
||||
{ role: 'user', content: userText },
|
||||
],
|
||||
});
|
||||
|
||||
let title = result.text?.trim();
|
||||
if (!title) return;
|
||||
|
||||
// Strip <think>...</think> blocks (e.g. from DeepSeek R1)
|
||||
title = title.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
||||
if (!title) return;
|
||||
|
||||
await opts.memory.saveThread({
|
||||
id: opts.threadId,
|
||||
resourceId: opts.resourceId,
|
||||
title,
|
||||
metadata: thread?.metadata,
|
||||
});
|
||||
} catch (error) {
|
||||
logger.warn('Failed to generate thread title', { error });
|
||||
}
|
||||
}
|
||||
182
packages/@n8n/agents/src/runtime/tool-adapter.ts
Normal file
182
packages/@n8n/agents/src/runtime/tool-adapter.ts
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
import { tool, jsonSchema, type Tool as AiSdkTool } from 'ai';
|
||||
import type { JSONSchema7 } from 'json-schema';
|
||||
import { z } from 'zod';
|
||||
|
||||
import {
|
||||
type BuiltProviderTool,
|
||||
type BuiltTool,
|
||||
type BuiltTelemetry,
|
||||
type InterruptibleToolContext,
|
||||
type ToolContext,
|
||||
} from '../types';
|
||||
import type { SubAgentUsage } from '../types/sdk/agent';
|
||||
import { isZodSchema } from '../utils/zod';
|
||||
|
||||
// AI SDK tool narrowed to the provider-executed variant (`type: 'provider'`).
type AiSdkProviderTool = AiSdkTool & {
	type: 'provider';
};
|
||||
/**
 * Branded symbol used to tag the return value of `ctx.suspend(payload)`.
 * The agent runtime checks for this brand on the tool's return value
 * instead of catching a thrown error.
 */
// Module-private: only this file can mint or detect suspend results.
const SUSPEND_BRAND = Symbol('SuspendBrand');
|
||||
|
||||
/**
 * Branded symbol used to tag tool results from agent-as-tool calls.
 * Carries sub-agent usage so the parent runtime can aggregate costs
 * without any external state (WeakMap, mutable tool fields, etc.).
 */
// Module-private: only this file can mint or detect agent-tool results.
const AGENT_TOOL_BRAND = Symbol('AgentToolBrand');
|
||||
|
||||
/** Shape of the branded value produced by `ctx.suspend(payload)`. */
export interface SuspendedToolResult {
	readonly [SUSPEND_BRAND]: true;
	// Arbitrary data the tool wants persisted until the run is resumed.
	payload: unknown;
}
|
||||
|
||||
/** Type guard: returns true when a tool's return value is a suspend signal. */
|
||||
export function isSuspendedToolResult(value: unknown): value is SuspendedToolResult {
|
||||
return typeof value === 'object' && value !== null && SUSPEND_BRAND in value;
|
||||
}
|
||||
|
||||
/** Branded result of an agent-as-tool call; detected via isAgentToolResult(). */
export interface AgentToolResult {
	readonly [AGENT_TOOL_BRAND]: true;
	/** The actual tool output (passed back to the LLM). */
	readonly output: unknown;
	/** Sub-agent usage entries to aggregate into the parent's result. */
	readonly subAgentUsage: SubAgentUsage[];
}
|
||||
|
||||
/** Type guard: returns true when a tool result carries sub-agent usage. */
|
||||
export function isAgentToolResult(value: unknown): value is AgentToolResult {
|
||||
return typeof value === 'object' && value !== null && AGENT_TOOL_BRAND in value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a branded agent-tool result that carries sub-agent usage alongside the output.
|
||||
* The output properties are spread onto the object so it remains a valid tool output
|
||||
* even when accessed directly (e.g. in tests). The runtime detects the brand via
|
||||
* isAgentToolResult() and extracts the sub-agent usage.
|
||||
* Typed as `never` so `return createAgentToolResult(...)` satisfies any handler return type
|
||||
* (same pattern as ctx.suspend).
|
||||
*/
|
||||
export function createAgentToolResult(output: unknown, subAgentUsage: SubAgentUsage[]): never {
|
||||
const base = typeof output === 'object' && output !== null ? output : {};
|
||||
return { ...base, [AGENT_TOOL_BRAND]: true, output, subAgentUsage } as never;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an array of BuiltProviderTools into a Record of AI SDK provider-defined tool objects.
|
||||
* Provider tools are executed on the provider's infrastructure (e.g. Anthropic web search,
|
||||
* OpenAI code interpreter) — they are never executed locally by the agent loop.
|
||||
*
|
||||
* The cast to AiSdkTool is required because the AI SDK's ToolSet type demands `inputSchema`
|
||||
* on every entry, but provider-defined tools have no input schema (the provider handles it).
|
||||
* At runtime the AI SDK correctly recognises the `type: 'provider'` discriminant.
|
||||
*/
|
||||
export function toAiSdkProviderTools(tools?: BuiltProviderTool[]): Record<string, AiSdkTool> {
|
||||
if (!tools || tools.length === 0) return {};
|
||||
|
||||
const result: Record<string, AiSdkTool> = {};
|
||||
for (const t of tools) {
|
||||
const providerTool: AiSdkProviderTool = {
|
||||
type: 'provider',
|
||||
id: t.name,
|
||||
args: t.args,
|
||||
inputSchema: t.inputSchema ?? z.any(),
|
||||
};
|
||||
result[t.name] = providerTool;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
const fixSchema = (schema: JSONSchema7): JSONSchema7 => {
|
||||
// Ensure 'type: object' is present when properties are present (required by some providers):
|
||||
if (
|
||||
typeof schema === 'object' &&
|
||||
schema !== null &&
|
||||
'properties' in schema &&
|
||||
!('type' in schema)
|
||||
) {
|
||||
return { ...schema, type: 'object' as const };
|
||||
}
|
||||
return schema;
|
||||
};
|
||||
|
||||
/**
|
||||
* Convert an array of BuiltTools into a Record of AI SDK tool definitions.
|
||||
* Tools are created WITHOUT execute — the agent loop handles execution manually.
|
||||
* Supports both Zod schemas (SDK-defined tools) and raw JSON Schema (MCP tools).
|
||||
*/
|
||||
export function toAiSdkTools(tools?: BuiltTool[]): Record<string, AiSdkTool> {
|
||||
if (!tools || tools.length === 0) return {};
|
||||
|
||||
const result: Record<string, AiSdkTool> = {};
|
||||
for (const t of tools) {
|
||||
if (t.inputSchema) {
|
||||
if (isZodSchema(t.inputSchema)) {
|
||||
result[t.name] = tool({
|
||||
description: t.description,
|
||||
inputSchema: t.inputSchema,
|
||||
providerOptions: t.providerOptions,
|
||||
});
|
||||
} else {
|
||||
result[t.name] = tool({
|
||||
description: t.description,
|
||||
inputSchema: jsonSchema(fixSchema(t.inputSchema)),
|
||||
providerOptions: t.providerOptions,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a tool call by finding its handler and running it.
|
||||
* For tools with suspend/resume schemas, passes an InterruptibleToolContext
|
||||
* that lets the handler call `suspend(payload)`.
|
||||
*/
|
||||
export async function executeTool(
|
||||
args: unknown,
|
||||
builtTool: BuiltTool,
|
||||
resumeData?: unknown,
|
||||
parentTelemetry?: BuiltTelemetry,
|
||||
): Promise<unknown> {
|
||||
if (!builtTool.handler) {
|
||||
throw new Error(`No handler found for tool "${builtTool.name}"`);
|
||||
}
|
||||
|
||||
if (builtTool.suspendSchema) {
|
||||
const ctx: InterruptibleToolContext = {
|
||||
suspend: async (payload: unknown): Promise<never> => {
|
||||
return await Promise.resolve({ [SUSPEND_BRAND]: true, payload } as never);
|
||||
},
|
||||
resumeData,
|
||||
parentTelemetry,
|
||||
};
|
||||
return await builtTool.handler(args, ctx);
|
||||
}
|
||||
|
||||
const ctx: ToolContext = { parentTelemetry };
|
||||
return await builtTool.handler(args, ctx);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a tool has suspend/resume schemas (i.e. is interruptible).
|
||||
*/
|
||||
export function isInterruptible(toolName: string, toolMap: Map<string, BuiltTool>): boolean {
|
||||
const builtTool = toolMap.get(toolName);
|
||||
return !!builtTool?.suspendSchema;
|
||||
}
|
||||
|
||||
/** Build a Map from tool name to BuiltTool for quick lookups. */
|
||||
export function buildToolMap(tools?: BuiltTool[]): Map<string, BuiltTool> {
|
||||
const map = new Map<string, BuiltTool>();
|
||||
if (tools) {
|
||||
for (const t of tools) {
|
||||
map.set(t.name, t);
|
||||
}
|
||||
}
|
||||
return map;
|
||||
}
|
||||
183
packages/@n8n/agents/src/runtime/working-memory.ts
Normal file
183
packages/@n8n/agents/src/runtime/working-memory.ts
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
import type { z } from 'zod';
|
||||
|
||||
import type { StreamChunk } from '../types';
|
||||
import { createFilteredLogger } from './logger';
|
||||
|
||||
const logger = createFilteredLogger();
|
||||
|
||||
type ZodObjectSchema = z.ZodObject<z.ZodRawShape>;
|
||||
|
||||
const OPEN_TAG = '<working_memory>';
|
||||
const CLOSE_TAG = '</working_memory>';
|
||||
|
||||
/**
|
||||
* Extract working memory content from an LLM response.
|
||||
* Returns the clean text (tags stripped) and the extracted working memory (or null).
|
||||
*/
|
||||
export function parseWorkingMemory(text: string): {
|
||||
cleanText: string;
|
||||
workingMemory: string | null;
|
||||
} {
|
||||
const openIdx = text.indexOf(OPEN_TAG);
|
||||
if (openIdx === -1) return { cleanText: text, workingMemory: null };
|
||||
|
||||
const closeIdx = text.indexOf(CLOSE_TAG, openIdx);
|
||||
if (closeIdx === -1) return { cleanText: text, workingMemory: null };
|
||||
|
||||
const contentStart = openIdx + OPEN_TAG.length;
|
||||
const rawContent = text.slice(contentStart, closeIdx);
|
||||
const workingMemory = rawContent.replace(/^\n/, '').replace(/\n$/, '');
|
||||
|
||||
const before = text.slice(0, openIdx).replace(/\n$/, '');
|
||||
const after = text.slice(closeIdx + CLOSE_TAG.length).replace(/^\n/, '');
|
||||
const cleanText = (before + (after ? '\n' + after : '')).trim();
|
||||
|
||||
return { cleanText, workingMemory };
|
||||
}
|
||||
|
||||
/**
 * Generate the system prompt instruction for working memory.
 *
 * @param template Current working-memory template embedded in the prompt.
 * @param structured When true, the model is told to emit valid JSON matching
 *   the schema; otherwise it fills in the free-form template.
 * @returns Prompt text instructing the model to append its updated state
 *   inside OPEN_TAG/CLOSE_TAG at the very end of its response.
 */
export function buildWorkingMemoryInstruction(template: string, structured: boolean): string {
	const format = structured
		? 'Emit the updated state as valid JSON matching the schema'
		: 'Update the template with any new information learned';

	return [
		'',
		'## Working Memory',
		'',
		'You have persistent working memory that survives across conversations.',
		'The current state will be shown to you in a system message.',
		'IMPORTANT: Always respond to the user first with your normal reply.',
		`Then, at the very end of your response, emit your updated working memory inside ${OPEN_TAG}...${CLOSE_TAG} tags on a new line.`,
		`${format}. If nothing changed, emit the current state unchanged.`,
		'The working memory block must be the last thing in your response, after your reply to the user.',
		'',
		'Current template:',
		'```',
		template,
		'```',
	].join('\n');
}
|
||||
|
||||
/**
|
||||
* Convert a Zod object schema to a JSON template string for structured working memory.
|
||||
*/
|
||||
export function templateFromSchema(schema: ZodObjectSchema): string {
|
||||
const obj: Record<string, string> = {};
|
||||
for (const [key, field] of Object.entries(schema.shape)) {
|
||||
const desc = field.description;
|
||||
obj[key] = desc ?? '';
|
||||
}
|
||||
return JSON.stringify(obj, null, 2);
|
||||
}
|
||||
|
||||
// Callback used to save an extracted working-memory snapshot.
type PersistFn = (content: string) => Promise<void>;

/**
 * Wraps a stream writer to intercept <working_memory> tags from text-delta chunks.
 * All non-text-delta chunks pass through unchanged.
 * Text inside the tags is buffered and persisted when the closing tag is detected.
 *
 * Because a tag may be split across several deltas, text that could be the
 * start of a tag is held back (not forwarded) until enough characters arrive
 * to confirm or rule the tag out.
 */
export class WorkingMemoryStreamFilter {
	// Downstream writer that receives the filtered chunks.
	private writer: WritableStreamDefaultWriter<StreamChunk>;

	// Persistence callback invoked with each completed working-memory block.
	private persist: PersistFn;

	// 'normal' = forwarding text; 'inside' = buffering tag content.
	private state: 'normal' | 'inside' = 'normal';

	// Accumulated working-memory content while inside a tag.
	private buffer = '';

	// Text received but not yet forwarded/classified (possible partial tag).
	private pendingText = '';

	constructor(writer: WritableStreamDefaultWriter<StreamChunk>, persist: PersistFn) {
		this.writer = writer;
		this.persist = persist;
	}

	/**
	 * Process one stream chunk: non-text chunks pass straight through; text
	 * deltas are scanned for working-memory tags before being forwarded.
	 */
	async write(chunk: StreamChunk): Promise<void> {
		if (chunk.type !== 'text-delta') {
			await this.writer.write(chunk);
			return;
		}

		this.pendingText += chunk.delta;

		while (this.pendingText.length > 0) {
			if (this.state === 'normal') {
				const openIdx = this.pendingText.indexOf(OPEN_TAG);
				if (openIdx === -1) {
					// No full open tag found. Check if the tail is a valid prefix of OPEN_TAG.
					const lastLt = this.pendingText.lastIndexOf('<');
					if (
						lastLt !== -1 &&
						this.pendingText.length - lastLt < OPEN_TAG.length &&
						OPEN_TAG.startsWith(this.pendingText.slice(lastLt))
					) {
						// Potential partial tag at end — forward everything before it, hold the rest
						if (lastLt > 0) {
							await this.writer.write({
								type: 'text-delta',
								delta: this.pendingText.slice(0, lastLt),
							});
						}
						this.pendingText = this.pendingText.slice(lastLt);
					} else {
						// No partial tag concern — forward everything
						await this.writer.write({ type: 'text-delta', delta: this.pendingText });
						this.pendingText = '';
					}
					break;
				}
				// Forward text before the tag
				if (openIdx > 0) {
					await this.writer.write({
						type: 'text-delta',
						delta: this.pendingText.slice(0, openIdx),
					});
				}
				this.state = 'inside';
				this.pendingText = this.pendingText.slice(openIdx + OPEN_TAG.length);
				this.buffer = '';
			} else {
				// Inside tag — look for closing tag
				const closeIdx = this.pendingText.indexOf(CLOSE_TAG);
				if (closeIdx === -1) {
					// Check if the tail is a valid prefix of CLOSE_TAG — hold it back
					const lastLt = this.pendingText.lastIndexOf('<');
					if (
						lastLt !== -1 &&
						this.pendingText.length - lastLt < CLOSE_TAG.length &&
						CLOSE_TAG.startsWith(this.pendingText.slice(lastLt))
					) {
						this.buffer += this.pendingText.slice(0, lastLt);
						this.pendingText = this.pendingText.slice(lastLt);
					} else {
						this.buffer += this.pendingText;
						this.pendingText = '';
					}
					break;
				}
				this.buffer += this.pendingText.slice(0, closeIdx);
				this.pendingText = this.pendingText.slice(closeIdx + CLOSE_TAG.length);
				this.state = 'normal';
				// Trim at most one leading/trailing newline, then persist
				// fire-and-forget — persistence failures never break the stream.
				const content = this.buffer.replace(/^\n/, '').replace(/\n$/, '');
				this.persist(content).catch((error: unknown) => {
					logger.warn('Failed to persist working memory', { error });
				});
				this.buffer = '';
			}
		}
	}

	/**
	 * Flush any held-back text (only when not inside a tag — an unterminated
	 * tag's content is discarded) and reset state for reuse.
	 */
	async flush(): Promise<void> {
		if (this.state === 'normal' && this.pendingText.length > 0) {
			await this.writer.write({ type: 'text-delta', delta: this.pendingText });
		}
		// Reset all state so the filter is clean for reuse after abort/completion.
		this.pendingText = '';
		this.buffer = '';
		this.state = 'normal';
	}
}
|
||||
676
packages/@n8n/agents/src/sdk/agent.ts
Normal file
676
packages/@n8n/agents/src/sdk/agent.ts
Normal file
|
|
@ -0,0 +1,676 @@
|
|||
import type { ProviderOptions } from '@ai-sdk/provider-utils';
|
||||
import { z } from 'zod';
|
||||
|
||||
import type { Eval } from './eval';
|
||||
import type { McpClient } from './mcp-client';
|
||||
import { Memory } from './memory';
|
||||
import { Telemetry } from './telemetry';
|
||||
import { Tool, wrapToolForApproval } from './tool';
|
||||
import { AgentRuntime } from '../runtime/agent-runtime';
|
||||
import { AgentEventBus } from '../runtime/event-bus';
|
||||
import { createAgentToolResult } from '../runtime/tool-adapter';
|
||||
import type {
|
||||
AgentEvent,
|
||||
AgentEventHandler,
|
||||
AgentMiddleware,
|
||||
BuiltAgent,
|
||||
BuiltEval,
|
||||
BuiltGuardrail,
|
||||
BuiltMemory,
|
||||
BuiltProviderTool,
|
||||
BuiltTool,
|
||||
BuiltTelemetry,
|
||||
CheckpointStore,
|
||||
ExecutionOptions,
|
||||
GenerateResult,
|
||||
MemoryConfig,
|
||||
ModelConfig,
|
||||
Provider,
|
||||
RunOptions,
|
||||
SerializableAgentState,
|
||||
StreamResult,
|
||||
SubAgentUsage,
|
||||
ThinkingConfig,
|
||||
ThinkingConfigFor,
|
||||
ResumeOptions,
|
||||
} from '../types';
|
||||
import type { AgentMessage } from '../types/sdk/message';
|
||||
import type { Workspace } from '../workspace/workspace';
|
||||
|
||||
// Window size used when memory() receives a bare BuiltMemory without a config.
const DEFAULT_LAST_MESSAGES = 10;

// Tools may be passed to .tool() pre-built, or as builders exposing build().
type ToolParameter = BuiltTool | { build(): BuiltTool };
|
||||
|
||||
/**
|
||||
* Builder for creating AI agents with a fluent API.
|
||||
*
|
||||
* Usage:
|
||||
* ```typescript
|
||||
* const agent = new Agent('assistant')
|
||||
* .model('anthropic', 'claude-sonnet-4') // typed: Agent<'anthropic'>
|
||||
* .credential('anthropic')
|
||||
* .instructions('You are a helpful assistant.')
|
||||
* .tool(searchTool);
|
||||
*
|
||||
* const result = await agent.generate('Hello!');
|
||||
* ```
|
||||
*/
|
||||
|
||||
export class Agent implements BuiltAgent {
|
||||
	readonly name: string;

	// Model id in 'provider/model' form (string form of .model()); mutually
	// exclusive with modelConfigObj.
	private modelId?: string;

	// Model config object form of .model(); mutually exclusive with modelId.
	private modelConfigObj?: ModelConfig;

	// Provider options attached to the system instructions via .instructions().
	private instructionProviderOpts?: ProviderOptions;

	// System instructions text set via .instructions().
	private instructionsText?: string;

	// Locally-executed tools registered via .tool() (already built).
	private tools: BuiltTool[] = [];

	// Provider-executed tools registered via .providerTool().
	private providerTools: BuiltProviderTool[] = [];

	// Memory configuration set via .memory().
	private memoryConfig?: MemoryConfig;

	// TODO: Guardrails are accepted by the builder API for forward
	// compatibility but not yet wired to the runtime.
	private inputGuardrails: BuiltGuardrail[] = [];

	private outputGuardrails: BuiltGuardrail[] = [];

	// Evals registered via .eval(), run after each agent response.
	private agentEvals: BuiltEval[] = [];

	// Structured-output schema set via .structuredOutput().
	private outputSchema?: z.ZodType;

	// Checkpoint storage for tool suspend/resume, set via .checkpoint().
	private checkpointStore?: 'memory' | CheckpointStore;

	// Extended thinking / reasoning config set via .thinking().
	private thinkingConfig?: ThinkingConfig;

	// Credential name declared via .credential(); resolved by the engine.
	private credentialName?: string;

	// API key injected by the execution engine (see resolvedApiKey setter).
	private resolvedKey?: string;

	private runtime?: AgentRuntime;

	private concurrencyValue?: number;

	private telemetryBuilder?: Telemetry;

	private telemetryConfig?: BuiltTelemetry;

	// Middlewares registered via .middleware(), in registration order.
	private middlewares: AgentMiddleware[] = [];

	private requireToolApprovalValue = false;

	private mcpClients: McpClient[] = [];

	// In-flight lazy build; ensures build() runs at most once concurrently.
	private buildPromise: Promise<AgentRuntime> | undefined;

	private eventBus = new AgentEventBus();

	private workspaceInstance?: Workspace;
|
||||
|
||||
	/** Create an agent builder; `name` is the agent's readonly public identifier. */
	constructor(name: string) {
		this.name = name;
	}
|
||||
|
||||
/**
|
||||
* Set the model with provider type information.
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* // Typed form — enables provider-specific config on .thinking() etc.
|
||||
* agent.model('anthropic', 'claude-sonnet-4-5')
|
||||
*
|
||||
* // Untyped form — backwards compatible
|
||||
* agent.model('anthropic/claude-sonnet-4-5')
|
||||
* ```
|
||||
*/
|
||||
model(providerOrIdOrConfig: string | ModelConfig, modelName?: string): this {
|
||||
if (typeof providerOrIdOrConfig === 'string') {
|
||||
this.modelId = modelName ? `${providerOrIdOrConfig}/${modelName}` : providerOrIdOrConfig;
|
||||
this.modelConfigObj = undefined;
|
||||
} else {
|
||||
this.modelConfigObj = providerOrIdOrConfig;
|
||||
this.modelId = undefined;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Set the system instructions for the agent. Required before building. */
|
||||
instructions(text: string, options?: { providerOptions?: ProviderOptions }): this {
|
||||
this.instructionsText = text;
|
||||
this.instructionProviderOpts = options?.providerOptions;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Add a tool to the agent's capabilities. Accepts a built tool or a Tool builder (which will be built automatically). Can also accept an array of tools. */
|
||||
tool(t: ToolParameter | ToolParameter[]): this {
|
||||
if (Array.isArray(t)) {
|
||||
for (const tool of t) {
|
||||
this.tool(tool);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
const built = 'build' in t ? t.build() : t;
|
||||
this.tools.push(built);
|
||||
return this;
|
||||
}
|
||||
|
||||
	/**
	 * Add a provider-defined tool (e.g. Anthropic web search, OpenAI code interpreter).
	 * Provider tools execute on the provider's infrastructure, not in the agent loop.
	 */
	providerTool(builtProviderTool: BuiltProviderTool): this {
		this.providerTools.push(builtProviderTool);
		return this;
	}
|
||||
|
||||
/** Set the memory configuration. Accepts a MemoryConfig, Memory builder, or bare BuiltMemory. */
|
||||
memory(m: MemoryConfig | Memory | BuiltMemory): this {
|
||||
if (m instanceof Memory) {
|
||||
// Memory builder — call build()
|
||||
this.memoryConfig = m.build();
|
||||
} else if ('memory' in m && 'lastMessages' in m) {
|
||||
// MemoryConfig — use directly
|
||||
this.memoryConfig = m;
|
||||
} else {
|
||||
// Bare BuiltMemory — wrap in minimal config
|
||||
this.memoryConfig = { memory: m, lastMessages: DEFAULT_LAST_MESSAGES };
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
	/** Add a middleware. Each call appends one; registered middlewares accumulate. */
	middleware(m: AgentMiddleware): this {
		this.middlewares.push(m);
		return this;
	}
|
||||
|
||||
// TODO: guardrails can be a middleware internally
|
||||
/** Add an input guardrail. Accepts a built guardrail or a Guardrail builder. */
|
||||
inputGuardrail(g: BuiltGuardrail | { build(): BuiltGuardrail }): this {
|
||||
this.inputGuardrails.push('_config' in g ? g : g.build());
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Add an output guardrail. Accepts a built guardrail or a Guardrail builder. */
|
||||
outputGuardrail(g: BuiltGuardrail | { build(): BuiltGuardrail }): this {
|
||||
this.outputGuardrails.push('_config' in g ? g : g.build());
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Add an eval to run after each agent response. Accepts an Eval builder or BuiltEval. */
|
||||
eval(e: Eval | BuiltEval | { ensureBuilt(): BuiltEval }): this {
|
||||
const built = '_run' in e ? e : (e as Eval).ensureBuilt();
|
||||
this.agentEvals.push(built);
|
||||
return this;
|
||||
}
|
||||
|
||||
	/**
	 * Set the checkpoint storage for tool suspend/resume (human-in-the-loop).
	 * Required when any tool uses `.suspend()` / `.resume()`.
	 *
	 * - `'memory'` — in-process storage (lost on restart, fine for dev)
	 * - A storage provider instance (e.g. `new LibSQLStore(...)`, `new PgStore(...)`)
	 *
	 * @example
	 * ```typescript
	 * const agent = new Agent('assistant')
	 *   .model('anthropic/claude-sonnet-4-5')
	 *   .instructions('...')
	 *   .tool(dangerousTool) // has .suspend() / .resume()
	 *   .checkpoint('memory');
	 * ```
	 */
	checkpoint(storage: 'memory' | CheckpointStore): this {
		this.checkpointStore = storage;
		return this;
	}
|
||||
|
||||
	/**
	 * Declare a credential this agent requires. The execution engine resolves
	 * the credential name to an API key at build time and injects it into the
	 * model configuration — user code never handles raw keys.
	 *
	 * @example
	 * ```typescript
	 * const agent = new Agent('assistant')
	 *   .model('anthropic/claude-sonnet-4-5')
	 *   .credential('anthropic')
	 *   .instructions('You are helpful.');
	 * ```
	 */
	credential(name: string): this {
		this.credentialName = name;
		return this;
	}
|
||||
|
||||
	/** @internal Read the declared credential name (used by the execution engine). */
	protected get declaredCredential(): string | undefined {
		return this.credentialName;
	}
|
||||
|
||||
/**
 * @internal Set the resolved API key (called by the execution engine before super.build()).
 * The stored key is merged into the model configuration during build.
 */
protected set resolvedApiKey(key: string) {
	this.resolvedKey = key;
}
|
||||
|
||||
/**
 * Set a structured output schema. When set, the agent's response will be
 * parsed into a typed object matching the schema, available as
 * `result.structuredOutput` (see example below).
 *
 * @example
 * ```typescript
 * const agent = new Agent('extractor')
 *   .model('anthropic/claude-sonnet-4-5')
 *   .instructions('Extract structured data.')
 *   .structuredOutput(z.object({
 *     code: z.string(),
 *     explanation: z.string(),
 *   }));
 *
 * const result = await agent.generate('...');
 * console.log(result.structuredOutput); // { code: '...', explanation: '...' }
 * ```
 */
structuredOutput(schema: z.ZodType): this {
	this.outputSchema = schema;
	return this;
}
|
||||
|
||||
/**
 * Enable extended thinking / reasoning for the agent.
 * The config type is inferred from the provider passed as the first argument.
 *
 * @example
 * ```typescript
 * // Anthropic — budgetTokens
 * new Agent('thinker')
 *   .model('anthropic', 'claude-sonnet-4-5')
 *   .thinking('anthropic', { budgetTokens: 10000 })
 *
 * // OpenAI — reasoningEffort
 * new Agent('thinker')
 *   .model('openai', 'o3-mini')
 *   .thinking('openai', { reasoningEffort: 'high' })
 * ```
 */
thinking<P extends Provider>(_provider: P, config?: ThinkingConfigFor<P>): this {
	// The provider argument exists only to drive type inference; it is not stored.
	this.thinkingConfig = config ?? {};
	return this;
}
|
||||
|
||||
/**
 * Set telemetry configuration for this agent. Accepts a Telemetry builder or pre-built config.
 * Exactly one of builder/config is kept; the other is cleared so repeated calls
 * fully replace the previous setting.
 */
telemetry(t: Telemetry | BuiltTelemetry): this {
	if (t instanceof Telemetry) {
		this.telemetryBuilder = t;
		this.telemetryConfig = undefined;
		return this;
	}
	this.telemetryBuilder = undefined;
	this.telemetryConfig = t;
	return this;
}
|
||||
|
||||
/**
 * @internal Read the declared telemetry builder (used by the execution engine to resolve credentials).
 * Undefined when telemetry was never set, or was set from a pre-built config.
 */
protected get declaredTelemetry(): Telemetry | undefined {
	return this.telemetryBuilder;
}
|
||||
|
||||
/**
 * Set the number of tool calls to execute concurrently within a single LLM turn.
 *
 * - `1` (default) — sequential execution, fully backward-compatible.
 * - `Infinity` — unlimited parallelism (all tool calls start at once).
 * - Any number in between — bounded concurrency (e.g. `5` = at most 5 tools run simultaneously).
 */
toolCallConcurrency(n: number): this {
	// Valid values: Infinity, or an integer >= 1. NaN and fractions are rejected.
	const isValid = n >= 1 && (n === Infinity || Number.isInteger(n));
	if (!isValid) {
		throw new Error('toolCallConcurrency must be a positive integer or Infinity');
	}
	this.concurrencyValue = n;
	return this;
}
|
||||
|
||||
/**
 * Require human approval before any tool executes.
 * Tools that already have .suspend()/.resume() (suspendSchema) are skipped.
 * Requires .checkpoint() to be set.
 *
 * The wrapping happens at build time via `wrapToolForApproval` and applies to
 * both static tools and tools discovered from MCP clients.
 */
requireToolApproval(): this {
	this.requireToolApprovalValue = true;
	return this;
}
|
||||
|
||||
/**
 * Attach a workspace to this agent. Workspace tools and instructions
 * are injected at build time.
 *
 * Calling this again replaces any previously attached workspace.
 */
workspace(ws: Workspace): this {
	this.workspaceInstance = ws;
	return this;
}
|
||||
|
||||
/**
 * Add an MCP client as a tool source for this agent.
 * Tools from all servers in the client become available to the agent.
 * Multiple clients can be added; tools are merged across all of them.
 *
 * Note: tool names must be unique across static and MCP tools — duplicates
 * fail with a collision error at build time.
 *
 * @example
 * ```typescript
 * const client = new McpClient([
 *   { name: 'browser', url: 'http://localhost:9222/mcp', transport: 'streamableHttp' },
 *   { name: 'fs', command: 'npx', args: ['@anthropic/mcp-fs', '/tmp'] },
 * ]);
 *
 * const agent = new Agent('assistant')
 *   .model('anthropic', 'claude-sonnet-4')
 *   .mcp(client)
 *   .instructions('You are a helpful assistant.');
 * ```
 */
mcp(client: McpClient): this {
	this.mcpClients.push(client);
	return this;
}
|
||||
|
||||
/** Get the evals attached to this agent. */
|
||||
get evaluations(): BuiltEval[] {
|
||||
return [...this.agentEvals];
|
||||
}
|
||||
|
||||
/**
 * Register a handler for an agent lifecycle event.
 * Handlers are called synchronously during the agentic loop.
 *
 * Delegates to the shared event bus, which is also handed to the runtime at build time.
 */
on(event: AgentEvent, handler: AgentEventHandler): void {
	this.eventBus.on(event, handler);
}
|
||||
|
||||
/**
 * Wrap this agent as a tool for use in multi-agent composition.
 * The tool sends a text prompt to this agent and returns the text of the response.
 *
 * @example
 * ```typescript
 * const coordinatorAgent = new Agent('coordinator')
 *   .model('anthropic/claude-sonnet-4-5')
 *   .instructions('Route tasks to specialist agents.')
 *   .tool(writerAgent.asTool('Write content given a topic'));
 * ```
 */
asTool(description: string): BuiltTool {
	// eslint-disable-next-line @typescript-eslint/no-this-alias
	const agent = this;

	const inputSchema = z.object({
		input: z.string().describe('The input to send to the agent'),
	});
	const outputSchema = z.object({
		result: z.string().describe('The result of the agent'),
	});

	return new Tool(this.name)
		.description(description)
		.input(inputSchema)
		.output(outputSchema)
		.handler(async (rawInput, ctx) => {
			const { input } = rawInput as { input: string };
			const runResult = await agent.generate(input, {
				telemetry: ctx.parentTelemetry,
			} as RunOptions & ExecutionOptions);

			// Concatenate every text chunk produced by assistant messages.
			const textParts: string[] = [];
			for (const message of runResult.messages) {
				if (!('role' in message) || message.role !== 'assistant') continue;
				if (!('content' in message)) continue;
				for (const chunk of message.content) {
					if (chunk.type === 'text') {
						textParts.push('text' in chunk ? chunk.text : '');
					}
				}
			}
			const text = textParts.join('');

			// Collect sub-agent usage: this agent's own + any nested sub-agents.
			const usageEntries: SubAgentUsage[] = [];
			if (runResult.usage) {
				usageEntries.push({ agent: agent.name, model: runResult.model, usage: runResult.usage });
			}
			if (runResult.subAgentUsage) {
				usageEntries.push(...runResult.subAgentUsage);
			}

			// Return branded result — the runtime unwraps it to extract sub-agent usage.
			// createAgentToolResult returns `never`, same pattern as ctx.suspend().
			if (usageEntries.length > 0) {
				return createAgentToolResult({ result: text }, usageEntries);
			}
			return { result: text };
		})
		.build();
}
|
||||
|
||||
/** Return the latest state snapshot of the agent. Returns `{ status: 'idle' }` before first run. */
getState(): SerializableAgentState {
	if (this.runtime) return this.runtime.getState();
	// No run has started yet — synthesize an empty idle snapshot.
	return {
		persistence: undefined,
		status: 'idle',
		messageList: { messages: [], historyIds: [], inputIds: [], responseIds: [] },
		pendingToolCalls: {},
	};
}
|
||||
|
||||
/**
 * Cancel the currently running agent.
 * Synchronous — sets an abort flag; the agentic loop checks it asynchronously.
 *
 * Delegates to the shared event bus used by the runtime.
 */
abort(): void {
	this.eventBus.abort();
}
|
||||
|
||||
/** Generate a response (non-streaming). Lazy-builds on first call. */
|
||||
async generate(
|
||||
input: AgentMessage[] | string,
|
||||
options?: RunOptions & ExecutionOptions,
|
||||
): Promise<GenerateResult> {
|
||||
const runtime = await this.ensureBuilt();
|
||||
return await runtime.generate(this.toMessages(input), options);
|
||||
}
|
||||
|
||||
/** Stream a response. Lazy-builds on first call. */
|
||||
async stream(
|
||||
input: AgentMessage[] | string,
|
||||
options?: RunOptions & ExecutionOptions,
|
||||
): Promise<StreamResult> {
|
||||
const runtime = await this.ensureBuilt();
|
||||
return await runtime.stream(this.toMessages(input), options);
|
||||
}
|
||||
|
||||
/** Resume a suspended tool call with data. Lazy-builds on first call. */
|
||||
async resume(
|
||||
method: 'generate',
|
||||
data: unknown,
|
||||
options: ResumeOptions & ExecutionOptions,
|
||||
): Promise<GenerateResult>;
|
||||
async resume(
|
||||
method: 'stream',
|
||||
data: unknown,
|
||||
options: ResumeOptions & ExecutionOptions,
|
||||
): Promise<StreamResult>;
|
||||
async resume(
|
||||
method: 'generate' | 'stream',
|
||||
data: unknown,
|
||||
options: ResumeOptions & ExecutionOptions,
|
||||
): Promise<GenerateResult | StreamResult> {
|
||||
const runtime = await this.ensureBuilt();
|
||||
if (method === 'generate') {
|
||||
return await runtime.resume('generate', data, options);
|
||||
}
|
||||
return await runtime.resume('stream', data, options);
|
||||
}
|
||||
|
||||
approve(method: 'generate', options: ResumeOptions & ExecutionOptions): Promise<GenerateResult>;
|
||||
approve(method: 'stream', options: ResumeOptions & ExecutionOptions): Promise<StreamResult>;
|
||||
async approve(
|
||||
method: 'generate' | 'stream',
|
||||
options: ResumeOptions & ExecutionOptions,
|
||||
): Promise<GenerateResult | StreamResult> {
|
||||
if (method === 'generate') {
|
||||
return await this.resume('generate', { approved: true }, options);
|
||||
}
|
||||
return await this.resume('stream', { approved: true }, options);
|
||||
}
|
||||
|
||||
deny(method: 'generate', options: ResumeOptions & ExecutionOptions): Promise<GenerateResult>;
|
||||
deny(method: 'stream', options: ResumeOptions & ExecutionOptions): Promise<StreamResult>;
|
||||
async deny(
|
||||
method: 'generate' | 'stream',
|
||||
options: ResumeOptions & ExecutionOptions,
|
||||
): Promise<GenerateResult | StreamResult> {
|
||||
if (method === 'generate') {
|
||||
return await this.resume('generate', { approved: false }, options);
|
||||
}
|
||||
return await this.resume('stream', { approved: false }, options);
|
||||
}
|
||||
|
||||
/**
 * @internal Lazy-build the agent on first use. Stores the promise so
 * concurrent callers share one build operation. On error the promise is
 * cleared so the caller can retry.
 */
private async ensureBuilt(): Promise<AgentRuntime> {
	if (!this.buildPromise) {
		const p = this.build();
		this.buildPromise = p;
		// Detach a rejection handler: clear the cached promise only if it is
		// still the one that failed (a newer retry may already be in flight).
		p.catch(() => {
			if (this.buildPromise === p) this.buildPromise = undefined;
		});
	}
	return await this.buildPromise;
}
|
||||
|
||||
/** Normalize string input into a single user text message; pass message arrays through unchanged. */
private toMessages(input: string | AgentMessage[]): AgentMessage[] {
	if (typeof input === 'string') {
		return [{ role: 'user', content: [{ type: 'text', text: input }] }];
	}
	return input;
}
|
||||
|
||||
/** @internal Validate configuration and produce an AgentRuntime. Overridden by the execution engine. */
|
||||
protected async build(): Promise<AgentRuntime> {
|
||||
const hasModel = this.modelId ?? this.modelConfigObj;
|
||||
if (!hasModel) {
|
||||
throw new Error(`Agent "${this.name}" requires a model`);
|
||||
}
|
||||
if (!this.instructionsText) {
|
||||
throw new Error(`Agent "${this.name}" requires instructions`);
|
||||
}
|
||||
|
||||
const finalTools = [...this.tools];
|
||||
|
||||
if (this.workspaceInstance) {
|
||||
const wsTools = this.workspaceInstance.getTools();
|
||||
finalTools.push(...wsTools);
|
||||
}
|
||||
|
||||
let finalStaticTools = finalTools;
|
||||
if (this.requireToolApprovalValue) {
|
||||
finalStaticTools = finalTools.map((t) =>
|
||||
t.suspendSchema ? t : wrapToolForApproval(t, { requireApproval: true }),
|
||||
);
|
||||
}
|
||||
|
||||
// Validate checkpoint requirement from static tools and known MCP approval config
|
||||
// before attempting any network connections (allows fast failure).
|
||||
const staticNeedsCheckpoint = finalStaticTools.some((t) => t.suspendSchema);
|
||||
const mcpNeedsCheckpoint =
|
||||
(this.requireToolApprovalValue && this.mcpClients.length > 0) ||
|
||||
this.mcpClients.some((c) => c.declaresApproval());
|
||||
if ((staticNeedsCheckpoint || mcpNeedsCheckpoint) && !this.checkpointStore) {
|
||||
throw new Error(
|
||||
`Agent "${this.name}" has tools requiring approval or suspend/resume but no checkpoint storage. ` +
|
||||
"Add .checkpoint('memory') for in-process storage, " +
|
||||
'or pass a persistent store (e.g. LibSQLStore, PgStore).',
|
||||
);
|
||||
}
|
||||
|
||||
// Resolve tools from all MCP clients.
|
||||
const mcpToolLists = await Promise.all(this.mcpClients.map(async (c) => await c.listTools()));
|
||||
let mcpTools = mcpToolLists.flat();
|
||||
|
||||
// Apply global requireToolApproval to MCP tools (per-server approval is already
|
||||
// handled inside McpClient/McpConnection.listTools()).
|
||||
if (this.requireToolApprovalValue) {
|
||||
mcpTools = mcpTools.map((t) =>
|
||||
t.suspendSchema ? t : wrapToolForApproval(t, { requireApproval: true }),
|
||||
);
|
||||
}
|
||||
|
||||
// Detect collisions between MCP tools and static tools.
|
||||
const staticNames = new Set(finalStaticTools.map((t) => t.name));
|
||||
const collisions = mcpTools.filter((t) => staticNames.has(t.name)).map((t) => t.name);
|
||||
if (collisions.length > 0) {
|
||||
throw new Error(
|
||||
`MCP tool name collision — the following tool names resolve to duplicates: ${collisions.join(', ')}`,
|
||||
);
|
||||
}
|
||||
|
||||
const allTools = [...finalStaticTools, ...mcpTools];
|
||||
|
||||
// Validate checkpoint again after discovering actual MCP tools
|
||||
// (catches the case where MCP tools have suspendSchema after listing).
|
||||
const allNeedCheckpoint = allTools.some((t) => t.suspendSchema);
|
||||
if (allNeedCheckpoint && !this.checkpointStore) {
|
||||
throw new Error(
|
||||
`Agent "${this.name}" has tools requiring approval or suspend/resume but no checkpoint storage. ` +
|
||||
"Add .checkpoint('memory') for in-process storage, " +
|
||||
'or pass a persistent store (e.g. LibSQLStore, PgStore).',
|
||||
);
|
||||
}
|
||||
|
||||
let modelConfig: ModelConfig;
|
||||
if (this.modelConfigObj) {
|
||||
if (
|
||||
this.resolvedKey &&
|
||||
typeof this.modelConfigObj === 'object' &&
|
||||
'id' in this.modelConfigObj
|
||||
) {
|
||||
modelConfig = { ...this.modelConfigObj, apiKey: this.resolvedKey };
|
||||
} else {
|
||||
modelConfig = this.modelConfigObj;
|
||||
}
|
||||
} else if (this.resolvedKey) {
|
||||
modelConfig = { id: this.modelId!, apiKey: this.resolvedKey };
|
||||
} else {
|
||||
modelConfig = this.modelId!;
|
||||
}
|
||||
|
||||
let instructions = this.instructionsText;
|
||||
if (this.workspaceInstance) {
|
||||
const wsInstructions = this.workspaceInstance.getInstructions();
|
||||
if (wsInstructions) {
|
||||
instructions = `${instructions}\n\n${wsInstructions}`;
|
||||
}
|
||||
}
|
||||
|
||||
this.runtime = new AgentRuntime({
|
||||
name: this.name,
|
||||
model: modelConfig,
|
||||
instructions,
|
||||
tools: allTools.length > 0 ? allTools : undefined,
|
||||
instructionProviderOptions: this.instructionProviderOpts,
|
||||
providerTools: this.providerTools.length > 0 ? this.providerTools : undefined,
|
||||
memory: this.memoryConfig?.memory,
|
||||
lastMessages: this.memoryConfig?.lastMessages,
|
||||
workingMemory: this.memoryConfig?.workingMemory,
|
||||
semanticRecall: this.memoryConfig?.semanticRecall,
|
||||
structuredOutput: this.outputSchema,
|
||||
checkpointStorage: this.checkpointStore,
|
||||
thinking: this.thinkingConfig,
|
||||
eventBus: this.eventBus,
|
||||
toolCallConcurrency: this.concurrencyValue,
|
||||
titleGeneration: this.memoryConfig?.titleGeneration,
|
||||
telemetry: this.telemetryConfig ?? (await this.telemetryBuilder?.build()),
|
||||
});
|
||||
|
||||
return this.runtime;
|
||||
}
|
||||
}
|
||||
186
packages/@n8n/agents/src/sdk/catalog.ts
Normal file
186
packages/@n8n/agents/src/sdk/catalog.ts
Normal file
|
|
@ -0,0 +1,186 @@
|
|||
/** Public models.dev catalog endpoint queried by fetchProviderCatalog(). */
const MODELS_DEV_URL = 'https://models.dev/api.json';
|
||||
|
||||
// --- Public catalog types (all costs are USD per one million tokens) ---

/** Cost per million tokens. */
export interface ModelCost {
	/** Cost per million input tokens (USD). */
	input: number;
	/** Cost per million output tokens (USD). */
	output: number;
	/** Cost per million cached input tokens (USD). */
	cacheRead?: number;
	/** Cost per million cache write tokens (USD). */
	cacheWrite?: number;
}

/** Model context/output limits. */
export interface ModelLimits {
	/** Maximum context window size in tokens. */
	context?: number;
	/** Maximum output tokens. */
	output?: number;
}

/** Information about a single model. */
export interface ModelInfo {
	/** Model ID (e.g. 'claude-sonnet-4-5'). */
	id: string;
	/** Human-readable name (e.g. 'Claude Sonnet 4.5'). */
	name: string;
	/** Whether the model supports reasoning / thinking. */
	reasoning: boolean;
	/** Whether the model supports tool calling. */
	toolCall: boolean;
	/** Cost per million tokens. Absent when models.dev has no pricing for the model. */
	cost?: ModelCost;
	/** Token limits. Absent when models.dev reports none. */
	limits?: ModelLimits;
}

/** Information about a provider. */
export interface ProviderInfo {
	/** Provider ID (e.g. 'anthropic'). */
	id: string;
	/** Human-readable name (e.g. 'Anthropic'). */
	name: string;
	/** Available models keyed by model ID. */
	models: Record<string, ModelInfo>;
}

/** The full catalog of providers and their models, keyed by provider ID. */
export type ProviderCatalog = Record<string, ProviderInfo>;
|
||||
|
||||
/** Raw model entry as returned by the models.dev API (snake_case fields). Not exported. */
interface ModelsDevModel {
	id: string;
	name: string;
	reasoning?: boolean;
	tool_call?: boolean;
	cost?: { input?: number; output?: number; cache_read?: number; cache_write?: number };
	limit?: { context?: number; output?: number };
}

/** Raw provider entry as returned by the models.dev API. Not exported. */
interface ModelsDevProvider {
	id: string;
	name: string;
	models?: Record<string, ModelsDevModel>;
}
|
||||
|
||||
/**
|
||||
* Fetch the provider/model catalog from models.dev.
|
||||
*
|
||||
* Returns a map of provider ID → ProviderInfo with all available models.
|
||||
* The catalog is fetched once and can be cached by the caller.
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* import { fetchProviderCatalog } from '@n8n/agents';
|
||||
*
|
||||
* const catalog = await fetchProviderCatalog();
|
||||
* console.log(Object.keys(catalog)); // ['anthropic', 'openai', ...]
|
||||
* console.log(catalog.anthropic.models['claude-sonnet-4-5'].reasoning); // true
|
||||
* ```
|
||||
*/
|
||||
export async function fetchProviderCatalog(): Promise<ProviderCatalog> {
|
||||
const response = await fetch(MODELS_DEV_URL);
|
||||
if (!response.ok) {
|
||||
throw new Error(`Failed to fetch provider catalog: ${response.statusText}`);
|
||||
}
|
||||
|
||||
const data = (await response.json()) as Record<string, ModelsDevProvider>;
|
||||
const catalog: ProviderCatalog = {};
|
||||
|
||||
for (const [key, provider] of Object.entries(data)) {
|
||||
if (!provider.models || Object.keys(provider.models).length === 0) continue;
|
||||
|
||||
const models: Record<string, ModelInfo> = {};
|
||||
for (const [modelId, model] of Object.entries(provider.models)) {
|
||||
const info: ModelInfo = {
|
||||
id: model.id,
|
||||
name: model.name,
|
||||
reasoning: model.reasoning ?? false,
|
||||
toolCall: model.tool_call ?? false,
|
||||
};
|
||||
if (model.cost?.input !== undefined && model.cost?.output !== undefined) {
|
||||
info.cost = {
|
||||
input: model.cost.input,
|
||||
output: model.cost.output,
|
||||
...(model.cost.cache_read !== undefined && { cacheRead: model.cost.cache_read }),
|
||||
...(model.cost.cache_write !== undefined && { cacheWrite: model.cost.cache_write }),
|
||||
};
|
||||
}
|
||||
if (model.limit) {
|
||||
info.limits = {
|
||||
...(model.limit.context !== undefined && { context: model.limit.context }),
|
||||
...(model.limit.output !== undefined && { output: model.limit.output }),
|
||||
};
|
||||
}
|
||||
models[modelId] = info;
|
||||
}
|
||||
|
||||
catalog[key] = {
|
||||
id: provider.id,
|
||||
name: provider.name,
|
||||
models,
|
||||
};
|
||||
}
|
||||
|
||||
return catalog;
|
||||
}
|
||||
|
||||
// --- Global cached catalog for internal use ---

// Module-level cache shared by every caller in this process.
let cachedCatalog: ProviderCatalog | undefined;
// In-flight fetch, so concurrent callers share a single request.
let catalogFetchPromise: Promise<ProviderCatalog | undefined> | undefined;

/**
 * Get the cached catalog, fetching once if needed.
 * Returns undefined if the fetch fails (offline, timeout, etc.).
 * On failure, clears the in-flight promise so the next call retries.
 * @internal
 */
export async function getCachedCatalog(): Promise<ProviderCatalog | undefined> {
	if (cachedCatalog) return cachedCatalog;

	catalogFetchPromise ??= fetchProviderCatalog()
		.then((c) => {
			cachedCatalog = c;
			return c;
		})
		.catch((error: unknown) => {
			// Clear so subsequent calls retry
			catalogFetchPromise = undefined;
			// Failure is non-fatal — warn and resolve to undefined so callers degrade gracefully.
			console.warn(
				'[agents] Failed to fetch model catalog from models.dev — cost data will be unavailable:',
				error instanceof Error ? error.message : error,
			);
			return undefined;
		});

	return await catalogFetchPromise;
}
|
||||
|
||||
/**
|
||||
* Look up cost info for a model by its full ID (e.g. 'anthropic/claude-sonnet-4-5').
|
||||
* Returns undefined if catalog is unavailable or model not found.
|
||||
* @internal
|
||||
*/
|
||||
export async function getModelCost(modelId: string): Promise<ModelCost | undefined> {
|
||||
const catalog = await getCachedCatalog();
|
||||
if (!catalog) return undefined;
|
||||
|
||||
const [provider, ...rest] = modelId.split('/');
|
||||
const modelName = rest.join('/');
|
||||
|
||||
return catalog[provider]?.models[modelName]?.cost;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the cost in USD from token usage and per-million-token pricing.
|
||||
*/
|
||||
export function computeCost(
|
||||
usage: { promptTokens: number; completionTokens: number },
|
||||
cost: ModelCost,
|
||||
): number {
|
||||
const inputCost = (usage.promptTokens / 1_000_000) * cost.input;
|
||||
const outputCost = (usage.completionTokens / 1_000_000) * cost.output;
|
||||
return inputCost + outputCost;
|
||||
}
|
||||
183
packages/@n8n/agents/src/sdk/eval.ts
Normal file
183
packages/@n8n/agents/src/sdk/eval.ts
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
import { filterLlmMessages } from './message';
|
||||
import { AgentRuntime } from '../runtime/agent-runtime';
|
||||
import type { BuiltEval, CheckFn, EvalInput, EvalScore, JudgeFn, JudgeHandlerFn } from '../types';
|
||||
import type { AgentMessage } from '../types/sdk/message';
|
||||
|
||||
/** Extract text content from LLM messages (custom messages are skipped). */
|
||||
function extractText(messages: AgentMessage[]): string {
|
||||
return filterLlmMessages(messages)
|
||||
.flatMap((m) => m.content)
|
||||
.filter((c) => c.type === 'text')
|
||||
.map((c) => (c as { text: string }).text)
|
||||
.join('');
|
||||
}
|
||||
|
||||
/**
|
||||
* Builder for creating evaluations with a fluent API.
|
||||
*
|
||||
* Two modes:
|
||||
* - **Deterministic**: `.check(fn)` — pure function scoring
|
||||
* - **LLM-as-judge**: `.model()` + `.credential()` + `.judge(fn)` — LLM-powered scoring
|
||||
*
|
||||
* Usage:
|
||||
* ```typescript
|
||||
* // Deterministic
|
||||
* const jsonCheck = new Eval('json-check')
|
||||
* .description('Verify output is valid JSON')
|
||||
* .check(({ output }) => {
|
||||
* try { JSON.parse(output); return { score: 1, reasoning: 'Valid JSON' }; }
|
||||
* catch { return { score: 0, reasoning: 'Invalid JSON' }; }
|
||||
* });
|
||||
*
|
||||
* // LLM-as-judge
|
||||
* const correctness = new Eval('correctness')
|
||||
* .description('Judge factual correctness')
|
||||
* .model('anthropic/claude-haiku-4-5')
|
||||
* .credential('anthropic')
|
||||
* .judge(async ({ input, output, expected, llm }) => {
|
||||
* const result = await llm(`Is "${output}" correct for "${input}"? Expected: ${expected}`);
|
||||
* const score = parseFloat(result.text.match(/[\d.]+/)?.[0] ?? '0');
|
||||
* return { score: Math.min(1, Math.max(0, score)), reasoning: result.text };
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
export class Eval {
|
||||
private evalName: string;
|
||||
|
||||
private desc?: string;
|
||||
|
||||
private checkFn?: CheckFn;
|
||||
|
||||
private judgeFn?: JudgeHandlerFn;
|
||||
|
||||
private modelId?: string;
|
||||
|
||||
private credentialName?: string;
|
||||
|
||||
private _resolvedApiKey?: string;
|
||||
|
||||
constructor(name: string) {
|
||||
this.evalName = name;
|
||||
}
|
||||
|
||||
/** Human-readable description of what this eval measures. */
|
||||
description(desc: string): this {
|
||||
this.desc = desc;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Set the judge model (LLM-as-judge mode). */
|
||||
model(modelId: string): this {
|
||||
this.modelId = modelId;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Declare a credential for the judge model. */
|
||||
credential(name: string): this {
|
||||
this.credentialName = name;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** @internal Read the declared credential name (used by the execution engine). */
|
||||
protected get declaredCredential(): string | undefined {
|
||||
return this.credentialName;
|
||||
}
|
||||
|
||||
/** @internal Set the resolved API key for the judge model. */
|
||||
protected set resolvedApiKey(key: string) {
|
||||
this._resolvedApiKey = key;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a deterministic check function.
|
||||
* Mutually exclusive with `.judge()`.
|
||||
*/
|
||||
check(fn: CheckFn): this {
|
||||
if (this.judgeFn) {
|
||||
throw new Error(`Eval "${this.evalName}": cannot use both .check() and .judge()`);
|
||||
}
|
||||
this.checkFn = fn;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set an LLM-as-judge handler. Requires `.model()` and `.credential()`.
|
||||
* The handler receives `{ input, output, expected, llm }` where `llm`
|
||||
* is a callable function bound to the judge model.
|
||||
* Mutually exclusive with `.check()`.
|
||||
*/
|
||||
judge(fn: JudgeHandlerFn): this {
|
||||
if (this.checkFn) {
|
||||
throw new Error(`Eval "${this.evalName}": cannot use both .check() and .judge()`);
|
||||
}
|
||||
this.judgeFn = fn;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** The eval name. */
|
||||
get name(): string {
|
||||
return this.evalName;
|
||||
}
|
||||
|
||||
/** @internal Build the eval into a runnable form. */
|
||||
protected build(): BuiltEval {
|
||||
if (!this.checkFn && !this.judgeFn) {
|
||||
throw new Error(`Eval "${this.evalName}" requires either .check() or .judge()`);
|
||||
}
|
||||
|
||||
if (this.judgeFn && !this.modelId) {
|
||||
throw new Error(`Eval "${this.evalName}" uses .judge() but no .model() was set`);
|
||||
}
|
||||
|
||||
const name = this.evalName;
|
||||
const desc = this.desc;
|
||||
|
||||
if (this.checkFn) {
|
||||
const checkFn = this.checkFn;
|
||||
return {
|
||||
name,
|
||||
description: desc,
|
||||
_run: async (input: EvalInput) => await checkFn(input),
|
||||
};
|
||||
}
|
||||
|
||||
// LLM-as-judge mode
|
||||
const judgeFn = this.judgeFn!;
|
||||
const modelConfig: string | { id: `${string}/${string}`; apiKey: string } = this._resolvedApiKey
|
||||
? { id: this.modelId! as `${string}/${string}`, apiKey: this._resolvedApiKey }
|
||||
: this.modelId!;
|
||||
|
||||
const runtime = new AgentRuntime({
|
||||
name: `${name}-judge`,
|
||||
model: modelConfig,
|
||||
instructions: 'You are an evaluation judge. Respond precisely as instructed.',
|
||||
});
|
||||
|
||||
const llm: JudgeFn = async (prompt: string) => {
|
||||
const result = await runtime.generate([
|
||||
{ role: 'user', content: [{ type: 'text', text: prompt }] },
|
||||
]);
|
||||
return { text: extractText(result.messages) };
|
||||
};
|
||||
|
||||
return {
|
||||
name,
|
||||
description: desc,
|
||||
_run: async (input: EvalInput) => await judgeFn({ ...input, llm }),
|
||||
};
|
||||
}
|
||||
|
||||
/** @internal Ensure the eval is built (lazy). */
|
||||
private _built?: BuiltEval;
|
||||
|
||||
/** @internal */
|
||||
ensureBuilt(): BuiltEval {
|
||||
this._built ??= this.build();
|
||||
return this._built;
|
||||
}
|
||||
|
||||
/** Run this eval against a single input. Lazy-builds on first call. */
|
||||
async run(input: EvalInput): Promise<EvalScore> {
|
||||
return await this.ensureBuilt()._run(input);
|
||||
}
|
||||
}
|
||||
159
packages/@n8n/agents/src/sdk/evaluate.ts
Normal file
159
packages/@n8n/agents/src/sdk/evaluate.ts
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
import type { Agent } from './agent';
|
||||
import type { Eval } from './eval';
|
||||
import { filterLlmMessages } from './message';
|
||||
import type { EvalResults, EvalRunResult, EvalScore, GenerateResult } from '../types';
|
||||
import type { AgentMessage } from '../types/sdk/message';
|
||||
|
||||
/** Extract text content from messages. */
|
||||
function extractText(messages: AgentMessage[]): string {
|
||||
return filterLlmMessages(messages)
|
||||
.flatMap((m) => m.content)
|
||||
.filter((c) => c.type === 'text')
|
||||
.map((c) => (c as { text: string }).text)
|
||||
.join('');
|
||||
}
|
||||
|
||||
export interface DatasetRow {
	/** The prompt to send to the agent. */
	input: string;
	/** Expected answer (used by evals like correctness/similarity). */
	expected?: string;
	/**
	 * Per-tool resume data overrides for evaluation. By default all suspended
	 * tools are auto-resumed with `{ approved: true }` during evaluations.
	 * Use this to test denial or custom resume scenarios.
	 *
	 * Keyed by tool name — see the `evaluate()` examples
	 * (e.g. `{ delete_file: 'deny' }`).
	 *
	 * - `'deny'` is shorthand for `{ approved: false }`
	 * - An object value is passed as-is to `agent.resume()`
	 */
	resumeData?: Record<string, 'deny' | Record<string, unknown>>;
}
|
||||
|
||||
/** Configuration for {@link evaluate}: the test dataset and the scoring evals. */
export interface EvaluateConfig {
	/** Dataset of test cases to run through the agent. */
	dataset: DatasetRow[];
	/** Evals to run against each agent response. Builders lazy-build on first run. */
	evals: Eval[];
}
|
||||
|
||||
/**
 * Run a dataset through an agent and score the results with evals.
 *
 * All dataset rows and evals run in parallel for maximum throughput.
 * Suspended tool calls are **auto-resumed with `{ approved: true }`** during
 * evals. Use `resumeData` in dataset rows to override per tool.
 *
 * @param agent The agent to exercise.
 * @param config Dataset rows and the evals to score each response with.
 * @returns Per-row results plus a per-eval pass/fail summary.
 *
 * @example
 * ```typescript
 * const results = await evaluate(agent, {
 *   dataset: [
 *     { input: 'What is 2+2?', expected: '4' },
 *     { input: 'Delete temp files', resumeData: { delete_file: 'deny' } },
 *     { input: 'Book flight', resumeData: { book: { seat: '12A' } } },
 *   ],
 *   evals: [correctness, similarity],
 * });
 * ```
 */
export async function evaluate(agent: Agent, config: EvaluateConfig): Promise<EvalResults> {
	const { dataset, evals } = config;

	// All rows run concurrently; within each row, all evals also run concurrently.
	const runs: EvalRunResult[] = await Promise.all(
		dataset.map(async (row) => {
			const result = await runWithInterrupts(agent, row.input, row.resumeData);

			const toolCalls = result.toolCalls ?? [];

			// Build composite output: if the agent's text is empty but it made
			// tool calls, include the tool outputs so evals have something to score.
			let output = extractText(result.messages);
			if (!output.trim() && toolCalls.length > 0) {
				const toolOutputs = toolCalls
					.filter((tc) => tc.output !== undefined)
					.map((tc) => `[${tc.tool}] ${JSON.stringify(tc.output)}`);
				if (toolOutputs.length > 0) {
					output = toolOutputs.join('\n');
				}
			}

			const scoreEntries = await Promise.all(
				evals.map(async (ev): Promise<[string, EvalScore]> => {
					const score = await ev.run({
						input: row.input,
						output,
						expected: row.expected,
						toolCalls,
					});
					return [ev.name, score];
				}),
			);

			return {
				input: row.input,
				output,
				expected: row.expected,
				scores: Object.fromEntries(scoreEntries),
			};
		}),
	);

	// Aggregate pass/fail per eval; scores without a `pass` flag are excluded
	// from that eval's totals, and evals with no pass/fail scores are omitted.
	const summary: EvalResults['summary'] = {};
	for (const ev of evals) {
		const results = runs
			.map((r) => r.scores[ev.name]?.pass)
			.filter((p): p is boolean => p !== undefined);

		if (results.length > 0) {
			const passed = results.filter(Boolean).length;
			summary[ev.name] = {
				passed,
				failed: results.length - passed,
				total: results.length,
			};
		}
	}

	return { runs, summary };
}
|
||||
|
||||
/**
|
||||
* Run the agent with automatic interrupt handling.
|
||||
* Uses generate() and loops: if the result has a pendingSuspend, resolves
|
||||
* the resume data and calls agent.resume('generate', ...) to get a
|
||||
* GenerateResult directly without needing to stream-and-re-generate.
|
||||
*
|
||||
* Tools are auto-resumed with `{ approved: true }` by default;
|
||||
* use `resumeOverrides` to override per tool.
|
||||
*/
|
||||
async function runWithInterrupts(
|
||||
agent: Agent,
|
||||
input: string,
|
||||
resumeOverrides?: Record<string, 'deny' | Record<string, unknown>>,
|
||||
): Promise<GenerateResult> {
|
||||
let result = await agent.generate(input);
|
||||
const allToolCalls: Array<{ tool: string; input: unknown; output: unknown }> = [
|
||||
...(result.toolCalls ?? []),
|
||||
];
|
||||
|
||||
while (result.pendingSuspend && result.pendingSuspend.length > 0) {
|
||||
const { runId, toolCallId, toolName } = result.pendingSuspend[0];
|
||||
const override = toolName ? resumeOverrides?.[toolName] : undefined;
|
||||
|
||||
let data: Record<string, unknown>;
|
||||
if (override === 'deny') {
|
||||
data = { approved: false };
|
||||
} else if (override && typeof override === 'object') {
|
||||
data = override;
|
||||
} else {
|
||||
data = { approved: true };
|
||||
}
|
||||
|
||||
result = await agent.resume('generate', data, { runId, toolCallId });
|
||||
allToolCalls.push(...(result.toolCalls ?? []));
|
||||
}
|
||||
|
||||
return {
|
||||
...result,
|
||||
...(allToolCalls.length > 0 ? { toolCalls: allToolCalls } : {}),
|
||||
};
|
||||
}
|
||||
52
packages/@n8n/agents/src/sdk/guardrail.ts
Normal file
52
packages/@n8n/agents/src/sdk/guardrail.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import type { BuiltGuardrail, GuardrailType, GuardrailStrategy, PiiDetectionType } from '../types';
|
||||
|
||||
export class Guardrail {
|
||||
private name: string;
|
||||
|
||||
private guardType?: GuardrailType;
|
||||
|
||||
private strategyType?: GuardrailStrategy;
|
||||
|
||||
private detectionTypes?: PiiDetectionType[];
|
||||
|
||||
private thresholdValue?: number;
|
||||
|
||||
constructor(name: string) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
type(guardType: GuardrailType): this {
|
||||
this.guardType = guardType;
|
||||
return this;
|
||||
}
|
||||
|
||||
strategy(strategy: GuardrailStrategy): this {
|
||||
this.strategyType = strategy;
|
||||
return this;
|
||||
}
|
||||
|
||||
detect(types: PiiDetectionType[]): this {
|
||||
this.detectionTypes = types;
|
||||
return this;
|
||||
}
|
||||
|
||||
threshold(value: number): this {
|
||||
this.thresholdValue = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
build(): BuiltGuardrail {
|
||||
if (!this.guardType) throw new Error(`Guardrail "${this.name}" requires a type`);
|
||||
if (!this.strategyType) throw new Error(`Guardrail "${this.name}" requires a strategy`);
|
||||
|
||||
return {
|
||||
name: this.name,
|
||||
guardType: this.guardType,
|
||||
strategy: this.strategyType,
|
||||
_config: {
|
||||
detectionTypes: this.detectionTypes,
|
||||
threshold: this.thresholdValue,
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
231
packages/@n8n/agents/src/sdk/mcp-client.ts
Normal file
231
packages/@n8n/agents/src/sdk/mcp-client.ts
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
import { McpConnection } from '../runtime/mcp-connection';
|
||||
import type { McpServerConfig, McpVerifyResult } from '../types/sdk/mcp';
|
||||
import type { BuiltTool } from '../types/sdk/tool';
|
||||
|
||||
/**
 * Manages connections to one or more MCP servers and exposes their tools
 * as a flat list of BuiltTool instances.
 *
 * Connections are established lazily on the first `listTools()` call and
 * kept alive until `close()` is called. Both operations deduplicate
 * concurrent calls via stored promises, so calling `listTools()` from
 * multiple concurrent `generate()` runs is safe.
 *
 * @example
 * ```typescript
 * const client = new McpClient([
 *   { name: 'browser', url: 'http://localhost:9222/mcp', transport: 'streamableHttp' },
 *   { name: 'fs', command: 'npx', args: ['@anthropic/mcp-fs', '/tmp'] },
 * ]);
 *
 * const agent = new Agent('assistant')
 *   .model('anthropic/claude-sonnet-4-5')
 *   .instructions('You are a helpful assistant.')
 *   .mcp(client);
 *
 * const result = await agent.generate('List files in /tmp');
 * await client.close();
 * ```
 */
export class McpClient {
	// Validated server configs, kept for verify() which builds fresh connections.
	private readonly configs: McpServerConfig[];

	// Long-lived connections, one per config, created eagerly but connected lazily.
	private connections: McpConnection[];

	// Dedup cache for listTools(); cleared on failure so callers can retry.
	private listToolsPromise: Promise<BuiltTool[]> | undefined;

	// Dedup cache for close(); once set, close() is a permanent no-op.
	private closePromise: Promise<void> | undefined;

	/**
	 * @param configs - Server configurations. Each must have either `url` or `command`.
	 *   Duplicate names within the list are rejected.
	 * @param requireToolApproval - When true, every tool from every server is wrapped
	 *   with a human-approval gate (requires `.checkpoint()` on the Agent).
	 * @throws if a config has neither or both of `url`/`command`, or a duplicate name
	 */
	constructor(configs: McpServerConfig[], requireToolApproval = false) {
		// Each server must be addressed by exactly one transport: url XOR command.
		for (const cfg of configs) {
			if (!cfg.url && !cfg.command) {
				throw new Error(
					`MCP server "${cfg.name}": exactly one of "url" or "command" must be provided`,
				);
			}
			if (cfg.url && cfg.command) {
				throw new Error(`MCP server "${cfg.name}": provide either "url" or "command", not both`);
			}
		}

		// Server names must be unique — they namespace the exposed tools.
		const seen = new Set<string>();
		for (const cfg of configs) {
			if (seen.has(cfg.name)) {
				throw new Error(`MCP server name "${cfg.name}" is already registered`);
			}
			seen.add(cfg.name);
		}

		this.configs = configs;
		this.connections = configs.map((cfg) => new McpConnection(cfg, requireToolApproval));
	}

	/**
	 * Explicitly connect to all servers without listing tools.
	 * Optional — `listTools()` connects implicitly.
	 */
	async connect(): Promise<void> {
		await this.listTools();
	}

	/**
	 * Connect to all servers (if not already connected) and return the full
	 * flat list of tools. Subsequent calls return the cached list without
	 * additional network round-trips. On error the cache is cleared so the
	 * caller can retry.
	 */
	async listTools(): Promise<BuiltTool[]> {
		if (!this.listToolsPromise) {
			const p = this.doListTools();
			this.listToolsPromise = p;
			// Clear the cache on failure — but only if it still points at this
			// attempt, so a concurrent retry's fresh promise is not clobbered.
			p.catch(() => {
				if (this.listToolsPromise === p) this.listToolsPromise = undefined;
			});
		}
		return await this.listToolsPromise;
	}

	/**
	 * Disconnect from all servers. Subsequent calls are no-ops.
	 * Best-effort — errors are logged but not thrown.
	 */
	async close(): Promise<void> {
		this.closePromise ??= this.doClose();
		return await this.closePromise;
	}

	/**
	 * Verify connectivity to all configured servers.
	 * Each server is connected to with a temporary connection, its tools are
	 * listed, and the connection is closed — this does NOT affect the
	 * long-lived connections used by `listTools()`.
	 *
	 * Never throws — returns a result object indicating success or per-server
	 * errors so callers can handle partial failures gracefully.
	 *
	 * @example
	 * ```typescript
	 * const result = await client.verify();
	 * if (!result.ok) {
	 *   console.error('MCP connection failed:', result.errors);
	 * }
	 * ```
	 */
	async verify(): Promise<McpVerifyResult> {
		if (this.configs.length === 0) {
			return { ok: true, servers: [] };
		}

		// allSettled so one bad server does not mask results from the others.
		const results = await Promise.allSettled(
			this.configs.map(async (cfg) => {
				// Throwaway connection — the long-lived this.connections are untouched.
				const conn = new McpConnection(cfg);
				try {
					await conn.connect();
					const tools = await conn.listTools();
					return { name: cfg.name, tools: tools.length };
				} finally {
					await conn.disconnect().catch(() => {});
				}
			}),
		);

		const errors: Array<{ server: string; error: string }> = [];
		const servers: Array<{ name: string; tools: number }> = [];

		// results[i] corresponds to this.configs[i], so the server name for a
		// rejection is recovered by index.
		for (let i = 0; i < results.length; i++) {
			const result = results[i];
			if (result.status === 'rejected') {
				errors.push({
					server: this.configs[i].name,
					error: result.reason instanceof Error ? result.reason.message : String(result.reason),
				});
			} else {
				servers.push(result.value);
			}
		}

		return errors.length > 0 ? { ok: false, errors } : { ok: true, servers };
	}

	/**
	 * Returns true when any configured server declares per-server approval
	 * requirements (`requireApproval: true` or a non-empty `requireApproval`
	 * string array). Does NOT require a network connection.
	 *
	 * Used by the Agent builder to validate checkpoint configuration before
	 * attempting to connect.
	 */
	declaresApproval(): boolean {
		return this.connections.some((conn) => conn.declaresApproval());
	}

	/**
	 * Connect every long-lived connection and gather all tools.
	 * All-or-nothing: if any server fails to connect/list, or two servers
	 * expose the same tool name, every connection opened by this attempt is
	 * torn down before throwing, so a retry starts from a clean slate.
	 */
	private async doListTools(): Promise<BuiltTool[]> {
		// Track which connections actually opened, for rollback on failure.
		const connectedConnections: McpConnection[] = [];

		const settled = await Promise.allSettled(
			this.connections.map(async (conn) => {
				await conn.connect();
				connectedConnections.push(conn);
				return await conn.listTools();
			}),
		);

		const failed = settled
			.map((r, i) => ({ result: r, name: this.connections[i].name }))
			.filter((x) => x.result.status === 'rejected');

		if (failed.length > 0) {
			// Roll back any connections that did succeed before reporting the failures.
			await Promise.allSettled(connectedConnections.map(async (c) => await c.disconnect()));
			const details = failed
				.map((x) => {
					const reason =
						x.result.status === 'rejected'
							? x.result.reason instanceof Error
								? x.result.reason.message
								: String(x.result.reason)
							: '';
					return `${x.name}: ${reason}`;
				})
				.join('; ');
			throw new Error(`MCP connection failed — ${details}`);
		}

		const tools = settled.flatMap((r) => (r.status === 'fulfilled' ? r.value : []));

		// Tool names must be globally unique across servers.
		const seen = new Set<string>();
		const duplicates: string[] = [];
		for (const tool of tools) {
			if (seen.has(tool.name)) {
				duplicates.push(tool.name);
			}
			seen.add(tool.name);
		}

		if (duplicates.length > 0) {
			await Promise.allSettled(connectedConnections.map(async (c) => await c.disconnect()));
			throw new Error(
				`MCP tool name collision — the following tool names resolve to duplicates: ${duplicates.join(', ')}`,
			);
		}

		return tools;
	}

	/**
	 * Disconnect every connection, logging (not throwing) per-server errors
	 * so one bad disconnect cannot prevent the rest from closing.
	 */
	private async doClose(): Promise<void> {
		await Promise.allSettled(
			this.connections.map(async (conn) => {
				try {
					await conn.disconnect();
				} catch (error) {
					console.error(`MCP disconnect error for server "${conn.name}":`, error);
				}
			}),
		);
	}
}
|
||||
187
packages/@n8n/agents/src/sdk/memory.ts
Normal file
187
packages/@n8n/agents/src/sdk/memory.ts
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
import type { z } from 'zod';
|
||||
|
||||
import { InMemoryMemory } from '../runtime/memory-store';
|
||||
import { templateFromSchema } from '../runtime/working-memory';
|
||||
import type {
|
||||
BuiltMemory,
|
||||
MemoryConfig,
|
||||
SemanticRecallConfig,
|
||||
TitleGenerationConfig,
|
||||
} from '../types';
|
||||
|
||||
type ZodObjectSchema = z.ZodObject<z.ZodRawShape>;
|
||||
|
||||
const DEFAULT_LAST_MESSAGES = 10;
|
||||
|
||||
/**
|
||||
* Builder for configuring conversation memory.
|
||||
*
|
||||
* Usage:
|
||||
* ```typescript
|
||||
* const memory = new Memory()
|
||||
* .storage('memory')
|
||||
* .lastMessages(20)
|
||||
* .freeform('# User Context\n- **Name**:\n- **City**:');
|
||||
*
|
||||
* agent.memory(memory);
|
||||
* ```
|
||||
*/
|
||||
export class Memory {
|
||||
private lastMessagesValue: number = DEFAULT_LAST_MESSAGES;
|
||||
|
||||
private semanticRecallConfig?: SemanticRecallConfig;
|
||||
|
||||
private workingMemorySchema?: ZodObjectSchema;
|
||||
|
||||
private workingMemoryTemplate?: string;
|
||||
|
||||
private workingMemoryScope: 'resource' | 'thread' = 'resource';
|
||||
|
||||
private memoryBackend?: BuiltMemory;
|
||||
|
||||
private titleGenerationConfig?: TitleGenerationConfig;
|
||||
|
||||
/** The configured number of recent messages to include. */
|
||||
get lastMessageCount(): number {
|
||||
return this.lastMessagesValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the storage backend for conversation history.
|
||||
*
|
||||
* - `'memory'` — in-process memory (default, lost on restart)
|
||||
* - A `BuiltMemory` instance — for persistent storage (e.g. SqliteMemory)
|
||||
*/
|
||||
storage(backend: 'memory' | BuiltMemory): this {
|
||||
if (backend === 'memory') {
|
||||
this.memoryBackend = undefined;
|
||||
} else {
|
||||
this.memoryBackend = backend;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Set the number of recent messages to include in context. */
|
||||
lastMessages(count: number): this {
|
||||
this.lastMessagesValue = count;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Enable semantic recall (RAG-based retrieval of relevant past messages). */
|
||||
semanticRecall(config: SemanticRecallConfig): this {
|
||||
this.semanticRecallConfig = config;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable structured working memory with a Zod schema.
|
||||
* Mutually exclusive with `.freeform()`.
|
||||
*/
|
||||
structured(schema: ZodObjectSchema): this {
|
||||
this.workingMemorySchema = schema;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable free-form working memory with a markdown/text template.
|
||||
* Mutually exclusive with `.structured()`.
|
||||
*/
|
||||
freeform(template: string): this {
|
||||
this.workingMemoryTemplate = template;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the working memory scope.
|
||||
*
|
||||
* - `'resource'` (default) — working memory is shared across all threads for the same resource/user.
|
||||
* - `'thread'` — working memory is scoped to a single conversation thread.
|
||||
*/
|
||||
scope(s: 'resource' | 'thread'): this {
|
||||
this.workingMemoryScope = s;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable automatic title generation for new threads.
|
||||
*
|
||||
* - `true` — uses the agent's own model and default instructions.
|
||||
* - `{ model, instructions }` — custom model and/or custom instructions.
|
||||
*
|
||||
* Titles are generated once per thread (only when the thread has no title)
|
||||
* and run asynchronously so they never block the agent response.
|
||||
*/
|
||||
titleGeneration(config: boolean | TitleGenerationConfig): this {
|
||||
if (config === true) {
|
||||
this.titleGenerationConfig = {};
|
||||
} else if (config === false) {
|
||||
this.titleGenerationConfig = undefined;
|
||||
} else {
|
||||
this.titleGenerationConfig = config;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validate configuration and produce a `MemoryConfig`.
|
||||
*
|
||||
* @throws if both `.structured()` and `.freeform()` are used
|
||||
* @throws if `.freeform()` template is empty
|
||||
* @throws if `.semanticRecall()` is used with a backend that doesn't support search()
|
||||
*/
|
||||
build(): MemoryConfig {
|
||||
if (this.workingMemorySchema && this.workingMemoryTemplate !== undefined) {
|
||||
throw new Error(
|
||||
'Working memory cannot use both .structured() and .freeform(). ' +
|
||||
'Choose one: .structured(zodSchema) for typed state, or .freeform(template) for free-form text.',
|
||||
);
|
||||
}
|
||||
|
||||
if (this.workingMemoryTemplate !== undefined && this.workingMemoryTemplate.trim() === '') {
|
||||
throw new Error(
|
||||
'Free-form working memory template cannot be empty. ' +
|
||||
'Provide a markdown template with slots for the agent to fill.',
|
||||
);
|
||||
}
|
||||
|
||||
const memory: BuiltMemory = this.memoryBackend ?? new InMemoryMemory();
|
||||
|
||||
if (this.semanticRecallConfig) {
|
||||
if (!memory.queryEmbeddings && !memory.search) {
|
||||
throw new Error(
|
||||
'Semantic recall requires a storage backend with queryEmbeddings() or search() support.',
|
||||
);
|
||||
}
|
||||
if (!memory.search && !this.semanticRecallConfig.embedder) {
|
||||
throw new Error(
|
||||
'Semantic recall requires an embedder when using queryEmbeddings(). Add embedder to your semanticRecall config: ' +
|
||||
".semanticRecall({ topK: 5, embedder: 'openai/text-embedding-3-small' })",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let workingMemory: MemoryConfig['workingMemory'];
|
||||
if (this.workingMemorySchema) {
|
||||
workingMemory = {
|
||||
template: templateFromSchema(this.workingMemorySchema),
|
||||
structured: true,
|
||||
schema: this.workingMemorySchema,
|
||||
scope: this.workingMemoryScope,
|
||||
};
|
||||
} else if (this.workingMemoryTemplate !== undefined) {
|
||||
workingMemory = {
|
||||
template: this.workingMemoryTemplate,
|
||||
structured: false,
|
||||
scope: this.workingMemoryScope,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
memory,
|
||||
lastMessages: this.lastMessagesValue,
|
||||
workingMemory,
|
||||
semanticRecall: this.semanticRecallConfig,
|
||||
titleGeneration: this.titleGenerationConfig,
|
||||
};
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue