diff --git a/.agents/skills/response-compliance/SKILL.md b/.agents/skills/response-compliance/SKILL.md new file mode 100644 index 0000000000..da0b953052 --- /dev/null +++ b/.agents/skills/response-compliance/SKILL.md @@ -0,0 +1,87 @@ +--- +name: response-compliance +description: OpenResponses API compliance testing. Use when testing the Response API endpoint, running compliance tests, or debugging Response API schema issues. Triggers on 'compliance', 'response api test', 'openresponses test'. +--- + +# OpenResponses Compliance Test + +Run the official OpenResponses compliance test suite against the local (or remote) Response API endpoint. + +## Quick Start + +```bash +# From the openapi package directory +cd lobehub/packages/openapi + +# Run all tests (dev mode, localhost:3010) +APP_URL=http://localhost:3010 bun run test:response-compliance -- \ + --auth-header "lobe-auth-dev-backend-api" --no-bearer --api-key 1 + +# Run specific tests only +APP_URL=http://localhost:3010 bun run test:response-compliance -- \ + --auth-header "lobe-auth-dev-backend-api" --no-bearer --api-key 1 \ + --filter basic-response,streaming-response + +# Verbose mode (shows request/response details) +APP_URL=http://localhost:3010 bun run test:response-compliance -- \ + --auth-header "lobe-auth-dev-backend-api" --no-bearer --api-key 1 -v + +# JSON output (for CI) +APP_URL=http://localhost:3010 bun run test:response-compliance -- \ + --auth-header "lobe-auth-dev-backend-api" --no-bearer --api-key 1 --json +``` + +## Prerequisites + +- Dev server running with `ENABLE_MOCK_DEV_USER=true` in `.env` +- The `api/v1/responses` route registered (via `src/app/(backend)/api/v1/[[...route]]/route.ts`) + +## Auth Modes + +| Mode | Flags | +| --------------- | ------------------------------------------------------------------- | +| Dev (mock user) | `--auth-header "lobe-auth-dev-backend-api" --no-bearer --api-key 1` | +| API Key | `--api-key lb-xxxxxxxxxxxxxxxx` | +| Custom | `--auth-header --api-key ` | + +## Test IDs + +Available `--filter` values: + +| ID | Description | Related Issue | +| -------------------- | -------------------------------------- | ------------- | +| `basic-response` | Simple text generation (non-streaming) | LOBE-5858 | +| `streaming-response` | SSE streaming lifecycle + events | LOBE-5859 | +| `system-prompt` | System role message handling | LOBE-5858 | +| `tool-calling` | Function tool definition + call output | LOBE-5860 | +| `image-input` | Multimodal image URL content | — | +| `multi-turn` | Conversation history via input items | LOBE-5861 | + +## Environment Variables + +| Variable | Default | Description | +| --------- | ----------------------- | ----------------------------------------- | +| `APP_URL` | `http://localhost:3010` | Server base URL (auto-appends `/api/v1`) | +| `API_KEY` | — | API key (alternative to `--api-key` flag) | + +## How It Works + +The script (`lobehub/packages/openapi/scripts/compliance-test.sh`) clones the official [openresponses/openresponses](https://github.com/openresponses/openresponses) repo into `scripts/openresponses-compliance/` (gitignored) and runs its CLI test runner. First run clones; subsequent runs update from upstream. + +## Debugging Failures + +1. Run with `-v` to see full request/response payloads +2. Common failure patterns: + - **"Failed to parse JSON"**: Auth failed, server returned HTML redirect + - **"Response has no output items"**: LLM execution not yet implemented + - **"Expected number, received null"**: Missing required field in response schema + - **"Invalid input"**: Zod validation on response schema — check field format + +## Key Files + +- **Types**: `lobehub/packages/openapi/src/types/responses.type.ts` +- **Service**: `lobehub/packages/openapi/src/services/responses.service.ts` +- **Controller**: `lobehub/packages/openapi/src/controllers/responses.controller.ts` +- **Route**: `lobehub/packages/openapi/src/routes/responses.route.ts` +- **Test script**: `lobehub/packages/openapi/scripts/compliance-test.sh` +- **Cloud route**: `src/app/(backend)/api/v1/[[...route]]/route.ts` diff --git a/packages/openapi/package.json b/packages/openapi/package.json index be50ca769c..6cd847f586 100644 --- a/packages/openapi/package.json +++ b/packages/openapi/package.json @@ -3,6 +3,9 @@ "version": "1.0.0", "private": true, "main": "./src/index.ts", + "scripts": { + "test:response-compliance": "./scripts/compliance-test.sh" + }, "dependencies": { "@hono/zod-validator": "^0.7.6", "@lobechat/model-runtime": "workspace:*", diff --git a/packages/openapi/scripts/.gitignore b/packages/openapi/scripts/.gitignore new file mode 100644 index 0000000000..6551b3b40e --- /dev/null +++ b/packages/openapi/scripts/.gitignore @@ -0,0 +1 @@ +openresponses-compliance/ diff --git a/packages/openapi/scripts/compliance-test.sh b/packages/openapi/scripts/compliance-test.sh new file mode 100755 index 0000000000..00d1d6b36e --- /dev/null +++ b/packages/openapi/scripts/compliance-test.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# OpenResponses Compliance Test Runner +# Clones the official test suite locally and runs it against a local server. +# +# Usage: +# ./scripts/compliance-test.sh # uses APP_URL env var +# ./scripts/compliance-test.sh --filter basic-response # filter tests +# ./scripts/compliance-test.sh --base-url http://localhost:3010/api/v1 --api-key +# +# Environment variables: +# APP_URL - App base URL (default: http://localhost:3010), auto-appends /api/v1 +# API_KEY - API key for authentication +# +# All flags are forwarded to the upstream CLI (run with --help to see all options). + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +REPO_URL="https://github.com/openresponses/openresponses.git" +CACHE_DIR="$SCRIPT_DIR/openresponses-compliance" +BRANCH="main" + +# Clone or update the test suite +if [ -d "$CACHE_DIR/.git" ]; then + echo "Updating cached test suite..." + git -C "$CACHE_DIR" fetch --depth 1 origin "$BRANCH" --quiet + git -C "$CACHE_DIR" checkout FETCH_HEAD --quiet +else + echo "Cloning test suite (one-time)..." + git clone --depth 1 --branch "$BRANCH" "$REPO_URL" "$CACHE_DIR" --quiet +fi + +# Install deps (bun is fast, skips if lock unchanged) +cd "$CACHE_DIR" +bun install --frozen-lockfile --silent 2>/dev/null || bun install --silent + +# Build default args from env vars (can be overridden by explicit flags) +DEFAULT_ARGS=() + +# Auto-detect --base-url from APP_URL if not explicitly provided +if ! echo "$@" | grep -q -- "--base-url\|-u"; then + BASE_URL="${APP_URL:-http://localhost:3010}/api/v1" + DEFAULT_ARGS+=(--base-url "$BASE_URL") +fi + +# Auto-detect --api-key from API_KEY if not explicitly provided +if ! echo "$@" | grep -q -- "--api-key\|-k"; then + if [ -n "${API_KEY:-}" ]; then + DEFAULT_ARGS+=(--api-key "$API_KEY") + fi +fi + +# Run the compliance test CLI +exec bun run bin/compliance-test.ts "${DEFAULT_ARGS[@]}" "$@" diff --git a/packages/openapi/src/controllers/responses.controller.ts b/packages/openapi/src/controllers/responses.controller.ts new file mode 100644 index 0000000000..13b083778a --- /dev/null +++ b/packages/openapi/src/controllers/responses.controller.ts @@ -0,0 +1,51 @@ +import type { Context } from 'hono'; +import { streamSSE } from 'hono/streaming'; + +import { BaseController } from '../common/base.controller'; +import { ResponsesService } from '../services/responses.service'; +import type { CreateResponseRequest } from '../types/responses.type'; + +/** + * Responses Controller + * Handles OpenResponses API requests + */ +export class ResponsesController extends BaseController { + /** + * POST /api/v1/responses + * Create a model response (streaming or non-streaming) + */ + async createResponse(c: Context): Promise { + try { + const body = await this.getBody(c); + const userId = this.getUserId(c); + const db = await this.getDatabase(); + const service = new ResponsesService(db, userId); + + if (body.stream) { + return this.handleStreamingResponse(c, service, body); + } + + const response = await service.createResponse(body); + return c.json(response); + } catch (error) { + return this.handleError(c, error); + } + } + + private handleStreamingResponse( + c: Context, + service: ResponsesService, + params: CreateResponseRequest, + ): Response { + return streamSSE(c, async (stream) => { + const generator = service.createStreamingResponse(params); + + for await (const event of generator) { + await stream.writeSSE({ + data: JSON.stringify(event), + event: event.type, + }); + } + }); + } +} diff --git a/packages/openapi/src/routes/index.ts b/packages/openapi/src/routes/index.ts index 5504299fbe..b97f6112d2 100644 --- a/packages/openapi/src/routes/index.ts +++ b/packages/openapi/src/routes/index.ts @@ -7,6 +7,7 @@ import MessagesRoutes from './messages.route'; import ModelsRoutes from './models.route'; import PermissionsRoutes from './permissions.route'; import ProvidersRoutes from './providers.route'; +import ResponsesRoutes from './responses.route'; import RolesRoutes from './roles.route'; import TopicsRoutes from './topics.route'; import UsersRoutes from './users.route'; @@ -21,6 +22,7 @@ export default { 'models': ModelsRoutes, 'permissions': PermissionsRoutes, 'providers': ProvidersRoutes, + 'responses': ResponsesRoutes, 'roles': RolesRoutes, 'topics': TopicsRoutes, 'users': UsersRoutes, diff --git a/packages/openapi/src/routes/responses.route.ts b/packages/openapi/src/routes/responses.route.ts new file mode 100644 index 0000000000..69c33330e8 --- /dev/null +++ b/packages/openapi/src/routes/responses.route.ts @@ -0,0 +1,24 @@ +import { zValidator } from '@hono/zod-validator'; +import { Hono } from 'hono'; + +import { ResponsesController } from '../controllers/responses.controller'; +import { requireAuth } from '../middleware/auth'; +import { CreateResponseRequestSchema } from '../types/responses.type'; + +const ResponsesRoutes = new Hono(); + +/** + * POST /api/v1/responses + * Create a model response (OpenResponses protocol) + */ +ResponsesRoutes.post( + '/', + requireAuth, + zValidator('json', CreateResponseRequestSchema), + async (c) => { + const controller = new ResponsesController(); + return await controller.createResponse(c); + }, +); + +export default ResponsesRoutes; diff --git a/packages/openapi/src/services/responses.service.ts b/packages/openapi/src/services/responses.service.ts new file mode 100644 index 0000000000..82e3a99c21 --- /dev/null +++ b/packages/openapi/src/services/responses.service.ts @@ -0,0 +1,622 @@ +import type { AgentState } from '@lobechat/agent-runtime'; + +import { InMemoryStreamEventManager } from '@/server/modules/AgentRuntime/InMemoryStreamEventManager'; +import type { + StreamChunkData, + StreamEvent, +} from '@/server/modules/AgentRuntime/StreamEventManager'; +import { AgentRuntimeService } from '@/server/services/agentRuntime'; +import { AiAgentService } from '@/server/services/aiAgent'; + +import { BaseService } from '../common/base.service'; +import type { + CreateResponseRequest, + InputItem, + OutputItem, + ResponseObject, + ResponseStreamEvent, + ResponseUsage, +} from '../types/responses.type'; + +/** + * Response API Service + * Handles OpenResponses protocol request execution via AiAgentService.execAgent + * + * The `model` field is treated as an agent slug. + * Execution is delegated to execAgent (background mode), + * with executeSync used when synchronous results are needed. + */ +export class ResponsesService extends BaseService { + /** + * Extract a prompt string from OpenResponses input + */ + private extractPrompt(input: string | InputItem[]): string { + if (typeof input === 'string') return input; + + // Find the last user message + for (let i = input.length - 1; i >= 0; i--) { + const item = input[i]; + if (item.type === 'message' && item.role === 'user') { + if (typeof item.content === 'string') return item.content; + return item.content + .map((part) => { + if (part.type === 'input_text') return part.text; + return ''; + }) + .filter(Boolean) + .join(''); + } + } + + return ''; + } + + /** + * Extract system/developer instructions from input items + * These are concatenated and used as additional system prompt + */ + private extractInputInstructions(input: string | InputItem[]): string { + if (typeof input === 'string') return ''; + + const parts: string[] = []; + for (const item of input) { + if (item.type === 'message' && (item.role === 'system' || item.role === 'developer')) { + if (typeof item.content === 'string') { + parts.push(item.content); + } else { + const text = item.content + .map((part) => { + if (part.type === 'input_text') return part.text; + return ''; + }) + .filter(Boolean) + .join(''); + if (text) parts.push(text); + } + } + } + + return parts.join('\n\n'); + } + + /** + * Build combined instructions from request params and input items + */ + private buildInstructions(params: CreateResponseRequest): string | undefined { + const inputInstructions = this.extractInputInstructions(params.input); + const requestInstructions = params.instructions ?? ''; + + const combined = [inputInstructions, requestInstructions].filter(Boolean).join('\n\n'); + return combined || undefined; + } + + /** + * Extract assistant content from AgentState after execution + */ + private extractAssistantContent(state: AgentState): string { + if (!state.messages?.length) return ''; + + for (let i = state.messages.length - 1; i >= 0; i--) { + const msg = state.messages[i]; + if (msg.role === 'assistant' && msg.content) { + return typeof msg.content === 'string' ? msg.content : ''; + } + } + + return ''; + } + + /** + * Extract full output items from AgentState messages, including tool calls. + * Converts assistant tool_calls → function_call items, + * tool result messages → function_call_output items, + * and final assistant message → message item. + */ + private extractOutputItems( + state: AgentState, + responseId: string, + ): { output: OutputItem[]; outputText: string } { + if (!state.messages?.length) return { output: [], outputText: '' }; + + const output: OutputItem[] = []; + let outputText = ''; + let itemCounter = 0; + + // Skip system messages; process assistant and tool messages in order + for (const msg of state.messages) { + if (msg.role === 'assistant') { + const hasToolCalls = msg.tool_calls && msg.tool_calls.length > 0; + + // Handle tool_calls from assistant + if (hasToolCalls) { + for (const toolCall of msg.tool_calls) { + output.push({ + arguments: toolCall.function?.arguments ?? '{}', + call_id: toolCall.id ?? `call_${itemCounter}`, + id: `fc_${responseId.slice(5)}_${itemCounter++}`, + name: toolCall.function?.name ?? '', + status: 'completed' as const, + type: 'function_call' as const, + }); + } + } + + // Only emit message item for assistant messages WITHOUT tool_calls (i.e., final text response) + if (!hasToolCalls) { + const content = typeof msg.content === 'string' ? msg.content : ''; + if (content) { + outputText = content; + output.push({ + content: [ + { annotations: [], logprobs: [], text: content, type: 'output_text' as const }, + ], + id: `msg_${responseId.slice(5)}_${itemCounter++}`, + role: 'assistant' as const, + status: 'completed' as const, + type: 'message' as const, + }); + } + } + } else if (msg.role === 'tool') { + output.push({ + call_id: msg.tool_call_id ?? '', + id: `fco_${responseId.slice(5)}_${itemCounter++}`, + output: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content), + status: 'completed' as const, + type: 'function_call_output' as const, + }); + } + } + + return { output, outputText }; + } + + /** + * Extract usage from AgentState + */ + private extractUsage(state: AgentState): ResponseUsage { + const tokens = state.usage?.llm?.tokens; + return { + input_tokens: tokens?.input ?? 0, + output_tokens: tokens?.output ?? 0, + total_tokens: tokens?.total ?? 0, + }; + } + + /** + * Create a response (non-streaming) + * Calls execAgent with autoStart: false, then executeSync to wait for completion + */ + async createResponse(params: CreateResponseRequest): Promise { + const createdAt = Math.floor(Date.now() / 1000); + + try { + const slug = params.model; + const prompt = this.extractPrompt(params.input); + const instructions = this.buildInstructions(params); + + // Resolve topicId from previous_response_id for multi-turn + const previousTopicId = params.previous_response_id + ? this.extractTopicIdFromResponseId(params.previous_response_id) + : null; + + this.log('info', 'Creating response via execAgent', { + hasInstructions: !!instructions, + previousTopicId, + prompt: prompt.slice(0, 50), + slug, + }); + + // 1. Create agent operation without auto-start + const aiAgentService = new AiAgentService(this.db, this.userId); + const execResult = await aiAgentService.execAgent({ + appContext: previousTopicId ? { topicId: previousTopicId } : undefined, + autoStart: false, + instructions, + prompt, + slug, + stream: false, + }); + + if (!execResult.success) { + throw new Error(execResult.error || 'Failed to create agent operation'); + } + + // Generate response ID encoding topicId for multi-turn support + const responseId = this.generateResponseId(execResult.topicId); + + // 2. Execute synchronously to completion + const agentRuntimeService = new AgentRuntimeService(this.db, this.userId, { + queueService: null, + }); + const finalState = await agentRuntimeService.executeSync(execResult.operationId); + + // 3. Extract results from final state + const { output, outputText } = this.extractOutputItems(finalState, responseId); + const usage = this.extractUsage(finalState); + + return this.buildResponseObject({ + completedAt: Math.floor(Date.now() / 1000), + createdAt, + id: responseId, + output, + outputText, + params, + status: finalState.status === 'error' ? 'failed' : 'completed', + usage, + }); + } catch (error) { + const errorResponseId = this.generateResponseId(); + this.log('error', 'Response creation failed', { error, responseId: errorResponseId }); + + return this.buildResponseObject({ + createdAt, + error: { + code: 'server_error', + message: error instanceof Error ? error.message : 'Unknown error', + }, + id: errorResponseId, + output: [], + outputText: '', + params, + status: 'failed', + }); + } + } + + /** + * Create a streaming response with real token-level streaming + * Subscribes to Agent Runtime stream events and converts to OpenResponses SSE events + */ + async *createStreamingResponse( + params: CreateResponseRequest, + ): AsyncGenerator { + const createdAt = Math.floor(Date.now() / 1000); + let sequenceNumber = 0; + const outputIndex = 0; + const contentIndex = 0; + + try { + const slug = params.model; + const prompt = this.extractPrompt(params.input); + const instructions = this.buildInstructions(params); + + // Resolve topicId from previous_response_id for multi-turn + const previousTopicId = params.previous_response_id + ? this.extractTopicIdFromResponseId(params.previous_response_id) + : null; + + // 1. Create agent operation (before generating responseId so we have topicId) + const aiAgentService = new AiAgentService(this.db, this.userId); + const execResult = await aiAgentService.execAgent({ + appContext: previousTopicId ? { topicId: previousTopicId } : undefined, + autoStart: false, + instructions, + prompt, + slug, + stream: true, + }); + + if (!execResult.success) { + throw new Error(execResult.error || 'Failed to create agent operation'); + } + + const operationId = execResult.operationId; + + // Generate response ID encoding topicId for multi-turn support + const responseId = this.generateResponseId(execResult.topicId); + const outputItemId = `msg_${responseId.slice(5)}`; + + const response = this.buildResponseObject({ + createdAt, + id: responseId, + output: [], + outputText: '', + params, + status: 'in_progress', + }); + + // Emit response.created + response.in_progress + yield { response, sequence_number: sequenceNumber++, type: 'response.created' as const }; + yield { + response, + sequence_number: sequenceNumber++, + type: 'response.in_progress' as const, + }; + + // 2. Create AgentRuntimeService with custom stream manager for event subscription + const streamEventManager = new InMemoryStreamEventManager(); + const agentRuntimeService = new AgentRuntimeService(this.db, this.userId, { + queueService: null, + streamEventManager, + }); + + // 3. Setup async event queue to bridge push events → pull-based generator + const eventQueue: StreamEvent[] = []; + let resolveWaiting: (() => void) | null = null; + let executionDone = false; + + const unsubscribe = streamEventManager.subscribe(operationId, (events) => { + eventQueue.push(...events); + if (resolveWaiting) { + resolveWaiting(); + resolveWaiting = null; + } + }); + + // Helper to wait for next event batch + const waitForEvents = (): Promise => + new Promise((resolve) => { + if (eventQueue.length > 0 || executionDone) { + resolve(); + } else { + resolveWaiting = resolve; + } + }); + + // 4. Start execution in background + let finalState: AgentState | undefined; + const executionPromise = agentRuntimeService + .executeSync(operationId) + .then((state) => { + finalState = state; + }) + .catch((err) => { + finalState = { status: 'error' } as AgentState; + this.log('error', 'Streaming execution failed', { error: err, responseId }); + }) + .finally(() => { + executionDone = true; + if (resolveWaiting) { + resolveWaiting(); + resolveWaiting = null; + } + }); + + // 5. Emit output_item.added + content_part.added immediately + const outputItem: OutputItem = { + content: [{ annotations: [], logprobs: [], text: '', type: 'output_text' as const }], + id: outputItemId, + role: 'assistant' as const, + status: 'in_progress' as const, + type: 'message' as const, + }; + + yield { + item: outputItem, + output_index: outputIndex, + sequence_number: sequenceNumber++, + type: 'response.output_item.added' as const, + }; + yield { + content_index: contentIndex, + item_id: outputItemId, + output_index: outputIndex, + part: { annotations: [], logprobs: [], text: '', type: 'output_text' as const }, + sequence_number: sequenceNumber++, + type: 'response.content_part.added' as const, + }; + + // 6. Process stream events and emit text deltas + let accumulatedText = ''; + + while (!executionDone || eventQueue.length > 0) { + await waitForEvents(); + + while (eventQueue.length > 0) { + const event = eventQueue.shift()!; + + if (event.type === 'stream_chunk') { + const chunk = event.data as StreamChunkData; + if (chunk.chunkType === 'text' && chunk.content) { + accumulatedText += chunk.content; + yield { + content_index: contentIndex, + delta: chunk.content, + item_id: outputItemId, + logprobs: [], + output_index: outputIndex, + sequence_number: sequenceNumber++, + type: 'response.output_text.delta' as const, + }; + } + } + } + } + + // 7. Wait for execution to fully complete + await executionPromise; + unsubscribe(); + + // If no text came through streaming, extract from final state + if (!accumulatedText && finalState) { + accumulatedText = this.extractAssistantContent(finalState); + } + + const usage = finalState + ? this.extractUsage(finalState) + : { input_tokens: 0, output_tokens: 0, total_tokens: 0 }; + + // 8. Emit closing events for text content + yield { + content_index: contentIndex, + item_id: outputItemId, + logprobs: [], + output_index: outputIndex, + sequence_number: sequenceNumber++, + text: accumulatedText, + type: 'response.output_text.done' as const, + }; + + yield { + content_index: contentIndex, + item_id: outputItemId, + output_index: outputIndex, + part: { + annotations: [], + logprobs: [], + text: accumulatedText, + type: 'output_text' as const, + }, + sequence_number: sequenceNumber++, + type: 'response.content_part.done' as const, + }; + + const completedItem: OutputItem = { + content: [ + { annotations: [], logprobs: [], text: accumulatedText, type: 'output_text' as const }, + ], + id: outputItemId, + role: 'assistant' as const, + status: 'completed' as const, + type: 'message' as const, + }; + + yield { + item: completedItem, + output_index: outputIndex, + sequence_number: sequenceNumber++, + type: 'response.output_item.done' as const, + }; + + // 9. Build final output including tool calls from AgentState + const fullOutput = finalState + ? this.extractOutputItems(finalState, responseId) + : { output: [completedItem], outputText: accumulatedText }; + + yield { + response: { + ...response, + completed_at: Math.floor(Date.now() / 1000), + output: fullOutput.output, + output_text: fullOutput.outputText || accumulatedText, + status: (finalState?.status === 'error' ? 'failed' : 'completed') as any, + usage: { + input_tokens: usage.input_tokens, + input_tokens_details: { cached_tokens: 0 }, + output_tokens: usage.output_tokens, + output_tokens_details: { reasoning_tokens: 0 }, + total_tokens: usage.total_tokens, + }, + }, + sequence_number: sequenceNumber, + type: 'response.completed' as const, + }; + } catch (error) { + const errorResponseId = this.generateResponseId(); + this.log('error', 'Streaming response failed', { error, responseId: errorResponseId }); + + const errorResponse = this.buildResponseObject({ + createdAt, + error: { + code: 'server_error', + message: error instanceof Error ? error.message : 'Unknown error', + }, + id: errorResponseId, + output: [], + outputText: '', + params, + status: 'failed', + }); + + yield { + response: errorResponse, + sequence_number: sequenceNumber, + type: 'response.failed' as const, + }; + } + } + + /** + * Generate a response ID that encodes the topicId for multi-turn support. + * Format: resp_{topicId}_{8-char-random-suffix} + * When no topicId is available, generates a plain random ID. + */ + private generateResponseId(topicId?: string): string { + const chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'; + let suffix = ''; + for (let i = 0; i < 8; i++) { + suffix += chars[Math.floor(Math.random() * chars.length)]; + } + if (topicId) { + return `resp_${topicId}_${suffix}`; + } + // Fallback: plain 24-char random ID + let id = ''; + for (let i = 0; i < 16; i++) { + id += chars[Math.floor(Math.random() * chars.length)]; + } + return `resp_${id}_${suffix}`; + } + + /** + * Extract topicId from a response ID (previous_response_id). + * Reverses the encoding from generateResponseId. + */ + private extractTopicIdFromResponseId(responseId: string): string | null { + if (!responseId.startsWith('resp_')) return null; + const withoutPrefix = responseId.slice(5); // remove "resp_" + const lastUnderscore = withoutPrefix.lastIndexOf('_'); + if (lastUnderscore === -1) return null; + const topicId = withoutPrefix.slice(0, lastUnderscore); + return topicId || null; + } + + private buildResponseObject(opts: { + completedAt?: number | null; + createdAt: number; + error?: { code: 'server_error'; message: string }; + id: string; + output: OutputItem[]; + outputText: string; + params: CreateResponseRequest; + status: ResponseObject['status']; + usage?: ResponseUsage; + }): ResponseObject { + const p = opts.params as Record; + return { + background: p.background ?? false, + completed_at: opts.completedAt ?? null, + created_at: opts.createdAt, + error: opts.error ?? null, + frequency_penalty: p.frequency_penalty ?? 0, + id: opts.id, + incomplete_details: null, + instructions: opts.params.instructions ?? null, + max_output_tokens: opts.params.max_output_tokens ?? null, + max_tool_calls: p.max_tool_calls ?? null, + metadata: opts.params.metadata ?? {}, + model: opts.params.model, + object: 'response', + output: opts.output, + output_text: opts.outputText, + parallel_tool_calls: opts.params.parallel_tool_calls ?? true, + presence_penalty: p.presence_penalty ?? 0, + previous_response_id: opts.params.previous_response_id ?? null, + prompt_cache_key: p.prompt_cache_key ?? null, + reasoning: opts.params.reasoning ?? null, + safety_identifier: p.safety_identifier ?? null, + service_tier: p.service_tier ?? 'default', + status: opts.status, + store: p.store ?? true, + temperature: opts.params.temperature ?? 1, + text: { format: { type: 'text' } }, + tool_choice: opts.params.tool_choice ?? 'auto', + tools: opts.params.tools?.map((t: any) => ({ ...t, strict: t.strict ?? null })) ?? [], + top_logprobs: p.top_logprobs ?? 0, + top_p: opts.params.top_p ?? 1, + truncation: + opts.params.truncation && typeof opts.params.truncation === 'object' + ? opts.params.truncation.type + : (opts.params.truncation ?? 'disabled'), + usage: { + input_tokens: opts.usage?.input_tokens ?? 0, + input_tokens_details: { cached_tokens: 0 }, + output_tokens: opts.usage?.output_tokens ?? 0, + output_tokens_details: { reasoning_tokens: 0 }, + total_tokens: opts.usage?.total_tokens ?? 0, + }, + user: opts.params.user ?? null, + } as any; + } +} diff --git a/packages/openapi/src/types/index.ts b/packages/openapi/src/types/index.ts index 01e3ac469f..7b98475f30 100644 --- a/packages/openapi/src/types/index.ts +++ b/packages/openapi/src/types/index.ts @@ -65,6 +65,7 @@ export * from './message-translations.type'; export * from './model.type'; export * from './permission.type'; export * from './provider.type'; +export * from './responses.type'; export * from './role.type'; export * from './topic.type'; export * from './user.type'; diff --git a/packages/openapi/src/types/responses.type.ts b/packages/openapi/src/types/responses.type.ts new file mode 100644 index 0000000000..9fb9128229 --- /dev/null +++ b/packages/openapi/src/types/responses.type.ts @@ -0,0 +1,372 @@ +import { z } from 'zod'; + +// ==================== Content Types ==================== + +export const InputTextContentSchema = z.object({ + text: z.string(), + type: z.literal('input_text'), +}); +export type InputTextContent = z.infer; + +export const OutputTextContentSchema = z.object({ + annotations: z.array(z.any()).optional(), + logprobs: z.array(z.any()).optional(), + text: z.string(), + type: z.literal('output_text'), +}); +export type OutputTextContent = z.infer; + +export const InputImageContentSchema = z.object({ + detail: z.enum(['auto', 'low', 'high']).optional(), + image_url: z.string().optional(), + type: z.literal('input_image'), +}); +export type InputImageContent = z.infer; + +export const InputFileContentSchema = z.object({ + file_data: z.string().optional(), + file_id: z.string().optional(), + filename: z.string().optional(), + type: z.literal('input_file'), +}); +export type InputFileContent = z.infer; + +export const ContentPartSchema = z.discriminatedUnion('type', [ + InputTextContentSchema, + OutputTextContentSchema, + InputImageContentSchema, + InputFileContentSchema, +]); +export type ContentPart = z.infer; + +export const InputContentPartSchema = z.discriminatedUnion('type', [ + InputTextContentSchema, + InputImageContentSchema, + InputFileContentSchema, +]); +export type InputContentPart = z.infer; + +// ==================== Item Types ==================== + +export const MessageItemSchema = z + .object({ + content: z.union([z.string(), z.array(ContentPartSchema)]), + id: z.string().optional(), + role: z.enum(['user', 'assistant', 'system', 'developer']), + status: z.enum(['completed', 'in_progress']).optional(), + type: z.literal('message'), + }) + .passthrough(); +export type MessageItem = z.infer; + +export const FunctionCallItemSchema = z.object({ + arguments: z.string(), + call_id: z.string().optional(), + id: z.string().optional(), + name: z.string(), + status: z.enum(['completed', 'in_progress']).optional(), + type: z.literal('function_call'), +}); +export type FunctionCallItem = z.infer; + +export const FunctionCallOutputItemSchema = z.object({ + call_id: z.string(), + id: z.string().optional(), + output: z.string(), + status: z.enum(['completed', 'in_progress', 'incomplete']).optional(), + type: z.literal('function_call_output'), +}); +export type FunctionCallOutputItem = z.infer; + +export const ReasoningItemSchema = z.object({ + id: z.string().optional(), + reasoning_summary: z + .array(z.object({ text: z.string(), type: z.literal('summary_text') })) + .optional(), + type: z.literal('reasoning'), +}); +export type ReasoningItem = z.infer; + +export const OutputItemSchema = z.discriminatedUnion('type', [ + MessageItemSchema, + FunctionCallItemSchema, + FunctionCallOutputItemSchema, + ReasoningItemSchema, +]); +export type OutputItem = z.infer; + +// Input items: what the user can send +export const InputItemSchema = z.discriminatedUnion('type', [ + MessageItemSchema, + FunctionCallItemSchema, + FunctionCallOutputItemSchema, +]); +export type InputItem = z.infer; + +// ==================== Tool Types ==================== + +export const FunctionToolSchema = z.object({ + description: z.string().optional(), + name: z.string(), + parameters: z.record(z.any()).optional(), + strict: z.boolean().optional(), + type: z.literal('function'), +}); +export type FunctionTool = z.infer; + +export const ToolSchema = FunctionToolSchema; +export type Tool = z.infer; + +// ==================== Usage Types ==================== + +export interface ResponseUsage { + input_tokens: number; + input_tokens_details?: { + cached_tokens?: number; + }; + output_tokens: number; + output_tokens_details?: { + reasoning_tokens?: number; + }; + total_tokens: number; +} + +// ==================== Error Types ==================== + +export type ResponseErrorCode = + | 'server_error' + | 'invalid_request_error' + | 'not_found' + | 'model_error' + | 'rate_limit_error'; + +export interface ResponseError { + code: ResponseErrorCode; + message: string; +} + +// ==================== Response Status ==================== + +export type ResponseStatus = 'completed' | 'failed' | 'in_progress' | 'incomplete' | 'queued'; + +// ==================== Truncation ==================== + +export const TruncationSchema = z.object({ + type: z.enum(['auto', 'disabled']).default('disabled'), +}); +export type Truncation = z.infer; + +// ==================== Reasoning ==================== + +export const ReasoningConfigSchema = z.object({ + effort: z.enum(['low', 'medium', 'high']).optional(), +}); +export type ReasoningConfig = z.infer; + +// ==================== Request Schema ==================== + +export const CreateResponseRequestSchema = z + .object({ + input: z.union([z.string(), z.array(InputItemSchema)]), + instructions: z.string().nullish(), + max_output_tokens: z.number().int().positive().nullish(), + metadata: z.record(z.string()).nullish(), + model: z.string(), + parallel_tool_calls: z.boolean().nullish(), + previous_response_id: z.string().nullish(), + reasoning: ReasoningConfigSchema.nullish(), + stream: z.boolean().nullish(), + temperature: z.number().min(0).max(2).nullish(), + tool_choice: z + .union([ + z.enum(['auto', 'required', 'none']), + z.object({ name: z.string(), type: z.literal('function') }), + ]) + .nullish(), + tools: z.array(ToolSchema).nullish(), + top_p: z.number().min(0).max(1).nullish(), + truncation: TruncationSchema.nullish(), + user: z.string().nullish(), + }) + .passthrough(); + +export type CreateResponseRequest = z.infer; + +// ==================== Response Object ==================== + +export interface ResponseObject { + background?: boolean | null; + completed_at?: number | null; + created_at: number; + error?: ResponseError | null; + frequency_penalty?: number | null; + id: string; + incomplete_details?: { reason: string } | null; + instructions?: string | null; + max_output_tokens?: number | null; + metadata?: Record | null; + model: string; + object: 'response'; + output: OutputItem[]; + output_text: string; + parallel_tool_calls?: boolean | null; + presence_penalty?: number | null; + previous_response_id?: string | null; + reasoning?: ReasoningConfig | null; + service_tier?: string | null; + status: ResponseStatus; + store?: boolean | null; + temperature?: number | null; + text?: { format?: { type: string } }; + tool_choice?: CreateResponseRequest['tool_choice']; + tools?: Tool[]; + top_p?: number | null; + truncation?: string | null; + usage?: ResponseUsage | null; + user?: string | null; +} + +// ==================== SSE Event Types ==================== + +export type ResponseStreamEventType = + | 'response.created' + | 'response.in_progress' + | 'response.completed' + | 'response.failed' + | 'response.incomplete' + // Output item events + | 'response.output_item.added' + | 'response.output_item.done' + // Content part events + | 'response.content_part.added' + | 'response.content_part.done' + // Text delta events + | 'response.output_text.delta' + | 'response.output_text.done' + // Function call events + | 'response.function_call_arguments.delta' + | 'response.function_call_arguments.done' + // Reasoning events + | 'response.reasoning_summary_text.delta' + | 'response.reasoning_summary_text.done'; + +export interface BaseStreamEvent { + sequence_number: number; + type: ResponseStreamEventType; +} + +export interface ResponseCreatedEvent extends BaseStreamEvent { + response: ResponseObject; + type: 'response.created'; +} + +export interface ResponseInProgressEvent extends BaseStreamEvent { + response: ResponseObject; + type: 'response.in_progress'; +} + +export interface ResponseCompletedEvent extends BaseStreamEvent { + response: ResponseObject; + type: 'response.completed'; +} + +export interface ResponseFailedEvent extends BaseStreamEvent { + response: ResponseObject; + type: 'response.failed'; +} + +export interface ResponseIncompleteEvent extends BaseStreamEvent { + response: ResponseObject; + type: 'response.incomplete'; +} + +export interface OutputItemAddedEvent extends BaseStreamEvent { + item: OutputItem; + output_index: number; + type: 'response.output_item.added'; +} + +export interface OutputItemDoneEvent extends BaseStreamEvent { + item: OutputItem; + output_index: number; + type: 'response.output_item.done'; +} + +export interface ContentPartAddedEvent extends BaseStreamEvent { + content_index: number; + item_id: string; + output_index: number; + part: ContentPart; + type: 'response.content_part.added'; +} + +export interface ContentPartDoneEvent extends BaseStreamEvent { + content_index: number; + item_id: string; + output_index: number; + part: ContentPart; + type: 'response.content_part.done'; +} + +export interface OutputTextDeltaEvent extends BaseStreamEvent { + content_index: number; + delta: string; + item_id: string; + logprobs: any[]; + output_index: number; + type: 'response.output_text.delta'; +} + +export interface OutputTextDoneEvent extends BaseStreamEvent { + content_index: number; + item_id: string; + logprobs: any[]; + output_index: number; + text: string; + type: 'response.output_text.done'; +} + +export interface FunctionCallArgumentsDeltaEvent extends BaseStreamEvent { + delta: string; + item_id: string; + output_index: number; + type: 'response.function_call_arguments.delta'; +} + +export interface FunctionCallArgumentsDoneEvent extends BaseStreamEvent { + arguments: string; + item_id: string; + output_index: number; + type: 'response.function_call_arguments.done'; +} + +export interface ReasoningSummaryTextDeltaEvent extends BaseStreamEvent { + delta: string; + item_id: string; + output_index: number; + type: 'response.reasoning_summary_text.delta'; +} + +export interface ReasoningSummaryTextDoneEvent extends BaseStreamEvent { + item_id: string; + output_index: number; + text: string; + type: 'response.reasoning_summary_text.done'; +} + +export type ResponseStreamEvent = + | ContentPartAddedEvent + | ContentPartDoneEvent + | FunctionCallArgumentsDeltaEvent + | FunctionCallArgumentsDoneEvent + | OutputItemAddedEvent + | OutputItemDoneEvent + | OutputTextDeltaEvent + | OutputTextDoneEvent + | ReasoningSummaryTextDeltaEvent + | ReasoningSummaryTextDoneEvent + | ResponseCompletedEvent + | ResponseCreatedEvent + | ResponseFailedEvent + | ResponseInProgressEvent + | ResponseIncompleteEvent; diff --git a/packages/types/src/agentGroup/index.ts b/packages/types/src/agentGroup/index.ts index 931066104f..331f20f860 100644 --- a/packages/types/src/agentGroup/index.ts +++ b/packages/types/src/agentGroup/index.ts @@ -162,6 +162,8 @@ export interface ExecAgentParams { autoStart?: boolean; /** Optional existing message IDs to include in context */ existingMessageIds?: string[]; + /** Additional system instructions appended after the agent's own system role */ + instructions?: string; /** The user input/prompt */ prompt: string; /** The agent slug to run (either agentId or slug is required) */ diff --git a/src/server/services/aiAgent/index.ts b/src/server/services/aiAgent/index.ts index f620f0426b..804e9f6a15 100644 --- a/src/server/services/aiAgent/index.ts +++ b/src/server/services/aiAgent/index.ts @@ -197,6 +197,7 @@ export class AiAgentService { discordContext, existingMessageIds = [], files, + instructions, stepCallbacks, stream, title, @@ -260,6 +261,14 @@ export class AiAgentService { } } + // 2.5. Append additional instructions to agent's systemRole + if (instructions) { + agentConfig.systemRole = agentConfig.systemRole + ? `${agentConfig.systemRole}\n\n${instructions}` + : instructions; + log('execAgent: appended additional instructions to systemRole'); + } + // 3. Handle topic creation: if no topicId provided, create a new topic; otherwise reuse existing let topicId = appContext?.topicId; if (!topicId) {