♻️ refactor: remove promptfoo configs and dependencies (#13665)

♻️ refactor: remove promptfoo configs and dependencies from packages

Migrate all prompt evaluation tests to the cloud repo's agent-evals framework.
Remove promptfoo directories, configs, dependencies, and generator scripts
from @lobechat/prompts, @lobechat/memory-user-memory, and @lobechat/builtin-tool-memory.

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Arvin Xu 2026-04-08 17:50:55 +08:00 committed by GitHub
parent 147ff3976f
commit b6a47debfd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
62 changed files with 11 additions and 5210 deletions

View file

@ -10,17 +10,14 @@
"./executionRuntime": "./src/ExecutionRuntime/index.ts"
},
"main": "./src/index.ts",
"scripts": {
"build:gen-tool-call": "tsx scripts/generate-tool-call.ts"
},
"scripts": {},
"dependencies": {
"@lobechat/memory-user-memory": "workspace:*",
"@lobechat/prompts": "workspace:*"
},
"devDependencies": {
"@lobechat/types": "workspace:*",
"@types/json-schema": "^7.0.15",
"promptfoo": "^0.120.17"
"@types/json-schema": "^7.0.15"
},
"peerDependencies": {
"@lobehub/ui": "^5",

View file

@ -1,19 +0,0 @@
export interface PromptVars {
  conversation: string;
}

/**
 * Build the fixed prompt messages for the preference eval, optionally
 * appending the supplied conversation transcript as a trailing user turn.
 */
export const buildActivityMessages = (vars: PromptVars) => {
  const base = [
    { content: 'You are a memory assistant, help the user to organize their preferences with memory related tools', role: 'system' as const },
    { content: 'I love to drink Hong Kong Milk Tea', role: 'user' as const },
  ];
  // An empty transcript yields only the two fixed turns.
  return vars.conversation
    ? [...base, { content: `Conversation:\n${vars.conversation}`, role: 'user' as const }]
    : base;
};

View file

@ -1,14 +0,0 @@
# Promptfoo eval: regression benchmark with forced tool calling.
description: Regression benchmark for activity layer structured extraction
providers:
  - id: openai:chat:google/gemini-2.5-pro
    config:
      # NOTE(review): the description says "activity layer" but the schema
      # below is memory-addPreferenceMemory.json — confirm the intended tool.
      tools: file://../../../../tool-calls/memory-addPreferenceMemory.json
      # Require the model to call one of the provided tools.
      tool_choice:
        type: any
prompts:
  - file://./prompt.ts
tests:
  - file://./tests/cases.ts

View file

@ -1,6 +0,0 @@
import type { PromptVars } from './buildMessages';
import { buildActivityMessages } from './buildMessages';

/** Promptfoo prompt entry point: forward the test-case vars to the builder. */
const generatePrompt = ({ vars }: { vars: PromptVars }) => buildActivityMessages(vars);

export default generatePrompt;

View file

@ -1,106 +0,0 @@
// Assertion shapes accepted by promptfoo: an inline JavaScript predicate, or
// an LLM-graded rubric optionally pinned to a specific grading provider.
type PromptfooAssert =
| { type: 'javascript'; value: string }
| { provider?: string; type: 'llm-rubric'; value: string };
// One promptfoo test case: assertions to run, an optional label, and the
// template variables fed into the prompt under test.
interface PromptfooTestCase {
assert: PromptfooAssert[];
description?: string;
vars: Record<string, unknown>;
}
// Structural check applied to every case: output must parse as JSON and each
// extracted memory must be an 'activity' with title, summary, and a populated
// withActivity payload. `output` is bound by promptfoo at evaluation time.
const baseSchemaAssert: PromptfooAssert = {
type: 'javascript',
value: `
let parsed;
try {
parsed = JSON.parse(output);
} catch (error) {
console.error('Failed to parse JSON output', error);
return false;
}
if (!parsed || !Array.isArray(parsed.memories)) return false;
return parsed.memories.every((memory) => {
return (
memory.memoryType === 'activity' &&
memory.title &&
memory.summary &&
memory.withActivity?.type &&
memory.withActivity?.narrative
);
});
`,
};
// Template variables shared by every case below.
const baseVars = {
availableCategories: ['work', 'health', 'personal'],
language: 'English',
topK: 5,
username: 'User',
};
// Two regression cases: one with explicit temporal/location detail, one
// deliberately vague to check that missing fields are not hallucinated.
const testCases: PromptfooTestCase[] = [
// Case 1: explicit time and location — expect full temporal + location fields.
{
assert: [
baseSchemaAssert,
{
type: 'javascript',
value: `
const data = JSON.parse(output);
const first = data.memories?.[0];
if (!first) return false;
const activity = first.withActivity || {};
return Boolean(activity.startsAt && activity.endsAt && activity.timezone && activity.associatedLocations?.[0]?.name);
`,
},
{
provider: 'openai:gpt-5-mini',
type: 'llm-rubric',
value:
'Should extract a meeting activity including timing (start/end/timezone), location name ACME HQ, status completed when implied, and feedback reflecting the positive tone.',
},
],
description: 'Meeting with explicit time and location',
vars: {
...baseVars,
conversation:
'User: I met with Alice at ACME HQ on 2024-05-03 from 14:00-15:00 America/New_York. We reviewed Q2 renewal scope and agreed to send revised pricing next week. I felt positive and collaborative about the call.',
retrievedContexts: ['Previous similar memory: met with Alice about renewal last month.'],
sessionDate: '2024-05-03',
},
},
// Case 2: no explicit time/timezone — temporal fields must be omitted while
// the narrative and feedback are still captured.
{
assert: [
baseSchemaAssert,
{
type: 'javascript',
value: `
const data = JSON.parse(output);
const first = data.memories?.[0];
if (!first) return false;
const activity = first.withActivity || {};
return Boolean(activity.narrative && activity.feedback);
`,
},
{
provider: 'openai:gpt-5-mini',
type: 'llm-rubric',
value:
'Should capture an exercise activity without inventing exact timestamps or timezones; keep the narrative and feedback about the yoga session at home and omit temporal fields that were not provided.',
},
],
description: 'Exercise without explicit time or timezone',
vars: {
...baseVars,
conversation:
'User: Over the weekend I did a 30-minute yoga session at home with my roommate. No specific time was set, it was just a casual stretch and it left me feeling calm.',
retrievedContexts: [],
sessionDate: '2025-05-05 10:02:00',
},
},
];
export default testCases;

View file

@ -1,192 +0,0 @@
{
"description": "Create a context memory that captures ongoing situations, projects, or environments. Include actors, resources, statuses, urgency/impact, and a clear description.",
"name": "addContextMemory",
"parameters": {
"additionalProperties": false,
"properties": {
"details": {
"description": "Optional detailed information",
"type": "string"
},
"memoryCategory": {
"description": "Memory category",
"type": "string"
},
"memoryType": {
"description": "Memory type",
"enum": [
"activity",
"context",
"event",
"fact",
"location",
"other",
"people",
"preference",
"technology",
"topic"
],
"type": "string"
},
"summary": {
"description": "Concise overview of this specific memory",
"type": "string"
},
"tags": {
"description": "User defined tags that summarize the context facets",
"items": {
"type": "string"
},
"type": "array"
},
"title": {
"description": "Brief descriptive title",
"type": "string"
},
"withContext": {
"additionalProperties": false,
"properties": {
"associatedObjects": {
"description": "Array of objects describing involved roles, entities, or resources, [] empty if none",
"items": {
"additionalProperties": false,
"properties": {
"extra": {
"description": "Additional metadata about the object, should always be a valid JSON string if present",
"type": [
"string",
"null"
]
},
"name": {
"description": "Name of the associated object",
"type": "string"
},
"type": {
"description": "Type/category of the associated object",
"enum": [
"application",
"item",
"knowledge",
"other",
"person",
"place"
],
"type": "string"
}
},
"required": [
"extra",
"name",
"type"
],
"type": "object"
},
"type": "array"
},
"associatedSubjects": {
"description": "Array of JSON objects describing involved subjects or participants, [] empty if none",
"items": {
"additionalProperties": false,
"properties": {
"extra": {
"description": "Additional metadata about the subject, should always be a valid JSON string if present",
"type": [
"string",
"null"
]
},
"name": {
"description": "Name of the associated subject",
"type": "string"
},
"type": {
"description": "Type/category of the associated subject",
"enum": [
"item",
"other",
"person",
"pet"
],
"type": "string"
}
},
"required": [
"extra",
"name",
"type"
],
"type": "object"
},
"type": "array"
},
"currentStatus": {
"description": "High level status markers (must be one of 'planned', 'ongoing', 'completed', 'aborted', 'on_hold', 'cancelled')",
"enum": [
"aborted",
"cancelled",
"completed",
"on_hold",
"ongoing",
"planned"
],
"type": "string"
},
"description": {
"description": "Rich narrative describing the situation, timeline, or environment",
"type": "string"
},
"labels": {
"description": "Model generated tags that summarize the context themes",
"items": {
"type": "string"
},
"type": "array"
},
"scoreImpact": {
"description": "Numeric score (0-1 (0% to 100%)) describing importance",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"scoreUrgency": {
"description": "Numeric score (0-1 (0% to 100%)) describing urgency",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"title": {
"description": "Optional synthesized context headline",
"type": "string"
},
"type": {
"description": "High level context archetype (e.g., 'project', 'relationship', 'goal')",
"type": "string"
}
},
"required": [
"associatedObjects",
"associatedSubjects",
"currentStatus",
"description",
"labels",
"scoreImpact",
"scoreUrgency",
"title",
"type"
],
"type": "object"
}
},
"required": [
"details",
"memoryCategory",
"memoryType",
"summary",
"tags",
"title",
"withContext"
],
"type": "object"
}
}

View file

@ -1,125 +0,0 @@
{
"description": "Record an experience memory capturing situation, actions, reasoning, outcomes, and confidence. Use for lessons, playbooks, or transferable know-how.",
"name": "addExperienceMemory",
"parameters": {
"additionalProperties": false,
"properties": {
"details": {
"description": "Optional detailed information",
"type": "string"
},
"memoryCategory": {
"description": "Memory category",
"type": "string"
},
"memoryType": {
"description": "Memory type",
"enum": [
"activity",
"context",
"event",
"fact",
"location",
"other",
"people",
"preference",
"technology",
"topic"
],
"type": "string"
},
"summary": {
"description": "Concise overview of this specific memory",
"type": "string"
},
"tags": {
"description": "Model generated tags that summarize the experience facets",
"items": {
"type": "string"
},
"type": "array"
},
"title": {
"description": "Brief descriptive title",
"type": "string"
},
"withExperience": {
"additionalProperties": false,
"properties": {
"action": {
"description": "Narrative describing actions taken or behaviors exhibited",
"type": "string"
},
"keyLearning": {
"description": "Narrative describing key insights or lessons learned",
"type": "string"
},
"knowledgeValueScore": {
"description": "Numeric score (0-1) describing how reusable and shareable this experience is",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"labels": {
"description": "Model generated tags that summarize the experience facets",
"items": {
"type": "string"
},
"type": "array"
},
"possibleOutcome": {
"description": "Narrative describing potential outcomes or learnings",
"type": "string"
},
"problemSolvingScore": {
"description": "Numeric score (0-1) describing how effectively the problem was solved",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"reasoning": {
"description": "Narrative describing the thought process or motivations",
"type": "string"
},
"scoreConfidence": {
"description": "Numeric score (0-1 (0% to 100%)) describing confidence in the experience details",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"situation": {
"description": "Narrative describing the situation or event",
"type": "string"
},
"type": {
"description": "Type of experience being recorded",
"type": "string"
}
},
"required": [
"situation",
"reasoning",
"action",
"possibleOutcome",
"keyLearning",
"type",
"labels",
"problemSolvingScore",
"scoreConfidence",
"knowledgeValueScore"
],
"type": "object"
}
},
"required": [
"details",
"memoryCategory",
"memoryType",
"summary",
"tags",
"title",
"withExperience"
],
"type": "object"
}
}

View file

@ -1,148 +0,0 @@
{
"description": "Add an identity memory describing enduring facts about a person, their role, relationship, and supporting evidence. Use to track self/others identities.",
"name": "addIdentityMemory",
"parameters": {
"additionalProperties": false,
"properties": {
"details": {
"description": "Optional detailed information",
"type": [
"string",
"null"
]
},
"memoryCategory": {
"description": "Memory category",
"type": "string"
},
"memoryType": {
"description": "Memory type",
"enum": [
"activity",
"context",
"event",
"fact",
"location",
"other",
"people",
"preference",
"technology",
"topic"
],
"type": "string"
},
"summary": {
"description": "Concise overview of this specific memory",
"type": "string"
},
"tags": {
"description": "Model generated tags that summarize the identity facets",
"items": {
"type": "string"
},
"type": "array"
},
"title": {
"description": "Honorific-style, concise descriptor (strength + domain/milestone), avoid bare job titles; e.g., \"Trusted open-source maintainer\", \"Specializes in low-latency infra\", \"Former Aliyun engineer\", \"Cares for rescue cats\"",
"type": "string"
},
"withIdentity": {
"additionalProperties": false,
"properties": {
"description": {
"type": "string"
},
"episodicDate": {
"type": [
"string",
"null"
]
},
"extractedLabels": {
"items": {
"type": "string"
},
"type": "array"
},
"relationship": {
"enum": [
"aunt",
"brother",
"classmate",
"colleague",
"couple",
"coworker",
"daughter",
"father",
"friend",
"granddaughter",
"grandfather",
"grandmother",
"grandson",
"husband",
"manager",
"mentee",
"mentor",
"mother",
"nephew",
"niece",
"other",
"partner",
"self",
"sibling",
"sister",
"son",
"spouse",
"teammate",
"uncle",
"wife"
],
"type": "string"
},
"role": {
"description": "Role explicitly mentioned for this identity entry (e.g., \"platform engineer\", \"caregiver\"); keep neutral and only use when evidence exists",
"type": "string"
},
"scoreConfidence": {
"type": "number"
},
"sourceEvidence": {
"type": [
"string",
"null"
]
},
"type": {
"enum": [
"demographic",
"personal",
"professional"
],
"type": "string"
}
},
"required": [
"description",
"episodicDate",
"extractedLabels",
"relationship",
"role",
"scoreConfidence",
"sourceEvidence",
"type"
],
"type": "object"
}
},
"required": [
"details",
"memoryCategory",
"memoryType",
"summary",
"tags",
"title",
"withIdentity"
],
"type": "object"
}
}

View file

@ -1,200 +0,0 @@
{
"description": "Create a preference memory that encodes durable directives or choices the assistant should follow. Include conclusionDirectives, scopes, and context.",
"name": "addPreferenceMemory",
"parameters": {
"additionalProperties": false,
"properties": {
"details": {
"description": "Optional detailed information",
"type": "string"
},
"memoryCategory": {
"description": "Memory category",
"type": "string"
},
"memoryType": {
"description": "Memory type",
"enum": [
"activity",
"context",
"event",
"fact",
"location",
"other",
"people",
"preference",
"technology",
"topic"
],
"type": "string"
},
"summary": {
"description": "Concise overview of this specific memory",
"type": "string"
},
"tags": {
"description": "Model generated tags that summarize the preference facets",
"items": {
"type": "string"
},
"type": "array"
},
"title": {
"description": "Brief descriptive title",
"type": "string"
},
"withPreference": {
"additionalProperties": false,
"properties": {
"appContext": {
"additionalProperties": false,
"description": "Application/surface specific preference, if any",
"properties": {
"app": {
"description": "App or product name this applies to",
"type": [
"string",
"null"
]
},
"feature": {
"type": [
"string",
"null"
]
},
"route": {
"type": [
"string",
"null"
]
},
"surface": {
"description": "e.g., chat, emails, code review, notes",
"type": [
"string",
"null"
]
}
},
"required": [
"app",
"feature",
"route",
"surface"
],
"type": [
"object",
"null"
]
},
"conclusionDirectives": {
"description": "Direct, self-contained instruction to the assistant from the user's perspective (what to do, not how to implement)",
"type": "string"
},
"extractedLabels": {
"description": "Model generated tags that summarize the preference facets",
"items": {
"type": "string"
},
"type": "array"
},
"extractedScopes": {
"description": "Array of JSON strings describing preference facets and applicable scopes",
"items": {
"type": "string"
},
"type": "array"
},
"originContext": {
"additionalProperties": false,
"description": "Context of how/why this preference was expressed",
"properties": {
"actor": {
"description": "Who stated the preference; use 'User' for the user",
"type": "string"
},
"applicableWhen": {
"description": "Conditions where this preference applies",
"type": [
"string",
"null"
]
},
"notApplicableWhen": {
"description": "Conditions where it does not apply",
"type": [
"string",
"null"
]
},
"scenario": {
"description": "Applicable scenario or use case",
"type": [
"string",
"null"
]
},
"trigger": {
"description": "What prompted this preference",
"type": [
"string",
"null"
]
}
},
"required": [
"actor",
"applicableWhen",
"notApplicableWhen",
"scenario",
"trigger"
],
"type": [
"object",
"null"
]
},
"scorePriority": {
"description": "Numeric prioritization weight (0-1 (0% to 100%)) where higher means more critical to respect",
"maximum": 1,
"minimum": 0,
"type": "number"
},
"suggestions": {
"description": "Follow-up actions or assistant guidance derived from the preference",
"items": {
"type": "string"
},
"type": "array"
},
"type": {
"description": "High level preference classification (e.g., 'lifestyle', 'communication')",
"type": "string"
}
},
"required": [
"appContext",
"conclusionDirectives",
"extractedLabels",
"extractedScopes",
"originContext",
"scorePriority",
"suggestions",
"type"
],
"type": "object"
}
},
"required": [
"title",
"summary",
"tags",
"details",
"memoryCategory",
"memoryType",
"withPreference"
],
"type": "object"
}
}

View file

@ -1,20 +0,0 @@
{
"description": "Remove an identity memory when it is incorrect, obsolete, or duplicated. Always provide a concise reason.",
"name": "removeIdentityMemory",
"parameters": {
"additionalProperties": false,
"properties": {
"id": {
"type": "string"
},
"reason": {
"type": "string"
}
},
"required": [
"id",
"reason"
],
"type": "object"
}
}

View file

@ -1,136 +0,0 @@
{
"description": "Retrieve memories using one or more search queries plus optional structured filters, including calendar-friendly timeIntent selectors.",
"name": "searchUserMemory",
"parameters": {
"additionalProperties": false,
"definitions": {
"searchMemoryTimeIntent": {
"additionalProperties": false,
"properties": {
"anchor": {
"description": "Anchor for relativeDay. Supports the legacy string values \"today\" and \"yesterday\", or another timeIntent object such as { \"selector\": \"day\", \"date\": \"2025-12-15T00:00:00.000Z\" }.",
"oneOf": [
{
"enum": ["today", "yesterday"],
"type": "string"
},
{
"$ref": "#/definitions/searchMemoryTimeIntent"
}
]
},
"date": {
"format": "date-time",
"type": "string"
},
"end": {
"format": "date-time",
"type": "string"
},
"month": {
"maximum": 12,
"minimum": 1,
"type": "integer"
},
"offsetDays": {
"type": "integer"
},
"selector": {
"enum": [
"today",
"yesterday",
"currentWeek",
"lastWeek",
"lastWeekend",
"lastWeekdays",
"currentMonth",
"lastMonth",
"currentYear",
"lastYear",
"day",
"month",
"year",
"relativeDay",
"range"
],
"type": "string"
},
"start": {
"format": "date-time",
"type": "string"
},
"year": {
"maximum": 9999,
"minimum": 1970,
"type": "integer"
}
},
"required": ["selector"],
"type": "object"
}
},
"properties": {
"layers": {
"items": {
"enum": ["activity", "context", "experience", "identity", "preference"],
"type": "string"
},
"type": "array"
},
"queries": {
"items": {
"type": "string"
},
"type": "array"
},
"timeIntent": {
"$ref": "#/definitions/searchMemoryTimeIntent"
},
"timeRange": {
"additionalProperties": false,
"properties": {
"end": {
"format": "date-time",
"type": "string"
},
"field": {
"enum": ["capturedAt", "createdAt", "endsAt", "episodicDate", "startsAt", "updatedAt"],
"type": "string"
},
"start": {
"format": "date-time",
"type": "string"
}
},
"type": "object"
},
"topK": {
"additionalProperties": false,
"properties": {
"activities": {
"minimum": 0,
"type": "integer"
},
"contexts": {
"minimum": 0,
"type": "integer"
},
"experiences": {
"minimum": 0,
"type": "integer"
},
"identities": {
"minimum": 0,
"type": "integer"
},
"preferences": {
"minimum": 0,
"type": "integer"
}
},
"type": "object"
}
},
"type": "object"
}
}

View file

@ -1,153 +0,0 @@
{
"description": "Update an existing identity memory with refined details, relationships, roles, or tags. Use mergeStrategy to control replacement vs merge.",
"name": "updateIdentityMemory",
"parameters": {
"additionalProperties": false,
"properties": {
"id": {
"type": "string"
},
"mergeStrategy": {
"enum": [
"merge",
"replace"
],
"type": "string"
},
"set": {
"additionalProperties": false,
"properties": {
"details": {
"description": "Optional detailed information, use null for omitting the field",
"type": [
"string",
"null"
]
},
"memoryCategory": {
"description": "Memory category, use null for omitting the field",
"type": [
"string",
"null"
]
},
"memoryType": {
"description": "Memory type, use null for omitting the field",
"enum": [
"activity",
"context",
"event",
"fact",
"location",
"other",
"people",
"preference",
"technology",
"topic",
null
]
},
"summary": {
"description": "Concise overview of this specific memory, use null for omitting the field",
"type": [
"string",
"null"
]
},
"tags": {
"description": "Model generated tags that summarize the identity facets, use null for omitting the field",
"items": {
"type": "string"
},
"type": [
"array",
"null"
]
},
"title": {
"description": "Honorific-style, concise descriptor (strength + domain/milestone), avoid bare job titles; e.g., \"Trusted open-source maintainer\", \"Specializes in low-latency infra\", \"Former Aliyun engineer\", \"Cares for rescue cats\"; use null for omitting the field",
"type": [
"string",
"null"
]
},
"withIdentity": {
"additionalProperties": false,
"properties": {
"description": {
"type": [
"string",
"null"
]
},
"episodicDate": {
"type": [
"string",
"null"
]
},
"extractedLabels": {
"items": {
"type": "string"
},
"type": [
"array",
"null"
]
},
"relationship": {
"description": "Possible values: aunt | brother | classmate | colleague | couple | coworker | daughter | father | friend | granddaughter | grandfather | grandmother | grandson | husband | manager | mentee | mentor | mother | nephew | niece | other | partner | self | sibling | sister | son | spouse | teammate | uncle | wife",
"type": [
"string",
"null"
]
},
"role": {
"description": "Role explicitly mentioned for this identity entry (e.g., \"platform engineer\", \"caregiver\"); keep existing when not updated; use null for omitting the field",
"type": [
"string",
"null"
]
},
"scoreConfidence": {
"type": [
"number",
"null"
]
},
"sourceEvidence": {
"type": [
"string",
"null"
]
},
"type": {
"description": "Possible values: demographic | personal | professional",
"type": [
"string",
"null"
]
}
},
"required": [
"description",
"extractedLabels",
"role"
],
"type": "object"
}
},
"required": [
"withIdentity"
],
"type": "object"
}
},
"required": [
"id",
"mergeStrategy",
"set"
],
"type": "object"
}
}

View file

@ -1,33 +0,0 @@
description: LobeHub Prompts (memory-user-memory) Testing Suite
# Test configurations - run all prompt tests
testPaths:
- promptfoo/evals/preferences/tool-call/basic/eval.yaml
# Default provider setup (must specify either providers or targets)
providers:
- id: google:gemini-2.5-pro
# Output configuration
outputPath: promptfoo-results.json
# Default test settings
defaultTest:
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should be relevant and well-formatted"
- type: llm-rubric
provider: google:gemini-2.5-flash
value: "The response should be relevant and well-formatted"
- type: cost
threshold: 0.01 # Maximum cost per test in USD
# Evaluation settings
evaluateOptions:
maxConcurrency: 5
delay: 100
# TypeScript support
transforms:
- "typescript"

View file

@ -1,32 +0,0 @@
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { exit } from 'node:process';
import type { BuiltinToolManifest } from '@lobechat/types';
import { MemoryManifest } from '../../builtin-tool-memory';

// Directory the promptfoo tool-call schemas are generated into.
const OUTPUT_DIR = join(process.cwd(), 'promptfoo/tool-calls');

/**
 * Write one `<prefix>-<toolName>.json` file per API entry in the manifest,
 * tagging each schema with `type: 'function'` as promptfoo expects.
 */
const writeToolCallSchemaFromManifest = async (prefix: string, manifest: BuiltinToolManifest) => {
  for (const tool of manifest.api) {
    const schema = {
      ...tool,
      type: 'function',
    };
    const target = join(OUTPUT_DIR, `${prefix}-${schema.name}.json`);
    await writeFile(target, JSON.stringify(schema, null, 2));
  }
};

async function main() {
  await mkdir(OUTPUT_DIR, { recursive: true });
  await writeToolCallSchemaFromManifest('memory', MemoryManifest);
}

main().catch((err) => {
  console.error(err);
  exit(1);
});

View file

@ -11,7 +11,6 @@
},
"main": "src/index.ts",
"scripts": {
"build:gen-response-formats": "tsx scripts/generate-response-formats.ts",
"test": "vitest --run",
"test:coverage": "vitest --coverage --silent='passed-only'",
"type-check": "tsgo --noEmit -p tsconfig.json"
@ -33,7 +32,6 @@
"@types/json-schema": "^7.0.15",
"@types/xast": "^2.0.4",
"picospinner": "^3.0.0",
"promptfoo": "^0.120.17",
"tsx": "^4.20.6"
},
"peerDependencies": {

View file

@ -1,40 +0,0 @@
import { renderPlaceholderTemplate } from '@lobechat/context-engine';
import { activityPrompt } from '../../../../src/prompts';
import type { ExtractorTemplateProps } from '../../../../src/types';
// Variables for this eval's prompt: the standard extractor template props
// plus the raw conversation transcript under test.
export interface PromptVars extends ExtractorTemplateProps {
conversation: string;
}
// Build the promptfoo message list for the activity-extraction eval: render
// the activity prompt template (with defaults for missing vars) and append
// the conversation as a final user turn when present.
export const buildActivityMessages = (vars: PromptVars) => {
// Normalize retrievedContexts: join a non-empty array, pass a string
// through, otherwise fall back to an explicit "none" marker.
const retrievedContext =
Array.isArray(vars.retrievedContexts) && vars.retrievedContexts.length > 0
? vars.retrievedContexts.join('\n\n')
: typeof vars.retrievedContexts === 'string'
? vars.retrievedContexts
: 'No similar memories retrieved.';
const rendered = renderPlaceholderTemplate(activityPrompt, {
availableCategories: vars.availableCategories,
language: vars.language || 'English',
retrievedContext,
sessionDate: vars.sessionDate || new Date().toISOString(),
topK: vars.topK ?? 5,
username: vars.username || 'User',
});
// NOTE(review): the same rendered prompt is sent as both the system and the
// user message — presumably so providers that ignore system prompts still
// see it; confirm this duplication is intentional.
const messages = [
{ content: rendered, role: 'system' as const },
{ content: rendered, role: 'user' as const },
];
if (vars.conversation) {
messages.push({
content: `Conversation:\n${vars.conversation}`,
role: 'user' as const,
});
}
return messages;
};

View file

@ -1,13 +0,0 @@
# Promptfoo eval: regression benchmark using a structured response format.
description: Regression benchmark for activity layer structured extraction
providers:
  - id: openai:chat:gpt-5-mini
    config:
      # JSON schema contract for the activity extraction output.
      response_format: file://../../../response-formats/activity.json
      # Deterministic decoding for regression stability.
      temperature: 0
prompts:
  - file://./prompt.ts
tests:
  - file://./tests/cases.ts

View file

@ -1,6 +0,0 @@
import { buildActivityMessages } from './buildMessages';
import type { PromptVars } from './buildMessages';

/** Promptfoo prompt entry point for this eval: delegate to the shared builder. */
export default function generatePrompt(args: { vars: PromptVars }) {
  return buildActivityMessages(args.vars);
}

View file

@ -1,106 +0,0 @@
type PromptfooAssert =
| { type: 'javascript'; value: string }
| { provider?: string; type: 'llm-rubric'; value: string };
interface PromptfooTestCase {
assert: PromptfooAssert[];
description?: string;
vars: Record<string, unknown>;
}
const baseSchemaAssert: PromptfooAssert = {
type: 'javascript',
value: `
let parsed;
try {
parsed = JSON.parse(output);
} catch (error) {
console.error('Failed to parse JSON output', error);
return false;
}
if (!parsed || !Array.isArray(parsed.memories)) return false;
return parsed.memories.every((memory) => {
return (
memory.memoryType === 'activity' &&
memory.title &&
memory.summary &&
memory.withActivity?.type &&
memory.withActivity?.narrative
);
});
`,
};
const baseVars = {
availableCategories: ['work', 'health', 'personal'],
language: 'English',
topK: 5,
username: 'User',
};
const testCases: PromptfooTestCase[] = [
{
assert: [
baseSchemaAssert,
{
type: 'javascript',
value: `
const data = JSON.parse(output);
const first = data.memories?.[0];
if (!first) return false;
const activity = first.withActivity || {};
return Boolean(activity.startsAt && activity.endsAt && activity.timezone && activity.associatedLocations?.[0]?.name);
`,
},
{
provider: 'openai:gpt-5-mini',
type: 'llm-rubric',
value:
'Should extract a meeting activity including timing (start/end/timezone), location name ACME HQ, status completed when implied, and feedback reflecting the positive tone.',
},
],
description: 'Meeting with explicit time and location',
vars: {
...baseVars,
conversation:
'User: I met with Alice at ACME HQ on 2024-05-03 from 14:00-15:00 America/New_York. We reviewed Q2 renewal scope and agreed to send revised pricing next week. I felt positive and collaborative about the call.',
retrievedContexts: ['Previous similar memory: met with Alice about renewal last month.'],
sessionDate: '2024-05-03',
},
},
{
assert: [
baseSchemaAssert,
{
type: 'javascript',
value: `
const data = JSON.parse(output);
const first = data.memories?.[0];
if (!first) return false;
const activity = first.withActivity || {};
return Boolean(activity.narrative && activity.feedback);
`,
},
{
provider: 'openai:gpt-5-mini',
type: 'llm-rubric',
value:
'Should capture an exercise activity without inventing exact timestamps or timezones; keep the narrative and feedback about the yoga session at home and omit temporal fields that were not provided.',
},
],
description: 'Exercise without explicit time or timezone',
vars: {
...baseVars,
conversation:
'User: Over the weekend I did a 30-minute yoga session at home with my roommate. No specific time was set, it was just a casual stretch and it left me feeling calm.',
retrievedContexts: [],
sessionDate: '2025-05-05 10:02:00',
},
},
];
export default testCases;

View file

@ -1,112 +0,0 @@
import { readFile } from 'node:fs/promises';
import { isAbsolute, join } from 'node:path';
import { renderPlaceholderTemplate } from '@lobechat/context-engine';
import { MemorySourceType } from '@lobechat/types';
import type { IngestPayload } from '../../../../src/converters/locomo';
import { activityPrompt } from '../../../../src/prompts';
import type { BenchmarkLocomoPart } from '../../../../src/providers';
import { BenchmarkLocomoContextProvider } from '../../../../src/providers';
import type { ExtractorTemplateProps, MemoryExtractionJob } from '../../../../src/types';
// Promptfoo vars for the LoCoMo eval: the standard extractor template props
// plus the path to the ingest payload JSON and optional session/user filters.
export interface PromptVars extends ExtractorTemplateProps {
payloadPath: string;
sessionId?: string;
userId?: string;
}
// Resolve a payload path against the current working directory unless it is
// already absolute.
const resolvePath = (payloadPath: string): string => {
  if (isAbsolute(payloadPath)) return payloadPath;
  return join(process.cwd(), payloadPath);
};
// Flatten the payload's sessions into ordered benchmark parts, optionally
// restricted to a single session id. partIndex increments across the whole
// result, so cross-session ordering is preserved.
const buildParts = (payload: IngestPayload, sessionId?: string): BenchmarkLocomoPart[] => {
  const parts: BenchmarkLocomoPart[] = [];
  for (const session of payload.sessions) {
    if (sessionId && session.sessionId !== sessionId) continue;
    for (const turn of session.turns) {
      parts.push({
        content: turn.text,
        // A turn without its own timestamp inherits the session timestamp.
        createdAt: turn.createdAt || session.timestamp,
        metadata: {
          diaId: turn.diaId,
          imageCaption: turn.imageCaption,
          imageUrls: turn.imageUrls,
          sessionId: session.sessionId,
        },
        // Equals the running global index, same as the original counter.
        partIndex: parts.length,
        sessionId: session.sessionId,
        speaker: turn.speaker,
      });
    }
  }
  return parts;
};
// Pick a representative session date: the matching (or first) session's
// timestamp, otherwise the latest part createdAt, otherwise "now".
const resolveSessionDate = (
  payload: IngestPayload,
  parts: BenchmarkLocomoPart[],
  sessionId?: string,
) => {
  const fromSession =
    payload.sessions.find((session) => session.sessionId === sessionId)?.timestamp ||
    payload.sessions[0]?.timestamp;
  if (fromSession) return fromSession;
  // Fall back to the most recent createdAt across all parts.
  let latest: Date | null = null;
  for (const part of parts) {
    if (!part.createdAt) continue;
    const candidate = new Date(part.createdAt);
    if (!latest || candidate.getTime() > latest.getTime()) latest = candidate;
  }
  return latest ? latest.toISOString() : new Date().toISOString();
};
// Build the LoCoMo eval messages: load the ingest payload from disk, flatten
// the requested session(s) into parts, derive a session date, then render the
// activity prompt with benchmark context assembled by the provider.
export const buildLocomoActivityMessages = async (vars: PromptVars) => {
const payloadPath = resolvePath(vars.payloadPath);
const payloadRaw = await readFile(payloadPath, 'utf8');
// NOTE(review): the payload JSON is cast, not validated — a malformed file
// only surfaces later as a runtime error.
const payload = JSON.parse(payloadRaw) as IngestPayload;
const parts = buildParts(payload, vars.sessionId);
if (parts.length === 0) {
throw new Error(
`No matching parts found in ${payload.sampleId} for session ${vars.sessionId || 'all'}`,
);
}
// Stable fallback identifiers so eval runs are reproducible per sample.
const userId = vars.userId || `locomo-user-${payload.sampleId}`;
const sourceId = payload.topicId || `sample_${payload.sampleId}`;
const sessionDate = vars.sessionDate || resolveSessionDate(payload, parts, vars.sessionId);
const provider = new BenchmarkLocomoContextProvider({
parts,
sampleId: payload.sampleId,
sourceId,
userId,
});
const extractionJob: MemoryExtractionJob = {
source: MemorySourceType.BenchmarkLocomo,
sourceId,
userId,
};
// The provider assembles the retrieved-context string for the template.
const { context } = await provider.buildContext(extractionJob.userId);
const rendered = renderPlaceholderTemplate(activityPrompt, {
availableCategories: vars.availableCategories,
language: vars.language || 'English',
retrievedContext: context || 'No similar memories retrieved.',
sessionDate,
topK: vars.topK ?? 5,
username: vars.username || 'User',
});
// NOTE(review): same rendered prompt as both system and user turn —
// presumably intentional for providers that drop system messages; confirm.
return [
{ content: rendered, role: 'system' as const },
{ content: rendered, role: 'user' as const },
];
};

View file

@ -1,13 +0,0 @@
description: LoCoMo regression for activity layer with relative time resolution
providers:
- id: openai:chat:gpt-5-mini
config:
response_format: file://../../../response-formats/activity.json
temperature: 0
prompts:
- file://./prompt.ts
tests:
- file://./tests/cases.ts

View file

@ -1,6 +0,0 @@
import type { PromptVars } from './buildMessages';
import { buildLocomoActivityMessages } from './buildMessages';
// Promptfoo prompt entry point — delegates to the shared LoCoMo builder.
export default async function generatePrompt({ vars }: { vars: PromptVars }) {
  const messages = await buildLocomoActivityMessages(vars);
  return messages;
}

View file

@ -1,149 +0,0 @@
{
"force": true,
"layers": [],
"sampleId": "conv-26",
"sessions": [
{
"sessionId": "session_1",
"timestamp": "2023-05-08T13:56:00.000Z",
"turns": [
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:1",
"role": "user",
"speaker": "Caroline",
"text": "Hey Mel! Good to see you! How have you been?"
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:2",
"role": "assistant",
"speaker": "Melanie",
"text": "Hey Caroline! Good to see you! I'm swamped with the kids & work. What's up with you? Anything new?"
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:3",
"role": "user",
"speaker": "Caroline",
"text": "I went to a LGBTQ support group yesterday and it was so powerful."
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:4",
"role": "assistant",
"speaker": "Melanie",
"text": "Wow, that's cool, Caroline! What happened that was so awesome? Did you hear any inspiring stories?"
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:5",
"imageCaption": "a photo of a dog walking past a wall with a painting of a woman",
"imageUrls": [
"https://i.redd.it/l7hozpetnhlb1.jpg"
],
"role": "user",
"speaker": "Caroline",
"text": "The transgender stories were so inspiring! I was so happy and thankful for all the support.\n[Image: a photo of a dog walking past a wall with a painting of a woman]"
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:6",
"role": "assistant",
"speaker": "Melanie",
"text": "Wow, love that painting! So cool you found such a helpful group. What's it done for you?"
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:7",
"role": "user",
"speaker": "Caroline",
"text": "The support group has made me feel accepted and given me courage to embrace myself."
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:8",
"role": "assistant",
"speaker": "Melanie",
"text": "That's really cool. You've got guts. What now?"
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:9",
"role": "user",
"speaker": "Caroline",
"text": "Gonna continue my edu and check out career options, which is pretty exciting!"
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:10",
"role": "assistant",
"speaker": "Melanie",
"text": "Wow, Caroline! What kinda jobs are you thinkin' of? Anything that stands out?"
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:11",
"role": "user",
"speaker": "Caroline",
"text": "I'm keen on counseling or working in mental health - I'd love to support those with similar issues."
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:12",
"imageCaption": "a photo of a painting of a sunset over a lake",
"imageUrls": [
"http://candicealexander.com/cdn/shop/products/IMG_7269_a49d5af8-c76c-4ecd-ae20-48c08cb11dec.jpg"
],
"role": "assistant",
"speaker": "Melanie",
"text": "You'd be a great counselor! Your empathy and understanding will really help the people you work with. By the way, take a look at this.\n[Image: a photo of a painting of a sunset over a lake]"
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:13",
"role": "user",
"speaker": "Caroline",
"text": "Thanks, Melanie! That's really sweet. Is this your own painting?"
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:14",
"role": "assistant",
"speaker": "Melanie",
"text": "Yeah, I painted that lake sunrise last year! It's special to me."
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:15",
"role": "user",
"speaker": "Caroline",
"text": "Wow, Melanie! The colors really blend nicely. Painting looks like a great outlet for expressing yourself."
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:16",
"role": "assistant",
"speaker": "Melanie",
"text": "Thanks, Caroline! Painting's a fun way to express my feelings and get creative. It's a great way to relax after a long day."
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:17",
"role": "user",
"speaker": "Caroline",
"text": "Totally agree, Mel. Relaxing and expressing ourselves is key. Well, I'm off to go do some research."
},
{
"createdAt": "2023-05-08T13:56:00.000Z",
"diaId": "D1:18",
"role": "assistant",
"speaker": "Melanie",
"text": "Yep, Caroline. Taking care of ourselves is vital. I'm off to go swimming with the kids. Talk to you soon!"
}
]
}
],
"source": "benchmark_locomo",
"topicId": "sample_conv-26"
}

View file

@ -1,72 +0,0 @@
// Assertion shapes accepted by promptfoo: an inline JavaScript check or an
// LLM-graded rubric.
type PromptfooAssert =
  | { type: 'javascript'; value: string }
  | { provider?: string; type: 'llm-rubric'; value: string };

// One promptfoo test case: assertions, optional description, and the template
// variables passed to the prompt builder.
interface PromptfooTestCase {
  assert: PromptfooAssert[];
  description?: string;
  vars: Record<string, unknown>;
}

// Structural gate shared by all cases: output must parse as JSON and every
// extracted memory must be activity-typed with a structured activity payload.
const baseSchemaAssert: PromptfooAssert = {
  type: 'javascript',
  value: `
let parsed;
try {
parsed = JSON.parse(output);
} catch (error) {
console.error('Failed to parse JSON output', error);
return false;
}
if (!parsed || !Array.isArray(parsed.memories)) return false;
return parsed.memories.every((memory) => {
return memory.memoryType === 'activity' && memory.withActivity?.type;
});
`,
};

const testCases: PromptfooTestCase[] = [
  {
    assert: [
      baseSchemaAssert,
      // Deterministic check: the support-group memory must resolve the
      // relative "yesterday" into an absolute 2023-05-07 start date.
      {
        type: 'javascript',
        value: `
const data = JSON.parse(output);
const target = data.memories?.find((memory) => {
const text = [memory.title, memory.summary, memory.withActivity?.narrative]
.filter(Boolean)
.join(' ')
.toLowerCase();
return text.includes('support group');
});
if (!target) return false;
const startsAt = target.withActivity?.startsAt;
if (!startsAt) return false;
return String(startsAt).startsWith('2023-05-07');
`,
      },
      // LLM-graded rubric covering extraction and narrative quality beyond
      // the mechanical date check above.
      {
        provider: 'openai:gpt-5-mini',
        type: 'llm-rubric',
        value:
          'Should extract the LGBTQ support group activity from session_1 diaId D1:3, convert "yesterday" relative to the 2023-05-08 session anchor into 2023-05-07, and include a narrative about feeling supported/accepted.',
      },
    ],
    description: 'LoCoMo conv-26 session_1 resolves relative date',
    vars: {
      availableCategories: ['personal'],
      language: 'English',
      // Path is resolved relative to the promptfoo working directory.
      payloadPath: './promptfoo/evals/activity/locomo/tests/benchmark-locomo-payload-conv-26.json',
      sessionId: 'session_1',
      topK: 3,
      username: 'Caroline',
    },
  },
];

export default testCases;

View file

@ -1,69 +0,0 @@
import { readFile } from 'node:fs/promises';
import { join } from 'node:path';
import { renderPlaceholderTemplate } from '@lobechat/context-engine';
// Minimal shape of a recorded memory-extraction trace that this eval consumes.
interface TracePayload {
  // Raw per-layer agent call records keyed by layer name (e.g. 'layer-identity').
  agentCalls?: Record<string, any>;
  contexts?: {
    trimmed?: {
      // Retrieved similar-memory snippets, joined into the prompt context.
      retrievedContexts?: string[];
      // Pre-rendered context of identities already stored for the user.
      retrievedIdentitiesContext?: string;
    };
  };
  extractionJob?: {
    // Used as the session anchor date when vars.sessionDate is absent.
    sourceUpdatedAt?: string;
  };
}
// Variables promptfoo passes into this prompt builder.
export interface PromptVars {
  availableCategories?: string[];
  language?: string;
  sessionDate?: string;
  topK?: number;
  // Path to the recorded trace JSON fixture to replay.
  tracePath: string;
  username?: string;
}
/**
 * Recover the output language recorded in a trace.
 *
 * The second request message of the 'layer-identity' agent call carries the
 * language instruction; extract the language name from that sentence.
 */
const parseLanguageFromTrace = (trace: TracePayload): string | undefined => {
  const content = trace.agentCalls?.['layer-identity']?.request?.messages?.[1]?.content;
  if (typeof content !== 'string') return undefined;
  const languageMatch = /ensure all the content is using ([^\n.]+)\./i.exec(content);
  return languageMatch ? languageMatch[1] : undefined;
};
/**
 * Build the promptfoo chat messages for the identity dedupe regression.
 *
 * Reads a recorded extraction trace JSON plus the identity layer prompt
 * template from disk, then re-renders the prompt with the trace's trimmed
 * retrieval contexts so the dedupe behaviour can be replayed.
 */
export const buildIdentityDedupeMessages = async (vars: PromptVars) => {
  const traceRaw = await readFile(vars.tracePath, 'utf8');
  const trace = JSON.parse(traceRaw) as TracePayload;
  // NOTE(review): template path resolves from process.cwd(), so this must run
  // from the package root — confirm the promptfoo invocation directory.
  const promptTemplate = await readFile(
    join(process.cwd(), 'src/prompts/layers/identity.md'),
    'utf8',
  );
  const retrievedContexts = trace.contexts?.trimmed?.retrievedContexts ?? [];
  const existingIdentitiesContext = trace.contexts?.trimmed?.retrievedIdentitiesContext ?? '';
  // Explicit vars win; otherwise recover the language recorded in the trace,
  // defaulting to zh-CN.
  const language = vars.language || parseLanguageFromTrace(trace) || 'zh-CN';
  const username = vars.username || 'User';
  const sessionDate =
    vars.sessionDate || trace.extractionJob?.sourceUpdatedAt || new Date().toISOString();
  const topK = vars.topK ?? 10;
  const rendered = renderPlaceholderTemplate(promptTemplate, {
    availableCategories: vars.availableCategories,
    existingIdentitiesContext,
    language,
    retrievedContext: retrievedContexts.join('\n\n') || 'No similar memories retrieved.',
    sessionDate,
    topK,
    username,
  });
  // NOTE(review): the same rendered text is used for both the system and the
  // user message — appears intentional for this eval harness; confirm.
  return [
    { content: rendered, role: 'system' },
    { content: rendered, role: 'user' },
  ];
};

View file

@ -1,12 +0,0 @@
description: Regression benchmark for identity (memory-user-memory)
providers:
- id: openai:chat:gpt-5-mini
config:
response_format: file://../../../response-formats/identity.json
prompts:
- file://./prompt.ts
tests:
- file://./tests/cases.ts

View file

@ -1,6 +0,0 @@
import type { PromptVars } from './buildMessages';
import { buildIdentityDedupeMessages } from './buildMessages';
// Promptfoo prompt entry point — delegates to the identity dedupe builder.
export default async function generatePrompt({ vars }: { vars: PromptVars }) {
  const messages = await buildIdentityDedupeMessages(vars);
  return messages;
}

View file

@ -1,60 +0,0 @@
import { readdirSync, readFileSync } from 'node:fs';
import { basename, dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
import type { MemoryExtractionTracePayload } from '@lobechat/types';
// Assertion shapes accepted by promptfoo: an inline JavaScript check or an
// LLM-graded rubric.
type PromptfooAssert =
  | { type: 'javascript'; value: string }
  | { provider?: string; type: 'llm-rubric'; value: string };

interface PromptfooTestCase {
  assert: PromptfooAssert[];
  description: string;
  vars: Record<string, unknown>;
}

// Directory containing the trace fixtures, resolved relative to this file.
const tracesDir = join(dirname(fileURLToPath(import.meta.url)), '../datasets');

// Dedupe expectation: replaying a recorded trace must not produce any new
// identity "add" actions, since those identities already exist.
const identityShouldDedupe: PromptfooAssert = {
  type: 'javascript',
  value: `
const jsonOutput = JSON.parse(output);
return (
!jsonOutput?.withIdentities?.actions?.add ||
jsonOutput.withIdentities.actions.add.length === 0
);
`,
};
/**
 * Compose a human-readable test title for a trace, substituting placeholders
 * when the payload lacks identifying fields.
 */
const buildDescription = (tracePath: string, payload: MemoryExtractionTracePayload) => {
  const { userId, extractionJob } = payload;
  const segments = [
    'Identity - User',
    userId || 'unknown-user',
    extractionJob?.source || 'UnknownSource',
    // Fall back to the fixture file name when no sourceId was recorded.
    extractionJob?.sourceId || basename(tracePath),
    'should not generate add actions',
  ];
  return segments.join(' ');
};
// Generate a test case for every trace JSON under datasets/traces.
// NOTE(review): tracesDir actually points at '../datasets' (no 'traces'
// subdirectory) — confirm which layout is intended.
const testCases: PromptfooTestCase[] = readdirSync(tracesDir)
  .filter((file) => file.endsWith('.json'))
  .map((file) => {
    const tracePath = join(tracesDir, file);
    // Parse eagerly (at module load) so the description can embed the
    // user/source identifiers recorded in the trace.
    const tracePayload = JSON.parse(
      readFileSync(tracePath, 'utf8'),
    ) as MemoryExtractionTracePayload;
    return {
      assert: [identityShouldDedupe],
      description: buildDescription(tracePath, tracePayload),
      vars: {
        layer: 'Identity',
        source: tracePayload.extractionJob?.source,
        sourceId: tracePayload.extractionJob?.sourceId,
        tracePath,
        userId: tracePayload.userId,
      },
    };
  });

export default testCases;

View file

@ -1,13 +0,0 @@
description: User persona prompt regression
providers:
- id: openai:chat:gpt-5-mini
config:
tools: file://../../response-formats/persona-tools.json
tool_choice: required
prompts:
- file://./prompt.ts
tests:
- file://./tests/cases.ts

View file

@ -1,39 +0,0 @@
import { renderPlaceholderTemplate } from '@lobechat/context-engine';
import { userPersonaPrompt } from '../../../src/prompts/persona';
// Variables promptfoo passes into the persona prompt builder. Optional
// sections are replaced with fixed fallback text when omitted.
interface PersonaPromptVars {
  existingPersona?: string;
  language: string;
  personaNotes?: string;
  recentEvents?: string;
  retrievedMemories?: string;
  username: string;
  userProfile?: string;
}
/**
 * Promptfoo entry point for the user-persona regression.
 *
 * Renders the persona system prompt from the shared template and assembles a
 * single user message out of labelled sections, substituting fixed fallbacks
 * for any section the caller omitted.
 */
export default async function generatePrompt({ vars }: { vars: PersonaPromptVars }) {
  const systemContent = renderPlaceholderTemplate(userPersonaPrompt, {
    language: vars.language,
    topK: 10,
    username: vars.username,
  });
  // Header/body pairs, flattened in order and separated by blank lines.
  const sections: [string, string][] = [
    ['## Existing Persona (baseline)', vars.existingPersona || 'No existing persona provided.'],
    ['## Retrieved Memories / Signals', vars.retrievedMemories || 'N/A'],
    ['## Recent Events or Highlights', vars.recentEvents || 'N/A'],
    ['## User Provided Notes or Requests', vars.personaNotes || 'N/A'],
    ['## Extra Profile Context', vars.userProfile || 'N/A'],
  ];
  const userContent = sections.flatMap((pair) => pair).join('\n\n');
  return [
    { content: systemContent, role: 'system' },
    { content: userContent, role: 'user' },
  ];
}

View file

@ -1,45 +0,0 @@
// Inline check: every returned tool call must target commit_user_persona and
// carry a non-trivial persona string in its (possibly JSON-encoded) arguments.
const toolCallAssert = {
  type: 'javascript',
  value: `
const calls = Array.isArray(output) ? output : [];
if (calls.length === 0) return false;
return calls.every((call) => {
const fnName = call.function?.name || call.name;
if (fnName !== 'commit_user_persona') return false;
const rawArgs = call.function?.arguments ?? call.arguments;
let args = {};
if (typeof rawArgs === 'string') {
try { args = JSON.parse(rawArgs); } catch { return false; }
} else {
args = rawArgs || {};
}
return typeof args.persona === 'string' && args.persona.trim().length > 10;
});
`,
};

// LLM-graded rubric for persona quality beyond the structural check above.
const rubric = {
  provider: 'openai:gpt-5-mini',
  type: 'llm-rubric',
  value:
    'Should return a tool call to commit_user_persona with a meaningful second-person persona and concise diff/summary.',
};

export default [
  {
    assert: [{ type: 'is-valid-openai-tools-call' }, toolCallAssert, rubric],
    description: 'Generates a persona with baseline and events',
    vars: {
      existingPersona: '# About User\n- Loves TypeScript\n- Works on LobeHub',
      language: '简体中文',
      personaNotes: '- Keep concise',
      recentEvents: '- Shipped memory feature\n- Joined community call',
      retrievedMemories: '- Preference: dark mode\n- Context: building AI workspace',
      userProfile: '- Developer, open source contributor',
      username: 'User',
    },
  },
] as const;

View file

@ -1,370 +0,0 @@
{
"json_schema": {
"name": "activity_extraction",
"schema": {
"additionalProperties": false,
"properties": {
"memories": {
"description": "Array of extracted activity memories. Use an empty array when no activity should be captured.",
"items": {
"additionalProperties": false,
"description": "Self-contained activity memory describing what happened, when, where, with whom, and how it felt.",
"examples": [
{
"details": "Talked through renewal scope, confirmed timeline flexibility, and captured follow-ups.",
"memoryCategory": "work",
"memoryType": "activity",
"summary": "Client Q2 renewal meeting with Alice (ACME)",
"tags": [
"meeting",
"client",
"renewal"
],
"title": "ACME Q2 renewal meeting",
"withActivity": {
"type": "meeting",
"associatedLocations": [
{
"address": "123 Main St, New York, NY",
"name": "ACME HQ"
}
],
"associatedSubjects": [
{
"name": "Alice Smith",
"type": "person"
}
],
"endsAt": "2024-05-03T15:00:00-04:00",
"feedback": "Positive momentum; Alice felt heard and open to renewal.",
"narrative": "Alice and User reviewed Q2 renewal scope, aligned on reduced deliverables, and agreed to share revised pricing next week.",
"notes": "Agenda: renewal scope, pricing, next steps.",
"startsAt": "2024-05-03T14:00:00-04:00",
"status": "completed",
"timezone": "America/New_York"
}
},
{
"details": "Routine check-up; discussed migraines and sleep habits.",
"memoryCategory": "health",
"memoryType": "activity",
"summary": "Doctor appointment with Dr. Kim about migraines",
"tags": [
"appointment",
"health"
],
"title": "Neurology follow-up",
"withActivity": {
"type": "appointment",
"associatedLocations": [
{
"name": "City Neurology Clinic"
}
],
"associatedSubjects": [
{
"name": "Dr. Kim",
"type": "person"
}
],
"feedback": "Felt reassured; plan seems manageable.",
"narrative": "User saw Dr. Kim to review migraine frequency; decided to track sleep, hydration, and start a low-dose preventive.",
"notes": "Discussed triggers, hydration, and medication side effects.",
"status": "completed"
}
}
],
"properties": {
"details": {
"description": "Optional detailed information or longer notes supporting the summary and narrative.",
"type": "string"
},
"memoryCategory": {
"description": "Memory category best matching the activity (e.g., work, health, travel, relationships).",
"type": "string"
},
"memoryType": {
"const": "activity",
"description": "Memory type; always activity.",
"type": "string"
},
"summary": {
"description": "Concise overview of this activity.",
"type": "string"
},
"tags": {
"description": "Model-generated tags summarizing key facets of the activity.",
"items": {
"type": "string"
},
"type": "array"
},
"title": {
"description": "Brief descriptive title for the activity, e.g., \"Dinner with friends at Marina\".",
"type": "string"
},
"withActivity": {
"additionalProperties": false,
"description": "Structured activity fields. Temporal and association values are optional—include only when the user mentioned them.",
"properties": {
"type": {
"description": "Activity type enum. Choose the closest match; fall back to \"other\" when unclear.",
"enum": [
"appointment",
"call",
"celebration",
"class",
"conference",
"errand",
"event",
"exercise",
"meal",
"meeting",
"other",
"project-session",
"social",
"task",
"trip",
"workshop"
],
"type": "string"
},
"associatedLocations": {
"description": "Places linked to this activity. Capture any mentioned venue, address, or setting.",
"items": {
"additionalProperties": false,
"properties": {
"address": {
"description": "Free-form address or directions if provided.",
"type": [
"string",
"null"
]
},
"extra": {
"description": "Optional key-value metadata related to the location.",
"type": [
"string",
"null"
]
},
"name": {
"description": "Place name or venue label.",
"type": "string"
},
"tags": {
"description": "Place-related tags (e.g., indoor, outdoor, virtual).",
"items": {
"type": "string"
},
"type": [
"array",
"null"
]
},
"type": {
"description": "Place type or category (office, clinic, restaurant, virtual).",
"type": "string"
}
},
"required": [
"type",
"name",
"address",
"tags",
"extra"
],
"type": "object"
},
"type": "array"
},
"associatedObjects": {
"description": "Non-living entities or items tied to the activity (e.g., transportation for trips, devices, tools).",
"items": {
"additionalProperties": false,
"properties": {
"extra": {
"description": "Optional key-value metadata related to the object.",
"type": [
"string",
"null"
]
},
"name": {
"description": "Name or label of the object (e.g., “MacBook”, “flight UA123”).",
"type": "string"
},
"type": {
"description": "Object category (e.g., transportation, device, document).",
"enum": [
"application",
"item",
"knowledge",
"other",
"person",
"place"
],
"type": "string"
}
},
"required": [
"type",
"name",
"extra"
],
"type": "object"
},
"type": "array"
},
"associatedSubjects": {
"description": "Living beings involved (people, pets, groups). Use when the subject lacks a known identity ID.",
"items": {
"additionalProperties": false,
"properties": {
"extra": {
"description": "Optional key-value metadata related to the subject.",
"type": [
"string",
"null"
]
},
"name": {
"description": "Name or short label of the subject.",
"type": "string"
},
"type": {
"description": "Subject category (e.g., person, pet, group).",
"enum": [
"person",
"pet",
"group",
"other"
],
"type": "string"
}
},
"required": [
"type",
"name",
"extra"
],
"type": "object"
},
"type": "array"
},
"endsAt": {
"description": "ISO 8601 end time for the activity when specified. Omit if not explicitly provided.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"feedback": {
"description": "Subjective feelings or evaluation of how the activity went (mood, satisfaction, effort).",
"type": [
"string",
"null"
]
},
"metadata": {
"additionalProperties": false,
"description": "Additional structured metadata to keep raw hints (JSON object). Use sparingly.",
"type": [
"object",
"null"
]
},
"narrative": {
"description": "Factual story of what happened (chronology, participants, outcomes). Required for recall.",
"type": "string"
},
"notes": {
"description": "Short annotations such as agenda, preparation, or quick bullets distinct from narrative.",
"type": [
"string",
"null"
]
},
"startsAt": {
"description": "ISO 8601 start time for the activity when specified. Omit if not explicitly provided.",
"format": "date-time",
"type": [
"string",
"null"
]
},
"status": {
"description": "Lifecycle status when mentioned. Use planned/completed/cancelled/ongoing/on_hold/pending. Omit if unclear.",
"enum": [
"planned",
"completed",
"cancelled",
"ongoing",
"on_hold",
"pending"
],
"type": [
"string",
"null"
]
},
"tags": {
"description": "Optional activity-specific tags or facets.",
"items": {
"type": "string"
},
"type": [
"array",
"null"
]
},
"timezone": {
"description": "IANA timezone string for the start/end times when provided (e.g., \"America/New_York\").",
"type": [
"string",
"null"
]
}
},
"required": [
"type",
"narrative",
"feedback",
"notes",
"associatedLocations",
"associatedSubjects",
"associatedObjects",
"startsAt",
"endsAt",
"status",
"tags",
"timezone",
"metadata"
],
"type": "object"
}
},
"required": [
"title",
"summary",
"details",
"memoryType",
"memoryCategory",
"tags",
"withActivity"
],
"type": "object"
},
"type": "array"
}
},
"required": [
"memories"
],
"type": "object"
},
"strict": true
},
"type": "json_schema"
}

View file

@ -1,210 +0,0 @@
{
"json_schema": {
"type": "object",
"properties": {
"memories": {
"type": "array",
"items": {
"type": "object",
"properties": {
"details": {
"type": "string",
"description": "Optional detailed information"
},
"memoryCategory": {
"type": "string",
"description": "Memory category"
},
"memoryLayer": {
"type": "string",
"const": "context",
"description": "Memory layer"
},
"memoryType": {
"type": "string",
"enum": [
"activity",
"context",
"event",
"fact",
"location",
"other",
"people",
"preference",
"technology",
"topic"
],
"description": "Memory type"
},
"summary": {
"type": "string",
"description": "Concise overview of this specific memory"
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"description": "User defined tags that summarize the context facets"
},
"title": {
"type": "string",
"description": "Brief descriptive title"
},
"withContext": {
"type": "object",
"properties": {
"associatedObjects": {
"type": "array",
"items": {
"type": "object",
"properties": {
"extra": {
"type": [
"string",
"null"
],
"description": "Additional metadata about the object, should always be a valid JSON string if present"
},
"name": {
"type": "string",
"description": "Name of the associated object"
},
"type": {
"type": "string",
"enum": [
"application",
"item",
"knowledge",
"other",
"person",
"place"
],
"description": "Type/category of the associated object"
}
},
"required": [
"extra",
"name",
"type"
],
"additionalProperties": false
},
"description": "Array of objects describing involved roles, entities, or resources, [] empty if none"
},
"associatedSubjects": {
"type": "array",
"items": {
"type": "object",
"properties": {
"extra": {
"type": [
"string",
"null"
],
"description": "Additional metadata about the subject, should always be a valid JSON string if present"
},
"name": {
"type": "string",
"description": "Name of the associated subject"
},
"type": {
"type": "string",
"enum": [
"item",
"other",
"person",
"pet"
],
"description": "Type/category of the associated subject"
}
},
"required": [
"extra",
"name",
"type"
],
"additionalProperties": false
},
"description": "Array of JSON objects describing involved subjects or participants, [] empty if none"
},
"currentStatus": {
"type": "string",
"enum": [
"aborted",
"cancelled",
"completed",
"on_hold",
"ongoing",
"planned"
],
"description": "High level status markers (must be one of 'planned', 'ongoing', 'completed', 'aborted', 'on_hold', 'cancelled')"
},
"description": {
"type": "string",
"description": "Rich narrative describing the situation, timeline, or environment"
},
"labels": {
"type": "array",
"items": {
"type": "string"
},
"description": "Model generated tags that summarize the context themes"
},
"scoreImpact": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Numeric score (0-1 (0% to 100%)) describing importance"
},
"scoreUrgency": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Numeric score (0-1 (0% to 100%)) describing urgency"
},
"title": {
"type": "string",
"description": "Optional synthesized context headline"
},
"type": {
"type": "string",
"description": "High level context archetype (e.g., 'project', 'relationship', 'goal')"
}
},
"required": [
"associatedObjects",
"associatedSubjects",
"currentStatus",
"description",
"labels",
"scoreImpact",
"scoreUrgency",
"title",
"type"
],
"additionalProperties": false
}
},
"required": [
"details",
"memoryCategory",
"memoryLayer",
"memoryType",
"summary",
"tags",
"title",
"withContext"
],
"additionalProperties": false
},
"description": "Array of extracted context memory items, could be empty if decided no relevant context to extract"
}
},
"required": [
"memories"
],
"additionalProperties": false
},
"type": "json_schema"
}

View file

@ -1,143 +0,0 @@
{
"json_schema": {
"type": "object",
"properties": {
"memories": {
"type": "array",
"items": {
"type": "object",
"properties": {
"details": {
"type": "string",
"description": "Optional detailed information"
},
"memoryCategory": {
"type": "string",
"description": "Memory category"
},
"memoryLayer": {
"type": "string",
"const": "experience",
"description": "Memory layer"
},
"memoryType": {
"type": "string",
"enum": [
"activity",
"context",
"event",
"fact",
"location",
"other",
"people",
"preference",
"technology",
"topic"
],
"description": "Memory type"
},
"summary": {
"type": "string",
"description": "Concise overview of this specific memory"
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Model generated tags that summarize the experience facets"
},
"title": {
"type": "string",
"description": "Brief descriptive title"
},
"withExperience": {
"type": "object",
"properties": {
"action": {
"type": "string",
"description": "Narrative describing actions taken or behaviors exhibited"
},
"keyLearning": {
"type": "string",
"description": "Narrative describing key insights or lessons learned"
},
"knowledgeValueScore": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Numeric score (0-1) describing how reusable and shareable this experience is"
},
"labels": {
"type": "array",
"items": {
"type": "string"
},
"description": "Model generated tags that summarize the experience facets"
},
"possibleOutcome": {
"type": "string",
"description": "Narrative describing potential outcomes or learnings"
},
"problemSolvingScore": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Numeric score (0-1) describing how effectively the problem was solved"
},
"reasoning": {
"type": "string",
"description": "Narrative describing the thought process or motivations"
},
"scoreConfidence": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Numeric score (0-1 (0% to 100%)) describing confidence in the experience details"
},
"situation": {
"type": "string",
"description": "Narrative describing the situation or event"
},
"type": {
"type": "string",
"description": "Type of experience being recorded"
}
},
"required": [
"action",
"keyLearning",
"knowledgeValueScore",
"labels",
"possibleOutcome",
"problemSolvingScore",
"reasoning",
"scoreConfidence",
"situation",
"type"
],
"additionalProperties": false
}
},
"required": [
"details",
"memoryCategory",
"memoryLayer",
"memoryType",
"summary",
"tags",
"title",
"withExperience"
],
"additionalProperties": false
},
"description": "Array of extracted experience memory items, could be empty if decided no relevant experience to extract"
}
},
"required": [
"memories"
],
"additionalProperties": false
},
"type": "json_schema"
}

View file

@ -1,381 +0,0 @@
{
"json_schema": {
"type": "object",
"properties": {
"add": {
"anyOf": [
{
"type": "array",
"items": {
"type": "object",
"properties": {
"details": {
"type": [
"string",
"null"
],
"description": "Optional detailed information"
},
"memoryCategory": {
"type": "string",
"description": "Memory category"
},
"memoryLayer": {
"type": "string",
"const": "identity",
"description": "Memory layer"
},
"memoryType": {
"type": "string",
"enum": [
"activity",
"context",
"event",
"fact",
"location",
"other",
"people",
"preference",
"technology",
"topic"
],
"description": "Memory type"
},
"summary": {
"type": "string",
"description": "Concise overview of this specific memory"
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Model generated tags that summarize the identity facets"
},
"title": {
"type": "string",
"description": "Honorific-style, concise descriptor (strength + domain/milestone), avoid bare job titles; e.g., \"Trusted open-source maintainer\", \"Specializes in low-latency infra\", \"Former Aliyun engineer\", \"Cares for rescue cats\""
},
"withIdentity": {
"type": "object",
"properties": {
"description": {
"type": "string"
},
"episodicDate": {
"type": [
"string",
"null"
]
},
"extractedLabels": {
"type": "array",
"items": {
"type": "string"
}
},
"relationship": {
"type": "string",
"enum": [
"self",
"father",
"mother",
"son",
"daughter",
"brother",
"sister",
"sibling",
"husband",
"wife",
"spouse",
"partner",
"couple",
"friend",
"colleague",
"coworker",
"classmate",
"mentor",
"mentee",
"manager",
"teammate",
"grandfather",
"grandmother",
"grandson",
"granddaughter",
"uncle",
"aunt",
"nephew",
"niece",
"other"
]
},
"role": {
"type": "string",
"description": "Role explicitly mentioned for this identity entry (e.g., \"platform engineer\", \"caregiver\"); keep neutral and only use when evidence exists"
},
"scoreConfidence": {
"type": "number"
},
"sourceEvidence": {
"type": [
"string",
"null"
]
},
"type": {
"type": "string",
"enum": [
"professional",
"personal",
"demographic"
]
}
},
"required": [
"description",
"episodicDate",
"extractedLabels",
"relationship",
"role",
"scoreConfidence",
"sourceEvidence",
"type"
],
"additionalProperties": false
}
},
"required": [
"details",
"memoryCategory",
"memoryLayer",
"memoryType",
"summary",
"tags",
"title",
"withIdentity"
],
"additionalProperties": false
}
},
{
"type": "null"
}
],
"description": "Identity entries to add; use an empty array when nothing to add"
},
"remove": {
"anyOf": [
{
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"reason": {
"type": "string"
}
},
"required": [
"id",
"reason"
],
"additionalProperties": false
}
},
{
"type": "null"
}
],
"description": "Identity entries to remove; use an empty array when nothing to remove"
},
"update": {
"anyOf": [
{
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"mergeStrategy": {
"type": "string",
"enum": [
"merge",
"replace"
]
},
"set": {
"type": "object",
"properties": {
"details": {
"type": [
"string",
"null"
],
"description": "Optional detailed information, use null for omitting the field"
},
"memoryCategory": {
"type": [
"string",
"null"
],
"description": "Memory category, use null for omitting the field"
},
"memoryType": {
"type": "string",
"enum": [
"activity",
"context",
"event",
"fact",
"location",
"other",
"people",
"preference",
"technology",
"topic"
],
"description": "Memory type, use null for omitting the field"
},
"summary": {
"type": [
"string",
"null"
],
"description": "Concise overview of this specific memory, use null for omitting the field"
},
"tags": {
"anyOf": [
{
"type": "array",
"items": {
"type": "string"
}
},
{
"type": "null"
}
],
"description": "Model generated tags that summarize the identity facets, use null for omitting the field"
},
"title": {
"type": [
"string",
"null"
],
"description": "Honorific-style, concise descriptor (strength + domain/milestone), avoid bare job titles; e.g., \"Trusted open-source maintainer\", \"Specializes in low-latency infra\", \"Former Aliyun engineer\", \"Cares for rescue cats\"; use null for omitting the field"
},
"withIdentity": {
"type": "object",
"properties": {
"description": {
"type": [
"string",
"null"
]
},
"episodicDate": {
"type": [
"string",
"null"
]
},
"extractedLabels": {
"anyOf": [
{
"type": "array",
"items": {
"type": "string"
}
},
{
"type": "null"
}
]
},
"relationship": {
"type": [
"string",
"null"
],
"description": "Possible values: self | father | mother | son | daughter | brother | sister | sibling | husband | wife | spouse | partner | couple | friend | colleague | coworker | classmate | mentor | mentee | manager | teammate | grandfather | grandmother | grandson | granddaughter | uncle | aunt | nephew | niece | other"
},
"role": {
"type": [
"string",
"null"
],
"description": "Role explicitly mentioned for this identity entry (e.g., \"platform engineer\", \"caregiver\"); keep existing when not updated; use null for omitting the field"
},
"scoreConfidence": {
"type": [
"number",
"null"
]
},
"sourceEvidence": {
"type": [
"string",
"null"
]
},
"type": {
"type": [
"string",
"null"
],
"description": "Possible values: professional | personal | demographic"
}
},
"required": [
"description",
"episodicDate",
"extractedLabels",
"relationship",
"role",
"scoreConfidence",
"sourceEvidence",
"type"
],
"additionalProperties": false
}
},
"required": [
"details",
"memoryCategory",
"memoryType",
"summary",
"tags",
"title",
"withIdentity"
],
"additionalProperties": false
}
},
"required": [
"id",
"mergeStrategy",
"set"
],
"additionalProperties": false
}
},
{
"type": "null"
}
],
"description": "Identity entries to update; use an empty array when nothing to update"
}
},
"required": [
"add",
"remove",
"update"
],
"additionalProperties": false
},
"type": "json_schema"
}

View file

@ -1,30 +0,0 @@
[
{
"type": "function",
"function": {
"name": "commit_user_persona",
"description": "Persist an updated user persona document that summarizes the user, preferences, relationships, and recent events.",
"parameters": {
"type": "object",
"properties": {
"persona": { "type": "string", "description": "Complete Markdown persona for the user" },
"summary": { "type": "string", "description": "Executive summary (2-3 lines)" },
"diff": { "type": "string", "description": "Bullet list of changes applied this run" },
"reasoning": { "type": "string", "description": "Why these changes were applied" },
"memoryIds": {
"type": "array",
"items": { "type": "string" },
"description": "Related memory IDs used to craft the persona"
},
"sourceIds": {
"type": "array",
"items": { "type": "string" },
"description": "Source or topic IDs tied to this update"
}
},
"required": ["persona"],
"additionalProperties": false
}
}
}
]

View file

@ -1,13 +0,0 @@
{
"additionalProperties": false,
"properties": {
"persona": { "type": "string" },
"summary": { "type": "string" },
"diff": { "type": "string" },
"reasoning": { "type": "string" },
"memoryIds": { "type": "array", "items": { "type": "string" } },
"sourceIds": { "type": "array", "items": { "type": "string" } }
},
"required": ["persona"],
"type": "object"
}

View file

@ -1,226 +0,0 @@
{
"json_schema": {
"type": "object",
"properties": {
"memories": {
"type": "array",
"items": {
"type": "object",
"properties": {
"details": {
"type": "string",
"description": "Optional detailed information"
},
"memoryCategory": {
"type": "string",
"description": "Memory category"
},
"memoryLayer": {
"type": "string",
"const": "preference",
"description": "Memory layer"
},
"memoryType": {
"type": "string",
"enum": [
"activity",
"context",
"event",
"fact",
"location",
"other",
"people",
"preference",
"technology",
"topic"
],
"description": "Memory type"
},
"summary": {
"type": "string",
"description": "Concise overview of this specific memory"
},
"tags": {
"type": "array",
"items": {
"type": "string"
},
"description": "Model generated tags that summarize the preference facets"
},
"title": {
"type": "string",
"description": "Brief descriptive title"
},
"withPreference": {
"type": "object",
"properties": {
"appContext": {
"anyOf": [
{
"type": "object",
"properties": {
"app": {
"type": [
"string",
"null"
],
"description": "App or product name this applies to"
},
"feature": {
"type": [
"string",
"null"
]
},
"route": {
"type": [
"string",
"null"
]
},
"surface": {
"type": [
"string",
"null"
],
"description": "e.g., chat, emails, code review, notes"
}
},
"required": [
"app",
"feature",
"route",
"surface"
],
"additionalProperties": false
},
{
"type": "null"
}
],
"description": "Application/surface specific preference, if any"
},
"conclusionDirectives": {
"type": "string",
"description": "Direct, self-contained instruction to the assistant from the user's perspective (what to do, not how to implement)"
},
"extractedLabels": {
"type": "array",
"items": {
"type": "string"
},
"description": "Model generated tags that summarize the preference facets"
},
"extractedScopes": {
"type": "array",
"items": {
"type": "string"
},
"description": "Array of JSON strings describing preference facets and applicable scopes"
},
"originContext": {
"anyOf": [
{
"type": "object",
"properties": {
"actor": {
"type": "string",
"description": "Who stated the preference; use 'User' for the user"
},
"applicableWhen": {
"type": [
"string",
"null"
],
"description": "Conditions where this preference applies"
},
"notApplicableWhen": {
"type": [
"string",
"null"
],
"description": "Conditions where it does not apply"
},
"scenario": {
"type": [
"string",
"null"
],
"description": "Applicable scenario or use case"
},
"trigger": {
"type": [
"string",
"null"
],
"description": "What prompted this preference"
}
},
"required": [
"actor",
"applicableWhen",
"notApplicableWhen",
"scenario",
"trigger"
],
"additionalProperties": false
},
{
"type": "null"
}
],
"description": "Context of how/why this preference was expressed"
},
"scorePriority": {
"type": "number",
"minimum": 0,
"maximum": 1,
"description": "Numeric prioritization weight (0-1 (0% to 100%)) where higher means more critical to respect"
},
"suggestions": {
"type": "array",
"items": {
"type": "string"
},
"description": "Follow-up actions or assistant guidance derived from the preference"
},
"type": {
"type": "string",
"description": "High level preference classification (e.g., 'lifestyle', 'communication')"
}
},
"required": [
"appContext",
"conclusionDirectives",
"extractedLabels",
"extractedScopes",
"originContext",
"scorePriority",
"suggestions",
"type"
],
"additionalProperties": false
}
},
"required": [
"details",
"memoryCategory",
"memoryLayer",
"memoryType",
"summary",
"tags",
"title",
"withPreference"
],
"additionalProperties": false
},
"description": "Array of extracted preference memory items, could be empty if decided no relevant preference to extract"
}
},
"required": [
"memories"
],
"additionalProperties": false
},
"type": "json_schema"
}

View file

@ -1,28 +0,0 @@
description: LobeHub Prompts (memory-user-memory) Testing Suite
# Test configurations - run all prompt tests
testPaths:
- promptfoo/evals/identity/with-s3-trace/eval.yaml
- promptfoo/evals/activity/basic/eval.yaml
- promptfoo/evals/persona/eval.yaml
# Output configuration
outputPath: promptfoo-results.json
# Default test settings
defaultTest:
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should be relevant and well-formatted"
- type: cost
threshold: 0.01 # Maximum cost per test in USD
# Evaluation settings
evaluateOptions:
maxConcurrency: 5
delay: 100
# TypeScript support
transforms:
- "typescript"

View file

@ -1,61 +0,0 @@
import { mkdir, writeFile } from 'node:fs/promises';
import { join } from 'node:path';
import { exit } from 'node:process';
import type { GenerateObjectSchema } from '@lobechat/model-runtime';
import {
  ActivityMemorySchema,
  ContextMemorySchema,
  ExperienceMemorySchema,
  IdentityActionsSchema,
  PreferenceMemorySchema,
} from '../src/schemas';
import { buildGenerateObjectSchema } from '../src/utils/zod';

// Destination directory for the generated response-format JSON files.
const OUTPUT_DIR = join(process.cwd(), 'promptfoo/response-formats');

// Serialize a response-format payload to `<OUTPUT_DIR>/<name>.json` and log the path.
const persist = async (name: string, payload: unknown) => {
  const target = join(OUTPUT_DIR, `${name}.json`);
  await writeFile(target, JSON.stringify(payload, null, 2), 'utf8');
  console.log(`Wrote ${target}`);
};

// Build a response format from a raw (zod) schema, then persist it.
const writeSchema = async (name: string, schema: any, description: string) => {
  const built = buildGenerateObjectSchema(schema, { description, name });
  await persist(name, {
    json_schema: built.schema,
    type: 'json_schema' as const,
  });
};

// Persist a pre-built GenerateObjectSchema, falling back to `name` when it is unnamed.
const writeGenerateObjectSchema = async (name: string, generateSchema: GenerateObjectSchema) => {
  const responseFormat: { json_schema: GenerateObjectSchema; type: 'json_schema' } = {
    json_schema: {
      name: generateSchema.name || name,
      schema: generateSchema.schema,
      strict: generateSchema.strict,
    },
    type: 'json_schema' as const,
  };
  await persist(name, responseFormat);
};

// Regenerate every memory-layer response format, one file per layer.
async function main() {
  await mkdir(OUTPUT_DIR, { recursive: true });
  await writeSchema('identity', IdentityActionsSchema, 'Identity layer actions');
  await writeSchema('context', ContextMemorySchema, 'Context layer actions');
  await writeSchema('preference', PreferenceMemorySchema, 'Preference layer memories');
  await writeSchema('experience', ExperienceMemorySchema, 'Experience layer memories');
  await writeGenerateObjectSchema('activity', ActivityMemorySchema);
}

main().catch((err) => {
  console.error(err);
  exit(1);
});

View file

@ -1,9 +1,3 @@
# Promptfoo results and cache
promptfoo-results.json
results/
.promptfoo/
*.promptfoo.cache
# Node modules
node_modules/

View file

@ -1,575 +0,0 @@
# Prompt Engineering Guide for @lobechat/prompts
本文档提供使用 Claude Code 优化 LobeHub 提示词的指南和最佳实践。
## 项目结构
### 目录组织
每个提示词遵循以下标准结构:
```
promptfoo/
├── {prompt-name}/
│ ├── eval.yaml # promptfoo 配置文件
│ ├── prompt.ts # 提示词定义
│ └── tests/
│ └── basic-case.ts # 测试用例TypeScript
```
**示例目录:**
```
promptfoo/
├── emoji-picker/
│ ├── eval.yaml
│ ├── prompt.ts
│ └── tests/
│ └── basic-case.ts
├── translate/
│ ├── eval.yaml
│ ├── prompt.ts
│ └── tests/
│ └── basic-case.ts
└── knowledge-qa/
├── eval.yaml
├── prompt.ts
└── tests/
└── basic-case.ts
```
### 文件说明
#### `eval.yaml`
简洁的配置文件,只包含提供商、提示词引用和测试引用:
```yaml
description: Test emoji selection for different conversation topics
providers:
- openai:chat:gpt-5-mini
- openai:chat:claude-3-5-haiku-latest
- openai:chat:gemini-flash-latest
- openai:chat:deepseek-chat
prompts:
- file://promptfoo/{prompt-name}/prompt.ts
tests:
- file://./tests/basic-case.ts
```
#### `tests/basic-case.ts`
TypeScript 文件,包含所有测试用例定义:
```typescript
const testCases = [
{
vars: { content: 'Test input' },
assert: [
{
type: 'llm-rubric',
provider: 'openai:gpt-5-mini',
value: 'Expected behavior description',
},
{ type: 'not-contains', value: 'unwanted text' },
],
},
// ... more test cases
];
export default testCases;
```
### 添加新提示词
1. **创建目录结构:**
```bash
mkdir -p promptfoo/your-prompt-name/tests
```
2. **创建 `prompt.ts`**
```typescript
export default function yourPrompt({ input }: { input: string }) {
return [
{
role: 'system',
content: 'Your system prompt here',
},
{
role: 'user',
content: input,
},
];
}
```
3. **创建 `eval.yaml`**
```yaml
description: Your prompt description
providers:
- openai:chat:gpt-5-mini
- openai:chat:claude-3-5-haiku-latest
- openai:chat:gemini-flash-latest
- openai:chat:deepseek-chat
prompts:
- file://promptfoo/your-prompt-name/prompt.ts
tests:
- file://./tests/basic-case.ts
```
4. **创建 `tests/basic-case.ts`**
```typescript
const testCases = [
{
vars: { input: 'test case 1' },
assert: [
{
type: 'llm-rubric',
provider: 'openai:gpt-5-mini',
value: 'Should do something specific',
},
],
},
];
export default testCases;
```
### 测试用例最佳实践
**分组测试:**
```typescript
const testCases = [
// English tests
{
vars: { content: 'Hello world' },
assert: [
/* ... */
],
},
// Chinese tests
{
vars: { content: '你好世界' },
assert: [
/* ... */
],
},
// Edge cases
{
vars: { content: '' },
assert: [
/* ... */
],
},
];
```
**使用注释:**
```typescript
{
assert: [
{ type: 'contains', value: 'TypeScript' }, // Technical terms should be preserved
{ type: 'javascript', value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2" }, // At most 2 sentences
],
}
```
## 提示词优化工作流
### 1. 运行测试并识别问题
```bash
# 运行特定提示词测试
pnpm promptfoo eval -c promptfoo/<prompt-name>/eval.yaml
# 查看失败的测试详情
pnpm promptfoo eval -c promptfoo/<prompt-name>/eval.yaml 2>&1 | grep -A 20 "FAIL"
```
**关注点:**
- 失败率和失败模式
- 不同模型的行为差异
- 具体的失败原因(来自 llm-rubric 的评价)
### 2. 分析失败原因
**常见问题模式:**
- **输出格式问题**:模型添加了不需要的解释或上下文
- **语言混淆**:在多语言场景下使用了错误的语言
- **过度 / 不足翻译**:技术术语被翻译或保留不当
- **上下文理解**:未正确理解何时使用 / 忽略上下文
- **一致性问题**:不同模型间的行为不一致
### 3. 更新提示词
**优化策略:**
#### 使用英文提示词
```typescript
// ❌ 不好 - 中文提示词在多语言场景下容易混淆
content: '你是一名翻译助手,请将内容翻译为...';
// ✅ 好 - 英文提示词更通用
content: 'You are a translation assistant. Translate the content to...';
```
#### 明确输出要求
```typescript
// ❌ 不好 - 模糊的指令
content: 'Please translate the text';
// ✅ 好 - 具体的规则
content: `Translate the text.
Rules:
- Output ONLY the translated text, no explanations
- Preserve technical terms exactly as they appear
- No additional commentary`;
```
#### 使用示例指导行为
```typescript
// ✅ 提供具体示例
content: `Select an emoji for the content.
Examples:
- "I got a promotion" → 🎉
- "Code wizard" → 🧙‍♂️
- "Business plan" → 🚀`;
```
#### 使用 MUST/SHOULD/MAY 表达优先级
```typescript
// ✅ 明确的优先级
content: `Answer based on context.
Rules:
- MUST use context information as foundation
- SHOULD supplement with general knowledge
- MAY provide additional examples`;
```
### 4. 迭代验证
每次修改后重新运行测试:
```bash
pnpm promptfoo eval -c promptfoo/<prompt-name>/eval.yaml
```
**目标:**
- 每轮优化应提升 5-10% 通过率
- 通常需要 3-5 轮迭代达到 100%
- 关注不同模型间的一致性
## 提示词模式库
### 翻译 (Translation)
```typescript
export const chainTranslate = (content: string, targetLang: string) => ({
messages: [
{
content: `You are a professional translator. Translate to ${targetLang}.
Rules:
- Output ONLY the translated text, no explanations
- Preserve technical terms, code identifiers, API keys exactly
- Maintain original formatting
- Use natural, idiomatic expressions`,
role: 'system',
},
{
content,
role: 'user',
},
],
});
```
**关键点:**
- 使用英文系统提示词
- 明确 "仅输出翻译内容"
- 列举需要保留的内容类型
### 知识库问答 (Knowledge Q\&A)
```typescript
export const chainAnswerWithContext = ({ context, question }) => {
const hasContext = context.filter((c) => c.trim()).length > 0;
return {
messages: [
{
content: hasContext
? `Answer based on provided context.
Rules:
- If context is COMPLETELY DIFFERENT topic: state this and do NOT answer
- If context is related (even if limited):
* MUST use context as foundation
* SHOULD supplement with general knowledge
* For "how to" questions, provide actionable steps
* Example: Context about "Docker containerization" + "How to deploy?"
→ Explain deployment steps using your knowledge`
: `Answer using your knowledge.`,
role: 'user',
},
],
};
};
```
**关键点:**
- 区分 "无上下文" 和 "不相关上下文"
- 明确何时可以补充通用知识
- 提供具体示例说明预期行为
### Emoji 选择 (Emoji Picker)
```typescript
export const chainPickEmoji = (content: string) => ({
messages: [
{
content: `You are an emoji expert.
Rules:
- Output ONLY a single emoji (1-2 characters)
- Focus on CONTENT meaning, not language
- Prioritize topic-specific emojis over generic emotions
- For work/projects, use work-related emojis not cultural symbols`,
role: 'system',
},
{ content: 'I got a promotion', role: 'user' },
{ content: '🎉', role: 'assistant' },
{ content, role: 'user' },
],
});
```
**关键点:**
- 使用示例引导行为
- 明确优先级(主题 > 情绪)
- 避免文化符号混淆
### 标题生成 (Summary Title)
```typescript
export const chainSummaryTitle = (messages, locale) => ({
messages: [
{
content: `Generate a concise title.
Rules:
- Maximum 10 words
- Maximum 50 characters
- No punctuation marks
- Use language: ${locale}
- Keep it short and to the point`,
role: 'system',
},
{
content: messages.map((m) => `${m.role}: ${m.content}`).join('\n'),
role: 'user',
},
],
});
```
**关键点:**
- 同时限制词数和字符数
- 明确输出语言
- 简洁明了的规则
## 测试策略
### 多语言测试
每个提示词应测试至少 3-5 种语言:
```typescript
const testCases = [
// 英语
{
vars: { content: 'Hello, how are you?' },
assert: [
/* ... */
],
},
// 中文
{
vars: { content: '你好,你好吗?' },
assert: [
/* ... */
],
},
// 西班牙语
{
vars: { content: 'Hola, ¿cómo estás?' },
assert: [
/* ... */
],
},
];
```
### 边界情况
```typescript
const testCases = [
// 空输入
{
vars: { content: '' },
assert: [
/* ... */
],
},
// 技术术语
{
vars: { content: 'API_KEY_12345' },
assert: [
/* ... */
],
},
// 混合语言
{
vars: { content: '使用 React 开发' },
assert: [
/* ... */
],
},
// 上下文不相关
{
vars: {
context: 'Machine learning...',
query: 'Explain blockchain',
},
assert: [
/* ... */
],
},
];
```
### 断言类型
```typescript
const testCases = [
{
vars: {
/* ... */
},
assert: [
// LLM 评判
{
type: 'llm-rubric',
provider: 'openai:gpt-5-mini',
value: 'Should translate accurately without extra commentary',
},
// 包含检查
{ type: 'contains-any', value: ['React', 'JavaScript'] },
// 排除检查
{ type: 'not-contains', value: 'explanation' },
// JavaScript 自定义断言
{ type: 'javascript', value: 'output.length < 100' },
// 正则表达式
{ type: 'regex', value: '^.{1,50}$' },
],
},
];
```
## 常见问题
### Q: 如何处理不同模型的差异行为?
A: 使用更明确的指令和示例。如果某个模型持续失败,考虑:
1. 添加该模型的具体示例
2. 使用更强的指令MUST 而非 SHOULD
3. 在提示词中明确该场景
### Q: 何时使用中文 vs 英文提示词?
A:
- **英文**:多语言场景、技术内容、跨模型一致性
- **中文**:纯中文输入输出、中文特定的语言理解任务
### Q: 如何达到 100% 通过率?
A: 迭代流程:
1. 运行测试 → 2. 分析失败 → 3. 更新提示词 → 4. 重新测试
- 通常需要 3-5 轮
- 关注最后 5% 的边界情况
- 考虑调整测试断言(如果过于严格)
### Q: 什么时候应该修改测试而非提示词?
A: 当:
- 测试期望不合理(如要求模型做不到的事)
- 断言过于严格(如精确匹配特定词语)
- 多个模型都以不同但合理的方式回答
## 最佳实践总结
### 提示词设计
1. **使用英文系统提示词**以获得更好的跨语言一致性
2. **明确输出格式**"Output ONLY...""No explanations"
3. **使用示例**引导模型行为
4. **分层规则**MUST > SHOULD > MAY
5. **具体化**:列举具体情况而非抽象描述
### 测试组织
6. **使用 TypeScript 测试文件**:将测试用例放在 `tests/basic-case.ts` 中,而不是内联在 YAML
7. **分组测试用例**:使用注释将相关测试分组(如按语言、边界情况)
8. **添加行内注释**:在复杂断言后添加注释说明意图
### 开发流程
9. **迭代验证**:小步快跑,每次改进一个问题
10. **跨模型测试**:至少测试 3 个不同的模型
11. **版本控制**:记录每次优化的原因和结果
### 文件组织优势
- **类型安全**TypeScript 提供更好的类型检查
- **易维护**:测试逻辑与配置分离
- **可扩展**:轻松添加新测试用例
- **可读性**:注释和格式化更灵活
## 参考资源
- [promptfoo 文档](https://promptfoo.dev)
- [OpenAI Prompt Engineering Guide](https://platform.openai.com/docs/guides/prompt-engineering)
- [Anthropic Prompt Engineering](https://docs.anthropic.com/claude/docs/prompt-engineering)

View file

@ -1,12 +1,11 @@
# @lobechat/prompts
This package contains prompt chains and templates for the LobeHub application, with comprehensive testing using promptfoo.
This package contains prompt chains and templates for the LobeHub application.
## Features
- **Prompt Chains**: Reusable prompt templates for various AI tasks
- **AI Testing**: Comprehensive testing using promptfoo for prompt quality assurance
- **Multi-language Support**: Prompts and tests for multiple languages
- **Multi-language Support**: Prompts for multiple languages
- **Type Safety**: Full TypeScript support with proper type definitions
## Available Prompt Chains
@ -16,209 +15,31 @@ This package contains prompt chains and templates for the LobeHub application, w
- `chainTranslate` - Translate content between languages
- `chainPickEmoji` - Select appropriate emojis for content
- `chainAnswerWithContext` - Answer questions using knowledge base context
- `chainAbstractChunkText` - Summarize text chunks
## Testing with promptfoo
## Testing
This package uses [promptfoo](https://promptfoo.dev) for AI-powered testing of prompts. The testing suite evaluates prompt quality, consistency, and performance across different AI models.
### Prerequisites
Set up your API keys in your environment:
Prompt evaluation tests are located in `devtools/agent-evals/scenarios/prompt-chain/` (cloud repo) and run via the agent-evals CLI.
```bash
export OPENAI_API_KEY="your-openai-key"
export ANTHROPIC_API_KEY="your-anthropic-key" # optional
# Run from cloud repo root
bun run agent-evals run prompt-chain/translate
bun run agent-evals run prompt-chain/emoji-picker
bun run agent-evals list # See all available scenarios
```
### Running Tests
```bash
# Run all prompt tests
pnpm test:prompts
# Run tests in watch mode for development
pnpm test:prompts:watch
# Generate summary report
pnpm test:prompts:summary
# Run tests for CI (no cache, structured output)
pnpm test:prompts:ci
# View test results in web UI
pnpm promptfoo:view
```
### Test Configuration
Tests are organized by prompt type in the `promptfoo/` directory:
```
promptfoo/
├── summary-title/
│ ├── eval.yaml # Test configuration
│ └── prompt.ts # Prompt wrapper
├── translation/
│ ├── eval.yaml
│ └── prompt.ts
├── language-detection/
│ ├── eval.yaml
│ └── prompt.ts
├── emoji-picker/
│ ├── eval.yaml
│ └── prompt.ts
└── knowledge-qa/
├── eval.yaml
└── prompt.ts
```
Each test configuration includes:
- Multiple test cases with different inputs
- Assertions for output validation (regex, JSON, custom logic)
- LLM-based rubric evaluation for semantic correctness
- Performance and cost monitoring
### Test Structure
Tests directly use the actual prompt chain functions from `src/chains/`. The TypeScript wrapper files in `promptfoo/prompts/` import and call the real chain functions, ensuring perfect synchronization.
```yaml
description: Test description
providers:
- openai:gpt-4o-mini
- anthropic:claude-3-5-haiku-latest
prompts:
- file://prompts/summary-title.ts # Imports and uses src/chains/summaryTitle.ts
tests:
- vars:
messages: [...]
locale: 'en-US'
assert:
- type: llm-rubric
value: 'Expected behavior description'
provider: openai:gpt-4o # Specify grader model for LLM rubric
- type: contains
value: 'expected text'
- type: not-contains
value: 'unwanted text'
```
### Adding New Tests
1. Create a test configuration file in `promptfoo/`
2. Create a TypeScript wrapper in `promptfoo/prompts/` that imports and calls your chain function from `src/chains/`
3. Add the test to `promptfooconfig.yaml`
4. Run tests to validate
**Advantage**: The wrapper files automatically stay in sync with source code changes since they directly import and use the actual chain functions.
### Performance Monitoring
Tests include performance monitoring:
- Response time tracking
- Cost per request monitoring
- Quality score evaluation
- Cross-model consistency checks
### CI Integration
The `test:prompts:ci` script is designed for continuous integration:
- Structured JSON output for parsing
- No interactive prompts
- Clear pass/fail status codes
- Detailed error reporting
## Development
```bash
# Install dependencies
pnpm install
# Run unit tests
pnpm test
# Run prompt tests
pnpm test:prompts
# Run all tests
pnpm test && pnpm test:prompts
```
## Contributing
When adding new prompt chains:
1. Implement the prompt function in `src/chains/`
2. Add unit tests in `src/chains/__tests__/`
3. Create promptfoo tests in `promptfoo/`
4. Update this README with the new chain description
## Architecture
The package follows a layered architecture:
```
src/
├── chains/ # Prompt chain implementations
├── prompts/ # Prompt templates and utilities
└── index.ts # Main exports
promptfoo/
├── prompts/ # Prompt implementations for testing
├── *.yaml # Test configurations
└── results/ # Test output directory
```
## Best Practices
1. **Test Coverage**: Every prompt chain should have comprehensive promptfoo tests
2. **Multi-language**: Test prompts with multiple languages when applicable
3. **Edge Cases**: Include tests for edge cases and error conditions
4. **Performance**: Monitor cost and response time in tests
5. **Consistency**: Use consistent assertion patterns across tests
6. **Prompt Optimization**: Use test results to iteratively improve prompts (see CLAUDE.md for optimization workflow)
## Prompt Optimization Workflow
This package follows an iterative prompt optimization process using promptfoo test results:
### Example: Translation Prompt Optimization
**Initial State**: 85% pass rate with issues:
- Claude models added explanatory text ("以下是翻译...")
- GPT models over-translated technical terms (`API_KEY_12345` → `API 密钥_12345`)
**Optimization Process**:
1. **Identify Failures**: Run tests and analyze specific failure patterns
2. **Update Prompts**: Modify prompt rules based on failure analysis
- Added: "Output ONLY the translated text, no explanations"
- Added: "Preserve technical terms, code identifiers, API keys exactly as they appear"
3. **Re-run Tests**: Validate improvements across all models
4. **Iterate**: Repeat until 100% pass rate achieved
**Final Result**: 100% pass rate (14/14 tests) across GPT-5-mini, Claude-3.5-Haiku, and Gemini-Flash
### Example: Knowledge Q\&A Optimization
**Initial State**: 71.43% pass rate with context handling issues
**Optimization Journey**:
- **Round 1** (80.95%): Clarified context relevance checking
- **Round 2** (90.48%): Distinguished between "no context" vs "irrelevant context"
- **Round 3** (92.86%): Added explicit rules for partial context
- **Round 4** (96.43%): Emphasized supplementing with general knowledge
- **Final** (100%): Added concrete example and MUST/SHOULD directives
**Key Learning**: When context is topic-relevant but information-limited, models should:
- Use context as foundation
- Supplement with general knowledge
- Provide practical, actionable guidance
See `CLAUDE.md` for detailed prompt engineering guidelines.

View file

@ -4,23 +4,12 @@
"private": true,
"main": "./src/index.ts",
"scripts": {
"promptfoo:init": "promptfoo init",
"promptfoo:view": "promptfoo view",
"test": "vitest",
"test:coverage": "vitest --coverage --silent='passed-only'",
"test:prompts": "pnpm test:prompts:translate && pnpm test:prompts:summary && pnpm test:prompts:lang && pnpm test:prompts:emoji && pnpm test:prompts:qa",
"test:prompts:abstract-chunk": "promptfoo eval -c promptfoo/abstract-chunk/eval.yaml",
"test:prompts:emoji": "promptfoo eval -c promptfoo/emoji-picker/eval.yaml",
"test:prompts:lang": "promptfoo eval -c promptfoo/language-detection/eval.yaml",
"test:prompts:qa": "promptfoo eval -c promptfoo/knowledge-qa/eval.yaml",
"test:prompts:summary": "promptfoo eval -c promptfoo/summary-title/eval.yaml",
"test:prompts:supervisor": "promptfoo eval -c promptfoo/supervisor/productive/eval.yaml",
"test:prompts:translate": "promptfoo eval -c promptfoo/translate/eval.yaml",
"test:update": "vitest -u"
},
"devDependencies": {
"@lobechat/types": "workspace:*",
"promptfoo": "^0.120.17",
"tsx": "^4.20.6"
}
}

View file

@ -1,111 +0,0 @@
description: Test chunk text summarization in different languages
providers:
- openai:chat:gpt-5-mini
- openai:chat:claude-3-5-haiku-latest
- openai:chat:gemini-flash-latest
- openai:chat:deepseek-chat
prompts:
- file://promptfoo/abstract-chunk/prompt.ts
tests:
# English technical content
- vars:
text: "React is a JavaScript library for building user interfaces. It was developed by Facebook and is now maintained by Facebook and the community. React makes it painless to create interactive UIs. Design simple views for each state in your application, and React will efficiently update and render just the right components when your data changes. Declarative views make your code more predictable and easier to debug."
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The summary should be 1-2 sentences in English, capturing the main topic about React being a JavaScript library for UIs"
- type: contains-any
value: ["React", "JavaScript", "library", "UI", "user interface"]
- type: javascript
value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2" # At most 2 sentences
# Chinese content
- vars:
text: "深度学习是机器学习的一个分支它使用多层神经网络来学习数据的表示。近年来深度学习在图像识别、自然语言处理、语音识别等领域取得了突破性进展。卷积神经网络CNN在计算机视觉任务中表现优异而循环神经网络RNN和Transformer架构在序列建模任务中非常有效。"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The summary should be 1-2 sentences in Chinese, summarizing deep learning and its applications"
- type: contains-any
value: ["深度学习", "神经网络", "机器学习"]
- type: not-contains
value: "摘要" # Should not contain meta labels
- type: javascript
value: "output.split(/[。!?]/).filter(s => s.trim()).length <= 2" # At most 2 sentences
# Japanese content
- vars:
text: "人工知能AIは、コンピュータシステムが人間の知能を模倣する技術です。AIは、学習、推論、問題解決などの認知機能を実行できます。現代のAIシステムは、大量のデータから学習し、パターンを認識して予測を行います。"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The summary should be 1-2 sentences in Japanese about artificial intelligence"
- type: contains-any
value: ["人工知能", "AI", "コンピュータ"]
- type: javascript
value: "output.split(/[。!?]/).filter(s => s.trim()).length <= 2"
# Spanish content
- vars:
text: "El cambio climático es uno de los mayores desafíos que enfrenta la humanidad en el siglo XXI. Las temperaturas globales están aumentando debido a las emisiones de gases de efecto invernadero producidas por actividades humanas como la quema de combustibles fósiles, la deforestación y la agricultura industrial. Los efectos incluyen el derretimiento de los glaciares, el aumento del nivel del mar y eventos climáticos extremos más frecuentes."
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The summary should be 1-2 sentences in Spanish about climate change"
- type: contains-any
value: ["cambio climático", "temperatura", "clima"]
- type: javascript
value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2"
# Short technical content (English)
- vars:
text: "TypeScript is a strongly typed programming language that builds on JavaScript. It adds static type definitions to JavaScript, making code more robust and maintainable."
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The summary should be 1-2 sentences in English about TypeScript"
- type: contains-any
value: ["TypeScript", "JavaScript", "type"]
- type: javascript
value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2"
# Mixed technical terms in Chinese
- vars:
text: "Docker 是一个开源的容器化平台,它允许开发者将应用程序及其依赖项打包到一个可移植的容器中。通过使用 Docker可以确保应用在任何环境中都能一致地运行。Docker 容器比传统虚拟机更轻量级,启动速度更快,资源占用更少。"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The summary should be 1-2 sentences in Chinese, keeping 'Docker' in English"
- type: contains
value: "Docker" # Technical term should be preserved
- type: contains-any
value: ["容器", "平台", "应用"]
- type: javascript
value: "output.split(/[。!?]/).filter(s => s.trim()).length <= 2"
# German content
- vars:
text: "Die Quantenphysik ist ein fundamentaler Zweig der Physik, der sich mit dem Verhalten von Materie und Energie auf atomarer und subatomarer Ebene befasst. Im Gegensatz zur klassischen Physik beschreibt die Quantenphysik Phänomene, bei denen Teilchen sowohl Wellen- als auch Teilcheneigenschaften aufweisen können."
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The summary should be 1-2 sentences in German about quantum physics"
- type: contains-any
value: ["Quantenphysik", "Physik", "Materie"]
- type: javascript
value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2"
# Code snippet in content (English)
- vars:
text: "The useState hook in React allows you to add state to functional components. For example: const [count, setCount] = useState(0). This creates a state variable 'count' with initial value 0 and a setter function 'setCount' to update it."
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The summary should be 1-2 sentences in English about useState hook, may preserve code syntax"
- type: contains-any
value: ["useState", "React", "state", "hook"]
- type: javascript
value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2"

View file

@ -1,16 +0,0 @@
// promptfoo prompt wrapper: delegates to the real chain from src so tests stay in sync.
import { chainAbstractChunkText } from '@lobechat/prompts';

interface PromptVars {
  text: string;
}

// Entry point invoked by promptfoo with the test-case variables.
export default function generatePrompt({ vars }: { vars: PromptVars }) {
  // Build the chain via the actual implementation and surface its messages;
  // fall back to an empty list when the chain yields none (`||` kept as in the chain's contract).
  const chain = chainAbstractChunkText(vars.text);
  return chain.messages || [];
}

View file

@ -1,170 +0,0 @@
description: Test emoji selection for different conversation topics and contexts
providers:
- openai:chat:gpt-5-mini
- openai:chat:claude-3-5-haiku-latest
- openai:chat:gemini-flash-latest
- openai:chat:deepseek-chat
prompts:
- file://promptfoo/emoji-picker/prompt.ts
tests:
- vars:
content: "I just got a promotion at work! I'm so excited!"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select a positive, celebratory emoji appropriate for work success (e.g., 🎉, 🎊, ⭐, 💼, 🚀)"
- type: not-contains
value: "explanation"
- vars:
content: "My dog passed away yesterday. I'm really sad."
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select a sympathetic, sad emoji appropriate for loss and grief (e.g., 😢, 😭, 💔, 😔)"
- type: not-contains
value: "explanation"
- vars:
content: "Can you help me with this math problem?"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to learning, thinking, or mathematics (e.g., 🤔, 📚, ✏️, 🧮, 🔢)"
- type: not-contains
value: "explanation"
- vars:
content: "I'm going on vacation to Hawaii next week!"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to vacation, travel, or tropical themes (e.g., 🌺, 🏖️, ✈️, 🌴, ☀️)"
- type: not-contains
value: "explanation"
- vars:
content: "I'm learning to cook Italian food"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to cooking or Italian food (e.g., 🍝, 🍕, 👨‍🍳, 🍽️)"
- type: not-contains
value: "explanation"
- vars:
content: "Technical documentation about API endpoints"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to technology, development, or documentation (e.g., 💻, 📖, ⚙️, 🔧, 📝)"
- type: not-contains
value: "explanation"
# Chinese language tests
- vars:
content: "我刚刚升职了!太激动了!"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select a positive, celebratory emoji appropriate for work success (e.g., 🎉, 🎊, ⭐, 💼, 🚀)"
- type: not-contains
value: "explanation"
- vars:
content: "我的猫咪昨天去世了,我很难过"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select a sympathetic, sad emoji appropriate for loss and grief (e.g., 😢, 😭, 💔, 😔)"
- type: not-contains
value: "explanation"
- vars:
content: "我正在学习做日本料理"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to cooking or Japanese food (e.g., 🍱, 🍣, 🍜, 👨‍🍳)"
- type: not-contains
value: "explanation"
# Spanish language tests
- vars:
content: "¡Me voy de vacaciones a la playa la próxima semana!"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to vacation, beach, or tropical themes (e.g., 🏖️, ☀️, 🌊, 🏝️)"
- type: not-contains
value: "explanation"
- vars:
content: "Estoy estudiando para mi examen de matemáticas"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to studying, learning, or mathematics (e.g., 📚, 🤓, 🧮, ✏️, 📊, 📐, 📏)"
- type: not-contains
value: "explanation"
# French language tests
- vars:
content: "Je viens de terminer mon marathon! Je suis épuisé mais heureux"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to running, sports, or achievement (e.g., 🏃, 🏅, 💪, 🎯)"
- type: not-contains
value: "explanation"
- vars:
content: "J'apprends à jouer de la guitare"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to music or guitar (e.g., 🎸, 🎵, 🎶, 🎼)"
- type: not-contains
value: "explanation"
# Japanese language tests
- vars:
content: "新しいプロジェクトが始まりました!頑張ります"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to new beginning, work, or motivation (e.g., 🚀, 💼, 💪, ✨)"
- type: not-contains
value: "explanation"
- vars:
content: "桜が咲いて本当に綺麗です"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to cherry blossoms, flowers, or beauty (e.g., 🌸, 🌺, 🌼, 🌷)"
- type: not-contains
value: "explanation"
# German language tests
- vars:
content: "Ich habe gerade ein neues Auto gekauft!"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to cars or excitement (e.g., 🚗, 🎉, 🚙)"
- type: not-contains
value: "explanation"
# Russian language tests
- vars:
content: "Я люблю читать книги по вечерам"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should select an emoji related to reading or books (e.g., 📚, 📖, 📕, 🤓)"
- type: not-contains
value: "explanation"

View file

@ -1,16 +0,0 @@
// Promptfoo prompt wrapper delegating to the real chain implementation.
import { chainPickEmoji } from '@lobechat/prompts';

interface PromptVars {
  content: string;
}

/**
 * Build the promptfoo message array for the emoji-picker evaluation.
 * Delegates to `chainPickEmoji` and returns its messages, or an empty
 * array when the chain yields none.
 */
const generatePrompt = ({ vars }: { vars: PromptVars }) =>
  chainPickEmoji(vars.content).messages || [];

export default generatePrompt;

View file

@ -1,89 +0,0 @@
description: Test knowledge base Q&A with context retrieval and answer generation
providers:
- openai:chat:gpt-5-mini
- openai:chat:claude-3-5-haiku-latest
- openai:chat:gemini-flash-latest
- openai:chat:deepseek-chat
prompts:
- file://promptfoo/knowledge-qa/prompt.ts
tests:
- vars:
context: "React is a JavaScript library for building user interfaces. It was developed by Facebook and is now maintained by Facebook and the community. React uses a virtual DOM to efficiently update and render components. Key features include component-based architecture, JSX syntax, and state management through hooks."
query: "What is React and who developed it?"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should accurately explain what React is and mention Facebook as the developer"
- type: contains-any
value: ["React", "JavaScript", "library", "Facebook"]
- type: not-contains
value: "I don't know"
- vars:
context: "TypeScript is a strongly typed programming language that builds on JavaScript by adding static type definitions. It was developed by Microsoft. TypeScript code compiles to clean, simple JavaScript code which runs on any browser, Node.js environment, or any JavaScript engine that supports ECMAScript 3 or newer."
query: "How does TypeScript relate to JavaScript?"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should explain the relationship between TypeScript and JavaScript, mentioning type definitions"
- type: contains-any
value: ["TypeScript", "JavaScript", "type", "compiles", "strongly typed"]
- type: not-contains
value: "不知道"
- vars:
context: "Node.js是一个基于Chrome V8引擎的JavaScript运行时环境。它使用了事件驱动、非阻塞I/O模型使其轻量而高效。Node.js的包管理器npm是世界上最大的开源库生态系统。"
query: "Node.js有什么特点"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should describe Node.js features in Chinese, mentioning event-driven and non-blocking I/O"
- type: contains-any
value: ["Node.js", "事件驱动", "非阻塞", "JavaScript", "运行时"]
- type: not-contains
value: "I don't know"
- vars:
context: "Docker is a containerization platform that allows developers to package applications and their dependencies into lightweight, portable containers. Containers share the OS kernel but run in isolated user spaces. This makes applications more consistent across different environments."
query: "How can I deploy my app with Docker?"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should provide helpful information about deploying with Docker based on the containerization context. It should mention containers, packaging, or provide deployment-related guidance."
- type: contains-any
value: ["Docker", "container", "package", "deploy"]
- vars:
context: "GraphQL is a query language for APIs and a runtime for fulfilling those queries with existing data. Unlike REST APIs that require multiple requests to different endpoints, GraphQL allows clients to request exactly the data they need in a single request."
query: "What are the benefits of using GraphQL over REST?"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should compare GraphQL and REST, highlighting GraphQL's advantages such as single request capability or requesting specific data"
- type: contains-any
value: ["GraphQL", "REST"]
- vars:
context: "Machine learning algorithms can be categorized into supervised, unsupervised, and reinforcement learning. Supervised learning uses labeled data to train models, unsupervised learning finds patterns in unlabeled data, and reinforcement learning learns through trial and error with rewards."
query: "Can you explain blockchain technology?"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should indicate that the provided context is about machine learning, not blockchain, and cannot answer the blockchain question based on the context"
- type: contains-any
value: ["machine learning", "cannot", "不能", "no information", "context", "does not contain"]
- vars:
context: ""
query: "How do I set up a web server?"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should provide helpful information about setting up a web server using general knowledge"
- type: contains-any
value: ["server", "web", "setup", "install", "configure"]
- type: not-contains
value: "cannot answer"

View file

@ -1,26 +0,0 @@
// Promptfoo prompt wrapper delegating to the real chain implementation.
import { chainAnswerWithContext } from '@lobechat/prompts';

interface PromptVars {
  context: string | string[];
  knowledge?: string | string[];
  query: string;
}

// Normalize a scalar-or-array value into a string array.
const toArray = (value: string | string[]): string[] =>
  Array.isArray(value) ? value : [value];

/**
 * Build the promptfoo message array for the knowledge-QA evaluation.
 * `knowledge` defaults to `['general knowledge']` when not supplied.
 * Returns the messages from `chainAnswerWithContext`, or an empty array
 * when the chain yields none.
 */
export default function generatePrompt({ vars }: { vars: PromptVars }) {
  const { context, query, knowledge = ['general knowledge'] } = vars;

  const result = chainAnswerWithContext({
    context: toArray(context),
    knowledge: toArray(knowledge),
    question: query,
  });

  return result.messages || [];
}

View file

@ -1,65 +0,0 @@
description: Test language detection accuracy for various text inputs
providers:
- openai:chat:gpt-5-mini
- openai:chat:claude-3-5-haiku-latest
- openai:chat:gemini-flash-latest
- openai:chat:deepseek-chat
prompts:
- file://promptfoo/language-detection/prompt.ts
tests:
- vars:
content: "Hello, how are you today? I hope you're having a great day!"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should output a valid locale code for English (e.g., en-US, en-GB, en)"
- type: contains-any
value: ["en-", "en"]
- vars:
content: "Bonjour, comment allez-vous? J'espère que vous passez une excellente journée!"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should output a valid locale code for French (e.g., fr-FR, fr-CA, fr)"
- type: contains-any
value: ["fr-", "fr"]
- vars:
content: "你好,你今天怎么样?希望你过得愉快!"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should output a valid locale code for Chinese (e.g., zh-CN, zh-TW, zh)"
- type: contains-any
value: ["zh-", "zh"]
- vars:
content: "Hola, ¿cómo estás hoy? ¡Espero que tengas un gran día!"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should output a valid locale code for Spanish (e.g., es-ES, es-MX, es)"
- type: contains-any
value: ["es-", "es"]
- vars:
content: "Привет, как дела сегодня? Надеюсь, у тебя отличный день!"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should output a valid locale code for Russian (e.g., ru-RU, ru)"
- type: contains-any
value: ["ru-", "ru"]
- vars:
content: "こんにちは、今日はいかがですか?素晴らしい一日をお過ごしください!"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "Should output a valid locale code for Japanese (e.g., ja-JP, ja)"
- type: contains-any
value: ["ja-", "ja"]

View file

@ -1,16 +0,0 @@
// Promptfoo prompt wrapper delegating to the real chain implementation.
import { chainLangDetect } from '@lobechat/prompts';

interface PromptVars {
  content: string;
}

/**
 * Build the promptfoo message array for the language-detection evaluation.
 * Delegates to `chainLangDetect` and returns its messages, or an empty
 * array when the chain yields none.
 */
const generatePrompt = ({ vars }: { vars: PromptVars }) =>
  chainLangDetect(vars.content).messages || [];

export default generatePrompt;

View file

@ -1,94 +0,0 @@
description: Test summary title generation for different conversation types
providers:
- openai:chat:gpt-5-mini
- openai:chat:claude-3-5-haiku-latest
- openai:chat:gemini-flash-latest
- openai:chat:deepseek-chat
prompts:
- file://promptfoo/summary-title/prompt.ts
tests:
- vars:
messages:
- role: "user"
content: "How do I install Node.js on my computer?"
- role: "assistant"
content: "To install Node.js, you can download it from the official website nodejs.org and follow the installation instructions for your operating system."
- role: "user"
content: "What about using a version manager?"
- role: "assistant"
content: "Yes! I recommend using nvm (Node Version Manager) which allows you to install and switch between different Node.js versions easily."
locale: "en-US"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should be a concise title (10 words or less) that summarizes the conversation about Node.js installation"
- type: regex
value: "^.{1,50}$" # Title should be between 1-50 characters
- type: not-contains
value: "标点符号" # Should not contain punctuation as requested in Chinese
- vars:
messages:
- role: "user"
content: "我想学习做蛋炒饭"
- role: "assistant"
content: "蛋炒饭是很经典的家常菜!你需要准备鸡蛋、米饭、葱花、盐和生抽等基本材料。"
- role: "user"
content: "具体步骤是什么?"
- role: "assistant"
content: "首先打散鸡蛋炒熟盛起,然后下米饭炒散,最后加入鸡蛋和调料炒匀即可。"
locale: "zh-CN"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should be a Chinese title summarizing the conversation about fried rice cooking"
- type: regex
value: "^.{1,30}$" # Chinese titles can be shorter
- type: contains-any
value: ["蛋炒饭", "做饭", "烹饪", "料理"]
- vars:
messages:
- role: "user"
content: "Can you help me debug this Python error?"
- role: "assistant"
content: "Of course! Please share the error message and the relevant code."
- role: "user"
content: "I'm getting 'AttributeError: 'NoneType' object has no attribute 'split''"
- role: "assistant"
content: "This error occurs when you're trying to call .split() on a None value. The variable is likely None instead of a string."
locale: "en-US"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should be a title about Python debugging or error resolution"
- type: contains-any
value: ["Python", "debug", "error", "AttributeError", "code"]
- vars:
messages:
- role: "user"
content: "¿Cómo está el tiempo hoy?"
- role: "assistant"
content: "No tengo acceso a información meteorológica en tiempo real, pero puedes consultar el clima en tu área usando aplicaciones como Weather.com o tu app del tiempo local."
locale: "es-ES"
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should be a Spanish title about weather inquiry"
- type: regex
value: "^.{1,50}$"
- vars:
messages:
- role: "user"
content: "你知道 litellm 吗?"
locale: "zh-CN"
assert:
- type: regex
value: "^.{1,20}$" # Chinese titles can be shorter

View file

@ -1,18 +0,0 @@
// Promptfoo prompt wrapper delegating to the real chain implementation.
import { chainSummaryTitle } from '@lobechat/prompts';
import type { OpenAIChatMessage } from '@lobechat/types';

interface PromptVars {
  locale: string;
  messages: OpenAIChatMessage[];
}

/**
 * Build the promptfoo message array for the summary-title evaluation.
 * Delegates to `chainSummaryTitle` with the conversation messages and the
 * target locale; returns its messages, or an empty array when none.
 */
const generatePrompt = ({ vars }: { vars: PromptVars }) => {
  const { messages, locale } = vars;
  return chainSummaryTitle(messages, locale).messages || [];
};

export default generatePrompt;

View file

@ -1,51 +0,0 @@
description: Test supervisor prompt generation for group chat orchestration
prompts:
- file://promptfoo/supervisor/productive/prompt.ts
providers:
- id: openai:chat:gpt-5
config:
tools: file://./tools.json
tool_choice: required
- id: openai:chat:claude-sonnet-4-5-20250929
config:
tools: file://./tools.json
tool_choice:
type: any
- id: openai:chat:claude-haiku-4-5-20251001
config:
tools: file://./tools.json
tool_choice:
type: any
- id: openai:chat:gemini-2.5-pro
config:
tools: file://./tools.json
tool_choice: required
- id: openai:chat:deepseek-chat
config:
tools: file://./tools.json
tool_choice: required
- id: openai:chat:gpt-5-mini
config:
tools: file://./tools.json
tool_choice: required
- id: openai:chat:o3
config:
tools: file://./tools.json
tool_choice: required
- id: openai:chat:gpt-4.1-mini
config:
tools: file://./tools.json
tool_choice: required
tests:
- file://./tests/basic-case.ts
# - file://./tests/role.ts

View file

@ -1,19 +0,0 @@
// Promptfoo prompt wrapper around the real buildSupervisorPrompt implementation.
import type { SupervisorPromptParams } from '../../../src';
import { buildSupervisorPrompt } from '../../../src';

// Supervisor prompt params minus the fields the eval does not exercise,
// plus the message role to emit the prompt under.
type Vars = Omit<SupervisorPromptParams, 'allowDM' | 'scene'> & { role: string };

/**
 * Build the single-message prompt array for the supervisor evaluation.
 * Note: tools must be at top level for the is-valid-openai-tools-call
 * assertion to work — the assertion reads from provider.config.tools, and
 * promptfoo merges top-level properties into the provider config.
 */
export default function generatePrompt({ vars }: { vars: Vars }) {
  return [{ content: buildSupervisorPrompt(vars), role: vars.role || 'user' }];
}

View file

@ -1,54 +0,0 @@
// Promptfoo test cases for supervisor orchestration. Each case supplies the
// template vars for the supervisor prompt and asserts on the tool calls the
// model emits (structural validity plus an llm-rubric check).
const testCases = [
  // Tool Calling Test 1: Basic trigger_agent usage
  {
    assert: [
      { type: 'is-valid-openai-tools-call' },
      {
        provider: 'openai:gpt-5-mini',
        type: 'llm-rubric',
        value:
          'Should call trigger_agent tool to ask coder or designer to help with the login page task',
      },
    ],
    vars: {
      availableAgents: [
        { id: 'coder', title: 'Code Wizard' },
        { id: 'designer', title: 'UI Designer' },
      ],
      conversationHistory: 'User: I need help building a login page',
      systemPrompt: 'You are coordinating a software development team',
      userName: 'Bobs',
    },
  },
  // just say hi - should only trigger_agent, no todo operations
  {
    assert: [
      { type: 'is-valid-openai-tools-call' },
      {
        // JavaScript assertion evaluated by promptfoo against the raw output.
        type: 'javascript',
        value: `
// Ensure ONLY trigger_agent tool is called, no create_todo, finish_todo, etc.
const toolCalls = Array.isArray(output) ? output : [];
return toolCalls.length > 0 && toolCalls.every(call => call.function?.name === 'trigger_agent');
`,
      },
      {
        provider: 'openai:gpt-5-mini',
        type: 'llm-rubric',
        value:
          'Should call trigger_agent tool to greet the user or ask how to help. Should NOT include any create_todo or finish_todo calls.',
      },
    ],
    vars: {
      availableAgents: [
        { id: 'agt_J34pj8igq5Hk', title: '全栈工程师' },
        { id: 'agt_5xSoLVNHOjQj', title: '产品经理' },
      ],
      conversationHistory: '<message author="user">hi</message>',
      role: 'user',
      userName: 'Rene Wang',
    },
  },
];
export default testCases;

View file

@ -1,58 +0,0 @@
// Shared assertions for the role-sensitivity cases: the model must emit only
// trigger_agent calls (checked structurally, via a debug-logging JavaScript
// assertion, and via an llm-rubric judgment).
const assert = [
  { type: 'is-valid-openai-tools-call' },
  {
    // JavaScript assertion evaluated by promptfoo against the raw output.
    // The console.log calls are intentional debug output for eval runs.
    type: 'javascript',
    value: `
// Debug: log the actual output structure
console.log('DEBUG output:', JSON.stringify(output, null, 2));
// Ensure ONLY trigger_agent tool is called, no create_todo, finish_todo, etc.
const toolCalls = Array.isArray(output) ? output : [];
if (toolCalls.length === 0) {
  console.log('DEBUG: No tool calls found');
  return false;
}
for (const call of toolCalls) {
  const toolName = call.tool_name || call.function?.name || call.name;
  console.log('DEBUG tool name:', toolName);
  if (toolName !== 'trigger_agent') {
    console.log('DEBUG: Found non-trigger_agent tool:', toolName);
    return false;
  }
}
console.log('DEBUG: All', toolCalls.length, 'calls are trigger_agent');
return true;
`,
  },
  {
    provider: 'openai:gpt-5-mini',
    type: 'llm-rubric',
    value:
      'Should call trigger_agent tool to greet the user or ask how to help. Should NOT include any create_todo or finish_todo calls.',
  },
];
// Base template vars shared by both cases; only `role` varies below.
const vars = {
  availableAgents: [
    { id: 'agt_J34pj8igq5Hk', title: '全栈工程师' },
    { id: 'agt_5xSoLVNHOjQj', title: '产品经理' },
  ],
  conversationHistory: '<message author="user">hi</message>',
  role: 'user',
  userName: 'Rene Wang',
};
// Run the same scenario with the prompt delivered as a user message and as a
// system message, to compare role sensitivity.
const testCases = [
  {
    assert,
    vars: { ...vars, role: 'user' },
  },
  {
    assert,
    vars: { ...vars, role: 'system' },
  },
];
export default testCases;

View file

@ -1,80 +0,0 @@
[
{
"type": "function",
"function": {
"name": "trigger_agent",
"description": "Trigger an agent to speak (group message).",
"parameters": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The agent id to trigger."
},
"instruction": {
"type": "string"
}
},
"required": ["instruction", "id"],
"additionalProperties": false
}
}
},
{
"type": "function",
"function": {
"name": "wait_for_user_input",
"description": "Wait for user input. Use this when the conversation history looks fine for now, or agents are waiting for user input.",
"parameters": {
"type": "object",
"properties": {
"reason": {
"type": "string",
"description": "Optional reason for pausing the conversation."
}
},
"required": [],
"additionalProperties": false
}
}
},
{
"type": "function",
"function": {
"name": "create_todo",
"description": "Create a new todo item",
"parameters": {
"type": "object",
"properties": {
"assignee": {
"type": "string",
"description": "Who will do the todo. Can be agent id or empty."
},
"content": {
"type": "string",
"description": "The todo content or description."
}
},
"required": ["content", "assignee"],
"additionalProperties": false
}
}
},
{
"type": "function",
"function": {
"name": "finish_todo",
"description": "Finish a todo by index or all todos",
"parameters": {
"type": "object",
"properties": {
"index": {
"type": "number"
}
},
"required": ["index"],
"additionalProperties": false
}
}
}
]

View file

@ -1,79 +0,0 @@
description: Test translation accuracy between different languages
providers:
- openai:chat:gpt-5-mini
- openai:chat:claude-3-5-haiku-latest
- openai:chat:gemini-flash-latest
- openai:chat:deepseek-chat
prompts:
- file://promptfoo/translate/prompt.ts
tests:
- vars:
content: 'Hello, how are you?'
from: 'en-US'
to: 'zh-CN'
assert:
- type: contains-any
value: ['你好', '您好']
- type: not-contains
value: 'Hello'
- vars:
content: '你好,你怎么样?'
from: 'zh-CN'
to: 'en-US'
assert:
- type: contains-any
value: ['Hello', 'Hi', 'how are you', 'How are you']
- type: not-contains
value: '你好'
- vars:
content: 'Je suis content de vous rencontrer'
from: 'fr-FR'
to: 'en-US'
assert:
- type: contains-any
value: ['pleased', 'happy', 'glad', 'meet', 'meeting']
- type: not-contains
value: 'Je suis'
- vars:
content: 'The weather is beautiful today'
from: 'en-US'
to: 'es-ES'
assert:
- type: contains-any
value: ['tiempo', 'clima', 'hermoso', 'bonito', 'hoy', 'día', 'precioso']
- type: not-contains
value: 'weather'
- vars:
content: 'I love programming with TypeScript'
from: 'en-US'
to: 'ja-JP'
assert:
- type: contains
value: 'TypeScript' # Technical terms often remain unchanged
- type: not-contains
value: 'I love'
- vars:
content: 'Machine learning is revolutionizing technology'
from: 'en-US'
to: 'de-DE'
assert:
- type: contains-any
value: ['Technologie', 'revolutioniert', 'maschinelles', 'Lernen']
- type: not-contains
value: 'Machine learning'
- vars:
content: 'API_KEY_12345'
from: 'en-US'
to: 'zh-CN'
assert:
- type: contains
value: 'API_KEY_12345'

View file

@ -1,18 +0,0 @@
// Promptfoo prompt wrapper delegating to the real chain implementation.
import { chainTranslate } from '@lobechat/prompts';

interface PromptVars {
  content: string;
  // NOTE(review): `from` is supplied by the eval config but never consumed
  // here — chainTranslate is called with the target locale only. Confirm
  // against chainTranslate's signature whether the source locale is needed.
  from: string;
  to: string;
}

/**
 * Build the promptfoo message array for the translate evaluation.
 * Delegates to `chainTranslate` and returns its messages, or an empty
 * array when the chain yields none.
 */
const generatePrompt = ({ vars }: { vars: PromptVars }) =>
  chainTranslate(vars.content, vars.to).messages || [];

export default generatePrompt;

View file

@ -1,36 +0,0 @@
description: LobeHub Prompts Testing Suite
# Test configurations - run all prompt tests
testPaths:
- promptfoo/translate/eval.yaml
- promptfoo/summary-title/eval.yaml
- promptfoo/language-detection/eval.yaml
- promptfoo/emoji-picker/eval.yaml
- promptfoo/knowledge-qa/eval.yaml
- promptfoo/abstract-chunk/eval.yaml
# Output configuration
outputPath: promptfoo-results.json
# Default test settings
defaultTest:
assert:
- type: llm-rubric
provider: openai:gpt-5-mini
value: "The response should be relevant and well-formatted"
- type: cost
threshold: 0.01 # Maximum cost per test in USD
# Environment variables for API keys
env:
OPENAI_API_KEY: ${OPENAI_API_KEY}
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
# Evaluation settings
evaluateOptions:
maxConcurrency: 5
delay: 100
# TypeScript support
transforms:
- "typescript"