mirror of
https://github.com/lobehub/lobehub
synced 2026-04-21 09:37:28 +00:00
♻️ refactor: remove promptfoo configs and dependencies (#13665)
♻️ refactor: remove promptfoo configs and dependencies from packages
Migrate all prompt evaluation tests to the cloud repo's agent-evals framework.
Remove promptfoo directories, configs, dependencies, and generator scripts
from @lobechat/prompts, @lobechat/memory-user-memory, and @lobechat/builtin-tool-memory.
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
147ff3976f
commit
b6a47debfd
62 changed files with 11 additions and 5210 deletions
|
|
@ -10,17 +10,14 @@
|
|||
"./executionRuntime": "./src/ExecutionRuntime/index.ts"
|
||||
},
|
||||
"main": "./src/index.ts",
|
||||
"scripts": {
|
||||
"build:gen-tool-call": "tsx scripts/generate-tool-call.ts"
|
||||
},
|
||||
"scripts": {},
|
||||
"dependencies": {
|
||||
"@lobechat/memory-user-memory": "workspace:*",
|
||||
"@lobechat/prompts": "workspace:*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@lobechat/types": "workspace:*",
|
||||
"@types/json-schema": "^7.0.15",
|
||||
"promptfoo": "^0.120.17"
|
||||
"@types/json-schema": "^7.0.15"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@lobehub/ui": "^5",
|
||||
|
|
|
|||
|
|
@ -1,19 +0,0 @@
|
|||
/** Variables supplied to the prompt builder for one evaluation case. */
export interface PromptVars {
  /** Conversation transcript; a falsy value (empty string) adds no extra message. */
  conversation: string;
}

/** Shape of every chat message produced by {@link buildActivityMessages}. */
type PromptMessage = { content: string; role: 'system' | 'user' };

/**
 * Builds the chat messages for the evaluation prompt.
 *
 * The result always starts with a fixed system instruction followed by a fixed
 * sample user statement. When `vars.conversation` is non-empty it is appended
 * as one more user message, prefixed with `Conversation:` and a newline.
 *
 * @param vars - Prompt variables; only `conversation` is read.
 * @returns A newly created array of two or three messages.
 */
export const buildActivityMessages = (vars: PromptVars): PromptMessage[] => {
  // Declared element type replaces the per-literal `as const` assertions.
  const messages: PromptMessage[] = [
    {
      content:
        'You are a memory assistant, help the user to organize their preferences with memory related tools',
      role: 'system',
    },
    { content: 'I love to drink Hong Kong Milk Tea', role: 'user' },
  ];

  if (vars.conversation) {
    messages.push({
      content: `Conversation:\n${vars.conversation}`,
      role: 'user',
    });
  }

  return messages;
};
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
description: Regression benchmark for activity layer structured extraction
|
||||
|
||||
providers:
|
||||
- id: openai:chat:google/gemini-2.5-pro
|
||||
config:
|
||||
tools: file://../../../../tool-calls/memory-addPreferenceMemory.json
|
||||
tool_choice:
|
||||
type: any
|
||||
|
||||
prompts:
|
||||
- file://./prompt.ts
|
||||
|
||||
tests:
|
||||
- file://./tests/cases.ts
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
import type { PromptVars } from './buildMessages';
|
||||
import { buildActivityMessages } from './buildMessages';
|
||||
|
||||
/**
 * Default prompt entry point: forwards the evaluation variables to
 * `buildActivityMessages` and returns its result unchanged.
 *
 * @param context - Object whose `vars` field carries the prompt variables.
 * @returns Whatever `buildActivityMessages` returns for those variables.
 */
export default function generatePrompt(context: { vars: PromptVars }) {
  const { vars } = context;
  return buildActivityMessages(vars);
}
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
/** One assertion entry: an inline JavaScript check or an LLM-graded rubric. */
type PromptfooAssert =
  | { type: 'javascript'; value: string }
  | { provider?: string; type: 'llm-rubric'; value: string };

/** One evaluation case: its assertions, an optional label, and the prompt variables. */
interface PromptfooTestCase {
  assert: PromptfooAssert[];
  description?: string;
  vars: Record<string, unknown>;
}

/** Grader model used by every `llm-rubric` assertion in this file. */
const RUBRIC_PROVIDER = 'openai:gpt-5-mini';

/**
 * Structural check shared by all cases.
 *
 * The snippet is written as a function body that reads `output` and returns a
 * boolean (NOTE(review): presumably evaluated by promptfoo's `javascript`
 * assertion runner — confirm against its docs). It returns false when `output`
 * is not valid JSON or has no `memories` array, and otherwise requires every
 * memory to be an `activity` with title, summary, and an activity type and
 * narrative. An empty `memories` array passes this check.
 */
const baseSchemaAssert: PromptfooAssert = {
  type: 'javascript',
  value: `
    let parsed;
    try {
      parsed = JSON.parse(output);
    } catch (error) {
      console.error('Failed to parse JSON output', error);
      return false;
    }

    if (!parsed || !Array.isArray(parsed.memories)) return false;

    return parsed.memories.every((memory) => {
      return (
        memory.memoryType === 'activity' &&
        memory.title &&
        memory.summary &&
        memory.withActivity?.type &&
        memory.withActivity?.narrative
      );
    });
  `,
};

/**
 * Builds a JavaScript assertion about the first memory's `withActivity` object.
 *
 * Unlike a bare `JSON.parse(output)`, the generated snippet returns false
 * (instead of throwing) when `output` is not valid JSON, parses to `null`, or
 * contains no memories, so it fails the same way `baseSchemaAssert` does.
 *
 * @param check - JavaScript expression over the local `activity` object; its
 *   truthiness becomes the assertion result.
 */
const firstActivityAssert = (check: string): PromptfooAssert => ({
  type: 'javascript',
  value: `
    let data;
    try {
      data = JSON.parse(output);
    } catch (error) {
      return false;
    }

    const first = data?.memories?.[0];
    if (!first) return false;

    const activity = first.withActivity || {};
    return Boolean(${check});
  `,
});

/** Prompt variables common to every case; each case spreads these and adds its own. */
const baseVars = {
  availableCategories: ['work', 'health', 'personal'],
  language: 'English',
  topK: 5,
  username: 'User',
};

const testCases: PromptfooTestCase[] = [
  {
    assert: [
      baseSchemaAssert,
      // Explicit timing and a named location were given, so all must be extracted.
      firstActivityAssert(
        'activity.startsAt && activity.endsAt && activity.timezone && activity.associatedLocations?.[0]?.name',
      ),
      {
        provider: RUBRIC_PROVIDER,
        type: 'llm-rubric',
        value:
          'Should extract a meeting activity including timing (start/end/timezone), location name ACME HQ, status completed when implied, and feedback reflecting the positive tone.',
      },
    ],
    description: 'Meeting with explicit time and location',
    vars: {
      ...baseVars,
      conversation:
        'User: I met with Alice at ACME HQ on 2024-05-03 from 14:00-15:00 America/New_York. We reviewed Q2 renewal scope and agreed to send revised pricing next week. I felt positive and collaborative about the call.',
      retrievedContexts: ['Previous similar memory: met with Alice about renewal last month.'],
      sessionDate: '2024-05-03',
    },
  },
  {
    assert: [
      baseSchemaAssert,
      // Only narrative and feedback are required; absence of temporal fields is
      // judged by the rubric below, not by this check.
      firstActivityAssert('activity.narrative && activity.feedback'),
      {
        provider: RUBRIC_PROVIDER,
        type: 'llm-rubric',
        value:
          'Should capture an exercise activity without inventing exact timestamps or timezones; keep the narrative and feedback about the yoga session at home and omit temporal fields that were not provided.',
      },
    ],
    description: 'Exercise without explicit time or timezone',
    vars: {
      ...baseVars,
      conversation:
        'User: Over the weekend I did a 30-minute yoga session at home with my roommate. No specific time was set, it was just a casual stretch and it left me feeling calm.',
      retrievedContexts: [],
      sessionDate: '2025-05-05 10:02:00',
    },
  },
];

export default testCases;
|
||||
|
|
@ -1,192 +0,0 @@
|
|||
{
|
||||
"description": "Create a context memory that captures ongoing situations, projects, or environments. Include actors, resources, statuses, urgency/impact, and a clear description.",
|
||||
"name": "addContextMemory",
|
||||
"parameters": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"details": {
|
||||
"description": "Optional detailed information",
|
||||
"type": "string"
|
||||
},
|
||||
"memoryCategory": {
|
||||
"description": "Memory category",
|
||||
"type": "string"
|
||||
},
|
||||
"memoryType": {
|
||||
"description": "Memory type",
|
||||
"enum": [
|
||||
"activity",
|
||||
"context",
|
||||
"event",
|
||||
"fact",
|
||||
"location",
|
||||
"other",
|
||||
"people",
|
||||
"preference",
|
||||
"technology",
|
||||
"topic"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
"summary": {
|
||||
"description": "Concise overview of this specific memory",
|
||||
"type": "string"
|
||||
},
|
||||
"tags": {
|
||||
"description": "User defined tags that summarize the context facets",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"title": {
|
||||
"description": "Brief descriptive title",
|
||||
"type": "string"
|
||||
},
|
||||
"withContext": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"associatedObjects": {
|
||||
"description": "Array of objects describing involved roles, entities, or resources, [] empty if none",
|
||||
"items": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"extra": {
|
||||
"description": "Additional metadata about the object, should always be a valid JSON string if present",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"name": {
|
||||
"description": "Name of the associated object",
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"description": "Type/category of the associated object",
|
||||
"enum": [
|
||||
"application",
|
||||
"item",
|
||||
"knowledge",
|
||||
"other",
|
||||
"person",
|
||||
"place"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"extra",
|
||||
"name",
|
||||
"type"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"associatedSubjects": {
|
||||
"description": "Array of JSON objects describing involved subjects or participants, [] empty if none",
|
||||
"items": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"extra": {
|
||||
"description": "Additional metadata about the subject, should always be a valid JSON string if present",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"name": {
|
||||
"description": "Name of the associated subject",
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"description": "Type/category of the associated subject",
|
||||
"enum": [
|
||||
"item",
|
||||
"other",
|
||||
"person",
|
||||
"pet"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"extra",
|
||||
"name",
|
||||
"type"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"currentStatus": {
|
||||
"description": "High level status markers (must be one of 'planned', 'ongoing', 'completed', 'aborted', 'on_hold', 'cancelled')",
|
||||
"enum": [
|
||||
"aborted",
|
||||
"cancelled",
|
||||
"completed",
|
||||
"on_hold",
|
||||
"ongoing",
|
||||
"planned"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
"description": {
|
||||
"description": "Rich narrative describing the situation, timeline, or environment",
|
||||
"type": "string"
|
||||
},
|
||||
"labels": {
|
||||
"description": "Model generated tags that summarize the context themes",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"scoreImpact": {
|
||||
"description": "Numeric score (0-1 (0% to 100%)) describing importance",
|
||||
"maximum": 1,
|
||||
"minimum": 0,
|
||||
"type": "number"
|
||||
},
|
||||
"scoreUrgency": {
|
||||
"description": "Numeric score (0-1 (0% to 100%)) describing urgency",
|
||||
"maximum": 1,
|
||||
"minimum": 0,
|
||||
"type": "number"
|
||||
},
|
||||
"title": {
|
||||
"description": "Optional synthesized context headline",
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"description": "High level context archetype (e.g., 'project', 'relationship', 'goal')",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"associatedObjects",
|
||||
"associatedSubjects",
|
||||
"currentStatus",
|
||||
"description",
|
||||
"labels",
|
||||
"scoreImpact",
|
||||
"scoreUrgency",
|
||||
"title",
|
||||
"type"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"details",
|
||||
"memoryCategory",
|
||||
"memoryType",
|
||||
"summary",
|
||||
"tags",
|
||||
"title",
|
||||
"withContext"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,125 +0,0 @@
|
|||
{
|
||||
"description": "Record an experience memory capturing situation, actions, reasoning, outcomes, and confidence. Use for lessons, playbooks, or transferable know-how.",
|
||||
"name": "addExperienceMemory",
|
||||
"parameters": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"details": {
|
||||
"description": "Optional detailed information",
|
||||
"type": "string"
|
||||
},
|
||||
"memoryCategory": {
|
||||
"description": "Memory category",
|
||||
"type": "string"
|
||||
},
|
||||
"memoryType": {
|
||||
"description": "Memory type",
|
||||
"enum": [
|
||||
"activity",
|
||||
"context",
|
||||
"event",
|
||||
"fact",
|
||||
"location",
|
||||
"other",
|
||||
"people",
|
||||
"preference",
|
||||
"technology",
|
||||
"topic"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
"summary": {
|
||||
"description": "Concise overview of this specific memory",
|
||||
"type": "string"
|
||||
},
|
||||
"tags": {
|
||||
"description": "Model generated tags that summarize the experience facets",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"title": {
|
||||
"description": "Brief descriptive title",
|
||||
"type": "string"
|
||||
},
|
||||
"withExperience": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"action": {
|
||||
"description": "Narrative describing actions taken or behaviors exhibited",
|
||||
"type": "string"
|
||||
},
|
||||
"keyLearning": {
|
||||
"description": "Narrative describing key insights or lessons learned",
|
||||
"type": "string"
|
||||
},
|
||||
"knowledgeValueScore": {
|
||||
"description": "Numeric score (0-1) describing how reusable and shareable this experience is",
|
||||
"maximum": 1,
|
||||
"minimum": 0,
|
||||
"type": "number"
|
||||
},
|
||||
"labels": {
|
||||
"description": "Model generated tags that summarize the experience facets",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"possibleOutcome": {
|
||||
"description": "Narrative describing potential outcomes or learnings",
|
||||
"type": "string"
|
||||
},
|
||||
"problemSolvingScore": {
|
||||
"description": "Numeric score (0-1) describing how effectively the problem was solved",
|
||||
"maximum": 1,
|
||||
"minimum": 0,
|
||||
"type": "number"
|
||||
},
|
||||
"reasoning": {
|
||||
"description": "Narrative describing the thought process or motivations",
|
||||
"type": "string"
|
||||
},
|
||||
"scoreConfidence": {
|
||||
"description": "Numeric score (0-1 (0% to 100%)) describing confidence in the experience details",
|
||||
"maximum": 1,
|
||||
"minimum": 0,
|
||||
"type": "number"
|
||||
},
|
||||
"situation": {
|
||||
"description": "Narrative describing the situation or event",
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"description": "Type of experience being recorded",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"situation",
|
||||
"reasoning",
|
||||
"action",
|
||||
"possibleOutcome",
|
||||
"keyLearning",
|
||||
"type",
|
||||
"labels",
|
||||
"problemSolvingScore",
|
||||
"scoreConfidence",
|
||||
"knowledgeValueScore"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"details",
|
||||
"memoryCategory",
|
||||
"memoryType",
|
||||
"summary",
|
||||
"tags",
|
||||
"title",
|
||||
"withExperience"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,148 +0,0 @@
|
|||
{
|
||||
"description": "Add an identity memory describing enduring facts about a person, their role, relationship, and supporting evidence. Use to track self/others identities.",
|
||||
"name": "addIdentityMemory",
|
||||
"parameters": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"details": {
|
||||
"description": "Optional detailed information",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"memoryCategory": {
|
||||
"description": "Memory category",
|
||||
"type": "string"
|
||||
},
|
||||
"memoryType": {
|
||||
"description": "Memory type",
|
||||
"enum": [
|
||||
"activity",
|
||||
"context",
|
||||
"event",
|
||||
"fact",
|
||||
"location",
|
||||
"other",
|
||||
"people",
|
||||
"preference",
|
||||
"technology",
|
||||
"topic"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
"summary": {
|
||||
"description": "Concise overview of this specific memory",
|
||||
"type": "string"
|
||||
},
|
||||
"tags": {
|
||||
"description": "Model generated tags that summarize the identity facets",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"title": {
|
||||
"description": "Honorific-style, concise descriptor (strength + domain/milestone), avoid bare job titles; e.g., \"Trusted open-source maintainer\", \"Specializes in low-latency infra\", \"Former Aliyun engineer\", \"Cares for rescue cats\"",
|
||||
"type": "string"
|
||||
},
|
||||
"withIdentity": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"episodicDate": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"extractedLabels": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"relationship": {
|
||||
"enum": [
|
||||
"aunt",
|
||||
"brother",
|
||||
"classmate",
|
||||
"colleague",
|
||||
"couple",
|
||||
"coworker",
|
||||
"daughter",
|
||||
"father",
|
||||
"friend",
|
||||
"granddaughter",
|
||||
"grandfather",
|
||||
"grandmother",
|
||||
"grandson",
|
||||
"husband",
|
||||
"manager",
|
||||
"mentee",
|
||||
"mentor",
|
||||
"mother",
|
||||
"nephew",
|
||||
"niece",
|
||||
"other",
|
||||
"partner",
|
||||
"self",
|
||||
"sibling",
|
||||
"sister",
|
||||
"son",
|
||||
"spouse",
|
||||
"teammate",
|
||||
"uncle",
|
||||
"wife"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
"role": {
|
||||
"description": "Role explicitly mentioned for this identity entry (e.g., \"platform engineer\", \"caregiver\"); keep neutral and only use when evidence exists",
|
||||
"type": "string"
|
||||
},
|
||||
"scoreConfidence": {
|
||||
"type": "number"
|
||||
},
|
||||
"sourceEvidence": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"type": {
|
||||
"enum": [
|
||||
"demographic",
|
||||
"personal",
|
||||
"professional"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"description",
|
||||
"episodicDate",
|
||||
"extractedLabels",
|
||||
"relationship",
|
||||
"role",
|
||||
"scoreConfidence",
|
||||
"sourceEvidence",
|
||||
"type"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"details",
|
||||
"memoryCategory",
|
||||
"memoryType",
|
||||
"summary",
|
||||
"tags",
|
||||
"title",
|
||||
"withIdentity"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,200 +0,0 @@
|
|||
{
|
||||
"description": "Create a preference memory that encodes durable directives or choices the assistant should follow. Include conclusionDirectives, scopes, and context.",
|
||||
"name": "addPreferenceMemory",
|
||||
"parameters": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"details": {
|
||||
"description": "Optional detailed information",
|
||||
"type": "string"
|
||||
},
|
||||
"memoryCategory": {
|
||||
"description": "Memory category",
|
||||
"type": "string"
|
||||
},
|
||||
"memoryType": {
|
||||
"description": "Memory type",
|
||||
"enum": [
|
||||
"activity",
|
||||
"context",
|
||||
"event",
|
||||
"fact",
|
||||
"location",
|
||||
"other",
|
||||
"people",
|
||||
"preference",
|
||||
"technology",
|
||||
"topic"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
"summary": {
|
||||
"description": "Concise overview of this specific memory",
|
||||
"type": "string"
|
||||
},
|
||||
"tags": {
|
||||
"description": "Model generated tags that summarize the preference facets",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"title": {
|
||||
"description": "Brief descriptive title",
|
||||
"type": "string"
|
||||
},
|
||||
"withPreference": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"appContext": {
|
||||
"additionalProperties": false,
|
||||
"description": "Application/surface specific preference, if any",
|
||||
"properties": {
|
||||
"app": {
|
||||
"description": "App or product name this applies to",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"feature": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"route": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"surface": {
|
||||
"description": "e.g., chat, emails, code review, notes",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"app",
|
||||
"feature",
|
||||
"route",
|
||||
"surface"
|
||||
],
|
||||
"type": [
|
||||
"object",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"conclusionDirectives": {
|
||||
"description": "Direct, self-contained instruction to the assistant from the user's perspective (what to do, not how to implement)",
|
||||
"type": "string"
|
||||
},
|
||||
"extractedLabels": {
|
||||
"description": "Model generated tags that summarize the preference facets",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"extractedScopes": {
|
||||
"description": "Array of JSON strings describing preference facets and applicable scopes",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"originContext": {
|
||||
"additionalProperties": false,
|
||||
"description": "Context of how/why this preference was expressed",
|
||||
"properties": {
|
||||
"actor": {
|
||||
"description": "Who stated the preference; use 'User' for the user",
|
||||
"type": "string"
|
||||
},
|
||||
"applicableWhen": {
|
||||
"description": "Conditions where this preference applies",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"notApplicableWhen": {
|
||||
"description": "Conditions where it does not apply",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"scenario": {
|
||||
"description": "Applicable scenario or use case",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"trigger": {
|
||||
"description": "What prompted this preference",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"actor",
|
||||
"applicableWhen",
|
||||
"notApplicableWhen",
|
||||
"scenario",
|
||||
"trigger"
|
||||
],
|
||||
"type": [
|
||||
"object",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"scorePriority": {
|
||||
"description": "Numeric prioritization weight (0-1 (0% to 100%)) where higher means more critical to respect",
|
||||
"maximum": 1,
|
||||
"minimum": 0,
|
||||
"type": "number"
|
||||
},
|
||||
"suggestions": {
|
||||
"description": "Follow-up actions or assistant guidance derived from the preference",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"type": {
|
||||
"description": "High level preference classification (e.g., 'lifestyle', 'communication')",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"appContext",
|
||||
"conclusionDirectives",
|
||||
"extractedLabels",
|
||||
"extractedScopes",
|
||||
"originContext",
|
||||
"scorePriority",
|
||||
"suggestions",
|
||||
"type"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"title",
|
||||
"summary",
|
||||
"tags",
|
||||
"details",
|
||||
"memoryCategory",
|
||||
"memoryType",
|
||||
"withPreference"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
{
|
||||
"description": "Remove an identity memory when it is incorrect, obsolete, or duplicated. Always provide a concise reason.",
|
||||
"name": "removeIdentityMemory",
|
||||
"parameters": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"reason": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"reason"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,136 +0,0 @@
|
|||
{
|
||||
"description": "Retrieve memories using one or more search queries plus optional structured filters, including calendar-friendly timeIntent selectors.",
|
||||
"name": "searchUserMemory",
|
||||
"parameters": {
|
||||
"additionalProperties": false,
|
||||
"definitions": {
|
||||
"searchMemoryTimeIntent": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"anchor": {
|
||||
"description": "Anchor for relativeDay. Supports the legacy string values \"today\" and \"yesterday\", or another timeIntent object such as { \"selector\": \"day\", \"date\": \"2025-12-15T00:00:00.000Z\" }.",
|
||||
"oneOf": [
|
||||
{
|
||||
"enum": ["today", "yesterday"],
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"$ref": "#/definitions/searchMemoryTimeIntent"
|
||||
}
|
||||
]
|
||||
},
|
||||
"date": {
|
||||
"format": "date-time",
|
||||
"type": "string"
|
||||
},
|
||||
"end": {
|
||||
"format": "date-time",
|
||||
"type": "string"
|
||||
},
|
||||
"month": {
|
||||
"maximum": 12,
|
||||
"minimum": 1,
|
||||
"type": "integer"
|
||||
},
|
||||
"offsetDays": {
|
||||
"type": "integer"
|
||||
},
|
||||
"selector": {
|
||||
"enum": [
|
||||
"today",
|
||||
"yesterday",
|
||||
"currentWeek",
|
||||
"lastWeek",
|
||||
"lastWeekend",
|
||||
"lastWeekdays",
|
||||
"currentMonth",
|
||||
"lastMonth",
|
||||
"currentYear",
|
||||
"lastYear",
|
||||
"day",
|
||||
"month",
|
||||
"year",
|
||||
"relativeDay",
|
||||
"range"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
"start": {
|
||||
"format": "date-time",
|
||||
"type": "string"
|
||||
},
|
||||
"year": {
|
||||
"maximum": 9999,
|
||||
"minimum": 1970,
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"required": ["selector"],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
"layers": {
|
||||
"items": {
|
||||
"enum": ["activity", "context", "experience", "identity", "preference"],
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"queries": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"timeIntent": {
|
||||
"$ref": "#/definitions/searchMemoryTimeIntent"
|
||||
},
|
||||
"timeRange": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"end": {
|
||||
"format": "date-time",
|
||||
"type": "string"
|
||||
},
|
||||
"field": {
|
||||
"enum": ["capturedAt", "createdAt", "endsAt", "episodicDate", "startsAt", "updatedAt"],
|
||||
"type": "string"
|
||||
},
|
||||
"start": {
|
||||
"format": "date-time",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
},
|
||||
"topK": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"activities": {
|
||||
"minimum": 0,
|
||||
"type": "integer"
|
||||
},
|
||||
"contexts": {
|
||||
"minimum": 0,
|
||||
"type": "integer"
|
||||
},
|
||||
"experiences": {
|
||||
"minimum": 0,
|
||||
"type": "integer"
|
||||
},
|
||||
"identities": {
|
||||
"minimum": 0,
|
||||
"type": "integer"
|
||||
},
|
||||
"preferences": {
|
||||
"minimum": 0,
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,153 +0,0 @@
|
|||
{
|
||||
"description": "Update an existing identity memory with refined details, relationships, roles, or tags. Use mergeStrategy to control replacement vs merge.",
|
||||
"name": "updateIdentityMemory",
|
||||
"parameters": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"mergeStrategy": {
|
||||
"enum": [
|
||||
"merge",
|
||||
"replace"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
"set": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"details": {
|
||||
"description": "Optional detailed information, use null for omitting the field",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"memoryCategory": {
|
||||
"description": "Memory category, use null for omitting the field",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"memoryType": {
|
||||
"description": "Memory type, use null for omitting the field",
|
||||
"enum": [
|
||||
"activity",
|
||||
"context",
|
||||
"event",
|
||||
"fact",
|
||||
"location",
|
||||
"other",
|
||||
"people",
|
||||
"preference",
|
||||
"technology",
|
||||
"topic",
|
||||
null
|
||||
]
|
||||
},
|
||||
"summary": {
|
||||
"description": "Concise overview of this specific memory, use null for omitting the field",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"tags": {
|
||||
"description": "Model generated tags that summarize the identity facets, use null for omitting the field",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"title": {
|
||||
"description": "Honorific-style, concise descriptor (strength + domain/milestone), avoid bare job titles; e.g., \"Trusted open-source maintainer\", \"Specializes in low-latency infra\", \"Former Aliyun engineer\", \"Cares for rescue cats\"; use null for omitting the field",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"withIdentity": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"episodicDate": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"extractedLabels": {
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"relationship": {
|
||||
"description": "Possible values: aunt | brother | classmate | colleague | couple | coworker | daughter | father | friend | granddaughter | grandfather | grandmother | grandson | husband | manager | mentee | mentor | mother | nephew | niece | other | partner | self | sibling | sister | son | spouse | teammate | uncle | wife",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"role": {
|
||||
"description": "Role explicitly mentioned for this identity entry (e.g., \"platform engineer\", \"caregiver\"); keep existing when not updated; use null for omitting the field",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"scoreConfidence": {
|
||||
"type": [
|
||||
"number",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"sourceEvidence": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"type": {
|
||||
"description": "Possible values: demographic | personal | professional",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"description",
|
||||
"extractedLabels",
|
||||
"role"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"withIdentity"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"mergeStrategy",
|
||||
"set"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
description: LobeHub Prompts (memory-user-memory) Testing Suite
|
||||
|
||||
# Test configurations - run all prompt tests
|
||||
testPaths:
|
||||
- promptfoo/evals/preferences/tool-call/basic/eval.yaml
|
||||
|
||||
# Default provider setup (must specify either providers or targets)
|
||||
providers:
|
||||
- id: google:gemini-2.5-pro
|
||||
|
||||
# Output configuration
|
||||
outputPath: promptfoo-results.json
|
||||
|
||||
# Default test settings
|
||||
defaultTest:
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should be relevant and well-formatted"
|
||||
- type: llm-rubric
|
||||
provider: google:gemini-2.5-flash
|
||||
value: "The response should be relevant and well-formatted"
|
||||
- type: cost
|
||||
threshold: 0.01 # Maximum cost per test in USD
|
||||
|
||||
# Evaluation settings
|
||||
evaluateOptions:
|
||||
maxConcurrency: 5
|
||||
delay: 100
|
||||
|
||||
# TypeScript support
|
||||
transforms:
|
||||
- "typescript"
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { exit } from 'node:process';
|
||||
|
||||
import type { BuiltinToolManifest } from '@lobechat/types';
|
||||
|
||||
import { MemoryManifest } from '../../builtin-tool-memory';
|
||||
|
||||
/** Output directory for generated schema files, resolved against the current working directory. */
const OUTPUT_DIR = join(process.cwd(), 'promptfoo/tool-calls');

/**
 * Writes one pretty-printed JSON file per entry of `manifest.api` into `OUTPUT_DIR`.
 *
 * Each entry is copied with `type: 'function'` added (overriding any `type` the
 * entry already has) and saved as `<prefix>-<name>.json`. The files do not
 * depend on each other, so the writes are issued concurrently.
 *
 * @param prefix - File-name prefix placed before each tool name.
 * @param manifest - Manifest whose `api` entries are serialized.
 * @returns A promise that resolves once every file is written and rejects with
 *   the first write error. `OUTPUT_DIR` must already exist.
 */
const writeToolCallSchemaFromManifest = async (
  prefix: string,
  manifest: BuiltinToolManifest,
): Promise<void> => {
  await Promise.all(
    // Array.from accepts any iterable, matching what the original for...of loop allowed.
    Array.from(manifest.api, (tool) => {
      const transformedTool = {
        ...tool,
        type: 'function',
      };

      return writeFile(
        join(OUTPUT_DIR, `${prefix}-${transformedTool.name}.json`),
        JSON.stringify(transformedTool, null, 2),
      );
    }),
  );
};
|
||||
|
||||
/** Creates OUTPUT_DIR (including missing parents), then writes the memory tool schemas. */
async function main() {
  await mkdir(OUTPUT_DIR, { recursive: true });
  await writeToolCallSchemaFromManifest('memory', MemoryManifest);
}

// Log the failure and terminate the process with exit code 1.
const reportFailure = (error: unknown) => {
  console.error(error);
  exit(1);
};

main().catch(reportFailure);
|
||||
|
|
@ -11,7 +11,6 @@
|
|||
},
|
||||
"main": "src/index.ts",
|
||||
"scripts": {
|
||||
"build:gen-response-formats": "tsx scripts/generate-response-formats.ts",
|
||||
"test": "vitest --run",
|
||||
"test:coverage": "vitest --coverage --silent='passed-only'",
|
||||
"type-check": "tsgo --noEmit -p tsconfig.json"
|
||||
|
|
@ -33,7 +32,6 @@
|
|||
"@types/json-schema": "^7.0.15",
|
||||
"@types/xast": "^2.0.4",
|
||||
"picospinner": "^3.0.0",
|
||||
"promptfoo": "^0.120.17",
|
||||
"tsx": "^4.20.6"
|
||||
},
|
||||
"peerDependencies": {
|
||||
|
|
|
|||
|
|
@ -1,40 +0,0 @@
|
|||
import { renderPlaceholderTemplate } from '@lobechat/context-engine';
|
||||
|
||||
import { activityPrompt } from '../../../../src/prompts';
|
||||
import type { ExtractorTemplateProps } from '../../../../src/types';
|
||||
|
||||
/** Variables for the basic activity prompt: the extractor template props plus a conversation. */
export interface PromptVars extends ExtractorTemplateProps {
  /** Conversation text; buildActivityMessages appends it as a final user message when non-empty. */
  conversation: string;
}
|
||||
|
||||
/**
 * Builds the chat messages for the activity-extraction prompt.
 *
 * The activity prompt template is rendered once and used as the content of both
 * the system message and the first user message. When `vars.conversation` is
 * non-empty, one more user message carrying the conversation is appended.
 *
 * @param vars - Template variables plus the conversation text.
 * @returns The ordered list of messages.
 */
export const buildActivityMessages = (vars: PromptVars) => {
  const { retrievedContexts } = vars;

  // A non-empty list is joined with blank lines, a plain string is used as-is,
  // and anything else falls back to the placeholder sentence.
  let retrievedContext = 'No similar memories retrieved.';
  if (Array.isArray(retrievedContexts) && retrievedContexts.length > 0) {
    retrievedContext = retrievedContexts.join('\n\n');
  } else if (typeof retrievedContexts === 'string') {
    retrievedContext = retrievedContexts;
  }

  const prompt = renderPlaceholderTemplate(activityPrompt, {
    availableCategories: vars.availableCategories,
    language: vars.language || 'English',
    retrievedContext,
    sessionDate: vars.sessionDate || new Date().toISOString(),
    topK: vars.topK ?? 5,
    username: vars.username || 'User',
  });

  const conversationMessages = vars.conversation
    ? [{ content: `Conversation:\n${vars.conversation}`, role: 'user' as const }]
    : [];

  return [
    { content: prompt, role: 'system' as const },
    { content: prompt, role: 'user' as const },
    ...conversationMessages,
  ];
};
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
description: Regression benchmark for activity layer structured extraction
|
||||
|
||||
providers:
|
||||
- id: openai:chat:gpt-5-mini
|
||||
config:
|
||||
response_format: file://../../../response-formats/activity.json
|
||||
temperature: 0
|
||||
|
||||
prompts:
|
||||
- file://./prompt.ts
|
||||
|
||||
tests:
|
||||
- file://./tests/cases.ts
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
import type { PromptVars } from './buildMessages';
|
||||
import { buildActivityMessages } from './buildMessages';
|
||||
|
||||
/** Prompt entry point: forwards the supplied `vars` to buildActivityMessages. */
export default function generatePrompt(context: { vars: PromptVars }) {
  const { vars } = context;

  return buildActivityMessages(vars);
}
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
/** Assertion of type `javascript`; `value` holds the JavaScript source to run. */
type JavascriptAssert = { type: 'javascript'; value: string };

/** Assertion of type `llm-rubric`; `value` holds the rubric text, `provider` optionally names the grader. */
type LlmRubricAssert = { provider?: string; type: 'llm-rubric'; value: string };

type PromptfooAssert = JavascriptAssert | LlmRubricAssert;

/** One test case: its assertions, an optional description, and the template variables. */
interface PromptfooTestCase {
  assert: PromptfooAssert[];
  description?: string;
  vars: Record<string, unknown>;
}
|
||||
|
||||
// JavaScript body of the base schema check. It reads `output`, requires JSON with
// a `memories` array, and requires every memory to be an activity with a title,
// a summary, and an activity type and narrative.
const baseSchemaScript = `
    let parsed;
    try {
      parsed = JSON.parse(output);
    } catch (error) {
      console.error('Failed to parse JSON output', error);
      return false;
    }

    if (!parsed || !Array.isArray(parsed.memories)) return false;

    return parsed.memories.every((memory) => {
      return (
        memory.memoryType === 'activity' &&
        memory.title &&
        memory.summary &&
        memory.withActivity?.type &&
        memory.withActivity?.narrative
      );
    });
  `;

/** Schema assertion shared by every test case in this file. */
const baseSchemaAssert: PromptfooAssert = {
  type: 'javascript',
  value: baseSchemaScript,
};
|
||||
|
||||
// Template variables shared by every case; each case adds its own conversation,
// retrieved contexts, and session date.
const baseVars = {
  availableCategories: ['work', 'health', 'personal'],
  language: 'English',
  topK: 5,
  username: 'User',
};

// Wraps JavaScript source in a `javascript` assertion.
const javascriptAssert = (value: string): PromptfooAssert => ({ type: 'javascript', value });

// Wraps rubric text in an `llm-rubric` assertion graded by openai:gpt-5-mini.
const llmRubric = (value: string): PromptfooAssert => ({
  provider: 'openai:gpt-5-mini',
  type: 'llm-rubric',
  value,
});

// Case 1: the first memory must carry start, end, timezone, and a named location.
const meetingWithTimeAndLocation: PromptfooTestCase = {
  assert: [
    baseSchemaAssert,
    javascriptAssert(`
          const data = JSON.parse(output);
          const first = data.memories?.[0];
          if (!first) return false;

          const activity = first.withActivity || {};
          return Boolean(activity.startsAt && activity.endsAt && activity.timezone && activity.associatedLocations?.[0]?.name);
        `),
    llmRubric(
      'Should extract a meeting activity including timing (start/end/timezone), location name ACME HQ, status completed when implied, and feedback reflecting the positive tone.',
    ),
  ],
  description: 'Meeting with explicit time and location',
  vars: {
    ...baseVars,
    conversation:
      'User: I met with Alice at ACME HQ on 2024-05-03 from 14:00-15:00 America/New_York. We reviewed Q2 renewal scope and agreed to send revised pricing next week. I felt positive and collaborative about the call.',
    retrievedContexts: ['Previous similar memory: met with Alice about renewal last month.'],
    sessionDate: '2024-05-03',
  },
};

// Case 2: the first memory must carry a narrative and feedback; the rubric asks
// that no timestamps or timezones be invented.
const exerciseWithoutTime: PromptfooTestCase = {
  assert: [
    baseSchemaAssert,
    javascriptAssert(`
          const data = JSON.parse(output);
          const first = data.memories?.[0];
          if (!first) return false;

          const activity = first.withActivity || {};
          return Boolean(activity.narrative && activity.feedback);
        `),
    llmRubric(
      'Should capture an exercise activity without inventing exact timestamps or timezones; keep the narrative and feedback about the yoga session at home and omit temporal fields that were not provided.',
    ),
  ],
  description: 'Exercise without explicit time or timezone',
  vars: {
    ...baseVars,
    conversation:
      'User: Over the weekend I did a 30-minute yoga session at home with my roommate. No specific time was set, it was just a casual stretch and it left me feeling calm.',
    retrievedContexts: [],
    sessionDate: '2025-05-05 10:02:00',
  },
};

const testCases: PromptfooTestCase[] = [meetingWithTimeAndLocation, exerciseWithoutTime];
|
||||
|
||||
export default testCases;
|
||||
|
|
@ -1,112 +0,0 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { isAbsolute, join } from 'node:path';
|
||||
|
||||
import { renderPlaceholderTemplate } from '@lobechat/context-engine';
|
||||
import { MemorySourceType } from '@lobechat/types';
|
||||
|
||||
import type { IngestPayload } from '../../../../src/converters/locomo';
|
||||
import { activityPrompt } from '../../../../src/prompts';
|
||||
import type { BenchmarkLocomoPart } from '../../../../src/providers';
|
||||
import { BenchmarkLocomoContextProvider } from '../../../../src/providers';
|
||||
import type { ExtractorTemplateProps, MemoryExtractionJob } from '../../../../src/types';
|
||||
|
||||
/** Variables accepted by the LoCoMo activity prompt builder. */
export interface PromptVars extends ExtractorTemplateProps {
  /** Path to the ingest payload JSON; a relative path is resolved against the current working directory. */
  payloadPath: string;
  /** When set, only turns from the session with this id are used. */
  sessionId?: string;
  /** Overrides the default user id that is derived from the payload's sample id. */
  userId?: string;
}
|
||||
|
||||
/** Returns `payloadPath` unchanged when it is absolute; otherwise joins it onto the current working directory. */
const resolvePath = (payloadPath: string) => {
  if (isAbsolute(payloadPath)) return payloadPath;

  return join(process.cwd(), payloadPath);
};
|
||||
|
||||
/**
 * Flattens the payload's session turns into a single ordered list of parts.
 *
 * @param payload - Ingest payload whose sessions and turns are read.
 * @param sessionId - When given, only the session with this id contributes turns.
 * @returns One part per turn; `partIndex` counts from 0 across all included sessions.
 */
const buildParts = (payload: IngestPayload, sessionId?: string): BenchmarkLocomoPart[] => {
  const parts: BenchmarkLocomoPart[] = [];

  for (const session of payload.sessions) {
    if (sessionId && session.sessionId !== sessionId) continue;

    for (const turn of session.turns) {
      parts.push({
        content: turn.text,
        // A turn without its own timestamp inherits the session timestamp.
        createdAt: turn.createdAt || session.timestamp,
        metadata: {
          diaId: turn.diaId,
          imageCaption: turn.imageCaption,
          imageUrls: turn.imageUrls,
          sessionId: session.sessionId,
        },
        // The number of parts collected so far is the next running index.
        partIndex: parts.length,
        sessionId: session.sessionId,
        speaker: turn.speaker,
      });
    }
  }

  return parts;
};
|
||||
|
||||
/**
 * Picks the session date used when rendering the prompt.
 *
 * Resolution order:
 * 1. the timestamp of the session whose id equals `sessionId`;
 * 2. the timestamp of the first session in the payload;
 * 3. the latest parseable `createdAt` among `parts`, as an ISO string;
 * 4. the current time, as an ISO string.
 *
 * Unparseable `createdAt` values are ignored rather than allowed to reach
 * `toISOString()`, which throws a RangeError on an invalid date.
 *
 * @param payload - Ingest payload whose session timestamps are consulted first.
 * @param parts - Parts whose `createdAt` values are the fallback source.
 * @param sessionId - Id of the preferred session, if any.
 * @returns The resolved session date.
 */
const resolveSessionDate = (
  payload: IngestPayload,
  parts: BenchmarkLocomoPart[],
  sessionId?: string,
) => {
  const sessionDate =
    payload.sessions.find((session) => session.sessionId === sessionId)?.timestamp ||
    payload.sessions[0]?.timestamp;

  if (sessionDate) return sessionDate;

  // Single pass for the maximum; no sort and no non-null assertions needed.
  let latestTime = Number.NEGATIVE_INFINITY;
  for (const part of parts) {
    if (!part.createdAt) continue;

    const time = new Date(part.createdAt).getTime();
    if (!Number.isNaN(time) && time > latestTime) latestTime = time;
  }

  return Number.isFinite(latestTime)
    ? new Date(latestTime).toISOString()
    : new Date().toISOString();
};
|
||||
|
||||
/**
 * Builds the chat messages for the activity prompt from a LoCoMo ingest payload file.
 *
 * Reads and parses the payload at `vars.payloadPath`, turns the selected
 * session's turns into parts, asks the benchmark context provider for the
 * context text, and renders the activity prompt template. The rendered text is
 * returned as both the system message and the user message.
 *
 * @param vars - Template variables plus the payload path and optional session/user ids.
 * @returns The system and user messages, in that order.
 * @throws Error when no turns match the requested session.
 */
export const buildLocomoActivityMessages = async (vars: PromptVars) => {
  const raw = await readFile(resolvePath(vars.payloadPath), 'utf8');
  const payload = JSON.parse(raw) as IngestPayload;

  const parts = buildParts(payload, vars.sessionId);
  if (parts.length === 0) {
    throw new Error(
      `No matching parts found in ${payload.sampleId} for session ${vars.sessionId || 'all'}`,
    );
  }

  // Ids default to values derived from the payload's sample id.
  const userId = vars.userId || `locomo-user-${payload.sampleId}`;
  const sourceId = payload.topicId || `sample_${payload.sampleId}`;
  const sessionDate = vars.sessionDate || resolveSessionDate(payload, parts, vars.sessionId);

  const extractionJob: MemoryExtractionJob = {
    source: MemorySourceType.BenchmarkLocomo,
    sourceId,
    userId,
  };

  const provider = new BenchmarkLocomoContextProvider({
    parts,
    sampleId: payload.sampleId,
    sourceId,
    userId,
  });

  const { context } = await provider.buildContext(extractionJob.userId);

  const prompt = renderPlaceholderTemplate(activityPrompt, {
    availableCategories: vars.availableCategories,
    language: vars.language || 'English',
    retrievedContext: context || 'No similar memories retrieved.',
    sessionDate,
    topK: vars.topK ?? 5,
    username: vars.username || 'User',
  });

  const systemMessage = { content: prompt, role: 'system' as const };
  const userMessage = { content: prompt, role: 'user' as const };

  return [systemMessage, userMessage];
};
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
description: LoCoMo regression for activity layer with relative time resolution
|
||||
|
||||
providers:
|
||||
- id: openai:chat:gpt-5-mini
|
||||
config:
|
||||
response_format: file://../../../response-formats/activity.json
|
||||
temperature: 0
|
||||
|
||||
prompts:
|
||||
- file://./prompt.ts
|
||||
|
||||
tests:
|
||||
- file://./tests/cases.ts
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
import type { PromptVars } from './buildMessages';
|
||||
import { buildLocomoActivityMessages } from './buildMessages';
|
||||
|
||||
/** Prompt entry point: forwards the supplied `vars` to buildLocomoActivityMessages. */
export default async function generatePrompt(context: { vars: PromptVars }) {
  const { vars } = context;

  return buildLocomoActivityMessages(vars);
}
|
||||
|
|
@ -1,149 +0,0 @@
|
|||
{
|
||||
"force": true,
|
||||
"layers": [],
|
||||
"sampleId": "conv-26",
|
||||
"sessions": [
|
||||
{
|
||||
"sessionId": "session_1",
|
||||
"timestamp": "2023-05-08T13:56:00.000Z",
|
||||
"turns": [
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:1",
|
||||
"role": "user",
|
||||
"speaker": "Caroline",
|
||||
"text": "Hey Mel! Good to see you! How have you been?"
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:2",
|
||||
"role": "assistant",
|
||||
"speaker": "Melanie",
|
||||
"text": "Hey Caroline! Good to see you! I'm swamped with the kids & work. What's up with you? Anything new?"
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:3",
|
||||
"role": "user",
|
||||
"speaker": "Caroline",
|
||||
"text": "I went to a LGBTQ support group yesterday and it was so powerful."
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:4",
|
||||
"role": "assistant",
|
||||
"speaker": "Melanie",
|
||||
"text": "Wow, that's cool, Caroline! What happened that was so awesome? Did you hear any inspiring stories?"
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:5",
|
||||
"imageCaption": "a photo of a dog walking past a wall with a painting of a woman",
|
||||
"imageUrls": [
|
||||
"https://i.redd.it/l7hozpetnhlb1.jpg"
|
||||
],
|
||||
"role": "user",
|
||||
"speaker": "Caroline",
|
||||
"text": "The transgender stories were so inspiring! I was so happy and thankful for all the support.\n[Image: a photo of a dog walking past a wall with a painting of a woman]"
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:6",
|
||||
"role": "assistant",
|
||||
"speaker": "Melanie",
|
||||
"text": "Wow, love that painting! So cool you found such a helpful group. What's it done for you?"
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:7",
|
||||
"role": "user",
|
||||
"speaker": "Caroline",
|
||||
"text": "The support group has made me feel accepted and given me courage to embrace myself."
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:8",
|
||||
"role": "assistant",
|
||||
"speaker": "Melanie",
|
||||
"text": "That's really cool. You've got guts. What now?"
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:9",
|
||||
"role": "user",
|
||||
"speaker": "Caroline",
|
||||
"text": "Gonna continue my edu and check out career options, which is pretty exciting!"
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:10",
|
||||
"role": "assistant",
|
||||
"speaker": "Melanie",
|
||||
"text": "Wow, Caroline! What kinda jobs are you thinkin' of? Anything that stands out?"
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:11",
|
||||
"role": "user",
|
||||
"speaker": "Caroline",
|
||||
"text": "I'm keen on counseling or working in mental health - I'd love to support those with similar issues."
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:12",
|
||||
"imageCaption": "a photo of a painting of a sunset over a lake",
|
||||
"imageUrls": [
|
||||
"http://candicealexander.com/cdn/shop/products/IMG_7269_a49d5af8-c76c-4ecd-ae20-48c08cb11dec.jpg"
|
||||
],
|
||||
"role": "assistant",
|
||||
"speaker": "Melanie",
|
||||
"text": "You'd be a great counselor! Your empathy and understanding will really help the people you work with. By the way, take a look at this.\n[Image: a photo of a painting of a sunset over a lake]"
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:13",
|
||||
"role": "user",
|
||||
"speaker": "Caroline",
|
||||
"text": "Thanks, Melanie! That's really sweet. Is this your own painting?"
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:14",
|
||||
"role": "assistant",
|
||||
"speaker": "Melanie",
|
||||
"text": "Yeah, I painted that lake sunrise last year! It's special to me."
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:15",
|
||||
"role": "user",
|
||||
"speaker": "Caroline",
|
||||
"text": "Wow, Melanie! The colors really blend nicely. Painting looks like a great outlet for expressing yourself."
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:16",
|
||||
"role": "assistant",
|
||||
"speaker": "Melanie",
|
||||
"text": "Thanks, Caroline! Painting's a fun way to express my feelings and get creative. It's a great way to relax after a long day."
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:17",
|
||||
"role": "user",
|
||||
"speaker": "Caroline",
|
||||
"text": "Totally agree, Mel. Relaxing and expressing ourselves is key. Well, I'm off to go do some research."
|
||||
},
|
||||
{
|
||||
"createdAt": "2023-05-08T13:56:00.000Z",
|
||||
"diaId": "D1:18",
|
||||
"role": "assistant",
|
||||
"speaker": "Melanie",
|
||||
"text": "Yep, Caroline. Taking care of ourselves is vital. I'm off to go swimming with the kids. Talk to you soon!"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": "benchmark_locomo",
|
||||
"topicId": "sample_conv-26"
|
||||
}
|
||||
|
|
@ -1,72 +0,0 @@
|
|||
/** Assertion of type `javascript`; `value` holds the JavaScript source to run. */
type JavascriptAssert = { type: 'javascript'; value: string };

/** Assertion of type `llm-rubric`; `value` holds the rubric text, `provider` optionally names the grader. */
type LlmRubricAssert = { provider?: string; type: 'llm-rubric'; value: string };

type PromptfooAssert = JavascriptAssert | LlmRubricAssert;

/** One test case: its assertions, an optional description, and the template variables. */
interface PromptfooTestCase {
  assert: PromptfooAssert[];
  description?: string;
  vars: Record<string, unknown>;
}
|
||||
|
||||
// JavaScript body of the base schema check. It reads `output`, requires JSON with
// a `memories` array, and requires every memory to be an activity with an
// activity type.
const baseSchemaScript = `
    let parsed;
    try {
      parsed = JSON.parse(output);
    } catch (error) {
      console.error('Failed to parse JSON output', error);
      return false;
    }

    if (!parsed || !Array.isArray(parsed.memories)) return false;

    return parsed.memories.every((memory) => {
      return memory.memoryType === 'activity' && memory.withActivity?.type;
    });
  `;

/** Schema assertion shared by every test case in this file. */
const baseSchemaAssert: PromptfooAssert = {
  type: 'javascript',
  value: baseSchemaScript,
};
|
||||
|
||||
// Finds the memory that mentions "support group" in its title, summary, or
// narrative and requires its `startsAt` to begin with 2023-05-07.
const supportGroupDateAssert: PromptfooAssert = {
  type: 'javascript',
  value: `
          const data = JSON.parse(output);
          const target = data.memories?.find((memory) => {
            const text = [memory.title, memory.summary, memory.withActivity?.narrative]
              .filter(Boolean)
              .join(' ')
              .toLowerCase();
            return text.includes('support group');
          });

          if (!target) return false;
          const startsAt = target.withActivity?.startsAt;
          if (!startsAt) return false;

          return String(startsAt).startsWith('2023-05-07');
        `,
};

// Rubric for the same expectation, graded by openai:gpt-5-mini.
const supportGroupRubric: PromptfooAssert = {
  provider: 'openai:gpt-5-mini',
  type: 'llm-rubric',
  value:
    'Should extract the LGBTQ support group activity from session_1 diaId D1:3, convert "yesterday" relative to the 2023-05-08 session anchor into 2023-05-07, and include a narrative about feeling supported/accepted.',
};

const testCases: PromptfooTestCase[] = [
  {
    assert: [baseSchemaAssert, supportGroupDateAssert, supportGroupRubric],
    description: 'LoCoMo conv-26 session_1 resolves relative date',
    vars: {
      availableCategories: ['personal'],
      language: 'English',
      payloadPath: './promptfoo/evals/activity/locomo/tests/benchmark-locomo-payload-conv-26.json',
      sessionId: 'session_1',
      topK: 3,
      username: 'Caroline',
    },
  },
];
|
||||
|
||||
export default testCases;
|
||||
|
|
@ -1,69 +0,0 @@
|
|||
import { readFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
|
||||
import { renderPlaceholderTemplate } from '@lobechat/context-engine';
|
||||
|
||||
/** Subset of a recorded extraction trace that this module reads. */
interface TracePayload {
  /** Recorded agent calls keyed by name; parseLanguageFromTrace reads the `layer-identity` entry. */
  agentCalls?: Record<string, any>;
  contexts?: {
    trimmed?: {
      /** Retrieved memory contexts; joined with blank lines when rendering the prompt. */
      retrievedContexts?: string[];
      /** Existing identities context passed to the prompt template. */
      retrievedIdentitiesContext?: string;
    };
  };
  extractionJob?: {
    /** Used as the session date when none is supplied in the vars. */
    sourceUpdatedAt?: string;
  };
}
|
||||
|
||||
/** Variables accepted by the identity prompt builder. */
export interface PromptVars {
  availableCategories?: string[];
  /** Overrides the language parsed from the trace. */
  language?: string;
  /** Overrides the session date taken from the trace. */
  sessionDate?: string;
  /** Defaults to 10 when omitted. */
  topK?: number;
  /** Path to the trace JSON file; passed to `readFile` as given. */
  tracePath: string;
  /** Defaults to `User` when omitted. */
  username?: string;
}
|
||||
|
||||
/**
 * Extracts the language named in the recorded identity-layer request.
 *
 * Looks at the second message of the `layer-identity` call and returns the text
 * that follows "ensure all the content is using" up to the next period
 * (matched case-insensitively).
 *
 * @returns The captured language, or `undefined` when the message is missing,
 * is not a string, or does not contain the phrase.
 */
const parseLanguageFromTrace = (trace: TracePayload): string | undefined => {
  const content = trace.agentCalls?.['layer-identity']?.request?.messages?.[1]?.content;
  if (typeof content !== 'string') return undefined;

  const found = /ensure all the content is using ([^\n.]+)\./i.exec(content);

  return found?.[1];
};
|
||||
|
||||
/**
 * Builds the chat messages for the identity prompt from a recorded trace file.
 *
 * Reads the trace JSON at `vars.tracePath` and the prompt template at
 * `src/prompts/layers/identity.md` (relative to the current working directory),
 * renders the template, and returns the rendered text as both the system
 * message and the user message.
 *
 * Fallbacks: language is taken from the vars, then from the trace, then
 * `zh-CN`; session date from the vars, then the trace's
 * `extractionJob.sourceUpdatedAt`, then the current time; `topK` defaults to 10
 * and username to `User`.
 *
 * @param vars - Trace path plus optional template overrides.
 * @returns The system and user messages, with literal-typed roles.
 */
export const buildIdentityDedupeMessages = async (vars: PromptVars) => {
  const traceRaw = await readFile(vars.tracePath, 'utf8');
  const trace = JSON.parse(traceRaw) as TracePayload;

  const promptTemplate = await readFile(
    join(process.cwd(), 'src/prompts/layers/identity.md'),
    'utf8',
  );

  const retrievedContexts = trace.contexts?.trimmed?.retrievedContexts ?? [];
  const existingIdentitiesContext = trace.contexts?.trimmed?.retrievedIdentitiesContext ?? '';

  const language = vars.language || parseLanguageFromTrace(trace) || 'zh-CN';
  const username = vars.username || 'User';
  const sessionDate =
    vars.sessionDate || trace.extractionJob?.sourceUpdatedAt || new Date().toISOString();
  const topK = vars.topK ?? 10;

  const rendered = renderPlaceholderTemplate(promptTemplate, {
    availableCategories: vars.availableCategories,
    existingIdentitiesContext,
    language,
    // An empty list joins to '', which falls through to the placeholder sentence.
    retrievedContext: retrievedContexts.join('\n\n') || 'No similar memories retrieved.',
    sessionDate,
    topK,
    username,
  });

  // `as const` keeps the roles as literal types instead of widening to `string`,
  // matching the activity message builders.
  return [
    { content: rendered, role: 'system' as const },
    { content: rendered, role: 'user' as const },
  ];
};
|
||||
|
|
@ -1,12 +0,0 @@
|
|||
description: Regression benchmark for identity (memory-user-memory)
|
||||
|
||||
providers:
|
||||
- id: openai:chat:gpt-5-mini
|
||||
config:
|
||||
response_format: file://../../../response-formats/identity.json
|
||||
|
||||
prompts:
|
||||
- file://./prompt.ts
|
||||
|
||||
tests:
|
||||
- file://./tests/cases.ts
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
import type { PromptVars } from './buildMessages';
|
||||
import { buildIdentityDedupeMessages } from './buildMessages';
|
||||
|
||||
/** Prompt entry point: forwards the supplied `vars` to buildIdentityDedupeMessages. */
export default async function generatePrompt(context: { vars: PromptVars }) {
  const { vars } = context;

  return buildIdentityDedupeMessages(vars);
}
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
import { readdirSync, readFileSync } from 'node:fs';
|
||||
import { basename, dirname, join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
import type { MemoryExtractionTracePayload } from '@lobechat/types';
|
||||
|
||||
/** Assertion of type `javascript`; `value` holds the JavaScript source to run. */
type JavascriptAssert = { type: 'javascript'; value: string };

/** Assertion of type `llm-rubric`; `value` holds the rubric text, `provider` optionally names the grader. */
type LlmRubricAssert = { provider?: string; type: 'llm-rubric'; value: string };

type PromptfooAssert = JavascriptAssert | LlmRubricAssert;

/** One test case: its assertions, a description, and the template variables. */
interface PromptfooTestCase {
  assert: PromptfooAssert[];
  description: string;
  vars: Record<string, unknown>;
}
|
||||
|
||||
const tracesDir = join(dirname(fileURLToPath(import.meta.url)), '../datasets');
|
||||
|
||||
// JavaScript body of the dedupe check: passes when the parsed output has no
// `withIdentities.actions.add` entries (missing or empty).
const dedupeScript = `
    const jsonOutput = JSON.parse(output);
    return (
      !jsonOutput?.withIdentities?.actions?.add ||
      jsonOutput.withIdentities.actions.add.length === 0
    );
  `;

/** Assertion that the output proposes no identity "add" actions. */
const identityShouldDedupe: PromptfooAssert = {
  type: 'javascript',
  value: dedupeScript,
};
|
||||
|
||||
/**
 * Builds the human-readable description for a trace-based test case.
 *
 * Missing values fall back to `unknown-user`, `UnknownSource`, and the trace
 * file's base name respectively.
 *
 * @param tracePath - Path of the trace file the case was generated from.
 * @param payload - Parsed trace payload.
 */
const buildDescription = (tracePath: string, payload: MemoryExtractionTracePayload) => {
  const { extractionJob, userId } = payload;

  const who = userId || 'unknown-user';
  const origin = extractionJob?.source || 'UnknownSource';
  const originId = extractionJob?.sourceId || basename(tracePath);

  return `Identity - User ${who} ${origin} ${originId} should not generate add actions`;
};
|
||||
|
||||
// One test case per `.json` trace file found directly inside tracesDir.
const testCases: PromptfooTestCase[] = [];

for (const file of readdirSync(tracesDir)) {
  if (!file.endsWith('.json')) continue;

  const tracePath = join(tracesDir, file);
  const rawTrace = readFileSync(tracePath, 'utf8');
  const tracePayload = JSON.parse(rawTrace) as MemoryExtractionTracePayload;

  testCases.push({
    assert: [identityShouldDedupe],
    description: buildDescription(tracePath, tracePayload),
    vars: {
      layer: 'Identity',
      source: tracePayload.extractionJob?.source,
      sourceId: tracePayload.extractionJob?.sourceId,
      tracePath,
      userId: tracePayload.userId,
    },
  });
}
|
||||
|
||||
export default testCases;
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
description: User persona prompt regression
|
||||
|
||||
providers:
|
||||
- id: openai:chat:gpt-5-mini
|
||||
config:
|
||||
tools: file://../../response-formats/persona-tools.json
|
||||
tool_choice: required
|
||||
|
||||
prompts:
|
||||
- file://./prompt.ts
|
||||
|
||||
tests:
|
||||
- file://./tests/cases.ts
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
import { renderPlaceholderTemplate } from '@lobechat/context-engine';
|
||||
|
||||
import { userPersonaPrompt } from '../../../src/prompts/persona';
|
||||
|
||||
/** Variables for the user persona prompt. Optional sections fall back to a placeholder when omitted. */
interface PersonaPromptVars {
  /** Rendered under "## Existing Persona (baseline)". */
  existingPersona?: string;
  /** Passed to the system prompt template. */
  language: string;
  /** Rendered under "## User Provided Notes or Requests". */
  personaNotes?: string;
  /** Rendered under "## Recent Events or Highlights". */
  recentEvents?: string;
  /** Rendered under "## Retrieved Memories / Signals". */
  retrievedMemories?: string;
  /** Passed to the system prompt template. */
  username: string;
  /** Rendered under "## Extra Profile Context". */
  userProfile?: string;
}
|
||||
|
||||
/**
 * Prompt entry point for the user persona evaluation.
 *
 * The system message is the persona prompt template rendered with the given
 * language and username and a fixed `topK` of 10. The user message is a
 * sequence of markdown sections separated by blank lines; a section whose
 * variable is missing or empty shows a placeholder instead.
 *
 * @param vars - Persona prompt variables.
 * @returns The system and user messages, with literal-typed roles.
 */
export default async function generatePrompt({ vars }: { vars: PersonaPromptVars }) {
  const system = renderPlaceholderTemplate(userPersonaPrompt, {
    language: vars.language,
    topK: 10,
    username: vars.username,
  });

  const userSections = [
    '## Existing Persona (baseline)',
    vars.existingPersona || 'No existing persona provided.',
    '## Retrieved Memories / Signals',
    vars.retrievedMemories || 'N/A',
    '## Recent Events or Highlights',
    vars.recentEvents || 'N/A',
    '## User Provided Notes or Requests',
    vars.personaNotes || 'N/A',
    '## Extra Profile Context',
    vars.userProfile || 'N/A',
  ].join('\n\n');

  // `as const` keeps the roles as literal types instead of widening to `string`,
  // matching the activity message builders.
  return [
    { content: system, role: 'system' as const },
    { content: userSections, role: 'user' as const },
  ];
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
// JavaScript body of the tool-call check. `output` must be a non-empty array in
// which every call names `commit_user_persona` and carries a `persona` string
// longer than 10 characters after trimming. Arguments may be a JSON string or
// an object; a string that fails to parse fails the check.
const toolCallScript = `
    const calls = Array.isArray(output) ? output : [];
    if (calls.length === 0) return false;

    return calls.every((call) => {
      const fnName = call.function?.name || call.name;
      if (fnName !== 'commit_user_persona') return false;

      const rawArgs = call.function?.arguments ?? call.arguments;
      let args = {};
      if (typeof rawArgs === 'string') {
        try { args = JSON.parse(rawArgs); } catch { return false; }
      } else {
        args = rawArgs || {};
      }

      return typeof args.persona === 'string' && args.persona.trim().length > 10;
    });
  `;

const toolCallAssert = {
  type: 'javascript',
  value: toolCallScript,
};
|
||||
|
||||
// Rubric assertion graded by openai:gpt-5-mini.
const rubric = {
  provider: 'openai:gpt-5-mini',
  type: 'llm-rubric',
  value:
    'Should return a tool call to commit_user_persona with a meaningful second-person persona and concise diff/summary.',
};

// Single case: a baseline persona plus notes, events, memories, and profile context.
const personaWithBaselineAndEvents = {
  assert: [{ type: 'is-valid-openai-tools-call' }, toolCallAssert, rubric],
  description: 'Generates a persona with baseline and events',
  vars: {
    existingPersona: '# About User\n- Loves TypeScript\n- Works on LobeHub',
    language: '简体中文',
    personaNotes: '- Keep concise',
    recentEvents: '- Shipped memory feature\n- Joined community call',
    retrievedMemories: '- Preference: dark mode\n- Context: building AI workspace',
    userProfile: '- Developer, open source contributor',
    username: 'User',
  },
} as const;

export default [personaWithBaselineAndEvents] as const;
|
||||
|
|
@ -1,370 +0,0 @@
|
|||
{
|
||||
"json_schema": {
|
||||
"name": "activity_extraction",
|
||||
"schema": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"memories": {
|
||||
"description": "Array of extracted activity memories. Use an empty array when no activity should be captured.",
|
||||
"items": {
|
||||
"additionalProperties": false,
|
||||
"description": "Self-contained activity memory describing what happened, when, where, with whom, and how it felt.",
|
||||
"examples": [
|
||||
{
|
||||
"details": "Talked through renewal scope, confirmed timeline flexibility, and captured follow-ups.",
|
||||
"memoryCategory": "work",
|
||||
"memoryType": "activity",
|
||||
"summary": "Client Q2 renewal meeting with Alice (ACME)",
|
||||
"tags": [
|
||||
"meeting",
|
||||
"client",
|
||||
"renewal"
|
||||
],
|
||||
"title": "ACME Q2 renewal meeting",
|
||||
"withActivity": {
|
||||
"type": "meeting",
|
||||
"associatedLocations": [
|
||||
{
|
||||
"address": "123 Main St, New York, NY",
|
||||
"name": "ACME HQ"
|
||||
}
|
||||
],
|
||||
"associatedSubjects": [
|
||||
{
|
||||
"name": "Alice Smith",
|
||||
"type": "person"
|
||||
}
|
||||
],
|
||||
"endsAt": "2024-05-03T15:00:00-04:00",
|
||||
"feedback": "Positive momentum; Alice felt heard and open to renewal.",
|
||||
"narrative": "Alice and User reviewed Q2 renewal scope, aligned on reduced deliverables, and agreed to share revised pricing next week.",
|
||||
"notes": "Agenda: renewal scope, pricing, next steps.",
|
||||
"startsAt": "2024-05-03T14:00:00-04:00",
|
||||
"status": "completed",
|
||||
"timezone": "America/New_York"
|
||||
}
|
||||
},
|
||||
{
|
||||
"details": "Routine check-up; discussed migraines and sleep habits.",
|
||||
"memoryCategory": "health",
|
||||
"memoryType": "activity",
|
||||
"summary": "Doctor appointment with Dr. Kim about migraines",
|
||||
"tags": [
|
||||
"appointment",
|
||||
"health"
|
||||
],
|
||||
"title": "Neurology follow-up",
|
||||
"withActivity": {
|
||||
"type": "appointment",
|
||||
"associatedLocations": [
|
||||
{
|
||||
"name": "City Neurology Clinic"
|
||||
}
|
||||
],
|
||||
"associatedSubjects": [
|
||||
{
|
||||
"name": "Dr. Kim",
|
||||
"type": "person"
|
||||
}
|
||||
],
|
||||
"feedback": "Felt reassured; plan seems manageable.",
|
||||
"narrative": "User saw Dr. Kim to review migraine frequency; decided to track sleep, hydration, and start a low-dose preventive.",
|
||||
"notes": "Discussed triggers, hydration, and medication side effects.",
|
||||
"status": "completed"
|
||||
}
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"details": {
|
||||
"description": "Optional detailed information or longer notes supporting the summary and narrative.",
|
||||
"type": "string"
|
||||
},
|
||||
"memoryCategory": {
|
||||
"description": "Memory category best matching the activity (e.g., work, health, travel, relationships).",
|
||||
"type": "string"
|
||||
},
|
||||
"memoryType": {
|
||||
"const": "activity",
|
||||
"description": "Memory type; always activity.",
|
||||
"type": "string"
|
||||
},
|
||||
"summary": {
|
||||
"description": "Concise overview of this activity.",
|
||||
"type": "string"
|
||||
},
|
||||
"tags": {
|
||||
"description": "Model-generated tags summarizing key facets of the activity.",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"title": {
|
||||
"description": "Brief descriptive title for the activity, e.g., \"Dinner with friends at Marina\".",
|
||||
"type": "string"
|
||||
},
|
||||
"withActivity": {
|
||||
"additionalProperties": false,
|
||||
"description": "Structured activity fields. Temporal and association values are optional—include only when the user mentioned them.",
|
||||
"properties": {
|
||||
"type": {
|
||||
"description": "Activity type enum. Choose the closest match; fall back to \"other\" when unclear.",
|
||||
"enum": [
|
||||
"appointment",
|
||||
"call",
|
||||
"celebration",
|
||||
"class",
|
||||
"conference",
|
||||
"errand",
|
||||
"event",
|
||||
"exercise",
|
||||
"meal",
|
||||
"meeting",
|
||||
"other",
|
||||
"project-session",
|
||||
"social",
|
||||
"task",
|
||||
"trip",
|
||||
"workshop"
|
||||
],
|
||||
"type": "string"
|
||||
},
|
||||
"associatedLocations": {
|
||||
"description": "Places linked to this activity. Capture any mentioned venue, address, or setting.",
|
||||
"items": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"address": {
|
||||
"description": "Free-form address or directions if provided.",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"extra": {
|
||||
"description": "Optional key-value metadata related to the location.",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"name": {
|
||||
"description": "Place name or venue label.",
|
||||
"type": "string"
|
||||
},
|
||||
"tags": {
|
||||
"description": "Place-related tags (e.g., indoor, outdoor, virtual).",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"type": {
|
||||
"description": "Place type or category (office, clinic, restaurant, virtual).",
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"type",
|
||||
"name",
|
||||
"address",
|
||||
"tags",
|
||||
"extra"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"associatedObjects": {
|
||||
"description": "Non-living entities or items tied to the activity (e.g., transportation for trips, devices, tools).",
|
||||
"items": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"extra": {
|
||||
"description": "Optional key-value metadata related to the object.",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"name": {
|
||||
"description": "Name or label of the object (e.g., “MacBook”, “flight UA123”).",
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"description": "Object category (e.g., transportation, device, document).",
|
||||
"enum": [
|
||||
"application",
|
||||
"item",
|
||||
"knowledge",
|
||||
"other",
|
||||
"person",
|
||||
"place"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"type",
|
||||
"name",
|
||||
"extra"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"associatedSubjects": {
|
||||
"description": "Living beings involved (people, pets, groups). Use when the subject lacks a known identity ID.",
|
||||
"items": {
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"extra": {
|
||||
"description": "Optional key-value metadata related to the subject.",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"name": {
|
||||
"description": "Name or short label of the subject.",
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"description": "Subject category (e.g., person, pet, group).",
|
||||
"enum": [
|
||||
"person",
|
||||
"pet",
|
||||
"group",
|
||||
"other"
|
||||
],
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"type",
|
||||
"name",
|
||||
"extra"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"endsAt": {
|
||||
"description": "ISO 8601 end time for the activity when specified. Omit if not explicitly provided.",
|
||||
"format": "date-time",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"feedback": {
|
||||
"description": "Subjective feelings or evaluation of how the activity went (mood, satisfaction, effort).",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"metadata": {
|
||||
"additionalProperties": false,
|
||||
"description": "Additional structured metadata to keep raw hints (JSON object). Use sparingly.",
|
||||
"type": [
|
||||
"object",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"narrative": {
|
||||
"description": "Factual story of what happened (chronology, participants, outcomes). Required for recall.",
|
||||
"type": "string"
|
||||
},
|
||||
"notes": {
|
||||
"description": "Short annotations such as agenda, preparation, or quick bullets distinct from narrative.",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"startsAt": {
|
||||
"description": "ISO 8601 start time for the activity when specified. Omit if not explicitly provided.",
|
||||
"format": "date-time",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"status": {
|
||||
"description": "Lifecycle status when mentioned. Use planned/completed/cancelled/ongoing/on_hold/pending. Omit if unclear.",
|
||||
"enum": [
|
||||
"planned",
|
||||
"completed",
|
||||
"cancelled",
|
||||
"ongoing",
|
||||
"on_hold",
|
||||
"pending"
|
||||
],
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"tags": {
|
||||
"description": "Optional activity-specific tags or facets.",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": [
|
||||
"array",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"timezone": {
|
||||
"description": "IANA timezone string for the start/end times when provided (e.g., \"America/New_York\").",
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"type",
|
||||
"narrative",
|
||||
"feedback",
|
||||
"notes",
|
||||
"associatedLocations",
|
||||
"associatedSubjects",
|
||||
"associatedObjects",
|
||||
"startsAt",
|
||||
"endsAt",
|
||||
"status",
|
||||
"tags",
|
||||
"timezone",
|
||||
"metadata"
|
||||
],
|
||||
"type": "object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"title",
|
||||
"summary",
|
||||
"details",
|
||||
"memoryType",
|
||||
"memoryCategory",
|
||||
"tags",
|
||||
"withActivity"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"type": "array"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"memories"
|
||||
],
|
||||
"type": "object"
|
||||
},
|
||||
"strict": true
|
||||
},
|
||||
"type": "json_schema"
|
||||
}
|
||||
|
|
@ -1,210 +0,0 @@
|
|||
{
|
||||
"json_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"memories": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"details": {
|
||||
"type": "string",
|
||||
"description": "Optional detailed information"
|
||||
},
|
||||
"memoryCategory": {
|
||||
"type": "string",
|
||||
"description": "Memory category"
|
||||
},
|
||||
"memoryLayer": {
|
||||
"type": "string",
|
||||
"const": "context",
|
||||
"description": "Memory layer"
|
||||
},
|
||||
"memoryType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"activity",
|
||||
"context",
|
||||
"event",
|
||||
"fact",
|
||||
"location",
|
||||
"other",
|
||||
"people",
|
||||
"preference",
|
||||
"technology",
|
||||
"topic"
|
||||
],
|
||||
"description": "Memory type"
|
||||
},
|
||||
"summary": {
|
||||
"type": "string",
|
||||
"description": "Concise overview of this specific memory"
|
||||
},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "User defined tags that summarize the context facets"
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "Brief descriptive title"
|
||||
},
|
||||
"withContext": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"associatedObjects": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"extra": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Additional metadata about the object, should always be a valid JSON string if present"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Name of the associated object"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"application",
|
||||
"item",
|
||||
"knowledge",
|
||||
"other",
|
||||
"person",
|
||||
"place"
|
||||
],
|
||||
"description": "Type/category of the associated object"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"extra",
|
||||
"name",
|
||||
"type"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"description": "Array of objects describing involved roles, entities, or resources, [] empty if none"
|
||||
},
|
||||
"associatedSubjects": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"extra": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Additional metadata about the subject, should always be a valid JSON string if present"
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Name of the associated subject"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"item",
|
||||
"other",
|
||||
"person",
|
||||
"pet"
|
||||
],
|
||||
"description": "Type/category of the associated subject"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"extra",
|
||||
"name",
|
||||
"type"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"description": "Array of JSON objects describing involved subjects or participants, [] empty if none"
|
||||
},
|
||||
"currentStatus": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"aborted",
|
||||
"cancelled",
|
||||
"completed",
|
||||
"on_hold",
|
||||
"ongoing",
|
||||
"planned"
|
||||
],
|
||||
"description": "High level status markers (must be one of 'planned', 'ongoing', 'completed', 'aborted', 'on_hold', 'cancelled')"
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Rich narrative describing the situation, timeline, or environment"
|
||||
},
|
||||
"labels": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Model generated tags that summarize the context themes"
|
||||
},
|
||||
"scoreImpact": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1,
|
||||
"description": "Numeric score (0-1 (0% to 100%)) describing importance"
|
||||
},
|
||||
"scoreUrgency": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1,
|
||||
"description": "Numeric score (0-1 (0% to 100%)) describing urgency"
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "Optional synthesized context headline"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "High level context archetype (e.g., 'project', 'relationship', 'goal')"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"associatedObjects",
|
||||
"associatedSubjects",
|
||||
"currentStatus",
|
||||
"description",
|
||||
"labels",
|
||||
"scoreImpact",
|
||||
"scoreUrgency",
|
||||
"title",
|
||||
"type"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"details",
|
||||
"memoryCategory",
|
||||
"memoryLayer",
|
||||
"memoryType",
|
||||
"summary",
|
||||
"tags",
|
||||
"title",
|
||||
"withContext"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"description": "Array of extracted context memory items, could be empty if decided no relevant context to extract"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"memories"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"type": "json_schema"
|
||||
}
|
||||
|
|
@ -1,143 +0,0 @@
|
|||
{
|
||||
"json_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"memories": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"details": {
|
||||
"type": "string",
|
||||
"description": "Optional detailed information"
|
||||
},
|
||||
"memoryCategory": {
|
||||
"type": "string",
|
||||
"description": "Memory category"
|
||||
},
|
||||
"memoryLayer": {
|
||||
"type": "string",
|
||||
"const": "experience",
|
||||
"description": "Memory layer"
|
||||
},
|
||||
"memoryType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"activity",
|
||||
"context",
|
||||
"event",
|
||||
"fact",
|
||||
"location",
|
||||
"other",
|
||||
"people",
|
||||
"preference",
|
||||
"technology",
|
||||
"topic"
|
||||
],
|
||||
"description": "Memory type"
|
||||
},
|
||||
"summary": {
|
||||
"type": "string",
|
||||
"description": "Concise overview of this specific memory"
|
||||
},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Model generated tags that summarize the experience facets"
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "Brief descriptive title"
|
||||
},
|
||||
"withExperience": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"action": {
|
||||
"type": "string",
|
||||
"description": "Narrative describing actions taken or behaviors exhibited"
|
||||
},
|
||||
"keyLearning": {
|
||||
"type": "string",
|
||||
"description": "Narrative describing key insights or lessons learned"
|
||||
},
|
||||
"knowledgeValueScore": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1,
|
||||
"description": "Numeric score (0-1) describing how reusable and shareable this experience is"
|
||||
},
|
||||
"labels": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Model generated tags that summarize the experience facets"
|
||||
},
|
||||
"possibleOutcome": {
|
||||
"type": "string",
|
||||
"description": "Narrative describing potential outcomes or learnings"
|
||||
},
|
||||
"problemSolvingScore": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1,
|
||||
"description": "Numeric score (0-1) describing how effectively the problem was solved"
|
||||
},
|
||||
"reasoning": {
|
||||
"type": "string",
|
||||
"description": "Narrative describing the thought process or motivations"
|
||||
},
|
||||
"scoreConfidence": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1,
|
||||
"description": "Numeric score (0-1 (0% to 100%)) describing confidence in the experience details"
|
||||
},
|
||||
"situation": {
|
||||
"type": "string",
|
||||
"description": "Narrative describing the situation or event"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Type of experience being recorded"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"action",
|
||||
"keyLearning",
|
||||
"knowledgeValueScore",
|
||||
"labels",
|
||||
"possibleOutcome",
|
||||
"problemSolvingScore",
|
||||
"reasoning",
|
||||
"scoreConfidence",
|
||||
"situation",
|
||||
"type"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"details",
|
||||
"memoryCategory",
|
||||
"memoryLayer",
|
||||
"memoryType",
|
||||
"summary",
|
||||
"tags",
|
||||
"title",
|
||||
"withExperience"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"description": "Array of extracted experience memory items, could be empty if decided no relevant experience to extract"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"memories"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"type": "json_schema"
|
||||
}
|
||||
|
|
@ -1,381 +0,0 @@
|
|||
{
|
||||
"json_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"add": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"details": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Optional detailed information"
|
||||
},
|
||||
"memoryCategory": {
|
||||
"type": "string",
|
||||
"description": "Memory category"
|
||||
},
|
||||
"memoryLayer": {
|
||||
"type": "string",
|
||||
"const": "identity",
|
||||
"description": "Memory layer"
|
||||
},
|
||||
"memoryType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"activity",
|
||||
"context",
|
||||
"event",
|
||||
"fact",
|
||||
"location",
|
||||
"other",
|
||||
"people",
|
||||
"preference",
|
||||
"technology",
|
||||
"topic"
|
||||
],
|
||||
"description": "Memory type"
|
||||
},
|
||||
"summary": {
|
||||
"type": "string",
|
||||
"description": "Concise overview of this specific memory"
|
||||
},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Model generated tags that summarize the identity facets"
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "Honorific-style, concise descriptor (strength + domain/milestone), avoid bare job titles; e.g., \"Trusted open-source maintainer\", \"Specializes in low-latency infra\", \"Former Aliyun engineer\", \"Cares for rescue cats\""
|
||||
},
|
||||
"withIdentity": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"episodicDate": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"extractedLabels": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"relationship": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"self",
|
||||
"father",
|
||||
"mother",
|
||||
"son",
|
||||
"daughter",
|
||||
"brother",
|
||||
"sister",
|
||||
"sibling",
|
||||
"husband",
|
||||
"wife",
|
||||
"spouse",
|
||||
"partner",
|
||||
"couple",
|
||||
"friend",
|
||||
"colleague",
|
||||
"coworker",
|
||||
"classmate",
|
||||
"mentor",
|
||||
"mentee",
|
||||
"manager",
|
||||
"teammate",
|
||||
"grandfather",
|
||||
"grandmother",
|
||||
"grandson",
|
||||
"granddaughter",
|
||||
"uncle",
|
||||
"aunt",
|
||||
"nephew",
|
||||
"niece",
|
||||
"other"
|
||||
]
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"description": "Role explicitly mentioned for this identity entry (e.g., \"platform engineer\", \"caregiver\"); keep neutral and only use when evidence exists"
|
||||
},
|
||||
"scoreConfidence": {
|
||||
"type": "number"
|
||||
},
|
||||
"sourceEvidence": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"professional",
|
||||
"personal",
|
||||
"demographic"
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"description",
|
||||
"episodicDate",
|
||||
"extractedLabels",
|
||||
"relationship",
|
||||
"role",
|
||||
"scoreConfidence",
|
||||
"sourceEvidence",
|
||||
"type"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"details",
|
||||
"memoryCategory",
|
||||
"memoryLayer",
|
||||
"memoryType",
|
||||
"summary",
|
||||
"tags",
|
||||
"title",
|
||||
"withIdentity"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Identity entries to add; use an empty array when nothing to add"
|
||||
},
|
||||
"remove": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"reason": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"reason"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Identity entries to remove; use an empty array when nothing to remove"
|
||||
},
|
||||
"update": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"mergeStrategy": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"merge",
|
||||
"replace"
|
||||
]
|
||||
},
|
||||
"set": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"details": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Optional detailed information, use null for omitting the field"
|
||||
},
|
||||
"memoryCategory": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Memory category, use null for omitting the field"
|
||||
},
|
||||
"memoryType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"activity",
|
||||
"context",
|
||||
"event",
|
||||
"fact",
|
||||
"location",
|
||||
"other",
|
||||
"people",
|
||||
"preference",
|
||||
"technology",
|
||||
"topic"
|
||||
],
|
||||
"description": "Memory type, use null for omitting the field"
|
||||
},
|
||||
"summary": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Concise overview of this specific memory, use null for omitting the field"
|
||||
},
|
||||
"tags": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Model generated tags that summarize the identity facets, use null for omitting the field"
|
||||
},
|
||||
"title": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Honorific-style, concise descriptor (strength + domain/milestone), avoid bare job titles; e.g., \"Trusted open-source maintainer\", \"Specializes in low-latency infra\", \"Former Aliyun engineer\", \"Cares for rescue cats\"; use null for omitting the field"
|
||||
},
|
||||
"withIdentity": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"episodicDate": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"extractedLabels": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
},
|
||||
"relationship": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Possible values: self | father | mother | son | daughter | brother | sister | sibling | husband | wife | spouse | partner | couple | friend | colleague | coworker | classmate | mentor | mentee | manager | teammate | grandfather | grandmother | grandson | granddaughter | uncle | aunt | nephew | niece | other"
|
||||
},
|
||||
"role": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Role explicitly mentioned for this identity entry (e.g., \"platform engineer\", \"caregiver\"); keep existing when not updated; use null for omitting the field"
|
||||
},
|
||||
"scoreConfidence": {
|
||||
"type": [
|
||||
"number",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"sourceEvidence": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"type": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Possible values: professional | personal | demographic"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"description",
|
||||
"episodicDate",
|
||||
"extractedLabels",
|
||||
"relationship",
|
||||
"role",
|
||||
"scoreConfidence",
|
||||
"sourceEvidence",
|
||||
"type"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"details",
|
||||
"memoryCategory",
|
||||
"memoryType",
|
||||
"summary",
|
||||
"tags",
|
||||
"title",
|
||||
"withIdentity"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"mergeStrategy",
|
||||
"set"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Identity entries to update; use an empty array when nothing to update"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"add",
|
||||
"remove",
|
||||
"update"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"type": "json_schema"
|
||||
}
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "commit_user_persona",
|
||||
"description": "Persist an updated user persona document that summarizes the user, preferences, relationships, and recent events.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"persona": { "type": "string", "description": "Complete Markdown persona for the user" },
|
||||
"summary": { "type": "string", "description": "Executive summary (2-3 lines)" },
|
||||
"diff": { "type": "string", "description": "Bullet list of changes applied this run" },
|
||||
"reasoning": { "type": "string", "description": "Why these changes were applied" },
|
||||
"memoryIds": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "Related memory IDs used to craft the persona"
|
||||
},
|
||||
"sourceIds": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "Source or topic IDs tied to this update"
|
||||
}
|
||||
},
|
||||
"required": ["persona"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
{
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"persona": { "type": "string" },
|
||||
"summary": { "type": "string" },
|
||||
"diff": { "type": "string" },
|
||||
"reasoning": { "type": "string" },
|
||||
"memoryIds": { "type": "array", "items": { "type": "string" } },
|
||||
"sourceIds": { "type": "array", "items": { "type": "string" } }
|
||||
},
|
||||
"required": ["persona"],
|
||||
"type": "object"
|
||||
}
|
||||
|
|
@ -1,226 +0,0 @@
|
|||
{
|
||||
"json_schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"memories": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"details": {
|
||||
"type": "string",
|
||||
"description": "Optional detailed information"
|
||||
},
|
||||
"memoryCategory": {
|
||||
"type": "string",
|
||||
"description": "Memory category"
|
||||
},
|
||||
"memoryLayer": {
|
||||
"type": "string",
|
||||
"const": "preference",
|
||||
"description": "Memory layer"
|
||||
},
|
||||
"memoryType": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"activity",
|
||||
"context",
|
||||
"event",
|
||||
"fact",
|
||||
"location",
|
||||
"other",
|
||||
"people",
|
||||
"preference",
|
||||
"technology",
|
||||
"topic"
|
||||
],
|
||||
"description": "Memory type"
|
||||
},
|
||||
"summary": {
|
||||
"type": "string",
|
||||
"description": "Concise overview of this specific memory"
|
||||
},
|
||||
"tags": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Model generated tags that summarize the preference facets"
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "Brief descriptive title"
|
||||
},
|
||||
"withPreference": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"appContext": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"app": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "App or product name this applies to"
|
||||
},
|
||||
"feature": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"route": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
]
|
||||
},
|
||||
"surface": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "e.g., chat, emails, code review, notes"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"app",
|
||||
"feature",
|
||||
"route",
|
||||
"surface"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Application/surface specific preference, if any"
|
||||
},
|
||||
"conclusionDirectives": {
|
||||
"type": "string",
|
||||
"description": "Direct, self-contained instruction to the assistant from the user's perspective (what to do, not how to implement)"
|
||||
},
|
||||
"extractedLabels": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Model generated tags that summarize the preference facets"
|
||||
},
|
||||
"extractedScopes": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Array of JSON strings describing preference facets and applicable scopes"
|
||||
},
|
||||
"originContext": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"actor": {
|
||||
"type": "string",
|
||||
"description": "Who stated the preference; use 'User' for the user"
|
||||
},
|
||||
"applicableWhen": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Conditions where this preference applies"
|
||||
},
|
||||
"notApplicableWhen": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Conditions where it does not apply"
|
||||
},
|
||||
"scenario": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "Applicable scenario or use case"
|
||||
},
|
||||
"trigger": {
|
||||
"type": [
|
||||
"string",
|
||||
"null"
|
||||
],
|
||||
"description": "What prompted this preference"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"actor",
|
||||
"applicableWhen",
|
||||
"notApplicableWhen",
|
||||
"scenario",
|
||||
"trigger"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
],
|
||||
"description": "Context of how/why this preference was expressed"
|
||||
},
|
||||
"scorePriority": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1,
|
||||
"description": "Numeric prioritization weight (0-1 (0% to 100%)) where higher means more critical to respect"
|
||||
},
|
||||
"suggestions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Follow-up actions or assistant guidance derived from the preference"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "High level preference classification (e.g., 'lifestyle', 'communication')"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"appContext",
|
||||
"conclusionDirectives",
|
||||
"extractedLabels",
|
||||
"extractedScopes",
|
||||
"originContext",
|
||||
"scorePriority",
|
||||
"suggestions",
|
||||
"type"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"details",
|
||||
"memoryCategory",
|
||||
"memoryLayer",
|
||||
"memoryType",
|
||||
"summary",
|
||||
"tags",
|
||||
"title",
|
||||
"withPreference"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"description": "Array of extracted preference memory items, could be empty if decided no relevant preference to extract"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"memories"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"type": "json_schema"
|
||||
}
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
description: LobeHub Prompts (memory-user-memory) Testing Suite
|
||||
|
||||
# Test configurations - run all prompt tests
|
||||
testPaths:
|
||||
- promptfoo/evals/identity/with-s3-trace/eval.yaml
|
||||
- promptfoo/evals/activity/basic/eval.yaml
|
||||
- promptfoo/evals/persona/eval.yaml
|
||||
|
||||
# Output configuration
|
||||
outputPath: promptfoo-results.json
|
||||
|
||||
# Default test settings
|
||||
defaultTest:
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should be relevant and well-formatted"
|
||||
- type: cost
|
||||
threshold: 0.01 # Maximum cost per test in USD
|
||||
|
||||
# Evaluation settings
|
||||
evaluateOptions:
|
||||
maxConcurrency: 5
|
||||
delay: 100
|
||||
|
||||
# TypeScript support
|
||||
transforms:
|
||||
- "typescript"
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
import { mkdir, writeFile } from 'node:fs/promises';
|
||||
import { join } from 'node:path';
|
||||
import { exit } from 'node:process';
|
||||
|
||||
import type { GenerateObjectSchema } from '@lobechat/model-runtime';
|
||||
|
||||
import {
|
||||
ActivityMemorySchema,
|
||||
ContextMemorySchema,
|
||||
ExperienceMemorySchema,
|
||||
IdentityActionsSchema,
|
||||
PreferenceMemorySchema,
|
||||
} from '../src/schemas';
|
||||
import { buildGenerateObjectSchema } from '../src/utils/zod';
|
||||
|
||||
// Directory that receives the generated response-format JSON files.
// Resolved against process.cwd(), so the location depends on where the script is launched from.
const OUTPUT_DIR = join(process.cwd(), 'promptfoo/response-formats');
|
||||
|
||||
/**
 * Builds a generate-object schema from `schema` and writes it to
 * `<OUTPUT_DIR>/<name>.json` as a `json_schema` response format.
 *
 * @param name - Base name of the output file; also forwarded to `buildGenerateObjectSchema`.
 * @param schema - Schema handed to `buildGenerateObjectSchema` (typed `any` here).
 * @param description - Description forwarded to `buildGenerateObjectSchema`.
 */
const writeSchema = async (name: string, schema: any, description: string) => {
|
||||
const generateSchema = buildGenerateObjectSchema(schema, { description, name });
|
||||
|
||||
const responseFormat = {
|
||||
// Only the inner `.schema` is emitted here; name/strict from the built object are not included.
// NOTE(review): writeGenerateObjectSchema emits { name, schema, strict } instead — confirm the difference is intended.
json_schema: generateSchema.schema,
|
||||
type: 'json_schema' as const,
|
||||
};
|
||||
|
||||
const outPath = join(OUTPUT_DIR, `${name}.json`);
|
||||
// Pretty-printed with a 2-space indent.
await writeFile(outPath, JSON.stringify(responseFormat, null, 2), 'utf8');
|
||||
|
||||
console.log(`Wrote ${outPath}`);
|
||||
};
|
||||
|
||||
/**
 * Writes an already-built `GenerateObjectSchema` to `<OUTPUT_DIR>/<name>.json`
 * wrapped as a `json_schema` response format.
 *
 * @param name - Base name of the output file; also the fallback schema name.
 * @param generateSchema - Source of the `name`, `schema` and `strict` fields copied into the output.
 */
const writeGenerateObjectSchema = async (name: string, generateSchema: GenerateObjectSchema) => {
|
||||
const responseFormat: { json_schema: GenerateObjectSchema; type: 'json_schema' } = {
|
||||
json_schema: {
|
||||
// Falls back to the file base name when the schema's own name is falsy (including an empty string).
name: generateSchema.name || name,
|
||||
schema: generateSchema.schema,
|
||||
strict: generateSchema.strict,
|
||||
},
|
||||
type: 'json_schema' as const,
|
||||
};
|
||||
|
||||
const outPath = join(OUTPUT_DIR, `${name}.json`);
|
||||
// Pretty-printed with a 2-space indent.
await writeFile(outPath, JSON.stringify(responseFormat, null, 2), 'utf8');
|
||||
|
||||
console.log(`Wrote ${outPath}`);
|
||||
};
|
||||
|
||||
/**
 * Creates the output directory if needed, then writes one response-format file
 * per memory layer. The writes are awaited one after another.
 */
async function main() {
|
||||
// `recursive: true` creates missing parent directories and does not fail if the directory already exists.
await mkdir(OUTPUT_DIR, { recursive: true });
|
||||
|
||||
await writeSchema('identity', IdentityActionsSchema, 'Identity layer actions');
|
||||
await writeSchema('context', ContextMemorySchema, 'Context layer actions');
|
||||
await writeSchema('preference', PreferenceMemorySchema, 'Preference layer memories');
|
||||
await writeSchema('experience', ExperienceMemorySchema, 'Experience layer memories');
|
||||
// The activity schema is passed straight to writeGenerateObjectSchema rather than through buildGenerateObjectSchema.
await writeGenerateObjectSchema('activity', ActivityMemorySchema);
|
||||
}
|
||||
|
||||
// Script entry point: run the generator; on any rejection log the error and exit with status 1.
main().catch((err) => {
|
||||
console.error(err);
|
||||
exit(1);
|
||||
});
|
||||
6
packages/prompts/.gitignore
vendored
6
packages/prompts/.gitignore
vendored
|
|
@ -1,9 +1,3 @@
|
|||
# Promptfoo results and cache
|
||||
promptfoo-results.json
|
||||
results/
|
||||
.promptfoo/
|
||||
*.promptfoo.cache
|
||||
|
||||
# Node modules
|
||||
node_modules/
|
||||
|
||||
|
|
|
|||
|
|
@ -1,575 +0,0 @@
|
|||
# Prompt Engineering Guide for @lobechat/prompts
|
||||
|
||||
本文档提供使用 Claude Code 优化 LobeHub 提示词的指南和最佳实践。
|
||||
|
||||
## 项目结构
|
||||
|
||||
### 目录组织
|
||||
|
||||
每个提示词遵循以下标准结构:
|
||||
|
||||
```
|
||||
promptfoo/
|
||||
├── {prompt-name}/
|
||||
│ ├── eval.yaml # promptfoo 配置文件
|
||||
│ ├── prompt.ts # 提示词定义
|
||||
│ └── tests/
|
||||
│ └── basic-case.ts # 测试用例(TypeScript)
|
||||
```
|
||||
|
||||
**示例目录:**
|
||||
|
||||
```
|
||||
promptfoo/
|
||||
├── emoji-picker/
|
||||
│ ├── eval.yaml
|
||||
│ ├── prompt.ts
|
||||
│ └── tests/
|
||||
│ └── basic-case.ts
|
||||
├── translate/
|
||||
│ ├── eval.yaml
|
||||
│ ├── prompt.ts
|
||||
│ └── tests/
|
||||
│ └── basic-case.ts
|
||||
└── knowledge-qa/
|
||||
├── eval.yaml
|
||||
├── prompt.ts
|
||||
└── tests/
|
||||
└── basic-case.ts
|
||||
```
|
||||
|
||||
### 文件说明
|
||||
|
||||
#### `eval.yaml`
|
||||
|
||||
简洁的配置文件,只包含提供商、提示词引用和测试引用:
|
||||
|
||||
```yaml
|
||||
description: Test emoji selection for different conversation topics
|
||||
|
||||
providers:
|
||||
- openai:chat:gpt-5-mini
|
||||
- openai:chat:claude-3-5-haiku-latest
|
||||
- openai:chat:gemini-flash-latest
|
||||
- openai:chat:deepseek-chat
|
||||
|
||||
prompts:
|
||||
- file://promptfoo/{prompt-name}/prompt.ts
|
||||
|
||||
tests:
|
||||
- file://./tests/basic-case.ts
|
||||
```
|
||||
|
||||
#### `tests/basic-case.ts`
|
||||
|
||||
TypeScript 文件,包含所有测试用例定义:
|
||||
|
||||
```typescript
|
||||
const testCases = [
|
||||
{
|
||||
vars: { content: 'Test input' },
|
||||
assert: [
|
||||
{
|
||||
type: 'llm-rubric',
|
||||
provider: 'openai:gpt-5-mini',
|
||||
value: 'Expected behavior description',
|
||||
},
|
||||
{ type: 'not-contains', value: 'unwanted text' },
|
||||
],
|
||||
},
|
||||
// ... more test cases
|
||||
];
|
||||
|
||||
export default testCases;
|
||||
```
|
||||
|
||||
### 添加新提示词
|
||||
|
||||
1. **创建目录结构:**
|
||||
|
||||
```bash
|
||||
mkdir -p promptfoo/your-prompt-name/tests
|
||||
```
|
||||
|
||||
2. **创建 `prompt.ts`:**
|
||||
|
||||
```typescript
|
||||
export default function yourPrompt({ input }: { input: string }) {
|
||||
return [
|
||||
{
|
||||
role: 'system',
|
||||
content: 'Your system prompt here',
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: input,
|
||||
},
|
||||
];
|
||||
}
|
||||
```
|
||||
|
||||
3. **创建 `eval.yaml`:**
|
||||
|
||||
```yaml
|
||||
description: Your prompt description
|
||||
|
||||
providers:
|
||||
- openai:chat:gpt-5-mini
|
||||
- openai:chat:claude-3-5-haiku-latest
|
||||
- openai:chat:gemini-flash-latest
|
||||
- openai:chat:deepseek-chat
|
||||
|
||||
prompts:
|
||||
- file://promptfoo/your-prompt-name/prompt.ts
|
||||
|
||||
tests:
|
||||
- file://./tests/basic-case.ts
|
||||
```
|
||||
|
||||
4. **创建 `tests/basic-case.ts`:**
|
||||
|
||||
```typescript
|
||||
const testCases = [
|
||||
{
|
||||
vars: { input: 'test case 1' },
|
||||
assert: [
|
||||
{
|
||||
type: 'llm-rubric',
|
||||
provider: 'openai:gpt-5-mini',
|
||||
value: 'Should do something specific',
|
||||
},
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
export default testCases;
|
||||
```
|
||||
|
||||
### 测试用例最佳实践
|
||||
|
||||
**分组测试:**
|
||||
|
||||
```typescript
|
||||
const testCases = [
|
||||
// English tests
|
||||
{
|
||||
vars: { content: 'Hello world' },
|
||||
assert: [
|
||||
/* ... */
|
||||
],
|
||||
},
|
||||
|
||||
// Chinese tests
|
||||
{
|
||||
vars: { content: '你好世界' },
|
||||
assert: [
|
||||
/* ... */
|
||||
],
|
||||
},
|
||||
|
||||
// Edge cases
|
||||
{
|
||||
vars: { content: '' },
|
||||
assert: [
|
||||
/* ... */
|
||||
],
|
||||
},
|
||||
];
|
||||
```
|
||||
|
||||
**使用注释:**
|
||||
|
||||
```typescript
|
||||
{
|
||||
assert: [
|
||||
{ type: 'contains', value: 'TypeScript' }, // Technical terms should be preserved
|
||||
{ type: 'javascript', value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2" }, // At most 2 sentences
|
||||
],
|
||||
}
|
||||
```
|
||||
|
||||
## 提示词优化工作流
|
||||
|
||||
### 1. 运行测试并识别问题
|
||||
|
||||
```bash
|
||||
# 运行特定提示词测试
|
||||
pnpm promptfoo eval -c promptfoo/<prompt-name>/eval.yaml
|
||||
|
||||
# 查看失败的测试详情
|
||||
pnpm promptfoo eval -c promptfoo/<prompt-name>/eval.yaml 2>&1 | grep -A 20 "FAIL"
|
||||
```
|
||||
|
||||
**关注点:**
|
||||
|
||||
- 失败率和失败模式
|
||||
- 不同模型的行为差异
|
||||
- 具体的失败原因(来自 llm-rubric 的评价)
|
||||
|
||||
### 2. 分析失败原因
|
||||
|
||||
**常见问题模式:**
|
||||
|
||||
- **输出格式问题**:模型添加了不需要的解释或上下文
|
||||
- **语言混淆**:在多语言场景下使用了错误的语言
|
||||
- **过度 / 不足翻译**:技术术语被翻译或保留不当
|
||||
- **上下文理解**:未正确理解何时使用 / 忽略上下文
|
||||
- **一致性问题**:不同模型间的行为不一致
|
||||
|
||||
### 3. 更新提示词
|
||||
|
||||
**优化策略:**
|
||||
|
||||
#### 使用英文提示词
|
||||
|
||||
```typescript
|
||||
// ❌ 不好 - 中文提示词在多语言场景下容易混淆
|
||||
content: '你是一名翻译助手,请将内容翻译为...';
|
||||
|
||||
// ✅ 好 - 英文提示词更通用
|
||||
content: 'You are a translation assistant. Translate the content to...';
|
||||
```
|
||||
|
||||
#### 明确输出要求
|
||||
|
||||
```typescript
|
||||
// ❌ 不好 - 模糊的指令
|
||||
content: 'Please translate the text';
|
||||
|
||||
// ✅ 好 - 具体的规则
|
||||
content: `Translate the text.
|
||||
|
||||
Rules:
|
||||
- Output ONLY the translated text, no explanations
|
||||
- Preserve technical terms exactly as they appear
|
||||
- No additional commentary`;
|
||||
```
|
||||
|
||||
#### 使用示例指导行为
|
||||
|
||||
```typescript
|
||||
// ✅ 提供具体示例
|
||||
content: `Select an emoji for the content.
|
||||
|
||||
Examples:
|
||||
- "I got a promotion" → 🎉
|
||||
- "Code wizard" → 🧙‍♂️
|
||||
- "Business plan" → 🚀`;
|
||||
```
|
||||
|
||||
#### 使用 MUST/SHOULD/MAY 表达优先级
|
||||
|
||||
```typescript
|
||||
// ✅ 明确的优先级
|
||||
content: `Answer based on context.
|
||||
|
||||
Rules:
|
||||
- MUST use context information as foundation
|
||||
- SHOULD supplement with general knowledge
|
||||
- MAY provide additional examples`;
|
||||
```
|
||||
|
||||
### 4. 迭代验证
|
||||
|
||||
每次修改后重新运行测试:
|
||||
|
||||
```bash
|
||||
pnpm promptfoo eval -c promptfoo/<prompt-name>/eval.yaml
|
||||
```
|
||||
|
||||
**目标:**
|
||||
|
||||
- 每轮优化应提升 5-10% 通过率
|
||||
- 通常需要 3-5 轮迭代达到 100%
|
||||
- 关注不同模型间的一致性
|
||||
|
||||
## 提示词模式库
|
||||
|
||||
### 翻译 (Translation)
|
||||
|
||||
```typescript
|
||||
export const chainTranslate = (content: string, targetLang: string) => ({
|
||||
messages: [
|
||||
{
|
||||
content: `You are a professional translator. Translate to ${targetLang}.
|
||||
|
||||
Rules:
|
||||
- Output ONLY the translated text, no explanations
|
||||
- Preserve technical terms, code identifiers, API keys exactly
|
||||
- Maintain original formatting
|
||||
- Use natural, idiomatic expressions`,
|
||||
role: 'system',
|
||||
},
|
||||
{
|
||||
content,
|
||||
role: 'user',
|
||||
},
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
**关键点:**
|
||||
|
||||
- 使用英文系统提示词
|
||||
- 明确 "仅输出翻译内容"
|
||||
- 列举需要保留的内容类型
|
||||
|
||||
### 知识库问答 (Knowledge Q\&A)
|
||||
|
||||
```typescript
|
||||
export const chainAnswerWithContext = ({ context, question }) => {
|
||||
const hasContext = context.filter((c) => c.trim()).length > 0;
|
||||
|
||||
return {
|
||||
messages: [
|
||||
{
|
||||
content: hasContext
|
||||
? `Answer based on provided context.
|
||||
|
||||
Rules:
|
||||
- If context is COMPLETELY DIFFERENT topic: state this and do NOT answer
|
||||
- If context is related (even if limited):
|
||||
* MUST use context as foundation
|
||||
* SHOULD supplement with general knowledge
|
||||
* For "how to" questions, provide actionable steps
|
||||
* Example: Context about "Docker containerization" + "How to deploy?"
|
||||
→ Explain deployment steps using your knowledge`
|
||||
: `Answer using your knowledge.`,
|
||||
role: 'user',
|
||||
},
|
||||
],
|
||||
};
|
||||
};
|
||||
```
|
||||
|
||||
**关键点:**
|
||||
|
||||
- 区分 "无上下文" 和 "不相关上下文"
|
||||
- 明确何时可以补充通用知识
|
||||
- 提供具体示例说明预期行为
|
||||
|
||||
### Emoji 选择 (Emoji Picker)
|
||||
|
||||
```typescript
|
||||
export const chainPickEmoji = (content: string) => ({
|
||||
messages: [
|
||||
{
|
||||
content: `You are an emoji expert.
|
||||
|
||||
Rules:
|
||||
- Output ONLY a single emoji (1-2 characters)
|
||||
- Focus on CONTENT meaning, not language
|
||||
- Prioritize topic-specific emojis over generic emotions
|
||||
- For work/projects, use work-related emojis not cultural symbols`,
|
||||
role: 'system',
|
||||
},
|
||||
{ content: 'I got a promotion', role: 'user' },
|
||||
{ content: '🎉', role: 'assistant' },
|
||||
{ content, role: 'user' },
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
**关键点:**
|
||||
|
||||
- 使用示例引导行为
|
||||
- 明确优先级(主题 > 情绪)
|
||||
- 避免文化符号混淆
|
||||
|
||||
### 标题生成 (Summary Title)
|
||||
|
||||
```typescript
|
||||
export const chainSummaryTitle = (messages, locale) => ({
|
||||
messages: [
|
||||
{
|
||||
content: `Generate a concise title.
|
||||
|
||||
Rules:
|
||||
- Maximum 10 words
|
||||
- Maximum 50 characters
|
||||
- No punctuation marks
|
||||
- Use language: ${locale}
|
||||
- Keep it short and to the point`,
|
||||
role: 'system',
|
||||
},
|
||||
{
|
||||
content: messages.map((m) => `${m.role}: ${m.content}`).join('\n'),
|
||||
role: 'user',
|
||||
},
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
**关键点:**
|
||||
|
||||
- 同时限制词数和字符数
|
||||
- 明确输出语言
|
||||
- 简洁明了的规则
|
||||
|
||||
## 测试策略
|
||||
|
||||
### 多语言测试
|
||||
|
||||
每个提示词应测试至少 3-5 种语言:
|
||||
|
||||
```typescript
|
||||
const testCases = [
|
||||
// 英语
|
||||
{
|
||||
vars: { content: 'Hello, how are you?' },
|
||||
assert: [
|
||||
/* ... */
|
||||
],
|
||||
},
|
||||
// 中文
|
||||
{
|
||||
vars: { content: '你好,你好吗?' },
|
||||
assert: [
|
||||
/* ... */
|
||||
],
|
||||
},
|
||||
// 西班牙语
|
||||
{
|
||||
vars: { content: 'Hola, ¿cómo estás?' },
|
||||
assert: [
|
||||
/* ... */
|
||||
],
|
||||
},
|
||||
];
|
||||
```
|
||||
|
||||
### 边界情况
|
||||
|
||||
```typescript
|
||||
const testCases = [
|
||||
// 空输入
|
||||
{
|
||||
vars: { content: '' },
|
||||
assert: [
|
||||
/* ... */
|
||||
],
|
||||
},
|
||||
// 技术术语
|
||||
{
|
||||
vars: { content: 'API_KEY_12345' },
|
||||
assert: [
|
||||
/* ... */
|
||||
],
|
||||
},
|
||||
// 混合语言
|
||||
{
|
||||
vars: { content: '使用 React 开发' },
|
||||
assert: [
|
||||
/* ... */
|
||||
],
|
||||
},
|
||||
// 上下文不相关
|
||||
{
|
||||
vars: {
|
||||
context: 'Machine learning...',
|
||||
query: 'Explain blockchain',
|
||||
},
|
||||
assert: [
|
||||
/* ... */
|
||||
],
|
||||
},
|
||||
];
|
||||
```
|
||||
|
||||
### 断言类型
|
||||
|
||||
```typescript
|
||||
const testCases = [
|
||||
{
|
||||
vars: {
|
||||
/* ... */
|
||||
},
|
||||
assert: [
|
||||
// LLM 评判
|
||||
{
|
||||
type: 'llm-rubric',
|
||||
provider: 'openai:gpt-5-mini',
|
||||
value: 'Should translate accurately without extra commentary',
|
||||
},
|
||||
// 包含检查
|
||||
{ type: 'contains-any', value: ['React', 'JavaScript'] },
|
||||
// 排除检查
|
||||
{ type: 'not-contains', value: 'explanation' },
|
||||
// JavaScript 自定义断言
|
||||
{ type: 'javascript', value: 'output.length < 100' },
|
||||
// 正则表达式
|
||||
{ type: 'regex', value: '^.{1,50}$' },
|
||||
],
|
||||
},
|
||||
];
|
||||
```
|
||||
|
||||
## 常见问题
|
||||
|
||||
### Q: 如何处理不同模型的差异行为?
|
||||
|
||||
A: 使用更明确的指令和示例。如果某个模型持续失败,考虑:
|
||||
|
||||
1. 添加该模型的具体示例
|
||||
2. 使用更强的指令(MUST 而非 SHOULD)
|
||||
3. 在提示词中明确该场景
|
||||
|
||||
### Q: 何时使用中文 vs 英文提示词?
|
||||
|
||||
A:
|
||||
|
||||
- **英文**:多语言场景、技术内容、跨模型一致性
|
||||
- **中文**:纯中文输入输出、中文特定的语言理解任务
|
||||
|
||||
### Q: 如何达到 100% 通过率?
|
||||
|
||||
A: 迭代流程:
|
||||
|
||||
1. 运行测试 → 2. 分析失败 → 3. 更新提示词 → 4. 重新测试
|
||||
|
||||
- 通常需要 3-5 轮
|
||||
- 关注最后 5% 的边界情况
|
||||
- 考虑调整测试断言(如果过于严格)
|
||||
|
||||
### Q: 什么时候应该修改测试而非提示词?
|
||||
|
||||
A: 当:
|
||||
|
||||
- 测试期望不合理(如要求模型做不到的事)
|
||||
- 断言过于严格(如精确匹配特定词语)
|
||||
- 多个模型都以不同但合理的方式回答
|
||||
|
||||
## 最佳实践总结
|
||||
|
||||
### 提示词设计
|
||||
|
||||
1. **使用英文系统提示词**以获得更好的跨语言一致性
|
||||
2. **明确输出格式**:"Output ONLY...","No explanations"
|
||||
3. **使用示例**引导模型行为
|
||||
4. **分层规则**:MUST > SHOULD > MAY
|
||||
5. **具体化**:列举具体情况而非抽象描述
|
||||
|
||||
### 测试组织
|
||||
|
||||
6. **使用 TypeScript 测试文件**:将测试用例放在 `tests/basic-case.ts` 中,而不是内联在 YAML
|
||||
7. **分组测试用例**:使用注释将相关测试分组(如按语言、边界情况)
|
||||
8. **添加行内注释**:在复杂断言后添加注释说明意图
|
||||
|
||||
### 开发流程
|
||||
|
||||
9. **迭代验证**:小步快跑,每次改进一个问题
|
||||
10. **跨模型测试**:至少测试 3 个不同的模型
|
||||
11. **版本控制**:记录每次优化的原因和结果
|
||||
|
||||
### 文件组织优势
|
||||
|
||||
- **类型安全**:TypeScript 提供更好的类型检查
|
||||
- **易维护**:测试逻辑与配置分离
|
||||
- **可扩展**:轻松添加新测试用例
|
||||
- **可读性**:注释和格式化更灵活
|
||||
|
||||
## 参考资源
|
||||
|
||||
- [promptfoo 文档](https://promptfoo.dev)
|
||||
- [OpenAI Prompt Engineering Guide](https://platform.openai.com/docs/guides/prompt-engineering)
|
||||
- [Anthropic Prompt Engineering](https://docs.anthropic.com/claude/docs/prompt-engineering)
|
||||
|
|
@ -1,12 +1,11 @@
|
|||
# @lobechat/prompts
|
||||
|
||||
This package contains prompt chains and templates for the LobeHub application, with comprehensive testing using promptfoo.
|
||||
This package contains prompt chains and templates for the LobeHub application.
|
||||
|
||||
## Features
|
||||
|
||||
- **Prompt Chains**: Reusable prompt templates for various AI tasks
|
||||
- **AI Testing**: Comprehensive testing using promptfoo for prompt quality assurance
|
||||
- **Multi-language Support**: Prompts and tests for multiple languages
|
||||
- **Multi-language Support**: Prompts for multiple languages
|
||||
- **Type Safety**: Full TypeScript support with proper type definitions
|
||||
|
||||
## Available Prompt Chains
|
||||
|
|
@ -16,209 +15,31 @@ This package contains prompt chains and templates for the LobeHub application, w
|
|||
- `chainTranslate` - Translate content between languages
|
||||
- `chainPickEmoji` - Select appropriate emojis for content
|
||||
- `chainAnswerWithContext` - Answer questions using knowledge base context
|
||||
- `chainAbstractChunkText` - Summarize text chunks
|
||||
|
||||
## Testing with promptfoo
|
||||
## Testing
|
||||
|
||||
This package uses [promptfoo](https://promptfoo.dev) for AI-powered testing of prompts. The testing suite evaluates prompt quality, consistency, and performance across different AI models.
|
||||
|
||||
### Prerequisites
|
||||
|
||||
Set up your API keys in your environment:
|
||||
Prompt evaluation tests are located in `devtools/agent-evals/scenarios/prompt-chain/` (cloud repo) and run via the agent-evals CLI.
|
||||
|
||||
```bash
|
||||
export OPENAI_API_KEY="your-openai-key"
|
||||
export ANTHROPIC_API_KEY="your-anthropic-key" # optional
|
||||
# Run from cloud repo root
|
||||
bun run agent-evals run prompt-chain/translate
|
||||
bun run agent-evals run prompt-chain/emoji-picker
|
||||
bun run agent-evals list # See all available scenarios
|
||||
```
|
||||
|
||||
### Running Tests
|
||||
|
||||
```bash
|
||||
# Run all prompt tests
|
||||
pnpm test:prompts
|
||||
|
||||
# Run tests in watch mode for development
|
||||
pnpm test:prompts:watch
|
||||
|
||||
# Generate summary report
|
||||
pnpm test:prompts:summary
|
||||
|
||||
# Run tests for CI (no cache, structured output)
|
||||
pnpm test:prompts:ci
|
||||
|
||||
# View test results in web UI
|
||||
pnpm promptfoo:view
|
||||
```
|
||||
|
||||
### Test Configuration
|
||||
|
||||
Tests are organized by prompt type in the `promptfoo/` directory:
|
||||
|
||||
```
|
||||
promptfoo/
|
||||
├── summary-title/
|
||||
│ ├── eval.yaml # Test configuration
|
||||
│ └── prompt.ts # Prompt wrapper
|
||||
├── translation/
|
||||
│ ├── eval.yaml
|
||||
│ └── prompt.ts
|
||||
├── language-detection/
|
||||
│ ├── eval.yaml
|
||||
│ └── prompt.ts
|
||||
├── emoji-picker/
|
||||
│ ├── eval.yaml
|
||||
│ └── prompt.ts
|
||||
└── knowledge-qa/
|
||||
├── eval.yaml
|
||||
└── prompt.ts
|
||||
```
|
||||
|
||||
Each test configuration includes:
|
||||
|
||||
- Multiple test cases with different inputs
|
||||
- Assertions for output validation (regex, JSON, custom logic)
|
||||
- LLM-based rubric evaluation for semantic correctness
|
||||
- Performance and cost monitoring
|
||||
|
||||
### Test Structure
|
||||
|
||||
Tests directly use the actual prompt chain functions from `src/chains/`. The TypeScript wrapper files in `promptfoo/prompts/` import and call the real chain functions, ensuring perfect synchronization.
|
||||
|
||||
```yaml
|
||||
description: Test description
|
||||
providers:
|
||||
- openai:gpt-4o-mini
|
||||
- anthropic:claude-3-5-haiku-latest
|
||||
prompts:
|
||||
- file://prompts/summary-title.ts # Imports and uses src/chains/summaryTitle.ts
|
||||
tests:
|
||||
- vars:
|
||||
messages: [...]
|
||||
locale: 'en-US'
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
value: 'Expected behavior description'
|
||||
provider: openai:gpt-4o # Specify grader model for LLM rubric
|
||||
- type: contains
|
||||
value: 'expected text'
|
||||
- type: not-contains
|
||||
value: 'unwanted text'
|
||||
```
|
||||
|
||||
### Adding New Tests
|
||||
|
||||
1. Create a test configuration file in `promptfoo/`
|
||||
2. Create a TypeScript wrapper in `promptfoo/prompts/` that imports and calls your chain function from `src/chains/`
|
||||
3. Add the test to `promptfooconfig.yaml`
|
||||
4. Run tests to validate
|
||||
|
||||
**Advantage**: The wrapper files automatically stay in sync with source code changes since they directly import and use the actual chain functions.
|
||||
|
||||
### Performance Monitoring
|
||||
|
||||
Tests include performance monitoring:
|
||||
|
||||
- Response time tracking
|
||||
- Cost per request monitoring
|
||||
- Quality score evaluation
|
||||
- Cross-model consistency checks
|
||||
|
||||
### CI Integration
|
||||
|
||||
The `test:prompts:ci` script is designed for continuous integration:
|
||||
|
||||
- Structured JSON output for parsing
|
||||
- No interactive prompts
|
||||
- Clear pass/fail status codes
|
||||
- Detailed error reporting
|
||||
|
||||
## Development
|
||||
|
||||
```bash
|
||||
# Install dependencies
|
||||
pnpm install
|
||||
|
||||
# Run unit tests
|
||||
pnpm test
|
||||
|
||||
# Run prompt tests
|
||||
pnpm test:prompts
|
||||
|
||||
# Run all tests
|
||||
pnpm test && pnpm test:prompts
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
When adding new prompt chains:
|
||||
|
||||
1. Implement the prompt function in `src/chains/`
|
||||
2. Add unit tests in `src/chains/__tests__/`
|
||||
3. Create promptfoo tests in `promptfoo/`
|
||||
4. Update this README with the new chain description
|
||||
|
||||
## Architecture
|
||||
|
||||
The package follows a layered architecture:
|
||||
|
||||
```
|
||||
src/
|
||||
├── chains/ # Prompt chain implementations
|
||||
├── prompts/ # Prompt templates and utilities
|
||||
└── index.ts # Main exports
|
||||
|
||||
promptfoo/
|
||||
├── prompts/ # Prompt implementations for testing
|
||||
├── *.yaml # Test configurations
|
||||
└── results/ # Test output directory
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Test Coverage**: Every prompt chain should have comprehensive promptfoo tests
|
||||
2. **Multi-language**: Test prompts with multiple languages when applicable
|
||||
3. **Edge Cases**: Include tests for edge cases and error conditions
|
||||
4. **Performance**: Monitor cost and response time in tests
|
||||
5. **Consistency**: Use consistent assertion patterns across tests
|
||||
6. **Prompt Optimization**: Use test results to iteratively improve prompts (see CLAUDE.md for optimization workflow)
|
||||
|
||||
## Prompt Optimization Workflow
|
||||
|
||||
This package follows an iterative prompt optimization process using promptfoo test results:
|
||||
|
||||
### Example: Translation Prompt Optimization
|
||||
|
||||
**Initial State**: 85% pass rate with issues:
|
||||
|
||||
- Claude models added explanatory text ("以下是翻译...")
|
||||
- GPT models over-translated technical terms (`API_KEY_12345` → `API 密钥_12345`)
|
||||
|
||||
**Optimization Process**:
|
||||
|
||||
1. **Identify Failures**: Run tests and analyze specific failure patterns
|
||||
2. **Update Prompts**: Modify prompt rules based on failure analysis
|
||||
- Added: "Output ONLY the translated text, no explanations"
|
||||
- Added: "Preserve technical terms, code identifiers, API keys exactly as they appear"
|
||||
3. **Re-run Tests**: Validate improvements across all models
|
||||
4. **Iterate**: Repeat until 100% pass rate achieved
|
||||
|
||||
**Final Result**: 100% pass rate (14/14 tests) across GPT-5-mini, Claude-3.5-Haiku, and Gemini-Flash
|
||||
|
||||
### Example: Knowledge Q\&A Optimization
|
||||
|
||||
**Initial State**: 71.43% pass rate with context handling issues
|
||||
|
||||
**Optimization Journey**:
|
||||
|
||||
- **Round 1** (80.95%): Clarified context relevance checking
|
||||
- **Round 2** (90.48%): Distinguished between "no context" vs "irrelevant context"
|
||||
- **Round 3** (92.86%): Added explicit rules for partial context
|
||||
- **Round 4** (96.43%): Emphasized supplementing with general knowledge
|
||||
- **Final** (100%): Added concrete example and MUST/SHOULD directives
|
||||
|
||||
**Key Learning**: When context is topic-relevant but information-limited, models should:
|
||||
|
||||
- Use context as foundation
|
||||
- Supplement with general knowledge
|
||||
- Provide practical, actionable guidance
|
||||
|
||||
See `CLAUDE.md` for detailed prompt engineering guidelines.
|
||||
|
|
|
|||
|
|
@ -4,23 +4,12 @@
|
|||
"private": true,
|
||||
"main": "./src/index.ts",
|
||||
"scripts": {
|
||||
"promptfoo:init": "promptfoo init",
|
||||
"promptfoo:view": "promptfoo view",
|
||||
"test": "vitest",
|
||||
"test:coverage": "vitest --coverage --silent='passed-only'",
|
||||
"test:prompts": "pnpm test:prompts:translate && pnpm test:prompts:summary && pnpm test:prompts:lang && pnpm test:prompts:emoji && pnpm test:prompts:qa",
|
||||
"test:prompts:abstract-chunk": "promptfoo eval -c promptfoo/abstract-chunk/eval.yaml",
|
||||
"test:prompts:emoji": "promptfoo eval -c promptfoo/emoji-picker/eval.yaml",
|
||||
"test:prompts:lang": "promptfoo eval -c promptfoo/language-detection/eval.yaml",
|
||||
"test:prompts:qa": "promptfoo eval -c promptfoo/knowledge-qa/eval.yaml",
|
||||
"test:prompts:summary": "promptfoo eval -c promptfoo/summary-title/eval.yaml",
|
||||
"test:prompts:supervisor": "promptfoo eval -c promptfoo/supervisor/productive/eval.yaml",
|
||||
"test:prompts:translate": "promptfoo eval -c promptfoo/translate/eval.yaml",
|
||||
"test:update": "vitest -u"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@lobechat/types": "workspace:*",
|
||||
"promptfoo": "^0.120.17",
|
||||
"tsx": "^4.20.6"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,111 +0,0 @@
|
|||
description: Test chunk text summarization in different languages
|
||||
|
||||
providers:
|
||||
- openai:chat:gpt-5-mini
|
||||
- openai:chat:claude-3-5-haiku-latest
|
||||
- openai:chat:gemini-flash-latest
|
||||
- openai:chat:deepseek-chat
|
||||
|
||||
prompts:
|
||||
- file://promptfoo/abstract-chunk/prompt.ts
|
||||
|
||||
tests:
|
||||
# English technical content
|
||||
- vars:
|
||||
text: "React is a JavaScript library for building user interfaces. It was developed by Facebook and is now maintained by Facebook and the community. React makes it painless to create interactive UIs. Design simple views for each state in your application, and React will efficiently update and render just the right components when your data changes. Declarative views make your code more predictable and easier to debug."
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The summary should be 1-2 sentences in English, capturing the main topic about React being a JavaScript library for UIs"
|
||||
- type: contains-any
|
||||
value: ["React", "JavaScript", "library", "UI", "user interface"]
|
||||
- type: javascript
|
||||
value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2" # At most 2 sentences
|
||||
|
||||
# Chinese content
|
||||
- vars:
|
||||
text: "深度学习是机器学习的一个分支,它使用多层神经网络来学习数据的表示。近年来,深度学习在图像识别、自然语言处理、语音识别等领域取得了突破性进展。卷积神经网络(CNN)在计算机视觉任务中表现优异,而循环神经网络(RNN)和Transformer架构在序列建模任务中非常有效。"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The summary should be 1-2 sentences in Chinese, summarizing deep learning and its applications"
|
||||
- type: contains-any
|
||||
value: ["深度学习", "神经网络", "机器学习"]
|
||||
- type: not-contains
|
||||
value: "摘要" # Should not contain meta labels
|
||||
- type: javascript
|
||||
value: "output.split(/[。!?]/).filter(s => s.trim()).length <= 2" # At most 2 sentences
|
||||
|
||||
# Japanese content
|
||||
- vars:
|
||||
text: "人工知能(AI)は、コンピュータシステムが人間の知能を模倣する技術です。AIは、学習、推論、問題解決などの認知機能を実行できます。現代のAIシステムは、大量のデータから学習し、パターンを認識して予測を行います。"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The summary should be 1-2 sentences in Japanese about artificial intelligence"
|
||||
- type: contains-any
|
||||
value: ["人工知能", "AI", "コンピュータ"]
|
||||
- type: javascript
|
||||
value: "output.split(/[。!?]/).filter(s => s.trim()).length <= 2"
|
||||
|
||||
# Spanish content
|
||||
- vars:
|
||||
text: "El cambio climático es uno de los mayores desafíos que enfrenta la humanidad en el siglo XXI. Las temperaturas globales están aumentando debido a las emisiones de gases de efecto invernadero producidas por actividades humanas como la quema de combustibles fósiles, la deforestación y la agricultura industrial. Los efectos incluyen el derretimiento de los glaciares, el aumento del nivel del mar y eventos climáticos extremos más frecuentes."
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The summary should be 1-2 sentences in Spanish about climate change"
|
||||
- type: contains-any
|
||||
value: ["cambio climático", "temperatura", "clima"]
|
||||
- type: javascript
|
||||
value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2"
|
||||
|
||||
# Short technical content (English)
|
||||
- vars:
|
||||
text: "TypeScript is a strongly typed programming language that builds on JavaScript. It adds static type definitions to JavaScript, making code more robust and maintainable."
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The summary should be 1-2 sentences in English about TypeScript"
|
||||
- type: contains-any
|
||||
value: ["TypeScript", "JavaScript", "type"]
|
||||
- type: javascript
|
||||
value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2"
|
||||
|
||||
# Mixed technical terms in Chinese
|
||||
- vars:
|
||||
text: "Docker 是一个开源的容器化平台,它允许开发者将应用程序及其依赖项打包到一个可移植的容器中。通过使用 Docker,可以确保应用在任何环境中都能一致地运行。Docker 容器比传统虚拟机更轻量级,启动速度更快,资源占用更少。"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The summary should be 1-2 sentences in Chinese, keeping 'Docker' in English"
|
||||
- type: contains
|
||||
value: "Docker" # Technical term should be preserved
|
||||
- type: contains-any
|
||||
value: ["容器", "平台", "应用"]
|
||||
- type: javascript
|
||||
value: "output.split(/[。!?]/).filter(s => s.trim()).length <= 2"
|
||||
|
||||
# German content
|
||||
- vars:
|
||||
text: "Die Quantenphysik ist ein fundamentaler Zweig der Physik, der sich mit dem Verhalten von Materie und Energie auf atomarer und subatomarer Ebene befasst. Im Gegensatz zur klassischen Physik beschreibt die Quantenphysik Phänomene, bei denen Teilchen sowohl Wellen- als auch Teilcheneigenschaften aufweisen können."
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The summary should be 1-2 sentences in German about quantum physics"
|
||||
- type: contains-any
|
||||
value: ["Quantenphysik", "Physik", "Materie"]
|
||||
- type: javascript
|
||||
value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2"
|
||||
|
||||
# Code snippet in content (English)
|
||||
- vars:
|
||||
text: "The useState hook in React allows you to add state to functional components. For example: const [count, setCount] = useState(0). This creates a state variable 'count' with initial value 0 and a setter function 'setCount' to update it."
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The summary should be 1-2 sentences in English about useState hook, may preserve code syntax"
|
||||
- type: contains-any
|
||||
value: ["useState", "React", "state", "hook"]
|
||||
- type: javascript
|
||||
value: "output.split(/[.!?]/).filter(s => s.trim()).length <= 2"
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
// TypeScript prompt wrapper that uses actual chain implementation
|
||||
import { chainAbstractChunkText } from '@lobechat/prompts';
|
||||
|
||||
/** Variables supplied by each test case of the abstract-chunk evaluation. */
interface PromptVars {
  /** Source text handed to `chainAbstractChunkText`. */
  text: string;
}

/**
 * Prompt function for promptfoo: produces the chat messages for one test case
 * by delegating to the real `chainAbstractChunkText` chain.
 *
 * @param context - Object whose `vars` carries the test-case variables.
 * @returns The chain's `messages`, or an empty array when that value is falsy.
 */
export default function generatePrompt({ vars }: { vars: PromptVars }) {
  const { messages } = chainAbstractChunkText(vars.text);

  return messages || [];
}
|
||||
|
|
@ -1,170 +0,0 @@
|
|||
description: Test emoji selection for different conversation topics and contexts
|
||||
|
||||
providers:
|
||||
- openai:chat:gpt-5-mini
|
||||
- openai:chat:claude-3-5-haiku-latest
|
||||
- openai:chat:gemini-flash-latest
|
||||
- openai:chat:deepseek-chat
|
||||
|
||||
prompts:
|
||||
- file://promptfoo/emoji-picker/prompt.ts
|
||||
|
||||
tests:
|
||||
- vars:
|
||||
content: "I just got a promotion at work! I'm so excited!"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select a positive, celebratory emoji appropriate for work success (e.g., 🎉, 🎊, ⭐, 💼, 🚀)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
- vars:
|
||||
content: "My dog passed away yesterday. I'm really sad."
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select a sympathetic, sad emoji appropriate for loss and grief (e.g., 😢, 😭, 💔, 😔)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
- vars:
|
||||
content: "Can you help me with this math problem?"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to learning, thinking, or mathematics (e.g., 🤔, 📚, ✏️, 🧮, 🔢)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
- vars:
|
||||
content: "I'm going on vacation to Hawaii next week!"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to vacation, travel, or tropical themes (e.g., 🌺, 🏖️, ✈️, 🌴, ☀️)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
- vars:
|
||||
content: "I'm learning to cook Italian food"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to cooking or Italian food (e.g., 🍝, 🍕, 👨🍳, 🍽️)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
- vars:
|
||||
content: "Technical documentation about API endpoints"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to technology, development, or documentation (e.g., 💻, 📖, ⚙️, 🔧, 📝)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
# Chinese language tests
|
||||
- vars:
|
||||
content: "我刚刚升职了!太激动了!"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select a positive, celebratory emoji appropriate for work success (e.g., 🎉, 🎊, ⭐, 💼, 🚀)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
- vars:
|
||||
content: "我的猫咪昨天去世了,我很难过"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select a sympathetic, sad emoji appropriate for loss and grief (e.g., 😢, 😭, 💔, 😔)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
- vars:
|
||||
content: "我正在学习做日本料理"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to cooking or Japanese food (e.g., 🍱, 🍣, 🍜, 👨🍳)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
# Spanish language tests
|
||||
- vars:
|
||||
content: "¡Me voy de vacaciones a la playa la próxima semana!"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to vacation, beach, or tropical themes (e.g., 🏖️, ☀️, 🌊, 🏝️)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
- vars:
|
||||
content: "Estoy estudiando para mi examen de matemáticas"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to studying, learning, or mathematics (e.g., 📚, 🤓, 🧮, ✏️, 📊, 📐, 📏)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
# French language tests
|
||||
- vars:
|
||||
content: "Je viens de terminer mon marathon! Je suis épuisé mais heureux"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to running, sports, or achievement (e.g., 🏃, 🏅, 💪, 🎯)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
- vars:
|
||||
content: "J'apprends à jouer de la guitare"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to music or guitar (e.g., 🎸, 🎵, 🎶, 🎼)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
# Japanese language tests
|
||||
- vars:
|
||||
content: "新しいプロジェクトが始まりました!頑張ります"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to new beginning, work, or motivation (e.g., 🚀, 💼, 💪, ✨)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
- vars:
|
||||
content: "桜が咲いて本当に綺麗です"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to cherry blossoms, flowers, or beauty (e.g., 🌸, 🌺, 🌼, 🌷)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
# German language tests
|
||||
- vars:
|
||||
content: "Ich habe gerade ein neues Auto gekauft!"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to cars or excitement (e.g., 🚗, 🎉, 🚙)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
||||
# Russian language tests
|
||||
- vars:
|
||||
content: "Я люблю читать книги по вечерам"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should select an emoji related to reading or books (e.g., 📚, 📖, 📕, 🤓)"
|
||||
- type: not-contains
|
||||
value: "explanation"
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
// TypeScript prompt wrapper that uses actual chain implementation
|
||||
import { chainPickEmoji } from '@lobechat/prompts';
|
||||
|
||||
/** Variables supplied by each test case of the emoji-picker evaluation. */
interface PromptVars {
  /** Conversation content handed to `chainPickEmoji`. */
  content: string;
}

/**
 * Prompt function for promptfoo: builds the emoji-picker chat messages for one
 * test case through the real `chainPickEmoji` chain.
 *
 * @param context - Object whose `vars` carries the test-case variables.
 * @returns The chain's `messages`, or an empty array when that value is falsy.
 */
export default function generatePrompt({ vars }: { vars: PromptVars }) {
  const chain = chainPickEmoji(vars.content);

  return chain.messages || [];
}
|
||||
|
|
@ -1,89 +0,0 @@
|
|||
description: Test knowledge base Q&A with context retrieval and answer generation
|
||||
|
||||
providers:
|
||||
- openai:chat:gpt-5-mini
|
||||
- openai:chat:claude-3-5-haiku-latest
|
||||
- openai:chat:gemini-flash-latest
|
||||
- openai:chat:deepseek-chat
|
||||
|
||||
prompts:
|
||||
- file://promptfoo/knowledge-qa/prompt.ts
|
||||
|
||||
tests:
|
||||
- vars:
|
||||
context: "React is a JavaScript library for building user interfaces. It was developed by Facebook and is now maintained by Facebook and the community. React uses a virtual DOM to efficiently update and render components. Key features include component-based architecture, JSX syntax, and state management through hooks."
|
||||
query: "What is React and who developed it?"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should accurately explain what React is and mention Facebook as the developer"
|
||||
- type: contains-any
|
||||
value: ["React", "JavaScript", "library", "Facebook"]
|
||||
- type: not-contains
|
||||
value: "I don't know"
|
||||
|
||||
- vars:
|
||||
context: "TypeScript is a strongly typed programming language that builds on JavaScript by adding static type definitions. It was developed by Microsoft. TypeScript code compiles to clean, simple JavaScript code which runs on any browser, Node.js environment, or any JavaScript engine that supports ECMAScript 3 or newer."
|
||||
query: "How does TypeScript relate to JavaScript?"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should explain the relationship between TypeScript and JavaScript, mentioning type definitions"
|
||||
- type: contains-any
|
||||
value: ["TypeScript", "JavaScript", "type", "compiles", "strongly typed"]
|
||||
- type: not-contains
|
||||
value: "不知道"
|
||||
|
||||
- vars:
|
||||
context: "Node.js是一个基于Chrome V8引擎的JavaScript运行时环境。它使用了事件驱动、非阻塞I/O模型,使其轻量而高效。Node.js的包管理器npm是世界上最大的开源库生态系统。"
|
||||
query: "Node.js有什么特点?"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should describe Node.js features in Chinese, mentioning event-driven and non-blocking I/O"
|
||||
- type: contains-any
|
||||
value: ["Node.js", "事件驱动", "非阻塞", "JavaScript", "运行时"]
|
||||
- type: not-contains
|
||||
value: "I don't know"
|
||||
|
||||
- vars:
|
||||
context: "Docker is a containerization platform that allows developers to package applications and their dependencies into lightweight, portable containers. Containers share the OS kernel but run in isolated user spaces. This makes applications more consistent across different environments."
|
||||
query: "How can I deploy my app with Docker?"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should provide helpful information about deploying with Docker based on the containerization context. It should mention containers, packaging, or provide deployment-related guidance."
|
||||
- type: contains-any
|
||||
value: ["Docker", "container", "package", "deploy"]
|
||||
|
||||
- vars:
|
||||
context: "GraphQL is a query language for APIs and a runtime for fulfilling those queries with existing data. Unlike REST APIs that require multiple requests to different endpoints, GraphQL allows clients to request exactly the data they need in a single request."
|
||||
query: "What are the benefits of using GraphQL over REST?"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should compare GraphQL and REST, highlighting GraphQL's advantages such as single request capability or requesting specific data"
|
||||
- type: contains-any
|
||||
value: ["GraphQL", "REST"]
|
||||
|
||||
- vars:
|
||||
context: "Machine learning algorithms can be categorized into supervised, unsupervised, and reinforcement learning. Supervised learning uses labeled data to train models, unsupervised learning finds patterns in unlabeled data, and reinforcement learning learns through trial and error with rewards."
|
||||
query: "Can you explain blockchain technology?"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should indicate that the provided context is about machine learning, not blockchain, and cannot answer the blockchain question based on the context"
|
||||
- type: contains-any
|
||||
value: ["machine learning", "cannot", "不能", "no information", "context", "does not contain"]
|
||||
|
||||
- vars:
|
||||
context: ""
|
||||
query: "How do I set up a web server?"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should provide helpful information about setting up a web server using general knowledge"
|
||||
- type: contains-any
|
||||
value: ["server", "web", "setup", "install", "configure"]
|
||||
- type: not-contains
|
||||
value: "cannot answer"
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
// TypeScript prompt wrapper that uses actual chain implementation
|
||||
import { chainAnswerWithContext } from '@lobechat/prompts';
|
||||
|
||||
/** Variables supplied by each test case of the knowledge-qa evaluation. */
interface PromptVars {
  /** Retrieved context; a single string or a list of strings. */
  context: string | string[];
  /** Knowledge labels; `['general knowledge']` is used when this is undefined. */
  knowledge?: string | string[];
  /** The user's question, forwarded to the chain as `question`. */
  query: string;
}

/**
 * Prompt function for promptfoo: normalises the test-case variables and builds
 * the chat messages through the real `chainAnswerWithContext` chain.
 *
 * @param context - Object whose `vars` carries the test-case variables.
 * @returns The chain's `messages`, or an empty array when that value is falsy.
 */
export default function generatePrompt({ vars }: { vars: PromptVars }) {
  // Arrays pass through untouched; a bare string becomes a one-element array.
  const asList = (value: string | string[]) => (Array.isArray(value) ? value : [value]);

  // Only `undefined` triggers the fallback, matching a destructuring default.
  const knowledge = vars.knowledge === undefined ? ['general knowledge'] : vars.knowledge;

  const chain = chainAnswerWithContext({
    context: asList(vars.context),
    knowledge: asList(knowledge),
    question: vars.query,
  });

  return chain.messages || [];
}
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
description: Test language detection accuracy for various text inputs
|
||||
|
||||
providers:
|
||||
- openai:chat:gpt-5-mini
|
||||
- openai:chat:claude-3-5-haiku-latest
|
||||
- openai:chat:gemini-flash-latest
|
||||
- openai:chat:deepseek-chat
|
||||
|
||||
prompts:
|
||||
- file://promptfoo/language-detection/prompt.ts
|
||||
|
||||
tests:
|
||||
- vars:
|
||||
content: "Hello, how are you today? I hope you're having a great day!"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should output a valid locale code for English (e.g., en-US, en-GB, en)"
|
||||
- type: contains-any
|
||||
value: ["en-", "en"]
|
||||
|
||||
- vars:
|
||||
content: "Bonjour, comment allez-vous? J'espère que vous passez une excellente journée!"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should output a valid locale code for French (e.g., fr-FR, fr-CA, fr)"
|
||||
- type: contains-any
|
||||
value: ["fr-", "fr"]
|
||||
|
||||
- vars:
|
||||
content: "你好,你今天怎么样?希望你过得愉快!"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should output a valid locale code for Chinese (e.g., zh-CN, zh-TW, zh)"
|
||||
- type: contains-any
|
||||
value: ["zh-", "zh"]
|
||||
|
||||
- vars:
|
||||
content: "Hola, ¿cómo estás hoy? ¡Espero que tengas un gran día!"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should output a valid locale code for Spanish (e.g., es-ES, es-MX, es)"
|
||||
- type: contains-any
|
||||
value: ["es-", "es"]
|
||||
|
||||
- vars:
|
||||
content: "Привет, как дела сегодня? Надеюсь, у тебя отличный день!"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should output a valid locale code for Russian (e.g., ru-RU, ru)"
|
||||
- type: contains-any
|
||||
value: ["ru-", "ru"]
|
||||
|
||||
- vars:
|
||||
content: "こんにちは、今日はいかがですか?素晴らしい一日をお過ごしください!"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "Should output a valid locale code for Japanese (e.g., ja-JP, ja)"
|
||||
- type: contains-any
|
||||
value: ["ja-", "ja"]
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
// TypeScript prompt wrapper that uses actual chain implementation
|
||||
import { chainLangDetect } from '@lobechat/prompts';
|
||||
|
||||
/** Variables supplied by each test case of the language-detection evaluation. */
interface PromptVars {
  /** Text whose language is to be detected; handed to `chainLangDetect`. */
  content: string;
}

/**
 * Prompt function for promptfoo: builds the language-detection chat messages
 * for one test case through the real `chainLangDetect` chain.
 *
 * @param context - Object whose `vars` carries the test-case variables.
 * @returns The chain's `messages`, or an empty array when that value is falsy.
 */
export default function generatePrompt({ vars }: { vars: PromptVars }) {
  const detection = chainLangDetect(vars.content);

  return detection.messages || [];
}
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
description: Test summary title generation for different conversation types
|
||||
|
||||
providers:
|
||||
- openai:chat:gpt-5-mini
|
||||
- openai:chat:claude-3-5-haiku-latest
|
||||
- openai:chat:gemini-flash-latest
|
||||
- openai:chat:deepseek-chat
|
||||
|
||||
prompts:
|
||||
- file://promptfoo/summary-title/prompt.ts
|
||||
|
||||
tests:
|
||||
- vars:
|
||||
messages:
|
||||
- role: "user"
|
||||
content: "How do I install Node.js on my computer?"
|
||||
- role: "assistant"
|
||||
content: "To install Node.js, you can download it from the official website nodejs.org and follow the installation instructions for your operating system."
|
||||
- role: "user"
|
||||
content: "What about using a version manager?"
|
||||
- role: "assistant"
|
||||
content: "Yes! I recommend using nvm (Node Version Manager) which allows you to install and switch between different Node.js versions easily."
|
||||
locale: "en-US"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should be a concise title (10 words or less) that summarizes the conversation about Node.js installation"
|
||||
- type: regex
|
||||
value: "^.{1,50}$" # Title should be between 1-50 characters
|
||||
- type: not-contains
|
||||
value: "标点符号" # Fails only if the output contains the literal phrase "标点符号" ("punctuation marks"); it does not detect actual punctuation characters
|
||||
|
||||
- vars:
|
||||
messages:
|
||||
- role: "user"
|
||||
content: "我想学习做蛋炒饭"
|
||||
- role: "assistant"
|
||||
content: "蛋炒饭是很经典的家常菜!你需要准备鸡蛋、米饭、葱花、盐和生抽等基本材料。"
|
||||
- role: "user"
|
||||
content: "具体步骤是什么?"
|
||||
- role: "assistant"
|
||||
content: "首先打散鸡蛋炒熟盛起,然后下米饭炒散,最后加入鸡蛋和调料炒匀即可。"
|
||||
locale: "zh-CN"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should be a Chinese title summarizing the conversation about fried rice cooking"
|
||||
- type: regex
|
||||
value: "^.{1,30}$" # Chinese titles can be shorter
|
||||
- type: contains-any
|
||||
value: ["蛋炒饭", "做饭", "烹饪", "料理"]
|
||||
|
||||
- vars:
|
||||
messages:
|
||||
- role: "user"
|
||||
content: "Can you help me debug this Python error?"
|
||||
- role: "assistant"
|
||||
content: "Of course! Please share the error message and the relevant code."
|
||||
- role: "user"
|
||||
content: "I'm getting 'AttributeError: 'NoneType' object has no attribute 'split''"
|
||||
- role: "assistant"
|
||||
content: "This error occurs when you're trying to call .split() on a None value. The variable is likely None instead of a string."
|
||||
locale: "en-US"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
|
||||
value: "The response should be a title about Python debugging or error resolution"
|
||||
- type: contains-any
|
||||
value: ["Python", "debug", "error", "AttributeError", "code"]
|
||||
|
||||
- vars:
|
||||
messages:
|
||||
- role: "user"
|
||||
content: "¿Cómo está el tiempo hoy?"
|
||||
- role: "assistant"
|
||||
content: "No tengo acceso a información meteorológica en tiempo real, pero puedes consultar el clima en tu área usando aplicaciones como Weather.com o tu app del tiempo local."
|
||||
locale: "es-ES"
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
|
||||
value: "The response should be a Spanish title about weather inquiry"
|
||||
- type: regex
|
||||
value: "^.{1,50}$"
|
||||
|
||||
- vars:
|
||||
messages:
|
||||
- role: "user"
|
||||
content: "你知道 litellm 吗?"
|
||||
locale: "zh-CN"
|
||||
assert:
|
||||
- type: regex
|
||||
value: "^.{1,20}$" # Chinese titles can be shorter
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
// TypeScript prompt wrapper that uses actual chain implementation
|
||||
import { chainSummaryTitle } from '@lobechat/prompts';
|
||||
import type { OpenAIChatMessage } from '@lobechat/types';
|
||||
|
||||
/** Variables supplied by each test case of the summary-title evaluation. */
interface PromptVars {
  /** Locale passed as the second argument to `chainSummaryTitle`. */
  locale: string;
  /** Conversation passed as the first argument to `chainSummaryTitle`. */
  messages: OpenAIChatMessage[];
}

/**
 * Prompt function for promptfoo: builds the title-summarisation chat messages
 * for one test case through the real `chainSummaryTitle` chain.
 *
 * @param context - Object whose `vars` carries the test-case variables.
 * @returns The chain's `messages`, or an empty array when that value is falsy.
 */
export default function generatePrompt({ vars }: { vars: PromptVars }) {
  const chain = chainSummaryTitle(vars.messages, vars.locale);

  return chain.messages || [];
}
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
description: Test supervisor prompt generation for group chat orchestration
|
||||
|
||||
prompts:
|
||||
- file://promptfoo/supervisor/productive/prompt.ts
|
||||
|
||||
providers:
|
||||
- id: openai:chat:gpt-5
|
||||
config:
|
||||
tools: file://./tools.json
|
||||
tool_choice: required
|
||||
|
||||
- id: openai:chat:claude-sonnet-4-5-20250929
|
||||
config:
|
||||
tools: file://./tools.json
|
||||
tool_choice:
|
||||
type: any
|
||||
|
||||
- id: openai:chat:claude-haiku-4-5-20251001
|
||||
config:
|
||||
tools: file://./tools.json
|
||||
tool_choice:
|
||||
type: any
|
||||
|
||||
- id: openai:chat:gemini-2.5-pro
|
||||
config:
|
||||
tools: file://./tools.json
|
||||
tool_choice: required
|
||||
|
||||
- id: openai:chat:deepseek-chat
|
||||
config:
|
||||
tools: file://./tools.json
|
||||
tool_choice: required
|
||||
|
||||
- id: openai:chat:gpt-5-mini
|
||||
config:
|
||||
tools: file://./tools.json
|
||||
tool_choice: required
|
||||
|
||||
- id: openai:chat:o3
|
||||
config:
|
||||
tools: file://./tools.json
|
||||
tool_choice: required
|
||||
|
||||
- id: openai:chat:gpt-4.1-mini
|
||||
config:
|
||||
tools: file://./tools.json
|
||||
tool_choice: required
|
||||
|
||||
tests:
|
||||
- file://./tests/basic-case.ts
|
||||
# - file://./tests/role.ts
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
// TypeScript prompt wrapper that uses actual buildSupervisorPrompt implementation
|
||||
import type { SupervisorPromptParams } from '../../../src';
|
||||
import { buildSupervisorPrompt } from '../../../src';
|
||||
|
||||
/**
 * Prompt function for promptfoo's supervisor evaluation.
 *
 * Builds the supervisor prompt from the test case's `vars` with the real
 * `buildSupervisorPrompt` implementation and wraps it in a single chat message.
 *
 * @param context - Object whose `vars` carries the test-case variables: the
 *   supervisor prompt parameters plus an optional `role` for the emitted message.
 * @returns A one-element message array holding the generated prompt.
 */
const generatePrompt = ({
  vars,
}: {
  // `role` is optional: it is defaulted below, and not every test case sets it.
  vars: Omit<SupervisorPromptParams, 'allowDM' | 'scene'> & { role?: string };
}) => {
  const prompt = buildSupervisorPrompt(vars);

  // Only the chat message is returned; tool definitions are not part of this
  // function's return value.
  // `||` rather than `??` is deliberate: an empty-string role also falls back to 'user'.
  return [{ content: prompt, role: vars.role || 'user' }];
};
|
||||
|
||||
export default generatePrompt;
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
// promptfoo test cases for the supervisor prompt. Each entry pairs the template
// variables (`vars`) with the assertions (`assert`) run against the model output.
const testCases = [
|
||||
// Case 1: a task request. The output must be a valid OpenAI tool call, and an
// LLM rubric checks that trigger_agent is called for the coder or designer.
// This case sets no `role` in its vars.
|
||||
{
|
||||
assert: [
|
||||
{ type: 'is-valid-openai-tools-call' },
|
||||
{
|
||||
provider: 'openai:gpt-5-mini',
|
||||
type: 'llm-rubric',
|
||||
value:
|
||||
'Should call trigger_agent tool to ask coder or designer to help with the login page task',
|
||||
},
|
||||
],
|
||||
vars: {
|
||||
availableAgents: [
|
||||
{ id: 'coder', title: 'Code Wizard' },
|
||||
{ id: 'designer', title: 'UI Designer' },
|
||||
],
|
||||
conversationHistory: 'User: I need help building a login page',
|
||||
systemPrompt: 'You are coordinating a software development team',
|
||||
userName: 'Bobs',
|
||||
},
|
||||
},
|
||||
// Case 2: the user only says "hi". The JavaScript assertion passes only when
// the output is a non-empty array in which every call's function name is
// 'trigger_agent'; the rubric additionally rules out todo operations.
|
||||
{
|
||||
assert: [
|
||||
{ type: 'is-valid-openai-tools-call' },
|
||||
{
|
||||
type: 'javascript',
|
||||
value: `
|
||||
// Ensure ONLY trigger_agent tool is called, no create_todo, finish_todo, etc.
|
||||
const toolCalls = Array.isArray(output) ? output : [];
|
||||
return toolCalls.length > 0 && toolCalls.every(call => call.function?.name === 'trigger_agent');
|
||||
`,
|
||||
},
|
||||
{
|
||||
provider: 'openai:gpt-5-mini',
|
||||
type: 'llm-rubric',
|
||||
value:
|
||||
'Should call trigger_agent tool to greet the user or ask how to help. Should NOT include any create_todo or finish_todo calls.',
|
||||
},
|
||||
],
|
||||
vars: {
|
||||
availableAgents: [
|
||||
{ id: 'agt_J34pj8igq5Hk', title: '全栈工程师' },
|
||||
{ id: 'agt_5xSoLVNHOjQj', title: '产品经理' },
|
||||
],
|
||||
conversationHistory: '<message author="user">hi</message>',
|
||||
role: 'user',
|
||||
userName: 'Rene Wang',
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
export default testCases;
|
||||
|
|
@ -1,58 +0,0 @@
|
|||
// Assertion list for the greeting scenario: a structural tool-call check, a
// custom JavaScript check, and an LLM-graded rubric.
const assert = [
|
||||
{ type: 'is-valid-openai-tools-call' },
|
||||
// Custom check: passes only when `output` is a non-empty array and every
// entry's tool name (read from `tool_name`, `function.name`, or `name`) is
// 'trigger_agent'. It logs the output and each tool name for debugging.
{
|
||||
type: 'javascript',
|
||||
value: `
|
||||
// Debug: log the actual output structure
|
||||
console.log('DEBUG output:', JSON.stringify(output, null, 2));
|
||||
|
||||
// Ensure ONLY trigger_agent tool is called, no create_todo, finish_todo, etc.
|
||||
const toolCalls = Array.isArray(output) ? output : [];
|
||||
if (toolCalls.length === 0) {
|
||||
console.log('DEBUG: No tool calls found');
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const call of toolCalls) {
|
||||
const toolName = call.tool_name || call.function?.name || call.name;
|
||||
console.log('DEBUG tool name:', toolName);
|
||||
|
||||
if (toolName !== 'trigger_agent') {
|
||||
console.log('DEBUG: Found non-trigger_agent tool:', toolName);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
console.log('DEBUG: All', toolCalls.length, 'calls are trigger_agent');
|
||||
return true;
|
||||
`,
|
||||
},
|
||||
// Rubric graded by openai:gpt-5-mini.
{
|
||||
provider: 'openai:gpt-5-mini',
|
||||
type: 'llm-rubric',
|
||||
value:
|
||||
'Should call trigger_agent tool to greet the user or ask how to help. Should NOT include any create_todo or finish_todo calls.',
|
||||
},
|
||||
];
|
||||
// Agents offered to the supervisor in the greeting scenario.
const greetingAgents = [
  { id: 'agt_J34pj8igq5Hk', title: '全栈工程师' },
  { id: 'agt_5xSoLVNHOjQj', title: '产品经理' },
];

// Base template variables for the greeting scenario: two agents and a
// conversation consisting of a single "hi" from the user.
const vars = {
  availableAgents: greetingAgents,
  conversationHistory: '<message author="user">hi</message>',
  role: 'user',
  userName: 'Rene Wang',
};
|
||||
|
||||
// The same scenario run twice: once with the prompt sent under the 'user'
// role and once under the 'system' role. Both cases share one assertion list.
const testCases = ['user', 'system'].map((role) => ({
  assert,
  vars: { ...vars, role },
}));
|
||||
|
||||
export default testCases;
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
[
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "trigger_agent",
|
||||
"description": "Trigger an agent to speak (group message).",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The agent id to trigger."
|
||||
},
|
||||
"instruction": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"required": ["instruction", "id"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "wait_for_user_input",
|
||||
"description": "Wait for user input. Use this when the conversation history looks fine for now, or agents are waiting for user input.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"reason": {
|
||||
"type": "string",
|
||||
"description": "Optional reason for pausing the conversation."
|
||||
}
|
||||
},
|
||||
"required": [],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "create_todo",
|
||||
"description": "Create a new todo item",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"assignee": {
|
||||
"type": "string",
|
||||
"description": "Who will do the todo. Can be agent id or empty."
|
||||
},
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The todo content or description."
|
||||
}
|
||||
},
|
||||
"required": ["content", "assignee"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "finish_todo",
|
||||
"description": "Finish a todo by index or all todos",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
"required": ["index"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
|
|
@ -1,79 +0,0 @@
|
|||
description: Test translation accuracy between different languages
|
||||
|
||||
providers:
|
||||
- openai:chat:gpt-5-mini
|
||||
- openai:chat:claude-3-5-haiku-latest
|
||||
- openai:chat:gemini-flash-latest
|
||||
- openai:chat:deepseek-chat
|
||||
|
||||
prompts:
|
||||
- file://promptfoo/translate/prompt.ts
|
||||
|
||||
tests:
|
||||
- vars:
|
||||
content: 'Hello, how are you?'
|
||||
from: 'en-US'
|
||||
to: 'zh-CN'
|
||||
assert:
|
||||
- type: contains-any
|
||||
value: ['你好', '您好']
|
||||
- type: not-contains
|
||||
value: 'Hello'
|
||||
|
||||
- vars:
|
||||
content: '你好,你怎么样?'
|
||||
from: 'zh-CN'
|
||||
to: 'en-US'
|
||||
assert:
|
||||
- type: contains-any
|
||||
value: ['Hello', 'Hi', 'how are you', 'How are you']
|
||||
- type: not-contains
|
||||
value: '你好'
|
||||
|
||||
- vars:
|
||||
content: 'Je suis content de vous rencontrer'
|
||||
from: 'fr-FR'
|
||||
to: 'en-US'
|
||||
assert:
|
||||
- type: contains-any
|
||||
value: ['pleased', 'happy', 'glad', 'meet', 'meeting']
|
||||
- type: not-contains
|
||||
value: 'Je suis'
|
||||
|
||||
- vars:
|
||||
content: 'The weather is beautiful today'
|
||||
from: 'en-US'
|
||||
to: 'es-ES'
|
||||
assert:
|
||||
- type: contains-any
|
||||
value: ['tiempo', 'clima', 'hermoso', 'bonito', 'hoy', 'día', 'precioso']
|
||||
- type: not-contains
|
||||
value: 'weather'
|
||||
|
||||
- vars:
|
||||
content: 'I love programming with TypeScript'
|
||||
from: 'en-US'
|
||||
to: 'ja-JP'
|
||||
assert:
|
||||
- type: contains
|
||||
value: 'TypeScript' # Technical terms often remain unchanged
|
||||
- type: not-contains
|
||||
value: 'I love'
|
||||
|
||||
- vars:
|
||||
content: 'Machine learning is revolutionizing technology'
|
||||
from: 'en-US'
|
||||
to: 'de-DE'
|
||||
assert:
|
||||
- type: contains-any
|
||||
value: ['Technologie', 'revolutioniert', 'maschinelles', 'Lernen']
|
||||
- type: not-contains
|
||||
value: 'Machine learning'
|
||||
|
||||
- vars:
|
||||
content: 'API_KEY_12345'
|
||||
from: 'en-US'
|
||||
to: 'zh-CN'
|
||||
assert:
|
||||
- type: contains
|
||||
value: 'API_KEY_12345'
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
// TypeScript prompt wrapper that uses actual chain implementation
|
||||
import { chainTranslate } from '@lobechat/prompts';
|
||||
|
||||
/** Variables supplied by each test case of the translate evaluation. */
interface PromptVars {
  /** Text to translate; first argument to `chainTranslate`. */
  content: string;
  /** Source locale. Declared for the test cases but not read by `generatePrompt`. */
  from: string;
  /** Target locale; second argument to `chainTranslate`. */
  to: string;
}

/**
 * Prompt function for promptfoo: builds the translation chat messages for one
 * test case through the real `chainTranslate` chain.
 *
 * @param context - Object whose `vars` carries the test-case variables.
 * @returns The chain's `messages`, or an empty array when that value is falsy.
 */
export default function generatePrompt({ vars }: { vars: PromptVars }) {
  const translation = chainTranslate(vars.content, vars.to);

  return translation.messages || [];
}
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
description: LobeHub Prompts Testing Suite
|
||||
|
||||
# Test configurations - run all prompt tests
|
||||
testPaths:
|
||||
- promptfoo/translate/eval.yaml
|
||||
- promptfoo/summary-title/eval.yaml
|
||||
- promptfoo/language-detection/eval.yaml
|
||||
- promptfoo/emoji-picker/eval.yaml
|
||||
- promptfoo/knowledge-qa/eval.yaml
|
||||
- promptfoo/abstract-chunk/eval.yaml
|
||||
|
||||
# Output configuration
|
||||
outputPath: promptfoo-results.json
|
||||
|
||||
# Default test settings
|
||||
defaultTest:
|
||||
assert:
|
||||
- type: llm-rubric
|
||||
provider: openai:gpt-5-mini
|
||||
value: "The response should be relevant and well-formatted"
|
||||
- type: cost
|
||||
threshold: 0.01 # Maximum cost per test in USD
|
||||
|
||||
# Environment variables for API keys
|
||||
env:
|
||||
OPENAI_API_KEY: ${OPENAI_API_KEY}
|
||||
ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
|
||||
|
||||
# Evaluation settings
|
||||
evaluateOptions:
|
||||
maxConcurrency: 5
|
||||
delay: 100
|
||||
|
||||
# TypeScript support
|
||||
transforms:
|
||||
- "typescript"
|
||||
Loading…
Reference in a new issue