feat(providers/pi): best-effort structured output via prompt engineering (#1297)

Pi's SDK has no native JSON-schema mode (unlike Claude's outputFormat /
Codex's outputSchema). Previously Pi declared structuredOutput: false
and any workflow using output_format silently degraded — the node ran,
the transcript was treated as free text, and downstream $nodeId.output.field
refs resolved to empty strings. 8 bundled/repo workflows across 10 nodes
were affected (archon-create-issue, archon-fix-github-issue,
archon-smart-pr-review, archon-workflow-builder, archon-validate-pr, etc.).

This PR closes the gap via prompt engineering + post-parse:

1. When requestOptions.outputFormat is present, the provider appends a
   "respond with ONLY a JSON object matching this schema" instruction plus
   JSON.stringify(schema) to the prompt before calling session.prompt().

2. bridgeSession accepts an optional jsonSchema param. When set, it buffers
   every assistant text_delta and — on the terminal result chunk — parses
   the buffer via tryParseStructuredOutput (trims whitespace, strips
   ```json / ``` fences, JSON.parse). On success, attaches
   structuredOutput to the result chunk (matching Claude's shape). On
   failure, emits a warn event and leaves structuredOutput undefined so
   the executor's existing dag.structured_output_missing path handles it.

3. Flipped PI_CAPABILITIES.structuredOutput to true. Unlike Claude/Codex
   this is best-effort, not SDK-enforced — reliable on GPT-5, Claude,
   Gemini 2.x, recent Qwen Coder, DeepSeek V3, less reliable on smaller
   or older models that ignore JSON-only instructions.

Tests added (14 total):
- tryParseStructuredOutput: clean JSON, fenced, bare fences, arrays,
  whitespace, empty, prose-wrapped (fails), malformed, inner backticks
- augmentPromptForJsonSchema via provider integration: schema appended,
  prompt unchanged when absent
- End-to-end: clean JSON → structuredOutput parsed; fenced JSON parses;
  prose-wrapped → no structuredOutput + no crash; no outputFormat →
  never sets structuredOutput even if assistant happens to emit JSON

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Cole Medin 2026-04-19 10:16:02 -05:00 committed by GitHub
parent 83c119af78
commit fb73a500d7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 305 additions and 9 deletions

View file

@ -8,6 +8,13 @@ import type { ProviderCapabilities } from '../../types';
* envInjection covers both auth-key passthrough (setRuntimeApiKey for mapped
* provider env vars) and bash tool subprocess env (BashSpawnHook merges the
* caller's env over Pi's inherited baseline), matching Claude/Codex semantics.
*
* structuredOutput is best-effort (not SDK-enforced like Claude/Codex): the
* provider appends a "JSON only" instruction + the schema to the prompt and
* the event bridge parses the final assistant transcript on agent_end.
* Reliable on instruction-following models (GPT-5, Claude, Gemini 2.x,
* recent Qwen Coder, DeepSeek V3); parse failures degrade via the
* dag-executor's existing dag.structured_output_missing path.
*/
export const PI_CAPABILITIES: ProviderCapabilities = {
sessionResume: true,
@ -16,7 +23,7 @@ export const PI_CAPABILITIES: ProviderCapabilities = {
skills: true,
agents: false,
toolRestrictions: true,
structuredOutput: false,
structuredOutput: true,
envInjection: true,
costControl: false,
effortControl: true,

View file

@ -5,6 +5,7 @@ import {
buildResultChunk,
mapPiEvent,
serializeToolResult,
tryParseStructuredOutput,
usageToTokens,
} from './event-bridge';
@ -367,3 +368,56 @@ describe('mapPiEvent', () => {
).toEqual([]);
});
});
// ─── tryParseStructuredOutput ──────────────────────────────────────────────
describe('tryParseStructuredOutput', () => {
test('parses clean JSON object', () => {
expect(tryParseStructuredOutput('{"name":"alpha","count":3}')).toEqual({
name: 'alpha',
count: 3,
});
});
test('parses JSON with surrounding whitespace', () => {
expect(tryParseStructuredOutput(' \n{"ok":true}\n ')).toEqual({ ok: true });
});
test('strips ```json fences', () => {
const fenced = '```json\n{"area":"web","confidence":0.9}\n```';
expect(tryParseStructuredOutput(fenced)).toEqual({ area: 'web', confidence: 0.9 });
});
test('strips bare ``` fences', () => {
expect(tryParseStructuredOutput('```\n{"ok":1}\n```')).toEqual({ ok: 1 });
});
test('parses JSON arrays', () => {
expect(tryParseStructuredOutput('[1,2,3]')).toEqual([1, 2, 3]);
});
test('returns undefined on empty string', () => {
expect(tryParseStructuredOutput('')).toBeUndefined();
expect(tryParseStructuredOutput(' ')).toBeUndefined();
});
test('returns undefined when model wraps JSON in prose', () => {
// Realistic failure mode — model ignores "JSON only" instruction and adds
// explanatory text before/after. Caller degrades via the executor's
// missing-structured-output warning path.
const prose =
'Here is the JSON you requested:\n{"ok":true}\nLet me know if you need anything else.';
expect(tryParseStructuredOutput(prose)).toBeUndefined();
});
test('returns undefined on malformed JSON', () => {
expect(tryParseStructuredOutput('{not valid}')).toBeUndefined();
expect(tryParseStructuredOutput('{"unclosed":')).toBeUndefined();
});
test('preserves backticks inside JSON string values', () => {
// Fence stripper matches only at start/end; inner backticks must survive.
const withBackticks = '{"code":"run `npm test`"}';
expect(tryParseStructuredOutput(withBackticks)).toEqual({ code: 'run `npm test`' });
});
});

View file

@ -151,6 +151,33 @@ export function buildResultChunk(messages: readonly unknown[]): MessageChunk {
return chunk;
}
/**
* Attempt to parse a Pi assistant transcript as the structured-output JSON
* requested via `outputFormat`. Handles two common model failure modes:
* - trailing/leading whitespace (always stripped)
* - markdown code fences (```json ... ``` or bare ``` ... ```) that models
* emit despite the "no code fences" instruction in the prompt
*
* Returns the parsed value on success, `undefined` on any failure. Callers
* treat `undefined` as "structured output unavailable" and degrade via the
* dag-executor's existing missing-structured-output warning.
*/
export function tryParseStructuredOutput(text: string): unknown {
const trimmed = text.trim();
if (trimmed.length === 0) return undefined;
// Strip ```json / ``` fences if present. Match only at boundaries so we
// don't mangle JSON strings that legitimately contain backticks.
const cleaned = trimmed
.replace(/^```(?:json)?\s*\n?/i, '')
.replace(/\n?\s*```\s*$/, '')
.trim();
try {
return JSON.parse(cleaned);
} catch {
return undefined;
}
}
/**
* Pure mapper from Pi's `AgentSessionEvent` zero-or-more Archon `MessageChunk`s.
*
@ -243,13 +270,22 @@ export type BridgeQueueItem =
export async function* bridgeSession(
session: AgentSession,
prompt: string,
abortSignal?: AbortSignal
abortSignal?: AbortSignal,
jsonSchema?: Record<string, unknown>
): AsyncGenerator<MessageChunk> {
const queue = new AsyncQueue<BridgeQueueItem>();
// Best-effort structured-output buffer. Only accumulates when the caller
// requested a JSON schema; otherwise stays empty and the terminal chunk
// passes through untouched.
const wantsStructured = jsonSchema !== undefined;
let assistantBuffer = '';
const unsubscribe = session.subscribe((event: AgentSessionEvent) => {
try {
for (const chunk of mapPiEvent(event)) {
if (wantsStructured && chunk.type === 'assistant') {
assistantBuffer += chunk.content;
}
queue.push({ kind: 'chunk', chunk });
}
} catch (err) {
@ -291,8 +327,28 @@ export async function* bridgeSession(
// Pi's session.sessionId is always a UUID (even for in-memory); we emit
// it unconditionally and let the caller decide whether resume is
// meaningful (capability-gated at the registry level).
if (item.chunk.type === 'result' && session.sessionId) {
yield { ...item.chunk, sessionId: session.sessionId };
if (item.chunk.type === 'result') {
let terminal: MessageChunk = item.chunk;
if (session.sessionId) {
terminal = { ...terminal, sessionId: session.sessionId };
}
// Best-effort structured output: parse the accumulated assistant
// transcript as JSON and attach. On parse failure, leave it off —
// the dag-executor's existing dag.structured_output_missing path
// warns and downstream $node.output.field refs degrade to '' instead
// of propagating bogus data.
if (wantsStructured) {
const parsed = tryParseStructuredOutput(assistantBuffer);
if (parsed !== undefined) {
terminal = { ...terminal, structuredOutput: parsed };
} else {
getLog().warn(
{ bufferLength: assistantBuffer.length },
'pi.event-bridge.structured_output_parse_failed'
);
}
}
yield terminal;
} else {
yield item.chunk;
}

View file

@ -902,10 +902,11 @@ describe('PiProvider', () => {
expect(caps.skills).toBe(true);
expect(caps.sessionResume).toBe(true);
expect(caps.envInjection).toBe(true);
// Best-effort structured output via prompt engineering (not SDK-enforced).
expect(caps.structuredOutput).toBe(true);
// Still false:
expect(caps.mcp).toBe(false);
expect(caps.hooks).toBe(false);
expect(caps.structuredOutput).toBe(false);
});
test('nodeConfig.skills with unknown name yields system warning, does not abort', async () => {
@ -1047,4 +1048,140 @@ describe('PiProvider', () => {
);
expect(systemChunks.some(c => c.content.includes('sonnet-5 not available'))).toBe(true);
});
// ─── structured output (best-effort JSON via prompt engineering) ──────
// Script an assistant text_delta followed by agent_end so the bridge has
// buffered content to parse when outputFormat is set.
function scriptedAssistantThenEnd(text: string): FakeEvent[] {
return [
{
type: 'message_update',
message: { role: 'assistant' } as never,
assistantMessageEvent: {
type: 'text_delta',
contentIndex: 0,
delta: text,
partial: { role: 'assistant' } as never,
},
},
...scriptedAgentEnd(),
];
}
test('outputFormat: schema is appended to prompt as JSON instruction', async () => {
process.env.GEMINI_API_KEY = 'sk-test';
resetScript(scriptedAgentEnd());
await consume(
new PiProvider().sendQuery('Summarize this bug.', '/tmp', undefined, {
model: 'google/gemini-2.5-pro',
outputFormat: {
type: 'json_schema',
schema: { type: 'object', properties: { area: { type: 'string' } } },
},
})
);
// Prompt should now contain the original instruction + the schema hint.
expect(mockPrompt).toHaveBeenCalled();
const [sentPrompt] = mockPrompt.mock.calls[0] as [string];
expect(sentPrompt).toContain('Summarize this bug.');
expect(sentPrompt).toContain('Respond with ONLY a JSON object');
expect(sentPrompt).toContain('"area"');
});
test('outputFormat: absent → prompt passed through unchanged', async () => {
process.env.GEMINI_API_KEY = 'sk-test';
resetScript(scriptedAgentEnd());
await consume(
new PiProvider().sendQuery('do a thing', '/tmp', undefined, {
model: 'google/gemini-2.5-pro',
})
);
const [sentPrompt] = mockPrompt.mock.calls[0] as [string];
expect(sentPrompt).toBe('do a thing');
expect(sentPrompt).not.toContain('JSON');
});
test('outputFormat: result chunk carries parsed structuredOutput on clean JSON', async () => {
process.env.GEMINI_API_KEY = 'sk-test';
resetScript(scriptedAssistantThenEnd('{"area":"web","confidence":0.9}'));
const { chunks } = await consume(
new PiProvider().sendQuery('classify', '/tmp', undefined, {
model: 'google/gemini-2.5-pro',
outputFormat: {
type: 'json_schema',
schema: { type: 'object' },
},
})
);
const result = chunks.find(
(c): c is { type: 'result'; structuredOutput?: unknown } =>
typeof c === 'object' && c !== null && (c as { type?: string }).type === 'result'
);
expect(result).toBeDefined();
expect(result?.structuredOutput).toEqual({ area: 'web', confidence: 0.9 });
});
test('outputFormat: fenced JSON (```json ... ```) still parses', async () => {
process.env.GEMINI_API_KEY = 'sk-test';
resetScript(scriptedAssistantThenEnd('```json\n{"ok":true}\n```'));
const { chunks } = await consume(
new PiProvider().sendQuery('x', '/tmp', undefined, {
model: 'google/gemini-2.5-pro',
outputFormat: { type: 'json_schema', schema: {} },
})
);
const result = chunks.find(
(c): c is { type: 'result'; structuredOutput?: unknown } =>
typeof c === 'object' && c !== null && (c as { type?: string }).type === 'result'
);
expect(result?.structuredOutput).toEqual({ ok: true });
});
test('outputFormat: prose-wrapped JSON → no structuredOutput, degrades cleanly', async () => {
process.env.GEMINI_API_KEY = 'sk-test';
resetScript(scriptedAssistantThenEnd('Here is the JSON:\n{"ok":true}\nHope this helps!'));
const { chunks, error } = await consume(
new PiProvider().sendQuery('x', '/tmp', undefined, {
model: 'google/gemini-2.5-pro',
outputFormat: { type: 'json_schema', schema: {} },
})
);
// No crash — downstream degradation is the executor's job via its
// existing dag.structured_output_missing warning path.
expect(error).toBeUndefined();
const result = chunks.find(
(c): c is { type: 'result'; structuredOutput?: unknown } =>
typeof c === 'object' && c !== null && (c as { type?: string }).type === 'result'
);
expect(result).toBeDefined();
expect(result?.structuredOutput).toBeUndefined();
});
test('no outputFormat → structuredOutput never set even if assistant emits JSON', async () => {
process.env.GEMINI_API_KEY = 'sk-test';
resetScript(scriptedAssistantThenEnd('{"accidental":"json"}'));
const { chunks } = await consume(
new PiProvider().sendQuery('x', '/tmp', undefined, {
model: 'google/gemini-2.5-pro',
})
);
const result = chunks.find(
(c): c is { type: 'result'; structuredOutput?: unknown } =>
typeof c === 'object' && c !== null && (c as { type?: string }).type === 'result'
);
expect(result?.structuredOutput).toBeUndefined();
});
});

View file

@ -64,6 +64,30 @@ function lookupPiModel(provider: string, modelId: string): Model<Api> | undefine
);
}
/**
* Append a "respond with JSON matching this schema" instruction to the user
* prompt so Pi-backed models produce parseable structured output. Pi's SDK
* has no JSON-mode equivalent to Claude's outputFormat or Codex's
* outputSchema, so this is a best-effort fallback: the event bridge parses
* the assistant transcript on agent_end. Models that reliably follow
* instruction (GPT-5, Claude, Gemini 2.x, recent Qwen Coder, DeepSeek V3)
* return clean JSON; models that don't produce a parse failure, which the
* executor surfaces via the existing dag.structured_output_missing warning.
*/
export function augmentPromptForJsonSchema(
prompt: string,
schema: Record<string, unknown>
): string {
return `${prompt}
---
CRITICAL: Respond with ONLY a JSON object matching the schema below. No prose before or after the JSON. No markdown code fences. Just the raw JSON object as your final message.
Schema:
${JSON.stringify(schema, null, 2)}`;
}
/**
* Pi community provider wraps `@mariozechner/pi-coding-agent`'s full
* coding-agent harness. Each `sendQuery()` call creates a fresh session
@ -257,10 +281,26 @@ export class PiProvider implements IAgentProvider {
yield { type: 'system', content: `⚠️ ${modelFallbackMessage}` };
}
// 5. Bridge callback-based events to the async generator contract.
// 5. Structured output (best-effort). Pi has no SDK-level JSON schema
// mode the way Claude and Codex do, so we implement it via prompt
// engineering: append the schema + "JSON only, no fences" instruction,
// and have the bridge parse the accumulated assistant text on
// agent_end. Parse failures degrade gracefully — the executor's
// existing dag.structured_output_missing warning path handles them.
const outputFormat = requestOptions?.outputFormat;
const effectivePrompt = outputFormat
? augmentPromptForJsonSchema(prompt, outputFormat.schema)
: prompt;
// 6. Bridge callback-based events to the async generator contract.
// bridgeSession owns dispose() and abort wiring.
try {
yield* bridgeSession(session, prompt, requestOptions?.abortSignal);
yield* bridgeSession(
session,
effectivePrompt,
requestOptions?.abortSignal,
outputFormat?.schema
);
getLog().info({ piProvider: parsed.provider }, 'pi.prompt_completed');
} catch (err) {
getLog().error({ err, piProvider: parsed.provider }, 'pi.prompt_failed');

View file

@ -304,7 +304,7 @@ describe('registry', () => {
expect(piEntries).toHaveLength(1);
});
test('declares v2 capabilities (thinking, effort, tools, skills, sessionResume, envInjection supported)', () => {
test('declares v2 capabilities (thinking, effort, tools, skills, sessionResume, envInjection, structuredOutput supported)', () => {
registerPiProvider();
const caps = getProviderCapabilities('pi');
// Flipped true in v2
@ -314,10 +314,12 @@ describe('registry', () => {
expect(caps.skills).toBe(true);
expect(caps.sessionResume).toBe(true);
expect(caps.envInjection).toBe(true);
// Best-effort structured output via prompt engineering + post-parse —
// not SDK-enforced like Claude/Codex, but wired up and tested.
expect(caps.structuredOutput).toBe(true);
// Still false (out of v2 scope)
expect(caps.mcp).toBe(false);
expect(caps.hooks).toBe(false);
expect(caps.structuredOutput).toBe(false);
expect(caps.costControl).toBe(false);
expect(caps.fallbackModel).toBe(false);
expect(caps.sandbox).toBe(false);