mirror of
https://github.com/lobehub/lobehub
synced 2026-04-21 17:47:27 +00:00
🐛 fix: detect truncated tool_calls arguments in builtin tools (#13817)
* 🐛 fix: detect truncated tool_call arguments in builtin tools

When an LLM hits max_tokens mid tool_call, the arguments JSON is truncated. The previous flow passed `{}` to the tool, which returned a generic "required field missing" error; the model retried with the same payload and the truncation repeated — one observed trace burned 17 min and $2.46 on 5 blind retries.

Detect structural truncation (unclosed braces/brackets/strings) in BuiltinToolsExecutor before schema validation, and return a dedicated TRUNCATED_ARGUMENTS error telling the model to reduce payload size or raise max_tokens instead of retrying.

Fixes LOBE-7148

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* 💄 chore: echo raw arguments string and reject all unparseable JSON

Two improvements based on review:

- Append the received arguments string to the error content so the model can verify the payload is exactly what it produced (stops it from blaming upstream or guessing what went wrong).
- Treat ANY unparseable non-empty argsStr as an error (new code INVALID_JSON_ARGUMENTS), not just truncation. The previous fallback of passing `{}` to the tool produced generic "missing field" errors that hid the real cause. Empty argsStr still falls through to `{}` for tools that take no parameters.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
18bc2716b2
commit
7e89fa782d
5 changed files with 243 additions and 3 deletions
57
packages/utils/src/detectTruncatedJSON.test.ts
Normal file
57
packages/utils/src/detectTruncatedJSON.test.ts
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
import { detectTruncatedJSON } from './detectTruncatedJSON';
|
||||
|
||||
// Unit tests for `detectTruncatedJSON`: balanced input must yield `null`,
// structurally incomplete input must yield a short reason string.
describe('detectTruncatedJSON', () => {
  it('returns null for a balanced object', () => {
    expect(detectTruncatedJSON('{"a": 1}')).toBeNull();
  });

  it('returns null for a balanced nested structure', () => {
    expect(detectTruncatedJSON('{"a": {"b": [1, 2]}}')).toBeNull();
  });

  it('returns null for empty string', () => {
    expect(detectTruncatedJSON('')).toBeNull();
  });

  it('flags an object with an unclosed brace (typical LLM cutoff)', () => {
    const truncated = '{"title": "foo", "description": "bar", "type": "report"';
    expect(detectTruncatedJSON(truncated)).toMatch(/unclosed '\{'/);
  });

  it('flags an unterminated string value', () => {
    const truncated = '{"title": "foo", "content": "this got cut';
    expect(detectTruncatedJSON(truncated)).toBe('unterminated string');
  });

  it('flags an unclosed array', () => {
    const truncated = '[1, 2, 3';
    expect(detectTruncatedJSON(truncated)).toMatch(/unclosed '\['/);
  });

  it('flags structure with both unclosed braces and brackets', () => {
    const truncated = '{"items": [1, 2, 3';
    // Any of the unclosed-bracket/brace reasons is acceptable — both are present.
    expect(detectTruncatedJSON(truncated)).toMatch(/unclosed/);
  });

  it('returns null for malformed-but-balanced JSON (not a truncation signal)', () => {
    // invalid JSON but brackets balanced — should NOT be flagged as truncated
    expect(detectTruncatedJSON('{name: "foo"}')).toBeNull();
  });

  it('ignores braces and quotes inside string values', () => {
    expect(detectTruncatedJSON('{"code": "if (a) { return \\"x\\"; }"}')).toBeNull();
  });

  it('flags deeply nested object truncation', () => {
    const truncated = '{"a": {"b": {"c": "d"';
    expect(detectTruncatedJSON(truncated)).toMatch(/unclosed '\{'/);
  });

  it('flags truncation mid-string inside nested objects', () => {
    const truncated = '{"a": {"b": {"c": "still writing';
    expect(detectTruncatedJSON(truncated)).toBe('unterminated string');
  });
});
|
||||
41
packages/utils/src/detectTruncatedJSON.ts
Normal file
41
packages/utils/src/detectTruncatedJSON.ts
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
/**
 * Detect whether a JSON string looks structurally truncated — typical when an
 * LLM's `max_tokens` budget runs out mid-generation of a tool call payload.
 *
 * The check is a single left-to-right scan that tracks whether the cursor is
 * inside a double-quoted string (honouring backslash escapes) and keeps two
 * independent counters for `{`/`}` and `[`/`]` seen outside of strings. It does
 * not validate JSON syntax.
 *
 * Intended to be called AFTER `JSON.parse` has already failed, to distinguish
 * "truncated by max_tokens" from "malformed but complete".
 *
 * @param text - The raw arguments string to inspect.
 * @returns A short reason string when truncation is suspected:
 *   `'unterminated string'`, `"<n> unclosed '{'"` or `"<n> unclosed '['"`
 *   (checked in that order, only the first match is reported); or `null` when
 *   `text` is empty or the structure looks balanced, in which case a parse
 *   failure is more likely a plain syntax error than truncation.
 */
export const detectTruncatedJSON = (text: string): string | null => {
  if (!text) return null;

  let braces = 0;
  let brackets = 0;
  let inString = false;
  // True when the previous character was a backslash inside a string, so the
  // current character must be skipped rather than interpreted.
  let escaped = false;

  for (const ch of text) {
    if (escaped) {
      escaped = false;
      continue;
    }

    if (inString) {
      // Inside a string only `\` and the closing `"` are significant; braces
      // and brackets are plain content here.
      if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }

    if (ch === '"') inString = true;
    else if (ch === '{') braces++;
    else if (ch === '}') braces--;
    else if (ch === '[') brackets++;
    else if (ch === ']') brackets--;
  }

  // An open string takes priority: it is reported even when braces/brackets
  // are also left open.
  if (inString) return 'unterminated string';
  if (braces > 0) return `${braces} unclosed '{'`;
  if (brackets > 0) return `${brackets} unclosed '['`;
  return null;
};
|
||||
|
|
@ -1,8 +1,9 @@
|
|||
export * from './base64';
|
||||
export * from './dedupeBy';
|
||||
export * from './chunkers';
|
||||
export * from './client/cookie';
|
||||
export * from './dedupeBy';
|
||||
export * from './detectChinese';
|
||||
export * from './detectTruncatedJSON';
|
||||
export * from './env';
|
||||
export * from './error';
|
||||
export * from './folderStructure';
|
||||
|
|
|
|||
110
src/server/services/toolExecution/__tests__/builtin.test.ts
Normal file
110
src/server/services/toolExecution/__tests__/builtin.test.ts
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
import type { ChatToolPayload } from '@lobechat/types';
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
|
||||
import { BuiltinToolsExecutor } from '../builtin';
|
||||
import type { ToolExecutionContext } from '../types';
|
||||
|
||||
// Spy standing in for the tool API handler; the tests assert on whether and
// how it was called.
const mockApiHandler = vi.fn();

// Replace the server runtime registry: every identifier is reported as having
// a runtime, and that runtime exposes `createDocument` backed by the spy above.
vi.mock('../serverRuntimes', () => ({
  hasServerRuntime: vi.fn().mockReturnValue(true),
  getServerRuntime: vi.fn(async () => ({ createDocument: mockApiHandler })),
}));

// Stub the service classes with empty objects.
// NOTE(review): presumably these are instantiated by BuiltinToolsExecutor and
// would otherwise pull in real dependencies — confirm against builtin.ts.
vi.mock('@/server/services/klavis', () => ({
  KlavisService: vi.fn().mockImplementation(() => ({})),
}));
vi.mock('@/server/services/market', () => ({
  MarketService: vi.fn().mockImplementation(() => ({})),
}));
|
||||
|
||||
/**
 * Build a `createDocument` tool-call payload for the `lobe-notebook` tool
 * whose raw `arguments` string is `argsStr`, passed through unmodified.
 */
const buildPayload = (argsStr: string): ChatToolPayload => ({
  apiName: 'createDocument',
  arguments: argsStr,
  id: 't1',
  identifier: 'lobe-notebook',
  type: 'default' as any,
});
|
||||
|
||||
// Minimal execution context shared by every test case below.
const context: ToolExecutionContext = {
  toolManifestMap: {},
  userId: 'user-1',
};
|
||||
|
||||
// Verifies that BuiltinToolsExecutor rejects unparseable argument strings
// before dispatching to the tool runtime, and still dispatches for valid or
// empty arguments.
describe('BuiltinToolsExecutor truncated arguments', () => {
  const executor = new BuiltinToolsExecutor({} as any, 'user-1');

  beforeEach(() => {
    mockApiHandler.mockReset();
  });

  it('short-circuits with TRUNCATED_ARGUMENTS when JSON is cut mid-object', async () => {
    const truncated = '{"title": "Report", "description": "foo", "type": "report"';

    const result = await executor.execute(buildPayload(truncated), context);

    expect(result.success).toBe(false);
    expect(result.error?.code).toBe('TRUNCATED_ARGUMENTS');
    expect(result.content).toMatch(/truncated/i);
    expect(result.content).toMatch(/max_tokens/);
    // The raw truncated payload is echoed back so the model sees exactly what
    // it produced and cannot blame upstream for a different payload.
    expect(result.content).toContain(truncated);
    expect(mockApiHandler).not.toHaveBeenCalled();
  });

  it('short-circuits with TRUNCATED_ARGUMENTS when a string value is unterminated', async () => {
    const truncated = '{"title": "Report", "content": "this is cut';

    const result = await executor.execute(buildPayload(truncated), context);

    expect(result.success).toBe(false);
    expect(result.error?.code).toBe('TRUNCATED_ARGUMENTS');
    expect(result.content).toMatch(/unterminated string/);
    expect(result.content).toContain(truncated);
    expect(mockApiHandler).not.toHaveBeenCalled();
  });

  it('still dispatches to the runtime for valid JSON missing required fields', async () => {
    mockApiHandler.mockResolvedValueOnce({
      content: 'Error: Missing content. The document content is required.',
      success: false,
    });

    const result = await executor.execute(
      buildPayload('{"title": "Report", "type": "report"}'),
      context,
    );

    expect(mockApiHandler).toHaveBeenCalledWith({ title: 'Report', type: 'report' }, context);
    // The schema-level error from the runtime passes through untouched.
    expect(result.success).toBe(false);
    expect(result.content).toMatch(/Missing content/);
  });

  it('returns INVALID_JSON_ARGUMENTS for balanced-but-invalid JSON (not truncated)', async () => {
    // Balanced brackets but invalid syntax (unquoted key). Not a truncation,
    // but still unparseable — reject with a non-truncation error rather than
    // silently passing `{}` to the tool.
    const invalid = '{title: "Report"}';

    const result = await executor.execute(buildPayload(invalid), context);

    expect(result.success).toBe(false);
    expect(result.error?.code).toBe('INVALID_JSON_ARGUMENTS');
    expect(result.content).toMatch(/not valid JSON/);
    expect(result.content).toContain(invalid);
    expect(mockApiHandler).not.toHaveBeenCalled();
  });

  it('still dispatches normally when argsStr is empty', async () => {
    mockApiHandler.mockResolvedValueOnce({ content: 'ok', success: true });

    // Empty arguments are legitimate for tools that take no params —
    // parse falls through to `{}` without triggering the invalid-JSON guard.
    const result = await executor.execute(buildPayload(''), context);

    expect(mockApiHandler).toHaveBeenCalledWith({}, context);
    expect(result.success).toBe(true);
  });
});
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
import { type LobeChatDatabase } from '@lobechat/database';
|
||||
import { type ChatToolPayload } from '@lobechat/types';
|
||||
import { safeParseJSON } from '@lobechat/utils';
|
||||
import { detectTruncatedJSON, safeParseJSON } from '@lobechat/utils';
|
||||
import debug from 'debug';
|
||||
|
||||
import { KlavisService } from '@/server/services/klavis';
|
||||
|
|
@ -25,7 +25,38 @@ export class BuiltinToolsExecutor implements IToolExecutor {
|
|||
context: ToolExecutionContext,
|
||||
): Promise<ToolExecutionResult> {
|
||||
const { identifier, apiName, arguments: argsStr, source } = payload;
|
||||
const args = safeParseJSON(argsStr) || {};
|
||||
const parsed = safeParseJSON(argsStr);
|
||||
|
||||
// When JSON.parse fails, return a dedicated error rather than silently
|
||||
// falling back to `{}`. Passing `{}` to the tool produced generic
|
||||
// "required field missing" errors, which led the model to retry with the
|
||||
// same broken payload. Distinguish a truncated payload (typical when
|
||||
// max_tokens is exhausted mid-tool-call) from plain malformed JSON, and
|
||||
// echo the raw arguments string so the model can verify it is exactly
|
||||
// what it produced.
|
||||
if (parsed === undefined && argsStr) {
|
||||
const truncationReason = detectTruncatedJSON(argsStr);
|
||||
const explanation = truncationReason
|
||||
? `The tool call arguments JSON appears to be truncated (${truncationReason}), ` +
|
||||
`likely because the model's max_tokens budget was exhausted ` +
|
||||
`(possibly by extended-thinking tokens). ` +
|
||||
`Either reduce the size of the content you are about to write, ` +
|
||||
`or ask the user to increase the model's max_tokens ` +
|
||||
`(and/or disable extended thinking or set a separate thinking budget). ` +
|
||||
`Do not retry with the same payload.`
|
||||
: `The tool call arguments string is not valid JSON and could not be parsed, ` +
|
||||
`so the tool was not invoked. Fix the JSON syntax and try again.`;
|
||||
const content = `${explanation}\n\nThe received arguments string was:\n${argsStr}`;
|
||||
const code = truncationReason ? 'TRUNCATED_ARGUMENTS' : 'INVALID_JSON_ARGUMENTS';
|
||||
log('Rejected invalid arguments for %s:%s (%s): %s', identifier, apiName, code, argsStr);
|
||||
return {
|
||||
content,
|
||||
error: { code, message: explanation },
|
||||
success: false,
|
||||
};
|
||||
}
|
||||
|
||||
const args = parsed || {};
|
||||
|
||||
log(
|
||||
'Executing builtin tool: %s:%s (source: %s) with args: %O',
|
||||
|
|
|
|||
Loading…
Reference in a new issue