🐛 fix: guard non-string content in context-engine to prevent e.trim errors (#13753)

🐛 fix: guard non-string content in context-engine to prevent `e.trim is not a function` Two unguarded `.trim()` / string-concatenation paths in the context-engine could throw or produce garbage text when a message's `content` is not a plain string (multimodal parts array, null tool turns). Both are reached in normal chat and trigger `e.trim is not a function` in production. - `resolveTopicReferences`: filter out non-string content in the fallback `lookupMessages` path before calling `.trim()`. Without this guard, the outer try/catch swallows the TypeError and drops the whole fallback. - `MessageContent` processor: normalize `message.content` (string or parts array) before concatenating file context, instead of relying on implicit `toString()` coercion which emitted `[object Object]` into the LLM prompt. Adds regression tests for both paths.
2026-04-21 17:47:27 +00:00 · 2026-04-12 19:27:52 +08:00 · 2026-04-12 19:27:52 +08:00 · 0486be4773
commit 0486be4773
parent f2ee67c3c5
4 changed files with 110 additions and 5 deletions
--- a/packages/context-engine/src/engine/topicReference/tests/resolveTopicReferences.test.ts
+++ b/packages/context-engine/src/engine/topicReference/tests/resolveTopicReferences.test.ts
@ -233,4 +233,46 @@ describe('resolveTopicReferences', () => {
    expect(result![0].summary).toBeUndefined();
    expect(result![0].recentMessages).toBeUndefined();
  });
+
+  // Regression: historical messages may carry non-string content (e.g. multimodal
+  // content parts array, or `null` from a tool-only assistant turn). The fallback
+  // path used to call `m.content?.trim()` / `m.content!.trim()` directly, which
+  // throws `e.trim is not a function` when content is an array. Skipping those
+  // messages is safer than crashing the whole context engine.
+  it('should skip messages whose content is not a string (array / object)', async () => {
+    const lookup = vi.fn().mockResolvedValue({ historySummary: null, title: 'T' });
+    const lookupMessages = vi.fn().mockResolvedValue([
+      // multimodal content as an array of parts — realistic shape from DB
+      { content: [{ text: 'ignored', type: 'text' }] as any, role: 'user' },
+      { content: { some: 'object' } as any, role: 'assistant' },
+      { content: 'plain text message', role: 'user' },
+    ]);
+
+    const result = await resolveTopicReferences(
+      [{ content: '<refer_topic name="T" id="t1" />' }],
+      lookup,
+      lookupMessages,
+    );
+
+    expect(result).toHaveLength(1);
+    expect(result![0].recentMessages).toEqual([{ content: 'plain text message', role: 'user' }]);
+  });
+
+  it('should not throw when every fallback message has non-string content', async () => {
+    const lookup = vi.fn().mockResolvedValue({ historySummary: null, title: 'T' });
+    const lookupMessages = vi.fn().mockResolvedValue([
+      { content: [{ text: 'a', type: 'text' }] as any, role: 'user' },
+      { content: null, role: 'assistant' },
+    ]);
+
+    // Must not throw — falls through to "no-context" like the empty-array case.
+    const result = await resolveTopicReferences(
+      [{ content: '<refer_topic name="T" id="t1" />' }],
+      lookup,
+      lookupMessages,
+    );
+
+    expect(result![0].summary).toBeUndefined();
+    expect(result![0].recentMessages).toBeUndefined();
+  });
 });
--- a/packages/context-engine/src/engine/topicReference/resolveTopicReferences.ts
+++ b/packages/context-engine/src/engine/topicReference/resolveTopicReferences.ts
@ -98,13 +98,16 @@ export async function resolveTopicReferences(
        try {
          const allMessages = await lookupMessages(topicId);

-          // Filter to user/assistant only, take last N, truncate content
+          // Filter to user/assistant only, take last N, truncate content.
+          // Guard typeof: historical messages may carry non-string content
+          // (multimodal parts array, null tool turns) — calling `.trim()` on
+          // those throws `e.trim is not a function` and kills the whole engine.
          const recent = allMessages
            .filter((m) => m.role === 'user' || m.role === 'assistant')
-            .filter((m) => m.content?.trim())
+            .filter((m) => typeof m.content === 'string' && m.content.trim())
            .slice(-MAX_RECENT_MESSAGES)
            .map((m) => ({
-              content: truncate(m.content!.trim(), MAX_MESSAGE_LENGTH),
+              content: truncate((m.content as string).trim(), MAX_MESSAGE_LENGTH),
              role: m.role,
            }));

--- a/packages/context-engine/src/processors/MessageContent.ts
+++ b/packages/context-engine/src/processors/MessageContent.ts
@ -153,8 +153,19 @@ export class MessageContentProcessor extends BaseProcessor {

    const contentParts: UserMessageContentPart[] = [];

-    // Add text content
-    let textContent = message.content || '';
+    // Normalize to a text string. Historical messages may already be in
+    // multimodal parts form (`content` is an array) — naive string
+    // concatenation coerces the array via `toString()` and produces
+    // `[object Object]` garbage. Extract text parts instead.
+    let textContent = '';
+    if (typeof message.content === 'string') {
+      textContent = message.content;
+    } else if (Array.isArray(message.content)) {
+      textContent = message.content
+        .filter((part: any) => part?.type === 'text' && typeof part.text === 'string')
+        .map((part: any) => part.text)
+        .join('\n\n');
+    }

    // Add file context (if file context is enabled and has files, images or videos)
    if ((hasFiles || hasImages || hasVideos) && this.config.fileContext?.enabled) {
--- a/packages/context-engine/src/processors/tests/MessageContent.test.ts
+++ b/packages/context-engine/src/processors/tests/MessageContent.test.ts
@ -287,6 +287,55 @@ describe('MessageContentProcessor', () => {
      // Should not include file context
      expect(result.messages[0].content).toBe('Hello');
    });
+
+    // Regression: when an already-multimodal user message (content is an array
+    // of parts) is re-processed with file context enabled, the old code did
+    // `textContent = message.content || ''` — turning the array back into
+    // `[object Object],[object Object]` via string coercion when concatenated
+    // with filesContext. Processor should instead extract text parts from the
+    // array (or leave the content untouched) rather than emit garbage.
+    it('should not stringify array content when concatenating file context', async () => {
+      mockIsCanUseVision.mockReturnValue(false);
+
+      const processor = new MessageContentProcessor({
+        model: 'gpt-4',
+        provider: 'openai',
+        isCanUseVision: mockIsCanUseVision,
+        fileContext: { enabled: true },
+      });
+
+      const messages: UIChatMessage[] = [
+        {
+          id: 'test',
+          role: 'user',
+          // Already-multimodal content (e.g. a historical user turn that was
+          // previously normalized to parts). Shape matches UserMessageContentPart[].
+          content: [{ text: 'Hello', type: 'text' }] as any,
+          fileList: [
+            {
+              id: 'file1',
+              name: 'test.txt',
+              fileType: 'text/plain',
+              size: 100,
+              url: 'http://example.com/test.txt',
+            },
+          ],
+          createdAt: Date.now(),
+          updatedAt: Date.now(),
+        },
+      ];
+
+      const result = await processor.process(createContext(messages));
+
+      const content = result.messages[0].content as any[];
+      expect(Array.isArray(content)).toBe(true);
+      const textPart = content.find((p) => p.type === 'text');
+      expect(textPart).toBeDefined();
+      // Must not contain the `[object Object]` string coercion artifact.
+      expect(textPart.text).not.toContain('[object Object]');
+      // Must preserve the original text payload.
+      expect(textPart.text).toContain('Hello');
+    });
  });

  describe('Reasoning/thinking content', () => {