🐛 fix: improve crawler error handling and timeout cancellation (#12487)

2026-04-21 09:37:28 +00:00 · 2026-02-26 22:59:10 +08:00 · 2026-02-26 22:59:10 +08:00 · 306c50704e
commit 306c50704e
parent 0365a14e16
38 changed files with 1462 additions and 784 deletions
--- a/.agents/skills/pr/SKILL.md
+++ b/.agents/skills/pr/SKILL.md
@ -0,0 +1,55 @@
+---
+name: pr
+description: "Create a PR for the current branch. Use when the user asks to create a pull request, submit PR, or says 'pr'."
+user_invocable: true
+---
+
+# Create Pull Request
+
+## Branch Strategy
+
+- **Target branch**: `canary` (development branch, cloud production)
+- `main` is the release branch — never PR directly to main
+
+## Steps
+
+1. **Gather context** (run in parallel):
+   - `git branch --show-current` — current branch name
+   - `git rev-parse --abbrev-ref @{u} 2>/dev/null` — remote tracking status
+   - `git log --oneline @{u}..HEAD 2>/dev/null` — unpushed commits (empty output or an error means nothing to compare against the upstream)
+   - `gh pr list --head "$(git branch --show-current)" --json number,title,state,url` — existing PR
+   - `git log --oneline origin/canary..HEAD` — commit history for PR title
+   - `git diff --stat --stat-count=20 origin/canary..HEAD` — change summary
+
+2. **Push if needed**:
+   - No upstream: `git push -u origin $(git branch --show-current)`
+   - Has upstream: `git push origin $(git branch --show-current)`
+
+3. **Search related GitHub issues**:
+   - `gh issue list --search "<keywords>" --state all --limit 10`
+   - Only link issues with matching scope (avoid large umbrella issues)
+   - Skip if no matching issue found
+
+4. **Create PR** with `gh pr create --base canary`:
+   - Title: `<gitmoji> <type>(<scope>): <description>`
+   - Body: based on PR template (`.github/PULL_REQUEST_TEMPLATE.md`), fill checkboxes
+   - Link related GitHub issues using magic keywords (`Fixes #123`, `Closes #123`)
+   - Link Linear issues if applicable (`Fixes LOBE-xxx`)
+   - Use HEREDOC for body to preserve formatting
+
+5. **Open in browser**: `gh pr view --web`
+
+## PR Template
+
+Use `.github/PULL_REQUEST_TEMPLATE.md` as the body structure. Key sections:
+
+- **Change Type**: Check the appropriate gitmoji type
+- **Related Issue**: Link GitHub/Linear issues with magic keywords
+- **Description of Change**: Summarize what and why
+- **How to Test**: Describe test approach, check relevant boxes
+
+## Notes
+
+- **Release impact**: PR titles starting with `✨ feat` or `🐛 fix` trigger releases — use carefully
+- **Language**: All PR content must be in English
+- If a PR already exists for the branch, inform the user instead of creating a duplicate
--- a/.agents/skills/upstash-workflow/SKILL.md
+++ b/.agents/skills/upstash-workflow/SKILL.md
@ -1,3 +1,8 @@
+---
+name: upstash-workflow
+description: 'Upstash Workflow implementation guide. Use when creating async workflows with QStash, implementing fan-out patterns, or building 3-layer workflow architecture (process → paginate → execute).'
+---
+
 # Upstash Workflow Implementation Guide

 This guide covers the standard patterns for implementing Upstash Workflow + QStash async workflows in the LobeHub codebase.
--- a/AGENTS.md
+++ b/AGENTS.md
@ -38,7 +38,8 @@ lobe-chat/

 ### Git Workflow

- The current release branch is `next` until v2.0.0 is officially released
+- **Branch strategy**: `canary` is the development branch (cloud production); `main` is the release branch (periodically cherry-picks from canary)
+- New branches should be created from `canary`; PRs should target `canary`
 - Use rebase for git pull
 - Git commit messages should prefix with gitmoji
 - Git branch name format: `username/feat/feature-name`
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -33,6 +33,8 @@ lobe-chat/

 ### Git Workflow

+- **Branch strategy**: `canary` is the development branch (cloud production); `main` is the release branch (periodically cherry-picks from canary)
+- New branches should be created from `canary`; PRs should target `canary`
 - Use rebase for `git pull`
 - Commit messages: prefix with gitmoji
 - Branch format: `<type>/<feature-name>`
--- a/GEMINI.md
+++ b/GEMINI.md
@ -33,6 +33,8 @@ lobe-chat/

 ### Git Workflow

+- **Branch strategy**: `canary` is the development branch (cloud production); `main` is the release branch (periodically cherry-picks from canary)
+- New branches should be created from `canary`; PRs should target `canary`
 - Use rebase for `git pull`
 - Commit messages: prefix with gitmoji
 - Branch format: `<type>/<feature-name>`
--- a/docs/self-hosting/advanced/online-search.mdx
+++ b/docs/self-hosting/advanced/online-search.mdx
@ -51,6 +51,24 @@ Supported crawler types are listed below:

 ---

+## `CRAWL_CONCURRENCY`
+
+Controls crawler concurrency per crawl task. The default is `3`. On low-resource servers, use `1` to reduce CPU spikes.
+
+```env
+CRAWL_CONCURRENCY=3
+```
+
+## `CRAWLER_RETRY`
+
+Controls retry attempts per URL on crawl failures. The default is `1` (up to 2 attempts total).
+
+```env
+CRAWLER_RETRY=1
+```
+
+---
+
 ## `SEARCH_PROVIDERS`

 Configure which search engine providers to use for web search.
--- a/docs/self-hosting/advanced/online-search.zh-CN.mdx
+++ b/docs/self-hosting/advanced/online-search.zh-CN.mdx
@ -46,6 +46,24 @@ CRAWLER_IMPLS="naive,search1api"

 ---

+## `CRAWL_CONCURRENCY`
+
+控制单次网页抓取任务的并发数量，默认值为 `3`。在低配置服务器上建议设置为 `1` 以降低 CPU 峰值。
+
+```env
+CRAWL_CONCURRENCY=3
+```
+
+## `CRAWLER_RETRY`
+
+控制单个 URL 的抓取失败重试次数，默认值为 `1`（即最多尝试 2 次）。
+
+```env
+CRAWLER_RETRY=1
+```
+
+---
+
 ## `SEARCH_PROVIDERS`

 配置联网搜索使用的搜索引擎提供商。
--- a/packages/web-crawler/src/tests/crawler.test.ts
+++ b/packages/web-crawler/src/tests/crawler.test.ts
@ -1,4 +1,4 @@
-import { describe, expect, it, vi } from 'vitest';
+import { beforeEach, describe, expect, it, vi } from 'vitest';

 import { Crawler } from '../crawler';

@ -19,6 +19,16 @@ vi.mock('../utils/appUrlRules', () => ({
 }));

 describe('Crawler', () => {
+  beforeEach(async () => {
+    vi.clearAllMocks();
+    // Reset applyUrlRules to default (no impls override)
+    const { applyUrlRules } = await import('../utils/appUrlRules');
+    vi.mocked(applyUrlRules).mockReturnValue({
+      transformedUrl: 'https://example.com',
+      filterOptions: {},
+    });
+  });
+
  const crawler = new Crawler();

  it('should crawl successfully with default impls', async () => {
@ -194,11 +204,12 @@ describe('Crawler', () => {
    });

    expect(result).toEqual({
-      crawler: undefined,
+      crawler: 'browserless',
      data: {
-        content: 'Fail to crawl the page. Error type: UnknownError, error message: undefined',
-        errorMessage: undefined,
-        errorType: 'UnknownError',
+        content:
+          'Fail to crawl the page. Error type: EmptyCrawlResultError, error message: browserless returned empty or short content',
+        errorMessage: 'browserless returned empty or short content',
+        errorType: 'EmptyCrawlResultError',
      },
      originalUrl: 'https://example.com',
      transformedUrl: undefined,
--- a/packages/web-crawler/src/crawImpl/tests/browserless.test.ts
+++ b/packages/web-crawler/src/crawImpl/tests/browserless.test.ts
@ -1,7 +1,13 @@
 import { describe, expect, it, vi } from 'vitest';

+import * as withTimeoutModule from '../../utils/withTimeout';
 import { browserless } from '../browserless';

+// Mock withTimeout to just call the factory function directly (bypassing real timeout)
+vi.spyOn(withTimeoutModule, 'withTimeout').mockImplementation((fn) =>
+  fn(new AbortController().signal),
+);
+
 describe('browserless', () => {
  it('should throw BrowserlessInitError when env vars not set', async () => {
    const originalEnv = { ...process.env };
@ -16,17 +22,22 @@ describe('browserless', () => {
    process.env = originalEnv;
  });

-  it('should return undefined on fetch error', async () => {
+  it('should throw NetworkConnectionError on fetch failed', async () => {
    process.env.BROWSERLESS_TOKEN = 'test-token';
-    global.fetch = vi.fn().mockRejectedValue(new Error('Fetch error'));
+    global.fetch = vi.fn().mockRejectedValue(new TypeError('fetch failed'));

-    const result = await browserless('https://example.com', { filterOptions: {} });
-    expect(result).toBeUndefined();
+    const { NetworkConnectionError } = await import('../../utils/errorType');
+    await expect(browserless('https://example.com', { filterOptions: {} })).rejects.toThrow(
+      NetworkConnectionError,
+    );
  });

  it('should return undefined when content is empty', async () => {
    process.env.BROWSERLESS_TOKEN = 'test-token';
    global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      status: 200,
+      statusText: 'OK',
      text: vi.fn().mockResolvedValue('<html></html>'),
    } as any);

@ -37,6 +48,9 @@ describe('browserless', () => {
  it('should return undefined when title is "Just a moment..."', async () => {
    process.env.BROWSERLESS_TOKEN = 'test-token';
    global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      status: 200,
+      statusText: 'OK',
      text: vi.fn().mockResolvedValue('<html><title>Just a moment...</title></html>'),
    } as any);

@ -46,7 +60,12 @@ describe('browserless', () => {

  it('should return crawl result on successful fetch', async () => {
    process.env.BROWSERLESS_TOKEN = 'test-token';
+    const longContent =
+      'This is a test paragraph with enough content to pass the length check. '.repeat(3);
    global.fetch = vi.fn().mockResolvedValue({
+      ok: true,
+      status: 200,
+      statusText: 'OK',
      text: vi.fn().mockResolvedValue(`
        <html>
          <head>
@ -54,7 +73,7 @@ describe('browserless', () => {
            <meta name="description" content="Test Description">
          </head>
          <body>
-            <h1>Test Content</h1>
+            <p>${longContent}</p>
          </body>
        </html>
      `),
@ -76,6 +95,9 @@ describe('browserless', () => {
  it('should include rejectRequestPattern in request payload', async () => {
    process.env.BROWSERLESS_TOKEN = 'test-token';
    const fetchMock = vi.fn().mockResolvedValue({
+      ok: true,
+      status: 200,
+      statusText: 'OK',
      text: vi.fn().mockResolvedValue('<html><title>Test</title></html>'),
    });
    global.fetch = fetchMock;
@ -90,9 +112,7 @@ describe('browserless', () => {

  it('should allow requests to permitted file types', async () => {
    const allowedExtensions = ['html', 'css', 'js', 'json', 'xml', 'webmanifest', 'txt', 'md'];
-    const pattern = new RegExp(
-      '.*\\.(?!(html|css|js|json|xml|webmanifest|txt|md)(\\?|#|$))[\\w-]+(?:[?#].*)?$',
-    );
+    const pattern = /.*\.(?!(html|css|js|json|xml|webmanifest|txt|md)(\?|#|$))[\w-]+(?:[?#].*)?$/;

    allowedExtensions.forEach((ext) => {
      expect(`file.${ext}`).not.toMatch(pattern);
@ -103,9 +123,7 @@ describe('browserless', () => {

  it('should reject requests to non-permitted file types', async () => {
    const rejectedExtensions = ['jpg', 'png', 'gif', 'pdf', 'doc', 'mp4', 'wav'];
-    const pattern = new RegExp(
-      '.*\\.(?!(html|css|js|json|xml|webmanifest|txt|md)(\\?|#|$))[\\w-]+(?:[?#].*)?$',
-    );
+    const pattern = /.*\.(?!(html|css|js|json|xml|webmanifest|txt|md)(\?|#|$))[\w-]+(?:[?#].*)?$/;

    rejectedExtensions.forEach((ext) => {
      expect(`file.${ext}`).toMatch(pattern);
@ -114,14 +132,16 @@ describe('browserless', () => {
    });
  });

-  it('should use correct URL when BROWSERLESS_URL is provided', async () => {
-    const customUrl = 'https://custom.browserless.io';
+  it('should call fetch with the base URL and content path', async () => {
    const originalEnv = { ...process.env };
    process.env.BROWSERLESS_TOKEN = 'test-token';
-    process.env.BROWSERLESS_URL = customUrl;
    global.fetch = vi.fn().mockImplementation((url) => {
-      expect(url).toContain(customUrl);
+      // BASE_URL is captured at module load time, so we verify fetch is called with /content path
+      expect(url).toContain('/content');
      return Promise.resolve({
+        ok: true,
+        status: 200,
+        statusText: 'OK',
        text: () => Promise.resolve('<html><title>Test</title></html>'),
      });
    });
--- a/packages/web-crawler/src/crawImpl/tests/exa.test.ts
+++ b/packages/web-crawler/src/crawImpl/tests/exa.test.ts
@ -1,5 +1,6 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest';

+import { createMockResponse } from '../../test-utils';
 import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../../utils/errorType';
 import { exa } from '../exa';

@ -18,23 +19,20 @@ describe('exa crawler', () => {
  it('should successfully crawl content with API key', async () => {
    process.env.EXA_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        requestId: 'test-request-id',
-        results: [
-          {
-            id: 'test-id',
-            title: 'Test Article',
-            url: 'https://example.com',
-            text: 'This is a test article with enough content to pass the length check. '.repeat(3),
-            author: 'Test Author',
-            publishedDate: '2023-01-01',
-            summary: 'Test summary',
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      requestId: 'test-request-id',
+      results: [
+        {
+          id: 'test-id',
+          title: 'Test Article',
+          url: 'https://example.com',
+          text: 'This is a test article with enough content to pass the length check. '.repeat(3),
+          author: 'Test Author',
+          publishedDate: '2023-01-01',
+          summary: 'Test summary',
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -51,23 +49,20 @@ describe('exa crawler', () => {
      url: 'https://example.com',
    });

-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
  });

  it('should handle missing API key', async () => {
    // API key is undefined
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        results: [
-          {
-            title: 'Test Article',
-            url: 'https://example.com',
-            text: 'Test content with sufficient length. '.repeat(5),
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      results: [
+        {
+          title: 'Test Article',
+          url: 'https://example.com',
+          text: 'Test content with sufficient length. '.repeat(5),
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -75,19 +70,16 @@ describe('exa crawler', () => {
    await exa('https://example.com', { filterOptions: {} });

    // Check that fetch was called with empty API key header
-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
  });

  it('should return undefined when no results are returned', async () => {
    process.env.EXA_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        requestId: 'test-request-id',
-        results: [],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      requestId: 'test-request-id',
+      results: [],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -108,18 +100,15 @@ describe('exa crawler', () => {
  it('should return undefined for short content', async () => {
    process.env.EXA_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        results: [
-          {
-            title: 'Test Article',
-            url: 'https://example.com',
-            text: 'Short', // Content too short
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      results: [
+        {
+          title: 'Test Article',
+          url: 'https://example.com',
+          text: 'Short', // Content too short
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -132,11 +121,11 @@ describe('exa crawler', () => {
  it('should throw PageNotFoundError for 404 status', async () => {
    process.env.EXA_API_KEY = 'test-api-key';

-    const mockResponse = {
+    const mockResponse = createMockResponse('Not Found', {
      ok: false,
      status: 404,
      statusText: 'Not Found',
-    };
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -149,11 +138,11 @@ describe('exa crawler', () => {
  it('should throw error for other HTTP errors', async () => {
    process.env.EXA_API_KEY = 'test-api-key';

-    const mockResponse = {
+    const mockResponse = createMockResponse('', {
      ok: false,
      status: 500,
      statusText: 'Internal Server Error',
-    };
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -167,7 +156,7 @@ describe('exa crawler', () => {
    process.env.EXA_API_KEY = 'test-api-key';

    const { withTimeout } = await import('../../utils/withTimeout');
-    vi.mocked(withTimeout).mockRejectedValue(new Error('fetch failed'));
+    vi.mocked(withTimeout).mockRejectedValue(new TypeError('fetch failed'));

    await expect(exa('https://example.com', { filterOptions: {} })).rejects.toThrow(
      NetworkConnectionError,
@ -198,42 +187,37 @@ describe('exa crawler', () => {
    );
  });

-  it('should return undefined when JSON parsing fails', async () => {
+  it('should throw ResponseBodyParseError when JSON parsing fails', async () => {
    process.env.EXA_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
+    const mockResponse = createMockResponse('not json', { ok: true });
+    mockResponse.json = vi.fn().mockRejectedValue(new Error('Invalid JSON'));
+    mockResponse.clone.mockReturnValue({
+      ...mockResponse,
      json: vi.fn().mockRejectedValue(new Error('Invalid JSON')),
-    };
+      text: vi.fn().mockResolvedValue('not json'),
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);

-    const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
-
-    const result = await exa('https://example.com', { filterOptions: {} });
-
-    expect(result).toBeUndefined();
-    expect(consoleSpy).toHaveBeenCalled();
-
-    consoleSpy.mockRestore();
+    await expect(exa('https://example.com', { filterOptions: {} })).rejects.toThrow(
+      'Exa returned non-JSON response: not json',
+    );
  });

  it('should use result URL when available', async () => {
    process.env.EXA_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        results: [
-          {
-            title: 'Test Article',
-            url: 'https://redirected.example.com',
-            text: 'Test content with sufficient length. '.repeat(5),
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      results: [
+        {
+          title: 'Test Article',
+          url: 'https://redirected.example.com',
+          text: 'Test content with sufficient length. '.repeat(5),
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -246,18 +230,15 @@ describe('exa crawler', () => {
  it('should fallback to original URL when result URL is missing', async () => {
    process.env.EXA_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        results: [
-          {
-            title: 'Test Article',
-            text: 'Test content with sufficient length. '.repeat(5),
-            // url is missing
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      results: [
+        {
+          title: 'Test Article',
+          text: 'Test content with sufficient length. '.repeat(5),
+          // url is missing
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
--- a/packages/web-crawler/src/crawImpl/tests/firecrawl.test.ts
+++ b/packages/web-crawler/src/crawImpl/tests/firecrawl.test.ts
@ -1,5 +1,6 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest';

+import { createMockResponse } from '../../test-utils';
 import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../../utils/errorType';
 import { firecrawl } from '../firecrawl';

@ -19,25 +20,23 @@ describe('firecrawl crawler', () => {
  it('should successfully crawl content with API key', async () => {
    process.env.FIRECRAWL_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        success: true,
-        data: {
-          markdown:
-            'This is a test markdown content with enough length to pass validation. '.repeat(3),
-          metadata: {
-            title: 'Test Article',
-            description: 'Test description',
-            sourceURL: 'https://example.com',
-            statusCode: 200,
-            language: 'en',
-            keywords: 'test',
-            robots: 'index',
-          },
+    const mockResponse = createMockResponse({
+      success: true,
+      data: {
+        markdown: 'This is a test markdown content with enough length to pass validation. '.repeat(
+          3,
+        ),
+        metadata: {
+          title: 'Test Article',
+          description: 'Test description',
+          sourceURL: 'https://example.com',
+          statusCode: 200,
+          language: 'en',
+          keywords: 'test',
+          robots: 'index',
        },
-      }),
-    };
+      },
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -55,58 +54,52 @@ describe('firecrawl crawler', () => {
      url: 'https://example.com',
    });

-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
  });

  it('should handle missing API key', async () => {
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        success: true,
-        data: {
-          markdown: 'Test content with sufficient length. '.repeat(5),
-          metadata: {
-            title: 'Test',
-            description: 'Test',
-            sourceURL: 'https://example.com',
-            statusCode: 200,
-            language: 'en',
-            keywords: 'test',
-            robots: 'index',
-          },
+    const mockResponse = createMockResponse({
+      success: true,
+      data: {
+        markdown: 'Test content with sufficient length. '.repeat(5),
+        metadata: {
+          title: 'Test',
+          description: 'Test',
+          sourceURL: 'https://example.com',
+          statusCode: 200,
+          language: 'en',
+          keywords: 'test',
+          robots: 'index',
        },
-      }),
-    };
+      },
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);

    await firecrawl('https://example.com', { filterOptions: {} });

-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
  });

  it('should return undefined for short content', async () => {
    process.env.FIRECRAWL_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        success: true,
-        data: {
-          markdown: 'Short', // Content too short
-          metadata: {
-            title: 'Test',
-            description: 'Test',
-            sourceURL: 'https://example.com',
-            statusCode: 200,
-            language: 'en',
-            keywords: 'test',
-            robots: 'index',
-          },
+    const mockResponse = createMockResponse({
+      success: true,
+      data: {
+        markdown: 'Short', // Content too short
+        metadata: {
+          title: 'Test',
+          description: 'Test',
+          sourceURL: 'https://example.com',
+          statusCode: 200,
+          language: 'en',
+          keywords: 'test',
+          robots: 'index',
        },
-      }),
-    };
+      },
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -119,24 +112,21 @@ describe('firecrawl crawler', () => {
  it('should return undefined when markdown is missing', async () => {
    process.env.FIRECRAWL_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        success: true,
-        data: {
-          // markdown is missing
-          metadata: {
-            title: 'Test',
-            description: 'Test',
-            sourceURL: 'https://example.com',
-            statusCode: 200,
-            language: 'en',
-            keywords: 'test',
-            robots: 'index',
-          },
+    const mockResponse = createMockResponse({
+      success: true,
+      data: {
+        // markdown is missing
+        metadata: {
+          title: 'Test',
+          description: 'Test',
+          sourceURL: 'https://example.com',
+          statusCode: 200,
+          language: 'en',
+          keywords: 'test',
+          robots: 'index',
        },
-      }),
-    };
+      },
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -149,11 +139,11 @@ describe('firecrawl crawler', () => {
  it('should throw PageNotFoundError for 404 status', async () => {
    process.env.FIRECRAWL_API_KEY = 'test-api-key';

-    const mockResponse = {
+    const mockResponse = createMockResponse('Not Found', {
      ok: false,
      status: 404,
      statusText: 'Not Found',
-    };
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -166,11 +156,11 @@ describe('firecrawl crawler', () => {
  it('should throw error for other HTTP errors', async () => {
    process.env.FIRECRAWL_API_KEY = 'test-api-key';

-    const mockResponse = {
+    const mockResponse = createMockResponse('', {
      ok: false,
      status: 500,
      statusText: 'Internal Server Error',
-    };
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -184,7 +174,7 @@ describe('firecrawl crawler', () => {
    process.env.FIRECRAWL_API_KEY = 'test-api-key';

    const { withTimeout } = await import('../../utils/withTimeout');
-    vi.mocked(withTimeout).mockRejectedValue(new Error('fetch failed'));
+    vi.mocked(withTimeout).mockRejectedValue(new TypeError('fetch failed'));

    await expect(firecrawl('https://example.com', { filterOptions: {} })).rejects.toThrow(
      NetworkConnectionError,
@ -217,54 +207,49 @@ describe('firecrawl crawler', () => {
    );
  });

-  it('should return undefined when JSON parsing fails', async () => {
+  it('should throw ResponseBodyParseError when JSON parsing fails', async () => {
    process.env.FIRECRAWL_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
+    const mockResponse = createMockResponse('not json', { ok: true });
+    mockResponse.json = vi.fn().mockRejectedValue(new Error('Invalid JSON'));
+    mockResponse.clone.mockReturnValue({
+      ...mockResponse,
      json: vi.fn().mockRejectedValue(new Error('Invalid JSON')),
-    };
+      text: vi.fn().mockResolvedValue('not json'),
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);

-    const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
-
-    const result = await firecrawl('https://example.com', { filterOptions: {} });
-
-    expect(result).toBeUndefined();
-    expect(consoleSpy).toHaveBeenCalled();
-
-    consoleSpy.mockRestore();
+    await expect(firecrawl('https://example.com', { filterOptions: {} })).rejects.toThrow(
+      'Firecrawl returned non-JSON response: not json',
+    );
  });

  it('should handle metadata with all optional fields', async () => {
    process.env.FIRECRAWL_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        success: true,
-        data: {
-          markdown: 'Complete test content with all metadata fields provided. '.repeat(3),
-          metadata: {
-            title: 'Complete Test Article',
-            description: 'Complete test description',
-            keywords: 'test,complete,article',
-            language: 'en',
-            ogDescription: 'OG description',
-            ogImage: 'https://example.com/image.jpg',
-            ogLocaleAlternate: ['en-US', 'fr-FR'],
-            ogSiteName: 'Example Site',
-            ogTitle: 'OG Title',
-            ogUrl: 'https://example.com/og',
-            robots: 'index,follow',
-            statusCode: 200,
-            sourceURL: 'https://example.com',
-          },
+    const mockResponse = createMockResponse({
+      success: true,
+      data: {
+        markdown: 'Complete test content with all metadata fields provided. '.repeat(3),
+        metadata: {
+          title: 'Complete Test Article',
+          description: 'Complete test description',
+          keywords: 'test,complete,article',
+          language: 'en',
+          ogDescription: 'OG description',
+          ogImage: 'https://example.com/image.jpg',
+          ogLocaleAlternate: ['en-US', 'fr-FR'],
+          ogSiteName: 'Example Site',
+          ogTitle: 'OG Title',
+          ogUrl: 'https://example.com/og',
+          robots: 'index,follow',
+          statusCode: 200,
+          sourceURL: 'https://example.com',
        },
-      }),
-    };
+      },
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
--- a/packages/web-crawler/src/crawImpl/tests/jina.test.ts
+++ b/packages/web-crawler/src/crawImpl/tests/jina.test.ts
@ -1,29 +1,44 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest';

+import { createMockResponse } from '../../test-utils';
+import * as withTimeoutModule from '../../utils/withTimeout';
 import { jina } from '../jina';

+// Mock withTimeout to just call the factory function directly (bypassing real timeout)
+vi.spyOn(withTimeoutModule, 'withTimeout').mockImplementation((fn) =>
+  fn(new AbortController().signal),
+);
+
 describe('jina crawler', () => {
  const mockFetch = vi.fn();
  global.fetch = mockFetch;

  beforeEach(() => {
    vi.resetAllMocks();
+    // Re-apply the withTimeout spy after resetAllMocks
+    vi.spyOn(withTimeoutModule, 'withTimeout').mockImplementation((fn) =>
+      fn(new AbortController().signal),
+    );
  });

  it('should crawl url successfully', async () => {
-    const mockResponse = {
-      ok: true,
-      json: () =>
-        Promise.resolve({
-          code: 200,
-          data: {
-            content: 'test content',
-            description: 'test description',
-            siteName: 'test site',
-            title: 'test title',
-          },
-        }),
-    };
+    const testContent =
+      'This is a test content that is long enough to pass the minimum length validation check. '.repeat(
+        2,
+      );
+
+    const mockResponse = createMockResponse(
+      {
+        code: 200,
+        data: {
+          content: testContent,
+          description: 'test description',
+          siteName: 'test site',
+          title: 'test title',
+        },
+      },
+      { ok: true },
+    );

    mockFetch.mockResolvedValue(mockResponse);

@ -38,13 +53,14 @@ describe('jina crawler', () => {
        'Authorization': 'Bearer test-key',
        'x-send-from': 'LobeChat Community',
      },
+      signal: expect.any(AbortSignal),
    });

    expect(result).toEqual({
-      content: 'test content',
+      content: testContent,
      contentType: 'text',
      description: 'test description',
-      length: 12,
+      length: testContent.length,
      siteName: 'test site',
      title: 'test title',
      url: 'https://example.com',
@ -54,16 +70,15 @@ describe('jina crawler', () => {
  it('should use JINA_READER_API_KEY from env if apiKey not provided', async () => {
    process.env.JINA_READER_API_KEY = 'env-reader-key';

-    const mockResponse = {
-      ok: true,
-      json: () =>
-        Promise.resolve({
-          code: 200,
-          data: {
-            content: 'test content',
-          },
-        }),
-    };
+    const mockResponse = createMockResponse(
+      {
+        code: 200,
+        data: {
+          content: 'test content',
+        },
+      },
+      { ok: true },
+    );

    mockFetch.mockResolvedValue(mockResponse);

@ -75,6 +90,7 @@ describe('jina crawler', () => {
        'Authorization': 'Bearer env-reader-key',
        'x-send-from': 'LobeChat Community',
      },
+      signal: expect.any(AbortSignal),
    });

    delete process.env.JINA_READER_API_KEY;
@ -83,16 +99,15 @@ describe('jina crawler', () => {
  it('should use JINA_API_KEY from env if apiKey and JINA_READER_API_KEY not provided', async () => {
    process.env.JINA_API_KEY = 'env-key';

-    const mockResponse = {
-      ok: true,
-      json: () =>
-        Promise.resolve({
-          code: 200,
-          data: {
-            content: 'test content',
-          },
-        }),
-    };
+    const mockResponse = createMockResponse(
+      {
+        code: 200,
+        data: {
+          content: 'test content',
+        },
+      },
+      { ok: true },
+    );

    mockFetch.mockResolvedValue(mockResponse);

@ -104,22 +119,22 @@ describe('jina crawler', () => {
        'Authorization': 'Bearer env-key',
        'x-send-from': 'LobeChat Community',
      },
+      signal: expect.any(AbortSignal),
    });

    delete process.env.JINA_API_KEY;
  });

  it('should send empty Authorization header if no api key provided', async () => {
-    const mockResponse = {
-      ok: true,
-      json: () =>
-        Promise.resolve({
-          code: 200,
-          data: {
-            content: 'test content',
-          },
-        }),
-    };
+    const mockResponse = createMockResponse(
+      {
+        code: 200,
+        data: {
+          content: 'test content',
+        },
+      },
+      { ok: true },
+    );

    mockFetch.mockResolvedValue(mockResponse);

@ -131,11 +146,14 @@ describe('jina crawler', () => {
        'Authorization': '',
        'x-send-from': 'LobeChat Community',
      },
+      signal: expect.any(AbortSignal),
    });
  });

  it('should return undefined if response is not ok', async () => {
-    mockFetch.mockResolvedValue({ ok: false });
+    mockFetch.mockResolvedValue(
+      createMockResponse(null, { ok: false, status: 500, statusText: 'Internal Server Error' }),
+    );

    const result = await jina('https://example.com', { filterOptions: {} });

@ -143,14 +161,13 @@ describe('jina crawler', () => {
  });

  it('should return undefined if response code is not 200', async () => {
-    const mockResponse = {
-      ok: true,
-      json: () =>
-        Promise.resolve({
-          code: 400,
-          message: 'Bad Request',
-        }),
-    };
+    const mockResponse = createMockResponse(
+      {
+        code: 400,
+        message: 'Bad Request',
+      },
+      { ok: true },
+    );

    mockFetch.mockResolvedValue(mockResponse);

@ -159,11 +176,11 @@ describe('jina crawler', () => {
    expect(result).toBeUndefined();
  });

-  it('should return undefined if fetch throws error', async () => {
+  it('should throw error if fetch throws non-fetch-failed error', async () => {
    mockFetch.mockRejectedValue(new Error('Network error'));

-    const result = await jina('https://example.com', { filterOptions: {} });
-
-    expect(result).toBeUndefined();
+    await expect(jina('https://example.com', { filterOptions: {} })).rejects.toThrow(
+      'Network error',
+    );
  });
 });
--- a/packages/web-crawler/src/crawImpl/tests/naive.test.ts
+++ b/packages/web-crawler/src/crawImpl/tests/naive.test.ts
@ -22,9 +22,10 @@ describe('naive crawler', () => {
    vi.clearAllMocks();
  });

-  it('should return undefined for normal pages (due to cloudflare logic)', async () => {
+  it('should return content for normal pages', async () => {
    const mockResponse = {
      status: 200,
+      ok: true,
      headers: new Map([['content-type', 'text/html']]),
      text: vi.fn().mockResolvedValue('<html><body>Test content</body></html>'),
    };
@ -34,8 +35,8 @@ describe('naive crawler', () => {

    const { htmlToMarkdown } = await import('../../utils/htmlToMarkdown');
    vi.mocked(htmlToMarkdown).mockReturnValue({
-      content: 'Test content'.padEnd(101, ' '), // Ensure length > 100
-      title: 'Normal Page Title', // Not "Just a moment..." so it returns undefined
+      content: 'Test content'.padEnd(101, ' '),
+      title: 'Normal Page Title',
      description: 'Test description',
      siteName: 'Test Site',
      length: 101,
@ -43,13 +44,22 @@ describe('naive crawler', () => {

    const result = await naive('https://example.com', { filterOptions: {} });

-    expect(result).toBeUndefined();
+    expect(result).toEqual({
+      content: 'Test content'.padEnd(101, ' '),
+      contentType: 'text',
+      description: 'Test description',
+      length: 101,
+      siteName: 'Test Site',
+      title: 'Normal Page Title',
+      url: 'https://example.com',
+    });
  });

  it('should successfully crawl JSON content', async () => {
    const mockJsonData = { message: 'Hello world', data: [1, 2, 3] };
    const mockResponse = {
      status: 200,
+      ok: true,
      headers: new Map([['content-type', 'application/json']]),
      clone: () => ({
        json: vi.fn().mockResolvedValue(mockJsonData),
@ -74,6 +84,7 @@ describe('naive crawler', () => {
    const mockText = '{"invalid": json}';
    const mockResponse = {
      status: 200,
+      ok: true,
      headers: new Map([['content-type', 'application/json']]),
      clone: () => ({
        json: vi.fn().mockRejectedValue(new Error('Invalid JSON')),
@ -97,6 +108,7 @@ describe('naive crawler', () => {
  it('should return undefined for short content', async () => {
    const mockResponse = {
      status: 200,
+      ok: true,
      headers: new Map([['content-type', 'text/html']]),
      text: vi.fn().mockResolvedValue('<html><body>Short</body></html>'),
    };
@ -116,9 +128,10 @@ describe('naive crawler', () => {
    expect(result).toBeUndefined();
  });

-  it('should return content when NOT blocked by Cloudflare', async () => {
+  it('should return undefined when blocked by Cloudflare', async () => {
    const mockResponse = {
      status: 200,
+      ok: true,
      headers: new Map([['content-type', 'text/html']]),
      text: vi.fn().mockResolvedValue('<html><body>Normal content</body></html>'),
    };
@ -129,7 +142,7 @@ describe('naive crawler', () => {
    const { htmlToMarkdown } = await import('../../utils/htmlToMarkdown');
    vi.mocked(htmlToMarkdown).mockReturnValue({
      content: 'Test content'.padEnd(101, ' '),
-      title: 'Just a moment...', // Cloudflare blocking page - this will cause return
+      title: 'Just a moment...', // Cloudflare blocking page
      description: 'Test description',
      siteName: 'Test Site',
      length: 101,
@ -137,15 +150,21 @@ describe('naive crawler', () => {

    const result = await naive('https://example.com', { filterOptions: {} });

-    expect(result).toEqual({
-      content: 'Test content'.padEnd(101, ' '),
-      contentType: 'text',
-      description: 'Test description',
-      length: 101,
-      siteName: 'Test Site',
-      title: 'Just a moment...',
-      url: 'https://example.com',
-    });
+    expect(result).toBeUndefined();
+  });
+
+  it('should throw error for non-ok status codes', async () => {
+    const mockResponse = {
+      status: 500,
+      ok: false,
+      statusText: 'Internal Server Error',
+      text: vi.fn().mockResolvedValue('Server Error'),
+    };
+
+    const { withTimeout } = await import('../../utils/withTimeout');
+    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
+
+    await expect(naive('https://example.com', { filterOptions: {} })).rejects.toThrow(/500/);
  });

  it('should throw PageNotFoundError for 404 status', async () => {
@ -164,7 +183,7 @@ describe('naive crawler', () => {

  it('should throw NetworkConnectionError for fetch failures', async () => {
    const { withTimeout } = await import('../../utils/withTimeout');
-    vi.mocked(withTimeout).mockRejectedValue(new Error('fetch failed'));
+    vi.mocked(withTimeout).mockRejectedValue(new TypeError('fetch failed'));

    await expect(naive('https://example.com', { filterOptions: {} })).rejects.toThrow(
      NetworkConnectionError,
@ -194,6 +213,7 @@ describe('naive crawler', () => {
  it('should return undefined when HTML processing fails', async () => {
    const mockResponse = {
      status: 200,
+      ok: true,
      headers: new Map([['content-type', 'text/html']]),
      text: vi.fn().mockRejectedValue(new Error('Failed to read text')),
    };
@ -209,6 +229,7 @@ describe('naive crawler', () => {
  it('should pass filter options to htmlToMarkdown', async () => {
    const mockResponse = {
      status: 200,
+      ok: true,
      headers: new Map([['content-type', 'text/html']]),
      text: vi.fn().mockResolvedValue('<html><body>Test content</body></html>'),
    };
--- a/packages/web-crawler/src/crawImpl/tests/search1api.test.ts
+++ b/packages/web-crawler/src/crawImpl/tests/search1api.test.ts
@ -1,5 +1,6 @@
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';

+import { createMockResponse } from '../../test-utils';
 import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../../utils/errorType';
 import * as withTimeoutModule from '../../utils/withTimeout';
 import { search1api } from '../search1api';
@ -17,8 +18,10 @@ describe('search1api crawler', () => {
    originalEnv = { ...process.env };
    process.env.SEARCH1API_API_KEY = 'test-api-key';

-    // Mock withTimeout to directly return the promise
-    vi.spyOn(withTimeoutModule, 'withTimeout').mockImplementation((promise) => promise);
+    // Mock withTimeout to call the factory function directly (bypassing real timeout)
+    vi.spyOn(withTimeoutModule, 'withTimeout').mockImplementation((fn) =>
+      fn(new AbortController().signal),
+    );
  });

  afterEach(() => {
@ -26,7 +29,7 @@ describe('search1api crawler', () => {
  });

  it('should throw NetworkConnectionError when fetch fails', async () => {
-    mockFetch.mockRejectedValue(new Error('fetch failed'));
+    mockFetch.mockRejectedValue(new TypeError('fetch failed'));

    await expect(search1api('https://example.com', { filterOptions: {} })).rejects.toThrow(
      NetworkConnectionError,
@ -48,11 +51,13 @@ describe('search1api crawler', () => {
  });

  it('should throw PageNotFoundError when status is 404', async () => {
-    mockFetch.mockResolvedValue({
-      ok: false,
-      status: 404,
-      statusText: 'Not Found',
-    });
+    mockFetch.mockResolvedValue(
+      createMockResponse('Not Found', {
+        ok: false,
+        status: 404,
+        statusText: 'Not Found',
+      }),
+    );

    await expect(search1api('https://example.com', { filterOptions: {} })).rejects.toThrow(
      PageNotFoundError,
@ -60,11 +65,13 @@ describe('search1api crawler', () => {
  });

  it('should throw error for other failed responses', async () => {
-    mockFetch.mockResolvedValue({
-      ok: false,
-      status: 500,
-      statusText: 'Internal Server Error',
-    });
+    mockFetch.mockResolvedValue(
+      createMockResponse('', {
+        ok: false,
+        status: 500,
+        statusText: 'Internal Server Error',
+      }),
+    );

    await expect(search1api('https://example.com', { filterOptions: {} })).rejects.toThrow(
      'Search1API request failed with status 500: Internal Server Error',
@ -72,18 +79,19 @@ describe('search1api crawler', () => {
  });

  it('should return undefined when content is too short', async () => {
-    mockFetch.mockResolvedValue({
-      ok: true,
-      json: () =>
-        Promise.resolve({
+    mockFetch.mockResolvedValue(
+      createMockResponse(
+        {
          crawlParameters: { url: 'https://example.com' },
          results: {
            title: 'Test Title',
            link: 'https://example.com',
            content: 'Short', // Less than 100 characters
          },
-        }),
-    });
+        },
+        { ok: true },
+      ),
+    );

    const result = await search1api('https://example.com', { filterOptions: {} });
    expect(result).toBeUndefined();
@ -92,18 +100,19 @@ describe('search1api crawler', () => {
  it('should return crawl result on successful fetch', async () => {
    const mockContent = 'This is a test content that is longer than 100 characters. '.repeat(3);

-    mockFetch.mockResolvedValue({
-      ok: true,
-      json: () =>
-        Promise.resolve({
+    mockFetch.mockResolvedValue(
+      createMockResponse(
+        {
          crawlParameters: { url: 'https://example.com' },
          results: {
            title: 'Test Title',
            link: 'https://example.com',
            content: mockContent,
          },
-        }),
-    });
+        },
+        { ok: true },
+      ),
+    );

    const result = await search1api('https://example.com', { filterOptions: {} });

@ -116,6 +125,7 @@ describe('search1api crawler', () => {
      body: JSON.stringify({
        url: 'https://example.com',
      }),
+      signal: expect.any(AbortSignal),
    });

    expect(result).toEqual({
@ -130,12 +140,18 @@ describe('search1api crawler', () => {
  });

  it('should handle JSON parse errors', async () => {
-    mockFetch.mockResolvedValue({
-      ok: true,
-      json: () => Promise.reject(new Error('Invalid JSON')),
-    });
+    // Build the mock response first and override json() to reject for this specific test;
+    // mockFetch is configured exactly once, after all overrides are in place.
+    const response = createMockResponse('invalid json', { ok: true });
+    response.json = () => Promise.reject(new Error('Invalid JSON'));
+    // clone should also return a response whose text() works for error reporting
+    response.clone = () => {
+      const cloned = createMockResponse('invalid json', { ok: true });
+      cloned.json = () => Promise.reject(new Error('Invalid JSON'));
+      return cloned;
+    };
+    mockFetch.mockResolvedValue(response);

-    const result = await search1api('https://example.com', { filterOptions: {} });
-    expect(result).toBeUndefined();
+    await expect(search1api('https://example.com', { filterOptions: {} })).rejects.toThrow();
  });
 });
--- a/packages/web-crawler/src/crawImpl/tests/tavily.test.ts
+++ b/packages/web-crawler/src/crawImpl/tests/tavily.test.ts
@ -1,5 +1,6 @@
 import { beforeEach, describe, expect, it, vi } from 'vitest';

+import { createMockResponse } from '../../test-utils';
 import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../../utils/errorType';
 import { tavily } from '../tavily';

@ -19,21 +20,18 @@ describe('tavily crawler', () => {
  it('should successfully crawl content with API key', async () => {
    process.env.TAVILY_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.5,
-        results: [
-          {
-            url: 'https://example.com',
-            raw_content:
-              'This is a test raw content with sufficient length to pass validation. '.repeat(3),
-            images: ['https://example.com/image1.jpg', 'https://example.com/image2.jpg'],
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.5,
+      results: [
+        {
+          url: 'https://example.com',
+          raw_content:
+            'This is a test raw content with sufficient length to pass validation. '.repeat(3),
+          images: ['https://example.com/image1.jpg', 'https://example.com/image2.jpg'],
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -50,69 +48,60 @@ describe('tavily crawler', () => {
      url: 'https://example.com',
    });

-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
  });

  it('should use custom extract depth when provided', async () => {
    process.env.TAVILY_API_KEY = 'test-api-key';
    process.env.TAVILY_EXTRACT_DEPTH = 'advanced';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 2.1,
-        results: [
-          {
-            url: 'https://example.com',
-            raw_content: 'Advanced extraction content with more details. '.repeat(5),
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 2.1,
+      results: [
+        {
+          url: 'https://example.com',
+          raw_content: 'Advanced extraction content with more details. '.repeat(5),
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);

    await tavily('https://example.com', { filterOptions: {} });

-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
  });

  it('should handle missing API key', async () => {
-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.2,
-        results: [
-          {
-            url: 'https://example.com',
-            raw_content: 'Test content with sufficient length. '.repeat(5),
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.2,
+      results: [
+        {
+          url: 'https://example.com',
+          raw_content: 'Test content with sufficient length. '.repeat(5),
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);

    await tavily('https://example.com', { filterOptions: {} });

-    expect(withTimeout).toHaveBeenCalledWith(expect.any(Promise), 30000);
+    expect(withTimeout).toHaveBeenCalledWith(expect.any(Function), 30000);
  });

  it('should return undefined when no results are returned', async () => {
    process.env.TAVILY_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 0.8,
-        results: [],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 0.8,
+      results: [],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -133,19 +122,16 @@ describe('tavily crawler', () => {
  it('should return undefined for short content', async () => {
    process.env.TAVILY_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.1,
-        results: [
-          {
-            url: 'https://example.com',
-            raw_content: 'Short', // Content too short
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.1,
+      results: [
+        {
+          url: 'https://example.com',
+          raw_content: 'Short', // Content too short
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -158,20 +144,17 @@ describe('tavily crawler', () => {
  it('should return undefined when raw_content is missing', async () => {
    process.env.TAVILY_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1,
-        results: [
-          {
-            url: 'https://example.com',
-            // raw_content is missing
-            images: ['https://example.com/image.jpg'],
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1,
+      results: [
+        {
+          url: 'https://example.com',
+          // raw_content is missing
+          images: ['https://example.com/image.jpg'],
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -184,11 +167,11 @@ describe('tavily crawler', () => {
  it('should throw PageNotFoundError for 404 status', async () => {
    process.env.TAVILY_API_KEY = 'test-api-key';

-    const mockResponse = {
+    const mockResponse = createMockResponse('Not Found', {
      ok: false,
      status: 404,
      statusText: 'Not Found',
-    };
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -201,11 +184,11 @@ describe('tavily crawler', () => {
  it('should throw error for other HTTP errors', async () => {
    process.env.TAVILY_API_KEY = 'test-api-key';

-    const mockResponse = {
+    const mockResponse = createMockResponse('', {
      ok: false,
      status: 500,
      statusText: 'Internal Server Error',
-    };
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -219,7 +202,7 @@ describe('tavily crawler', () => {
    process.env.TAVILY_API_KEY = 'test-api-key';

    const { withTimeout } = await import('../../utils/withTimeout');
-    vi.mocked(withTimeout).mockRejectedValue(new Error('fetch failed'));
+    vi.mocked(withTimeout).mockRejectedValue(new TypeError('fetch failed'));

    await expect(tavily('https://example.com', { filterOptions: {} })).rejects.toThrow(
      NetworkConnectionError,
@ -252,43 +235,38 @@ describe('tavily crawler', () => {
    );
  });

-  it('should return undefined when JSON parsing fails', async () => {
+  it('should throw ResponseBodyParseError when JSON parsing fails', async () => {
    process.env.TAVILY_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
+    const mockResponse = createMockResponse('not json', { ok: true });
+    mockResponse.json = vi.fn().mockRejectedValue(new Error('Invalid JSON'));
+    mockResponse.clone.mockReturnValue({
+      ...mockResponse,
      json: vi.fn().mockRejectedValue(new Error('Invalid JSON')),
-    };
+      text: vi.fn().mockResolvedValue('not json'),
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);

-    const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
-
-    const result = await tavily('https://example.com', { filterOptions: {} });
-
-    expect(result).toBeUndefined();
-    expect(consoleSpy).toHaveBeenCalled();
-
-    consoleSpy.mockRestore();
+    await expect(tavily('https://example.com', { filterOptions: {} })).rejects.toThrow(
+      'Tavily returned non-JSON response: not json',
+    );
  });

  it('should use result URL when available', async () => {
    process.env.TAVILY_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.3,
-        results: [
-          {
-            url: 'https://redirected.example.com',
-            raw_content: 'Test content with sufficient length. '.repeat(5),
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.3,
+      results: [
+        {
+          url: 'https://redirected.example.com',
+          raw_content: 'Test content with sufficient length. '.repeat(5),
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -301,19 +279,16 @@ describe('tavily crawler', () => {
  it('should fallback to original URL when result URL is missing', async () => {
    process.env.TAVILY_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.4,
-        results: [
-          {
-            raw_content: 'Test content with sufficient length. '.repeat(5),
-            // url is missing
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.4,
+      results: [
+        {
+          raw_content: 'Test content with sufficient length. '.repeat(5),
+          // url is missing
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
@ -326,20 +301,17 @@ describe('tavily crawler', () => {
  it('should handle failed results in response', async () => {
    process.env.TAVILY_API_KEY = 'test-api-key';

-    const mockResponse = {
-      ok: true,
-      json: vi.fn().mockResolvedValue({
-        base_url: 'https://api.tavily.com',
-        response_time: 1.6,
-        results: [],
-        failed_results: [
-          {
-            url: 'https://example.com',
-            error: 'Page not accessible',
-          },
-        ],
-      }),
-    };
+    const mockResponse = createMockResponse({
+      base_url: 'https://api.tavily.com',
+      response_time: 1.6,
+      results: [],
+      failed_results: [
+        {
+          url: 'https://example.com',
+          error: 'Page not accessible',
+        },
+      ],
+    });

    const { withTimeout } = await import('../../utils/withTimeout');
    vi.mocked(withTimeout).mockResolvedValue(mockResponse as any);
--- a/packages/web-crawler/src/crawImpl/browserless.ts
+++ b/packages/web-crawler/src/crawImpl/browserless.ts
@ -2,7 +2,10 @@ import qs from 'query-string';
 import urlJoin from 'url-join';

 import type { CrawlImpl, CrawlSuccessResult } from '../type';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
 import { htmlToMarkdown } from '../utils/htmlToMarkdown';
+import { createHTTPStatusError } from '../utils/response';
+import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';

 const BASE_URL = process.env.BROWSERLESS_URL ?? 'https://chrome.browserless.io';
 // Allowed file types: html, css, js, json, xml, webmanifest, txt, md
@ -31,46 +34,62 @@ export const browserless: CrawlImpl = async (url, { filterOptions }) => {
    url,
  };

+  let res: Response;
+
  try {
-    const res = await fetch(
-      qs.stringifyUrl({
-        query: {
-          blockAds: BROWSERLESS_BLOCK_ADS,
-          launch: JSON.stringify({ stealth: BROWSERLESS_STEALTH_MODE }),
-          token: BROWSERLESS_TOKEN,
-        },
-        url: urlJoin(BASE_URL, '/content'),
-      }),
-      {
-        body: JSON.stringify(input),
-        headers: {
-          'Content-Type': 'application/json',
-        },
-        method: 'POST',
-      },
+    res = await withTimeout(
+      (signal) =>
+        fetch(
+          qs.stringifyUrl({
+            query: {
+              blockAds: BROWSERLESS_BLOCK_ADS,
+              launch: JSON.stringify({ stealth: BROWSERLESS_STEALTH_MODE }),
+              token: BROWSERLESS_TOKEN,
+            },
+            url: urlJoin(BASE_URL, '/content'),
+          }),
+          {
+            body: JSON.stringify(input),
+            headers: {
+              'Content-Type': 'application/json',
+            },
+            method: 'POST',
+            signal,
+          },
+        ),
+      DEFAULT_TIMEOUT,
    );
-    const html = await res.text();
+  } catch (e) {
+    throw toFetchError(e);
+  }

-    const result = htmlToMarkdown(html, { filterOptions, url });
-
-    if (
-      !!result.content &&
-      result.title &&
-      // "Just a moment..." indicates being blocked by CloudFlare
-      result.title.trim() !== 'Just a moment...'
-    ) {
-      return {
-        content: result.content,
-        contentType: 'text',
-        description: result?.description,
-        length: result.length,
-        siteName: result?.siteName,
-        title: result?.title,
-        url,
-      } satisfies CrawlSuccessResult;
+  if (!res.ok) {
+    if (res.status === 404) {
+      throw new PageNotFoundError(res.statusText);
    }
-  } catch (error) {
-    console.error(error);
+
+    throw await createHTTPStatusError(res, 'Browserless');
+  }
+
+  const html = await res.text();
+  const result = htmlToMarkdown(html, { filterOptions, url });
+
+  if (
+    !!result.content &&
+    result.content.length > 100 &&
+    result.title &&
+    // "Just a moment..." indicates being blocked by CloudFlare
+    result.title.trim() !== 'Just a moment...'
+  ) {
+    return {
+      content: result.content,
+      contentType: 'text',
+      description: result?.description,
+      length: result.length,
+      siteName: result?.siteName,
+      title: result?.title,
+      url,
+    } satisfies CrawlSuccessResult;
  }

  return;
--- a/packages/web-crawler/src/crawImpl/exa.ts
+++ b/packages/web-crawler/src/crawImpl/exa.ts
@ -1,5 +1,6 @@
 import type { CrawlImpl, CrawlSuccessResult } from '../type';
-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
+import { createHTTPStatusError, parseJSONResponse } from '../utils/response';
 import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';

 interface ExaResults {
@ -27,31 +28,24 @@ export const exa: CrawlImpl = async (url) => {

  try {
    res = await withTimeout(
-      fetch('https://api.exa.ai/contents', {
-        body: JSON.stringify({
-          livecrawl: 'fallback', // always, fallback
-          text: true,
-          urls: [url],
+      (signal) =>
+        fetch('https://api.exa.ai/contents', {
+          body: JSON.stringify({
+            livecrawl: 'fallback', // always, fallback
+            text: true,
+            urls: [url],
+          }),
+          headers: {
+            'Content-Type': 'application/json',
+            'x-api-key': !apiKey ? '' : apiKey,
+          },
+          method: 'POST',
+          signal,
        }),
-        headers: {
-          'Content-Type': 'application/json',
-          'x-api-key': !apiKey ? '' : apiKey,
-        },
-        method: 'POST',
-      }),
      DEFAULT_TIMEOUT,
    );
  } catch (e) {
-    const error = e as Error;
-    if (error.message === 'fetch failed') {
-      throw new NetworkConnectionError();
-    }
-
-    if (error instanceof TimeoutError) {
-      throw error;
-    }
-
-    throw e;
+    throw toFetchError(e);
  }

  if (!res.ok) {
@ -59,35 +53,29 @@ export const exa: CrawlImpl = async (url) => {
      throw new PageNotFoundError(res.statusText);
    }

-    throw new Error(`Exa request failed with status ${res.status}: ${res.statusText}`);
+    throw await createHTTPStatusError(res, 'Exa');
  }

-  try {
-    const data = (await res.json()) as ExaResponse;
+  const data = await parseJSONResponse<ExaResponse>(res, 'Exa');

-    if (!data.results || data.results.length === 0) {
-      console.warn('Exa API returned no results for URL:', url);
-      return;
-    }
-
-    const firstResult = data.results[0];
-
-    // Check if content is empty or too short
-    if (!firstResult.text || firstResult.text.length < 100) {
-      return;
-    }
-
-    return {
-      content: firstResult.text,
-      contentType: 'text',
-      length: firstResult.text.length,
-      siteName: new URL(url).hostname,
-      title: firstResult.title,
-      url: firstResult.url || url,
-    } satisfies CrawlSuccessResult;
-  } catch (error) {
-    console.error(error);
+  if (!data.results || data.results.length === 0) {
+    console.warn('Exa API returned no results for URL:', url);
+    return;
  }

-  return;
+  const firstResult = data.results[0];
+
+  // Check if content is empty or too short
+  if (!firstResult.text || firstResult.text.length < 100) {
+    return;
+  }
+
+  return {
+    content: firstResult.text,
+    contentType: 'text',
+    length: firstResult.text.length,
+    siteName: new URL(url).hostname,
+    title: firstResult.title,
+    url: firstResult.url || url,
+  } satisfies CrawlSuccessResult;
 };
--- a/packages/web-crawler/src/crawImpl/firecrawl.ts
+++ b/packages/web-crawler/src/crawImpl/firecrawl.ts
@ -1,5 +1,6 @@
 import type { CrawlImpl, CrawlSuccessResult } from '../type';
-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
+import { createHTTPStatusError, parseJSONResponse } from '../utils/response';
 import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';

 interface FirecrawlMetadata {
@ -57,30 +58,23 @@ export const firecrawl: CrawlImpl = async (url) => {

  try {
    res = await withTimeout(
-      fetch(`${baseUrl}/scrape`, {
-        body: JSON.stringify({
-          formats: ['markdown'], // ["markdown", "html"]
-          url,
+      (signal) =>
+        fetch(`${baseUrl}/scrape`, {
+          body: JSON.stringify({
+            formats: ['markdown'], // ["markdown", "html"]
+            url,
+          }),
+          headers: {
+            'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
+            'Content-Type': 'application/json',
+          },
+          method: 'POST',
+          signal,
        }),
-        headers: {
-          'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
-          'Content-Type': 'application/json',
-        },
-        method: 'POST',
-      }),
      DEFAULT_TIMEOUT,
    );
  } catch (e) {
-    const error = e as Error;
-    if (error.message === 'fetch failed') {
-      throw new NetworkConnectionError();
-    }
-
-    if (error instanceof TimeoutError) {
-      throw error;
-    }
-
-    throw e;
+    throw toFetchError(e);
  }

  if (!res.ok) {
@ -88,37 +82,34 @@ export const firecrawl: CrawlImpl = async (url) => {
      throw new PageNotFoundError(res.statusText);
    }

-    throw new Error(`Firecrawl request failed with status ${res.status}: ${res.statusText}`);
+    throw await createHTTPStatusError(res, 'Firecrawl');
  }

-  try {
-    const data = (await res.json()) as FirecrawlResponse;
-
-    if (data.data.warning) {
-      console.warn('[Firecrawl] Warning:', data.data.warning);
-    }
-
-    if (data.data.metadata.error) {
-      console.error('[Firecrawl] Metadata error:', data.data.metadata.error);
-    }
-
-    // Check if content is empty or too short
-    if (!data.data.markdown || data.data.markdown.length < 100) {
-      return;
-    }
-
-    return {
-      content: data.data.markdown,
-      contentType: 'text',
-      description: data.data.metadata.description || '',
-      length: data.data.markdown.length,
-      siteName: new URL(url).hostname,
-      title: data.data.metadata.title || '',
-      url: url,
-    } satisfies CrawlSuccessResult;
-  } catch (error) {
-    console.error('[Firecrawl] Parse error:', error);
+  const data = await parseJSONResponse<FirecrawlResponse>(res, 'Firecrawl');
+  if (!data.data) {
+    throw new Error('Firecrawl response missing data field');
  }

-  return;
+  if (data.data.warning) {
+    console.warn('[Firecrawl] Warning:', data.data.warning);
+  }
+
+  // The parsed payload is not validated against FirecrawlResponse, so `metadata` may be
+  // absent at runtime. Read it defensively instead of letting a TypeError mask usable content.
+  const metadata: FirecrawlMetadata | undefined = data.data.metadata;
+
+  if (metadata?.error) {
+    console.error('[Firecrawl] Metadata error:', metadata.error);
+  }
+
+  // Check if content is empty or too short
+  if (!data.data.markdown || data.data.markdown.length < 100) {
+    return;
+  }
+
+  return {
+    content: data.data.markdown,
+    contentType: 'text',
+    description: metadata?.description || '',
+    length: data.data.markdown.length,
+    siteName: new URL(url).hostname,
+    title: metadata?.title || '',
+    url,
+  } satisfies CrawlSuccessResult;
 };
--- a/packages/web-crawler/src/crawImpl/jina.ts
+++ b/packages/web-crawler/src/crawImpl/jina.ts
@ -1,37 +1,59 @@
 import type { CrawlImpl } from '../type';
+import { toFetchError } from '../utils/errorType';
+import { parseJSONResponse } from '../utils/response';
+import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';

 export const jina: CrawlImpl<{ apiKey?: string }> = async (url, params) => {
  const token = params.apiKey ?? process.env.JINA_READER_API_KEY ?? process.env.JINA_API_KEY;
+  let res: Response;

  try {
-    const res = await fetch(`https://r.jina.ai/${url}`, {
-      headers: {
-        'Accept': 'application/json',
-        'Authorization': token ? `Bearer ${token}` : '',
-        'x-send-from': 'LobeChat Community',
-      },
-    });
-
-    if (res.ok) {
-      const json = await res.json();
-      if (json.code === 200) {
-        const result = json.data;
-        return {
-          content: result.content,
-          contentType: 'text',
-          description: result?.description,
-          length: result.content.length,
-          siteName: result?.siteName,
-          title: result?.title,
-          url: url,
-        };
-      }
-
-      throw json;
-    }
-  } catch (error) {
-    console.error(error);
+    res = await withTimeout(
+      (signal) =>
+        fetch(`https://r.jina.ai/${url}`, {
+          headers: {
+            'Accept': 'application/json',
+            'Authorization': token ? `Bearer ${token}` : '',
+            'x-send-from': 'LobeChat Community',
+          },
+          signal,
+        }),
+      DEFAULT_TIMEOUT,
+    );
+  } catch (e) {
+    throw toFetchError(e);
  }

-  return;
+  // A non-2xx status yields "no result" (undefined) instead of throwing, unlike the
+  // network/timeout failures rethrown via toFetchError above.
+  if (!res.ok) {
+    return;
+  }
+
+  // Expected response envelope: a numeric `code` plus a `data` payload holding the page fields.
+  const json = await parseJSONResponse<{
+    code: number;
+    data: {
+      content: string;
+      description?: string;
+      siteName?: string;
+      title?: string;
+    };
+  }>(res, 'Jina');
+
+  // Any envelope whose `code` is not 200 is also reported as "no result".
+  if (json.code !== 200) {
+    return;
+  }
+
+  const result = json.data;
+  // Reject a missing payload, missing content, or content shorter than 100 characters.
+  if (!result?.content || result.content.length < 100) {
+    return;
+  }
+
+  return {
+    content: result.content,
+    contentType: 'text',
+    description: result?.description,
+    length: result.content.length,
+    siteName: result?.siteName,
+    title: result?.title,
+    url,
+  };
 };
--- a/packages/web-crawler/src/crawImpl/naive.ts
+++ b/packages/web-crawler/src/crawImpl/naive.ts
@ -1,8 +1,9 @@
 import { ssrfSafeFetch } from '@lobechat/ssrf-safe-fetch';

 import type { CrawlImpl, CrawlSuccessResult } from '../type';
-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
 import { htmlToMarkdown } from '../utils/htmlToMarkdown';
+import { createHTTPStatusError } from '../utils/response';
 import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';

 const mixinHeaders = {
@ -39,28 +40,25 @@ export const naive: CrawlImpl = async (url, { filterOptions }) => {

  try {
    res = await withTimeout(
-      ssrfSafeFetch(url, {
-        headers: mixinHeaders,
-        signal: new AbortController().signal,
-      }),
+      (signal) =>
+        ssrfSafeFetch(url, {
+          headers: mixinHeaders,
+          signal,
+        }),
      DEFAULT_TIMEOUT,
    );
  } catch (e) {
-    const error = e as Error;
-    if (error.message === 'fetch failed') {
-      throw new NetworkConnectionError();
-    }
-
-    if (error instanceof TimeoutError) {
-      throw error;
-    }
-
-    throw e;
+    throw toFetchError(e);
  }

  if (res.status === 404) {
    throw new PageNotFoundError(res.statusText);
  }
+
+  if (!res.ok) {
+    throw await createHTTPStatusError(res, 'Naive');
+  }
+
  const type = res.headers.get('content-type');

  if (type?.includes('application/json')) {
@ -74,7 +72,7 @@ export const naive: CrawlImpl = async (url, { filterOptions }) => {
    }

    return {
-      content: content,
+      content,
      contentType: 'json',
      length: content.length,
      url,
@ -91,8 +89,8 @@ export const naive: CrawlImpl = async (url, { filterOptions }) => {
      return;
    }

-    // it's blocked by cloudflare
-    if (result.title !== 'Just a moment...') {
+    // It's blocked by Cloudflare.
+    if (result.title === 'Just a moment...') {
      return;
    }

--- a/packages/web-crawler/src/crawImpl/search1api.ts
+++ b/packages/web-crawler/src/crawImpl/search1api.ts
@ -1,5 +1,6 @@
 import type { CrawlImpl, CrawlSuccessResult } from '../type';
-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
+import { createHTTPStatusError, parseJSONResponse } from '../utils/response';
 import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';

 interface Search1ApiResponse {
@ -21,29 +22,22 @@ export const search1api: CrawlImpl = async (url) => {

  try {
    res = await withTimeout(
-      fetch('https://api.search1api.com/crawl', {
-        body: JSON.stringify({
-          url,
+      (signal) =>
+        fetch('https://api.search1api.com/crawl', {
+          body: JSON.stringify({
+            url,
+          }),
+          headers: {
+            'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
+            'Content-Type': 'application/json',
+          },
+          method: 'POST',
+          signal,
        }),
-        headers: {
-          'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
-          'Content-Type': 'application/json',
-        },
-        method: 'POST',
-      }),
      DEFAULT_TIMEOUT,
    );
  } catch (e) {
-    const error = e as Error;
-    if (error.message === 'fetch failed') {
-      throw new NetworkConnectionError();
-    }
-
-    if (error instanceof TimeoutError) {
-      throw error;
-    }
-
-    throw e;
+    throw toFetchError(e);
  }

  if (!res.ok) {
@ -51,30 +45,24 @@ export const search1api: CrawlImpl = async (url) => {
      throw new PageNotFoundError(res.statusText);
    }

-    throw new Error(`Search1API request failed with status ${res.status}: ${res.statusText}`);
+    throw await createHTTPStatusError(res, 'Search1API');
  }

-  try {
-    const data = (await res.json()) as Search1ApiResponse;
+  const data = await parseJSONResponse<Search1ApiResponse>(res, 'Search1API');

-    // Check if content is empty or too short
-    if (!data.results.content || data.results.content.length < 100) {
-      return;
-    }
-
-    return {
-      content: data.results.content,
-      contentType: 'text',
-      description: data.results.title,
-      // Using title as description since API doesn't provide a separate description
-      length: data.results.content.length,
-      siteName: new URL(url).hostname,
-      title: data.results.title,
-      url: data.results.link || url,
-    } satisfies CrawlSuccessResult;
-  } catch (error) {
-    console.error(error);
+  // Check if content is empty or too short
+  if (!data.results?.content || data.results.content.length < 100) {
+    return;
  }

-  return;
+  return {
+    content: data.results.content,
+    contentType: 'text',
+    description: data.results?.title,
+    // Using title as description since API doesn't provide a separate description
+    length: data.results.content.length,
+    siteName: new URL(url).hostname,
+    title: data.results?.title,
+    url: data.results?.link || url,
+  } satisfies CrawlSuccessResult;
 };
--- a/packages/web-crawler/src/crawImpl/tavily.ts
+++ b/packages/web-crawler/src/crawImpl/tavily.ts
@ -1,5 +1,6 @@
 import type { CrawlImpl, CrawlSuccessResult } from '../type';
-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../utils/errorType';
+import { PageNotFoundError, toFetchError } from '../utils/errorType';
+import { createHTTPStatusError, parseJSONResponse } from '../utils/response';
 import { DEFAULT_TIMEOUT, withTimeout } from '../utils/withTimeout';

 interface TavilyResults {
@ -28,31 +29,24 @@ export const tavily: CrawlImpl = async (url) => {

  try {
    res = await withTimeout(
-      fetch('https://api.tavily.com/extract', {
-        body: JSON.stringify({
-          extract_depth: process.env.TAVILY_EXTRACT_DEPTH || 'basic', // basic or advanced
-          include_images: false,
-          urls: url,
+      (signal) =>
+        fetch('https://api.tavily.com/extract', {
+          body: JSON.stringify({
+            extract_depth: process.env.TAVILY_EXTRACT_DEPTH || 'basic', // basic or advanced
+            include_images: false,
+            urls: url,
+          }),
+          headers: {
+            'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
+            'Content-Type': 'application/json',
+          },
+          method: 'POST',
+          signal,
        }),
-        headers: {
-          'Authorization': !apiKey ? '' : `Bearer ${apiKey}`,
-          'Content-Type': 'application/json',
-        },
-        method: 'POST',
-      }),
      DEFAULT_TIMEOUT,
    );
  } catch (e) {
-    const error = e as Error;
-    if (error.message === 'fetch failed') {
-      throw new NetworkConnectionError();
-    }
-
-    if (error instanceof TimeoutError) {
-      throw error;
-    }
-
-    throw e;
+    throw toFetchError(e);
  }

  if (!res.ok) {
@ -60,35 +54,29 @@ export const tavily: CrawlImpl = async (url) => {
      throw new PageNotFoundError(res.statusText);
    }

-    throw new Error(`Tavily request failed with status ${res.status}: ${res.statusText}`);
+    throw await createHTTPStatusError(res, 'Tavily');
  }

-  try {
-    const data = (await res.json()) as TavilyResponse;
+  const data = await parseJSONResponse<TavilyResponse>(res, 'Tavily');

-    if (!data.results || data.results.length === 0) {
-      console.warn('Tavily API returned no results for URL:', url);
-      return;
-    }
-
-    const firstResult = data.results[0];
-
-    // Check if content is empty or too short
-    if (!firstResult.raw_content || firstResult.raw_content.length < 100) {
-      return;
-    }
-
-    return {
-      content: firstResult.raw_content,
-      contentType: 'text',
-      length: firstResult.raw_content.length,
-      siteName: new URL(url).hostname,
-      title: new URL(url).hostname,
-      url: firstResult.url || url,
-    } satisfies CrawlSuccessResult;
-  } catch (error) {
-    console.error(error);
+  if (!data.results || data.results.length === 0) {
+    console.warn('Tavily API returned no results for URL:', url);
+    return;
  }

-  return;
+  const firstResult = data.results[0];
+
+  // Check if content is empty or too short
+  if (!firstResult.raw_content || firstResult.raw_content.length < 100) {
+    return;
+  }
+
+  return {
+    content: firstResult.raw_content,
+    contentType: 'text',
+    length: firstResult.raw_content.length,
+    siteName: new URL(url).hostname,
+    title: new URL(url).hostname,
+    url: firstResult.url || url,
+  } satisfies CrawlSuccessResult;
 };
--- a/packages/web-crawler/src/crawler.ts
+++ b/packages/web-crawler/src/crawler.ts
@ -59,13 +59,18 @@ export class Crawler {
      try {
        const res = await crawlImpls[impl](transformedUrl, { filterOptions: mergedFilterOptions });

-        if (res && res.content && res.content?.length > 100)
+        if (res && res.content && res.content.length > 100) {
          return {
            crawler: impl,
            data: res,
            originalUrl: url,
            transformedUrl: transformedUrl !== url ? transformedUrl : undefined,
          };
+        }
+
+        finalError = new Error(`${impl} returned empty or short content`);
+        finalError.name = 'EmptyCrawlResultError';
+        finalCrawler = impl;
      } catch (error) {
        console.error(error);
        finalError = error as Error;
@ -77,10 +82,10 @@ export class Crawler {
    const errorMessage = finalError?.message;

    return {
-      crawler: finalCrawler!,
+      crawler: finalCrawler || finalImpls.at(-1) || 'unknown',
      data: {
        content: `Fail to crawl the page. Error type: ${errorType}, error message: ${errorMessage}`,
-        errorMessage: errorMessage,
+        errorMessage,
        errorType,
      },
      originalUrl: url,
--- a/packages/web-crawler/src/test-utils.ts
+++ b/packages/web-crawler/src/test-utils.ts
@ -0,0 +1,25 @@
+import { vi } from 'vitest';
+
+/**
+ * Build a lightweight stand-in for a fetch `Response` for use in crawler tests.
+ * `json`, `text`, and `clone` are `vi.fn()` mocks, so an individual test can re-stub them.
+ * `clone()` returns a copy of the mock carrying its own fresh `json`/`text` mocks.
+ *
+ * When `status`/`statusText` are omitted they default to 200/'OK' for `ok: true`
+ * and 500/'Internal Server Error' otherwise.
+ */
+export const createMockResponse = (
+  body: any,
+  opts: { ok: boolean; status?: number; statusText?: string } = { ok: true },
+) => {
+  // `text()` resolves to the body itself when it is a string, otherwise to its JSON encoding.
+  const bodyText = typeof body === 'string' ? body : JSON.stringify(body);
+  const fallbackStatus = opts.ok ? 200 : 500;
+  const fallbackStatusText = opts.ok ? 'OK' : 'Internal Server Error';
+
+  const mock: any = {
+    ok: opts.ok,
+    status: opts.status ?? fallbackStatus,
+    statusText: opts.statusText ?? fallbackStatusText,
+    json: vi.fn().mockResolvedValue(body),
+    text: vi.fn().mockResolvedValue(bodyText),
+    clone: vi.fn(),
+  };
+
+  const cloned = {
+    ...mock,
+    json: vi.fn().mockResolvedValue(body),
+    text: vi.fn().mockResolvedValue(bodyText),
+  };
+  mock.clone.mockReturnValue(cloned);
+
+  return mock;
+};
--- a/packages/web-crawler/src/utils/tests/appUrlRules.test.ts
+++ b/packages/web-crawler/src/utils/tests/appUrlRules.test.ts
@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest';

-import { applyUrlRules } from './appUrlRules';
+import { applyUrlRules } from '../appUrlRules';

 describe('applyUrlRules', () => {
  // @gru-agent github file rules 不要改
--- a/packages/web-crawler/src/utils/tests/errorType.test.ts
+++ b/packages/web-crawler/src/utils/tests/errorType.test.ts
@ -1,6 +1,12 @@
 import { describe, expect, it } from 'vitest';

-import { NetworkConnectionError, PageNotFoundError, TimeoutError } from '../errorType';
+import {
+  isFetchNetworkError,
+  NetworkConnectionError,
+  PageNotFoundError,
+  TimeoutError,
+  toFetchError,
+} from '../errorType';

 describe('errorType', () => {
  describe('PageNotFoundError', () => {
@ -170,6 +176,43 @@ describe('errorType', () => {
    });
  });

+  describe('isFetchNetworkError', () => {
+    it('should return true for TypeError with "fetch failed" message', () => {
+      expect(isFetchNetworkError(new TypeError('fetch failed'))).toBe(true);
+    });
+
+    it('should return false for plain Error with "fetch failed" message', () => {
+      expect(isFetchNetworkError(new Error('fetch failed'))).toBe(false);
+    });
+
+    it('should return false for TypeError with different message', () => {
+      expect(isFetchNetworkError(new TypeError('something else'))).toBe(false);
+    });
+
+    it('should return false for non-error values', () => {
+      expect(isFetchNetworkError('fetch failed')).toBe(false);
+      expect(isFetchNetworkError(null)).toBe(false);
+      expect(isFetchNetworkError(undefined)).toBe(false);
+    });
+  });
+
+  describe('toFetchError', () => {
+    it('should return NetworkConnectionError for fetch network errors', () => {
+      const result = toFetchError(new TypeError('fetch failed'));
+      expect(result).toBeInstanceOf(NetworkConnectionError);
+    });
+
+    it('should return TimeoutError as-is', () => {
+      const timeout = new TimeoutError('Request timeout after 10000ms');
+      expect(toFetchError(timeout)).toBe(timeout);
+    });
+
+    it('should return unknown errors unchanged', () => {
+      const unknown = new Error('something unexpected');
+      expect(toFetchError(unknown)).toBe(unknown);
+    });
+  });
+
  describe('error catching scenarios', () => {
    it('should allow catching specific error types', () => {
      const testErrors = [
--- a/packages/web-crawler/src/utils/tests/response.test.ts
+++ b/packages/web-crawler/src/utils/tests/response.test.ts
@ -0,0 +1,102 @@
+import { describe, expect, it } from 'vitest';
+
+import { createHTTPStatusError, parseJSONResponse, ResponseBodyParseError } from '../response';
+
+/**
+ * Build a real `Response` carrying `body` as its text, for the tests in this file.
+ *
+ * `status` and `statusText` are forwarded to the `Response` constructor (defaults: 200, 'OK').
+ * `ok` is NOT forwarded: it only selects the Content-Type header ('application/json' when
+ * true, 'text/html' otherwise), so a failing response also needs an explicit `status`.
+ */
+const createMockResponse = (
+  body: string,
+  options: { ok?: boolean; status?: number; statusText?: string } = {},
+) => {
+  const { ok = true, status = 200, statusText = 'OK' } = options;
+  return new Response(body, {
+    status,
+    statusText,
+    headers: { 'Content-Type': ok ? 'application/json' : 'text/html' },
+  });
+};
+
+// Pins the message format: "<provider> returned non-JSON response", with ": <snippet>"
+// appended only when a body snippet is supplied, and the error name.
+describe('ResponseBodyParseError', () => {
+  it('should create error with provider and body snippet', () => {
+    const error = new ResponseBodyParseError('Jina', '<html>error</html>');
+    expect(error.message).toBe('Jina returned non-JSON response: <html>error</html>');
+    expect(error.name).toBe('ResponseBodyParseError');
+  });
+
+  it('should create error without body snippet', () => {
+    const error = new ResponseBodyParseError('Firecrawl');
+    expect(error.message).toBe('Firecrawl returned non-JSON response');
+  });
+});
+
+// Expectations for parseJSONResponse: it resolves with the parsed value for a JSON body and
+// rejects with ResponseBodyParseError (message prefixed by the provider name) otherwise.
+describe('parseJSONResponse', () => {
+  it('should parse valid JSON response', async () => {
+    const data = { code: 200, results: ['a', 'b'] };
+    const response = createMockResponse(JSON.stringify(data));
+
+    const result = await parseJSONResponse<typeof data>(response, 'TestProvider');
+
+    expect(result).toEqual(data);
+  });
+
+  it('should throw ResponseBodyParseError for non-JSON response', async () => {
+    const response = createMockResponse('<html><body>Error</body></html>');
+
+    await expect(parseJSONResponse(response, 'Jina')).rejects.toThrow(ResponseBodyParseError);
+    // A fresh Response is built for the second call instead of reusing `response` —
+    // presumably because the first call has already consumed its body.
+    await expect(
+      parseJSONResponse(createMockResponse('<html><body>Error</body></html>'), 'Jina'),
+    ).rejects.toThrow('Jina returned non-JSON response');
+  });
+
+  it('should include body snippet in error for non-JSON response', async () => {
+    const htmlBody = '<html><body>Internal Server Error</body></html>';
+    const response = createMockResponse(htmlBody);
+
+    await expect(parseJSONResponse(response, 'Firecrawl')).rejects.toThrow(
+      /Firecrawl returned non-JSON response: .*Internal Server Error/,
+    );
+  });
+
+  // An empty body is not valid JSON either, so the same rejection is expected.
+  it('should handle empty response body', async () => {
+    const response = createMockResponse('');
+
+    await expect(parseJSONResponse(response, 'TestProvider')).rejects.toThrow(
+      'TestProvider returned non-JSON response',
+    );
+  });
+});
+
+// Expectations for createHTTPStatusError: the message starts with
+// "<provider> request failed with status <status>: <statusText>" and carries a truncated
+// snippet of the response body when the body is non-empty.
+describe('createHTTPStatusError', () => {
+  it('should create error with status and body snippet', async () => {
+    // The body deliberately differs from the status text; otherwise the snippet assertion
+    // below would already be satisfied by the status line alone.
+    const response = createMockResponse('Resource missing', {
+      ok: false,
+      status: 404,
+      statusText: 'Not Found',
+    });
+
+    const error = await createHTTPStatusError(response, 'Exa');
+
+    expect(error.message).toContain('Exa request failed with status 404: Not Found');
+    expect(error.message).toContain('Resource missing');
+  });
+
+  it('should create error without body snippet when response body is empty', async () => {
+    const response = createMockResponse('', {
+      ok: false,
+      status: 500,
+      statusText: 'Internal Server Error',
+    });
+
+    const error = await createHTTPStatusError(response, 'Tavily');
+
+    expect(error.message).toBe('Tavily request failed with status 500: Internal Server Error');
+  });
+
+  it('should truncate long body snippets', async () => {
+    const longBody = 'x'.repeat(500);
+    const response = createMockResponse(longBody, { ok: false, status: 500, statusText: 'Error' });
+
+    const error = await createHTTPStatusError(response, 'Test');
+
+    // Body snippet should be truncated (to 200 chars, per the implementation's stated limit).
+    // The length bound alone would also hold for an untruncated 500-char body, so the
+    // full body must additionally be absent from the message.
+    expect(error.message.length).toBeLessThan(500 + 100);
+    expect(error.message).not.toContain(longBody);
+  });
+});
--- a/packages/web-crawler/src/utils/tests/withTimeout.test.ts
+++ b/packages/web-crawler/src/utils/tests/withTimeout.test.ts
@ -12,18 +12,18 @@ describe('withTimeout', () => {
    vi.useRealTimers();
  });

-  it('should resolve when promise resolves before timeout', async () => {
-    const promise = Promise.resolve('success');
-    const result = await withTimeout(promise, 1000);
+  it('should resolve when factory function resolves before timeout', async () => {
+    const result = await withTimeout(() => Promise.resolve('success'), 1000);
    expect(result).toBe('success');
  });

-  it('should reject with TimeoutError when promise takes too long', async () => {
-    const slowPromise = new Promise((resolve) => {
-      setTimeout(() => resolve('too late'), 200);
-    });
+  it('should reject with TimeoutError when factory takes too long', async () => {
+    const fn = () =>
+      new Promise((resolve) => {
+        setTimeout(() => resolve('too late'), 200);
+      });

-    const timeoutPromise = withTimeout(slowPromise, 100);
+    const timeoutPromise = withTimeout(fn, 100);
    vi.advanceTimersByTime(100);

    await expect(timeoutPromise).rejects.toThrow(TimeoutError);
@ -31,32 +31,70 @@ describe('withTimeout', () => {
  });

  it('should use DEFAULT_TIMEOUT when no timeout specified', async () => {
-    const slowPromise = new Promise((resolve) => {
-      setTimeout(() => resolve('success'), DEFAULT_TIMEOUT + 100);
-    });
+    const fn = () =>
+      new Promise((resolve) => {
+        setTimeout(() => resolve('success'), DEFAULT_TIMEOUT + 100);
+      });

-    const timeoutPromise = withTimeout(slowPromise);
+    const timeoutPromise = withTimeout(fn);
    vi.advanceTimersByTime(DEFAULT_TIMEOUT);

    await expect(timeoutPromise).rejects.toThrow(TimeoutError);
    await expect(timeoutPromise).rejects.toThrow(`Request timeout after ${DEFAULT_TIMEOUT}ms`);
  });

-  it('should reject with original error if promise rejects before timeout', async () => {
+  it('should reject with original error if factory rejects before timeout', async () => {
    const error = new Error('Original error');
-    const failingPromise = Promise.reject(error);
+    const fn = () => Promise.reject(error);

-    await expect(withTimeout(failingPromise, 1000)).rejects.toThrow('Original error');
+    await expect(withTimeout(fn, 1000)).rejects.toThrow('Original error');
  });

-  it('should abort controller when timeout occurs', async () => {
-    const slowPromise = new Promise((resolve) => {
-      setTimeout(() => resolve('too late'), 2000);
-    });
+  it('should pass AbortSignal to the factory function', async () => {
+    const factoryFn = vi.fn().mockResolvedValue('result');
+    await withTimeout(factoryFn, 1000);

-    const timeoutPromise = withTimeout(slowPromise, 1000);
-    vi.advanceTimersByTime(1000);
+    expect(factoryFn).toHaveBeenCalledTimes(1);
+    const signal = factoryFn.mock.calls[0][0];
+    expect(signal).toBeInstanceOf(AbortSignal);
+    expect(signal.aborted).toBe(false);
+  });

+  it('should abort the signal when timeout occurs', async () => {
+    let capturedSignal: AbortSignal | undefined;
+    const fn = (signal: AbortSignal) => {
+      capturedSignal = signal;
+      return new Promise((resolve) => {
+        setTimeout(() => resolve('too late'), 2000);
+      });
+    };
+
+    const timeoutPromise = withTimeout(fn, 100);
+    expect(capturedSignal!.aborted).toBe(false);
+
+    vi.advanceTimersByTime(100);
    await expect(timeoutPromise).rejects.toThrow(TimeoutError);
+
+    expect(capturedSignal!.aborted).toBe(true);
+  });
+
+  it('should clear timeout timer when promise resolves successfully', async () => {
+    const clearTimeoutSpy = vi.spyOn(globalThis, 'clearTimeout');
+
+    await withTimeout(() => Promise.resolve('success'), 5000);
+
+    expect(clearTimeoutSpy).toHaveBeenCalled();
+    clearTimeoutSpy.mockRestore();
+  });
+
+  it('should clear timeout timer when promise rejects', async () => {
+    const clearTimeoutSpy = vi.spyOn(globalThis, 'clearTimeout');
+
+    await expect(withTimeout(() => Promise.reject(new Error('fail')), 5000)).rejects.toThrow(
+      'fail',
+    );
+
+    expect(clearTimeoutSpy).toHaveBeenCalled();
+    clearTimeoutSpy.mockRestore();
  });
 });
--- a/packages/web-crawler/src/utils/errorType.ts
+++ b/packages/web-crawler/src/utils/errorType.ts
@ -17,3 +17,34 @@ export class TimeoutError extends Error {
    this.name = 'TimeoutError';
  }
 }
+
+/**
+ * Tell whether `error` is the network failure raised by Node.js fetch.
+ * Node.js undici reports such failures as a TypeError whose message is exactly "fetch failed".
+ */
+export const isFetchNetworkError = (error: unknown): boolean => {
+  // Anything that is not a TypeError (including non-error values) is not a fetch failure.
+  if (!(error instanceof TypeError)) return false;
+
+  return error.message === 'fetch failed';
+};
+
+/**
+ * Normalize a fetch error into a typed error for consistent handling.
+ * Converts network failures to `NetworkConnectionError`; returns `TimeoutError` and any
+ * other `Error` instance unchanged (same identity); wraps non-Error throwables (strings,
+ * plain objects, ...) in a new `Error` so the result always has `name` and `message`.
+ * Callers should `throw` the returned value.
+ *
+ * @param error - The value caught from a failed fetch call.
+ * @returns An `Error` instance that is safe to rethrow.
+ *
+ * @example
+ * ```ts
+ * } catch (e) {
+ *   throw toFetchError(e);
+ * }
+ * ```
+ */
+export const toFetchError = (error: unknown): Error => {
+  if (isFetchNetworkError(error)) {
+    return new NetworkConnectionError();
+  }
+
+  // TimeoutError extends Error, so it passes through here together with every other Error.
+  if (error instanceof Error) {
+    return error;
+  }
+
+  // A thrown non-Error value would otherwise be handed back under an `Error` type it does
+  // not satisfy at runtime.
+  return new Error(String(error));
+};
--- a/packages/web-crawler/src/utils/htmlToMarkdown.test.ts
+++ b/packages/web-crawler/src/utils/htmlToMarkdown.test.ts
@ -1,5 +1,5 @@
 import { readFileSync } from 'node:fs';
-import * as path from 'node:path';
+import path from 'node:path';

 import { describe, expect, it } from 'vitest';

@ -33,4 +33,29 @@ describe('htmlToMarkdown', () => {
      expect(data).toMatchSnapshot();
    }, 20000);
  });
+
+  it('should truncate HTML exceeding 1 MB', () => {
+    // Create HTML slightly over 1 MB
+    const maxSize = 1024 * 1024;
+    const largeContent = 'x'.repeat(maxSize + 1000);
+    const html = `<html><body><p>${largeContent}</p></body></html>`;
+
+    // Should not throw - the function handles large HTML by truncating
+    const result = htmlToMarkdown(html, { url: 'https://example.com', filterOptions: {} });
+
+    // Verify content was produced (truncated HTML is still parseable)
+    expect(result).toBeDefined();
+    expect(result.content).toBeDefined();
+    // Compare against the text payload rather than the full HTML: stripping the tags alone
+    // already makes the output shorter than `html`, so only this bound shows that the
+    // input was actually truncated.
+    expect(result.content.length).toBeLessThan(largeContent.length);
+  }, 20000);
+
+  it('should not truncate HTML under 1 MB', () => {
+    const smallHtml = '<html><body><p>Small content</p></body></html>';
+    const options = { url: 'https://example.com', filterOptions: {} };
+
+    const converted = htmlToMarkdown(smallHtml, options);
+
+    // A short document must come through with its text intact.
+    expect(converted).toBeDefined();
+    expect(converted.content).toContain('Small content');
+  });
 });
--- a/packages/web-crawler/src/utils/htmlToMarkdown.ts
+++ b/packages/web-crawler/src/utils/htmlToMarkdown.ts
@ -5,6 +5,9 @@ import { NodeHtmlMarkdown } from 'node-html-markdown';

 import type { FilterOptions } from '../type';

+/** Maximum HTML length, in string characters (about 1 MB for ASCII), kept before DOM parsing to prevent CPU spikes on large pages */
+const MAX_HTML_SIZE = 1024 * 1024;
+
 const cleanObj = <T extends object>(
  obj: T,
 ): {
@ -24,9 +27,10 @@ interface HtmlToMarkdownOutput {
 }

 export const htmlToMarkdown = (
-  html: string,
+  rawHtml: string,
  { url, filterOptions }: { filterOptions: FilterOptions; url: string },
 ): HtmlToMarkdownOutput => {
+  const html = rawHtml.length > MAX_HTML_SIZE ? rawHtml.slice(0, MAX_HTML_SIZE) : rawHtml;
  const window = new Window({ url });

  const document = window.document;
--- a/packages/web-crawler/src/utils/response.ts
+++ b/packages/web-crawler/src/utils/response.ts
@ -0,0 +1,49 @@
+/** Upper bound on the number of characters of a response body quoted in error messages. */
+const ERROR_BODY_SNIPPET_LIMIT = 200;
+
+/** Collapse every run of whitespace into a single space and trim both ends. */
+const normalizeBodySnippet = (body: string) => {
+  const collapsed = body.replaceAll(/\s+/g, ' ');
+
+  return collapsed.trim();
+};
+
+/**
+ * Raised when a provider response body cannot be parsed as JSON.
+ * The message names the provider and quotes the body snippet when one is given.
+ */
+export class ResponseBodyParseError extends Error {
+  constructor(provider: string, bodySnippet?: string) {
+    const message = bodySnippet
+      ? `${provider} returned non-JSON response: ${bodySnippet}`
+      : `${provider} returned non-JSON response`;
+
+    super(message);
+    this.name = 'ResponseBodyParseError';
+  }
+}
+
+/**
+ * Read the response body as text and return a whitespace-normalized excerpt of at most
+ * ERROR_BODY_SNIPPET_LIMIT characters. Resolves to `undefined` when the excerpt is empty
+ * or when reading the body throws.
+ */
+const getBodySnippet = async (response: Response): Promise<string | undefined> => {
+  try {
+    const rawBody = await response.text();
+    const excerpt = normalizeBodySnippet(rawBody).slice(0, ERROR_BODY_SNIPPET_LIMIT);
+
+    if (excerpt.length === 0) return undefined;
+
+    return excerpt;
+  } catch {
+    // Best effort only: an unreadable body just means there is no snippet to report.
+    return undefined;
+  }
+};
+
+/**
+ * Parse the response body as JSON and return it typed as `T`.
+ * A clone is taken before reading so that, if parsing throws, the body can still be read
+ * as text to build the snippet carried by the thrown error.
+ *
+ * @param response Response whose body should be parsed
+ * @param provider Provider name used in the error message
+ * @throws ResponseBodyParseError when `response.json()` throws
+ */
+export const parseJSONResponse = async <T>(response: Response, provider: string): Promise<T> => {
+  const fallbackCopy = response.clone();
+  let parsed: unknown;
+
+  try {
+    parsed = await response.json();
+  } catch {
+    throw new ResponseBodyParseError(provider, await getBodySnippet(fallbackCopy));
+  }
+
+  return parsed as T;
+};
+
+/**
+ * Build (but do not throw) an Error describing a failed HTTP response.
+ * The message carries the provider name, status code and status text, followed by a
+ * snippet of the response body when one could be read.
+ *
+ * @param response Response to describe; its body is consumed
+ * @param provider Provider name used as message prefix
+ */
+export const createHTTPStatusError = async (
+  response: Response,
+  provider: string,
+): Promise<Error> => {
+  const excerpt = await getBodySnippet(response);
+
+  if (!excerpt) {
+    return new Error(
+      `${provider} request failed with status ${response.status}: ${response.statusText}`,
+    );
+  }
+
+  return new Error(
+    `${provider} request failed with status ${response.status}: ${response.statusText}. Response: ${excerpt}`,
+  );
+};
--- a/packages/web-crawler/src/utils/withTimeout.ts
+++ b/packages/web-crawler/src/utils/withTimeout.ts
@ -3,19 +3,28 @@ import { TimeoutError } from './errorType';
 export const DEFAULT_TIMEOUT = 10_000;

 /**
- * Wraps a promise with a timeout
- * @param promise Promise to wrap
+ * Wraps a factory function with a timeout and abort support.
+ * The factory receives an AbortSignal that is aborted on timeout,
+ * allowing the underlying request (e.g. fetch) to be properly cancelled.
+ * The timer is cleared as soon as the factory's promise settles, and also when the
+ * factory throws synchronously, so no stray timer or unhandled rejection is left behind.
+ * @param fn Factory function that receives an AbortSignal and returns a Promise
 * @param ms Timeout in milliseconds
- * @returns Promise that will be rejected if it takes longer than ms to resolve
+ * @returns Promise settling with the factory's outcome, or rejecting with TimeoutError after ms
 */
-export const withTimeout = <T>(promise: Promise<T>, ms: number = DEFAULT_TIMEOUT): Promise<T> => {
+export const withTimeout = <T>(
+  fn: (signal: AbortSignal) => Promise<T>,
+  ms: number = DEFAULT_TIMEOUT,
+): Promise<T> => {
  const controller = new AbortController();
+  let timeoutId: ReturnType<typeof setTimeout> | undefined;
+
  const timeoutPromise = new Promise<T>((_, reject) => {
-    setTimeout(() => {
+    timeoutId = setTimeout(() => {
      controller.abort();
      reject(new TimeoutError(`Request timeout after ${ms}ms`));
    }, ms);
  });

-  return Promise.race([promise, timeoutPromise]);
+  let pending: Promise<T>;
+  try {
+    pending = fn(controller.signal);
+  } catch (error) {
+    // The factory threw before returning a promise. The timer is already armed, and
+    // nothing will ever observe `timeoutPromise`, so leaving it running would later
+    // surface as an unhandled TimeoutError rejection. Disarm it and rethrow as before.
+    clearTimeout(timeoutId);
+    throw error;
+  }
+
+  return Promise.race([pending.finally(() => clearTimeout(timeoutId)), timeoutPromise]);
 };
--- a/src/envs/tools.ts
+++ b/src/envs/tools.ts
@ -1,15 +1,25 @@
 import { createEnv } from '@t3-oss/env-nextjs';
 import { z } from 'zod';

+/**
+ * Build a schema for an optional integer environment variable bounded by `min` and `max`.
+ * An empty string or `null` is mapped to `undefined` before validation, so it is
+ * treated the same as an unset variable.
+ */
+const optionalNumberEnv = (min: number, max: number) => {
+  const boundedInt = z.coerce.number().int().max(max).min(min).optional();
+
+  return z.preprocess((value) => {
+    if (value === '' || value === null) return undefined;
+
+    return value;
+  }, boundedInt);
+};
+
 export const getToolsConfig = () => {
  return createEnv({
    runtimeEnv: {
+      CRAWL_CONCURRENCY: process.env.CRAWL_CONCURRENCY,
+      CRAWLER_RETRY: process.env.CRAWLER_RETRY,
      CRAWLER_IMPLS: process.env.CRAWLER_IMPLS,
      SEARCH_PROVIDERS: process.env.SEARCH_PROVIDERS,
      SEARXNG_URL: process.env.SEARXNG_URL,
    },

    server: {
+      CRAWL_CONCURRENCY: optionalNumberEnv(1, 10),
+      CRAWLER_RETRY: optionalNumberEnv(0, 3),
      CRAWLER_IMPLS: z.string().optional(),
      SEARCH_PROVIDERS: z.string().optional(),
      SEARXNG_URL: z.string().url().optional(),
--- a/src/server/routers/tools/search.test.ts
+++ b/src/server/routers/tools/search.test.ts
@ -46,6 +46,27 @@ describe('searchRouter', () => {
      expect(result.results[1]).toEqual({ content: 'test content' });
    });

+    it('should accept all supported crawler implementations', async () => {
+      const caller = searchRouter.createCaller(mockContext as any);
+      const supportedImpls = [
+        'browserless',
+        'exa',
+        'firecrawl',
+        'jina',
+        'naive',
+        'search1api',
+        'tavily',
+      ] as const;
+
+      // Each implementation name must pass input validation when requested on its own.
+      for (const impl of supportedImpls) {
+        const { results } = await caller.crawlPages({
+          impls: [impl],
+          urls: ['http://test.com'],
+        });
+
+        expect(results).toHaveLength(1);
+      }
+    });
+
    it('should work without specifying impls', async () => {
      const caller = searchRouter.createCaller(mockContext as any);

--- a/src/server/routers/tools/search.ts
+++ b/src/server/routers/tools/search.ts
@ -9,7 +9,10 @@ export const searchRouter = router({
  crawlPages: searchProcedure
    .input(
      z.object({
-        impls: z.enum(['jina', 'naive', 'browserless']).array().optional(),
+        impls: z
+          .enum(['browserless', 'exa', 'firecrawl', 'jina', 'naive', 'search1api', 'tavily'])
+          .array()
+          .optional(),
        urls: z.string().array(),
      }),
    )
--- a/src/server/services/search/index.test.ts
+++ b/src/server/services/search/index.test.ts
@ -3,7 +3,7 @@ import { beforeEach, describe, expect, it, vi } from 'vitest';

 import { toolsEnv } from '@/envs/tools';

-import { createSearchServiceImpl,SearchImplType } from './impls';
+import { createSearchServiceImpl, SearchImplType } from './impls';
 import { SearchService } from './index';

 // Mock dependencies
@ -11,7 +11,9 @@ vi.mock('@lobechat/web-crawler');
 vi.mock('./impls');
 vi.mock('@/envs/tools', () => ({
  toolsEnv: {
+    CRAWL_CONCURRENCY: undefined,
    CRAWLER_IMPLS: '',
+    CRAWLER_RETRY: undefined,
    SEARCH_PROVIDERS: '',
  },
 }));
@ -279,10 +281,9 @@ describe('SearchService', () => {
  describe('crawlPages', () => {
    it('should crawl multiple pages concurrently', async () => {
      const mockCrawlResult = {
-        content: 'Page content',
-        description: 'Page description',
-        title: 'Page title',
-        url: 'https://example.com',
+        crawler: 'naive',
+        data: { content: 'Page content', contentType: 'text' },
+        originalUrl: 'https://example.com',
      };

      const mockCrawler = {
@ -304,8 +305,13 @@ describe('SearchService', () => {
    it('should use crawler implementations from env', async () => {
      vi.mocked(toolsEnv).CRAWLER_IMPLS = 'jina,reader';

+      const mockSuccessResult = {
+        crawler: 'jina',
+        data: { content: 'ok', contentType: 'text' },
+        originalUrl: 'https://example.com',
+      };
      const mockCrawler = {
-        crawl: vi.fn().mockResolvedValue({}),
+        crawl: vi.fn().mockResolvedValue(mockSuccessResult),
      };
      vi.mocked(Crawler).mockImplementation(() => mockCrawler as any);

@ -317,8 +323,13 @@ describe('SearchService', () => {
    });

    it('should pass impls parameter to crawler.crawl', async () => {
+      const mockSuccessResult = {
+        crawler: 'jina',
+        data: { content: 'ok', contentType: 'text' },
+        originalUrl: 'https://example.com',
+      };
      const mockCrawler = {
-        crawl: vi.fn().mockResolvedValue({}),
+        crawl: vi.fn().mockResolvedValue(mockSuccessResult),
      };
      vi.mocked(Crawler).mockImplementation(() => mockCrawler as any);

@ -334,5 +345,133 @@ describe('SearchService', () => {
        url: 'https://example.com',
      });
    });
+
+    it('should use CRAWL_CONCURRENCY from env', async () => {
+      vi.mocked(toolsEnv).CRAWL_CONCURRENCY = 1;
+
+      const crawledPage = {
+        crawler: 'naive',
+        data: { content: 'ok', contentType: 'text' },
+        originalUrl: 'https://example.com',
+      };
+      const crawl = vi.fn().mockResolvedValue(crawledPage);
+      vi.mocked(Crawler).mockImplementation(() => ({ crawl }) as any);
+
+      searchService = new SearchService();
+      await searchService.crawlPages({ urls: ['https://a.com', 'https://b.com'] });
+
+      // Limiting concurrency must not drop any URL: both are still crawled.
+      expect(crawl).toHaveBeenCalledTimes(2);
+    });
+
+    it('should retry on failed crawl results', async () => {
+      vi.mocked(toolsEnv).CRAWLER_RETRY = 1;
+
+      const firstAttempt = {
+        crawler: 'naive',
+        data: { content: 'Fail', errorType: 'NetworkError', errorMessage: 'timeout' },
+        originalUrl: 'https://example.com',
+      };
+      const secondAttempt = {
+        crawler: 'naive',
+        data: { content: 'Page content', contentType: 'text' },
+        originalUrl: 'https://example.com',
+      };
+
+      // First call yields a failed result, the retry yields a successful one.
+      const crawl = vi
+        .fn()
+        .mockResolvedValueOnce(firstAttempt)
+        .mockResolvedValueOnce(secondAttempt);
+      vi.mocked(Crawler).mockImplementation(() => ({ crawl }) as any);
+
+      searchService = new SearchService();
+      const { results } = await searchService.crawlPages({ urls: ['https://example.com'] });
+
+      expect(crawl).toHaveBeenCalledTimes(2);
+      expect(results[0]).toBe(secondAttempt);
+    });
+
+    it('should return last failed result after all retries exhausted', async () => {
+      vi.mocked(toolsEnv).CRAWLER_RETRY = 1;
+
+      const alwaysFailing = {
+        crawler: 'naive',
+        data: { content: 'Fail', errorType: 'NetworkError', errorMessage: 'timeout' },
+        originalUrl: 'https://example.com',
+      };
+      const crawl = vi.fn().mockResolvedValue(alwaysFailing);
+      vi.mocked(Crawler).mockImplementation(() => ({ crawl }) as any);
+
+      searchService = new SearchService();
+      const { results } = await searchService.crawlPages({ urls: ['https://example.com'] });
+
+      // One initial attempt plus one retry, then the failed result is handed back as is.
+      expect(crawl).toHaveBeenCalledTimes(2);
+      expect(results[0]).toBe(alwaysFailing);
+    });
+
+    it('should not retry when CRAWLER_RETRY is 0', async () => {
+      vi.mocked(toolsEnv).CRAWLER_RETRY = 0;
+
+      const alwaysFailing = {
+        crawler: 'naive',
+        data: { content: 'Fail', errorType: 'Error', errorMessage: 'fail' },
+        originalUrl: 'https://example.com',
+      };
+      const crawl = vi.fn().mockResolvedValue(alwaysFailing);
+      vi.mocked(Crawler).mockImplementation(() => ({ crawl }) as any);
+
+      searchService = new SearchService();
+      const { results } = await searchService.crawlPages({ urls: ['https://example.com'] });
+
+      // With retries disabled only the initial attempt is made.
+      expect(crawl).toHaveBeenCalledTimes(1);
+      expect(results[0]).toBe(alwaysFailing);
+    });
+
+    it('should handle crawl exceptions during retry', async () => {
+      vi.mocked(toolsEnv).CRAWLER_RETRY = 1;
+
+      const crawl = vi.fn().mockRejectedValue(new Error('Network error'));
+      vi.mocked(Crawler).mockImplementation(() => ({ crawl }) as any);
+
+      searchService = new SearchService();
+      const { results } = await searchService.crawlPages({ urls: ['https://example.com'] });
+
+      // Every attempt throws, so the service reports the error instead of rejecting.
+      expect(crawl).toHaveBeenCalledTimes(2);
+      expect(results[0].data).toMatchObject({
+        errorMessage: 'Network error',
+        errorType: 'Error',
+      });
+    });
+
+    it('should detect successful results by contentType presence', async () => {
+      vi.mocked(toolsEnv).CRAWLER_RETRY = 1;
+
+      const crawledPage = {
+        crawler: 'naive',
+        data: { content: 'Page content', contentType: 'text' },
+        originalUrl: 'https://example.com',
+      };
+      const crawl = vi.fn().mockResolvedValue(crawledPage);
+      vi.mocked(Crawler).mockImplementation(() => ({ crawl }) as any);
+
+      searchService = new SearchService();
+      const { results } = await searchService.crawlPages({ urls: ['https://example.com'] });
+
+      // Should not retry since result has contentType (successful)
+      expect(crawl).toHaveBeenCalledTimes(1);
+      expect(results[0]).toBe(crawledPage);
+    });
  });
 });
--- a/src/server/services/search/index.ts
+++ b/src/server/services/search/index.ts
@ -1,5 +1,5 @@
-import { type SearchParams, type SearchQuery } from '@lobechat/types';
-import { type CrawlImplType } from '@lobechat/web-crawler';
+import type { SearchParams, SearchQuery } from '@lobechat/types';
+import type { Crawler, CrawlImplType, CrawlUniformResult } from '@lobechat/web-crawler';
 import pMap from 'p-map';

 import { toolsEnv } from '@/envs/tools';
@ -7,6 +7,9 @@ import { toolsEnv } from '@/envs/tools';
 import { type SearchImplType, type SearchServiceImpl } from './impls';
 import { createSearchServiceImpl } from './impls';

+/** Concurrency used for crawling pages when `CRAWL_CONCURRENCY` is not set. */
+const DEFAULT_CRAWL_CONCURRENCY = 3;
+/** Number of extra crawl attempts per URL when `CRAWLER_RETRY` is not set. */
+const DEFAULT_CRAWLER_RETRY = 1;
+
 const parseImplEnv = (envString: string = '') => {
  // Handle full-width commas and extra whitespace
  const envValue = envString.replaceAll('，', ',').trim();
@ -24,6 +27,14 @@ export class SearchService {
    return parseImplEnv(toolsEnv.CRAWLER_IMPLS);
  }

+  /** Concurrency for page crawling: the env value when set, otherwise the default. */
+  private get crawlConcurrency() {
+    const configured = toolsEnv.CRAWL_CONCURRENCY;
+
+    return configured ?? DEFAULT_CRAWL_CONCURRENCY;
+  }
+
+  /** Number of retries per URL: the env value when set, otherwise the default. */
+  private get crawlerRetry() {
+    const configured = toolsEnv.CRAWLER_RETRY;
+
+    return configured ?? DEFAULT_CRAWLER_RETRY;
+  }
+
  constructor() {
    const impls = this.searchImpls;
    // TODO: need use turn mode
@ -37,14 +48,59 @@ export class SearchService {
    const results = await pMap(
      input.urls,
      async (url) => {
-        return await crawler.crawl({ impls: input.impls, url });
+        return await this.crawlWithRetry(crawler, url, input.impls);
      },
-      { concurrency: 3 },
+      { concurrency: this.crawlConcurrency },
    );

    return { results };
  }

+  /**
+   * Crawl a single URL, repeating the crawl until it yields a non-failed result or the
+   * attempt budget (`crawlerRetry + 1`) is used up.
+   *
+   * When no attempt succeeds, the most recent result returned by the crawler is handed
+   * back; if every attempt threw, a synthetic failed result describing the last error
+   * is returned instead. Errors thrown by the crawler are never propagated.
+   */
+  private async crawlWithRetry(
+    crawler: Crawler,
+    url: string,
+    impls?: CrawlImplType[],
+  ): Promise<CrawlUniformResult> {
+    let latestResult: CrawlUniformResult | undefined;
+    let latestError: Error | undefined;
+
+    for (let attemptsLeft = this.crawlerRetry + 1; attemptsLeft >= 1; attemptsLeft--) {
+      try {
+        const outcome = await crawler.crawl({ impls, url });
+        latestResult = outcome;
+
+        const succeeded = !this.isFailedCrawlResult(outcome);
+        if (succeeded) return outcome;
+      } catch (error) {
+        // Remember the failure and move on to the next attempt.
+        latestError = error as Error;
+      }
+    }
+
+    if (latestResult) return latestResult;
+
+    // No attempt produced a result at all: describe the last thrown error.
+    const errorType = latestError?.name || 'UnknownError';
+    const errorMessage = latestError?.message;
+
+    return {
+      crawler: 'unknown',
+      data: {
+        content: `Fail to crawl the page. Error type: ${errorType}, error message: ${errorMessage}`,
+        errorMessage,
+        errorType,
+      },
+      originalUrl: url,
+    };
+  }
+
+  /**
+   * Classify a crawl result by the shape of `result.data`: it counts as failed exactly
+   * when `data` has no `contentType` key. Successful results carry `contentType`
+   * (e.g. 'text', 'json'); failed ones carry `errorType`/`errorMessage` instead.
+   */
+  private isFailedCrawlResult(result: CrawlUniformResult): boolean {
+    const hasContentType = 'contentType' in result.data;
+
+    return !hasContentType;
+  }
+
  private get searchImpls() {
    return parseImplEnv(toolsEnv.SEARCH_PROVIDERS) as SearchImplType[];
  }
@ -58,17 +114,17 @@ export class SearchService {

  async webSearch({ query, searchCategories, searchEngines, searchTimeRange }: SearchQuery) {
    let data = await this.query(query, {
-      searchCategories: searchCategories,
-      searchEngines: searchEngines,
-      searchTimeRange: searchTimeRange,
+      searchCategories,
+      searchEngines,
+      searchTimeRange,
    });

    // First retry: remove search engine restrictions if no results found
    if (data.results.length === 0 && searchEngines && searchEngines?.length > 0) {
      const paramsExcludeSearchEngines = {
-        searchCategories: searchCategories,
+        searchCategories,
        searchEngines: undefined,
-        searchTimeRange: searchTimeRange,
+        searchTimeRange,
      };
      data = await this.query(query, paramsExcludeSearchEngines);
    }