fix: Lazy load heavy imports and add future guidance (#26903)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-21 15:47:20 +00:00 · 2026-03-16 09:51:25 +00:00 · 2026-03-16 09:51:25 +00:00 · 2d6a0e1041
commit 2d6a0e1041
parent af0ac3ff3a
9 changed files with 104 additions and 55 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@ -126,6 +126,9 @@ const children = getChildNodes(workflow.connections, 'NodeName', 'main', 1);
 - **NEVER use `any` type** - use proper types or `unknown`
 - **Avoid type casting with `as`** - use type guards or type predicates instead (except in test code where `as` is acceptable)
 - **Define shared interfaces in `@n8n/api-types`** package for FE/BE communication
+- **Lazy-load heavy modules** — if a module is only used in a specific code
+  path (not every request), use `await import()` at point of use instead of
+  top-level `import`. Applies especially to native modules and large parsers.

 ### Error Handling
 - Don't use `ApplicationError` class in CLI and nodes for throwing errors,
--- a/cubic.yaml
+++ b/cubic.yaml
@ -49,21 +49,6 @@ reviews:
    If one exists, suggest using it. If none exist, ask why the hard-coded
    value is required.
  custom_rules:
-    - name: Tests
-      description: |-
-        BE REASONABLE when evaluating test coverage:
-
-        **PASS if:**
-
-        - Core functionality has tests
-        - Critical paths are tested
-        - Coverage is reasonable (not necessarily 100%)
-
-        **DO NOT require tests for:**
-
-        - Exports, types, configs
-        - Metadata files
-        - Version files
    - name: Security Review
      description: |-
        Proactively review PRs and flag security vulnerabilities specific to n8n's architecture:
@ -195,6 +180,34 @@ reviews:
        - Higher scrutiny for: expression engine, credential handling, code execution nodes, license enforcement, SSO integrations
        - Consider n8n's security audit categories: credentials, database, nodes, instance, filesystem risks
        - Community/custom nodes have higher risk profile than official nodes
+    - name: Quality & Performance Review
+      description: |-
+        You are a Staff Quality Engineer reviewing this PR for production
+        readiness. You think in terms of blast radius, resource cost, and
+        failure modes. You are pragmatic, not pedantic.
+
+        Below is a curated list of known issues from real incidents and
+        review standards the team enforces. Review the diff against each
+        item. If none match, say nothing. If one matches, explain the
+        concrete risk and suggest a fix. Never flag existing unchanged code.
+
+        ## Known Issues
+
+        ### 1. Eager imports of heavy or native modules
+        Top-level `import` of modules only used in a specific code path loads
+        them into every process at startup, increasing baseline memory.
+        Native modules (e.g. `isolated-vm`) can crash instances that lack the
+        binary. Large parsers (e.g. `jsdom` at ~16 MB heap) waste memory when
+        the code path is rarely hit.
+        **Fix:** Use `await import()` at point of use. For barrel files, use
+        `export type` instead of value re-exports when consumers only need
+        the type.
+
+        ### 2. Unreasonable test coverage expectations
+        Be pragmatic about test requirements. Pass if core functionality and
+        critical paths are tested at reasonable coverage. Do NOT require tests
+        for exports, types, configs, metadata files, or version files. Let
+        humans handle edge cases.
    - name: Design System Tokens
      description: |-
        For any hard-coded visual values in CSS (colors, shadows, spacing),
--- a/packages/@n8n/ai-workflow-builder.ee/src/tools/test/web-fetch.tool.test.ts
+++ b/packages/@n8n/ai-workflow-builder.ee/src/tools/test/web-fetch.tool.test.ts
@ -139,7 +139,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test Page',
 				content: 'Extracted content',
 				truncated: false,
@ -174,7 +174,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test Page',
 				content: 'Extracted content',
 				truncated: false,
@ -228,7 +228,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test',
 				content: 'Content',
 				truncated: false,
@ -260,7 +260,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test',
 				content: 'Content',
 				truncated: false,
@ -338,7 +338,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test Page',
 				content: 'Extracted readable content',
 				truncated: false,
@ -364,7 +364,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Long Page',
 				content: 'Truncated content',
 				truncated: true,
@ -387,7 +387,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Page',
 				content: 'Content',
 				truncated: false,
@ -460,7 +460,7 @@ describe('web_fetch tool', () => {
 				action: 'allow_once',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Redirected Page',
 				content: 'Redirected content',
 				truncated: false,
@ -540,7 +540,7 @@ describe('web_fetch tool', () => {
 				action: 'allow_domain',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test',
 				content: 'Content',
 				truncated: false,
@ -572,7 +572,7 @@ describe('web_fetch tool', () => {
 					contentType: 'text/html',
 				});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Docs',
 				content: 'Documentation content',
 				truncated: false,
@ -627,7 +627,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test',
 				content: 'Content',
 				truncated: false,
@ -656,7 +656,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test',
 				content: 'Content',
 				truncated: false,
@ -691,7 +691,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test',
 				content: 'Content',
 				truncated: false,
@ -751,7 +751,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test',
 				content: 'Content',
 				truncated: false,
@ -788,7 +788,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test',
 				content: 'Content',
 				truncated: false,
@ -820,7 +820,7 @@ describe('web_fetch tool', () => {
 				contentType: 'text/html',
 			});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test',
 				content: 'Content',
 				truncated: false,
@ -853,7 +853,7 @@ describe('web_fetch tool', () => {
 					contentType: 'text/html',
 				});

-			mockExtractReadableContent.mockReturnValue({
+			mockExtractReadableContent.mockResolvedValue({
 				title: 'Test',
 				content: 'Redirected content',
 				truncated: false,
--- a/packages/@n8n/ai-workflow-builder.ee/src/tools/utils/test/web-fetch.utils.test.ts
+++ b/packages/@n8n/ai-workflow-builder.ee/src/tools/utils/test/web-fetch.utils.test.ts
@ -241,7 +241,7 @@ describe('web-fetch.utils', () => {
 	});

 	describe('extractReadableContent', () => {
-		it('should extract title and Markdown-formatted content from HTML', () => {
+		it('should extract title and Markdown-formatted content from HTML', async () => {
 			const html = `
 				<html>
 				<head><title>Test Page</title></head>
@ -256,7 +256,7 @@ describe('web-fetch.utils', () => {
 				</html>
 			`;

-			const result = extractReadableContent(html, 'https://example.com/page');
+			const result = await extractReadableContent(html, 'https://example.com/page');
 			expect(result.title).toBe('Test Page');
 			expect(result.content).toContain('main content');
 			// Verify Markdown formatting is preserved
@ -266,14 +266,14 @@ describe('web-fetch.utils', () => {
 			expect(result.truncated).toBe(false);
 		});

-		it('should handle HTML with no readable content', () => {
+		it('should handle HTML with no readable content', async () => {
 			const html = '<html><body></body></html>';
-			const result = extractReadableContent(html, 'https://example.com/empty');
+			const result = await extractReadableContent(html, 'https://example.com/empty');
 			expect(result.content).toBe('');
 			expect(result.truncated).toBe(false);
 		});

-		it('should truncate content exceeding max chars', () => {
+		it('should truncate content exceeding max chars', async () => {
 			// Generate content longer than WEB_FETCH_MAX_CONTENT_CHARS (30000)
 			const longText = 'A'.repeat(40_000);
 			const html = `
@ -287,7 +287,7 @@ describe('web-fetch.utils', () => {
 				</html>
 			`;

-			const result = extractReadableContent(html, 'https://example.com/long');
+			const result = await extractReadableContent(html, 'https://example.com/long');
 			expect(result.truncated).toBe(true);
 			expect(result.truncateReason).toContain('30000');
 			expect(result.content.length).toBeLessThanOrEqual(30_000);
--- a/packages/@n8n/ai-workflow-builder.ee/src/tools/utils/web-fetch.utils.ts
+++ b/packages/@n8n/ai-workflow-builder.ee/src/tools/utils/web-fetch.utils.ts
@ -1,8 +1,5 @@
 import { HumanMessage, type BaseMessage } from '@langchain/core/messages';
-import { Readability } from '@mozilla/readability';
 import dns from 'dns';
-import { JSDOM, VirtualConsole } from 'jsdom';
-import TurndownService from 'turndown';
 import { promisify } from 'util';

 import {
@ -318,8 +315,12 @@ export async function fetchUrl(url: string, signal?: AbortSignal): Promise<Fetch

 /**
 * Extract readable content from HTML using JSDOM + Readability.
+ * Libraries are lazy-loaded to avoid pulling jsdom (~15-20MB) into memory at startup.
 */
-export function extractReadableContent(html: string, url: string): ExtractedContent {
+export async function extractReadableContent(html: string, url: string): Promise<ExtractedContent> {
+	const [{ JSDOM, VirtualConsole }, { Readability }, { default: TurndownService }] =
+		await Promise.all([import('jsdom'), import('@mozilla/readability'), import('turndown')]);
+
 	const virtualConsole = new VirtualConsole();
 	const dom = new JSDOM(html, { url, virtualConsole });
 	const article = new Readability(dom.window.document, { keepClasses: true }).parse();
--- a/packages/@n8n/ai-workflow-builder.ee/src/tools/web-fetch.tool.ts
+++ b/packages/@n8n/ai-workflow-builder.ee/src/tools/web-fetch.tool.ts
@ -244,7 +244,10 @@ export function createWebFetchTool(createSecurity: () => WebFetchSecurityManager

 				// 5. Extract readable content
 				reporter.progress('Extracting content...');
-				const extracted = extractReadableContent(fetchResult.body, fetchResult.finalUrl ?? url);
+				const extracted = await extractReadableContent(
+					fetchResult.body,
+					fetchResult.finalUrl ?? url,
+				);

 				// 6. Build response
 				const responseLines = [
--- a/packages/@n8n/expression-runtime/src/bridge/isolated-vm-bridge.ts
+++ b/packages/@n8n/expression-runtime/src/bridge/isolated-vm-bridge.ts
@ -1,9 +1,23 @@
-import ivm from 'isolated-vm';
+import type ivm from 'isolated-vm';
 import { readFile } from 'node:fs/promises';
 import * as path from 'node:path';
 import type { RuntimeBridge, BridgeConfig, ExecuteOptions } from '../types';
 import { DEFAULT_BRIDGE_CONFIG, TimeoutError, MemoryLimitError } from '../types';

+// Lazy-loaded isolated-vm — avoids loading the native binary when the barrel
+// file is statically imported (e.g. for error classes). The native module is
+// only loaded when IsolatedVmBridge is actually constructed.
+type IsolatedVm = typeof import('isolated-vm');
+let _ivm: IsolatedVm | null = null;
+
+function getIvm(): IsolatedVm {
+	if (!_ivm) {
+		// eslint-disable-next-line @typescript-eslint/no-require-imports
+		_ivm = require('isolated-vm') as IsolatedVm;
+	}
+	return _ivm;
+}
+
 const BUNDLE_RELATIVE_PATH = path.join('dist', 'bundle', 'runtime.iife.js');

 /**
@ -68,7 +82,7 @@ export class IsolatedVmBridge implements RuntimeBridge {

 		// Create isolate with memory limit
 		// Note: memoryLimit is in MB
-		this.isolate = new ivm.Isolate({ memoryLimit: this.config.memoryLimit });
+		this.isolate = new (getIvm().Isolate)({ memoryLimit: this.config.memoryLimit });
 	}

 	/**
@ -300,7 +314,7 @@ export class IsolatedVmBridge implements RuntimeBridge {

 		// Callback 1: Get value/metadata at path
 		// Used by createDeepLazyProxy when accessing properties
-		const getValueAtPath = new ivm.Reference((path: string[]) => {
+		const getValueAtPath = new (getIvm().Reference)((path: string[]) => {
 			// Navigate to value
 			let value: unknown = data;
 			for (const key of path) {
@ -343,7 +357,7 @@ export class IsolatedVmBridge implements RuntimeBridge {

 		// Callback 2: Get array element at index
 		// Used by array proxy when accessing numeric indices
-		const getArrayElement = new ivm.Reference((path: string[], index: number) => {
+		const getArrayElement = new (getIvm().Reference)((path: string[], index: number) => {
 			// Navigate to array
 			let arr: unknown = data;
 			for (const key of path) {
@ -380,7 +394,7 @@ export class IsolatedVmBridge implements RuntimeBridge {

 		// Callback 3: Call function at path with arguments
 		// Used when expressions invoke functions from workflow data
-		const callFunctionAtPath = new ivm.Reference((path: string[], ...args: unknown[]) => {
+		const callFunctionAtPath = new (getIvm().Reference)((path: string[], ...args: unknown[]) => {
 			// Navigate to function, tracking parent to preserve `this` context
 			let fn: unknown = data;
 			let parent: unknown = undefined;
--- a/packages/@n8n/expression-runtime/src/index.ts
+++ b/packages/@n8n/expression-runtime/src/index.ts
@ -1,7 +1,8 @@
 // Main exports
 export { ExpressionEvaluator } from './evaluator/expression-evaluator';

-// Bridge exports
+// Bridge exports — IsolatedVmBridge lazy-loads isolated-vm internally,
+// so this value re-export does NOT pull in the native binary at import time.
 export { IsolatedVmBridge } from './bridge/isolated-vm-bridge';

 // Types
--- a/packages/nodes-base/nodes/Merge/v3/helpers/sandbox-utils.ts
+++ b/packages/nodes-base/nodes/Merge/v3/helpers/sandbox-utils.ts
@ -1,13 +1,25 @@
 import { readFile } from 'node:fs/promises';
 import { randomUUID } from 'node:crypto';

-import ivm from 'isolated-vm';
-
 import type { IDataObject } from 'n8n-workflow';
+import type IsolatedVM from 'isolated-vm';
+
+// Lazy-loaded isolated-vm — avoids loading the native module at startup.
+// The module is only loaded when combineBySql is actually used.
+type IsolatedVm = typeof IsolatedVM;
+let _ivm: IsolatedVm | null = null;
+
+async function getIvm(): Promise<IsolatedVm> {
+	if (!_ivm) {
+		const mod = await import('isolated-vm');
+		_ivm = mod.default;
+	}
+	return _ivm!;
+}

 // Singleton – recreated only after resetSandboxCache() (tests) or isolate disposal.
-let sandboxIsolate: ivm.Isolate | null = null;
-let sandboxContext: ivm.Context | null = null;
+let sandboxIsolate: IsolatedVM.Isolate | null = null;
+let sandboxContext: IsolatedVM.Context | null = null;

 /** Disposes the cached isolate. Exposed for tests only. */
 export function resetSandboxCache(): void {
@ -19,11 +31,13 @@ export function resetSandboxCache(): void {
 }

 /** Returns a cached isolated-vm context with alasql pre-loaded. Creates it on first call. */
-export async function loadAlaSqlSandbox(): Promise<ivm.Context> {
+export async function loadAlaSqlSandbox(): Promise<IsolatedVM.Context> {
 	if (sandboxContext && sandboxIsolate && !sandboxIsolate.isDisposed) {
 		return sandboxContext;
 	}

+	const ivm = await getIvm();
+
 	sandboxIsolate = new ivm.Isolate({ memoryLimit: 64 }); // 64 MB hard limit
 	sandboxContext = await sandboxIsolate.createContext();

@ -41,7 +55,7 @@ export async function loadAlaSqlSandbox(): Promise<ivm.Context> {
 * Only JSON-serialisable values cross the isolate boundary.
 */
 export async function runAlaSqlInSandbox(
-	context: ivm.Context,
+	context: IsolatedVM.Context,
 	tableData: unknown[][],
 	query: string,
 ): Promise<IDataObject[]> {