fix: Lazy load heavy imports and add future guidance (#26903)

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Declan Carroll 2026-03-16 09:51:25 +00:00 committed by GitHub
parent af0ac3ff3a
commit 2d6a0e1041
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 104 additions and 55 deletions

View file

@ -126,6 +126,9 @@ const children = getChildNodes(workflow.connections, 'NodeName', 'main', 1);
- **NEVER use `any` type** - use proper types or `unknown`
- **Avoid type casting with `as`** - use type guards or type predicates instead (except in test code where `as` is acceptable)
- **Define shared interfaces in `@n8n/api-types`** package for FE/BE communication
- **Lazy-load heavy modules** — if a module is only used in a specific code
path (not every request), use `await import()` at point of use instead of
top-level `import`. Applies especially to native modules and large parsers.
### Error Handling
- Don't use `ApplicationError` class in CLI and nodes for throwing errors,

View file

@ -49,21 +49,6 @@ reviews:
If one exists, suggest using it. If none exist, ask why the hard-coded
value is required.
custom_rules:
- name: Tests
description: |-
BE REASONABLE when evaluating test coverage:
**PASS if:**
- Core functionality has tests
- Critical paths are tested
- Coverage is reasonable (not necessarily 100%)
**DO NOT require tests for:**
- Exports, types, configs
- Metadata files
- Version files
- name: Security Review
description: |-
Proactively review PRs and flag security vulnerabilities specific to n8n's architecture:
@ -195,6 +180,34 @@ reviews:
- Higher scrutiny for: expression engine, credential handling, code execution nodes, license enforcement, SSO integrations
- Consider n8n's security audit categories: credentials, database, nodes, instance, filesystem risks
- Community/custom nodes have higher risk profile than official nodes
- name: Quality & Performance Review
description: |-
You are a Staff Quality Engineer reviewing this PR for production
readiness. You think in terms of blast radius, resource cost, and
failure modes. You are pragmatic, not pedantic.
Below is a curated list of known issues from real incidents and
review standards the team enforces. Review the diff against each
item. If none match, say nothing. If one matches, explain the
concrete risk and suggest a fix. Never flag existing unchanged code.
## Known Issues
### 1. Eager imports of heavy or native modules
Top-level `import` of modules only used in a specific code path loads
them into every process at startup, increasing baseline memory.
Native modules (e.g. `isolated-vm`) can crash instances that lack the
binary. Large parsers (e.g. `jsdom` at ~16 MB heap) waste memory when
the code path is rarely hit.
**Fix:** Use `await import()` at point of use. For barrel files, use
`export type` instead of value re-exports when consumers only need
the type.
### 2. Unreasonable test coverage expectations
Be pragmatic about test requirements. Pass if core functionality and
critical paths are tested at reasonable coverage. Do NOT require tests
for exports, types, configs, metadata files, or version files. Let
humans handle edge cases.
- name: Design System Tokens
description: |-
For any hard-coded visual values in CSS (colors, shadows, spacing),

View file

@ -139,7 +139,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test Page',
content: 'Extracted content',
truncated: false,
@ -174,7 +174,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test Page',
content: 'Extracted content',
truncated: false,
@ -228,7 +228,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test',
content: 'Content',
truncated: false,
@ -260,7 +260,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test',
content: 'Content',
truncated: false,
@ -338,7 +338,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test Page',
content: 'Extracted readable content',
truncated: false,
@ -364,7 +364,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Long Page',
content: 'Truncated content',
truncated: true,
@ -387,7 +387,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Page',
content: 'Content',
truncated: false,
@ -460,7 +460,7 @@ describe('web_fetch tool', () => {
action: 'allow_once',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Redirected Page',
content: 'Redirected content',
truncated: false,
@ -540,7 +540,7 @@ describe('web_fetch tool', () => {
action: 'allow_domain',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test',
content: 'Content',
truncated: false,
@ -572,7 +572,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Docs',
content: 'Documentation content',
truncated: false,
@ -627,7 +627,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test',
content: 'Content',
truncated: false,
@ -656,7 +656,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test',
content: 'Content',
truncated: false,
@ -691,7 +691,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test',
content: 'Content',
truncated: false,
@ -751,7 +751,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test',
content: 'Content',
truncated: false,
@ -788,7 +788,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test',
content: 'Content',
truncated: false,
@ -820,7 +820,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test',
content: 'Content',
truncated: false,
@ -853,7 +853,7 @@ describe('web_fetch tool', () => {
contentType: 'text/html',
});
mockExtractReadableContent.mockReturnValue({
mockExtractReadableContent.mockResolvedValue({
title: 'Test',
content: 'Redirected content',
truncated: false,

View file

@ -241,7 +241,7 @@ describe('web-fetch.utils', () => {
});
describe('extractReadableContent', () => {
it('should extract title and Markdown-formatted content from HTML', () => {
it('should extract title and Markdown-formatted content from HTML', async () => {
const html = `
<html>
<head><title>Test Page</title></head>
@ -256,7 +256,7 @@ describe('web-fetch.utils', () => {
</html>
`;
const result = extractReadableContent(html, 'https://example.com/page');
const result = await extractReadableContent(html, 'https://example.com/page');
expect(result.title).toBe('Test Page');
expect(result.content).toContain('main content');
// Verify Markdown formatting is preserved
@ -266,14 +266,14 @@ describe('web-fetch.utils', () => {
expect(result.truncated).toBe(false);
});
it('should handle HTML with no readable content', () => {
it('should handle HTML with no readable content', async () => {
const html = '<html><body></body></html>';
const result = extractReadableContent(html, 'https://example.com/empty');
const result = await extractReadableContent(html, 'https://example.com/empty');
expect(result.content).toBe('');
expect(result.truncated).toBe(false);
});
it('should truncate content exceeding max chars', () => {
it('should truncate content exceeding max chars', async () => {
// Generate content longer than WEB_FETCH_MAX_CONTENT_CHARS (30000)
const longText = 'A'.repeat(40_000);
const html = `
@ -287,7 +287,7 @@ describe('web-fetch.utils', () => {
</html>
`;
const result = extractReadableContent(html, 'https://example.com/long');
const result = await extractReadableContent(html, 'https://example.com/long');
expect(result.truncated).toBe(true);
expect(result.truncateReason).toContain('30000');
expect(result.content.length).toBeLessThanOrEqual(30_000);

View file

@ -1,8 +1,5 @@
import { HumanMessage, type BaseMessage } from '@langchain/core/messages';
import { Readability } from '@mozilla/readability';
import dns from 'dns';
import { JSDOM, VirtualConsole } from 'jsdom';
import TurndownService from 'turndown';
import { promisify } from 'util';
import {
@ -318,8 +315,12 @@ export async function fetchUrl(url: string, signal?: AbortSignal): Promise<Fetch
/**
* Extract readable content from HTML using JSDOM + Readability.
* Libraries are lazy-loaded to avoid pulling jsdom (~15-20MB) into memory at startup.
*/
export function extractReadableContent(html: string, url: string): ExtractedContent {
export async function extractReadableContent(html: string, url: string): Promise<ExtractedContent> {
const [{ JSDOM, VirtualConsole }, { Readability }, { default: TurndownService }] =
await Promise.all([import('jsdom'), import('@mozilla/readability'), import('turndown')]);
const virtualConsole = new VirtualConsole();
const dom = new JSDOM(html, { url, virtualConsole });
const article = new Readability(dom.window.document, { keepClasses: true }).parse();

View file

@ -244,7 +244,10 @@ export function createWebFetchTool(createSecurity: () => WebFetchSecurityManager
// 5. Extract readable content
reporter.progress('Extracting content...');
const extracted = extractReadableContent(fetchResult.body, fetchResult.finalUrl ?? url);
const extracted = await extractReadableContent(
fetchResult.body,
fetchResult.finalUrl ?? url,
);
// 6. Build response
const responseLines = [

View file

@ -1,9 +1,23 @@
import ivm from 'isolated-vm';
import type ivm from 'isolated-vm';
import { readFile } from 'node:fs/promises';
import * as path from 'node:path';
import type { RuntimeBridge, BridgeConfig, ExecuteOptions } from '../types';
import { DEFAULT_BRIDGE_CONFIG, TimeoutError, MemoryLimitError } from '../types';
// Lazy-loaded isolated-vm — avoids loading the native binary when the barrel
// file is statically imported (e.g. for error classes). The native module is
// only loaded when IsolatedVmBridge is actually constructed.
type IsolatedVm = typeof import('isolated-vm');
let _ivm: IsolatedVm | null = null;
function getIvm(): IsolatedVm {
if (!_ivm) {
// eslint-disable-next-line @typescript-eslint/no-require-imports
_ivm = require('isolated-vm') as IsolatedVm;
}
return _ivm;
}
const BUNDLE_RELATIVE_PATH = path.join('dist', 'bundle', 'runtime.iife.js');
/**
@ -68,7 +82,7 @@ export class IsolatedVmBridge implements RuntimeBridge {
// Create isolate with memory limit
// Note: memoryLimit is in MB
this.isolate = new ivm.Isolate({ memoryLimit: this.config.memoryLimit });
this.isolate = new (getIvm().Isolate)({ memoryLimit: this.config.memoryLimit });
}
/**
@ -300,7 +314,7 @@ export class IsolatedVmBridge implements RuntimeBridge {
// Callback 1: Get value/metadata at path
// Used by createDeepLazyProxy when accessing properties
const getValueAtPath = new ivm.Reference((path: string[]) => {
const getValueAtPath = new (getIvm().Reference)((path: string[]) => {
// Navigate to value
let value: unknown = data;
for (const key of path) {
@ -343,7 +357,7 @@ export class IsolatedVmBridge implements RuntimeBridge {
// Callback 2: Get array element at index
// Used by array proxy when accessing numeric indices
const getArrayElement = new ivm.Reference((path: string[], index: number) => {
const getArrayElement = new (getIvm().Reference)((path: string[], index: number) => {
// Navigate to array
let arr: unknown = data;
for (const key of path) {
@ -380,7 +394,7 @@ export class IsolatedVmBridge implements RuntimeBridge {
// Callback 3: Call function at path with arguments
// Used when expressions invoke functions from workflow data
const callFunctionAtPath = new ivm.Reference((path: string[], ...args: unknown[]) => {
const callFunctionAtPath = new (getIvm().Reference)((path: string[], ...args: unknown[]) => {
// Navigate to function, tracking parent to preserve `this` context
let fn: unknown = data;
let parent: unknown = undefined;

View file

@ -1,7 +1,8 @@
// Main exports
export { ExpressionEvaluator } from './evaluator/expression-evaluator';
// Bridge exports
// Bridge exports — IsolatedVmBridge lazy-loads isolated-vm internally,
// so this value re-export does NOT pull in the native binary at import time.
export { IsolatedVmBridge } from './bridge/isolated-vm-bridge';
// Types

View file

@ -1,13 +1,25 @@
import { readFile } from 'node:fs/promises';
import { randomUUID } from 'node:crypto';
import ivm from 'isolated-vm';
import type { IDataObject } from 'n8n-workflow';
import type IsolatedVM from 'isolated-vm';
// Lazy-loaded isolated-vm — avoids loading the native module at startup.
// The module is only loaded when combineBySql is actually used.
type IsolatedVm = typeof IsolatedVM;
let _ivm: IsolatedVm | null = null;
async function getIvm(): Promise<IsolatedVm> {
if (!_ivm) {
const mod = await import('isolated-vm');
_ivm = mod.default;
}
return _ivm!;
}
// Singleton recreated only after resetSandboxCache() (tests) or isolate disposal.
let sandboxIsolate: ivm.Isolate | null = null;
let sandboxContext: ivm.Context | null = null;
let sandboxIsolate: IsolatedVM.Isolate | null = null;
let sandboxContext: IsolatedVM.Context | null = null;
/** Disposes the cached isolate. Exposed for tests only. */
export function resetSandboxCache(): void {
@ -19,11 +31,13 @@ export function resetSandboxCache(): void {
}
/** Returns a cached isolated-vm context with alasql pre-loaded. Creates it on first call. */
export async function loadAlaSqlSandbox(): Promise<ivm.Context> {
export async function loadAlaSqlSandbox(): Promise<IsolatedVM.Context> {
if (sandboxContext && sandboxIsolate && !sandboxIsolate.isDisposed) {
return sandboxContext;
}
const ivm = await getIvm();
sandboxIsolate = new ivm.Isolate({ memoryLimit: 64 }); // 64 MB hard limit
sandboxContext = await sandboxIsolate.createContext();
@ -41,7 +55,7 @@ export async function loadAlaSqlSandbox(): Promise<ivm.Context> {
* Only JSON-serialisable values cross the isolate boundary.
*/
export async function runAlaSqlInSandbox(
context: ivm.Context,
context: IsolatedVM.Context,
tableData: unknown[][],
query: string,
): Promise<IDataObject[]> {