// NOTE(review): this suite calls `isBinary`; the visible hunk does not show it
// being added to the test file's imports — confirm it is imported from
// './textUtils.js' alongside the other helpers.

describe('isBinary', () => {
  /** Builds a Buffer from raw byte values. */
  const bytes = (...values: number[]): Buffer => Buffer.from(values);

  describe('non-PTY mode (default)', () => {
    it('should return false for null/undefined input', () => {
      expect(isBinary(null)).toBe(false);
      expect(isBinary(undefined)).toBe(false);
    });

    it('should return false for a plain text buffer', () => {
      const plain = Buffer.from('Hello, world!');
      expect(isBinary(plain)).toBe(false);
    });

    it('should return true for a buffer containing a single null byte', () => {
      const withNull = bytes(0x48, 0x65, 0x00, 0x6c, 0x6f);
      expect(isBinary(withNull)).toBe(true);
    });

    it('should return true for a purely binary buffer', () => {
      const pngLike = bytes(0x89, 0x50, 0x4e, 0x47, 0x00);
      expect(isBinary(pngLike)).toBe(true);
    });

    it('should only check up to sampleSize bytes', () => {
      // The NUL sits at index 4; with sampleSize = 4 only indexes 0..3 are
      // inspected, so it must go unnoticed.
      const nullJustPastSample = bytes(0x41, 0x42, 0x43, 0x44, 0x00);
      expect(isBinary(nullJustPastSample, 4)).toBe(false);
    });
  });

  describe('PTY mode (isPtyOutput=true)', () => {
    // Same argument shape as the ShellExecutionService sniff call in this
    // change: isBinary(sniffBuffer, 512, true).
    const SAMPLE_SIZE = 512;
    const detectInPty = (input: Buffer | null): boolean =>
      isBinary(input, SAMPLE_SIZE, true);

    it('should return false for a buffer with ANSI escape sequences containing null bytes', () => {
      // PTY-like stream: ESC[0m (ANSI reset), then one stray NUL, then text.
      const ansiReset = [0x1b, 0x5b, 0x30, 0x6d]; // ESC[0m
      const strayNull = [0x00];
      const hello = [0x48, 0x65, 0x6c, 0x6c, 0x6f]; // "Hello"
      const stream = bytes(...ansiReset, ...strayNull, ...hello);
      expect(detectInPty(stream)).toBe(false);
    });

    it('should return false when buffer is entirely ANSI escape sequences', () => {
      // ESC[0m followed by ESC[1m, nothing else.
      const onlyEscapes = bytes(0x1b, 0x5b, 0x30, 0x6d, 0x1b, 0x5b, 0x31, 0x6d);
      expect(detectInPty(onlyEscapes)).toBe(false);
    });

    it('should return true for a buffer with >10% null bytes after ANSI stripping', () => {
      // 2 NUL bytes + 8 text bytes = 10 bytes => 20% NUL => binary.
      const twoNullsInTen = bytes(
        0x00, 0x00, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x21, 0x41, 0x42,
      );
      expect(detectInPty(twoNullsInTen)).toBe(true);
    });

    it('should return false for normal text with no null bytes in PTY mode', () => {
      const text = Buffer.from('Normal PTY output text');
      expect(detectInPty(text)).toBe(false);
    });

    it('should return false for Windows PTY output with VT sequences and stray nulls', () => {
      // Windows-style PTY stream: OSC title sequence, a stray NUL, then the
      // prompt text.
      const oscStart = bytes(0x1b, 0x5d, 0x30, 0x3b); // ESC ] 0 ;
      const title = Buffer.from('cmd');
      const oscEnd = bytes(0x07); // BEL terminates the OSC
      const strayNull = bytes(0x00);
      const prompt = Buffer.from('C:\\Users>');
      const stream = Buffer.concat([
        oscStart,
        title,
        oscEnd,
        strayNull,
        prompt,
      ]);
      expect(detectInPty(stream)).toBe(false);
    });

    it('should return false for an empty buffer in PTY mode', () => {
      expect(detectInPty(bytes())).toBe(false);
    });

    it('should return false for null input in PTY mode', () => {
      expect(detectInPty(null)).toBe(false);
    });
  });
});
/**
 * Checks if a Buffer is likely binary by testing for the presence of NULL bytes.
 * The presence of NULL bytes is a strong indicator that the data is not plain text.
 *
 * When `isPtyOutput` is true, ANSI escape sequences are stripped from the
 * sample first, and the sample is only reported as binary when BOTH of the
 * following hold for the remaining bytes:
 *   - it contains at least two NULL bytes, and
 *   - more than 10% of the bytes are NULL.
 * The absolute minimum matters for short samples: with a pure ratio test a
 * single stray NULL in e.g. `ESC[0m\0Hello` is 1 of 6 bytes (~17%) and would
 * be misreported as binary.
 *
 * When `isPtyOutput` is false, any single NULL byte in the sample means binary.
 *
 * @param data The Buffer to check. `null`/`undefined` is treated as not binary.
 * @param sampleSize The number of bytes from the start of the buffer to test.
 * @param isPtyOutput If true, apply PTY-aware heuristics to avoid false positives
 *   from ANSI control sequences (fixes Windows node-pty issue #25164).
 * @returns True if the data appears to be binary, false otherwise.
 */
export function isBinary(
  data: Buffer | null | undefined,
  sampleSize = 512,
  isPtyOutput = false,
): boolean {
  if (!data) {
    return false;
  }

  const head: Buffer | Uint8Array =
    data.length > sampleSize ? data.subarray(0, sampleSize) : data;

  if (!isPtyOutput) {
    // Strict check: any single NULL byte means binary.
    for (const byte of head) {
      if (byte === 0) {
        return true;
      }
    }
    return false;
  }

  // PTY streams interleave VT/ANSI control sequences with the payload; drop
  // them so only the payload bytes are judged.
  const payload = stripAnsiFromBuffer(head);
  if (payload.length === 0) {
    // Nothing but escape sequences (or an empty sample): not binary.
    return false;
  }

  // A lone NULL is tolerated regardless of sample length.
  const MIN_NULL_BYTES = 2;
  // Above this share of NULL bytes the payload is considered binary.
  const NULL_BYTE_THRESHOLD = 0.1;

  let nullCount = 0;
  for (const byte of payload) {
    if (byte === 0) {
      nullCount++;
    }
  }

  return (
    nullCount >= MIN_NULL_BYTES &&
    nullCount / payload.length > NULL_BYTE_THRESHOLD
  );
}

/**
 * Strips ANSI/VT escape sequences from a raw byte buffer.
 *
 * Recognised forms:
 *   - CSI: `ESC [`, then any bytes in 0x20–0x3F, then one optional final byte
 *     in 0x40–0x7E.
 *   - OSC: `ESC ]`, then everything up to and including BEL (0x07) or
 *     ST (`ESC \`). An unterminated OSC consumes the rest of the buffer.
 *   - Anything else: `ESC` plus the single byte that follows it.
 * A lone `ESC` as the very last byte is dropped.
 *
 * @param buf The raw buffer to strip ANSI sequences from.
 * @returns A new Buffer holding the bytes of `buf` that are not part of an
 *   escape sequence; the input is not modified.
 */
function stripAnsiFromBuffer(buf: Buffer | Uint8Array): Buffer {
  const ESC = 0x1b;
  const BEL = 0x07;
  const CSI_INTRODUCER = 0x5b; // '['
  const OSC_INTRODUCER = 0x5d; // ']'
  const ST_SECOND_BYTE = 0x5c; // '\' — ST is ESC followed by this byte

  // The output can never be longer than the input, so allocate once and
  // return only the written prefix.
  const kept = Buffer.alloc(buf.length);
  let written = 0;
  let i = 0;

  while (i < buf.length) {
    if (buf[i] !== ESC) {
      kept[written++] = buf[i]!;
      i++;
      continue;
    }

    i++; // skip ESC
    if (i >= buf.length) {
      break;
    }

    const introducer = buf[i];
    i++; // skip the byte after ESC; for non-CSI/OSC this completes the sequence

    if (introducer === CSI_INTRODUCER) {
      // Parameter bytes (0x30–0x3F) and intermediate bytes (0x20–0x2F).
      while (i < buf.length && buf[i]! >= 0x20 && buf[i]! <= 0x3f) {
        i++;
      }
      // Final byte (0x40–0x7E), if present.
      if (i < buf.length && buf[i]! >= 0x40 && buf[i]! <= 0x7e) {
        i++;
      }
    } else if (introducer === OSC_INTRODUCER) {
      while (i < buf.length) {
        if (buf[i] === BEL) {
          i++;
          break;
        }
        if (buf[i] === ESC && buf[i + 1] === ST_SECOND_BYTE) {
          i += 2;
          break;
        }
        i++;
      }
    }
  }

  return kept.subarray(0, written);
}