This commit is contained in:
Khairul Hasan 2026-04-21 09:37:47 +00:00 committed by GitHub
commit 72a8214899
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 183 additions and 8 deletions

View file

@ -1128,7 +1128,7 @@ export class ShellExecutionService {
const sniffBuffer = Buffer.concat(sniffChunks);
sniffedBytes = sniffBuffer.length;
if (isBinary(sniffBuffer)) {
if (isBinary(sniffBuffer, 512, true)) {
isStreamingRawContent = false;
binaryBytesReceived = sniffBuffer.length;
const event: ShellOutputEvent = { type: 'binary_detected' };

View file

@ -198,3 +198,89 @@ describe('safeTemplateReplace', () => {
expect(safeTemplateReplace(tmpl, replacements)).toBe('Value: $&');
});
});
describe('isBinary', () => {
  // Sample window passed explicitly by every PTY-mode case below.
  const SAMPLE_SIZE = 512;
  // Control bytes used to assemble the fixtures.
  const NUL = Buffer.from([0x00]);
  const BEL = Buffer.from([0x07]);
  // All fixture strings are 7-bit ASCII, so latin1 maps them 1:1 to bytes.
  const bytes = (text: string): Buffer => Buffer.from(text, 'latin1');

  describe('non-PTY mode (default)', () => {
    it('should return false for null/undefined input', () => {
      expect(isBinary(null)).toBe(false);
      expect(isBinary(undefined)).toBe(false);
    });

    it('should return false for a plain text buffer', () => {
      const plain = Buffer.from('Hello, world!');
      expect(isBinary(plain)).toBe(false);
    });

    it('should return true for a buffer containing a single null byte', () => {
      // "He" + NUL + "lo"
      const withNull = Buffer.concat([bytes('He'), NUL, bytes('lo')]);
      expect(isBinary(withNull)).toBe(true);
    });

    it('should return true for a purely binary buffer', () => {
      // 0x89 "PNG" followed by a NUL byte.
      const pngLike = Buffer.concat([Buffer.from([0x89]), bytes('PNG'), NUL]);
      expect(isBinary(pngLike)).toBe(true);
    });

    it('should only check up to sampleSize bytes', () => {
      // The NUL sits at index 4; a 4-byte window (indices 0-3) never sees it.
      const nullJustOutsideWindow = Buffer.concat([bytes('ABCD'), NUL]);
      expect(isBinary(nullJustOutsideWindow, 4)).toBe(false);
    });
  });

  describe('PTY mode (isPtyOutput=true)', () => {
    const isBinaryPty = (input: Buffer | null): boolean =>
      isBinary(input, SAMPLE_SIZE, true);

    it('should return false for a buffer with ANSI escape sequences containing null bytes', () => {
      // PTY-style stream: ESC[0m (ANSI reset), one stray NUL, then "Hello".
      // Once the CSI sequence is removed, 1 of the 6 remaining bytes is NUL,
      // so this case depends on a lone NUL not being classified as binary.
      const stream = Buffer.concat([bytes('\x1b[0m'), NUL, bytes('Hello')]);
      expect(isBinaryPty(stream)).toBe(false);
    });

    it('should return false when buffer is entirely ANSI escape sequences', () => {
      // ESC[0m immediately followed by ESC[1m — nothing but control data.
      const onlyEscapes = bytes('\x1b[0m\x1b[1m');
      expect(isBinaryPty(onlyEscapes)).toBe(false);
    });

    it('should return true for a buffer with >10% null bytes after ANSI stripping', () => {
      // 2 NUL bytes + 8 text bytes = 20% NUL, above the 10% threshold.
      const nullHeavy = Buffer.concat([NUL, NUL, bytes('Hello!AB')]);
      expect(isBinaryPty(nullHeavy)).toBe(true);
    });

    it('should return false for normal text with no null bytes in PTY mode', () => {
      const text = Buffer.from('Normal PTY output text');
      expect(isBinaryPty(text)).toBe(false);
    });

    it('should return false for Windows PTY output with VT sequences and stray nulls', () => {
      // OSC title sequence (ESC ] 0 ; cmd BEL), a stray NUL, then a prompt.
      const oscTitle = Buffer.concat([bytes('\x1b]0;'), Buffer.from('cmd'), BEL]);
      const prompt = Buffer.from('C:\\Users>');
      const stream = Buffer.concat([oscTitle, NUL, prompt]);
      expect(isBinaryPty(stream)).toBe(false);
    });

    it('should return false for an empty buffer in PTY mode', () => {
      const empty = Buffer.from([]);
      expect(isBinaryPty(empty)).toBe(false);
    });

    it('should return false for null input in PTY mode', () => {
      expect(isBinaryPty(null)).toBe(false);
    });
  });
});

View file

@ -26,34 +26,123 @@ export function safeLiteralReplace(
}
/**
 * Checks if a Buffer is likely binary by testing for the presence of NULL bytes.
 * The presence of NULL bytes is a strong indicator that the data is not plain text.
 *
 * Only the first `sampleSize` bytes are inspected.
 *
 * When `isPtyOutput` is true, the check strips ANSI escape sequences first and
 * uses a ratio-based threshold instead of failing on a single NULL byte. This
 * prevents false positives caused by PTY control sequences on Windows, which
 * can contain NULL bytes in ANSI/VT escape data. In this mode the sample is
 * reported as binary only when it contains at least two NULL bytes AND more
 * than 10% of the remaining (non-ANSI) bytes are NULL, so a lone stray NULL
 * never triggers detection regardless of how short the sample is.
 *
 * @param data The Buffer to check. `null`/`undefined` is treated as not binary.
 * @param sampleSize The number of bytes from the start of the buffer to test.
 * @param isPtyOutput If true, apply PTY-aware heuristics to avoid false positives
 *   from ANSI control sequences (fixes Windows node-pty issue #25164).
 * @returns True if the data appears to be binary, false otherwise.
 */
export function isBinary(
  data: Buffer | null | undefined,
  sampleSize = 512,
  isPtyOutput = false,
): boolean {
  if (!data) {
    return false;
  }

  const sample = data.length > sampleSize ? data.subarray(0, sampleSize) : data;

  if (!isPtyOutput) {
    // Non-PTY path: strict check — any single NULL byte (0x00) means binary.
    // Text files should not contain them.
    return sample.includes(0);
  }

  // PTY path: strip ANSI escape sequences before counting. PTY streams
  // (especially on Windows) emit VT/ANSI control sequences that can contain
  // null bytes, which would otherwise cause false positives.
  const stripped = stripAnsiFromBuffer(sample);
  if (stripped.length === 0) {
    // The entire sample was escape sequences (or empty): not binary.
    return false;
  }

  // More than this fraction of the non-ANSI bytes must be NULL.
  const NULL_BYTE_THRESHOLD = 0.1;
  // A single NULL is treated as stray PTY noise. Without this floor, one NULL
  // in a sample shorter than 10 bytes would exceed the ratio on its own.
  const MIN_NULL_BYTES = 2;

  let nullCount = 0;
  for (const byte of stripped) {
    if (byte === 0) {
      nullCount++;
    }
  }

  return (
    nullCount >= MIN_NULL_BYTES &&
    nullCount / stripped.length > NULL_BYTE_THRESHOLD
  );
}

/**
 * Strips ANSI/VT escape sequences from a raw byte buffer.
 *
 * Recognised forms:
 * - CSI: `ESC [` + parameter/intermediate bytes (0x20-0x3F) + final byte (0x40-0x7E)
 * - OSC: `ESC ]` + payload, terminated by BEL (0x07) or ST (`ESC \`)
 * - nF escapes: `ESC` + intermediate bytes (0x20-0x2F) + final byte (0x30-0x7E),
 *   e.g. the charset designation `ESC ( B`
 * - Two-byte escapes: `ESC` + one byte in 0x30-0x7E
 *
 * An ESC followed by any other byte (for example NUL or another ESC) drops only
 * the ESC itself; the following byte is processed normally so that it is not
 * silently lost and a directly following escape sequence is still recognised.
 * Sequences cut off by the end of the buffer are removed as far as they go.
 *
 * @param buf The raw buffer to strip ANSI sequences from.
 * @returns A new Buffer with ANSI escape sequences removed.
 */
function stripAnsiFromBuffer(buf: Buffer | Uint8Array): Buffer {
  const ESC = 0x1b;
  const BEL = 0x07;
  const len = buf.length;
  const kept: number[] = [];
  let i = 0;

  while (i < len) {
    const current = buf[i]!;
    if (current !== ESC) {
      kept.push(current);
      i++;
      continue;
    }

    i++; // skip ESC
    if (i >= len) break; // lone ESC at the very end of the buffer

    const next = buf[i]!;
    if (next === 0x5b) {
      // '[' — CSI sequence: ESC [ <params> <final_byte>
      i++; // skip '['
      // Skip parameter bytes (0x30-0x3F) and intermediate bytes (0x20-0x2F)
      while (i < len && buf[i]! >= 0x20 && buf[i]! <= 0x3f) {
        i++;
      }
      // Skip the final byte (0x40-0x7E)
      if (i < len && buf[i]! >= 0x40 && buf[i]! <= 0x7e) {
        i++;
      }
    } else if (next === 0x5d) {
      // ']' — OSC sequence: ESC ] ... (ST or BEL)
      i++; // skip ']'
      while (i < len) {
        if (buf[i] === BEL) {
          i++;
          break;
        }
        // ST = ESC '\' (0x1b 0x5c)
        if (buf[i] === ESC && i + 1 < len && buf[i + 1] === 0x5c) {
          i += 2;
          break;
        }
        i++;
      }
    } else if (next >= 0x20 && next <= 0x2f) {
      // nF escape: ESC <intermediates 0x20-0x2F> <final 0x30-0x7E>
      while (i < len && buf[i]! >= 0x20 && buf[i]! <= 0x2f) {
        i++;
      }
      if (i < len && buf[i]! >= 0x30 && buf[i]! <= 0x7e) {
        i++;
      }
    } else if (next >= 0x30 && next <= 0x7e) {
      // Simple two-byte escape sequence (ESC + single char)
      i++;
    }
    // Otherwise `next` cannot continue an escape sequence: leave it in place
    // so the main loop handles it (keeps a NUL, re-parses a second ESC).
  }

  return Buffer.from(kept);
}
/**
* Detects the line ending style of a string.
* @param content The string content to analyze.