mirror of
https://github.com/google-gemini/gemini-cli
synced 2026-04-21 13:37:17 +00:00
Merge d395a0b18d into a38e2f0048
This commit is contained in:
commit
72a8214899
3 changed files with 183 additions and 8 deletions
|
|
@@ -1128,7 +1128,7 @@ export class ShellExecutionService {
|
|||
const sniffBuffer = Buffer.concat(sniffChunks);
|
||||
sniffedBytes = sniffBuffer.length;
|
||||
|
||||
if (isBinary(sniffBuffer)) {
|
||||
if (isBinary(sniffBuffer, 512, true)) {
|
||||
isStreamingRawContent = false;
|
||||
binaryBytesReceived = sniffBuffer.length;
|
||||
const event: ShellOutputEvent = { type: 'binary_detected' };
|
||||
|
|
|
|||
|
|
@@ -198,3 +198,89 @@ describe('safeTemplateReplace', () => {
|
|||
expect(safeTemplateReplace(tmpl, replacements)).toBe('Value: $&');
|
||||
});
|
||||
});
|
||||
|
||||
describe('isBinary', () => {
  /** Builds a Buffer from a list of raw byte values. */
  const bytes = (...values: number[]): Buffer => Buffer.from(values);

  /** Calls isBinary with the default 512-byte sample and PTY heuristics on. */
  const isBinaryPty = (input: Buffer | null | undefined): boolean =>
    isBinary(input, 512, true);

  describe('non-PTY mode (default)', () => {
    it('should return false for null/undefined input', () => {
      expect(isBinary(null)).toBe(false);
      expect(isBinary(undefined)).toBe(false);
    });

    it('should return false for a plain text buffer', () => {
      const text = Buffer.from('Hello, world!');
      expect(isBinary(text)).toBe(false);
    });

    it('should return true for a buffer containing a single null byte', () => {
      // "He" + NUL + "lo"
      const withNull = bytes(0x48, 0x65, 0x00, 0x6c, 0x6f);
      expect(isBinary(withNull)).toBe(true);
    });

    it('should return true for a purely binary buffer', () => {
      // PNG magic prefix followed by a NUL
      const pngLike = bytes(0x89, 0x50, 0x4e, 0x47, 0x00);
      expect(isBinary(pngLike)).toBe(true);
    });

    it('should only check up to sampleSize bytes', () => {
      // The only NUL sits at index 4; with sampleSize 4 it is outside the
      // inspected window, so it must go unnoticed.
      const nullPastSample = bytes(0x41, 0x42, 0x43, 0x44, 0x00);
      expect(isBinary(nullPastSample, 4)).toBe(false);
    });
  });

  describe('PTY mode (isPtyOutput=true)', () => {
    it('should return false for a buffer with ANSI escape sequences containing null bytes', () => {
      // PTY-like stream: ESC[0m (ANSI reset), one stray NUL, then "Hello".
      const stream = Buffer.concat([
        bytes(0x1b, 0x5b, 0x30, 0x6d),
        bytes(0x00),
        bytes(0x48, 0x65, 0x6c, 0x6c, 0x6f),
      ]);
      expect(isBinaryPty(stream)).toBe(false);
    });

    it('should return false when buffer is entirely ANSI escape sequences', () => {
      // ESC[0m immediately followed by ESC[1m — nothing but control data.
      const onlyEscapes = bytes(0x1b, 0x5b, 0x30, 0x6d, 0x1b, 0x5b, 0x31, 0x6d);
      expect(isBinaryPty(onlyEscapes)).toBe(false);
    });

    it('should return true for a buffer with >10% null bytes after ANSI stripping', () => {
      // Two NULs followed by eight text bytes: 20% NUL, above the threshold.
      const mostlyText = bytes(
        0x00, 0x00, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x21, 0x41, 0x42,
      );
      expect(isBinaryPty(mostlyText)).toBe(true);
    });

    it('should return false for normal text with no null bytes in PTY mode', () => {
      const plain = Buffer.from('Normal PTY output text');
      expect(isBinaryPty(plain)).toBe(false);
    });

    it('should return false for Windows PTY output with VT sequences and stray nulls', () => {
      // Shape of a Windows PTY stream: an OSC window-title sequence, one stray
      // NUL emitted by the PTY, then the visible prompt text.
      const oscStart = bytes(0x1b, 0x5d, 0x30, 0x3b); // ESC ] 0 ;
      const title = Buffer.from('cmd');
      const oscEnd = bytes(0x07); // BEL terminates the OSC sequence
      const strayNull = bytes(0x00);
      const prompt = Buffer.from('C:\\Users>');

      const stream = Buffer.concat([oscStart, title, oscEnd, strayNull, prompt]);
      expect(isBinaryPty(stream)).toBe(false);
    });

    it('should return false for an empty buffer in PTY mode', () => {
      expect(isBinaryPty(Buffer.from([]))).toBe(false);
    });

    it('should return false for null input in PTY mode', () => {
      expect(isBinaryPty(null)).toBe(false);
    });
  });
});
|
||||
|
|
@@ -26,34 +26,123 @@ export function safeLiteralReplace(
|
|||
}
|
||||
|
||||
/**
 * Checks if a Buffer is likely binary by testing for the presence of NULL bytes.
 * The presence of NULL bytes is a strong indicator that the data is not plain text.
 *
 * Two modes are supported:
 *
 * - Default (non-PTY): strict. A single NULL byte anywhere in the sampled
 *   prefix classifies the data as binary.
 * - PTY (`isPtyOutput` = true): ANSI/VT escape sequences are stripped from the
 *   sampled prefix first, and the remaining bytes are classified as binary
 *   only when they contain at least two NULL bytes AND NULL bytes make up more
 *   than 10% of them. The absolute minimum matters for short samples: without
 *   it, one stray NULL among fewer than ten bytes would exceed the ratio on
 *   its own, which is exactly the false positive this mode exists to avoid
 *   (Windows node-pty issue #25164).
 *
 * @param data The Buffer to check. `null`/`undefined` is treated as "not binary".
 * @param sampleSize The number of bytes from the start of the buffer to test.
 *   In PTY mode the prefix is cut before escape sequences are stripped.
 * @param isPtyOutput If true, apply the PTY-aware heuristics described above.
 * @returns True if the data appears to be binary, false otherwise.
 */
export function isBinary(
  data: Buffer | null | undefined,
  sampleSize = 512,
  isPtyOutput = false,
): boolean {
  if (!data) {
    return false;
  }

  const sample = data.length > sampleSize ? data.subarray(0, sampleSize) : data;

  if (!isPtyOutput) {
    // Non-PTY path: original strict check — any single NULL byte means binary.
    // Text files should not contain NULL bytes at all.
    return sample.includes(0);
  }

  // PTY streams (especially on Windows) emit VT/ANSI control sequences and
  // occasional stray NULL bytes, so only the bytes left after stripping the
  // control sequences are judged.
  const visible = stripAnsiFromBuffer(sample);
  if (visible.length === 0) {
    // The whole sample was escape sequences (or empty): nothing binary in it.
    return false;
  }

  // More than 10% NULL bytes means binary...
  const NULL_BYTE_THRESHOLD = 0.1;
  // ...but never on the strength of one lone NULL, however short the sample.
  const MIN_NULL_BYTES = 2;

  let nullCount = 0;
  for (const byte of visible) {
    if (byte === 0) {
      nullCount++;
    }
  }

  return (
    nullCount >= MIN_NULL_BYTES &&
    nullCount / visible.length > NULL_BYTE_THRESHOLD
  );
}

/**
 * Strips ANSI/VT escape sequences from a raw byte buffer.
 *
 * Recognised forms:
 * - CSI: `ESC [` + parameter/intermediate bytes (0x20–0x3F) + final byte (0x40–0x7E)
 * - OSC: `ESC ]` + payload, terminated by BEL (0x07) or ST (`ESC \`)
 * - Escapes with intermediates: `ESC` + one or more 0x20–0x2F bytes + final
 *   byte (0x30–0x7E), e.g. the charset designation `ESC ( B`
 * - Any other `ESC` + single byte pair
 *
 * A sequence cut off by the end of the buffer is dropped as far as it goes;
 * an unterminated OSC therefore swallows the rest of the buffer.
 *
 * @param buf The raw buffer to strip ANSI sequences from. It is not modified.
 * @returns A new Buffer with ANSI escape sequences removed.
 */
function stripAnsiFromBuffer(buf: Buffer | Uint8Array): Buffer {
  const ESC = 0x1b;
  const BEL = 0x07;
  const len = buf.length;

  // The output can never be longer than the input, so one allocation suffices.
  // Only the prefix that was actually written is exposed to the caller.
  const out = Buffer.allocUnsafe(len);
  let kept = 0;
  let i = 0;

  while (i < len) {
    const byte = buf[i]!;
    if (byte !== ESC) {
      out[kept++] = byte;
      i++;
      continue;
    }

    i++; // skip ESC
    if (i >= len) break; // lone ESC at the very end

    const introducer = buf[i]!;
    i++; // skip the byte that selects the sequence type

    if (introducer === 0x5b) {
      // '[' — CSI sequence: ESC [ <params/intermediates> <final_byte>
      while (i < len && buf[i]! >= 0x20 && buf[i]! <= 0x3f) {
        i++;
      }
      if (i < len && buf[i]! >= 0x40 && buf[i]! <= 0x7e) {
        i++;
      }
    } else if (introducer === 0x5d) {
      // ']' — OSC sequence: ESC ] ... terminated by BEL or ST (ESC '\')
      while (i < len) {
        if (buf[i] === BEL) {
          i++;
          break;
        }
        if (buf[i] === ESC && i + 1 < len && buf[i + 1] === 0x5c) {
          i += 2;
          break;
        }
        i++;
      }
    } else if (introducer >= 0x20 && introducer <= 0x2f) {
      // Escape with intermediate bytes, e.g. ESC ( B. Consume any further
      // intermediates and then the final byte so it does not leak into the
      // output as if it were text.
      while (i < len && buf[i]! >= 0x20 && buf[i]! <= 0x2f) {
        i++;
      }
      if (i < len && buf[i]! >= 0x30 && buf[i]! <= 0x7e) {
        i++;
      }
    }
    // Otherwise: simple two-byte escape (ESC + single char), already consumed.
  }

  return out.subarray(0, kept);
}
|
||||
|
||||
/**
|
||||
* Detects the line ending style of a string.
|
||||
* @param content The string content to analyze.
|
||||
|
|
|
|||
Loading…
Reference in a new issue