mirror of
https://github.com/google-gemini/gemini-cli
synced 2026-04-21 13:37:17 +00:00
Merge 3f5b7323aa into 4b2091d402
This commit is contained in:
commit
63b938e1a0
2 changed files with 384 additions and 36 deletions
|
|
@ -10,6 +10,8 @@ import {
|
|||
getResponseText,
|
||||
flatMapTextParts,
|
||||
appendToLastTextPart,
|
||||
base64ByteSize,
|
||||
describeInlineData,
|
||||
} from './partUtils.js';
|
||||
import type { GenerateContentResponse, Part, PartUnion } from '@google/genai';
|
||||
|
||||
|
|
@ -86,7 +88,7 @@ describe('partUtils', () => {
|
|||
});
|
||||
|
||||
it('should return descriptive string for thought part', () => {
|
||||
const part = { thought: 'thinking' } as unknown as Part;
|
||||
const part: Part = { thought: true, text: 'thinking' };
|
||||
expect(partToString(part, verboseOptions)).toBe('[Thought: thinking]');
|
||||
});
|
||||
|
||||
|
|
@ -123,19 +125,7 @@ describe('partUtils', () => {
|
|||
|
||||
it('should return descriptive string for inlineData part', () => {
|
||||
const part = { inlineData: { mimeType: 'image/png', data: '' } } as Part;
|
||||
expect(partToString(part, verboseOptions)).toBe(
|
||||
'[Image: image/png, 0.0 KB]',
|
||||
);
|
||||
});
|
||||
|
||||
it('should show size for inlineData with non-empty base64 data', () => {
|
||||
// 4 base64 chars → ceil(4*3/4) = 3 bytes → 3/1024 ≈ 0.0 KB
|
||||
const part = {
|
||||
inlineData: { mimeType: 'audio/mp3', data: 'AAAA' },
|
||||
} as Part;
|
||||
expect(partToString(part, verboseOptions)).toBe(
|
||||
'[Audio: audio/mp3, 0.0 KB]',
|
||||
);
|
||||
expect(partToString(part, verboseOptions)).toBe('[Image: image/png]');
|
||||
});
|
||||
|
||||
it('should return an empty string for an unknown part type', () => {
|
||||
|
|
@ -154,7 +144,7 @@ describe('partUtils', () => {
|
|||
],
|
||||
];
|
||||
expect(partToString(parts as Part, verboseOptions)).toBe(
|
||||
'start middle[Function Call: func1] end[Audio: audio/mp3, 0.0 KB]',
|
||||
'start middle[Function Call: func1] end[Audio: audio/mp3]',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
@ -272,6 +262,241 @@ describe('partUtils', () => {
|
|||
});
|
||||
});
|
||||
|
||||
describe('base64ByteSize', () => {
  // Each entry pairs a test title with the [encoded input, expected byte
  // count] samples that test asserts, in order.
  const scenarios: Array<[string, Array<[string, number]>]> = [
    // 4 base64 chars = 3 bytes (no padding)
    ['should compute byte size for unpadded base64', [['AAAA', 3]]],
    // 4 base64 chars with "=" padding = 2 bytes
    ['should account for single padding character', [['AAA=', 2]]],
    // 4 base64 chars with "==" padding = 1 byte
    ['should account for double padding characters', [['AA==', 1]]],
    ['should handle empty string', [['', 0]]],
    [
      'should compute correct size for larger data',
      [
        // 8 base64 chars = 6 bytes
        ['AAAAAAAA', 6],
        // 12 base64 chars with '==' padding = floor(12*3/4) - 2 = 7 bytes
        ['AAAAAAAAAA==', 7],
      ],
    ],
  ];

  for (const [title, samples] of scenarios) {
    it(title, () => {
      for (const [encoded, expectedBytes] of samples) {
        expect(base64ByteSize(encoded)).toBe(expectedBytes);
      }
    });
  }
});
|
||||
|
||||
describe('describeInlineData', () => {
  /**
   * Builds an all-'A' base64 string that decodes to roughly `rawBytes` bytes.
   * Base64 emits 4 characters per 3-byte group, so the length is
   * ceil(rawBytes / 3) * 4 characters.
   */
  const makeBase64 = (rawBytes: number): string =>
    'A'.repeat(Math.ceil(rawBytes / 3) * 4);

  describe('audio descriptions', () => {
    it('should describe audio with MIME type only when data is empty', () => {
      const description = describeInlineData('audio/mp3', '');
      expect(description).toBe('[Audio: audio/mp3]');
    });

    it('should describe audio with MIME type only when data is undefined', () => {
      const description = describeInlineData('audio/mp3', undefined);
      expect(description).toBe('[Audio: audio/mp3]');
    });

    // [MIME type, approximate payload bytes, expected description shape].
    // The mp3 payload is 16000 bytes at 16000 bytes/sec (128 kbps) = ~1.0s.
    const timedCodecs: Array<[string, number, RegExp]> = [
      ['audio/mp3', 16000, /^\[Audio: audio\/mp3, [\d.]+ KB, ~[\d.]+s\]$/],
      ['audio/wav', 176400, /^\[Audio: audio\/wav, [\d.]+ KB, ~[\d.]+s\]$/],
      ['audio/ogg', 32000, /^\[Audio: audio\/ogg, [\d.]+ KB, ~[\d.]+s\]$/],
      ['audio/opus', 8000, /^\[Audio: audio\/opus, [\d.]+ KB, ~[\d.]+s\]$/],
      ['audio/webm', 16000, /^\[Audio: audio\/webm, [\d.]+ KB, ~[\d.]+s\]$/],
      ['audio/aac', 16000, /^\[Audio: audio\/aac, [\d.]+ KB, ~[\d.]+s\]$/],
      ['audio/flac', 88200, /^\[Audio: audio\/flac, [\d.]+ KB, ~[\d.]+s\]$/],
      ['audio/mpeg', 16000, /^\[Audio: audio\/mpeg, [\d.]+ KB, ~[\d.]+s\]$/],
    ];
    for (const [mime, rawBytes, shape] of timedCodecs) {
      const codec = mime.slice('audio/'.length);
      it(`should include size and duration for ${codec} audio`, () => {
        const description = describeInlineData(mime, makeBase64(rawBytes));
        expect(description).toMatch(shape);
      });
    }

    it('should show size but no duration for unknown audio codec', () => {
      const description = describeInlineData(
        'audio/x-custom',
        makeBase64(10000),
      );
      expect(description).toMatch(/^\[Audio: audio\/x-custom, [\d.]+ KB\]$/);
      expect(description).not.toContain('~');
    });

    it('should format duration as minutes and seconds for long audio', () => {
      // 120 seconds of mp3: 120 * 16000 = 1,920,000 bytes
      const description = describeInlineData('audio/mp3', makeBase64(1920000));
      expect(description).toMatch(/\d+m \d+s/);
    });

    it('should not produce "60s" in duration from rounding edge cases', () => {
      // 119.9 seconds of mp3: 119.9 * 16000 = 1,918,400 bytes.
      // Without rounding the total first, this could produce "1m 60s".
      const description = describeInlineData('audio/mp3', makeBase64(1918400));
      expect(description).not.toContain('60s');
      expect(description).toMatch(/\d+m \d+s/);
    });
  });

  describe('video descriptions', () => {
    it('should describe video with MIME type only when data is empty', () => {
      const description = describeInlineData('video/mp4', '');
      expect(description).toBe('[Video: video/mp4]');
    });

    // [MIME type, approximate payload bytes, expected description shape].
    const timedContainers: Array<[string, number, RegExp]> = [
      ['video/mp4', 375000, /^\[Video: video\/mp4, [\d.]+ KB, ~[\d.]+s\]$/],
      ['video/webm', 312500, /^\[Video: video\/webm, [\d.]+ KB, ~[\d.]+s\]$/],
      [
        'video/quicktime',
        375000,
        /^\[Video: video\/quicktime, [\d.]+ KB, ~[\d.]+s\]$/,
      ],
    ];
    for (const [mime, rawBytes, shape] of timedContainers) {
      const container = mime.slice('video/'.length);
      it(`should include size and duration for ${container} video`, () => {
        const description = describeInlineData(mime, makeBase64(rawBytes));
        expect(description).toMatch(shape);
      });
    }

    it('should show size but no duration for unknown video codec', () => {
      const description = describeInlineData(
        'video/x-matroska',
        makeBase64(50000),
      );
      expect(description).toMatch(/^\[Video: video\/x-matroska, [\d.]+ KB\]$/);
      expect(description).not.toContain('~');
    });

    it('should format large video size in MB', () => {
      // 5 MB video
      const fiveMegabytes = 5 * 1024 * 1024;
      const description = describeInlineData(
        'video/mp4',
        makeBase64(fiveMegabytes),
      );
      expect(description).toContain('MB');
    });
  });

  describe('image descriptions', () => {
    it('should describe image with MIME type only when data is empty', () => {
      const description = describeInlineData('image/png', '');
      expect(description).toBe('[Image: image/png]');
    });

    it('should include size for image with data', () => {
      const description = describeInlineData('image/png', makeBase64(50000));
      expect(description).toMatch(/^\[Image: image\/png, [\d.]+ KB\]$/);
    });

    it('should not include duration estimate for images', () => {
      const description = describeInlineData('image/jpeg', makeBase64(50000));
      expect(description).not.toContain('~');
    });
  });

  describe('PDF descriptions', () => {
    it('should describe PDF without MIME type label', () => {
      const description = describeInlineData('application/pdf', '');
      expect(description).toBe('[PDF]');
    });

    it('should include size for PDF with data', () => {
      const description = describeInlineData(
        'application/pdf',
        makeBase64(100000),
      );
      expect(description).toMatch(/^\[PDF, [\d.]+ KB\]$/);
    });
  });

  describe('other/unknown types', () => {
    it('should describe unknown MIME type with Data label', () => {
      const description = describeInlineData('application/octet-stream', '');
      expect(description).toBe('[Data: application/octet-stream]');
    });

    it('should handle undefined MIME type', () => {
      const description = describeInlineData(undefined, '');
      expect(description).toBe('[Data: unknown]');
    });

    it('should handle both undefined MIME type and data', () => {
      const description = describeInlineData(undefined, undefined);
      expect(description).toBe('[Data: unknown]');
    });

    it('should include size for unknown type with data', () => {
      const description = describeInlineData(
        'application/octet-stream',
        makeBase64(512),
      );
      expect(description).toMatch(
        /^\[Data: application\/octet-stream, \d+ B\]$/,
      );
    });
  });

  describe('size formatting', () => {
    // Describes an opaque binary payload of roughly `rawBytes` bytes.
    const describeOctets = (rawBytes: number): string =>
      describeInlineData('application/octet-stream', makeBase64(rawBytes));

    it('should format small sizes in bytes', () => {
      expect(describeOctets(500)).toMatch(/\d+ B/);
    });

    it('should format medium sizes in KB', () => {
      expect(describeOctets(50000)).toMatch(/[\d.]+ KB/);
    });

    it('should format large sizes in MB', () => {
      expect(describeOctets(2 * 1024 * 1024)).toMatch(/[\d.]+ MB/);
    });
  });
});
|
||||
|
||||
describe('appendToLastTextPart', () => {
|
||||
it('should append to an empty prompt', () => {
|
||||
const prompt: PartUnion[] = [];
|
||||
|
|
|
|||
|
|
@ -11,6 +11,144 @@ import type {
|
|||
PartUnion,
|
||||
} from '@google/genai';
|
||||
|
||||
/**
 * Assumed average payload rate, in bytes per second, keyed by audio MIME type.
 * Used only to estimate a clip's duration from its size; real bitrates vary
 * with encoder settings, so the resulting durations are approximations.
 */
const AUDIO_BYTES_PER_SECOND: Record<string, number> = {
  'audio/mp3': 128000 / 8, // 128 kbps
  'audio/mpeg': 128000 / 8, // 128 kbps
  'audio/wav': 44100 * 2 * 2, // uncompressed: 44.1 kHz x 16-bit x stereo
  'audio/ogg': 128000 / 8, // 128 kbps Vorbis
  'audio/aac': 128000 / 8, // 128 kbps
  'audio/flac': (44100 * 2 * 2) / 2, // lossless, assumed ~50% of WAV
  'audio/webm': 128000 / 8, // 128 kbps Opus
  'audio/opus': 64000 / 8, // 64 kbps (Opus is very efficient)
};
|
||||
|
||||
/**
 * Assumed average payload rate, in bytes per second, keyed by video MIME type.
 * Based on typical web video bitrates (~2-4 Mbps for video plus audio) and
 * used only to estimate a clip's duration from its size.
 */
const VIDEO_BYTES_PER_SECOND: Record<string, number> = {
  'video/mp4': 3000000 / 8, // 3 Mbps
  'video/webm': 2500000 / 8, // 2.5 Mbps
  'video/ogg': 2500000 / 8, // 2.5 Mbps
  'video/quicktime': 3000000 / 8, // 3 Mbps
  'video/x-msvideo': 3000000 / 8, // 3 Mbps (AVI)
};
|
||||
|
||||
/**
 * Computes the number of raw bytes encoded by a base64 string.
 *
 * Every 4 base64 characters decode to 3 bytes, and each trailing '=' padding
 * character (at most two are counted) stands for one byte that is not
 * present. The input is not validated as base64; only its length and its
 * trailing padding are inspected.
 *
 * @param base64 The base64-encoded text.
 * @returns The decoded size in bytes. Never negative: padding-only or
 *   otherwise degenerate input yields 0.
 */
export function base64ByteSize(base64: string): number {
  let padding = 0;
  if (base64.endsWith('==')) {
    padding = 2;
  } else if (base64.endsWith('=')) {
    padding = 1;
  }
  // Clamp at zero: for input such as '=' the length term is floor(3/4) = 0,
  // so subtracting the padding would otherwise report a negative size.
  return Math.max(0, Math.floor((base64.length * 3) / 4) - padding);
}
|
||||
|
||||
/**
 * Formats a byte count as a human-readable string (e.g., "512 B", "1.5 KB",
 * "3.2 MB"), using 1024-based units and one decimal place for KB and MB.
 *
 * @param bytes The size in bytes.
 * @returns The formatted size with its unit suffix.
 */
function formatBytes(bytes: number): string {
  if (bytes < 1024) {
    return `${bytes} B`;
  }
  const kilobytes = (bytes / 1024).toFixed(1);
  // toFixed rounds, so sizes just under 1 MiB (e.g. 1048575 bytes) would
  // print as "1024.0 KB". Decide the unit on the rounded value instead so
  // those sizes are promoted to "1.0 MB".
  if (Number(kilobytes) < 1024) {
    return `${kilobytes} KB`;
  }
  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
}
|
||||
|
||||
/**
 * Formats a duration in seconds as a human-readable string.
 *
 * Durations that display as less than a minute use one decimal place
 * ("12.3s"); anything else is rounded to whole seconds and shown as "Xm Ys".
 *
 * @param seconds The duration in seconds.
 * @returns The formatted duration.
 */
function formatDuration(seconds: number): string {
  const shortForm = seconds.toFixed(1);
  // Compare the rounded value, not the raw one: 59.97 is below 60 but
  // toFixed(1) renders it as "60.0", which belongs in the minutes form.
  if (Number(shortForm) < 60) {
    return `${shortForm}s`;
  }
  // Round the total before splitting so the seconds part can never be 60.
  const totalSeconds = Math.round(seconds);
  const minutes = Math.floor(totalSeconds / 60);
  const remaining = totalSeconds % 60;
  return `${minutes}m ${remaining}s`;
}
|
||||
|
||||
/**
 * Classifies a MIME type into a media category.
 *
 * MIME type and subtype names are case-insensitive and may be followed by
 * `;`-separated parameters, so the value is reduced to its lowercase
 * `type/subtype` essence before matching.
 *
 * @param mimeType The MIME type to classify, e.g. `audio/mp3`.
 * @returns `'audio'`, `'video'` or `'image'` according to the top-level
 *   type, `'pdf'` for `application/pdf`, and `'other'` for anything else.
 */
function classifyMimeType(
  mimeType: string,
): 'audio' | 'video' | 'image' | 'pdf' | 'other' {
  // Drop any parameters (e.g. "; codecs=opus") and normalize case.
  const paramStart = mimeType.indexOf(';');
  const essence = (paramStart === -1 ? mimeType : mimeType.slice(0, paramStart))
    .trim()
    .toLowerCase();

  if (essence.startsWith('audio/')) return 'audio';
  if (essence.startsWith('video/')) return 'video';
  if (essence.startsWith('image/')) return 'image';
  if (essence === 'application/pdf') return 'pdf';
  return 'other';
}
|
||||
|
||||
/**
 * Builds a verbose, bracketed description for an inlineData part.
 *
 * For audio: `[Audio: audio/mp3, 45.2 KB, ~3.0s]`
 * For video: `[Video: video/mp4, 1.2 MB, ~3.2s]`
 * For images: `[Image: image/png, 120.5 KB]`
 * For PDFs: `[PDF, 2.3 MB]`
 * For other: `[Data: application/octet-stream, 512 B]`
 *
 * The size is included only when `data` is non-empty. The duration is an
 * estimate derived from the size and an assumed bitrate, and is included
 * only for audio/video MIME types that have an entry in the rate tables.
 *
 * @param mimeType The part's MIME type; a missing or blank value is reported
 *   as `unknown`.
 * @param data The base64-encoded payload, if any.
 * @returns The description, e.g. `[Audio: audio/mp3, 15.6 KB, ~1.0s]`.
 */
export function describeInlineData(
  mimeType: string | undefined,
  data: string | undefined,
): string {
  // `||` rather than `??` on purpose: a blank MIME type is as uninformative
  // as a missing one and would otherwise render as "[Data: ]".
  const effectiveMime = mimeType?.trim() || 'unknown';

  // Classification and rate lookup use the lowercase `type/subtype` essence
  // so parameters ("audio/webm;codecs=opus") and letter case do not defeat
  // them; the label still shows the MIME type as it was supplied.
  const paramStart = effectiveMime.indexOf(';');
  const mimeEssence = (
    paramStart === -1 ? effectiveMime : effectiveMime.slice(0, paramStart)
  )
    .trim()
    .toLowerCase();
  const category = classifyMimeType(mimeEssence);

  const parts: string[] = [];

  // Label based on category
  switch (category) {
    case 'audio':
      parts.push(`Audio: ${effectiveMime}`);
      break;
    case 'video':
      parts.push(`Video: ${effectiveMime}`);
      break;
    case 'image':
      parts.push(`Image: ${effectiveMime}`);
      break;
    case 'pdf':
      parts.push('PDF');
      break;
    default:
      parts.push(`Data: ${effectiveMime}`);
      break;
  }

  // Size info from base64 data
  if (data) {
    const byteSize = base64ByteSize(data);
    parts.push(formatBytes(byteSize));

    // Duration estimate for audio/video with a known average byte rate
    const rateTable =
      category === 'audio'
        ? AUDIO_BYTES_PER_SECOND
        : category === 'video'
          ? VIDEO_BYTES_PER_SECOND
          : undefined;
    const bytesPerSec = rateTable?.[mimeEssence];
    if (bytesPerSec !== undefined && byteSize > 0) {
      parts.push(`~${formatDuration(byteSize / bytesPerSec)}`);
    }
  }

  return `[${parts.join(', ')}]`;
}
|
||||
|
||||
/**
|
||||
* Converts a PartListUnion into a string.
|
||||
* If verbose is true, includes summary representations of non-text parts.
|
||||
|
|
@ -29,21 +167,17 @@ export function partToString(
|
|||
return value.map((part) => partToString(part, options)).join('');
|
||||
}
|
||||
|
||||
// Cast to Part, assuming it might contain project-specific fields
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
|
||||
const part = value as Part & {
|
||||
videoMetadata?: unknown;
|
||||
thought?: string;
|
||||
codeExecutionResult?: unknown;
|
||||
executableCode?: unknown;
|
||||
};
|
||||
// After ruling out string and array, value is narrowed to Part.
|
||||
// All checked fields (videoMetadata, thought, codeExecutionResult,
|
||||
// executableCode) are declared on the Part interface from @google/genai.
|
||||
const part: Part = value;
|
||||
|
||||
if (options?.verbose) {
|
||||
if (part.videoMetadata !== undefined) {
|
||||
return `[Video Metadata]`;
|
||||
}
|
||||
if (part.thought !== undefined) {
|
||||
return `[Thought: ${part.thought}]`;
|
||||
return `[Thought: ${part.text ?? ''}]`;
|
||||
}
|
||||
if (part.codeExecutionResult !== undefined) {
|
||||
return `[Code Execution Result]`;
|
||||
|
|
@ -63,18 +197,7 @@ export function partToString(
|
|||
return `[Function Response: ${part.functionResponse.name}]`;
|
||||
}
|
||||
if (part.inlineData !== undefined) {
|
||||
const mimeType = part.inlineData.mimeType ?? 'unknown';
|
||||
const data = part.inlineData.data ?? '';
|
||||
const bytes = Math.ceil((data.length * 3) / 4);
|
||||
const kb = (bytes / 1024).toFixed(1);
|
||||
const category = mimeType.startsWith('audio/')
|
||||
? 'Audio'
|
||||
: mimeType.startsWith('video/')
|
||||
? 'Video'
|
||||
: mimeType.startsWith('image/')
|
||||
? 'Image'
|
||||
: 'Media';
|
||||
return `[${category}: ${mimeType}, ${kb} KB]`;
|
||||
return describeInlineData(part.inlineData.mimeType, part.inlineData.data);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue