This commit is contained in:
Himanshu Kumar 2026-04-20 16:05:29 -04:00 committed by GitHub
commit 63b938e1a0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 384 additions and 36 deletions

View file

@ -10,6 +10,8 @@ import {
getResponseText,
flatMapTextParts,
appendToLastTextPart,
base64ByteSize,
describeInlineData,
} from './partUtils.js';
import type { GenerateContentResponse, Part, PartUnion } from '@google/genai';
@ -86,7 +88,7 @@ describe('partUtils', () => {
});
it('should return descriptive string for thought part', () => {
const part = { thought: 'thinking' } as unknown as Part;
const part: Part = { thought: true, text: 'thinking' };
expect(partToString(part, verboseOptions)).toBe('[Thought: thinking]');
});
@ -123,19 +125,7 @@ describe('partUtils', () => {
it('should return descriptive string for inlineData part', () => {
const part = { inlineData: { mimeType: 'image/png', data: '' } } as Part;
expect(partToString(part, verboseOptions)).toBe(
'[Image: image/png, 0.0 KB]',
);
});
it('should show size for inlineData with non-empty base64 data', () => {
// 4 base64 chars → ceil(4*3/4) = 3 bytes → 3/1024 ≈ 0.0 KB
const part = {
inlineData: { mimeType: 'audio/mp3', data: 'AAAA' },
} as Part;
expect(partToString(part, verboseOptions)).toBe(
'[Audio: audio/mp3, 0.0 KB]',
);
expect(partToString(part, verboseOptions)).toBe('[Image: image/png]');
});
it('should return an empty string for an unknown part type', () => {
@ -154,7 +144,7 @@ describe('partUtils', () => {
],
];
expect(partToString(parts as Part, verboseOptions)).toBe(
'start middle[Function Call: func1] end[Audio: audio/mp3, 0.0 KB]',
'start middle[Function Call: func1] end[Audio: audio/mp3]',
);
});
});
@ -272,6 +262,241 @@ describe('partUtils', () => {
});
});
describe('base64ByteSize', () => {
  it('should compute byte size for unpadded base64', () => {
    // A full 4-character group with no '=' decodes to exactly 3 bytes.
    const size = base64ByteSize('AAAA');
    expect(size).toBe(3);
  });

  it('should account for single padding character', () => {
    // One trailing '=' means the final group carries only 2 bytes.
    const size = base64ByteSize('AAA=');
    expect(size).toBe(2);
  });

  it('should account for double padding characters', () => {
    // Two trailing '=' mean the final group carries only 1 byte.
    const size = base64ByteSize('AA==');
    expect(size).toBe(1);
  });

  it('should handle empty string', () => {
    const size = base64ByteSize('');
    expect(size).toBe(0);
  });

  it('should compute correct size for larger data', () => {
    // Two full groups: 8 characters -> 6 bytes.
    const twoFullGroups = base64ByteSize('AAAAAAAA');
    expect(twoFullGroups).toBe(6);

    // Three groups, the last padded with '==': floor(12*3/4) - 2 = 7 bytes.
    const paddedThreeGroups = base64ByteSize('AAAAAAAAAA==');
    expect(paddedThreeGroups).toBe(7);
  });
});
describe('describeInlineData', () => {
  /**
   * Builds an all-'A' base64 payload that decodes to approximately
   * `rawBytes` bytes. Base64 packs 3 raw bytes into 4 characters, so the
   * payload is ceil(rawBytes / 3) groups of 4 characters.
   */
  const makeBase64 = (rawBytes: number): string =>
    'A'.repeat(Math.ceil(rawBytes / 3) * 4);

  /** One row of a "known codec yields size + duration" table. */
  interface CodecCase {
    codec: string;
    rawBytes: number;
    pattern: RegExp;
  }

  describe('audio descriptions', () => {
    it('should describe audio with MIME type only when data is empty', () => {
      const description = describeInlineData('audio/mp3', '');
      expect(description).toBe('[Audio: audio/mp3]');
    });

    it('should describe audio with MIME type only when data is undefined', () => {
      const description = describeInlineData('audio/mp3', undefined);
      expect(description).toBe('[Audio: audio/mp3]');
    });

    // Codecs with a known bitrate must report both a size and a duration.
    const knownAudioCodecs: CodecCase[] = [
      {
        // 16000 bytes at 16000 bytes/sec (128 kbps) = ~1.0s
        codec: 'mp3',
        rawBytes: 16000,
        pattern: /^\[Audio: audio\/mp3, [\d.]+ KB, ~[\d.]+s\]$/,
      },
      {
        codec: 'wav',
        rawBytes: 176400,
        pattern: /^\[Audio: audio\/wav, [\d.]+ KB, ~[\d.]+s\]$/,
      },
      {
        codec: 'ogg',
        rawBytes: 32000,
        pattern: /^\[Audio: audio\/ogg, [\d.]+ KB, ~[\d.]+s\]$/,
      },
      {
        codec: 'opus',
        rawBytes: 8000,
        pattern: /^\[Audio: audio\/opus, [\d.]+ KB, ~[\d.]+s\]$/,
      },
      {
        codec: 'webm',
        rawBytes: 16000,
        pattern: /^\[Audio: audio\/webm, [\d.]+ KB, ~[\d.]+s\]$/,
      },
      {
        codec: 'aac',
        rawBytes: 16000,
        pattern: /^\[Audio: audio\/aac, [\d.]+ KB, ~[\d.]+s\]$/,
      },
      {
        codec: 'flac',
        rawBytes: 88200,
        pattern: /^\[Audio: audio\/flac, [\d.]+ KB, ~[\d.]+s\]$/,
      },
      {
        codec: 'mpeg',
        rawBytes: 16000,
        pattern: /^\[Audio: audio\/mpeg, [\d.]+ KB, ~[\d.]+s\]$/,
      },
    ];
    for (const { codec, rawBytes, pattern } of knownAudioCodecs) {
      it(`should include size and duration for ${codec} audio`, () => {
        const payload = makeBase64(rawBytes);
        expect(describeInlineData(`audio/${codec}`, payload)).toMatch(pattern);
      });
    }

    it('should show size but no duration for unknown audio codec', () => {
      const description = describeInlineData(
        'audio/x-custom',
        makeBase64(10000),
      );
      expect(description).toMatch(/^\[Audio: audio\/x-custom, [\d.]+ KB\]$/);
      expect(description).not.toContain('~');
    });

    it('should format duration as minutes and seconds for long audio', () => {
      // 120 seconds of mp3: 120 * 16000 = 1,920,000 bytes
      const twoMinutesOfMp3 = makeBase64(1920000);
      const description = describeInlineData('audio/mp3', twoMinutesOfMp3);
      expect(description).toMatch(/\d+m \d+s/);
    });

    it('should not produce "60s" in duration from rounding edge cases', () => {
      // 119.9 seconds of mp3: 119.9 * 16000 = 1,918,400 bytes.
      // If the total were not rounded before splitting into minutes and
      // seconds, this could come out as "1m 60s".
      const justUnderTwoMinutes = makeBase64(1918400);
      const description = describeInlineData('audio/mp3', justUnderTwoMinutes);
      expect(description).not.toContain('60s');
      expect(description).toMatch(/\d+m \d+s/);
    });
  });

  describe('video descriptions', () => {
    it('should describe video with MIME type only when data is empty', () => {
      const description = describeInlineData('video/mp4', '');
      expect(description).toBe('[Video: video/mp4]');
    });

    // Containers with a known bitrate must report both a size and a duration.
    const knownVideoContainers: CodecCase[] = [
      {
        codec: 'mp4',
        rawBytes: 375000,
        pattern: /^\[Video: video\/mp4, [\d.]+ KB, ~[\d.]+s\]$/,
      },
      {
        codec: 'webm',
        rawBytes: 312500,
        pattern: /^\[Video: video\/webm, [\d.]+ KB, ~[\d.]+s\]$/,
      },
      {
        codec: 'quicktime',
        rawBytes: 375000,
        pattern: /^\[Video: video\/quicktime, [\d.]+ KB, ~[\d.]+s\]$/,
      },
    ];
    for (const { codec, rawBytes, pattern } of knownVideoContainers) {
      it(`should include size and duration for ${codec} video`, () => {
        const payload = makeBase64(rawBytes);
        expect(describeInlineData(`video/${codec}`, payload)).toMatch(pattern);
      });
    }

    it('should show size but no duration for unknown video codec', () => {
      const description = describeInlineData(
        'video/x-matroska',
        makeBase64(50000),
      );
      expect(description).toMatch(/^\[Video: video\/x-matroska, [\d.]+ KB\]$/);
      expect(description).not.toContain('~');
    });

    it('should format large video size in MB', () => {
      // 5 MB video
      const fiveMegabytes = makeBase64(5 * 1024 * 1024);
      const description = describeInlineData('video/mp4', fiveMegabytes);
      expect(description).toContain('MB');
    });
  });

  describe('image descriptions', () => {
    it('should describe image with MIME type only when data is empty', () => {
      const description = describeInlineData('image/png', '');
      expect(description).toBe('[Image: image/png]');
    });

    it('should include size for image with data', () => {
      const description = describeInlineData('image/png', makeBase64(50000));
      expect(description).toMatch(/^\[Image: image\/png, [\d.]+ KB\]$/);
    });

    it('should not include duration estimate for images', () => {
      const description = describeInlineData('image/jpeg', makeBase64(50000));
      expect(description).not.toContain('~');
    });
  });

  describe('PDF descriptions', () => {
    it('should describe PDF without MIME type label', () => {
      const description = describeInlineData('application/pdf', '');
      expect(description).toBe('[PDF]');
    });

    it('should include size for PDF with data', () => {
      const description = describeInlineData(
        'application/pdf',
        makeBase64(100000),
      );
      expect(description).toMatch(/^\[PDF, [\d.]+ KB\]$/);
    });
  });

  describe('other/unknown types', () => {
    it('should describe unknown MIME type with Data label', () => {
      const description = describeInlineData('application/octet-stream', '');
      expect(description).toBe('[Data: application/octet-stream]');
    });

    it('should handle undefined MIME type', () => {
      const description = describeInlineData(undefined, '');
      expect(description).toBe('[Data: unknown]');
    });

    it('should handle both undefined MIME type and data', () => {
      const description = describeInlineData(undefined, undefined);
      expect(description).toBe('[Data: unknown]');
    });

    it('should include size for unknown type with data', () => {
      const description = describeInlineData(
        'application/octet-stream',
        makeBase64(512),
      );
      expect(description).toMatch(
        /^\[Data: application\/octet-stream, \d+ B\]$/,
      );
    });
  });

  describe('size formatting', () => {
    // Each row picks a payload size that lands in a different unit bucket.
    const unitCases: Array<{
      title: string;
      rawBytes: number;
      pattern: RegExp;
    }> = [
      {
        title: 'should format small sizes in bytes',
        rawBytes: 500,
        pattern: /\d+ B/,
      },
      {
        title: 'should format medium sizes in KB',
        rawBytes: 50000,
        pattern: /[\d.]+ KB/,
      },
      {
        title: 'should format large sizes in MB',
        rawBytes: 2 * 1024 * 1024,
        pattern: /[\d.]+ MB/,
      },
    ];
    for (const { title, rawBytes, pattern } of unitCases) {
      it(title, () => {
        const description = describeInlineData(
          'application/octet-stream',
          makeBase64(rawBytes),
        );
        expect(description).toMatch(pattern);
      });
    }
  });
});
describe('appendToLastTextPart', () => {
it('should append to an empty prompt', () => {
const prompt: PartUnion[] = [];

View file

@ -11,6 +11,144 @@ import type {
PartUnion,
} from '@google/genai';
// Assumed average data rates, in bytes per second, keyed by audio MIME type.
// They are used only to turn a payload size into a rough duration estimate;
// real bitrates depend on encoder settings, so results are approximations.
const AUDIO_BYTES_PER_SECOND: Record<string, number> = {
  // Lossy codecs, assumed ~128 kbps (bits / 8 = bytes).
  'audio/mp3': (128 * 1000) / 8,
  'audio/mpeg': (128 * 1000) / 8,
  'audio/ogg': (128 * 1000) / 8, // Vorbis
  'audio/aac': (128 * 1000) / 8,
  'audio/webm': (128 * 1000) / 8, // Opus in WebM
  // Opus is very efficient, so assume ~64 kbps.
  'audio/opus': (64 * 1000) / 8,
  // Uncompressed PCM: 44.1 kHz x 2 bytes (16-bit) x 2 channels (stereo).
  'audio/wav': 44100 * 2 * 2,
  // Lossless, assumed to be roughly half the size of the equivalent WAV.
  'audio/flac': (44100 * 2 * 2) / 2,
};

// Assumed average data rates, in bytes per second, keyed by video container
// MIME type. Based on typical web video bitrates (~2-4 Mbps video + audio).
const VIDEO_BYTES_PER_SECOND: Record<string, number> = {
  // ~3 Mbps
  'video/mp4': (3 * 1000 * 1000) / 8,
  'video/quicktime': (3 * 1000 * 1000) / 8,
  'video/x-msvideo': (3 * 1000 * 1000) / 8, // AVI
  // ~2.5 Mbps
  'video/webm': (2.5 * 1000 * 1000) / 8,
  'video/ogg': (2.5 * 1000 * 1000) / 8,
};
/**
 * Computes the number of raw bytes a base64-encoded string decodes to,
 * without actually decoding it.
 *
 * Up to two trailing '=' padding characters are excluded before converting
 * the character count to bytes (every 4 characters carry 3 bytes). Removing
 * the padding first, rather than subtracting it from the byte count, keeps
 * the result non-negative for degenerate input such as `'='` or `'=='`.
 *
 * The input is assumed to contain no whitespace or line breaks; any such
 * characters are counted as data.
 *
 * @param base64 The base64 text (padded or unpadded).
 * @returns The decoded size in bytes; never negative.
 */
export function base64ByteSize(base64: string): number {
  let dataChars = base64.length;
  if (base64.endsWith('==')) {
    dataChars -= 2;
  } else if (base64.endsWith('=')) {
    dataChars -= 1;
  }
  // Each data character carries 6 bits; a trailing partial byte is dropped.
  return Math.floor((dataChars * 3) / 4);
}
/**
 * Formats a byte count as a human-readable string (e.g., "512 B", "1.5 KB",
 * "3.2 MB") using 1024-based units and one decimal place for KB and MB.
 *
 * The unit is chosen from the rounded value, so a size just under 1 MiB is
 * shown as "1.0 MB" rather than "1024.0 KB".
 *
 * @param bytes The size in bytes.
 * @returns The formatted size with its unit suffix.
 */
function formatBytes(bytes: number): string {
  if (bytes < 1024) {
    return `${bytes} B`;
  }
  const kilobytes = (bytes / 1024).toFixed(1);
  // Compare the rounded text, not the raw value: 1023.96 KB rounds to "1024.0".
  if (Number(kilobytes) < 1024) {
    return `${kilobytes} KB`;
  }
  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
}
/**
 * Formats a duration in seconds as a human-readable string.
 *
 * Durations that round (to one decimal place) to less than 60 seconds are
 * shown as "X.Xs"; everything else is rounded to whole seconds and shown as
 * "Xm Ys". The unit is chosen from the rounded value, so 59.96 becomes
 * "1m 0s" rather than "60.0s".
 *
 * @param seconds The duration in seconds.
 * @returns The formatted duration.
 */
function formatDuration(seconds: number): string {
  const tenths = seconds.toFixed(1);
  if (Number(tenths) < 60) {
    return `${tenths}s`;
  }
  // Round the total first so the seconds component can never reach 60.
  const totalSeconds = Math.round(seconds);
  const minutes = Math.floor(totalSeconds / 60);
  const remaining = totalSeconds % 60;
  return `${minutes}m ${remaining}s`;
}
/**
 * Classifies a MIME type into a coarse media category.
 *
 * Matching is done on the `type/subtype` portion only: any parameters after
 * ';' are ignored, surrounding whitespace is trimmed, and the comparison is
 * case-insensitive (media type names are case-insensitive).
 *
 * @param mimeType The MIME type to classify, e.g. "audio/mp3".
 * @returns 'audio', 'video' or 'image' by top-level type, 'pdf' for
 *   application/pdf, and 'other' for everything else.
 */
function classifyMimeType(
  mimeType: string,
): 'audio' | 'video' | 'image' | 'pdf' | 'other' {
  const paramStart = mimeType.indexOf(';');
  const essence = (
    paramStart === -1 ? mimeType : mimeType.slice(0, paramStart)
  )
    .trim()
    .toLowerCase();

  if (essence.startsWith('audio/')) return 'audio';
  if (essence.startsWith('video/')) return 'video';
  if (essence.startsWith('image/')) return 'image';
  if (essence === 'application/pdf') return 'pdf';
  return 'other';
}
/**
 * Builds a verbose, bracketed description for an inlineData part.
 *
 * Output shapes (the size, and for audio/video the duration estimate, appear
 * only when `data` is non-empty):
 * - audio: `[Audio: audio/mp3, 45.2 KB, ~2.9s]`
 * - video: `[Video: video/mp4, 1.2 MB, ~3.2s]`
 * - image: `[Image: image/png, 120.5 KB]`
 * - PDF:   `[PDF, 2.3 MB]`
 * - other: `[Data: application/octet-stream, 512 B]`
 *
 * The duration is estimated from the payload size and an assumed average
 * bitrate for the MIME type; it is omitted when no bitrate is known for that
 * type. Category detection and the bitrate lookup use the lower-cased
 * `type/subtype` with any parameters removed, while the label shows the MIME
 * type exactly as it was passed in.
 *
 * @param mimeType MIME type of the payload; `undefined` is shown as "unknown".
 * @param data Base64-encoded payload; when missing or empty only the label
 *   is returned.
 * @returns The bracketed description string.
 */
export function describeInlineData(
  mimeType: string | undefined,
  data: string | undefined,
): string {
  const effectiveMime = mimeType ?? 'unknown';

  // Normalized key: drop parameters (e.g. ";codecs=opus"), trim, lower-case.
  const paramStart = effectiveMime.indexOf(';');
  const mimeKey = (
    paramStart === -1 ? effectiveMime : effectiveMime.slice(0, paramStart)
  )
    .trim()
    .toLowerCase();
  const category = classifyMimeType(mimeKey);

  const parts: string[] = [];

  // Label based on category
  switch (category) {
    case 'audio':
      parts.push(`Audio: ${effectiveMime}`);
      break;
    case 'video':
      parts.push(`Video: ${effectiveMime}`);
      break;
    case 'image':
      parts.push(`Image: ${effectiveMime}`);
      break;
    case 'pdf':
      parts.push('PDF');
      break;
    default:
      parts.push(`Data: ${effectiveMime}`);
      break;
  }

  // Size info from base64 data
  if (data) {
    const byteSize = base64ByteSize(data);
    parts.push(formatBytes(byteSize));

    // Duration estimate: only audio and video have bitrate tables.
    const bitrateTable =
      category === 'audio'
        ? AUDIO_BYTES_PER_SECOND
        : category === 'video'
          ? VIDEO_BYTES_PER_SECOND
          : undefined;
    const bytesPerSec = bitrateTable?.[mimeKey];
    if (bytesPerSec !== undefined && byteSize > 0) {
      parts.push(`~${formatDuration(byteSize / bytesPerSec)}`);
    }
  }

  return `[${parts.join(', ')}]`;
}
/**
* Converts a PartListUnion into a string.
* If verbose is true, includes summary representations of non-text parts.
@ -29,21 +167,17 @@ export function partToString(
return value.map((part) => partToString(part, options)).join('');
}
// Cast to Part, assuming it might contain project-specific fields
// eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion
const part = value as Part & {
videoMetadata?: unknown;
thought?: string;
codeExecutionResult?: unknown;
executableCode?: unknown;
};
// After ruling out string and array, value is narrowed to Part.
// All checked fields (videoMetadata, thought, codeExecutionResult,
// executableCode) are declared on the Part interface from @google/genai.
const part: Part = value;
if (options?.verbose) {
if (part.videoMetadata !== undefined) {
return `[Video Metadata]`;
}
if (part.thought !== undefined) {
return `[Thought: ${part.thought}]`;
return `[Thought: ${part.text ?? ''}]`;
}
if (part.codeExecutionResult !== undefined) {
return `[Code Execution Result]`;
@ -63,18 +197,7 @@ export function partToString(
return `[Function Response: ${part.functionResponse.name}]`;
}
if (part.inlineData !== undefined) {
const mimeType = part.inlineData.mimeType ?? 'unknown';
const data = part.inlineData.data ?? '';
const bytes = Math.ceil((data.length * 3) / 4);
const kb = (bytes / 1024).toFixed(1);
const category = mimeType.startsWith('audio/')
? 'Audio'
: mimeType.startsWith('video/')
? 'Video'
: mimeType.startsWith('image/')
? 'Image'
: 'Media';
return `[${category}: ${mimeType}, ${kb} KB]`;
return describeInlineData(part.inlineData.mimeType, part.inlineData.data);
}
}