lobehub/packages/eval-dataset-parser/__tests__/detectFormat.edge.test.ts
LobeHub Bot afefe217db
test: add unit tests for eval-dataset-parser (#13197)
Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-25 10:55:58 +08:00

74 lines
2.6 KiB
TypeScript

import { describe, expect, it } from 'vitest';
import * as XLSX from 'xlsx';
import { detectFormat } from '../src/detect';
/**
 * Leading four bytes `50 4B 03 04` ("PK\x03\x04"), i.e. the ZIP local-file-header
 * signature that XLSX containers start with.
 * NOTE(review): presumably the same signature `detectFormat` looks for — confirm
 * against `../src/detect`.
 */
const XLSX_MAGIC = new Uint8Array([0x50, 0x4b, 0x03, 0x04]);

/**
 * Edge-case coverage for `detectFormat`: extension-based detection, binary input
 * (`Uint8Array` / `Buffer`) without a filename, and JSON-looking text that does
 * not parse.
 */
describe('detectFormat - edge cases', () => {
  /** Builds a one-sheet, one-cell workbook so tests can serialize a real XLSX binary. */
  const createMinimalWorkbook = () => {
    const workbook = XLSX.utils.book_new();
    XLSX.utils.book_append_sheet(workbook, XLSX.utils.aoa_to_sheet([['a']]), 'Sheet1');
    return workbook;
  };

  /** Encodes text as UTF-8 bytes to exercise the binary-input code path. */
  const toBytes = (text: string): Uint8Array => new TextEncoder().encode(text);

  it('should detect XLS by filename extension', () => {
    // Empty content: the result can only come from the `.xls` extension.
    expect(detectFormat('', 'data.xls')).toBe('xlsx');
  });

  it('should detect XLSX magic bytes from Uint8Array without filename', () => {
    // Serialize a real minimal XLSX binary rather than hand-crafting bytes.
    const serialized = XLSX.write(createMinimalWorkbook(), { bookType: 'xlsx', type: 'array' });
    const data = new Uint8Array(serialized);

    expect(detectFormat(data)).toBe('xlsx');
  });

  it('should detect XLSX magic bytes from Buffer without filename', () => {
    const buf = XLSX.write(createMinimalWorkbook(), {
      bookType: 'xlsx',
      type: 'buffer',
    }) as Buffer;

    expect(detectFormat(buf)).toBe('xlsx');
  });

  it('should parse JSON from Uint8Array containing JSON array', () => {
    const data = toBytes('[{"a":1}]');

    expect(detectFormat(data)).toBe('json');
  });

  it('should parse JSONL from Uint8Array', () => {
    const data = toBytes('{"a":1}\n{"b":2}');

    expect(detectFormat(data)).toBe('jsonl');
  });

  it('should fall back to CSV from Uint8Array with CSV content', () => {
    const data = toBytes('col1,col2\nval1,val2');

    expect(detectFormat(data)).toBe('csv');
  });

  it('should not detect XLSX from short Uint8Array (less than 4 bytes)', () => {
    // Only the first two signature bytes (0x50, 0x4b). `slice` copies them into a
    // standalone 2-byte array, so the remaining signature bytes are not reachable
    // through the underlying buffer.
    const data = XLSX_MAGIC.slice(0, 2);

    // Not enough bytes for magic number → falls through to string detection
    expect(detectFormat(data)).toBe('csv');
  });

  it('filename extension takes precedence over content', () => {
    // Content looks like JSON but filename says CSV
    const jsonLookingContent = '[{"a":1}]';

    expect(detectFormat(jsonLookingContent, 'data.csv')).toBe('csv');
  });

  it('should treat a JSON-like string that fails parse as CSV', () => {
    // Starts with '[' but is not valid JSON
    const malformedArray = '[not valid json';

    expect(detectFormat(malformedArray)).toBe('csv');
  });

  it('should treat an object-like first line that fails parse as CSV', () => {
    // Starts with '{' on first line but is not valid JSON
    const malformedLines = '{not valid jsonl}\nmore data';

    expect(detectFormat(malformedLines)).toBe('csv');
  });
});