@flowlab/all
Version:
A cool library focusing on handling various flows
100 lines (88 loc) • 3.68 kB
text/typescript
// tests/extractors/fileExtractor.test.ts
import * as path from 'path';
import * as fs from 'fs/promises';
import { createLogger } from '../utils/logger';
import { FileExtractor } from '../extractors/fileExtractor';
import { PipelineContext, FileSourceConfig } from '../core/interfaces';
/** Scratch directory, located next to this test file, that holds the generated fixture files. */
const TEST_DIR = path.join(__dirname, 'test-files');

/** Maps a fixture file name to its location inside the scratch directory. */
const fixturePath = (fileName: string): string => path.join(TEST_DIR, fileName);

/** Fixture consumed by the `format: 'json'` test case. */
const JSONL_FILE = fixturePath('data.jsonl');

/** Fixture consumed by the `format: 'text'` test case. */
const TEXT_FILE = fixturePath('data.txt');
// MARK: - beforeAll
/**
 * Creates the scratch directory and writes both fixture files once, before any test runs.
 */
beforeAll(async () => {
  await fs.mkdir(TEST_DIR, { recursive: true });

  // JSON Lines fixture: four parseable records mixed with one blank and one malformed line.
  const jsonlLines = [
    JSON.stringify({ id: 1, name: 'Alice' }),
    JSON.stringify({ id: 2, name: 'Bob' }),
    '{"id": 3, "name": "Charlie"}', // Hand-written, so the spacing differs from JSON.stringify output
    '', // Empty line
    'invalid json', // Invalid line
    JSON.stringify({ id: 4, name: 'David' }),
  ];

  // Plain-text fixture, including an empty third line.
  const textLines = ['Line 1', 'Line 2', '', 'Line 4'];

  // Each fixture is its lines joined by '\n' (no trailing newline), written one after the other.
  const fixtures: ReadonlyArray<readonly [string, readonly string[]]> = [
    [JSONL_FILE, jsonlLines],
    [TEXT_FILE, textLines],
  ];
  for (const [filePath, lines] of fixtures) {
    await fs.writeFile(filePath, lines.join('\n'));
  }
});
// MARK: - afterAll
/**
 * Removes the scratch directory and everything in it once all tests have finished.
 * `force: true` keeps the cleanup from failing when the directory is already gone.
 */
afterAll(() => fs.rm(TEST_DIR, { recursive: true, force: true }));
// Logger built with level 'silent' (presumably to keep test output quiet; confirm against createLogger).
const silentLogger = createLogger({ level: 'silent' });

/** Pipeline context handed to every `extract` call in this file. */
const mockContext: PipelineContext = {
  logger: silentLogger,
  runId: 'test-run-file',
};
describe('FileExtractor', () => {
  /**
   * Runs `extract` with the shared mock context and gathers every yielded item, in order.
   *
   * Items are collected as `unknown` so no type assertion is needed; the Jest matchers
   * below compare them structurally.
   *
   * @param extractor - Extractor under test, already constructed by the caller.
   * @returns All items produced by the extractor's data source.
   * @throws Rejects with whatever `extract` or any iteration step throws.
   */
  const collectItems = async (extractor: FileExtractor): Promise<unknown[]> => {
    const dataSource = await extractor.extract(mockContext);
    const items: unknown[] = [];
    for await (const item of dataSource) {
      items.push(item);
    }
    return items;
  };

  it('should extract objects from a JSON Lines file', async () => {
    const config: FileSourceConfig = {
      type: 'file',
      path: JSONL_FILE,
      format: 'json',
    };

    const results = await collectItems(new FileExtractor(config));

    expect(results).toHaveLength(4); // Skips empty line and invalid json
    expect(results).toEqual([
      { id: 1, name: 'Alice' },
      { id: 2, name: 'Bob' },
      { id: 3, name: 'Charlie' },
      { id: 4, name: 'David' },
    ]);
  });

  it('should extract lines from a text file', async () => {
    const config: FileSourceConfig = {
      type: 'file',
      path: TEXT_FILE,
      format: 'text',
    };

    const results = await collectItems(new FileExtractor(config));

    expect(results).toHaveLength(4); // Includes empty line if present
    expect(results).toEqual(['Line 1', 'Line 2', '', 'Line 4']);
  });

  it('should throw an error for a non-existent file', async () => {
    const config: FileSourceConfig = {
      type: 'file',
      path: path.join(TEST_DIR, 'nonexistent.txt'),
      format: 'text',
    };
    // Constructed outside the expectation so a constructor failure is not mistaken for a read error.
    const extractor = new FileExtractor(config);

    // The failure may surface when the iterable is created or on the first iteration;
    // draining the source inside the helper covers both, and `.rejects` receives the promise itself.
    await expect(collectItems(extractor)).rejects.toThrow(
      /ENOENT|Error reading file stream/, // File not found or read error
    );
  });
});