UNPKG

@flowlab/all

Version:

A cool library focusing on handling various flows

100 lines (88 loc) 3.68 kB
// tests/extractors/fileExtractor.test.ts
import * as path from 'path';
import * as fs from 'fs/promises';
import { createLogger } from '../utils/logger';
import { FileExtractor } from '../extractors/fileExtractor';
import { PipelineContext, FileSourceConfig } from '../core/interfaces';

// Fixture locations: a scratch directory next to this test file, holding one
// JSON Lines file and one plain-text file that are generated before the suite.
const TEST_DIR = path.join(__dirname, 'test-files');
const JSONL_FILE = path.join(TEST_DIR, 'data.jsonl');
const TEXT_FILE = path.join(TEST_DIR, 'data.txt');

/**
 * Drains an async iterable into an array, preserving iteration order.
 *
 * @param source - The async iterable to consume.
 * @returns Every item yielded by `source`, in the order it was yielded.
 */
async function collect<T>(source: AsyncIterable<T>): Promise<T[]> {
  const items: T[] = [];
  for await (const item of source) {
    items.push(item);
  }
  return items;
}

// Suite setup: create the scratch directory and write both fixture files.
beforeAll(async () => {
  await fs.mkdir(TEST_DIR, { recursive: true });

  // JSON Lines fixture: four parseable records, plus one empty line and one
  // line that is not JSON at all.
  const jsonlLines: string[] = [
    JSON.stringify({ id: 1, name: 'Alice' }),
    JSON.stringify({ id: 2, name: 'Bob' }),
    '{"id": 3, "name": "Charlie"}', // hand-written, spacing differs from JSON.stringify output
    '', // empty line
    'invalid json', // unparseable line
    JSON.stringify({ id: 4, name: 'David' }),
  ];
  await fs.writeFile(JSONL_FILE, jsonlLines.join('\n'));

  // Text fixture: four lines, the third of which is empty.
  const textLines: string[] = ['Line 1', 'Line 2', '', 'Line 4'];
  await fs.writeFile(TEXT_FILE, textLines.join('\n'));
});

// Suite teardown: remove the scratch directory and everything inside it.
afterAll(async () => {
  await fs.rm(TEST_DIR, { recursive: true, force: true });
});

// Minimal pipeline context shared by every test; the logger is silenced.
const mockContext: PipelineContext = {
  logger: createLogger({ level: 'silent' }),
  runId: 'test-run-file',
};

describe('FileExtractor', () => {
  it('should extract objects from a JSON Lines file', async () => {
    const source: FileSourceConfig = {
      type: 'file',
      path: JSONL_FILE,
      format: 'json',
    };

    const stream = await new FileExtractor(source).extract(mockContext);
    // extract() is typed as AsyncIterable<string | object>; this format yields objects.
    const records = await collect(stream as AsyncIterable<object>);

    // The empty line and the invalid line are expected to be skipped.
    expect(records).toHaveLength(4);
    expect(records).toEqual([
      { id: 1, name: 'Alice' },
      { id: 2, name: 'Bob' },
      { id: 3, name: 'Charlie' },
      { id: 4, name: 'David' },
    ]);
  });

  it('should extract lines from a text file', async () => {
    const source: FileSourceConfig = {
      type: 'file',
      path: TEXT_FILE,
      format: 'text',
    };

    const stream = await new FileExtractor(source).extract(mockContext);
    // extract() is typed as AsyncIterable<string | object>; this format yields strings.
    const lines = await collect(stream as AsyncIterable<string>);

    // The empty third line is expected to be kept.
    expect(lines).toHaveLength(4);
    expect(lines).toEqual(['Line 1', 'Line 2', '', 'Line 4']);
  });

  it('should throw an error for a non-existent file', async () => {
    const source: FileSourceConfig = {
      type: 'file',
      path: path.join(TEST_DIR, 'nonexistent.txt'),
      format: 'text',
    };
    const extractor = new FileExtractor(source);

    // The failure may surface either when the iterable is created or on the
    // first iteration, so both steps run inside the function under test.
    const readMissingFile = async (): Promise<void> => {
      const stream = await extractor.extract(mockContext);
      // Iterate to trigger the file read; the yielded items are discarded.
      for await (const _ of stream) {
      }
    };

    // Accept either a file-not-found error or a stream read error.
    await expect(readMissingFile).rejects.toThrow(/ENOENT|Error reading file stream/);
  });
});