UNPKG

@flowlab/all

Version:

A cool library focusing on handling various flows

64 lines (58 loc) 2.98 kB
// src/extractors/fileExtractor.ts
import { once } from 'events';
import * as fs from 'fs'; // Use sync fs for createReadStream
import * as readline from 'readline';
import { IExtractor, PipelineContext, DataSource, FileSourceConfig } from '../core/interfaces';
import { ComponentError } from '../core/errors';

/**
 * Extractor that reads a file line by line and yields one record per line.
 *
 * The shape of each yielded record depends on `config.format`:
 * - `'json'`: the file is treated as JSON Lines; each non-blank line is parsed
 *   with `JSON.parse`. Lines that fail to parse are logged and skipped.
 * - `'csv'`: each line is split on `','` and yielded as an array of strings.
 *   There is no quoting/escaping or header handling.
 * - anything else: the raw line is yielded as a string.
 */
export class FileExtractor implements IExtractor<string | object> {
  private readonly config: FileSourceConfig;

  /**
   * @param config File source settings; `path` and `format` are required.
   * @throws ComponentError when `path` or `format` is missing or empty.
   */
  constructor(config: FileSourceConfig) {
    if (!config.path || !config.format) {
      throw new ComponentError('FileExtractor requires "path" and "format" in config.');
    }
    this.config = config;
  }

  /**
   * Logs the extraction and returns a lazy, line-by-line stream over the file.
   *
   * @param context Pipeline context; only `context.logger` is used here.
   * @returns The async iterable produced by the private streaming generator.
   *   Nothing is read from disk until the iterable is consumed.
   */
  async extract(context: PipelineContext): Promise<DataSource<string | object>> {
    context.logger.info(`Extracting data from file: ${this.config.path} (format: ${this.config.format})`);
    // Use AsyncIterable for large files
    return this.extractAsStream(context);
  }

  /**
   * Async generator that streams the configured file and yields one record per line.
   *
   * @param context Pipeline context; only `context.logger` is used here.
   * @throws ComponentError wrapping any error raised while opening or reading the file.
   */
  private async *extractAsStream(context: PipelineContext): AsyncIterable<string | object> {
    const fileStream = fs.createReadStream(this.config.path, { encoding: this.config.encoding || 'utf-8' });
    let rl: readline.Interface | undefined;
    let lineNum = 0;

    try {
      // Wait for the file to open before handing the stream to readline.
      // `once` rejects if the stream emits 'error' first (missing file,
      // permissions), so that failure is caught and wrapped below.
      // NOTE(review): this avoids depending on readline forwarding the input
      // stream's 'error' event into the async iterator, which older Node.js
      // releases are believed not to do reliably - confirm against the
      // supported Node.js range.
      await once(fileStream, 'open');

      rl = readline.createInterface({
        input: fileStream,
        crlfDelay: Infinity // Handle different line endings
      });

      for await (const line of rl) {
        lineNum++;
        if (this.config.format === 'json') {
          // Assume JSON Lines format (one JSON object per line)
          if (line.trim()) { // Avoid parsing empty lines
            try {
              yield JSON.parse(line);
            } catch (parseError) {
              // Best-effort: a single malformed line must not abort the whole file.
              context.logger.error({ err: parseError, line, lineNum, file: this.config.path }, `Skipping invalid JSON line`);
            }
          }
        } else if (this.config.format === 'csv') {
          // Basic CSV parsing (split by comma), consider a proper CSV library for robustness
          yield line.split(','); // VERY basic, use 'csv-parse' for real cases
          // TODO: Add header handling / object creation based on header
        } else { // text format
          yield line;
        }
      }
      context.logger.info(`Finished reading file ${this.config.path}, ${lineNum} lines processed.`);
    } catch (error: unknown) {
      context.logger.error({ err: error, file: this.config.path }, `Error reading file stream`);
      // Always hand an Error instance to ComponentError as the cause.
      const cause = error instanceof Error ? error : new Error(String(error));
      throw new ComponentError(`Error reading file ${this.config.path}`, 'FileExtractor', cause);
    } finally {
      // Release both the readline interface and the underlying stream,
      // whether iteration completed, failed, or was abandoned by the consumer.
      if (rl) {
        rl.close();
      }
      if (!fileStream.destroyed) {
        fileStream.destroy();
      }
    }
  }
}