@db2lake/core

Core interfaces and utilities for db2lake

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Pipeline = void 0; /** * Abstract pipeline class for orchestrating data flow from source to destination. * Handles connection management, data transformation, error handling, and progress tracking. * * Features: * - Automatic connection management for source and destination * - Batch processing with cursor tracking for resumability * - Optional data transformation * - Comprehensive logging and metrics * - Error handling with proper cleanup * * @property source - The source driver instance that implements ISourceDriver * @property destination - The destination driver instance that implements IDestinationDriver * @property transformer - Optional transformer function to convert source data format to destination format * @property logger - Optional logger function for tracking pipeline events and errors * * @example * ```typescript * const pipeline = new MyPipeline( * new MySourceDriver(), * new MyDestinationDriver(), * (data) => data.map(transform), * (level, msg, data) => console.log(level, msg, data) * ); * * try { * await pipeline.run(); * } catch (error) { * console.error('Pipeline failed:', error); * } * ``` */ class IPipeline { constructor(source, destination, transformer, logger) { this.source = source; this.destination = destination; this.transformer = transformer; this.logger = logger; this.connected = false; this.batchCount = 0; this.totalRows = 0; this.currentCursor = null; } /** * Main entry point for pipeline execution. * This abstract method must be implemented by concrete pipeline classes. * The typical implementation should: * 1. Connect to source and destination * 2. Process data * 3. Ensure cleanup is called, even if processing fails * * @protected * @throws Will throw an error if connections fail or data processing encounters an error * * @example * ```typescript * protected async run(): Promise<void> { * await this.connect(); * try { * await this.process(); * } finally { * await this.cleanup(); * } * } * ``` */ async run() { await this.connect(); try { await this.process(); } finally { await this.cleanup(); } } /** * Establishes connections to both source and destination. * This method is idempotent - calling it multiple times will only connect once. * * @protected * @throws Will throw an error if either connection fails * @emits Logs 'info' events for connection status * @emits Logs 'error' event if connections fail */ async connect() { if (this.connected) return; try { this.log('info', 'Establishing connections...'); await this.source.connect(); await this.destination.connect(); this.connected = true; this.log('info', 'Connections established successfully'); } catch (error) { this.log('error', 'Failed to establish connections', error); await this.cleanup(); throw error; } } /** * Main data processing loop. * Orchestrates the entire data pipeline: * 1. Fetches data in batches from the source * 2. Applies transformations if configured * 3. Writes transformed data to the destination * 4. Maintains cursor state for tracking progress * 5. 
Provides detailed logging of the process * * @protected * @throws Will throw an error if batch processing fails * @emits Logs 'info' events for start/completion * @emits Logs 'debug' events for each batch * @emits Logs 'error' events for failures */ async process() { try { this.log('info', 'Starting data processing'); for await (const batch of this.source.fetch()) { try { // Transform data if transformer is provided const transformedBatch = this.transformer ? this.transformer(batch) : batch; // Insert transformed data into destination await this.destination.insert(transformedBatch); // Update metrics this.batchCount++; this.totalRows += batch.length; // Update cursor information this.currentCursor = { position: this.totalRows, lastItem: batch[batch.length - 1], timestamp: new Date().toISOString() }; this.log('debug', `Processed batch ${this.batchCount}`, { batchSize: batch.length, totalRows: this.totalRows, cursor: this.currentCursor }); } catch (error) { this.log('error', `Failed to process batch ${this.batchCount}`, { error, batchSize: batch.length, cursor: this.currentCursor }); throw error; } } this.log('info', 'Data processing completed', { totalBatches: this.batchCount, totalRows: this.totalRows, finalCursor: this.currentCursor }); } catch (error) { this.log('error', 'Data processing failed', { error, lastCursor: this.currentCursor }); throw error; } } /** * Cleanup resources by closing connections. * Attempts to close both source and destination connections, handling each independently * to ensure both are attempted even if one fails. * * @protected * @throws Will throw an error if cleanup completely fails * @emits Logs 'info' events for cleanup status * @emits Logs 'error' events for individual connection close failures * * @note Always call this method in a finally block to ensure resources are released * @note Includes current cursor state in cleanup logs for debugging/resumability */ async cleanup() { try { this.log('info', 'Cleaning up resources...'); // Close source connection try { await this.source.close(); } catch (error) { this.log('error', 'Failed to close source connection', error); } // Close destination connection try { await this.destination.close(); } catch (error) { this.log('error', 'Failed to close destination connection', error); } this.connected = false; this.log('info', 'Cleanup completed'); } catch (error) { this.log('error', 'Cleanup failed', error); throw error; } } /** * Helper method for consistent logging throughout the pipeline. * Only logs if a logger was provided in the constructor. * * @protected * @param level - The severity level of the log (error, info, debug) * @param message - The main log message * @param data - Optional structured data to include in the log * * @example * ```typescript * this.log('error', 'Failed to process batch', { * error, * batchId: 123, * cursor: this.currentCursor * }); * ``` */ log(level, message, data) { if (this.logger) { this.logger(level, message, data); } } /** * Get current pipeline metrics and progress information. 
* Provides a snapshot of the pipeline's current state including: * - Number of batches processed * - Total rows processed * - Current cursor position for resumability * * @public * @returns {Object} Pipeline metrics and cursor state * @returns {number} returns.batchCount - Number of batches processed * @returns {number} returns.totalRows - Total number of rows processed * @returns {PipelineCursor | null} returns.cursor - Current cursor state or null if no data processed * * @example * ```typescript * const { batchCount, totalRows, cursor } = pipeline.getMetrics(); * console.log(`Processed ${totalRows} rows in ${batchCount} batches`); * if (cursor) { * console.log(`Last position: ${cursor.position}`); * } * ``` */ getMetrics() { return { batchCount: this.batchCount, totalRows: this.totalRows, cursor: this.currentCursor }; } } class Pipeline extends IPipeline { } exports.Pipeline = Pipeline; //# sourceMappingURL=pipeline.js.map
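For orientation, here is a minimal, hypothetical usage sketch built only from what the compiled code above shows: the pipeline calls `connect()`, `fetch()` (an async iterable of batches), and `close()` on the source, and `connect()`, `insert()`, and `close()` on the destination, and `Pipeline` is exported from `@db2lake/core`. The in-memory driver classes, the row shape, and the transformer are invented for illustration and are not part of the package; the package's real driver interfaces (`ISourceDriver`, `IDestinationDriver`) may require more than this.

```javascript
const { Pipeline } = require('@db2lake/core');

// Hypothetical in-memory source driver: yields rows in fixed-size batches.
// Its method names mirror the calls the pipeline makes: connect/fetch/close.
class ArraySourceDriver {
  constructor(rows, batchSize = 2) {
    this.rows = rows;
    this.batchSize = batchSize;
  }
  async connect() {}
  async *fetch() {
    for (let i = 0; i < this.rows.length; i += this.batchSize) {
      yield this.rows.slice(i, i + this.batchSize);
    }
  }
  async close() {}
}

// Hypothetical destination driver: just prints each batch (connect/insert/close).
class ConsoleDestinationDriver {
  async connect() {}
  async insert(batch) {
    console.log('inserting', batch);
  }
  async close() {}
}

async function main() {
  const pipeline = new Pipeline(
    new ArraySourceDriver([
      { id: 1, name: 'a' },
      { id: 2, name: 'b' },
      { id: 3, name: 'c' },
    ]),
    new ConsoleDestinationDriver(),
    // Optional transformer applied to each batch before insertion.
    (batch) => batch.map((row) => ({ ...row, name: row.name.toUpperCase() })),
    // Optional logger receiving (level, message, data).
    (level, message, data) => console.log(`[${level}] ${message}`, data ?? '')
  );

  await pipeline.run();
  console.log('metrics:', pipeline.getMetrics());
}

main().catch((error) => console.error('Pipeline failed:', error));
```

As the class-level JSDoc example with `MyPipeline` suggests, a subclass of `Pipeline` can be used instead when the default connect/process/cleanup flow in `run()` needs to be customized.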