UNPKG

@db2lake/core

Version:

Core interfaces and utilities for db2lake

159 lines 5.97 kB
import { ISourceDriver, IDestinationDriver } from "./driver.type";
import { ITransformer, ILogger, LogLevel, PipelineCursor } from "./pipeline.type";
export type { ITransformer, ILogger, LogLevel, PipelineCursor } from "./pipeline.type";
/**
 * Abstract base class that orchestrates data flow from a source driver to a
 * destination driver.
 *
 * Responsibilities:
 * - Connection lifecycle for both source and destination (idempotent connect,
 *   independent cleanup of each side)
 * - Batch-oriented processing with a cursor tracked for resumability
 * - Optional per-batch transformation of source rows into the destination shape
 * - Logging hooks and simple progress metrics (batch count / row count)
 *
 * @example
 * ```typescript
 * const pipeline = new MyPipeline(
 *   new MySourceDriver(),
 *   new MyDestinationDriver(),
 *   (data) => data.map(transform),
 *   (level, msg, data) => console.log(level, msg, data)
 * );
 *
 * try {
 *   await pipeline.run();
 * } catch (error) {
 *   console.error('Pipeline failed:', error);
 * }
 * ```
 */
declare abstract class IPipeline {
    /** Source driver the pipeline reads batches from. */
    protected source: ISourceDriver;
    /** Destination driver the pipeline writes batches to. */
    protected destination: IDestinationDriver;
    /** Optional function converting source rows into the destination format. */
    protected transformer?: ITransformer | undefined;
    /** Optional sink for pipeline events ('error' | 'info' | 'debug'). */
    protected logger?: ILogger | undefined;
    private connected;
    private batchCount;
    private totalRows;
    private currentCursor;
    /**
     * @param source - Driver supplying batches of rows
     * @param destination - Driver receiving (optionally transformed) batches
     * @param transformer - Optional row-format converter applied per batch
     * @param logger - Optional callback receiving (level, message, data) events
     */
    constructor(source: ISourceDriver, destination: IDestinationDriver, transformer?: ITransformer | undefined, logger?: ILogger | undefined);
    /**
     * Public entry point for pipeline execution. Concrete pipelines drive the
     * protected lifecycle from here: connect(), then process(), then cleanup()
     * in a finally block so resources are released even on failure.
     *
     * @throws If connecting or batch processing fails
     */
    run(): Promise<void>;
    /**
     * Opens connections to both source and destination. Idempotent: repeated
     * calls connect at most once. Logs 'info' on status, 'error' on failure.
     *
     * @throws If either connection fails
     */
    protected connect(): Promise<void>;
    /**
     * Core processing loop: fetch a batch from the source, apply the
     * transformer if one was supplied, write the result to the destination,
     * and advance the cursor — repeating until the source is exhausted.
     * Emits 'info' at start/end, 'debug' per batch, 'error' on failure.
     *
     * @throws If any batch fails to process
     */
    protected process(): Promise<void>;
    /**
     * Closes both connections, attempting each side independently so one
     * failure does not prevent the other from being closed. Cleanup logs
     * include the current cursor state for debugging/resumability.
     * Call from a finally block.
     *
     * @throws If cleanup fails entirely
     */
    protected cleanup(): Promise<void>;
    /**
     * Forwards an event to the configured logger; a no-op when no logger was
     * provided at construction.
     *
     * @param level - Severity ('error' | 'info' | 'debug')
     * @param message - Human-readable description of the event
     * @param data - Optional structured payload attached to the entry
     */
    protected log(level: LogLevel, message: string, data?: any): void;
    /**
     * Snapshot of current progress.
     *
     * @returns batchCount - Batches processed so far
     * @returns totalRows - Rows processed so far
     * @returns cursor - Last cursor position, or null before any data flowed
     */
    getMetrics(): {
        batchCount: number;
        totalRows: number;
        cursor: PipelineCursor | null;
    };
}
/** Concrete, directly usable pipeline with the default lifecycle behavior. */
export declare class Pipeline extends IPipeline {
}
//# sourceMappingURL=pipeline.d.ts.map