UNPKG

@flowlab/all

Version:

A cool library focusing on handling various flows

32 lines (27 loc) 1.86 kB
// src/extractors/mongoExtractor.ts
import { Collection as MongoCollection, Filter as MongoFilter, Document } from 'mongodb';
import { IExtractor, PipelineContext, DataSource, DatabaseSourceConfig, MongoConnection } from '../core/interfaces';
import { ComponentError } from '../core/errors';

/**
 * Extractor that reads documents from a single MongoDB collection.
 *
 * The collection handle is resolved once, at construction time, from the
 * `db` instance and `collection` name carried by `config.connection`.
 * `extract` then opens a `find` cursor using `config.query` as the filter
 * (or an empty filter when no query is configured) and hands the cursor back
 * as an async iterable.
 *
 * @typeParam TOutput - Shape of the documents yielded by the extractor.
 */
export class MongoExtractor<TOutput extends Document> implements IExtractor<TOutput> {
  /** Cursor batch size applied when the configuration does not provide one. */
  private static readonly DEFAULT_BATCH_SIZE = 1000;

  /** Private copy of the caller's configuration with the batch-size default applied. */
  private readonly config: DatabaseSourceConfig;

  /** Collection handle resolved from the configured connection. */
  private readonly collection: MongoCollection<TOutput>;

  /**
   * @param config - Source configuration. Must have `type === 'mongodb'` and a
   *   `connection` object exposing a truthy `db` and `collection`.
   * @throws ComponentError when `config` is missing, has a different `type`,
   *   or its connection lacks `db` or `collection`.
   */
  constructor(config: DatabaseSourceConfig) {
    // Cast once instead of repeating the assertion for every property access.
    const mongoConn = config?.connection as MongoConnection | undefined;

    if (config?.type !== 'mongodb' || !mongoConn?.db || !mongoConn?.collection) {
      throw new ComponentError('MongoExtractor requires config type "mongodb" and "connection" object with "db" (MongoDB Db instance) and "collection" (string name).');
    }

    // Store a shallow copy: applying the batch-size default must not write
    // back into the object the caller passed in.
    // `||` (not `??`) is kept on purpose so a batch size of 0 still falls back
    // to the default, exactly as before.
    this.config = {
      ...config,
      batchSize: config.batchSize || MongoExtractor.DEFAULT_BATCH_SIZE,
    };
    this.collection = mongoConn.db.collection<TOutput>(mongoConn.collection);
  }

  /**
   * Opens a cursor over the configured collection.
   *
   * Logs the filter and collection name through `context.logger.info`, then
   * returns the cursor without iterating it; consuming the iterable is left to
   * the caller.
   *
   * @param context - Pipeline context; only `context.logger` is used here.
   * @returns The `find` cursor, typed as an `AsyncIterable<TOutput>`.
   */
  async extract(context: PipelineContext): Promise<DataSource<TOutput>> {
    const filter = (this.config.query || {}) as MongoFilter<TOutput>;
    const { collectionName } = this.collection;

    context.logger.info(
      { filter, collection: collectionName },
      `Extracting data from MongoDB collection: ${collectionName}`,
    );

    // A cursor lets the consumer walk the result set batch by batch rather
    // than materialising every document up front.
    const cursor = this.collection
      .find(filter)
      .batchSize(this.config.batchSize || MongoExtractor.DEFAULT_BATCH_SIZE);

    // NOTE(review): no error handling is wrapped around cursor operations;
    // failures surface to whoever iterates the returned value.
    return cursor as AsyncIterable<TOutput>;
  }
}