@flowlab/all
Version:
A cool library focusing on handling various flows
32 lines (27 loc) • 1.86 kB
text/typescript
// src/extractors/mongoExtractor.ts
import { Collection as MongoCollection, Filter as MongoFilter, Document } from 'mongodb';
import { IExtractor, PipelineContext, DataSource, DatabaseSourceConfig, MongoConnection } from '../core/interfaces';
import { ComponentError } from '../core/errors';
/**
 * Extractor that exposes the documents of one MongoDB collection as an
 * async-iterable data source.
 *
 * The collection handle is resolved once, at construction time, from the
 * `db` instance and `collection` name carried by the config's `connection`.
 *
 * @typeParam TOutput - Shape of the documents yielded by the source.
 */
export class MongoExtractor<TOutput extends Document> implements IExtractor<TOutput> {
  /** Cursor batch size used when the config supplies none (or a falsy value such as 0). */
  private static readonly DEFAULT_BATCH_SIZE = 1000;

  /** Private shallow copy of the caller's config, with `batchSize` resolved. */
  private readonly config: DatabaseSourceConfig;
  /** Collection handle obtained from the configured `db` instance. */
  private readonly collection: MongoCollection<TOutput>;
  /** Effective batch size applied to every cursor created by {@link extract}. */
  private readonly batchSize: number;

  /**
   * @param config - Source configuration. Must have `type === 'mongodb'` and a
   *   `connection` object carrying a truthy `db` and a truthy `collection`.
   *   The object is not modified; the extractor keeps its own shallow copy.
   * @throws ComponentError when the type is not `'mongodb'` or when
   *   `connection.db` / `connection.collection` is missing.
   */
  constructor(config: DatabaseSourceConfig) {
    const mongoConn = config.connection as MongoConnection;
    if (config.type !== 'mongodb' || !mongoConn?.db || !mongoConn?.collection) {
      throw new ComponentError('MongoExtractor requires config type "mongodb" and "connection" object with "db" (MongoDB Db instance) and "collection" (string name).');
    }

    // `||` (not `??`) is deliberate: a batch size of 0 also falls back to the default.
    this.batchSize = config.batchSize || MongoExtractor.DEFAULT_BATCH_SIZE;
    // Copy instead of writing the default back into the caller's object, so a
    // config shared between components is never changed behind the caller's back.
    this.config = { ...config, batchSize: this.batchSize };
    this.collection = mongoConn.db.collection<TOutput>(mongoConn.collection);
  }

  /**
   * Builds a `find` cursor over the collection and returns it as the data source.
   *
   * The configured `query` is used as the filter; when it is absent (or falsy)
   * an empty filter `{}` is used. The filter and collection name are logged at
   * info level. The cursor is returned unconsumed: this method does not iterate
   * it, so the caller is responsible for iterating it (and for handling any
   * error raised while doing so).
   *
   * @param context - Pipeline context; only `context.logger.info` is used here.
   * @returns The cursor, typed as an async iterable of `TOutput`.
   */
  async extract(context: PipelineContext): Promise<DataSource<TOutput>> {
    const filter = (this.config.query || {}) as MongoFilter<TOutput>;
    const collectionName = this.collection.collectionName;
    context.logger.info({ filter, collection: collectionName }, `Extracting data from MongoDB collection: ${collectionName}`);

    // A cursor (rather than a materialised array) keeps iteration over large
    // collections incremental, fetching `batchSize` documents per batch.
    const cursor = this.collection.find(filter).batchSize(this.batchSize);
    return cursor as AsyncIterable<TOutput>;
  }
}