UNPKG

@flowlab/all

Version:

A cool library focusing on handling various flows

79 lines (66 loc) 3.81 kB
// src/extractors/prismaExtractor.ts
import type { IExtractor, PipelineContext, DataSource, DatabaseSourceConfig } from '../core/interfaces';
import type { PrismaClient } from '@prisma/client'; // Needs peer dependency

/** Batch size used when the config does not specify one (or specifies 0). */
const DEFAULT_BATCH_SIZE = 1000;

/** Cursor field used when the config does not specify one. */
const DEFAULT_CURSOR_FIELD = 'id';

/** Values accepted as a pagination cursor. */
type CursorValue = string | number | bigint;

/** Minimal shape of a Prisma model delegate as used by this extractor. */
interface PrismaModelDelegate {
  findMany(args: object): Promise<unknown[]>;
}

/** Configuration for {@link PrismaExtractor}. */
interface PrismaSourceConfig extends DatabaseSourceConfig {
  /** Database flavour; add others supported by Prisma as needed. */
  type: 'postgresql' | 'mysql';
  /** An already instantiated PrismaClient. */
  connection: PrismaClient;
  /** Name of the model delegate on the client, e.g. 'user' or 'product'. */
  model: string;
  /** Prisma `findMany` arguments (where, select, include, etc.). */
  queryArgs?: object;
  /** Rows fetched per round-trip; must be a positive integer. Defaults to 1000. */
  batchSize?: number;
  /**
   * Unique field (@id or @unique) used for ordering and as the pagination
   * cursor. It must be present in every returned row, so do not exclude it
   * through `select`. Defaults to 'id'.
   */
  cursorField?: string;
}

/** Returns true when `value` exposes a callable `findMany`. */
function isModelDelegate(value: unknown): value is PrismaModelDelegate {
  return value != null && typeof (value as { findMany?: unknown }).findMany === 'function';
}

/** Reads `field` from `row`, or returns undefined when `row` is not an object. */
function readField(row: unknown, field: string): unknown {
  if (typeof row !== 'object' || row === null) {
    return undefined;
  }
  return (row as Record<string, unknown>)[field];
}

/**
 * Extractor that streams the rows of one Prisma model as an async iterable,
 * fetching them in batches with cursor-based pagination so that large tables
 * are never loaded into memory at once.
 */
export class PrismaExtractor<TOutput> implements IExtractor<TOutput> {
  private readonly config: PrismaSourceConfig;
  private readonly batchSize: number;
  private readonly cursorField: string;

  /**
   * @param config - Source configuration. The object is not modified.
   * @throws Error when `connection` or `model` is missing, or when
   *   `batchSize` is not a positive integer.
   */
  constructor(config: PrismaSourceConfig) {
    if (!config.connection || !config.model) {
      throw new Error('PrismaExtractor requires "connection" (PrismaClient instance) and "model" in config.');
    }

    // `||` is deliberate: an explicit 0 falls back to the default batch size.
    const batchSize = config.batchSize || DEFAULT_BATCH_SIZE;
    if (!Number.isInteger(batchSize) || batchSize < 1) {
      // A negative `take` makes Prisma page backwards, which breaks the forward cursor walk.
      throw new Error(`PrismaExtractor "batchSize" must be a positive integer, got ${config.batchSize}.`);
    }

    this.config = config;
    this.batchSize = batchSize;
    this.cursorField = config.cursorField || DEFAULT_CURSOR_FIELD;
  }

  /**
   * Resolves the configured model delegate and returns a lazy data source
   * over its rows. No query is issued until the result is iterated.
   *
   * @param context - Pipeline context; only its `logger` is used.
   * @returns An async iterable yielding rows in ascending cursor-field order.
   * @throws Error when the configured model does not exist on the client.
   */
  async extract(context: PipelineContext): Promise<DataSource<TOutput>> {
    context.logger.info(`Extracting data using Prisma from model: ${this.config.model}`);

    // PrismaClient has no index signature, so the dynamic lookup goes through a record view.
    const clientView = this.config.connection as unknown as Record<string, unknown>;
    const modelDelegate = clientView[this.config.model];
    if (!isModelDelegate(modelDelegate)) {
      throw new Error(`Prisma model "${this.config.model}" not found on the provided client.`);
    }

    // A plain findMany would load the whole table; paginate with a cursor instead.
    return this.extractWithCursor(context, modelDelegate);
  }

  /**
   * Yields every row of the model, one batch per `findMany` call.
   *
   * The caller's `queryArgs` are applied first; `take`, `skip`, `cursor` and
   * `orderBy` are then overridden because pagination depends on them.
   *
   * @throws Error when the last row of a batch has no value for the cursor field.
   */
  private async *extractWithCursor(
    context: PipelineContext,
    modelDelegate: PrismaModelDelegate,
  ): AsyncIterable<TOutput> {
    const take = this.batchSize;
    const idField = this.cursorField;
    let cursor: CursorValue | undefined;

    context.logger.debug(`Starting cursor-based extraction for ${this.config.model} with batch size ${take}`);

    while (true) {
      // Compare against undefined, not truthiness: 0, 0n and '' are valid ids.
      const hasCursor = cursor !== undefined;
      const queryArgs = {
        ...(this.config.queryArgs ?? {}), // Original query args (where, select, etc.)
        take,
        skip: hasCursor ? 1 : 0, // Skip the cursor row itself on subsequent fetches
        cursor: hasCursor ? { [idField]: cursor } : undefined,
        orderBy: { [idField]: 'asc' }, // Must order by the unique cursor field
      };

      context.logger.trace({ queryArgs }, 'Fetching next batch from Prisma');
      const batch = await modelDelegate.findMany(queryArgs);

      if (batch.length === 0) {
        context.logger.debug('Extraction complete, no more items found.');
        break;
      }

      for (const item of batch) {
        yield item as TOutput;
      }

      // Advance the cursor to the cursor-field value of the last row in the batch.
      const lastItem = batch[batch.length - 1];
      const nextCursor = readField(lastItem, idField);
      if (nextCursor == null) {
        context.logger.error({ lastItem }, `Could not get cursor field '${idField}' from last item.`);
        throw new Error(`Cursor field '${idField}' not found or null in extracted data.`);
      }
      cursor = nextCursor as CursorValue;
      context.logger.trace(`Cursor updated to: ${cursor}`);

      if (batch.length < take) {
        // A short batch means the table is exhausted; skip the extra empty round-trip.
        context.logger.debug('Extraction complete, final batch was smaller than the batch size.');
        break;
      }
    }

    context.logger.info(`Finished Prisma extraction for ${this.config.model}`);
  }
}