@flowlab/all
Version:
A cool library focusing on handling various flows
124 lines (103 loc) • 5.25 kB
text/typescript
// tests/core/pipeline.test.ts
import { DataPipeline } from '../core/pipeline';
import { createLogger } from '../utils/logger';
import { IExtractor, ILoader, ITransformer, ICleaner, PipelineContext, DataSource } from '../core/interfaces';
import { jest } from '@jest/globals'; // Use Jest's ESM-compatible mocking utilities
// --- Mock Components ---
/**
 * Test double for the extract stage.
 *
 * Hands back whatever data source it was constructed with, and wraps the
 * call in a Jest mock so tests can assert on invocation counts.
 */
class MockExtractor implements IExtractor<number> {
  constructor(private data: DataSource<number>) {}

  /** Resolves with the data source supplied to the constructor; the context is ignored. */
  extract = jest.fn(async (_ctx: PipelineContext): Promise<DataSource<number>> => {
    return this.data;
  });
}
/**
 * Test double for the clean stage.
 *
 * Lets even numbers through unchanged and resolves with `null` for
 * everything else. The call is wrapped in a Jest mock for assertions.
 */
class MockCleaner implements ICleaner<number> {
  /** Resolves with `value` when it is even, otherwise with `null`; the context is ignored. */
  clean = jest.fn(async (value: number, _ctx: PipelineContext): Promise<number | null> => {
    const isEven = value % 2 === 0;
    if (!isEven) {
      return null;
    }
    return value;
  });
}
/**
 * Test double for the transform stage.
 *
 * Turns a number into the label `Item-<number * 10>`. The call is wrapped
 * in a Jest mock so tests can inspect which inputs reached this stage.
 */
class MockTransformer implements ITransformer<number, string> {
  /** Resolves with `Item-` followed by ten times the input; the context is ignored. */
  transform = jest.fn(async (value: number, _ctx: PipelineContext): Promise<string> => {
    const scaled = value * 10;
    return `Item-${scaled}`;
  });
}
/**
 * Test double for the load stage.
 *
 * Records every batch it receives, in arrival order, so tests can verify
 * both the batch boundaries and the batch contents.
 */
class MockLoader implements ILoader<string> {
  /** One entry per `loadBatch` call, each holding a copy of that call's batch. */
  loadedData: string[][] = [];

  /** Appends a shallow copy of `batch` to `loadedData`; the context is ignored. */
  loadBatch = jest.fn(async (batch: string[], _ctx: PipelineContext): Promise<void> => {
    // Copy so that later mutation of the caller's array cannot alter what was recorded.
    const snapshot = Array.from(batch);
    this.loadedData.push(snapshot);
  });
}
// --- Test Suite ---
describe('DataPipeline', () => {
  let pipeline: DataPipeline<number, string>;
  let extractor: MockExtractor;
  let cleaner: MockCleaner;
  let transformer: MockTransformer;
  let loader: MockLoader;

  // Shared fixture: the mock cleaner only lets the even values through.
  const inputData = [1, 2, 3, 4, 5, 6, 7, 8];

  beforeEach(() => {
    // Fresh mocks for every test so call counts never leak between cases.
    extractor = new MockExtractor(inputData);
    cleaner = new MockCleaner();
    transformer = new MockTransformer();
    loader = new MockLoader();

    const silentLogger = createLogger({ level: 'silent' });
    pipeline = new DataPipeline<number, string>('test-pipeline', {
      logger: silentLogger,
    });

    // Wire the stages together; batchSize is the loader batch size.
    pipeline
      .configure({ batchSize: 2 })
      .extract(extractor)
      .clean(cleaner)
      .transform(transformer)
      .load(loader);
  });

  it('should run the full ETL process correctly', async () => {
    await pipeline.run();

    // Extraction happens exactly once.
    expect(extractor.extract).toHaveBeenCalledTimes(1);

    // Every input item goes through the cleaner; spot-check the first and last.
    expect(cleaner.clean).toHaveBeenCalledTimes(inputData.length);
    for (const probe of [1, 8]) {
      expect(cleaner.clean).toHaveBeenCalledWith(probe, expect.anything());
    }

    // Only the items that passed the cleaner (2, 4, 6, 8) reach the transformer.
    const survivors = [2, 4, 6, 8];
    expect(transformer.transform).toHaveBeenCalledTimes(4);
    for (const survivor of survivors) {
      expect(transformer.transform).toHaveBeenCalledWith(survivor, expect.anything());
    }
    expect(transformer.transform).not.toHaveBeenCalledWith(1, expect.anything());

    // 4 transformed items at batchSize 2 means two batches, in input order.
    const expectedBatches = [
      ['Item-20', 'Item-40'],
      ['Item-60', 'Item-80'],
    ];
    expect(loader.loadBatch).toHaveBeenCalledTimes(2);
    expect(loader.loadedData).toHaveLength(2);
    expectedBatches.forEach((expectedBatch, index) => {
      expect(loader.loadedData[index]).toEqual(expectedBatch);
    });
  });

  it('should handle empty input data gracefully', async () => {
    // Replace the default extractor with one that yields nothing.
    const emptyExtractor = new MockExtractor([]);
    pipeline.extract(emptyExtractor);

    await pipeline.run();

    expect(emptyExtractor.extract).toHaveBeenCalledTimes(1);
    // With no items, none of the downstream stages should be invoked.
    expect(cleaner.clean).not.toHaveBeenCalled();
    expect(transformer.transform).not.toHaveBeenCalled();
    expect(loader.loadBatch).not.toHaveBeenCalled();
  });

  it('should handle async iterable input', async () => {
    // Emits the same fixture values, each after a short delay that simulates an async fetch.
    async function* streamInput() {
      for (const value of inputData) {
        await new Promise<void>((resolve) => {
          setTimeout(resolve, 1);
        });
        yield value;
      }
    }

    const iterableExtractor = new MockExtractor(streamInput());
    pipeline.extract(iterableExtractor);

    await pipeline.run();

    // The counts and batches must match the array-input case.
    expect(iterableExtractor.extract).toHaveBeenCalledTimes(1);
    expect(cleaner.clean).toHaveBeenCalledTimes(inputData.length);
    expect(transformer.transform).toHaveBeenCalledTimes(4);
    expect(loader.loadBatch).toHaveBeenCalledTimes(2);

    const expectedBatches = [
      ['Item-20', 'Item-40'],
      ['Item-60', 'Item-80'],
    ];
    expectedBatches.forEach((expectedBatch, index) => {
      expect(loader.loadedData[index]).toEqual(expectedBatch);
    });
  });

  // TODO: Add tests for error handling (e.g., transformer throws error)
  // TODO: Add tests for retry logic (mock component to fail then succeed)
  // TODO: Add tests for parallel processing once implemented
});