data-transformation-engine
Version:
A high-performance data transformation engine for processing large hierarchical datasets with emphasis on categorical and temporal data.
111 lines (95 loc) • 3.96 kB
JavaScript
const { generateUUID } = require('./utils/uuid');
const validators = require('./utils/validators');
const transformers = require('./utils/transformers');
/**
 * Creates a data engine that groups raw records by term and, within each term,
 * by a configurable property of every info entry.
 *
 * @param {Object} [config={}] - Engine options; unknown keys are kept on the internal config.
 * @param {string} [config.stream='stream'] - Name of the info property used as the grouping key.
 * @param {string} [config.unique='unique'] - Stored on the internal config; not read by this factory.
 * @returns {{processRawData: Function, clearCache: Function, validators: Object, transformers: Object}}
 *   The processing API plus the validator and transformer helpers it relies on.
 */
const createDataEngine = (config = {}) => {
  const engineConfig = {
    ...config,
    stream: config.stream || 'stream',
    unique: config.unique || 'unique',
  };

  // Results keyed by the JSON serialization of the input. Cached arrays are
  // handed back by reference, so callers should treat them as read-only.
  const cache = new Map();

  /**
   * Computes the derived fields stored on every info entry.
   * NOTE(review): Math.min/Math.max presume `aggregates.months` holds numbers —
   * confirm against transformers.calculateAggregates.
   */
  const summarize = (detailed) => {
    const aggregates = transformers.calculateAggregates(detailed);
    return {
      value: aggregates.total,
      uniqueBranches: aggregates.branches.size,
      monthRange: {
        start: Math.min(...aggregates.months),
        end: Math.max(...aggregates.months),
      },
    };
  };

  /**
   * Groups raw records by term and by the configured info key, merging the
   * `detailed` rows of entries that share a key and attaching `value`,
   * `uniqueBranches` and `monthRange` to each entry.
   *
   * Invalid records are reported with console.warn and skipped. Any unexpected
   * error is logged with console.error and an empty array is returned.
   *
   * @param {Array<Object>} data - Records shaped like `{ term, info: [{ detailed: [...], ... }] }`.
   * @returns {Array<Object>} One object per valid term: `{ id, term, info: [...] }`, where each
   *   info entry carries a copy of its `detailed` rows sorted by `month`, descending.
   */
  const processRawData = (data) => {
    try {
      const cacheKey = JSON.stringify(data);
      if (cache.has(cacheKey)) {
        return cache.get(cacheKey);
      }

      const termMap = new Map();

      data.forEach((item) => {
        // A null/undefined record must not abort the whole batch.
        if (item == null || !validators.isValidTerm(item.term)) {
          console.warn(`Invalid term found:`, item);
          return;
        }
        // Without an info array there is nothing to group; skip instead of throwing.
        if (!Array.isArray(item.info)) {
          console.warn(`Invalid info data found:`, item);
          return;
        }

        if (!termMap.has(item.term)) {
          termMap.set(item.term, {
            id: generateUUID(),
            term: item.term,
            info: new Map(),
          });
        }
        const termData = termMap.get(item.term);

        item.info.forEach((info) => {
          // NOTE(review): validation reads `info.stream` while grouping reads the
          // configured property below; confirm this is intended when
          // `config.stream` names a different property.
          if (
            info == null ||
            !validators.isValidStream(info.stream) ||
            !validators.isValidDetailed(info.detailed)
          ) {
            console.warn(`Invalid info data found:`, info);
            return;
          }

          const groupKey = info[engineConfig.stream];
          const existingInfo = termData.info.get(groupKey);

          if (existingInfo) {
            existingInfo.detailed = transformers.mergeDetailed(existingInfo.detailed, info.detailed);
            Object.assign(existingInfo, summarize(existingInfo.detailed));
          } else {
            // First-seen entries get the same derived fields (including `value`)
            // as merged ones, so the output shape does not depend on merging.
            termData.info.set(groupKey, {
              ...info,
              ...summarize(info.detailed),
            });
          }
        });
      });

      const processed = Array.from(termMap.values(), (entry) => ({
        ...entry,
        info: Array.from(entry.info.values(), (info) => ({
          ...info,
          // Sort a copy: for unmerged entries `detailed` is still the caller's
          // array, and Array.prototype.sort reorders in place.
          detailed: [...info.detailed].sort((a, b) => b.month.localeCompare(a.month)),
        })),
      }));

      cache.set(cacheKey, processed);
      return processed;
    } catch (error) {
      console.error('Error processing data:', error);
      return [];
    }
  };

  /** Drops every cached result so subsequent calls recompute from scratch. */
  const clearCache = () => cache.clear();

  return {
    processRawData,
    clearCache,
    validators,
    transformers,
  };
};
/**
* Example usage:
*
* const engine = createDataEngine({
* stream: 'stream', // name of the info property used as the grouping key; any existing property works, e.g. stream: 'unique'
* unique: 'unique'
* });
*
* const data = [
* { term: 'term1', info: [{ stream: 'stream1', detailed: [{ month: '2025-01', branch: 'branch1', value: 100 }], unique: 'unique1' }] },
* { term: 'term2', info: [{ stream: 'stream2', detailed: [{ month: '2025-01', branch: 'branch2', value: 200 }], unique: 'unique2' }] }
* ];
*
*/
// CommonJS entry point: the engine factory is this module's sole export.
module.exports = createDataEngine;