UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

246 lines 10.3 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.TimeAligner = void 0; const timezone_handler_1 = require("./timezone-handler"); const grain_adjuster_1 = require("./grain-adjuster"); class TimeAligner { timezoneHandler; grainAdjuster; constructor() { this.timezoneHandler = new timezone_handler_1.TimezoneHandler(); this.grainAdjuster = new grain_adjuster_1.GrainAdjuster(); } async alignTimeSeries(data, config = {}) { const { targetTimezone, targetGrain, preserveStatistics = true, fillMethod = 'forward', alignmentStrategy = 'floor' } = config; // Sort data by timestamp const sortedData = [...data].sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime()); let processedData = sortedData; let sourceTimezone; let sourceGrain; // Step 1: Detect source timezone and grain if (sortedData.length > 0) { sourceTimezone = this.timezoneHandler.detectTimezone(sortedData.map(d => d.timestamp)); sourceGrain = this.grainAdjuster.detectGrain(sortedData.map(d => d.timestamp)); } // Step 2: Convert timezone if needed if (targetTimezone && sourceTimezone !== targetTimezone) { processedData = await this.convertTimezone(processedData, sourceTimezone, targetTimezone); } // Step 3: Adjust grain if needed let duplicatesRemoved = 0; let gapsFound = 0; if (targetGrain && sourceGrain !== targetGrain) { const grainResult = await this.adjustGrain(processedData, sourceGrain || 'auto', targetGrain, alignmentStrategy, preserveStatistics); processedData = grainResult.data; duplicatesRemoved = grainResult.duplicatesRemoved; gapsFound = grainResult.gapsFound; } // Step 4: Fill gaps if requested let gapsFilledCount = 0; if (fillMethod !== 'drop' && targetGrain) { const fillResult = this.fillGaps(processedData, targetGrain, fillMethod); processedData = fillResult.data; gapsFilledCount = fillResult.gapsFilled; } // Calculate statistics const dataLoss = data.length > 0 ? Math.max(0, (data.length - processedData.length) / data.length * 100) : 0; return { originalData: data, alignedData: processedData, alignment: { sourceTimezone, targetTimezone, sourceGrain, targetGrain, strategy: alignmentStrategy, fillMethod, dataLoss }, statistics: { originalCount: data.length, alignedCount: processedData.length, duplicatesRemoved, gapsfilled: gapsFilledCount } }; } async convertTimezone(data, sourceTimezone, targetTimezone) { return Promise.all(data.map(async (item) => { const convertedTimestamp = await this.timezoneHandler.convertTimezone(item.timestamp, sourceTimezone || 'UTC', targetTimezone); return { ...item, timestamp: convertedTimestamp, metadata: { ...item.metadata, originalTimezone: sourceTimezone, convertedTimezone: targetTimezone } }; })); } async adjustGrain(data, sourceGrain, targetGrain, strategy, preserveStatistics) { const adjustResult = await this.grainAdjuster.adjustGrain(data.map(d => d.timestamp), sourceGrain, targetGrain, strategy); // Map adjusted timestamps back to data const timestampValueMap = new Map(); // Group data by original timestamp data.forEach(item => { const key = item.timestamp.getTime(); if (!timestampValueMap.has(key)) { timestampValueMap.set(key, []); } timestampValueMap.get(key).push(item); }); const adjustedData = []; let duplicatesRemoved = 0; adjustResult.adjustedTimestamps.forEach((adjustedTs, index) => { const originalTs = data[index]?.timestamp.getTime(); const originalItems = timestampValueMap.get(originalTs) || []; if (originalItems.length === 0) return; if (originalItems.length > 1) { // Handle duplicates - aggregate if preserving statistics if (preserveStatistics) { const aggregatedValue = this.aggregateValues(originalItems); adjustedData.push({ timestamp: adjustedTs, value: aggregatedValue, metadata: { ...originalItems[0].metadata, aggregated: true, originalCount: originalItems.length, aggregationMethod: 'mean' } }); } else { // Take first value adjustedData.push({ ...originalItems[0], timestamp: adjustedTs }); } duplicatesRemoved += originalItems.length - 1; } else { adjustedData.push({ ...originalItems[0], timestamp: adjustedTs }); } }); return { data: adjustedData, duplicatesRemoved, gapsFound: adjustResult.gapsFound }; } aggregateValues(items) { const values = items.map(item => item.value).filter(v => !isNaN(v)); if (values.length === 0) return 0; return values.reduce((sum, val) => sum + val, 0) / values.length; } fillGaps(data, grain, method) { if (data.length === 0) return { data, gapsFilled: 0 }; const sortedData = [...data].sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime()); const result = []; let gapsFilled = 0; const grainMs = this.grainAdjuster.getGrainMilliseconds(grain); const startTime = sortedData[0].timestamp.getTime(); const endTime = sortedData[sortedData.length - 1].timestamp.getTime(); let currentTime = startTime; let dataIndex = 0; while (currentTime <= endTime) { const currentDate = new Date(currentTime); // Check if we have data for this timestamp if (dataIndex < sortedData.length && sortedData[dataIndex].timestamp.getTime() === currentTime) { result.push(sortedData[dataIndex]); dataIndex++; } else { // Fill the gap const filledValue = this.calculateFilledValue(result, sortedData, dataIndex, method); result.push({ timestamp: currentDate, value: filledValue, metadata: { filled: true, fillMethod: method } }); gapsFilled++; } currentTime += grainMs; } return { data: result, gapsFilled }; } calculateFilledValue(processedData, originalData, nextIndex, method) { switch (method) { case 'zero': return 0; case 'forward': return processedData.length > 0 ? processedData[processedData.length - 1].value : 0; case 'backward': return nextIndex < originalData.length ? originalData[nextIndex].value : (processedData.length > 0 ? processedData[processedData.length - 1].value : 0); case 'interpolate': { if (processedData.length === 0) return 0; if (nextIndex >= originalData.length) { return processedData[processedData.length - 1].value; } const prevValue = processedData[processedData.length - 1].value; const nextValue = originalData[nextIndex].value; return (prevValue + nextValue) / 2; } default: return 0; } } async alignMultipleSeries(seriesData, config = {}) { const results = {}; // Process each series for (const [seriesName, data] of Object.entries(seriesData)) { results[seriesName] = await this.alignTimeSeries(data, config); } return results; } async createCommonTimeGrid(seriesData, config = {}) { // Find common time range and grain const allTimestamps = []; Object.values(seriesData).forEach(series => { series.forEach(point => allTimestamps.push(point.timestamp)); }); if (allTimestamps.length === 0) { return { timeGrid: [], alignedSeries: {}, statistics: {} }; } allTimestamps.sort((a, b) => a.getTime() - b.getTime()); const commonGrain = config.targetGrain || this.grainAdjuster.detectGrain(allTimestamps); const startTime = allTimestamps[0]; const endTime = allTimestamps[allTimestamps.length - 1]; // Create time grid const timeGrid = this.grainAdjuster.createTimeGrid(startTime, endTime, commonGrain); // Align all series to this grid const alignedSeries = {}; const statistics = {}; for (const [seriesName, data] of Object.entries(seriesData)) { const alignmentResult = await this.alignTimeSeries(data, { ...config, targetGrain: commonGrain }); alignedSeries[seriesName] = alignmentResult.alignedData; statistics[seriesName] = alignmentResult.statistics; } return { timeGrid, alignedSeries, statistics }; } } exports.TimeAligner = TimeAligner; //# sourceMappingURL=align-time.js.map