UNPKG

@electric-sql/d2mini

Version:

D2Mini is a minimal implementation of Differential Dataflow for performing in-memory incremental view maintenance.

257 lines 9.25 kB
import { map } from './map.js';
import { reduce } from './reduce.js';

/**
 * Tells whether an aggregate is a "piped" aggregate, i.e. an object that
 * exposes a `pipe` property (own or inherited).
 * @param aggregate Aggregate definition object to inspect
 * @returns `true` when `aggregate` has a `pipe` property
 */
function isPipedAggregateFunction(aggregate) {
  return 'pipe' in aggregate;
}

/**
 * Groups data by key and applies multiple aggregate operations
 *
 * The returned operator runs three stages on the stream it is given:
 *  1. map    - extract the key and run every aggregate's `preMap` on the record
 *  2. reduce - combine the pre-mapped values of each group with `reduce`
 *  3. map    - spread the key's properties into the output row and run `postMap`
 *
 * Every emitted entry is `[JSON.stringify(key), row]`, where `row` holds the
 * key's own properties plus one property per aggregate name. An aggregate
 * whose name equals a key property overwrites that key property.
 *
 * @param keyExtractor Function to extract grouping key from data
 * @param aggregates Object mapping aggregate names to aggregate functions
 * @returns Function that takes a stream and returns the grouped stream
 */
export function groupBy(keyExtractor, aggregates = {}) {
  // TODO: piped aggregates (objects exposing `pipe`) are not supported yet;
  // they are filtered out here and do not contribute to the output.
  const basicAggregates = Object.fromEntries(
    Object.entries(aggregates).filter(
      ([, aggregate]) => !isPipedAggregateFunction(aggregate),
    ),
  );
  // Computed once so the per-record callbacks below do not rebuild the list.
  const basicEntries = Object.entries(basicAggregates);

  return (stream) => {
    // Special key to store the original key object
    const KEY_SENTINEL = '__original_key__';

    // First map to extract keys and pre-aggregate values
    const withKeysAndValues = stream.pipe(
      map((data) => {
        const key = keyExtractor(data);
        const keyString = JSON.stringify(key);
        // The original key object travels with the values so it can be
        // restored after the reduce step, which only sees the string key.
        const values = {};
        values[KEY_SENTINEL] = key;
        for (const [name, aggregate] of basicEntries) {
          values[name] = aggregate.preMap(data);
        }
        return [keyString, values];
      }),
    );

    // Then reduce to compute aggregates
    const reduced = withKeysAndValues.pipe(
      reduce((values) => {
        // Calculate total multiplicity to check if the group should exist
        let totalMultiplicity = 0;
        for (const [, multiplicity] of values) {
          totalMultiplicity += multiplicity;
        }
        // If total multiplicity is 0 or negative, the group is removed completely
        if (totalMultiplicity <= 0) {
          return [];
        }

        const result = {};
        // Every value in the group carries the same key; take it from the first
        result[KEY_SENTINEL] = values[0][0][KEY_SENTINEL];

        // Apply each aggregate function to its own column of pre-mapped values
        for (const [name, aggregate] of basicEntries) {
          const preValues = values.map(([v, m]) => [v[name], m]);
          result[name] = aggregate.reduce(preValues);
        }
        return [[result, 1]];
      }),
    );

    // Finally map to extract the key and include all values
    return reduced.pipe(
      map(([keyString, values]) => {
        const key = values[KEY_SENTINEL];
        const result = {};
        // Add key properties to result
        Object.assign(result, key);
        // Apply postMap if provided, otherwise expose the reduced value as is
        for (const [name, aggregate] of basicEntries) {
          result[name] = aggregate.postMap
            ? aggregate.postMap(values[name])
            : values[name];
        }
        // Return with the string key instead of the object
        return [keyString, result];
      }),
    );
  };
}

/**
 * Creates a sum aggregate function
 * @param valueExtractor Function to extract the value to add from each data entry
 * @returns Aggregate whose `reduce` adds `value * multiplicity` over the group
 */
export function sum(valueExtractor = (v) => v) {
  return {
    preMap: (data) => valueExtractor(data),
    reduce: (values) => {
      let total = 0;
      for (const [value, multiplicity] of values) {
        total += value * multiplicity;
      }
      return total;
    },
  };
}

/**
 * Creates a count aggregate function
 * @returns Aggregate whose `reduce` adds up the multiplicities of the group
 */
export function count() {
  return {
    preMap: () => 1,
    reduce: (values) => {
      let total = 0;
      for (const [, multiplicity] of values) {
        total += multiplicity;
      }
      return total;
    },
  };
}

/**
 * Creates an average aggregate function
 * @param valueExtractor Function to extract a numeric value from each data entry
 * @returns Aggregate whose `reduce` yields `{ sum, count }` and whose
 * `postMap` divides `sum` by `count`
 */
export function avg(valueExtractor = (v) => v) {
  return {
    preMap: (data) => ({
      sum: valueExtractor(data),
      // Never read: `reduce` derives the count from the multiplicities.
      count: 0,
    }),
    reduce: (values) => {
      let totalSum = 0;
      let totalCount = 0;
      for (const [value, multiplicity] of values) {
        totalSum += value.sum * multiplicity;
        totalCount += multiplicity;
      }
      return {
        sum: totalSum,
        count: totalCount,
      };
    },
    postMap: (result) => {
      return result.sum / result.count;
    },
  };
}

/**
 * Builds a min/max style aggregate.
 *
 * Entries with a non-positive multiplicity are skipped, matching `median`
 * and `mode`, which also ignore them. A separate flag records whether any
 * usable value was seen, so a group whose real extremum equals `sentinel`
 * (for example `Infinity` for `min`) is not mistaken for an empty group.
 *
 * @param valueExtractor Function to extract a numeric value from each data entry
 * @param sentinel Starting value that any real value should beat
 * @param isBetter Returns `true` when its first argument beats the second
 * @returns Aggregate whose `reduce` yields the extremum, or 0 when the group
 * has no usable value
 */
function extremumAggregate(valueExtractor, sentinel, isBetter) {
  return {
    preMap: (data) => valueExtractor(data),
    reduce: (values) => {
      let best = sentinel;
      let sawValue = false;
      for (const [value, multiplicity] of values) {
        if (multiplicity <= 0) {
          continue;
        }
        if (isBetter(value, best)) {
          best = value;
          sawValue = true;
        } else if (value === sentinel) {
          // A genuine value equal to the sentinel never "beats" it, but it
          // still proves the group is not empty.
          sawValue = true;
        }
      }
      return sawValue ? best : 0;
    },
  };
}

/**
 * Creates a min aggregate function that computes the minimum value in a group
 * Returns 0 when the group has no usable value.
 * @param valueExtractor Function to extract a numeric value from each data entry
 */
export function min(valueExtractor = (v) => v) {
  return extremumAggregate(
    valueExtractor,
    Number.POSITIVE_INFINITY,
    (candidate, best) => candidate < best,
  );
}

/**
 * Creates a max aggregate function that computes the maximum value in a group
 * Returns 0 when the group has no usable value.
 * @param valueExtractor Function to extract a numeric value from each data entry
 */
export function max(valueExtractor = (v) => v) {
  return extremumAggregate(
    valueExtractor,
    Number.NEGATIVE_INFINITY,
    (candidate, best) => candidate > best,
  );
}

/**
 * Creates a median aggregate function that computes the middle value in a sorted group
 * If there's an even number of values, returns the average of the two middle values
 * Returns 0 for an empty group.
 * @param valueExtractor Function to extract a numeric value from each data entry
 */
export function median(valueExtractor = (v) => v) {
  return {
    // Each record contributes a one-element array so `reduce` can flatten them
    preMap: (data) => [valueExtractor(data)],
    reduce: (values) => {
      // Flatten all values, repeating each one `multiplicity` times
      // (entries with a non-positive multiplicity contribute nothing)
      const allValues = [];
      for (const [valueArray, multiplicity] of values) {
        for (const value of valueArray) {
          for (let i = 0; i < multiplicity; i++) {
            allValues.push(value);
          }
        }
      }
      if (allValues.length === 0) {
        return [];
      }
      // Numeric comparator: the default sort would compare as strings
      allValues.sort((a, b) => a - b);
      return allValues;
    },
    postMap: (result) => {
      if (result.length === 0) return 0;
      const mid = Math.floor(result.length / 2);
      // If even number of values, average the two middle values
      if (result.length % 2 === 0) {
        return (result[mid - 1] + result[mid]) / 2;
      }
      // If odd number of values, return the middle value
      return result[mid];
    },
  };
}

/**
 * Creates a mode aggregate function that computes the most frequent value in a group
 * If multiple values have the same highest frequency, returns the first one encountered
 * Returns 0 when no value has a positive frequency.
 * @param valueExtractor Function to extract a value from each data entry
 */
export function mode(valueExtractor = (v) => v) {
  return {
    // Each record starts as a frequency map holding its own value once
    preMap: (data) => {
      const frequencies = new Map();
      frequencies.set(valueExtractor(data), 1);
      return frequencies;
    },
    reduce: (values) => {
      // Combine all frequency maps, weighting each by its multiplicity
      const combined = new Map();
      for (const [frequencies, multiplicity] of values) {
        for (const [value, occurrences] of frequencies.entries()) {
          const current = combined.get(value) || 0;
          combined.set(value, current + occurrences * multiplicity);
        }
      }
      return combined;
    },
    postMap: (result) => {
      if (result.size === 0) return 0;
      let modeValue = 0;
      let maxFrequency = 0;
      // Strict `>` keeps the first value encountered when frequencies tie
      for (const [value, frequency] of result.entries()) {
        if (frequency > maxFrequency) {
          maxFrequency = frequency;
          modeValue = value;
        }
      }
      return modeValue;
    },
  };
}

/** Convenience collection of all aggregate factories defined in this module. */
export const groupByOperators = {
  sum,
  count,
  avg,
  min,
  max,
  median,
  mode,
};
//# sourceMappingURL=groupBy.js.map