UNPKG

mingo

Version:

MongoDB query language for in-memory objects

693 lines (692 loc) 10.5 kB
import { computeValue } from "../../core/_internal"; import { Lazy } from "../../lazy"; import { assert, compare, findInsertIndex, isArray, isEqual, isNil, isNumber } from "../../util"; const $bucketAuto = (collection, expr, options) => { const { buckets: bucketCount, groupBy: groupByExpr, output: optOutputExpr, // Available only if the all groupBy values are numeric and none of them are NaN. granularity } = expr; const outputExpr = optOutputExpr ?? { count: { $sum: 1 } }; assert( bucketCount > 0, `$bucketAuto: 'buckets' field must be greater than 0, but found: ${bucketCount}` ); if (granularity) { assert( /^(POWERSOF2|1-2-5|E(6|12|24|48|96|192)|R(5|10|20|40|80))$/.test( granularity ), `$bucketAuto: invalid granularity '${granularity}'.` ); } const keyMap = /* @__PURE__ */ new Map(); const setKey = !granularity ? (o, k) => keyMap.set(o, k) : (_, _2) => { }; const sorted = collection.map((o) => { const k = computeValue(o, groupByExpr, null, options) ?? null; assert( !granularity || isNumber(k), "$bucketAuto: groupBy values must be numeric when granularity is specified." ); setKey(o, k ?? null); return [k ?? null, o]; }).collect(); sorted.sort((x, y) => { if (isNil(x[0])) return -1; if (isNil(y[0])) return 1; return compare(x[0], y[0]); }); let getNext; if (!granularity) { getNext = granularityDefault(sorted, bucketCount, keyMap); } else if (granularity == "POWERSOF2") { getNext = granularityPowerOfTwo( sorted, bucketCount ); } else { getNext = granularityPreferredSeries( sorted, bucketCount, granularity ); } let terminate = false; return Lazy(() => { if (terminate) return { done: true }; const { min, max, bucket, done } = getNext(); terminate = done; const outFields = computeValue( bucket, outputExpr, null, options ); for (const [k, v] of Object.entries(outFields)) { if (isArray(v)) outFields[k] = v.filter((v2) => v2 !== void 0); } return { done: false, value: { ...outFields, _id: { min, max } } }; }); }; function granularityDefault(sorted, bucketCount, keyMap) { const size = sorted.length; const approxBucketSize = Math.max(1, Math.round(sorted.length / bucketCount)); let index = 0; let nBuckets = 0; return () => { const isLastBucket = ++nBuckets == bucketCount; const bucket = new Array(); while (index < size && (isLastBucket || bucket.length < approxBucketSize || index > 0 && isEqual(sorted[index - 1][0], sorted[index][0]))) { bucket.push(sorted[index++][1]); } const min = keyMap.get(bucket[0]); let max; if (index < size) { max = sorted[index][0]; } else { max = keyMap.get(bucket[bucket.length - 1]); } assert( isNil(max) || isNil(min) || min <= max, `error: $bucketAuto boundary must be in order.` ); return { min, max, bucket, done: index >= size }; }; } function granularityPowerOfTwo(sorted, bucketCount) { const size = sorted.length; const approxBucketSize = Math.max(1, Math.round(sorted.length / bucketCount)); const roundUp2 = (n) => n === 0 ? 0 : 2 ** (Math.floor(Math.log2(n)) + 1); let index = 0; let min = 0; let max = 0; return () => { const bucket = new Array(); const boundValue = roundUp2(max); min = index > 0 ? max : 0; while (bucket.length < approxBucketSize && index < size && (max === 0 || sorted[index][0] < boundValue)) { bucket.push(sorted[index++][1]); } max = max == 0 ? roundUp2(sorted[index - 1][0]) : boundValue; while (index < size && sorted[index][0] < max) { bucket.push(sorted[index++][1]); } return { min, max, bucket, done: index >= size }; }; } const PREFERRED_NUMBERS = { // "Least rounded" Renard number series, taken from Wikipedia page on preferred // numbers: https://en.wikipedia.org/wiki/Preferred_number#Renard_numbers R5: [10, 16, 25, 40, 63], R10: [100, 125, 160, 200, 250, 315, 400, 500, 630, 800], R20: [ 100, 112, 125, 140, 160, 180, 200, 224, 250, 280, 315, 355, 400, 450, 500, 560, 630, 710, 800, 900 ], R40: [ 100, 106, 112, 118, 125, 132, 140, 150, 160, 170, 180, 190, 200, 212, 224, 236, 250, 265, 280, 300, 315, 355, 375, 400, 425, 450, 475, 500, 530, 560, 600, 630, 670, 710, 750, 800, 850, 900, 950 ], R80: [ 103, 109, 115, 122, 128, 136, 145, 155, 165, 175, 185, 195, 206, 218, 230, 243, 258, 272, 290, 307, 325, 345, 365, 387, 412, 437, 462, 487, 515, 545, 575, 615, 650, 690, 730, 775, 825, 875, 925, 975 ], // https://en.wikipedia.org/wiki/Preferred_number#1-2-5_series "1-2-5": [10, 20, 50], // E series, taken from Wikipedia page on preferred numbers: // https://en.wikipedia.org/wiki/Preferred_number#E_series E6: [10, 15, 22, 33, 47, 68], E12: [10, 12, 15, 18, 22, 27, 33, 39, 47, 56, 68, 82], E24: [ 10, 11, 12, 13, 15, 16, 18, 20, 22, 24, 27, 30, 33, 36, 39, 43, 47, 51, 56, 62, 68, 75, 82, 91 ], E48: [ 100, 105, 110, 115, 121, 127, 133, 140, 147, 154, 162, 169, 178, 187, 196, 205, 215, 226, 237, 249, 261, 274, 287, 301, 316, 332, 348, 365, 383, 402, 422, 442, 464, 487, 511, 536, 562, 590, 619, 649, 681, 715, 750, 787, 825, 866, 909, 953 ], E96: [ 100, 102, 105, 107, 110, 113, 115, 118, 121, 124, 127, 130, 133, 137, 140, 143, 147, 150, 154, 158, 162, 165, 169, 174, 178, 182, 187, 191, 196, 200, 205, 210, 215, 221, 226, 232, 237, 243, 249, 255, 261, 267, 274, 280, 287, 294, 301, 309, 316, 324, 332, 340, 348, 357, 365, 374, 383, 392, 402, 412, 422, 432, 442, 453, 464, 475, 487, 499, 511, 523, 536, 549, 562, 576, 590, 604, 619, 634, 649, 665, 681, 698, 715, 732, 750, 768, 787, 806, 825, 845, 866, 887, 909, 931, 953, 976 ], E192: [ 100, 101, 102, 104, 105, 106, 107, 109, 110, 111, 113, 114, 115, 117, 118, 120, 121, 123, 124, 126, 127, 129, 130, 132, 133, 135, 137, 138, 140, 142, 143, 145, 147, 149, 150, 152, 154, 156, 158, 160, 162, 164, 165, 167, 169, 172, 174, 176, 178, 180, 182, 184, 187, 189, 191, 193, 196, 198, 200, 203, 205, 208, 210, 213, 215, 218, 221, 223, 226, 229, 232, 234, 237, 240, 243, 246, 249, 252, 255, 258, 261, 264, 267, 271, 274, 277, 280, 284, 287, 291, 294, 298, 301, 305, 309, 312, 316, 320, 324, 328, 332, 336, 340, 344, 348, 352, 357, 361, 365, 370, 374, 379, 383, 388, 392, 397, 402, 407, 412, 417, 422, 427, 432, 437, 442, 448, 453, 459, 464, 470, 475, 481, 487, 493, 499, 505, 511, 517, 523, 530, 536, 542, 549, 556, 562, 569, 576, 583, 590, 597, 604, 612, 619, 626, 634, 642, 649, 657, 665, 673, 681, 690, 698, 706, 715, 723, 732, 741, 750, 759, 768, 777, 787, 796, 806, 816, 825, 835, 845, 856, 866, 876, 887, 898, 909, 920, 931, 942, 953, 965, 976, 988 ] }; const roundUp = (n, granularity) => { if (n == 0) return 0; const series = PREFERRED_NUMBERS[granularity]; const first = series[0]; const last = series[series.length - 1]; let multiplier = 1; while (n >= last * multiplier) { multiplier *= 10; } let previousMin = 0; while (n < first * multiplier) { previousMin = first * multiplier; multiplier /= 10; if (n >= last * multiplier) { return previousMin; } } assert( n >= first * multiplier && n < last * multiplier, "$bucketAuto: number out of range of series." ); const i = findInsertIndex(series, n, (a, b) => { b *= multiplier; if (a < b) return -1; if (a > b) return 1; return 0; }); const seriesNumber = series[i] * multiplier; return n == seriesNumber ? series[i + 1] * multiplier : seriesNumber; }; function granularityPreferredSeries(sorted, bucketCount, granularity) { const size = sorted.length; const approxBucketSize = Math.max(1, Math.round(sorted.length / bucketCount)); let index = 0; let nBuckets = 0; let min = 0; let max = 0; return () => { const isLastBucket = ++nBuckets == bucketCount; const bucket = new Array(); min = index > 0 ? max : 0; while (index < size && (isLastBucket || bucket.length < approxBucketSize)) { bucket.push(sorted[index++][1]); } max = roundUp(sorted[index - 1][0], granularity); const nItems = bucket.length; while (index < size && (isLastBucket || sorted[index][0] < max)) { bucket.push(sorted[index++][1]); } if (nItems != bucket.length) { max = roundUp(sorted[index - 1][0], granularity); } assert(min < max, `$bucketAuto: ${min} < ${max}.`); return { min, max, bucket, done: index >= size }; }; } export { $bucketAuto };