mingo
Version:
MongoDB query language for in-memory objects
693 lines (692 loc) • 10.5 kB
JavaScript
import { computeValue } from "../../core/_internal";
import { Lazy } from "../../lazy";
import {
assert,
compare,
findInsertIndex,
isArray,
isEqual,
isNil,
isNumber
} from "../../util";
/**
 * `$bucketAuto` pipeline stage: groups the incoming objects into at most
 * `expr.buckets` buckets ordered by the value of `expr.groupBy`.
 *
 * @param collection Source sequence; it must support `.map(fn).collect()`,
 *   where `collect()` yields an array.
 * @param expr Stage specification:
 *   - `buckets`: requested number of buckets, must be greater than 0.
 *   - `groupBy`: expression evaluated against every object to get its key.
 *   - `output`: optional accumulator spec, defaults to `{ count: { $sum: 1 } }`.
 *   - `granularity`: optional preferred-number series name
 *     (POWERSOF2, 1-2-5, E6..E192, R5..R80).
 * @param options Passed through unchanged to `computeValue`.
 * @returns The result of `Lazy(...)` wrapping a producer that yields one
 *   `{ ...outputFields, _id: { min, max } }` object per bucket.
 * @throws If `buckets` is not greater than 0, the granularity name is
 *   unknown, or a groupBy value is not a non-negative finite number while a
 *   granularity is specified.
 */
const $bucketAuto = (collection, expr, options) => {
  const {
    buckets: bucketCount,
    groupBy: groupByExpr,
    output: optOutputExpr,
    // Available only if all groupBy values are numeric and none of them are NaN.
    granularity
  } = expr;
  const outputExpr = optOutputExpr ?? { count: { $sum: 1 } };

  assert(
    bucketCount > 0,
    `$bucketAuto: 'buckets' field must be greater than 0, but found: ${bucketCount}`
  );
  if (granularity) {
    assert(
      /^(POWERSOF2|1-2-5|E(6|12|24|48|96|192)|R(5|10|20|40|80))$/.test(
        granularity
      ),
      `$bucketAuto: invalid granularity '${granularity}'.`
    );
  }

  // object -> groupBy key; only consulted by the default (no granularity) strategy.
  const keyMap = /* @__PURE__ */ new Map();
  const sorted = collection
    .map((o) => {
      const k = computeValue(o, groupByExpr, null, options) ?? null;
      if (granularity) {
        assert(
          isNumber(k),
          "$bucketAuto: groupBy values must be numeric when granularity is specified."
        );
        // Negative, NaN or infinite keys cannot be rounded onto a preferred
        // number series; the rounding helpers would loop forever or produce
        // NaN boundaries, so reject them up front.
        assert(
          Number.isFinite(k) && k >= 0,
          "$bucketAuto: groupBy values must be non-negative finite numbers when granularity is specified."
        );
      } else {
        keyMap.set(o, k);
      }
      return [k, o];
    })
    .collect();

  // Nil keys sort first; two nil keys compare equal so the comparator stays consistent.
  sorted.sort((x, y) => {
    const xNil = isNil(x[0]);
    const yNil = isNil(y[0]);
    if (xNil && yNil) return 0;
    if (xNil) return -1;
    if (yNil) return 1;
    return compare(x[0], y[0]);
  });

  let getNext;
  if (!granularity) {
    getNext = granularityDefault(sorted, bucketCount, keyMap);
  } else if (granularity == "POWERSOF2") {
    getNext = granularityPowerOfTwo(sorted, bucketCount);
  } else {
    getNext = granularityPreferredSeries(sorted, bucketCount, granularity);
  }

  // With no input there is nothing to bucket: finish before asking a
  // strategy for a bucket it cannot build.
  let terminate = sorted.length === 0;
  return Lazy(() => {
    if (terminate) return { done: true };
    const { min, max, bucket, done } = getNext();
    terminate = done;
    const outFields = computeValue(bucket, outputExpr, null, options);
    // Drop undefined entries from array-valued output fields.
    for (const [k, v] of Object.entries(outFields)) {
      if (isArray(v)) outFields[k] = v.filter((v2) => v2 !== void 0);
    }
    return {
      done: false,
      value: {
        ...outFields,
        _id: { min, max }
      }
    };
  });
};
/**
 * Bucketing strategy used when no granularity is requested.
 *
 * @param sorted Array of `[key, object]` pairs, already ordered by key.
 * @param bucketCount Requested number of buckets.
 * @param keyMap Map from each object to its groupBy key.
 * @returns A function that, on each call, produces the next
 *   `{ min, max, bucket, done }` record. `done` is true once every pair of
 *   `sorted` has been consumed.
 */
function granularityDefault(sorted, bucketCount, keyMap) {
  const total = sorted.length;
  const targetSize = Math.max(1, Math.round(total / bucketCount));
  let cursor = 0;
  let emitted = 0;

  // Decides whether the pair at `cursor` still belongs to the current bucket.
  const belongsToBucket = (isFinal, taken) => {
    // The final bucket swallows everything that is left.
    if (isFinal) return true;
    if (taken < targetSize) return true;
    // A full bucket keeps growing while the key repeats, so equal keys are
    // never split across two buckets.
    return cursor > 0 && isEqual(sorted[cursor - 1][0], sorted[cursor][0]);
  };

  return () => {
    emitted += 1;
    const isFinal = emitted == bucketCount;
    const bucket = [];
    for (; cursor < total && belongsToBucket(isFinal, bucket.length); cursor++) {
      bucket.push(sorted[cursor][1]);
    }

    const exhausted = cursor >= total;
    const min = keyMap.get(bucket[0]);
    // The upper bound is the key that opens the next bucket or, for the last
    // bucket, the key of its own last member.
    const max = exhausted
      ? keyMap.get(bucket[bucket.length - 1])
      : sorted[cursor][0];

    assert(
      isNil(max) || isNil(min) || min <= max,
      `error: $bucketAuto boundary must be in order.`
    );
    return { min, max, bucket, done: exhausted };
  };
}
/**
 * Bucketing strategy for the POWERSOF2 granularity: every bucket's upper
 * bound is a power of two (or 0 while only zero keys have been seen).
 *
 * @param sorted Array of `[key, object]` pairs ordered by numeric key.
 * @param bucketCount Requested number of buckets; only used to derive the
 *   approximate bucket size.
 * @returns A function that, on each call, produces the next
 *   `{ min, max, bucket, done }` record. `min` is the previous bucket's `max`
 *   (0 for the first bucket) and `max` is exclusive.
 */
function granularityPowerOfTwo(sorted, bucketCount) {
  const size = sorted.length;
  const approxBucketSize = Math.max(1, Math.round(size / bucketCount));
  // Smallest power of two strictly greater than n; 0 stays 0.
  const roundUp2 = (n) => (n === 0 ? 0 : 2 ** (Math.floor(Math.log2(n)) + 1));
  let index = 0;
  let min = 0;
  let max = 0;

  return () => {
    const bucket = [];
    min = index > 0 ? max : 0;

    // Nothing (left) to consume: report completion instead of reading
    // sorted[index - 1], which does not exist for empty input.
    if (index >= size) {
      return { min, max, bucket, done: true };
    }

    let boundValue = roundUp2(max);
    // If the next key lies at or beyond the next power of two, extend the
    // bound past that key. Otherwise this call would return an empty bucket
    // for every power-of-two interval that holds no values.
    if (max !== 0 && sorted[index][0] >= boundValue) {
      boundValue = roundUp2(sorted[index][0]);
    }

    while (
      bucket.length < approxBucketSize &&
      index < size &&
      (max === 0 || sorted[index][0] < boundValue)
    ) {
      bucket.push(sorted[index++][1]);
    }

    // While no non-zero bound exists yet, derive it from the last key taken.
    max = max === 0 ? roundUp2(sorted[index - 1][0]) : boundValue;

    // Absorb every remaining key that still falls below the rounded bound.
    while (index < size && sorted[index][0] < max) {
      bucket.push(sorted[index++][1]);
    }

    return {
      min,
      max,
      bucket,
      done: index >= size
    };
  };
}
/**
 * Preferred-number series keyed by granularity name. Every series lists one
 * decade in ascending order; callers scale it by powers of ten.
 * Each R<k>/E<k> series must contain exactly k entries.
 */
const PREFERRED_NUMBERS = {
  // "Least rounded" Renard number series, taken from Wikipedia page on preferred
  // numbers: https://en.wikipedia.org/wiki/Preferred_number#Renard_numbers
  R5: [10, 16, 25, 40, 63],
  R10: [100, 125, 160, 200, 250, 315, 400, 500, 630, 800],
  R20: [
    100, 112, 125, 140, 160, 180, 200, 224, 250, 280,
    315, 355, 400, 450, 500, 560, 630, 710, 800, 900
  ],
  // Includes 335, which was previously missing (the series had 39 entries).
  R40: [
    100, 106, 112, 118, 125, 132, 140, 150, 160, 170,
    180, 190, 200, 212, 224, 236, 250, 265, 280, 300,
    315, 335, 355, 375, 400, 425, 450, 475, 500, 530,
    560, 600, 630, 670, 710, 750, 800, 850, 900, 950
  ],
  // Full R80 series: the R40 values interleaved with the 40 additional R80
  // values (previously only the additional values were listed).
  R80: [
    100, 103, 106, 109, 112, 115, 118, 122, 125, 128,
    132, 136, 140, 145, 150, 155, 160, 165, 170, 175,
    180, 185, 190, 195, 200, 206, 212, 218, 224, 230,
    236, 243, 250, 258, 265, 272, 280, 290, 300, 307,
    315, 325, 335, 345, 355, 365, 375, 387, 400, 412,
    425, 437, 450, 462, 475, 487, 500, 515, 530, 545,
    560, 580, 600, 615, 630, 650, 670, 690, 710, 730,
    750, 775, 800, 825, 850, 875, 900, 925, 950, 975
  ],
  // https://en.wikipedia.org/wiki/Preferred_number#1-2-5_series
  "1-2-5": [10, 20, 50],
  // E series, taken from Wikipedia page on preferred numbers:
  // https://en.wikipedia.org/wiki/Preferred_number#E_series
  E6: [10, 15, 22, 33, 47, 68],
  E12: [10, 12, 15, 18, 22, 27, 33, 39, 47, 56, 68, 82],
  E24: [
    10, 11, 12, 13, 15, 16, 18, 20, 22, 24, 27, 30,
    33, 36, 39, 43, 47, 51, 56, 62, 68, 75, 82, 91
  ],
  E48: [
    100, 105, 110, 115, 121, 127, 133, 140, 147, 154, 162, 169,
    178, 187, 196, 205, 215, 226, 237, 249, 261, 274, 287, 301,
    316, 332, 348, 365, 383, 402, 422, 442, 464, 487, 511, 536,
    562, 590, 619, 649, 681, 715, 750, 787, 825, 866, 909, 953
  ],
  E96: [
    100, 102, 105, 107, 110, 113, 115, 118, 121, 124, 127, 130,
    133, 137, 140, 143, 147, 150, 154, 158, 162, 165, 169, 174,
    178, 182, 187, 191, 196, 200, 205, 210, 215, 221, 226, 232,
    237, 243, 249, 255, 261, 267, 274, 280, 287, 294, 301, 309,
    316, 324, 332, 340, 348, 357, 365, 374, 383, 392, 402, 412,
    422, 432, 442, 453, 464, 475, 487, 499, 511, 523, 536, 549,
    562, 576, 590, 604, 619, 634, 649, 665, 681, 698, 715, 732,
    750, 768, 787, 806, 825, 845, 866, 887, 909, 931, 953, 976
  ],
  E192: [
    100, 101, 102, 104, 105, 106, 107, 109, 110, 111, 113, 114,
    115, 117, 118, 120, 121, 123, 124, 126, 127, 129, 130, 132,
    133, 135, 137, 138, 140, 142, 143, 145, 147, 149, 150, 152,
    154, 156, 158, 160, 162, 164, 165, 167, 169, 172, 174, 176,
    178, 180, 182, 184, 187, 189, 191, 193, 196, 198, 200, 203,
    205, 208, 210, 213, 215, 218, 221, 223, 226, 229, 232, 234,
    237, 240, 243, 246, 249, 252, 255, 258, 261, 264, 267, 271,
    274, 277, 280, 284, 287, 291, 294, 298, 301, 305, 309, 312,
    316, 320, 324, 328, 332, 336, 340, 344, 348, 352, 357, 361,
    365, 370, 374, 379, 383, 388, 392, 397, 402, 407, 412, 417,
    422, 427, 432, 437, 442, 448, 453, 459, 464, 470, 475, 481,
    487, 493, 499, 505, 511, 517, 523, 530, 536, 542, 549, 556,
    562, 569, 576, 583, 590, 597, 604, 612, 619, 626, 634, 642,
    649, 657, 665, 673, 681, 690, 698, 706, 715, 723, 732, 741,
    750, 759, 768, 777, 787, 796, 806, 816, 825, 835, 845, 856,
    866, 876, 887, 898, 909, 920, 931, 942, 953, 965, 976, 988
  ]
};
/**
 * Rounds `n` up onto the preferred-number series named by `granularity`,
 * scaling the series by powers of ten until it brackets `n`.
 *
 * @param n Non-negative finite number to round; 0 is returned unchanged.
 * @param granularity Key of `PREFERRED_NUMBERS` selecting the series.
 * @returns A scaled series value. When `n` equals a series value, the next
 *   series value is returned.
 * @throws If `n` is negative, NaN or infinite. Previously a negative or
 *   infinite `n` kept the scaling loops below running forever.
 */
const roundUp = (n, granularity) => {
  if (n === 0) return 0;
  assert(
    Number.isFinite(n) && n > 0,
    `$bucketAuto: cannot round ${n}; granularity requires a non-negative finite number.`
  );

  const series = PREFERRED_NUMBERS[granularity];
  const first = series[0];
  const last = series[series.length - 1];

  // Scale up until n lies below the largest value of the scaled series.
  let multiplier = 1;
  while (n >= last * multiplier) {
    multiplier *= 10;
  }

  // Scale down until n is at least the smallest value of the scaled series.
  // If n falls between the end of one decade and the start of the next, the
  // start of the higher decade is the answer.
  let previousMin = 0;
  while (n < first * multiplier) {
    previousMin = first * multiplier;
    multiplier /= 10;
    if (n >= last * multiplier) {
      return previousMin;
    }
  }

  assert(
    n >= first * multiplier && n < last * multiplier,
    "$bucketAuto: number out of range of series."
  );

  // presumably findInsertIndex returns the position of the first series value
  // that is not less than n under this comparator; verify against util.
  const i = findInsertIndex(series, n, (a, b) => {
    b *= multiplier;
    if (a < b) return -1;
    if (a > b) return 1;
    return 0;
  });
  const seriesNumber = series[i] * multiplier;
  // n < last * multiplier (asserted above), so series[i + 1] exists whenever
  // n matches series[i] exactly.
  return n == seriesNumber ? series[i + 1] * multiplier : seriesNumber;
};
/**
 * Bucketing strategy for the preferred-number granularities (everything
 * except POWERSOF2): bucket upper bounds are produced by `roundUp`.
 *
 * @param sorted Array of `[key, object]` pairs ordered by numeric key.
 * @param bucketCount Requested number of buckets.
 * @param granularity Series name forwarded to `roundUp`.
 * @returns A function that, on each call, produces the next
 *   `{ min, max, bucket, done }` record. `min` is the previous bucket's `max`
 *   (0 for the first bucket).
 * @throws If a bucket's `min` is not strictly less than its `max`.
 */
function granularityPreferredSeries(sorted, bucketCount, granularity) {
  const size = sorted.length;
  const approxBucketSize = Math.max(1, Math.round(size / bucketCount));
  let index = 0;
  let nBuckets = 0;
  let min = 0;
  let max = 0;

  return () => {
    const isLastBucket = ++nBuckets == bucketCount;
    const bucket = [];
    min = index > 0 ? max : 0;

    // Nothing (left) to consume: report completion instead of reading
    // sorted[index - 1], which does not exist for empty input.
    if (index >= size) {
      return { min, max, bucket, done: true };
    }

    // Fill up to the approximate size; the last requested bucket takes the rest.
    while (index < size && (isLastBucket || bucket.length < approxBucketSize)) {
      bucket.push(sorted[index++][1]);
    }
    max = roundUp(sorted[index - 1][0], granularity);

    // Absorb keys that fall below the rounded bound, then re-round from the
    // last key actually taken.
    const nItems = bucket.length;
    while (index < size && (isLastBucket || sorted[index][0] < max)) {
      bucket.push(sorted[index++][1]);
    }
    if (nItems != bucket.length) {
      max = roundUp(sorted[index - 1][0], granularity);
    }

    // NOTE(review): a bucket whose largest key is 0 yields max 0 and trips
    // this assertion; left unchanged here.
    assert(min < max, `$bucketAuto: ${min} < ${max}.`);
    return {
      min,
      max,
      bucket,
      done: index >= size
    };
  };
}
export {
$bucketAuto
};