/*
 * @electric-sql/d2ts
 * Version: (not specified)
 * D2TS is a TypeScript implementation of Differential Dataflow.
 * File listing metadata: 312 lines (264 loc), 8.94 kB, text/typescript
 */
import { DefaultMap, chunkedArrayPush, hash } from './utils.js'
/**
 * Backing storage of a multiset: a list of `[data, multiplicity]` pairs.
 */
export type MultiSetArray<T> = Array<[data: T, multiplicity: number]>
/**
 * A record addressed by a string key, stored as a `[key, value]` tuple.
 */
export type KeyedData<T> = [key: string, value: T]
/**
 * A multiset of data, stored as a list of `[record, multiplicity]` pairs.
 *
 * The same record may appear in several pairs; `consolidate()` merges them.
 * Multiplicities may be negative (see `negate()`).
 *
 * The array passed to the constructor is stored by reference, not copied:
 * `extend()` appends to it and `getInner()` returns it directly.
 */
export class MultiSet<T> {
  #inner: MultiSetArray<T>

  /**
   * @param data Initial `[record, multiplicity]` pairs; defaults to an empty list.
   */
  constructor(data: MultiSetArray<T> = []) {
    this.#inner = data
  }

  /**
   * Render the multiset as `MultiSet(<json>)`.
   *
   * @param indent When true the JSON part is pretty-printed with 2 spaces.
   */
  toString(indent = false): string {
    return `MultiSet(${JSON.stringify(this.#inner, null, indent ? 2 : undefined)})`
  }

  /**
   * Serialize the pairs to a JSON string.
   *
   * Note that this returns a *string*, not a plain value, so passing a
   * MultiSet to `JSON.stringify` yields a JSON-encoded string of this text.
   */
  toJSON(): string {
    return JSON.stringify(this.#inner)
  }

  /**
   * Build a multiset from a string produced by `toJSON()`.
   * The parsed value is used as-is; its shape is not validated.
   */
  static fromJSON<T>(json: string): MultiSet<T> {
    return new MultiSet(JSON.parse(json))
  }

  /**
   * Apply a function to all records in the collection.
   * Multiplicities are carried over unchanged.
   */
  map<U>(f: (data: T) => U): MultiSet<U> {
    return new MultiSet(
      this.#inner.map(([data, multiplicity]) => [f(data), multiplicity]),
    )
  }

  /**
   * Keep only the records for which `f(record)` is truthy.
   */
  filter(f: (data: T) => boolean): MultiSet<T> {
    return new MultiSet(this.#inner.filter(([data]) => f(data)))
  }

  /**
   * Negate all multiplicities in the collection.
   */
  negate(): MultiSet<T> {
    return new MultiSet(
      this.#inner.map(([data, multiplicity]) => [data, -multiplicity]),
    )
  }

  /**
   * Concatenate two collections together: this collection's pairs followed
   * by `other`'s pairs, in a new multiset. Neither input is modified.
   */
  concat(other: MultiSet<T>): MultiSet<T> {
    const out: MultiSetArray<T> = []
    chunkedArrayPush(out, this.#inner)
    chunkedArrayPush(out, other.getInner())
    return new MultiSet(out)
  }

  /**
   * Produce as output a collection that is logically equivalent to the input
   * but which combines identical instances of the same record into one
   * (record, multiplicity) pair. Records whose total multiplicity is zero are
   * dropped.
   */
  consolidate(): MultiSet<T> {
    // Records that are all strings, or all numbers, can key the map directly.
    // Anything else (or a mix of strings and numbers) is keyed by `hash(record)`.
    let sawString = false
    let sawNumber = false
    let sawOther = false
    for (const [data] of this.#inner) {
      if (typeof data === 'string') {
        sawString = true
      } else if (typeof data === 'number') {
        sawNumber = true
      } else {
        sawOther = true
        break // one non-primitive record is enough to require hashing
      }
    }
    const useHash = sawOther || (sawString && sawNumber)

    const totals = new DefaultMap<string | number, number>(() => 0)
    // hash -> first record seen with that hash; only populated when hashing.
    const firstSeen = new Map<string, T>()
    for (const [data, multiplicity] of this.#inner) {
      const key = useHash ? hash(data) : (data as string | number)
      if (useHash && !firstSeen.has(key as string)) {
        firstSeen.set(key as string, data)
      }
      totals.update(key, (count) => count + multiplicity)
    }

    const result: MultiSetArray<T> = []
    for (const [key, multiplicity] of totals.entries()) {
      if (multiplicity === 0) continue
      const record = useHash ? firstSeen.get(key as string) : key
      result.push([record as T, multiplicity])
    }
    return new MultiSet(result)
  }

  /**
   * Match pairs (k, v1) and (k, v2) from the two input collections and produce (k, (v1, v2)).
   *
   * The multiplicity of each output pair is the product of the two input
   * multiplicities. Output is ordered by this collection's entries, and
   * within one entry by the order of the matching entries in `other`.
   *
   * This collection's entries are treated as `[key, value]` records.
   */
  join<U>(other: MultiSet<KeyedData<U>>): MultiSet<KeyedData<[T, U]>> {
    const out: MultiSetArray<KeyedData<[T, U]>> = []
    const left = this.#inner as MultiSetArray<KeyedData<T>>
    const right = other.getInner()
    if (left.length === 0 || right.length === 0) {
      return new MultiSet(out)
    }

    // Index `other` by key once instead of rescanning it for every entry of
    // this collection. Buckets preserve `other`'s order. Map key equality
    // agrees with `===` for string keys.
    const byKey = new Map<string, [U, number][]>()
    for (const [[key, value], multiplicity] of right) {
      const bucket = byKey.get(key)
      if (bucket === undefined) {
        byKey.set(key, [[value, multiplicity]])
      } else {
        bucket.push([value, multiplicity])
      }
    }

    for (const [[key, v1], d1] of left) {
      const matches = byKey.get(key)
      if (matches === undefined) continue
      for (const [v2, d2] of matches) {
        out.push([[key, [v1, v2]], d1 * d2])
      }
    }
    return new MultiSet(out)
  }

  /**
   * Apply a reduction function to all record values, grouped by key.
   * This method only works on KeyedData types and returns KeyedData results.
   *
   * @param f Receives every `[value, multiplicity]` pair recorded for one key
   *   and returns the `[value, multiplicity]` pairs to emit for that key.
   */
  reduce<U>(f: (vals: [T, number][]) => [U, number][]): MultiSet<KeyedData<U>> {
    const groups = new DefaultMap<string, [T, number][]>(() => [])
    for (const [[key, val], multiplicity] of this.#inner as MultiSetArray<
      KeyedData<T>
    >) {
      groups.update(key, (existing) => {
        existing.push([val, multiplicity])
        return existing
      })
    }

    const out: MultiSetArray<KeyedData<U>> = []
    for (const [key, vals] of groups.entries()) {
      for (const [val, multiplicity] of f(vals)) {
        out.push([[key, val], multiplicity])
      }
    }
    return new MultiSet(out)
  }

  /**
   * Count the number of times each key occurs in the collection, i.e. the sum
   * of the multiplicities recorded for that key.
   */
  count(): MultiSet<KeyedData<number>> {
    return this.reduce<number>((vals: [T, number][]): [number, number][] => {
      let total = 0
      for (const [, multiplicity] of vals) {
        total += multiplicity
      }
      return [[total, 1]]
    })
  }

  /**
   * Produce the sum of all the values paired with a key, for all keys in the collection.
   * Each value is weighted by its multiplicity; values are treated as numbers.
   */
  sum(): MultiSet<KeyedData<number>> {
    return this.reduce<number>((vals: [T, number][]): [number, number][] => {
      let total = 0
      for (const [val, multiplicity] of vals) {
        total += (val as unknown as number) * multiplicity
      }
      return [[total, 1]]
    })
  }

  /**
   * Produce the minimum value associated with each key in the collection.
   *
   * Note that no record may have negative multiplicity when computing the min,
   * as it is unclear what exactly the minimum record is in that case.
   *
   * @throws Error if a value's total multiplicity for a key is negative.
   */
  min(): MultiSet<KeyedData<T>> {
    return this.#extreme('min', (candidate, best) => candidate < best)
  }

  /**
   * Produce the maximum value associated with each key in the collection.
   *
   * Note that no record may have negative multiplicity when computing the max,
   * as it is unclear what exactly the maximum record is in that case.
   *
   * @throws Error if a value's total multiplicity for a key is negative.
   */
  max(): MultiSet<KeyedData<T>> {
    return this.#extreme('max', (candidate, best) => candidate > best)
  }

  /**
   * Shared implementation of `min()` and `max()`: per key, total the
   * multiplicity of each value, drop values that cancel to zero, and emit
   * the single value that `isBetter` prefers, with multiplicity 1.
   */
  #extreme(
    operation: 'min' | 'max',
    isBetter: (candidate: T, best: T) => boolean,
  ): MultiSet<KeyedData<T>> {
    return this.reduce<T>((vals: [T, number][]): [T, number][] => {
      // Values are keyed by the value itself (Map key equality).
      const totals = new Map<T, [T, number]>()
      for (const [val, multiplicity] of vals) {
        const previous = totals.get(val)?.[1] ?? 0
        totals.set(val, [val, previous + multiplicity])
      }
      const live = Array.from(totals.values()).filter(
        ([, multiplicity]) => multiplicity !== 0,
      )
      if (live.length === 0) return []

      let best = live[0]
      for (const entry of live) {
        if (entry[1] <= 0) {
          throw new Error(
            `Negative multiplicities not allowed in ${operation} operation`,
          )
        }
        if (isBetter(entry[0], best[0])) {
          best = entry
        }
      }
      return [[best[0], 1]]
    })
  }

  /**
   * Reduce the collection to a set of elements (from a multiset).
   *
   * Note that no record may have negative multiplicity when producing this set,
   * as elements of sets may only have multiplicity one, and it is unclear that is
   * an appropriate output for elements with negative multiplicity.
   *
   * @throws Error if a value's total multiplicity for a key is negative.
   */
  distinct(): MultiSet<KeyedData<T>> {
    return this.reduce((vals: [T, number][]): [T, number][] => {
      // Values are grouped by `hash(value)`; the most recently seen value
      // with a given hash is the one kept.
      const totals = new Map<string | number, [T, number]>()
      for (const [val, multiplicity] of vals) {
        const key = hash(val)
        const previous = totals.get(key)?.[1] ?? 0
        totals.set(key, [val, previous + multiplicity])
      }
      const live = Array.from(totals.values()).filter(
        ([, multiplicity]) => multiplicity !== 0,
      )
      for (const [, multiplicity] of live) {
        if (multiplicity <= 0) {
          throw new Error(
            'Negative multiplicities not allowed in distinct operation',
          )
        }
      }
      return live.map(([val]) => [val, 1])
    })
  }

  /**
   * Repeatedly invoke a function f on a collection, and return the result
   * of applying the function an infinite number of times (fixedpoint).
   *
   * Note that if the function does not converge to a fixedpoint this implementation
   * will run forever.
   *
   * Convergence is detected by comparing the JSON serialization of the
   * underlying arrays, so the comparison is sensitive to pair order and to
   * unconsolidated duplicates.
   */
  iterate(f: (collection: MultiSet<T>) => MultiSet<T>): MultiSet<T> {
    // The starting collection shares this multiset's underlying array.
    let curr = new MultiSet(this.#inner)
    let result = f(curr)
    while (JSON.stringify(result.#inner) !== JSON.stringify(curr.#inner)) {
      curr = result
      result = f(curr)
    }
    return curr
  }

  /**
   * Append the pairs of `other` to this multiset in place.
   *
   * @param other Another multiset, or a raw array of `[record, multiplicity]` pairs.
   */
  extend(other: MultiSet<T> | MultiSetArray<T>): void {
    const otherArray = other instanceof MultiSet ? other.getInner() : other
    chunkedArrayPush(this.#inner, otherArray)
  }

  /**
   * Return the underlying array of pairs (the live array, not a copy).
   */
  getInner(): MultiSetArray<T> {
    return this.#inner
  }
}