@electric-sql/d2ts
Version:
D2TS is a TypeScript implementation of Differential Dataflow.
275 lines (241 loc) • 8.72 kB
text/typescript
import { Version, Antichain } from './order.js'
import { MultiSet } from './multiset.js'
import { DefaultMap, chunkedArrayPush, hash } from './utils.js'
/** Maps each version to the list of entries recorded at that version. */
type VersionMap<T> = DefaultMap<Version, T[]>
/** Maps each key to its per-version lists of [value, multiplicity] entries. */
type IndexMap<K, V> = DefaultMap<K, VersionMap<[V, number]>>
/**
* Contract for an index over keyed, versioned [value, multiplicity] entries.
* The `Index` class in this file implements it; the member notes below
* describe what that implementation does.
*/
export interface IndexType<K, V> {
/**
* Returns the [value, multiplicity] entries stored for `key` at versions
* less than or equal to `requestedVersion`.
*/
reconstructAt(key: K, requestedVersion: Version): [V, number][]
/** Returns the versions at which `key` has entries. */
versions(key: K): Version[]
/** Records one [value, multiplicity] entry for `key` at `version`. */
addValue(key: K, version: Version, value: [V, number]): void
/** Adds every entry of `other` to this index. */
append(other: IndexType<K, V>): void
/**
* Pairs up entries of both indexes that share a key and groups the
* resulting [key, [value, otherValue]] records by version.
*/
join<V2>(other: IndexType<K, V2>): [Version, MultiSet<[K, [V, V2]]>][]
/**
* Advances entries older than `compactionFrontier` and merges duplicate
* values for the given keys.
*/
compact(compactionFrontier: Antichain, keys: K[]): void
/** Returns all keys currently present in the index. */
keys(): K[]
/** Returns whether `key` is present in the index. */
has(key: K): boolean
}
/**
* A map from a difference collection trace's keys -> versions at which
* the key has nonzero multiplicity -> (value, multiplicities) that changed.
*
* Used in operations like join and reduce where the operation needs to
* exploit the key-value structure of the data to run efficiently.
*
* This implementation supports the general case of partially ordered versions.
*/
export class Index<K, V> implements IndexType<K, V> {
  /** key -> version -> [value, multiplicity] entries recorded at that version. */
  #inner: IndexMap<K, V>
  /** Frontier passed to the most recent `compact` call; null until then. */
  #compactionFrontier: Antichain | null
  /** Keys written by `addValue`/`append` and not yet processed by `compact`. */
  #modifiedKeys: Set<K>

  constructor() {
    this.#inner = new DefaultMap<K, VersionMap<[V, number]>>(
      () => new DefaultMap<Version, [V, number][]>(() => []),
    )
    // #inner is a map of:
    // {
    //   [key]: {
    //     [version]: [value, multiplicity][]
    //   }
    // }
    this.#compactionFrontier = null
    this.#modifiedKeys = new Set()
  }

  /**
   * Renders the raw stored contents (versions are not advanced to the
   * compaction frontier) as `Index(<json>)`.
   *
   * @param indent - when true, the JSON is pretty-printed.
   */
  toString(indent = false): string {
    return `Index(${JSON.stringify(
      [...this.#inner].map(([k, v]) => [k, [...v.entries()]]),
      undefined,
      indent ? ' ' : undefined,
    )})`
  }

  /**
   * Checks that a version (or frontier) is not behind the compaction frontier.
   *
   * @returns true when the request is acceptable (always true before the
   * first compaction).
   * @throws Error('Invalid version') when the compaction frontier is not
   * less than or equal to `requestedVersion`.
   */
  #validate(requestedVersion: Version | Antichain): boolean {
    if (!this.#compactionFrontier) return true
    if (requestedVersion instanceof Antichain) {
      if (!this.#compactionFrontier.lessEqual(requestedVersion)) {
        throw new Error('Invalid version')
      }
    } else if (requestedVersion instanceof Version) {
      if (!this.#compactionFrontier.lessEqualVersion(requestedVersion)) {
        throw new Error('Invalid version')
      }
    }
    return true
  }

  /**
   * Maps a stored version to the version it should be observed at: versions
   * the compaction frontier is not less than or equal to are advanced by the
   * frontier, everything else is returned unchanged.
   */
  #advanceToFrontier(version: Version): Version {
    const frontier = this.#compactionFrontier
    if (!frontier || frontier.lessEqualVersion(version)) return version
    return version.advanceBy(frontier)
  }

  /**
   * Collects every [value, multiplicity] entry stored for `key` at a version
   * less than or equal to `requestedVersion`. Entries are not consolidated.
   *
   * @throws Error('Invalid version') when `requestedVersion` is behind the
   * compaction frontier.
   */
  reconstructAt(key: K, requestedVersion: Version): [V, number][] {
    this.#validate(requestedVersion)
    const out: [V, number][] = []
    const versions = this.#inner.get(key)
    for (const [version, values] of versions.entries()) {
      if (version.lessEqual(requestedVersion)) {
        chunkedArrayPush(out, values)
      }
    }
    return out
  }

  /**
   * Returns the version -> entries map for `key`.
   *
   * Before any compaction this is the stored map itself. Afterwards it is a
   * fresh map in which stale versions have been advanced to the compaction
   * frontier (and merged when several advance to the same version).
   */
  get(key: K): VersionMap<[V, number]> {
    if (!this.#compactionFrontier) return this.#inner.get(key)
    // versions may be older than the compaction frontier, so we need to
    // advance them to it. This is due to not rewriting the whole version index
    // to the compaction frontier as part of the compact operation.
    const out = new DefaultMap<Version, [V, number][]>(() => [])
    for (const [rawVersion, values] of this.#inner.get(key).entries()) {
      const version = this.#advanceToFrontier(rawVersion)
      if (out.has(version)) {
        // Build a new array so the stored per-version arrays are never mutated.
        out.set(version, [...out.get(version), ...values])
      } else {
        out.set(version, values)
      }
    }
    return out
  }

  /** Returns `[key, get(key)]` for every key in the index. */
  entries(): [K, VersionMap<[V, number]>][] {
    return this.keys().map((key) => [key, this.get(key)])
  }

  /** Returns the versions (as seen through `get`) at which `key` has entries. */
  versions(key: K): Version[] {
    return Array.from(this.get(key).keys())
  }

  /**
   * Records one [value, multiplicity] entry for `key` at `version` and marks
   * the key as modified for the next `compact`.
   *
   * @throws Error('Invalid version') when `version` is behind the compaction
   * frontier.
   */
  addValue(key: K, version: Version, value: [V, number]): void {
    this.#validate(version)
    const versions = this.#inner.get(key)
    versions.update(version, (values) => {
      values.push(value)
      return values
    })
    this.#modifiedKeys.add(key)
  }

  /**
   * Adds every entry of `other` (read through `other.entries()`) to this
   * index and marks the affected keys as modified.
   */
  append(other: Index<K, V>): void {
    for (const [key, versions] of other.entries()) {
      const thisVersions = this.#inner.get(key)
      for (const [version, data] of versions) {
        thisVersions.update(version, (values) => {
          chunkedArrayPush(values, data)
          return values
        })
      }
      this.#modifiedKeys.add(key)
    }
  }

  /**
   * Joins this index with `other` on key.
   *
   * For every shared key and every pair of entries, emits
   * `[key, [value, otherValue]]` with the product of the multiplicities at
   * the join of the two entry versions.
   *
   * @returns one `[version, MultiSet]` pair per result version that has data.
   */
  join<V2>(other: Index<K, V2>): [Version, MultiSet<[K, [V, V2]]>][] {
    const collections = new DefaultMap<Version, [K, [V, V2], number][]>(
      () => [],
    )
    // We want to iterate over the smaller of the two indexes to reduce the
    // number of operations we need to do.
    if (this.#inner.size <= other.#inner.size) {
      for (const [key, versions] of this.#inner) {
        if (!other.has(key)) continue
        const otherVersions = other.get(key)
        for (const [rawVersion1, data1] of versions) {
          // Stored versions may lag behind the compaction frontier; advance
          // them exactly as `get` does so both branches agree on versions.
          const version1 = this.#advanceToFrontier(rawVersion1)
          for (const [version2, data2] of otherVersions) {
            const resultVersion = version1.join(version2)
            for (const [val1, mul1] of data1) {
              for (const [val2, mul2] of data2) {
                collections.update(resultVersion, (existing) => {
                  existing.push([key, [val1, val2], mul1 * mul2])
                  return existing
                })
              }
            }
          }
        }
      }
    } else {
      for (const [key, otherVersions] of other.entries()) {
        if (!this.has(key)) continue
        const versions = this.get(key)
        for (const [version2, data2] of otherVersions) {
          for (const [version1, data1] of versions) {
            const resultVersion = version1.join(version2)
            for (const [val2, mul2] of data2) {
              for (const [val1, mul1] of data1) {
                collections.update(resultVersion, (existing) => {
                  existing.push([key, [val1, val2], mul1 * mul2])
                  return existing
                })
              }
            }
          }
        }
      }
    }
    const result = Array.from(collections.entries())
      .filter(([_v, c]) => c.length > 0)
      .map(([version, data]) => [
        version,
        new MultiSet(data.map(([k, v, m]) => [[k, v], m])),
      ])
    return result as [Version, MultiSet<[K, [V, V2]]>][]
  }

  /**
   * Advances entries older than `compactionFrontier` to it and consolidates
   * the affected versions (summing multiplicities per value, dropping zeros).
   *
   * @param compactionFrontier - new frontier; must not be behind the current one.
   * @param keys - keys to compact; when empty, every key modified since it
   * was last compacted is processed.
   * @throws Error('Invalid compaction frontier') when the current frontier is
   * not less than or equal to `compactionFrontier`.
   */
  compact(compactionFrontier: Antichain, keys: K[] = []): void {
    // This is the same test #validate would run for an Antichain, so no
    // separate #validate call is needed afterwards.
    if (
      this.#compactionFrontier &&
      !this.#compactionFrontier.lessEqual(compactionFrontier)
    ) {
      throw new Error('Invalid compaction frontier')
    }

    const consolidateValues = (values: [V, number][]): [V, number][] => {
      // Use the hash of each value as the key for proper deduplication
      const consolidated = new Map<string | number, [V, number]>()
      for (const [value, multiplicity] of values) {
        const key = hash(value)
        const existing = consolidated.get(key)
        if (existing) {
          consolidated.set(key, [value, existing[1] + multiplicity])
        } else {
          consolidated.set(key, [value, multiplicity])
        }
      }
      return Array.from(consolidated.values()).filter(
        ([_, multiplicity]) => multiplicity !== 0,
      )
    }

    const keysToProcess =
      keys.length > 0 ? keys : Array.from(this.#modifiedKeys)
    for (const key of keysToProcess) {
      const versions = this.#inner.get(key)
      const toCompact = Array.from(versions.keys()).filter(
        (version) => !compactionFrontier.lessEqualVersion(version),
      )
      const toConsolidate = new Set<Version>()
      for (const version of toCompact) {
        const values = versions.get(version)
        versions.delete(version)
        const newVersion = version.advanceBy(compactionFrontier)
        versions.update(newVersion, (existing) => {
          chunkedArrayPush(existing, values)
          return existing
        })
        toConsolidate.add(newVersion)
      }
      for (const version of toConsolidate) {
        const newValues = consolidateValues(versions.get(version))
        if (newValues.length > 0) {
          versions.set(version, newValues)
        } else {
          // Only this version cancelled out; other versions of the key may
          // still hold data, so drop just the empty version.
          versions.delete(version)
        }
      }
      // Forget the key only once nothing at all is stored for it.
      if (versions.size === 0) {
        this.#inner.delete(key)
      }
      this.#modifiedKeys.delete(key)
    }
    this.#compactionFrontier = compactionFrontier
  }

  /** Returns all keys currently stored in the index. */
  keys(): K[] {
    return Array.from(this.#inner.keys())
  }

  /** Returns whether `key` is currently stored in the index. */
  has(key: K): boolean {
    return this.#inner.has(key)
  }
}