/*
 * @electric-sql/d2ts
 * D2TS is a TypeScript implementation of Differential Dataflow.
 * This module: SQLite-backed reduce operator.
 */
import { StreamBuilder } from '../../d2.js'
import {
DataMessage,
MessageType,
IStreamBuilder,
KeyValue,
} from '../../types.js'
import { MultiSet } from '../../multiset.js'
import {
DifferenceStreamReader,
DifferenceStreamWriter,
UnaryOperator,
} from '../../graph.js'
import { Version, Antichain } from '../../order.js'
import { SQLiteDb, SQLiteStatement } from '../database.js'
import { SQLIndex } from '../version-index.js'
import { SQLiteContext } from '../context.js'
/**
 * Row shape of the `reduce_keys_todo_<id>` work table: one row per
 * (version, key) pair whose reduction still needs to be (re)computed.
 */
interface KeysTodoRow {
  // Serialized version (as produced by Version#toJSON).
  version: string
  // JSON-stringified key of type K.
  key: string
}
/**
* SQLite version of the ReduceOperator
*/
export class ReduceOperatorSQLite<K, V1, V2> extends UnaryOperator<
  [K, V1 | V2]
> {
  // Input values awaiting reduction, stored per (key, version).
  #index: SQLIndex<K, V1>
  // Previously emitted outputs per (key, version); used to compute deltas.
  #indexOut: SQLIndex<K, V2>
  #preparedStatements: {
    insertKeyTodo: SQLiteStatement<[string, string]>
    getKeysTodo: SQLiteStatement<[], KeysTodoRow>
    deleteKeysTodo: SQLiteStatement<[string]>
    createKeysTodoTable: SQLiteStatement
    dropKeysTodoTable: SQLiteStatement
  }
  // User reduction: maps one key's [value, multiplicity] pairs to output pairs.
  #f: (values: [V1, number][]) => [V2, number][]

  /**
   * @param id - Operator id; also namespaces the SQLite tables.
   * @param inputA - Reader for the keyed input stream.
   * @param output - Writer for the reduced output stream.
   * @param f - Reduction function applied per key.
   * @param initialFrontier - Initial input/output frontier.
   * @param db - SQLite database holding all operator state.
   */
  constructor(
    id: number,
    inputA: DifferenceStreamReader<[K, V1]>,
    output: DifferenceStreamWriter<[K, V2]>,
    f: (values: [V1, number][]) => [V2, number][],
    initialFrontier: Antichain,
    db: SQLiteDb,
  ) {
    super(id, inputA, output, initialFrontier)
    this.#f = f

    // Initialize indexes
    this.#index = new SQLIndex<K, V1>(db, `reduce_index_${id}`)
    this.#indexOut = new SQLIndex<K, V2>(db, `reduce_index_out_${id}`)

    // Create the pending-work table: one row per (version, key) to reduce.
    db.exec(`
      CREATE TABLE IF NOT EXISTS reduce_keys_todo_${id} (
        version TEXT NOT NULL,
        key TEXT NOT NULL,
        PRIMARY KEY (version, key)
      )
    `)

    // Create indexes for better performance
    db.exec(`
      CREATE INDEX IF NOT EXISTS reduce_keys_todo_${id}_version_idx
      ON reduce_keys_todo_${id}(version)
    `)

    // Prepare statements
    this.#preparedStatements = {
      createKeysTodoTable: db.prepare(`
        CREATE TABLE IF NOT EXISTS reduce_keys_todo_${id} (
          version TEXT NOT NULL,
          key TEXT NOT NULL,
          PRIMARY KEY (version, key)
        )
      `),
      dropKeysTodoTable: db.prepare(`
        DROP TABLE IF EXISTS reduce_keys_todo_${id}
      `),
      insertKeyTodo: db.prepare(`
        INSERT OR IGNORE INTO reduce_keys_todo_${id} (version, key)
        VALUES (?, ?)
      `),
      getKeysTodo: db.prepare(`
        SELECT version, key FROM reduce_keys_todo_${id}
      `),
      deleteKeysTodo: db.prepare(`
        DELETE FROM reduce_keys_todo_${id}
        WHERE version = ?
      `),
    }
  }

  run(): void {
    for (const message of this.inputMessages()) {
      if (message.type === MessageType.DATA) {
        const { version, collection } = message.data as DataMessage<[K, V1]>
        for (const [item, multiplicity] of collection.getInner()) {
          const [key, value] = item
          this.#index.addValue(key, version, [value, multiplicity])
          // Mark this (version, key) as needing (re)reduction.
          this.#preparedStatements.insertKeyTodo.run(
            version.toJSON(),
            JSON.stringify(key),
          )
          // The key must also be re-reduced at the join of this version with
          // every version already recorded for it, so the reduction at those
          // upper bounds sees the new value.
          for (const v2 of this.#index.versions(key)) {
            const joinVersion = version.join(v2)
            this.#preparedStatements.insertKeyTodo.run(
              joinVersion.toJSON(),
              JSON.stringify(key),
            )
          }
        }
      } else if (message.type === MessageType.FRONTIER) {
        const frontier = message.data as Antichain
        if (!this.inputFrontier().lessEqual(frontier)) {
          throw new Error('Invalid frontier update')
        }
        this.setInputFrontier(frontier)
      }
    }

    // Group pending work by the serialized version string. Grouping must use
    // the string, NOT a deserialized Version object: Map compares object keys
    // by reference, so two Version instances parsed from the same string would
    // land in separate groups and defeat per-version batching (one output
    // message per key instead of one per version).
    const keysTodoByVersion = new Map<string, K[]>()
    for (const row of this.#preparedStatements.getKeysTodo.all()) {
      const keys = keysTodoByVersion.get(row.version)
      if (keys) {
        keys.push(JSON.parse(row.key) as K)
      } else {
        keysTodoByVersion.set(row.version, [JSON.parse(row.key) as K])
      }
    }

    // A version is finished once the input frontier has moved past it.
    // Keep the original serialized string so the todo rows can be deleted
    // by exactly the value they were stored under.
    const finishedVersions: [Version, string, K[]][] = []
    for (const [versionJson, keys] of keysTodoByVersion) {
      const version = Version.fromJSON(versionJson)
      if (!this.inputFrontier().lessEqualVersion(version)) {
        finishedVersions.push([version, versionJson, keys])
      }
    }
    // NOTE(review): versions form a partial order, so this comparator is not
    // total; it preserves the original processing order for comparable pairs.
    finishedVersions.sort((a, b) => (a[0].lessEqual(b[0]) ? -1 : 1))

    for (const [version, versionJson, keys] of finishedVersions) {
      const result: [[K, V2], number][] = []
      for (const key of keys) {
        const curr = this.#index.reconstructAt(key, version)
        const currOut = this.#indexOut.reconstructAt(key, version)
        const out = this.#f(curr)

        // Calculate delta between current and previous output, keyed by the
        // JSON form of each value so structurally equal values collapse.
        const delta = new Map<string, number>()
        const values = new Map<string, V2>()
        for (const [value, multiplicity] of out) {
          const valueKey = JSON.stringify(value)
          values.set(valueKey, value)
          delta.set(valueKey, (delta.get(valueKey) || 0) + multiplicity)
        }
        for (const [value, multiplicity] of currOut) {
          const valueKey = JSON.stringify(value)
          values.set(valueKey, value)
          delta.set(valueKey, (delta.get(valueKey) || 0) - multiplicity)
        }

        // Emit only non-zero deltas, and record them so the next reduction
        // at a later version diffs against this output.
        for (const [valueKey, multiplicity] of delta) {
          const value = values.get(valueKey)!
          if (multiplicity !== 0) {
            result.push([[key, value], multiplicity])
            this.#indexOut.addValue(key, version, [value, multiplicity])
          }
        }
      }
      if (result.length > 0) {
        this.output.sendData(version, new MultiSet(result))
      }
      // All keys for this version were processed above; clear its todo rows.
      this.#preparedStatements.deleteKeysTodo.run(versionJson)
    }

    if (!this.outputFrontier.lessEqual(this.inputFrontier())) {
      throw new Error('Invalid frontier state')
    }
    if (this.outputFrontier.lessThan(this.inputFrontier())) {
      this.outputFrontier = this.inputFrontier()
      this.output.sendFrontier(this.outputFrontier)
      // State at or before the new frontier can no longer change; compact it.
      this.#index.compact(this.outputFrontier)
      this.#indexOut.compact(this.outputFrontier)
    }
  }

  /** Drops all SQLite state owned by this operator. */
  destroy(): void {
    this.#index.destroy()
    this.#indexOut.destroy()
    this.#preparedStatements.dropKeysTodoTable.run()
  }
}
/**
* Reduces the elements in the stream by key
* Persists state to SQLite
*
* @param f - The reduction function
* @param db - Optional SQLite database (can be injected via context)
*/
/**
 * Reduces the elements in the stream by key, persisting operator state to
 * SQLite.
 *
 * @param f - Reduction function: maps the [value, multiplicity] pairs seen
 *   for one key to the output [value, multiplicity] pairs.
 * @param db - Optional SQLite database; when omitted it is resolved from
 *   the ambient SQLiteContext (see withSQLite()).
 */
export function reduce<
  K extends T extends KeyValue<infer K, infer _V> ? K : never,
  V1 extends T extends KeyValue<K, infer V> ? V : never,
  R,
  T,
>(f: (values: [V1, number][]) => [R, number][], db?: SQLiteDb) {
  return (stream: IStreamBuilder<T>): IStreamBuilder<KeyValue<K, R>> => {
    // Fall back to the context-injected database when none was passed in.
    const database = db || SQLiteContext.getDb()
    if (!database) {
      throw new Error(
        'SQLite database is required for reduce operator. ' +
          'Provide it as a parameter or use withSQLite() to inject it.',
      )
    }
    const graph = stream.graph
    const output = new StreamBuilder<KeyValue<K, R>>(
      graph,
      new DifferenceStreamWriter<KeyValue<K, R>>(),
    )
    const operatorId = graph.getNextOperatorId()
    const reader = stream.connectReader() as DifferenceStreamReader<
      KeyValue<K, V1>
    >
    const operator = new ReduceOperatorSQLite<K, V1, R>(
      operatorId,
      reader,
      output.writer,
      f,
      graph.frontier(),
      database,
    )
    graph.addOperator(operator)
    graph.addStream(output.connectReader())
    return output
  }
}