@electric-sql/d2ts
Version:
D2TS is a TypeScript implementation of Differential Dataflow.
350 lines (331 loc) • 11.6 kB
text/typescript
import { D2, RootStreamBuilder } from '../d2.js'
import { MultiSetArray } from '../multiset.js'
import { type Version, type Antichain } from '../order.js'
import {
IStreamBuilder,
Message,
DataMessage,
MessageType,
KeyValue,
} from '../types.js'
import {
DifferenceStreamReader,
DifferenceStreamWriter,
UnaryOperator,
} from '../graph.js'
import { StreamBuilder } from '../d2.js'
import {
type Row,
type ShapeStreamInterface,
type ChangeMessage,
type Operation,
isChangeMessage,
isControlMessage,
} from '@electric-sql/client'
/*
Electric uses Postgres LSNs to track progress, each message is annotated with an LSN.
We need to keep track of these and use them to send as the version for each message to
the D2 input stream.
D2 also requires a a frontier message for be sent, this is the lower bound for all
future messages.
To do this we:
- Keep track of the last LSN we've seen
- Send the LSN as the version for each message
- When we receive an `up-to-date` message, we send the last LSN+1 as the frontier. The
addition of 1 is to account for the fact that the last LSN is the version of the last
message, and we need to send the version of the next message as the frontier.
*/
export interface ElectricStreamToD2InputOptions<T extends Row<unknown> = Row> {
/** D2 Graph to send messages to */
graph: D2
/** The Electric ShapeStream to consume */
stream: ShapeStreamInterface<T>
/** The D2 input stream to send messages to */
input: RootStreamBuilder<[key: string, T]>
/** Optional function to convert LSN to version number/Version */
lsnToVersion?: (lsn: number) => number | Version
/** Optional function to convert LSN to frontier number/Antichain */
lsnToFrontier?: (lsn: number) => number | Antichain
/** Initial LSN */
initialLsn?: number
/** When to run the graph */
runOn?: 'up-to-date' | 'lsn-advance' | false
/** Whether to log debug messages */
debug?: boolean | typeof console.log
}
/**
* Connects an Electric ShapeStream to a D2 input stream
* IMPORTANT: Requires the ShapeStream to be configured with `replica: 'full'`
* @param options Configuration options
* @param options.stream The Electric ShapeStream to consume
* @param options.input The D2 input stream to send messages to
* @param options.lsnToVersion Optional function to convert LSN to version number/Version
* @param options.lsnToFrontier Optional function to convert LSN to frontier number/Antichain
* @returns The input stream for chaining
*
* @example
* ```ts
* // Create D2 graph
* const graph = new D2({ initialFrontier: 0 })
*
* // Create D2 input
* const input = graph.newInput<any>()
*
* // Configure the pipeline
* input
* .pipe(
* map(([key, data]) => data.value),
* filter(value => value > 10)
* )
*
* // Finalize graph
* graph.finalize()
*
* // Create Electric stream (example)
* const electricStream = new ShapeStream({
* url: 'http://localhost:3000/v1/shape',
* params: {
* table: 'items',
* replica: 'full',
* }
* })
*
* // Connect Electric stream to D2 input
* electricStreamToD2Input({
* stream: electricStream,
* input,
* })
* ```
*/
export function electricStreamToD2Input<T extends Row<unknown> = Row>({
graph,
stream,
input,
lsnToVersion = (lsn: number) => lsn,
lsnToFrontier = (lsn: number) => lsn,
initialLsn = 0,
runOn = 'up-to-date',
debug = false,
}: ElectricStreamToD2InputOptions<T>): RootStreamBuilder<[key: string, T]> {
let lastLsn: number = initialLsn
let changes: MultiSetArray<[key: string, T]> = []
const sendChanges = (lsn: number) => {
const version = lsnToVersion(lsn)
log?.(`sending ${changes.length} changes at version ${version}`)
if (changes.length > 0) {
input.sendData(version, [...changes])
}
changes = []
}
const sendFrontier = (lsn: number) => {
const frontier = lsnToFrontier(lsn + 1) // +1 to account for the fact that the last LSN is the version of the last message
log?.(`sending frontier ${frontier}`)
input.sendFrontier(frontier)
}
const log =
typeof debug === 'function'
? debug
: debug === true
? // eslint-disable-next-line no-console
console.log
: undefined
log?.('subscribing to stream')
stream.subscribe((messages) => {
log?.(`received ${messages.length} messages`)
for (const message of messages) {
if (isControlMessage(message)) {
log?.(`- control message: ${message.headers.control}`)
// Handle control message
if (message.headers.control === 'up-to-date') {
log?.(`up-to-date ${JSON.stringify(message, null, 2)}`)
if (changes.length > 0) {
sendChanges(lastLsn)
}
if (typeof message.headers.global_last_seen_lsn !== `number`) {
throw new Error(`global_last_seen_lsn is not a number`)
}
const lsn = message.headers.global_last_seen_lsn
sendFrontier(lsn)
if (runOn === 'up-to-date' || runOn === 'lsn-advance') {
log?.('running graph on up-to-date')
graph.run()
}
} else if (message.headers.control === 'must-refetch') {
throw new Error(
'The server sent a "must-refetch" request, this is incompatible with a D2 pipeline and unresolvable. To handle this you will have to remove all state and start the pipeline again.',
)
}
} else if (isChangeMessage(message)) {
log?.(`- change message: ${message.headers.operation}`)
// Handle change message
if (
message.headers.lsn !== undefined &&
typeof message.headers.lsn !== `number`
) {
throw new Error(`lsn is not a number`)
}
const lsn = message.headers.lsn ?? initialLsn // The LSN is not present on the initial snapshot
const last: boolean =
(message.headers.last as boolean | undefined) ?? false
switch (message.headers.operation) {
case 'insert':
changes.push([[message.key, message.value], 1])
break
case 'update': {
// An update in D2 is a delete followed by an insert.
// `old_value` only holds the old values *that have changed*
// so we need to merge the old and new value to get a complete row
// that represents the row as it was before the update.
const oldValue = {
...message.value,
...(message.old_value ?? {}),
}
changes.push([[message.key, oldValue], -1])
changes.push([[message.key, message.value], 1])
break
}
case 'delete':
changes.push([[message.key, message.value], -1])
break
}
if (last) {
sendChanges(lsn)
sendFrontier(lsn)
if (runOn === 'lsn-advance') {
log?.('running graph on lsn-advance')
graph.run()
}
}
if (lsn > lastLsn) {
lastLsn = lsn
}
}
}
})
return input
}
/**
* Operator that outputs the messages from the stream in ElectricSQL format
*
* TODO: Have two modes `replica=default` and `replica=full` to match the two modes
* of core Electric.
*/
export class OutputElectricMessagesOperator<K, V> extends UnaryOperator<
[K, V]
> {
#fn: (data: ChangeMessage<Row<V>>[]) => void
constructor(
id: number,
inputA: DifferenceStreamReader<[K, V]>,
output: DifferenceStreamWriter<[K, V]>,
fn: (data: ChangeMessage<Row<V>>[]) => void,
initialFrontier: Antichain,
) {
super(id, inputA, output, initialFrontier)
this.#fn = fn
}
transformMessages(messages: Message<[K, V]>[]): ChangeMessage<Row<V>>[] {
const output: ChangeMessage<Row<V>>[] = []
let lastLSN = -Infinity
for (const message of messages) {
if (message.type === MessageType.DATA) {
const { version, collection } = message.data as DataMessage<[K, V]>
// We use the first element of the D2version as the LSN as its what we used
// in the input
const lsn = version.getInner()[0]
if (lsn < lastLSN) {
throw new Error(
`Invalid LSN ${lsn} less than lastLSN ${lastLSN}, you must consolidate your stream before passing it to the OutputElectricMessagesOperator`,
)
}
lastLSN = lsn
const changesByKey = new Map<
K,
{ deletes: number; inserts: number; value: V }
>()
for (const [[key, value], multiplicity] of collection.getInner()) {
let changes = changesByKey.get(key)
if (!changes) {
changes = { deletes: 0, inserts: 0, value: value }
changesByKey.set(key, changes)
}
if (multiplicity < 0) {
changes.deletes += Math.abs(multiplicity)
} else if (multiplicity > 0) {
changes.inserts += multiplicity
changes.value = value
}
}
const newMessages = Array.from(changesByKey.entries()).map(
([key, { deletes, inserts, value }]) => {
const operation: Operation =
deletes > inserts
? 'delete'
: inserts > deletes
? 'insert'
: 'update'
return {
key: (key as any).toString(),
value: value as Row<V>,
headers: { lsn, operation },
}
},
)
output.push(...newMessages)
}
}
return output
}
run(): void {
const messages = this.inputMessages()
const outputMessages = this.transformMessages(messages)
if (outputMessages.length > 0) {
this.#fn(outputMessages)
}
for (const message of messages) {
if (message.type === MessageType.DATA) {
const { version, collection } = message.data as DataMessage<[K, V]>
this.output.sendData(version, collection)
} else if (message.type === MessageType.FRONTIER) {
const frontier = message.data as Antichain
if (!this.inputFrontier().lessEqual(frontier)) {
throw new Error('Invalid frontier update')
}
this.setInputFrontier(frontier)
if (!this.outputFrontier.lessEqual(this.inputFrontier())) {
throw new Error('Invalid frontier state')
}
if (this.outputFrontier.lessThan(this.inputFrontier())) {
this.outputFrontier = this.inputFrontier()
this.output.sendFrontier(this.outputFrontier)
}
}
}
}
}
/**
* Outputs the messages from the stream in ElectricSQL format
* @param fn - The function to call with a batch of messages
*/
export function outputElectricMessages<
K extends T extends KeyValue<infer K extends string, infer _V> ? K : never,
V extends T extends KeyValue<K, infer V> ? V : never,
T,
>(fn: (data: ChangeMessage<Row<V>>[]) => void) {
return (stream: IStreamBuilder<T>): IStreamBuilder<KeyValue<K, V>> => {
const output = new StreamBuilder<KeyValue<K, V>>(
stream.graph,
new DifferenceStreamWriter<KeyValue<K, V>>(),
)
const operator = new OutputElectricMessagesOperator<K, V>(
stream.graph.getNextOperatorId(),
stream.connectReader() as DifferenceStreamReader<KeyValue<K, V>>,
output.writer,
fn,
stream.graph.frontier(),
)
stream.graph.addOperator(operator)
stream.graph.addStream(output.connectReader())
return output
}
}