UNPKG

@electric-sql/d2mini

Version:

D2Mini is a minimal implementation of Differential Dataflow for performing in-memory incremental view maintenance.

232 lines (195 loc) 5.8 kB
import { describe, test, expect } from 'vitest'
import { D2 } from '../../src/d2.js'
import { MultiSet } from '../../src/multiset.js'
import { distinct } from '../../src/operators/distinct.js'
import { output } from '../../src/operators/output.js'
import { MessageTracker, assertResults } from '../test-utils.js'

// Suite registration: the actual test cases live in testDistinct() below.
describe('Operators', () => {
  describe('Efficient distinct operation', () => {
    testDistinct()
  })
})

/**
 * Registers the test cases for the `distinct` operator.
 *
 * Must be called from inside a `describe` callback (see the registration
 * above). Every case builds a fresh `D2` graph, pipes one input through
 * `distinct()` into an `output()` sink, finalizes the graph, sends one or
 * more `MultiSet` batches and checks what reached the sink.
 */
function testDistinct() {
  // Positive multiplicities (2, 1, 2) are expected to come out as 1 each.
  test('basic distinct operation', () => {
    const graph = new D2()
    const input = graph.newInput<[number, string]>()
    const messages: MultiSet<[number, string]>[] = []

    input.pipe(
      distinct(),
      output((message) => {
        messages.push(message)
      }),
    )

    graph.finalize()

    input.sendData(
      new MultiSet([
        [[1, 'a'], 2],
        [[2, 'b'], 1],
        [[2, 'c'], 2],
      ]),
    )

    graph.run()

    const data = messages.map((m) => m.getInner())

    expect(data).toEqual([
      [
        [[1, 'a'], 1],
        [[2, 'b'], 1],
        [[2, 'c'], 1],
      ],
    ])
  })

  // Uses a selector so that rows are de-duplicated by `value.country`;
  // the two 'UK' rows are expected to collapse into a single output row.
  test('distinct by certain property', () => {
    const graph = new D2()
    const input = graph.newInput<[number, { name: string; country: string }]>()
    const messages: MultiSet<[number, { name: string; country: string }]>[] = []

    input.pipe(
      distinct(([_, value]) => value.country),
      output((message) => {
        messages.push(message)
      }),
    )

    graph.finalize()

    input.sendData(
      new MultiSet([
        [[1, { name: 'Valter', country: 'Portugal' }], 1],
        [[2, { name: 'Sam', country: 'UK' }], 1],
        [[2, { name: 'Kevin', country: 'Belgium' }], 1],
        [[3, { name: 'Garry', country: 'UK' }], 1],
        [[4, { name: 'Kyle', country: 'USA' }], 1],
      ]),
    )

    graph.run()

    // Only the first message delivered to the sink is inspected.
    const data = messages.map((m) => m.getInner())[0]
    // Reduce each row to [country, multiplicity]; both sides are sorted so
    // the comparison does not depend on the order rows were emitted in.
    const countries = data
      .map(([[_, value], multiplicity]) => [value.country, multiplicity])
      .sort()

    expect(countries).toEqual(
      [
        ['Belgium', 1],
        ['Portugal', 1],
        ['UK', 1],
        ['USA', 1],
      ].sort(),
    )
  })

  // Three successive batches on the same graph: insert, replace, then clear.
  // The tracker is reset between batches so each assertion looks only at the
  // messages recorded since the previous reset.
  test('distinct with updates', () => {
    const graph = new D2()
    const input = graph.newInput<[number, string]>()
    const tracker = new MessageTracker<[number, string]>()

    input.pipe(
      distinct(),
      output((message) => {
        tracker.addMessage(message)
      }),
    )

    graph.finalize()

    // Initial batch
    input.sendData(
      new MultiSet([
        [[1, 'a'], 1],
        [[1, 'b'], 1],
        [[1, 'a'], 1], // Duplicate, should only result in 1
      ]),
    )
    graph.run()

    const initialResult = tracker.getResult()
    assertResults(
      'distinct with updates - initial',
      initialResult,
      [
        [1, 'a'],
        [1, 'b'],
      ], // Should have both distinct values
      4, // Max expected messages
    )

    tracker.reset()

    // Second batch - remove some, add new
    input.sendData(
      new MultiSet([
        [[1, 'b'], -1], // Remove 'b'
        [[1, 'c'], 2], // Add 'c' (multiplicity should be capped at 1)
        [[1, 'a'], -1], // Remove 'a'
      ]),
    )
    graph.run()

    const secondResult = tracker.getResult()
    assertResults(
      'distinct with updates - second batch',
      secondResult,
      [[1, 'c']], // Should only have 'c' remaining
      4, // Max expected messages
    )

    tracker.reset()

    // Third batch - remove remaining
    input.sendData(new MultiSet([[[1, 'c'], -2]]))
    graph.run()

    const thirdResult = tracker.getResult()
    assertResults(
      'distinct with updates - third batch',
      thirdResult,
      [], // Should have no remaining distinct values
      2, // Max expected messages
    )
  })

  // NOTE(review): despite the test name, only a single batch is sent here;
  // it holds several rows under the same first tuple element ('key1').
  test('distinct with multiple batches of same key', () => {
    const graph = new D2()
    const input = graph.newInput<[string, number]>()
    const messages: MultiSet<[string, number]>[] = []

    input.pipe(
      distinct(),
      output((message) => {
        messages.push(message)
      }),
    )

    graph.finalize()

    input.sendData(
      new MultiSet([
        [['key1', 1], 2],
        [['key1', 2], 3],
        [['key2', 1], 1],
      ]),
    )
    graph.run()

    const data = messages.map((m) => m.getInner())

    expect(data).toEqual([
      [
        [['key1', 1], 1],
        [['key1', 2], 1],
        [['key2', 1], 1],
      ],
    ])
  })

  // A single batch whose entries for ['key1', 2] sum to zero (2 + 1 - 3);
  // that row is expected to be absent from the final result.
  test('distinct with multiple batches of same key that cancel out', () => {
    const graph = new D2()
    const input = graph.newInput<[string, number]>()
    const tracker = new MessageTracker<[string, number]>()

    input.pipe(
      distinct(),
      output((message) => {
        tracker.addMessage(message)
      }),
    )

    graph.finalize()

    input.sendData(
      new MultiSet([
        [['key1', 1], 2], // Add ['key1', 1] with multiplicity 2 -> should become 1 (distinct)
        [['key1', 2], 2], // Add ['key1', 2] with multiplicity 2 -> should become 1 (distinct)
        [['key1', 2], 1], // Add more ['key1', 2] with multiplicity 1 -> total 3, still 1 in distinct
        [['key2', 1], 1], // Add ['key2', 1] with multiplicity 1 -> should become 1 (distinct)
        [['key1', 2], -3], // Remove all ['key1', 2] (total was 3) -> should be removed from distinct
        [['key2', 1], 1], // Add more ['key2', 1] -> still 1 in distinct
      ]),
    )
    graph.run()

    const result = tracker.getResult()
    assertResults(
      'distinct with multiple batches that cancel out',
      result,
      [
        ['key1', 1], // Should remain (multiplicity 2 -> 1 in distinct)
        ['key2', 1], // Should remain (multiplicity 2 -> 1 in distinct)
      ],
      6, // Max expected messages (generous upper bound)
    )
  })
}