@electric-sql/d2mini
Version:
D2Mini is a minimal implementation of Differential Dataflow for performing in-memory incremental view maintenance.
727 lines (603 loc) • 25.1 kB
text/typescript
import { describe, it, expect, beforeAll } from 'vitest'
import { D2 } from '../../src/d2.js'
import { MultiSet } from '../../src/multiset.js'
import { topKWithFractionalIndex } from '../../src/operators/topKWithFractionalIndex.js'
import {
loadBTree,
topKWithFractionalIndexBTree,
} from '../../src/operators/topKWithFractionalIndexBTree.js'
import { output } from '../../src/operators/index.js'
import { MessageTracker, assertOnlyKeysAffected } from '../test-utils.js'
/**
 * Checks that the fractional indices order the entries the same way their values do.
 *
 * Every entry is expected to look like `[[key, [value, index]], multiplicity]`.
 * Only `value.value` (the ordering key) and `index` are read; the key and the
 * multiplicity are ignored.
 *
 * Entries are sorted by `value.value` using plain `<` / `>` comparison (note: the
 * tests in this file order with `localeCompare`, so the two can differ on data
 * where code-unit order and locale order disagree). Entries with equal ordering
 * keys are sorted by index, because any relative order is acceptable among ties
 * as long as the indices are distinct.
 *
 * @param results - entries to inspect
 * @returns `true` when the indices are strictly increasing along the value order
 *   (trivially `true` for zero or one entry), `false` otherwise
 */
function checkLexicographicOrder(results: any[]): boolean {
  const entries = results.map(([[, [value, index]]]) => ({ value, index }))

  // Three-way comparator: a comparator that never returns 0 violates the sort
  // contract and makes the outcome engine-dependent when ordering keys repeat.
  const byValue = [...entries].sort((a, b) => {
    if (a.value.value < b.value.value) return -1
    if (a.value.value > b.value.value) return 1
    if (a.index < b.index) return -1
    if (a.index > b.index) return 1
    return 0
  })

  // Each index must be strictly greater than its predecessor; equal indices fail.
  return byValue.every(
    (entry, position) =>
      position === 0 || byValue[position - 1].index < entry.index,
  )
}
/**
 * Asserts that the entries carry exactly the expected ordering keys and that
 * sorting those keys by their fractional index reproduces `expectedOrder`.
 *
 * Every entry is expected to look like `[[key, [value, index]], multiplicity]`;
 * only `value.value` and `index` are read. When the same `value.value` occurs
 * more than once, the index of the last occurrence is the one used.
 *
 * @param results - entries to inspect
 * @param expectedOrder - ordering keys in the order the indices should produce
 */
function verifyOrder(results: any[], expectedOrder: string[]) {
  const orderingKeys: any[] = []
  const indexByKey = new Map()
  results.forEach(([[, [value, index]]]) => {
    orderingKeys.push(value.value)
    indexByKey.set(value.value, index)
  })

  // Membership check: compare sorted copies so arrival order does not matter
  expect(orderingKeys.slice().sort()).toEqual(expectedOrder.slice().sort())

  // Order check: lay the keys out by ascending index and compare positionally
  const keysByIndex = Array.from(indexByKey)
    .sort(([, left], [, right]) => (left < right ? -1 : 1))
    .map(([orderingKey]) => orderingKey)
  expect(keysByIndex).toEqual(expectedOrder)
}
// Wait for `loadBTree()` to settle once before any test in this file runs.
// NOTE(review): presumably this loads the implementation backing
// `topKWithFractionalIndexBTree` — confirm against the operator module.
beforeAll(async (): Promise<void> => {
  await loadBTree()
})
describe('Operators', () => {
describe.each([
['with array', { topK: topKWithFractionalIndex }],
['with B+ tree', { topK: topKWithFractionalIndexBTree }],
])('TopKWithFractionalIndex operator %s', (_, { topK }) => {
it('should assign fractional indices to sorted elements', () => {
  type Row = { id: number; value: string }
  // Builds one multiset entry under the single `null` key
  const entry = (
    id: number,
    value: string,
    multiplicity = 1,
  ): [[null, Row], number] => [[null, { id, value }], multiplicity]

  // Pipeline: input -> topK ordered by `value` -> message tracker
  const graph = new D2()
  const input = graph.newInput<[null, Row]>()
  const tracker = new MessageTracker<[null, [Row, string]]>()
  input.pipe(
    topK((left, right) => left.value.localeCompare(right.value)),
    output((message) => {
      tracker.addMessage(message)
    }),
  )
  graph.finalize()

  // Step 1: seed with a, b, c, d, e
  input.sendData(
    new MultiSet([
      entry(1, 'a'),
      entry(2, 'b'),
      entry(3, 'c'),
      entry(4, 'd'),
      entry(5, 'e'),
    ]),
  )
  graph.run()

  const seeded = tracker.getResult()
  // All five rows are reported, within a small message budget
  expect(seeded.sortedResults.length).toBe(5)
  expect(seeded.messageCount).toBeLessThanOrEqual(6)
  // The raw messages must carry indices that follow the value order
  expect(checkLexicographicOrder([...seeded.messages])).toBe(true)

  tracker.reset()

  // Step 2: re-key id 3 from 'c' to 'a-' so it moves towards the front
  input.sendData(new MultiSet([entry(3, 'a-'), entry(3, 'c', -1)]))
  graph.run()

  const moved = tracker.getResult()
  // The update must be incremental: some messages, but only a handful
  expect(moved.messageCount).toBeLessThanOrEqual(4)
  expect(moved.messageCount).toBeGreaterThan(0)
  // Only the `null` key may appear in the update
  assertOnlyKeysAffected('topKFractional update', moved.messages, [null])
  // The update messages, taken on their own, must also be consistently ordered
  if (moved.messages.length > 0) {
    expect(checkLexicographicOrder([...moved.messages])).toBe(true)
  }
})
it('should support duplicate ordering keys', () => {
  type Row = { id: number; value: string }
  // Builds one multiset entry under the single `null` key
  const entry = (
    id: number,
    value: string,
    multiplicity = 1,
  ): [[null, Row], number] => [[null, { id, value }], multiplicity]

  const graph = new D2()
  const input = graph.newInput<[null, Row]>()
  const tracker = new MessageTracker<[null, [Row, string]]>()
  input.pipe(
    topK((left, right) => left.value.localeCompare(right.value)),
    output((message) => {
      tracker.addMessage(message)
    }),
  )
  graph.finalize()

  // Seed with a, b, c, d, e
  input.sendData(
    new MultiSet([
      entry(1, 'a'),
      entry(2, 'b'),
      entry(3, 'c'),
      entry(4, 'd'),
      entry(5, 'e'),
    ]),
  )
  graph.run()

  const seeded = tracker.getResult()
  expect(seeded.sortedResults.length).toBe(5)
  expect(checkLexicographicOrder([...seeded.messages])).toBe(true)

  tracker.reset()

  // Add a second row (id 6) whose ordering key 'c' is already present
  input.sendData(new MultiSet([entry(6, 'c')]))
  graph.run()

  const afterDuplicate = tracker.getResult()
  // A single insertion should cost at most two messages, and at least one
  expect(afterDuplicate.messageCount).toBeLessThanOrEqual(2)
  expect(afterDuplicate.messageCount).toBeGreaterThan(0)
  // Only the `null` key may appear in the update
  assertOnlyKeysAffected(
    'topKFractional duplicate keys',
    afterDuplicate.messages,
    [null],
  )
  // The update messages, taken on their own, must be consistently ordered
  if (afterDuplicate.messages.length > 0) {
    expect(checkLexicographicOrder([...afterDuplicate.messages])).toBe(true)
  }
  // The tracked results since the reset must not be empty
  expect(afterDuplicate.sortedResults.length).toBeGreaterThan(0)
})
it('should ignore duplicate values', () => {
  type Row = { id: number; value: string }
  type Entry = [[null, Row], number]

  const graph = new D2()
  const input = graph.newInput<[null, Row]>()
  // Every batch emitted by the pipeline, in arrival order
  const emitted: any[] = []
  input.pipe(
    topK((left, right) => left.value.localeCompare(right.value)),
    output((message) => {
      emitted.push(message)
    }),
  )
  graph.finalize()

  // This exact entry (same array and object instances) is sent twice below
  const entryForC: Entry = [[null, { id: 3, value: 'c' }], 1]
  const seed: Entry[] = [
    [[null, { id: 1, value: 'a' }], 1],
    [[null, { id: 2, value: 'b' }], 1],
    entryForC,
    [[null, { id: 4, value: 'd' }], 1],
    [[null, { id: 5, value: 'e' }], 1],
  ]
  input.sendData(new MultiSet(seed))
  graph.run()

  // The first batch reports all five rows
  const [firstBatch] = emitted
  expect(firstBatch.getInner().length).toBe(5)

  // Send the entry for 'c' a second time
  input.sendData(new MultiSet([entryForC]))
  graph.run()

  // No further batch may be emitted for the repeated entry
  expect(emitted.length).toBe(1)
})
it('should handle limit and offset correctly', () => {
  type Row = { id: number; value: string }
  // Builds one multiset entry under the single `null` key
  const entry = (
    id: number,
    value: string,
    multiplicity = 1,
  ): [[null, Row], number] => [[null, { id, value }], multiplicity]

  const graph = new D2()
  const input = graph.newInput<[null, Row]>()
  const tracker = new MessageTracker<[null, [Row, string]]>()
  input.pipe(
    topK((a, b) => a.value.localeCompare(b.value), {
      limit: 3,
      offset: 1,
    }),
    output((message) => {
      tracker.addMessage(message)
    }),
  )
  graph.finalize()

  // Seed with a, b, c, d, e
  input.sendData(
    new MultiSet([
      entry(1, 'a'),
      entry(2, 'b'),
      entry(3, 'c'),
      entry(4, 'd'),
      entry(5, 'e'),
    ]),
  )
  graph.run()

  // With offset 1 and limit 3 the window is b, c, d
  const initialResult = tracker.getResult()
  expect(initialResult.sortedResults.length).toBe(3)
  expect(initialResult.messageCount).toBeLessThanOrEqual(6)

  // Sort a copy by fractional index so the tracker's own array is not reordered
  const sortedByIndex = [...initialResult.sortedResults].sort((a, b) => {
    const aIndex = a[1][1]
    const bIndex = b[1][1]
    return aIndex < bIndex ? -1 : aIndex > bIndex ? 1 : 0
  })
  const sortedValues = sortedByIndex.map(
    ([_key, [value, _index]]) => value.value,
  )
  expect(sortedValues).toEqual(['b', 'c', 'd'])

  tracker.reset()

  // Insert 'c+', intended to land between 'c' and 'd', i.e. inside the window
  input.sendData(new MultiSet([entry(6, 'c+')]))
  graph.run()

  const updateResult = tracker.getResult()
  // The update must be incremental: some messages, but only a handful
  expect(updateResult.messageCount).toBeLessThanOrEqual(4)
  expect(updateResult.messageCount).toBeGreaterThan(0)
  // The tracked results must never exceed the limit
  expect(updateResult.sortedResults.length).toBeLessThanOrEqual(3)
  // Only the `null` key may appear in the update
  assertOnlyKeysAffected('topK limit+offset', updateResult.messages, [null])
})
it('should handle elements moving positions correctly', () => {
  type Row = { id: number; value: string }
  // Builds one multiset entry under the single `null` key
  const entry = (
    id: number,
    value: string,
    multiplicity = 1,
  ): [[null, Row], number] => [[null, { id, value }], multiplicity]

  const graph = new D2()
  const input = graph.newInput<[null, Row]>()
  const tracker = new MessageTracker<[null, [Row, string]]>()
  input.pipe(
    topK((a, b) => a.value.localeCompare(b.value)),
    output((message) => {
      tracker.addMessage(message)
    }),
  )
  graph.finalize()

  // Seed with a, b, c, d, e
  input.sendData(
    new MultiSet([
      entry(1, 'a'),
      entry(2, 'b'),
      entry(3, 'c'),
      entry(4, 'd'),
      entry(5, 'e'),
    ]),
  )
  graph.run()

  const initialResult = tracker.getResult()
  expect(initialResult.sortedResults.length).toBe(5)
  expect(initialResult.messageCount).toBeLessThanOrEqual(6)

  // Sort a copy by fractional index so the tracker's own array is not reordered
  const initialSortedByIndex = [...initialResult.sortedResults].sort((a, b) => {
    const aIndex = a[1][1]
    const bIndex = b[1][1]
    return aIndex < bIndex ? -1 : aIndex > bIndex ? 1 : 0
  })
  const initialSortedValues = initialSortedByIndex.map(
    ([_key, [value, _index]]) => value.value,
  )
  expect(initialSortedValues).toEqual(['a', 'b', 'c', 'd', 'e'])

  tracker.reset()

  // Swap the positions of ids 2 and 4: 'b' becomes 'd+' and 'd' becomes 'b+'
  input.sendData(
    new MultiSet([
      entry(2, 'd+'),
      entry(2, 'b', -1),
      entry(4, 'b+'),
      entry(4, 'd', -1),
    ]),
  )
  graph.run()

  const updateResult = tracker.getResult()
  // The update must be incremental: some messages, but only a handful
  expect(updateResult.messageCount).toBeLessThanOrEqual(6)
  expect(updateResult.messageCount).toBeGreaterThan(0)
  // Only the `null` key may appear in the update
  assertOnlyKeysAffected('topK move positions', updateResult.messages, [
    null,
  ])
  // Only incrementality is checked here; the exact final indices are not pinned
  expect(updateResult.sortedResults.length).toBeGreaterThan(0)
})
it('should maintain lexicographic order through multiple updates', () => {
  type Row = { id: number; value: string }
  // Builds one multiset entry under the single `null` key
  const entry = (
    id: number,
    value: string,
    multiplicity = 1,
  ): [[null, Row], number] => [[null, { id, value }], multiplicity]

  const graph = new D2()
  const input = graph.newInput<[null, Row]>()
  const tracker = new MessageTracker<[null, [Row, string]]>()
  input.pipe(
    topK((left, right) => left.value.localeCompare(right.value)),
    output((message) => {
      tracker.addMessage(message)
    }),
  )
  graph.finalize()

  // Seed with every other letter: a, c, e, g, i
  input.sendData(
    new MultiSet([
      entry(1, 'a'),
      entry(3, 'c'),
      entry(5, 'e'),
      entry(7, 'g'),
      entry(9, 'i'),
    ]),
  )
  graph.run()

  const seeded = tracker.getResult()
  expect(seeded.sortedResults.length).toBe(5)
  expect(seeded.messageCount).toBeLessThanOrEqual(6)

  tracker.reset()

  // Update 1: fill the gaps with b, d, f, h
  input.sendData(
    new MultiSet([
      entry(2, 'b'),
      entry(4, 'd'),
      entry(6, 'f'),
      entry(8, 'h'),
    ]),
  )
  graph.run()

  const afterInserts = tracker.getResult()
  // Incremental: some messages, but only a handful
  expect(afterInserts.messageCount).toBeLessThanOrEqual(6)
  expect(afterInserts.messageCount).toBeGreaterThan(0)

  tracker.reset()

  // Update 2: re-key id 3 from 'c' to 'j' (past 'i') and id 7 from 'g' to 'a-'
  input.sendData(
    new MultiSet([
      entry(3, 'j'),
      entry(3, 'c', -1),
      entry(7, 'a-'),
      entry(7, 'g', -1),
    ]),
  )
  graph.run()

  const afterMoves = tracker.getResult()
  // Incremental: some messages, but only a handful
  expect(afterMoves.messageCount).toBeLessThanOrEqual(6)
  expect(afterMoves.messageCount).toBeGreaterThan(0)
  // Only the `null` key may appear in the update
  assertOnlyKeysAffected(
    'topK lexicographic update2',
    afterMoves.messages,
    [null],
  )
})
it('should maintain correct order when cycling through multiple changes', () => {
  type Row = { id: number; value: string }
  // Builds one multiset entry under the single `null` key
  const entry = (
    id: number,
    value: string,
    multiplicity = 1,
  ): [[null, Row], number] => [[null, { id, value }], multiplicity]

  const graph = new D2()
  const input = graph.newInput<[null, Row]>()
  const tracker = new MessageTracker<[null, [Row, string]]>()
  input.pipe(
    topK((a, b) => a.value.localeCompare(b.value)),
    output((message) => {
      tracker.addMessage(message)
    }),
  )
  graph.finalize()

  // Seed with five rows: a, b, c, d, e
  input.sendData(
    new MultiSet([
      entry(1, 'a'),
      entry(2, 'b'),
      entry(3, 'c'),
      entry(4, 'd'),
      entry(5, 'e'),
    ]),
  )
  graph.run()

  const initialResult = tracker.getResult()
  expect(initialResult.sortedResults.length).toBe(5)
  expect(initialResult.messageCount).toBeLessThanOrEqual(6)

  // Sort a copy by fractional index so the tracker's own array is not reordered
  const initialSortedByIndex = [...initialResult.sortedResults].sort((a, b) => {
    const aIndex = a[1][1]
    const bIndex = b[1][1]
    return aIndex < bIndex ? -1 : aIndex > bIndex ? 1 : 0
  })
  const initialSortedValues = initialSortedByIndex.map(
    ([_key, [value, _index]]) => value.value,
  )
  expect(initialSortedValues).toEqual(['a', 'b', 'c', 'd', 'e'])

  tracker.reset()

  // Cycle 1: re-key id 1 from 'a' to 'bb', moving it to just after 'b'
  input.sendData(new MultiSet([entry(1, 'bb'), entry(1, 'a', -1)]))
  graph.run()

  const cycle1Result = tracker.getResult()
  // Incremental: some messages, but only a handful
  expect(cycle1Result.messageCount).toBeLessThanOrEqual(4)
  expect(cycle1Result.messageCount).toBeGreaterThan(0)

  tracker.reset()

  // Cycle 2: re-key id 1 again, from 'bb' to 'dd', moving it to just after 'd'
  input.sendData(new MultiSet([entry(1, 'dd'), entry(1, 'bb', -1)]))
  graph.run()

  const cycle2Result = tracker.getResult()
  // Incremental: some messages, but only a handful
  expect(cycle2Result.messageCount).toBeLessThanOrEqual(4)
  expect(cycle2Result.messageCount).toBeGreaterThan(0)
  // Only the `null` key may appear in the update
  assertOnlyKeysAffected('topK cycling update2', cycle2Result.messages, [
    null,
  ])
  // Repeated repositioning of the same row must still produce tracked results
  expect(cycle2Result.sortedResults.length).toBeGreaterThan(0)
})
it('should handle insertion at the start of the sorted collection', () => {
  type Row = { id: number; value: string }

  const graph = new D2()
  const input = graph.newInput<[null, Row]>()
  // Every batch emitted by the pipeline, in arrival order
  const emitted: any[] = []
  input.pipe(
    topK((left, right) => left.value.localeCompare(right.value)),
    output((message) => {
      emitted.push(message)
    }),
  )
  graph.finalize()

  // Seed with b, c, d, e
  input.sendData(
    new MultiSet([
      [[null, { id: 2, value: 'b' }], 1],
      [[null, { id: 3, value: 'c' }], 1],
      [[null, { id: 4, value: 'd' }], 1],
      [[null, { id: 5, value: 'e' }], 1],
    ]),
  )
  graph.run()

  // The first batch reports all four rows with consistently ordered indices
  const seeded = emitted[0].getInner()
  expect(seeded.length).toBe(4)
  expect(checkLexicographicOrder(seeded)).toBe(true)

  // Materialized view of the output, keyed by the JSON form of each row
  const view = new Map()
  for (const [[, [value, index]]] of seeded) {
    view.set(JSON.stringify(value), [value, index])
  }

  // Insert 'a', which sorts ahead of every seeded row
  input.sendData(new MultiSet([[[null, { id: 1, value: 'a' }], 1]]))
  graph.run()

  // One insertion in, exactly one change out
  const delta = emitted[1].getInner()
  expect(delta.length).toBe(1)

  // Fold the changes into the view: negative multiplicity removes, otherwise upserts
  for (const [[, [value, index]], multiplicity] of delta) {
    const viewKey = JSON.stringify(value)
    if (multiplicity < 0) {
      view.delete(viewKey)
    } else {
      view.set(viewKey, [value, index])
    }
  }

  // Re-shape the view into entries the order helpers understand
  const viewAsEntries = Array.from(view.values(), ([value, index]) => [
    [null, [value, index]],
    1,
  ])
  expect(checkLexicographicOrder(viewAsEntries)).toBe(true)
  verifyOrder(viewAsEntries, ['a', 'b', 'c', 'd', 'e'])

  // The index given to 'a' must compare strictly below the index of 'b'
  const indexOfRow = (row: Row) => view.get(JSON.stringify(row))[1]
  const aIndex = indexOfRow({ id: 1, value: 'a' })
  const bIndex = indexOfRow({ id: 2, value: 'b' })
  expect(aIndex < bIndex).toBe(true)
})
it('should handle multiple insertion at the start of the sorted collection', () => {
  type Row = { id: number; value: string }

  const graph = new D2()
  const input = graph.newInput<[null, Row]>()
  // Every batch emitted by the pipeline, in arrival order
  const allMessages: any[] = []
  input.pipe(
    topK((a, b) => a.value.localeCompare(b.value)),
    output((message) => {
      allMessages.push(message)
    }),
  )
  graph.finalize()

  // Seed with c, d, e, f
  input.sendData(
    new MultiSet([
      [[null, { id: 3, value: 'c' }], 1],
      [[null, { id: 4, value: 'd' }], 1],
      [[null, { id: 5, value: 'e' }], 1],
      [[null, { id: 6, value: 'f' }], 1],
    ]),
  )
  graph.run()

  // The first batch reports all four rows with consistently ordered indices
  const initialResult = allMessages[0].getInner()
  expect(initialResult.length).toBe(4)
  expect(checkLexicographicOrder(initialResult)).toBe(true)

  // Materialized view of the output, keyed by the JSON form of each row
  const currentState = new Map()
  for (const [[, [value, index]]] of initialResult) {
    currentState.set(JSON.stringify(value), [value, index])
  }

  // Insert 'a' and 'b' in one batch; both sort ahead of every seeded row
  input.sendData(
    new MultiSet([
      [[null, { id: 1, value: 'a' }], 1],
      [[null, { id: 2, value: 'b' }], 1],
    ]),
  )
  graph.run()

  // Two insertions in, exactly two changes out
  const changes = allMessages[1].getInner()
  expect(changes.length).toBe(2)

  // Fold the changes into the view: negative multiplicity removes, otherwise upserts
  for (const [[, [value, index]], multiplicity] of changes) {
    const stateKey = JSON.stringify(value)
    if (multiplicity < 0) {
      currentState.delete(stateKey)
    } else {
      currentState.set(stateKey, [value, index])
    }
  }

  // Re-shape the view into entries the order helpers understand
  const currentStateArray = Array.from(
    currentState.values(),
    ([value, index]) => [[null, [value, index]], 1],
  )
  expect(checkLexicographicOrder(currentStateArray)).toBe(true)

  // Sorting by index must now yield all six rows in value order
  const expectedOrder = ['a', 'b', 'c', 'd', 'e', 'f']
  verifyOrder(currentStateArray, expectedOrder)
})
})
})