y-mongodb-provider
MongoDB database adapter for Yjs
import * as Y from 'yjs';
import * as binary from 'lib0/binary';
import * as encoding from 'lib0/encoding';
import * as decoding from 'lib0/decoding';
import { Buffer } from 'buffer';
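// Threshold of stored updates after which a document is typically compacted via flushDocument (not enforced here).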
export const PREFERRED_TRIM_SIZE = 400;
const MAX_DOCUMENT_SIZE = 15000000; // ~15 MB, leaving headroom for metadata below MongoDB's 16 MB document limit
/**
 * Remove all documents from the database whose clock is in the range [from, to).
 *
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @param {number} from Lower bound (inclusive)
 * @param {number} to Upper bound (exclusive)
 * @returns {Promise<import('mongodb').BulkWriteResult>} Contains the status of the operation
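 * @example
 * // Minimal usage sketch (assumes `db` is a connected MongoAdapter):
 * // deletes the stored updates with clocks 0..4 for "my-doc".
 * await clearUpdatesRange(db, 'my-doc', 0, 5);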
 */
export const clearUpdatesRange = async (db, docName, from, to) =>
	db.delete({
		docName,
		clock: {
			$gte: from,
			$lt: to,
		},
	});
/**
 * Create a unique key for an update message.
 * @param {string} docName
 * @param {number} [clock] must be unique
 * @return {{version: "v1"; docName: string; action: "update"; clock?: number; }}
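 * @example
 * // Key shapes produced:
 * // createDocumentUpdateKey('my-doc', 3) -> { version: 'v1', action: 'update', docName: 'my-doc', clock: 3 }
 * // createDocumentUpdateKey('my-doc')    -> { version: 'v1', action: 'update', docName: 'my-doc' }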
 */
const createDocumentUpdateKey = (docName, clock) => {
	if (clock !== undefined) {
		return {
			version: 'v1',
			action: 'update',
			docName,
			clock,
		};
	} else {
		return {
			version: 'v1',
			action: 'update',
			docName,
		};
	}
};
/**
 * We have a separate state vector key so we can iterate efficiently over all documents.
 * @param {string} docName
 * @return {{docName: string; version: "v1_sv"}}
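 * @example
 * createDocumentStateVectorKey('my-doc'); // -> { docName: 'my-doc', version: 'v1_sv' }
 * // Querying on { version: 'v1_sv' } alone therefore lists every known document (see getAllSVDocs).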
 */
export const createDocumentStateVectorKey = (docName) => ({
	docName,
	version: 'v1_sv',
});
/**
 * @param {string} docName
 * @param {string} metaKey
 * @return {{docName: string; version: "v1"; metaKey: string; }}
 */
export const createDocumentMetaKey = (docName, metaKey) => ({
	version: 'v1',
	docName,
	metaKey: `meta_${metaKey}`,
});
/**
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @return {Promise<void>}
 */
export const flushDB = (db) => db.flush();
/**
 * Convert MongoDB update documents into an array of update buffers that the
 * application can process. This handles both complete updates and large updates
 * that were split into smaller 'parts' because of MongoDB's document size limit.
 * For split updates, all parts are collected and concatenated; the parts of a
 * split update are assumed to be stored in order, directly after the document
 * with part number 1.
 *
 * @param {{ _id: import("mongodb").ObjectId; action: string; version: string; docName: string; clock: number; part?: number; value: import("mongodb").Binary; }[]} docs
 * @return {Uint8Array[]}
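 * @example
 * // Illustrative input (fields trimmed): one whole update, then an update that
 * // was split into parts 1..2 under the same clock.
 * //   [{ clock: 0, value: a }, { clock: 1, part: 1, value: b1 }, { clock: 1, part: 2, value: b2 }]
 * // Result: [a.buffer, Buffer.concat([b1.buffer, b2.buffer])]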
 */
const convertMongoUpdates = (docs) => {
	if (!Array.isArray(docs) || !docs.length) return [];
	/** @type {Uint8Array[]} */
	const updates = [];
	for (let i = 0; i < docs.length; i++) {
		const doc = docs[i];
		if (!doc.part) {
			updates.push(doc.value.buffer);
		} else if (doc.part === 1) {
			// merge the docs together that got split because of MongoDB's size limit
			const parts = [doc.value.buffer];
			let j;
			let currentPartId = doc.part;
			for (j = i + 1; j < docs.length; j++) {
				const part = docs[j];
				if (part.part && part.clock === doc.clock) {
					if (currentPartId !== part.part - 1) {
						throw new Error('Could not merge updates together because a part is missing!');
					}
					parts.push(part.value.buffer);
					currentPartId = part.part;
				} else {
					break;
				}
			}
			updates.push(Buffer.concat(parts));
			// set i to j - 1 because we already processed all parts
			i = j - 1;
		}
	}
	return updates;
};
/**
 * Get all document updates for a specific document.
 *
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @return {Promise<Uint8Array[]>}
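 * @example
 * // Sketch (assumes `db` is a connected MongoAdapter); split updates are
 * // already re-assembled by convertMongoUpdates.
 * const updates = await getMongoUpdates(db, 'my-doc');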
 */
export const getMongoUpdates = async (db, docName) => {
	const docs = await db.find(createDocumentUpdateKey(docName));
	// TODO: I don't know how to type this without actual TypeScript
	// @ts-ignore
	return convertMongoUpdates(docs);
};
/**
 * Get the highest clock stored for a document.
 *
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @return {Promise<number>} Returns -1 if this document doesn't exist yet
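 * @example
 * const clock = await getCurrentUpdateClock(db, 'my-doc'); // highest stored clock, or -1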
 */
export const getCurrentUpdateClock = (db, docName) =>
	db
		.findOne(
			{
				...createDocumentUpdateKey(docName, 0),
				clock: {
					$gte: 0,
					$lt: binary.BITS32,
				},
			},
			{ reverse: true },
		)
		.then((update) => {
			if (!update) {
				return -1;
			} else {
				return update.clock;
			}
		});
/**
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @param {Uint8Array} sv state vector
 * @param {number} clock current clock of the document, so we can determine
 * when this state vector was created
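 *
 * The value is stored as varUint(clock) followed by the length-prefixed state
 * vector bytes; decodeMongodbStateVector reads it back in the same order.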
 */
const writeStateVector = async (db, docName, sv, clock) => {
	const encoder = encoding.createEncoder();
	encoding.writeVarUint(encoder, clock);
	encoding.writeVarUint8Array(encoder, sv);
	await db.put(createDocumentStateVectorKey(docName), {
		value: encoding.toUint8Array(encoder),
	});
};
/**
 * Store a single document update.
 *
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @param {Uint8Array} update
 * @return {Promise<number>} Returns the clock of the stored update
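 * @example
 * // Minimal sketch (assumes `db` is a connected MongoAdapter and `ydoc` is a Y.Doc):
 * const clock = await storeUpdate(db, 'my-doc', Y.encodeStateAsUpdate(ydoc));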
 */
export const storeUpdate = async (db, docName, update) => {
	const clock = await getCurrentUpdateClock(db, docName);
	if (clock === -1) {
		// make sure that a state vector is always written, so we can search for available documents
		const ydoc = new Y.Doc();
		Y.applyUpdate(ydoc, update);
		const sv = Y.encodeStateVector(ydoc);
		await writeStateVector(db, docName, sv, 0);
	}
	// MongoDB has a maximum document size of 16 MB;
	// if our buffer exceeds MAX_DOCUMENT_SIZE, we store the update across multiple documents
	if (update.length <= MAX_DOCUMENT_SIZE) {
		await db.put(createDocumentUpdateKey(docName, clock + 1), {
			value: update,
		});
	} else {
		const totalChunks = Math.ceil(update.length / MAX_DOCUMENT_SIZE);
		const putPromises = [];
		for (let i = 0; i < totalChunks; i++) {
			const start = i * MAX_DOCUMENT_SIZE;
			const end = Math.min(start + MAX_DOCUMENT_SIZE, update.length);
			const chunk = update.subarray(start, end);
			putPromises.push(
				db.put({ ...createDocumentUpdateKey(docName, clock + 1), part: i + 1 }, { value: chunk }),
			);
		}
		await Promise.all(putPromises);
	}
	return clock + 1;
};
/**
 * For now this is a helper method that creates a Y.Doc and then re-encodes a document update.
 * In the future this will be handled by Yjs without creating a Y.Doc (constant memory consumption).
 *
 * @param {Array<Uint8Array>} updates
 * @return {{update:Uint8Array, sv: Uint8Array}}
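 * @example
 * // Sketch: compact everything currently stored for a document into a single
 * // update plus its state vector (assumes `db` is a connected MongoAdapter).
 * const { update, sv } = mergeUpdates(await getMongoUpdates(db, 'my-doc'));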
 */
export const mergeUpdates = (updates) => {
	const ydoc = new Y.Doc();
	ydoc.transact(() => {
		for (let i = 0; i < updates.length; i++) {
			Y.applyUpdate(ydoc, updates[i]);
		}
	});
	return { update: Y.encodeStateAsUpdate(ydoc), sv: Y.encodeStateVector(ydoc) };
};
/**
 * Decode a state-vector value read from MongoDB into its clock and state vector.
 * @param {import("mongodb").Binary} buf
 * @return {{ sv: Uint8Array, clock: number }}
 */
export const decodeMongodbStateVector = (buf) => {
	let decoder;
	if (Buffer.isBuffer(buf)) {
		decoder = decoding.createDecoder(buf);
	} else if (Buffer.isBuffer(buf?.buffer)) {
		decoder = decoding.createDecoder(buf.buffer);
	} else {
		throw new Error('No buffer provided at decodeMongodbStateVector()');
	}
	const clock = decoding.readVarUint(decoder);
	const sv = decoding.readVarUint8Array(decoder);
	return { sv, clock };
};
/**
 * Read the stored state vector and its clock for a document.
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @return {Promise<{ sv: Uint8Array | null, clock: number }>}
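 * @example
 * const { sv, clock } = await readStateVector(db, 'my-doc'); // sv is null if the document doesn't exist yet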
 */
export const readStateVector = async (db, docName) => {
	const doc = await db.findOne({ ...createDocumentStateVectorKey(docName) });
	if (!doc?.value) {
		// no state vector created yet or no document exists
		return { sv: null, clock: -1 };
	}
	return decodeMongodbStateVector(doc.value);
};
/**
 * Fetch the state vector documents of all stored Yjs documents.
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 */
export const getAllSVDocs = async (db) => db.find({ version: 'v1_sv' });
/**
 * Merge all MongoDB documents of the same Yjs document together.
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @param {Uint8Array} stateAsUpdate
 * @param {Uint8Array} stateVector
 * @return {Promise<number>} returns the clock of the flushed doc
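 * @example
 * // Typical compaction flow (sketch): merge all stored updates of a document,
 * // then replace them with the single merged update.
 * const { update, sv } = mergeUpdates(await getMongoUpdates(db, docName));
 * const clock = await flushDocument(db, docName, update, sv);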
 */
export const flushDocument = async (db, docName, stateAsUpdate, stateVector) => {
	const clock = await storeUpdate(db, docName, stateAsUpdate);
	await writeStateVector(db, docName, stateVector, clock);
	await clearUpdatesRange(db, docName, 0, clock);
	return clock;
};