/*
 * y-mongodb-provider — MongoDB database adapter for Yjs
 * (bundled CommonJS build)
 */
'use strict';
var Y = require('yjs');
var binary = require('lib0/dist/binary.cjs');
var promise = require('lib0/dist/promise.cjs');
var mongodb = require('mongodb');
var encoding = require('lib0/dist/encoding.cjs');
var decoding = require('lib0/dist/decoding.cjs');
var buffer = require('buffer');
/**
 * Convert a CommonJS module object into a frozen, ES-module-like namespace.
 * Every own key except `default` is re-exposed (existing getters are kept as-is),
 * and the original module itself is attached as `default`.
 *
 * @param {any} e The required CommonJS module (may be null/undefined).
 * @returns {object} Frozen namespace object with a null prototype.
 */
function _interopNamespaceDefault(e) {
	const n = Object.create(null);
	if (e) {
		for (const k of Object.keys(e)) {
			if (k === 'default') continue;
			const d = Object.getOwnPropertyDescriptor(e, k);
			Object.defineProperty(n, k, d.get ? d : {
				enumerable: true,
				get: () => e[k],
			});
		}
	}
	n.default = e;
	return Object.freeze(n);
}
// Wrap each required CommonJS module in a frozen, ES-module-like namespace
// object via _interopNamespaceDefault (mirrors `import * as ns` semantics).
var Y__namespace = /*#__PURE__*/_interopNamespaceDefault(Y);
var binary__namespace = /*#__PURE__*/_interopNamespaceDefault(binary);
var promise__namespace = /*#__PURE__*/_interopNamespaceDefault(promise);
var encoding__namespace = /*#__PURE__*/_interopNamespaceDefault(encoding);
var decoding__namespace = /*#__PURE__*/_interopNamespaceDefault(decoding);
/**
 * Extract the database name from a MongoDB connection string.
 * The database is the URL path with its leading "/" removed.
 *
 * @param {string} connectionString e.g. "mongodb://host:27017/mydb"
 * @returns {string} Database name ("" when the connection string has no path).
 */
function getMongoDbDatabaseName(connectionString) {
	const { pathname } = new URL(connectionString);
	return pathname.slice(1);
}
class MongoAdapter {
	/**
	 * Thin wrapper around the MongoDB driver that maps y-mongodb "keys"
	 * (plain filter objects) onto collections and documents.
	 *
	 * @param {string|{client: MongoClient, db: import('mongodb').Db}} dbConnection A MongoDB
	 * connection string, or an object holding an already-created MongoClient (`client`) plus
	 * a database handle (`db`).
	 * @param {object} opts
	 * @param {string} opts.collection Name of the collection where all documents are stored.
	 * @param {boolean} opts.multipleCollections When true, every Yjs document lives in its own
	 * collection (named after the doc) and `opts.collection` is ignored.
	 */
	constructor(dbConnection, { collection, multipleCollections }) {
		this.collection = collection;
		this.multipleCollections = multipleCollections;
		if (typeof dbConnection === 'string') {
			// Connection string: create and own a fresh client.
			this.client = new mongodb.MongoClient(dbConnection);
			this.db = this.client.db(getMongoDbDatabaseName(dbConnection));
		} else if (typeof dbConnection === 'object' && dbConnection.client && dbConnection.db) {
			// Externally managed client/db pair: just keep the references.
			this.client = dbConnection.client;
			this.db = dbConnection.db;
		} else {
			throw new Error(
				'Invalid dbConnection. Must be a connection string or an object with client and db.',
			);
		}
		/*
		NOTE: client.connect() is optional since mongodb v4.7. It is still useful to surface
		misconfiguration (auth, server not started, bad connection string) early at startup,
		but it is intentionally not called here.
		*/
	}
	/**
	 * Resolve which collection a query targets.
	 * @param {import('mongodb').Filter<import('mongodb').Document>} query
	 * @returns {string} collectionName
	 */
	_getCollectionName({ docName }) {
		return this.multipleCollections ? docName : this.collection;
	}
	/**
	 * Run a filter query sorted by (clock, part).
	 * @param {import('mongodb').Filter<import('mongodb').Document>} query
	 * @param {{limit?: number; reverse?: boolean;}} [options]
	 * @returns {Promise<import('mongodb').WithId<import('mongodb').Document>[]>}
	 */
	find(query, options) {
		const { limit = 0, reverse = false } = options || {};
		/** @type {{ clock: 1 | -1, part: 1 | -1 }} */
		const sort = reverse ? { clock: -1, part: 1 } : { clock: 1, part: 1 };
		return this.db
			.collection(this._getCollectionName(query))
			.find(query, { limit, sort })
			.toArray();
	}
	/**
	 * Like find(), but resolves with the first matching document only.
	 * @param {import('mongodb').Filter<import('mongodb').Document>} query
	 * @param {{limit?: number; reverse?: boolean;}} [options]
	 * @returns {Promise<import('mongodb').WithId<import('mongodb').Document> | null>}
	 */
	async findOne(query, options) {
		const [first] = await this.find(query, options);
		return first || null;
	}
	/**
	 * Upsert one document and return the stored result.
	 * @param {import('mongodb').Filter<import('mongodb').Document>} query
	 * @param {import('mongodb').UpdateFilter<import('mongodb').Document>} values
	 * @returns {Promise<import('mongodb').WithId<import('mongodb').Document> | null>} Stored document
	 * @throws {Error} when docName, version or value are missing.
	 */
	async put(query, values) {
		if (!query.docName || !query.version || !values.value) {
			throw new Error('Document and version must be provided');
		}
		const target = this.db.collection(this._getCollectionName(query));
		await target.updateOne(query, { $set: values }, { upsert: true });
		return this.findOne(query);
	}
	/**
	 * Remove every document matching the filter via an ordered bulk operation.
	 * @param {import('mongodb').Filter<import('mongodb').Document>} query
	 * @returns {Promise<import('mongodb').BulkWriteResult>} Contains status of the operation
	 */
	delete(query) {
		const target = this.db.collection(this._getCollectionName(query));
		/*
		Note from mongodb v4.7 release notes: explicit sessions (client.startSession) and
		initializeOrderedBulkOp / initializeUnorderedBulkOp cannot be used until
		MongoClient.connect is first called. Unclear whether that affects this code path.
		*/
		const batch = target.initializeOrderedBulkOp();
		batch.find(query).delete();
		return batch.execute();
	}
	/**
	 * Close connection to the MongoDB instance.
	 */
	async close() {
		await this.client.close();
	}
	/**
	 * List the names of all collections stored on the MongoDB instance.
	 * @returns {Promise<string[]>}
	 */
	async getCollectionNames() {
		const infos = await this.db.listCollections().toArray();
		return infos.map(({ name }) => name);
	}
	/**
	 * Drop the whole database, then close the connection.
	 */
	async flush() {
		await this.db.dropDatabase();
		await this.client.close();
	}
	/**
	 * Drop a single collection.
	 * @param {string} collectionName
	 */
	dropCollection(collectionName) {
		return this.db.collection(collectionName).drop();
	}
}
// Fallback for the flush threshold (number of stored updates before they get
// merged into one document); normally superseded by the flushSize option.
const PREFERRED_TRIM_SIZE = 400;
// Updates above this byte size are split across several MongoDB documents
// (storeUpdate), staying safely below MongoDB's 16MB per-document cap.
const MAX_DOCUMENT_SIZE = 15000000; // ~15MB (plus space for metadata)
/**
 * Delete every stored update of `docName` whose clock lies in [from, to).
 *
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @param {number} from Inclusive lower bound
 * @param {number} to Exclusive upper bound
 * @returns {Promise<import('mongodb').BulkWriteResult>} Contains status of the operation
 */
const clearUpdatesRange = async (db, docName, from, to) => {
	const clockRange = { $gte: from, $lt: to };
	return db.delete({ docName, clock: clockRange });
};
/**
 * Build the unique key identifying one update message; when `clock` is
 * omitted the key matches every update of the document.
 * @param {string} docName
 * @param {number} [clock] must be unique
 * @return {{version: "v1"; docName: string; action: "update"; clock?: number; }}
 */
const createDocumentUpdateKey = (docName, clock) => {
	const key = {
		version: 'v1',
		action: 'update',
		docName,
	};
	if (clock !== undefined) {
		key.clock = clock;
	}
	return key;
};
/**
 * Key of the dedicated state-vector document; kept under its own version tag
 * ("v1_sv") so all documents can be enumerated efficiently.
 * @param {string} docName
 * @return {{docName: string; version: "v1_sv"}}
 */
const createDocumentStateVectorKey = (docName) => {
	return { docName, version: 'v1_sv' };
};
/**
 * Key of a meta entry attached to a document; the metaKey gets a "meta_"
 * prefix to keep it apart from update/state-vector entries.
 * @param {string} docName
 * @param {string} metaKey
 * @return {{docName: string; version: "v1"; metaKey: string; }}
 */
const createDocumentMetaKey = (docName, metaKey) => {
	return {
		version: 'v1',
		docName,
		metaKey: `meta_${metaKey}`,
	};
};
/**
 * Drop the whole Yjs database behind the adapter.
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @return {Promise<void>}
 */
function flushDB(db) {
	return db.flush();
}
/**
 * Convert the MongoDB documents of one Yjs document into raw update buffers.
 *
 * Oversized updates are persisted as consecutive "part" documents (see
 * storeUpdate); those are re-assembled here. Parts are expected to appear
 * directly after their part-1 document, in order — which the (clock, part)
 * sort of MongoAdapter.find guarantees.
 *
 * @param {{ _id: import("mongodb").ObjectId; action: string; version: string; docName: string; clock: number; part?: number; value: import("mongodb").Binary; }[]} docs
 * @return {Uint8Array[]}
 */
const convertMongoUpdates = (docs) => {
	if (!Array.isArray(docs) || !docs.length) return [];
	/** @type {Uint8Array[]} */
	const updates = [];
	let i = 0;
	while (i < docs.length) {
		const doc = docs[i];
		if (!doc.part) {
			// Regular, unsplit update.
			updates.push(doc.value.buffer);
			i += 1;
		} else if (doc.part === 1) {
			// Re-assemble a split update: collect the consecutive parts sharing this clock.
			const pieces = [doc.value.buffer];
			let lastPart = 1;
			let j = i + 1;
			while (j < docs.length) {
				const candidate = docs[j];
				if (!candidate.part || candidate.clock !== doc.clock) break;
				if (candidate.part !== lastPart + 1) {
					throw new Error('Couldnt merge updates together because a part is missing!');
				}
				pieces.push(candidate.value.buffer);
				lastPart = candidate.part;
				j += 1;
			}
			updates.push(buffer.Buffer.concat(pieces));
			// Continue after the last consumed part.
			i = j;
		} else {
			// Orphan part (no leading part 1): skipped, matching the original iteration.
			i += 1;
		}
	}
	return updates;
};
/**
 * Load and decode all persisted updates of one document.
 *
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @return {Promise<Uint8Array[]>}
 */
const getMongoUpdates = async (db, docName) => {
	const rows = await db.find(createDocumentUpdateKey(docName));
	// TODO: I dont know how to type this without actual typescript
	// @ts-ignore
	return convertMongoUpdates(rows);
};
/**
 * Look up the highest clock currently stored for a document.
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @return {Promise<number>} Returns -1 if this document doesn't exist yet
 */
const getCurrentUpdateClock = async (db, docName) => {
	// Query the newest update (reverse sort) over the full clock range.
	const latest = await db.findOne(
		{
			...createDocumentUpdateKey(docName, 0),
			clock: {
				$gte: 0,
				$lt: binary__namespace.BITS32,
			},
		},
		{ reverse: true },
	);
	return latest ? latest.clock : -1;
};
/**
 * Persist the state vector of a document together with the clock at which it
 * was taken.
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @param {Uint8Array} sv state vector
 * @param {number} clock current clock of the document so we can determine
 * when this statevector was created
 */
const writeStateVector = async (db, docName, sv, clock) => {
	const encoder = encoding__namespace.createEncoder();
	// Clock first, then the state vector — decodeMongodbStateVector relies on this order.
	encoding__namespace.writeVarUint(encoder, clock);
	encoding__namespace.writeVarUint8Array(encoder, sv);
	const value = encoding__namespace.toUint8Array(encoder);
	await db.put(createDocumentStateVectorKey(docName), { value });
};
/**
 * Persist one incremental update under the next free clock.
 * Updates larger than MAX_DOCUMENT_SIZE are split into numbered "part"
 * documents (mongodb caps a single document at 16MB).
 *
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @param {Uint8Array} update
 * @return {Promise<number>} Returns the clock of the stored update
 */
const storeUpdate = async (db, docName, update) => {
	const clock = await getCurrentUpdateClock(db, docName);
	if (clock === -1) {
		// First update ever: also persist a state vector so the document is
		// discoverable when enumerating all docs.
		const ydoc = new Y__namespace.Doc();
		Y__namespace.applyUpdate(ydoc, update);
		await writeStateVector(db, docName, Y__namespace.encodeStateVector(ydoc), 0);
	}
	const nextClock = clock + 1;
	if (update.length <= MAX_DOCUMENT_SIZE) {
		await db.put(createDocumentUpdateKey(docName, nextClock), {
			value: update,
		});
	} else {
		// Split the oversized update into 1-based, consecutively numbered parts.
		const totalChunks = Math.ceil(update.length / MAX_DOCUMENT_SIZE);
		const writes = Array.from({ length: totalChunks }, (_, idx) => {
			const start = idx * MAX_DOCUMENT_SIZE;
			const end = Math.min(start + MAX_DOCUMENT_SIZE, update.length);
			return db.put(
				{ ...createDocumentUpdateKey(docName, nextClock), part: idx + 1 },
				{ value: update.subarray(start, end) },
			);
		});
		await Promise.all(writes);
	}
	return nextClock;
};
/**
 * For now this is a helper method that creates a Y.Doc and then re-encodes a document update.
 * In the future this will be handled by Yjs without creating a Y.Doc (constant memory consumption).
 *
 * @param {Array<Uint8Array>} updates
 * @return {{update:Uint8Array, sv: Uint8Array}}
 */
const mergeUpdates = (updates) => {
	const ydoc = new Y__namespace.Doc();
	// Apply everything inside one transaction so observers fire only once.
	ydoc.transact(() => {
		updates.forEach((u) => Y__namespace.applyUpdate(ydoc, u));
	});
	const update = Y__namespace.encodeStateAsUpdate(ydoc);
	const sv = Y__namespace.encodeStateVector(ydoc);
	return { update, sv };
};
/**
 * Decode a persisted state-vector entry back into (sv, clock).
 * Accepts either a raw Buffer or a mongodb Binary-like object exposing a
 * `.buffer` Buffer.
 * @param {import("mongodb").Binary} buf
 * @return {{ sv: Uint8Array, clock: number }}
 */
const decodeMongodbStateVector = (buf) => {
	let raw;
	if (buffer.Buffer.isBuffer(buf)) {
		raw = buf;
	} else if (buffer.Buffer.isBuffer(buf?.buffer)) {
		raw = buf.buffer;
	} else {
		throw new Error('No buffer provided at decodeMongodbStateVector()');
	}
	const decoder = decoding__namespace.createDecoder(raw);
	// Order matches writeStateVector: clock first, then the state vector.
	const clock = decoding__namespace.readVarUint(decoder);
	const sv = decoding__namespace.readVarUint8Array(decoder);
	return { sv, clock };
};
/**
 * Read the persisted state vector of a document.
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @returns {Promise<{ sv: Uint8Array | null, clock: number }>} sv=null / clock=-1
 * when no state vector has been stored yet.
 */
const readStateVector = async (db, docName) => {
	const doc = await db.findOne(createDocumentStateVectorKey(docName));
	if (!doc?.value) {
		// No state vector created yet, or the document does not exist.
		return { sv: null, clock: -1 };
	}
	return decodeMongodbStateVector(doc.value);
};
/**
 * Fetch every state-vector document (one per stored Yjs doc).
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 */
const getAllSVDocs = async (db) => {
	return db.find({ version: 'v1_sv' });
};
/**
 * Merge all MongoDB documents of the same yjs document together: store the
 * merged state as a single update, refresh the state vector, then delete the
 * superseded incremental updates.
 * @param {import('./mongo-adapter.js').MongoAdapter} db
 * @param {string} docName
 * @param {Uint8Array} stateAsUpdate
 * @param {Uint8Array} stateVector
 * @return {Promise<number>} returns the clock of the flushed doc
 */
const flushDocument = async (db, docName, stateAsUpdate, stateVector) => {
	const flushedClock = await storeUpdate(db, docName, stateAsUpdate);
	await writeStateVector(db, docName, stateVector, flushedClock);
	await clearUpdatesRange(db, docName, 0, flushedClock);
	return flushedClock;
};
class MongodbPersistence {
	/**
	 * Create a y-mongodb persistence instance.
	 * @param {string|{client: import('mongodb').MongoClient, db: import('mongodb').Db}} connectionObj A MongoDB connection string or an object containing a MongoClient instance (`client`) and a database instance (`db`).
	 * @param {object} [opts] Additional optional parameters.
	 * @param {string} [opts.collectionName] Name of the collection where all
	 * documents are stored. Default: "yjs-writings"
	 * @param {boolean} [opts.multipleCollections] When set to true, each document gets
	 * an own collection (instead of all documents stored in the same one). When set to true,
	 * the option collectionName gets ignored. Default: false
	 * @param {number} [opts.flushSize] The number of stored transactions needed until
	 * they are merged automatically into one Mongodb document. Default: 400
	 */
	constructor(connectionObj, opts = {}) {
		const { collectionName = 'yjs-writings', multipleCollections = false, flushSize = 400 } = opts;
		// Fail fast on misconfiguration — each option gets a descriptive error.
		if (typeof collectionName !== 'string' || !collectionName) {
			throw new Error(
				'Constructor option "collectionName" is not a valid string. Either dont use this option (default is "yjs-writings") or use a valid string! Take a look into the Readme for more information: https://github.com/MaxNoetzold/y-mongodb-provider#persistence--mongodbpersistenceconnectionlink-string-options-object',
			);
		}
		if (typeof multipleCollections !== 'boolean') {
			throw new Error(
				'Constructor option "multipleCollections" is not a boolean. Either dont use this option (default is "false") or use a valid boolean! Take a look into the Readme for more information: https://github.com/MaxNoetzold/y-mongodb-provider#persistence--mongodbpersistenceconnectionlink-string-options-object',
			);
		}
		if (typeof flushSize !== 'number' || flushSize <= 0) {
			throw new Error(
				'Constructor option "flushSize" is not a valid number. Either dont use this option (default is "400") or use a valid number larger than 0! Take a look into the Readme for more information: https://github.com/MaxNoetzold/y-mongodb-provider#persistence--mongodbpersistenceconnectionlink-string-options-object',
			);
		}
		// All raw MongoDB access goes through this adapter. It is captured by the
		// `_transact` closure below rather than being exposed on `this`.
		const db = new MongoAdapter(connectionObj, {
			collection: collectionName,
			multipleCollections,
		});
		// NOTE(review): `flushSize` is already defaulted to 400 and validated above,
		// so the `?? PREFERRED_TRIM_SIZE` fallback can never trigger here.
		this.flushSize = flushSize ?? PREFERRED_TRIM_SIZE;
		this.multipleCollections = multipleCollections;
		// scope the queue of the transaction to each docName
		// -> this should allow concurrency for different rooms
		// Idea and adjusted code from: https://github.com/fadiquader/y-mongodb/issues/10
		this.tr = {};
		/**
		 * Execute an transaction on a database. This will ensure that other processes are
		 * currently not writing.
		 *
		 * This is a private method and might change in the future.
		 *
		 * @template T
		 *
		 * @param {function(MongoAdapter):Promise<T>} f A transaction that receives the db object
		 * @return {Promise<T>}
		 */
		this._transact = (docName, f) => {
			// Per-document promise chain: transactions for the same docName run
			// strictly one after another; different docNames proceed concurrently.
			if (!this.tr[docName]) {
				this.tr[docName] = promise__namespace.resolve();
			}
			const currTr = this.tr[docName];
			let nextTr = null;
			nextTr = (async () => {
				// Wait for the previous transaction of this doc before running ours.
				await currTr;
				let res = /** @type {any} */ (null);
				try {
					res = await f(db);
				} catch (err) {
					// Errors are logged and swallowed so the queue keeps draining;
					// the caller receives `null` instead of a rejection.
					// eslint-disable-next-line no-console
					console.warn('Error during saving transaction', err);
				}
				// once the last transaction for a given docName resolves, remove it from the queue
				if (this.tr[docName] === nextTr) {
					delete this.tr[docName];
				}
				return res;
			})();
			this.tr[docName] = nextTr;
			return this.tr[docName];
		};
	}
	/**
	 * Create a Y.Doc instance with the data persistet in mongodb.
	 * Use this to temporarily create a Yjs document to sync changes or extract data.
	 *
	 * @param {string} docName
	 * @return {Promise<Y.Doc>}
	 */
	getYDoc(docName) {
		return this._transact(docName, async (db) => {
			const updates = await getMongoUpdates(db, docName);
			const ydoc = new Y__namespace.Doc();
			// Apply all stored increments inside one transaction.
			ydoc.transact(() => {
				for (let i = 0; i < updates.length; i++) {
					Y__namespace.applyUpdate(ydoc, updates[i]);
				}
			});
			// Auto-compact once the number of stored increments exceeds flushSize.
			if (updates.length > this.flushSize) {
				await flushDocument(db, docName, Y__namespace.encodeStateAsUpdate(ydoc), Y__namespace.encodeStateVector(ydoc));
			}
			return ydoc;
		});
	}
	/**
	 * Store a single document update to the database.
	 *
	 * @param {string} docName
	 * @param {Uint8Array} update
	 * @return {Promise<number>} Returns the clock of the stored update
	 */
	storeUpdate(docName, update) {
		return this._transact(docName, (db) => storeUpdate(db, docName, update));
	}
	/**
	 * The state vector (describing the state of the persisted document - see https://github.com/yjs/yjs#Document-Updates) is maintained in a separate field and constantly updated.
	 *
	 * This allows you to sync changes without actually creating a Yjs document.
	 *
	 * @param {string} docName
	 * @return {Promise<Uint8Array>}
	 */
	getStateVector(docName) {
		return this._transact(docName, async (db) => {
			const { clock, sv } = await readStateVector(db, docName);
			let curClock = -1;
			if (sv !== null) {
				curClock = await getCurrentUpdateClock(db, docName);
			}
			// The stored state vector is only trusted when its clock matches the
			// newest stored update; otherwise recompute and flush.
			if (sv !== null && clock === curClock) {
				return sv;
			} else {
				// current state vector is outdated
				const updates = await getMongoUpdates(db, docName);
				const { update, sv: newSv } = mergeUpdates(updates);
				await flushDocument(db, docName, update, newSv);
				return newSv;
			}
		});
	}
	/**
	 * Get the differences directly from the database.
	 * The same as Y.encodeStateAsUpdate(ydoc, stateVector).
	 * @param {string} docName
	 * @param {Uint8Array} stateVector
	 */
	async getDiff(docName, stateVector) {
		const ydoc = await this.getYDoc(docName);
		return Y__namespace.encodeStateAsUpdate(ydoc, stateVector);
	}
	/**
	 * Delete a document, and all associated data from the database.
	 * When option multipleCollections is set, it removes the corresponding collection
	 * @param {string} docName
	 * @return {Promise<void>}
	 */
	clearDocument(docName) {
		return this._transact(docName, async (db) => {
			if (!this.multipleCollections) {
				// Shared collection: remove the state vector and all updates for this doc.
				await db.delete(createDocumentStateVectorKey(docName));
				await clearUpdatesRange(db, docName, 0, binary__namespace.BITS32);
			} else {
				// Per-doc collections: dropping the collection removes everything at once.
				await db.dropCollection(docName);
			}
		});
	}
	/**
	 * Persist some meta information in the database and associate it
	 * with a document. It is up to you what you store here.
	 * You could, for example, store credentials here.
	 *
	 * @param {string} docName
	 * @param {string} metaKey
	 * @param {any} value
	 * @return {Promise<void>}
	 */
	setMeta(docName, metaKey, value) {
		/* Unlike y-leveldb, we simply store the value here without encoding
		 it in a buffer beforehand. */
		return this._transact(docName, async (db) => {
			await db.put(createDocumentMetaKey(docName, metaKey), { value });
		});
	}
	/**
	 * Retrieve a store meta value from the database. Returns undefined if the
	 * metaKey doesn't exist.
	 *
	 * @param {string} docName
	 * @param {string} metaKey
	 * @return {Promise<any>}
	 */
	getMeta(docName, metaKey) {
		return this._transact(docName, async (db) => {
			const res = await db.findOne({
				...createDocumentMetaKey(docName, metaKey),
			});
			if (!res?.value) {
				return undefined;
			}
			return res.value;
		});
	}
	/**
	 * Delete a store meta value.
	 *
	 * @param {string} docName
	 * @param {string} metaKey
	 * @return {Promise<any>}
	 */
	delMeta(docName, metaKey) {
		return this._transact(docName, (db) =>
			db.delete({
				...createDocumentMetaKey(docName, metaKey),
			}),
		);
	}
	/**
	 * Retrieve the names of all stored documents.
	 *
	 * @return {Promise<string[]>}
	 */
	getAllDocNames() {
		return this._transact('global', async (db) => {
			if (this.multipleCollections) {
				// get all collection names from db
				return db.getCollectionNames();
			} else {
				// when all docs are stored in the same collection we just need to get all
				// statevectors and return their names
				const docs = await getAllSVDocs(db);
				return docs.map((doc) => doc.docName);
			}
		});
	}
	/**
	 * Retrieve the state vectors of all stored documents.
	 * You can use this to sync two y-mongodb instances.
	 * !Note: The state vectors might be outdated if the associated document
	 * is not yet flushed. So use with caution.
	 * @return {Promise<{ name: string, sv: Uint8Array, clock: number }[]>}
	 */
	getAllDocStateVectors() {
		return this._transact('global', async (db) => {
			const docs = await getAllSVDocs(db);
			return docs.map((doc) => {
				const { sv, clock } = decodeMongodbStateVector(doc.value);
				return { name: doc.docName, sv, clock };
			});
		});
	}
	/**
	 * Internally y-mongodb stores incremental updates. You can merge all document
	 * updates to a single entry. You probably never have to use this.
	 * It is done automatically every $options.flushsize (default 400) transactions.
	 *
	 * @param {string} docName
	 * @return {Promise<void>}
	 */
	flushDocument(docName) {
		return this._transact(docName, async (db) => {
			const updates = await getMongoUpdates(db, docName);
			const { update, sv } = mergeUpdates(updates);
			await flushDocument(db, docName, update, sv);
		});
	}
	/**
	 * Delete the whole yjs mongodb
	 * @return {Promise<void>}
	 */
	flushDB() {
		return this._transact('global', async (db) => {
			await flushDB(db);
		});
	}
	/**
	 * Closes open database connection
	 * @returns {Promise<void>}
	 */
	destroy() {
		return this._transact('global', async (db) => {
			await db.close();
		});
	}
}
exports.MongodbPersistence = MongodbPersistence;
//# sourceMappingURL=y-mongodb.cjs.map