mongodb-data-sync
Version:
responsible of synchronizing duplicate data between collections
665 lines (569 loc) • 20.9 kB
JavaScript
const synchronizerModel = require("./synchronizer_db");
const {
debug, getObjectPropFromString, DUPLICATE_CODE_ERROR, RESUME_TOKEN_ERROR, CHANGE_STREAM_FATAL_ERROR,
changeStreamErrors
} = require("./utils");
const mysql = require("promise-mysql");
const {ObjectId} = require("mongodb");
let dependenciesMap = {};
const referenceKeyProject = {};
let changeStream;
let mysqlPingInterval;
let dbClient, mysqlOptions;
const mysqlConnection = {};
if (process.env.debug) {
require("console-from");
}
if (process.env.MYSQL) {
try {
mysqlOptions = JSON.parse(process.env.MYSQL);
mysqlOptions.multipleStatements = true;
} catch (e) {
console.error(e);
}
}
process.stdin.resume();
const exitHandler = async (options) => {
await synchronizerModel.closeConnection();
for (const dbName in mysqlConnection) {
if (typeof mysqlConnection[dbName].end === "function") {
await mysqlConnection[dbName].end();
}
}
if (options.exit === true) {
process.exit();
}
};
process.on("exit", exitHandler.bind(null, {}));
process.on("SIGTERM", exitHandler.bind(null, {exit: true}));
process.on("SIGINT", exitHandler.bind(null, {exit: true}));
process.on("SIGUSR1", exitHandler.bind(null, {exit: true}));
process.on("SIGUSR2", exitHandler.bind(null, {exit: true}));
const _checkMySqlConnections = () => {
clearInterval(mysqlPingInterval);
mysqlPingInterval = setInterval(async () => {
for (const dbName in mysqlConnection) {
try {
await mysqlConnection[dbName].ping();
} catch (e) {
await mysql.createConnection({...mysqlOptions, database: dbName});
}
}
}, 100);
};
const _removeResumeTokenAndInit = async function (err) {
if (changeStreamErrors.includes(err.code)) {
changeStream = undefined;
const oldResumeTokenDoc = await synchronizerModel.getResumeToken("sync");
await synchronizerModel.removeResumeToken("sync");
syncAll({cleanOldSyncTasks: true, fromDate: oldResumeTokenDoc.last_update}).catch(console.error);
await _initChangeStream();
return false;
}
return true;
};
const _initChangeStream = async function () {
if (changeStream) {
await changeStream.close();
}
const oldResumeTokenDoc = await synchronizerModel.getResumeToken();
const resumeAfter = oldResumeTokenDoc ? oldResumeTokenDoc.token : undefined;
let {pipeline, fullDocument} = _buildPipeline();
fullDocument = fullDocument ? "updateLookup" : undefined;
if (pipeline[0].$match.$or.length === 0) {
return;
}
changeStream = dbClient.watch(pipeline, {resumeAfter, fullDocument});
changeStream.on("change", next => {
_changeStreamLoop(next);
});
changeStream.on("error", async err => {
if (await _removeResumeTokenAndInit(err) === true) {
console.error(err);
process.exit();
}
});
};
const _buildDependenciesMap = async function () {
const dependencies = await synchronizerModel.getDependencies();
dependenciesMap = {};
const newMysqlDbs = [];
dependencies.forEach(dependency => {
dependency.fields_format = typeof dependency.fields_format === "string" ? JSON.parse(dependency.fields_format) : dependency.fields_format;
dependenciesMap[dependency.db_name] = dependenciesMap[dependency.db_name] || {};
dependenciesMap[dependency.db_name][dependency.reference_collection] = dependenciesMap[dependency.db_name][dependency.reference_collection] || [];
dependenciesMap[dependency.db_name][dependency.dependent_collection] = dependenciesMap[dependency.db_name][dependency.dependent_collection] || [];
referenceKeyProject[dependency.reference_key] = 1;
dependenciesMap[dependency.db_name][dependency.reference_collection].push({
_id: dependency._id,
type: "ref",
dependent_collection: dependency.dependent_collection,
dependent_fields: dependency.dependent_fields,
fields_format: dependency.fields_format,
reference_key: dependency.reference_key,
dependent_key: dependency.dependent_key,
reference_collection_last_update_field: dependency.reference_collection_last_update_field
});
const [mysqlPrefix, mysqlDbName] = dependency.dependent_collection.split(".");
if (mysqlPrefix === "mysql" && !mysqlConnection[mysqlDbName]) {
newMysqlDbs.push(mysqlDbName);
return;
}
dependenciesMap[dependency.db_name][dependency.dependent_collection].push({
_id: dependency._id,
type: "local",
fetch_from_collection: dependency.reference_collection,
local_collection: dependency.dependent_collection,
fields_format: dependency.fields_format,
fetch_from_key: dependency.reference_key,
local_key: dependency.dependent_key,
});
});
for (const i in newMysqlDbs) {
mysqlConnection[newMysqlDbs[i]] = await mysql.createConnection({
...mysqlOptions,
database: newMysqlDbs[i]
});
}
debug("dependenciesMap:\n", JSON.stringify(dependenciesMap));
debug("mysqlConnections:\n", JSON.stringify(Object.keys(mysqlConnection)));
};
const _extractFields = function (fieldsToSync) {
const dependentFields = new Set();
Object.keys(fieldsToSync).forEach(key => {
dependentFields.add(fieldsToSync[key]);
});
return [...dependentFields];
};
const _checkIfNeedToUpdate = function (dependency) {
let id = "new";
if (!dependenciesMap[dependency.db_name] ||
!dependenciesMap[dependency.db_name][dependency.reference_collection] ||
dependenciesMap[dependency.db_name][dependency.reference_collection] && !dependenciesMap[dependency.db_name][dependency.reference_collection].some(dep => {
return dependency.dependent_collection === dep.dependent_collection;
})
) {
return id;
}
dependenciesMap[dependency.db_name][dependency.reference_collection].some(currentDependency => {
if (currentDependency.type === "local" ||
currentDependency.reference_key !== dependency.reference_key ||
currentDependency.dependent_key !== dependency.dependent_key ||
JSON.stringify(currentDependency.dependent_fields) !== JSON.stringify(dependency.dependent_fields)
) {
return false;
}
if (dependency.dependent_collection === currentDependency.dependent_collection) {
id = currentDependency._id;
return true;
}
});
return id;
};
const _checkConflict = function (dependency) {
if (!dependenciesMap[dependency.db_name] ||
!dependenciesMap[dependency.db_name][dependency.dependent_collection] ||
!dependenciesMap[dependency.db_name][dependency.reference_collection]
) {
return;
}
dependenciesMap[dependency.db_name][dependency.reference_collection].forEach(({
reference_key,
dependent_collection
}) => {
if (dependency.dependent_collection === dependent_collection && reference_key !== dependency.reference_key) {
throw new Error(`there can only be one foreignField between the collections ${dependency.reference_collection} , ${dependency.dependent_collection} the current key is ${dependency.reference_key}`);
}
});
dependency.dependent_fields.forEach(field => {
dependenciesMap[dependency.db_name][dependency.dependent_collection].forEach(dependency => {
if (dependency.type !== "local" && dependency.dependent_fields.includes(field)) {
throw new Error("a dependency conflict has accord in field " + field);
}
});
});
};
const _buildPipeline = function () {
let fullDocument = false;
const $or = [];
const $match = {operationType: "update", $or};
const pipeline = [
{$match}
];
Object.keys(dependenciesMap).forEach(dbName => {
Object.keys(dependenciesMap[dbName]).forEach(collName => {
$or.push({"ns.db": dbName, "ns.coll": collName});
});
});
const project = {documentKey: 1, updateDescription: 1, ns: 1};
Object.keys(referenceKeyProject).forEach(key => {
if (key !== "_id") {
project.fullDocument[key] = 1;
fullDocument = true;
}
});
pipeline.push({
$project: project
});
return {pipeline, fullDocument};
};
const _changeStreamLoop = async function (next) {
if (!next || !next._id) {
return;
}
try {
const needToUpdateObj = await _getNeedToUpdateDependencies(next);
if (Object.keys(needToUpdateObj).length === 0) {
return;
}
await synchronizerModel.addResumeToken({token: next._id}, "sync");
await _updateCollections(needToUpdateObj);
} catch (e) {
console.error(e);
}
};
const _getNeedToUpdateDependencies = async function ({ns, documentKey, updateDescription, fullDocument}) {
const needToUpdateObj = {};
if (!dependenciesMap[ns.db] ||
!dependenciesMap[ns.db][ns.coll]
) {
return;
}
const changedFields = updateDescription.updatedFields;
const addRefDep = (dependency) => {
if (dependency.type !== "ref" || dependency.dependent_fields.some(field => changedFields[field]) === false) {
return;
}
const refKey = dependency.reference_key === "_id" ? documentKey._id : fullDocument[dependency.reference_key];
Object.keys(dependency.fields_format).forEach(dependentField => {
if (changedFields[dependency.fields_format[dependentField]] === undefined) {
return;
}
needToUpdateObj[ns.db] = needToUpdateObj[ns.db] || {};
needToUpdateObj[ns.db][dependency.dependent_collection] = needToUpdateObj[ns.db][dependency.dependent_collection] || {
refKey,
dependentKeys: {}
};
if (!needToUpdateObj[ns.db][dependency.dependent_collection].dependentKeys[dependency.dependent_key]) {
needToUpdateObj[ns.db][dependency.dependent_collection].dependentKeys[dependency.dependent_key] = {};
}
needToUpdateObj[ns.db][dependency.dependent_collection].dependentKeys[dependency.dependent_key][dependentField] = changedFields[dependency.fields_format[dependentField]];
});
};
const addLocalDep = async (dbName, dependency) => {
if (dependency.type !== "local" || changedFields[dependency.local_key] === undefined) {
return;
}
const db = dbClient.db(dbName);
const collection = db.collection(dependency.fetch_from_collection);
const projection = {};
Object.keys(dependency.fields_format).forEach(dependentField => {
projection[dependency.fields_format[dependentField]] = 1;
});
const fetchResult = await collection.findOne({[dependency.fetch_from_key]: changedFields[dependency.local_key]}, {projection});
needToUpdateObj[ns.db] = needToUpdateObj[ns.db] || {};
needToUpdateObj[ns.db][dependency.local_collection] = needToUpdateObj[ns.db][dependency.local_collection] || {
_id: documentKey._id,
localKeys: {}
};
Object.keys(dependency.fields_format).forEach(dependentField => {
needToUpdateObj[ns.db][dependency.local_collection].localKeys[dependentField] = fetchResult[dependency.fields_format[dependentField]];
});
};
for (let i in dependenciesMap[ns.db][ns.coll]) {
addRefDep(dependenciesMap[ns.db][ns.coll][i]);
await addLocalDep(ns.db, dependenciesMap[ns.db][ns.coll][i]);
}
debug("needToUpdateObj:\n", JSON.stringify(needToUpdateObj));
return needToUpdateObj;
};
const _updateCollections = function (needToUpdateObj) {
const all = [];
const updateFromRefs = (dbName, collName) => {
if (!needToUpdateObj[dbName][collName].dependentKeys || collName.split(".")[1]) {
return;
}
const db = dbClient.db(dbName);
const collection = db.collection(collName);
Object.keys(needToUpdateObj[dbName][collName].dependentKeys).forEach(dependentKey => {
debug("update payload:\n", JSON.stringify({...needToUpdateObj[dbName][collName].dependentKeys[dependentKey]}));
all.push(
collection.updateMany({[dependentKey]: needToUpdateObj[dbName][collName].refKey}, {$set: {...needToUpdateObj[dbName][collName].dependentKeys[dependentKey]}})
);
});
};
const updateMysql = (dbName, tableNameWithPrefix) => {
const [mysqlPrefix, mysqlDbName, tableName] = tableNameWithPrefix.split(".");
if (mysqlPrefix !== "mysql") {
return;
}
Object.keys(needToUpdateObj[dbName][tableNameWithPrefix].dependentKeys).forEach(dependentKey => {
debug("update payload:\n", JSON.stringify({...needToUpdateObj[dbName][tableNameWithPrefix].dependentKeys[dependentKey]}));
const queryParams = [];
let query = `update \`${mysqlDbName}\`.\`${tableName}\`
set `;
Object.keys(needToUpdateObj[dbName][tableNameWithPrefix].dependentKeys[dependentKey]).forEach((value, key) => {
query += ` \`${value}\` = ? ,`;
queryParams.push(needToUpdateObj[dbName][tableNameWithPrefix].dependentKeys[dependentKey][value]);
});
query = query.substr(0, query.length - 1);
query += `where \`${dependentKey}\` = ?`;
queryParams.push(needToUpdateObj[dbName][tableNameWithPrefix].refKey.toString());
all.push(
mysqlConnection[mysqlDbName].query(query, queryParams)
);
});
};
const updateFromLocals = (dbName, collName,) => {
if (!needToUpdateObj[dbName][collName].localKeys || collName.split(".")[1]) {
return;
}
const db = dbClient.db(dbName);
const collection = db.collection(collName);
debug("update payload:\n", JSON.stringify({...needToUpdateObj[dbName][collName].localKeys}));
all.push(
collection.updateOne({_id: needToUpdateObj[dbName][collName]._id}, {$set: {...needToUpdateObj[dbName][collName].localKeys}})
);
};
Object.keys(needToUpdateObj).forEach(dbName => {
Object.keys(needToUpdateObj[dbName]).forEach(collName => {
updateMysql(dbName, collName);
updateFromRefs(dbName, collName);
updateFromLocals(dbName, collName);
});
});
return Promise.all(all);
};
const _createSyncItems = async function (dbs, batchSize) {
for (const db in dependenciesMap) {
if (dbs && !dbs.includes(db)) {
continue;
}
for (const referenceCollection in dependenciesMap[db]) {
for (const i in dependenciesMap[db][referenceCollection]) {
if (dependenciesMap[db][referenceCollection][i].type !== "ref") {
continue;
}
await _createSyncItem({
...dependenciesMap[db][referenceCollection][i],
reference_collection: referenceCollection,
batchSize,
db_name: db
});
}
}
}
};
const _createSyncItem = async function ({
db_name,
reference_collection,
dependent_collection,
dependent_fields,
fields_format,
reference_key,
dependent_key,
batchSize,
last_id_checked
}) {
const {error, value} = synchronizerModel.validateSync({
db_name,
reference_collection,
dependent_collection,
dependent_fields,
fields_format,
reference_key,
dependent_key,
batchSize,
last_id_checked
});
if (error) {
console.error(error);
return;
}
try {
await synchronizerModel.addSyncItem(value);
} catch (e) {
if (e.code !== DUPLICATE_CODE_ERROR) {
throw e;
}
}
};
exports.start = async function () {
dbClient = await synchronizerModel.connect(process.env.MONGODB_URL, process.env.MONGODB_OPTIONS);
await _buildDependenciesMap();
await _initChangeStream();
_checkMySqlConnections();
};
exports.pause = async function () {
await changeStream.close();
};
exports.continue = async function () {
await _buildDependenciesMap();
await _initChangeStream();
};
exports.addDependency = async function (body) {
const payload = {
db_name: body.dbName,
reference_collection: body.refCollection,
dependent_collection: body.dependentCollection,
reference_key: body.foreignField,
dependent_key: body.localField,
fields_format: JSON.stringify(body.fieldsToSync),
dependent_fields: _extractFields(body.fieldsToSync),
reference_collection_last_update_field: body.refCollectionLastUpdateField
};
const {error, value} = synchronizerModel.validate(payload);
if (error) {
throw new Error(error);
}
const id = _checkIfNeedToUpdate(value);
if (id !== "new") {
return id;
}
_checkConflict(value);
value.fields_format = JSON.stringify(body.fieldsToSync);
const result = await synchronizerModel.addDependency(value);
await _buildDependenciesMap();
await _initChangeStream();
return result.insertedId;
};
exports.removeDependency = async function (id) {
const result = await synchronizerModel.removeDependency(id);
if (result.n > 0) {
await _buildDependenciesMap();
await _initChangeStream();
}
};
exports.showDependencies = function () {
return dependenciesMap;
};
const _updateSyncItemBatchResults = async function ({syncItem, documents, dependentCollection}) {
const bulk = [];
const updateMongo = async () => {
if (syncItem.dependent_collection.split(".")[0] === "mysql") {
return;
}
documents.forEach(doc => {
const payload = {};
for (let dependentField in syncItem.fields_format) {
let value = getObjectPropFromString(doc, syncItem.fields_format[dependentField]);
if (value === undefined) {
continue;
}
payload[dependentField] = value;
}
bulk.push({
updateMany: {
"filter": {[syncItem.dependent_key]: doc[syncItem.reference_key]},
"update": {$set: payload}
}
});
});
debug("_updateSyncItemBatchResults", JSON.stringify(bulk));
return dependentCollection.bulkWrite(bulk);
};
const updateMysql = async () => {
const [prefix, mysqlDbName, tableName] = syncItem.dependent_collection.split(".");
if (prefix !== "mysql") {
return;
}
let query = "";
documents.forEach(doc => {
let needQuery = true;
for (let dependentField in syncItem.fields_format) {
let value = getObjectPropFromString(doc, syncItem.fields_format[dependentField]);
if (value === undefined) {
continue;
}
if (needQuery === true) {
query += ` update \`${tableName}\`
set `;
needQuery = false;
}
if (value instanceof ObjectId) {
value = value.toString();
}
query += ` \`${dependentField}\`= ${mysql.escape(value)} ,`;
}
if (needQuery === true) {
return;
}
let refKey = doc[syncItem.reference_key];
if (refKey instanceof ObjectId) {
refKey = refKey.toString();
}
query = query.replace(/,$/, "");
query += ` where \`${syncItem.dependent_key}\` = ${mysql.escape(refKey)} ;`;
});
await mysqlConnection[mysqlDbName].beginTransaction();
try {
await mysqlConnection[mysqlDbName].query(query);
await mysqlConnection[mysqlDbName].commit();
} catch (e) {
await mysqlConnection[mysqlDbName].rollback();
throw e;
}
};
await updateMongo();
await updateMysql();
};
const _getSyncItemBatchResults = function ({syncItem, referenceCollection, ignoreLastUpdateField, fromDate}) {
const query = {};
if (syncItem.last_id_checked) {
query._id = {"$gt": syncItem.last_id_checked};
} else if (ignoreLastUpdateField === false && syncItem.reference_collection_last_update_field && fromDate) {
query[syncItem.reference_collection_last_update_field] = {$gte: {fromDate}};
}
const projection = {
[syncItem.reference_key]: 1
};
syncItem.dependent_fields.forEach(field => {
projection[field] = 1;
});
return referenceCollection.find(query).limit(syncItem.batchSize).project(projection).toArray();
};
const _syncItem = async function ({ignoreLastUpdateField, fromDate}) {
const syncItem = await synchronizerModel.getNextSyncItem();
if (!syncItem) {
return null;
}
const db = dbClient.db(syncItem.db_name);
const referenceCollection = db.collection(syncItem.reference_collection);
const dependentCollection = db.collection(syncItem.dependent_collection);
const documents = await _getSyncItemBatchResults({syncItem, referenceCollection, ignoreLastUpdateField, fromDate});
const active = !(documents.length < syncItem.batchSize);
if (documents.length === 0) {
synchronizerModel.updateSyncItem(syncItem._id, {active});
return null;
}
const result = await _updateSyncItemBatchResults({documents, syncItem, dependentCollection});
const lastId = documents[documents.length - 1]._id;
return synchronizerModel.updateSyncItem(syncItem._id, {last_id_checked: lastId, active});
};
const _syncItems = async function ({ignoreLastUpdateField, fromDate, retryDelay}) {
try {
while (await _syncItem({ignoreLastUpdateField, fromDate})) {
}
} catch (e) {
if (retryDelay) {
setTimeout(() => {
_syncItems({ignoreLastUpdateField, fromDate, retryDelay}).catch(console.error);
}, retryDelay);
}
}
};
const syncAll = async function ({
dbs,
batchSize = 500,
ignoreLastUpdateField = false,
fromDate,
cleanOldSyncTasks = false,
retryDelay = 0
}) {
if (cleanOldSyncTasks === true) {
await synchronizerModel.cleanSyncDatabase();
}
await _createSyncItems(dbs, batchSize);
await _syncItems({ignoreLastUpdateField, fromDate, retryDelay});
};
exports.syncAll = syncAll;