mongo2elastic
Version:
Sync MongoDB collections to Elasticsearch
156 lines (155 loc) • 5.48 kB
JavaScript
import _debug from 'debug';
import _ from 'lodash/fp.js';
import * as mongoChangeStream from 'mongochangestream';
import { convertSchema } from './convertSchema.js';
import { indexFromCollection } from './util.js';
import { renameKeys } from 'mongochangestream';
/**
 * Pick out the items of a bulk response that report an error.
 *
 * An item is kept when any of its `create`, `delete`, `index` or `update`
 * entries carries a truthy `error` property.
 *
 * @param {{ items: object[] }} response - Bulk response holding an `items` array.
 * @returns {object[]} The subset of `response.items` that failed.
 */
const getBulkErrors = (response) => {
    const actionNames = ['create', 'delete', 'index', 'update'];
    const hasError = (item) => actionNames.some((action) => item[action]?.error);
    return response.items.filter(hasError);
};
/**
 * Build a fresh per-operation counter object with every count at zero.
 *
 * @returns {{ insert: number, update: number, replace: number, delete: number }}
 * A new object on every call, so callers can increment it freely.
 */
const getInitialCounts = () => Object.fromEntries(['insert', 'update', 'replace', 'delete'].map((operationType) => [operationType, 0]));
// Module-level debug logger created under the 'mongo2elastic:sync' namespace.
const debug = _debug('mongo2elastic:sync');
/**
 * Set up syncing of a MongoDB collection to an Elasticsearch index.
 *
 * Wraps `mongoChangeStream.initSync` and supplies the record processors that
 * turn scanned documents and change stream events into Elasticsearch bulk
 * operations. A `process` event is emitted after every bulk request.
 *
 * @param redis - Forwarded unchanged to `mongoChangeStream.initSync`.
 * @param collection - MongoDB collection to sync. Also used to derive the
 * index name when `options.index` is not set.
 * @param elastic - Elasticsearch client. `bulk`, `indices.create` and
 * `indices.putMapping` are called on it.
 * @param options - `options.index` overrides the target index name and
 * `options.rename` adds field renames on top of `_id` -> `_mongoId`. The whole
 * object is also forwarded to `mongoChangeStream.initSync`.
 * @returns The object returned by `mongoChangeStream.initSync`, with
 * `processChangeStream` and `runInitialScan` replaced by the wrappers below
 * and `createIndexIgnoreMalformed`, `createMappingFromSchema` and `emitter`
 * added.
 */
export const initSync = (redis, collection, elastic, options = {}) => {
    /**
     * Prepare a MongoDB document for indexing by renaming `_id` to `_mongoId`
     * plus any renames from `options.rename`.
     */
    const mapper = (doc) => {
        // The return value of renameKeys is ignored, so this relies on it
        // renaming in place. NOTE(review): confirm against mongochangestream.
        renameKeys(doc, { _id: '_mongoId', ...options.rename });
        debug('Mapped doc %o', doc);
        return doc;
    };
    const index = options.index || indexFromCollection(collection);
    // Initialize sync
    const sync = mongoChangeStream.initSync(redis, collection, options);
    // Use emitter from mongochangestream
    const emitter = sync.emitter;
    /**
     * Emit `event` with a payload of `data` plus a `type` field set to the
     * event name.
     */
    const emit = (event, data) => {
        emitter.emit(event, { type: event, ...data });
    };
    /**
     * Emit a `process` event summarizing a bulk response.
     *
     * @param response - Bulk response from Elasticsearch.
     * @param operationCounts - Per-operation counts for the batch.
     * @param numDocs - Number of bulk actions that were sent.
     * @param changeStream - Whether the batch came from the change stream
     * (`true`, the default) or from the initial scan (`false`).
     */
    const handleBulkResponse = (response, operationCounts, numDocs, changeStream = true) => {
        if (!response.errors) {
            emit('process', {
                success: numDocs,
                changeStream,
                operationCounts,
            });
            return;
        }
        // There were errors
        const errors = getBulkErrors(response);
        const numErrors = errors.length;
        debug('Errors %O', errors);
        emit('process', {
            success: numDocs - numErrors,
            fail: numErrors,
            errors,
            changeStream,
            operationCounts,
        });
    };
    /**
     * Create the index with `index.mapping.ignore_malformed` enabled, merged
     * with any caller-supplied `settings`.
     */
    const createIndexIgnoreMalformed = async (settings = {}) => {
        const defaults = {
            index: {
                mapping: {
                    ignore_malformed: true,
                },
            },
        };
        await elastic.indices.create({
            index,
            settings: _.merge(defaults, settings),
        });
    };
    /**
     * Convert a MongoDB JSON schema with `convertSchema` and apply the result
     * to the index via `putMapping`.
     */
    const createMappingFromSchema = async (jsonSchema, options = {}) => {
        const mappings = convertSchema(jsonSchema, options);
        return elastic.indices.putMapping({ index, ...mappings });
    };
    /**
     * Process change stream events.
     *
     * Inserts become `create` actions, updates and replaces become `index`
     * actions, deletes become `delete` actions. Events with any other
     * operation type are ignored.
     */
    const processChangeStreamRecords = async (docs) => {
        // One entry per bulk action: [action] or [action, document].
        const operations = [];
        const operationCounts = getInitialCounts();
        for (const doc of docs) {
            const { operationType } = doc;
            switch (operationType) {
                case 'insert':
                    operationCounts[operationType]++;
                    // The action (which reads `_id`) is built before mapper
                    // runs, since mapper renames `_id` on the document.
                    operations.push([
                        { create: { _index: index, _id: doc.fullDocument._id.toString() } },
                        mapper(doc.fullDocument),
                    ]);
                    break;
                case 'update':
                case 'replace': {
                    operationCounts[operationType]++;
                    // Without a fullDocument an empty document is indexed.
                    const document = doc.fullDocument ? mapper(doc.fullDocument) : {};
                    operations.push([
                        { index: { _index: index, _id: doc.documentKey._id.toString() } },
                        document,
                    ]);
                    break;
                }
                case 'delete':
                    operationCounts[operationType]++;
                    operations.push([
                        { delete: { _index: index, _id: doc.documentKey._id.toString() } },
                    ]);
                    break;
                default:
                    // Unhandled operation types produce no bulk action.
                    break;
            }
        }
        // A batch made up only of unhandled event types yields nothing to
        // send; skip the request rather than issue an empty bulk call.
        if (operations.length === 0) {
            debug('No bulk operations for %d change stream events', docs.length);
            return;
        }
        const response = await elastic.bulk({
            operations: operations.flat(),
        });
        // Count the actions actually sent so ignored events are not
        // reported as successes.
        handleBulkResponse(response, operationCounts, operations.length, true);
    };
    /**
     * Process initial scan documents. Every document becomes a `create`
     * action.
     */
    const processRecords = async (docs) => {
        // Nothing to send for an empty batch.
        if (docs.length === 0) {
            return;
        }
        const operationCounts = { insert: docs.length };
        const response = await elastic.bulk({
            operations: docs.flatMap((doc) => [
                // Stringify `_id` the same way the change stream path does so
                // both paths address the same Elasticsearch document.
                { create: { _index: index, _id: doc.fullDocument._id.toString() } },
                mapper(doc.fullDocument),
            ]),
        });
        // These documents come from the initial scan, not the change stream.
        handleBulkResponse(response, operationCounts, docs.length, false);
    };
    /**
     * Start change stream processing. An `$unset` stage for
     * `updateDescription` is placed ahead of any caller-supplied pipeline
     * stages.
     */
    const processChangeStream = (options) => sync.processChangeStream(processChangeStreamRecords, {
        ...options,
        pipeline: [
            { $unset: ['updateDescription'] },
            ...(options?.pipeline ?? []),
        ],
    });
    /**
     * Run the initial collection scan, indexing each batch with
     * `processRecords`.
     */
    const runInitialScan = (options) => sync.runInitialScan(processRecords, options);
    return {
        ...sync,
        /**
         * Process MongoDB change stream for the given collection.
         * `options.batchSize` defaults to 500.
         * `options.timeout` defaults to 30 seconds.
         */
        processChangeStream,
        /**
         * Run initial collection scan. `options.batchSize` defaults to 500.
         * Sorting defaults to `_id`.
         */
        runInitialScan,
        /**
         * Create index with ignore_malformed enabled for the index.
         */
        createIndexIgnoreMalformed,
        /**
         * Create mapping from MongoDB JSON schema
         */
        createMappingFromSchema,
        emitter,
    };
};