UNPKG

castor-load

Version:

Traverse a directory to build a MongoDB collection from the files found. It can then keep the directory and the collection synchronised.

413 lines (348 loc) 12.1 kB
/*jshint node:true,laxcomma:true*/
'use strict';

var path = require('path')
  , fs = require('fs')
  , basename = path.basename(__filename, '.js')
  , debug = require('debug')('castor:load:' + basename)
  , util = require('util')
  , assert = require('assert')
  , async = require('async')
  , extend = require('extend')
  , File = require('./file.js')
  , mkdirp = require('mkdirp')
  , parseBytes = require('bytes')
  , EventEmitter = require('events').EventEmitter
  ;

/**
 * Keeps a database collection in step with the files handed to it.
 *
 * Emits 'saved' (doc) after a document has been written and
 * 'cancelled' (err, fid) when a write failed (see `save`).
 *
 * @param {Object} mountor object exposing `append(file, worker, done)`; used by `mount`
 * @param {Object} [options]
 * @param {String} [options.collectionName='filerake']
 * @param {String} [options.connexionURI='mongodb://localhost:27017/test/']
 * @param {Number} [options.concurrency=1] queue concurrency
 * @param {String|Number} [options.maxFileSize='128mb'] bigger files are rejected by `check`
 * @param {Boolean} [options.strictCompare=false] when true, dates are never trusted (see `compareDates`)
 * @param {Number} [options.delay=1000] retry delay (ms) used by `check` when the queue is full
 * @param {Number} [options.writeConcern=0]
 * @param {*} [options.synckey=null]
 * @param {String} [options.directory] base directory, only read by the tingodb engine
 */
function Sync(mountor, options) {
  if (!(this instanceof Sync)) {
    return new Sync(mountor, options);
  }
  var self = this;
  EventEmitter.call(self);

  // Tolerate `new Sync(mountor)`: every option has a default anyway.
  options = options || {};

  self.options = {};
  self.options.collectionName = options.collectionName || 'filerake';
  self.options.connexionURI = options.connexionURI || 'mongodb://localhost:27017/test/';
  self.options.connexion = require('url').parse(self.options.connexionURI);
  self.options.concurrency = options.concurrency || 1;
  self.options.maxFileSize = options.maxFileSize || '128mb';
  self.options.strictCompare = options.strictCompare || false;
  self.options.delay = options.delay || 1000;
  self.options.writeConcern = options.writeConcern || 0;
  self.options.synckey = options.synckey || null;
  // `connect` reads this for the tingodb engine; it was never copied before,
  // which made that branch always fail inside path.join().
  self.options.directory = options.directory;

  self.dateSynchronised = new Date();
  self.mountor = mountor;
  // fids currently being processed, used by `check` to avoid double work.
  self.checker = [];

  function worker(file, done) {
    self.compare(file, function (err) {
      if (err) {
        debug('error', file.doc.filename, err);
      }
      done(err);
    });
  }

  self.queue = async.queue(worker, self.options.concurrency);
  self.queue.drain = function () {
    debug('queue drain', self.dateSynchronised);
  };
}
util.inherits(Sync, EventEmitter);

/**
 * Reset the reference date used by `clean()` when it is called without condition.
 */
Sync.prototype.refresh = function () {
  this.dateSynchronised = new Date();
};

/**
 * Open the database (once) and hand back the collection.
 * The collection and db handles are cached on the instance and reused by later calls.
 *
 * @param {Function} fn callback(err, collection, db)
 * @throws {Error} synchronously, when the URI protocol is neither `mongodb:` nor `tingodb:`
 */
Sync.prototype.connect = function (fn) {
  var self = this;
  if (self.coll) {
    return fn(null, self.coll, self.db);
  }

  var func = function (err, db) {
    if (err || !db) {
      return fn(err || new Error('connect failed'));
    }
    db.collection(self.options.collectionName, function (err, coll) {
      // Without this guard a failed lookup crashed on `coll.ensureIndex`.
      if (err || !coll) {
        return fn(err || new Error('collection unavailable'));
      }
      self.db = db;
      self.coll = coll;
      coll.ensureIndex({ sha1: 1 }, { w: self.options.writeConcern }, function (err) {
        if (err) {
          return fn(err, self.coll, self.db);
        }
        coll.ensureIndex(
          { fid: 1, number: 1 },
          { unique: true, dropDups: true, w: self.options.writeConcern },
          function (err) {
            fn(err, self.coll, self.db);
          }
        );
      });
    });
  };

  if (self.options.connexion.protocol === 'mongodb:') {
    require('mongodb').MongoClient.connect(self.options.connexionURI, func);
  }
  else if (self.options.connexion.protocol === 'tingodb:') {
    if (typeof self.options.directory !== 'string') {
      return fn(new Error('tingodb engine requires the "directory" option'));
    }
    var colldir = path.join(
      self.options.directory,
      self.options.connexion.path || '',
      'filerake'
    );
    debug('Tingodb\'s path', colldir);
    mkdirp(colldir, function (err) {
      // Report through the callback: throwing here would escape every caller.
      if (err) {
        return fn(err);
      }
      var tingodb = require('tingodb')();
      var handle = new tingodb.Db(colldir, {});
      handle.open(func);
    });
  }
  else {
    throw new Error('Unsupported engine : ' + self.options.connexion.protocol);
  }
};

/**
 * Queue a file for comparison with the collection.
 *
 * - files larger than `options.maxFileSize` are rejected with an error;
 * - when the queue already holds `options.concurrency` waiting items, the call
 *   is retried after `options.delay` ms;
 * - when the same fid is already being processed, the call is dropped.
 *
 * @param {File} file
 * @param {Function} next callback(err)
 */
Sync.prototype.check = function (file, next) {
  assert(file instanceof File);
  var self = this;

  if (self.options.maxFileSize) {
    var maxfilesize = self.options.maxFileSize;
    if (typeof maxfilesize === 'string') {
      maxfilesize = parseBytes(maxfilesize);
    }
    if (typeof maxfilesize === 'number' && maxfilesize < file.doc.filesize) {
      debug('reject', '> ' + parseBytes(maxfilesize), file.doc.filename, file.doc.fid);
      // The limit used to be glued to the sentence without any separator.
      return next(new Error('File exceeds the maximum size.' + ' Limit: ' + parseBytes(maxfilesize)));
    }
  }

  if (self.checker.indexOf(file.doc.fid) !== -1) {
    debug('in progress', file.doc.filename, file.doc.fid);
    // NOTE(review): `next` is intentionally left uncalled here, as in the
    // original (the call was commented out). Confirm callers do not wait on it.
    return;
  }

  if (self.queue.length() >= self.options.concurrency) {
    debug('delayed', file.doc.filename, file.doc.fid);
    setTimeout(function () {
      self.check(file, next);
    }, self.options.delay);
    return;
  }

  debug('check', file.doc.filename, file.doc.fid);
  self.checker.push(file.doc.fid);
  self.queue.push(file, function (err) {
    self.checker = self.checker.filter(function (fid) {
      return fid !== file.doc.fid;
    });
    next(err);
  });
};

/**
 * Flag every document of a file as 'deleted'.
 *
 * @param {File} file
 * @param {Function} next callback(err)
 */
Sync.prototype.drop = function (file, next) {
  assert(file instanceof File);
  var self = this;
  var doc = file.get();
  var selector = { fid: doc.fid };
  self.connect(function (err, collection) {
    if (err) {
      return next(err);
    }
    collection.updateMany(selector, { $set: { state: 'deleted' } }, function (err) {
      if (err) {
        return next(err);
      }
      debug('delete', doc.filename);
      next();
    });
  });
};

/**
 * Remove every document of a file from the collection.
 *
 * @param {File} file
 * @param {Function} next passed straight to `collection.remove`
 */
Sync.prototype.cancel = function (file, next) {
  assert(file instanceof File);
  var self = this;
  var doc = file.get();
  var selector = { fid: doc.fid };
  self.connect(function (err, collection) {
    if (err) {
      return next(err);
    }
    debug('cancel', doc.filename);
    collection.remove(selector, next);
  });
};

/**
 * Flag as 'deleted' the documents matching `condition`; without condition,
 * the documents whose dateSynchronised is older than `this.dateSynchronised`.
 *
 * @param {Object} [condition] selector
 * @param {Function} next callback(err)
 */
Sync.prototype.clean = function (condition, next) {
  if (typeof condition === 'function') {
    next = condition;
    condition = null;
  }
  var self = this;
  var selector = condition || { dateSynchronised: { $lt: self.dateSynchronised } };
  self.connect(function (err, collection) {
    if (err) {
      return next(err);
    }
    collection.updateMany(
      selector,
      { $set: { state: 'deleted', dateSynchronised: new Date() } },
      function (err, result) {
        if (err) {
          return next(err);
        }
        debug('cleaned', result.modifiedCount);
        next();
      }
    );
  });
};

/**
 * Set the `state` field of every document matching `selector`.
 *
 * @param {Object} selector
 * @param {String} state
 * @param {Function} next callback(err)
 */
Sync.prototype.setState = function (selector, state, next) {
  // typeof null is 'object' too, so null has to be excluded explicitly.
  if (selector === null || typeof selector !== 'object') {
    return next(new Error('cannot set state without selector'));
  }
  this.connect(function (err, collection) {
    if (err) {
      return next(err);
    }
    collection.updateMany(selector, { $set: { state: state } }, function (err, result) {
      if (err) {
        return next(err);
      }
      debug(state, result.modifiedCount);
      next();
    });
  });
};

/**
 * Upsert a document, keyed on (fid, number).
 * `dateSynchronised` is refreshed; `state` defaults to 'inserted' and `number` to 1.
 * Emits 'saved' on success, 'cancelled' on failure.
 *
 * @param {Object} doc document to store (modified in place)
 * @param {Function} next callback(err, res)
 */
Sync.prototype.save = function (doc, next) {
  var self = this;
  self.connect(function (err, collection) {
    if (err) {
      return next(err);
    }
    doc.dateSynchronised = new Date();
    if (!doc.hasOwnProperty('state')) {
      doc.state = 'inserted';
    }
    if (!doc.hasOwnProperty('number')) {
      doc.number = 1;
    }
    collection.updateOne(
      { fid: doc.fid, number: doc.number },
      doc,
      { w: self.options.writeConcern, upsert: true },
      function (err, res) {
        if (err) {
          debug('no ' + doc.state, err.toString());
          self.emit('cancelled', err, doc.fid);
        }
        else {
          self.emit('saved', doc);
        }
        next(err, res);
      }
    );
  });
};

/**
 * Look the file up by fid: a known file is compared with its stored document,
 * an unknown one is searched by sha1 (it may be a renamed file).
 *
 * @param {File} file
 * @param {Function} next callback(err)
 */
Sync.prototype.compare = function (file, next) {
  assert(file instanceof File);
  var self = this;
  var doc = file.get();
  var selector = { fid: doc.fid };
  self.connect(function (err, collection) {
    if (err) {
      return next(err);
    }
    collection.findOne(selector, function (err, mongoDoc) {
      if (err) {
        return next(err);
      }
      if (mongoDoc) {
        return self.compareFileAndDoc(file, mongoDoc, next);
      }
      self.findMatchingSha1(file, next);
    });
  });
};

/**
 * Decide what to do with a file that already has a document in the collection.
 *
 * - a document left in 'inserting'/'updating' state is mounted again;
 * - same dates (see `compareDates`): only state and dateSynchronised are refreshed;
 * - otherwise the file is analyzed; a different sha1 or dateConfig triggers an
 *   update, else the stored dateModified/state/dateSynchronised are refreshed.
 *
 * @param {File} file
 * @param {Object} mongoDoc stored document
 * @param {Function} next callback(err)
 */
Sync.prototype.compareFileAndDoc = function (file, mongoDoc, next) {
  var self = this;
  var doc = file.get();
  // NOTE(review): this selector skips 'deleted' documents, so the 'restored'
  // state computed below cannot be written to a document that is itself
  // 'deleted'. Left as is; confirm the intent.
  var selector = { fid: doc.fid, state: { $ne: 'deleted' } };

  if (mongoDoc.state === 'inserting') {
    return self.mount(file, 'insert', next);
  }
  if (mongoDoc.state === 'updating') {
    return self.mount(file, 'update', next);
  }

  self.connect(function (err, collection) {
    if (err) {
      return next(err);
    }
    var sameDate = self.compareDates(doc, mongoDoc);
    if (sameDate) {
      var state = mongoDoc.state === 'deleted' ? 'restored' : 'unmodified';
      debug(state, doc.fid);
      return collection.updateMany(
        selector,
        { $set: { state: state, dateSynchronised: new Date() } },
        next
      );
    }
    file.analyze(function (err, doc) {
      if (err) {
        return next(err);
      }
      // Loose comparison on purpose: a missing sha1 (undefined) must match a stored null.
      if (doc.sha1 != mongoDoc.sha1) {
        return self.mount(file, 'update', next);
      }
      if (doc.dateConfig) {
        if (!mongoDoc.dateConfig || doc.dateConfig.getTime() !== mongoDoc.dateConfig.getTime()) {
          return self.mount(file, 'update', next);
        }
      }
      var state = mongoDoc.state === 'deleted' ? 'restored' : 'unmodified';
      debug(state, doc.fid);
      var change = {
        dateModified: doc.dateModified,
        state: state,
        dateSynchronised: new Date()
      };
      collection.updateMany(selector, { $set: change }, next);
    });
  });
};

/**
 * Handle a file whose fid is unknown: when a stored document has the same sha1
 * and its `location` no longer exists on disk, the file is treated as renamed
 * and that document is rewritten; otherwise the file is inserted.
 *
 * @param {File} file
 * @param {Function} next callback(err)
 */
Sync.prototype.findMatchingSha1 = function (file, next) {
  var self = this;
  file.analyze(function (err, doc) {
    if (err) {
      return next(err);
    }
    if (!doc.sha1) {
      return self.mount(file, 'insert', next);
    }
    self.connect(function (err, collection) {
      if (err) {
        return next(err);
      }
      collection.aggregate([
        { $match: { sha1: doc.sha1 } },
        { $group: { _id: '$fid', location: { $first: '$location' } } }
      ], function (err, results) {
        if (err) {
          return next(err);
        }
        // Walk the candidates from the end until one has a vanished location.
        (function checkFile() {
          var sameFile = results.pop();
          if (!sameFile) {
            return self.mount(file, 'insert', next);
          }
          if (!sameFile.location) {
            return checkFile();
          }
          fs.exists(sameFile.location, function (exists) {
            if (exists) {
              return checkFile();
            }
            // A doc was found with the same sha1 and unlinked file
            file.doc.state = 'renamed';
            file.doc.dateSynchronised = new Date();
            collection.updateMany(
              { fid: sameFile._id },
              { $set: file.doc },
              function (err, result) {
                if (err) {
                  return next(err);
                }
                if (result.modifiedCount > 0) {
                  return next();
                }
                self.mount(file, 'insert', next);
              }
            );
          });
        })();
      });
    });
  });
};

/**
 * Run the file through the mountor and save every document it produces.
 *
 * Documents coming from an exploded file are first saved with a transition
 * state ('inserting'/'updating'); once the mountor is done, documents of the
 * same fid older than the start of the mount are flagged 'deleted' and the
 * remaining ones get the final state ('inserted'/'updated').
 *
 * @param {File} file
 * @param {String} action 'update', anything else is treated as an insert
 * @param {Function} callback callback(err)
 */
Sync.prototype.mount = function (file, action, callback) {
  var self = this;
  debug('action', action);
  var isUpdate = action === 'update';
  var state = isUpdate ? 'updated' : 'inserted';
  var transitionState = isUpdate ? 'updating' : 'inserting';
  var now = new Date();
  var exploded = false;

  self.mountor.append(file, function worker(doc, cb) {
    if (Array.isArray(doc)) {
      // An empty array must not crash the debug statement.
      debug(state, doc.length ? doc[0].filename : undefined, '(Exploded)');
      exploded = true;
      async.eachLimit(doc, self.options.concurrency, function (d, c) {
        d.state = transitionState;
        self.save(d, c);
      }, cb);
      return;
    }
    if (doc._exploded) {
      debug("partial", doc.filename, doc.number);
      doc.state = transitionState;
      exploded = true;
    }
    else {
      debug(state, doc.filename);
      doc.state = state;
    }
    self.save(doc, cb);
  }, function (err) {
    if (err) {
      return callback(err);
    }
    self.clean({ fid: file.get().fid, dateSynchronised: { $lt: now } }, function (err) {
      if (err) {
        return callback(err);
      }
      if (exploded) {
        self.setState({ fid: file.get().fid, state: { $ne: 'deleted' } }, state, callback);
      }
      else {
        callback(err);
      }
    });
  });
};

/**
 * Compare document date and matching document in Mongo dates
 * @param {Object} doc base document (before loaders chain)
 * @param {object} mongoDoc matching document in Mongo (based on stats)
 * @return {Boolean} true if doc is considered as the same as mongoDoc
 */
Sync.prototype.compareDates = function (doc, mongoDoc) {
  if (this.options.strictCompare) {
    return false;
  }
  var unchanged = mongoDoc.dateModified &&
    doc.dateModified &&
    mongoDoc.dateModified.getTime() === doc.dateModified.getTime();
  var sameConfigDate = !doc.dateConfig || (
    mongoDoc.dateConfig &&
    doc.dateConfig &&
    mongoDoc.dateConfig.getTime() === doc.dateConfig.getTime()
  );
  // Coerce: a missing date used to leak undefined/null instead of false.
  return Boolean(unchanged && sameConfigDate);
};

module.exports = Sync;