castor-load
Version:
Traverses a directory to build a MongoDB collection from the files it finds. It can then keep the directory and the collection synchronised.
413 lines (348 loc) • 12.1 kB
JavaScript
/*jshint node:true,laxcomma:true*/
;
var path = require('path')
, fs = require('fs')
, basename = path.basename(__filename, '.js')
, debug = require('debug')('castor:load:' + basename)
, util = require('util')
, assert = require('assert')
, async = require('async')
, extend = require('extend')
, File = require('./file.js')
, mkdirp = require('mkdirp')
, parseBytes = require('bytes')
, EventEmitter = require('events').EventEmitter
;
function Sync(mountor, options)
{
if (!(this instanceof Sync)) {
return new Sync(mountor, options);
}
var self = this;
self.options = {};
self.options.collectionName = options.collectionName || 'filerake';
self.options.connexionURI = options.connexionURI || 'mongodb://localhost:27017/test/';
self.options.connexion = require('url').parse(self.options.connexionURI);
self.options.concurrency = options.concurrency || 1;
self.options.maxFileSize = options.maxFileSize || '128mb';
self.options.strictCompare = options.strictCompare || false;
self.options.delay = options.delay || 1000;
self.options.writeConcern = options.writeConcern || 0;
self.options.synckey = options.synckey || null;
self.dateSynchronised = new Date();
self.mountor = mountor;
self.checker = [];
function worker(file, done) {
self.compare(file, function(err) {
if (err) {
debug('error', file.doc.filename, err);
}
done(err);
});
}
self.queue = async.queue(worker, self.options.concurrency);
self.queue.drain = function () {
debug('queue drain', self.dateSynchronised);
};
}
util.inherits(Sync, EventEmitter);
/**
 * Move the synchronisation reference date to the current time.
 * `clean` uses this date as its default cutoff.
 */
Sync.prototype.refresh = function () {
  var self = this;
  self.dateSynchronised = new Date(Date.now());
};
Sync.prototype.connect = function (fn) {
var self = this;
if (self.coll) {
return fn(null, self.coll, self.db);
}
var func = function (err, db) {
if (err || db === null) { return fn(err || new Error('connect failed')); }
db.collection(self.options.collectionName, function(err, coll) {
self.db = db;
self.coll = coll;
self.coll.ensureIndex({ sha1: 1 }, { w: self.options.writeConcern }, function(err) {
if (err) { return fn(err, self.coll, self.db); }
self.coll.ensureIndex({ fid: 1, number: 1 }, { unique: true, dropDups:true, w: self.options.writeConcern }, function(err) {
fn(err, self.coll, self.db);
});
});
});
};
if (self.options.connexion.protocol === 'mongodb:') {
require('mongodb').MongoClient.connect(self.options.connexionURI, func);
}
else if (self.options.connexion.protocol === 'tingodb:') {
var colldir = path.join(self.options.directory, self.options.connexion.path, 'filerake');
debug('Tingodb\'s path', colldir);
mkdirp(colldir, function (err) {
assert.ifError(err);
var tingodb = require('tingodb')();
var handle = new tingodb.Db(colldir, {});
handle.open(func);
});
}
else {
throw new Error('Unsupported engine : ' + self.options.connexion.protocol);
}
};
/**
 * Submit a file to the comparison queue.
 *
 * - A file larger than `options.maxFileSize` is rejected through `next`.
 * - A file whose fid is already tracked in `this.checker` is ignored, and
 *   `next` is NOT called for it.
 * - When the queue already holds `options.concurrency` waiting items, the
 *   whole check is retried after `options.delay` milliseconds.
 * - Otherwise the file is queued; `next(err)` fires once it is processed.
 *
 * @param {File} file
 * @param {Function} next  callback `(err)`
 */
Sync.prototype.check = function (file, next)
{
  assert(file instanceof File);
  var self = this;

  var limit = self.options.maxFileSize;
  if (limit) {
    if (typeof limit === 'string') {
      limit = parseBytes(self.options.maxFileSize);
    }
    if (typeof limit === 'number' && file.doc.filesize > limit) {
      debug('reject', '> ' + parseBytes(limit), file.doc.filename, file.doc.fid);
      return next(new Error('File exceeds the maximum size.' + parseBytes(limit)));
    }
  }

  var alreadyTracked = self.checker.indexOf(file.doc.fid) !== -1;
  if (alreadyTracked) {
    // Same file is already queued or running: drop this request silently.
    debug('in progress', file.doc.filename, file.doc.fid);
    return;
  }

  if (self.queue.length() >= self.options.concurrency) {
    debug('delayed', file.doc.filename, file.doc.fid);
    setTimeout(function retry() { self.check(file, next); }, self.options.delay);
    return;
  }

  debug('check', file.doc.filename, file.doc.fid);
  self.checker.push(file.doc.fid);
  self.queue.push(file, function onProcessed(err) {
    // Stop tracking this fid before reporting back.
    self.checker = self.checker.filter(function (fid) { return fid !== file.doc.fid; });
    next(err);
  });
};
/**
 * Flag every document of a file as deleted (state 'deleted').
 * Documents are matched on the file's `fid`; nothing is removed.
 *
 * @param {File} file
 * @param {Function} next  callback `(err)`
 */
Sync.prototype.drop = function (file, next)
{
  assert(file instanceof File);
  var self = this;
  var fileDoc = file.get();
  var byFid = { fid: fileDoc.fid };
  var markDeleted = { $set: { state: 'deleted' } };

  self.connect(function onConnected(err, collection) {
    if (err) { return next(err); }
    collection.updateMany(byFid, markDeleted, function onUpdated(err) {
      if (err) { return next(err); }
      if (fileDoc) { debug('delete', fileDoc.filename); }
      next();
    });
  });
};
/**
 * Remove every document of a file from the collection, matched on `fid`.
 *
 * @param {File} file
 * @param {Function} next  passed straight to `collection.remove` as its callback
 */
Sync.prototype.cancel = function (file, next)
{
  assert(file instanceof File);
  var self = this;
  var fileDoc = file.get();
  var byFid = { fid: fileDoc.fid };

  function onConnected(err, collection) {
    if (err) { return next(err); }
    debug('cancel', fileDoc.filename);
    collection.remove(byFid, next);
  }

  self.connect(onConnected);
};
/**
 * Flag documents as deleted and stamp them with the current date.
 *
 * Without a condition, the documents whose `dateSynchronised` is older than
 * `this.dateSynchronised` are targeted.
 *
 * @param {Object} [condition]  selector; may be omitted
 * @param {Function} next       callback `(err)`
 */
Sync.prototype.clean = function (condition, next)
{
  // Support the clean(next) call form.
  if (typeof condition == 'function') {
    next = condition;
    condition = null;
  }
  var self = this;
  var target = condition;
  if (!target) {
    target = { dateSynchronised: { $lt: self.dateSynchronised } };
  }

  self.connect(function onConnected(err, collection) {
    if (err) { return next(err); }
    var change = { $set: { state: 'deleted', dateSynchronised : new Date() } };
    collection.updateMany(target, change, function onUpdated(err, result) {
      if (err) { return next(err); }
      debug('cleaned', result.modifiedCount);
      next();
    });
  });
};
/**
 * Set the `state` field of every document matching a selector.
 *
 * @param {Object} selector  query object; anything that is not a non-null
 *                           object is rejected through `next`
 * @param {String} state     value written to the `state` field
 * @param {Function} next    callback `(err)`
 */
Sync.prototype.setState = function (selector, state, next)
{
  // `typeof null` is 'object', so null must be rejected explicitly; otherwise
  // it would be handed to updateMany as the selector.
  if (selector === null || typeof selector !== 'object') {
    return next(new Error('cannot set state without selector'));
  }
  this.connect(function (err, collection) {
    if (err) { return next(err); }
    collection.updateMany(selector, { $set: { state: state } }, function (err, result) {
      if (err) { return next(err); }
      debug(state, result.modifiedCount);
      next();
    });
  });
};
/**
 * Upsert a document, keyed on `{ fid, number }`.
 *
 * Before writing, the document is stamped with the current date and given
 * `state: 'inserted'` / `number: 1` when it does not carry those properties
 * itself. Emits 'saved' (doc) on success, 'cancelled' (err, fid) on failure.
 *
 * @param {Object} doc     document to store; it is modified in place
 * @param {Function} next  callback `(err, result)`
 */
Sync.prototype.save = function (doc, next)
{
  var self = this;

  function onWritten(err, res) {
    if (!err) {
      self.emit('saved', doc);
    }
    else {
      debug('no ' + doc.state, err.toString());
      self.emit('cancelled', err, doc.fid);
    }
    next(err, res);
  }

  self.connect(function onConnected(err, collection) {
    if (err) { return next(err); }

    doc.dateSynchronised = new Date();
    if (!doc.hasOwnProperty('state')) { doc.state = 'inserted'; }
    if (!doc.hasOwnProperty('number')) { doc.number = 1; }

    var key = { fid: doc.fid, number: doc.number };
    var writeOptions = { w: self.options.writeConcern, upsert: true };
    collection.updateOne(key, doc, writeOptions, onWritten);
  });
};
/**
 * Look a file up in the collection by `fid` and dispatch:
 * a known file goes to `compareFileAndDoc`, an unknown one to
 * `findMatchingSha1`.
 *
 * @param {File} file
 * @param {Function} next  callback `(err)`, forwarded to the chosen handler
 */
Sync.prototype.compare = function (file, next)
{
  assert(file instanceof File);
  var self = this;
  var byFid = { fid: file.get().fid };

  self.connect(function onConnected(err, collection) {
    if (err) { return next(err); }
    collection.findOne(byFid, function onFound(err, stored) {
      if (err) { return next(err); }
      if (!stored) {
        self.findMatchingSha1(file, next);
        return;
      }
      return self.compareFileAndDoc(file, stored, next);
    });
  });
};
/**
 * Decide what to do with a file that already has a document in the collection.
 *
 * - stored state 'inserting' / 'updating': the file is mounted again
 *   ('insert' / 'update').
 * - `compareDates` says the dates match: only state and dateSynchronised are
 *   refreshed.
 * - otherwise the file is analyzed: a different sha1 or config date triggers
 *   an 'update' mount; if neither differs, dateModified, state and
 *   dateSynchronised are refreshed.
 *
 * The refreshed state is 'restored' when the stored document was 'deleted',
 * 'unmodified' otherwise.
 * NOTE(review): the update selector excludes documents in state 'deleted',
 * so a 'restored' state only reaches sibling documents that are not deleted;
 * confirm this is intended.
 *
 * @param {File} file
 * @param {Object} mongoDoc  document found in the collection for this file
 * @param {Function} next    callback `(err)`
 */
Sync.prototype.compareFileAndDoc = function (file, mongoDoc, next) {
  var self = this;
  var current = file.get();
  var liveDocs = { fid: current.fid, state: { $ne: 'deleted' } };

  // A previous run stopped half-way: replay the same action.
  if (mongoDoc.state == 'inserting') { return self.mount(file, 'insert', next); }
  if (mongoDoc.state == 'updating') { return self.mount(file, 'update', next); }

  function settledState() {
    if (mongoDoc.state == 'deleted') { return 'restored'; }
    return 'unmodified';
  }

  self.connect(function onConnected(err, collection) {
    if (err) { return next(err); }

    if (self.compareDates(current, mongoDoc)) {
      var quickState = settledState();
      debug(quickState, current.fid);
      return collection.updateMany(
        liveDocs,
        { $set: { state: quickState, dateSynchronised: new Date() } },
        next
      );
    }

    file.analyze(function onAnalyzed(err, analyzed) {
      if (err) { return next(err); }

      // Content changed, or the config date is new/different: full update.
      if (analyzed.sha1 != mongoDoc.sha1 ||
          (analyzed.dateConfig && (
            !mongoDoc.dateConfig ||
            analyzed.dateConfig.getTime() != mongoDoc.dateConfig.getTime()
          ))) {
        return self.mount(file, 'update', next);
      }

      var finalState = settledState();
      debug(finalState, analyzed.fid);
      collection.updateMany(liveDocs, {
        $set: {
          dateModified: analyzed.dateModified,
          state: finalState,
          dateSynchronised: new Date()
        }
      }, next);
    });
  });
};
/**
 * Handle a file that has no document under its own `fid`.
 *
 * The file is analyzed; without a sha1 it is mounted as an insert. Otherwise
 * the collection is searched for documents with the same sha1, grouped by
 * fid. Candidates are examined from the end of the result list: the first
 * one whose recorded location no longer exists on disk is treated as the
 * previous name of this file, and its documents are overwritten with this
 * file's doc in state 'renamed'. If no candidate qualifies, or that update
 * modifies nothing, the file is mounted as an insert.
 *
 * @param {File} file
 * @param {Function} next  callback `(err)`
 */
Sync.prototype.findMatchingSha1 = function (file, next) {
  var self = this;

  function insertAsNew() {
    return self.mount(file, 'insert', next);
  }

  file.analyze(function onAnalyzed(err, analyzed) {
    if (err) { return next(err); }
    if (!analyzed.sha1) { return insertAsNew(); }

    self.connect(function onConnected(err, collection) {
      if (err) { return next(err); }

      var pipeline = [
        { $match: { sha1: analyzed.sha1 } },
        { $group: { _id: '$fid', location: { $first: '$location'}}}
      ];

      collection.aggregate(pipeline, function onCandidates(err, candidates) {
        if (err) { return next(err); }

        // Same sha1 and the old file is gone: take over its documents.
        function adopt(candidate) {
          file.doc.state = 'renamed';
          file.doc.dateSynchronised = new Date();
          collection.updateMany(
            { fid: candidate._id },
            { $set: file.doc },
            function onRenamed(err, result) {
              if (err) { return next(err); }
              if (result.modifiedCount > 0) { return next(); }
              insertAsNew();
            }
          );
        }

        function examineNext() {
          var candidate = candidates.pop();
          // Candidates without a recorded location cannot be checked on disk.
          while (candidate && !candidate.location) {
            candidate = candidates.pop();
          }
          if (!candidate) { return insertAsNew(); }
          fs.exists(candidate.location, function (stillThere) {
            if (stillThere) { return examineNext(); }
            adopt(candidate);
          });
        }

        examineNext();
      });
    });
  });
};
/**
 * Run a file through the mountor and store every document it produces.
 *
 * A single plain document is saved directly in its final state ('inserted'
 * or 'updated'). Exploded output (an array of documents, or a document
 * flagged `_exploded`) is first saved in a transitional state ('inserting' /
 * 'updating'). When the mountor is done, this file's documents that were not
 * re-saved during this run are cleaned, and exploded documents are switched
 * to the final state.
 *
 * @param {File} file
 * @param {String} action      'update', anything else is treated as an insert
 * @param {Function} callback  callback `(err)`
 */
Sync.prototype.mount = function (file, action, callback) {
  var self = this;
  debug('action', action);

  var isUpdate = action == 'update';
  var finalState = isUpdate ? 'updated' : 'inserted';
  var pendingState = isUpdate ? 'updating' : 'inserting';
  var startedAt = new Date();
  var sawParts = false;

  function persist(doc, cb) {
    if (Array.isArray(doc)) {
      debug(finalState, doc[0].filename, '(Exploded)');
      sawParts = true;
      async.eachLimit(doc, self.options.concurrency, function (part, partDone) {
        part.state = pendingState;
        self.save(part, partDone);
      }, cb);
      return;
    }
    if (!doc._exploded) {
      debug(finalState, doc.filename);
      doc.state = finalState;
    } else {
      debug("partial", doc.filename, doc.number);
      doc.state = pendingState;
      sawParts = true;
    }
    self.save(doc, cb);
  }

  function finish(err) {
    if (err) { return callback(err); }
    // Documents of this file last synchronised before this run are stale.
    var stale = { fid: file.get().fid, dateSynchronised: { $lt: startedAt } };
    self.clean(stale, function onCleaned(err) {
      if (err) { return callback(err); }
      if (!sawParts) {
        callback(err);
        return;
      }
      self.setState({ fid: file.get().fid, state: { $ne: 'deleted' } }, finalState, callback);
    });
  }

  self.mountor.append(file, persist, finish);
};
/**
 * Compare the dates of a file's document with those of its stored document.
 *
 * With `options.strictCompare` set, dates are never trusted and the result is
 * always false. Otherwise the documents are considered the same when both
 * carry an identical `dateModified` and, if the file's document has a
 * `dateConfig`, the stored document carries an identical one.
 *
 * @param {Object} doc base document (before loaders chain)
 * @param {object} mongoDoc matching document in Mongo (based on stats)
 * @return {Boolean} true if doc is considered the same as mongoDoc;
 *                   always a real boolean, including when a date is missing
 */
Sync.prototype.compareDates = function (doc, mongoDoc) {
  if (this.options.strictCompare) { return false; }
  // Boolean(...) keeps a missing date from leaking out as undefined/null.
  var unchanged = Boolean(
    mongoDoc.dateModified && doc.dateModified &&
    mongoDoc.dateModified.getTime() === doc.dateModified.getTime()
  );
  var sameConfigDate = !doc.dateConfig || Boolean(
    mongoDoc.dateConfig &&
    mongoDoc.dateConfig.getTime() === doc.dateConfig.getTime()
  );
  return unchanged && sameConfigDate;
};
module.exports = Sync;