manta-dir-watcher
Version:
watch a Manta directory for file changes
718 lines (648 loc) • 24.8 kB
JavaScript
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
/*
* Copyright 2016 Joyent, Inc.
*/
;
var assert = require('assert-plus');
var bunyan = require('bunyan');
var crypto = require('crypto');
var fs = require('fs');
var manta = require('manta');
var mkdirp = require('mkdirp');
var mod_path = require('path');
var Readable = require('stream').Readable;
var rimraf = require('rimraf');
var util = require('util');
var vasync = require('vasync');
var vstream = require('vstream');
// ---- globals/consts
// printf-style string formatting shorthand.
var format = util.format;

// The dirent types that `opts.filter.type` may be set to.
var FILTER_TYPES = ['object', 'directory'];
// ---- support stuff
/**
 * Escape every regular-expression metacharacter in `s`, so that the
 * returned string matches `s` literally when used as RegExp source.
 *
 * @param {String} s: The literal text to escape.
 * @returns {String} `s` with each metacharacter prefixed by a backslash.
 */
function regexpEscape(s) {
    return s.replace(/[\\^$*+?.()|[\]{}]/g, '\\$&');
}

/**
 * Compile a simple glob into an anchored RegExp. Only two wildcards are
 * supported: `*` (any run of characters, possibly empty) and `?` (exactly
 * one character). Everything else is matched literally.
 *
 * @param {String} s: The glob pattern.
 * @returns {RegExp} A regex matching whole strings against the glob.
 */
function regexpFromGlob(s) {
    /*
     * Tokenise into wildcards and literal runs, and escape only the literal
     * runs. Escaping the whole string first and then substituting wildcards
     * leaves the escape backslash in front of the substituted text, which
     * turns `?` into a literal-dot match.
     */
    var source = s.replace(/[*?]|[^*?]+/g, function (token) {
        if (token === '*') {
            return '.*';
        }
        if (token === '?') {
            return '.';
        }
        return regexpEscape(token);
    });
    return new RegExp('^' + source + '$');
}
/**
 * Shallow-copy the own enumerable properties of `obj` onto `target` and
 * return `target`. A fresh object is used when `target` is not given.
 */
function objCopy(obj, target) {
    var dest = target ? target : {};
    var names = Object.keys(obj);
    for (var idx = 0; idx < names.length; idx++) {
        var key = names[idx];
        dest[key] = obj[key];
    }
    return dest;
}
/**
 * Compare two Manta dirents on their `type` and `etag` fields.
 *
 * Returns `null` when both fields are strictly equal. Otherwise returns an
 * object mapping each differing field name to a `[valueInA, valueInB]` pair.
 */
function diffDirents(a, b) {
    var compared = ['type', 'etag'];
    var delta = null;
    for (var i = 0; i < compared.length; i++) {
        var field = compared[i];
        if (a[field] === b[field]) {
            continue;
        }
        if (delta === null) {
            delta = {};
        }
        delta[field] = [a[field], b[field]];
    }
    return delta;
}
// ---- MantaDirWatcher class
/*
* Create a new Manta dir watcher (a readable stream).
* Polling starts once the stream is first read from (the first `_read`).
*
* If no `clientOpts` are passed in, then the usual `MANTA_*` envvars are used:
*
* var watcher = new MantaDirWatcher({dir: '~~/stor/tmp'});
* watcher.on('data', function (evt) {
* console.log(JSON.stringify(evt));
* });
*
* Else, explicit Manta client options can be passed in:
*
* var watcher = new MantaDirWatcher({
* clientOpts: {
* url: 'https://us-east.manta.joyent.com',
* user: 'trent.mick',
* keyId: '31:96:29:14:6a:be:45:f6:df:73:4f:3f:32:45:45:45'
* },
* dir: '~~/stor/tmp'
* });
*
* Or an already created [node-manta](https://github.com/joyent/node-manta)
* client:
*
* var client = manta.createClient(...);
* var watcher = new MantaDirWatcher({
* client: client,
* dir: '~~/stor/tmp'
* });
*
*
* @param {String} opts.dir: Required. Manta dir path to watch.
* @param {Number} opts.interval: Optional. Polling interval (in seconds).
* Default is 60s.
* @param {String|RegExp} opts.filter.name: Optional. A glob pattern (if a
* string) or a regex to match against entry names to which to limit
* watching.
* @param {String} opts.filter.type: Optional. "object" or "directory" to limit
* watching to entries of this type.
* @param {String} opts.syncDir: Optional. A local directory to which to
* sync the watched objects. This implies `filter.type="object"` (i.e.
* sync'ing of directories is not supported).
* @param {Boolean} opts.syncDelete: Optional. Allow delete of local files
* when syncing to `syncDir`.
* @param {Boolean} opts.disableSyncDeleteGuard: Optional. Disable the guard
* that attempts to bail when it looks like the given `syncDir` was
* an accident. See "sync-delete-guard" in code below for details.
* @param {Boolean} opts.oneShot: Optional. Do a single poll and then close.
* @param {Boolean} opts.dryRun: Optional. Do a dry-run, don't actually
* sync files.
* @param {Object} opts.log: Optional. Bunyan logger.
*/
function MantaDirWatcher(opts) {
    assert.object(opts, 'opts');
    assert.string(opts.dir, 'opts.dir');
    assert.optionalNumber(opts.interval, 'opts.interval');
    // `opts.interval` is given in seconds; the timers work in milliseconds.
    this.intervalMs = (opts.interval !== undefined ? opts.interval : 60) * 1000;
    assert.ok(this.intervalMs > 0,
        'opts.interval is not positive: ' + opts.interval);
    assert.optionalObject(opts.log, 'opts.log');

    /*
     * Work on a shallow copy of the filter: a glob `name` is replaced by
     * its compiled RegExp below, and that rewrite must not leak into the
     * caller's `opts.filter` object.
     */
    var filter = objCopy(opts.filter || {});
    if (filter.name) {
        if (typeof (filter.name) === 'string') {
            // glob -> regex (limited to '*' and '?')
            filter.name = regexpFromGlob(filter.name);
        }
        assert.regexp(filter.name, 'opts.filter.name');
    }
    assert.optionalString(filter.type, 'opts.filter.type');
    if (filter.type) {
        assert.ok(FILTER_TYPES.indexOf(filter.type) !== -1,
            'invalid opts.filter.type: ' + filter.type);
    }

    assert.optionalString(opts.syncDir, 'opts.syncDir');
    assert.optionalBool(opts.syncDelete, 'opts.syncDelete');
    assert.optionalBool(opts.disableSyncDeleteGuard,
        'opts.disableSyncDeleteGuard');
    assert.optionalBool(opts.oneShot, 'opts.oneShot');
    assert.optionalBool(opts.dryRun, 'opts.dryRun');

    // TODO other stream options? highWaterMark?
    Readable.call(this, {objectMode: true});
    vstream.wrapStream(this, {name: 'MantaDirWatcher'});

    this.dir = opts.dir;
    this.log = (opts.log
        ? opts.log.child({dir: this.dir}, true)
        : bunyan.createLogger({name: 'manta-dir-watcher', dir: this.dir}));
    this.filter = filter;
    /*
     * NOTE(review): the option docs say `syncDir` implies
     * `filter.type="object"`, but nothing here enforces that -- confirm
     * whether directories should be filtered out when syncing.
     */
    this.syncDir = (opts.syncDir ? mod_path.resolve(opts.syncDir) : null);
    this.syncDelete = opts.syncDelete;
    this.disableSyncDeleteGuard = opts.disableSyncDeleteGuard;
    this.oneShot = opts.oneShot;
    this.dryRun = opts.dryRun;

    /*
     * Pick the Manta client. `_closeClient` records whether this watcher
     * created the client (and so should close it in `close()`), or was
     * handed one that the caller still owns.
     */
    if (opts.client) {
        assert.object(opts.client, 'opts.client');
        this.client = opts.client;
        this._closeClient = false;
    } else if (opts.clientOpts) {
        assert.object(opts.clientOpts, 'opts.clientOpts');
        assert.string(opts.clientOpts.url, 'opts.clientOpts.url');
        assert.string(opts.clientOpts.user, 'opts.clientOpts.user');
        assert.optionalString(opts.clientOpts.subuser,
            'opts.clientOpts.subuser');
        assert.optionalBool(opts.clientOpts.insecure,
            'opts.clientOpts.insecure');
        /*
         * As with node-manta's `createClient()`: `clientOpts.sign` can be
         * empty, a signing function, or an object with:
         *  - `keyId`: the key fingerprint
         *  - `key`: a path to the private SSH key
         */
        // Copy so that defaulting `log` does not modify the caller's object.
        var clientOpts = objCopy(opts.clientOpts);
        if (!clientOpts.log) {
            clientOpts.log = this.log;
        }
        this.client = manta.createClient(clientOpts);
        this._closeClient = true;
    } else {
        // No explicit client config: use the `MANTA_*` environment.
        this.client = manta.createClient({
            log: this.log,
            sign: manta.cliSigner({
                keyId: process.env.MANTA_KEY_ID,
                user: process.env.MANTA_USER,
                subuser: process.env.MANTA_SUBUSER
            }),
            user: process.env.MANTA_USER,
            url: process.env.MANTA_URL,
            insecure: Boolean(process.env.MANTA_TLS_INSECURE)
        });
        this._closeClient = true;
    }

    // Polling state.
    this._state = null;         // name -> dirent map from the last poll
    this._pollTimeout = null;   // timer handle for the next scheduled poll
    this._lastPollTime = null;  // ms timestamp of the last completed poll
    this._buffer = [];          // event groups held while paused
    this._paused = true;        // stays true until the first `_read`

    this.log.trace({intervalMs: this.intervalMs}, 'MantaDirWatcher created');
}
util.inherits(MantaDirWatcher, Readable);
/*
 * Stop watching: close the Manta client if this watcher created it, cancel
 * any scheduled poll and end the stream.
 *
 * NOTE(review): only the pending timer is cancelled here; a poll that is
 * already in flight is not tracked by this method.
 */
MantaDirWatcher.prototype.close = function close() {
    var watcher = this;

    // A client passed in by the caller (`_closeClient === false`) is left open.
    if (watcher.client && watcher._closeClient) {
        watcher.client.close();
    }

    var pending = watcher._pollTimeout;
    if (pending) {
        clearTimeout(pending);
        watcher._pollTimeout = null;
    }

    // Signal end-of-stream to consumers.
    watcher.push(null);
};
/*
 * Readable stream hook: the consumer wants data. All the work lives in
 * `_resume`, which flushes buffered event groups and restarts polling.
 */
MantaDirWatcher.prototype._read = function _read() {
    var watcher = this;
    watcher._resume();
};
/*
 * Leave the paused state: hand any buffered event groups downstream, then
 * make sure a poll is scheduled. Does nothing if not currently paused.
 */
MantaDirWatcher.prototype._resume = function _resume() {
    var watcher = this;
    if (!watcher._paused) {
        return;
    }
    watcher._paused = false;

    // Drain the buffer; go back to paused as soon as downstream is full.
    while (watcher._buffer.length > 0) {
        var group = watcher._buffer.shift();
        var wantsMore = watcher.push(group);
        if (!wantsMore) {
            watcher._pause();
            return;
        }
    }

    // A poll is already scheduled: nothing more to do.
    if (watcher._pollTimeout) {
        return;
    }

    // Poll right away if never polled, or if a full interval has elapsed
    // since the last poll completed; otherwise wait out the remainder.
    var now = Date.now();
    var timeToNextPoll = 0;
    if (watcher._lastPollTime) {
        timeToNextPoll = (watcher._lastPollTime + watcher.intervalMs) - now;
    }
    watcher.log.trace({timeToNextPoll: timeToNextPoll}, '_resume: poll time');

    if (timeToNextPoll > 0) {
        watcher._pollTimeout = setTimeout(function () {
            watcher._poll();
        }, timeToNextPoll);
    } else {
        setImmediate(function () {
            watcher._poll();
        });
    }
};
/*
 * Enter the paused state and cancel the scheduled poll, if there is one.
 */
MantaDirWatcher.prototype._pause = function _pause() {
    var pending = this._pollTimeout;
    this._paused = true;
    if (!pending) {
        return;
    }
    clearTimeout(pending);
    this._pollTimeout = null;
};
/*
 * Ask the watcher to poll as soon as possible instead of waiting for the
 * next scheduled poll. Any pending poll timer is cancelled first.
 */
MantaDirWatcher.prototype.poke = function poke() {
    var watcher = this;
    var pending = watcher._pollTimeout;
    if (pending) {
        clearTimeout(pending);
        watcher._pollTimeout = null;
    }
    setImmediate(function pokeIt() {
        watcher._poll();
    });
};
/*
 * Run one poll cycle as a pipeline of stages:
 *
 *  1. list the watched Manta dir (applying the type/name filters);
 *  2. on the first poll with a `syncDir`, collect the local files;
 *  3. work out the changes, against the previous poll's state or, on the
 *     first poll with a `syncDir`, against the local files;
 *  4. apply the sync-delete-guard (first poll only);
 *  5. sync the changes into `syncDir` (unless `dryRun`);
 *  6. push (or buffer, if paused) one `{events: [...]}` group;
 *  7. save the new state and schedule the next poll.
 *
 * An error from any stage is emitted as an 'error' event on the stream.
 * Scheduling happens in the last stage, so it is not reached after an error.
 */
MantaDirWatcher.prototype._poll = function _poll() {
    var self = this;
    var log = self.log;
    var context = {
        oldState: self._state,  // name -> dirent from the last poll, or null
        newState: {},           // name -> dirent for this poll
        dirents: [],
        localDirents: [],
        changes: []
    };

    vasync.pipeline({arg: context, funcs: [
        function listDir(arg, next) {
            var handleDirent = function (dirent) {
                if (self.filter.type && dirent.type !== self.filter.type) {
                    return;
                }
                if (self.filter.name && !self.filter.name.test(dirent.name)) {
                    return;
                }
                arg.newState[dirent.name] = dirent;
                arg.dirents.push(dirent);
            };

            /*
             * The listing can fail either via the callback or via an
             * 'error' event on the returned emitter. Both paths go through
             * `finish`, which calls `next` at most once and treats a 404
             * (watched dir does not exist) as an empty listing.
             */
            var finished = false;
            var finish = function (err) {
                if (finished) {
                    return;
                }
                finished = true;
                if (err && err.statusCode === 404) {
                    next();
                } else {
                    next(err);
                }
            };

            self.client.ls(self.dir, function (err, res) {
                if (err) {
                    finish(err);
                    return;
                }
                res.on('object', handleDirent);
                res.on('directory', handleDirent);
                // Without a listener an 'error' event would be thrown.
                res.on('error', finish);
                res.once('end', function () {
                    finish();
                });
            });
        },

        /*
         * If this is the first poll and we have a local `syncDir`, then
         * we will be comparing against that dir: collect the local dirents.
         */
        function firstRunLocalDirents(arg, next) {
            // The previous state lives on `arg`; a non-null value means
            // this is not the first poll and the local scan is not needed.
            if (!self.syncDir || arg.oldState) {
                next();
                return;
            }

            fs.readdir(self.syncDir, function (err, names) {
                if (err) {
                    // A missing syncDir just means no local files yet.
                    if (err.code === 'ENOENT') {
                        next();
                    } else {
                        next(err);
                    }
                    return;
                }

                if (self.filter.name) {
                    names = names.filter(function (name) {
                        return self.filter.name.test(name);
                    });
                }

                vasync.forEachPipeline({
                    inputs: names,
                    func: function lstatOne(name, nextName) {
                        var path = mod_path.join(self.syncDir, name);
                        fs.lstat(path, function (lstatErr, stat) {
                            if (lstatErr) {
                                nextName(lstatErr);
                                return;
                            }
                            // Local directories are ignored.
                            if (!stat.isDirectory()) {
                                arg.localDirents.push({
                                    name: name,
                                    path: path,
                                    stat: stat
                                });
                            }
                            nextName();
                        });
                    }
                }, function (lstatsErr) {
                    log.trace({localDirents: arg.localDirents},
                        'localDirents');
                    next(lstatsErr);
                });
            });
        },

        function changesFromDirents(arg, next) {
            var i, ld, name, dirent;

            if (arg.oldState) {
                // Compare against `oldState`.
                for (i = 0; i < arg.dirents.length; i++) {
                    dirent = arg.dirents[i];
                    name = dirent.name;
                    var oldDirent = arg.oldState[name];
                    if (!oldDirent) {
                        arg.changes.push({action: 'create', dirent: dirent});
                    } else if (diffDirents(oldDirent, dirent)) {
                        arg.changes.push({action: 'update', dirent: dirent,
                            oldDirent: oldDirent});
                    }
                }
                var oldNames = Object.keys(arg.oldState);
                for (i = 0; i < oldNames.length; i++) {
                    var n = oldNames[i];
                    if (!arg.newState[n]) {
                        arg.changes.push({action: 'delete',
                            oldDirent: arg.oldState[n]});
                    }
                }
                log.trace({changes: arg.changes},
                    'changesFromDirents: compare to oldState');
                next();
            } else if (self.syncDir) {
                // Compare against `localDirents` from syncDir.
                var localDirentFromName = {};
                for (i = 0; i < arg.localDirents.length; i++) {
                    ld = arg.localDirents[i];
                    localDirentFromName[ld.name] = ld;
                    if (!arg.newState[ld.name]) {
                        arg.changes.push({action: 'delete',
                            oldLocalDirent: ld});
                    }
                }

                // Names present both locally and in Manta: these need a
                // content comparison to know whether they changed.
                arg.possibleUpdates = [];
                for (i = 0; i < arg.dirents.length; i++) {
                    dirent = arg.dirents[i];
                    name = dirent.name;
                    if (!localDirentFromName[name]) {
                        arg.changes.push({action: 'create', dirent: dirent});
                    } else {
                        arg.possibleUpdates.push(name);
                    }
                }

                vasync.forEachPipeline({
                    inputs: arg.possibleUpdates,
                    func: function checkPossibleUpdate(puName, nextName) {
                        // Locals only: the async callbacks below must not
                        // depend on variables shared with the outer loops.
                        var remoteDirent = arg.newState[puName];
                        var path = remoteDirent.parent + '/' + puName;
                        var localDirent = localDirentFromName[puName];
                        var localPath = mod_path.join(self.syncDir, puName);
                        log.trace({path: path, localPath: localPath},
                            'checkPossibleUpdate');

                        self.client.info(path, function (err, info) {
                            if (err) {
                                nextName(err);
                                return;
                            }

                            // Cheap check first: differing sizes.
                            if (info.size !== localDirent.stat.size) {
                                arg.changes.push({action: 'update',
                                    dirent: remoteDirent,
                                    oldLocalDirent: localDirent});
                                log.trace({size: info.size,
                                    localSize: localDirent.stat.size},
                                    'checkPossibleUpdate: size diff');
                                nextName();
                                return;
                            }

                            // Same size: compare md5.
                            var md5sum = crypto.createHash('md5');
                            var input = fs.createReadStream(localPath);
                            var settled = false;
                            // A read failure must fail the poll rather
                            // than throw or leave the pipeline hanging.
                            input.on('error', function (readErr) {
                                if (!settled) {
                                    settled = true;
                                    nextName(readErr);
                                }
                            });
                            input.on('data', function (chunk) {
                                md5sum.update(chunk);
                            });
                            input.on('end', function () {
                                if (settled) {
                                    return;
                                }
                                settled = true;
                                var localMd5 = md5sum.digest('base64');
                                if (localMd5 !== info.md5) {
                                    arg.changes.push({action: 'update',
                                        dirent: remoteDirent,
                                        oldLocalDirent: localDirent});
                                    log.trace({md5: info.md5,
                                        localMd5: localMd5},
                                        'checkPossibleUpdate: md5 diff');
                                }
                                nextName();
                            });
                        });
                    }
                }, function (err) {
                    log.trace({changes: arg.changes},
                        'changesFromDirents: compare to localDirents');
                    next(err);
                });
            } else {
                // Nothing to compare against.
                log.trace('changesFromDirents: first poll, intializing');
                next();
            }
        },

        /*
         * When doing syncing with `syncDelete`, we have a sanity guard
         * to protect against deleting all (or many) files in the given
         * `syncDir` if it looks like a mischosen local dir.
         */
        function firstRunSyncDeleteGuard(arg, next) {
            if (!self.syncDir || !self.syncDelete
                || arg.oldState || self.disableSyncDeleteGuard)
            {
                next();
                return;
            }
            assert.arrayOfString(arg.possibleUpdates, 'arg.possibleUpdates');

            var deleteNames = [];
            arg.changes.forEach(function (ch) {
                if (ch.action === 'delete') {
                    deleteNames.push(ch.oldLocalDirent.name);
                }
            });

            /*
             * `deleteNames` entries means we will be deleting local files.
             * Empty `possibleUpdates` means there were no matching names
             * between local and manta dirs -- in other words, there is no
             * sign here that `syncDir` isn't an accident.
             */
            if (deleteNames.length > 0 && arg.possibleUpdates.length === 0) {
                next(new Error(format('sync-delete-guard failure: '
                    + 'Are you sure syncDir="%s" is correct for syncing '
                    + 'from dir="%s"; %d local file%s (%s) would be deleted '
                    + 'and there are no filename matches between "syncDir" '
                    + 'and "dir" to indicate syncDir is correct. (Use the '
                    + '"disableSyncDeleteGuard" option to override this '
                    + 'guard.)', self.syncDir, self.dir, deleteNames.length,
                    (deleteNames.length === 1 ? '' : 's'),
                    deleteNames.join(', '))));
            } else {
                log.trace({numDeletes: deleteNames.length,
                    numNameMatches: arg.possibleUpdates.length},
                    'passed sync-delete-guard');
                next();
            }
        },

        function syncChanges(arg, next) {
            if (!self.syncDir || self.dryRun) {
                next();
                return;
            }

            vasync.forEachPipeline({
                inputs: arg.changes,
                func: function syncChange(change, nextChange) {
                    switch (change.action) {
                    case 'update':
                    case 'create':
                        self._syncDirent(change.dirent, nextChange);
                        break;
                    case 'delete':
                        if (self.syncDelete) {
                            /*
                             * Always remove the *local* copy under
                             * `syncDir`. An `oldDirent` is a Manta dirent
                             * whose `parent` is a Manta path, so it must
                             * not be used to build the local path.
                             */
                            var localPath = (change.oldDirent
                                ? mod_path.join(self.syncDir,
                                    change.oldDirent.name)
                                : change.oldLocalDirent.path);
                            log.trace({localPath: localPath}, 'rm');
                            rimraf(localPath, nextChange);
                        } else {
                            nextChange();
                        }
                        break;
                    default:
                        throw new Error('unknown change action: '
                            + change.action);
                    }
                }
            }, function finishSyncChanges(err) {
                next(err);
            });
        },

        function pushEvents(arg, next) {
            // All events from one poll share the same timestamp.
            var timeEvent = new Date().toISOString();
            var events = [];
            for (var i = 0; i < arg.changes.length; i++) {
                var change = arg.changes[i];
                var aDirent = (change.dirent || change.oldDirent
                    || change.oldLocalDirent);
                var name = aDirent.name;
                var path = self.dir + '/' + name;
                var event = {
                    timeEvent: timeEvent,
                    action: change.action,
                    name: name,
                    path: path
                };
                switch (change.action) {
                case 'update':
                    event.mtime = change.dirent.mtime;
                    break;
                case 'create':
                    event.mtime = change.dirent.mtime;
                    break;
                case 'delete':
                    break;
                default:
                    throw new Error('unknown change action: '
                        + change.action);
                }
                events.push(event);
            }

            if (events.length > 0) {
                var group = {events: events};
                if (self._paused) {
                    log.trace({group: group}, 'pushEvents: buffering events');
                    self._buffer.push(group);
                } else {
                    log.trace({group: group}, 'pushEvents: pushing events');
                    // `push` returning false means downstream is full.
                    if (!self.push(group)) {
                        self._pause();
                    }
                }
            }
            next();
        },

        function saveStateAndScheduleNextPoll(arg, next) {
            self._state = arg.newState;

            // Schedule next poll.
            self._lastPollTime = Date.now(); // time we *completed* last poll
            if (self.oneShot) {
                self.close();
            } else if (!self._paused) {
                // While paused, `_resume` does the rescheduling instead.
                self._pollTimeout = setTimeout(function () {
                    self._poll();
                }, self.intervalMs);
                log.trace({delay: self.intervalMs}, 'schedule next poll');
            }
            next();
        }
    ]}, function finishPoll(err) {
        log.trace({err: err}, '_poll: end');
        if (err) {
            self.emit('error', err);
        }
    });
};
/*
 * Download one Manta object into `syncDir`.
 *
 * The object is first written to a hidden temp file
 * (`.<name>.mwatchdirpart`) in `syncDir` and then renamed into place, so a
 * partially downloaded file never appears under its final name. On any
 * failure the temp file is removed and the error is passed to `cb`.
 *
 * @param {Object} dirent: The Manta dirent to download; its `parent` and
 *      `name` are used to build the Manta path.
 * @param {Function} cb: Called as `cb(err)` on failure, `cb()` on success.
 */
MantaDirWatcher.prototype._syncDirent = function _syncDirent(dirent, cb) {
    assert.object(dirent, 'dirent');
    assert.func(cb, 'cb');
    var self = this;
    var log = self.log;
    var name = dirent.name;
    var tmpLocalPath = mod_path.join(self.syncDir,
        '.' + name + '.mwatchdirpart');
    var localPath = mod_path.join(self.syncDir, name);
    var path = dirent.parent + '/' + name;

    vasync.pipeline({funcs: [
        function mkdirpSyncDir(_, next) {
            mkdirp(self.syncDir, next);
        },
        function downloadToTmpFile(_, next) {
            self.client.get(path, function (err, src) {
                // On error there is no stream to pipe from.
                if (err) {
                    next(err);
                    return;
                }

                // Several stream events can end the download; make sure
                // `next` is called exactly once.
                var done = false;
                var finish = function (streamErr) {
                    if (done) {
                        return;
                    }
                    done = true;
                    next(streamErr);
                };

                var out = fs.createWriteStream(tmpLocalPath);
                src.on('error', function (srcErr) {
                    // `pipe` does not close the destination when the
                    // source fails; release the file handle ourselves.
                    out.destroy();
                    finish(srcErr);
                });
                out.on('error', finish);
                out.on('finish', function () {
                    finish();
                });
                src.pipe(out);
            });
        },
        function moveInPlace(_, next) {
            log.trace({localPath: localPath}, 'sync');
            fs.rename(tmpLocalPath, localPath, next);
        }
    ]}, function (err) {
        if (err) {
            // Best-effort cleanup; report the original error regardless.
            rimraf(tmpLocalPath, function (_) {
                cb(err);
            });
        } else {
            cb();
        }
    });
};
// ---- exports
// The constructor is the module's export; the list of valid filter types
// is exposed as a property on it.
MantaDirWatcher.FILTER_TYPES = FILTER_TYPES;
module.exports = MantaDirWatcher;