cuttlefish
Version:
A simple lowlevel synchronizing library for Joyent Manta.
531 lines (458 loc) • 14.3 kB
JavaScript
module.exports = Cuttlefish
var manta = require('manta')
var EE = require('events').EventEmitter
var util = require('util')
var assert = require('assert')
var debug
if (util.debuglog)
debug = util.debuglog('cuttlefish')
else
debug = /\bcuttlefish\b/i.exec(process.env.NODE_DEBUG+'')
? console.error : function() {}
util.inherits(Cuttlefish, EE)
function Cuttlefish(options, cb) {
if (!(this instanceof Cuttlefish))
return new Cuttlefish(options, cb)
EE.call(this)
if (!options || typeof options !== 'object')
throw new TypeError('options object required')
this._timingDebug = options.timingDebug
this._timingStart = this._timingDebug ? process.hrtime() : null
if (!options.files || typeof options.files !== 'object')
throw new TypeError('options.files object required')
if (!options.path || typeof options.path !== 'string')
throw new TypeError('options.path string required')
if (!options.client || options.client.constructor.name !== 'MantaClient')
throw new TypeError('options.client of type MantaClient required')
if (!options.request || typeof options.request !== 'function')
throw new TypeError('options.request function required')
if (options.concurrency != null &&
(typeof options.concurrency !== 'number' || options.concurrency <= 0))
throw new TypeError('options.concurrency must be number > 0')
if (options.timeout != null &&
(typeof options.timeout !== 'number' || options.timeout <= 0))
throw new TypeError('options.timeout must be number > 0')
if (options.getMd5 != null && typeof options.getMd5 !== 'function')
throw new TypeError('options.getMd5 must be a function')
this._getMd5 = options.getMd5
this._concurrency = options.concurrency || 50
this._timeout = options.timeout || -1
this._headers = options.headers || {}
this._files = canonize(options.files, this._headers)
this._onlyDelete = !!options.onlyDelete
this._delete = !!options.delete || this._onlyDelete
this._dryRun = !!options.dryRun
this._request = options.request
this._client = options.client
this._path = options.path
.replace(/\/+$/, '')
.replace(/^~~/, '/' + this._client.user)
this._names = Object.keys(this._files)
this._results = {}
this._walker = null
this._tasks = []
this._inFlight = {}
this._processing = 0
this._walks = 0
if (typeof cb === 'function')
this.on('complete', cb)
this._timing('start')
this._sync()
}
Cuttlefish.prototype._process = function process() {
debug('process %d(%d) of %d',
this._processing, this._concurrency, this._tasks.length)
if (this._processing < this._concurrency) {
var task = this._tasks.shift()
if (task) {
this._processing++
this._timing('task %d %s', task.id, task.name)
this.emit('task', task)
task.fn(this._afterProcess.bind(this, task))
} else if (this._tasksDone && this._processing === 0)
this._done()
}
}
var taskId = 0
Cuttlefish.prototype._pushTask = function pushTask(task) {
assert(task)
// Either this is a new task, or a continuation of a started file
assert(!this._tasksDone || (task.file && task.file.started))
debug('pushTask', task)
if (this._dryRun && !task.dry)
task.fn = process.nextTick
if (task.file)
task.file.started = true
task.id = taskId++
this._timing('pushTask %d %s', task.id, task.name)
this._inFlight[task.id] = task
if (this._timeout > 0) {
task.timer = setTimeout(taskTimeout.bind(this, task), this._timeout)
task.timer.unref()
}
this._tasks.push(task)
this._process()
}
function taskTimeout(task) {
if (this._inFlight[task.id]) {
var er = new Error('timeout')
er.task = task
this.emit('error', er)
}
}
Cuttlefish.prototype._tasksEnd = function tasksEnd() {
this._timing('tasksEnd')
debug('tasksEnd')
assert(!this._tasksDone)
this._tasksDone = true
if (this._processing === 0)
this._process()
}
Cuttlefish.prototype._afterProcess = function afterProcess(task, er, res) {
this._timing('taskComplete %d %s', task.id, task.name)
this._processing--
clearTimeout(task.timer)
delete this._inFlight[task.id]
task.error = er
if (this._dryRun && !task.dry)
res = task.file
// Purely informative objects should just show the headers
if (res && res.headers && res.statusCode === 204) {
debug('task %d:%s respond with headers', task.id, task.name)
res.headers.statusCode = res.statusCode
res = res.headers
}
if (res) {
if (res['content-length'])
res.size = +res['content-length']
if (res['content-md5'])
res.md5 = res['content-md5']
if (res['computed-md5'])
res.md5 = res['computed-md5']
}
task.result = res
if (er) {
er.task = task
if (task.file) {
task.file.status = 'error'
res.status = 'error'
task.file.error = er
er.file = task.file
}
if (task.after)
task.after(task)
else
this.emit('error', er)
} else {
if (task.after)
task.after(task)
this.emit(task.name, task.file, task.result)
}
this._process()
}
Cuttlefish.prototype._sync = function sync() {
debug('start sync', this._names)
// Nothing to send, and not deleting, so we're done! hooray!
if (this._names.length === 0 && !this._delete)
return process.nextTick(this._done.bind(this))
this._walk(this._path)
}
Cuttlefish.prototype._walk = function walk(path) {
this._timing('walk', path)
debug('walk', path)
this._walks++
this._pushTask({
name: 'walk',
fn: this._client.ls.bind(this._client, path, { _no_dir_check: true }),
after: this._onWalk.bind(this, path),
dry: true
})
}
Cuttlefish.prototype._onWalk = function onWalk(path, task) {
debug('onWalk', task)
var er = task.error
var res = task.result
if (er && er.statusCode === 404)
this._sendUnsent()
else if (er)
this.emit('error', er)
else
this._onWalkRes(res)
}
Cuttlefish.prototype._onWalkRes = function onWalkRes(res) {
assert(res)
res.on('entry', this._onWalkEntry.bind(this))
res.on('error', this._onWalkError.bind(this))
res.on('end', this._onWalkEnd.bind(this))
}
Cuttlefish.prototype._onWalkError = function(er) {
if (er.statusCode === 404 && er.message === this._path + ' was not found')
this._sendUnsent()
else
this.emit('error', er)
}
Cuttlefish.prototype._onWalkEnd = function onWalkEnd() {
if (--this._walks === 0)
this._sendUnsent()
}
Cuttlefish.prototype._onWalkEntry = function onWalkEntry(d) {
d._path = d.parent + '/' + d.name
d._remote = d._path.substr(this._path.length + 1)
debug('walk entry', d)
if (!this._delete && !this._files[d._remote] && d.type === 'object')
return
this._timing('walk entry', d._remote)
d.toString = function() { return this._remote }
this.emit('entry', d)
if (d.type === 'directory')
this._onWalkEntryDir(d)
else if (d.type === 'object')
this._onWalkEntryObject(d)
else throw new Error("wtf")
}
Cuttlefish.prototype._onWalkEntryDir = function onWalkEntryDir(d) {
debug('onWalkEntryDir', d)
// check to see if anything needs this dir to exist.
// if not, then kill it.
for (var i = 0; i < this._names.length; i++) {
if (this._names[i].indexOf(d._remote) === 0) {
debug('dir is ok', d._remote)
this._walk(d._path)
return
}
}
if (this._delete)
this._pushTask({
name: 'rmr',
file: d,
fn: this._client.rmr.bind(this._client, d._path),
after: this._onDelete.bind(this)
})
}
Cuttlefish.prototype._onDelete = function onDelete(task) {
debug('onDelete', task)
var remote = task.file
remote.status = 'delete'
this._results[remote._remote] = remote
if (task.error && task.error.statusCode !== 404)
this.emit('error', task.error)
else
this.emit('delete', remote._remote, remote)
}
Cuttlefish.prototype._onWalkEntryObject = function onWalkEntryObject(d) {
var file = this._files[d._remote]
debug('onWalkEntryObject', d, !!file, this._delete)
if (!file) {
if (this._delete)
this._pushTask({
fn: this._client.unlink.bind(this._client, d._path),
name: 'unlink',
file: d,
after: this._onDelete.bind(this)
})
} else if (!this._onlyDelete) {
// either a match, or not
if (file.skip) {
debug('skip file %s', file.name)
this._match(file, d)
} else if (typeof file.size === 'number' && file.size !== d.size) {
debug('size mismatch local=%d remote=%d', file.size, d.size)
this._send(file)
} else if (file.md5) {
if (!d.md5)
this._pushTask({
name: 'info',
fn: this._client.info.bind(this._client, d._path),
after: this._onInfo.bind(this),
file: file,
dry: true
})
else if (d.md5 === file.md5)
this._match(file, d)
else
this._send(file)
} else if (this._getMd5 && !file.skip && !file.nodigest) {
this._pushTask({
name: 'getMd5',
fn: this._getMd5.bind(this, file),
file: file,
after: this._afterGetMd5.bind(this, file, d)
})
} else if (typeof file.size !== 'number') {
debug('no size, send anyway', file)
this._send(file)
} else {
this._match(file, d)
}
}
}
Cuttlefish.prototype._afterGetMd5 = function afterGetMd5(file, d, task) {
if (task.error)
this.emit('error', task.error)
else {
if (task.result)
task.file.md5 = canonicalMd5(task.result)
else
task.file.nodigest = true
this._onWalkEntryObject(d)
}
}
Cuttlefish.prototype._send = function(file) {
debug('send', file)
this._pushTask({
name: 'send',
fn: this._sendFile.bind(this, file),
file: file,
after: this._onSend.bind(this)
})
}
Cuttlefish.prototype._onSend = function onSend(task) {
if (task.error)
return this.emit('error', task.error)
var file = task.file
var res = task.result
res.status = file.status = 'sent'
this._results[file] = res || task.file
this.emit('file', task.file, 'sent', res)
}
Cuttlefish.prototype._onInfo = function(task) {
var file = task.file
var remote = task.result
debug('onInfo %s', file, remote)
if (task.error && task.error.statusCode === 404)
this._send(file)
else if (task.error)
this.emit('error', task.error)
else if (remote.md5 === file.md5)
this._match(file, remote)
else if (!file.md5 && remote.size === file.size)
this._match(file, remote)
else
this._send(file)
}
Cuttlefish.prototype._sendUnsent = function sendUnsent() {
debug('sendUnsent')
this._timing('sendUnsent')
this._names.forEach(sendUnsentForeach, this)
this._tasksEnd()
}
function sendUnsentForeach(f) {
debug('-sendUnsent', this._files[f], this._results[f])
if (!this._files[f].started && !this._files[f].skip)
this._send(this._files[f])
}
Cuttlefish.prototype._match = function(file, remote) {
remote.status = 'match'
file.status = 'match'
file.started = true
this._results[file] = remote
debug('match', file, remote)
this.emit('match', file, remote)
this.emit('file', file, 'match', remote)
}
Cuttlefish.prototype._done = function() {
debug('done!')
this._timing('done')
this.emit('complete', this._results)
}
Cuttlefish.prototype._sendFile = function(file, cb) {
debug('_sendFile %s', file)
assert(!this._dryRun)
// get this file, and then send it once we have it.
this._timing('request file', file.name)
this._request(file, this._onRequest.bind(this, file, cb))
}
Cuttlefish.prototype._onRequest = function onRequest(file, cb, er, stream) {
debug('_onRequest %s', file, er || 'ok')
this._timing('on request file', file.name)
assert(!this._dryRun)
if (er)
cb(er)
else
this._sendFileStream(file, stream, cb)
}
Cuttlefish.prototype._sendFileStream = function(file, stream, cb) {
debug('_sendFileStream %s', file)
assert(!this._dryRun)
var mpath = this._path + '/' + file
stream.on('error', cb)
this._client.put(mpath, stream, file, cb)
}
Cuttlefish.prototype._timing = function() {
if (this._timingDebug) {
var s = util.format.apply(util, arguments)
var el = process.hrtime(this._timingStart)
el = (el[0] + el[1] / 1e9).toPrecision(5)
console.error('%d %s', el, s)
}
}
// File class
// Because of toString = this.name, it can be used interchangeably
// as a string key, and also as the object represented by that
function canonicalMd5(md5) {
if (md5) {
if (Buffer.isBuffer(md5))
md5 = md5.toString('base64')
else if (md5 && md5.match(/^md5-/))
md5 = md5.replace(/^md5-/, '')
if (md5.length === 32)
md5 = new Buffer(md5, 'hex').toString('base64')
}
return md5
}
function File(fname, file, headers) {
this.skip = !!file.skip
this.md5 = canonicalMd5(field(file, [
'md5', 'content-md5', 'contentMd5',
'content_md5', 'digest',
'computed-md5', 'computedMd5', 'computed_md5'
]))
this.size = field(file, [
'size', 'length', 'content-length',
'contentLength', 'content_length'
])
this.type = field(file, [
'type', 'content-type', 'contentType',
'content_type', 'mime-type', 'mimeType',
'mime_type'
])
if (headers || file.headers) {
this.headers = {}
if (headers && typeof headers === 'object') {
Object.keys(headers).forEach(function(h) {
this.headers[h] = headers[h]
}, this)
}
if (file.headers && typeof file.headers === 'object') {
Object.keys(file.headers).forEach(function(h) {
return this.headers[h.toLowerCase()] = file.headers[h]
}, this)
}
}
this.name = fname
this.mkdirs = true
}
File.prototype.toString = function() {
return this.name
}
File.prototype.mkdirs = true
File.prototype.headers = null
File.prototype.md5 = null
File.prototype.size = null
File.prototype.type = null
File.prototype.name = null
File.prototype.error = null
File.prototype.started = false
File.prototype.status = null
function canonize(files, headers) {
return Object.keys(files).map(function(fname) {
return new File(fname, files[fname], headers)
}).reduce(function(set, o) {
set[o] = o
return set
}, {})
}
function field(obj, names) {
for (var i = 0; i < names.length && !obj[names[i]]; i++);
return obj[names[i]]
}