castor-load
Version:
Traverse a directory to build a MongoDB collection from the files found, then keep the directory and the collection synchronised.
139 lines (119 loc) • 3.61 kB
JavaScript
;
var path = require('path')
, basename = path.basename(__filename, '.js')
, debug = require('debug')('castor:load:' + basename)
, util = require('util')
, assert = require('assert')
, crypto = require('crypto')
, fs = require('fs')
, url = require('url')
, extend = require('extend')
, request = require('request')
;
/**
 * Describe a local file, a directory or a remote resource as a plain
 * document stored in `this.doc`.
 *
 * May be called with or without `new`; both forms receive the same arguments.
 *
 * @param {string} pathname Local path or URL of the resource (asserted to be a string).
 * @param {Object} [stats] `fs.Stats`-like object. When it has no `isFile`
 *   function, `pathname` is parsed as a URL instead; for a URL, missing
 *   `ctime`/`mtime`/`atime` fields are filled in on this object with the
 *   current date.
 * @param {Object} [startobj] Initial properties copied into the document
 *   before the computed fields are written (computed fields win).
 * @param {Function} [modifier] Called last with the finished document so the
 *   caller can adjust it in place.
 */
function File(pathname, stats, startobj, modifier)
{
  if (!(this instanceof File)) {
    // Forward every argument: dropping startobj/modifier here would make
    // `File(...)` and `new File(...)` build different documents.
    return new File(pathname, stats, startobj, modifier);
  }
  var self = this;
  assert.equal(typeof pathname, 'string');
  stats = stats || {};
  var type;
  self.doc = {};
  if (startobj) {
    extend(self.doc, startobj);
  }
  self.doc.location = pathname;
  if (typeof stats.isFile === 'function') {
    if (stats.isFile()) {
      type = 'file';
    }
    else if (stats.isDirectory()) {
      type = 'directory';
    }
  }
  else {
    // No usable stats: treat the location as a URL when it carries a host.
    var oreq = url.parse(pathname, undefined, true);
    if (oreq.host) {
      type = 'url';
      var now = new Date();
      stats.ctime = stats.ctime || now;
      stats.mtime = stats.mtime || now;
      stats.atime = stats.atime || now;
      pathname = oreq.pathname;
      self.doc.basedir = oreq.protocol + '//' + oreq.host;
    }
  }

  // String#replace coerces an undefined pattern to the text "undefined", which
  // would corrupt any path containing that word. Only strip when a basedir is
  // actually set.
  var basedir = self.doc.basedir;
  var stripBasedir = function (value) {
    if (basedir === undefined || basedir === null) {
      return value;
    }
    return value.replace(basedir, '');
  };

  self.doc.filetype = type || 'unknown';
  // The identifier is derived from the full original location, not the stripped path.
  self.doc.fid = crypto.createHash('sha1').update(self.doc.location).digest('hex');
  self.doc.basename = path.basename(pathname);
  self.doc.filename = stripBasedir(pathname);
  self.doc.directory = stripBasedir(path.dirname(pathname));
  self.doc.extension = path.extname(pathname).replace('.', '');
  self.doc.filesize = stats.size || 0;
  self.doc.dateCreated = stats.ctime || null;
  self.doc.dateModified = stats.mtime || null;
  self.doc.dateAccessed = stats.atime || null;
  // Left undefined here; analyze() fills it in.
  self.doc.sha1 = undefined;
  // Invoked as a method of the document, so `this` is the document itself.
  self.doc.openStream = function (opts) {
    opts = opts || {};
    if (this.filetype === 'url') {
      return request.get(this.location, opts);
    }
    else {
      return fs.createReadStream(this.location, opts);
    }
  };
  if (typeof modifier === 'function') {
    modifier(self.doc);
  }
}
/**
 * Compute the `sha1` fingerprint of the document, if it has none yet.
 *
 * - 'file': SHA-1 of the bytes read from `doc.location`.
 * - 'url': SHA-1 of the `etag` header returned by a HEAD request, or else of
 *   the `content-length` header concatenated with the location.
 * - anything else, or `sha1` already set: the document is handed back
 *   unchanged, synchronously.
 *
 * @param {Function} fn Node-style callback `fn(err, doc)`. The non-200 and
 *   "No sha1" failures pass the error only.
 */
File.prototype.analyze = function (fn) {
  var file = this;
  var hasher = crypto.createHash('sha1');
  var pending = file.doc.sha1 === undefined;

  if (pending && file.doc.filetype === 'file') {
    // Feed the file through the hash chunk by chunk.
    var input = fs.ReadStream(file.doc.location);
    input.on('data', function (chunk) {
      hasher.update(chunk);
    });
    input.on('end', function () {
      file.doc.sha1 = hasher.digest('hex');
      fn(null, file.doc);
    });
    input.on('error', function (err) {
      fn(err, file.doc);
    });
    return;
  }

  if (pending && file.doc.filetype === 'url') {
    // Remote content is not downloaded; the fingerprint comes from response headers.
    request.head(file.doc.location, function (error, response, body) {
      if (error) {
        fn(error, file.doc);
        return;
      }
      if (response.statusCode !== 200) {
        fn(new Error('HTTP 1.1/' + response.statusCode));
        return;
      }
      var headers = response && response.headers;
      if (headers && headers.etag) {
        file.doc.sha1 = hasher.update(headers.etag).digest('hex');
        fn(null, file.doc);
        return;
      }
      if (headers && headers['content-length']) {
        var seed = String(headers['content-length']).concat(file.doc.location);
        file.doc.sha1 = hasher.update(seed).digest('hex');
        fn(null, file.doc);
        return;
      }
      fn(new Error('No sha1'));
    });
    return;
  }

  // Nothing to compute.
  fn(null, file.doc);
};
/**
 * Return the document built for this resource.
 *
 * @returns {Object} The same `doc` object held by the instance (not a copy).
 */
File.prototype.get = function () {
  return this.doc;
};
// The module's sole export is the File constructor.
module.exports = File;