dedup
Version:
Remove duplicate identical files from a list.
130 lines (116 loc) • 3.21 kB
JavaScript
// Generated by CoffeeScript 1.6.3
(function() {
// Bindings private to this module: the required libraries (Q, byline, crypto, fs),
// the helpers (hash, usage) and the state shared with run (queue, concurrent,
// max_concurrent). All of them are assigned further down in this file.
var Q, byline, concurrent, crypto, fs, hash, max_concurrent, queue, usage;
/**
 * Build the one-line command-line usage message.
 *
 * @returns {string} Text telling the user to pipe a list of file names
 *     (one per line) into the program.
 */
usage = function() {
  var message = "Usage: dedup < list-of-files-to-dedup-one-per-line";
  return message;
};
fs = require('fs');
crypto = require('crypto');
byline = require('byline');
Q = require('q');
/**
 * Compute the SHA-1 digest of a file by streaming its contents.
 *
 * @param {string} filename - Path handed to fs.createReadStream.
 * @returns {Object} The promise of a Q deferred. It is resolved with the
 *     hex-encoded digest when the stream emits 'end' and rejected with the
 *     error object when the stream emits 'error'.
 */
hash = function(filename) {
  var deferred = Q.defer();
  var sha1 = crypto.createHash('sha1');
  var stream = fs.createReadStream(filename);

  // Feed every chunk into the running digest as it arrives.
  var onChunk = function(chunk) {
    return sha1.update(chunk);
  };
  // The whole file has been read: publish the final digest.
  var onEnd = function() {
    return deferred.resolve(sha1.digest('hex'));
  };
  // Read failure (missing file, permissions, ...): propagate it to the caller.
  var onError = function(err) {
    return deferred.reject(err);
  };

  stream.on('data', onChunk);
  stream.on('end', onEnd);
  stream.on('error', onError);

  return deferred.promise;
};
/**
 * Ad-hoc self check for hash().
 *
 * Hashes '/dev/null' and, 200 ms later, asserts via console.assert that the
 * digest received by then equals the expected value. The check is purely
 * time-based: it does not wait for the hash promise itself.
 *
 * @returns {Object} The timer handle returned by setTimeout.
 */
hash.test = function() {
  var expected = 'da39a3ee5e6b4b0d3255bfef95601890afd80709';
  var digest = null;

  // Store whatever hash() resolves with so the delayed check can see it.
  var remember = function(value) {
    return digest = value;
  };
  var verify = function() {
    return console.assert(digest === expected);
  };

  hash('/dev/null').then(remember).done();
  return setTimeout(verify, 200);
};
// File names whose hashing is currently in flight (filename -> true); run adds
// an entry when a name is read and removes it when its hash settles.
queue = {};
// Number of hash operations currently in progress.
concurrent = 0;
// run pauses the input stream once this many hash operations are in progress.
max_concurrent = 20;
/**
 * Entry point: read file names (one per line) from stdin, hash each file and,
 * once every name has been read and hashed, delete all files whose contents
 * duplicate another listed file.
 *
 * For each group of files sharing a digest the names are sorted, the first
 * one is kept and the others are unlinked. At most `max_concurrent` files are
 * hashed at a time; the input stream is paused while that limit is reached.
 *
 * Differences from the naive implementation, all of them safety fixes:
 *  - a file name listed more than once is processed only once, even if its
 *    first hash already finished (otherwise the same path would be both
 *    "kept" and removed, deleting the only copy);
 *  - removal also runs when stdin ends after all hashes have settled;
 *  - files that cannot be hashed or removed are reported on stderr instead of
 *    being ignored silently or aborting the remaining removals.
 *
 * NOTE: duplicate detection of names is textual; two different spellings of
 * the same path (e.g. "a" and "./a") are not recognised as the same file.
 *
 * @returns {Object} Whatever `filenames.on('end', ...)` returns (the input
 *     line stream).
 */
this.run = function() {
  var filenames, finalize, finish_if_idle, finished, hash_one, hashes, remove_dups, seen;

  // digest -> list of file names having that digest.
  hashes = Object.create(null);
  // Every file name accepted so far. Prototype-less so that names such as
  // "toString" or "constructor" are not mistaken for already-seen entries.
  seen = Object.create(null);
  // Becomes true once stdin has been read completely.
  finalize = false;
  // Guards against running the removal pass more than once.
  finished = false;

  // Delete every file except the first (in sort order) of each digest group.
  remove_dups = function() {
    Object.keys(hashes).forEach(function(digest) {
      var keep, names;
      names = hashes[digest];
      if (names.length <= 1) {
        return;
      }
      names.sort();
      keep = names.shift();
      names.forEach(function(name) {
        console.log("Removing " + name + ", a duplicate of " + keep);
        try {
          fs.unlinkSync(name);
        } catch (err) {
          // Keep going: one undeletable file must not stop the others.
          console.error("Could not remove " + name + ": " + (err && err.message ? err.message : err));
        }
      });
    });
  };

  // Run the removal pass exactly once, when input is exhausted and no hash
  // operation is still pending.
  finish_if_idle = function() {
    if (finalize && concurrent === 0 && !finished) {
      finished = true;
      remove_dups();
    }
  };

  // Hash one file and record its digest; bookkeeping happens on both success
  // and failure so the concurrency counter always returns to zero.
  hash_one = function(filename) {
    var accepted, rejected, settle;
    settle = function(next) {
      return function() {
        delete queue[filename];
        concurrent -= 1;
        if (concurrent < max_concurrent) {
          filenames.resume();
        }
        if (typeof next === "function") {
          next.apply(null, arguments);
        }
        finish_if_idle();
      };
    };
    accepted = settle(function(digest) {
      if (hashes[digest] == null) {
        hashes[digest] = [];
      }
      hashes[digest].push(filename);
    });
    rejected = settle(function(err) {
      // A file that cannot be read is skipped, but not silently.
      console.error("Could not hash " + filename + ": " + (err && err.message ? err.message : err));
    });
    hash(filename).then(accepted, rejected).done();
  };

  filenames = byline(process.stdin);
  filenames.setEncoding('utf-8');
  filenames.on('data', function(filename) {
    // `seen` is never pruned, so a repeated name is rejected even when its
    // first occurrence has already finished hashing.
    if (seen[filename]) {
      console.error("Duplicate file " + filename);
      return;
    }
    seen[filename] = true;
    concurrent += 1;
    queue[filename] = true;
    if (concurrent >= max_concurrent) {
      filenames.pause();
    }
    hash_one(filename);
  });
  return filenames.on('end', function() {
    finalize = true;
    // All hashes may already have settled (slow or empty input); without this
    // check the removal pass would never be triggered in that case.
    finish_if_idle();
  });
};
}).call(this);
/*
//@ sourceMappingURL=dedup.map
*/