@dpwolfe/dos2unix
Version:
A Node.js module to convert text files with DOS line breaks to Unix line breaks, i.e. like using `dos2unix`.
328 lines (291 loc) • 10.6 kB
JavaScript
/*
* dos2unix
* https://github.com/JamesMGreene/node-dos2unix
*
* Copyright (c) 2013 James M. Greene
* Licensed under the MIT license.
*/
;
// Built-in modules
var fs = require('fs');
var util = require('util');
var EventEmitter = require('events').EventEmitter;
// External modules
var async = require('async');
var extend = require('node.extend');
var Readable = require('stream').Readable || require('readable-stream').Readable;
// Internal modules
var encoder = require('./util/encoding');
var globber = require('./util/glob');
var validator = require('./util/validate');
// Utility
var slicer = Array.prototype.slice;
function arraysMatch(arr1, arr2) {
if (arr1 && arr2 && arr1.length === arr2.length) {
var matches = arr1.filter(function(e, i) {
return e === arr2[i];
});
return matches.length === arr1.length;
}
return arr1 === arr2;
}
// A big frickin' mess... but aimed at performance by not reading the file more times than necessary
function processFile(filePath, callback, options) {
/*jshint validthis:true */
var processor = this;
processor.emit('processing.start', {
file: filePath
});
encoder.detectBom(filePath, function(err, bomName) {
var status = 'good';
if (err) {
status = 'error';
processor.emit('processing.error', {
file: filePath,
message: 'Skipping file with errors during encoding detection: ' + err
});
return callback(null, status);
}
else {
var needsFixing = false;
var lastByteSequenceWasCR = false;
var bytesPerBom = encoder.getBytesPerBom(bomName);
var bytesPerControlChar = encoder.getBytesPerControlChar(bomName);
var transformBuffer = [];
var tempBuffer = null;
// Fast-forward past the BOM if present
var onFirstReadable = function() {
var bomBuffer = this.read(bytesPerBom);
if (bomBuffer == null || bomBuffer.length !== bytesPerBom) {
status = 'error';
processor.emit('processing.error', {
file: filePath,
message: 'Skipping file with errors during read: Unable to read past the expected BOM'
});
return callback(null, status);
}
};
var onReadable = function() {
var data = this.read(),
i = 0,
len = data ? data.length : 0,
cursor = i,
buffer,
isByteSequenceLF;
for ( ; i < len; i += bytesPerControlChar) {
buffer = slicer.call(data, i, i + bytesPerControlChar);
// If we read less bytes than the minimum number necessary to register
// as a complete character in this encoding, put them back on the front
// of the read queue for the next read and then bail out.
if (buffer.length < bytesPerControlChar) {
if (!tempBuffer || !arraysMatch(buffer, tempBuffer)) {
tempBuffer = buffer;
// Save these rogue bytes to inspect on the next time around
this.unshift(data.slice(i, i + buffer.length));
// Break out of the loop
break;
}
else {
// This will prevent an infinite loop if the number of bytes in the file
// is erroneous/irregular, e.g. a total of 7 bytes in a UTF-16 file.
status = 'error';
// Send a signal to cause early termination (i.e. trigger the 'end' event)
this.push(null);
// Break out of the loop
break;
}
}
tempBuffer = null;
if (encoder.doesByteSequenceSuggestBinary(buffer, bomName)) {
status = 'skip';
processor.emit('processing.skip', {
file: filePath,
message: 'Skipping suspected binary file'
});
// Send a signal to cause early termination (i.e. trigger the 'end' event)
this.push(null);
// Break out of the loop
break;
}
else {
if (encoder.isByteSequenceCR(buffer, bomName)) {
lastByteSequenceWasCR = true;
}
else {
if (lastByteSequenceWasCR) {
isByteSequenceLF = encoder.isByteSequenceLF(buffer, bomName);
if (isByteSequenceLF || options.convertCrLineEndings) {
needsFixing = true;
// Push everything (except for CRs) into the outgoing buffer
transformBuffer.splice.apply(transformBuffer, [transformBuffer.length, 0].concat(slicer.call(data, cursor, i - bytesPerControlChar)));
if (!isByteSequenceLF) {
transformBuffer.splice.apply(transformBuffer, [transformBuffer.length, 0].concat(encoder.getLFByteSequence(bomName)));
}
cursor = i;
}
}
// If this is the last byte of the buffer, push the remaining data into the outgoing buffer
if (i + bytesPerControlChar === len && needsFixing) {
// Push everything (except for CRs) into the outgoing buffer
transformBuffer.splice.apply(transformBuffer, [transformBuffer.length, 0].concat(slicer.call(data, cursor)));
}
// Reset
lastByteSequenceWasCR = false;
}
}
}
};
var onError = function(err) {
status = 'error';
processor.emit('processing.error', {
file: filePath,
message: 'Skipping file with errors during read: ' + err
});
return callback(null, status);
};
var onEnd = function() {
if (status === 'good') {
if (needsFixing) {
status = 'bad';
this.on('close', function() {
// Overwrite the existing file with the fixed data
process.nextTick(function() {
processor.emit('convert.start', {
file: filePath
});
//
// TODO: Is there a more optimal way to do this?
// e.g. TransformStream, write to temp then move, etc.
//
fs.writeFile(filePath, new Buffer(transformBuffer), function(err) {
if (err) {
status = 'error';
processor.emit('convert.error', {
file: filePath,
message: 'Unexpected error during conversion process: ' + err
});
}
else {
status = 'fix';
processor.emit('convert.end', {
file: filePath
});
}
processor.emit('processing.end', {
file: filePath
});
return callback(null, status);
});
});
});
}
else {
status = 'skip';
processor.emit('processing.skip', {
file: filePath,
message: 'Skipping file with all correct line endings'
});
}
}
if (status === 'skip') {
processor.emit('processing.end', {
file: filePath
});
}
if (status !== 'bad') {
return callback(null, status);
}
};
var reader = new Readable();
reader.wrap(fs.createReadStream(filePath));
if (bytesPerBom > 0) {
reader.once('readable', onFirstReadable);
}
reader
.on('readable', onReadable)
.on('error', onError)
.on('end', onEnd);
}
});
}
function processAllFiles(fileList, options, callback) {
/*jshint validthis:true */
var processor = this;
var processFileFn = processFile.bind(processor);
// For reporting final results
var defaultStats = {
'error': 0,
'skip': 0,
'fix': 0
};
// Temporarily combining both Q and async... should reduce to just one or the other
var taskList = fileList.map(function(filePath) {
return function(cb) {
processFileFn(filePath, cb, options);
};
});
// If `options.maxConcurrency` is `0` or less, then default to processing the
// whole list in parallel (a.k.a. "unlimited")
var maxConcurrency = options.maxConcurrency > 0 ? options.maxConcurrency : taskList.length;
// Parallelize, within reason
async.parallelLimit(taskList, maxConcurrency, function(err, statuses) {
// Roll up the counts of each status
var results = (statuses || []).reduce(
function(previousVal, currentVal) {
if (!previousVal.hasOwnProperty(currentVal)) {
throw new Error('Reduced object did not have the expected key "' + currentVal + '": ' + JSON.stringify(previousVal));
}
previousVal[currentVal] += 1;
return previousVal;
},
defaultStats
);
callback(null, results);
});
}
// Promise bindings
var validateGlobPatternsFn = validator.validateGlobPatterns.bind(validator);
var globFn = globber.glob.bind(globber);
function Dos2UnixConverter(defaultOptions) {
// Enforce the constructor pattern
if (!(this instanceof Dos2UnixConverter)) {
return new Dos2UnixConverter(defaultOptions);
}
// Mixin the EventEmitter API
EventEmitter.call(this);
// Validate and save the default options (if any)
var validOptions = validator.validateOptions(defaultOptions);
this.defaultOptions = extend(true, {}, validOptions);
}
// Mark it as inherited, too
util.inherits(Dos2UnixConverter, EventEmitter);
// GO!
Dos2UnixConverter.prototype.process = function(globPatterns, options) {
/*jshint validthis:true */
var processor = this;
var processAllFilesFn = processAllFiles.bind(processor);
this.emit('start');
var opts = extend(true, {}, this.defaultOptions, options);
// Find the pertinent files from the glob, sorted and without duplicates
var taskList = [
function(callback) {
validateGlobPatternsFn(globPatterns, callback);
},
function(validGlobPatterns, callback) {
globFn(validGlobPatterns, opts.glob, callback);
},
function(fileList, callback) {
processAllFilesFn(fileList, opts, callback);
}
];
async.waterfall(taskList, function(err, stats) {
if (err) {
processor.emit('error', err);
}
else {
processor.emit('end', stats);
}
});
};
// Exports
module.exports = Dos2UnixConverter;