/*
 * xlrd-parser
 * Version:
 * High-performance Excel file parser based on the xlrd library from www.python-excel.org.
 * 254 lines (215 loc) • 5.62 kB
 * JavaScript
 */
var _ = require('underscore'),
EventEmitter = require('events').EventEmitter,
util = require('util'),
spawn = require('child_process').spawn;
/**
 * Streams the contents of Excel workbooks by running xlrd-parser.py in a
 * child `python` process and translating its newline-delimited JSON records
 * into events.
 *
 * Events emitted on the instance:
 *   'open'  (workbook)                  - a workbook ('w') record was read
 *   'data'  ({ workbook, sheet, rows }) - a batch of complete rows of one sheet
 *   'error' (Error)                     - an 'err' record from the script, an
 *                                         unparsable output line, a failure to
 *                                         start the process, or a non-zero exit
 *   'close' ()                          - emitted exactly once, after all
 *                                         output has been processed
 *
 * @param {String|Array} path The path of the file(s) to load
 * @param {Object} options The options object (optional). Recognised keys:
 *   `meta` (adds -m), `sheet` / `sheets` (one -s per entry), `maxRows` (-r)
 *   and `debug` (logs raw output). The object is never modified.
 */
function XlrdParser(path, options) {
  var self = this,
    // Private copy: normalising sheet/sheets must not touch the caller's object.
    settings = _.extend({ debug: false }, options),
    workbook,
    sheet,
    row,
    rowindex = -1,
    rows = [],
    remaining = '',
    finished = false,
    runxlrd;

  // Builds the argument list passed to the python interpreter.
  function formatArgs() {
    // NOTE(review): this path is resolved against the process working
    // directory, not this module's directory - confirm that is intended.
    var args = ['deps/python-excel-xlrd/xlrd-parser.py'],
      hasSheets = _.has(settings, 'sheets') || _.has(settings, 'sheet'),
      // `sheets` takes precedence over the singular `sheet` alias.
      sheets = _.has(settings, 'sheets') ? settings.sheets : settings.sheet;

    if (settings.meta) {
      args.push('-m');
    }
    if (hasSheets) {
      (_.isArray(sheets) ? sheets : [sheets]).forEach(function (s) {
        args.push('-s', s);
      });
    }
    if (_.has(settings, 'maxRows')) {
      args.push('-r', settings.maxRows);
    }
    args.push(path);
    // `path` (and `sheets`) may themselves be arrays.
    return _.flatten(args);
  }

  // Converts the tagged-array encoding of non-native cell values.
  function parseValue(value) {
    if (_.isArray(value)) {
      if (value[0] === 'date') {
        // Months arrive 1-based; the Date constructor expects 0-based.
        return new Date(
          value[1], value[2] - 1, value[3],
          value[4], value[5], value[6]
        );
      } else if (value[0] === 'error') {
        return _.extend(new Error(value[1]), { id: 'cell_error', errorCode: value[1] });
      } else if (value[0] === 'empty') {
        return null;
      }
    }
    return value;
  }

  // Maps the numeric visibility code of a sheet record to its label.
  function describeVisibility(code) {
    switch (code) {
      case 1: return 'hidden';
      case 2: return 'very hidden';
      default: return 'visible';
    }
  }

  // Forgets the row being built so the next cell always starts a fresh row.
  function resetRowTracking() {
    row = null;
    rowindex = -1;
  }

  // Emits accumulated rows. With keepCurrent the last row is held back,
  // because its remaining cells may only arrive with the next chunk.
  function flushRows(keepCurrent) {
    var ready = rows;
    if (keepCurrent && rows.length > 0) {
      ready = rows.slice(0, -1);
      rows = rows.slice(-1);
    } else {
      rows = [];
    }
    if (ready.length > 0) {
      self.emit('data', {
        workbook: workbook,
        sheet: sheet,
        rows: ready
      });
    }
  }

  // Handles one complete line of script output.
  function handleLine(line) {
    var record, value;
    if (!line.trim()) {
      return;
    }
    try {
      record = JSON.parse(line);
    } catch (err) {
      // Report through the emitter instead of throwing inside a stream handler.
      self.emit('error', _.extend(
        new Error('invalid output from xlrd-parser.py: ' + err.message),
        { id: 'parse_failure', line: line })
      );
      return;
    }
    value = record[1];
    switch (record[0]) {
      case 'w':
        // Rows still pending belong to the previous workbook: emit them first.
        flushRows(false);
        sheet = null;
        resetRowTracking();
        workbook = {
          file: value.file,
          meta: {
            user: value.user,
            sheets: value.sheets
          }
        };
        self.emit('open', workbook);
        break;
      case 's':
        if (sheet != null) {
          // if we are changing sheets, flush rows immediately
          flushRows(false);
        }
        // Row indexes restart per sheet; never continue the previous sheet's row.
        resetRowTracking();
        sheet = {
          index: value.index,
          name: value.name,
          bounds: {
            rows: value.rows,
            columns: value.columns
          },
          visibility: describeVisibility(value.visibility)
        };
        break;
      case 'c':
        if (value.r !== rowindex) {
          // switch rows
          row = [];
          rowindex = value.r;
          rows.push(row);
        }
        // append cell to current row
        row.push({
          row: value.r,
          column: value.c,
          address: value.a,
          value: parseValue(value.v)
        });
        break;
      case 'err':
        self.emit('error', _.extend(
          new Error('[' + value.exception + '] ' + value.type + ': ' + value.details),
          value)
        );
        break;
    }
  }

  // Ends the stream exactly once: drains buffered output, reports `err`
  // (if any) and emits 'close'.
  function finish(err) {
    if (finished) {
      return;
    }
    finished = true;
    // A final line without trailing newline is still a record.
    handleLine(remaining);
    remaining = '';
    flushRows(false);
    if (err) {
      self.emit('error', err);
    }
    self.emit('close');
  }

  EventEmitter.call(this);

  runxlrd = spawn('python', formatArgs());
  runxlrd.stdout.setEncoding('utf8');
  runxlrd.stdout.on('data', function (data) {
    var lines;
    if (settings.debug) {
      console.log(data);
    }
    // Prepend the unfinished tail of the previous chunk, then keep the new
    // tail (everything after the last newline) for the next chunk.
    lines = (remaining + data).split('\n');
    remaining = lines.pop();
    lines.forEach(handleLine);
    flushRows(true);
  });

  // Emitted when the process could not be started (e.g. python is missing).
  runxlrd.on('error', function (err) {
    finish(_.extend(err, { id: 'process_failure' }));
  });

  // 'close' (unlike 'exit') fires only after the stdio streams have ended,
  // so no 'data' can follow our own 'close' event.
  runxlrd.on('close', function (code) {
    var failure = null;
    if (code !== 0) {
      failure = _.extend(
        new Error('exit code ' + code + ' returned from xlrd-parser.py'),
        { id: 'process_failure', code: code }
      );
    }
    finish(failure);
  });
}
util.inherits(XlrdParser, EventEmitter);
/**
* Parses a workbook, returning an EventEmitter instance for loading the results
*
* @param {String|Array} path The path of the file(s) to load
* @param {Object} options The options object (optional)
* @returns {EventEmitter} The instance to use to stream the results
*/
exports.stream = function (path, options) {
return new XlrdParser(path, options);
};
/**
* Parses a workbook, returning a workbook/sheet/row/cell structure
*
* @param {String|Array} path The path of the file(s) to load
* @param {Object} options The options object (optional)
* @param {Function} callback The callback method to invoke with the results
*/
exports.parse = function (path, options, callback) {
if (_.isFunction(options)) {
callback = options;
options = null;
}
var reader = new XlrdParser(path, options),
currentWorkbook,
workbooks = [],
errors;
reader.on('open', function (workbook) {
currentWorkbook = workbook;
workbooks.push(workbook);
});
reader.on('data', function (data) {
if (!currentWorkbook.sheets) {
currentWorkbook.sheets = [];
}
var sheet = _.find(currentWorkbook.sheets, function (s) {
return s.index === data.sheet.index;
});
if (!sheet) {
sheet = _.extend({ rows: [] }, data.sheet);
currentWorkbook.sheets.push(sheet);
currentWorkbook.sheets[sheet.name] = sheet;
}
data.rows.forEach(function (row) {
sheet.rows.push(row);
});
});
reader.on('error', function (err) {
if (!errors) {
errors = err;
} else {
errors = [errors];
errors.push(err);
}
});
reader.on('close', function () {
callback = callback || function () {};
if (!errors && !workbooks.length) {
errors = _.extend(new Error('file not found: ' + path), { id: 'file_not_found' });
}
if (workbooks.length === 0) {
callback(errors, null);
} else if (workbooks.length === 1) {
callback(errors, workbooks[0])
} else {
callback(errors, workbooks);
}
});
};