xlsx-extractor
Version:
Extract the columns/rows from an XLSX file.
296 lines (295 loc) • 7.51 kB
JavaScript
;
// Module interop helper. An existing `this.__importDefault` is reused when
// there is one; otherwise objects flagged with `__esModule` are returned
// untouched and every other value is wrapped as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const xml2js_1 = __importDefault(require("xml2js"));
const Zip = require('node-zip');
/**
 * The maximum number of sheets (Excel 97).
 * Used as the upper bound of the loop that probes for worksheet files.
 */
const MaxSheets = 256;
/**
 * Paths of the entries looked up inside the XLSX (zip) archive.
 */
const FilePaths = {
// Workbook XML; the sheet names are read from it.
WorkBook: 'xl/workbook.xml',
// Shared strings XML; only parsed when the entry exists in the archive.
SharedStrings: 'xl/sharedStrings.xml',
// Worksheet path prefix; callers append the sheet index and '.xml'.
SheetBase: 'xl/worksheets/sheet'
};
/**
* Create a empty cells.
* @param rows Rows count.
* @param cols Columns count.
* @return Cells.
*/
exports.createEmptyCells = (rows, cols) => {
const arr = [];
for (let i = 0; i < rows; ++i) {
const row = [];
for (let j = 0; j < cols; ++j) {
row.push('');
}
arr.push(row);
}
return arr;
};
/**
* Get a cells from a rows.
* @param rows Rows.
* @return Cells.
*/
exports.getCells = (rows) => {
const cells = [];
rows
.filter((row) => {
return row.c && 0 < row.c.length;
})
.forEach((row) => {
row.c.forEach((cell) => {
const position = exports.getPosition(cell.$.r);
cells.push({
row: position.row,
col: position.col,
type: cell.$.t ? cell.$.t : '',
value: cell.v && 0 < cell.v.length ? cell.v[0] : ''
});
});
});
return cells;
};
/**
* Get the coordinates of the cell.
* @param text Position text. Such as "A1" and "U109".
* @return Position.
*/
exports.getPosition = (text) => {
// 'A1' -> [A, 1]
const units = text.split(/([0-9]+)/);
if (units.length < 2) {
return { row: 0, col: 0 };
}
return {
row: parseInt(units[1], 10),
col: exports.numOfColumn(units[0])
};
};
/**
 * Find the relationship id that points at the worksheet file with the given index.
 * @param zip Extract data of XLSX (Zip) file.
 * @param index Index of the worksheet file (the N in `sheetN.xml`).
 * @returns Relationship id, or '' when it cannot be determined.
 */
const findSheetRelationId = async (zip, index) => {
    const relsFile = zip.files['xl/_rels/workbook.xml.rels'];
    if (!relsFile) {
        return '';
    }
    let rels;
    try {
        rels = await exports.parseXML(relsFile.asText());
    }
    catch (err) {
        // Deliberate best effort: an unreadable relationships part must not
        // break name lookup, the caller falls back to the sheetId comparison.
        return '';
    }
    const list = rels && rels.Relationships && rels.Relationships.Relationship;
    if (!Array.isArray(list)) {
        return '';
    }
    const suffix = `worksheets/sheet${index}.xml`;
    const hit = list.find((rel) => rel &&
        rel.$ &&
        typeof rel.$.Target === 'string' &&
        rel.$.Target.endsWith(suffix));
    return hit && hit.$.Id ? hit.$.Id : '';
};
/**
 * Get a sheet name.
 *
 * The worksheet file `sheet<index>.xml` is tied to its workbook entry through
 * the `r:id` relationship, not through `sheetId` (which is only a persistent
 * identifier and no longer matches the file number once sheets were deleted).
 * The relationship is therefore resolved first; the former `sheetId === index`
 * comparison is kept as a fallback when no relationship can be resolved.
 *
 * @param zip Extract data of XLSX (Zip) file.
 * @param index Index of sheet. Range of from 1 to XlsxExtractor.count.
 * @returns Sheet name, or '' when no matching sheet is found.
 */
const getSheetName = async (zip, index) => {
    const root = await exports.parseXML(zip.files[FilePaths.WorkBook].asText());
    const holder = root && root.workbook && root.workbook.sheets;
    const sheets = holder && 0 < holder.length ? holder[0].sheet : undefined;
    if (!Array.isArray(sheets)) {
        return '';
    }
    const nameOf = (sheet) => (sheet && sheet.$ && sheet.$.name) || '';
    const relationId = await findSheetRelationId(zip, index);
    if (relationId) {
        const byRelation = sheets.find((sheet) => sheet && sheet.$ && sheet.$['r:id'] === relationId);
        if (byRelation) {
            return nameOf(byRelation);
        }
    }
    const bySheetId = sheets.find((sheet) => sheet && sheet.$ && Number(sheet.$.sheetId) === index);
    return nameOf(bySheetId);
};
/**
* Get a sheet data.
* @param zip Extract data of XLSX (Zip) file.
* @param index Index of sheet. Range of from 1 to XlsxExtractor.count.
* @returns Sheet data.
*/
exports.getSheetData = async (zip, index) => {
const data = {
name: '',
sheet: {}
};
data.name = await getSheetName(zip, index);
data.sheet = await exports.parseXML(zip.files[FilePaths.SheetBase + index + '.xml'].asText());
if (zip.files[FilePaths.SharedStrings]) {
data.strings = await exports.parseXML(zip.files[FilePaths.SharedStrings].asText());
}
return data;
};
/**
* Gets the number of sheets.
* @param zip Extract data of XLSX (Zip) file.
* @returns Number of sheets
*/
exports.getSheetInnerCount = (zip) => {
let count = 0;
for (let i = 1; i < MaxSheets; ++i) {
const path = FilePaths.SheetBase + i + '.xml';
if (!zip.files[path]) {
break;
}
++count;
}
return count;
};
/**
* Get the range of the sheet.
* @param sheet Sheet data.
* @param cells Cells.
* @return Range.
*/
exports.getSheetSize = (sheet, cells) => {
// Get the there if size is defined
if (sheet &&
sheet.worksheet &&
sheet.worksheet.dimension &&
0 <= sheet.worksheet.dimension.length) {
const range = sheet.worksheet.dimension[0].$.ref.split(':');
if (range.length === 2) {
const min = exports.getPosition(range[0]);
const max = exports.getPosition(range[1]);
return {
row: { min: min.row, max: max.row },
col: { min: min.col, max: max.col }
};
}
}
const ascend = (a, b) => a - b;
const rows = cells.map((cell) => cell.row).sort(ascend);
const cols = cells.map((cell) => cell.col).sort(ascend);
return {
row: { min: rows[0], max: rows[rows.length - 1] },
col: { min: cols[0], max: cols[cols.length - 1] }
};
};
/**
* Convert the column text to number.
* @param text Column text, such as A" and "AA".
* @return Column number, otherwise -1.
*/
exports.numOfColumn = (text) => {
const letters = [
'',
'A',
'B',
'C',
'D',
'E',
'F',
'G',
'H',
'I',
'J',
'K',
'L',
'M',
'N',
'O',
'P',
'Q',
'R',
'S',
'T',
'U',
'V',
'W',
'X',
'Y',
'Z'
];
const col = text.trim().split('');
let num = 0;
for (let i = 0, max = col.length; i < max; ++i) {
num *= 26;
num += letters.indexOf(col[i]);
}
return num;
};
/**
* Parse the `r` element of XML.
* @param r `r` elements.
* @return Parse result.
*/
exports.parseR = (r) => {
let value = '';
r.forEach((obj) => {
if (obj.t) {
value += exports.parseT(obj.t);
}
});
return value;
};
/**
* Parse the `t` element of XML.
* @param t `t` elements.
* @return Parse result.
*/
exports.parseT = (t) => {
let value = '';
t.forEach((obj) => {
switch (typeof obj) {
case 'string':
value += obj;
break;
// The value of xml:space="preserve" is stored in the underscore
case 'object':
if (obj._ && typeof obj._ === 'string') {
value += obj._;
}
break;
default:
break;
}
});
return value;
};
/**
* Parse the XML text.
* @param xml XML text.
* @return XML parse task.
*/
exports.parseXML = (xml) => {
return new Promise((resolve, reject) => {
xml2js_1.default.parseString(xml, (err, obj) => {
return err ? reject(err) : resolve(obj);
});
});
};
/**
* Extract a zip file.
* @param path Zip file path.
* @return If success zip object, otherwise null.
* @throws Failed to expand the XLSX file.
*/
exports.unzip = (path) => {
try {
const file = fs_1.default.readFileSync(path_1.default.resolve(path));
return Zip(file);
}
catch (err) {
throw new Error('Failed to expand the XLSX file.');
}
};
/**
* Get a value from the cell strings.
*
* @param str Cell strings.
*
* @return Value.
*/
exports.valueFromStrings = (str) => {
let value = '';
const keys = Object.keys(str);
keys.forEach((key) => {
switch (key) {
case 't':
value += exports.parseT(str[key]);
break;
case 'r':
value += exports.parseR(str[key]);
break;
default:
break;
}
});
return value;
};