UNPKG

xlsx-extractor

Version:
296 lines (295 loc) 7.51 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const fs_1 = __importDefault(require("fs")); const path_1 = __importDefault(require("path")); const xml2js_1 = __importDefault(require("xml2js")); const Zip = require('node-zip'); /** The maximum number of sheets (Excel 97). */ const MaxSheets = 256; /** Defines the file path in the XLSX. */ const FilePaths = { WorkBook: 'xl/workbook.xml', SharedStrings: 'xl/sharedStrings.xml', SheetBase: 'xl/worksheets/sheet' }; /** * Create a empty cells. * @param rows Rows count. * @param cols Columns count. * @return Cells. */ exports.createEmptyCells = (rows, cols) => { const arr = []; for (let i = 0; i < rows; ++i) { const row = []; for (let j = 0; j < cols; ++j) { row.push(''); } arr.push(row); } return arr; }; /** * Get a cells from a rows. * @param rows Rows. * @return Cells. */ exports.getCells = (rows) => { const cells = []; rows .filter((row) => { return row.c && 0 < row.c.length; }) .forEach((row) => { row.c.forEach((cell) => { const position = exports.getPosition(cell.$.r); cells.push({ row: position.row, col: position.col, type: cell.$.t ? cell.$.t : '', value: cell.v && 0 < cell.v.length ? cell.v[0] : '' }); }); }); return cells; }; /** * Get the coordinates of the cell. * @param text Position text. Such as "A1" and "U109". * @return Position. */ exports.getPosition = (text) => { // 'A1' -> [A, 1] const units = text.split(/([0-9]+)/); if (units.length < 2) { return { row: 0, col: 0 }; } return { row: parseInt(units[1], 10), col: exports.numOfColumn(units[0]) }; }; /** * Get a sheet name. * @param zip Extract data of XLSX (Zip) file. * @param index Index of sheet. Range of from 1 to XlsxExtractor.count. * @returns Sheet name. */ const getSheetName = async (zip, index) => { const root = await exports.parseXML(zip.files[FilePaths.WorkBook].asText()); let name = ''; if (root && root.workbook && root.workbook.sheets && 0 < root.workbook.sheets.length && root.workbook.sheets[0].sheet) { root.workbook.sheets[0].sheet.some((sheet) => { const id = Number(sheet.$.sheetId); if (id === index) { name = sheet.$.name || ''; return true; } return false; }); } return name; }; /** * Get a sheet data. * @param zip Extract data of XLSX (Zip) file. * @param index Index of sheet. Range of from 1 to XlsxExtractor.count. * @returns Sheet data. */ exports.getSheetData = async (zip, index) => { const data = { name: '', sheet: {} }; data.name = await getSheetName(zip, index); data.sheet = await exports.parseXML(zip.files[FilePaths.SheetBase + index + '.xml'].asText()); if (zip.files[FilePaths.SharedStrings]) { data.strings = await exports.parseXML(zip.files[FilePaths.SharedStrings].asText()); } return data; }; /** * Gets the number of sheets. * @param zip Extract data of XLSX (Zip) file. * @returns Number of sheets */ exports.getSheetInnerCount = (zip) => { let count = 0; for (let i = 1; i < MaxSheets; ++i) { const path = FilePaths.SheetBase + i + '.xml'; if (!zip.files[path]) { break; } ++count; } return count; }; /** * Get the range of the sheet. * @param sheet Sheet data. * @param cells Cells. * @return Range. */ exports.getSheetSize = (sheet, cells) => { // Get the there if size is defined if (sheet && sheet.worksheet && sheet.worksheet.dimension && 0 <= sheet.worksheet.dimension.length) { const range = sheet.worksheet.dimension[0].$.ref.split(':'); if (range.length === 2) { const min = exports.getPosition(range[0]); const max = exports.getPosition(range[1]); return { row: { min: min.row, max: max.row }, col: { min: min.col, max: max.col } }; } } const ascend = (a, b) => a - b; const rows = cells.map((cell) => cell.row).sort(ascend); const cols = cells.map((cell) => cell.col).sort(ascend); return { row: { min: rows[0], max: rows[rows.length - 1] }, col: { min: cols[0], max: cols[cols.length - 1] } }; }; /** * Convert the column text to number. * @param text Column text, such as A" and "AA". * @return Column number, otherwise -1. */ exports.numOfColumn = (text) => { const letters = [ '', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' ]; const col = text.trim().split(''); let num = 0; for (let i = 0, max = col.length; i < max; ++i) { num *= 26; num += letters.indexOf(col[i]); } return num; }; /** * Parse the `r` element of XML. * @param r `r` elements. * @return Parse result. */ exports.parseR = (r) => { let value = ''; r.forEach((obj) => { if (obj.t) { value += exports.parseT(obj.t); } }); return value; }; /** * Parse the `t` element of XML. * @param t `t` elements. * @return Parse result. */ exports.parseT = (t) => { let value = ''; t.forEach((obj) => { switch (typeof obj) { case 'string': value += obj; break; // The value of xml:space="preserve" is stored in the underscore case 'object': if (obj._ && typeof obj._ === 'string') { value += obj._; } break; default: break; } }); return value; }; /** * Parse the XML text. * @param xml XML text. * @return XML parse task. */ exports.parseXML = (xml) => { return new Promise((resolve, reject) => { xml2js_1.default.parseString(xml, (err, obj) => { return err ? reject(err) : resolve(obj); }); }); }; /** * Extract a zip file. * @param path Zip file path. * @return If success zip object, otherwise null. * @throws Failed to expand the XLSX file. */ exports.unzip = (path) => { try { const file = fs_1.default.readFileSync(path_1.default.resolve(path)); return Zip(file); } catch (err) { throw new Error('Failed to expand the XLSX file.'); } }; /** * Get a value from the cell strings. * * @param str Cell strings. * * @return Value. */ exports.valueFromStrings = (str) => { let value = ''; const keys = Object.keys(str); keys.forEach((key) => { switch (key) { case 't': value += exports.parseT(str[key]); break; case 'r': value += exports.parseR(str[key]); break; default: break; } }); return value; };