UNPKG

xlsx-to-json-img

Version:

Convert Excel (.xlsx) to JSON with embedded images as Base64. Supports custom column mapping.

813 lines (806 loc) 30.6 kB
import { utils, read } from 'xlsx';
import JSZip from 'jszip';
import { DOMParser } from 'xmldom';

/** Lower-cased file extension -> subtype of the `image/*` MIME type. */
const IMAGE_SUBTYPES = { jpg: 'jpeg', jpeg: 'jpeg', png: 'png', gif: 'gif', bmp: 'bmp' };

/**
 * Helper: parse an XML string into a DOM document.
 * @param {string} xml - XML source text
 * @returns {Document} Parsed document
 */
function parseXml(xml) {
  return new DOMParser().parseFromString(xml, 'text/xml');
}

/**
 * Helper: Convert input to ArrayBuffer
 * @param {Blob|Buffer|ArrayBuffer|ArrayBufferView|Array} input - File input
 * @returns {Promise<ArrayBuffer>} ArrayBuffer (rejects for unsupported input)
 */
function toArrayBuffer(input) {
  if (typeof Blob !== 'undefined' && input instanceof Blob) {
    return input.arrayBuffer();
  }
  if (input instanceof ArrayBuffer) {
    return Promise.resolve(input);
  }
  // Covers Node's Buffer (a Uint8Array subclass) and every other typed-array
  // view without touching the `Buffer` global, which does not exist in browsers.
  if (ArrayBuffer.isView(input)) {
    const end = input.byteOffset + input.byteLength;
    // Copy only the viewed window: the backing buffer may be larger (pooled).
    return Promise.resolve(input.buffer.slice(input.byteOffset, end));
  }
  if (Array.isArray(input)) {
    return Promise.resolve(new Uint8Array(input).buffer);
  }
  return Promise.reject(new Error('Unsupported file type. Expected Blob, Buffer, or ArrayBuffer.'));
}

/**
 * Helper: turn a relationship `Target` into a key of the JSZip `files` object.
 * @param {string} target - `Target` attribute of a `<Relationship>` element
 * @param {boolean} isFloat - true when the .rels file belongs to a drawing part
 * @returns {string} Path relative to the archive root
 */
function resolveImagePath(target, isFloat) {
  if (target.startsWith('/')) {
    // Absolute target: already relative to the package root.
    return target.slice(1);
  }
  // Drawing relationships are written relative to xl/drawings ("../media/x.png");
  // cell-image relationships are relative to xl/ ("media/x.png").
  return isFloat ? target.replace('../', 'xl/') : `xl/${target}`;
}

/**
 * Helper: read a zero-based anchor index from an `xdr:col` / `xdr:row` element.
 * @param {Element|undefined} node - Anchor element, possibly missing
 * @returns {number} Parsed index, or 0 when the node is missing or not numeric
 */
function readAnchorIndex(node) {
  if (!node) {
    return 0;
  }
  const parsed = Number.parseInt(node.textContent, 10);
  return Number.isNaN(parsed) ? 0 : parsed;
}

/**
 * Helper: resolve the value of one cell position, swapping image references
 * for the image's data URL.
 * @param {Object|undefined} cell - SheetJS cell object, if the cell exists
 * @param {Object|undefined} column - Positional column config (`options[sheet].columns[C]`)
 * @param {number} colIndex - Zero-based column index
 * @param {Array} images - Output of `analysisImageLocation`
 * @returns {{value: *, hasData: boolean}} Cell value and whether it counts as row data
 */
function readCellValue(cell, column, colIndex, images) {
  if (cell) {
    if (cell.h && cell.h.includes('DISPIMG')) {
      // In-cell image formula: look the picture up by its name. Images with an
      // empty name are skipped because `includes('')` is always true.
      const match = images.find((img) => img.imgId !== '' && cell.h.includes(img.imgId));
      return { value: match ? match.path : null, hasData: true };
    }
    return { value: cell.v, hasData: true };
  }

  if (column && column.isImage) {
    // NOTE(review): floating images are matched by column only, so every empty
    // cell of this column receives the same (first) image; the anchor row is
    // not compared. Confirm whether a per-row match is intended.
    const match = images.find(
      (img) =>
        img.isFloat &&
        img.col === colIndex &&
        (column.sheetName === undefined || column.sheetName === img.sheetName),
    );
    return { value: match ? match.path : null, hasData: true };
  }

  return { value: null, hasData: false };
}

/**
 * Helper: convert one worksheet into an array of row objects.
 * @param {Object} worksheet - SheetJS worksheet
 * @param {Object|undefined} sheetConfig - `options[sheetName]` (`columns`, `headerIndex`)
 * @param {Array} images - Output of `analysisImageLocation`
 * @returns {Array<Object>} One object per row that contains data
 */
function readSheetRows(worksheet, sheetConfig, images) {
  if (!worksheet['!ref']) {
    // Empty sheet: there is no range to decode.
    return [];
  }

  const range = utils.decode_range(worksheet['!ref']);
  const columns = (sheetConfig && sheetConfig.columns) || [];
  const headerRow = range.s.r + ((sheetConfig && sheetConfig.headerIndex) || 0);

  // Map column index -> output field name, by matching header text to config names.
  const fieldByColumn = new Map();
  for (let C = range.s.c; C <= range.e.c; C++) {
    const headerCell = worksheet[utils.encode_cell({ r: headerRow, c: C })];
    const header = headerCell ? headerCell.v : `Column_${C}`;
    const configured = columns.find((col) => col.name === header);
    if (configured) {
      fieldByColumn.set(C, configured.field);
    }
  }

  const rows = [];
  for (let R = headerRow + 1; R <= range.e.r; R++) {
    const row = {};
    let rowHasData = false;

    for (let C = range.s.c; C <= range.e.c; C++) {
      const cell = worksheet[utils.encode_cell({ r: R, c: C })];
      const { value, hasData } = readCellValue(cell, columns[C], C, images);
      if (hasData) {
        rowHasData = true;
      }

      const fieldName = fieldByColumn.get(C);
      if (!fieldName) {
        continue;
      }
      if (row[fieldName] === undefined) {
        row[fieldName] = value;
      } else {
        // Several columns map to the same field: collect their values in an array.
        if (!Array.isArray(row[fieldName])) {
          row[fieldName] = [row[fieldName]];
        }
        row[fieldName].push(value);
      }
    }

    if (rowHasData) {
      rows.push(row);
    }
  }
  return rows;
}

/**
 * Main function: Convert Excel file to JSON with images
 * @param {File|Blob|Buffer|ArrayBuffer} file - Excel file
 * @param {Object} options - Column mapping config, keyed by sheet name
 * @returns {Promise<Object>} Result with sheets, images, metadata
 * @throws {Error} "File conversion failed: ..." when `file` is of an unsupported type
 */
async function excelToJsonWithImages(file, options) {
  let arrayBuffer;
  try {
    arrayBuffer = await toArrayBuffer(file);
  } catch (error) {
    throw new Error(`File conversion failed: ${error.message}`, { cause: error });
  }

  const workbook = read(arrayBuffer, { type: 'array', cellFormula: true });
  const sheetNames = workbook.SheetNames;

  // Extract images via JSZip
  const zip = await JSZip.loadAsync(arrayBuffer);
  const sheetInfo = await getSheetList(zip.files);
  const imageList = await analysisImageList(zip.files, sheetInfo);
  const images = await analysisImageLocation(zip.files, imageList, sheetInfo);

  const result = {
    sheets: {},
    images,
    metadata: {
      sheetCount: sheetNames.length,
      imageCount: images.length,
      // Only File objects carry name/size/lastModified; fall back for raw buffers.
      fileName: typeof file.name === 'string' ? file.name : '',
      fileSize: typeof file.size === 'number' ? file.size : arrayBuffer.byteLength,
      fileLastModified: file.lastModified !== undefined ? file.lastModified : '',
    },
  };

  for (const sheetName of sheetNames) {
    const worksheet = workbook.Sheets[sheetName];
    if (!worksheet) {
      continue;
    }
    const sheetConfig = options ? options[sheetName] : undefined;
    result.sheets[sheetName] = readSheetRows(worksheet, sheetConfig, images);
  }

  return result;
}

/**
 * List the workbook's sheets and the drawing parts expected for each of them.
 * @param {Object} files - JSZip files object
 * @returns {Promise<Object>} `{ sheetList, drawingsRels, drawingsXml }`; all three
 *   arrays are empty when `xl/workbook.xml` is absent
 */
async function getSheetList(files) {
  const sheetList = [];
  const drawingsRels = [];
  const drawingsXml = [];

  const workbookFile = files['xl/workbook.xml'];
  if (workbookFile) {
    const doc = parseXml(await workbookFile.async('string'));
    const sheets = doc.getElementsByTagName('sheet');

    for (let i = 0; i < sheets.length; i++) {
      const name = sheets[i].getAttribute('name');
      const sheetId = sheets[i].getAttribute('sheetId');

      sheetList.push({ name, sheetId });
      // NOTE(review): this assumes the drawing part number equals the sheetId.
      // OOXML links a sheet to its drawing through xl/worksheets/_rels/*.rels,
      // so the two numbers can differ - confirm against real files.
      drawingsRels.push({
        path: `xl/drawings/_rels/drawing${sheetId}.xml.rels`,
        sheetName: name,
        sheetId,
        isFloat: true,
      });
      drawingsXml.push({
        path: `xl/drawings/drawing${sheetId}.xml`,
        isFloat: true,
        sheetName: name,
        sheetId,
      });
    }
  }

  return { sheetList, drawingsRels, drawingsXml };
}

/**
 * Analyze image list from .rels files
 * @param {Object} files - JSZip files object
 * @param {Object} sheetInfo - Output of `getSheetList`
 * @returns {Promise<Array>} List of images; `path` is a base64 data URL
 */
async function analysisImageList(files, sheetInfo) {
  const relPaths = [
    { path: 'xl/_rels/cellimages.xml.rels', isFloat: false },
    ...sheetInfo.drawingsRels,
  ];
  const pending = [];

  for (const { path, isFloat, sheetName = '', sheetId = '' } of relPaths) {
    const relFile = files[path];
    if (!relFile) {
      continue;
    }

    const doc = parseXml(await relFile.async('string'));
    const relationships = doc.getElementsByTagName('Relationship');

    for (let i = 0; i < relationships.length; i++) {
      const id = relationships[i].getAttribute('Id');
      const fullPath = resolveImagePath(relationships[i].getAttribute('Target'), isFloat);
      const imgFile = files[fullPath];
      if (!imgFile) {
        continue;
      }

      // Decode all images concurrently; they are awaited together below.
      pending.push(
        imgFile.async('base64').then((base64) => {
          const ext = fullPath.toLowerCase().split('.').pop();
          const mimeType = `image/${IMAGE_SUBTYPES[ext] || 'png'}`;
          return {
            id,
            path: `data:${mimeType};base64,${base64}`,
            isFloat,
            sheetName,
            sheetId,
          };
        }),
      );
    }
  }

  const loaded = await Promise.all(pending);
  return loaded.filter(Boolean);
}

/**
 * Analyze image location (cell or floating)
 * @param {Object} files - JSZip files object
 * @param {Array} imageList - List of extracted images
 * @param {Object} sheetInfo - Output of `getSheetList`
 * @returns {Promise<Array>} List of images with position info
 */
async function analysisImageLocation(files, imageList, sheetInfo) {
  const located = [];
  if (imageList.length === 0) {
    // Nothing to locate; skip reading the drawing parts altogether.
    return located;
  }

  const drawingPaths = [{ path: 'xl/cellimages.xml', isFloat: false }, ...sheetInfo.drawingsXml];

  for (const { path, isFloat, sheetName = '', sheetId = '' } of drawingPaths) {
    const file = files[path];
    if (!file) {
      continue;
    }

    const doc = parseXml(await file.async('string'));
    const blips = doc.getElementsByTagName('a:blip');
    const cNvPrs = doc.getElementsByTagName('xdr:cNvPr');
    const cols = doc.getElementsByTagName('xdr:col');
    const rows = doc.getElementsByTagName('xdr:row');

    for (let i = 0; i < blips.length; i++) {
      const embedId = blips[i].getAttribute('r:embed');
      // Relationship ids are only unique per .rels file, so floating images
      // must also belong to the same sheet.
      const image = imageList.find((img) =>
        isFloat
          ? img.id === embedId && img.isFloat && img.sheetId === sheetId
          : img.id === embedId && !img.isFloat,
      );
      if (!image) {
        continue;
      }

      const props = cNvPrs[i];
      const name = (props && props.getAttribute('name')) || '';
      const descr = (props && props.getAttribute('descr')) || '';
      // The i-th picture's position is read from the (2*i)-th xdr:col / xdr:row
      // element; the row is stored 1-based.
      const formX = readAnchorIndex(cols[i * 2]);
      const formY = readAnchorIndex(rows[i * 2]) + 1;

      located.push({
        // encode_col yields "AA", "AB", ... past column Z.
        form: isFloat ? utils.encode_col(formX) + formY : '',
        col: formX,
        row: formY,
        isFloat,
        path: image.path,
        id: image.id,
        imgId: name,
        sheetName,
        sheetId,
        descr,
      });
    }
  }

  return located;
}

export { excelToJsonWithImages };