UNPKG

word-math

Version:

Extended version of the Word Processing Document library, compatible with OMML

216 lines 8.4 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.parse_cfb = void 0;
var cfb_1 = require("cfb");
var jsdom_1 = require("jsdom");
var omml2mathml = require('omml2mathml');

/**
 * Escape the characters that are significant inside HTML text content.
 *
 * Document text is copied into an HTML string next to real markup, so a
 * literal `<`, `>` or `&` must not be emitted verbatim.
 *
 * @param {string} text - Raw text taken from the document.
 * @returns {string} The text with `&`, `<` and `>` replaced by entities.
 */
function escape_html_text(text) {
    return String(text)
        .replace(/&/g, "&amp;") // must run first so the entities below are not re-escaped
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
}

/* ECMA 17.3.1.22 p CT_P */
/**
 * Process one child node of a paragraph, recursing into container elements.
 *
 * Non-element nodes are skipped. Text runs (`w:t`) are recorded on `root.elts`
 * (raw text) and appended, HTML-escaped, to `parsedData.parsedHTML`. Math
 * elements (`m:oMath`, `m:oMathPara`) are handed to `omml2mathml`; when it
 * returns a value, that value's `outerHTML` is appended to
 * `parsedData.parsedHTML` and pushed onto `parsedData.maths`.
 *
 * @param {Node} child - Node to process.
 * @param {{elts: Array}} root - Paragraph record; receives `{ t: "s", v }` entries.
 * @param {{parsedHTML: string, maths: string[]}} parsedData - Accumulator, mutated in place.
 * @returns {void}
 * @throws {string} A message string (not an Error object) for an unrecognized tag name.
 */
function process_para(child, root, parsedData) {
    if (child.nodeType !== 1 /* ELEMENT_NODE */)
        return;
    var element = child;
    switch (element.tagName) {
        // Containers: descend into the children.
        case "w:r":
        case "w:sdt":
        case "w:sdtContent":
        case "w:customXml":
        case "w:hyperlink": // TODO: store actual hyperlink?
            element.childNodes.forEach(function (kid) {
                process_para(kid, root, parsedData);
            });
            break;

        case "w:t":
            root.elts.push({ t: "s", v: element.textContent });
            parsedData.parsedHTML += escape_html_text(element.textContent);
            break;

        case "m:oMath":
        case "m:oMathPara":
            var mathmlElement = omml2mathml(element);
            if (mathmlElement) {
                var markup = mathmlElement.outerHTML;
                parsedData.parsedHTML += markup;
                parsedData.maths.push(markup);
            }
            break;

        // Recognized but intentionally ignored. These must NOT reach the math
        // branch above: they are not OMML and have nothing to convert.
        case "w:br":
        case "w:annotationRef":
        case "w:bookmarkEnd":
        case "w:bookmarkStart":
        case "w:commentRangeStart":
        case "w:commentRangeEnd":
        case "w:commentReference": // TODO: add reference support
        case "w:del":
        case "w:drawing":
        case "w:endnoteReference":
        case "w:fldChar":
        case "w:fldSimple":
        case "w:footnoteReference":
        case "w:ins":
        case "w:instrText":
        case "w:lastRenderedPageBreak":
        case "w:moveFrom":
        case "w:moveFromRangeStart":
        case "w:moveFromRangeEnd":
        case "w:moveTo":
        case "w:moveToRangeStart":
        case "w:moveToRangeEnd":
        case "w:noBreakHyphen":
        case "w:object":
        case "w:pict":
        case "w:pPr":
        case "w:proofErr":
        case "w:rPr":
        case "w:ruby":
        case "w:sdtEndPr":
        case "w:sdtPr":
        case "w:sectPr":
        case "w:smartTag":
        case "w:softHyphen":
        case "w:sym":
        case "w:tab": // TODO: Add tab support
        case "mc:AlternateContent":
        case "w16se:sym":
            break;

        default:
            // A string is thrown (rather than an Error) to keep the existing error contract.
            throw "DOCX para unsupported ".concat(element.tagName, " element");
    }
}
/**
 * Convert a table cell element into a cell record.
 *
 * Each child node is run through `process_body_elt`; every non-empty result
 * is stored in the cell's `p` list.
 *
 * @param {Element} tcelt - The `w:tc` element.
 * @param {object} parsedData - Accepted for signature compatibility; not used here.
 * @returns {{t: string, p: Array}} Cell record with `t: "c"`.
 */
function process_tc(tcelt, parsedData) {
    var tableCell = { t: "c", p: [] };
    tcelt.childNodes.forEach(function (child) {
        var data = process_body_elt(child, false);
        if (data)
            tableCell.p.push(data);
    });
    return tableCell;
}

/**
 * Convert a table row element into a row record.
 *
 * The row's HTML is produced later by `mapHTMLTable` from the returned
 * structure, so nothing is written to `parsedData` here.
 *
 * @param {Element} trelt - The `w:tr` element.
 * @param {object} parsedData - Forwarded unchanged to `process_tc`.
 * @returns {{t: string, c: Array}} Row record with `t: "r"`.
 * @throws {string} A message string for an unrecognized child tag.
 */
function process_tr(trelt, parsedData) {
    var tableRow = { t: "r", c: [] };
    trelt.childNodes.forEach(function (child) {
        if (child.nodeType !== 1 /* ELEMENT_NODE */)
            return;
        switch (child.tagName) {
            case "w:trPr":
            case "w:sdt":
            case "w:tblPrEx":
            case "w:commentRangeEnd":
                break;
            case "w:tc":
                tableRow.c.push(process_tc(child, parsedData));
                break;
            default:
                throw "DOCX tablerow unsupported ".concat(child.tagName, " element");
        }
    });
    return tableRow;
}

/**
 * Render a table record as an HTML string.
 *
 * Every paragraph record in a cell becomes `<p>…</p>` (its `localData`
 * fragments joined), cells become `<td>`, rows `<tr>`, all wrapped in a
 * single `<table>`.
 *
 * @param {{r: Array<{c: Array<{p: Array<{localData: string[]}>}>}>}} table - Table record.
 * @returns {string} HTML markup for the table.
 */
function mapHTMLTable(table) {
    var rows = table.r.map(function (row) {
        var cells = row.c.map(function (cell) {
            var paras = cell.p.map(function (para) {
                return "<p>".concat(para.localData.join(''), "</p>");
            });
            return "<td>".concat(paras.join(''), "</td>");
        });
        return "<tr>".concat(cells.join(''), "</tr>");
    });
    return "<table>".concat(rows.join(''), "</table>");
}

/**
 * Gather the `maths` entries of every paragraph record stored in a table.
 *
 * Nested tables need no special handling: each is stored in a cell as a
 * record whose own `maths` list was filled by `process_body_elt`.
 *
 * @param {{r: Array}} table - Table record.
 * @returns {string[]} Math markup strings in row, cell, paragraph order.
 */
function collect_table_maths(table) {
    var maths = [];
    table.r.forEach(function (row) {
        row.c.forEach(function (cell) {
            cell.p.forEach(function (para) {
                maths = maths.concat(para.maths);
            });
        });
    });
    return maths;
}

/**
 * Convert a table element into a table record.
 *
 * @param {Element} tablelt - The `w:tbl` element.
 * @param {object} parsedData - Forwarded unchanged to `process_tr`.
 * @returns {{t: string, r: Array}} Table record with `t: "t"`.
 * @throws {string} A message string for an unrecognized child tag.
 */
function process_table(tablelt, parsedData) {
    var table = { t: "t", r: [] };
    tablelt.childNodes.forEach(function (child) {
        if (child.nodeType !== 1 /* ELEMENT_NODE */)
            return;
        switch (child.tagName) {
            case "w:tblPr":
            case "w:tblGrid":
            case "w:bookmarkEnd":
                break;
            case "w:tr":
                table.r.push(process_tr(child, parsedData));
                break;
            default:
                throw "DOCX table unsuported ".concat(child.tagName, " element");
        }
    });
    return table;
}

/**
 * Process one body-level (or cell-level) node.
 *
 * A paragraph yields a record whose `elts` holds its text runs, whose
 * `localData` holds one HTML fragment, and whose `maths` holds its math
 * markup. A table yields a record whose `elts` holds the table record,
 * whose `localData` holds the table HTML, and whose `maths` holds the math
 * markup found in its cells.
 *
 * @param {Node} child - Node to process.
 * @param {boolean} [root=false] - Whether the node is a direct child of the
 *   body. Kept for signature compatibility; it does not change the result.
 * @returns {{elts: Array, localData: string[], maths: string[]}|undefined}
 *   A record for paragraphs and tables; `undefined` for non-element nodes
 *   and for recognized-but-ignored elements.
 * @throws {string} A message string for an unrecognized tag name.
 */
function process_body_elt(child, root) {
    if (child.nodeType !== 1 /* ELEMENT_NODE */)
        return undefined;
    var element = child;
    var para = { elts: [], localData: [], maths: [] };
    var parsedData = { parsedHTML: "", maths: [] };
    switch (element.tagName) {
        case "w:p":
            element.childNodes.forEach(function (kid) {
                process_para(kid, para, parsedData);
            });
            para.localData.push(parsedData.parsedHTML);
            para.maths = para.maths.concat(parsedData.maths);
            return para;

        case "w:tbl":
            var table = process_table(element, parsedData);
            para.elts.push(table);
            para.localData.push(mapHTMLTable(table));
            // Surface equations inside cells so they are not missing from `maths`.
            para.maths = para.maths.concat(collect_table_maths(table));
            return para;

        // Recognized but ignored.
        case "w:customXml": // actual element name; TODO: descend into its content
        case "w:customXML": // legacy spelling kept so existing matches still work
        case "w:sectPr":
        case "w:bookmarkStart":
        case "w:bookmarkEnd":
        case "w:commentRangeEnd":
        case "w:moveFromRangeEnd":
        case "w:tcPr":
        case "w:sdt":
        case "w:altChunk": // TODO: implicit/explicit link handling
        case "mc:AlternateContent":
            return undefined;

        default:
            throw "DOCX body unsupported ".concat(element.tagName, " element");
    }
}

/**
 * Parse the main document part of a DOCX container.
 *
 * Looks up `/word/document.xml` in `file` with `cfb`'s `find`, parses it as
 * XML with JSDOM, and processes every child of the `w:body` element.
 *
 * @param {object} file - Container object accepted by `cfb`'s `find`.
 * @returns {{p: Array, html: string, maths: string[]}} `p` holds the records
 *   returned by `process_body_elt`, `html` their concatenated HTML fragments,
 *   and `maths` all collected math markup in document order.
 * @throws {Error} If the document part or the `w:body` element is missing.
 * @throws {string} A message string when an unsupported element is encountered.
 */
function parse_cfb(file) {
    // Get content of document.xml
    var entry = (0, cfb_1.find)(file, "/word/document.xml");
    if (!entry || !entry.content)
        throw new Error("DOCX is missing /word/document.xml");

    // Parse with JSDOM
    var dom = new jsdom_1.JSDOM(entry.content.toString(), { contentType: "text/xml" });
    var rootelt = dom.window.document.children[0];
    var bodyelt = rootelt.querySelector("w\\:document > w\\:body");
    if (!bodyelt)
        throw new Error("DOCX document.xml has no w:body element");

    var docx = { p: [], html: '', maths: [] };
    var htmlData = [];
    var mathData = [];
    bodyelt.childNodes.forEach(function (child) {
        var res = process_body_elt(child, true);
        if (!res)
            return;
        docx.p.push(res);
        // Join explicitly instead of relying on implicit array stringification.
        htmlData.push(res.localData.join(''));
        mathData = mathData.concat(res.maths);
    });
    docx.html = htmlData.join('');
    docx.maths = mathData;
    return docx;
}
exports.parse_cfb = parse_cfb;
//# sourceMappingURL=index.js.map