UNPKG

usfm-grammar

Version:

Uses the tree-sitter-usfm3 parser to convert USFM files to other formats such as USJ, USX, and CSV, and converts them back to USFM

1,181 lines (1,160 loc) 776 kB
var $a9A1a$treesitter = require("tree-sitter"); var $a9A1a$assert = require("assert"); var $a9A1a$treesitterusfm3 = require("tree-sitter-usfm3"); var $a9A1a$xmldom = require("xmldom"); var $a9A1a$xpath = require("xpath"); var $a9A1a$nodefs = require("node:fs"); var $a9A1a$ajv = require("ajv"); function $parcel$export(e, n, v, s) { Object.defineProperty(e, n, {get: v, set: s, enumerable: true, configurable: true}); } $parcel$export(module.exports, "USFMParser", () => $4fa36e821943b400$export$db89aa78daab09ca, (v) => $4fa36e821943b400$export$db89aa78daab09ca = v); $parcel$export(module.exports, "Filter", () => $4fa36e821943b400$export$ec91da630f36d5ea, (v) => $4fa36e821943b400$export$ec91da630f36d5ea = v); $parcel$export(module.exports, "Validator", () => $4fa36e821943b400$export$9eeb22c0bba4ed5e, (v) => $4fa36e821943b400$export$9eeb22c0bba4ed5e = v); $parcel$export(module.exports, "ORIGINAL_VREF", () => $4fa36e821943b400$export$a324ce9cedb2444c, (v) => $4fa36e821943b400$export$a324ce9cedb2444c = v); var $4fa36e821943b400$export$db89aa78daab09ca; var $4fa36e821943b400$export$ec91da630f36d5ea; // exports.Format = Format; var $4fa36e821943b400$export$9eeb22c0bba4ed5e; var $4fa36e821943b400$export$a324ce9cedb2444c; var $be3194b3d68e926d$export$db89aa78daab09ca; var $be3194b3d68e926d$export$ec91da630f36d5ea; var $be3194b3d68e926d$export$a324ce9cedb2444c// exports.Format = Format; ; var $b37353ce824c37f3$export$69486ebd11f334d1; var $0783021dea42787b$export$b32929182cc4fefe; var $0783021dea42787b$export$75b2f422ff60acaf; var $0783021dea42787b$export$aa24f570494ce8d0; var $0783021dea42787b$export$1790a88fa433c8d6; var $0783021dea42787b$export$c63c0b6e358f0774; var $0783021dea42787b$export$1187115108c84d45; $0783021dea42787b$export$b32929182cc4fefe = [ "USJ", "table" ]; $0783021dea42787b$export$75b2f422ff60acaf = [ "char", "note", "figure", "ref" ]; $0783021dea42787b$export$aa24f570494ce8d0 = [ "type", "marker", "content", "number", "sid", "code", "caller", "align", "version", "altnumber", "pubnumber", "category" ]; $0783021dea42787b$export$1790a88fa433c8d6 = [ "style", "number", "sid", "code", "caller", "align", "version", "altnumber", "pubnumber", "category" ]; $0783021dea42787b$export$c63c0b6e358f0774 = [ "char", "note", "verse", "table:cell" ]; $0783021dea42787b$export$1187115108c84d45 = [ "char", "note", "verse", "cell" ]; var $b37353ce824c37f3$require$NO_USFM_USJ_TYPES = $0783021dea42787b$export$b32929182cc4fefe; var $b37353ce824c37f3$require$CLOSING_USJ_TYPES = $0783021dea42787b$export$75b2f422ff60acaf; var $b37353ce824c37f3$require$NON_ATTRIB_USJ_KEYS = $0783021dea42787b$export$aa24f570494ce8d0; var $b37353ce824c37f3$require$NO_NEWLINE_USJ_TYPES = $0783021dea42787b$export$c63c0b6e358f0774; var $b37353ce824c37f3$require$NON_ATTRIB_USX_KEYS = $0783021dea42787b$export$1790a88fa433c8d6; var $b37353ce824c37f3$require$NO_NEWLINE_USX_TYPES = $0783021dea42787b$export$1187115108c84d45; class $b37353ce824c37f3$var$USFMGenerator { constructor(){ this.usfmString = ""; this.warnings = []; } usjToUsfm(usjObj, nested = false) { if (usjObj.type === 'optbreak') { if (this.usfmString !== '' && ![ '\n', '\r', ' ', '\t' ].includes(this.usfmString.slice(-1))) this.usfmString += ' '; this.usfmString += '// '; return; } if (usjObj.type === "ref") usjObj.marker = "ref"; if (!$b37353ce824c37f3$require$NO_USFM_USJ_TYPES.includes(usjObj.type)) { this.usfmString += "\\"; if (nested && usjObj.type === "char") this.usfmString += "+"; this.usfmString += `${usjObj.marker} `; } [ "code", "number", "caller" ].forEach((key)=>{ if (usjObj[key]) this.usfmString += `${usjObj[key]} `; }); if (usjObj.category) this.usfmString += `\\cat ${usjObj.category}\\cat*\n`; if (usjObj.altnumber) { if (usjObj.marker === "c") this.usfmString += `\\ca ${usjObj.altnumber} \\ca*\n`; else if (usjObj.marker === "v") this.usfmString += `\\va ${usjObj.altnumber} \\va* `; } if (usjObj.pubnumber) { if (usjObj.marker === "c") this.usfmString += `\\cp ${usjObj.pubnumber}\n`; else if (usjObj.marker === "v") this.usfmString += `\\vp ${usjObj.pubnumber} \\vp* `; } if (Array.isArray(usjObj.content)) usjObj.content.forEach((item)=>{ if (typeof item === "string") this.usfmString += item; else this.usjToUsfm(item, usjObj.type === "char" && item.marker !== "fv"); }); let attributes = []; Object.keys(usjObj).forEach((key)=>{ if (!$b37353ce824c37f3$require$NON_ATTRIB_USJ_KEYS.includes(key)) { let lhs = key; if (key === "file") lhs = "src"; attributes.push(`${lhs}="${usjObj[key]}"`); } }); if (attributes.length > 0) this.usfmString += `|${attributes.join(" ")}`; if ($b37353ce824c37f3$require$CLOSING_USJ_TYPES.includes(usjObj.type)) { this.usfmString += `\\`; if (nested && usjObj.type === "char") this.usfmString += "+"; this.usfmString += `${usjObj.marker}* `; } if (usjObj.type === "ms") { if ("sid" in usjObj) { if (attributes.length == 0) this.usfmString += '|'; this.usfmString += `sid="${usjObj.sid}" `; } this.usfmString = this.usfmString.trim() + "\\*"; } if (usjObj.type === "sidebar") this.usfmString += "\\esbe"; if (!$b37353ce824c37f3$require$NO_NEWLINE_USJ_TYPES.includes(usjObj.type) && this.usfmString[this.usfmString.length - 1] !== "\n") this.usfmString += "\n"; return this.usfmString; } usxToUsfm(xmlObj, nested = false) { // Check if xmlObj is a string // if (typeof xmlObj === 'string') { // // this.usfmString += xmlObj; // return; // } const objType = xmlObj.tagName; let marker = null; let usfmAttributes = []; if ([ 'verse', 'chapter' ].includes(objType) && xmlObj.hasAttribute('eid')) return; if (!$b37353ce824c37f3$require$NO_NEWLINE_USX_TYPES.includes(objType)) this.usfmString += '\n'; if (objType === 'optbreak') { if (this.usfmString !== '' && ![ '\n', '\r', ' ', '\t' ].includes(this.usfmString.slice(-1))) this.usfmString += ' '; this.usfmString += '// '; } if (xmlObj.hasAttribute('style')) { marker = xmlObj.getAttribute('style'); if (nested && objType === 'char' && ![ 'xt', 'fv', 'ref' ].includes(marker)) marker = `+${marker}`; this.usfmString += `\\${marker} `; } else if (objType === 'ref') { marker = 'ref'; this.usfmString += `\\${marker} `; } if (xmlObj.hasAttribute('code')) this.usfmString += xmlObj.getAttribute('code'); if (xmlObj.hasAttribute('number')) this.usfmString += `${xmlObj.getAttribute('number')} `; if (xmlObj.hasAttribute('caller')) this.usfmString += `${xmlObj.getAttribute('caller')} `; if (xmlObj.hasAttribute('altnumber')) { if (objType === 'verse') this.usfmString += `\\va ${xmlObj.getAttribute('altnumber')}\\va*`; else if (objType === 'chapter') this.usfmString += `\n\\ca ${xmlObj.getAttribute('altnumber')}\\ca*`; } if (xmlObj.hasAttribute('pubnumber')) { if (objType === 'verse') this.usfmString += `\\vp ${xmlObj.getAttribute('pubnumber')}\\vp*`; else if (objType === 'chapter') this.usfmString += `\n\\cp ${xmlObj.getAttribute('pubnumber')}`; } if (xmlObj.hasAttribute('category')) this.usfmString += `\n\\cat ${xmlObj.getAttribute('category')} \\cat*`; const children = Array.from(xmlObj.childNodes); for (const child of children){ if (child.nodeType === 1) { if (objType === 'char') this.usxToUsfm(child, true); else this.usxToUsfm(child, false); } if (child.nodeType === 3 && child.nodeValue.trim()) { if (this.usfmString !== '' && ![ '\n', '\r', ' ', '\t' ].includes(this.usfmString.slice(-1))) this.usfmString += ' '; this.usfmString += child.nodeValue.trim(); } } const attributes = Array.from(xmlObj.attributes); for (const attrNode of attributes){ let key = attrNode.name; let val = attrNode.value.replace(/"/g, ''); if (key === 'file' && objType === 'figure') usfmAttributes.push(`src="${val}"`); else if (!$b37353ce824c37f3$require$NON_ATTRIB_USX_KEYS.includes(key)) usfmAttributes.push(`${key}="${val}"`); if ([ 'sid', 'eid' ].includes(key) && objType === 'ms') usfmAttributes.push(`${key}="${val}"`); } if (usfmAttributes.length > 0) { this.usfmString += '|'; this.usfmString += usfmAttributes.join(' '); } if (xmlObj.hasAttribute('closed') && xmlObj.getAttribute('closed') === 'true' || $b37353ce824c37f3$require$CLOSING_USJ_TYPES.includes(objType) || usfmAttributes.length > 0) { if (objType === 'ms') this.usfmString += '\\*'; else this.usfmString += `\\${marker}*`; } if (objType === 'sidebar') this.usfmString += '\n\\esbe\n'; } bibleNlptoUsfm(bibleNlpObj) { const vrefPattern = /([a-zA-Z0-9]{3}) (\d+):(.*)/; let currBook = null; let currChapter = null; for(let i = 0; i < bibleNlpObj.vref.length; i++){ const vref = bibleNlpObj.vref[i]; const verseText = bibleNlpObj.text[i]; const refMatch = vref.match(vrefPattern); if (!refMatch) throw new Error(`Incorrect format: ${vref}.\nIn BibleNlp, vref should have ` + `three-letter book code, chapter, and verse in the following format: GEN 1:1`); const book = refMatch[1].toUpperCase(); const chap = refMatch[2]; const verse = refMatch[3]; if (book !== currBook) { if (currBook !== null) { this.warnings.push(`USFM can contain only one book per file. ` + `Only ${currBook} is processed. Specify bookCode for other books.`); break; } this.usfmString += `\\id ${book}`; currBook = book; } if (chap !== currChapter) { this.usfmString += `\n\\c ${chap}\n\\p\n`; currChapter = chap; } if (!this.usfmString.endsWith("\n")) this.usfmString += ' '; this.usfmString += `\\v ${verse} ${verseText}`; } } } $b37353ce824c37f3$export$69486ebd11f334d1 = $b37353ce824c37f3$var$USFMGenerator; var $be3194b3d68e926d$require$USFMGenerator = $b37353ce824c37f3$export$69486ebd11f334d1; //Logics for syntax-tree to dict(USJ) conversions var $bc15495701a08748$export$da1572eff96010ef; const { Query: $bc15495701a08748$var$Query } = $a9A1a$treesitter; var $48a7bc78362df58a$export$3ea3efefd6f5792b; var $48a7bc78362df58a$export$bfe3d604e5046dbb; var $48a7bc78362df58a$export$32d1909bfb943eb0; var $48a7bc78362df58a$export$ec3044778745fabd; var $48a7bc78362df58a$export$38bc6f52843beb2a; var $48a7bc78362df58a$export$af42bd8f70df8555; var $48a7bc78362df58a$export$c4d2f24f22330b2a; const $48a7bc78362df58a$var$CHAR_STYLE_MARKERS = [ "add", "bk", "dc", "ior", "iqt", "k", "litl", "nd", "ord", "pn", "png", "qac", "qs", "qt", "rq", "sig", "sls", "tl", "wj", "em", "bd", "bdit", "it", "no", "sc", "sup", "rb", "pro", "w", "wh", "wa", "wg", "lik", "liv", "jmp", "fr", "ft", "fk", "fq", "fqa", "fl", "fw", "fp", "fv", "fdc", "xo", "xop", "xt", "xta", "xk", "xq", "xot", "xnt", "xdc" ]; $48a7bc78362df58a$export$3ea3efefd6f5792b = [ "ide", "usfm", "h", "toc", "toca", "imt", "is", "ip", "ipi", "im", "imi", "ipq", "imq", "ipr", "iq", "ib", "ili", "iot", "io", "iex", "imte", "ie", "mt", "mte", "cl", "cd", "ms", "mr", "s", "sr", "r", "d", "sp", "sd", "q", "qr", "qc", "qa", "qm", "qd", "lh", "li", "lf", "lim", "litl", "sts", "rem", "lit", "restore" ]; $48a7bc78362df58a$export$bfe3d604e5046dbb = [ "f", "fe", "ef", "efe", "x", "ex" ]; $48a7bc78362df58a$export$32d1909bfb943eb0 = $48a7bc78362df58a$var$CHAR_STYLE_MARKERS; $48a7bc78362df58a$export$ec3044778745fabd = $48a7bc78362df58a$var$CHAR_STYLE_MARKERS.map((item)=>item + "Nested"); $48a7bc78362df58a$export$38bc6f52843beb2a = { w: "lemma", rb: "gloss", xt: "href", fig: "alt", xt_standalone: "href", xtNested: "href", ref: "loc", "milestone": "who", "k": "key" }; $48a7bc78362df58a$export$af42bd8f70df8555 = [ "tc", "th", "tcr", "thr", "tcc" ]; $48a7bc78362df58a$export$c4d2f24f22330b2a = [ "fig", "cat", "esb", "b", "ph", "pi" ]; var $bc15495701a08748$require$PARA_STYLE_MARKERS = $48a7bc78362df58a$export$3ea3efefd6f5792b; var $bc15495701a08748$require$NOTE_MARKERS = $48a7bc78362df58a$export$bfe3d604e5046dbb; var $bc15495701a08748$require$CHAR_STYLE_MARKERS = $48a7bc78362df58a$export$32d1909bfb943eb0; var $bc15495701a08748$require$NESTED_CHAR_STYLE_MARKERS = $48a7bc78362df58a$export$ec3044778745fabd; var $bc15495701a08748$require$DEFAULT_ATTRIB_MAP = $48a7bc78362df58a$export$38bc6f52843beb2a; var $bc15495701a08748$require$TABLE_CELL_MARKERS = $48a7bc78362df58a$export$af42bd8f70df8555; var $bc15495701a08748$require$MISC_MARKERS = $48a7bc78362df58a$export$c4d2f24f22330b2a; class $bc15495701a08748$var$USJGenerator { constructor(treeSitterLanguageObj, usfmString, usjRootObj = null){ this.usfmLanguage = treeSitterLanguageObj; this.usfm = usfmString; this.jsonRootObj = usjRootObj || { type: "USJ", version: "3.1", content: [] }; } findLastFromJson(jsonObj, typeValue) { let output = null; if (typeValue === jsonObj.type || jsonObj.marker && typeValue === jsonObj.marker) output = jsonObj; if (jsonObj.content) jsonObj.content.forEach((child)=>{ if (typeof child === "string") return; const childOutput = this.findLastFromJson(child, typeValue); if (childOutput !== null) output = childOutput; }); return output; } nodeToUSJId(node, parentJsonObj) { const idCaptures = new $bc15495701a08748$var$Query(this.usfmLanguage, "(id (bookcode) @book-code (description)? @desc)").captures(node); let code = null; let desc = null; idCaptures.forEach((capture)=>{ if (capture.name === "book-code") code = this.usfm.slice(capture.node.startIndex, capture.node.endIndex); else if (capture.name === "desc") desc = this.usfm.slice(capture.node.startIndex, capture.node.endIndex); }); const bookJsonObj = { type: "book", marker: "id", code: code, content: [] }; if (desc && desc.trim() !== "") bookJsonObj.content.push(desc.trim()); parentJsonObj.content.push(bookJsonObj); } // Similar conversion methods for other node types nodeToUSJC(node, parentJsonObj) { // Build c, the chapter milestone node in usj const chapCap = new $bc15495701a08748$var$Query(this.usfmLanguage, `(c (chapterNumber) @chap-num (ca (chapterNumber) @alt-num)? (cp (text) @pub-num)?)`).captures(node); const chapNum = this.usfm.slice(chapCap[0].node.startIndex, chapCap[0].node.endIndex); let chapRef = null; this.jsonRootObj.content.forEach((child)=>{ if (child.type === "book") { chapRef = `${child.code} ${chapNum}`; return; } }); const chapJsonObj = { type: "chapter", marker: "c", number: chapNum, sid: chapRef }; chapCap.forEach((cap)=>{ if (cap.name === "alt-num") chapJsonObj.altnumber = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim(); if (cap.name === "pub-num") chapJsonObj.pubnumber = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim(); }); parentJsonObj.content.push(chapJsonObj); node.children.forEach((child)=>{ if ([ "cl", "cd" ].includes(child.type)) this.nodeToUSJ(child, parentJsonObj); }); } nodeToUSJChapter(node, parentJsonObj) { // Build chapter node in USJ node.children.forEach((child)=>{ if (child.type === "c") this.nodeToUSJC(child, parentJsonObj); else this.nodeToUSJ(child, parentJsonObj); }); } nodeToUSJVerse(node, parentJsonObj) { // Build verse node in USJ const verseNumCap = new $bc15495701a08748$var$Query(this.usfmLanguage, ` (v (verseNumber) @vnum (va (verseNumber) @alt)? (vp (text) @vp)? )`).captures(node); const verseNum = this.usfm.substring(verseNumCap[0].node.startIndex, verseNumCap[0].node.endIndex); const vJsonObj = { type: "verse", marker: "v", number: verseNum.trim() }; verseNumCap.forEach((capture)=>{ if (capture.name === "alt") { const altNum = this.usfm.slice(capture.node.startIndex, capture.node.endIndex); vJsonObj.altnumber = altNum; } else if (capture.name === "vp") { const vpText = this.usfm.substring(capture.node.startIndex, capture.node.endIndex); vJsonObj.pubnumber = vpText; } }); const ref = `${this.findLastFromJson(this.jsonRootObj, "chapter").sid}:${verseNum}`; vJsonObj.sid = ref.trim(); parentJsonObj.content.push(vJsonObj); } nodeToUSJCaVa(node, parentJsonObj) { // Build elements for independent ca and va away from c and v const style = node.type; const charJsonObj = { type: "char", marker: style }; const altNumMatch = new $bc15495701a08748$var$Query(this.usfmLanguage, `([ (chapterNumber) (verseNumber) ] @alt-num)`).captures(node); const altNum = this.usfm.slice(altNumMatch[0].node.startIndex, altNumMatch[0].node.endIndex).trim(); charJsonObj.altnumber = altNum; parentJsonObj.content.push(charJsonObj); } nodeToUSJPara(node, parentJsonObj) { // Build paragraph nodes in USJ if (node.children[0].type.endsWith("Block")) node.children[0].children.forEach((child)=>{ this.nodeToUSJPara(child, parentJsonObj); }); else if (node.type === "paragraph") { const paraTagCap = new $bc15495701a08748$var$Query(this.usfmLanguage, "(paragraph (_) @para-marker)").captures(node)[0]; const paraMarker = paraTagCap.node.type; let paraJsonObj = null; if (paraMarker === "b") parentJsonObj.content.push({ type: "para", marker: paraMarker }); else if (!paraMarker.endsWith("Block")) { let paraJsonObj = { type: "para", marker: paraMarker, content: [] }; paraTagCap.node.children.forEach((child)=>{ this.nodeToUSJ(child, paraJsonObj); }); parentJsonObj.content.push(paraJsonObj); } } else if ([ "pi", "ph" ].includes(node.type)) { const paraMarker = this.usfm.substring(node.children[0].startIndex, node.children[0].endIndex).replace("\\", "").trim(); let paraJsonObj = { type: "para", marker: paraMarker, content: [] }; node.children.slice(1).forEach((child)=>{ this.nodeToUSJ(child, paraJsonObj); }); parentJsonObj.content.push(paraJsonObj); } } nodeToUSJNotes(node, parentJsonObj) { // Build USJ nodes for footnotes and cross-references const tagNode = node.children[0]; const callerNode = node.children[1]; const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").trim(); const noteJsonObj = { type: "note", marker: style, content: [] }; noteJsonObj.caller = this.usfm.substring(callerNode.startIndex, callerNode.endIndex).trim(); for(let i = 2; i < node.children.length - 1; i++)this.nodeToUSJ(node.children[i], noteJsonObj); parentJsonObj.content.push(noteJsonObj); } nodeToUSJChar(node, parentJsonObj) { // Build USJ nodes for character markups, both regular and nested const tagNode = node.children[0]; let childrenRange = node.children.length; if (node.children[node.children.length - 1].type.startsWith("\\")) childrenRange -= 1; // Exclude the last node if it starts with '\', treating it as a closing node const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").replace("+", "").trim(); const charJsonObj = { type: "char", marker: style, content: [] }; // Assume a flag for closed markup, toggle this if your conditions and data structure require // charJsonObj.closed = node.children[node.children.length - 1].type.startsWith('\\'); for(let i = 1; i < childrenRange; i++)this.nodeToUSJ(node.children[i], charJsonObj); parentJsonObj.content.push(charJsonObj); } nodeToUSJTable(node, parentJsonObj) { // Handle table related components and convert to USJ if (node.type === "table") { const tableJsonObj = { type: "table", content: [] }; node.children.forEach((child)=>{ this.nodeToUSJ(child, tableJsonObj); }); parentJsonObj.content.push(tableJsonObj); } else if (node.type === "tr") { const rowJsonObj = { type: "table:row", marker: "tr", content: [] }; node.children.slice(1).forEach((child)=>{ this.nodeToUSJ(child, rowJsonObj); }); parentJsonObj.content.push(rowJsonObj); } else if ($bc15495701a08748$require$TABLE_CELL_MARKERS.includes(node.type)) { const tagNode = node.children[0]; const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").trim(); const cellJsonObj = { type: "table:cell", marker: style, content: [], align: style.includes("tcc") ? "center" : style.includes("r") ? "end" : "start" }; node.children.slice(1).forEach((child)=>{ this.nodeToUSJ(child, cellJsonObj); }); parentJsonObj.content.push(cellJsonObj); } } nodeToUSJAttrib(node, parentJsonObj) { // Add attribute values to USJ elements const attribNameNode = node.children[0]; let attribName = this.usfm.slice(attribNameNode.startIndex, attribNameNode.endIndex).trim(); // Handling special cases for attribute names if (attribName === "|") { let parentType = node.parent.type; if (parentType.includes("Nested")) parentType = parentType.replace("Nested", ""); attribName = $bc15495701a08748$require$DEFAULT_ATTRIB_MAP[parentType]; } if (attribName === "src") // for \fig attribName = "file"; const attribValCap = new $bc15495701a08748$var$Query(this.usfmLanguage, "((attributeValue) @attrib-val)").captures(node); let attribValue = ""; if (attribValCap.length > 0) attribValue = this.usfm.substring(attribValCap[0].node.startIndex, attribValCap[0].node.endIndex).trim(); parentJsonObj[attribName] = attribValue; } nodeToUSJMilestone(node, parentJsonObj) { // Create ms node in USJ const msNameCap = new $bc15495701a08748$var$Query(this.usfmLanguage, `( [(milestoneTag) (milestoneStartTag) (milestoneEndTag) (zSpaceTag) ] @ms-name)`).captures(node)[0]; const style = this.usfm.slice(msNameCap.node.startIndex, msNameCap.node.endIndex).replace("\\", "").trim(); const msJsonObj = { type: "ms", marker: style, content: [] }; node.children.forEach((child)=>{ if (child.type.endsWith("Attribute")) this.nodeToUSJ(child, msJsonObj); }); // Though normally milestones don't have contents, custom z-namespaces could have them if (!msJsonObj.content.length) delete msJsonObj.content; // Remove empty content array if not used parentJsonObj.content.push(msJsonObj); } nodeToUSJSpecial(node, parentJsonObj) { // Build nodes for esb, cat, fig, optbreak in USJ if (node.type === "esb") { const sidebarJsonObj = { type: "sidebar", marker: "esb", content: [] }; node.children.slice(1, -1).forEach((child)=>{ this.nodeToUSJ(child, sidebarJsonObj); }); parentJsonObj.content.push(sidebarJsonObj); } else if (node.type === "cat") { const catCap = new $bc15495701a08748$var$Query(this.usfmLanguage, "((category) @category)").captures(node)[0]; const category = this.usfm.substring(catCap.node.startIndex, catCap.node.endIndex).trim(); parentJsonObj.category = category; } else if (node.type === "fig") { const figJsonObj = { type: "figure", marker: "fig", content: [] }; node.children.slice(1, -1).forEach((child)=>{ this.nodeToUSJ(child, figJsonObj); }); parentJsonObj.content.push(figJsonObj); } else if (node.type === "ref") { const refJsonObj = { type: "ref", content: [] }; node.children.slice(1, -1).forEach((child)=>{ this.nodeToUSJ(child, refJsonObj); }); parentJsonObj.content.push(refJsonObj); } } nodeToUSJGeneric(node, parentJsonObj) { // Build nodes for para style markers in USJ const tagNode = node.children[0]; let style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex); if (style.startsWith("\\")) style = style.replace("\\", "").trim(); // console.log(node.children.length, node.children[0].type, node.children[1].type) let childrenRangeStart = 1; // if ( // node.children.length > 1 && // node.children[1].type.startsWith("numbered") // ) { // const numNode = node.children[1]; // const num = this.usfm.substring(numNode.startIndex, numNode.endIndex); // style += num; // childrenRangeStart = 2; // } const paraJsonObj = { type: "para", marker: style, content: [] }; parentJsonObj.content.push(paraJsonObj); for(let i = childrenRangeStart; i < node.children.length; i++){ const child = node.children[i]; if ($bc15495701a08748$require$CHAR_STYLE_MARKERS.includes(child.type) || $bc15495701a08748$require$NESTED_CHAR_STYLE_MARKERS.includes(child.type) || [ "text", "footnote", "crossref", "verseText", "v", "b", "milestone", "zNameSpace" ].includes(child.type)) // Only nest these types inside the upper para style node this.nodeToUSJ(child, paraJsonObj); else this.nodeToUSJ(child, parentJsonObj); } } nodeToUSJ(node, parentJsonObj) { // Check each node and based on the type convert to corresponding XML element switch(node.type){ case "id": this.nodeToUSJId(node, parentJsonObj); break; case "chapter": this.nodeToUSJChapter(node, parentJsonObj); break; case "cl": case "cp": case "cd": case "vp": this.nodeToUSJGeneric(node, parentJsonObj); break; case "ca": case "va": this.nodeToUSJCaVa(node, parentJsonObj); break; case "v": this.nodeToUSJVerse(node, parentJsonObj); break; case "verseText": node.children.forEach((child)=>this.nodeToUSJ(child, parentJsonObj)); break; case "paragraph": case "pi": case "ph": this.nodeToUSJPara(node, parentJsonObj); break; case "text": let textVal = this.usfm.substring(node.startIndex, node.endIndex); textVal = textVal.replace("~", " "); if (textVal !== "") parentJsonObj.content.push(textVal); break; case "table": case "tr": this.nodeToUSJTable(node, parentJsonObj); break; case "milestone": case "zNameSpace": this.nodeToUSJMilestone(node, parentJsonObj); break; case "esb": case "cat": case "fig": case "ref": this.nodeToUSJSpecial(node, parentJsonObj); break; case "usfm": break; default: if ($bc15495701a08748$require$NOTE_MARKERS.includes(node.type)) this.nodeToUSJNotes(node, parentJsonObj); else if ($bc15495701a08748$require$CHAR_STYLE_MARKERS.includes(node.type) || $bc15495701a08748$require$NESTED_CHAR_STYLE_MARKERS.includes(node.type) || [ "xt_standalone" ].includes(node.type)) this.nodeToUSJChar(node, parentJsonObj); else if ($bc15495701a08748$require$TABLE_CELL_MARKERS.includes(node.type)) this.nodeToUSJTable(node, parentJsonObj); else if (node.type.endsWith("Attribute")) this.nodeToUSJAttrib(node, parentJsonObj); else if ($bc15495701a08748$require$PARA_STYLE_MARKERS.includes(node.type) || $bc15495701a08748$require$PARA_STYLE_MARKERS.includes(node.type.replace("\\", "").trim())) this.nodeToUSJGeneric(node, parentJsonObj); else if ([ "", "|" ].includes(node.type.trim())) break; else if (node.children.length > 0) node.children.forEach((child)=>this.nodeToUSJ(child, parentJsonObj)); break; } } } $bc15495701a08748$export$da1572eff96010ef = $bc15495701a08748$var$USJGenerator; var $be3194b3d68e926d$require$USJGenerator = $bc15495701a08748$export$da1572eff96010ef; var $0ed9cb36901d4d14$export$24715706bc524307; class $0ed9cb36901d4d14$var$ListGenerator { /* Combines the methods used for List generation from USJ */ constructor(){ /* Variables shared by functions */ this.book = ""; this.currentChapter = ""; this.currentVerse = ""; this.list = [ [ "Book", "Chapter", "Verse", "Text", "Type", "Marker" ] ]; this.bibleNlpFormat = { "text": [], "vref": [] }; this.prevChapter = ""; this.prevVerse = ""; } usjToListId(obj) { /* Update book code */ this.book = obj.code; } usjToListC(obj) { /* Update current chapter */ this.currentChapter = obj.number; this.currentVerse = ""; } usjToListV(obj) { /* Update current verse */ this.currentVerse = obj.number; } usjToList(obj, excludeMarkers = null, includeMarkers = null) { /* Traverse the USJ dict and build the table in this.list */ if (obj.type === "book") { this.usjToListId(obj); if (excludeMarkers && excludeMarkers.includes("id") || includeMarkers && !includeMarkers.includes("id")) return; } else if (obj.type === "chapter") this.usjToListC(obj); else if (obj.type === "verse") this.usjToListV(obj); let markerType = obj.type; let markerName = obj.marker ? obj.marker : ''; if (markerType === "USJ") // This would occur if the JSON got flattened after removing paragraph markers markerType = ""; if (obj.content && obj.content.length > 0) { for (let item of obj.content)if (typeof item === "string") { if (excludeMarkers && excludeMarkers.includes("text")) item = ""; this.list.push([ this.book, this.currentChapter, this.currentVerse, item, markerType, markerName ]); } else this.usjToList(item, excludeMarkers, includeMarkers); } else if (!excludeMarkers && !includeMarkers || excludeMarkers && !excludeMarkers.includes(markerName) || includeMarkers && includeMarkers.includes(markerName)) this.list.push([ this.book, this.currentChapter, this.currentVerse, "", markerType, markerName ]); } usjToBibleNlpFormat(obj) { // Traverse the USJ object and build a dictionary for Bible NLP format if (obj.type === "book") this.usjToListId(obj); else if (obj.type === "chapter") this.usjToListC(obj); else if (obj.type === "verse") this.usjToListV(obj); else if (obj.content) { for (const item of obj.content)if (typeof item === "string") { if (this.currentChapter === this.prevChapter && this.currentVerse === this.prevVerse) this.bibleNlpFormat.text[this.bibleNlpFormat.text.length - 1] += " " + item.replace(/[\n\r]/g, " ").trim(); else { const vref = `${this.book} ${this.currentChapter}:${this.currentVerse}`; this.bibleNlpFormat.text.push(item.replace(/[\n\r]/g, " ").trim()); this.bibleNlpFormat.vref.push(vref); this.prevChapter = this.currentChapter; this.prevVerse = this.currentVerse; } } else this.usjToBibleNlpFormat(item); } } } $0ed9cb36901d4d14$export$24715706bc524307 = $0ed9cb36901d4d14$var$ListGenerator; var $be3194b3d68e926d$require$ListGenerator = $0ed9cb36901d4d14$export$24715706bc524307; //Logics for syntax-tree to xml(USX) conversions var $27f438441a7f3c46$export$69d2127c7776f273; var $27f438441a7f3c46$require$DOMImplementation = $a9A1a$xmldom.DOMImplementation; var $27f438441a7f3c46$require$XMLSerializer = $a9A1a$xmldom.XMLSerializer; const { Query: $27f438441a7f3c46$var$Query } = $a9A1a$treesitter; var $27f438441a7f3c46$require$PARA_STYLE_MARKERS = $48a7bc78362df58a$export$3ea3efefd6f5792b; var $27f438441a7f3c46$require$NOTE_MARKERS = $48a7bc78362df58a$export$bfe3d604e5046dbb; var $27f438441a7f3c46$require$CHAR_STYLE_MARKERS = $48a7bc78362df58a$export$32d1909bfb943eb0; var $27f438441a7f3c46$require$NESTED_CHAR_STYLE_MARKERS = $48a7bc78362df58a$export$ec3044778745fabd; var $27f438441a7f3c46$require$DEFAULT_ATTRIB_MAP = $48a7bc78362df58a$export$38bc6f52843beb2a; var $27f438441a7f3c46$require$TABLE_CELL_MARKERS = $48a7bc78362df58a$export$af42bd8f70df8555; var $27f438441a7f3c46$require$MISC_MARKERS = $48a7bc78362df58a$export$c4d2f24f22330b2a; class $27f438441a7f3c46$var$USXGenerator { /** * A binding for all methods used in generating USX from Syntax tree * @param {object} treeSitterLanguageObj - The Tree-sitter language object * @param {Buffer} usfmString - The USFM byte data * @param {Element} [usxRootElement] - The root element of the USX (optional) */ constructor(treeSitterLanguageObj, usfmString, usxRootElement = null){ this.usfmLanguage = treeSitterLanguageObj; this.usfm = usfmString; const domImpl = new $27f438441a7f3c46$require$DOMImplementation(); const doc = domImpl.createDocument(null, 'usx', null); if (usxRootElement === null) { this.xmlRootNode = doc.documentElement; this.xmlRootNode.setAttribute('version', '3.1'); } else this.xmlRootNode = usxRootElement; this.parseState = { prevVerseSid: null, prevVerseParent: null }; } /** * Builds the ID node in USX * @param {SyntaxNode} node - The syntax node * @param {Element} parentXmlNode - The parent XML node to append the ID to */ node2UsxId(node, parentXmlNode) { const idCaptures = new $27f438441a7f3c46$var$Query(this.usfmLanguage, "(id (bookcode) @book-code (description)? @desc)").captures(node); let code = null; let desc = null; idCaptures.forEach((capture)=>{ if (capture.name === 'book-code') code = this.usfm.slice(capture.node.startIndex, capture.node.endIndex); else if (capture.name === 'desc') desc = this.usfm.slice(capture.node.startIndex, capture.node.endIndex); }); const bookXmlNode = parentXmlNode.ownerDocument.createElement('book'); bookXmlNode.setAttribute('code', code); bookXmlNode.setAttribute('style', 'id'); if (desc && desc.trim() !== '') { const textNode = parentXmlNode.ownerDocument.createTextNode(desc.trim()); bookXmlNode.appendChild(textNode); } parentXmlNode.appendChild(bookXmlNode); } node2UsxC(node, parentXmlNode) { // Build c, the chapter milestone node in usj const chapCap = new $27f438441a7f3c46$var$Query(this.usfmLanguage, `(c (chapterNumber) @chap-num (ca (chapterNumber) @alt-num)? (cp (text) @pub-num)?)`).captures(node); const chapNum = this.usfm.slice(chapCap[0].node.startIndex, chapCap[0].node.endIndex); const bookNode = $a9A1a$xpath.select1("book", parentXmlNode); const bookCode = bookNode.getAttribute("code"); const chapRef = `${bookCode} ${chapNum}`; // Create the 'chapter' element const chapXmlNode = parentXmlNode.ownerDocument.createElement('chapter'); chapXmlNode.setAttribute("number", chapNum); chapXmlNode.setAttribute("style", "c"); chapXmlNode.setAttribute("sid", chapRef); chapCap.forEach((cap)=>{ if (cap.name === "alt-num") { const altNum = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim(); chapXmlNode.setAttribute('altnumber', altNum); } if (cap.name === "pub-num") { const pubNum = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim(); chapXmlNode.setAttribute('pubnumber', pubNum); } }); parentXmlNode.appendChild(chapXmlNode); node.children.forEach((child)=>{ if ([ "cl", "cd" ].includes(child.type)) this.node2Usx(child, parentXmlNode); }); } node2UsxChapter(node, parentXmlNode) { // Build chapter node in USJ node.children.forEach((child)=>{ if (child.type === "c") this.node2UsxC(child, parentXmlNode); else this.node2Usx(child, parentXmlNode); }); const prevVerses = $a9A1a$xpath.select("//verse", this.xmlRootNode); if (prevVerses.length > 0 && prevVerses[prevVerses.length - 1].hasAttribute('sid')) { const vEndXmlNode = parentXmlNode.ownerDocument.createElement('verse'); vEndXmlNode.setAttribute('eid', prevVerses[prevVerses.length - 1].getAttribute('sid')); const sibblingCount = parentXmlNode.childNodes.length; const lastSibbling = parentXmlNode.childNodes[sibblingCount - 1]; if (lastSibbling.tagName === "para") lastSibbling.appendChild(vEndXmlNode); else if (lastSibbling.tagName === "table") { const rows = lastSibbling.getElementsByTagName('row'); rows[rows.length - 1].appendChild(vEndXmlNode); } else parentXmlNode.appendChild(vEndXmlNode); } } node2UsxVerse(node, parentXmlNode) { // Check if there are previous verses to close if (this.parseState.prevVerseSid !== null) { let prevPara = this.parseState.prevVerseParent; let vEndXmlNode = prevPara.ownerDocument.createElement('verse'); vEndXmlNode.setAttribute("eid", this.parseState.prevVerseSid); prevPara.appendChild(vEndXmlNode); } // Query to capture verse-related elements const verseNumCap = new $27f438441a7f3c46$var$Query(this.usfmLanguage, ` (v (verseNumber) @vnum (va (verseNumber) @alt)? (vp (text) @vp)? )`).captures(node); const verseNum = this.usfm.substring(verseNumCap[0].node.startIndex, verseNumCap[0].node.endIndex); const vXmlNode = parentXmlNode.ownerDocument.createElement('verse'); parentXmlNode.appendChild(vXmlNode); // Loop through the captured elements and set the attributes verseNumCap.forEach((capture)=>{ if (capture.name === 'alt') { const altNum = this.usfm.slice(capture.node.startIndex, capture.node.endIndex); vXmlNode.setAttribute('altnumber', altNum); } else if (capture.name === 'vp') { const vpText = this.usfm.slice(capture.node.startIndex, capture.node.endIndex).trim(); vXmlNode.setAttribute('pubnumber', vpText); } }); // Get the last chapter's 'sid' attribute to form the verse reference const chapterSid = $a9A1a$xpath.select("//chapter", this.xmlRootNode).pop().getAttribute('sid'); const ref = `${chapterSid}:${verseNum}`; // Set attributes on the newly created 'verse' element vXmlNode.setAttribute('number', verseNum.trim()); vXmlNode.setAttribute('style', 'v'); vXmlNode.setAttribute('sid', ref.trim()); } node2UsxCaVa(node, parentXmlNode) { // Build elements for independent ca and va away from c and v const style = node.type; // Create a new 'char' element under the parent XML node const charXmlNode = parentXmlNode.ownerDocument.createElement('char'); charXmlNode.setAttribute('style', style); // Query to capture chapterNumber or verseNumber const altNumMatch = new $27f438441a7f3c46$var$Query(this.usfmLanguage, `([ (chapterNumber) (verseNumber) ] @alt-num)`).captures(node); // Extract the alternate number from the captured range const altNum = this.usfm.slice(altNumMatch[0].node.startIndex, altNumMatch[0].node.endIndex).trim(); // Set the attributes on the 'char' element charXmlNode.setAttribute('altnumber', altNum); charXmlNode.setAttribute('closed', 'true'); // Append the 'char' element to the parent XML node parentXmlNode.appendChild(charXmlNode); } node2UsxPara(node, parentXmlNode) { // Build paragraph nodes in USX if (node.children[0].type.endsWith('Block')) for (const child of node.children[0].children)this.node2UsxPara(child, parentXmlNode); else if (node.type === 'paragraph') { const paraTagCap = new $27f438441a7f3c46$var$Query(this.usfmLanguage, "(paragraph (_) @para-marker)").captures(node)[0]; const paraMarker = paraTagCap.node.type; if (!paraMarker.endsWith("Block")) { const paraXmlNode = parentXmlNode.ownerDocument.createElement("para"); paraXmlNode.setAttribute("style", paraMarker); parentXmlNode.appendChild(paraXmlNode); for (const child of paraTagCap.node.children.slice(1))this.node2Usx(child, paraXmlNode); } } else if ([ 'pi', 'ph' ].includes(node.type)) { const paraMarker = this.usfm.slice(node.children[0].startIndex, node.children[0].endIndex).replace("\\", "").trim(); const paraXmlNode = parentXmlNode.ownerDocument.createElement("para"); paraXmlNode.setAttribute("style", paraMarker); parentXmlNode.appendChild(paraXmlNode); for (const child of node.children.slice(1))this.node2Usx(child, paraXmlNode); } } node2UsxNotes(node, parentXmlNode) { // Build USJ nodes for footnotes and cross-references const tagNode = node.children[0]; const callerNode = node.children[1]; const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").trim(); const noteXmlNode = parentXmlNode.ownerDocument.createElement('note'); noteXmlNode.setAttribute('style', style); const caller = this.usfm.substring(callerNode.startIndex, callerNode.endIndex).trim(); noteXmlNode.setAttribute('caller', caller); parentXmlNode.appendChild(noteXmlNode); for(let i = 2; i < node.children.length - 1; i++)this.node2Usx(node.children[i], noteXmlNode); } node2UsxChar(node, parentXmlNode) { // Build USJ nodes for character markups, both regular and nested const tagNode = node.children[0]; let childrenRange = node.children.length; if (node.children[node.children.length - 1].type.startsWith("\\")) childrenRange -= 1; // Exclude the last node if it starts with '\', treating it as a closing node const charXmlNode = parentXmlNode.ownerDocument.createElement('char'); const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").replace("+", "").trim(); charXmlNode.setAttribute('style', style); parentXmlNode.appendChild(charXmlNode); for(let i = 1; i < childrenRange; i++)this.node2Usx(node.children[i], charXmlNode); } node2UsxAttrib(node, parentXmlNode) { // Add attribute values to USJ elements const attribNameNode = node.children[0]; let attribName = this.usfm.slice(attribNameNode.startIndex, attribNameNode.endIndex).trim(); // Handling special cases for attribute names if (attribName === "|") { let parentType = node.parent.type; if (parentType.includes("Nested")) parentType = parentType.replace("Nested", ""); attribName = $27f438441a7f3c46$require$DEFAULT_ATTRIB_MAP[parentType]; } if (attribName === "src") // for \fig attribName = "file"; const attribValCap = new $27f438441a7f3c46$var$Query(this.usfmLanguage, "((attributeValue) @attrib-val)").captures(node); let attribValue = ""; if (attribValCap.length > 0) attribValue = this.usfm.substring(attribValCap[0].node.startIndex, attribValCap[0].node.endIndex).trim(); parentXmlNode.setAttribute(attribName, attribValue); } node2UsxTable(node, parentXmlNode) { // Handle table related components and convert to USJ if (node.type === "table") { const tableXmlNode = parentXmlNode.ownerDocument.createElement('table'); parentXmlNode.appendChild(tableXmlNode); node.children.forEach((child)=>{ this.node2Usx(child, tableXmlNode); }); } else if (node.type === "tr") { const rowXmlNode = parentXmlNode.ownerDocument.createElement('row'); rowXmlNode.setAttribute("style", "tr"); parentXmlNode.appendChild(rowXmlNode); node.children.slice(1).forEach((child)=>{ this.node2Usx(child, rowXmlNode); }); } else if ($27f438441a7f3c46$require$TABLE_CELL_MARKERS.includes(node.type)) { const tagNode = node.children[0]; const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").trim(); const cellXmlNode = parentXmlNode.ownerDocument.createElement("cell"); cellXmlNode.setAttr