UNPKG

usfm-grammar

Version:

Uses the tree-sitter-usfm3 parser to convert USFM files to other formats such as USJ, USX, and CSV, and converts them back to USFM

1,224 lines (1,201 loc) 782 kB
import * as $9wIQi$treesitter from "tree-sitter"; import * as $9wIQi$assert from "assert"; import * as $9wIQi$treesitterusfm3 from "tree-sitter-usfm3"; import {DOMImplementation as $9wIQi$DOMImplementation} from "@xmldom/xmldom"; import {readFileSync as $9wIQi$readFileSync} from "node:fs"; import * as $9wIQi$ajv from "ajv"; function $parcel$export(e, n, v, s) { Object.defineProperty(e, n, {get: v, set: s, enumerable: true, configurable: true}); } var $cf838c15c8b009ba$exports = {}; $parcel$export($cf838c15c8b009ba$exports, "USFMParser", () => $cf838c15c8b009ba$export$db89aa78daab09ca, (v) => $cf838c15c8b009ba$export$db89aa78daab09ca = v); $parcel$export($cf838c15c8b009ba$exports, "Filter", () => $cf838c15c8b009ba$export$ec91da630f36d5ea, (v) => $cf838c15c8b009ba$export$ec91da630f36d5ea = v); $parcel$export($cf838c15c8b009ba$exports, "Validator", () => $cf838c15c8b009ba$export$9eeb22c0bba4ed5e, (v) => $cf838c15c8b009ba$export$9eeb22c0bba4ed5e = v); $parcel$export($cf838c15c8b009ba$exports, "ORIGINAL_VREF", () => $cf838c15c8b009ba$export$a324ce9cedb2444c, (v) => $cf838c15c8b009ba$export$a324ce9cedb2444c = v); var $cf838c15c8b009ba$export$db89aa78daab09ca; var $cf838c15c8b009ba$export$ec91da630f36d5ea; // exports.Format = Format; var $cf838c15c8b009ba$export$9eeb22c0bba4ed5e; var $cf838c15c8b009ba$export$a324ce9cedb2444c; var $236d01ba6f80be5e$export$db89aa78daab09ca; var $236d01ba6f80be5e$export$ec91da630f36d5ea; var $236d01ba6f80be5e$export$a324ce9cedb2444c; var $ce44dcc4919407eb$export$69486ebd11f334d1; var $142605215cbb4f88$export$b32929182cc4fefe; var $142605215cbb4f88$export$75b2f422ff60acaf; var $142605215cbb4f88$export$aa24f570494ce8d0; var $142605215cbb4f88$export$1790a88fa433c8d6; var $142605215cbb4f88$export$c63c0b6e358f0774; var $142605215cbb4f88$export$1187115108c84d45; $142605215cbb4f88$export$b32929182cc4fefe = [ 'USJ', 'table' ]; $142605215cbb4f88$export$75b2f422ff60acaf = [ 'char', 'note', 'figure', 'ref' ]; $142605215cbb4f88$export$aa24f570494ce8d0 = 
[ 'type', 'marker', 'content', 'number', 'sid', 'code', 'caller', 'align', 'version', 'altnumber', 'pubnumber', 'category' ]; $142605215cbb4f88$export$1790a88fa433c8d6 = [ 'style', 'number', 'sid', 'code', 'caller', 'align', 'version', 'altnumber', 'pubnumber', 'category' ]; $142605215cbb4f88$export$c63c0b6e358f0774 = [ 'char', 'note', 'verse', 'table:cell' ]; $142605215cbb4f88$export$1187115108c84d45 = [ 'char', 'note', 'verse', 'cell' ]; var $ce44dcc4919407eb$require$NO_USFM_USJ_TYPES = $142605215cbb4f88$export$b32929182cc4fefe; var $ce44dcc4919407eb$require$CLOSING_USJ_TYPES = $142605215cbb4f88$export$75b2f422ff60acaf; var $ce44dcc4919407eb$require$NON_ATTRIB_USJ_KEYS = $142605215cbb4f88$export$aa24f570494ce8d0; var $ce44dcc4919407eb$require$NO_NEWLINE_USJ_TYPES = $142605215cbb4f88$export$c63c0b6e358f0774; var $ce44dcc4919407eb$require$NON_ATTRIB_USX_KEYS = $142605215cbb4f88$export$1790a88fa433c8d6; var $ce44dcc4919407eb$require$NO_NEWLINE_USX_TYPES = $142605215cbb4f88$export$1187115108c84d45; class $ce44dcc4919407eb$var$USFMGenerator { constructor(){ this.usfmString = ''; this.warnings = []; } usjToUsfm(usjObj, nested = false) { if (usjObj.type === 'optbreak') { if (this.usfmString !== '' && ![ '\n', '\r', ' ', '\t' ].includes(this.usfmString.slice(-1))) this.usfmString += ' '; this.usfmString += '// '; return; } if (usjObj.type === 'ref') usjObj.marker = 'ref'; if (!$ce44dcc4919407eb$require$NO_USFM_USJ_TYPES.includes(usjObj.type)) { this.usfmString += '\\'; if (nested && usjObj.type === 'char') this.usfmString += '+'; this.usfmString += `${usjObj.marker} `; } [ 'code', 'number', 'caller' ].forEach((key)=>{ if (usjObj[key]) this.usfmString += `${usjObj[key]} `; }); if (usjObj.category) this.usfmString += `\\cat ${usjObj.category}\\cat*\n`; if (usjObj.altnumber) { if (usjObj.marker === 'c') this.usfmString += `\\ca ${usjObj.altnumber} \\ca*\n`; else if (usjObj.marker === 'v') this.usfmString += `\\va ${usjObj.altnumber} \\va* `; } if (usjObj.pubnumber) { if 
(usjObj.marker === 'c') this.usfmString += `\\cp ${usjObj.pubnumber}\n`; else if (usjObj.marker === 'v') this.usfmString += `\\vp ${usjObj.pubnumber} \\vp* `; } if (Array.isArray(usjObj.content)) usjObj.content.forEach((item)=>{ if (typeof item === 'string') this.usfmString += item; else this.usjToUsfm(item, usjObj.type === 'char' && item.marker !== 'fv'); }); const attributes = []; Object.keys(usjObj).forEach((key)=>{ if (!$ce44dcc4919407eb$require$NON_ATTRIB_USJ_KEYS.includes(key)) { let lhs = key; if (key === 'file') lhs = 'src'; attributes.push(`${lhs}="${usjObj[key]}"`); } }); if (attributes.length > 0) this.usfmString += `|${attributes.join(' ')}`; if ($ce44dcc4919407eb$require$CLOSING_USJ_TYPES.includes(usjObj.type)) { this.usfmString += '\\'; if (nested && usjObj.type === 'char') this.usfmString += '+'; this.usfmString += `${usjObj.marker}* `; } if (usjObj.type === 'ms') { if ('sid' in usjObj) { if (attributes.length === 0) this.usfmString += '|'; this.usfmString += `sid="${usjObj.sid}" `; } this.usfmString = `${this.usfmString.trim()}\\*`; } if (usjObj.type === 'sidebar') this.usfmString += '\\esbe'; if (!$ce44dcc4919407eb$require$NO_NEWLINE_USJ_TYPES.includes(usjObj.type) && this.usfmString[this.usfmString.length - 1] !== '\n') this.usfmString += '\n'; return this.usfmString; } usxToUsfm(xmlObj, nested = false) { // Check if xmlObj is a string // if (typeof xmlObj === 'string') { // // this.usfmString += xmlObj; // return; // } const objType = xmlObj.tagName; let marker = null; const usfmAttributes = []; if ([ 'verse', 'chapter' ].includes(objType) && xmlObj.hasAttribute('eid')) return; if (!$ce44dcc4919407eb$require$NO_NEWLINE_USX_TYPES.includes(objType)) this.usfmString += '\n'; if (objType === 'optbreak') { if (this.usfmString !== '' && ![ '\n', '\r', ' ', '\t' ].includes(this.usfmString.slice(-1))) this.usfmString += ' '; this.usfmString += '// '; } if (xmlObj.hasAttribute('style')) { marker = xmlObj.getAttribute('style'); if (nested && objType === 
'char' && ![ 'xt', 'fv', 'ref' ].includes(marker)) marker = `+${marker}`; this.usfmString += `\\${marker} `; } else if (objType === 'ref') { marker = 'ref'; this.usfmString += `\\${marker} `; } if (xmlObj.hasAttribute('code')) this.usfmString += xmlObj.getAttribute('code'); if (xmlObj.hasAttribute('number')) this.usfmString += `${xmlObj.getAttribute('number')} `; if (xmlObj.hasAttribute('caller')) this.usfmString += `${xmlObj.getAttribute('caller')} `; if (xmlObj.hasAttribute('altnumber')) { if (objType === 'verse') this.usfmString += `\\va ${xmlObj.getAttribute('altnumber')}\\va*`; else if (objType === 'chapter') this.usfmString += `\n\\ca ${xmlObj.getAttribute('altnumber')}\\ca*`; } if (xmlObj.hasAttribute('pubnumber')) { if (objType === 'verse') this.usfmString += `\\vp ${xmlObj.getAttribute('pubnumber')}\\vp*`; else if (objType === 'chapter') this.usfmString += `\n\\cp ${xmlObj.getAttribute('pubnumber')}`; } if (xmlObj.hasAttribute('category')) this.usfmString += `\n\\cat ${xmlObj.getAttribute('category')} \\cat*`; const children = Array.from(xmlObj.childNodes); for (const child of children){ if (child.nodeType === 1) { if (objType === 'char') this.usxToUsfm(child, true); else this.usxToUsfm(child, false); } if (child.nodeType === 3 && child.nodeValue.trim()) { if (this.usfmString !== '' && ![ '\n', '\r', ' ', '\t' ].includes(this.usfmString.slice(-1))) this.usfmString += ' '; this.usfmString += child.nodeValue.trim(); } } const attributes = Array.from(xmlObj.attributes); for (const attrNode of attributes){ const key = attrNode.name; const val = attrNode.value.replace(/"/g, ''); if (key === 'file' && objType === 'figure') usfmAttributes.push(`src="${val}"`); else if (!$ce44dcc4919407eb$require$NON_ATTRIB_USX_KEYS.includes(key)) usfmAttributes.push(`${key}="${val}"`); if ([ 'sid', 'eid' ].includes(key) && objType === 'ms') usfmAttributes.push(`${key}="${val}"`); } if (usfmAttributes.length > 0) { this.usfmString += '|'; this.usfmString += usfmAttributes.join(' 
'); } if (xmlObj.hasAttribute('closed') && xmlObj.getAttribute('closed') === 'true' || $ce44dcc4919407eb$require$CLOSING_USJ_TYPES.includes(objType) || usfmAttributes.length > 0) { if (objType === 'ms') this.usfmString += '\\*'; else this.usfmString += `\\${marker}*`; } if (objType === 'sidebar') this.usfmString += '\n\\esbe\n'; } bibleNlptoUsfm(bibleNlpObj) { const vrefPattern = /([a-zA-Z0-9]{3}) (\d+):(.*)/; let currBook = null; let currChapter = null; for(let i = 0; i < bibleNlpObj.vref.length; i++){ const vref = bibleNlpObj.vref[i]; const verseText = bibleNlpObj.text[i]; const refMatch = vref.match(vrefPattern); if (!refMatch) throw new Error(`Incorrect format: ${vref}.\nIn BibleNlp, vref should have ` + 'three-letter book code, chapter, and verse in the following format: GEN 1:1'); const book = refMatch[1].toUpperCase(); const chap = refMatch[2]; const verse = refMatch[3]; if (book !== currBook) { if (currBook !== null) { this.warnings.push('USFM can contain only one book per file. ' + `Only ${currBook} is processed. 
Specify bookCode for other books.`); break; } this.usfmString += `\\id ${book}`; currBook = book; } if (chap !== currChapter) { this.usfmString += `\n\\c ${chap}\n\\p\n`; currChapter = chap; } if (!this.usfmString.endsWith('\n')) this.usfmString += ' '; this.usfmString += `\\v ${verse} ${verseText}`; } } } $ce44dcc4919407eb$export$69486ebd11f334d1 = $ce44dcc4919407eb$var$USFMGenerator; var $236d01ba6f80be5e$require$USFMGenerator = $ce44dcc4919407eb$export$69486ebd11f334d1; //Logics for syntax-tree to dict(USJ) conversions var $1bdca1468aa54399$export$da1572eff96010ef; var $28b0061e56e1edf2$export$3ea3efefd6f5792b; var $28b0061e56e1edf2$export$bfe3d604e5046dbb; var $28b0061e56e1edf2$export$32d1909bfb943eb0; var $28b0061e56e1edf2$export$ec3044778745fabd; var $28b0061e56e1edf2$export$38bc6f52843beb2a; var $28b0061e56e1edf2$export$af42bd8f70df8555; var $28b0061e56e1edf2$export$c4d2f24f22330b2a; var $28b0061e56e1edf2$export$d19275a111c0e9e6; const $28b0061e56e1edf2$var$CHAR_STYLE_MARKERS = [ 'add', 'bk', 'dc', 'ior', 'iqt', 'k', 'litl', 'nd', 'ord', 'pn', 'png', 'qac', 'qs', 'qt', 'rq', 'sig', 'sls', 'tl', 'wj', 'em', 'bd', 'bdit', 'it', 'no', 'sc', 'sup', 'rb', 'pro', 'w', 'wh', 'wa', 'wg', 'lik', 'liv', 'jmp', 'fr', 'ft', 'fk', 'fq', 'fqa', 'fl', 'fw', 'fp', 'fv', 'fdc', 'xo', 'xop', 'xt', 'xta', 'xk', 'xq', 'xot', 'xnt', 'xdc' ]; const $28b0061e56e1edf2$var$NESTED_CHAR_STYLE_MARKERS = $28b0061e56e1edf2$var$CHAR_STYLE_MARKERS.map((item)=>`${item}Nested`); const $28b0061e56e1edf2$var$PARA_STYLE_MARKERS = [ 'ide', 'usfm', 'h', 'toc', 'toca', 'imt', 'is', 'ip', 'ipi', 'im', 'imi', 'ipq', 'imq', 'ipr', 'iq', 'ib', 'ili', 'iot', 'io', 'iex', 'imte', 'ie', 'mt', 'mte', 'cl', 'cd', 'ms', 'mr', 's', 'sr', 'r', 'd', 'sp', 'sd', 'q', 'qr', 'qc', 'qa', 'qm', 'qd', 'lh', 'li', 'lf', 'lim', 'sts', 'rem', 'lit', 'restore' ]; $28b0061e56e1edf2$export$3ea3efefd6f5792b = $28b0061e56e1edf2$var$PARA_STYLE_MARKERS; const $28b0061e56e1edf2$var$NOTE_MARKERS = [ 'f', 'fe', 'ef', 'efe', 'x', 
'ex' ];
$28b0061e56e1edf2$export$bfe3d604e5046dbb = $28b0061e56e1edf2$var$NOTE_MARKERS;
$28b0061e56e1edf2$export$32d1909bfb943eb0 = $28b0061e56e1edf2$var$CHAR_STYLE_MARKERS;
$28b0061e56e1edf2$export$ec3044778745fabd = $28b0061e56e1edf2$var$NESTED_CHAR_STYLE_MARKERS;
// Attribute name to use when a marker's attribute is written without a name;
// the USJ generator looks this up by parent node type when the attribute's
// name token is just '|'.
const $28b0061e56e1edf2$var$DEFAULT_ATTRIB_MAP = { w: 'lemma', rb: 'gloss', xt: 'href', fig: 'alt', xt_standalone: 'href', xtNested: 'href', ref: 'loc', milestone: 'who', k: 'key' };
$28b0061e56e1edf2$export$38bc6f52843beb2a = $28b0061e56e1edf2$var$DEFAULT_ATTRIB_MAP;
const $28b0061e56e1edf2$var$TABLE_CELL_MARKERS = [ 'tc', 'th', 'tcr', 'thr', 'tcc', 'thc' ];
$28b0061e56e1edf2$export$af42bd8f70df8555 = $28b0061e56e1edf2$var$TABLE_CELL_MARKERS;
const $28b0061e56e1edf2$var$MISC_MARKERS = [ 'fig', 'cat', 'esb', 'b', 'ph', 'pi' ];
$28b0061e56e1edf2$export$c4d2f24f22330b2a = $28b0061e56e1edf2$var$MISC_MARKERS;
// Set versions of the marker lists, for constant-time membership checks.
const $28b0061e56e1edf2$var$MARKER_SETS = { TABLE_CELL_MARKERS: new Set($28b0061e56e1edf2$var$TABLE_CELL_MARKERS), CHAR_STYLE_MARKERS: new Set($28b0061e56e1edf2$var$CHAR_STYLE_MARKERS), NESTED_CHAR_STYLE_MARKERS: new Set($28b0061e56e1edf2$var$NESTED_CHAR_STYLE_MARKERS), OTHER_PARA_NESTABLES: new Set([ 'text', 'footnote', 'crossref', 'verseText', 'v', 'b', 'milestone', 'zNameSpace' ]), NOTE_MARKERS: new Set($28b0061e56e1edf2$var$NOTE_MARKERS), PARA_STYLE_MARKERS: new Set($28b0061e56e1edf2$var$PARA_STYLE_MARKERS) };
$28b0061e56e1edf2$export$d19275a111c0e9e6 = $28b0061e56e1edf2$var$MARKER_SETS;
// Aliases under which the USJ generator module refers to the tables above.
var $1bdca1468aa54399$require$PARA_STYLE_MARKERS = $28b0061e56e1edf2$export$3ea3efefd6f5792b;
var $1bdca1468aa54399$require$NOTE_MARKERS = $28b0061e56e1edf2$export$bfe3d604e5046dbb;
var $1bdca1468aa54399$require$CHAR_STYLE_MARKERS = $28b0061e56e1edf2$export$32d1909bfb943eb0;
var $1bdca1468aa54399$require$NESTED_CHAR_STYLE_MARKERS = $28b0061e56e1edf2$export$ec3044778745fabd;
var $1bdca1468aa54399$require$DEFAULT_ATTRIB_MAP = $28b0061e56e1edf2$export$38bc6f52843beb2a;
var $1bdca1468aa54399$require$TABLE_CELL_MARKERS = $28b0061e56e1edf2$export$af42bd8f70df8555;
var $1bdca1468aa54399$require$MARKER_SETS = $28b0061e56e1edf2$export$d19275a111c0e9e6;
var $6dfa97aa668afc29$exports = {};
const { Query: $6dfa97aa668afc29$var$Query } = $9wIQi$treesitter;
// Each factory below builds a tree-sitter Query for the given language object;
// the capture names (after '@') are what the generators match on.
/** Query capturing the book code and optional description of an `id` node. */
function $6dfa97aa668afc29$var$getIdQuery(lang) { return new $6dfa97aa668afc29$var$Query(lang, '(id (bookcode) @book-code (description)? @desc)'); }
/** Query capturing a chapterNumber or verseNumber node as `alt-num`. */
function $6dfa97aa668afc29$var$usjCaVaquery(lang) { return new $6dfa97aa668afc29$var$Query(lang, `([ (chapterNumber) (verseNumber) ] @alt-num)`); }
/** Query capturing attributeValue nodes as `attrib-val`. */
function $6dfa97aa668afc29$var$attribValQuery(lang) { return new $6dfa97aa668afc29$var$Query(lang, '((attributeValue) @attrib-val)'); }
/** Query capturing a chapter number plus optional `ca` / `cp` values of a `c` node. */
function $6dfa97aa668afc29$var$getChapQuery(lang) { return new $6dfa97aa668afc29$var$Query(lang, `(c (chapterNumber) @chap-num (ca (chapterNumber) @alt-num)? (cp (text) @pub-num)?)`); }
/** Query capturing the child of a `paragraph` node as `para-marker`. */
function $6dfa97aa668afc29$var$paraQuery(lang) { return new $6dfa97aa668afc29$var$Query(lang, '(paragraph (_) @para-marker)'); }
/** Query capturing any milestone / z-namespace tag node as `ms-name`. */
function $6dfa97aa668afc29$var$mileStoneQuery(lang) { return new $6dfa97aa668afc29$var$Query(lang, `([ (milestoneTag) (milestoneStartTag) (milestoneEndTag) (zSpaceTag) ] @ms-name)`); }
/** Query capturing `category` nodes. */
function $6dfa97aa668afc29$var$categoryQuery(lang) { return new $6dfa97aa668afc29$var$Query(lang, '((category) @category)'); }
/** Query capturing a verse number plus optional `va` / `vp` values of a `v` node (pattern continues on the next physical line). */
function $6dfa97aa668afc29$var$verseNumCapQuery(lang) { return new $6dfa97aa668afc29$var$Query(lang, `(v (verseNumber) @vnum (va (verseNumber) @alt)? (vp (text) @vp)? 
)`); } function $6dfa97aa668afc29$var$createQueriesAsNeeded(name, lang) { switch(name){ case 'chapter': return $6dfa97aa668afc29$var$getChapQuery(lang); case 'usjCaVa': return $6dfa97aa668afc29$var$usjCaVaquery(lang); case 'attribVal': return $6dfa97aa668afc29$var$attribValQuery(lang); case 'para': return $6dfa97aa668afc29$var$paraQuery(lang); case 'id': return $6dfa97aa668afc29$var$getIdQuery(lang); case 'milestone': return $6dfa97aa668afc29$var$mileStoneQuery(lang); case 'category': return $6dfa97aa668afc29$var$categoryQuery(lang); case 'verseNumCap': return $6dfa97aa668afc29$var$verseNumCapQuery(lang); default: break; } } $6dfa97aa668afc29$exports = { createQueriesAsNeeded: $6dfa97aa668afc29$var$createQueriesAsNeeded }; // exports.createQueriesAsNeeded = createQueriesAsNeeded; var $1bdca1468aa54399$require$createQueriesAsNeeded = $6dfa97aa668afc29$exports.createQueriesAsNeeded; class $1bdca1468aa54399$var$USJGenerator { constructor(treeSitterLanguageObj, usfmString, usjRootObj = null){ this.usfmLanguage = treeSitterLanguageObj; this.usfm = usfmString; this.jsonRootObj = usjRootObj || { type: 'USJ', version: '3.1', content: [] }; // Cache for the query objects this.queries = {}; // this would be nicer with TS types and not stringly typed, but this pattern creates queries as needed. And creating tree-sitter queries is nearly all the overhead (not single time travee traversal, and not node gerneration and allocation). So only create queries if they are actually neeeded. 
this.getQuery = (name)=>{ if (!this.queries[name]) this.queries[name] = this.createQuery(name); return this.queries[name]; }; this.createQuery = (name)=>$1bdca1468aa54399$require$createQueriesAsNeeded(name, this.usfmLanguage); // Make o(1) sets for marker lookups this.markerSets = $1bdca1468aa54399$require$MARKER_SETS; this.parseState = { bookSlug: null, currentChapter: null }; // maps and id to a fn; this.dispatchMap = this.populateDispatchMap(); } nodeToUSJId(node, parentJsonObj) { const idCaptures = this.getQuery('id').captures(node); // const idCaptures = this.queries.id.captures(node); let code = null; let desc = null; idCaptures.forEach((capture)=>{ if (capture.name === 'book-code') code = this.usfm.slice(capture.node.startIndex, capture.node.endIndex); else if (capture.name === 'desc') desc = this.usfm.slice(capture.node.startIndex, capture.node.endIndex); }); const bookJsonObj = { type: 'book', marker: 'id', code: code, content: [] }; this.parseState.bookSlug = code; if (desc && desc.trim() !== '') bookJsonObj.content.push(desc.trim()); parentJsonObj.content.push(bookJsonObj); } // Similar conversion methods for other node types nodeToUSJC(node, parentJsonObj) { // Build c, the chapter milestone node in usj const chapCap = this.getQuery('chapter').captures(node); // const chapCap = this.queries.chapter.captures(node); const chapNum = this.usfm.slice(chapCap[0].node.startIndex, chapCap[0].node.endIndex); const chapRef = `${this.parseState.bookSlug} ${chapNum}`; const chapJsonObj = { type: 'chapter', marker: 'c', number: chapNum, sid: chapRef }; this.parseState.currentChapter = chapNum; chapCap.forEach((cap)=>{ if (cap.name === 'alt-num') chapJsonObj.altnumber = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim(); if (cap.name === 'pub-num') chapJsonObj.pubnumber = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim(); }); parentJsonObj.content.push(chapJsonObj); node.children.forEach((child)=>{ if ([ 'cl', 'cd' 
].includes(child.type)) this.nodeToUSJ(child, parentJsonObj); }); } nodeToUSJChapter(node, parentJsonObj) { // Build chapter node in USJ node.children.forEach((child)=>{ if (child.type === 'c') this.nodeToUSJC(child, parentJsonObj); else this.nodeToUSJ(child, parentJsonObj); }); } nodeToUSJVerse(node, parentJsonObj) { // Build verse node in USJ const verseNumCap = this.getQuery('verseNumCap').captures(node); // const verseNumCap = this.queries.verseNumCap.captures(node); const verseNum = this.usfm.substring(verseNumCap[0].node.startIndex, verseNumCap[0].node.endIndex); const vJsonObj = { type: 'verse', marker: 'v', number: verseNum.trim() }; verseNumCap.forEach((capture)=>{ if (capture.name === 'alt') { const altNum = this.usfm.slice(capture.node.startIndex, capture.node.endIndex); vJsonObj.altnumber = altNum; } else if (capture.name === 'vp') { const vpText = this.usfm.substring(capture.node.startIndex, capture.node.endIndex); vJsonObj.pubnumber = vpText; } }); const ref = `${this.parseState.bookSlug} ${this.parseState.currentChapter}:${verseNum}`; vJsonObj.sid = ref.trim(); parentJsonObj.content.push(vJsonObj); } nodeToUSJCaVa(node, parentJsonObj) { // Build elements for independent ca and va away from c and v const style = node.type; const charJsonObj = { type: 'char', marker: style }; const altNumMatch = this.getQuery('usjCaVa').captures(node); // const altNumMatch = this.queries.usjCaVa.captures(node); const altNum = this.usfm.slice(altNumMatch[0].node.startIndex, altNumMatch[0].node.endIndex).trim(); charJsonObj.altnumber = altNum; parentJsonObj.content.push(charJsonObj); } nodeToUSJPara(node, parentJsonObj) { // Build paragraph nodes in USJ if (node.children[0].type.endsWith('Block')) node.children[0].children.forEach((child)=>{ this.nodeToUSJPara(child, parentJsonObj); }); else if (node.type === 'paragraph') { const paraTagCap = this.getQuery('para').captures(node)[0]; // const paraTagCap = this.queries.para.captures(node)[0]; const paraMarker = 
paraTagCap.node.type; if (paraMarker === 'b') parentJsonObj.content.push({ type: 'para', marker: paraMarker }); else if (!paraMarker.endsWith('Block')) { const paraJsonObj = { type: 'para', marker: paraMarker, content: [] }; paraTagCap.node.children.forEach((child)=>{ this.nodeToUSJ(child, paraJsonObj); }); parentJsonObj.content.push(paraJsonObj); } } else if ([ 'pi', 'ph' ].includes(node.type)) { const paraMarker = this.usfm.substring(node.children[0].startIndex, node.children[0].endIndex).replace('\\', '').trim(); const paraJsonObj = { type: 'para', marker: paraMarker, content: [] }; node.children.slice(1).forEach((child)=>{ this.nodeToUSJ(child, paraJsonObj); }); parentJsonObj.content.push(paraJsonObj); } } nodeToUSJNotes(node, parentJsonObj) { // Build USJ nodes for footnotes and cross-references const tagNode = node.children[0]; const callerNode = node.children[1]; const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace('\\', '').trim(); const noteJsonObj = { type: 'note', marker: style, content: [] }; noteJsonObj.caller = this.usfm.substring(callerNode.startIndex, callerNode.endIndex).trim(); for(let i = 2; i < node.children.length - 1; i++)this.nodeToUSJ(node.children[i], noteJsonObj); parentJsonObj.content.push(noteJsonObj); } nodeToUSJChar(node, parentJsonObj) { // Build USJ nodes for character markups, both regular and nested const tagNode = node.children[0]; let childrenRange = node.children.length; for(let i = node.children.length - 1; i > 0; i--)if (node.children[i].type.startsWith('\\') || node.children[i].type === '*' || node.children[i].type.endsWith('Tag')) childrenRange -= 1; const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace('\\', '').replace('+', '').trim(); const charJsonObj = { type: 'char', marker: style, content: [] }; // Assume a flag for closed markup, toggle this if your conditions and data structure require // charJsonObj.closed = node.children[node.children.length - 
1].type.startsWith('\\'); for(let i = 1; i < childrenRange; i++)this.nodeToUSJ(node.children[i], charJsonObj); parentJsonObj.content.push(charJsonObj); } nodeToUSJTable(node, parentJsonObj) { // Handle table related components and convert to USJ if (node.type === 'table') { const tableJsonObj = { type: 'table', content: [] }; node.children.forEach((child)=>{ this.nodeToUSJ(child, tableJsonObj); }); parentJsonObj.content.push(tableJsonObj); } else if (node.type === 'tr') { const rowJsonObj = { type: 'table:row', marker: 'tr', content: [] }; node.children.slice(1).forEach((child)=>{ this.nodeToUSJ(child, rowJsonObj); }); parentJsonObj.content.push(rowJsonObj); } else if (this.markerSets.TABLE_CELL_MARKERS.has(node.type)) { const tagNode = node.children[0]; const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace('\\', '').trim(); const cellJsonObj = { type: 'table:cell', marker: style, content: [], align: style.includes('tcc') || style.includes('thc') ? 'center' : style.includes('r') ? 
'end' : 'start' }; node.children.slice(1).forEach((child)=>{ this.nodeToUSJ(child, cellJsonObj); }); parentJsonObj.content.push(cellJsonObj); } } nodeToUSJAttrib(node, parentJsonObj) { // Add attribute values to USJ elements const attribNameNode = node.children[0]; let attribName = this.usfm.slice(attribNameNode.startIndex, attribNameNode.endIndex).trim(); // Handling special cases for attribute names if (attribName === '|') { let parentType = node.parent.type; if (parentType.includes('Nested')) parentType = parentType.replace('Nested', ''); attribName = $1bdca1468aa54399$require$DEFAULT_ATTRIB_MAP[parentType]; } if (attribName === 'src') // for \fig attribName = 'file'; const attribValCap = this.getQuery('attribVal').captures(node); // const attribValCap = this.queries.attribVal.captures(node); let attribValue = ''; if (attribValCap.length > 0) attribValue = this.usfm.substring(attribValCap[0].node.startIndex, attribValCap[0].node.endIndex).trim(); parentJsonObj[attribName] = attribValue; } nodeToUSJMilestone(node, parentJsonObj) { // Create ms node in USJ const msNameCap = this.getQuery('milestone').captures(node)[0]; // this.queries.milestone.captures(node)[0]; // const msNameCap = this.queries.milestone.captures(node)[0]; // slice, not substring. 
Hence not using util fxn extractAndCleanMarker const style = this.usfm.slice(msNameCap.node.startIndex, msNameCap.node.endIndex).replace('\\', '').trim(); const msJsonObj = { type: 'ms', marker: style, content: [] }; node.children.forEach((child)=>{ if (child.type.endsWith('Attribute')) this.nodeToUSJ(child, msJsonObj); }); // Though normally milestones don't have contents, custom z-namespaces could have them if (!msJsonObj.content.length) delete msJsonObj.content; // Remove empty content array if not used parentJsonObj.content.push(msJsonObj); } nodeToUSJSpecial(node, parentJsonObj) { // Build nodes for esb, cat, fig, optbreak in USJ if (node.type === 'esb') { const sidebarJsonObj = { type: 'sidebar', marker: 'esb', content: [] }; node.children.slice(1, -1).forEach((child)=>{ this.nodeToUSJ(child, sidebarJsonObj); }); parentJsonObj.content.push(sidebarJsonObj); } else if (node.type === 'cat') { const catCap = this.getQuery('category').captures(node)[0]; // const catCap = this.queries.category.captures(node)[0]; const category = this.usfm.substring(catCap.node.startIndex, catCap.node.endIndex).trim(); parentJsonObj.category = category; } else if (node.type === 'fig') { const figJsonObj = { type: 'figure', marker: 'fig', content: [] }; node.children.slice(1, -1).forEach((child)=>{ this.nodeToUSJ(child, figJsonObj); }); parentJsonObj.content.push(figJsonObj); } else if (node.type === 'ref') { const refJsonObj = { type: 'ref', content: [] }; node.children.slice(1, -1).forEach((child)=>{ this.nodeToUSJ(child, refJsonObj); }); parentJsonObj.content.push(refJsonObj); } } nodeToUSJGeneric(node, parentJsonObj) { // Build nodes for para style markers in USJ const tagNode = node.children[0]; let style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex); if (style.startsWith('\\')) style = style.replace('\\', '').trim(); else style = node.type; let childrenRangeStart = 1; if (node.children.length > 1 && node.children[1].type.startsWith('numbered')) { const numNode = 
node.children[1]; const num = this.usfm.substring(numNode.startIndex, numNode.endIndex); style += num; childrenRangeStart = 2; } const paraJsonObj = { type: 'para', marker: style, content: [] }; parentJsonObj.content.push(paraJsonObj); for(let i = childrenRangeStart; i < node.children.length; i++){ const child = node.children[i]; if ([ this.markerSets.CHAR_STYLE_MARKERS, this.markerSets.NESTED_CHAR_STYLE_MARKERS, this.markerSets.OTHER_PARA_NESTABLES ].some((markerSet)=>markerSet.has(child.type))) // Only nest these types inside the upper para style node this.nodeToUSJ(child, paraJsonObj); else this.nodeToUSJ(child, parentJsonObj); } } pushTextNode(node, parentJsonObj) { const textVal = this.usfm.substring(node.startIndex, node.endIndex).replace('~', ' '); if (textVal !== '') parentJsonObj.content.push(textVal); } handleVerseText(node, parentJsonObj) { node.children.forEach((child)=>this.nodeToUSJ(child, parentJsonObj)); } populateDispatchMap() { const thisMap = new Map(); const thisClass = this; const bindToClass = (method)=>method.bind(thisClass); const addHandlers = (markers, handler)=>{ markers.forEach((marker)=>thisMap.set(marker, handler.bind(thisClass))); }; // Instead of at worst O(n) lookup time in switch statement, we can map marker to a handler and then at most O(1) lookup time with room for fallback on stuff like type ends with ATtributes: returned functions take the args of the handler thisMap.set('text', bindToClass(this.pushTextNode)); thisMap.set('verseText', bindToClass(this.handleVerseText)); thisMap.set('v', bindToClass(this.nodeToUSJVerse)); thisMap.set('id', this.nodeToUSJId.bind(this)); thisMap.set('chapter', this.nodeToUSJChapter.bind(this)); // nooop thisMap.set('usfm', ()=>{}); addHandlers([ 'paragraph', 'q', 'w' ], this.nodeToUSJPara); addHandlers([ 'cl', 'cp', 'vp' ], this.nodeToUSJGeneric); addHandlers([ 'ca', 'va' ], this.nodeToUSJCaVa); addHandlers([ 'table', 'tr' ], this.nodeToUSJTable); addHandlers([ 'milestone', 'zNameSpace' ], 
this.nodeToUSJMilestone); addHandlers([ 'esb', 'cat', 'fig', 'ref' ], this.nodeToUSJSpecial); addHandlers($1bdca1468aa54399$require$NOTE_MARKERS, this.nodeToUSJNotes); addHandlers([ $1bdca1468aa54399$require$CHAR_STYLE_MARKERS, $1bdca1468aa54399$require$NESTED_CHAR_STYLE_MARKERS, 'xt_standalone' ].flat(), this.nodeToUSJChar); // addHandlers(NESTED_CHAR_STYLE_MARKERS, this.nodeToUSJChar); // thisMap.set("xt_standalone", this.nodeToUSJChar.bind(this)); addHandlers($1bdca1468aa54399$require$TABLE_CELL_MARKERS, this.nodeToUSJTable); addHandlers($1bdca1468aa54399$require$PARA_STYLE_MARKERS.filter((m)=>m !== 'usfm'), this.nodeToUSJGeneric); return thisMap; } nodeToUSJ(node, parentJsonObj) { const nodeType = node.type?.replace('\\', ''); const handler = this.dispatchMap.get(nodeType); if (handler) { handler(node, parentJsonObj); return; } else { if (!nodeType) return; // some edge cases where we can't cleanly map to a marker: if (nodeType.endsWith('Attribute')) return this.nodeToUSJAttrib(node, parentJsonObj); if ([ '', '|' ].includes(node.type.trim())) // known noop; return; // Process children while discarding nodes that don't go into usj if (node.children.length > 0) node.children.forEach((child)=>this.nodeToUSJ(child, parentJsonObj)); } } } $1bdca1468aa54399$export$da1572eff96010ef = $1bdca1468aa54399$var$USJGenerator; var $236d01ba6f80be5e$require$USJGenerator = $1bdca1468aa54399$export$da1572eff96010ef; var $0143e94dfd2d689d$export$24715706bc524307; class $0143e94dfd2d689d$var$ListGenerator { /* Combines the methods used for List generation from USJ */ constructor(){ /* Variables shared by functions */ this.book = ''; this.currentChapter = ''; this.currentVerse = ''; this.list = [ [ 'Book', 'Chapter', 'Verse', 'Text', 'Type', 'Marker' ] ]; this.bibleNlpFormat = { 'text': [], 'vref': [] }; this.prevChapter = ''; this.prevVerse = ''; } usjToListId(obj) { /* Update book code */ this.book = obj.code; } usjToListC(obj) { /* Update current chapter */ this.currentChapter = 
obj.number; this.currentVerse = ''; } usjToListV(obj) { /* Update current verse */ this.currentVerse = obj.number; } usjToList(obj, excludeMarkers = null, includeMarkers = null) { /* Traverse the USJ dict and build the table in this.list */ if (obj.type === 'book') { this.usjToListId(obj); if (excludeMarkers && excludeMarkers.includes('id') || includeMarkers && !includeMarkers.includes('id')) return; } else if (obj.type === 'chapter') this.usjToListC(obj); else if (obj.type === 'verse') this.usjToListV(obj); let markerType = obj.type; const markerName = obj.marker ? obj.marker : ''; if (markerType === 'USJ') // This would occur if the JSON got flattened after removing paragraph markers markerType = ''; if (obj.content && obj.content.length > 0) { for (let item of obj.content)if (typeof item === 'string') { if (excludeMarkers && excludeMarkers.includes('text')) item = ''; this.list.push([ this.book, this.currentChapter, this.currentVerse, item, markerType, markerName ]); } else this.usjToList(item, excludeMarkers, includeMarkers); } else if (!excludeMarkers && !includeMarkers || excludeMarkers && !excludeMarkers.includes(markerName) || includeMarkers && includeMarkers.includes(markerName)) this.list.push([ this.book, this.currentChapter, this.currentVerse, '', markerType, markerName ]); } usjToBibleNlpFormat(obj) { // Traverse the USJ object and build a dictionary for Bible NLP format if (obj.type === 'book') this.usjToListId(obj); else if (obj.type === 'chapter') this.usjToListC(obj); else if (obj.type === 'verse') this.usjToListV(obj); else if (obj.content) { for (const item of obj.content)if (typeof item === 'string') { if (this.currentChapter === this.prevChapter && this.currentVerse === this.prevVerse) this.bibleNlpFormat.text[this.bibleNlpFormat.text.length - 1] += ` ${item.replace(/[\n\r]/g, ' ').trim()}`; else { const vref = `${this.book} ${this.currentChapter}:${this.currentVerse}`; this.bibleNlpFormat.text.push(item.replace(/[\n\r]/g, ' ').trim()); 
this.bibleNlpFormat.vref.push(vref); this.prevChapter = this.currentChapter; this.prevVerse = this.currentVerse; } } else this.usjToBibleNlpFormat(item); } } } $0143e94dfd2d689d$export$24715706bc524307 = $0143e94dfd2d689d$var$ListGenerator; var $236d01ba6f80be5e$require$ListGenerator = $0143e94dfd2d689d$export$24715706bc524307; //Logics for syntax-tree to xml(USX) conversions var $02bc2da10d8b76e8$export$69d2127c7776f273; var $02bc2da10d8b76e8$require$DOMImplementation = $9wIQi$DOMImplementation; var $02bc2da10d8b76e8$require$PARA_STYLE_MARKERS = $28b0061e56e1edf2$export$3ea3efefd6f5792b; var $02bc2da10d8b76e8$require$NOTE_MARKERS = $28b0061e56e1edf2$export$bfe3d604e5046dbb; var $02bc2da10d8b76e8$require$CHAR_STYLE_MARKERS = $28b0061e56e1edf2$export$32d1909bfb943eb0; var $02bc2da10d8b76e8$require$NESTED_CHAR_STYLE_MARKERS = $28b0061e56e1edf2$export$ec3044778745fabd; var $02bc2da10d8b76e8$require$DEFAULT_ATTRIB_MAP = $28b0061e56e1edf2$export$38bc6f52843beb2a; var $02bc2da10d8b76e8$require$TABLE_CELL_MARKERS = $28b0061e56e1edf2$export$af42bd8f70df8555; var $02bc2da10d8b76e8$require$MARKER_SETS = $28b0061e56e1edf2$export$d19275a111c0e9e6; var $02bc2da10d8b76e8$require$createQueriesAsNeeded = $6dfa97aa668afc29$exports.createQueriesAsNeeded; class $02bc2da10d8b76e8$var$USXGenerator { /** * A binding for all methods used in generating USX from Syntax tree * @param {object} treeSitterLanguageObj - The Tree-sitter language object * @param {Buffer} usfmString - The USFM byte data * @param {Element} [usxRootElement] - The root element of the USX (optional) */ constructor(treeSitterLanguageObj, usfmString, usxRootElement = null){ this.usfmLanguage = treeSitterLanguageObj; this.usfm = usfmString; const domImpl = new $02bc2da10d8b76e8$require$DOMImplementation(); const doc = domImpl.createDocument(null, 'usx', null); if (usxRootElement === null) { this.xmlRootNode = doc.documentElement; this.xmlRootNode.setAttribute('version', '3.1'); } else this.xmlRootNode = usxRootElement; 
// Cache for the query objects this.queries = {}; this.getQuery = (name)=>{ if (!this.queries[name]) this.queries[name] = this.createQuery(name); return this.queries[name]; }; this.createQuery = (name)=>$02bc2da10d8b76e8$require$createQueriesAsNeeded(name, this.usfmLanguage); this.markerSets = $02bc2da10d8b76e8$require$MARKER_SETS; this.parseState = { bookSlug: null, currentChapter: null, prevVerseSid: null, prevChapterSid: null, prevVerse: null }; // maps and id to a fn; this.dispatchMap = this.populateDispatchMap(); } populateDispatchMap() { const thisMap = new Map(); const thisClass = this; const bindToClass = (method)=>method.bind(thisClass); const addHandlers = (markers, handler)=>{ markers.forEach((marker)=>thisMap.set(marker, handler.bind(thisClass))); }; // Instead of at worst O(n) lookup time in switch statement, we can map marker to a handler and then at most O(1) lookup time with room for fallback on stuff like type ends with ATtributes: returned functions take the args of the handler thisMap.set('text', bindToClass(this.pushTextNode)); thisMap.set('verseText', bindToClass(this.handleVerseText)); thisMap.set('v', bindToClass(this.node2UsxVerse)); thisMap.set('id', this.node2UsxId.bind(this)); thisMap.set('chapter', this.node2UsxChapter.bind(this)); // nooop thisMap.set('usfm', ()=>{}); addHandlers([ 'paragraph', 'q', 'w' ], this.node2UsxPara); addHandlers([ 'cl', 'cl', 'cp', 'vp' ], this.node2UsxGeneric); addHandlers([ 'ca', 'va' ], this.node2UsxCaVa); addHandlers([ 'table', 'tr' ], this.node2UsxTable); addHandlers([ 'milestone', 'zNameSpace' ], this.node2UsxMilestone); addHandlers([ 'esb', 'cat', 'fig', 'ref' ], this.node2UsxSpecial); addHandlers($02bc2da10d8b76e8$require$NOTE_MARKERS, this.node2UsxNotes); addHandlers([ $02bc2da10d8b76e8$require$CHAR_STYLE_MARKERS, $02bc2da10d8b76e8$require$NESTED_CHAR_STYLE_MARKERS, 'xt_standalone' ].flat(), this.node2UsxChar); // addHandlers(NESTED_CHAR_STYLE_MARKERS, this.node2UsxChar); // 
thisMap.set("xt_standalone", this.node2UsxChar.bind(this)); addHandlers($02bc2da10d8b76e8$require$TABLE_CELL_MARKERS, this.node2UsxTable); addHandlers($02bc2da10d8b76e8$require$PARA_STYLE_MARKERS.filter((m)=>m !== 'usfm'), this.node2UsxGeneric); return thisMap; } /** * Builds the ID node in USX * @param {SyntaxNode} node - The syntax node * @param {Element} parentXmlNode - The parent XML node to append the ID to */ node2UsxId(node, parentXmlNode) { const idCaptures = this.getQuery('id').captures(node); let code = null; let desc = null; idCaptures.forEach((capture)=>{ if (capture.name === 'book-code') code = this.usfm.slice(capture.node.startIndex, capture.node.endIndex); else if (capture.name === 'desc') desc = this.usfm.slice(capture.node.startIndex, capture.node.endIndex); }); const bookXmlNode = parentXmlNode.ownerDocument.createElement('book'); bookXmlNode.setAttribute('code', code); bookXmlNode.setAttribute('style', 'id'); this.parseState.bookSlug = code; if (desc && desc.trim() !== '') { const textNode = parentXmlNode.ownerDocument.createTextNode(desc.trim()); bookXmlNode.appendChild(textNode); } parentXmlNode.appendChild(bookXmlNode); } node2UsxC(node, parentXmlNode) { // Build c, the chapter milestone node in usj const chapCap = this.getQuery('chapter').captures(node); const chapNum = this.usfm.slice(chapCap[0].node.startIndex, chapCap[0].node.endIndex); // const bookNode = xpath.select1("book", parentXmlNode); const bookCode = this.parseState.bookSlug; const chapRef = `${bookCode} ${chapNum}`; this.parseState.prevChapterSid = chapRef; // Create the 'chapter' element const chapXmlNode = parentXmlNode.ownerDocument.createElement('chapter'); chapXmlNode.setAttribute('number', chapNum); chapXmlNode.setAttribute('style', 'c'); chapXmlNode.setAttribute('sid', chapRef); this.parseState.currentChapter = chapNum; chapCap.forEach((cap)=>{ if (cap.name === 'alt-num') { const altNum = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim(); 
chapXmlNode.setAttribute('altnumber', altNum); } if (cap.name === 'pub-num') { const pubNum = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim(); chapXmlNode.setAttribute('pubnumber', pubNum); } }); parentXmlNode.appendChild(chapXmlNode); node.children.forEach((child)=>{ if ([ 'cl', 'cd' ].includes(child.type)) this.node2Usx(child, parentXmlNode); }); } handleVerseText(node, parentXmlNode) { node.children.forEach((child)=>this.node2Usx(child, parentXmlNode)); this.parseState.prevVerseParent = parentXmlNode; } node2UsxChapter(node, parentXmlNode) { // Build chapter node in USJ node.children.forEach((child)=>{ if (child.type === 'c') this.node2UsxC(child, parentXmlNode); else this.node2Usx(child, parentXmlNode); }); // const prevVerses = xpath.select("//verse", this.xmlRootNode); // chapter means we need both closing verse and closing chapter eids const lastVerse = this.parseState.prevVerse; if (lastVerse && !lastVerse.getAttribute('eid')) { const vEndXmlNode = parentXmlNode.ownerDocument.createElement('verse'); vEndXmlNode.setAttribute('eid', this.parseState.prevVerseSid); this.parseState.prevVerseSid = null; this.parseState.prevVerse = null; const sibblingCount = parentXmlNode.childNodes.length; const lastSibbling = parentXmlNode.childNodes[sibblingCount - 1]; if (lastSibbling.tagName === 'para') lastSibbling.appendChild(vEndXmlNode); else if (lastSibbling.tagName === 'table') { const rows = lastSibbling.getElementsByTagName('row'); rows[rows.length - 1].appendChild(vEndXmlNode); } else parentXmlNode.appendChild(vEndXmlNode); } const cEndXmlNode = parentXmlNode.ownerDocument.createElement('chapter'); cEndXmlNode.setAttribute('eid', this.parseState.prevChapterSid); this.parseState.prevChapterSid = null; parentXmlNode.appendChild(cEndXmlNode); } findPrevUncle(parentXmlNode) { // Get the grandparent node const grandParent = parentXmlNode.parentNode; let uncleIndex = grandParent.childNodes.length - 2; // Start from the previous sibling while(uncleIndex 
>= 0){ const uncle = grandParent.childNodes[uncleIndex]; // Skip 'sidebar' and 'ms' elements if (uncle.tagName === 'sidebar' || uncle.tagName === 'ms') uncleIndex--;