usfm-grammar
Version:
Uses the tree-sitter-usfm3 parser to convert USFM files to other formats such as USJ, USX, and CSV, and converts them back to USFM
1,181 lines (1,160 loc) • 776 kB
JavaScript
var $a9A1a$treesitter = require("tree-sitter");
var $a9A1a$assert = require("assert");
var $a9A1a$treesitterusfm3 = require("tree-sitter-usfm3");
var $a9A1a$xmldom = require("xmldom");
var $a9A1a$xpath = require("xpath");
var $a9A1a$nodefs = require("node:fs");
var $a9A1a$ajv = require("ajv");
/**
 * Installs an accessor property on an exports object.
 *
 * @param {object} e - Object that receives the property.
 * @param {string} n - Property name.
 * @param {Function} v - Getter for the property.
 * @param {Function} s - Setter for the property.
 */
function $parcel$export(e, n, v, s) {
  const descriptor = {
    configurable: true,
    enumerable: true,
    get: v,
    set: s,
  };
  Object.defineProperty(e, n, descriptor);
}
// Public API of the package. Each name is registered on module.exports as an
// accessor property: the getter returns, and the setter overwrites, the
// correspondingly named variable declared just below.
$parcel$export(module.exports, "USFMParser", () => $4fa36e821943b400$export$db89aa78daab09ca, (v) => $4fa36e821943b400$export$db89aa78daab09ca = v);
$parcel$export(module.exports, "Filter", () => $4fa36e821943b400$export$ec91da630f36d5ea, (v) => $4fa36e821943b400$export$ec91da630f36d5ea = v);
$parcel$export(module.exports, "Validator", () => $4fa36e821943b400$export$9eeb22c0bba4ed5e, (v) => $4fa36e821943b400$export$9eeb22c0bba4ed5e = v);
$parcel$export(module.exports, "ORIGINAL_VREF", () => $4fa36e821943b400$export$a324ce9cedb2444c, (v) => $4fa36e821943b400$export$a324ce9cedb2444c = v);
// Backing variables for the four exports above.
// NOTE(review): their assignments are not visible in this part of the file.
var $4fa36e821943b400$export$db89aa78daab09ca;
var $4fa36e821943b400$export$ec91da630f36d5ea;
// exports.Format = Format;
var $4fa36e821943b400$export$9eeb22c0bba4ed5e;
var $4fa36e821943b400$export$a324ce9cedb2444c;
// NOTE(review): these share hash suffixes with the exports above, so they are
// presumably the same bindings in an inner module -- confirm further down the bundle.
var $be3194b3d68e926d$export$db89aa78daab09ca;
var $be3194b3d68e926d$export$ec91da630f36d5ea;
var $be3194b3d68e926d$export$a324ce9cedb2444c// exports.Format = Format;
;
// Receives the USFMGenerator class once that class has been defined.
var $b37353ce824c37f3$export$69486ebd11f334d1;
// Hold the six USJ/USX lookup tables that are assigned immediately below.
var $0783021dea42787b$export$b32929182cc4fefe;
var $0783021dea42787b$export$75b2f422ff60acaf;
var $0783021dea42787b$export$aa24f570494ce8d0;
var $0783021dea42787b$export$1790a88fa433c8d6;
var $0783021dea42787b$export$c63c0b6e358f0774;
var $0783021dea42787b$export$1187115108c84d45;
// USJ node types that are written without a "\marker" of their own.
$0783021dea42787b$export$b32929182cc4fefe = ["USJ", "table"];

// USJ node types (also used for USX tag names) that get a closing "\marker*".
$0783021dea42787b$export$75b2f422ff60acaf = ["char", "note", "figure", "ref"];

// USJ keys that are structural and therefore never emitted as "|key=value" attributes.
$0783021dea42787b$export$aa24f570494ce8d0 = [
  "type", "marker", "content", "number", "sid", "code",
  "caller", "align", "version", "altnumber", "pubnumber", "category",
];

// USX attribute names that are never emitted as "|key=value" attributes.
$0783021dea42787b$export$1790a88fa433c8d6 = [
  "style", "number", "sid", "code", "caller",
  "align", "version", "altnumber", "pubnumber", "category",
];

// USJ node types after which no line break is forced.
$0783021dea42787b$export$c63c0b6e358f0774 = ["char", "note", "verse", "table:cell"];

// USX tag names before which no line break is inserted.
$0783021dea42787b$export$1187115108c84d45 = ["char", "note", "verse", "cell"];

// Readable aliases under which the USFM generator refers to the tables above.
var $b37353ce824c37f3$require$NO_USFM_USJ_TYPES = $0783021dea42787b$export$b32929182cc4fefe;
var $b37353ce824c37f3$require$CLOSING_USJ_TYPES = $0783021dea42787b$export$75b2f422ff60acaf;
var $b37353ce824c37f3$require$NON_ATTRIB_USJ_KEYS = $0783021dea42787b$export$aa24f570494ce8d0;
var $b37353ce824c37f3$require$NO_NEWLINE_USJ_TYPES = $0783021dea42787b$export$c63c0b6e358f0774;
var $b37353ce824c37f3$require$NON_ATTRIB_USX_KEYS = $0783021dea42787b$export$1790a88fa433c8d6;
var $b37353ce824c37f3$require$NO_NEWLINE_USX_TYPES = $0783021dea42787b$export$1187115108c84d45;
/**
 * Builds USFM text from USJ objects, USX DOM elements or BibleNLP verse lists.
 *
 * Every conversion method appends to `usfmString`, so a single instance
 * accumulates the output of successive calls. Non-fatal problems are
 * collected in `warnings`.
 */
class $b37353ce824c37f3$var$USFMGenerator {
  constructor() {
    this.usfmString = "";
    this.warnings = [];
  }

  /**
   * Appends the USFM for one USJ node and, recursively, for its content.
   *
   * @param {object} usjObj - USJ node to serialise. It is only read, never modified.
   * @param {boolean} [nested=false] - When true and the node is a `char`, its
   *   opening and closing markers are written with a leading "+".
   * @returns {string|undefined} The accumulated USFM string, or `undefined`
   *   for `optbreak` nodes.
   */
  usjToUsfm(usjObj, nested = false) {
    if (usjObj.type === "optbreak") {
      // Keep "//" separated from preceding text by exactly one whitespace character.
      if (this.usfmString !== "" && !["\n", "\r", " ", "\t"].includes(this.usfmString.slice(-1))) {
        this.usfmString += " ";
      }
      this.usfmString += "// ";
      return;
    }

    // `ref` nodes always use the marker "ref". A local is used instead of
    // writing the marker back onto the caller's object.
    const marker = usjObj.type === "ref" ? "ref" : usjObj.marker;

    if (!$b37353ce824c37f3$require$NO_USFM_USJ_TYPES.includes(usjObj.type)) {
      this.usfmString += "\\";
      if (nested && usjObj.type === "char") this.usfmString += "+";
      this.usfmString += `${marker} `;
    }

    ["code", "number", "caller"].forEach((key) => {
      if (usjObj[key]) this.usfmString += `${usjObj[key]} `;
    });

    if (usjObj.category) this.usfmString += `\\cat ${usjObj.category}\\cat*\n`;

    if (usjObj.altnumber) {
      if (marker === "c") this.usfmString += `\\ca ${usjObj.altnumber} \\ca*\n`;
      else if (marker === "v") this.usfmString += `\\va ${usjObj.altnumber} \\va* `;
    }
    if (usjObj.pubnumber) {
      if (marker === "c") this.usfmString += `\\cp ${usjObj.pubnumber}\n`;
      else if (marker === "v") this.usfmString += `\\vp ${usjObj.pubnumber} \\vp* `;
    }

    if (Array.isArray(usjObj.content)) {
      usjObj.content.forEach((item) => {
        if (typeof item === "string") this.usfmString += item;
        // Children of a char node are nested, except the "fv" marker.
        else this.usjToUsfm(item, usjObj.type === "char" && item.marker !== "fv");
      });
    }

    // Every key that is not structural becomes a `key="value"` attribute.
    const attributes = [];
    Object.keys(usjObj).forEach((key) => {
      if (!$b37353ce824c37f3$require$NON_ATTRIB_USJ_KEYS.includes(key)) {
        const lhs = key === "file" ? "src" : key;
        attributes.push(`${lhs}="${usjObj[key]}"`);
      }
    });
    if (attributes.length > 0) this.usfmString += `|${attributes.join(" ")}`;

    if ($b37353ce824c37f3$require$CLOSING_USJ_TYPES.includes(usjObj.type)) {
      this.usfmString += "\\";
      if (nested && usjObj.type === "char") this.usfmString += "+";
      this.usfmString += `${marker}* `;
    }

    if (usjObj.type === "ms") {
      if ("sid" in usjObj) {
        // `sid` is a structural key, so it was skipped above. Start the
        // attribute list if it is the only attribute, otherwise separate it
        // from the previous attribute with a space.
        this.usfmString += attributes.length === 0 ? "|" : " ";
        this.usfmString += `sid="${usjObj.sid}" `;
      }
      this.usfmString = this.usfmString.trim() + "\\*";
    }

    if (usjObj.type === "sidebar") this.usfmString += "\\esbe";

    if (
      !$b37353ce824c37f3$require$NO_NEWLINE_USJ_TYPES.includes(usjObj.type) &&
      this.usfmString[this.usfmString.length - 1] !== "\n"
    ) {
      this.usfmString += "\n";
    }
    return this.usfmString;
  }

  /**
   * Appends the USFM for one USX element and, recursively, for its children.
   *
   * @param {object} xmlObj - DOM element. The method uses `tagName`,
   *   `hasAttribute`, `getAttribute`, `attributes` and `childNodes`.
   * @param {boolean} [nested=false] - When true and the element is a `char`,
   *   its marker is prefixed with "+" (except for `xt`, `fv` and `ref`).
   */
  usxToUsfm(xmlObj, nested = false) {
    const objType = xmlObj.tagName;
    let marker = null;
    const usfmAttributes = [];

    // Verse and chapter end milestones produce no USFM.
    if (["verse", "chapter"].includes(objType) && xmlObj.hasAttribute("eid")) return;

    if (!$b37353ce824c37f3$require$NO_NEWLINE_USX_TYPES.includes(objType)) this.usfmString += "\n";

    if (objType === "optbreak") {
      if (this.usfmString !== "" && !["\n", "\r", " ", "\t"].includes(this.usfmString.slice(-1))) {
        this.usfmString += " ";
      }
      this.usfmString += "// ";
    }

    if (xmlObj.hasAttribute("style")) {
      marker = xmlObj.getAttribute("style");
      if (nested && objType === "char" && !["xt", "fv", "ref"].includes(marker)) marker = `+${marker}`;
      this.usfmString += `\\${marker} `;
    } else if (objType === "ref") {
      marker = "ref";
      this.usfmString += `\\${marker} `;
    }

    if (xmlObj.hasAttribute("code")) this.usfmString += xmlObj.getAttribute("code");
    if (xmlObj.hasAttribute("number")) this.usfmString += `${xmlObj.getAttribute("number")} `;
    if (xmlObj.hasAttribute("caller")) this.usfmString += `${xmlObj.getAttribute("caller")} `;

    if (xmlObj.hasAttribute("altnumber")) {
      if (objType === "verse") this.usfmString += `\\va ${xmlObj.getAttribute("altnumber")}\\va*`;
      else if (objType === "chapter") this.usfmString += `\n\\ca ${xmlObj.getAttribute("altnumber")}\\ca*`;
    }
    if (xmlObj.hasAttribute("pubnumber")) {
      if (objType === "verse") this.usfmString += `\\vp ${xmlObj.getAttribute("pubnumber")}\\vp*`;
      else if (objType === "chapter") this.usfmString += `\n\\cp ${xmlObj.getAttribute("pubnumber")}`;
    }
    if (xmlObj.hasAttribute("category")) this.usfmString += `\n\\cat ${xmlObj.getAttribute("category")} \\cat*`;

    for (const child of Array.from(xmlObj.childNodes)) {
      // nodeType 1 is an element: recurse, marking children of char elements as nested.
      if (child.nodeType === 1) this.usxToUsfm(child, objType === "char");
      // nodeType 3 is a text node: whitespace-only text is dropped.
      if (child.nodeType === 3 && child.nodeValue.trim()) {
        if (this.usfmString !== "" && !["\n", "\r", " ", "\t"].includes(this.usfmString.slice(-1))) {
          this.usfmString += " ";
        }
        this.usfmString += child.nodeValue.trim();
      }
    }

    for (const attrNode of Array.from(xmlObj.attributes)) {
      const key = attrNode.name;
      // Double quotes inside a value would terminate the USFM attribute early.
      const val = attrNode.value.replace(/"/g, "");
      if (key === "file" && objType === "figure") {
        usfmAttributes.push(`src="${val}"`);
      } else if (!$b37353ce824c37f3$require$NON_ATTRIB_USX_KEYS.includes(key)) {
        usfmAttributes.push(`${key}="${val}"`);
      } else if (["sid", "eid"].includes(key) && objType === "ms") {
        // Milestones keep their ids as attributes. This is an `else` branch so
        // that `eid`, already handled by the generic branch, is not written twice.
        usfmAttributes.push(`${key}="${val}"`);
      }
    }
    if (usfmAttributes.length > 0) {
      this.usfmString += "|";
      this.usfmString += usfmAttributes.join(" ");
    }

    if (
      (xmlObj.hasAttribute("closed") && xmlObj.getAttribute("closed") === "true") ||
      $b37353ce824c37f3$require$CLOSING_USJ_TYPES.includes(objType) ||
      usfmAttributes.length > 0
    ) {
      if (objType === "ms") this.usfmString += "\\*";
      else this.usfmString += `\\${marker}*`;
    }
    if (objType === "sidebar") this.usfmString += "\n\\esbe\n";
  }

  /**
   * Appends USFM built from a BibleNLP-style object.
   *
   * Only the first book is processed. When a second book code is met, a
   * warning is recorded and processing stops.
   *
   * @param {{vref: string[], text: string[]}} bibleNlpObj - Parallel arrays of
   *   references such as "GEN 1:1" and the matching verse texts.
   * @throws {Error} When a reference does not match "<3-char book> <chapter>:<verse>".
   */
  bibleNlptoUsfm(bibleNlpObj) {
    const vrefPattern = /([a-zA-Z0-9]{3}) (\d+):(.*)/;
    let currBook = null;
    let currChapter = null;
    for (let i = 0; i < bibleNlpObj.vref.length; i++) {
      const vref = bibleNlpObj.vref[i];
      const verseText = bibleNlpObj.text[i];
      const refMatch = vref.match(vrefPattern);
      if (!refMatch) {
        throw new Error(`Incorrect format: ${vref}.\nIn BibleNlp, vref should have ` + `three-letter book code, chapter, and verse in the following format: GEN 1:1`);
      }
      const book = refMatch[1].toUpperCase();
      const chap = refMatch[2];
      const verse = refMatch[3];
      if (book !== currBook) {
        if (currBook !== null) {
          this.warnings.push(`USFM can contain only one book per file. ` + `Only ${currBook} is processed. Specify bookCode for other books.`);
          break;
        }
        this.usfmString += `\\id ${book}`;
        currBook = book;
      }
      if (chap !== currChapter) {
        this.usfmString += `\n\\c ${chap}\n\\p\n`;
        currChapter = chap;
      }
      // Verses within a paragraph are separated by a single space.
      if (!this.usfmString.endsWith("\n")) this.usfmString += " ";
      this.usfmString += `\\v ${verse} ${verseText}`;
    }
  }
}
// Publish the USFMGenerator class under its export slot and alias it for the
// module that consumes it.
$b37353ce824c37f3$export$69486ebd11f334d1 = $b37353ce824c37f3$var$USFMGenerator;
var $be3194b3d68e926d$require$USFMGenerator = $b37353ce824c37f3$export$69486ebd11f334d1;
// Logic for syntax-tree to dict (USJ) conversion
// Receives the USJGenerator class once that class has been defined.
var $bc15495701a08748$export$da1572eff96010ef;
// tree-sitter's Query class, used by the USJ generator to run capture queries on syntax nodes.
const { Query: $bc15495701a08748$var$Query } = $a9A1a$treesitter;
// Hold the seven marker lookup tables that are assigned immediately below.
var $48a7bc78362df58a$export$3ea3efefd6f5792b;
var $48a7bc78362df58a$export$bfe3d604e5046dbb;
var $48a7bc78362df58a$export$32d1909bfb943eb0;
var $48a7bc78362df58a$export$ec3044778745fabd;
var $48a7bc78362df58a$export$38bc6f52843beb2a;
var $48a7bc78362df58a$export$af42bd8f70df8555;
var $48a7bc78362df58a$export$c4d2f24f22330b2a;
// Character-level markers, including the footnote and cross-reference content markers.
const $48a7bc78362df58a$var$CHAR_STYLE_MARKERS = [
  "add", "bk", "dc", "ior", "iqt", "k", "litl", "nd", "ord", "pn", "png",
  "qac", "qs", "qt", "rq", "sig", "sls", "tl", "wj",
  "em", "bd", "bdit", "it", "no", "sc", "sup",
  "rb", "pro", "w", "wh", "wa", "wg",
  "lik", "liv", "jmp",
  "fr", "ft", "fk", "fq", "fqa", "fl", "fw", "fp", "fv", "fdc",
  "xo", "xop", "xt", "xta", "xk", "xq", "xot", "xnt", "xdc",
];

// Paragraph-level markers (aliased below as PARA_STYLE_MARKERS).
$48a7bc78362df58a$export$3ea3efefd6f5792b = [
  "ide", "usfm", "h", "toc", "toca",
  "imt", "is", "ip", "ipi", "im", "imi", "ipq", "imq", "ipr", "iq", "ib",
  "ili", "iot", "io", "iex", "imte", "ie",
  "mt", "mte", "cl", "cd", "ms", "mr", "s", "sr", "r", "d", "sp", "sd",
  "q", "qr", "qc", "qa", "qm", "qd",
  "lh", "li", "lf", "lim", "litl",
  "sts", "rem", "lit", "restore",
];

// Footnote and cross-reference markers (aliased below as NOTE_MARKERS).
$48a7bc78362df58a$export$bfe3d604e5046dbb = ["f", "fe", "ef", "efe", "x", "ex"];

// The character marker list is exported as-is (same array instance).
$48a7bc78362df58a$export$32d1909bfb943eb0 = $48a7bc78362df58a$var$CHAR_STYLE_MARKERS;

// Node-type names of the nested form of each character marker, e.g. "ndNested".
$48a7bc78362df58a$export$ec3044778745fabd = $48a7bc78362df58a$var$CHAR_STYLE_MARKERS.map(
  (marker) => `${marker}Nested`
);

// Attribute name to use when an attribute value is given without a name,
// keyed by the node type of the element that carries it.
$48a7bc78362df58a$export$38bc6f52843beb2a = {
  w: "lemma",
  rb: "gloss",
  xt: "href",
  fig: "alt",
  xt_standalone: "href",
  xtNested: "href",
  ref: "loc",
  milestone: "who",
  k: "key",
};

// Table cell markers (aliased below as TABLE_CELL_MARKERS).
$48a7bc78362df58a$export$af42bd8f70df8555 = ["tc", "th", "tcr", "thr", "tcc"];

// Miscellaneous markers (aliased below as MISC_MARKERS).
$48a7bc78362df58a$export$c4d2f24f22330b2a = ["fig", "cat", "esb", "b", "ph", "pi"];

// Readable aliases under which the USJ generator refers to the tables above.
var $bc15495701a08748$require$PARA_STYLE_MARKERS = $48a7bc78362df58a$export$3ea3efefd6f5792b;
var $bc15495701a08748$require$NOTE_MARKERS = $48a7bc78362df58a$export$bfe3d604e5046dbb;
var $bc15495701a08748$require$CHAR_STYLE_MARKERS = $48a7bc78362df58a$export$32d1909bfb943eb0;
var $bc15495701a08748$require$NESTED_CHAR_STYLE_MARKERS = $48a7bc78362df58a$export$ec3044778745fabd;
var $bc15495701a08748$require$DEFAULT_ATTRIB_MAP = $48a7bc78362df58a$export$38bc6f52843beb2a;
var $bc15495701a08748$require$TABLE_CELL_MARKERS = $48a7bc78362df58a$export$af42bd8f70df8555;
var $bc15495701a08748$require$MISC_MARKERS = $48a7bc78362df58a$export$c4d2f24f22330b2a;
/**
 * Converts a tree-sitter syntax tree of a USFM document into a USJ (JSON) tree.
 *
 * Each `nodeToUSJ*` method reads one syntax node and appends the USJ it
 * produces to `parentJsonObj.content` (or sets a property on `parentJsonObj`).
 * Text is always taken from the original USFM string via the node's
 * start and end indices.
 */
class $bc15495701a08748$var$USJGenerator {
  /**
   * @param {object} treeSitterLanguageObj - Language object handed to tree-sitter queries.
   * @param {string} usfmString - The USFM source the syntax tree was parsed from.
   * @param {object|null} [usjRootObj=null] - Existing USJ root to extend; a fresh
   *   `{type: "USJ", version: "3.1", content: []}` root is created when omitted.
   */
  constructor(treeSitterLanguageObj, usfmString, usjRootObj = null) {
    this.usfmLanguage = treeSitterLanguageObj;
    this.usfm = usfmString;
    this.jsonRootObj = usjRootObj || {
      type: "USJ",
      version: "3.1",
      content: []
    };
  }

  /**
   * Depth-first search for the last USJ node whose `type` or `marker` equals `typeValue`.
   *
   * @param {object} jsonObj - USJ node to search (inclusive).
   * @param {string} typeValue - Type or marker to look for.
   * @returns {object|null} The last match in document order, or null.
   */
  findLastFromJson(jsonObj, typeValue) {
    let output = null;
    if (typeValue === jsonObj.type || (jsonObj.marker && typeValue === jsonObj.marker)) output = jsonObj;
    if (jsonObj.content) {
      jsonObj.content.forEach((child) => {
        if (typeof child === "string") return;
        const childOutput = this.findLastFromJson(child, typeValue);
        // Later matches overwrite earlier ones so the last one wins.
        if (childOutput !== null) output = childOutput;
      });
    }
    return output;
  }

  /** Builds the `book` node from an `id` syntax node. */
  nodeToUSJId(node, parentJsonObj) {
    const idCaptures = new $bc15495701a08748$var$Query(this.usfmLanguage, "(id (bookcode) @book-code (description)? @desc)").captures(node);
    let code = null;
    let desc = null;
    idCaptures.forEach((capture) => {
      if (capture.name === "book-code") code = this.usfm.slice(capture.node.startIndex, capture.node.endIndex);
      else if (capture.name === "desc") desc = this.usfm.slice(capture.node.startIndex, capture.node.endIndex);
    });
    const bookJsonObj = {
      type: "book",
      marker: "id",
      code: code,
      content: []
    };
    if (desc && desc.trim() !== "") bookJsonObj.content.push(desc.trim());
    parentJsonObj.content.push(bookJsonObj);
  }

  /**
   * Builds the `chapter` milestone node from a `c` syntax node, then converts
   * any `cl` / `cd` children as siblings of it.
   */
  nodeToUSJC(node, parentJsonObj) {
    const chapCap = new $bc15495701a08748$var$Query(this.usfmLanguage, `(c (chapterNumber) @chap-num
(ca (chapterNumber) @alt-num)?
(cp (text) @pub-num)?)`).captures(node);
    const chapNum = this.usfm.slice(chapCap[0].node.startIndex, chapCap[0].node.endIndex);
    // The chapter sid is "<book code> <chapter>"; it stays null when the root has no book node.
    let chapRef = null;
    this.jsonRootObj.content.forEach((child) => {
      if (child.type === "book") chapRef = `${child.code} ${chapNum}`;
    });
    const chapJsonObj = {
      type: "chapter",
      marker: "c",
      number: chapNum,
      sid: chapRef
    };
    chapCap.forEach((cap) => {
      if (cap.name === "alt-num") chapJsonObj.altnumber = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim();
      if (cap.name === "pub-num") chapJsonObj.pubnumber = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim();
    });
    parentJsonObj.content.push(chapJsonObj);
    node.children.forEach((child) => {
      if (["cl", "cd"].includes(child.type)) this.nodeToUSJ(child, parentJsonObj);
    });
  }

  /** Converts every child of a `chapter` syntax node into the same parent. */
  nodeToUSJChapter(node, parentJsonObj) {
    node.children.forEach((child) => {
      if (child.type === "c") this.nodeToUSJC(child, parentJsonObj);
      else this.nodeToUSJ(child, parentJsonObj);
    });
  }

  /**
   * Builds the `verse` milestone node from a `v` syntax node. Its sid is the
   * sid of the last chapter node found under the root plus ":<verse number>".
   */
  nodeToUSJVerse(node, parentJsonObj) {
    const verseNumCap = new $bc15495701a08748$var$Query(this.usfmLanguage, `
(v
(verseNumber) @vnum
(va (verseNumber) @alt)?
(vp (text) @vp)?
)`).captures(node);
    const verseNum = this.usfm.substring(verseNumCap[0].node.startIndex, verseNumCap[0].node.endIndex);
    const vJsonObj = {
      type: "verse",
      marker: "v",
      number: verseNum.trim()
    };
    verseNumCap.forEach((capture) => {
      if (capture.name === "alt") {
        vJsonObj.altnumber = this.usfm.slice(capture.node.startIndex, capture.node.endIndex);
      } else if (capture.name === "vp") {
        vJsonObj.pubnumber = this.usfm.substring(capture.node.startIndex, capture.node.endIndex);
      }
    });
    const ref = `${this.findLastFromJson(this.jsonRootObj, "chapter").sid}:${verseNum}`;
    vJsonObj.sid = ref.trim();
    parentJsonObj.content.push(vJsonObj);
  }

  /** Builds a `char` node for a `ca` / `va` that appears away from its `c` / `v`. */
  nodeToUSJCaVa(node, parentJsonObj) {
    const style = node.type;
    const charJsonObj = {
      type: "char",
      marker: style
    };
    const altNumMatch = new $bc15495701a08748$var$Query(this.usfmLanguage, `([
(chapterNumber)
(verseNumber)
] @alt-num)`).captures(node);
    const altNum = this.usfm.slice(altNumMatch[0].node.startIndex, altNumMatch[0].node.endIndex).trim();
    charJsonObj.altnumber = altNum;
    parentJsonObj.content.push(charJsonObj);
  }

  /**
   * Builds `para` nodes. Nodes whose first child is a "...Block" wrapper are
   * unwrapped and their children converted one by one.
   */
  nodeToUSJPara(node, parentJsonObj) {
    if (node.children[0].type.endsWith("Block")) {
      node.children[0].children.forEach((child) => {
        this.nodeToUSJPara(child, parentJsonObj);
      });
    } else if (node.type === "paragraph") {
      const paraTagCap = new $bc15495701a08748$var$Query(this.usfmLanguage, "(paragraph (_) @para-marker)").captures(node)[0];
      const paraMarker = paraTagCap.node.type;
      if (paraMarker === "b") {
        // A blank-line paragraph has no content array at all.
        parentJsonObj.content.push({
          type: "para",
          marker: paraMarker
        });
      } else if (!paraMarker.endsWith("Block")) {
        const paraJsonObj = {
          type: "para",
          marker: paraMarker,
          content: []
        };
        paraTagCap.node.children.forEach((child) => {
          this.nodeToUSJ(child, paraJsonObj);
        });
        parentJsonObj.content.push(paraJsonObj);
      }
    } else if (["pi", "ph"].includes(node.type)) {
      // The marker is read from the source text rather than from the node
      // type, so whatever the tag token contains is kept.
      const paraMarker = this.usfm.substring(node.children[0].startIndex, node.children[0].endIndex).replace("\\", "").trim();
      const paraJsonObj = {
        type: "para",
        marker: paraMarker,
        content: []
      };
      node.children.slice(1).forEach((child) => {
        this.nodeToUSJ(child, paraJsonObj);
      });
      parentJsonObj.content.push(paraJsonObj);
    }
  }

  /**
   * Builds a `note` node for footnotes and cross-references.
   * Children are: tag, caller, ...content, closing tag.
   */
  nodeToUSJNotes(node, parentJsonObj) {
    const tagNode = node.children[0];
    const callerNode = node.children[1];
    const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").trim();
    const noteJsonObj = {
      type: "note",
      marker: style,
      content: []
    };
    noteJsonObj.caller = this.usfm.substring(callerNode.startIndex, callerNode.endIndex).trim();
    for (let i = 2; i < node.children.length - 1; i++) this.nodeToUSJ(node.children[i], noteJsonObj);
    parentJsonObj.content.push(noteJsonObj);
  }

  /** Builds a `char` node for character markup, both regular and nested ("+"). */
  nodeToUSJChar(node, parentJsonObj) {
    const tagNode = node.children[0];
    let childrenRange = node.children.length;
    // A last child whose type starts with "\" is the closing tag, not content.
    if (node.children[node.children.length - 1].type.startsWith("\\")) childrenRange -= 1;
    const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").replace("+", "").trim();
    const charJsonObj = {
      type: "char",
      marker: style,
      content: []
    };
    for (let i = 1; i < childrenRange; i++) this.nodeToUSJ(node.children[i], charJsonObj);
    parentJsonObj.content.push(charJsonObj);
  }

  /** Builds `table`, `table:row` and `table:cell` nodes. */
  nodeToUSJTable(node, parentJsonObj) {
    if (node.type === "table") {
      const tableJsonObj = {
        type: "table",
        content: []
      };
      node.children.forEach((child) => {
        this.nodeToUSJ(child, tableJsonObj);
      });
      parentJsonObj.content.push(tableJsonObj);
    } else if (node.type === "tr") {
      const rowJsonObj = {
        type: "table:row",
        marker: "tr",
        content: []
      };
      node.children.slice(1).forEach((child) => {
        this.nodeToUSJ(child, rowJsonObj);
      });
      parentJsonObj.content.push(rowJsonObj);
    } else if ($bc15495701a08748$require$TABLE_CELL_MARKERS.includes(node.type)) {
      const tagNode = node.children[0];
      const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").trim();
      const cellJsonObj = {
        type: "table:cell",
        marker: style,
        content: [],
        // Alignment is derived from the marker text: "tcc" is centred, any
        // other marker containing "r" is end-aligned, the rest start-aligned.
        align: style.includes("tcc") ? "center" : style.includes("r") ? "end" : "start"
      };
      node.children.slice(1).forEach((child) => {
        this.nodeToUSJ(child, cellJsonObj);
      });
      parentJsonObj.content.push(cellJsonObj);
    }
  }

  /** Sets one attribute (name and value) as a property on the parent USJ node. */
  nodeToUSJAttrib(node, parentJsonObj) {
    const attribNameNode = node.children[0];
    let attribName = this.usfm.slice(attribNameNode.startIndex, attribNameNode.endIndex).trim();
    // A bare "|" means the value was given without a name: use the default
    // attribute name registered for the parent's node type.
    if (attribName === "|") {
      let parentType = node.parent.type;
      if (parentType.includes("Nested")) parentType = parentType.replace("Nested", "");
      attribName = $bc15495701a08748$require$DEFAULT_ATTRIB_MAP[parentType];
    }
    // USFM "src" (used by \fig) is stored under "file" in USJ.
    if (attribName === "src") attribName = "file";
    const attribValCap = new $bc15495701a08748$var$Query(this.usfmLanguage, "((attributeValue) @attrib-val)").captures(node);
    let attribValue = "";
    if (attribValCap.length > 0) attribValue = this.usfm.substring(attribValCap[0].node.startIndex, attribValCap[0].node.endIndex).trim();
    parentJsonObj[attribName] = attribValue;
  }

  /** Builds an `ms` node for milestones and z-namespace markers. */
  nodeToUSJMilestone(node, parentJsonObj) {
    const msNameCap = new $bc15495701a08748$var$Query(this.usfmLanguage, `(
[(milestoneTag)
(milestoneStartTag)
(milestoneEndTag)
(zSpaceTag)
] @ms-name)`).captures(node)[0];
    const style = this.usfm.slice(msNameCap.node.startIndex, msNameCap.node.endIndex).replace("\\", "").trim();
    const msJsonObj = {
      type: "ms",
      marker: style,
      content: []
    };
    node.children.forEach((child) => {
      if (child.type.endsWith("Attribute")) this.nodeToUSJ(child, msJsonObj);
    });
    // Though normally milestones don't have contents, custom z-namespaces
    // could have them; drop the array when nothing was added.
    if (!msJsonObj.content.length) delete msJsonObj.content;
    parentJsonObj.content.push(msJsonObj);
  }

  /** Builds nodes for `esb` (sidebar), `cat` (category), `fig` (figure) and `ref`. */
  nodeToUSJSpecial(node, parentJsonObj) {
    if (node.type === "esb") {
      const sidebarJsonObj = {
        type: "sidebar",
        marker: "esb",
        content: []
      };
      // First and last children are the opening and closing tags.
      node.children.slice(1, -1).forEach((child) => {
        this.nodeToUSJ(child, sidebarJsonObj);
      });
      parentJsonObj.content.push(sidebarJsonObj);
    } else if (node.type === "cat") {
      const catCap = new $bc15495701a08748$var$Query(this.usfmLanguage, "((category) @category)").captures(node)[0];
      const category = this.usfm.substring(catCap.node.startIndex, catCap.node.endIndex).trim();
      // A category is a property of its parent, not a child node.
      parentJsonObj.category = category;
    } else if (node.type === "fig") {
      const figJsonObj = {
        type: "figure",
        marker: "fig",
        content: []
      };
      node.children.slice(1, -1).forEach((child) => {
        this.nodeToUSJ(child, figJsonObj);
      });
      parentJsonObj.content.push(figJsonObj);
    } else if (node.type === "ref") {
      const refJsonObj = {
        type: "ref",
        content: []
      };
      node.children.slice(1, -1).forEach((child) => {
        this.nodeToUSJ(child, refJsonObj);
      });
      parentJsonObj.content.push(refJsonObj);
    }
  }

  /**
   * Builds a `para` node for paragraph-style markers. Inline children go
   * inside the new para; anything else is converted as a sibling after it.
   */
  nodeToUSJGeneric(node, parentJsonObj) {
    const tagNode = node.children[0];
    let style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex);
    if (style.startsWith("\\")) style = style.replace("\\", "").trim();
    const childrenRangeStart = 1;
    const paraJsonObj = {
      type: "para",
      marker: style,
      content: []
    };
    // Pushed before the children are converted so that non-inline children
    // land after this para in the parent.
    parentJsonObj.content.push(paraJsonObj);
    for (let i = childrenRangeStart; i < node.children.length; i++) {
      const child = node.children[i];
      const isInline =
        $bc15495701a08748$require$CHAR_STYLE_MARKERS.includes(child.type) ||
        $bc15495701a08748$require$NESTED_CHAR_STYLE_MARKERS.includes(child.type) ||
        ["text", "footnote", "crossref", "verseText", "v", "b", "milestone", "zNameSpace"].includes(child.type);
      // Only nest these types inside the upper para style node.
      if (isInline) this.nodeToUSJ(child, paraJsonObj);
      else this.nodeToUSJ(child, parentJsonObj);
    }
  }

  /**
   * Dispatches a syntax node to the conversion method for its type.
   *
   * @param {object} node - tree-sitter syntax node.
   * @param {object} parentJsonObj - USJ node that receives the result.
   */
  nodeToUSJ(node, parentJsonObj) {
    switch (node.type) {
      case "id":
        this.nodeToUSJId(node, parentJsonObj);
        break;
      case "chapter":
        this.nodeToUSJChapter(node, parentJsonObj);
        break;
      case "cl":
      case "cp":
      case "cd":
      case "vp":
        this.nodeToUSJGeneric(node, parentJsonObj);
        break;
      case "ca":
      case "va":
        this.nodeToUSJCaVa(node, parentJsonObj);
        break;
      case "v":
        this.nodeToUSJVerse(node, parentJsonObj);
        break;
      case "verseText":
        node.children.forEach((child) => this.nodeToUSJ(child, parentJsonObj));
        break;
      case "paragraph":
      case "pi":
      case "ph":
        this.nodeToUSJPara(node, parentJsonObj);
        break;
      case "text": {
        // "~" is replaced by a space. The regex is global so that every "~"
        // in the text node is converted, not just the first.
        const textVal = this.usfm.substring(node.startIndex, node.endIndex).replace(/~/g, " ");
        if (textVal !== "") parentJsonObj.content.push(textVal);
        break;
      }
      case "table":
      case "tr":
        this.nodeToUSJTable(node, parentJsonObj);
        break;
      case "milestone":
      case "zNameSpace":
        this.nodeToUSJMilestone(node, parentJsonObj);
        break;
      case "esb":
      case "cat":
      case "fig":
      case "ref":
        this.nodeToUSJSpecial(node, parentJsonObj);
        break;
      case "usfm":
        break;
      default:
        if ($bc15495701a08748$require$NOTE_MARKERS.includes(node.type)) {
          this.nodeToUSJNotes(node, parentJsonObj);
        } else if (
          $bc15495701a08748$require$CHAR_STYLE_MARKERS.includes(node.type) ||
          $bc15495701a08748$require$NESTED_CHAR_STYLE_MARKERS.includes(node.type) ||
          ["xt_standalone"].includes(node.type)
        ) {
          this.nodeToUSJChar(node, parentJsonObj);
        } else if ($bc15495701a08748$require$TABLE_CELL_MARKERS.includes(node.type)) {
          this.nodeToUSJTable(node, parentJsonObj);
        } else if (node.type.endsWith("Attribute")) {
          this.nodeToUSJAttrib(node, parentJsonObj);
        } else if (
          $bc15495701a08748$require$PARA_STYLE_MARKERS.includes(node.type) ||
          $bc15495701a08748$require$PARA_STYLE_MARKERS.includes(node.type.replace("\\", "").trim())
        ) {
          this.nodeToUSJGeneric(node, parentJsonObj);
        } else if (["", "|"].includes(node.type.trim())) {
          // Whitespace and bare attribute separators produce nothing.
        } else if (node.children.length > 0) {
          // Unknown wrapper node: convert its children into the same parent.
          node.children.forEach((child) => this.nodeToUSJ(child, parentJsonObj));
        }
        break;
    }
  }
}
// Publish the USJGenerator class under its export slot and alias it for the
// module that consumes it.
$bc15495701a08748$export$da1572eff96010ef = $bc15495701a08748$var$USJGenerator;
var $be3194b3d68e926d$require$USJGenerator = $bc15495701a08748$export$da1572eff96010ef;
// Receives the ListGenerator class once that class has been defined.
var $0ed9cb36901d4d14$export$24715706bc524307;
/**
 * Walks a USJ tree and flattens it, either into table rows (`list`) or into
 * the BibleNLP pair of parallel arrays (`bibleNlpFormat`).
 */
class $0ed9cb36901d4d14$var$ListGenerator {
  /** Initialises the traversal cursor and the two empty outputs. */
  constructor() {
    // Cursor shared by all traversal methods.
    this.book = "";
    this.currentChapter = "";
    this.currentVerse = "";
    // Row 0 is the column header.
    const headerRow = ["Book", "Chapter", "Verse", "Text", "Type", "Marker"];
    this.list = [headerRow];
    this.bibleNlpFormat = { text: [], vref: [] };
    // Chapter and verse of the most recent BibleNLP entry.
    this.prevChapter = "";
    this.prevVerse = "";
  }

  /** Records the book code of a `book` node. */
  usjToListId(obj) {
    this.book = obj.code;
  }

  /** Records the chapter number of a `chapter` node and clears the verse. */
  usjToListC(obj) {
    this.currentChapter = obj.number;
    this.currentVerse = "";
  }

  /** Records the verse number of a `verse` node. */
  usjToListV(obj) {
    this.currentVerse = obj.number;
  }

  /**
   * Appends rows for `obj` and its descendants to `this.list`.
   *
   * @param {object} obj - USJ node.
   * @param {string[]|null} [excludeMarkers=null] - Markers to leave out;
   *   "text" blanks the text column of string content.
   * @param {string[]|null} [includeMarkers=null] - Markers to keep.
   */
  usjToList(obj, excludeMarkers = null, includeMarkers = null) {
    switch (obj.type) {
      case "book": {
        this.usjToListId(obj);
        const idFilteredOut =
          (excludeMarkers && excludeMarkers.includes("id")) ||
          (includeMarkers && !includeMarkers.includes("id"));
        if (idFilteredOut) return;
        break;
      }
      case "chapter":
        this.usjToListC(obj);
        break;
      case "verse":
        this.usjToListV(obj);
        break;
      default:
        break;
    }

    // A "USJ" type would occur here if the JSON got flattened after removing
    // paragraph markers; it is reported as an empty type.
    const markerType = obj.type === "USJ" ? "" : obj.type;
    const markerName = obj.marker || "";
    // Reads the cursor at call time, because recursion below may move it.
    const buildRow = (text) => [
      this.book,
      this.currentChapter,
      this.currentVerse,
      text,
      markerType,
      markerName,
    ];

    const children = obj.content;
    if (!(children && children.length > 0)) {
      // Content-less node: emit a single row if the filters allow its marker.
      const wanted =
        (!excludeMarkers && !includeMarkers) ||
        (excludeMarkers && !excludeMarkers.includes(markerName)) ||
        (includeMarkers && includeMarkers.includes(markerName));
      if (wanted) this.list.push(buildRow(""));
      return;
    }

    for (const child of children) {
      if (typeof child !== "string") {
        this.usjToList(child, excludeMarkers, includeMarkers);
        continue;
      }
      const hideText = excludeMarkers && excludeMarkers.includes("text");
      this.list.push(buildRow(hideText ? "" : child));
    }
  }

  /**
   * Collects text into `this.bibleNlpFormat`, one entry per chapter:verse.
   * Strings that belong to the same verse as the previous entry are appended
   * to it, separated by a space.
   *
   * @param {object} obj - USJ node.
   */
  usjToBibleNlpFormat(obj) {
    switch (obj.type) {
      case "book":
        this.usjToListId(obj);
        return;
      case "chapter":
        this.usjToListC(obj);
        return;
      case "verse":
        this.usjToListV(obj);
        return;
      default:
        break;
    }
    if (!obj.content) return;

    for (const child of obj.content) {
      if (typeof child !== "string") {
        this.usjToBibleNlpFormat(child);
        continue;
      }
      // Line breaks become spaces, then the ends are trimmed.
      const cleaned = child.replace(/[\n\r]/g, " ").trim();
      const texts = this.bibleNlpFormat.text;
      const sameVerse =
        this.currentChapter === this.prevChapter &&
        this.currentVerse === this.prevVerse;
      if (sameVerse) {
        texts[texts.length - 1] += ` ${cleaned}`;
        continue;
      }
      const vref = `${this.book} ${this.currentChapter}:${this.currentVerse}`;
      texts.push(cleaned);
      this.bibleNlpFormat.vref.push(vref);
      this.prevChapter = this.currentChapter;
      this.prevVerse = this.currentVerse;
    }
  }
}
// Publish the ListGenerator class under its export slot and alias it for the
// module that consumes it.
$0ed9cb36901d4d14$export$24715706bc524307 = $0ed9cb36901d4d14$var$ListGenerator;
var $be3194b3d68e926d$require$ListGenerator = $0ed9cb36901d4d14$export$24715706bc524307;
// Logic for syntax-tree to XML (USX) conversion
// Receives the USX generator export.
// NOTE(review): its assignment is not visible in this part of the file.
var $27f438441a7f3c46$export$69d2127c7776f273;
// xmldom classes used to build and serialise the USX document.
var $27f438441a7f3c46$require$DOMImplementation = $a9A1a$xmldom.DOMImplementation;
var $27f438441a7f3c46$require$XMLSerializer = $a9A1a$xmldom.XMLSerializer;
// tree-sitter's Query class, used by the USX generator to run capture queries on syntax nodes.
const { Query: $27f438441a7f3c46$var$Query } = $a9A1a$treesitter;
// The USX generator shares the marker tables defined for the USJ generator.
var $27f438441a7f3c46$require$PARA_STYLE_MARKERS = $48a7bc78362df58a$export$3ea3efefd6f5792b;
var $27f438441a7f3c46$require$NOTE_MARKERS = $48a7bc78362df58a$export$bfe3d604e5046dbb;
var $27f438441a7f3c46$require$CHAR_STYLE_MARKERS = $48a7bc78362df58a$export$32d1909bfb943eb0;
var $27f438441a7f3c46$require$NESTED_CHAR_STYLE_MARKERS = $48a7bc78362df58a$export$ec3044778745fabd;
var $27f438441a7f3c46$require$DEFAULT_ATTRIB_MAP = $48a7bc78362df58a$export$38bc6f52843beb2a;
var $27f438441a7f3c46$require$TABLE_CELL_MARKERS = $48a7bc78362df58a$export$af42bd8f70df8555;
var $27f438441a7f3c46$require$MISC_MARKERS = $48a7bc78362df58a$export$c4d2f24f22330b2a;
class $27f438441a7f3c46$var$USXGenerator {
/**
* A binding for all methods used in generating USX from Syntax tree
* @param {object} treeSitterLanguageObj - The Tree-sitter language object
* @param {Buffer} usfmString - The USFM byte data
* @param {Element} [usxRootElement] - The root element of the USX (optional)
*/ constructor(treeSitterLanguageObj, usfmString, usxRootElement = null){
this.usfmLanguage = treeSitterLanguageObj;
this.usfm = usfmString;
const domImpl = new $27f438441a7f3c46$require$DOMImplementation();
const doc = domImpl.createDocument(null, 'usx', null);
if (usxRootElement === null) {
this.xmlRootNode = doc.documentElement;
this.xmlRootNode.setAttribute('version', '3.1');
} else this.xmlRootNode = usxRootElement;
this.parseState = {
prevVerseSid: null,
prevVerseParent: null
};
}
/**
* Builds the ID node in USX
* @param {SyntaxNode} node - The syntax node
* @param {Element} parentXmlNode - The parent XML node to append the ID to
*/ node2UsxId(node, parentXmlNode) {
const idCaptures = new $27f438441a7f3c46$var$Query(this.usfmLanguage, "(id (bookcode) @book-code (description)? @desc)").captures(node);
let code = null;
let desc = null;
idCaptures.forEach((capture)=>{
if (capture.name === 'book-code') code = this.usfm.slice(capture.node.startIndex, capture.node.endIndex);
else if (capture.name === 'desc') desc = this.usfm.slice(capture.node.startIndex, capture.node.endIndex);
});
const bookXmlNode = parentXmlNode.ownerDocument.createElement('book');
bookXmlNode.setAttribute('code', code);
bookXmlNode.setAttribute('style', 'id');
if (desc && desc.trim() !== '') {
const textNode = parentXmlNode.ownerDocument.createTextNode(desc.trim());
bookXmlNode.appendChild(textNode);
}
parentXmlNode.appendChild(bookXmlNode);
}
node2UsxC(node, parentXmlNode) {
// Build c, the chapter milestone node in usj
const chapCap = new $27f438441a7f3c46$var$Query(this.usfmLanguage, `(c (chapterNumber) @chap-num
(ca (chapterNumber) @alt-num)?
(cp (text) @pub-num)?)`).captures(node);
const chapNum = this.usfm.slice(chapCap[0].node.startIndex, chapCap[0].node.endIndex);
const bookNode = $a9A1a$xpath.select1("book", parentXmlNode);
const bookCode = bookNode.getAttribute("code");
const chapRef = `${bookCode} ${chapNum}`;
// Create the 'chapter' element
const chapXmlNode = parentXmlNode.ownerDocument.createElement('chapter');
chapXmlNode.setAttribute("number", chapNum);
chapXmlNode.setAttribute("style", "c");
chapXmlNode.setAttribute("sid", chapRef);
chapCap.forEach((cap)=>{
if (cap.name === "alt-num") {
const altNum = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim();
chapXmlNode.setAttribute('altnumber', altNum);
}
if (cap.name === "pub-num") {
const pubNum = this.usfm.substring(cap.node.startIndex, cap.node.endIndex).trim();
chapXmlNode.setAttribute('pubnumber', pubNum);
}
});
parentXmlNode.appendChild(chapXmlNode);
node.children.forEach((child)=>{
if ([
"cl",
"cd"
].includes(child.type)) this.node2Usx(child, parentXmlNode);
});
}
node2UsxChapter(node, parentXmlNode) {
// Build chapter node in USJ
node.children.forEach((child)=>{
if (child.type === "c") this.node2UsxC(child, parentXmlNode);
else this.node2Usx(child, parentXmlNode);
});
const prevVerses = $a9A1a$xpath.select("//verse", this.xmlRootNode);
if (prevVerses.length > 0 && prevVerses[prevVerses.length - 1].hasAttribute('sid')) {
const vEndXmlNode = parentXmlNode.ownerDocument.createElement('verse');
vEndXmlNode.setAttribute('eid', prevVerses[prevVerses.length - 1].getAttribute('sid'));
const sibblingCount = parentXmlNode.childNodes.length;
const lastSibbling = parentXmlNode.childNodes[sibblingCount - 1];
if (lastSibbling.tagName === "para") lastSibbling.appendChild(vEndXmlNode);
else if (lastSibbling.tagName === "table") {
const rows = lastSibbling.getElementsByTagName('row');
rows[rows.length - 1].appendChild(vEndXmlNode);
} else parentXmlNode.appendChild(vEndXmlNode);
}
}
node2UsxVerse(node, parentXmlNode) {
// Check if there are previous verses to close
if (this.parseState.prevVerseSid !== null) {
let prevPara = this.parseState.prevVerseParent;
let vEndXmlNode = prevPara.ownerDocument.createElement('verse');
vEndXmlNode.setAttribute("eid", this.parseState.prevVerseSid);
prevPara.appendChild(vEndXmlNode);
}
// Query to capture verse-related elements
const verseNumCap = new $27f438441a7f3c46$var$Query(this.usfmLanguage, `
(v
(verseNumber) @vnum
(va (verseNumber) @alt)?
(vp (text) @vp)?
)`).captures(node);
const verseNum = this.usfm.substring(verseNumCap[0].node.startIndex, verseNumCap[0].node.endIndex);
const vXmlNode = parentXmlNode.ownerDocument.createElement('verse');
parentXmlNode.appendChild(vXmlNode);
// Loop through the captured elements and set the attributes
verseNumCap.forEach((capture)=>{
if (capture.name === 'alt') {
const altNum = this.usfm.slice(capture.node.startIndex, capture.node.endIndex);
vXmlNode.setAttribute('altnumber', altNum);
} else if (capture.name === 'vp') {
const vpText = this.usfm.slice(capture.node.startIndex, capture.node.endIndex).trim();
vXmlNode.setAttribute('pubnumber', vpText);
}
});
// Get the last chapter's 'sid' attribute to form the verse reference
const chapterSid = $a9A1a$xpath.select("//chapter", this.xmlRootNode).pop().getAttribute('sid');
const ref = `${chapterSid}:${verseNum}`;
// Set attributes on the newly created 'verse' element
vXmlNode.setAttribute('number', verseNum.trim());
vXmlNode.setAttribute('style', 'v');
vXmlNode.setAttribute('sid', ref.trim());
}
node2UsxCaVa(node, parentXmlNode) {
// Build elements for independent ca and va away from c and v
const style = node.type;
// Create a new 'char' element under the parent XML node
const charXmlNode = parentXmlNode.ownerDocument.createElement('char');
charXmlNode.setAttribute('style', style);
// Query to capture chapterNumber or verseNumber
const altNumMatch = new $27f438441a7f3c46$var$Query(this.usfmLanguage, `([
(chapterNumber)
(verseNumber)
] @alt-num)`).captures(node);
// Extract the alternate number from the captured range
const altNum = this.usfm.slice(altNumMatch[0].node.startIndex, altNumMatch[0].node.endIndex).trim();
// Set the attributes on the 'char' element
charXmlNode.setAttribute('altnumber', altNum);
charXmlNode.setAttribute('closed', 'true');
// Append the 'char' element to the parent XML node
parentXmlNode.appendChild(charXmlNode);
}
node2UsxPara(node, parentXmlNode) {
// Build paragraph nodes in USX
if (node.children[0].type.endsWith('Block')) for (const child of node.children[0].children)this.node2UsxPara(child, parentXmlNode);
else if (node.type === 'paragraph') {
const paraTagCap = new $27f438441a7f3c46$var$Query(this.usfmLanguage, "(paragraph (_) @para-marker)").captures(node)[0];
const paraMarker = paraTagCap.node.type;
if (!paraMarker.endsWith("Block")) {
const paraXmlNode = parentXmlNode.ownerDocument.createElement("para");
paraXmlNode.setAttribute("style", paraMarker);
parentXmlNode.appendChild(paraXmlNode);
for (const child of paraTagCap.node.children.slice(1))this.node2Usx(child, paraXmlNode);
}
} else if ([
'pi',
'ph'
].includes(node.type)) {
const paraMarker = this.usfm.slice(node.children[0].startIndex, node.children[0].endIndex).replace("\\", "").trim();
const paraXmlNode = parentXmlNode.ownerDocument.createElement("para");
paraXmlNode.setAttribute("style", paraMarker);
parentXmlNode.appendChild(paraXmlNode);
for (const child of node.children.slice(1))this.node2Usx(child, paraXmlNode);
}
}
node2UsxNotes(node, parentXmlNode) {
// Build USJ nodes for footnotes and cross-references
const tagNode = node.children[0];
const callerNode = node.children[1];
const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").trim();
const noteXmlNode = parentXmlNode.ownerDocument.createElement('note');
noteXmlNode.setAttribute('style', style);
const caller = this.usfm.substring(callerNode.startIndex, callerNode.endIndex).trim();
noteXmlNode.setAttribute('caller', caller);
parentXmlNode.appendChild(noteXmlNode);
for(let i = 2; i < node.children.length - 1; i++)this.node2Usx(node.children[i], noteXmlNode);
}
node2UsxChar(node, parentXmlNode) {
// Build USJ nodes for character markups, both regular and nested
const tagNode = node.children[0];
let childrenRange = node.children.length;
if (node.children[node.children.length - 1].type.startsWith("\\")) childrenRange -= 1; // Exclude the last node if it starts with '\', treating it as a closing node
const charXmlNode = parentXmlNode.ownerDocument.createElement('char');
const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").replace("+", "").trim();
charXmlNode.setAttribute('style', style);
parentXmlNode.appendChild(charXmlNode);
for(let i = 1; i < childrenRange; i++)this.node2Usx(node.children[i], charXmlNode);
}
node2UsxAttrib(node, parentXmlNode) {
// Add attribute values to USJ elements
const attribNameNode = node.children[0];
let attribName = this.usfm.slice(attribNameNode.startIndex, attribNameNode.endIndex).trim();
// Handling special cases for attribute names
if (attribName === "|") {
let parentType = node.parent.type;
if (parentType.includes("Nested")) parentType = parentType.replace("Nested", "");
attribName = $27f438441a7f3c46$require$DEFAULT_ATTRIB_MAP[parentType];
}
if (attribName === "src") // for \fig
attribName = "file";
const attribValCap = new $27f438441a7f3c46$var$Query(this.usfmLanguage, "((attributeValue) @attrib-val)").captures(node);
let attribValue = "";
if (attribValCap.length > 0) attribValue = this.usfm.substring(attribValCap[0].node.startIndex, attribValCap[0].node.endIndex).trim();
parentXmlNode.setAttribute(attribName, attribValue);
}
node2UsxTable(node, parentXmlNode) {
// Handle table related components and convert to USJ
if (node.type === "table") {
const tableXmlNode = parentXmlNode.ownerDocument.createElement('table');
parentXmlNode.appendChild(tableXmlNode);
node.children.forEach((child)=>{
this.node2Usx(child, tableXmlNode);
});
} else if (node.type === "tr") {
const rowXmlNode = parentXmlNode.ownerDocument.createElement('row');
rowXmlNode.setAttribute("style", "tr");
parentXmlNode.appendChild(rowXmlNode);
node.children.slice(1).forEach((child)=>{
this.node2Usx(child, rowXmlNode);
});
} else if ($27f438441a7f3c46$require$TABLE_CELL_MARKERS.includes(node.type)) {
const tagNode = node.children[0];
const style = this.usfm.substring(tagNode.startIndex, tagNode.endIndex).replace("\\", "").trim();
const cellXmlNode = parentXmlNode.ownerDocument.createElement("cell");
cellXmlNode.setAttr