UNPKG

bible-passage-reference-parser

Version:

Identifies and parses Bible references (like John 3:16) in over 40 languages.

github.com/openbibleinfo/Bible-Passage-Reference-Parser

openbibleinfo/Bible-Passage-Reference-Parser

1,240 lines (1,238 loc) • 90.5 kB

JavaScript

// build/bcv_matcher.ts var bcv_matcher = class { constructor(parent, grammar2) { this.parent = parent; this.grammar = grammar2; } // ## Parsing-Related Functions // Replace control characters and spaces since we replace books with a specific character pattern. The string changes, but the length stays the same so that indices remain valid. If we want to use Latin numbers rather than non-Latin ones, replace them here. replace_control_characters(s) { return s.replace(this.parent.regexps.control, " "); } // Replace any /[^0-9]/ digits if requested so that the parser can find chapter and verse references. This replacement happens after removing books. replace_non_ascii_numbers(s) { if (this.parent.options.non_latin_digits_strategy === "replace") { s = s.replace(/[٠۰߀०০੦૦୦0౦೦൦๐໐༠၀႐០᠐᥆᧐᪀᪐᭐᮰᱀᱐꘠꣐꤀꧐꩐꯰０]/g, "0"); s = s.replace(/[١۱߁१১੧૧୧௧౧೧൧๑໑༡၁႑១᠑᥇᧑᪁᪑᭑᮱᱁᱑꘡꣑꤁꧑꩑꯱１]/g, "1"); s = s.replace(/[٢۲߂२২੨૨୨௨౨೨൨๒໒༢၂႒២᠒᥈᧒᪂᪒᭒᮲᱂᱒꘢꣒꤂꧒꩒꯲２]/g, "2"); s = s.replace(/[٣۳߃३৩੩૩୩௩౩೩൩๓໓༣၃႓៣᠓᥉᧓᪃᪓᭓᮳᱃᱓꘣꣓꤃꧓꩓꯳３]/g, "3"); s = s.replace(/[٤۴߄४৪੪૪୪௪౪೪൪๔໔༤၄႔៤᠔᥊᧔᪄᪔᭔᮴᱄᱔꘤꣔꤄꧔꩔꯴４]/g, "4"); s = s.replace(/[٥۵߅५৫੫૫୫௫౫೫൫๕໕༥၅႕៥᠕᥋᧕᪅᪕᭕᮵᱅᱕꘥꣕꤅꧕꩕꯵５]/g, "5"); s = s.replace(/[٦۶߆६৬੬૬୬௬౬೬൬๖໖༦၆႖៦᠖᥌᧖᪆᪖᭖᮶᱆᱖꘦꣖꤆꧖꩖꯶６]/g, "6"); s = s.replace(/[٧۷߇७৭੭૭୭௭౭೭൭๗໗༧၇႗៧᠗᥍᧗᪇᪗᭗᮷᱇᱗꘧꣗꤇꧗꩗꯷７]/g, "7"); s = s.replace(/[٨۸߈८৮੮૮୮௮౮೮൮๘໘༨၈႘៨᠘᥎᧘᪈᪘᭘᮸᱈᱘꘨꣘꤈꧘꩘꯸８]/g, "8"); s = s.replace(/[٩۹߉९৯੯૯୯௯౯೯൯๙໙༩၉႙៩᠙᥏᧙᪉᪙᭙᮹᱉᱙꘩꣙꤉꧙꩙꯹９]/g, "9"); } return s; } // Find and replace instances of Bible books. match_books(s) { const books = []; for (const book of this.parent.regexps.books) { let has_replacement = false; s = s.replace(book.regexp, function(_full, bk) { has_replacement = true; books.push({ value: bk, parsed: book.osis, type: "book" }); const extra = book.extra ? 
`/${book.extra}` : ""; return `${books.length - 1}${extra}`; }); if (has_replacement && /^[\s\x1f\d:.,;\-\u2013\u2014]+$/.test(s)) { break; } } for (const translation_regexp of this.parent.regexps.translations) { s = s.replace(translation_regexp, function(match) { books.push({ value: match, parsed: match.toLowerCase(), type: "translation" }); return `${books.length - 1}`; }); } return [s, this.get_book_indices(books, s)]; } // Get the string index for all the books / translations, adding the start index as a new key. get_book_indices(books, s) { let add_index = 0; for (const match of s.matchAll(/([\x1f\x1e])(\d+)(?:\/\d+)?\1/g)) { const bookIndex = parseInt(match[2], 10); books[bookIndex].start_index = match.index + add_index; add_index += books[bookIndex].value.length - match[0].length; } return books; } // Create an array of all the potential passage references in the string. match_passages(s) { let entities = []; let post_context = {}; for (const match of s.matchAll(this.parent.regexps.escaped_passage)) { let [full, part, book_id] = match; const book_id_number = parseInt(book_id, 10); const original_part_length = part.length; match.index += full.length - original_part_length; part = this.clean_end_match(s, match, part); part = part.replace(/[A-Z]+/g, (capitals) => capitals.toLowerCase()); const start_index_adjust = part.startsWith("") ? 0 : part.split("")[0].length; const passage = { value: this.grammar.parse(part, { punctuation_strategy: this.parent.options.punctuation_strategy }), type: "base", // The `start_index` in `this.parent.passage` always exists after being set in `match_books`. 
start_index: this.parent.passage.books[book_id_number].start_index - start_index_adjust, match: part }; const book_parsed = this.parent.passage.books[book_id_number].parsed; if (start_index_adjust === 0 && this.parent.options.book_alone_strategy === "full" && this.parent.options.book_range_strategy === "include" && passage.value[0].type === "b" && Array.isArray(passage.value) && (passage.value.length === 1 || passage.value.length > 1 && passage.value[1].type === "translation_sequence") && /^[234]/.test(book_parsed)) { this.create_book_range(s, passage, book_id_number); } let accum = []; [accum, post_context] = this.parent.passage.handle_obj(passage, [], {}); entities = entities.concat(accum); const regexp_index_adjust = this.adjust_regexp_end(accum, original_part_length, part.length); if (regexp_index_adjust > 0) { this.parent.regexps.escaped_passage.lastIndex -= regexp_index_adjust; } } return [entities, post_context]; } // Clean up the end of a match by removing unnecessary characters. clean_end_match(s, match, part) { if (/\s[2-9]\d\d\s*$|\s\d{4,}\s*$/.test(part)) { part = part.replace(/\s+\d+\s*$/, ""); } if (!/[\d\x1f\x1e)]$/.test(part)) { const sub_parts = part.split(this.parent.regexps.match_end_split); const remove = sub_parts.pop(); if (sub_parts.length > 0 && remove != null && remove.length > 0) { part = part.substring(0, part.length - remove.length); } } if (this.parent.options.captive_end_digits_strategy === "delete") { const next_char_index = match.index + part.length; if (s.length > next_char_index && /^\w/.test(s.charAt(next_char_index))) { part = part.replace(/[\s*]+\d+$/, ""); } part = part.replace(/(\x1e[)\]]?)[\s*]*\d+$/, "$1"); } return part; } // Handle the objects returned from the grammar to produce entities for further processing. 
We may need to adjust the `RegExp.lastIndex` if we discarded characters from the end of the match or if, after parsing, we're ignoring some of them--especially with ending parenthetical statements like "Luke 8:1-3; 24:10 (and Matthew 14:1-12 and Luke 23:7-12 for background)". adjust_regexp_end(accum, old_length, new_length) { if (accum.length > 0) { return old_length - accum[accum.length - 1].indices[1] - 1; } else if (old_length !== new_length) { return old_length - new_length; } return 0; } // If a book is on its own, check whether it's preceded by something that indicates it's a book range like "1-2 Samuel". create_book_range(s, passage, book_id) { const cases = [ this.parent.regexps.first, this.parent.regexps.second, this.parent.regexps.third ]; const limit = parseInt(this.parent.passage.books[book_id].parsed[0].substring(0, 1), 10); for (let i = 1; i < limit; i++) { const range_regexp = i === limit - 1 ? this.parent.regexps.range_and : this.parent.regexps.range_only; const match_regexp = new RegExp(String.raw`${this.parent.regexps.pre_number_book.source}(${cases[i - 1].source}\s*${range_regexp.source}\s*)\x1f${book_id}\x1f`, "iu"); const prev = s.match(match_regexp); if (prev) { return this.add_book_range_object(passage, prev, i); } } return false; } // Create a synthetic object that can be parsed to show the correct result. 
add_book_range_object(passage, prev, start_book_number) { const length = prev[1].length; passage.value[0] = { type: "b_range_pre", value: [ { type: "b_pre", value: start_book_number.toString(), indices: [prev.index, prev.index + length] }, passage.value[0] ], indices: [0, passage.value[0].indices[1] + length] }; this.add_offset_to_indices(passage.value[0].value[1].indices, length); passage.start_index -= length; passage.match = prev[1] + passage.match; if (!Array.isArray(passage.value)) { return passage; } for (let i = 1; i < passage.value.length; i++) { if (!passage.value[i].value) { continue; } if (passage.value[i].value[0]?.indices) { this.add_offset_to_indices(passage.value[i].value[0].indices, length); } this.add_offset_to_indices(passage.value[i].indices, length); } return passage; } add_offset_to_indices(indices, value_to_add) { indices[0] += value_to_add; indices[1] += value_to_add; } }; // build/bcv_options.ts var bcv_options = class { constructor(parent) { this.consecutive_combination_strategy = "combine"; this.osis_compaction_strategy = "b"; this.book_sequence_strategy = "ignore"; this.invalid_sequence_strategy = "ignore"; this.sequence_combination_strategy = "combine"; this.punctuation_strategy = "us"; this.invalid_passage_strategy = "ignore"; this.non_latin_digits_strategy = "ignore"; // This one is shared between `this` and `bcv_passage`. 
this.passage_existence_strategy = "bcv"; this.book_alone_strategy = "ignore"; this.book_range_strategy = "ignore"; this.captive_end_digits_strategy = "delete"; this.ps151_strategy = "c"; this.zero_chapter_strategy = "error"; this.zero_verse_strategy = "error"; this.single_chapter_1_strategy = "chapter"; this.end_range_digits_strategy = "verse"; this.warn_level = "none"; this.#testaments = "on"; this.#case_sensitive = "none"; this.parent = parent; } #testaments; get testaments() { return this.#testaments; } set testaments(filter) { if (filter === this.#testaments || filter.length === 0) { return; } const filters = filter.split(""); let out = ""; if (filters[0] === "o") { filters.shift(); out += "o"; } if (filters.length > 0 && filters[0] === "n") { filters.shift(); out += "n"; } if (filters.length > 0 && filters[0] === "a") { out += "a"; } if (out.length > 0 && out !== this.#testaments) { const new_apocrypha = out.indexOf("a") >= 0; const old_apocrypha = this.#testaments.indexOf("a") >= 0; this.#testaments = out; if (new_apocrypha !== old_apocrypha) { this.set_apocrypha(new_apocrypha); } else { this.parent.regexps_manager.filter_books(this.#testaments, this.case_sensitive); } } } // Whether to use books and abbreviations from the Apocrypha. This function makes sure books from the Apocrypha are available as options and adjusts the Psalms to include Psalm 151. It takes a boolean argument: `true` to include the Apocrypha and `false` to not. Defaults to `false`. 
set_apocrypha(include_apocrypha) { this.parent.regexps_manager.filter_books(this.#testaments, this.case_sensitive); for (const translation of Object.keys(this.parent.translations.systems)) { this.parent.translations.systems[translation].chapters ??= {}; this.parent.translations.systems[translation].chapters["Ps"] ??= [...this.parent.translations.systems.current.chapters["Ps"]]; if (include_apocrypha === true) { this.parent.translations.systems[translation].chapters["Ps"][150] = this.parent.translations.systems[translation].chapters["Ps151"]?.[0] ?? this.parent.translations.systems.current.chapters["Ps151"][0]; } else if (this.parent.translations.systems[translation].chapters?.["Ps"].length === 151) { this.parent.translations.systems[translation].chapters["Ps"].pop(); } } } get versification_system() { return this.parent.translations.current_system; } // Use an alternate versification system. Takes a string argument; the built-in options are: `default` to use ESV-style versification and `vulgate` to use the Vulgate (Greek) Psalm numbering. English offers several other versification systems; see the Readme for details. set versification_system(system) { if (this.parent.translations.aliases[system]?.system) { system = this.parent.translations.aliases[system].system; } if (!this.parent.translations.systems[system]) { if (this.warn_level === "warn") { console.warn(`Unknown versification system ("${system}"). 
Using default instead.`); } system = "default"; } if (!system || system === this.parent.translations.current_system) { return; } if (this.parent.translations.current_system !== "default") { this.parent.translations.systems.current = structuredClone(this.parent.translations.systems.default); } this.parent.translations.current_system = system; if (system === "default") { return; } if (this.parent.translations.systems[system].order) { this.parent.translations.systems.current.order = { ...this.parent.translations.systems[system].order }; } if (this.parent.translations.systems[system].chapters) { this.parent.translations.systems.current.chapters = { ...structuredClone(this.parent.translations.systems.default.chapters), ...structuredClone(this.parent.translations.systems[system].chapters) }; } } #case_sensitive; get case_sensitive() { return this.#case_sensitive; } // Whether to treat books as case-sensitive. Valid values are `none` and `books`. set case_sensitive(arg) { if (arg === this.#case_sensitive || arg !== "none" && arg !== "books") { return; } this.#case_sensitive = arg; this.parent.regexps_manager.filter_books(this.testaments, arg); } }; // build/bcv_passage.ts var bcv_passage = class { constructor(options, translations) { this.books = []; this.indices = []; this.options = options; this.translations = translations; } // ## Public // Loop through the parsed passages. handle_array(passages, accum = [], context = {}) { for (const passage of passages) { if (passage == null) { continue; } [accum, context] = this.handle_obj(passage, accum, context); } return [accum, context]; } handle_obj(passage, accum, context) { if (passage.type && typeof this[passage.type] === "function") { return this[passage.type](passage, accum, context); } return [accum, context]; } // ## Types Returned from the Peggy Grammar // These functions correspond to `type` attributes returned from the grammar. They're designed to be called multiple times if necessary. 
// Handle a book on its own ("Gen"). b(passage, accum, context) { passage.start_context = structuredClone(context); passage.passages = []; const alternates = []; for (const b of this.books[passage.value].parsed) { const valid = this.validate_ref(passage.start_context.translations, { b }); const obj = { start: { b }, end: { b }, valid }; if (passage.passages.length === 0 && valid.valid) { passage.passages.push(obj); } else { alternates.push(obj); } } this.normalize_passage_and_alternates(passage, alternates); accum.push(passage); context = { b: passage.passages[0].start.b }; if (passage.start_context.translations) { context.translations = structuredClone(passage.start_context.translations); } return [accum, context]; } // This is never called. It exists to make Typescript happy. b_pre(passage, accum, context) { return [accum, context]; } // Handle book-only ranges ("Gen-Exod"). b_range(passage, accum, context) { return this.range(passage, accum, context); } // Handle book-only ranges like "1-2 Samuel". It doesn't support multiple ambiguous ranges (like "1-2C"), which it probably shouldn't, anyway. b_range_pre(passage, accum, context) { passage.start_context = structuredClone(context); passage.passages = []; const book = this.pluck("b", passage.value); let end; [[end], context] = this.b(book, [], context); passage.absolute_indices ??= this.get_absolute_indices(passage.indices); passage.passages = [{ start: { b: passage.value[0].value + end.passages[0].start.b.substring(1), type: "b" }, end: end.passages[0].end, valid: end.passages[0].valid }]; if (passage.start_context.translations) { passage.passages[0].translations = structuredClone(passage.start_context.translations); } accum.push(passage); return [accum, context]; } // Handle ranges with a book as the start of the range ("Gen-Exod 2"). b_range_start(passage, accum, context) { return this.range(passage, accum, context); } // The base (root) object in the grammar controls the base indices. 
// Stores the result of `calculate_indices(passage.match, passage.start_index)` on `this.indices`, then processes every node in `passage.value` in order.
base(passage, accum, context) {
  this.indices = this.calculate_indices(passage.match, passage.start_index);
  return this.handle_array(passage.value, accum, context);
}
// Handle book-chapter ("Gen 1").
// Each candidate book is validated in turn. The first one that validates becomes `passage.passages[0]`; the rest are handed to `normalize_passage_and_alternates`.
bc(passage, accum, context) {
  // Snapshot the incoming context before it is reset below.
  passage.start_context = structuredClone(context);
  passage.passages = [];
  this.reset_context(context, ["b", "c", "v"]);
  const c = this.pluck("c", passage.value).value;
  const alternates = [];
  for (const b of this.books[this.pluck("b", passage.value).value].parsed) {
    // Which key of `start`/`end` receives the number: "c" normally, "v" when the number is reinterpreted as a verse.
    let context_key = "c";
    const valid = this.validate_ref(passage.start_context.translations, { b, c });
    const obj = { start: { b }, end: { b }, valid };
    if (valid.messages?.start_chapter_not_exist_in_single_chapter_book || valid.messages?.start_chapter_1) {
      // Revalidate with the number as a verse and pin the chapter to 1.
      obj.valid = this.validate_ref(passage.start_context.translations, { b, v: c });
      if (valid.messages?.start_chapter_not_exist_in_single_chapter_book) {
        // Carry the flag over to the replacement validation result.
        obj.valid.messages.start_chapter_not_exist_in_single_chapter_book = 1;
      }
      obj.start.c = 1;
      obj.end.c = 1;
      context_key = "v";
    }
    obj.start[context_key] = c;
    [obj.start.c, obj.start.v] = this.fix_start_zeroes(obj.valid, obj.start.c, obj.start.v);
    // The destructuring above always creates a `v` key; remove it again when there is no verse.
    if (obj.start.v == null) {
      delete obj.start.v;
    }
    obj.end[context_key] = obj.start[context_key];
    if (passage.passages.length === 0 && obj.valid.valid) {
      passage.passages.push(obj);
    } else {
      alternates.push(obj);
    }
  }
  this.normalize_passage_and_alternates(passage, alternates);
  this.set_context_from_object(context, ["b", "c", "v"], passage.passages[0].start);
  accum.push(passage);
  return [accum, context];
}
// Handle "Ps 3 title".
// Resolves the book-chapter part first. If neither the chosen passage nor any of its alternates is a book starting with "Ps", the result is pushed as a plain `bc`; otherwise the title is rewritten as verse 1 and the whole thing is handled as a `bcv`.
bc_title(passage, accum, context) {
  passage.start_context = structuredClone(context);
  const bc_pluck = this.pluck("bc", passage.value);
  let bc;
  [[bc], context] = this.bc(bc_pluck, [], context);
  // Prefer an alternate whose book starts with "Ps" when the primary interpretation doesn't.
  if (bc.passages[0].start.b.substring(0, 2) !== "Ps" && bc.passages[0].alternates) {
    for (const alternate of bc.passages[0].alternates) {
      if (alternate.start.b.substring(0, 2) === "Ps") {
        bc.passages[0] = structuredClone(alternate);
        break;
      }
    }
  }
  // Still not a "Ps" book: ignore the title and keep the book-chapter result.
  if (bc.passages[0].start.b.substring(0, 2) !== "Ps") {
    accum.push(bc);
    return [accum, context];
  }
  // Overwrite the book's candidate list with just "Ps" before `bcv` runs below.
  this.books[this.pluck("b", bc.value).value].parsed = ["Ps"];
  let title = this.pluck("title", passage.value);
  if (!title) {
    title = this.pluck("v", passage.value);
  }
  passage.value[1] = {
    type: "v",
    // Let us discover later that this was originally a `title`.
    original_type: "title",
    value: [{ type: "integer", value: 1, indices: title.indices }],
    indices: title.indices
  };
  passage.type = "bcv";
  // Reprocess from the context captured on entry, not the one returned by `bc`.
  return this.bcv(passage, accum, passage.start_context);
}
// Handle book chapter:verse ("Gen 1:1").
// The first candidate book that validates becomes `passage.passages[0]`; the rest are handed to `normalize_passage_and_alternates`.
bcv(passage, accum, context) {
  // Snapshot the incoming context before it is reset below.
  passage.start_context = structuredClone(context);
  passage.passages = [];
  this.reset_context(context, ["b", "c", "v"]);
  const bc = this.pluck("bc", passage.value);
  let c = this.pluck("c", bc.value).value;
  let v = this.pluck("v", passage.value).value;
  const alternates = [];
  for (const b of this.books[this.pluck("b", bc.value).value].parsed) {
    const valid = this.validate_ref(passage.start_context.translations, { b, c, v });
    // `c` and `v` are reassigned here, so any adjustment carries over to later candidate books.
    [c, v] = this.fix_start_zeroes(valid, c, v);
    const obj = { start: { b, c, v }, end: { b, c, v }, valid };
    if (passage.passages.length === 0 && valid.valid) {
      passage.passages.push(obj);
    } else {
      alternates.push(obj);
    }
  }
  this.normalize_passage_and_alternates(passage, alternates);
  this.set_context_from_object(context, ["b", "c", "v"], passage.passages[0].start);
  accum.push(passage);
  return [accum, context];
}
// Handle "Philemon verse 6." This is unusual.
// Builds a synthetic `bcv` node with chapter 1, resolves it through `bcv`, and copies the resulting passages onto the original node.
bv(passage, accum, context) {
  passage.start_context = structuredClone(context);
  const [b, v] = passage.value;
  let bcv = {
    indices: passage.indices,
    value: [
      { type: "bc", value: [b, { type: "c", value: [{ type: "integer", value: 1 }] }] },
      v
    ]
  };
  // `bcv` is reassigned to the processed object returned in the accumulator.
  [[bcv], context] = this.bcv(bcv, [], context);
  passage.passages = bcv.passages;
  passage.absolute_indices ??= this.get_absolute_indices(passage.indices);
  accum.push(passage);
  return [accum, context];
}
// Handle a chapter.
// Accepts either a bare `integer` node or a node wrapping one. The book comes from `context.b`.
c(passage, accum, context) {
  passage.start_context = structuredClone(context);
  let c = passage.type === "integer" ? passage.value : this.pluck("integer", passage.value).value;
  const valid = this.validate_ref(passage.start_context.translations, { b: context.b, c });
  // In a single-chapter book a chapter number that doesn't exist is handled as a verse instead.
  if (!valid.valid && valid.messages?.start_chapter_not_exist_in_single_chapter_book) {
    return this.v(passage, accum, context);
  }
  [c] = this.fix_start_zeroes(valid, c);
  passage.passages = [{ start: { b: context.b, c }, end: { b: context.b, c }, valid }];
  if (passage.start_context.translations) {
    // Assigned by reference (not cloned).
    passage.passages[0].translations = passage.start_context.translations;
  }
  accum.push(passage);
  // Update the outgoing context: new chapter, no verse.
  context.c = c;
  this.reset_context(context, ["v"]);
  passage.absolute_indices ??= this.get_absolute_indices(passage.indices);
  return [accum, context];
}
// Handle "23rd Psalm" by recasting it as a `bc`.
c_psalm(passage, accum, context) { passage.type = "bc"; const c = parseInt(this.books[passage.value].value.match(/^\d+/)[0], 10); passage.value = [ { type: "b", value: passage.value, indices: passage.indices }, { type: "c", value: [{ type: "integer", value: c, indices: passage.indices }], indices: passage.indices } ]; return this.bc(passage, accum, context); } // Handle "Ps 3, ch 4:title" c_title(passage, accum, context) { passage.start_context = structuredClone(context); if (context.b !== "Ps") { return this.c(passage.value[0], accum, context); } const title = this.pluck("title", passage.value); passage.value[1] = { type: "v", // Preserve the title type in case we want it later. original_type: "title", value: [{ type: "integer", value: 1, indices: title.indices }], indices: title.indices }; passage.type = "cv"; return this.cv(passage, accum, passage.start_context); } // Handle "Chapters 1-2 from Daniel". cb_range(passage, accum, context) { passage.type = "range"; const [b, start_c, end_c] = passage.value; passage.value = [{ type: "bc", value: [b, start_c], indices: passage.indices }, end_c]; end_c.indices[1] = passage.indices[1]; return this.range(passage, accum, context); } // Use an object to establish context for later objects but don't otherwise use it. context(passage, accum, context) { passage.start_context = structuredClone(context); passage.passages = []; context = Object.assign(context, this.books[passage.value].context); accum.push(passage); return [accum, context]; } // Handle a chapter:verse. 
// The book comes from `context.b`; the outgoing context gets the (possibly adjusted) chapter and verse.
cv(passage, accum, context) {
  passage.start_context = structuredClone(context);
  let c = this.pluck("c", passage.value).value;
  let v = this.pluck("v", passage.value).value;
  const valid = this.validate_ref(passage.start_context.translations, { b: context.b, c, v });
  [c, v] = this.fix_start_zeroes(valid, c, v);
  passage.passages = [{ start: { b: context.b, c, v }, end: { b: context.b, c, v }, valid }];
  if (passage.start_context.translations) {
    // Assigned by reference (not cloned).
    passage.passages[0].translations = passage.start_context.translations;
  }
  passage.absolute_indices ??= this.get_absolute_indices(passage.indices);
  context.c = c;
  context.v = v;
  accum.push(passage);
  return [accum, context];
}
// Handle "23rd Psalm verse 1" by recasting it as a `bcv`.
cv_psalm(passage, accum, context) {
  passage.start_context = structuredClone(context);
  passage.type = "bcv";
  const [c_psalm, v] = passage.value;
  // Resolve the "23rd Psalm" part into a `bc` node using a scratch accumulator; only the node itself is kept.
  const [[bc]] = this.c_psalm(c_psalm, [], passage.start_context);
  passage.value = [bc, v];
  return this.bcv(passage, accum, context);
}
// Handle "and following" (e.g., "Matt 1:1ff") by assuming it means to continue to the end of the current context (end of chapter if a verse is given, end of book if a chapter is given).
ff(passage, accum, context) {
  passage.start_context = structuredClone(context);
  // Append a synthetic end of 999 and process the result as a range.
  passage.value.push({ type: "integer", indices: structuredClone(passage.indices), value: 999 });
  // `passage` is rebound to the object `range` returns in its accumulator.
  [[passage], context] = this.range(passage, [], passage.start_context);
  // Give the start node the end node's indices, then drop the end node.
  passage.value[0].indices = passage.value[1].indices;
  passage.value[0].absolute_indices = passage.value[1].absolute_indices;
  passage.value.pop();
  // The 999 was never a real reference, so "doesn't exist" messages about the end are removed.
  for (const key of ["end_verse_not_exist", "end_chapter_not_exist"]) {
    delete passage.passages[0].valid.messages[key];
  }
  accum.push(passage);
  return [accum, context];
}
// Pass the integer off to whichever handler is relevant.
// With no verse in the context the integer is handled as a chapter; otherwise as a verse.
integer(passage, accum, context) {
  if (context.v == null) {
    return this.c(passage, accum, context);
  }
  return this.v(passage, accum, context);
}
// Handle "Ps 3-4:title" or "Acts 2:22-27.
Title" integer_title(passage, accum, context) { passage.start_context = structuredClone(context); if (context.b !== "Ps") { return this.integer(passage.value[0], accum, context); } passage.value[0] = { type: "c", value: [passage.value[0]], indices: structuredClone(passage.value[0].indices) }; passage.value[1].type = "v"; passage.value[1].original_type = "title"; passage.value[1].value = [{ type: "integer", value: 1, indices: structuredClone(passage.value[1].value.indices) }]; passage.type = "cv"; return this.cv(passage, accum, passage.start_context); } // Handle "next verse" (e.g., in Polish, "Matt 1:1n" should be treated as "Matt 1:1-2"). It crosses chapter boundaries but not book boundaries. When given a whole chapter, it assumes the next chapter (again, not crossing book boundaries). The logic here is similar to that of `this.ff`. next_v(passage, accum, context) { passage.start_context = structuredClone(context); const prev_integer = this.pluck_last_recursively("integer", passage.value) ?? 
{ value: 1 }; passage.value.push({ type: "integer", indices: passage.indices, value: prev_integer.value + 1 }); let psg; [[psg], context] = this.range(passage, [], passage.start_context); if (psg.passages[0].valid.messages.end_verse_not_exist && !psg.passages[0].valid.messages.start_verse_not_exist && !psg.passages[0].valid.messages.start_chapter_not_exist && context.c != null) { passage.value.pop(); passage.value.push({ type: "cv", indices: passage.indices, value: [ { type: "c", value: [{ type: "integer", value: context.c + 1, indices: passage.indices }], indices: passage.indices }, { type: "v", value: [{ type: "integer", value: 1, indices: passage.indices }], indices: passage.indices } ] }); [[psg], context] = this.range(passage, [], passage.start_context); } psg.value[0].indices = psg.value[1].indices; psg.value[0].absolute_indices = psg.value[1].absolute_indices; psg.value.pop(); for (const key of ["end_verse_not_exist", "end_chapter_not_exist"]) { delete passage.passages[0].valid.messages[key]; } accum.push(psg); return [accum, context]; } // Handle a sequence of references. This is the only function that can return more than one object in the `passage.passages` array. sequence(passage, accum, context) { passage.start_context = structuredClone(context); passage.passages = []; for (const obj of passage.value) { let psg; [[psg], context] = this.handle_array(obj, [], context); for (const sub_psg of psg.passages) { sub_psg.type ??= psg.type; sub_psg.absolute_indices ??= psg.absolute_indices; if (psg.start_context.translations) { sub_psg.translations = psg.start_context.translations; } sub_psg.enclosed_absolute_indices = psg.type === "sequence_post_enclosed" ? 
[...psg.absolute_indices] : [-1, -1]; passage.passages.push(sub_psg); } } if (!passage.absolute_indices) { if (passage.passages.length > 0 && passage.type === "sequence") { passage.absolute_indices = [ passage.passages[0].absolute_indices[0], passage.passages[passage.passages.length - 1].absolute_indices[1] ]; } else { passage.absolute_indices = this.get_absolute_indices(passage.indices); } } accum.push(passage); return [accum, context]; } // Handle a sequence like "Ps 119 (118)," with parentheses. We want to include the closing parenthesis in the indices if `sequence_combination_strategy` is `combine` or if there's a consecutive. sequence_post_enclosed(passage, accum, context) { return this.sequence(passage, accum, context); } // Handle a verse, either as part of a sequence or because someone explicitly wrote "verse". v(passage, accum, context) { passage.start_context = structuredClone(context); const v = passage.type === "integer" ? passage.value : this.pluck("integer", passage.value).value; const c = context.c != null ? context.c : 1; const valid = this.validate_ref(passage.start_context.translations, { b: context.b, c, v }); const [, fixed_v] = this.fix_start_zeroes(valid, 0, v); passage.passages = [{ start: { b: context.b, c, v: fixed_v }, end: { b: context.b, c, v: fixed_v }, valid }]; if (passage.start_context.translations) { passage.passages[0].translations = structuredClone(passage.start_context.translations); } passage.absolute_indices ??= this.get_absolute_indices(passage.indices); accum.push(passage); context.v = fixed_v; return [accum, context]; } // ## Ranges // Handle any type of start and end range. It doesn't directly return multiple passages, but if there's an error parsing the range, we may convert it into a sequence. 
// Processes the start node, then the end node, validates the pair, and either records a single start/end passage or delegates to `range_change_integer_end`, `range_handle_valid`, or `range_handle_invalid`.
range(passage, accum, context) {
  passage.start_context = structuredClone(context);
  let [start, end] = passage.value;
  // `start` is rebound to the processed node; `context` now reflects the start of the range.
  [[start], context] = this.handle_obj(start, [], context);
  // An explicit verse after a book-chapter or chapter start ("Jer 33-v11" style): reinterpret as chapter:verse rather than a range. Skipped when the `bc` start was flagged as a chapter that doesn't exist in a single-chapter book.
  if (end.type === "v" && this.options.end_range_digits_strategy === "verse" && (start.type === "bc" && !start.passages?.[0]?.valid?.messages?.start_chapter_not_exist_in_single_chapter_book || start.type === "c")) {
    passage.value[0] = start;
    return this.range_change_integer_end(passage, accum);
  }
  [[end], context] = this.handle_obj(end, [], context);
  passage.value = [start, end];
  passage.indices = [start.indices[0], end.indices[1]];
  // The indices just changed, so any cached absolute indices are stale.
  delete passage.absolute_indices;
  const start_obj = {
    b: start.passages[0].start.b,
    c: start.passages[0].start.c,
    v: start.passages[0].start.v,
    type: start.type
  };
  const end_obj = {
    b: end.passages[0].end.b,
    c: end.passages[0].end.c,
    v: end.passages[0].end.v,
    type: end.type
  };
  // Put a zero back on the end object when the end node's own validation flagged a zero chapter or verse.
  if (end.passages[0].valid.messages.start_chapter_is_zero) {
    end_obj.c = 0;
  }
  if (end.passages[0].valid.messages.start_verse_is_zero) {
    end_obj.v = 0;
  }
  const valid = this.validate_ref(passage.start_context.translations, start_obj, end_obj);
  if (valid.valid) {
    // `range_handle_valid` may decide to reprocess the passage entirely; if so, return its result as-is.
    const [return_now, return_value] = this.range_handle_valid(valid, passage, start, start_obj, end, end_obj, accum);
    if (return_now) {
      return return_value;
    }
  } else {
    return this.range_handle_invalid(valid, passage, start, start_obj, end, end_obj, accum);
  }
  passage.absolute_indices ??= this.get_absolute_indices(passage.indices);
  passage.passages = [{ start: start_obj, end: end_obj, valid }];
  if (passage.start_context.translations) {
    passage.passages[0].translations = structuredClone(passage.start_context.translations);
  }
  // Rename the type when either end of the range is a bare book.
  if (start_obj.type === "b") {
    passage.type = end_obj.type === "b" ? "b_range" : "b_range_start";
  } else if (end_obj.type === "b") {
    passage.type = "range_end_b";
  }
  accum.push(passage);
  return [accum, context];
}
// For Ps 122-23, treat the 23 as 123.
range_change_end(passage, accum, new_end) { const [start, end] = passage.value; if (end.type === "integer") { end.original_value = end.value; end.value = new_end; } else if (end.type === "v") { const new_obj = this.pluck("integer", end.value); new_obj.original_value = new_obj.value; new_obj.value = new_end; } else if (end.type === "cv") { const new_obj = this.pluck("c", end.value); new_obj.original_value = new_obj.value; new_obj.value = new_end; } return this.handle_obj(passage, accum, passage.start_context); } // For "Jer 33-11", treat the "11" as a verse. range_change_integer_end(passage, accum) { const [start, end] = passage.value; passage.original_type ??= passage.type; passage.original_value ??= [start, end]; passage.type = start.type === "integer" ? "cv" : start.type + "v"; if (start.type === "integer") { passage.value[0] = { type: "c", value: [start], indices: start.indices }; } if (end.type === "integer") { passage.value[1] = { type: "v", value: [end], indices: end.indices }; } return this.handle_obj(passage, accum, passage.start_context); } // If a new end chapter/verse in a range may be necessary, calculate it. range_check_new_end(translations, start_obj, end_obj, valid) { let new_end = 0; let type; if (valid.messages?.end_chapter_before_start) { type = "c"; } else if (valid.messages?.end_verse_before_start) { type = "v"; } else { return new_end; } new_end = this.range_get_new_end_value(start_obj, end_obj, valid, type); if (new_end > 0) { const obj_to_validate = { b: end_obj.b, c: end_obj.c, v: end_obj.v }; obj_to_validate[type] = new_end; if (!this.validate_ref(translations, obj_to_validate).valid) { new_end = 0; } } return new_end; } // Handle ranges with a book as the end of the range ("Gen 2-Exod"). It's not `b_range_end` because only objects that start with an explicit book name should start with `b`. 
range_end_b(passage, accum, context) { return this.range(passage, accum, context); } // If a sequence has an end chapter/verse that's before the the start, check to see whether it can be salvaged: Gen 28-9 = Gen 28-29; Ps 101-24 = Ps 101-124. The `key` parameter is either `c` (for chapter) or `v` (for verse). range_get_new_end_value(start_obj, end_obj, valid, key) { let new_end = 0; if (key === "c" && valid.messages?.end_chapter_is_zero || key === "v" && valid.messages?.end_verse_is_zero) { return new_end; } if (start_obj[key] >= 10 && end_obj[key] < 10 && start_obj[key] - 10 * Math.floor(start_obj[key] / 10) < end_obj[key]) { new_end = end_obj[key] + 10 * Math.floor(start_obj[key] / 10); } else if (start_obj[key] >= 100 && end_obj[key] < 100 && start_obj[key] - 100 < end_obj[key]) { new_end = end_obj[key] + 100; } return new_end; } // The range doesn't look valid, but maybe we can fix it. If not, convert it to a sequence. range_handle_invalid(valid, passage, start, start_obj, end, end_obj, accum) { if (valid.valid === false && (valid.messages?.end_chapter_before_start || valid.messages?.end_verse_before_start) && (end.type === "integer" || end.type === "v") || valid.valid === false && valid.messages?.end_chapter_before_start && end.type === "cv") { const new_end = this.range_check_new_end(passage.start_context.translations, start_obj, end_obj, valid); if (new_end > 0) { return this.range_change_end(passage, accum, new_end); } } if (this.options.end_range_digits_strategy === "verse" && start_obj.v == null && (end.type === "integer" || end.type === "v")) { const temp_value = end.type === "v" ? 
this.pluck("integer", end.value) : end.value; const temp_valid = this.validate_ref(passage.start_context.translations, { b: start_obj.b, c: start_obj.c, v: temp_value }); if (temp_valid.valid) { return this.range_change_integer_end(passage, accum); } } passage.original_type ??= passage.type; passage.type = "sequence"; [passage.original_value, passage.value] = [[start, end], [[start], [end]]]; return this.sequence(passage, accum, structuredClone(passage.start_context)); } // The range looks valid, but we should check for some special cases. range_handle_valid(valid, passage, start, start_obj, end, end_obj, accum) { if (valid.messages?.end_chapter_not_exist && this.options.end_range_digits_strategy === "verse" && !start_obj.v && (end.type === "integer" || end.type === "v") && this.options.passage_existence_strategy.indexOf("v") >= 0) { const temp_value = end.type === "v" ? this.pluck("integer", end.value) : end.value; const temp_valid = this.validate_ref(passage.start_context.translations, { b: start_obj.b, c: start_obj.c, v: temp_value }); if (temp_valid.valid) { return [true, this.range_change_integer_end(passage, accum)]; } } this.range_validate(valid, start_obj, end_obj, passage); return [false, null]; } // If the end object goes past the end of the book or chapter, snap it back to a verse that exists. range_validate(valid, start_obj, end_obj, passage) { if (valid.messages?.end_chapter_not_exist || valid.messages?.end_chapter_not_exist_in_single_chapter_book) { end_obj.c = valid.messages.end_chapter_not_exist ?? 
valid.messages.end_chapter_not_exist_in_single_chapter_book; if (end_obj.v != null) { end_obj.v = this.validate_ref(passage.start_context.translations, { b: end_obj.b, c: end_obj.c, v: 999 }).messages.end_verse_not_exist; delete valid.messages.end_verse_is_zero; } } else if (valid.messages?.end_verse_not_exist) { end_obj.v = valid.messages.end_verse_not_exist; } if (valid.messages?.end_verse_is_zero && this.options.zero_verse_strategy !== "allow") { end_obj.v = valid.messages.end_verse_is_zero; } if (valid.messages?.end_chapter_is_zero) { end_obj.c = valid.messages.end_chapter_is_zero; } [start_obj.c, start_obj.v] = this.fix_start_zeroes(valid, start_obj.c, start_obj.v); } // ## Stop Token // Include it in `accum` so that it can stop backpropagation for translations. No context goes forward or backward past a `stop` token. stop(passage, accum, context) { passage.start_context = {}; accum.push(passage); return [accum, {}]; } // ## Translations // Even a single translation ("NIV") appears as part of a translation sequence. Here we handle the sequence and apply the translations to any previous passages lacking an explicit translation: in "Matt 1, 5 ESV," both `Matt 1` and `5` get applied, but in "Matt 1 NIV, 5 ESV," NIV only applies to Matt 1, and ESV only applies to Matt 5. 
translation_sequence(passage, accum, context) { passage.start_context = structuredClone(context); const translations = []; translations.push({ translation: this.books[passage.value[0].value].parsed, system: "default", osis: "" }); for (const val of passage.value[1]) { const translation = this.books[this.pluck("translation", val).value].parsed; if (translation) { translations.push({ translation, system: "default", osis: "" }); } } for (const translation of translations) { if (this.translations.aliases[translation.translation]) { translation.system = this.translations.aliases[translation.translation].system; translation.osis = this.translations.aliases[translation.translation].osis || translation.translation.toUpperCase(); } else { translation.osis = translation.translation.toUpperCase(); } } if (accum.length > 0) { context = this.translation_sequence_apply(accum, translations); } passage.absolute_indices = this.get_absolute_indices(passage.indices); accum.push(passage); this.reset_context(context, ["translations"]); return [accum, context]; } // Go back and find the earliest already-parsed passage without a translation. We start with 0 because the below loop will never yield a 0. translation_sequence_apply(accum, translations) { let use_i = 0; for (let i = accum.length - 1; i >= 0; i--) { if (accum[i].original_type) { accum[i].type = accum[i].original_type; } if (accum[i].original_value) { accum[i].value = accum[i].original_value; } if (accum[i].type === "translation_sequence" || accum[i].type === "stop") { use_i = i + 1; break; } } let context; if (use_i < accum.length) { accum[use_i].start_context.translations = translations; [, context] = this.handle_array(accum.slice(use_i), [], accum[use_i].start_context); } else { context = structuredClone(accum[accum.length - 1].start_context); } return context; } // ## Word // It doesn't need to be preserved in `accum` since it has no effect on parsing and we don't do anything with it. 
word(passage, accum, context) { return [accum, context]; } // ## Utilities // Pluck the object or value matching a type from an array. pluck(type, passages) { for (const passage of passages) { if (passage && passage.type && passage.type === type) { if (type === "c" || type === "v") { return this.pluck("integer", passage.value); } return passage; } } return null; } // Pluck the last object or value matching a type, descending as needed into objects. pluck_last_recursively(type, passages) { for (let i = passages.length - 1; i >= 0; i--) { const passage = passages[i]; if (!passage || !passage.type) { continue; } if (passage.type === type) { return this.pluck(type, [passage]); } const value = this.pluck_last_recursively(type, passage.value); if (value != null) { return value; } } return null; } // Set available context keys. set_context_from_object(context, keys, obj) { for (const key of keys) { if (obj[key] == null) { continue; } context[key] = obj[key]; } } // Delete existing context keys if, for example, starting with a new book. Which keys are deleted depends on the caller. reset_context(context, keys) { for (const key of keys) { delete context[key]; } } // If the start chapter or verse is 0 and the appropriate option is set to `upgrade`, convert it to a 1. fix_start_zeroes(valid, c, v = void 0) { if (valid.messages?.start_chapter_is_zero && this.options.zero_chapter_strategy === "upgrade") { c = valid.messages.start_chapter_is_zero; } if (valid.messages?.start_verse_is_zero && this.options.zero_verse_strategy === "upgrade") { v = valid.messages.start_verse_is_zero; } return [c, v]; } // Given a string and initial index, calculate indices for parts of the string. For example, a string that starts at index 10 might have a book that pushes it to index 12 starting at its third character. 
calculate_indices(match, adjust) { let switch_type = "book"; const indices = []; let match_index = 0; if (typeof adjust !== "number") { adjust = parseInt(adjust, 10); } for (let part of match.split(/[\x1e\x1f]/)) { switch_type = switch_type === "book" ? "rest" : "book"; const part_length = part.length; if (part_length === 0) { continue; } if (switch_type === "book") { const part_i = parseInt(part.replace(/\/\d+$/, ""), 10); const end_index = match_index + part_length; if (indices.length > 0 && indices[indices.length - 1].index === adjust) { indices[indices.length - 1].end = end_index; } else { indices.push({ start: match_index, end: end_index, index: adjust }); } match_index += part_length + 2; adjust = this.books[part_i].start_index + this.books[part_i].value.length - match_index; indices.push({ start: end_index + 1, end: end_index + 1, index: adjust }); } else { const end_index = match_index + part_length - 1; if (indices.length > 0 && indices[indices.length - 1].index === adjust) { indices[indices.length - 1].end = end_index; } else { indices.push({ start: match_index, end: end_index, index: adjust }); } match_index += part_length; } } return indices; } // Find the absolute string indices of start and end points. get_absolute_indices([start, end]) { let start_out = null; let end_out = null; for (const index of this.indices) { if (start_out === null && index.start <= start && start <= index.end) { start_out = start + index.index; } if (index.start <= end && end <= index.end) { end_out = end + index.index + 1; break; } } return [start_out, end_out]; } // Apply common transformations at the end of handling a passage object with a book. 
normalize_passage_and_alternates(passage, alternates) { if (passage.passages.length === 0) { passage.passages.push(alternates.shift()); } if (alternates.length > 0) { passage.passages[0].alternates = alternates; } if (passage.start_context.translations) { passage.passages[0].translations = passage.start_context.translations; } passage.absolute_indices ??= this.get_absolute_indices(passage.indices); } // ## Validators // Given a start and optional end bcv object, validate that the verse exists and is valid. It returns a `true` value for `valid` if any of the translations is valid. validate_ref(translations, start, end = null) { if (!translations || translations.length === 0 || !Array.isArray(translations)) { translations = [{ osis: "", translation: "current", system: "current" }]; } let valid = false; const messages = {}; for (const translation of translations) { if (!translation.system) { messages.translation_invalid ??= []; messages.translation_invalid.push(translation); continue; } if (!this.translations.aliases[translation.system]) { translation.system = "current"; messages.translation_unknown ??= []; messages.translation_unknown.push(translation); } let [temp_valid] = this.validate_start_ref(translation.system, start, messages); if (end) { [temp_valid] = this.validate_end_ref(translation.system, start, end, temp_valid, messages); } if (temp_valid === true) { valid = true; } } return { valid, messages }; } // The end ref pretty much just has to be after the start ref; beyond the book, we don't require the chapter or verse to exist. This approach is useful when people get end verses wrong. validate_end_ref(system, start, end, valid, messages) { const order_system = this.translations.systems[system]?.order ? 
system : "current"; if (end.c === 0) { messages.end_chapter_is_zero = 1; if (this.options.zero_chapter_strategy === "error") { valid = false; } else { end.c = 1; } } if (end.v === 0) { messages.end_verse_is_zero = 1; if (this.options.zero_verse_strategy === "error") { valid = false; } else if (this.options.zero_verse_strategy === "upgrade") { end.v = 1; } } if (end.b && this.translations.systems[order_system].order[end.b]) { valid = this.validate_known_end_book(system, order_system, start, end, valid, messages); } else { valid = false; messages.end_book_not_exist = true; } return [valid, messages]; } // Validate when the end book is known to exist. This function makes `validate_end_ref` easier to follow. validate_known_end_book(system, order_system, start, end, valid, messages) { const chapter_array = this.translations.systems[system]?.chapters?.[end.b] || this.translations.systems.current.chapters[end.