UNPKG

bible-ref-parse

Version:

Identifies and parses Bible references (like John 3:16) in over 40 languages.

github.com/bibleutils/bible-ref-parse

bibleutils/bible-ref-parse

808 lines • 331 kB

JavaScript

// @ts-nocheck /* * decaffeinate suggestions: * DS101: Remove unnecessary use of Array.from * DS102: Remove unnecessary code created because of implicit returns * DS202: Simplify dynamic range loops * DS203: Remove `|| {}` from converted for-own loops * DS205: Consider reworking code to avoid use of IIFEs * DS206: Consider reworking classes to avoid initClass * DS207: Consider shorter variations of null checks * DS208: Avoid top-level this * Full docs: https://github.com/decaffeinate/decaffeinate/blob/main/docs/suggestions.md */ // This class takes a string and identifies Bible passage references in that string. It's designed to handle how people actually type Bible passages and tries fairly hard to make sense of dubious possibilities. // // The aggressiveness is tunable, to a certain extent, using the below `options`. It's probably too aggressive for general text parsing (the "is 2" in "There is 2 much" becomes "Isa.2", for example). // Export to whatever the current context is. const root = this; class bcv_parser { static initClass() { this.prototype.s = ""; this.prototype.entities = []; this.prototype.passage = null; this.prototype.regexps = {}; // ## Main Options this.prototype.options = { // ### OSIS Output // * `combine`: "Matt 5, 6, 7" -> "Matt.5-Matt.7". // * `separate`: "Matt 5, 6, 7" -> "Matt.5,Matt.6,Matt.7". consecutive_combination_strategy: "combine", // * `b`: OSIS refs get reduced to the shortest possible. "Gen.1.1-Gen.50.26" and "Gen.1-Gen.50" -> "Gen", while "Gen.1.1-Gen.2.25" -> "Gen.1-Gen.2". // * `bc`: OSIS refs get reduced to complete chapters if possible, but not whole books. "Gen.1.1-Gen.50.26" -> "Gen.1-Gen.50". // * `bcv`: OSIS refs always include the full book, chapter, and verse. "Gen.1" -> "Gen.1.1-Gen.1.31". osis_compaction_strategy: "b", // ### Sequence // * `ignore`: ignore any books on their own in sequences ("Gen Is 1" -> "Isa.1"). 
// * `include`: any books that appear on their own get parsed according to `book_alone_strategy` ("Gen Is 1" means "Gen.1-Gen.50,Isa.1" if `book_alone_strategy` is `full` or `ignore`, or "Gen.1,Isa.1" if it's `first_chapter`). book_sequence_strategy: "ignore", // * `ignore`: "Matt 99, Gen 1" sequence index starts at the valid `Gen 1`. // * `include`: "Matt 99, Gen 1" sequence index starts at the invalid `Matt 99`. invalid_sequence_strategy: "ignore", // * `combine`: sequential references in the text are combined into a single comma-separated OSIS string: "Gen 1, 3" → `"Gen.1,Gen.3"`. // * `separate`: sequential references in the text are separated into an array of their component parts: "Gen 1, 3" → `["Gen.1", "Gen.3"]`. sequence_combination_strategy: "combine", // * `us`: commas separate sequences, periods separate chapters and verses. "Matt 1, 2. 4" → "Matt.1,Matt.2.4". // * `eu`: periods separate sequences, commas separate chapters and verses. "Matt 1, 2. 4" → "Matt.1.2,Matt.1.4". punctuation_strategy: "us", // ### Potentially Invalid Input // * `ignore`: Include only valid passages in `parsed_entities()`. // * `include`: Include invalid passages in `parsed_entities()` (they still don't have OSIS values). invalid_passage_strategy: "ignore", // * `ignore`: treat non-Latin digits the same as any other character. // * `replace`: replace non-Latin (0-9) numeric digits with Latin digits. This replacement occurs before any book substitution. non_latin_digits_strategy: "ignore", // * Include `b` in the string to validate book order ("Revelation to Genesis" is invalid). // * Include `c` in the string to validate chapter existence. If omitted, strings like "Genesis 51" (which doesn't exist) return as valid. Omitting `c` means that looking up full books will return `999` as the end chapter: "Genesis to Exodus" → "Gen.1-Exod.999". // * Include `v` in the string to validate verse existence. If omitted, strings like `Genesis 1:100` (which doesn't exist) return as valid. 
Omitting `v` means that looking up full chapters will return `999` as the end verse: "Genesis 1:2 to chapter 3" → "Gen.1.2-Gen.3.999". // * Tested values are `b`, `bc`, `bcv`, `bv`, `c`, `cv`, `v`, and `none`. In all cases, single-chapter books still respond as single-chapter books to allow treating strings like `Obadiah 2` as `Obad.1.2`. passage_existence_strategy: "bcv", // * `error`: zero chapters ("Matthew 0") are invalid. // * `upgrade`: zero chapters are upgraded to 1: "Matthew 0" -> "Matt.1". // Unlike `zero_verse_strategy`, chapter 0 isn't allowed. zero_chapter_strategy: "error", // * `error`: zero verses ("Matthew 5:0") are invalid. // * `upgrade`: zero verses are upgraded to 1: "Matthew 5:0" -> "Matt.5.1". // * `allow`: zero verses are kept as-is: "Matthew 5:0" -> "Matt.5.0". Some traditions use 0 for Psalm titles. zero_verse_strategy: "error", // * `chapter`: treat "Jude 1" as referring to the complete book of Jude: `Jude.1`. People almost always want this output when they enter this text in a search box. // * `verse`: treat "Jude 1" as referring to the first verse in Jude: `Jude.1.1`. If you're parsing specialized text that follows a style guide, you may want to set this option. single_chapter_1_strategy: "chapter", // ### Context // * `ignore`: any books that appear on their own don't get parsed as books ("Gen saw" doesn't trigger a match, but "Gen 1" does). // * `full`: any books that appear on their own get parsed as the complete book ("Gen" means "Gen.1-Gen.50"). // * `first_chapter`: any books that appear on their own get parsed as the first chapter ("Gen" means "Gen.1"). book_alone_strategy: "ignore", // * `ignore`: any books that appear on their own in a range are ignored ("Matt-Mark 2" means "Mark.2"). // * `include`: any books that appear on their own in a range are included as part of the range ("Matt-Mark 2" means "Matt.1-Mark.2", while "Matt 2-Mark" means "Matt.2-Mark.16"). 
book_range_strategy: "ignore", // * `delete`: remove any digits at the end of a sequence that are preceded by spaces and immediately followed by a `\w`: "Matt 5 1Hi" -> "Matt.5". This is better for text extraction. // * `include`: keep any digits at the end of a sequence that are preceded by spaces and immediately followed by a `\w`: "Matt 5 1Hi" -> "Matt.5.1". This is better for query parsing. captive_end_digits_strategy: "delete", // * `verse`: treat "Jer 33-11" as "Jer.33.11" (end before start) and "Heb 13-15" as "Heb.13.15" (end range too high). // * `sequence`: treat them as sequences. end_range_digits_strategy: "verse", // ### Apocrypha // Don't set this value directly; use the `include_apocrypha` or `set_options` functions. include_apocrypha: false, // `c`: treat references to Psalm 151 (if using the Apocrypha) as a chapter: "Psalm 151:1" -> "Ps.151.1" // `b`: treat references to Psalm 151 (if using the Apocrypha) as a book: "Psalm 151:1" -> "Ps151.1.1". Be aware that for ranges starting or ending in Psalm 151, you'll get two OSISes, regardless of the `sequence_combination_strategy`: "Psalms 149-151" -> "Ps.149-Ps.150,Ps151.1" Setting this option to `b` is the only way to correctly parse OSISes that treat `Ps151` as a book. ps151_strategy: "c", // ### Versification System // Don't set this value directly; use the `versification_system` or `set_options` functions. // * `default`: the default ESV-style versification. Also used in AMP and NASB. // * `ceb`: use CEB versification, which varies mostly in the Apocrypha. // * `kjv`: use KJV versification, with one fewer verse in 3John. Also used in NIV and NKJV. // `nab`: use NAB versification, which generally follows the Septuagint. // * `nlt`: use NLT versification, with one extra verse in Rev. Also used in NCV. // * `nrsv`: use NRSV versification. // * `vulgate`: use Vulgate (Greek) numbering for the Psalms. 
versification_system: "default", // ### Case Sensitivity // Don't use this value directly; use the `set_options` function. Changing this option repeatedly will slow down execution. // * `none`: All matches are case-insensitive. // * `books`: Book names are case-sensitive. Everything else is still case-insensitive. case_sensitive: "none" }; } // Remember default options for later use. constructor() { this.options = {}; for (var key of Object.keys(bcv_parser.prototype.options || {})) { var val = bcv_parser.prototype.options[key]; this.options[key] = val; } // If we've changed the versification system, make sure previous object invocations don't leak. this.versification_system(this.options.versification_system); } // ## Parse-Related Functions // Parse a string and prepare the object for further interrogation, depending on what's needed. parse(s) { this.reset(); this.s = s; // Replace any control characters already in the string. s = this.replace_control_characters(s); // Get a string representation suitable for passing to the parser. [s, this.passage.books] = Array.from(this.match_books(s)); // Replace potential BCVs one at a time to reduce processing time on long strings. [this.entities] = Array.from(this.match_passages(s)); // Allow chaining. return this; } // Parse a string and prepare the object for further interrogation, depending on what's needed. The second argument is a string that serves as the context for the first argument. If there's a valid partial match at the beginning of the first argument, then it will parse it using the supplied `context`. For example, `parse_string_with_context("verse 2", "Genesis 3").osis()` = `Gen.3.2`. You'd use this when you have some text that looks like it's a partial reference, and you already know the context. 
parse_with_context(s, context) { let entities; this.reset(); [context, this.passage.books] = Array.from(this.match_books(this.replace_control_characters(context))); [entities, context] = Array.from(this.match_passages(context)); this.reset(); this.s = s; // Replace any control characters already in the string. s = this.replace_control_characters(s); // Get a string representation suitable for passing to the parser. [s, this.passage.books] = Array.from(this.match_books(s)); this.passage.books.push({ value: "", parsed: [], start_index: 0, type: "context", context }); // Reconstruct the string, adding in the context. Because we've already called `match_books`, the resulting offsets will reflect the original string and not the new string. s = "\x1f" + (this.passage.books.length - 1) + "/9\x1f" + s; // Replace potential BCVs one at a time to reduce processing time on long strings. [this.entities] = Array.from(this.match_passages(s)); // Allow chaining. return this; } // If we have a new string to parse, reset any values from previous parses. reset() { this.s = ""; this.entities = []; if (this.passage) { this.passage.books = []; return this.passage.indices = {}; } else { this.passage = new bcv_passage; this.passage.options = this.options; return this.passage.translations = this.translations; } } // ## Options-Related Functions // Override default options. set_options(options) { for (var key of Object.keys(options || {})) { // The drawback with this approach is that calling `include_apocrypha`, `versification_system`, and `case_sensitive` could regenerate `@regexps.books` three times. var val = options[key]; if ((key === "include_apocrypha") || (key === "versification_system") || (key === "case_sensitive")) { this[key](val); } else { this.options[key] = val; } } return this; } // Whether to use books and abbreviations from the Apocrypha. Takes a boolean argument: `true` to include the Apocrypha and `false` to not. Defaults to `false`. Returns the `bcv_parser` object. 
include_apocrypha(arg) { if ((arg == null) || ((arg !== true) && (arg !== false))) { return this; } this.options.include_apocrypha = arg; this.regexps.books = this.regexps.get_books(arg, this.options.case_sensitive); for (var translation of Object.keys(this.translations || {})) { if ((translation === "aliases") || (translation === "alternates")) { continue; } // If the `Ps` array in the `chapters` object doesn't exist, create it so that we can add Ps 151 to the end of it. if (this.translations[translation].chapters == null) { this.translations[translation].chapters = {}; } if (this.translations[translation].chapters["Ps"] == null) { this.translations[translation].chapters["Ps"] = bcv_utils.shallow_clone_array(this.translations["default"].chapters["Ps"]); } // Add Ps 151 to the end of Psalms. The assumption here is that Ps151 always only is one chapter long. if (arg === true) { var verse_count; if (this.translations[translation].chapters["Ps151"] != null) { verse_count = this.translations[translation].chapters["Ps151"][0]; } else { verse_count = this.translations["default"].chapters["Ps151"][0]; } this.translations[translation].chapters["Ps"][150] = verse_count; // Remove Ps 151 from the end of Psalms. } else { if (this.translations[translation].chapters["Ps"].length === 151) { this.translations[translation].chapters["Ps"].pop(); } } } return this; } // Use an alternate versification system. Takes a string argument; the built-in options are: `default` to use KJV-style versification and `vulgate` to use the Vulgate (Greek) Psalm numbering. English offers several other versification systems; see the Readme for details. versification_system(system) { let book, chapter_list; if ((system == null) || (this.translations[system] == null)) { return this; } // If we've already changed the `versification_system` once, we need to do some cleanup before we change it to something else. 
if (this.translations.alternates.default != null) { // If we're changing to the default from something else, make sure we reset it to the correct values. if (system === "default") { if (this.translations.alternates.default.order != null) { this.translations.default.order = bcv_utils.shallow_clone(this.translations.alternates.default.order); } for (book of Object.keys(this.translations.alternates.default.chapters || {})) { chapter_list = this.translations.alternates.default.chapters[book]; this.translations.default.chapters[book] = bcv_utils.shallow_clone_array(chapter_list); } // Make sure the `versification_system` is reset to the default before applying any changes--alternate systems only include differences from the default. } else { this.versification_system("default"); } } if (this.translations.alternates.default == null) { this.translations.alternates.default = { order: null, chapters: {} }; } // If we're updating the book order (e.g., to mix the Apocrypha into the Old Testament)... if ((system !== "default") && (this.translations[system].order != null)) { // Save the existing default order so we can get it back later if necessary. We want to do everything nondestructively. if (this.translations.alternates.default.order == null) { this.translations.alternates.default.order = bcv_utils.shallow_clone(this.translations.default.order); } // The `order` key should always contain the full order; too many things can go wrong if we try to merge the old order and the new one. this.translations.default.order = bcv_utils.shallow_clone(this.translations[system].order); } // If we're updating the number of chapters in a book or the number of verses in a chapter... if ((system !== "default") && (this.translations[system].chapters != null)) { // Loop through only the books that are changing. for (book of Object.keys(this.translations[system].chapters || {})) { // Save the existing default order so we can get it back later. Only set it the first time. 
chapter_list = this.translations[system].chapters[book]; if (this.translations.alternates.default.chapters[book] == null) { this.translations.alternates.default.chapters[book] = bcv_utils.shallow_clone_array(this.translations.default.chapters[book]); } this.translations.default.chapters[book] = bcv_utils.shallow_clone_array(chapter_list); } } // Depending on the order of operations, the cloned list could be inconsistent with the current state. For example, if we called `versification_system`, we've cached 150 Psalms. If we then call `include_apocrypha(true)`, we now have 151 Psalms. If we then call `versification_system` again, we're back, incorrectly, to 150 Psalms because that's what was cached. this.options.versification_system = system; this.include_apocrypha(this.options.include_apocrypha); return this; } // Whether to treat books as case-sensitive. Valid values are `none` and `books`. case_sensitive(arg) { if ((arg == null) || ((arg !== "none") && (arg !== "books"))) { return this; } // If nothing is changing, don't bother continuing if (arg === this.options.case_sensitive) { return this; } this.options.case_sensitive = arg; this.regexps.books = this.regexps.get_books(this.options.include_apocrypha, arg); return this; } // ## Administrative Functions // Return translation information so that we don't have to reach into semi-private objects to grab the data we need. translation_info(new_translation) { let book; if (new_translation == null) { new_translation = "default"; } if ((new_translation != null) && ((this.translations.aliases[new_translation] != null ? 
this.translations.aliases[new_translation].alias : undefined) != null)) { new_translation = this.translations.aliases[new_translation].alias; } if ((new_translation == null) || (this.translations[new_translation] == null)) { new_translation = "default"; } const old_translation = this.options.versification_system; if (new_translation !== old_translation) { this.versification_system(new_translation); } const out = { alias: new_translation, books: [], chapters: {}, order: bcv_utils.shallow_clone(this.translations.default.order) }; for (book of Object.keys(this.translations.default.chapters || {})) { var chapter_list = this.translations.default.chapters[book]; out.chapters[book] = bcv_utils.shallow_clone_array(chapter_list); } for (book of Object.keys(out.order || {})) { var id = out.order[book]; out.books[id - 1] = book; } if (new_translation !== old_translation) { this.versification_system(old_translation); } return out; } // ## Parsing-Related Functions // Replace control characters and spaces since we replace books with a specific character pattern. The string changes, but the length stays the same so that indices remain valid. If we want to use Latin numbers rather than non-Latin ones, replace them here. 
replace_control_characters(s) { s = s.replace(this.regexps.control, " "); s = s.replace(/\uFF1A/g, ":"); s = s.replace(/\uFF1B/g, ";"); if (this.options.non_latin_digits_strategy === "replace") { s = s.replace(/[٠۰߀०০੦૦୦0౦೦൦๐໐༠၀႐០᠐᥆᧐᪀᪐᭐᮰᱀᱐꘠꣐꤀꧐꩐꯰０]/g, "0"); s = s.replace(/[١۱߁१১੧૧୧௧౧೧൧๑໑༡၁႑១᠑᥇᧑᪁᪑᭑᮱᱁᱑꘡꣑꤁꧑꩑꯱１]/g, "1"); s = s.replace(/[٢۲߂२২੨૨୨௨౨೨൨๒໒༢၂႒២᠒᥈᧒᪂᪒᭒᮲᱂᱒꘢꣒꤂꧒꩒꯲２]/g, "2"); s = s.replace(/[٣۳߃३৩੩૩୩௩౩೩൩๓໓༣၃႓៣᠓᥉᧓᪃᪓᭓᮳᱃᱓꘣꣓꤃꧓꩓꯳３]/g, "3"); s = s.replace(/[٤۴߄४৪੪૪୪௪౪೪൪๔໔༤၄႔៤᠔᥊᧔᪄᪔᭔᮴᱄᱔꘤꣔꤄꧔꩔꯴４]/g, "4"); s = s.replace(/[٥۵߅५৫੫૫୫௫౫೫൫๕໕༥၅႕៥᠕᥋᧕᪅᪕᭕᮵᱅᱕꘥꣕꤅꧕꩕꯵５]/g, "5"); s = s.replace(/[٦۶߆६৬੬૬୬௬౬೬൬๖໖༦၆႖៦᠖᥌᧖᪆᪖᭖᮶᱆᱖꘦꣖꤆꧖꩖꯶６]/g, "6"); s = s.replace(/[٧۷߇७৭੭૭୭௭౭೭൭๗໗༧၇႗៧᠗᥍᧗᪇᪗᭗᮷᱇᱗꘧꣗꤇꧗꩗꯷７]/g, "7"); s = s.replace(/[٨۸߈८৮੮૮୮௮౮೮൮๘໘༨၈႘៨᠘᥎᧘᪈᪘᭘᮸᱈᱘꘨꣘꤈꧘꩘꯸８]/g, "8"); s = s.replace(/[٩۹߉९৯੯૯୯௯౯೯൯๙໙༩၉႙៩᠙᥏᧙᪉᪙᭙᮹᱉᱙꘩꣙꤉꧙꩙꯹９]/g, "9"); } return s; } // Find and replace instances of Bible books. match_books(s) { const books = []; // Replace all book strings. for (var book of Array.from(this.regexps.books)) { var has_replacement = false; // Using array concatenation instead of replacing text directly didn't offer performance improvements in tests of the approach. s = s.replace(book.regexp, function (full, prev, bk) { has_replacement = true; // `value` contains the raw string; `book.osis` is the osis value for the book. books.push({ value: bk, parsed: book.osis, type: "book" }); const extra = (book.extra != null) ? `/${book.extra}` : ""; return `${prev}\x1f${books.length - 1}${extra}\x1f`; }); // If we've already replaced all possible books in the string, we don't need to check any further. if ((has_replacement === true) && /^[\s\x1f\d:.,;\-\u2013\u2014]+$/.test(s)) { break; } } // Replace translations. 
s = s.replace(this.regexps.translations, function (match) { books.push({ value: match, parsed: match.toLowerCase(), type: "translation" }); return `\x1e${books.length - 1}\x1e`; }); return [s, this.get_book_indices(books, s)]; } // Get the string index for all the books / translations, adding the start index as a new key. get_book_indices(books, s) { let match; let add_index = 0; const re = new RegExp(`\ ([\\x1f\\x1e])\ (\\d+)\ (?:/\\d+)?\ \\1\ `, 'g'); while ((match = re.exec(s))) { // Keep track of the actual start index. books[match[2]].start_index = match.index + add_index; // Add the difference between the real length of the book and what we replaced it with (`match[0]` is the replacement). add_index += books[match[2]].value.length - match[0].length; } return books; } // Create an array of all the potential bcv matches in the string. match_passages(s) { let match; let entities = []; let post_context = {}; while ((match = this.regexps.escaped_passage.exec(s))) { // * `match[0]` includes the preceding character (if any) for bounding. // * `match[1]` is the full match minus the character preceding the match used for bounding. // * `match[2]` is the book id. let accum; var [full, part, book_id] = Array.from(match); // Adjust the `index` to use the `part` offset rather than the `full` offset. We use it below for `captive_end_digits`. var original_part_length = part.length; match.index += full.length - original_part_length; // Remove most three+-character digits at the end; they won't match. if ((/\s[2-9]\d\d\s*$|\s\d{4,}\s*$/).test(part)) { part = part.replace(/\s+\d+\s*$/, ""); } // Clean up the end of the match to avoid irrelevant context. if (!/[\d\x1f\x1e)]$/.test(part)) { // Remove superfluous characters from the end of the match. part = this.replace_match_end(part); } if (this.options.captive_end_digits_strategy === "delete") { // If the match ends with a space+digit and is immediately followed by a word character, ignore the space+digit: `Matt 1, 2Text`. 
var next_char = match.index + part.length; if ((s.length > next_char) && /^\w/.test(s.substr(next_char, 1))) { part = part.replace(/[\s*]+\d+$/, ""); } // If the match ends with a translation indicator, remove any numbers afterward. This situation generally occurs in cases like, "Ps 1:1 ESV 1 Blessed is...", where the final `1` is a verse number that's part of the text. part = part.replace(/(\x1e[)\]]?)[\s*]*\d+$/, "$1"); } // Though PEG.js doesn't have to be case-sensitive, using the case-insensitive feature involves some repeated processing. By lower-casing here, we only pay the cost once. The grammar for words like "also" is case-sensitive; we can safely lowercase ascii letters without changing indices. We don't just call .toLowerCase() because it could affect the length of the string if it contains certain characters; maintaining the indices is the most important thing. part = part.replace(/[A-Z]+/g, capitals => capitals.toLowerCase()); // If we're in a chapter-book situation, the first character won't be a book control character, which would throw off the `start_index`. var start_index_adjust = part.substr(0, 1) === "\x1f" ? 0 : part.split("\x1f")[0].length; // * `match` is important for the length and whether it contains control characters, neither of which we've changed inconsistently with the original string. The `part` may be shorter than originally matched, but that's only to remove unneeded characters at the end. // * `grammar` is the external PEG parser. The `@options.punctuation_strategy` determines which punctuation is used for sequences and `cv` separators. var passage = { value: grammar.parse(part, { punctuation_strategy: this.options.punctuation_strategy }), type: "base", start_index: this.passage.books[book_id].start_index - start_index_adjust, match: part }; // Are we looking at a single book on its own that could be part of a range like "1-2 Sam"? 
if ((this.options.book_alone_strategy === "full") && (this.options.book_range_strategy === "include") && (passage.value[0].type === "b") && // Either it's on its own or a translation sequence follows it, making it effectively on its own. ((passage.value.length === 1) || ((passage.value.length > 1) && (passage.value[1].type === "translation_sequence"))) && (start_index_adjust === 0) && ((this.passage.books[book_id].parsed.length === 1) || ((this.passage.books[book_id].parsed.length > 1) && (this.passage.books[book_id].parsed[1].type === "translation"))) && /^[234]/.test(this.passage.books[book_id].parsed[0])) { this.create_book_range(s, passage, book_id); } // Handle each passage individually to prevent context leakage (e.g., translations back-propagating through unrelated entities). [accum, post_context] = Array.from(this.passage.handle_obj(passage)); entities = entities.concat(accum); // Move the next RegExp iteration to start earlier if we didn't use everything we thought we were going to. var regexp_index_adjust = this.adjust_regexp_end(accum, original_part_length, part.length); if (regexp_index_adjust > 0) { this.regexps.escaped_passage.lastIndex -= regexp_index_adjust; } } return [entities, post_context]; } // Handle the objects returned from the grammar to produce entities for further processing. We may need to adjust the `RegExp.lastIndex` if we discarded characters from the end of the match or if, after parsing, we're ignoring some of them--especially with ending parenthetical statements like "Luke 8:1-3; 24:10 (and Matthew 14:1-12 and Luke 23:7-12 for background)". adjust_regexp_end(accum, old_length, new_length) { let regexp_index_adjust = 0; if (accum.length > 0) { // `accum` uses an off-by-one end index compared to the RegExp object. "and Psa3" means `lastIndex` = 8, `old_length` and `new_length` are both 4 (omitting "and " and leaving "Psa3"), and the `accum` end index is 3. We end up with 4 - 3 - 1 = 0, or no adjustment. 
Compare "and Psa3 and", where the last " and" is originally considered part of the regexp. In this case, `regexp_index_adjust` is 4: 8 ("Psa3 and") - 3 ("Psa3") - 1. regexp_index_adjust = old_length - accum[accum.length - 1].indices[1] - 1; } else if (old_length !== new_length) { regexp_index_adjust = old_length - new_length; } return regexp_index_adjust; } // Remove unnecessary characters from the end of the match. replace_match_end(part) { // Split the string on valid ending characters. Remove whatever's leftover at the end of the string. It would be easier to do `part.split(@regexps.match_end_split).pop()`, but IE doesn't handle empty strings at the end. let match; let remove = part.length; while ((match = this.regexps.match_end_split.exec(part))) { remove = match.index + match[0].length; } if (remove < part.length) { part = part.substr(0, remove); } return part; } // If a book is on its own, check whether it's preceded by something that indicates it's a book range like "1-2 Samuel". create_book_range(s, passage, book_id) { const cases = [bcv_parser.prototype.regexps.first, bcv_parser.prototype.regexps.second, bcv_parser.prototype.regexps.third]; const limit = parseInt(this.passage.books[book_id].parsed[0].substr(0, 1), 10); for (let i = 1, end = limit, asc = 1 <= end; asc ? i < end : i > end; asc ? i++ : i--) { var range_regexp = i === (limit - 1) ? bcv_parser.prototype.regexps.range_and : bcv_parser.prototype.regexps.range_only; var prev = s.match(new RegExp(`(?:^|\\W)(${cases[i - 1]}\\s*${range_regexp}\\s*)\\x1f${book_id}\\x1f`, 'i')); if (prev != null) { return this.add_book_range_object(passage, prev, i); } } return false; } // Create a fake object that can be parsed to show the correct result. 
add_book_range_object(passage, prev, start_book_number) { const { length } = prev[1]; passage.value[0] = { type: "b_range_pre", value: [{ type: "b_pre", value: start_book_number.toString(), indices: [prev.index, prev.index + length] }, passage.value[0]], indices: [0, passage.value[0].indices[1] + length] }; // Adjust the indices of the original result so they reflect the new content. passage.value[0].value[1].indices[0] += length; passage.value[0].value[1].indices[1] += length; // These two are the most important ones; the `absolute_indices` function uses them. passage.start_index -= length; passage.match = prev[1] + passage.match; if (passage.value.length === 1) { return; } // If there are subsequent objects, also adjust their offsets. return (() => { const result = []; for (let i = 1, end = passage.value.length, asc = 1 <= end; asc ? i < end : i > end; asc ? i++ : i--) { if (passage.value[i].value == null) { continue; } // If it's an `integer` type, `passage.value[i].value` is a scalar rather than an object, so we only need to adjust the indices for the top-level object. if ((passage.value[i].value[0] != null ? passage.value[i].value[0].indices : undefined) != null) { passage.value[i].value[0].indices[0] += length; passage.value[i].value[0].indices[1] += length; } passage.value[i].indices[0] += length; result.push(passage.value[i].indices[1] += length); } return result; })(); } // ## Output-Related Functions // Return a single OSIS string (comma-separated) for all the references in the whole input string. osis() { const out = []; for (var osis of Array.from(this.parsed_entities())) { if (osis.osis.length > 0) { out.push(osis.osis); } } return out.join(","); } // Return an array of `[OSIS, TRANSLATIONS]` for each reference (combined according to `options`). 
osis_and_translations() {
  // Each output row is `[osis_string, comma_joined_translations]`; entities with an empty `osis` are skipped.
  const out = [];
  for (var osis of Array.from(this.parsed_entities())) {
    if (osis.osis.length > 0) {
      out.push([osis.osis, osis.translations.join(",")]);
    }
  }
  return out;
}
// Return an array of `{osis: OSIS, indices:[START, END], translations: [TRANSLATIONS]}` objects for each reference (combined according to `options`).
// An `osises` key is copied over as well, but only when the parsed entity carries one.
osis_and_indices() {
  const out = [];
  for (var osis of Array.from(this.parsed_entities())) {
    if (osis.osis.length > 0) {
      const entry = { osis: osis.osis, translations: osis.translations, indices: osis.indices };
      if (osis.osises != null) {
        entry.osises = osis.osises;
      }
      out.push(entry);
    }
  }
  return out;
}
// Return all objects, probably for additional processing.
// Walks `this.entities` in order and builds one or more output records per entity, depending on `options.sequence_combination_strategy` and `options.consecutive_combination_strategy`. Entities and passages may be mutated along the way (`passage.type`, `passage.absolute_indices`, `entity.absolute_indices[1]`).
parsed_entities() {
  let out = [];
  // Generated range loop: `this.entities.length` is never negative, so this always counts up from 0.
  for (let entity_id = 0, end = this.entities.length, asc = 0 <= end; asc ? entity_id < end : entity_id > end; asc ? entity_id++ : entity_id--) {
    var entity = this.entities[entity_id];
    // Be sure to include any translation identifiers in the indices we report back, but only if the translation immediately follows the previous entity.
    if (entity.type && (entity.type === "translation_sequence") && (out.length > 0) && (entity_id === (out[out.length - 1].entity_id + 1))) {
      out[out.length - 1].indices[1] = entity.absolute_indices[1];
    }
    // Entities without passages contribute nothing beyond the index extension above.
    if (entity.passages == null) {
      continue;
    }
    // Do not reparse semicolon segments here; preserve original context and sequence behavior.
    if (((entity.type === "b") && (this.options.book_alone_strategy === "ignore")) || ((entity.type === "b_range") && (this.options.book_range_strategy === "ignore")) || (entity.type === "context")) {
      continue;
    }
    // A given entity, even if part of a sequence, always only has one set of translations associated with it.
    var translations = [];
    var translation_alias = null;
    if (entity.passages[0].translations != null) {
      for (var translation of Array.from(entity.passages[0].translations)) {
        // A missing or empty `osis` on the translation is reported as "".
        var translation_osis = (translation.osis != null ? translation.osis.length : undefined) > 0 ? translation.osis : "";
        // The alias of the first listed translation is the one used for every lookup below.
        if (translation_alias == null) {
          translation_alias = translation.alias;
        }
        translations.push(translation_osis);
      }
    } else {
      translations = [""];
      translation_alias = "default";
    }
    var osises = [];
    // Nothing in this method ever appends to `nonCombinableOsises`, so it is always empty here.
    var nonCombinableOsises = [];
    var { length } = entity.passages;
    // Generated range loop: `i` mirrors `j` on every step, so this is a plain ascending walk over `entity.passages`.
    for (var j = 0, i = j, end1 = length, asc1 = 0 <= end1; asc1 ? j < end1 : j > end1; asc1 ? j++ : j--, i = j) {
      var passage = entity.passages[i];
      // The `type` is usually only set in a sequence.
      if (passage.type == null) {
        passage.type = entity.type;
      }
      if (passage.valid.valid === false) {
        if ((this.options.invalid_sequence_strategy === "ignore") && (entity.type === "sequence")) {
          this.snap_sequence("ignore", entity, osises, i, length);
        }
        // Stop here if we're ignoring invalid passages.
        if (this.options.invalid_passage_strategy === "ignore") {
          continue;
        }
      }
      // If indicated in `@options`, exclude stray start/end books, resetting the parent indices as needed.
      if (((passage.type === "b") || (passage.type === "b_range")) && (this.options.book_sequence_strategy === "ignore") && (entity.type === "sequence")) {
        this.snap_sequence("book", entity, osises, i, length);
        continue;
      }
      if (((passage.type === "b_range_start") || (passage.type === "range_end_b")) && (this.options.book_range_strategy === "ignore")) {
        this.snap_range(entity, i);
      }
      // Passages without their own absolute indices inherit the entity's.
      if (passage.absolute_indices == null) {
        passage.absolute_indices = entity.absolute_indices;
      }
      // The two branches push the same record shape; the first additionally attaches a per-chapter `osises` list when the passage spans more than one chapter. Invalid passages that were not skipped above get an empty `osis` string.
      if ((this.options.consecutive_combination_strategy === 'separate-chapters') && (passage.start.c !== passage.end.c)) {
        osises.push({ osis: passage.valid.valid ? this.to_osis(passage.start, passage.end, translation_alias) : "", osises: this.get_osises_by_chapter({ osis: "", start: passage.start, end: passage.end }, translation_alias), type: passage.type, indices: passage.absolute_indices, translations, start: passage.start, end: passage.end, enclosed_indices: passage.enclosed_absolute_indices, entity_id, entities: [passage] });
      } else {
        osises.push({ osis: passage.valid.valid ? this.to_osis(passage.start, passage.end, translation_alias) : "", type: passage.type, indices: passage.absolute_indices, translations, start: passage.start, end: passage.end, enclosed_indices: passage.enclosed_absolute_indices, entity_id, entities: [passage] });
      }
    }
    // Don't return an empty object.
    if ((osises.length === 0) && (nonCombinableOsises.length === 0)) {
      continue;
    }
    if ((this.options.consecutive_combination_strategy === "combine") || (this.options.consecutive_combination_strategy === "separate-chapters")) {
      if (osises.length > 1) {
        osises = this.combine_consecutive_passages(osises, translation_alias);
      }
      // Re-sort by start index after combining.
      osises = osises.concat(nonCombinableOsises).sort((osis1, osis2) => osis1.indices[0] - osis2.indices[0]);
    }
    if (this.options.consecutive_combination_strategy === "separate-chapters") {
      // `osis` here is the function-scoped `var osis` declared further down; hoisting makes it usable as the loop variable.
      for (osis of Array.from(osises)) {
        if (osis.osis.length === 0) {
          continue;
        }
        const osises_by_chapter = this.get_osises_by_chapter(osis, translation_alias);
        // Only attach the per-chapter breakdown when there is more than one chapter entry.
        if (osises_by_chapter.length > 1) {
          osis.osises = osises_by_chapter;
        }
      }
    }
    // Add the osises array to the existing array.
    if (this.options.sequence_combination_strategy === "separate") {
      out = out.concat(osises);
      // Add the OSIS string and some data to the array.
    } else {
      if ((this.options.consecutive_combination_strategy === "separate-chapters") && (entity.type === "sequence")) {
        const groups = this.group_osises_by_semicolon(osises);
        // Only when there is more than one group does the sequence get emitted group by group; otherwise fall through to the single combined record below.
        if (groups.length > 1) {
          for (const group of Array.from(groups)) {
            if (group.length === 0) {
              continue;
            }
            // Groups with several members are emitted one record per member.
            if (group.length > 1) {
              for (osis of Array.from(group)) {
                if (osis.osis.length === 0) {
                  continue;
                }
                const entry = { osis: osis.osis, indices: osis.indices, translations, entity_id, entities: [osis] };
                if (osis.osises != null) {
                  entry.osises = osis.osises;
                }
                out.push(entry);
              }
              continue;
            }
            // From here on the group has exactly one member, so `group_last_i` is 0.
            const group_last_i = group.length - 1;
            const indices = [group[0].indices[0], group[group_last_i].indices[1]];
            if ((group[group_last_i].enclosed_indices != null) && (group[group_last_i].enclosed_indices[1] >= 0)) {
              indices[1] = group[group_last_i].enclosed_indices[1];
            }
            // These block-scoped `strings` / `osises_by_chapter` / `out_entry` shadow the same-named declarations used after this `if`.
            const strings = [];
            let osises_by_chapter = [];
            for (osis of Array.from(group)) {
              if (osis.osis.length > 0) {
                strings.push(osis.osis);
              }
              if (osis.osises != null) {
                osises_by_chapter = osises_by_chapter.concat(osis.osises);
              }
            }
            const out_entry = { osis: strings.join(","), indices, translations, entity_id, entities: group };
            if (osises_by_chapter.length > 0) {
              out_entry.osises = osises_by_chapter;
            }
            out.push(out_entry);
          }
          // The entity has been fully emitted group by group; move on to the next entity.
          continue;
        }
      }
      var osis;
      var strings = [];
      var last_i = osises.length - 1;
      // Adjust the end index to match a closing parenthesis when presented with `enclosed` entities. These entities always start mid-sequence (unless there's a book we're ignoring), so we don't need to worry about the start index.
      if ((osises[last_i].enclosed_indices != null) && (osises[last_i].enclosed_indices[1] >= 0)) {
        entity.absolute_indices[1] = osises[last_i].enclosed_indices[1];
      }
      for (osis of Array.from(osises)) {
        if (osis.osis.length > 0) {
          strings.push(osis.osis);
        }
      }
      let osises_by_chapter = [];
      if (this.options.consecutive_combination_strategy === "separate-chapters") {
        for (osis of Array.from(osises)) {
          if (osis.osises != null) {
            osises_by_chapter = osises_by_chapter.concat(osis.osises);
          }
        }
      }
      // One combined record for the whole entity: the non-empty OSIS strings joined with commas.
      const out_entry = { osis: strings.join(","), indices: entity.absolute_indices, translations, entity_id, entities: osises };
      if (osises_by_chapter.length > 0) {
        out_entry.osises = osises_by_chapter;
      }
      out.push(out_entry);
    }
  }
  return out;
}
to_osis(start, end, translation) {
  // If it's just a book on its own, how we deal with it depends on whether we want to return just the first chapter or the complete book.
  let out;
  if ((end.c == null) && (end.v == null) && (start.b === end.b) && (start.c == null) && (start.v == null) && (this.options.book_alone_strategy === "first_chapter")) {
    end.c = 1;
  }
  const osis = { start: "", end: "" };
  // If no start chapter or verse, assume the first possible.
  if (start.c == null) {
    start.c = 1;
  }
  if (start.v == null) {
    start.v = 1;
  }
  // If no end chapter or verse, assume the last possible. If it's a single-chapter book, always use the first chapter for consistency with other `passage_existence_strategy` results (which do respect the single-chapter length).
if (end.c == null) { if ((this.options.passage_existence_strategy.indexOf("c") >= 0) || ((this.passage.translations[translation].chapters[end.b] != null) && (this.passage.translations[translation].chapters[end.b].length === 1))) { end.c = this.passage.translations[translation].chapters[end.b].length; } else { end.c = 999; } } if (end.v == null) { if ((this.passage.translations[translation].chapters[end.b][end.c - 1] != null) && (this.options.passage_existence_strategy.indexOf("v") >= 0)) { end.v = this.passage.translations[translation].chapters[end.b][end.c - 1]; } else { end.v = 999; } } if (this.options.include_apocrypha && (this.options.ps151_strategy === "b") && (((start.c === 151) && (start.b === "Ps")) || ((end.c === 151) && (end.b === "Ps")))) { this.fix_ps151(start, end, translation); } // If it's a complete book or range of complete books and we want the shortest possible OSIS, return just the book names. The `end.c` and `end.v` equaling 999 is for when the `passage_existence_strategy` sets them to 999, indicating that we should treat it as a complete book or chapter. if ((this.options.osis_compaction_strategy === "b") && (start.c === 1) && (start.v === 1) && (((end.c === 999) && (end.v === 999)) || ((end.c === this.passage.translations[translation].chapters[end.b].length) && (this.options.passage_existence_strategy.indexOf("c") >= 0) && ((end.v === 999) || ((end.v === this.passage.translations[translation].chapters[end.b][end.c - 1]) && (this.options.passage_existence_strategy.indexOf("v") >= 0)))))) { osis.start = start.b; osis.end = end.b; // If it's a complete chapter or range of complete chapters and we want a short OSIS, return just the books and chapters. We only care when `osis_compaction_strategy` isn't `bcv` (i.e., length 3) because `bcv` is always fully specified. 
} else if ((this.options.osis_compaction_strategy.length <= 2) && (start.v === 1) && ((end.v === 999) || ((end.v === this.passage.translations[translation].chapters[end.b][end.c - 1]) && (this.options.passage_existence_strategy.indexOf("v") >= 0)))) { osis.start = start.b + "." + start.c.toString(); osis.end = end.b + "." + end.c.toString(); // Otherwise, return the full BCV reference for both. } else { osis.start = start.b + "." + start.c.toString() + "." + start.v.toString(); osis.end = end.b + "." + end.c.toString() + "." + end.v.toString(); } // If it's the same verse ("Gen.1.1-Gen.1.1"), chapter ("Gen.1-Gen.1") or book ("Gen-Gen"), return just the