UNPKG

bible-ref-parse

Version:

Identifies and parses Bible references (like John 3:16) in over 40 languages.

933 lines (898 loc) 314 kB
(function() { // This class takes a string and identifies Bible passage references in that string. It's designed to handle how people actually type Bible passages and tries fairly hard to make sense of dubious possibilities. // The aggressiveness is tunable, to a certain extent, using the below `options`. It's probably too aggressive for general text parsing (the "is 2" in "There is 2 much" becomes "Isa.2", for example). // Export to whatever the current context is. var bcv_parser, bcv_passage, bcv_utils, root, hasProp = {}.hasOwnProperty; root = this; bcv_parser = (function() { class bcv_parser { // Remember default options for later use. constructor() { var key, ref, val; this.options = {}; ref = bcv_parser.prototype.options; for (key in ref) { if (!hasProp.call(ref, key)) continue; val = ref[key]; this.options[key] = val; } // If we've changed the versification system, make sure previous object invocations don't leak. this.versification_system(this.options.versification_system); } // ## Parse-Related Functions // Parse a string and prepare the object for further interrogation, depending on what's needed. parse(s) { this.reset(); this.s = s; // Replace any control characters already in the string. s = this.replace_control_characters(s); // Get a string representation suitable for passing to the parser. [s, this.passage.books] = this.match_books(s); // Replace potential BCVs one at a time to reduce processing time on long strings. [this.entities] = this.match_passages(s); return this; } // Parse a string and prepare the object for further interrogation, depending on what's needed. The second argument is a string that serves as the context for the first argument. If there's a valid partial match at the beginning of the first argument, then it will parse it using the supplied `context`. For example, `parse_string_with_context("verse 2", "Genesis 3").osis()` = `Gen.3.2`. You'd use this when you have some text that looks like it's a partial reference, and you already know the context. parse_with_context(s, context) { var entities; this.reset(); [context, this.passage.books] = this.match_books(this.replace_control_characters(context)); [entities, context] = this.match_passages(context); this.reset(); this.s = s; // Replace any control characters already in the string. s = this.replace_control_characters(s); // Get a string representation suitable for passing to the parser. [s, this.passage.books] = this.match_books(s); this.passage.books.push({ value: "", parsed: [], start_index: 0, type: "context", context: context }); // Reconstruct the string, adding in the context. Because we've already called `match_books`, the resulting offsets will reflect the original string and not the new string. s = "\x1f" + (this.passage.books.length - 1) + "/9\x1f" + s; // Replace potential BCVs one at a time to reduce processing time on long strings. [this.entities] = this.match_passages(s); return this; } // If we have a new string to parse, reset any values from previous parses. reset() { this.s = ""; this.entities = []; if (this.passage) { this.passage.books = []; return this.passage.indices = {}; } else { this.passage = new bcv_passage(); this.passage.options = this.options; return this.passage.translations = this.translations; } } // ## Options-Related Functions // Override default options. set_options(options) { var key, val; for (key in options) { if (!hasProp.call(options, key)) continue; val = options[key]; // The drawback with this approach is that calling `include_apocrypha`, `versification_system`, and `case_sensitive` could regenerate `@regexps.books` three times. if (key === "include_apocrypha" || key === "versification_system" || key === "case_sensitive") { this[key](val); } else { this.options[key] = val; } } return this; } // Whether to use books and abbreviations from the Apocrypha. Takes a boolean argument: `true` to include the Apocrypha and `false` to not. Defaults to `false`. Returns the `bcv_parser` object. include_apocrypha(arg) { var base, base1, ref, translation, verse_count; if (!((arg != null) && (arg === true || arg === false))) { return this; } this.options.include_apocrypha = arg; this.regexps.books = this.regexps.get_books(arg, this.options.case_sensitive); ref = this.translations; for (translation in ref) { if (!hasProp.call(ref, translation)) continue; if (translation === "aliases" || translation === "alternates") { continue; } // If the `Ps` array in the `chapters` object doesn't exist, create it so that we can add Ps 151 to the end of it. if ((base = this.translations[translation]).chapters == null) { base.chapters = {}; } if ((base1 = this.translations[translation].chapters)["Ps"] == null) { base1["Ps"] = bcv_utils.shallow_clone_array(this.translations["default"].chapters["Ps"]); } // Add Ps 151 to the end of Psalms. The assumption here is that Ps151 always only is one chapter long. if (arg === true) { if (this.translations[translation].chapters["Ps151"] != null) { verse_count = this.translations[translation].chapters["Ps151"][0]; } else { verse_count = this.translations["default"].chapters["Ps151"][0]; } this.translations[translation].chapters["Ps"][150] = verse_count; } else { if (this.translations[translation].chapters["Ps"].length === 151) { // Remove Ps 151 from the end of Psalms. this.translations[translation].chapters["Ps"].pop(); } } } return this; } // Use an alternate versification system. Takes a string argument; the built-in options are: `default` to use KJV-style versification and `vulgate` to use the Vulgate (Greek) Psalm numbering. English offers several other versification systems; see the Readme for details. versification_system(system) { var base, base1, base2, book, chapter_list, ref, ref1; if (!((system != null) && (this.translations[system] != null))) { return this; } // If we've already changed the `versification_system` once, we need to do some cleanup before we change it to something else. if (this.translations.alternates.default != null) { // If we're changing to the default from something else, make sure we reset it to the correct values. if (system === "default") { if (this.translations.alternates.default.order != null) { this.translations.default.order = bcv_utils.shallow_clone(this.translations.alternates.default.order); } ref = this.translations.alternates.default.chapters; for (book in ref) { if (!hasProp.call(ref, book)) continue; chapter_list = ref[book]; this.translations.default.chapters[book] = bcv_utils.shallow_clone_array(chapter_list); } } else { // Make sure the `versification_system` is reset to the default before applying any changes--alternate systems only include differences from the default. this.versification_system("default"); } } if ((base = this.translations.alternates).default == null) { base.default = { order: null, chapters: {} }; } // If we're updating the book order (e.g., to mix the Apocrypha into the Old Testament)... if (system !== "default" && (this.translations[system].order != null)) { // Save the existing default order so we can get it back later if necessary. We want to do everything nondestructively. if ((base1 = this.translations.alternates.default).order == null) { base1.order = bcv_utils.shallow_clone(this.translations.default.order); } // The `order` key should always contain the full order; too many things can go wrong if we try to merge the old order and the new one. this.translations.default.order = bcv_utils.shallow_clone(this.translations[system].order); } // If we're updating the number of chapters in a book or the number of verses in a chapter... if (system !== "default" && (this.translations[system].chapters != null)) { ref1 = this.translations[system].chapters; for (book in ref1) { if (!hasProp.call(ref1, book)) continue; chapter_list = ref1[book]; // Save the existing default order so we can get it back later. Only set it the first time. if ((base2 = this.translations.alternates.default.chapters)[book] == null) { base2[book] = bcv_utils.shallow_clone_array(this.translations.default.chapters[book]); } this.translations.default.chapters[book] = bcv_utils.shallow_clone_array(chapter_list); } } // Depending on the order of operations, the cloned list could be inconsistent with the current state. For example, if we called `versification_system`, we've cached 150 Psalms. If we then call `include_apocrypha(true)`, we now have 151 Psalms. If we then call `versification_system` again, we're back, incorrectly, to 150 Psalms because that's what was cached. this.options.versification_system = system; this.include_apocrypha(this.options.include_apocrypha); return this; } // Whether to treat books as case-sensitive. Valid values are `none` and `books`. case_sensitive(arg) { if (!((arg != null) && (arg === "none" || arg === "books"))) { return this; } if (arg === this.options.case_sensitive) { // If nothing is changing, don't bother continuing return this; } this.options.case_sensitive = arg; this.regexps.books = this.regexps.get_books(this.options.include_apocrypha, arg); return this; } // ## Administrative Functions // Return translation information so that we don't have to reach into semi-private objects to grab the data we need. translation_info(new_translation = "default") { var book, chapter_list, id, old_translation, out, ref, ref1, ref2; if ((new_translation != null) && (((ref = this.translations.aliases[new_translation]) != null ? ref.alias : void 0) != null)) { new_translation = this.translations.aliases[new_translation].alias; } if (!((new_translation != null) && (this.translations[new_translation] != null))) { new_translation = "default"; } old_translation = this.options.versification_system; if (new_translation !== old_translation) { this.versification_system(new_translation); } out = { alias: new_translation, books: [], chapters: {}, order: bcv_utils.shallow_clone(this.translations.default.order) }; ref1 = this.translations.default.chapters; for (book in ref1) { if (!hasProp.call(ref1, book)) continue; chapter_list = ref1[book]; out.chapters[book] = bcv_utils.shallow_clone_array(chapter_list); } ref2 = out.order; for (book in ref2) { if (!hasProp.call(ref2, book)) continue; id = ref2[book]; out.books[id - 1] = book; } if (new_translation !== old_translation) { this.versification_system(old_translation); } return out; } // ## Parsing-Related Functions // Replace control characters and spaces since we replace books with a specific character pattern. The string changes, but the length stays the same so that indices remain valid. If we want to use Latin numbers rather than non-Latin ones, replace them here. replace_control_characters(s) { s = s.replace(this.regexps.control, " "); if (this.options.non_latin_digits_strategy === "replace") { s = s.replace(/[٠۰߀०০੦૦୦0౦೦൦๐໐༠၀႐០᠐᥆᧐᪀᪐᭐᮰᱀᱐꘠꣐꤀꧐꩐꯰0]/g, "0"); s = s.replace(/[١۱߁१১੧૧୧௧౧೧൧๑໑༡၁႑១᠑᥇᧑᪁᪑᭑᮱᱁᱑꘡꣑꤁꧑꩑꯱1]/g, "1"); s = s.replace(/[٢۲߂२২੨૨୨௨౨೨൨๒໒༢၂႒២᠒᥈᧒᪂᪒᭒᮲᱂᱒꘢꣒꤂꧒꩒꯲2]/g, "2"); s = s.replace(/[٣۳߃३৩੩૩୩௩౩೩൩๓໓༣၃႓៣᠓᥉᧓᪃᪓᭓᮳᱃᱓꘣꣓꤃꧓꩓꯳3]/g, "3"); s = s.replace(/[٤۴߄४৪੪૪୪௪౪೪൪๔໔༤၄႔៤᠔᥊᧔᪄᪔᭔᮴᱄᱔꘤꣔꤄꧔꩔꯴4]/g, "4"); s = s.replace(/[٥۵߅५৫੫૫୫௫౫೫൫๕໕༥၅႕៥᠕᥋᧕᪅᪕᭕᮵᱅᱕꘥꣕꤅꧕꩕꯵5]/g, "5"); s = s.replace(/[٦۶߆६৬੬૬୬௬౬೬൬๖໖༦၆႖៦᠖᥌᧖᪆᪖᭖᮶᱆᱖꘦꣖꤆꧖꩖꯶6]/g, "6"); s = s.replace(/[٧۷߇७৭੭૭୭௭౭೭൭๗໗༧၇႗៧᠗᥍᧗᪇᪗᭗᮷᱇᱗꘧꣗꤇꧗꩗꯷7]/g, "7"); s = s.replace(/[٨۸߈८৮੮૮୮௮౮೮൮๘໘༨၈႘៨᠘᥎᧘᪈᪘᭘᮸᱈᱘꘨꣘꤈꧘꩘꯸8]/g, "8"); s = s.replace(/[٩۹߉९৯੯૯୯௯౯೯൯๙໙༩၉႙៩᠙᥏᧙᪉᪙᭙᮹᱉᱙꘩꣙꤉꧙꩙꯹9]/g, "9"); } return s; } // Find and replace instances of Bible books. match_books(s) { var book, books, has_replacement, k, len, ref; books = []; ref = this.regexps.books; // Replace all book strings. for (k = 0, len = ref.length; k < len; k++) { book = ref[k]; has_replacement = false; // Using array concatenation instead of replacing text directly didn't offer performance improvements in tests of the approach. s = s.replace(book.regexp, function(full, prev, bk) { var extra; has_replacement = true; // `value` contains the raw string; `book.osis` is the osis value for the book. books.push({ value: bk, parsed: book.osis, type: "book" }); extra = book.extra != null ? `/${book.extra}` : ""; return `${prev}\x1f${books.length - 1}${extra}\x1f`; }); // If we've already replaced all possible books in the string, we don't need to check any further. if (has_replacement === true && /^[\s\x1f\d:.,;\-\u2013\u2014]+$/.test(s)) { break; } } // Replace translations. s = s.replace(this.regexps.translations, function(match) { books.push({ value: match, parsed: match.toLowerCase(), type: "translation" }); return `\x1e${books.length - 1}\x1e`; }); return [s, this.get_book_indices(books, s)]; } // Get the string index for all the books / translations, adding the start index as a new key. get_book_indices(books, s) { var add_index, match, re; add_index = 0; re = /([\x1f\x1e])(\d+)(?:\/\d+)?\1/g; // opening book or translation // the number // optional extra identifier // closing delimeter while (match = re.exec(s)) { // Keep track of the actual start index. books[match[2]].start_index = match.index + add_index; // Add the difference between the real length of the book and what we replaced it with (`match[0]` is the replacement). add_index += books[match[2]].value.length - match[0].length; } return books; } // Create an array of all the potential bcv matches in the string. match_passages(s) { var accum, book_id, entities, full, match, next_char, original_part_length, part, passage, post_context, regexp_index_adjust, start_index_adjust; entities = []; post_context = {}; while (match = this.regexps.escaped_passage.exec(s)) { // * `match[0]` includes the preceding character (if any) for bounding. // * `match[1]` is the full match minus the character preceding the match used for bounding. // * `match[2]` is the book id. [full, part, book_id] = match; // Adjust the `index` to use the `part` offset rather than the `full` offset. We use it below for `captive_end_digits`. original_part_length = part.length; match.index += full.length - original_part_length; // Remove most three+-character digits at the end; they won't match. if (/\s[2-9]\d\d\s*$|\s\d{4,}\s*$/.test(part)) { part = part.replace(/\s+\d+\s*$/, ""); } // Clean up the end of the match to avoid irrelevant context. if (!/[\d\x1f\x1e)]$/.test(part)) { // Remove superfluous characters from the end of the match. part = this.replace_match_end(part); } if (this.options.captive_end_digits_strategy === "delete") { // If the match ends with a space+digit and is immediately followed by a word character, ignore the space+digit: `Matt 1, 2Text`. next_char = match.index + part.length; if (s.length > next_char && /^\w/.test(s.substr(next_char, 1))) { part = part.replace(/[\s*]+\d+$/, ""); } // If the match ends with a translation indicator, remove any numbers afterward. This situation generally occurs in cases like, "Ps 1:1 ESV 1 Blessed is...", where the final `1` is a verse number that's part of the text. part = part.replace(/(\x1e[)\]]?)[\s*]*\d+$/, "$1"); } // Though PEG.js doesn't have to be case-sensitive, using the case-insensitive feature involves some repeated processing. By lower-casing here, we only pay the cost once. The grammar for words like "also" is case-sensitive; we can safely lowercase ascii letters without changing indices. We don't just call .toLowerCase() because it could affect the length of the string if it contains certain characters; maintaining the indices is the most important thing. part = part.replace(/[A-Z]+/g, function(capitals) { return capitals.toLowerCase(); }); // If we're in a chapter-book situation, the first character won't be a book control character, which would throw off the `start_index`. start_index_adjust = part.substr(0, 1) === "\x1f" ? 0 : part.split("\x1f")[0].length; // * `match` is important for the length and whether it contains control characters, neither of which we've changed inconsistently with the original string. The `part` may be shorter than originally matched, but that's only to remove unneeded characters at the end. // * `grammar` is the external PEG parser. The `@options.punctuation_strategy` determines which punctuation is used for sequences and `cv` separators. passage = { value: grammar.parse(part, { punctuation_strategy: this.options.punctuation_strategy }), type: "base", start_index: this.passage.books[book_id].start_index - start_index_adjust, match: part }; // Are we looking at a single book on its own that could be part of a range like "1-2 Sam"? // Either it's on its own or a translation sequence follows it, making it effectively on its own. if (this.options.book_alone_strategy === "full" && this.options.book_range_strategy === "include" && passage.value[0].type === "b" && (passage.value.length === 1 || (passage.value.length > 1 && passage.value[1].type === "translation_sequence")) && start_index_adjust === 0 && (this.passage.books[book_id].parsed.length === 1 || (this.passage.books[book_id].parsed.length > 1 && this.passage.books[book_id].parsed[1].type === "translation")) && /^[234]/.test(this.passage.books[book_id].parsed[0])) { this.create_book_range(s, passage, book_id); } // Handle each passage individually to prevent context leakage (e.g., translations back-propagating through unrelated entities). [accum, post_context] = this.passage.handle_obj(passage); entities = entities.concat(accum); // Move the next RegExp iteration to start earlier if we didn't use everything we thought we were going to. regexp_index_adjust = this.adjust_regexp_end(accum, original_part_length, part.length); if (regexp_index_adjust > 0) { this.regexps.escaped_passage.lastIndex -= regexp_index_adjust; } } return [entities, post_context]; } // Handle the objects returned from the grammar to produce entities for further processing. We may need to adjust the `RegExp.lastIndex` if we discarded characters from the end of the match or if, after parsing, we're ignoring some of them--especially with ending parenthetical statements like "Luke 8:1-3; 24:10 (and Matthew 14:1-12 and Luke 23:7-12 for background)". adjust_regexp_end(accum, old_length, new_length) { var regexp_index_adjust; regexp_index_adjust = 0; if (accum.length > 0) { // `accum` uses an off-by-one end index compared to the RegExp object. "and Psa3" means `lastIndex` = 8, `old_length` and `new_length` are both 4 (omitting "and " and leaving "Psa3"), and the `accum` end index is 3. We end up with 4 - 3 - 1 = 0, or no adjustment. Compare "and Psa3 and", where the last " and" is originally considered part of the regexp. In this case, `regexp_index_adjust` is 4: 8 ("Psa3 and") - 3 ("Psa3") - 1. regexp_index_adjust = old_length - accum[accum.length - 1].indices[1] - 1; } else if (old_length !== new_length) { regexp_index_adjust = old_length - new_length; } return regexp_index_adjust; } // Remove unnecessary characters from the end of the match. replace_match_end(part) { var match, remove; // Split the string on valid ending characters. Remove whatever's leftover at the end of the string. It would be easier to do `part.split(@regexps.match_end_split).pop()`, but IE doesn't handle empty strings at the end. remove = part.length; while (match = this.regexps.match_end_split.exec(part)) { remove = match.index + match[0].length; } if (remove < part.length) { part = part.substr(0, remove); } return part; } // If a book is on its own, check whether it's preceded by something that indicates it's a book range like "1-2 Samuel". create_book_range(s, passage, book_id) { var cases, i, k, limit, prev, range_regexp, ref; cases = [bcv_parser.prototype.regexps.first, bcv_parser.prototype.regexps.second, bcv_parser.prototype.regexps.third]; limit = parseInt(this.passage.books[book_id].parsed[0].substr(0, 1), 10); for (i = k = 1, ref = limit; (1 <= ref ? k < ref : k > ref); i = 1 <= ref ? ++k : --k) { range_regexp = i === limit - 1 ? bcv_parser.prototype.regexps.range_and : bcv_parser.prototype.regexps.range_only; prev = s.match(RegExp(`(?:^|\\W)(${cases[i - 1]}\\s*${range_regexp}\\s*)\\x1f${book_id}\\x1f`, "i")); if (prev != null) { return this.add_book_range_object(passage, prev, i); } } return false; } // Create a fake object that can be parsed to show the correct result. add_book_range_object(passage, prev, start_book_number) { var i, k, length, ref, ref1, results; length = prev[1].length; passage.value[0] = { type: "b_range_pre", value: [ { type: "b_pre", value: start_book_number.toString(), indices: [prev.index, prev.index + length] }, passage.value[0] ], indices: [0, passage.value[0].indices[1] + length] }; // Adjust the indices of the original result so they reflect the new content. passage.value[0].value[1].indices[0] += length; passage.value[0].value[1].indices[1] += length; // These two are the most important ones; the `absolute_indices` function uses them. passage.start_index -= length; passage.match = prev[1] + passage.match; if (passage.value.length === 1) { return; } // If there are subsequent objects, also adjust their offsets. results = []; for (i = k = 1, ref = passage.value.length; (1 <= ref ? k < ref : k > ref); i = 1 <= ref ? ++k : --k) { if (passage.value[i].value == null) { continue; } // If it's an `integer` type, `passage.value[i].value` is a scalar rather than an object, so we only need to adjust the indices for the top-level object. if (((ref1 = passage.value[i].value[0]) != null ? ref1.indices : void 0) != null) { passage.value[i].value[0].indices[0] += length; passage.value[i].value[0].indices[1] += length; } passage.value[i].indices[0] += length; results.push(passage.value[i].indices[1] += length); } return results; } // ## Output-Related Functions // Return a single OSIS string (comma-separated) for all the references in the whole input string. osis() { var k, len, osis, out, ref; out = []; ref = this.parsed_entities(); for (k = 0, len = ref.length; k < len; k++) { osis = ref[k]; if (osis.osis.length > 0) { out.push(osis.osis); } } return out.join(","); } // Return an array of `[OSIS, TRANSLATIONS]` for each reference (combined according to `options`). osis_and_translations() { var k, len, osis, out, ref; out = []; ref = this.parsed_entities(); for (k = 0, len = ref.length; k < len; k++) { osis = ref[k]; if (osis.osis.length > 0) { out.push([osis.osis, osis.translations.join(",")]); } } return out; } // Return an array of `{osis: OSIS, indices:[START, END], translations: [TRANSLATIONS]}` objects for each reference (combined according to `options`). osis_and_indices() { var k, len, osis, out, ref; out = []; ref = this.parsed_entities(); for (k = 0, len = ref.length; k < len; k++) { osis = ref[k]; if (osis.osis.length > 0) { out.push({ osis: osis.osis, translations: osis.translations, indices: osis.indices }); } } return out; } // Return all objects, probably for additional processing. parsed_entities() { var entity, entity_id, i, k, l, last_i, len, len1, length, m, n, osis, osises, out, passage, ref, ref1, ref2, ref3, strings, translation, translation_alias, translation_osis, translations; out = []; for (entity_id = k = 0, ref = this.entities.length; (0 <= ref ? k < ref : k > ref); entity_id = 0 <= ref ? ++k : --k) { entity = this.entities[entity_id]; // Be sure to include any translation identifiers in the indices we report back, but only if the translation immediately follows the previous entity. if (entity.type && entity.type === "translation_sequence" && out.length > 0 && entity_id === out[out.length - 1].entity_id + 1) { out[out.length - 1].indices[1] = entity.absolute_indices[1]; } if (entity.passages == null) { continue; } if ((entity.type === "b" && this.options.book_alone_strategy === "ignore") || (entity.type === "b_range" && this.options.book_range_strategy === "ignore") || entity.type === "context") { continue; } // A given entity, even if part of a sequence, always only has one set of translations associated with it. translations = []; translation_alias = null; if (entity.passages[0].translations != null) { ref1 = entity.passages[0].translations; for (l = 0, len = ref1.length; l < len; l++) { translation = ref1[l]; translation_osis = ((ref2 = translation.osis) != null ? ref2.length : void 0) > 0 ? translation.osis : ""; if (translation_alias == null) { translation_alias = translation.alias; } translations.push(translation_osis); } } else { translations = [""]; translation_alias = "default"; } osises = []; length = entity.passages.length; for (i = m = 0, ref3 = length; (0 <= ref3 ? m < ref3 : m > ref3); i = 0 <= ref3 ? ++m : --m) { passage = entity.passages[i]; // The `type` is usually only set in a sequence. if (passage.type == null) { passage.type = entity.type; } if (passage.valid.valid === false) { if (this.options.invalid_sequence_strategy === "ignore" && entity.type === "sequence") { this.snap_sequence("ignore", entity, osises, i, length); } if (this.options.invalid_passage_strategy === "ignore") { // Stop here if we're ignoring invalid passages. continue; } } // If indicated in `@options`, exclude stray start/end books, resetting the parent indices as needed. if ((passage.type === "b" || passage.type === "b_range") && this.options.book_sequence_strategy === "ignore" && entity.type === "sequence") { this.snap_sequence("book", entity, osises, i, length); continue; } if ((passage.type === "b_range_start" || passage.type === "range_end_b") && this.options.book_range_strategy === "ignore") { this.snap_range(entity, i); } if (passage.absolute_indices == null) { passage.absolute_indices = entity.absolute_indices; } osises.push({ osis: passage.valid.valid ? this.to_osis(passage.start, passage.end, translation_alias) : "", type: passage.type, indices: passage.absolute_indices, translations: translations, start: passage.start, end: passage.end, enclosed_indices: passage.enclosed_absolute_indices, entity_id: entity_id, entities: [passage] }); } if (osises.length === 0) { // Don't return an empty object. continue; } if (osises.length > 1 && this.options.consecutive_combination_strategy === "combine") { osises = this.combine_consecutive_passages(osises, translation_alias); } // Add the osises array to the existing array. if (this.options.sequence_combination_strategy === "separate") { out = out.concat(osises); } else { // Add the OSIS string and some data to the array. strings = []; last_i = osises.length - 1; if ((osises[last_i].enclosed_indices != null) && osises[last_i].enclosed_indices[1] >= 0) { // Adjust the end index to match a closing parenthesis when presented with `enclosed` entities. These entities always start mid-sequence (unless there's a book we're ignoring), so we don't need to worry about the start index. entity.absolute_indices[1] = osises[last_i].enclosed_indices[1]; } for (n = 0, len1 = osises.length; n < len1; n++) { osis = osises[n]; if (osis.osis.length > 0) { strings.push(osis.osis); } } out.push({ osis: strings.join(","), indices: entity.absolute_indices, translations: translations, entity_id: entity_id, entities: osises }); } } return out; } to_osis(start, end, translation) { var osis, out; if ((end.c == null) && (end.v == null) && start.b === end.b && (start.c == null) && (start.v == null) && this.options.book_alone_strategy === "first_chapter") { // If it's just a book on its own, how we deal with it depends on whether we want to return just the first chapter or the complete book. end.c = 1; } osis = { start: "", end: "" }; // If no start chapter or verse, assume the first possible. if (start.c == null) { start.c = 1; } if (start.v == null) { start.v = 1; } // If no end chapter or verse, assume the last possible. If it's a single-chapter book, always use the first chapter for consistency with other `passage_existence_strategy` results (which do respect the single-chapter length). if (end.c == null) { if (this.options.passage_existence_strategy.indexOf("c") >= 0 || ((this.passage.translations[translation].chapters[end.b] != null) && this.passage.translations[translation].chapters[end.b].length === 1)) { end.c = this.passage.translations[translation].chapters[end.b].length; } else { end.c = 999; } } if (end.v == null) { if ((this.passage.translations[translation].chapters[end.b][end.c - 1] != null) && this.options.passage_existence_strategy.indexOf("v") >= 0) { end.v = this.passage.translations[translation].chapters[end.b][end.c - 1]; } else { end.v = 999; } } if (this.options.include_apocrypha && this.options.ps151_strategy === "b" && ((start.c === 151 && start.b === "Ps") || (end.c === 151 && end.b === "Ps"))) { this.fix_ps151(start, end, translation); } // If it's a complete book or range of complete books and we want the shortest possible OSIS, return just the book names. The `end.c` and `end.v` equaling 999 is for when the `passage_existence_strategy` sets them to 999, indicating that we should treat it as a complete book or chapter. if (this.options.osis_compaction_strategy === "b" && start.c === 1 && start.v === 1 && ((end.c === 999 && end.v === 999) || (end.c === this.passage.translations[translation].chapters[end.b].length && this.options.passage_existence_strategy.indexOf("c") >= 0 && (end.v === 999 || (end.v === this.passage.translations[translation].chapters[end.b][end.c - 1] && this.options.passage_existence_strategy.indexOf("v") >= 0))))) { osis.start = start.b; osis.end = end.b; // If it's a complete chapter or range of complete chapters and we want a short OSIS, return just the books and chapters. We only care when `osis_compaction_strategy` isn't `bcv` (i.e., length 3) because `bcv` is always fully specified. } else if (this.options.osis_compaction_strategy.length <= 2 && start.v === 1 && (end.v === 999 || (end.v === this.passage.translations[translation].chapters[end.b][end.c - 1] && this.options.passage_existence_strategy.indexOf("v") >= 0))) { osis.start = start.b + "." + start.c.toString(); osis.end = end.b + "." + end.c.toString(); } else { // Otherwise, return the full BCV reference for both. osis.start = start.b + "." + start.c.toString() + "." + start.v.toString(); osis.end = end.b + "." + end.c.toString() + "." + end.v.toString(); } // If it's the same verse ("Gen.1.1-Gen.1.1"), chapter ("Gen.1-Gen.1") or book ("Gen-Gen"), return just the start so we don't end up with an empty range. if (osis.start === osis.end) { out = osis.start; } else { // Otherwise return the range. out = osis.start + "-" + osis.end; } if (start.extra != null) { out = start.extra + "," + out; } if (end.extra != null) { out += "," + end.extra; } return out; } // If we want to treat Ps151 as a book rather than a chapter, we have to do some gymnastics to make sure it returns properly. fix_ps151(start, end, translation) { var ref; // Ps151 doesn't necessarily get promoted into the translation chapter list because during the string parsing, we treat it as `Ps` rather than `Ps151`. if (translation !== "default" && (((ref = this.translations[translation]) != null ? ref.chapters["Ps151"] : void 0) == null)) { this.passage.promote_book_to_translation("Ps151", translation); } if (start.c === 151 && start.b === "Ps") { // If the whole range is in Ps151, we can just reset both sets of books and chapters; we don't have to worry about odd ranges. if (end.c === 151 && end.b === "Ps") { start.b = "Ps151"; start.c = 1; end.b = "Ps151"; return end.c = 1; } else { // This is the string we're going to prepend to our final output. // Otherwise, we generate the OSIS for Ps151 and then set the beginning of the range to the next book. We assume that the next book is Prov, which isn't necessarily the case. I'm not aware of a canon that doesn't place Prov after Ps, however. start.extra = this.to_osis({ b: "Ps151", c: 1, v: start.v }, { b: "Ps151", c: 1, v: this.passage.translations[translation].chapters["Ps151"][0] }, translation); start.b = "Prov"; start.c = 1; return start.v = 1; } } else { // This is the string we're going to append to the final output. // We know that end is in Ps151 and start is beforehand. end.extra = this.to_osis({ b: "Ps151", c: 1, v: 1 }, { b: "Ps151", c: 1, v: end.v }, translation); // Set the end of the range to be the end of Ps.150, which immediately precedes Ps151. end.c = 150; return end.v = this.passage.translations[translation].chapters["Ps"][149]; } } // If we have the correct `option` set (checked before calling this function), merge passages that refer to sequential verses: Gen 1, 2 -> Gen 1-2. It works for any combination of books, chapters, and verses. combine_consecutive_passages(osises, translation) { var enclosed_sequence_start, has_enclosed, i, is_enclosed_last, k, last_i, osis, out, prev, prev_i, ref; out = []; prev = {}; last_i = osises.length - 1; enclosed_sequence_start = -1; has_enclosed = false; for (i = k = 0, ref = last_i; (0 <= ref ? k <= ref : k >= ref); i = 0 <= ref ? ++k : --k) { osis = osises[i]; if (osis.osis.length > 0) { prev_i = out.length - 1; is_enclosed_last = false; // Record the start index of the enclosed sequence for use in future iterations. if (osis.enclosed_indices[0] !== enclosed_sequence_start) { enclosed_sequence_start = osis.enclosed_indices[0]; } // If we're in an enclosed sequence and it's either the last item in the sequence or the next item in the sequence isn't part of the same enclosed sequence, then we've reached the end of the enclosed sequence. if (enclosed_sequence_start >= 0 && (i === last_i || osises[i + 1].enclosed_indices[0] !== osis.enclosed_indices[0])) { is_enclosed_last = true; // We may need to adjust the indices later. has_enclosed = true; } // Pretend like the previous `end` and existing `start` don't exist. if (this.is_verse_consecutive(prev, osis.start, translation)) { out[prev_i].end = osis.end; // Set the enclosed indices if it's last or at the end of a sequence of enclosed indices. Otherwise only extend the indices to the actual indices--e.g., `Ps 117 (118, 120)`, should only extend to after `118`. out[prev_i].is_enclosed_last = is_enclosed_last; out[prev_i].indices[1] = osis.indices[1]; out[prev_i].enclosed_indices[1] = osis.enclosed_indices[1]; out[prev_i].osis = this.to_osis(out[prev_i].start, osis.end, translation); } else { out.push(osis); } prev = { b: osis.end.b, c: osis.end.c, v: osis.end.v }; } else { out.push(osis); prev = {}; } } if (has_enclosed) { this.snap_enclosed_indices(out); } return out; } // If there's an enclosed reference--e.g., Ps 1 (2)--and we've combined consecutive passages in such a way that the enclosed reference is fully inside the sequence (i.e., if it starts before the enclosed sequence), then make sure the end index for the passage includes the necessary closing punctuation. snap_enclosed_indices(osises) { var k, len, osis; for (k = 0, len = osises.length; k < len; k++) { osis = osises[k]; if (osis.is_enclosed_last != null) { if (osis.enclosed_indices[0] < 0 && osis.is_enclosed_last) { osis.indices[1] = osis.enclosed_indices[1]; } delete osis.is_enclosed_last; } } return osises; } // Given two fully specified objects (complete bcvs), find whether they're sequential. is_verse_consecutive(prev, check, translation) { var translation_order; if (prev.b == null) { return false; } // A translation doesn't always have an `order` set. If it doesn't, then use the default order. translation_order = this.passage.translations[translation].order != null ? this.passage.translations[translation].order : this.passage.translations.default.order; if (prev.b === check.b) { if (prev.c === check.c) { if (prev.v === check.v - 1) { return true; } } else if (check.v === 1 && prev.c === check.c - 1) { if (prev.v === this.passage.translations[translation].chapters[prev.b][prev.c - 1]) { return true; } } } else if (check.c === 1 && check.v === 1 && translation_order[prev.b] === translation_order[check.b] - 1) { if (prev.c === this.passage.translations[translation].chapters[prev.b].length && prev.v === this.passage.translations[translation].chapters[prev.b][prev.c - 1]) { return true; } } return false; } // Snap the start/end index of the range when it includes a book on its own and `@options.book_range_strategy` is `ignore`. snap_range(entity, passage_i) { var entity_i, key, pluck, ref, source_entity, target_entity, temp, type; // If the book is at the start of the range, we want to ignore the first part of the range. if (entity.type === "b_range_start" || (entity.type === "sequence" && entity.passages[passage_i].type === "b_range_start")) { entity_i = 1; source_entity = "end"; type = "b_range_start"; } else { // If the book is at the end of the range, we want to ignore the end of the range. entity_i = 0; source_entity = "start"; type = "range_end_b"; } target_entity = source_entity === "end" ? "start" : "end"; ref = entity.passages[passage_i][target_entity]; for (key in ref) { if (!hasProp.call(ref, key)) continue; entity.passages[passage_i][target_entity][key] = entity.passages[passage_i][source_entity][key]; } if (entity.type === "sequence") { if (passage_i >= entity.value.length) { // This can be too long if a range is converted into a sequence where it ends with an open book range (`Matt 10-Rev`) that we want to ignore. At this point, the `passages` and `value` keys can get out-of-sync. passage_i = entity.value.length - 1; } pluck = this.passage.pluck(type, entity.value[passage_i]); // The `pluck` can be null if we've already overwritten its `type` in a previous recursion. This process is unusual, but can happen in "Proverbs 31:2. Vs 10 to dan". if (pluck != null) { temp = this.snap_range(pluck, 0); // Move the indices to exclude what we've omitted. We want to move it even if it isn't the last one in case there are multiple books at the end--this way it'll use the correct indices. if (passage_i === 0) { entity.absolute_indices[0] = temp.absolute_indices[0]; } else { entity.absolute_indices[1] = temp.absolute_indices[1]; } } } else { // If it's not a sequence, change the `type` and `absolute_indices` to exclude the book we're omitting. entity.original_type = entity.type; entity.type = entity.value[entity_i].type; entity.absolute_indices = [entity.value[entity_i].absolute_indices[0], entity.value[entity_i].absolute_indices[1]]; } return entity; } // Snap the start/end index of the entity or surrounding passages when there's a lone book or invalid item in a sequence. snap_sequence(type, entity, osises, i, length) { var passage; passage = entity.passages[i]; // If the passage is the first thing in the sequence and something is after it, snap the start index of the whole entity to the start index of the next item. // But we only want to do this if it's followed by a book (if it's "Matt, 5", we want to be sure to include "Matt" as part of the indices and bypass this step). We can tell if that's the case if the `type` of what follows starts with `b` or if it's a `range` starting in a different book. The tricky part occurs when we have several invalid references at the start (Matt 29, 30, Acts 1)--we need to find the first value that's a book. If there's a valid item before the next book, abort. if (passage.absolute_indices[0] === entity.absolute_indices[0] && i < length - 1 && this.get_snap_sequence_i(entity.passages, i, length) !== i) { entity.absolute_indices[0] = entity.passages[i + 1].absolute_indices[0]; this.remove_absolute_indices(entity.passages, i + 1); // If the passage is the last thing in a sequence (but not the only one), snap the entity end index to the end index of the previous valid item. To handle multiple items at the end, snap back to the last known good item if available. } else if (passage.absolute_indices[1] === entity.absolute_indices[1] && i > 0) { entity.absolute_indices[1] = osises.length > 0 ? osises[osises.length - 1].indices[1] : entity.passages[i - 1].absolute_indices[1]; // Otherwise, if the next item doesn't start with a book, link the start index of the current passage to the next one because we're including the current passage as part of the next one. In "Eph. 4. Gen, Matt, 6", the `Matt, ` should be part of the `6`, but "Eph. 4. Gen, Matt, 1cor6" should exclude `Matt`. } else if (type === "book" && i < length - 1 && !this.starts_with_book(entity.passages[i + 1])) { entity.passages[i + 1].absolute_indices[0] = passage.absolute_indices[0]; } // Return something only for unit testing. return entity; } // Identify whether there are any valid items between the current item and the next book. get_snap_sequence_i(passages, i, length) { var j, k, ref, ref1; for (j = k = ref = i + 1, ref1 = length; (ref <= ref1 ? k < ref1 : k > ref1); j = ref <= ref1 ? ++k : --k) { if (this.starts_with_book(passages[j])) { return j; } if (passages[j].valid.valid) { return i; } } return i; } // Given a passage, does it start with a book? It never takes a sequence as an argument. starts_with_book(passage) { if (passage.type.substr(0, 1) === "b") { return true; } if ((passage.type === "range" || passage.type === "ff") && passage.start.type.substr(0, 1) === "b") { return true; } return false; } // Remove absolute indices from the given passage to the end of the sequence. We do this when we don't want to include the end of a sequence in the sequence (most likely because it's invalid or a book on its own). remove_absolute_indices(passages, i) { var end, k, len, passage, ref, start; if (passages[i].enclosed_absolute_indices[0] < 0) { return false; } [start, end] = passages[i].enclosed_absolute_indices; ref = passages.slice(i); for (k = 0, len = ref.length; k < len; k++) { passage = ref[k]; if (passage.enclosed_absolute_indices[0] === start && passage.enclosed_absolute_indices[1] === end) { passage.enclosed_absolute_indice