usx-to-json
Version:
Convert the USX Bible format to JSON, with verse contents in either HTML or plain text.
4 lines • 77.1 kB
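
The embedded sources in the map below show the package's two entry points, usx_to_json_html(xml, alignment = true, parser = DOMParser) and usx_to_json_txt(xml, parser = DOMParser). Both return the book code, the book's names, and a contents array indexed by chapter and then verse. Below is a minimal usage sketch, not taken from this file: the 'usx-to-json' import specifier, the fetched file path, and the availability of a global DOMParser (true in browsers; Node would need a substitute such as @xmldom/xmldom) are all assumptions.

// Usage sketch (assumptions noted above; not confirmed by this file)
import {usx_to_json_html, usx_to_json_txt} from 'usx-to-json'
// import {DOMParser} from '@xmldom/xmldom'  // Node only; browsers provide a global DOMParser

// Fetch a USX document for one book (hypothetical path)
const usx = await fetch('/bibles/eng_bsb/jhn.usx').then(r => r.text())

// HTML form: contents[chapter][verse] is an [opening, body, closing] string triple
const html = usx_to_json_html(usx)  // alignment defaults to true, parser to DOMParser
console.log(html.book)                      // 'jhn'
console.log(html.name.normal)               // Typically the running-header name (<para style="h">)
console.log(html.contents[3][16].join(''))  // John 3:16 as an HTML fragment

// Plain-text form: same chapter/verse indexing, with plain-text content items per verse
const txt = usx_to_json_txt(usx)
console.log(txt.contents[3][16])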
Source Map (JSON)
{
"version": 3,
"sources": ["../src/index.ts", "../../../references/src/data.ts", "../../../references/src/last_verse.ts", "../../../references/src/detect.ts", "../src/common.ts", "../src/elements.ts", "../src/html.ts", "../src/txt.ts"],
"sourcesContent": ["\nexport {usx_to_json_html} from './html.js'\nexport {usx_to_json_txt} from './txt.js'\n", "\n// Bible book ids in traditional order\nexport const books_ordered:readonly string[] = Object.freeze([\n 'gen', 'exo', 'lev', 'num', 'deu', 'jos', 'jdg', 'rut', '1sa', '2sa', '1ki', '2ki', '1ch',\n '2ch', 'ezr', 'neh', 'est', 'job', 'psa', 'pro', 'ecc', 'sng', 'isa', 'jer', 'lam', 'ezk',\n 'dan', 'hos', 'jol', 'amo', 'oba', 'jon', 'mic', 'nam', 'hab', 'zep', 'hag', 'zec', 'mal',\n 'mat', 'mrk', 'luk', 'jhn', 'act', 'rom', '1co', '2co', 'gal', 'eph', 'php', 'col', '1th',\n '2th', '1ti', '2ti', 'tit', 'phm', 'heb', 'jas', '1pe', '2pe', '1jn', '2jn', '3jn', 'jud',\n 'rev',\n])\n\n\n// Usual English names of Bible books\nexport const book_names_english:Readonly<Record<string, string>> = Object.freeze({\n 'gen': \"Genesis\",\n 'exo': \"Exodus\",\n 'lev': \"Leviticus\",\n 'num': \"Numbers\",\n 'deu': \"Deuteronomy\",\n 'jos': \"Joshua\",\n 'jdg': \"Judges\",\n 'rut': \"Ruth\",\n '1sa': \"1 Samuel\",\n '2sa': \"2 Samuel\",\n '1ki': \"1 Kings\",\n '2ki': \"2 Kings\",\n '1ch': \"1 Chronicles\",\n '2ch': \"2 Chronicles\",\n 'ezr': \"Ezra\",\n 'neh': \"Nehemiah\",\n 'est': \"Esther\",\n 'job': \"Job\",\n 'psa': \"Psalms\",\n 'pro': \"Proverbs\",\n 'ecc': \"Ecclesiastes\",\n 'sng': \"Song of Songs\",\n 'isa': \"Isaiah\",\n 'jer': \"Jeremiah\",\n 'lam': \"Lamentations\",\n 'ezk': \"Ezekiel\",\n 'dan': \"Daniel\",\n 'hos': \"Hosea\",\n 'jol': \"Joel\",\n 'amo': \"Amos\",\n 'oba': \"Obadiah\",\n 'jon': \"Jonah\",\n 'mic': \"Micah\",\n 'nam': \"Nahum\",\n 'hab': \"Habakkuk\",\n 'zep': \"Zephaniah\",\n 'hag': \"Haggai\",\n 'zec': \"Zechariah\",\n 'mal': \"Malachi\",\n 'mat': \"Matthew\",\n 'mrk': \"Mark\",\n 'luk': \"Luke\",\n 'jhn': \"John\",\n 'act': \"Acts\",\n 'rom': \"Romans\",\n '1co': \"1 Corinthians\",\n '2co': \"2 Corinthians\",\n 'gal': \"Galatians\",\n 'eph': \"Ephesians\",\n 'php': \"Philippians\",\n 'col': \"Colossians\",\n '1th': \"1 Thessalonians\",\n '2th': \"2 Thessalonians\",\n '1ti': \"1 Timothy\",\n '2ti': \"2 Timothy\",\n 'tit': \"Titus\",\n 'phm': \"Philemon\",\n 'heb': \"Hebrews\",\n 'jas': \"James\",\n '1pe': \"1 Peter\",\n '2pe': \"2 Peter\",\n '1jn': \"1 John\",\n '2jn': \"2 John\",\n '3jn': \"3 John\",\n 'jud': \"Jude\",\n 'rev': \"Revelation\",\n})\n\n\n// Usual English abbreviations of Bible books\n// NOTE Aiming for short but also easily recognisable\nexport const book_abbrev_english:Readonly<Record<string, string>> = Object.freeze({\n 'gen': \"Gen\",\n 'exo': \"Exo\",\n 'lev': \"Lev\",\n 'num': \"Num\",\n 'deu': \"Deut\",\n 'jos': \"Josh\",\n 'jdg': \"Judg\",\n 'rut': \"Ruth\",\n '1sa': \"1 Sam\",\n '2sa': \"2 Sam\",\n '1ki': \"1 King\",\n '2ki': \"2 King\",\n '1ch': \"1 Chr\",\n '2ch': \"2 Chr\",\n 'ezr': \"Ezra\",\n 'neh': \"Neh\",\n 'est': \"Est\",\n 'job': \"Job\",\n 'psa': \"Psalm\",\n 'pro': \"Prov\",\n 'ecc': \"Ecc\",\n 'sng': \"Song\",\n 'isa': \"Isa\",\n 'jer': \"Jer\",\n 'lam': \"Lam\",\n 'ezk': \"Ezek\",\n 'dan': \"Dan\",\n 'hos': \"Hos\",\n 'jol': \"Joel\",\n 'amo': \"Amos\",\n 'oba': \"Obad\",\n 'jon': \"Jonah\",\n 'mic': \"Micah\",\n 'nam': \"Nahum\",\n 'hab': \"Hab\",\n 'zep': \"Zeph\",\n 'hag': \"Hag\",\n 'zec': \"Zech\",\n 'mal': \"Mal\",\n 'mat': \"Matt\",\n 'mrk': \"Mark\",\n 'luk': \"Luke\",\n 'jhn': \"John\",\n 'act': \"Acts\",\n 'rom': \"Rom\",\n '1co': \"1 Cor\",\n '2co': \"2 Cor\",\n 'gal': \"Gal\",\n 'eph': \"Eph\",\n 'php': \"Phil\",\n 'col': \"Col\",\n '1th': \"1 Thes\",\n '2th': \"2 Thes\",\n '1ti': \"1 Tim\",\n '2ti': \"2 Tim\",\n 
'tit': \"Titus\",\n 'phm': \"Phil\",\n 'heb': \"Heb\",\n 'jas': \"James\",\n '1pe': \"1 Pet\",\n '2pe': \"2 Pet\",\n '1jn': \"1 John\",\n '2jn': \"2 John\",\n '3jn': \"3 John\",\n 'jud': \"Jude\",\n 'rev': \"Rev\",\n})\n\n\n// Special English abbreviations of book names\n// These could in theory abbreviate multiple books, and are only specified because of convention\n// See https://www.logos.com/bible-book-abbreviations\n// These are hard-coded so that they will result in a correct match if English default is kept\nexport const english_abbrev_include:readonly [string, string][] = Object.freeze([\n // [code, abbrev]\n ['num', \"nm\"],\n ['ezr', \"ez\"],\n ['mic', \"mc\"],\n ['hab', \"hb\"],\n ['jhn', \"jn\"],\n ['php', \"phil\"],\n ['phm', \"pm\"],\n ['jas', \"jm\"],\n ['jud', \"jud\"],\n ['jud', \"jd\"],\n])\n\n\n// Abbreviations that should be ignored for being too vague\n// Words are only added if (1) common and (2) could actually match a book\n// E.g. \"So. 1\" is ok but not \"So 1 cat\"\nexport const english_abbrev_exclude:readonly string[] =\n Object.freeze([\"is\", \"so\", \"at\", \"am\", \"me\", \"he\", \"hi\"])\n", "\n// The number of verses for every chapter of the Bible\n// WARN Chapters are zero-indexed (i.e chapter 1 is at index 0)\nexport const last_verse:Readonly<Record<string, number[]>> = Object.freeze({\n '1ch': [\n 54,\n 55,\n 24,\n 43,\n 26,\n 81,\n 40,\n 40,\n 44,\n 14,\n 47,\n 40,\n 14,\n 17,\n 29,\n 43,\n 27,\n 17,\n 19,\n 8,\n 30,\n 19,\n 32,\n 31,\n 31,\n 32,\n 34,\n 21,\n 30,\n ],\n '1co': [\n 31,\n 16,\n 23,\n 21,\n 13,\n 20,\n 40,\n 13,\n 27,\n 33,\n 34,\n 31,\n 13,\n 40,\n 58,\n 24,\n ],\n '1jn': [\n 10,\n 29,\n 24,\n 21,\n 21,\n ],\n '1ki': [\n 53,\n 46,\n 28,\n 34,\n 18,\n 38,\n 51,\n 66,\n 28,\n 29,\n 43,\n 33,\n 34,\n 31,\n 34,\n 34,\n 24,\n 46,\n 21,\n 43,\n 29,\n 53,\n ],\n '1pe': [\n 25,\n 25,\n 22,\n 19,\n 14,\n ],\n '1sa': [\n 28,\n 36,\n 21,\n 22,\n 12,\n 21,\n 17,\n 22,\n 27,\n 27,\n 15,\n 25,\n 23,\n 52,\n 35,\n 23,\n 58,\n 30,\n 24,\n 42,\n 15,\n 23,\n 29,\n 22,\n 44,\n 25,\n 12,\n 25,\n 11,\n 31,\n 13,\n ],\n '1th': [\n 10,\n 20,\n 13,\n 18,\n 28,\n ],\n '1ti': [\n 20,\n 15,\n 16,\n 16,\n 25,\n 21,\n ],\n '2ch': [\n 17,\n 18,\n 17,\n 22,\n 14,\n 42,\n 22,\n 18,\n 31,\n 19,\n 23,\n 16,\n 22,\n 15,\n 19,\n 14,\n 19,\n 34,\n 11,\n 37,\n 20,\n 12,\n 21,\n 27,\n 28,\n 23,\n 9,\n 27,\n 36,\n 27,\n 21,\n 33,\n 25,\n 33,\n 27,\n 23,\n ],\n '2co': [\n 24,\n 17,\n 18,\n 18,\n 21,\n 18,\n 16,\n 24,\n 15,\n 18,\n 33,\n 21,\n 14,\n ],\n '2jn': [\n 13,\n ],\n '2ki': [\n 18,\n 25,\n 27,\n 44,\n 27,\n 33,\n 20,\n 29,\n 37,\n 36,\n 21,\n 21,\n 25,\n 29,\n 38,\n 20,\n 41,\n 37,\n 37,\n 21,\n 26,\n 20,\n 37,\n 20,\n 30,\n ],\n '2pe': [\n 21,\n 22,\n 18,\n ],\n '2sa': [\n 27,\n 32,\n 39,\n 12,\n 25,\n 23,\n 29,\n 18,\n 13,\n 19,\n 27,\n 31,\n 39,\n 33,\n 37,\n 23,\n 29,\n 33,\n 43,\n 26,\n 22,\n 51,\n 39,\n 25,\n ],\n '2th': [\n 12,\n 17,\n 18,\n ],\n '2ti': [\n 18,\n 26,\n 17,\n 22,\n ],\n '3jn': [\n 15,\n ],\n 'act': [\n 26,\n 47,\n 26,\n 37,\n 42,\n 15,\n 60,\n 40,\n 43,\n 48,\n 30,\n 25,\n 52,\n 28,\n 41,\n 40,\n 34,\n 28,\n 41,\n 38,\n 40,\n 30,\n 35,\n 27,\n 27,\n 32,\n 44,\n 31,\n ],\n 'amo': [\n 15,\n 16,\n 15,\n 13,\n 27,\n 14,\n 17,\n 14,\n 15,\n ],\n 'col': [\n 29,\n 23,\n 25,\n 18,\n ],\n 'dan': [\n 21,\n 49,\n 30,\n 37,\n 31,\n 28,\n 28,\n 27,\n 27,\n 21,\n 45,\n 13,\n ],\n 'deu': [\n 46,\n 37,\n 29,\n 49,\n 33,\n 25,\n 26,\n 20,\n 29,\n 22,\n 32,\n 32,\n 18,\n 29,\n 23,\n 22,\n 20,\n 22,\n 21,\n 20,\n 23,\n 30,\n 25,\n 22,\n 19,\n 19,\n 26,\n 68,\n 29,\n 
20,\n 30,\n 52,\n 29,\n 12,\n ],\n 'ecc': [\n 18,\n 26,\n 22,\n 16,\n 20,\n 12,\n 29,\n 17,\n 18,\n 20,\n 10,\n 14,\n ],\n 'eph': [\n 23,\n 22,\n 21,\n 32,\n 33,\n 24,\n ],\n 'est': [\n 22,\n 23,\n 15,\n 17,\n 14,\n 14,\n 10,\n 17,\n 32,\n 3,\n ],\n 'exo': [\n 22,\n 25,\n 22,\n 31,\n 23,\n 30,\n 25,\n 32,\n 35,\n 29,\n 10,\n 51,\n 22,\n 31,\n 27,\n 36,\n 16,\n 27,\n 25,\n 26,\n 36,\n 31,\n 33,\n 18,\n 40,\n 37,\n 21,\n 43,\n 46,\n 38,\n 18,\n 35,\n 23,\n 35,\n 35,\n 38,\n 29,\n 31,\n 43,\n 38,\n ],\n 'ezk': [\n 28,\n 10,\n 27,\n 17,\n 17,\n 14,\n 27,\n 18,\n 11,\n 22,\n 25,\n 28,\n 23,\n 23,\n 8,\n 63,\n 24,\n 32,\n 14,\n 49,\n 32,\n 31,\n 49,\n 27,\n 17,\n 21,\n 36,\n 26,\n 21,\n 26,\n 18,\n 32,\n 33,\n 31,\n 15,\n 38,\n 28,\n 23,\n 29,\n 49,\n 26,\n 20,\n 27,\n 31,\n 25,\n 24,\n 23,\n 35,\n ],\n 'ezr': [\n 11,\n 70,\n 13,\n 24,\n 17,\n 22,\n 28,\n 36,\n 15,\n 44,\n ],\n 'gal': [\n 24,\n 21,\n 29,\n 31,\n 26,\n 18,\n ],\n 'gen': [\n 31,\n 25,\n 24,\n 26,\n 32,\n 22,\n 24,\n 22,\n 29,\n 32,\n 32,\n 20,\n 18,\n 24,\n 21,\n 16,\n 27,\n 33,\n 38,\n 18,\n 34,\n 24,\n 20,\n 67,\n 34,\n 35,\n 46,\n 22,\n 35,\n 43,\n 55,\n 32,\n 20,\n 31,\n 29,\n 43,\n 36,\n 30,\n 23,\n 23,\n 57,\n 38,\n 34,\n 34,\n 28,\n 34,\n 31,\n 22,\n 33,\n 26,\n ],\n 'hab': [\n 17,\n 20,\n 19,\n ],\n 'hag': [\n 15,\n 23,\n ],\n 'heb': [\n 14,\n 18,\n 19,\n 16,\n 14,\n 20,\n 28,\n 13,\n 28,\n 39,\n 40,\n 29,\n 25,\n ],\n 'hos': [\n 11,\n 23,\n 5,\n 19,\n 15,\n 11,\n 16,\n 14,\n 17,\n 15,\n 12,\n 14,\n 16,\n 9,\n ],\n 'isa': [\n 31,\n 22,\n 26,\n 6,\n 30,\n 13,\n 25,\n 22,\n 21,\n 34,\n 16,\n 6,\n 22,\n 32,\n 9,\n 14,\n 14,\n 7,\n 25,\n 6,\n 17,\n 25,\n 18,\n 23,\n 12,\n 21,\n 13,\n 29,\n 24,\n 33,\n 9,\n 20,\n 24,\n 17,\n 10,\n 22,\n 38,\n 22,\n 8,\n 31,\n 29,\n 25,\n 28,\n 28,\n 25,\n 13,\n 15,\n 22,\n 26,\n 11,\n 23,\n 15,\n 12,\n 17,\n 13,\n 12,\n 21,\n 14,\n 21,\n 22,\n 11,\n 12,\n 19,\n 12,\n 25,\n 24,\n ],\n 'jas': [\n 27,\n 26,\n 18,\n 17,\n 20,\n ],\n 'jdg': [\n 36,\n 23,\n 31,\n 24,\n 31,\n 40,\n 25,\n 35,\n 57,\n 18,\n 40,\n 15,\n 25,\n 20,\n 20,\n 31,\n 13,\n 31,\n 30,\n 48,\n 25,\n ],\n 'jer': [\n 19,\n 37,\n 25,\n 31,\n 31,\n 30,\n 34,\n 22,\n 26,\n 25,\n 23,\n 17,\n 27,\n 22,\n 21,\n 21,\n 27,\n 23,\n 15,\n 18,\n 14,\n 30,\n 40,\n 10,\n 38,\n 24,\n 22,\n 17,\n 32,\n 24,\n 40,\n 44,\n 26,\n 22,\n 19,\n 32,\n 21,\n 28,\n 18,\n 16,\n 18,\n 22,\n 13,\n 30,\n 5,\n 28,\n 7,\n 47,\n 39,\n 46,\n 64,\n 34,\n ],\n 'jhn': [\n 51,\n 25,\n 36,\n 54,\n 47,\n 71,\n 53,\n 59,\n 41,\n 42,\n 57,\n 50,\n 38,\n 31,\n 27,\n 33,\n 26,\n 40,\n 42,\n 31,\n 25,\n ],\n 'job': [\n 22,\n 13,\n 26,\n 21,\n 27,\n 30,\n 21,\n 22,\n 35,\n 22,\n 20,\n 25,\n 28,\n 22,\n 35,\n 22,\n 16,\n 21,\n 29,\n 29,\n 34,\n 30,\n 17,\n 25,\n 6,\n 14,\n 23,\n 28,\n 25,\n 31,\n 40,\n 22,\n 33,\n 37,\n 16,\n 33,\n 24,\n 41,\n 30,\n 24,\n 34,\n 17,\n ],\n 'jol': [\n 20,\n 32,\n 21,\n ],\n 'jon': [\n 17,\n 10,\n 10,\n 11,\n ],\n 'jos': [\n 18,\n 24,\n 17,\n 24,\n 15,\n 27,\n 26,\n 35,\n 27,\n 43,\n 23,\n 24,\n 33,\n 15,\n 63,\n 10,\n 18,\n 28,\n 51,\n 9,\n 45,\n 34,\n 16,\n 33,\n ],\n 'jud': [\n 25,\n ],\n 'lam': [\n 22,\n 22,\n 66,\n 22,\n 22,\n ],\n 'lev': [\n 17,\n 16,\n 17,\n 35,\n 19,\n 30,\n 38,\n 36,\n 24,\n 20,\n 47,\n 8,\n 59,\n 57,\n 33,\n 34,\n 16,\n 30,\n 37,\n 27,\n 24,\n 33,\n 44,\n 23,\n 55,\n 46,\n 34,\n ],\n 'luk': [\n 80,\n 52,\n 38,\n 44,\n 39,\n 49,\n 50,\n 56,\n 62,\n 42,\n 54,\n 59,\n 35,\n 35,\n 32,\n 31,\n 37,\n 43,\n 48,\n 47,\n 38,\n 71,\n 56,\n 53,\n ],\n 'mal': [\n 14,\n 17,\n 18,\n 6,\n ],\n 'mat': [\n 25,\n 23,\n 17,\n 25,\n 48,\n 
34,\n 29,\n 34,\n 38,\n 42,\n 30,\n 50,\n 58,\n 36,\n 39,\n 28,\n 27,\n 35,\n 30,\n 34,\n 46,\n 46,\n 39,\n 51,\n 46,\n 75,\n 66,\n 20,\n ],\n 'mic': [\n 16,\n 13,\n 12,\n 13,\n 15,\n 16,\n 20,\n ],\n 'mrk': [\n 45,\n 28,\n 35,\n 41,\n 43,\n 56,\n 37,\n 38,\n 50,\n 52,\n 33,\n 44,\n 37,\n 72,\n 47,\n // Mark's ending: 1-8 common, 9-20 long ending\n // Some also add the alternate short ending to end of long ending with 21-22\n // Not supporting 21-22 as whether ending abrupt/long/short it definitely wasn't both\n 20,\n ],\n 'nam': [\n 15,\n 13,\n 19,\n ],\n 'neh': [\n 11,\n 20,\n 32,\n 23,\n 19,\n 19,\n 73,\n 18,\n 38,\n 39,\n 36,\n 47,\n 31,\n ],\n 'num': [\n 54,\n 34,\n 51,\n 49,\n 31,\n 27,\n 89,\n 26,\n 23,\n 36,\n 35,\n 16,\n 33,\n 45,\n 41,\n 50,\n 13,\n 32,\n 22,\n 29,\n 35,\n 41,\n 30,\n 25,\n 18,\n 65,\n 23,\n 31,\n 40,\n 16,\n 54,\n 42,\n 56,\n 29,\n 34,\n 13,\n ],\n 'oba': [\n 21,\n ],\n 'phm': [\n 25,\n ],\n 'php': [\n 30,\n 30,\n 21,\n 23,\n ],\n 'pro': [\n 33,\n 22,\n 35,\n 27,\n 23,\n 35,\n 27,\n 36,\n 18,\n 32,\n 31,\n 28,\n 25,\n 35,\n 33,\n 33,\n 28,\n 24,\n 29,\n 30,\n 31,\n 29,\n 35,\n 34,\n 28,\n 28,\n 27,\n 28,\n 27,\n 33,\n 31,\n ],\n 'psa': [\n 6,\n 12,\n 8,\n 8,\n 12,\n 10,\n 17,\n 9,\n 20,\n 18,\n 7,\n 8,\n 6,\n 7,\n 5,\n 11,\n 15,\n 50,\n 14,\n 9,\n 13,\n 31,\n 6,\n 10,\n 22,\n 12,\n 14,\n 9,\n 11,\n 12,\n 24,\n 11,\n 22,\n 22,\n 28,\n 12,\n 40,\n 22,\n 13,\n 17,\n 13,\n 11,\n 5,\n 26,\n 17,\n 11,\n 9,\n 14,\n 20,\n 23,\n 19,\n 9,\n 6,\n 7,\n 23,\n 13,\n 11,\n 11,\n 17,\n 12,\n 8,\n 12,\n 11,\n 10,\n 13,\n 20,\n 7,\n 35,\n 36,\n 5,\n 24,\n 20,\n 28,\n 23,\n 10,\n 12,\n 20,\n 72,\n 13,\n 19,\n 16,\n 8,\n 18,\n 12,\n 13,\n 17,\n 7,\n 18,\n 52,\n 17,\n 16,\n 15,\n 5,\n 23,\n 11,\n 13,\n 12,\n 9,\n 9,\n 5,\n 8,\n 28,\n 22,\n 35,\n 45,\n 48,\n 43,\n 13,\n 31,\n 7,\n 10,\n 10,\n 9,\n 8,\n 18,\n 19,\n 2,\n 29,\n 176,\n 7,\n 8,\n 9,\n 4,\n 8,\n 5,\n 6,\n 5,\n 6,\n 8,\n 8,\n 3,\n 18,\n 3,\n 3,\n 21,\n 26,\n 9,\n 8,\n 24,\n 13,\n 10,\n 7,\n 12,\n 15,\n 21,\n 10,\n 20,\n 14,\n 9,\n 6,\n ],\n 'rev': [\n 20,\n 29,\n 22,\n 11,\n 14,\n 17,\n 17,\n 13,\n 21,\n 11,\n 19,\n 18, // 12:18 Many translations append 12:18 to 12:17 as the final verse\n 18,\n 20,\n 8,\n 21,\n 18,\n 24,\n 21,\n 15,\n 27,\n 21,\n ],\n 'rom': [\n 32,\n 29,\n 31,\n 25,\n 21,\n 23,\n 25,\n 39,\n 33,\n 21,\n 36,\n 21,\n 14,\n 26,\n 33,\n 27,\n ],\n 'rut': [\n 22,\n 23,\n 18,\n 22,\n ],\n 'sng': [\n 17,\n 17,\n 11,\n 16,\n 16,\n 13,\n 13,\n 14,\n ],\n 'tit': [\n 16,\n 15,\n 15,\n ],\n 'zec': [\n 21,\n 13,\n 10,\n 14,\n 11,\n 15,\n 14,\n 23,\n 17,\n 12,\n 17,\n 14,\n 9,\n 21,\n ],\n 'zep': [\n 18,\n 15,\n 20,\n ],\n})\n", "\nimport {PassageReference, BookNamesArg} from './passage.js'\n\n\nexport interface PassageReferenceMatch {\n ref:PassageReference\n text:string\n index:number\n index_from_prev_match:number\n}\n\n\n// Regex strings used for identifying passage references in blocks of text\n// NOTE Allow two spaces but no more, to be forgiving but not match weird text\nconst regex_verse_sep = '[:\uFF1A\\\\.]'\nconst regex_book_num_prefix = '(?:(?:[123]|I{1,3}) ? ?)?'\nconst regex_book_name_tmpl = '\\\\p{Letter}[\\\\p{Letter}\\\\p{Dash} ]{MIN_MID,16}END_LETTER\\\\.? ? ?'\nconst regex_integer_with_opt_sep =\n '\\\\d{1,3}[abc]?(?: ? ?' + regex_verse_sep + ' ? ?\\\\d{1,3}[abc]?)?'\nconst regex_verse_range = regex_integer_with_opt_sep + '(?: ? ?\\\\p{Dash} ? 
?'\n + regex_integer_with_opt_sep + ')?'\nconst regex_trailing = '(?![\\\\d\\\\p{Letter}@#$%])' // Doesn't make sense to be followed by these\n\nconst regex_between_ranges = ' ? ?[,;] ? ?'\nconst regex_additional_range = regex_between_ranges + '(' + regex_verse_range + ')' + regex_trailing\n\n\n// Detect the text and position of passage references in a block of text\n// Whole books aren't detected (e.g. Philemon) only references with a range (e.g. Philemon 1)\nexport function* detect_references(text:string, book_names?:BookNamesArg,\n exclude_book_names?:string[], min_chars=2, match_from_start=true)\n :Generator<PassageReferenceMatch, null, undefined>{\n\n // Shortcut for calling from_string\n const from_string = (value:string) => {\n return PassageReference.from_string(value, book_names, exclude_book_names, min_chars,\n match_from_start)\n }\n\n // Generate regexs with dynamic value based on min_chars for book name\n // MIN_MID is -2 as first and last char already specified\n // END_LETTER is not present if min_chars is 1\n const regex_book_name = regex_book_name_tmpl\n .replace('MIN_MID', String(Math.max(0, min_chars - 2)))\n .replace('END_LETTER', min_chars > 1 ? '\\\\p{Letter}' : '')\n const regex_complete =\n regex_book_num_prefix + regex_book_name + regex_verse_range + regex_trailing\n const regex_book_check =\n regex_between_ranges + '(' + regex_book_num_prefix + regex_book_name + ')'\n\n // Create regex (will manually manipulate lastIndex property of it)\n const regex = new RegExp(regex_complete, 'uig')\n\n // Keep track of end of last match\n // This is useful for callers to know if they modify the text as they go (changing its length)\n let end_of_prev_match = 0\n\n // Loop until find a valid ref (not all regex matches will be valid)\n while (true){\n const match = regex.exec(text)\n if (!match){\n return null // Either no matches or no valid matches...\n }\n\n // Confirm match is actually a valid ref\n const ref = from_string(match[0])\n if (ref && ref.args_valid){\n yield {\n ref,\n text: match[0],\n index: match.index,\n index_from_prev_match: match.index - end_of_prev_match,\n }\n end_of_prev_match = match.index + match[0].length\n\n // See if additional ranges immediately after this ref\n // WARN Sticky flag 'y' needed to ensure match is at start of lastIndex\n const add_regex = new RegExp(regex_additional_range, 'uiy')\n add_regex.lastIndex = regex.lastIndex // Move up to where main regex is up to\n while (true){\n\n // If followed by a valid book name, skip check for additional ranges\n // E.g. 
(John 1:1,3, 3 John 1)\n // WARN Sticky flag 'y' needed to ensure match is at start of lastIndex\n const book_look_ahead = new RegExp(regex_book_check, 'uiy')\n book_look_ahead.lastIndex = add_regex.lastIndex\n const possible_book = book_look_ahead.exec(text)\n if (possible_book && from_string(possible_book[1]!)){\n break\n }\n\n const add_match = add_regex.exec(text)\n if (!add_match){\n break\n }\n\n // Since this regex uses a capture group, need to get index of capture\n const add_match_real_index = add_match.index + add_match[0].indexOf(add_match[1]!)\n\n // Confirm valid ref, prefixing with book (and opt end chapter) from main ref\n let prefix = ref.book\n const has_verse_sep = new RegExp(regex_verse_sep).test(add_match[1]!)\n if (!has_verse_sep && ['verse', 'range_verses', 'range_multi'].includes(ref.type)){\n prefix += `${ref.end_chapter}:`\n }\n const add_ref = from_string(prefix + add_match[1]!)\n if (!add_ref || !add_ref.args_valid){\n break\n }\n yield {\n ref: add_ref,\n text: add_match[1]!,\n index: add_match_real_index,\n index_from_prev_match: add_match_real_index - end_of_prev_match,\n }\n end_of_prev_match = add_match_real_index + add_match[1]!.length\n\n // Move main regex up to where successful additional ranges regex is up to\n // WARN Only if larger as lastIndex will reset to 0 at end of string\n if (add_regex.lastIndex > regex.lastIndex){\n regex.lastIndex = add_regex.lastIndex\n }\n }\n\n } else {\n // If invalid, try next word as match might still have included a partial ref\n // e.g. \"in 1 Corinthians 9\" -> \"in 1\" -> \"1 Corinthians 9\"\n const chars_to_next_word = match[0].indexOf(' ', 1)\n if (chars_to_next_word >= 1){\n // Backtrack to exclude just first word of previous match\n regex.lastIndex -= (match[0].length - chars_to_next_word - 1)\n }\n }\n }\n}\n", "\nimport {last_verse} from '@gracious.tech/bible-references'\n\n\n// Create parser for USX and return required elements for traversing doc\nexport function parse_usx(xml:string, parser:typeof DOMParser){\n\n // Create parser\n const doc:XMLDocument = new parser().parseFromString(xml, 'application/xml')\n const usx_element = doc.documentElement as Element\n\n // Confirm was given a USX doc\n if (!usx_element || usx_element.nodeName !== 'usx') {\n throw new Error(\"Contents is not USX (missing <usx> root element)\")\n }\n\n // Identify book so can determine expected chapter/verse numbers\n const book_element = usx_element.getElementsByTagName('book')[0]\n if (!book_element){\n throw new Error(\"USX is missing <book> element\")\n }\n const book_code = book_element.getAttribute('code')?.toLowerCase()\n if (!book_code || !(book_code in last_verse)){\n throw Error(`Book code invalid: ${book_code!}`)\n }\n\n // Extract book names in file\n const get_text = (style:string) =>\n usx_element.querySelector(`:root > para[style=\"${style}\"]`)?.textContent?.trim()\n const para_h = get_text('h') // Running header text (i.e. 
succinct full name of book)\n const para_toc1 = get_text('toc1') // Long name\n const para_toc2 = get_text('toc2') // Short name (but often longer than 'h')\n const para_toc3 = get_text('toc3') // Abbreviation\n\n // Ensure a value exists for every name type\n const name_normal = para_h || para_toc2 || para_toc1 || ''\n const name_long = para_toc1 || para_toc2 || name_normal\n const name_abbrev = para_toc3 || name_normal.slice(0, 6)\n\n // Return elements\n return {\n doc,\n usx_element,\n book_code,\n book_name: {\n normal: name_normal,\n long: name_long,\n abbrev: name_abbrev,\n },\n num_verses: last_verse[book_code]!,\n }\n}\n", "\nexport const ignored_elements:readonly string[] = [\n 'book', // Book marker meaningless since books served separately\n 'table', // TODO Tables are probably non-biblical content (but confirm)\n 'row', // Part of a table\n 'cell', // Part of a table\n 'sidebar', // Non-biblical info not tied to specific verse\n 'periph', // Non-biblical extra info\n 'figure', // Illustrations etc\n 'optbreak', // Line breaks that are optional (and opting not to use)\n 'ms', // TODO Multi-purpose markers (could be useful in future)\n 'ref', // Passage references (can allow if within a <note> but ignored by default)\n]\n\n\nexport const ignored_para_styles:readonly string[] = [\n\n // <para> Identification [exclude all] - Running headings & table of contents\n 'ide', // See https://github.com/schierlm/BibleMultiConverter/issues/67\n 'rem', // Remarks (valid in schema though missed in docs)\n 'h', 'h1', 'h2', 'h3', 'h4',\n 'toc1', 'toc2', 'toc3',\n 'toca1', 'toca2', 'toca3',\n\n /* <para> Introductions [exclude all] - Introductionary (non-biblical) content\n Which might be helpful in a printed book, but intro material in apps is usually bad UX,\n and users that really care can research a translations methodology themselves\n */\n 'imt', 'imt1', 'imt2', 'imt3', 'imt4',\n 'is', 'is1', 'is2', 'is3', 'is4',\n 'ip',\n 'ipi',\n 'im',\n 'imi',\n 'ipq',\n 'imq',\n 'ipr',\n 'iq', 'iq1', 'iq2', 'iq3', 'iq4',\n 'ib',\n 'ili', 'ili1', 'ili2', 'ili3', 'ili4',\n 'iot',\n 'io', 'io1', 'io2', 'io3', 'io4',\n 'iex',\n 'imte',\n 'ie',\n\n /* <para> Headings [exclude some] - Exclude book & chapter headings but keep section headings\n Not excluded: ms# | mr | s# | sr | d | sp | sd#\n */\n 'mt', 'mt1', 'mt2', 'mt3', 'mt4',\n 'mte', 'mte1', 'mte2', 'mte3', 'mte4',\n 'cl',\n 'cd', // Non-biblical chapter summary, more than heading\n 'r', // Parallels to be provided by external data\n]\n\n\nexport const ignored_char_styles = [\n 'rq', // In-text cross-reference (use own system instead)\n]\n\n\nexport const ignored_note_styles = [\n 'x', 'ex', // Cross-reference footnotes (use own system instead)\n]\n\n\nexport const headings_major = ['ms', 'ms1', 'ms2', 'ms3', 'ms4', 'mr']\nexport const headings_regular = ['s', 's1', 's2', 's3', 's4', 'sr']\nexport const headings_minor = ['sp', 'qa']\n\n\n// Para styles that should have a single linebreak between them\nexport const non_para_para = [\n 'q', 'q1', 'q2', 'q3', 'q4', // Poetry\n 'qr', // Poetic refrain\n 'li', 'li1', 'li2', 'li3', 'li4', // List item\n 'lim', 'lim1', 'lim2', 'lim3', 'lim4', // Embedded list item\n]\n", "\nimport {parse_usx} from './common.js'\nimport {ignored_elements, ignored_para_styles, ignored_char_styles, ignored_note_styles,\n headings_major, headings_regular, headings_minor} from './elements.js'\nimport type {BibleJsonHtml} from './shared_types'\n\n\ninterface ParserState {\n book:string\n chapter:number\n verse:number\n 
para_open:string\n unknown_owner:string\n contents:BibleJsonHtml['contents']\n alignment:boolean\n}\n\n\n// Convert USX to HTML-JSON\nexport function usx_to_json_html(xml:string, alignment=true, parser=DOMParser): BibleJsonHtml {\n\n // Parse USX\n const {doc, usx_element, num_verses, book_code, book_name} = parse_usx(xml, parser)\n\n // Util for escaping text\n function escape_text(text:string|undefined|null){\n if (!text){\n return ''\n }\n const div = doc.createElement('div')\n div.appendChild(doc.createTextNode(text))\n return div.innerHTML\n }\n\n // Prepare state tracking\n const state:ParserState = {\n book: book_code,\n chapter: 0,\n verse: 0,\n para_open: '', // The opening tag of the current <para> element\n unknown_owner: '', // Content that should be prepended to next verse if current verse done\n // Prepare output with empty strings for every verse\n contents: [\n [], // Chapter 0\n ...num_verses.map(num_verses_in_ch => {\n const array = []\n // NOTE +1 for verse 0\n for (let i = 0; i < num_verses_in_ch + 1; i++){\n array.push(['', '', ''])\n }\n return array\n }),\n ] as BibleJsonHtml['contents'],\n alignment, // So available to `process_contents()`\n }\n\n // Iterate over <usx> children (elements only, text nodes not allowed at root level)\n for (const child of Array.from(usx_element.children)){\n\n // Ignore extra-biblical elements\n if (ignored_elements.includes(child.nodeName)){\n continue\n }\n\n // Handle chapter markers\n // NOTE Paragraphs never flow over chapters in USX\n if (child.nodeName === 'chapter') {\n\n if (child.hasAttribute('eid')){\n continue // Ignore chapter end markers\n }\n\n const chapter_number = parseInt(child.getAttribute('number') ?? '0', 10)\n if (chapter_number < 1 || chapter_number >= state.contents.length){\n throw new Error(`Chapter number isn't valid for the book: ${chapter_number}`)\n }\n if (chapter_number !== state.chapter + 1){\n throw new Error(`Chapter ${chapter_number} isn't +1 previous ${state.chapter}`)\n }\n state.chapter = chapter_number\n state.verse = 0\n // Add a heading for the chapter start\n add_html(state, `<h3 data-c=\"${state.chapter}\">${state.chapter}</h3>`, true)\n continue\n }\n\n // Ignore any nodes before the first chapter marker\n if (state.chapter === 0){\n continue\n }\n\n // The only element type remaining should be <para>, so skip all others to keep logic simple\n // NOTE <para> elements cannot be nested\n if (child.nodeName !== 'para'){\n console.warn(`Ignoring non-para element at document root level: ${child.nodeName}`)\n continue\n }\n\n // Get <para> style, which greatly determines how it should be processed\n const style = child.getAttribute('style') || ''\n\n // Ignore extra-biblical content\n if (ignored_para_styles.includes(style)) {\n continue\n }\n\n // Convert major headings to <h2>\n // TODO Not currently supporting nested elements within heading contents (like <char>)\n if (headings_major.includes(style)){\n add_html(state, `<h2 class=\"fb-${style}\">${escape_text(child.textContent)}</h2>`, true)\n continue\n }\n\n // Convert section headings to <h4>\n if (headings_regular.includes(style)) {\n add_html(state, `<h4 class=\"fb-${style}\">${escape_text(child.textContent)}</h4>`, true)\n continue\n }\n\n // Convert minor headings to <h5>\n if (headings_minor.includes(style)) {\n add_html(state, `<h5 class=\"fb-${style}\">${escape_text(child.textContent)}</h5>`, true)\n continue\n }\n\n // All other styles are standard <p> elements, so start one\n state.para_open = `<p class=\"fb-${style}\">`\n 
add_html(state, state.para_open, true)\n process_contents(state, child.childNodes, escape_text)\n add_html(state, '</p>')\n state.para_open = ''\n }\n\n // Verify no content ended up in chapter 0 or a verse 0, as they should never be used\n if (state.contents[0]!.length){\n state.contents[0] = []\n console.warn(`Content exists before chapter 1 marker (${book_code})`)\n }\n for (let ch = 1; ch < state.contents.length; ch++){\n if (state.contents[ch]![0]!.join('')){\n state.contents[ch]![0] = ['', '', '']\n console.warn(`Content exists before verse 1 marker for chapter ${ch} (${book_code})`)\n }\n }\n\n return {\n book: book_code,\n name: book_name,\n contents: state.contents,\n }\n}\n\n\nfunction process_contents(state:ParserState, nodes_raw:NodeListOf<ChildNode>,\n escape_text:(t:string|undefined|null)=>string, allow_nonbib=false){\n // Process the contents of a node (nested or not) within a <para> element\n /* WARN It's important to call this between modifying state for opening/closing tags\n e.g.\n add_html(state, '<sup>')\n process_contents(state, element.childNodes)\n add_html(state, '</sup>')\n\n */\n\n // Drop any preceeding whitespace at the beginning of a paragraph as messes up later logic\n const nodes = Array.from(nodes_raw)\n if (nodes[0]?.nodeType === 3 && !nodes[0].textContent?.trim()){\n nodes.shift()\n }\n\n for (let index = 0; index < nodes.length; index++){\n const node = nodes[index]!\n\n // If first node is not a verse element, then any headings/etc being held in unknown_owner\n // belong to current verse and not the next one\n // NOTE Verifying verse != 0 important so superscriptions in psalms prepended to verse 1\n if (index === 0 && node.nodeName !== 'verse' && state.verse){\n state.contents[state.chapter]![state.verse]![1] += state.unknown_owner\n state.unknown_owner = ''\n }\n\n // Handle text nodes\n if (node.nodeType === 3){\n add_html(state, escape_text(node.textContent))\n }\n\n // Allow <ref> if within non-biblical content (e.g. a footnote)\n if (allow_nonbib && node.nodeName === 'ref'){\n // TODO Add loc metadata if come up with way to parse it\n process_contents(state, node.childNodes, escape_text, true)\n continue\n }\n\n // Ignore all other node types that aren't elements (e.g. comments), or on ignored list\n if (node.nodeType !== 1 || ignored_elements.includes(node.nodeName)){\n continue\n }\n const element = node as Element\n\n // Handle verse elements\n if (element.nodeName === 'verse'){\n\n // Ignore verse end markers\n // TODO Could ignore non-header content until next next start marker to be extra safe\n if (element.hasAttribute('eid')){\n continue\n }\n\n // Get the new verse number\n // NOTE If a range, stick everything in the first verse of the range (e.g. 17-18 -> 17)\n // NOTE Some translations have 1a 1b, in which case ignore 'b' onwards to concat\n const digits = /^\\d+/.exec(element.getAttribute('number') ?? '')\n const new_number = parseInt(digits?.[0] ?? 
'-1', 10)\n if (new_number === state.verse){\n continue // Ignore as probably '1b' after a '1a', etc.\n }\n if (new_number < 0 || new_number >= state.contents[state.chapter]!.length){\n throw new Error(`Invalid verse: ${state.book} ${state.chapter}:${new_number}`)\n }\n if (new_number < state.verse){\n throw new Error(\n `${new_number} less than ${state.book} ${state.chapter}:${state.verse}`)\n }\n\n // If not at beginning of a <para> then verse now ending is mid-paragraph\n if (index > 0){\n state.contents[state.chapter]![state.verse]![2] = '</p>'\n }\n\n // Switch to new verse\n state.verse = new_number\n\n // Any unknown owner data is now known to belong to the new verse\n state.contents[state.chapter]![state.verse]![1] += state.unknown_owner\n state.unknown_owner = ''\n\n // Add verse number to contents\n add_html(state, `<sup data-v=\"${state.chapter}:${state.verse}\">${state.verse}</sup>`)\n\n // If in middle of a <para> then need opening data\n if (index > 0){\n state.contents[state.chapter]![state.verse]![0] = state.para_open\n }\n }\n\n // Handle char elements\n if (element.nodeName === 'char'){\n\n // Get the char's style\n const char_style = element.getAttribute('style') ?? ''\n if (ignored_char_styles.includes(char_style)){\n continue\n }\n\n if (char_style === 'w'){\n // Handle alignment data\n if (state.alignment){\n // TODO Convert strong/lemma data to actual word by consulting a critical text\n add_html(state, '<span>')\n process_contents(state, element.childNodes, escape_text)\n add_html(state, '</span>')\n } else {\n // Include element contents only\n process_contents(state, element.childNodes, escape_text)\n }\n } else if (['ord', 'sup'].includes(char_style)){\n add_html(state, '<sup>')\n process_contents(state, element.childNodes, escape_text)\n add_html(state, '</sup>')\n } else if (char_style === 'rb'){\n add_html(state, '<ruby>')\n process_contents(state, element.childNodes, escape_text)\n // TODO Handle splitting of words with ':' (currently ignoring)\n const gloss = element.getAttribute('gloss')?.replaceAll(':', '')\n add_html(state, `<rt>${escape_text(gloss)}</rt>`)\n add_html(state, '</ruby>')\n } else {\n // Turn all other char styles into a <span>\n add_html(state, `<span class=\"fb-${char_style}\">`)\n process_contents(state, element.childNodes, escape_text)\n add_html(state, '</span>')\n }\n }\n\n // Handle note elements\n if (element.nodeName === 'note'){\n\n // Get the note's style\n const note_style = element.getAttribute('style') ?? 
''\n if (ignored_note_styles.includes(note_style)){\n continue\n }\n\n // Wrap notes in two spans so outer can display a marker and inner can be displaced\n add_html(state, '<span class=\"fb-note\"><span>')\n process_contents(state, element.childNodes, escape_text, true)\n add_html(state, '</span></span>')\n }\n }\n}\n\n\nfunction add_html(state:ParserState, content:string, may_belong_to_next_verse=false):void{\n // Add HTML to current verse, possibly buffering if may belong to next verse\n // NOTE Verifying verse != 0 important so superscriptions in psalms prepended to verse 1\n if (state.unknown_owner || may_belong_to_next_verse || !state.verse){\n state.unknown_owner += content\n } else {\n state.contents[state.chapter]![state.verse]![1] += content\n }\n}\n", "\nimport {parse_usx} from './common.js'\nimport {ignored_elements, ignored_para_styles, ignored_char_styles, ignored_note_styles,\n headings_major, headings_regular, headings_minor, non_para_para} from './elements.js'\n\nimport type {BibleJsonTxt, TxtContent} from './shared_types'\n\n\ninterface ParserState {\n book:string\n chapter:number\n verse:number\n prev_para_type:'para'|'break'|null\n unknown_owner:TxtContent[]\n contents:BibleJsonTxt['contents']\n}\n\n\nexport function usx_to_json_txt(xml:string, parser=DOMParser):BibleJsonTxt{\n\n // Parse USX\n const {usx_element, num_verses, book_code, book_name} = parse_usx(xml, parser)\n\n // Prepare state tracking\n const state:ParserState = {\n book: book_code,\n chapter: 0,\n verse: 0,\n prev_para_type: null, // Whether previous para was not actually a para (e.g. poetry)\n unknown_owner: [], // Content that should be prepended to next verse if current verse done\n // Prepare output with empty array for every verse\n contents: [\n [], // Chapter 0\n ...num_verses.map(num_verses_in_ch => {\n const array = []\n // NOTE +1 for verse 0\n for (let i = 0; i < num_verses_in_ch + 1; i++){\n array.push([])\n }\n return array\n }),\n ] as BibleJsonTxt['contents'],\n }\n\n // Iterate over <usx> children (elements only, text nodes not allowed at root level)\n for (const child of Array.from(usx_element.children)){\n\n // Get style for <para> in advance, as linebreaking logic needs it\n const style = child.getAttribute('style') || ''\n\n // Only know how many newlines to add once know next <para> type\n if (state.prev_para_type !== null){\n // Previously just added a <para> element\n if (state.prev_para_type === 'para' || !non_para_para.includes(style)){\n // Either previous was a para or current is, so add extra line break\n // i.e. Do not have two lines of poetry next to each other\n add_content(state, '\\n')\n }\n // Reset para type until know that this para won't be ignored (adding too many newlines)\n state.prev_para_type = null\n }\n\n // Ignore extra-biblical elements\n if (ignored_elements.includes(child.nodeName)){\n continue\n }\n\n // Handle chapter markers\n // NOTE Paragraphs never flow over chapters in USX\n if (child.nodeName === 'chapter') {\n\n if (child.hasAttribute('eid')){\n continue // Ignore chapter end markers\n }\n\n const chapter_number = parseInt(child.getAttribute('number') ?? 
'0', 10)\n if (chapter_number < 1 || chapter_number >= state.contents.length){\n throw new Error(`Chapter number isn't valid for the book: ${chapter_number}`)\n }\n if (chapter_number !== state.chapter + 1){\n throw new Error(`Chapter ${chapter_number} isn't +1 previous ${state.chapter}`)\n }\n state.chapter = chapter_number\n state.verse = 0\n continue\n }\n\n // Ignore any nodes before the first chapter marker\n if (state.chapter === 0){\n continue\n }\n\n // The only element type remaining should be <para>, so skip all others to keep logic simple\n // NOTE <para> elements cannot be nested\n if (child.nodeName !== 'para'){\n console.warn(`Ignoring non-para element at document root level: ${child.nodeName}`)\n continue\n }\n\n // Ignore extra-biblical content\n // NOTE <para style=b> only useful for forcing gap between poetry\n // This is already achieved in newline