UNPKG

sentencex

Version:
1 lines 117 kB
{"version":3,"file":"index.cjs","sources":["../../src/terminators.js","../../src/base.js","../../src/languages/en.js","../../src/languages/am.js","../../src/languages/ar.js","../../src/languages/hy.js","../../src/languages/bn.js","../../src/languages/bg.js","../../src/languages/my.js","../../src/languages/es.js","../../src/languages/ca.js","../../src/languages/da.js","../../src/languages/de.js","../../src/languages/nl.js","../../src/languages/fi.js","../../src/languages/fr.js","../../src/languages/el.js","../../src/languages/gu.js","../../src/languages/hi.js","../../src/languages/ja.js","../../src/languages/kn.js","../../src/languages/kk.js","../../src/languages/ml.js","../../src/languages/mr.js","../../src/languages/or.js","../../src/languages/pl.js","../../src/languages/pt.js","../../src/languages/pa.js","../../src/languages/ru.js","../../src/languages/sk.js","../../src/languages/ta.js","../../src/languages/te.js","../../src/languages/index.js","../../src/index.js"],"sourcesContent":["// unicode code points generated with Unicode::Tussle perl script:\n// unichars -aBbs '[\\p{Sentence_Break=STerm}\\p{Sentence_Break=ATerm}]' | awk '$2=\"\\\"\"$2\"\\\", //\"'\n// Refer: https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/SentenceBreakProperty.txt\nconst GLOBAL_SENTENCE_TERMINATORS = [\n '!', // U+00021 BC=ON BLK=Basic_Latin SC=Common EXCLAMATION MARK\n '.', // U+0002E BC=CS BLK=Basic_Latin SC=Common FULL STOP\n '?', // U+0003F BC=ON BLK=Basic_Latin SC=Common QUESTION MARK\n '։', // U+00589 BC=L BLK=Armenian SC=Armenian ARMENIAN FULL STOP\n '؝', // U+0061D BC=AL BLK=Arabic SC=Arabic ARABIC END OF TEXT MARK\n '؞', // U+0061E BC=AL BLK=Arabic SC=Arabic ARABIC TRIPLE DOT PUNCTUATION MARK\n '؟', // U+0061F BC=AL BLK=Arabic SC=Common ARABIC QUESTION MARK\n '۔', // U+006D4 BC=AL BLK=Arabic SC=Arabic ARABIC FULL STOP\n '܀', // U+00700 BC=AL BLK=Syriac SC=Syriac SYRIAC END OF PARAGRAPH\n '܁', // U+00701 BC=AL BLK=Syriac SC=Syriac SYRIAC SUPRALINEAR FULL STOP\n '܂', // U+00702 BC=AL BLK=Syriac SC=Syriac SYRIAC SUBLINEAR FULL STOP\n '߹', // U+007F9 BC=ON BLK=NKo SC=Nko NKO EXCLAMATION MARK\n '࠷', // U+00837 BC=R BLK=Samaritan SC=Samaritan SAMARITAN PUNCTUATION MELODIC QITSA\n '࠹', // U+00839 BC=R BLK=Samaritan SC=Samaritan SAMARITAN PUNCTUATION QITSA\n '࠽', // U+0083D BC=R BLK=Samaritan SC=Samaritan SAMARITAN PUNCTUATION SOF MASHFAAT\n '࠾', // U+0083E BC=R BLK=Samaritan SC=Samaritan SAMARITAN PUNCTUATION ANNAAU\n '।', // U+00964 BC=L BLK=Devanagari SC=Common DEVANAGARI DANDA\n '॥', // U+00965 BC=L BLK=Devanagari SC=Common DEVANAGARI DOUBLE DANDA\n '၊', // U+0104A BC=L BLK=Myanmar SC=Myanmar MYANMAR SIGN LITTLE SECTION\n '။', // U+0104B BC=L BLK=Myanmar SC=Myanmar MYANMAR SIGN SECTION\n '።', // U+01362 BC=L BLK=Ethiopic SC=Ethiopic ETHIOPIC FULL STOP\n '፧', // U+01367 BC=L BLK=Ethiopic SC=Ethiopic ETHIOPIC QUESTION MARK\n '፨', // U+01368 BC=L BLK=Ethiopic SC=Ethiopic ETHIOPIC PARAGRAPH SEPARATOR\n '᙮', // U+0166E BC=L BLK=Unified_Canadian_Aboriginal_Syllabics SC=Canadian_Aboriginal CANADIAN SYLLABICS FULL STOP\n '᜵', // U+01735 BC=L BLK=Hanunoo SC=Common PHILIPPINE SINGLE PUNCTUATION\n '᜶', // U+01736 BC=L BLK=Hanunoo SC=Common PHILIPPINE DOUBLE PUNCTUATION\n '᠃', // U+01803 BC=ON BLK=Mongolian SC=Common MONGOLIAN FULL STOP\n '᠉', // U+01809 BC=ON BLK=Mongolian SC=Mongolian MONGOLIAN MANCHU FULL STOP\n '᥄', // U+01944 BC=ON BLK=Limbu SC=Limbu LIMBU EXCLAMATION MARK\n '᥅', // U+01945 BC=ON BLK=Limbu SC=Limbu LIMBU QUESTION MARK\n '᪨', // U+01AA8 BC=L BLK=Tai_Tham SC=Tai_Tham TAI THAM SIGN KAAN\n '᪩', // U+01AA9 BC=L BLK=Tai_Tham SC=Tai_Tham TAI THAM SIGN KAANKUU\n '᪪', // U+01AAA BC=L BLK=Tai_Tham SC=Tai_Tham TAI THAM SIGN SATKAAN\n '᪫', // U+01AAB BC=L BLK=Tai_Tham SC=Tai_Tham TAI THAM SIGN SATKAANKUU\n '᭚', // U+01B5A BC=L BLK=Balinese SC=Balinese BALINESE PANTI\n '᭛', // U+01B5B BC=L BLK=Balinese SC=Balinese BALINESE PAMADA\n '᭞', // U+01B5E BC=L BLK=Balinese SC=Balinese BALINESE CARIK SIKI\n '᭟', // U+01B5F BC=L BLK=Balinese SC=Balinese BALINESE CARIK PAREREN\n '᭽', // U+01B7D BC=L BLK=Balinese SC=Balinese BALINESE PANTI LANTANG\n '᭾', // U+01B7E BC=L BLK=Balinese SC=Balinese BALINESE PAMADA LANTANG\n '᰻', // U+01C3B BC=L BLK=Lepcha SC=Lepcha LEPCHA PUNCTUATION TA-ROL\n '᰼', // U+01C3C BC=L BLK=Lepcha SC=Lepcha LEPCHA PUNCTUATION NYET THYOOM TA-ROL\n '᱾', // U+01C7E BC=L BLK=Ol_Chiki SC=Ol_Chiki OL CHIKI PUNCTUATION MUCAAD\n '᱿', // U+01C7F BC=L BLK=Ol_Chiki SC=Ol_Chiki OL CHIKI PUNCTUATION DOUBLE MUCAAD\n '․', // U+02024 BC=ON BLK=General_Punctuation SC=Common ONE DOT LEADER\n '‼', // U+0203C BC=ON BLK=General_Punctuation SC=Common DOUBLE EXCLAMATION MARK\n '‽', // U+0203D BC=ON BLK=General_Punctuation SC=Common INTERROBANG\n '⁇', // U+02047 BC=ON BLK=General_Punctuation SC=Common DOUBLE QUESTION MARK\n '⁈', // U+02048 BC=ON BLK=General_Punctuation SC=Common QUESTION EXCLAMATION MARK\n '⁉', // U+02049 BC=ON BLK=General_Punctuation SC=Common EXCLAMATION QUESTION MARK\n '⸮', // U+02E2E BC=ON BLK=Supplemental_Punctuation SC=Common REVERSED QUESTION MARK\n '⸼', // U+02E3C BC=ON BLK=Supplemental_Punctuation SC=Common STENOGRAPHIC FULL STOP\n '⹓', // U+02E53 BC=ON BLK=Supplemental_Punctuation SC=Common MEDIEVAL EXCLAMATION MARK\n '⹔', // U+02E54 BC=ON BLK=Supplemental_Punctuation SC=Common MEDIEVAL QUESTION MARK\n '꓿', // U+0A4FF BC=L BLK=Lisu SC=Lisu LISU PUNCTUATION FULL STOP\n '꘎', // U+0A60E BC=ON BLK=Vai SC=Vai VAI FULL STOP\n '꘏', // U+0A60F BC=ON BLK=Vai SC=Vai VAI QUESTION MARK\n '꛳', // U+0A6F3 BC=L BLK=Bamum SC=Bamum BAMUM FULL STOP\n '꛷', // U+0A6F7 BC=L BLK=Bamum SC=Bamum BAMUM QUESTION MARK\n '꡶', // U+0A876 BC=ON BLK=Phags-pa SC=Phags_Pa PHAGS-PA MARK SHAD\n '꡷', // U+0A877 BC=ON BLK=Phags-pa SC=Phags_Pa PHAGS-PA MARK DOUBLE SHAD\n '꣎', // U+0A8CE BC=L BLK=Saurashtra SC=Saurashtra SAURASHTRA DANDA\n '꣏', // U+0A8CF BC=L BLK=Saurashtra SC=Saurashtra SAURASHTRA DOUBLE DANDA\n '꤯', // U+0A92F BC=L BLK=Kayah_Li SC=Kayah_Li KAYAH LI SIGN SHYA\n '꧈', // U+0A9C8 BC=L BLK=Javanese SC=Javanese JAVANESE PADA LINGSA\n '꧉', // U+0A9C9 BC=L BLK=Javanese SC=Javanese JAVANESE PADA LUNGSI\n '꩝', // U+0AA5D BC=L BLK=Cham SC=Cham CHAM PUNCTUATION DANDA\n '꩞', // U+0AA5E BC=L BLK=Cham SC=Cham CHAM PUNCTUATION DOUBLE DANDA\n '꩟', // U+0AA5F BC=L BLK=Cham SC=Cham CHAM PUNCTUATION TRIPLE DANDA\n '꫰', // U+0AAF0 BC=L BLK=Meetei_Mayek_Extensions SC=Meetei_Mayek MEETEI MAYEK CHEIKHAN\n '꫱', // U+0AAF1 BC=L BLK=Meetei_Mayek_Extensions SC=Meetei_Mayek MEETEI MAYEK AHANG KHUDAM\n '꯫', // U+0ABEB BC=L BLK=Meetei_Mayek SC=Meetei_Mayek MEETEI MAYEK CHEIKHEI\n '﹒', // U+0FE52 BC=CS BLK=Small_Form_Variants SC=Common SMALL FULL STOP\n '﹖', // U+0FE56 BC=ON BLK=Small_Form_Variants SC=Common SMALL QUESTION MARK\n '﹗', // U+0FE57 BC=ON BLK=Small_Form_Variants SC=Common SMALL EXCLAMATION MARK\n '!', // U+0FF01 BC=ON BLK=Halfwidth_and_Fullwidth_Forms SC=Common FULLWIDTH EXCLAMATION MARK\n '.', // U+0FF0E BC=CS BLK=Halfwidth_and_Fullwidth_Forms SC=Common FULLWIDTH FULL STOP\n '?', // U+0FF1F BC=ON BLK=Halfwidth_and_Fullwidth_Forms SC=Common FULLWIDTH QUESTION MARK\n '𐩖', // U+10A56 BC=R BLK=Kharoshthi SC=Kharoshthi KHAROSHTHI PUNCTUATION DANDA\n '𐩗', // U+10A57 BC=R BLK=Kharoshthi SC=Kharoshthi KHAROSHTHI PUNCTUATION DOUBLE DANDA\n '𐽕', // U+10F55 BC=AL BLK=Sogdian SC=Sogdian SOGDIAN PUNCTUATION TWO VERTICAL BARS\n '𐽖', // U+10F56 BC=AL BLK=Sogdian SC=Sogdian SOGDIAN PUNCTUATION TWO VERTICAL BARS WITH DOTS\n '𐽗', // U+10F57 BC=AL BLK=Sogdian SC=Sogdian SOGDIAN PUNCTUATION CIRCLE WITH DOT\n '𐽘', // U+10F58 BC=AL BLK=Sogdian SC=Sogdian SOGDIAN PUNCTUATION TWO CIRCLES WITH DOTS\n '𐽙', // U+10F59 BC=AL BLK=Sogdian SC=Sogdian SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT\n '𐾆', // U+10F86 BC=R BLK=Old_Uyghur SC=Old_Uyghur OLD UYGHUR PUNCTUATION BAR\n '𐾇', // U+10F87 BC=R BLK=Old_Uyghur SC=Old_Uyghur OLD UYGHUR PUNCTUATION TWO BARS\n '𐾈', // U+10F88 BC=R BLK=Old_Uyghur SC=Old_Uyghur OLD UYGHUR PUNCTUATION TWO DOTS\n '𐾉', // U+10F89 BC=R BLK=Old_Uyghur SC=Old_Uyghur OLD UYGHUR PUNCTUATION FOUR DOTS\n '𑁇', // U+11047 BC=L BLK=Brahmi SC=Brahmi BRAHMI DANDA\n '𑁈', // U+11048 BC=L BLK=Brahmi SC=Brahmi BRAHMI DOUBLE DANDA\n '𑂾', // U+110BE BC=L BLK=Kaithi SC=Kaithi KAITHI SECTION MARK\n '𑂿', // U+110BF BC=L BLK=Kaithi SC=Kaithi KAITHI DOUBLE SECTION MARK\n '𑃀', // U+110C0 BC=L BLK=Kaithi SC=Kaithi KAITHI DANDA\n '𑃁', // U+110C1 BC=L BLK=Kaithi SC=Kaithi KAITHI DOUBLE DANDA\n '𑅁', // U+11141 BC=L BLK=Chakma SC=Chakma CHAKMA DANDA\n '𑅂', // U+11142 BC=L BLK=Chakma SC=Chakma CHAKMA DOUBLE DANDA\n '𑅃', // U+11143 BC=L BLK=Chakma SC=Chakma CHAKMA QUESTION MARK\n '𑇅', // U+111C5 BC=L BLK=Sharada SC=Sharada SHARADA DANDA\n '𑇆', // U+111C6 BC=L BLK=Sharada SC=Sharada SHARADA DOUBLE DANDA\n '𑇍', // U+111CD BC=L BLK=Sharada SC=Sharada SHARADA SUTRA MARK\n '𑇞', // U+111DE BC=L BLK=Sharada SC=Sharada SHARADA SECTION MARK-1\n '𑇟', // U+111DF BC=L BLK=Sharada SC=Sharada SHARADA SECTION MARK-2\n '𑈸', // U+11238 BC=L BLK=Khojki SC=Khojki KHOJKI DANDA\n '𑈹', // U+11239 BC=L BLK=Khojki SC=Khojki KHOJKI DOUBLE DANDA\n '𑈻', // U+1123B BC=L BLK=Khojki SC=Khojki KHOJKI SECTION MARK\n '𑈼', // U+1123C BC=L BLK=Khojki SC=Khojki KHOJKI DOUBLE SECTION MARK\n '𑊩', // U+112A9 BC=L BLK=Multani SC=Multani MULTANI SECTION MARK\n '𑑋', // U+1144B BC=L BLK=Newa SC=Newa NEWA DANDA\n '𑑌', // U+1144C BC=L BLK=Newa SC=Newa NEWA DOUBLE DANDA\n '𑗂', // U+115C2 BC=L BLK=Siddham SC=Siddham SIDDHAM DANDA\n '𑗃', // U+115C3 BC=L BLK=Siddham SC=Siddham SIDDHAM DOUBLE DANDA\n '𑗉', // U+115C9 BC=L BLK=Siddham SC=Siddham SIDDHAM END OF TEXT MARK\n '𑗊', // U+115CA BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH TRIDENT AND U-SHAPED ORNAMENTS\n '𑗋', // U+115CB BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH TRIDENT AND DOTTED CRESCENTS\n '𑗌', // U+115CC BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH RAYS AND DOTTED CRESCENTS\n '𑗍', // U+115CD BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH RAYS AND DOTTED DOUBLE CRESCENTS\n '𑗎', // U+115CE BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH RAYS AND DOTTED TRIPLE CRESCENTS\n '𑗏', // U+115CF BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK DOUBLE RING\n '𑗐', // U+115D0 BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK DOUBLE RING WITH RAYS\n '𑗑', // U+115D1 BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH DOUBLE CRESCENTS\n '𑗒', // U+115D2 BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH TRIPLE CRESCENTS\n '𑗓', // U+115D3 BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH QUADRUPLE CRESCENTS\n '𑗔', // U+115D4 BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH SEPTUPLE CRESCENTS\n '𑗕', // U+115D5 BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH CIRCLES AND RAYS\n '𑗖', // U+115D6 BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH CIRCLES AND TWO ENCLOSURES\n '𑗗', // U+115D7 BC=L BLK=Siddham SC=Siddham SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES\n '𑙁', // U+11641 BC=L BLK=Modi SC=Modi MODI DANDA\n '𑙂', // U+11642 BC=L BLK=Modi SC=Modi MODI DOUBLE DANDA\n '𑜼', // U+1173C BC=L BLK=Ahom SC=Ahom AHOM SIGN SMALL SECTION\n '𑜽', // U+1173D BC=L BLK=Ahom SC=Ahom AHOM SIGN SECTION\n '𑜾', // U+1173E BC=L BLK=Ahom SC=Ahom AHOM SIGN RULAI\n '𑥄', // U+11944 BC=L BLK=Dives_Akuru SC=Dives_Akuru DIVES AKURU DOUBLE DANDA\n '𑥆', // U+11946 BC=L BLK=Dives_Akuru SC=Dives_Akuru DIVES AKURU END OF TEXT MARK\n '𑩂', // U+11A42 BC=L BLK=Zanabazar_Square SC=Zanabazar_Square ZANABAZAR SQUARE MARK SHAD\n '𑩃', // U+11A43 BC=L BLK=Zanabazar_Square SC=Zanabazar_Square ZANABAZAR SQUARE MARK DOUBLE SHAD\n '𑪛', // U+11A9B BC=L BLK=Soyombo SC=Soyombo SOYOMBO MARK SHAD\n '𑪜', // U+11A9C BC=L BLK=Soyombo SC=Soyombo SOYOMBO MARK DOUBLE SHAD\n '𑱁', // U+11C41 BC=L BLK=Bhaiksuki SC=Bhaiksuki BHAIKSUKI DANDA\n '𑱂', // U+11C42 BC=L BLK=Bhaiksuki SC=Bhaiksuki BHAIKSUKI DOUBLE DANDA\n '𑻷', // U+11EF7 BC=L BLK=Makasar SC=Makasar MAKASAR PASSIMBANG\n '𑻸', // U+11EF8 BC=L BLK=Makasar SC=Makasar MAKASAR END OF SECTION\n '𑽃', // U+11F43 BC=L BLK=Kawi SC=Kawi KAWI DANDA\n '𑽄', // U+11F44 BC=L BLK=Kawi SC=Kawi KAWI DOUBLE DANDA\n '𖩮', // U+16A6E BC=L BLK=Mro SC=Mro MRO DANDA\n '𖩯', // U+16A6F BC=L BLK=Mro SC=Mro MRO DOUBLE DANDA\n '𖫵', // U+16AF5 BC=L BLK=Bassa_Vah SC=Bassa_Vah BASSA VAH FULL STOP\n '𖬷', // U+16B37 BC=L BLK=Pahawh_Hmong SC=Pahawh_Hmong PAHAWH HMONG SIGN VOS THOM\n '𖬸', // U+16B38 BC=L BLK=Pahawh_Hmong SC=Pahawh_Hmong PAHAWH HMONG SIGN VOS TSHAB CEEB\n '𖭄', // U+16B44 BC=L BLK=Pahawh_Hmong SC=Pahawh_Hmong PAHAWH HMONG SIGN XAUS\n '𖺘', // U+16E98 BC=L BLK=Medefaidrin SC=Medefaidrin MEDEFAIDRIN FULL STOP\n '𛲟', // U+1BC9F BC=L BLK=Duployan SC=Duployan DUPLOYAN PUNCTUATION CHINOOK FULL STOP\n '𝪈' // U+1DA88 BC=L BLK=Sutton_SignWriting SC=SignWriting SIGNWRITING FULL STOP\n].concat([\n // Additional manual entries.\n '。', // U+3002 IDEOGRAPHIC FULL STOP\n '。' // U+FF61 HALFWIDTH IDEOGRAPHIC FULL STOP\n])\n\nexport default GLOBAL_SENTENCE_TERMINATORS\n","import GLOBAL_SENTENCE_TERMINATORS from './terminators.js'\n\nexport default class Language {\n static GLOBAL_SENTENCE_BOUNDARY_REGEX = new RegExp(\n `[${GLOBAL_SENTENCE_TERMINATORS.join('')}]+`,\n 'gu'\n )\n\n static EXCLAMATION_WORDS = new Set(\n (\n '!Xũ !Kung ǃʼOǃKung !Xuun !Kung-Ekoka ǃHu ǃKhung ǃKu ǃung ǃXo ǃXû ǃXung ' +\n 'ǃXũ !Xun Yahoo! Y!J Yum!'\n ).split(' ')\n )\n\n static quotePairs = {\n '\"': '\"',\n \" '\": \"'\", // Need a space before ' to avoid capturing don't , l'Avv etc\n '«': '»',\n '‘': '’',\n '‚': '‚',\n '“': '”',\n '‛': '‛',\n '„': '“',\n '‟': '‟',\n '‹': '›',\n '《': '》',\n '「': '」'\n }\n\n static quotesRegexStr = Object.entries(Language.quotePairs)\n .map(([left, right]) => `${left}(\\\\n|.)*?${right}`)\n .join('|')\n\n static quotesRegex = new RegExp(`${Language.quotesRegexStr}+`, 'g')\n\n static parensRegex = /([\\\\((<{\\\\[])(?:\\\\\\1|.)*?[\\\\)\\]})]/g\n static emailRegex = /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Z|a-z]{2,7}/g\n\n static numberedReferenceRegex = /^(\\[\\d+])+/\n static sentenceBreakRegex = Language.GLOBAL_SENTENCE_BOUNDARY_REGEX\n static abbreviationChar = '.'\n\n constructor () {\n this.abbreviations = this.constructor.abbreviations\n }\n\n is_abbreviation (head, tail, seperator) {\n // \"\"\"\n // Do not break in abbreviations. Example D. John, St. Peter\n // In the case of \"This is Dr. Watson\", head is \"This is D/, tail is \" Watson\"\n // \"\"\"\n if (seperator !== this.constructor.abbreviationChar) {\n return false\n }\n\n const lastWord = this.get_lastword(head)\n if (!lastWord.length) {\n return false\n }\n\n const isAbbrev =\n this.abbreviations.has(lastWord) ||\n this.abbreviations.has(\n lastWord[0].toLowerCase() + lastWord.slice(1)\n ) ||\n this.abbreviations.has(lastWord.toUpperCase())\n\n return isAbbrev\n }\n\n is_exclamation_word (head, tail) {\n return Language.EXCLAMATION_WORDS.has(this.get_lastword(head) + '!')\n }\n\n get_lastword (text) {\n return text.split(/[\\s\\\\.]+/).slice(-1)[0]\n }\n\n findBoundary (text, match) {\n const tail = text.slice(match.index + 1)\n const head = text.slice(0, match.index)\n\n // If next word is numbered reference, expand boundary to that.'\n const numberRefMatch = Language.numberedReferenceRegex.exec(tail)\n\n if (numberRefMatch) {\n return match.index + 1 + numberRefMatch[0].length\n }\n\n // Next character is number or lower-case: not a sentence boundary\n if (this.continueInNextWord(tail)) {\n return null\n }\n\n const seperator = match[0]\n if (this.is_abbreviation(head, tail, seperator)) {\n return null\n }\n\n if (this.is_exclamation_word(head, tail)) {\n return null\n }\n\n // Include any closing punctuation and trailing space\n const matchLen = match[0].length\n // print(match_len)\n return match.index + matchLen\n }\n\n continueInNextWord (textAfterBoundary) {\n return textAfterBoundary.match(/^[0-9a-z]/)\n }\n\n getSkippableRanges (text) {\n const skippableRanges = []\n\n // Find matches using quotesRegex and add spans to skippableRanges\n const quotesMatches = text.matchAll(this.constructor.quotesRegex)\n\n for (const match of quotesMatches) {\n skippableRanges.push([match.index, match.index + match[0].length])\n }\n\n // Find matches using parensRegex and add spans to skippableRanges\n const parensMatches = text.matchAll(this.constructor.parensRegex)\n for (const match of parensMatches) {\n skippableRanges.push([match.index, match.index + match[0].length])\n }\n\n // Find matches using emailRegex and add spans to skippableRanges\n const emailMatches = text.matchAll(this.constructor.emailRegex)\n for (const match of emailMatches) {\n skippableRanges.push([match.index, match.index + match[0].length])\n }\n\n return skippableRanges\n }\n\n segment (text) {\n const paragraphs = text.split(/(\\n{2,})/)\n const sentences = []\n // Iterate over each paragraph.\n for (let i = 0; i < paragraphs.length; i++) {\n const paragraph = paragraphs[i]\n // Initialize a list to store the boundaries of sentences.\n const boundaries = [0]\n\n // Find all matches of sentence breaks in the paragraph.\n const matches = []\n let match\n while ((match = this.constructor.sentenceBreakRegex.exec(paragraph)) !== null) {\n matches.push(match)\n }\n\n const skippableRanges = this.getSkippableRanges(paragraph)\n\n // Iterate over each match of sentence breaks.\n for (let j = 0; j < matches.length; j++) {\n const match = matches[j]\n // Find the boundary of the sentence.\n let boundary = this.findBoundary(paragraph, match)\n\n // If boundary is null, skip to the next match.\n if (!boundary) {\n continue\n }\n\n // Check if the boundary is inside a skippable range (quote, parentheses, or email).\n let inRange = false\n\n for (let sri = 0; sri < skippableRanges.length; sri++) {\n const [skipStart, skipEnd] = skippableRanges[sri]\n\n if (boundary > skipStart && boundary < skipEnd) {\n if (boundary + 1 === skipEnd && this.isPunctuationBetweenQuotes()) {\n boundary = skipEnd\n inRange = false\n } else {\n inRange = true\n }\n break\n }\n }\n // If in_range is True, skip to the next match.\n if (inRange) {\n continue\n }\n\n // Add the boundary to the boundaries list.\n boundaries.push(boundary)\n }\n\n for (let bindex = 0; bindex < boundaries.length; bindex++) {\n // Slice the paragraph using the boundaries to get the sentence.\n const sentence = paragraph.slice(boundaries[bindex], boundaries[bindex + 1])\n\n // If the sentence has a length, yield the sentence\n // stripped of leading/trailing spaces.\n if (sentence.length) {\n sentences.push(sentence.replace(/^ +| +$/, ''))\n }\n }\n }\n return sentences\n }\n\n isPunctuationBetweenQuotes () {\n return false\n }\n}\n","import BaseLanguage from '../base.js'\n\nconst abbreviations = new Set([\n 'A',\n 'adj',\n 'adm',\n 'adv',\n 'al',\n 'ala',\n 'alta',\n 'apr',\n 'arc',\n 'ariz',\n 'ark',\n 'art',\n 'assn',\n 'asst',\n 'attys',\n 'aug',\n 'ave',\n 'B',\n 'bart',\n 'bld',\n 'bldg',\n 'blvd',\n 'brig',\n 'bros',\n 'btw',\n 'C',\n 'cal',\n 'calif',\n 'capt',\n 'cl',\n 'cmdr',\n 'co',\n 'col',\n 'colo',\n 'comdr',\n 'con',\n 'conn',\n 'corp',\n 'cpl',\n 'cres',\n 'ct',\n 'd.phil',\n 'D',\n 'dak',\n 'dec',\n 'del',\n 'dept',\n 'det',\n 'dist',\n 'dr.phil',\n 'dr.philos',\n 'dr',\n 'drs',\n 'e.g',\n 'E',\n 'ens',\n 'esp',\n 'esq',\n 'etc',\n 'exp',\n 'expy',\n 'ext',\n 'F',\n 'feb',\n 'fed',\n 'fig',\n 'fla',\n 'ft',\n 'fwy',\n 'fy',\n 'G',\n 'ga',\n 'gen',\n 'gov',\n 'H',\n 'hon',\n 'hosp',\n 'hr',\n 'hway',\n 'hwy',\n 'i.e',\n 'I',\n 'ia',\n 'id',\n 'ida',\n 'ill',\n 'inc',\n 'ind',\n 'ing',\n 'insp',\n 'J',\n 'jan',\n 'jr',\n 'jul',\n 'jun',\n 'K',\n 'kan',\n 'kans',\n 'ken',\n 'ky',\n 'L',\n 'la',\n 'lt',\n 'ltd',\n 'M',\n 'maj',\n 'man',\n 'mar',\n 'mass',\n 'may',\n 'md',\n 'me',\n 'med',\n 'messrs',\n 'mex',\n 'mfg',\n 'mich',\n 'min',\n 'minn',\n 'miss',\n 'mlle',\n 'mm',\n 'mme',\n 'mo',\n 'mont',\n 'mr',\n 'mrs',\n 'ms',\n 'msgr',\n 'mssrs',\n 'mt',\n 'mtn',\n 'Nº',\n 'N°',\n 'No̱',\n 'No',\n 'N',\n 'neb',\n 'nebr',\n 'nev',\n 'no',\n 'nos',\n 'nov',\n 'nr',\n 'O',\n 'oct',\n 'ok',\n 'okla',\n 'ont',\n 'op',\n 'ord',\n 'ore',\n 'p',\n 'P',\n 'pa',\n 'pd',\n 'pde',\n 'penn',\n 'penna',\n 'pfc',\n 'ph.d',\n 'ph',\n 'pl',\n 'plz',\n 'pp',\n 'prof',\n 'pvt',\n 'Q',\n 'que',\n 'R',\n 'rd',\n 'ref',\n 'rep',\n 'reps',\n 'res',\n 'rev',\n 'rs',\n 'rt',\n 'S',\n 'sask',\n 'sec',\n 'sen',\n 'sens',\n 'sep',\n 'sept',\n 'sfc',\n 'sgt',\n 'sr',\n 'st',\n 'supt',\n 'surg',\n 'T',\n 'tce',\n 'tenn',\n 'tex',\n 'u.s',\n 'U',\n 'univ',\n 'usafa',\n 'ut',\n 'v',\n 'V',\n 'va',\n 'ver',\n 'viz',\n 'vs',\n 'vt',\n 'W',\n 'wash',\n 'wis',\n 'wisc',\n 'wy',\n 'wyo',\n 'X',\n 'Y',\n 'yuk',\n 'Z'\n])\n\nexport default class English extends BaseLanguage {\n static abbreviations = abbreviations\n}\n","import BaseLanguage from '../base.js'\nimport English from './en.js'\n\nconst abbreviations = new Set([\n ...English.abbreviations,\n 'ዓ',\n 'ም'\n])\n\nexport default class Amharic extends BaseLanguage {\n static abbreviations = abbreviations\n continueInNextWord (textAfterBoundary) {\n return textAfterBoundary.match(/^\\W*[0-9a-z]/)\n }\n}\n","import BaseLanguage from '../base.js'\n\nconst abbreviations = new Set([\n 'إلخ',\n 'ا. د',\n 'ا.د',\n 'ا.ش.ا',\n 'ا',\n 'ت.ب',\n 'ج.ب',\n 'ج.م.ع',\n 'جم',\n 'س.ت',\n 'سم',\n 'ص.ب.',\n 'ص.ب',\n 'كج.',\n 'كلم.',\n 'م.ب',\n 'م',\n 'ه'\n\n])\n\nexport default class Arabic extends BaseLanguage {\n static abbreviations = abbreviations\n}\n","import BaseLanguage from '../base.js'\nimport English from './en.js'\nimport GLOBAL_SENTENCE_TERMINATORS from './../terminators.js'\n\nconst hyTerminators = ['։', '՜', ':'].concat(GLOBAL_SENTENCE_TERMINATORS)\nhyTerminators.splice(hyTerminators.indexOf('.'), 1)\nconst hyTerminatorsRegex = new RegExp(`[${hyTerminators.join('')}]+`, 'g')\n\nexport default class Armenian extends BaseLanguage {\n static abbreviations = English.abbreviations\n static sentenceBreakRegex = hyTerminatorsRegex\n}\n","import BaseLanguage from '../base.js'\nimport English from './en.js'\n\nconst abbreviations = new Set([\n ...English.abbreviations,\n 'এ',\n 'বি',\n 'সি',\n 'ডি',\n 'ঈ',\n 'এফ',\n 'জি',\n 'এইচ',\n 'আই',\n 'জে',\n 'কে',\n 'এল',\n 'এম',\n 'এন',\n 'ও',\n 'পি',\n 'কিউ',\n 'আর',\n 'এস',\n 'টি',\n 'ইউ',\n 'ভি',\n 'ডাবলিউ',\n 'এক্স',\n 'ওয়াই',\n 'জেড'\n])\n\nexport default class Bengali extends BaseLanguage {\n static abbreviations = abbreviations\n}\n","import BaseLanguage from '../base.js'\n\nconst abbreviations = new Set([\n 'p.s',\n 'акад',\n 'ал',\n 'б.р',\n 'б.ред',\n 'бел.а',\n 'бел.пр',\n 'бр',\n 'бул',\n 'в',\n 'вж',\n 'вкл',\n 'вм',\n 'вр',\n 'г',\n 'ген',\n 'гр',\n 'дж',\n 'дм',\n 'доц',\n 'др',\n 'ем',\n 'заб',\n 'зам',\n 'инж',\n 'к.с',\n 'кв.м',\n 'кв',\n 'кг',\n 'км',\n 'кор',\n 'куб.м',\n 'куб',\n 'л',\n 'лв',\n 'м.г',\n 'м',\n 'мин',\n 'млн',\n 'млрд',\n 'мм',\n 'н.с',\n 'напр',\n 'пл',\n 'полк',\n 'проф',\n 'р',\n 'рис',\n 'с',\n 'св',\n 'сек',\n 'см',\n 'сп',\n 'срв',\n 'ст',\n 'стр',\n 'т.г',\n 'т.е',\n 'т.н',\n 'т.нар',\n 'т',\n 'табл',\n 'тел',\n 'у',\n 'ул',\n 'фиг',\n 'ха',\n 'хил',\n 'ч',\n 'чл',\n 'щ.д'\n\n])\n\nexport default class Bulgarian extends BaseLanguage {\n static abbreviations = abbreviations\n}\n","import BaseLanguage from '../base.js'\nimport English from './en.js'\nimport GLOBAL_SENTENCE_TERMINATORS from './../terminators.js'\n\nexport default class Burmese extends BaseLanguage {\n static abbreviations = English.abbreviations\n // See https://en.wiktionary.org/wiki/၏\n static sentenceBreakRegex = new RegExp(\n `[${GLOBAL_SENTENCE_TERMINATORS.concat(['၏']).join('')}]+`,\n 'g'\n )\n}\n","import BaseLanguage from '../base.js'\n\nconst abbreviations = new Set([\n 'a.c',\n 'a',\n 'a/c',\n 'abr',\n 'adj',\n 'admón',\n 'aero',\n 'afmo',\n 'ago',\n 'almte',\n 'ambi',\n 'an',\n 'anfi',\n 'ante',\n 'anti',\n 'ap',\n 'apdo',\n 'archi',\n 'arci',\n 'arq',\n 'art',\n 'atte',\n 'auto',\n 'av',\n 'avda',\n 'bco',\n 'bi',\n 'bibl',\n 'bien',\n 'bis',\n 'bs. as',\n 'c.f',\n 'c.g',\n 'c',\n 'c/c',\n 'c/u',\n 'cap',\n 'cc.aa',\n 'cdad',\n 'cm',\n 'co',\n 'com',\n 'con',\n 'contra',\n 'cra',\n 'crio',\n 'cta',\n 'cuadri',\n 'cuasi',\n 'cuatri',\n 'cv',\n 'd.e.p',\n 'da',\n 'dcha',\n 'dcho',\n 'de',\n 'deci',\n 'dep',\n 'des',\n 'di',\n 'dic',\n 'dicc',\n 'dir',\n 'dis',\n 'dn',\n 'doc',\n 'dom',\n 'dpto',\n 'dr',\n 'dra',\n 'dto',\n 'ecto',\n 'ee',\n 'ej',\n 'en',\n 'endo',\n 'entlo',\n 'entre',\n 'epi',\n 'equi',\n 'esq',\n 'etc',\n 'ex',\n 'excmo',\n 'ext',\n 'extra',\n 'f.c',\n 'fca',\n 'fdo',\n 'febr',\n 'ff. aa',\n 'ff.cc',\n 'fig',\n 'fil',\n 'fra',\n 'g.p',\n 'g/p',\n 'geo',\n 'gob',\n 'gr',\n 'gral',\n 'grs',\n 'hemi',\n 'hetero',\n 'hiper',\n 'hipo',\n 'hnos',\n 'homo',\n 'hs',\n 'i',\n 'igl',\n 'iltre',\n 'im',\n 'imp',\n 'impr',\n 'impto',\n 'in',\n 'incl',\n 'infra',\n 'ing',\n 'inst',\n 'inter',\n 'intra',\n 'iso',\n 'izdo',\n 'izq',\n 'izqdo',\n 'j.c',\n 'jue',\n 'jul',\n 'jun',\n 'kg',\n 'km',\n 'lcdo',\n 'ldo',\n 'let',\n 'lic',\n 'ltd',\n 'lun',\n 'macro',\n 'mar',\n 'máx',\n 'may',\n 'mega',\n 'mg',\n 'micro',\n 'mié',\n 'min',\n 'mín',\n 'mini',\n 'mm',\n 'mono',\n 'mt',\n 'multi',\n 'n. del t',\n 'n.b',\n 'neo',\n 'no',\n 'nos',\n 'nov',\n 'ntra. sra',\n 'núm',\n 'oct',\n 'omni',\n 'p.a',\n 'p.d',\n 'p.ej',\n 'p.v.p',\n 'p',\n 'pág',\n 'págs',\n 'para',\n 'párr',\n 'párrf',\n 'pen',\n 'ph.d',\n 'ph',\n 'pluri',\n 'poli',\n 'pos',\n 'post',\n 'pp',\n 'ppal',\n 'pre',\n 'prev',\n 'pro',\n 'prof',\n 'prov',\n 'pseudo',\n 'ptas',\n 'pts',\n 'pza',\n 'q.e.g.e',\n 'q.e.p.d',\n 'q.e.s.m',\n 're',\n 'reg',\n 'rep',\n 'retro',\n 'rr. hh',\n 'rte',\n 's. a',\n 's.a.r',\n 's.e',\n 's.l',\n 's.r.c',\n 's.r.l',\n 's.s.s',\n 's',\n 's/n',\n 'sáb',\n 'sdad',\n 'seg',\n 'semi',\n 'sept',\n 'seudo',\n 'sig',\n 'sobre',\n 'sr',\n 'sra',\n 'sres',\n 'srta',\n 'sta',\n 'sto',\n 'sub',\n 'super',\n 'supra',\n 't.v.e',\n 'tamb',\n 'tel',\n 'tfno',\n 'trans',\n 'tras',\n 'tri',\n 'ud',\n 'uds',\n 'ulter',\n 'ultra',\n 'un',\n 'uni',\n 'univ',\n 'uu',\n 'v.b',\n 'v.e',\n 'vd',\n 'vds',\n 'vice',\n 'vid',\n 'vie',\n 'vol',\n 'vs',\n 'vto',\n 'yuxta'\n])\n\nexport default class Spanish extends BaseLanguage {\n static abbreviations = abbreviations\n}\n","import Spanish from './es.js'\n\nexport default class Catalan extends Spanish {\n}\n","import BaseLanguage from '../base.js'\n\nconst abbreviations = new Set([\n 'adm',\n 'adr',\n 'afd',\n 'afs',\n 'al',\n 'alm',\n 'ang',\n 'ank',\n 'anm',\n 'ann',\n 'ansvh',\n 'apr',\n 'årg',\n 'årh',\n 'årl',\n 'arr',\n 'ass',\n 'att',\n 'aud',\n 'aug',\n 'aut',\n 'bd',\n 'bdt',\n 'bet',\n 'bhk',\n 'bio',\n 'biol',\n 'bk',\n 'bl.a',\n 'bot',\n 'br',\n 'bto',\n 'ca',\n 'cal',\n 'cirk',\n 'cit',\n 'co',\n 'cpr-nr',\n 'cvr-nr',\n 'd.å',\n 'd.æ',\n 'd.d',\n 'd.e',\n 'd.m',\n 'd.s.s',\n 'd.s',\n 'd.y',\n 'da',\n 'dav',\n 'dec',\n 'def',\n 'del',\n 'dep',\n 'diam',\n 'din',\n 'dir',\n 'disp',\n 'distr',\n 'do',\n 'dobb',\n 'dr',\n 'ds',\n 'dvs',\n 'e.b',\n 'e.kr',\n 'e.l',\n 'e.o',\n 'e.v.t',\n 'eftf',\n 'eftm',\n 'egl',\n 'eks',\n 'eksam',\n 'ekskl',\n 'eksp',\n 'ekspl',\n 'el',\n 'emer',\n 'endv',\n 'eng',\n 'enk',\n 'etc',\n 'eur',\n 'evt',\n 'exam',\n 'f.å',\n 'f.eks',\n 'f.kr',\n 'f.m',\n 'f.n',\n 'f.o.m',\n 'f.o',\n 'f.s.v',\n 'f.t',\n 'f.v.t',\n 'f',\n 'fa',\n 'fær',\n 'fakt',\n 'feb',\n 'fec',\n 'ff',\n 'fg',\n 'fhv',\n 'fig',\n 'fl',\n 'flg',\n 'fm',\n 'fmd',\n 'forb',\n 'foreg',\n 'foren',\n 'forf',\n 'forh',\n 'fork',\n 'form',\n 'forr',\n 'fors',\n 'forsk',\n 'forts',\n 'fp',\n 'fr',\n 'frk',\n 'fru',\n 'fuldm',\n 'fung',\n 'fys',\n 'g.d',\n 'g.m',\n 'g',\n 'gd',\n 'gdr',\n 'gg',\n 'gh',\n 'gl',\n 'gn',\n 'gns',\n 'gr',\n 'grdl',\n 'gross',\n 'h.a',\n 'h.c',\n 'hdl',\n 'henh',\n 'henv',\n 'hf',\n 'hft',\n 'hhv',\n 'hort',\n 'hosp',\n 'hpl',\n 'hr',\n 'hrs',\n 'hum',\n 'i.e',\n 'i',\n 'ib',\n 'ibid',\n 'if',\n 'ifm',\n 'ill',\n 'indb',\n 'indreg',\n 'ing',\n 'inkl',\n 'insp',\n 'instr',\n 'isl',\n 'istf',\n 'jan',\n 'jf',\n 'jfr',\n 'jnr',\n 'jr',\n 'jul',\n 'jun',\n 'jur',\n 'jvf',\n 'kal',\n 'kap',\n 'kat',\n 'kbh',\n 'kem',\n 'kgl',\n 'kin',\n 'kl',\n 'kld',\n 'km/t',\n 'knsp',\n 'komm',\n 'kons',\n 'korr',\n 'kp',\n 'kr',\n 'kst',\n 'kt',\n 'ktr',\n 'kv',\n 'kvt',\n 'l.c',\n 'l',\n 'lab',\n 'lat',\n 'lb.',\n 'lb.nr',\n 'lb',\n 'lejl',\n 'lgd',\n 'lic',\n 'lign',\n 'lin',\n 'ling.merc',\n 'litt',\n 'lø',\n 'lok',\n 'lrs',\n 'ltr',\n 'm.a.o',\n 'm.fl.st',\n 'm.m',\n 'm',\n 'm/',\n 'ma',\n 'mag',\n 'maks',\n 'mar',\n 'mat',\n 'matr.nr',\n 'md',\n 'mdl',\n 'mdr',\n 'mdtl',\n 'med',\n 'medd',\n 'medflg',\n 'medl',\n 'merc',\n 'mezz',\n 'mf',\n 'mfl',\n 'mgl',\n 'mhp',\n 'mht',\n 'mi',\n 'mia',\n 'mio',\n 'ml',\n 'mods',\n 'modsv',\n 'modt',\n 'mr',\n 'mrk',\n 'mrs',\n 'ms',\n 'mul',\n 'mv',\n 'mvh',\n 'n.br',\n 'n.f',\n 'n',\n 'nat',\n 'ned',\n 'nedenn',\n 'nedenst',\n 'nederl',\n 'nkr',\n 'nl',\n 'no',\n 'nord',\n 'nov',\n 'nr',\n 'nto',\n 'nuv',\n 'o.a',\n 'ø.f',\n 'o.fl.st',\n 'o.g',\n 'o.h',\n 'o.m.a',\n 'o',\n 'obj',\n 'obl',\n 'obs',\n 'odont',\n 'oecon',\n 'off',\n 'ofl',\n 'okt',\n 'omg',\n 'omr',\n 'omtr',\n 'on',\n 'op.cit',\n 'opg',\n 'opl',\n 'opr',\n 'org',\n 'orig',\n 'osfr',\n 'osv',\n 'øv',\n 'ovenn',\n 'ovenst',\n 'overs',\n 'ovf',\n 'øvr',\n 'oz',\n 'p.a',\n 'p.b.v',\n 'p.c',\n 'p.m.v',\n 'p.p',\n 'p.s',\n 'p.t',\n 'p.v.a',\n 'p.v.c',\n 'p',\n 'pæd',\n 'par',\n 'partc',\n 'pass',\n 'pct',\n 'pd',\n 'pens',\n 'perf',\n 'pers',\n 'pg',\n 'pga',\n 'pgl',\n 'ph.d',\n 'ph',\n 'pharm',\n 'phil',\n 'pinx',\n 'pk',\n 'pkt',\n 'pl',\n 'pluskv',\n 'polit',\n 'polyt',\n 'port',\n 'pos',\n 'pp',\n 'pr',\n 'præd',\n 'præf',\n 'præp',\n 'præs',\n 'præt',\n 'prc',\n 'priv',\n 'prod',\n 'prof',\n 'pron',\n 'psych',\n 'pt',\n 'q.e.d',\n 'rad',\n 'red',\n 'ref',\n 'reg',\n 'regn',\n 'rel',\n 'rep',\n 'repr',\n 'rest',\n 'rk',\n 'russ',\n 's.å',\n 's.br',\n 's.d',\n 's.e',\n 's.f',\n 's.m.b.a',\n 's.u',\n 's',\n 's/',\n 'sa',\n 'sædv',\n 'såk',\n 'sb',\n 'sc',\n 'scient',\n 'sek',\n 'sekr',\n 'sem',\n 'sen',\n 'sep',\n 'sept',\n 'sg',\n 'sign',\n 'sj',\n 'skr',\n 'skt',\n 'slutn',\n 'sml',\n 'smp',\n 'sms',\n 'smst',\n 'sø',\n 'soc',\n 'sort',\n 'sp',\n 'spec',\n 'spm',\n 'spr',\n 'spsk',\n 'st',\n 'stk',\n 'str',\n 'stud',\n 'subj',\n 'subst',\n 'suff',\n 'sup',\n 'suppl',\n 'sv',\n 't.h',\n 't.o.m',\n 't.v',\n 't',\n 'tab',\n 'td',\n 'tdl',\n 'tdr',\n 'techn',\n 'tekn',\n 'temp',\n 'th',\n 'ti',\n 'tidl',\n 'tilf',\n 'tilh',\n 'till',\n 'tilsv',\n 'tjg',\n 'tlf',\n 'tlgr',\n 'to',\n 'tr',\n 'trp',\n 'tv',\n 'ty',\n 'u.å',\n 'u.p',\n 'u.st',\n 'u',\n 'uafh',\n 'ubf',\n 'ubøj',\n 'udb',\n 'udbet',\n 'udd',\n 'udg',\n 'uds',\n 'ugtl',\n 'ulin',\n 'ult',\n 'undt',\n 'univ',\n 'v.f',\n 'vær',\n 'var',\n 'vb',\n 'vbsb',\n 'vedk',\n 'vedl',\n 'vedr',\n 'vejl',\n 'vh',\n 'vol',\n 'vs',\n 'vsa',\n 'zool'\n])\n\nexport default class Danish extends BaseLanguage {\n static abbreviations = abbreviations\n\n continueInNextWord (textAfterBoundary) {\n return textAfterBoundary.match(/^\\W*[0-9a-z]/)\n }\n}\n","import BaseLanguage from '../base.js'\nimport English from './en.js'\n\nconst abbreviations = new Set([\n ...English.abbreviations,\n 'ä',\n 'Ä',\n 'adj',\n 'adm',\n 'adv',\n 'ao.univ.prof',\n 'art',\n 'ass.prof',\n 'ass',\n 'asst',\n 'b.a',\n 'b.s',\n 'bart',\n 'bldg',\n 'brig',\n 'bros',\n 'bse',\n 'buchst',\n 'bzgl',\n 'bzw',\n 'c.-à-d',\n 'ca',\n 'capt',\n 'chr',\n 'cmdr',\n 'co',\n 'col',\n 'comdr',\n 'con',\n 'corp',\n 'cpl',\n 'd.h',\n 'd.j',\n 'dergl',\n 'dgl',\n 'di',\n 'dipl.-ing',\n 'dkr',\n 'dr ',\n 'ens',\n 'etc',\n 'ev ',\n 'evtl',\n 'ff',\n 'g.g.a',\n 'g.u',\n 'gen',\n 'ggf',\n 'gov',\n 'hon.prof',\n 'hon',\n 'hosp',\n 'i.f',\n 'i.h.v',\n 'ii',\n 'iii',\n 'insp',\n 'iv',\n 'ix',\n 'jun',\n 'k.o',\n 'kath',\n 'lfd',\n 'lt',\n 'ltd',\n 'm.e',\n 'mag',\n 'maj',\n 'med',\n 'messrs',\n 'mio',\n 'mlle',\n 'mm',\n 'mme',\n 'mr',\n 'mrd',\n 'mrs',\n 'ms',\n 'msgr',\n 'mwst',\n 'no',\n 'nos',\n 'nr',\n 'o.ä',\n 'o.univ.-prof',\n 'op',\n 'ord',\n 'pfc',\n 'ph',\n 'pp',\n 'prof',\n 'projektass',\n 'pvt',\n 'rep',\n 'reps',\n 'res',\n 'rev',\n 'rt',\n 's',\n 's.p.a',\n 'sa',\n 'sen',\n 'sens',\n 'sfc',\n 'sgt',\n 'sog',\n 'sogen',\n 'spp',\n 'sr',\n 'st',\n 'std',\n 'str ',\n 'stud.ass',\n 'supt',\n 'surg',\n 'T',\n 'u.a ',\n 'u.ä',\n 'u.e',\n 'u.s.w',\n 'u.u',\n 'univ.-doz',\n 'univ.-prof',\n 'univ.ass',\n 'usf',\n 'usw',\n 'v',\n 'vgl',\n 'vi',\n 'vii',\n 'viii',\n 'vs',\n 'x',\n 'xi',\n 'xii',\n 'xiii',\n 'xiv',\n 'xix',\n 'xv',\n 'xvi',\n 'xvii',\n 'xviii',\n 'xx',\n 'z.b',\n 'z.t',\n 'z.z',\n 'z.zt',\n 'zt',\n 'zzt'\n\n])\n\nexport default class Deutsch extends BaseLanguage {\n static abbreviations = abbreviations\n static MONTHS = new Set([\n 'Januar',\n 'Februar',\n 'März',\n 'April',\n 'Mai',\n 'Juni',\n 'Juli',\n 'August',\n 'September',\n 'Oktober',\n 'November',\n 'Dezember'\n ])\n\n continueInNextWord (textAfterBoundary) {\n if (textAfterBoundary.match(/^\\W*[0-9a-z]/)) {\n return true\n }\n\n let nextWord = textAfterBoundary.trim().split(' ')[0]\n nextWord = nextWord.replace(/[?!.]/g, '')\n if (!nextWord.length) {\n return false\n }\n if (Deutsch.MONTHS.has(nextWord) || Deutsch.MONTHS.has(nextWord[0].toUpperCase() + nextWord.slice(1))) {\n return true\n }\n\n return false\n }\n\n isPunctuationBetweenQuotes () {\n return true\n }\n}\n","import BaseLanguage from '../base.js'\n\nconst abbreviations = new Set([\n 'riv.dir.int.\"le priv',\n '2d',\n '3e',\n 'a.2d',\n 'a.a.j.b',\n 'a.a',\n 'a.f.t',\n 'a.g.j.b',\n 'a.h.v',\n 'a.h.w',\n 'a.hosp',\n 'a.i',\n 'a.j.b',\n 'a.j.t',\n 'a.m.r',\n 'a.m',\n 'a.p.m',\n 'a.p.r',\n 'a.p.t',\n 'a.s',\n 'a.t.d.f',\n 'a.u.b',\n 'a.v.a',\n 'a.w',\n 'aanbev.comm',\n 'aanbev',\n 'aant',\n 'aanv.st',\n 'aanw.vnw',\n 'aanw',\n 'abd',\n 'abm',\n 'abs',\n 'acc.& fisc',\n 'acc.act',\n 'acc.bedr.m',\n 'acc.bedr.t',\n \"acc.thema's m.\",\n 'acc.thema’s m',\n 'achterv',\n 'act.dr.fam',\n 'act.dr',\n 'act.fisc',\n 'act.soc',\n 'adm.akk',\n 'adm.besl',\n 'adm.lex',\n 'adm.onderr',\n 'adm.ov',\n 'adv.bl',\n 'adv',\n 'afd',\n 'afl',\n 'aggl.verord',\n 'agr',\n 'al',\n 'alg.richts',\n 'alg',\n 'amén',\n 'ann.dr.lg',\n 'ann.dr.sc.pol',\n 'ann.dr',\n 'ann.ét.eur',\n 'ann.fac.dr.lg',\n 'ann.jur.créd.règl.coll',\n 'ann.jur.créd',\n 'ann.not',\n 'ann.parl',\n 'ann.prat.comm',\n 'app',\n 'arb',\n 'arbbl',\n 'arbh',\n 'arbit.besl',\n 'arbrb',\n 'arr.cass',\n 'arr.r.v.st',\n 'arr.verbr',\n 'arr',\n 'arrondrb',\n 'art',\n 'artw',\n 'aud',\n 'b.&w',\n 'b.a.s',\n 'b.a',\n 'b.b.o',\n 'b.best.dep',\n 'b.br.ex',\n 'b.coll.fr.gem.comm',\n 'b.coll.vl.gem.comm',\n 'b.d.cult.r',\n 'b.d.gem.ex',\n 'b.d.gem.reg',\n 'b.dep',\n 'b.e.b',\n 'b.f.r',\n 'b.fr.gem.ex',\n 'b.fr.gem.reg',\n 'b.i.h',\n 'b.inl.j.d',\n 'b.inl.s.reg',\n 'b.j',\n 'b.l',\n 'b.lid br.ex',\n 'b.lid d.gem.ex',\n 'b.lid fr.gem.ex',\n 'b.lid vl.ex',\n 'b.lid w.gew.ex',\n 'b.o.z',\n 'b.prov.r',\n 'b.r.h',\n 'b.s',\n 'b.sr',\n 'b.stb',\n 'b.t.i.r',\n 'b.t.s.z',\n 'b.t.w.rev',\n 'b.v',\n 'b.ver.coll.gem.gem.comm',\n 'b.verg.r.b',\n 'b.versl',\n 'b.vl.ex',\n 'b.voorl.reg',\n 'b.w.gew.ex',\n 'b.w',\n 'b.z.d.g',\n 'b.z.v',\n 'b',\n 'bab',\n 'bank fin.r',\n 'bank fin',\n 'bedr.org',\n 'begins',\n 'beheersov',\n 'bekendm.comm',\n 'bel.besch',\n 'bel.w.p',\n 'bel',\n 'beleidsov',\n 'belg',\n 'benelux jur',\n 'ber.w',\n 'ber',\n 'besch',\n 'besl',\n 'beslagr',\n 'besluitwet nr',\n 'best',\n 'bestuurswet',\n 'bet',\n 'betr',\n 'bevest',\n 'bew',\n 'bijbl.n.bijdr',\n 'bijbl',\n 'bijl',\n 'bijv',\n 'bijw',\n 'bijz.decr',\n 'bin.b',\n 'bkh',\n 'bl',\n 'blz',\n 'bm',\n 'bn',\n 'bnlx merkw',\n 'bnlx tek',\n 'bnlx uitl',\n 'bnw',\n 'bouwr',\n 'br drs',\n 'br.parl',\n 'bs',\n 'bt drs',\n 'btw rev',\n 'bull.adm.pénit',\n 'bull.ass',\n 'bull.b.m.m',\n 'bull.bel',\n 'bull.best.strafinr',\n 'bull.bmm',\n 'bull.c.b.n',\n 'bull.c.n.c',\n 'bull.cbn',\n 'bull.centr.arb',\n 'bull.cnc',\n 'bull.contr',\n 'bull.doc.min.fin',\n 'bull.f.e.b',\n 'bull.feb',\n 'bull.fisc.fin.r',\n 'bull.i.u.m',\n 'bull.inf.ass.secr.soc',\n 'bull.inf.i.e.c',\n 'bull.inf.i.n.a.m.i',\n 'bull.inf.i.r.e',\n 'bull.inf.iec',\n 'bull.inf.inami',\n 'bull.inf.ire',\n 'bull.inst.arb',\n 'bull.ium',\n 'bull.jur.imm',\n 'bull.lég.b',\n 'bull.off',\n 'bull.trim.b.dr.comp',\n 'bull.us',\n 'bull.v.b.o',\n 'bull.vbo',\n 'bull',\n 'bv i.o',\n 'bv',\n 'bw int.reg',\n 'bw',\n 'bxh',\n 'byz',\n 'c.& f.p',\n 'c.& f',\n 'c.a.-a',\n 'c.a.b.g',\n 'c.a',\n 'c.c.i',\n 'c.c.s',\n 'c.c',\n 'c.conc.jur',\n 'c.d.e',\n 'c.d.p.k',\n 'c.e',\n 'c.ex',\n 'c.f',\n 'c.h.a',\n 'c.i.f.i.c',\n 'c.i.f',\n 'c.j',\n 'c.l',\n 'c.n',\n 'c.o.d',\n 'c.p',\n 'c.pr.civ',\n 'c.q',\n 'c.r.a',\n 'c.r',\n 'c.s.a',\n 'c.s.q.n',\n 'c.s',\n 'c.v.a',\n 'c.v.o',\n 'c.v',\n 'c',\n 'ca',\n 'cadeaust',\n 'cah.const',\n 'cah.dr.europ',\n 'cah.dr.immo',\n 'cah.dr.jud',\n 'cal',\n 'cap',\n 'carg',\n 'cas',\n 'cass',\n 'cert',\n 'cf',\n 'ch',\n 'chr',\n 'chron.d.s',\n 'chron.dr.not',\n 'chron',\n 'cie',\n 'cir',\n 'circ.z',\n 'circ',\n 'cit.loc',\n 'cit',\n 'civ',\n 'cl.et.b',\n 'cmt',\n 'co',\n 'cognoss.v',\n 'coll',\n 'colp.w',\n 'com.v.min',\n 'com',\n 'comm.bijz.ov',\n 'comm.erf',\n 'comm.fin',\n 'comm.ger',\n 'comm.handel',\n 'comm.pers',\n 'comm.pub',\n 'comm.straf',\n 'comm.v.en v',\n 'comm.v',\n 'comm.venn',\n 'comm.verz',\n 'comm.voor',\n 'comm',\n 'comp',\n 'compt.w',\n 'computerr',\n 'con.m',\n 'concl',\n 'concr',\n 'conf',\n 'confl.w.huwbetr',\n 'confl.w',\n 'cons',\n 'conv',\n 'coöp',\n 'corr.bl',\n 'corr',\n 'cour de cass',\n 'cour.fisc',\n 'cour.immo',\n 'cridon',\n 'crim',\n 'crt',\n 'cur',\n 'curs',\n 'd.-g',\n 'd.a.v',\n 'd.a',\n 'd.b.f',\n 'd.c.c.r',\n 'd.c',\n 'd.d.p',\n 'd.d',\n 'd.e.t',\n 'd.gem.r',\n 'd.h.z',\n 'd.h',\n 'd.i.t',\n 'd.i',\n 'd.j',\n 'd.l.r',\n 'd.m.v',\n 'd.m',\n 'd.o.v',\n 'd.parl',\n 'd.w.z',\n 'd',\n 'dact',\n 'dat',\n 'dbesch',\n 'dbesl',\n 'de advoc',\n 'de belg.acc',\n 'de burg.st',\n 'de dr',\n 'de gem',\n 'de gerechtsd',\n 'de venn',\n 'de verz',\n 'decr.d',\n 'decr.fr',\n 'decr.vl',\n 'decr.w',\n 'decr',\n 'deelw',\n 'def',\n 'dep.opv',\n 'dep.rtl',\n 'derg',\n 'desp',\n 'det.mag',\n 'deurw.regl',\n 'dez',\n 'dgl',\n 'dhr',\n 'disp',\n 'diss',\n 'div.act',\n 'div.bel',\n 'div',\n 'dl',\n 'dln',\n 'dnotz',\n 'doc.jur.b',\n 'doc.min.fin',\n 'doc.parl',\n 'doc',\n 'doctr',\n 'dpl.besl',\n 'dpl',\n 'dr.banc.fin',\n 'dr.circ',\n 'dr.inform',\n 'dr.mr',\n 'dr.pén.entr',\n 'dr.q.m',\n 'dr',\n 'drs',\n 'dtp',\n 'dwz',\n 'dyn',\n 'e cont',\n 'e.a',\n 'e.b',\n 'e.c.a',\n 'e.c',\n 'e.d',\n 'e.e.a',\n 'e.e.g',\n 'e.e',\n 'e.g.a',\n 'e.g',\n 'e.h.a',\n 'e.i',\n 'e.j',\n 'e.m.a',\n 'e.n.a.c',\n 'e.o',\n 'e.p.c',\n 'e.r.c',\n 'e.r.f',\n 'e.r.h',\n 'e.r.o',\n 'e.r.p',\n 'e.r.v',\n 'e.s.r.a',\n 'e.s.t',\n 'e.v.a',\n 'e.v',\n 'e.w',\n 'e',\n 'e&o.e',\n 'ec.pol.r',\n 'echos log',\n 'econ',\n 'ed',\n 'ed(s)',\n 'eeg verd.v',\n 'eex san s',\n 'eff',\n 'eg rtl',\n 'eig.mag',\n 'eig',\n 'eil',\n 'elektr',\n 'en antw',\n 'en gew',\n 'en m',\n 'en prov',\n 'en s',\n 'en w',\n 'enmb',\n 'entr.et dr',\n 'enz',\n 'err',\n 'et al',\n 'et seq',\n 'etc',\n 'etq',\n 'eur.t.s',\n 'eur.verd.overdracht strafv',\n 'eur',\n 'ev rechtsh',\n 'ev uitl',\n 'ev',\n 'evt',\n 'ex.crim',\n 'ex',\n 'exec',\n 'f.a.o',\n 'f.a.q',\n 'f.a.s',\n 'f.i.b',\n 'f.j.f',\n 'f.o.b',\n 'f.o.r',\n 'f.o.s',\n 'f.o.t',\n 'f.r',\n 'f.supp',\n 'f.suppl',\n 'f',\n 'fa',\n 'facs',\n 'fare act',\n 'fasc',\n 'fg',\n 'fid.ber',\n 'fig',\n 'fin.verh.w',\n 'fin',\n 'fisc.act',\n 'fisc.koer',\n 'fisc',\n 'fl',\n 'form',\n 'foro',\n 'fr.cult.r',\n 'fr.gem.r',\n 'fr.parl',\n 'fr',\n 'fra',\n 'ft',\n 'g.a.v',\n 'g.a.w.v',\n 'g.a',\n 'g.g.d',\n 'g.m.t',\n 'g.o',\n 'g.omt.e',\n 'g.p',\n 'g.s',\n 'g.v',\n 'g.w.w',\n 'g',\n 'geb',\n 'gebr',\n 'gebrs',\n 'gec.decr',\n 'gec',\n 'ged.st',\n 'ged',\n 'gedipl',\n 'gedr.st',\n 'geh',\n 'gem.comm',\n 'gem.gem.comm',\n 'gem.st',\n 'gem.stem',\n 'gem.w',\n 'gem.wet, gem.wet',\n 'gem',\n 'gemeensch.optr',\n 'gemeensch.standp',\n 'gemeensch.strat',\n 'gemeent.b',\n 'gemeent.regl',\n 'gemeent.verord',\n 'gemeent',\n 'gen',\n 'geol',\n 'geopp',\n 'gepubl',\n 'ger.deurw',\n 'ger.w',\n 'gerekw',\n 'gereq',\n 'gesch',\n 'get',\n 'getr',\n 'gev.m',\n 'gev.maatr',\n 'gew',\n 'ghert',\n 'gir.eff.verk',\n 'gk',\n 'gr',\n 'gramm',\n 'grat.w',\n 'gron,opm.en leermed',\n 'grondw',\n 'grootb.w',\n 'grs',\n 'grur ausl',\n 'grur int',\n 'grvm',\n 'grw',\n 'gst',\n 'gw',\n 'h.a.v.o',\n 'h.a',\n 'h.b.o',\n 'h.e.a.o',\n 'h.e.g.a',\n 'h.e.geb',\n 'h.e.gestr',\n 'h.l',\n 'h.m',\n 'h.o',\n 'h.r',\n 'h.t.l',\n 'h.t.m',\n 'h.w.geb',\n 'hand',\n 'handelsn.w',\n 'handelspr',\n 'handelsr.w',\n 'handelsreg.w',\n 'handv',\n 'harv.l.rev',\n 'hc',\n 'herald',\n 'hert',\n 'herz',\n 'hfdst',\n 'hfst',\n 'hgrw',\n 'hhr',\n 'hist',\n 'hooggel',\n 'hoogl',\n 'hosp',\n 'hpw',\n 'hr.ms',\n 'hr',\n 'hregw',\n 'hrg',\n 'hst',\n 'huis.just',\n 'huisv.w',\n 'huurbl',\n 'hv.vn',\n 'hw',\n 'hyp.w',\n 'i.b.s',\n 'i.c.m.h',\n 'i.c',\n 'i.e',\n 'i.f.p',\n 'i.f',\n 'i.g.v',\n 'i.h.a',\n 'i.h.b',\n 'i.h',\n 'i.l.pr',\n 'i.o',\n 'i.p.o',\n 'i.p.r',\n 'i.p.v',\n 'i.pl.v',\n 'i.r.d.i',\n 'i.s.m',\n 'i.t.t',\n 'i.v.m',\n 'i.v.s',\n 'i.v',\n 'i.w.tr',\n 'i.z',\n 'ib',\n 'ibid',\n 'icip-ing.cons',\n 'iem',\n 'ind prop',\n 'ind',\n 'indic.soc',\n 'indiv',\n 'inf.i.d.a.c',\n 'inf.idac',\n 'inf.r.i.z.i.v',\n 'inf.riziv',\n 'inf.soc.secr',\n 'inf',\n 'ing.cons',\n 'ing',\n 'inst',\n \"int'l & comp.l.q.\",\n 'int',\n 'interm',\n 'intern.fisc.act',\n 'intern.vervoerr',\n 'inv.w',\n 'inv.wet',\n 'inv',\n 'invord.w',\n 'inz',\n 'ir',\n 'irspr',\n 'it',\n 'iwtr',\n 'j.-cl',\n 'j.c.b',\n 'j.c.e',\n 'j.c.fl',\n 'j.c.j',\n 'j.c.p',\n 'j.d.e',\n 'j.d.f',\n 'j.d.s.c',\n 'j.dr.jeun',\n 'j.j.d',\n 'j.j.p',\n 'j.j.pol',\n 'j.l.m.b',\n 'j.l.o',\n 'j.l',\n 'j.ordre pharm',\n 'j.p.a',\n 'j.r.s',\n 'j.t.d.e',\n 'j.t.dr.eur',\n 'j.t.o',\n 'j.t.t',\n 'j.t',\n 'j',\n 'jaarl',\n 'jb.hand',\n 'jb.kred.c.s',\n 'jb.kred',\n 'jb.l.r.b',\n 'jb.lrb',\n 'jb.markt',\n 'jb.mens',\n 'jb.t.r.d',\n 'jb.trd',\n 'jeugdrb',\n 'jeugdwerkg.w',\n 'jg',\n 'jis',\n 'jl',\n 'journ.jur',\n 'journ.prat.dr.fisc.fin',\n 'journ.proc',\n 'jrg',\n 'jur.comm.fl',\n 'jur.dr.soc.b.l.n',\n 'jur.f.p.e',\n 'jur.fpe',\n 'jur.niv',\n 'jur.trav.brux',\n 'jur',\n 'jura falc',\n 'jurambt',\n 'jv.cass',\n 'jv.h.r.j',\n 'jv.hrj',\n 'jw',\n 'k.b',\n 'k.g',\n 'k.k',\n 'k.m.b.o',\n 'k.o.o',\n 'k.v.k',\n 'k.v.v.v',\n 'k',\n 'kadasterw',\n 'kaderb',\n 'kador',\n 'kbo-nr',\n 'kg',\n 'kh',\n 'kiesw',\n 'kind.bes.v',\n 'kkr',\n 'koopv',\n 'kr',\n 'krankz.w',\n 'ksbel',\n 'kt',\n 'ktg',\n 'ktr',\n 'kvdm',\n 'kw.r',\n 'kymr',\n 'kzr',\n 'kzw',\n 'l.b.o',\n 'l.b',\n 'l.bas',\n 'l.c',\n 'l.gew',\n 'l.j',\n 'l.k',\n 'l.l',\n 'l.o',\n 'l.r.b',\n 'l.u.v.i',\n 'l.v.r',\n 'l.v.w',\n 'l.w',\n \"l'exp.-compt.b.\",\n 'l’exp.-compt.b',\n 'l',\n 'landinr.w',\n 'landscrt',\n 'larcier cass',\n 'lat',\n 'law.ed',\n 'lett',\n 'levensverz',\n 'lgrs',\n 'lidw',\n 'limb.rechtsl',\n 'lit',\n 'litt',\n 'liw',\n 'liwet',\n 'lk',\n 'll.(l.)l.r',\n 'll',\n 'loonw',\n 'losbl',\n 'ltd',\n 'luchtv.w',\n 'luchtv',\n 'm.a.v.o',\n 'm.a.w',\n 'm.b.o',\n 'm.b.r',\n 'm.b.t',\n 'm.b',\n 'm.d.g.o',\n 'm.e.a.o',\n 'm.e.r',\n 'm.h.d',\n 'm.h',\n 'm.i.v',\n 'm.j.t',\n 'm.k',\n 'm.m.a',\n 'm.m.h.h',\n 'm.m.v',\n 'm.m',\n 'm.n',\n 'm.not.fisc',\n 'm.nt',\n 'm.o',\n 'm.r',\n 'm.s.a',\n 'm.u.p',\n 'm.v.a',\n 'm.v.h.n',\n 'm.v.t',\n 'm.z',\n 'm',\n 'maatr.teboekgest.luchtv',\n 'maced',\n 'mand',\n 'max',\n 'mbl.not',\n 'me',\n 'med.b.u.f.r',\n 'med.bufr',\n 'med.vbo',\n 'med',\n 'meerv',\n 'meetbr.w',\n 'mém.adm',\n 'mgr',\n 'mgrs',\n 'mhd',\n 'mi.verantw',\n 'mil.bed',\n 'mil.ger',\n 'mil',\n 'min.j.omz',\n 'min.just.circ',\n 'min',\n 'mitt',\n 'mnd',\n 'mod',\n 'mon',\n 'monde ass',\n 'mouv.comm',\n 'mr',\n 'ms',\n 'muz',\n 'mv',\n 'mva ii inv',\n 'mva inv',\n 'n cont',\n 'n.a.g',\n 'n.a.v',\n 'n.a',\n 'n.b',\n 'n.c',\n 'n.chr',\n 'n.d.r',\n 'n.d',\n 'n.e.a',\n 'n.g',\n 'n.h.b.c',\n 'n.j.b',\n 'n.j.w',\n 'n.j',\n 'n.l',\n 'n.m.m',\n 'n.m',\n 'n.n.b',\n 'n.n.g',\n 'n.n.k',\n 'n.n',\n 'n.o.m',\n 'n.o.t.k',\n 'n.rapp',\n 'n.tijd.pol',\n 'n.v.d.r',\n 'n.v.d.v',\n 'n.v.h',\n 'n.v.o.b',\n 'n.v.t',\n 'n.v',\n 'n',\n 'nat.besch.w',\n 'nat.omb',\n 'nat.pers',\n 'ned.cult.r',\n 'neg.verkl',\n 'nhd',\n 'nieuw arch',\n 'njcm-bull',\n 'nl',\n 'nnd',\n 'no',\n 'not.fisc.m',\n 'not.w',\n 'not.wet',\n 'not',\n 'nr',\n 'nrs',\n 'nste',\n 'nt',\n 'numism',\n 'o.a',\n 'o.b',\n 'o.c',\n 'o.g.v',\n 'o.g',\n 'o.i.d',\n 'o.i',\n 'o.m',\n 'o.o.d',\n 'o.o.v',\n 'o.o',\n 'o.p',\n 'o.r',\n 'o.regl',\n 'o.s',\n 'o.t.s',\n 'o.t.t.t',\n 'o.t.t.z',\n 'o.t.t',\n 'o.tk.t',\n 'o.v.t.t',\n 'o.v.t',\n 'o.v.tk.t',\n 'o.v.v',\n 'o',\n 'ob',\n 'obsv',\n 'octr.gem.regl',\n 'octr.regl',\n 'octr',\n 'oe',\n 'oecd mod',\n 'off.pol',\n 'ofra',\n 'ohd',\n 'omb',\n 'omnia frat',\n 'omnil',\n 'omz',\n 'on.ww',\n 'onderr',\n 'onfrank',\n 'onteig.w',\n 'ontw',\n 'onuitg',\n 'onz',\n 'oorl.w',\n 'op.cit',\n 'opin.pa',\n 'opm',\n 'or',\n 'ord.br',\n 'ord.gem',\n 'ors',\n 'orth',\n 'os',\n 'osm',\n 'ov.w.i',\n 'ov.w.ii',\n 'ov.ww',\n 'ov',\n 'overg.w',\n 'overw',\n 'ovkst',\n 'ow kadasterw',\n 'oz',\n 'p.& b',\n 'p.a.o',\n 'p.a',\n 'p.b.o',\n 'p.e',\n 'p.g',\n 'p.j',\n 'p.m.a',\n 'p.m',\n 'p.o.j.t',\n 'p.o',\n 'p.p',\n 'p.v.s',\n 'p.v',\n 'p',\n 'pachtw',\n 'pag',\n 'pan',\n 'pand.b',\n 'pand.pér',\n 'parl.gesch',\n 'parl.st',\n 'parl',\n 'part.arb',\n 'pas',\n 'pasin',\n 'pat',\n 'pb.c',\n 'pb.l',\n 'pens',\n 'pensioenverz',\n 'per.ber.i.b.r',\n 'per.ber.ibr',\n 'pers',\n 'pft',\n 'pg wijz.rv',\n 'pk',\n 'pktg',\n 'pli jur',\n 'plv',\n 'po',\n 'pol.off',\n 'pol.r',\n 'pol.w',\n 'pol',\n 'politie j',\n 'postbankw',\n 'postw',\n 'pp',\n 'pr',\n 'preadv',\n 'pres',\n 'prf',\n 'prft',\n 'prg',\n 'prijz.w',\n 'pro jus',\n 'proc',\n 'procesregl',\n 'prof',\n 'prot',\n 'prov.b',\n 'prov.instr.h.m.g',\n 'prov.regl',\n 'prov.verord',\n 'prov.w',\n 'prov',\n 'publ.cour eur.d.h',\n 'publ.eur.court h.r',\n 'publ',\n 'pun',\n 'pw',\n 'q.b.d',\n 'q.e.d',\n 'q.q',\n 'q.r',\n 'r.a.b.g',\n 'r.a.c.e',\n 'r.a.j.b',\n 'r.b.d.c',\n 'r.b.d.i',\n 'r.b.s.s',\n 'r.c.b',\n 'r.c.d.c',\n 'r.c.j.b',\n 'r.c.s.j',\n 'r.c',\n 'r.cass',\n 'r.d.c',\n 'r.d.i.d.c',\n 'r.d.i',\n 'r.d.j.b',\n 'r.d.j.p',\n 'r.d.p.c',\n 'r.d.s',\n 'r.d.t.i',\n 'r.e',\n 'r.f.s.v.p',\n 'r.g.a.r',\n 'r.g.c.f',\n 'r.g.d.c',\n 'r.g.f',\n 'r.g.z',\n 'r.h.a',\n 'r.i.c',\n 'r.i.d.a',\n 'r.i.e.j',\n 'r.i.n',\n 'r.i.s.a',\n 'r.j.d.a',\n 'r.j.i',\n 'r.k',\n 'r.l.g.b',\n 'r.l',\n 'r.med.rechtspr',\n 'r.med',\n 'r.n.b',\n 'r.o',\n 'r.orde apoth',\n 'r.ov',\n 'r.p.d.b',\n 'r.p.o.t',\n 'r.p.r.j',\n 'r.p.s',\n 'r.p',\n 'r.r.d',\n 'r.r.s',\n 'r.s.v.p',\n 'r.s',\n 'r.stvb',\n 'r.t.d.f',\n 'r.t.d.h',\n 'r.t.l',\n 'r.trim.dr.eur',\n 'r.v.a',\n 'r.v.k',\n 'r.verkb',\n 'r.w.d',\n 'r.w',\n 'r',\n 'rap.ann.c.a',\n 'rap.ann.c.c',\n 'rap.ann.c.e',\n 'rap.ann.c.s.j',\n 'rap.ann.ca',\n 'rap.ann.cass',\n 'rap.ann.cc',\n 'rap.ann.ce',\n 'rap.