UNPKG

stdnum

Version:
306 lines (281 loc) 11.7 kB
// import PREFIXES from 'gb/nino-prefixes';
import * as exceptions from '../../exceptions';

/**
 * Map visually similar unicode values to ASCII
 *  - solves the cut-n-paste from PDF/Word
 *
 * Each key is a single code point; each value is the ASCII character it is
 * replaced with. The ASCII characters themselves are listed as identity
 * entries at the head of every group.
 */
const mapped: Record<string, string> = {
  // Hyphens, dashes and minus signs
  '-': '-', // HYPHEN-MINUS
  '\u{00AD}': '-', // SOFT HYPHEN
  '\u{00AF}': '-', // MACRON
  '\u{02D7}': '-', // MODIFIER LETTER MINUS SIGN
  '\u{058A}': '-', // ARMENIAN HYPHEN
  '\u{05BE}': '-', // HEBREW PUNCTUATION MAQAF
  '\u{180A}': '-', // MONGOLIAN NIRUGU
  '\u{2010}': '-', // HYPHEN
  '\u{2011}': '-', // NON-BREAKING HYPHEN
  '\u{2012}': '-', // FIGURE DASH
  '\u{2013}': '-', // EN DASH
  '\u{2014}': '-', // EM DASH
  '\u{2015}': '-', // HORIZONTAL BAR
  '\u{203E}': '-', // OVERLINE
  '\u{2043}': '-', // HYPHEN BULLET
  '\u{207B}': '-', // SUPERSCRIPT MINUS
  '\u{208B}': '-', // SUBSCRIPT MINUS
  '\u{2212}': '-', // MINUS SIGN
  '\u{23AF}': '-', // HORIZONTAL LINE EXTENSION
  '\u{23BA}': '-', // HORIZONTAL SCAN LINE-1
  '\u{23BB}': '-', // HORIZONTAL SCAN LINE-3
  '\u{23BC}': '-', // HORIZONTAL SCAN LINE-7
  '\u{23BD}': '-', // HORIZONTAL SCAN LINE-9
  '\u{23E4}': '-', // STRAIGHTNESS
  '\u{FF0D}': '-', // FULLWIDTH HYPHEN-MINUS
  '\u{FE63}': '-', // SMALL HYPHEN-MINUS
  '\u{FFE3}': '-', // FULLWIDTH MACRON

  // Asterisks and stars
  '*': '*',
  '\u{066D}': '*', // ARABIC FIVE POINTED STAR
  '\u{070D}': '*', // SYRIAC HARKLEAN ASTERISCUS
  '\u{2055}': '*', // FLOWER PUNCTUATION MARK
  '\u{A60E}': '*', // VAI FULL STOP
  '\u{2217}': '*', // ASTERISK OPERATOR
  '\u{22C6}': '*', // STAR OPERATOR
  '\u{204E}': '*', // LOW ASTERISK
  '\u{2731}': '*', // HEAVY ASTERISK
  '\u{2732}': '*', // OPEN CENTRE ASTERISK
  '\u{2733}': '*', // EIGHT SPOKED ASTERISK
  '\u{273A}': '*', // SIXTEEN POINTED ASTERISK
  '\u{273B}': '*', // TEARDROP-SPOKED ASTERISK
  '\u{273C}': '*', // OPEN CENTRE TEARDROP-SPOKED ASTERISK
  '\u{273D}': '*', // HEAVY TEARDROP-SPOKED ASTERISK
  '\u{2743}': '*', // HEAVY TEARDROP-PINWHEEL ASTERISK
  '\u{2749}': '*', // BALLOON-SPOKED ASTERISK
  '\u{274A}': '*', // EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
  '\u{274B}': '*', // HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
  '\u{FE61}': '*', // SMALL ASTERISK
  '\u{FF0A}': '*', // FULLWIDTH ASTERISK

  // Commas
  ',': ',',
  '\u{00B8}': ',', // CEDILLA
  '\u{060C}': ',', // ARABIC COMMA
  '\u{066B}': ',', // ARABIC DECIMAL SEPARATOR
  '\u{066C}': ',', // ARABIC THOUSANDS SEPARATOR
  '\u{201A}': ',', // SINGLE LOW-9 QUOTATION MARK
  '\u{2032}': ',', // PRIME
  '\u{2E34}': ',', // RAISED COMMA
  '\u{3001}': ',', // IDEOGRAPHIC COMMA
  '\u{FF0C}': ',', // FULLWIDTH COMMA
  '\u{FE11}': ',', // PRESENTATION FORM FOR VERTICAL COMMA
  '\u{FE50}': ',', // SMALL COMMA
  '\u{FE51}': ',', // SMALL IDEOGRAPHIC COMMA
  '\u{FF64}': ',', // HALFWIDTH IDEOGRAPHIC COMMA

  // Full stops and dots
  '.': '.', // FULL STOP
  '\u{00B7}': '.', // MIDDLE DOT
  '\u{02D9}': '.', // DOT ABOVE
  '\u{0387}': '.', // GREEK ANO TELEIA
  '\u{06D4}': '.', // ARABIC FULL STOP
  '\u{0701}': '.', // SYRIAC SUPRALINEAR FULL STOP
  '\u{0702}': '.', // SYRIAC SUBLINEAR FULL STOP
  '\u{0830}': '.', // SAMARITAN PUNCTUATION NEQUDAA
  '\u{0F0B}': '.', // TIBETAN MARK INTERSYLLABIC TSHEG
  '\u{0F0C}': '.', // TIBETAN MARK DELIMITER TSHEG BSTAR
  // prettier-ignore
  "\u{1427}": ".", // CANADIAN SYLLABICS FINAL MIDDLE DOT
  '\u{16EB}': '.', // RUNIC SINGLE PUNCTUATION
  '\u{2219}': '.', // BULLET OPERATOR
  '\u{2022}': '.', // BULLET
  '\u{2024}': '.', // ONE DOT LEADER
  '\u{2027}': '.', // HYPHENATION POINT
  '\u{22C5}': '.', // DOT OPERATOR
  '\u{2E31}': '.', // WORD SEPARATOR MIDDLE DOT
  '\u{2E33}': '.', // RAISED DOT
  '\u{3002}': '.', // IDEOGRAPHIC FULL STOP
  '\u{30FB}': '.', // KATAKANA MIDDLE DOT
  '\u{FE52}': '.', // SMALL FULL STOP
  '\u{FF0E}': '.', // FULLWIDTH FULL STOP
  '\u{FF65}': '.', // HALFWIDTH KATAKANA MIDDLE DOT
  '\u{FBB2}': '.', // ARABIC SYMBOL DOT ABOVE
  '\u{FBB3}': '.', // ARABIC SYMBOL DOT BELOW
  '\u{10101}': '.', // AEGEAN WORD SEPARATOR DOT
  '\u{1091F}': '.', // PHOENICIAN WORD SEPARATOR
  '\u{10A50}': '.', // KHAROSHTHI PUNCTUATION DOT

  // Slashes
  '/': '/',
  '\u{2044}': '/', // FRACTION SLASH
  '\u{2215}': '/', // DIVISION SLASH
  '\u{29F8}': '/', // BIG SOLIDUS
  '\u{FF0F}': '/', // FULLWIDTH SOLIDUS
  '\u{083C}': '/', // SAMARITAN PUNCTUATION ARKAANU
  '\u{27CB}': '/', // MATHEMATICAL RISING DIAGONAL

  // Colons
  ':': ':',
  '\u{1361}': ':', // ETHIOPIC WORDSPACE
  '\u{16EC}': ':', // RUNIC MULTIPLE PUNCTUATION
  '\u{1804}': ':', // MONGOLIAN COLON
  '\u{FE13}': ':', // PRESENTATION FORM FOR VERTICAL COLON
  '\u{FE30}': ':', // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
  '\u{FF1A}': ':', // FULLWIDTH COLON
  '\u{FE55}': ':', // SMALL COLON

  // Whitespace
  ' ': ' ',
  '\u{0009}': ' ', // TAB
  '\u{000B}': ' ', // VERTICAL TAB
  '\u{000C}': ' ', // FORM FEED
  '\u{00A0}': ' ', // NO-BREAK-SPACE
  '\u{1680}': ' ', // Ogham Space Mark
  '\u{2000}': ' ', // EN QUAD
  '\u{2001}': ' ', // EM QUAD
  '\u{2002}': ' ', // EN SPACE
  '\u{2003}': ' ', // EM SPACE
  '\u{2004}': ' ', // THREE-PER-EM SPACE
  '\u{2005}': ' ', // FOUR-PER-EM SPACE
  '\u{2006}': ' ', // SIX-PER-EM SPACE
  '\u{2007}': ' ', // FIGURE SPACE
  '\u{2008}': ' ', // PUNCTUATION SPACE
  '\u{2009}': ' ', // THIN SPACE
  '\u{200A}': ' ', // HAIR SPACE
  '\u{2028}': ' ', // LINE SEPARATOR
  '\u{2029}': ' ', // PARAGRAPH SEPARATOR
  '\u{202F}': ' ', // NARROW NO-BREAK SPACE
  '\u{205F}': ' ', // MEDIUM MATHEMATICAL SPACE
  '\u{3000}': ' ', // IDEOGRAPHIC SPACE

  // Apostrophes and single quotes
  "'": "'",
  '\u{0060}': "'", // GRAVE ACCENT
  '\u{00B4}': "'", // ACUTE ACCENT
  // prettier-ignore
  "\u{02BE}": "'", // MODIFIER LETTER RIGHT HALF RING
  // prettier-ignore
  "\u{02BF}": "'", // MODIFIER LETTER LEFT HALF RING
  // prettier-ignore
  "\u{02B9}": "'", // MODIFIER LETTER PRIME
  // prettier-ignore
  "\u{02BB}": "'", // MODIFIER LETTER TURNED COMMA
  // prettier-ignore
  "\u{02BC}": "'", // MODIFIER LETTER APOSTROPHE
  // prettier-ignore
  "\u{02C8}": "'", // MODIFIER LETTER VERTICAL LINE
  // prettier-ignore
  "\u{0300}":"'", // COMBINING GRAVE ACCENT
  '\u{0301}': "'", // COMBINING ACUTE ACCENT
  '\u{0312}': "'", // COMBINING TURNED COMMA ABOVE
  '\u{0313}': "'", // COMBINING COMMA ABOVE
  '\u{055A}': "'", // ARMENIAN APOSTROPHE
  '\u{201B}': "'", // SINGLE HIGH-REVERSED-9 QUOTATION MARK
  '\u{2018}': "'", // LEFT SINGLE QUOTATION MARK
  '\u{2019}': "'", // RIGHT SINGLE QUOTATION MARK

  // 0
  '0': '0',
  '\u{0660}': '0', // ARABIC-INDIC DIGIT ZERO
  '\u{06F0}': '0', // EASTERN-ARABIC DIGIT ZERO
  '\u{FF10}': '0', // FULLWIDTH DIGIT ZERO
  '\u{1D7CE}': '0', // MATHEMATICAL BOLD DIGIT ZERO
  '\u{1D7D8}': '0', // MATHEMATICAL DOUBLE-STRUCK DIGIT ZERO
  '\u{1D7E2}': '0', // MATHEMATICAL SANS-SERIF DIGIT ZERO
  '\u{1D7EC}': '0', // MATHEMATICAL SANS-SERIF BOLD DIGIT ZERO
  '\u{1D7F6}': '0', // MATHEMATICAL MONOSPACE DIGIT ZERO

  // 1
  '1': '1',
  '\u{0661}': '1', // ARABIC-INDIC DIGIT ONE
  '\u{06F1}': '1', // EASTERN-ARABIC DIGIT ONE
  '\u{FF11}': '1', // FULLWIDTH DIGIT ONE
  '\u{1D7CF}': '1', // MATHEMATICAL BOLD DIGIT ONE
  '\u{1D7D9}': '1', // MATHEMATICAL DOUBLE-STRUCK DIGIT ONE
  '\u{1D7E3}': '1', // MATHEMATICAL SANS-SERIF DIGIT ONE
  '\u{1D7ED}': '1', // MATHEMATICAL SANS-SERIF BOLD DIGIT ONE
  '\u{1D7F7}': '1', // MATHEMATICAL MONOSPACE DIGIT ONE

  // 2
  '2': '2',
  '\u{06F2}': '2', // EASTERN-ARABIC DIGIT TWO
  '\u{0662}': '2', // ARABIC-INDIC DIGIT TWO
  '\u{FF12}': '2', // FULLWIDTH DIGIT TWO
  '\u{1D7D0}': '2', // MATHEMATICAL BOLD DIGIT TWO
  '\u{1D7DA}': '2', // MATHEMATICAL DOUBLE-STRUCK DIGIT TWO
  '\u{1D7E4}': '2', // MATHEMATICAL SANS-SERIF DIGIT TWO
  '\u{1D7EE}': '2', // MATHEMATICAL SANS-SERIF BOLD DIGIT TWO
  '\u{1D7F8}': '2', // MATHEMATICAL MONOSPACE DIGIT TWO

  // 3
  '3': '3',
  '\u{06F3}': '3', // EASTERN-ARABIC DIGIT THREE
  '\u{0663}': '3', // ARABIC-INDIC DIGIT THREE
  '\u{FF13}': '3', // FULLWIDTH DIGIT THREE
  '\u{1D7D1}': '3', // MATHEMATICAL BOLD DIGIT THREE
  '\u{1D7DB}': '3', // MATHEMATICAL DOUBLE-STRUCK DIGIT THREE
  '\u{1D7E5}': '3', // MATHEMATICAL SANS-SERIF DIGIT THREE
  '\u{1D7EF}': '3', // MATHEMATICAL SANS-SERIF BOLD DIGIT THREE
  '\u{1D7F9}': '3', // MATHEMATICAL MONOSPACE DIGIT THREE

  // 4
  '4': '4',
  '\u{06F4}': '4', // EASTERN-ARABIC DIGIT FOUR
  '\u{0664}': '4', // ARABIC-INDIC DIGIT FOUR
  '\u{FF14}': '4', // FULLWIDTH DIGIT FOUR
  '\u{1D7D2}': '4', // MATHEMATICAL BOLD DIGIT FOUR
  '\u{1D7DC}': '4', // MATHEMATICAL DOUBLE-STRUCK DIGIT FOUR
  '\u{1D7E6}': '4', // MATHEMATICAL SANS-SERIF DIGIT FOUR
  '\u{1D7F0}': '4', // MATHEMATICAL SANS-SERIF BOLD DIGIT FOUR
  '\u{1D7FA}': '4', // MATHEMATICAL MONOSPACE DIGIT FOUR

  // 5
  '5': '5',
  '\u{06F5}': '5', // EASTERN-ARABIC DIGIT FIVE
  '\u{0665}': '5', // ARABIC-INDIC DIGIT FIVE
  '\u{FF15}': '5', // FULLWIDTH DIGIT FIVE
  '\u{1D7D3}': '5', // MATHEMATICAL BOLD DIGIT FIVE
  '\u{1D7DD}': '5', // MATHEMATICAL DOUBLE-STRUCK DIGIT FIVE
  '\u{1D7E7}': '5', // MATHEMATICAL SANS-SERIF DIGIT FIVE
  '\u{1D7F1}': '5', // MATHEMATICAL SANS-SERIF BOLD DIGIT FIVE
  '\u{1D7FB}': '5', // MATHEMATICAL MONOSPACE DIGIT FIVE

  // 6
  '6': '6',
  '\u{06F6}': '6', // EASTERN-ARABIC DIGIT SIX
  '\u{0666}': '6', // ARABIC-INDIC DIGIT SIX
  '\u{FF16}': '6', // FULLWIDTH DIGIT SIX
  '\u{1D7D4}': '6', // MATHEMATICAL BOLD DIGIT SIX
  '\u{1D7DE}': '6', // MATHEMATICAL DOUBLE-STRUCK DIGIT SIX
  '\u{1D7E8}': '6', // MATHEMATICAL SANS-SERIF DIGIT SIX
  '\u{1D7F2}': '6', // MATHEMATICAL SANS-SERIF BOLD DIGIT SIX
  '\u{1D7FC}': '6', // MATHEMATICAL MONOSPACE DIGIT SIX

  // 7
  '7': '7',
  '\u{06F7}': '7', // EASTERN-ARABIC DIGIT SEVEN
  '\u{0667}': '7', // ARABIC-INDIC DIGIT SEVEN
  '\u{FF17}': '7', // FULLWIDTH DIGIT SEVEN
  '\u{1D7D5}': '7', // MATHEMATICAL BOLD DIGIT SEVEN
  '\u{1D7DF}': '7', // MATHEMATICAL DOUBLE-STRUCK DIGIT SEVEN
  '\u{1D7E9}': '7', // MATHEMATICAL SANS-SERIF DIGIT SEVEN
  '\u{1D7F3}': '7', // MATHEMATICAL SANS-SERIF BOLD DIGIT SEVEN
  '\u{1D7FD}': '7', // MATHEMATICAL MONOSPACE DIGIT SEVEN

  // 8
  '8': '8',
  '\u{06F8}': '8', // EASTERN-ARABIC DIGIT EIGHT
  '\u{0668}': '8', // ARABIC-INDIC DIGIT EIGHT
  '\u{FF18}': '8', // FULLWIDTH DIGIT EIGHT
  '\u{1D7D6}': '8', // MATHEMATICAL BOLD DIGIT EIGHT
  '\u{1D7E0}': '8', // MATHEMATICAL DOUBLE-STRUCK DIGIT EIGHT
  '\u{1D7EA}': '8', // MATHEMATICAL SANS-SERIF DIGIT EIGHT
  '\u{1D7F4}': '8', // MATHEMATICAL SANS-SERIF BOLD DIGIT EIGHT
  '\u{1D7FE}': '8', // MATHEMATICAL MONOSPACE DIGIT EIGHT

  // 9
  '9': '9',
  '\u{06F9}': '9', // EASTERN-ARABIC DIGIT NINE
  '\u{0669}': '9', // ARABIC-INDIC DIGIT NINE
  '\u{FF19}': '9', // FULLWIDTH DIGIT NINE
  '\u{1D7D7}': '9', // MATHEMATICAL BOLD DIGIT NINE
  '\u{1D7E1}': '9', // MATHEMATICAL DOUBLE-STRUCK DIGIT NINE
  '\u{1D7EB}': '9', // MATHEMATICAL SANS-SERIF DIGIT NINE
  '\u{1D7F5}': '9', // MATHEMATICAL SANS-SERIF BOLD DIGIT NINE
  '\u{1D7FF}': '9', // MATHEMATICAL MONOSPACE DIGIT NINE
};

/**
 * Clean up visually similar unicode values and normalise the result.
 *
 * Processing order:
 *  1. every code point of `value` is replaced by its entry in `mapped`
 *     (code points without an entry are kept as they are);
 *  2. every resulting character that occurs in `deletechars` is removed,
 *     wherever it appears in the string (not only at the ends);
 *  3. the remainder is upper-cased;
 *  4. if `stripPrefix` is given and the upper-cased string starts with it
 *     (or with one of its entries), that prefix is cut off.
 *
 * @param value       the raw input to clean
 * @param deletechars characters to remove after mapping; defaults to a
 *                    single space
 * @param stripPrefix optional prefix, or list of prefixes, to remove from
 *                    the start of the cleaned value. Matching is
 *                    case-sensitive against the already upper-cased string,
 *                    and for a list the first entry that matches wins.
 * @returns `[cleaned, null]` on success, or `['', InvalidFormat]` when
 *          `value` is not a string
 */
export function cleanUnicode(
  value: string,
  deletechars = ' ',
  stripPrefix?: string | string[],
): [string, exceptions.InvalidFormat | null] {
  if (typeof value !== 'string') {
    return ['', new exceptions.InvalidFormat()];
  }

  // Don't use value.split("") -- doesn't work for "high" unicode
  // (spreading iterates by code point, split("") by UTF-16 code unit).
  const cleaned = [...value]
    .map(c => mapped[c] ?? c)
    .filter(c => !deletechars.includes(c))
    .join('')
    // Locale-independent on purpose: toLocaleUpperCase() follows the host's
    // default locale, so e.g. a Turkish environment would turn "i" into
    // "İ" (U+0130) and the same identifier would clean differently
    // depending on where the code runs.
    .toUpperCase();

  if (stripPrefix && stripPrefix.length !== 0) {
    let prefix;

    if (Array.isArray(stripPrefix)) {
      // First matching entry wins, in the order the caller supplied.
      prefix = stripPrefix.find(p => cleaned.startsWith(p));
    } else if (cleaned.startsWith(stripPrefix)) {
      prefix = stripPrefix;
    }

    if (prefix !== undefined) {
      return [cleaned.substring(prefix.length), null];
    }
  }

  return [cleaned, null];
}