UNPKG

unicode-to-plain-text

Version:

Convert fancy Unicode text to plain ASCII with smart language preservation

94 lines (93 loc) 3.6 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.mapCharacters = void 0;
const UNICODE_RANGES_1 = require("./maps/UNICODE_RANGES");
const MATH_SYMBOLS_MAP_1 = require("./maps/MATH_SYMBOLS_MAP");
const MIRRORED_MAP_1 = require("./maps/MIRRORED_MAP");
const CANADIAN_ABORIGINAL_MAP_1 = require("./maps/CANADIAN_ABORIGINAL_MAP");
const SMALL_CAPS_MAP_1 = require("./maps/SMALL_CAPS_MAP");
const STROKED_MAP_1 = require("./maps/STROKED_MAP");
const SUPERSCRIPT_MAP_1 = require("./maps/SUPERSCRIPT_MAP");
const SUBSCRIPT_MAP_1 = require("./maps/SUBSCRIPT_MAP");
const CURRENCY_MAP_1 = require("./maps/CURRENCY_MAP");
const CIRCLED_NUMBERS_MAP_1 = require("./maps/CIRCLED_NUMBERS_MAP");
const BLACK_CIRCLES_MAP_1 = require("./maps/BLACK_CIRCLES_MAP");
const DARK_SQUARES_MAP_1 = require("./maps/DARK_SQUARES_MAP");
const REGIONAL_INDICATORS_MAP_1 = require("./maps/REGIONAL_INDICATORS_MAP");
const PARENTHESIZED_MAP_1 = require("./maps/PARENTHESIZED_MAP");
const MISCELLANEOUS_MAP_1 = require("./maps/MISCELLANEOUS_MAP");

// Inclusive [start, end] code point ranges. They are shared by the detectors
// and by getPreserveSet so the "detected" and "preserved" ranges cannot drift.
const GREEK_RANGES = [
    [0x0370, 0x03ff],
    [0x1f00, 0x1fff],
];
const ETHIOPIC_RANGES = [[0x1200, 0x137f]];

/**
 * Reports whether a code point lies inside any of the given inclusive ranges.
 *
 * @param {number} code - Code point to test.
 * @param {Array<[number, number]>} ranges - Inclusive [start, end] pairs.
 * @returns {boolean} True when `code` falls inside at least one range.
 */
const isInRanges = (code, ranges) => ranges.some(([start, end]) => code >= start && code <= end);

/**
 * Replaces each character of `text` with its entry in the lazily built
 * character map, leaving characters without an entry unchanged.
 *
 * Characters in the Greek or Ethiopic ranges are left untouched when the text
 * is judged to contain real Greek / Ethiopic writing (see hasRealGreek and
 * hasRealEthiopic).
 *
 * @param {string} text - Input text; iterated by code point.
 * @returns {string} The text with mapped characters substituted.
 */
const mapCharacters = (text) => {
    const preserve = getPreserveSet(text);
    const map = getCharMap();
    return Array.from(text)
        .map((char) => (preserve.has(char) ? char : (map.get(char) ?? char)))
        .join('');
};
exports.mapCharacters = mapCharacters;

/**
 * Builds the set of characters that must not be mapped for this input.
 *
 * @param {string} text - Input text to inspect.
 * @returns {Set<string>} Every character of the Greek and/or Ethiopic ranges
 *   when the corresponding detector fires; empty otherwise.
 */
const getPreserveSet = (text) => {
    const set = new Set();
    if (hasRealGreek(text)) {
        for (const [start, end] of GREEK_RANGES) addRange(set, start, end);
    }
    if (hasRealEthiopic(text)) {
        for (const [start, end] of ETHIOPIC_RANGES) addRange(set, start, end);
    }
    return set;
};

/**
 * Reports whether the text contains at least one character in the Greek
 * ranges (U+0370–U+03FF or U+1F00–U+1FFF).
 *
 * @param {string} text - Input text to inspect.
 * @returns {boolean} True when any Greek-range character is present.
 */
const hasRealGreek = (text) => [...text].some((char) => isInRanges(char.codePointAt(0), GREEK_RANGES));

/**
 * Heuristically decides whether the text contains real Ethiopic writing.
 *
 * Returns true as soon as more than five characters in U+1200–U+137F have been
 * seen, or, for shorter inputs, when there are at least two such characters
 * and a space directly follows one of them.
 *
 * @param {string} text - Input text to inspect.
 * @returns {boolean} True when the text looks like genuine Ethiopic script.
 */
const hasRealEthiopic = (text) => {
    let count = 0;
    let hasSpace = false;
    let inEthiopicWord = false;
    for (const char of text) {
        if (isInRanges(char.codePointAt(0), ETHIOPIC_RANGES)) {
            if (++count > 5) return true;
            inEthiopicWord = true;
        } else {
            // The space test has to live in this branch: a space is never an
            // Ethiopic character, so testing for it in the branch above could
            // never succeed and hasSpace would stay false forever.
            if (inEthiopicWord && char === ' ') hasSpace = true;
            inEthiopicWord = false;
        }
    }
    return hasSpace && count > 1;
};

/**
 * Adds every character from `start` to `end` (inclusive code points) to `set`.
 *
 * @param {Set<string>} set - Set to extend in place.
 * @param {number} start - First code point to add.
 * @param {number} end - Last code point to add.
 * @returns {void}
 */
const addRange = (set, start, end) => {
    for (let i = start; i <= end; i++) set.add(String.fromCodePoint(i));
};

// Cache for the combined character map; populated on first use by getCharMap.
let CHAR_MAP = null;

/**
 * Assembles the combined character map from the imported tables.
 *
 * UNICODE_RANGES entries are read as [start, base, length]: `length`
 * consecutive code points starting at `start` map onto consecutive UTF-16
 * code units starting at the first unit of `base`. The individual map objects
 * are applied afterwards in array order, so a later table overrides both the
 * range entries and any earlier table for the same key.
 *
 * @returns {Map<string, string>} Map from source character to replacement.
 */
const buildCharMap = () => {
    const map = new Map();
    for (const [start, base, length] of UNICODE_RANGES_1.UNICODE_RANGES) {
        const baseCode = base.charCodeAt(0);
        for (let i = 0; i < length; i++) {
            map.set(String.fromCodePoint(start + i), String.fromCharCode(baseCode + i));
        }
    }
    const maps = [
        MATH_SYMBOLS_MAP_1.MATH_SYMBOLS_MAP,
        MIRRORED_MAP_1.MIRRORED_MAP,
        CANADIAN_ABORIGINAL_MAP_1.CANADIAN_ABORIGINAL_MAP,
        SMALL_CAPS_MAP_1.SMALL_CAPS_MAP,
        STROKED_MAP_1.STROKED_MAP,
        SUPERSCRIPT_MAP_1.SUPERSCRIPT_MAP,
        SUBSCRIPT_MAP_1.SUBSCRIPT_MAP,
        CURRENCY_MAP_1.CURRENCY_MAP,
        CIRCLED_NUMBERS_MAP_1.CIRCLED_NUMBERS_MAP,
        BLACK_CIRCLES_MAP_1.BLACK_CIRCLES_MAP,
        DARK_SQUARES_MAP_1.DARK_SQUARES_MAP,
        REGIONAL_INDICATORS_MAP_1.REGIONAL_INDICATORS_MAP,
        PARENTHESIZED_MAP_1.PARENTHESIZED_MAP,
        MISCELLANEOUS_MAP_1.MISCELLANEOUS_MAP,
    ];
    for (const m of maps) {
        for (const [f, p] of Object.entries(m)) map.set(f, p);
    }
    return map;
};

/**
 * Returns the combined character map, building it on the first call and
 * reusing the cached instance afterwards.
 *
 * @returns {Map<string, string>} The shared character map.
 */
const getCharMap = () => (CHAR_MAP ?? (CHAR_MAP = buildCharMap()));