UNPKG

@beenotung/tslib

Version:
233 lines (232 loc) 6.61 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.str_contains = str_contains;
exports.str_contains_any = str_contains_any;
exports.strToCapWords = strToCapWords;
exports.string_to_chars = string_to_chars;
exports.escapeRegExp = escapeRegExp;
exports.string_nbyte = string_nbyte;
exports.str_like = str_like;
exports.is_non_empty_string = is_non_empty_string;
exports.str_dos2unix = str_dos2unix;
exports.str_unix2dos = str_unix2dos;
exports.str_minus = str_minus;
exports.split_string_num = split_string_num;
exports.compare_string = compare_string;
exports.extract_lines = extract_lines;
exports.capitalize = capitalize;
exports.lcfirst = lcfirst;
exports.ucfirst = ucfirst;
exports.first_char = first_char;
exports.last_char = last_char;
exports.is_ascii_char = is_ascii_char;
exports.concat_words = concat_words;
exports.normalizeUnicode = normalizeUnicode;
exports.removeAccents = removeAccents;
exports.normalizeForSearch = normalizeForSearch;
const compare_1 = require("./compare");
const set_1 = require("./set");

/**
 * @description Test whether `target` contains `pattern` as a substring.
 *
 * @param {string} pattern - the substring to look for
 * @param {string} target - the text to search in
 * @param {boolean} [ignore_case=false] - when true, both sides are lower-cased before matching
 * @returns {boolean} true when `pattern` occurs in `target`
 */
function str_contains(pattern, target, ignore_case = false) {
    if (ignore_case) {
        return str_contains(pattern.toLowerCase(), target.toLowerCase());
    }
    return target.indexOf(pattern) !== -1;
}

/**
 * @description Test whether `target` contains at least one of `patterns`.
 *
 * @param {string[]} patterns - candidate substrings
 * @param {string} target - the text to search in
 * @param {boolean} [ignore_case=false] - forwarded to `str_contains`
 * @returns {boolean} true when any pattern matches; false for an empty list
 */
function str_contains_any(patterns, target, ignore_case = false) {
    return patterns.some(p => str_contains(p, target, ignore_case));
}

/**
 * @description Upper-case the first character of every space-separated word.
 * Only the space character `' '` counts as a word separator; the remaining
 * characters of each word are left untouched.
 *
 * @example
 * 'change the words' ~> 'Change The Words'
 *
 * @param {string} s - the input text
 * @returns {string} the text with each word's first character upper-cased
 */
function strToCapWords(s) {
    let res = '';
    let lastSpace = true;
    for (const c of s) {
        if (c === ' ') {
            lastSpace = true;
            res += ' ';
        }
        else {
            if (lastSpace) {
                res += c.toUpperCase();
                lastSpace = false;
            }
            else {
                res += c;
            }
        }
    }
    return res;
}

/**
 * @description Split a string into UTF-16 code units.
 * Characters outside the basic multilingual plane (e.g. emoji) are split into
 * their two surrogate halves; see `first_char` / `last_char` for
 * code-point-aware access.
 *
 * @param {string} s - the input text
 * @returns {string[]} one element per UTF-16 code unit
 */
function string_to_chars(s) {
    return s.split('');
}

/**
 * @description Escape regular-expression metacharacters so that `str` can be
 * embedded into a RegExp source and matched literally.
 *
 * source: https://stackoverflow.com/questions/1144783/how-to-replace-all-occurrences-of-a-string-in-javascript
 *
 * @param {string} str - the literal text
 * @returns {string} the text with each metacharacter prefixed by a backslash
 */
function escapeRegExp(str) {
    return str.replace(/([.*+?^=!:${}()|\[\]\/\\])/g, '\\$1');
}

/**
 * @description Count the bytes of `s` when encoded as UTF-8.
 * `encodeURI` emits one `%XX` escape per encoded byte and leaves the other
 * characters as single ASCII characters, so counting the separators matched
 * by the split gives the byte count.
 *
 * @param {string} s - the input text
 * @returns {number} number of `%XX` escapes plus unescaped characters
 * @throws {URIError} raised by `encodeURI` when `s` contains a lone surrogate
 */
function string_nbyte(s) {
    return encodeURI(s).split(/%..|./).length - 1;
}

/**
 * @description Loose similarity check: true when either string contains the other.
 *
 * @param {string} a - first text
 * @param {string} b - second text
 * @param {boolean} [ignore_case=true] - when true, both sides are upper-cased before matching
 * @returns {boolean} true when `a` includes `b` or `b` includes `a`
 */
function str_like(a, b, ignore_case = true) {
    if (ignore_case) {
        return str_like(a.toUpperCase(), b.toUpperCase(), false);
    }
    else {
        return a.includes(b) || b.includes(a);
    }
}

/**
 * @description Type guard for non-empty primitive strings.
 *
 * @param {unknown} s - any value
 * @returns {boolean} true only when `s` is a primitive string other than `''`
 */
function is_non_empty_string(s) {
    return typeof s === 'string' && s !== '';
}

/**
 * @description Convert DOS line endings (`\r\n`) to Unix line endings (`\n`).
 *
 * @param {string} s - the input text
 * @returns {string} the text with every `\r\n` replaced by `\n`
 */
function str_dos2unix(s) {
    return s.replace(/\r\n/g, '\n');
}

/**
 * @description Convert Unix line endings (`\n`) to DOS line endings (`\r\n`).
 * Every `\n` is expanded, including one that is already preceded by `\r`,
 * so the input is expected to be Unix-style text.
 *
 * @param {string} s - the input text
 * @returns {string} the text with every `\n` replaced by `\r\n`
 */
function str_unix2dos(s) {
    return s.replace(/\n/g, '\r\n');
}

/**
 * @description Apply the project's `setMinus` helper to the sets of code points
 * of `a` and `b`, then join the result back into a string.
 * NOTE(review): presumably yields the distinct characters of `a` that do not
 * occur in `b` - confirm against `./set`.
 *
 * @param {string} a - source of the characters to keep
 * @param {string} b - source of the characters to remove
 * @returns {string} the joined members of the resulting set
 */
function str_minus(a, b) {
    return Array.from((0, set_1.setMinus)(new Set(a), new Set(b))).join('');
}

/**
 * Read the decimal digit at position `i` of `s`.
 *
 * @param {string} s - the input text
 * @param {number} i - index of the code unit to inspect
 * @returns {number|false} the digit value 0..9, or `false` when the code unit is not a digit
 */
function toNum(s, i) {
    const code = s.charCodeAt(i);
    // '0'..'9' are char codes 48..57 inclusive; 58 is ':' and must not be
    // accepted as a digit.
    if (48 <= code && code <= 57) {
        return code - 48;
    }
    else {
        return false;
    }
}

/**
 * @description Split a string into alternating runs of non-digit text and
 * decimal numbers. Digit runs are converted to numbers, so leading zeros are
 * dropped and very long runs lose precision beyond the range of a double.
 *
 * @example
 * 'file12_part3' ~> ['file', 12, '_part', 3]
 *
 * @param {string} s - the input text
 * @returns {Array<string|number>} the runs in their original order; empty for `''`
 */
function split_string_num(s) {
    const parts = [];
    let text = '';
    // null while outside of a digit run, otherwise the value accumulated so far
    let num = null;
    // single pass instead of mutual recursion, so that long alternating
    // inputs cannot exhaust the call stack
    for (let i = 0; i < s.length; i++) {
        const digit = toNum(s, i);
        if (digit === false) {
            if (num !== null) {
                parts.push(num);
                num = null;
            }
            text += s[i];
        }
        else {
            if (text.length > 0) {
                parts.push(text);
                text = '';
            }
            num = num === null ? digit : num * 10 + digit;
        }
    }
    if (num !== null) {
        parts.push(num);
    }
    else if (text.length > 0) {
        parts.push(text);
    }
    return parts;
}

/**
 * @description Compare two strings run by run, using the runs produced by
 * `split_string_num` and the project's `compare` helper for each pair.
 * When all shared runs are equal, the number of runs decides.
 * NOTE(review): presumably intended as a natural-order comparator
 * (e.g. 'a2' before 'a10') - the exact ordering depends on `./compare`.
 *
 * @param {string} a - first text
 * @param {string} b - second text
 * @returns {number} the result of the first non-zero `compare`, or of comparing the run counts
 */
function compare_string(a, b) {
    const as = split_string_num(a);
    const bs = split_string_num(b);
    const n = Math.min(as.length, bs.length);
    for (let i = 0; i < n; i++) {
        const res = (0, compare_1.compare)(as[i], bs[i]);
        if (res !== 0) {
            return res;
        }
    }
    return (0, compare_1.compare)(as.length, bs.length);
}

/**
 * @description Split text on `\n`, trim every line and drop the empty ones.
 *
 * @param {string} s - the input text
 * @returns {string[]} the trimmed, non-empty lines
 */
function extract_lines(s) {
    return s
        .split('\n')
        .map(s => s.trim())
        .filter(s => s);
}

/**
 * @description Upper-case the first code unit and lower-case the rest,
 * using the locale-aware case mappings.
 *
 * @param {string} word - the input text
 * @returns {string} the capitalized text; `''` for an empty input
 */
function capitalize(word) {
    // charAt(0) yields '' for an empty string, whereas word[0] is undefined
    return word.charAt(0).toLocaleUpperCase() + word.substring(1).toLocaleLowerCase();
}

/**
 * @description Lower-case the first code unit, leaving the rest untouched.
 * Using perl naming conversion.
 *
 * @param {string} word - the input text
 * @returns {string} the converted text; `''` for an empty input
 */
function lcfirst(word) {
    // charAt(0) yields '' for an empty string, whereas word[0] is undefined
    return word.charAt(0).toLocaleLowerCase() + word.substring(1);
}

/**
 * @description Upper-case the first code unit, leaving the rest untouched.
 * Using perl naming conversion.
 *
 * @param {string} word - the input text
 * @returns {string} the converted text; `''` for an empty input
 */
function ucfirst(word) {
    // charAt(0) yields '' for an empty string, whereas word[0] is undefined
    return word.charAt(0).toLocaleUpperCase() + word.substring(1);
}

/**
 * @description Get the first code point of the text.
 * Aware of unicode, e.g. emoji.
 *
 * @param {string} text - the input text
 * @returns {string|null} the first code point, or `null` for an empty text
 */
function first_char(text) {
    for (const char of text) {
        return char;
    }
    return null;
}

/**
 * @description Get the last code point of the text.
 * Aware of unicode, e.g. emoji. Iterates over the whole text.
 *
 * @param {string} text - the input text
 * @returns {string|null} the last code point, or `null` for an empty text
 */
function last_char(text) {
    let last = null;
    for (const char of text) {
        last = char;
    }
    return last;
}

/**
 * @description Test whether `char` is a single code unit with a char code
 * below 256. Despite the name, this accepts the Latin-1 range (e.g. 'é') in
 * addition to 7-bit ASCII.
 *
 * @param {string|null|undefined} char - a single character, e.g. from `first_char`
 * @returns {boolean} false for `null`, `undefined`, and anything that is not one code unit long
 */
function is_ascii_char(char) {
    return char != null && char.length === 1 && char.charCodeAt(0) < 256;
}

/**
 * @description Join two text fragments. When the character on either side of
 * the joint passes `is_ascii_char`, the whitespace around the joint is trimmed
 * and exactly one space is inserted; otherwise the fragments are concatenated
 * as they are.
 *
 * @param {string} start - the leading fragment
 * @param {string} end - the trailing fragment
 * @returns {string} the joined text
 */
function concat_words(start, end) {
    return is_ascii_char(first_char(end)) || is_ascii_char(last_char(start))
        ? start.trimEnd() + ' ' + end.trimStart()
        : start + end;
}

/**
 * @description Normalize Unicode representation to NFC (Canonical Composition).
 * Preserves case for case-sensitive comparisons.
 *
 * The same visible character can have multiple Unicode representations:
 * - 'Amélie' with é as single code point (\u00e9)
 * - 'Amélie' with é + combining accent (\u0065\u0301)
 *
 * NFC normalization condenses these variants into a single canonical form,
 * useful for storing normalized text and matching in later searches.
 *
 * Source: https://stackoverflow.com/a/63013732/3156509
 *
 * @example
 * 'Amélie' (composed) vs 'Amélie' (decomposed) -> same NFC output
 * 'CAFÉ' -> 'CAFÉ' (case preserved)
 *
 * @param {string} text - the input text
 * @returns {string} the NFC-normalized text
 */
function normalizeUnicode(text) {
    return text.normalize('NFC');
}

/**
 * @description Remove accent marks (diacritics) from text.
 * Uses NFD normalization to decompose characters, then removes the combining
 * marks in the range U+0300..U+036F. The result stays in decomposed form.
 *
 * @example
 * 'Café' -> 'Cafe'
 * 'résumé' -> 'resume'
 * 'Amélie' -> 'Amelie'
 * 'München' -> 'Munchen'
 * 'naïve' -> 'naive'
 *
 * @param {string} text - the input text
 * @returns {string} the text without combining diacritical marks
 */
function removeAccents(text) {
    return text.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
}

/**
 * @description Normalize text for search: NFC normalize, lowercase, remove accents, trim.
 *
 * @example
 * 'Café' -> 'cafe'
 * 'résumé' -> 'resume'
 * 'Amélie' -> 'amelie'
 *
 * @param {string} text - the input text
 * @returns {string} the lower-cased, accent-free, trimmed text
 */
function normalizeForSearch(text) {
    return removeAccents(normalizeUnicode(text).toLowerCase()).trim();
}