UNPKG

shelving

Version:

Toolkit for using data in JavaScript.

170 lines (169 loc) 8.48 kB
import { RequiredError } from "../error/RequiredError.js";
import { ValueError } from "../error/ValueError.js";
import { requireArray } from "./array.js";
import { isBetween } from "./number.js";

/**
 * Is a value a string (optionally with specified min/max length).
 * - Length is measured with `.length`, i.e. in UTF-16 code units.
 *
 * @param value The unknown value to test.
 * @param min Minimum allowed length (inclusive, defaults to `0`).
 * @param max Maximum allowed length (inclusive, defaults to no limit).
 * @returns `true` if `value` is a string whose length is within `min` and `max`.
 */
export function isString(value, min = 0, max = Number.POSITIVE_INFINITY) {
	return typeof value === "string" && value.length >= min && value.length <= max;
}

/**
 * Assert that a value is a string (optionally with specified min/max length).
 *
 * @param caller Function passed through to `RequiredError` alongside the received value.
 * @throws RequiredError If `value` is not a string or its length is outside `min` and `max`.
 */
export function assertString(value, min, max, caller = assertString) {
	if (!isString(value, min, max))
		throw new RequiredError(`Must be string${min !== undefined || max !== undefined ? ` with ${min ?? 0} to ${max ?? "∞"} characters` : ""}`, {
			received: value,
			caller,
		});
}

/**
 * Convert an unknown value to a string, or return `undefined` if conversion fails.
 * - Strings are returned unchanged.
 * - Numbers are converted with `toString()`.
 * - Valid `Date` instances are converted with `toISOString()`.
 * - Anything else (including an invalid `Date`) returns `undefined`.
 */
export function getString(value) {
	if (typeof value === "string") return value;
	if (typeof value === "number") return value.toString();
	// `toISOString()` throws `RangeError` for an invalid date, so treat that as a failed conversion instead.
	if (value instanceof Date) return Number.isNaN(value.getTime()) ? undefined : value.toISOString();
	return undefined;
}

/**
 * Convert a possible string to a string (optionally with specified min/max length), or throw `RequiredError` if conversion fails.
 *
 * @throws RequiredError If `value` cannot be converted by `getString()` or the converted string's length is outside `min` and `max`.
 */
export function requireString(value, min, max, caller = requireString) {
	const str = getString(value);
	assertString(str, min, max, caller);
	return str;
}

/** Does a string have a length between `min` and `max` (both inclusive). */
export function isStringBetween(str, min = 0, max = Number.POSITIVE_INFINITY) {
	return str.length >= min && str.length <= max;
}

/**
 * Concatenate an iterable set of strings together.
 * - The input is first passed through `requireArray()`, then joined with `joiner`.
 */
export function joinStrings(strs, joiner = "") {
	return requireArray(strs, undefined, undefined, joinStrings).join(joiner);
}

/**
 * Sanitize a single line of text.
 * - Used when you're sanitising a single-line input, e.g. a title for something.
 * - Remove all control characters (except whitespace).
 * - Normalise runs of whitespace to one ` ` space,
 * - Trim whitespace from the start and end of the string.
 *
 * @example sanitizeText("\x00Nice!   "); // Returns `"Nice!"`
 */
export function sanitizeText(str) {
	return str
		.replace(/[^\P{C}\s]/gu, "") // Strip control characters (except whitespace).
		.replace(/\s+/gu, " ") // Normalise runs of whitespace to one ` ` space.
		.trim(); // Trim whitespace from the start and end of the string.
}

/**
 * Sanitize multiple lines of text.
 * - Used when you're sanitising a multi-line input, e.g. a description for something.
 * - Remove all control characters except whitespace.
 * - Normalise weird characters like paragraph separator, line separator, `\t` tab, `\r` carriage return.
 * - Normalise runs of whitespace to one ` ` space,
 * - Normalise indentation to tabs (four or more spaces are a tab, three or fewer spaces are removed).
 * - Allow spaces at the start of each line (for indentation) but trim the end of each line.
 * - Trim excess newlines at the start and end of the string and runs of more than two newlines in a row.
 *
 * The order of the replacements matters: each step relies on the normalisation done by the steps before it.
 */
export function sanitizeMultilineText(str) {
	return (
		str
			.replace(/[^\P{C}\s]/gu, "") // Strip control characters (except whitespace).
			// NOTE(review): `\x85` is a control character that `\s` does not match, so the step above appears to strip it before this step can see it.
			.replace(/\r\n?|\v|\x85|\u2028/g, "\n") // Normalise line separators to `\n` newline
			.replace(/\f|\u2029/g, "\n\n") // Normalise paragraph separators to `\n\n` double newline.
			.replace(/[^\S\n]+(?=\n|$)/g, "") // Trim trailing whitespace on each line.
			.replace(/^\n+|\n+$/g, "") // Trim leading and trailing newlines.
			.replace(/\n{3,}/g, "\n\n") // Normalise three or more `\n\n\n` newline to `\n\n` double newline.
			.replace(/(\S)[^\S\n]+/g, "$1 ") // Normalise runs of non-leading whitespace to ` ` single space.
			.replace(/[^\S\t\n]{4}/g, "\t") // Normalise leading ` ` four whitespace characters to a single `\t` tab.
			.replace(/(^|\t|\n)[^\S\t\n]+/g, "$1") // Remove leading whitespace that isn't a tab.
	);
}

/**
 * Simplify a string by removing anything that isn't a number, letter, or space.
 * - Normalizes the string with Unicode `NFKD` decomposition, so ligatures and accented letters are reduced to their base letters.
 * - Separators, connector punctuation (e.g. `_`), and dash punctuation (e.g. `-`) become a single ` ` space.
 * - Everything else is stripped without leaving a space. This includes control characters such as `\n` newline and `\t` tab.
 * - The result is trimmed and lowercased.
 * - Useful when you're running a query against a string entered by a user.
 *
 * @example simplifyString("Däve-is REALLY éxcitable—apparęntly!!! 😂"); // Returns "dave is really excitable apparently"
 *
 * @todo Convert confusables (e.g. `ℵ` alef symbol or `℮` estimate symbol) to their letterlike equivalent (e.g. `N` and `e`).
 */
export function simplifyString(str) {
	return str
		.normalize("NFKD") // Normalize ligatures (e.g. `ﬀ` to `ff`), combined characters (e.g. `Ⓜ` to `m`), accents (e.g. `å` to `a`).
		.replace(/[^\p{L}\p{N}\p{Z}\p{Pc}\p{Pd}]+/gu, "") // Strip characters that aren't `\p{L}` letters, `\p{N}` numbers, `\p{Z}` separators (e.g. ` ` space), `\p{Pc}` connector punctuation (e.g. `_` underscore), `\p{Pd}` dash punctuation (e.g. `-` hyphen).
		.replace(/[\p{Z}\p{Pc}\p{Pd}]+/gu, " ") // Normalise runs of `\p{Z}` separators, `\p{Pc}` connector punctuation, and `\p{Pd}` dash punctuation to ` ` single space.
		.trim()
		.toLowerCase();
}

/** Convert a string to a `kebab-case` URL slug, or return `undefined` if conversion resulted in an empty slug. */
export function getSlug(str) {
	return simplifyString(str).replaceAll(" ", "-") || undefined;
}

/**
 * Convert a string to a `kebab-case` URL slug, or throw `RequiredError` if conversion resulted in an empty slug.
 *
 * @throws RequiredError If the simplified string is empty.
 */
export function requireSlug(str, caller = requireSlug) {
	const slug = getSlug(str);
	if (!slug) throw new RequiredError("Invalid slug", { received: str, caller });
	return slug;
}

/** Convert a string to a unique ref e.g. `abc123`, or return `undefined` if conversion resulted in an empty string. */
export function getRef(str) {
	return simplifyString(str).replaceAll(" ", "") || undefined;
}

/**
 * Convert a string to a unique ref e.g. `abc123`, or throw `RequiredError` if conversion resulted in an empty string.
 *
 * @throws RequiredError If the simplified string is empty.
 */
export function requireRef(str, caller = requireRef) {
	const ref = getRef(str);
	if (!ref) throw new RequiredError("Invalid string ref", { received: str, caller });
	return ref;
}

/**
 * Return an array of the separate words and "quoted phrases" found in a string.
 * - Phrases enclosed "in quotes" (double or single) are a single word.
 * - Empty quoted phrases are skipped.
 * - Performs no processing on the words, so control chars, punctuation, symbols, and case are all preserved.
 *
 * Note: this splits words based on spaces, so won't work well with logographic writing systems e.g. kanji.
 */
export function getWords(str) {
	return Array.from(_getWords(str));
}

/** Lazily yield each non-empty word or quoted phrase matched by `WORD`. */
function* _getWords(str) {
	// Exactly one capture group is set per match: an unquoted run, a "double quoted" phrase, or a 'single quoted' phrase.
	for (const [, a, b, c] of str.matchAll(WORD)) {
		const word = a || b || c;
		if (word) yield word;
	}
}

const WORD = /([^\s"]+)|"([^"]*)"|'([^']*)'/g; // Runs of characters without spaces, or "quoted phrases"

/** Get the (trimmed) first full line of a string, i.e. everything before the first `\n` newline. */
export function getFirstLine(str) {
	const i = str.indexOf("\n");
	return (i >= 0 ? str.slice(0, i) : str).trim();
}

/**
 * Is the first character of a string an uppercase letter?
 * - Only the first UTF-16 code unit is checked, against the range 65 to 90 (ASCII `A` to `Z`), using `isBetween()`.
 */
export function isUppercaseLetter(str) {
	return isBetween(str.charCodeAt(0), 65, 90);
}

/**
 * Is the first character of a string a lowercase letter?
 * - Only the first UTF-16 code unit is checked, against the range 97 to 122 (ASCII `a` to `z`), using `isBetween()`.
 */
export function isLowercaseLetter(str) {
	return isBetween(str.charCodeAt(0), 97, 122);
}

/**
 * Limit a string to a given length.
 * - Strings that already fit within `maxLength` are returned unchanged.
 * - Stops at the last space inside `maxLength` (or cuts at `maxLength` if there is no usable space).
 * - Appends an `…` ellipses after the string (but only if a limit is applied).
 *
 * @param str The string to limit.
 * @param maxLength Maximum number of characters to keep from `str` (the appended string is not counted).
 * @param append String appended when the string is cut (defaults to `…`).
 */
export function limitString(str, maxLength, append = "…") {
	// A string of exactly `maxLength` characters is within the limit and must not be cut.
	if (str.length <= maxLength) return str;
	const lastSpace = str.lastIndexOf(" ", maxLength);
	return `${str.slice(0, lastSpace > 0 ? lastSpace : maxLength).trimEnd()}${append}`;
}

/**
 * Split a string into segments and validate the number of segments.
 * - If the split produces more than `max` segments, the excess segments are merged back into the last segment (rejoined with `separator`).
 *
 * @param str The string to split.
 * @param separator The separator to split on (also used to rejoin excess segments, and shown in the error message).
 * @param min Minimum number of segments required (defaults to `1`).
 * @param max Maximum number of segments returned (defaults to no limit).
 * @param caller Function passed through to `ValueError` alongside the received string.
 * @returns The array of non-empty segments.
 * @throws ValueError If there are fewer than `min` segments, or any segment is empty.
 */
export function splitString(str, separator, min = 1, max = Number.POSITIVE_INFINITY, caller = splitString) {
	const segments = str.split(separator);
	if (segments.length > max) segments.splice(max - 1, segments.length, segments.slice(max - 1).join(separator));
	if (segments.length < min || !segments.every(Boolean))
		throw new ValueError(`Must be string with ${min ?? 0} to ${max ?? "∞"} non-empty segments separated by "${separator}"`, {
			received: str,
			caller,
		});
	return segments;
}