/*
 * shelving
 * Version:
 * Toolkit for using data in JavaScript.
 * 170 lines (169 loc) • 8.48 kB
 * JavaScript
 */
import { RequiredError } from "../error/RequiredError.js";
import { ValueError } from "../error/ValueError.js";
import { requireArray } from "./array.js";
import { isBetween } from "./number.js";
/**
 * Check whether a value is a string whose length falls inside an inclusive range.
 *
 * @param value The value to test (can be anything).
 * @param min Smallest allowed `length` (defaults to `0`).
 * @param max Largest allowed `length` (defaults to `Number.POSITIVE_INFINITY`).
 * @returns `true` if `value` is a primitive string with `min <= length <= max`, otherwise `false`.
 */
export function isString(value, min = 0, max = Number.POSITIVE_INFINITY) {
	// Anything that isn't a primitive string fails immediately.
	if (typeof value !== "string") return false;
	const { length } = value;
	return length >= min && length <= max;
}
/**
 * Assert that a value is a string (optionally with a specified min/max length).
 *
 * @param value The value to check.
 * @param min Optional minimum length, passed straight through to `isString()`.
 * @param max Optional maximum length, passed straight through to `isString()`.
 * @param caller Function attached to the error as `caller` (defaults to `assertString` itself).
 * @throws RequiredError If `isString(value, min, max)` returns false.
 */
export function assertString(value, min, max, caller = assertString) {
	if (isString(value, min, max)) return;
	// Only describe the length limits in the message if at least one of them was specified.
	const hasLimits = min !== undefined || max !== undefined;
	const limits = hasLimits ? ` with ${min ?? 0} to ${max ?? "∞"} characters` : "";
	throw new RequiredError(`Must be string${limits}`, { received: value, caller });
}
/**
 * Convert an unknown value to a string, or return `undefined` if conversion fails.
 * - Strings are returned unchanged.
 * - Numbers are converted with `toString()`.
 * - Valid `Date` instances are converted with `toISOString()`.
 * - Anything else (including an invalid `Date`) returns `undefined`.
 *
 * @param value The value to convert.
 * @returns The string form of `value`, or `undefined` if it cannot be converted.
 */
export function getString(value) {
	if (typeof value === "string") return value;
	if (typeof value === "number") return value.toString();
	if (value instanceof Date) {
		// `toISOString()` throws `RangeError` for an invalid date, so report that as a failed conversion instead.
		return Number.isNaN(value.getTime()) ? undefined : value.toISOString();
	}
	return undefined;
}
/**
 * Convert a possible string to a string (optionally with a specified min/max length).
 * - Conversion is done by `getString()` and the result is validated by `assertString()`.
 *
 * @param value The value to convert.
 * @param min Optional minimum length forwarded to `assertString()`.
 * @param max Optional maximum length forwarded to `assertString()`.
 * @param caller Function forwarded to `assertString()` as the `caller` (defaults to `requireString` itself).
 * @returns The converted string.
 * @throws RequiredError (raised by `assertString()`) if the converted value does not pass the check.
 */
export function requireString(value, min, max, caller = requireString) {
	const converted = getString(value);
	// `assertString()` throws if `converted` is not an acceptable string.
	assertString(converted, min, max, caller);
	return converted;
}
/**
 * Does a string have a length between `min` and `max` (both inclusive)?
 *
 * @param str The string whose `length` is checked.
 * @param min Smallest allowed length (defaults to `0`).
 * @param max Largest allowed length (defaults to `Number.POSITIVE_INFINITY`).
 * @returns `true` if `min <= str.length <= max`.
 */
export function isStringBetween(str, min = 0, max = Number.POSITIVE_INFINITY) {
	const { length } = str;
	if (length >= min) return length <= max;
	return false;
}
/**
 * Concatenate a set of strings together.
 * - The input is first passed through `requireArray()` (imported from `./array.js`) with `joinStrings` as the caller.
 *
 * @param strs The strings to join.
 * @param joiner Text placed between each string (defaults to `""`).
 * @returns The joined string.
 */
export function joinStrings(strs, joiner = "") {
	const items = requireArray(strs, undefined, undefined, joinStrings);
	return items.join(joiner);
}
/**
 * Sanitize a single line of text.
 * - Used when you're sanitising a single-line input, e.g. a title for something.
 * - Removes all control characters (other than whitespace).
 * - Normalises runs of whitespace to one ` ` space.
 * - Trims whitespace from the start and end of the string.
 *
 * @param str The string to sanitize.
 * @returns The sanitized single-line string.
 *
 * @example sanitizeText("\x00Nice! "); // Returns `"Nice!"`
 */
export function sanitizeText(str) {
	// Strip control characters (whitespace is kept so it can be collapsed below).
	const stripped = str.replace(/[^\P{C}\s]/gu, "");
	// Collapse every run of whitespace into one ` ` space.
	const collapsed = stripped.replace(/\s+/gu, " ");
	// Drop whitespace at the start and end of the string.
	return collapsed.trim();
}
/**
 * Sanitize multiple lines of text.
 * - Used when you're sanitising a multi-line input, e.g. a description for something.
 * - Removes all control characters except whitespace and `\x85` next-line (which is converted to `\n` newline).
 * - Normalises line separators (`\r\n`, `\r`, `\v`, `\x85`, `\u2028`) to `\n` newline.
 * - Normalises paragraph separators (`\f`, `\u2029`) to `\n\n` double newline.
 * - Normalises runs of non-leading whitespace to one ` ` space.
 * - Normalises indentation to tabs (four whitespace characters become a tab, any leftover leading whitespace is removed).
 * - Trims trailing whitespace on each line.
 * - Trims newlines at the start and end of the string, and reduces runs of three or more newlines to two.
 *
 * @param str The string to sanitize.
 * @returns The sanitized multi-line string.
 */
export function sanitizeMultilineText(str) {
	return str
		// Strip control characters (except whitespace).
		// `\x85` is a control character that is not matched by `\s`, so it must be exempted here or the next step could never see it.
		.replace(/[^\P{C}\s\x85]/gu, "")
		.replace(/\r\n?|\v|\x85|\u2028/g, "\n") // Normalise line separators to `\n` newline.
		.replace(/\f|\u2029/g, "\n\n") // Normalise paragraph separators to `\n\n` double newline.
		.replace(/[^\S\n]+(?=\n|$)/g, "") // Trim trailing whitespace on each line.
		.replace(/^\n+|\n+$/g, "") // Trim leading and trailing newlines.
		.replace(/\n{3,}/g, "\n\n") // Normalise three or more newlines to `\n\n` double newline.
		.replace(/(\S)[^\S\n]+/g, "$1 ") // Normalise runs of non-leading whitespace to ` ` single space.
		.replace(/[^\S\t\n]{4}/g, "\t") // Normalise four leading whitespace characters to a single `\t` tab.
		.replace(/(^|\t|\n)[^\S\t\n]+/g, "$1"); // Remove leftover leading whitespace that isn't a tab.
}
/**
 * Simplify a string by removing anything that isn't a number, letter, or space.
 * - Normalizes the string by decomposing it (NFKD), stripping everything except letters, numbers, and separators, collapsing separators to single spaces, trimming, and lowercasing.
 * - Whitespace control characters (e.g. `\n` newline, `\t` tab) count as separators, so words on different lines stay separate.
 * - Useful when you're running a query against a string entered by a user.
 *
 * @param str The string to simplify.
 * @returns The simplified lowercase string (may be empty).
 *
 * @example simplifyString("Däve-is\nREALLY éxcitable—apparęntly!!! 😂"); // Returns "dave is really excitable apparently"
 *
 * @todo Convert confusables (e.g. `ℵ` alef symbol or `℮` estimate symbol) to their letterlike equivalent (e.g. `N` and `e`).
 */
export function simplifyString(str) {
	return str
		.normalize("NFKD") // Normalize ligatures (e.g. `ﬀ` to `ff`), combined characters (e.g. `Ⓜ` to `m`), accents (e.g. `å` to `a`).
		// Strip characters that aren't `\p{L}` letters, `\p{N}` numbers, `\p{Z}` separators (e.g. ` ` space), `\s` whitespace (e.g. `\n` newline), `\p{Pc}` connector punctuation (e.g. `_` underscore), or `\p{Pd}` dash punctuation (e.g. `-` hyphen).
		// `\s` is listed explicitly because `\n` and `\t` are control characters, not `\p{Z}` separators, and would otherwise be deleted here (gluing adjacent words together).
		.replace(/[^\p{L}\p{N}\p{Z}\s\p{Pc}\p{Pd}]+/gu, "")
		// Normalise runs of separators, whitespace, connector punctuation, and dash punctuation to ` ` single space.
		.replace(/[\p{Z}\s\p{Pc}\p{Pd}]+/gu, " ")
		.trim()
		.toLowerCase();
}
/**
 * Convert a string to a `kebab-case` URL slug, or return `undefined` if conversion resulted in an empty slug.
 * - The string is simplified with `simplifyString()` and every ` ` space in the result is replaced with a `-` hyphen.
 *
 * @param str The string to convert.
 * @returns The slug, or `undefined` if nothing was left after simplification.
 */
export function getSlug(str) {
	const simplified = simplifyString(str);
	const slug = simplified.replaceAll(" ", "-");
	// An empty slug is reported as `undefined`.
	return slug ? slug : undefined;
}
/**
 * Convert a string to a `kebab-case` URL slug, or throw `RequiredError` if conversion resulted in an empty slug.
 *
 * @param str The string to convert (passed to `getSlug()`).
 * @param caller Function attached to the error as `caller` (defaults to `requireSlug` itself).
 * @returns The slug.
 * @throws RequiredError If `getSlug()` returned a falsy value.
 */
export function requireSlug(str, caller = requireSlug) {
	const slug = getSlug(str);
	if (slug) return slug;
	throw new RequiredError("Invalid slug", { received: str, caller });
}
/**
 * Convert a string to a unique ref e.g. `abc123`, or return `undefined` if conversion resulted in an empty string.
 * - The string is simplified with `simplifyString()` and every ` ` space in the result is removed.
 *
 * @param str The string to convert.
 * @returns The ref, or `undefined` if nothing was left after simplification.
 */
export function getRef(str) {
	const simplified = simplifyString(str);
	const ref = simplified.replaceAll(" ", "");
	// An empty ref is reported as `undefined`.
	return ref ? ref : undefined;
}
/**
 * Convert a string to a unique ref e.g. `abc123`, or throw `RequiredError` if conversion resulted in an empty string.
 *
 * @param str The string to convert (passed to `getRef()`).
 * @param caller Function attached to the error as `caller` (defaults to `requireRef` itself).
 * @returns The ref.
 * @throws RequiredError If `getRef()` returned a falsy value.
 */
export function requireRef(str, caller = requireRef) {
	const ref = getRef(str);
	if (ref) return ref;
	throw new RequiredError("Invalid string ref", { received: str, caller });
}
/**
 * Return an array of the separate words and "quoted phrases" found in a string.
 * - Phrases enclosed "in double quotes" are a single word.
 * - Empty matches (e.g. an empty `""` pair of quotes) are skipped.
 * - Performs no processing on the words, so control chars, punctuation, symbols, and case are all preserved.
 *
 * Note: this splits words based on spaces, so won't work well with logographic writing systems e.g. kanji.
 *
 * @param str The string to split into words.
 * @returns Array of the words and phrases, in the order they appear.
 */
export function getWords(str) {
	return [..._getWords(str)];
}
/** Yield each non-empty word or quoted phrase matched by `WORD` in a string. */
function* _getWords(str) {
	for (const match of str.matchAll(WORD)) {
		// At most one of the three capture groups is set for any single match.
		const word = match[1] || match[2] || match[3];
		if (word) yield word;
	}
}
// Runs of characters without spaces, or "quoted phrases".
// NOTE(review): the first alternative also accepts `'`, so the `'single quoted'` alternative looks unreachable — confirm whether single-quoted phrases are meant to be supported.
const WORD = /([^\s"]+)|"([^"]*)"|'([^']*)'/g;
/**
 * Get the (trimmed) first full line of a string.
 * - The first line is everything before the first `\n` newline (or the whole string if there is no newline).
 *
 * @param str The string to read the first line from.
 * @returns The first line with whitespace trimmed from both ends.
 */
export function getFirstLine(str) {
	const end = str.indexOf("\n");
	// Uses `slice()` rather than the deprecated `substr()`.
	const line = end >= 0 ? str.slice(0, end) : str;
	return line.trim();
}
/**
 * Is the first character of a string an uppercase letter?
 * - Only the first UTF-16 code unit is checked, against the codes `65` to `90` (ASCII `A` to `Z`), using `isBetween()`.
 * - NOTE(review): whether the bounds are inclusive depends on `isBetween()` in `./number.js` — presumably inclusive, confirm there.
 *
 * @param str The string whose first character is checked.
 */
export function isUppercaseLetter(str) {
	const code = str.charCodeAt(0);
	return isBetween(code, 65, 90);
}
/**
 * Is the first character of a string a lowercase letter?
 * - Only the first UTF-16 code unit is checked, against the codes `97` to `122` (ASCII `a` to `z`), using `isBetween()`.
 * - NOTE(review): whether the bounds are inclusive depends on `isBetween()` in `./number.js` — presumably inclusive, confirm there.
 *
 * @param str The string whose first character is checked.
 */
export function isLowercaseLetter(str) {
	const code = str.charCodeAt(0);
	return isBetween(code, 97, 122);
}
/**
 * Limit a string to a given length.
 * - Strings whose length is `maxLength` or less are returned unchanged.
 * - Stops at the last space at or before `maxLength` (or cuts at `maxLength` exactly if there is no such space).
 * - Appends an `…` ellipsis after the string (but only if a limit is applied).
 *
 * @param str The string to limit.
 * @param maxLength Maximum number of characters to keep from `str` (the appended text is not counted).
 * @param append Text added after a truncated string (defaults to `…`).
 * @returns The original string if it fits, otherwise the truncated string followed by `append`.
 */
export function limitString(str, maxLength, append = "…") {
	// A string that is exactly `maxLength` long already fits, so it must not be truncated.
	if (str.length <= maxLength) return str;
	// Prefer to cut on a space so a word isn't split in half; a space at index 0 is ignored because it would leave nothing.
	const lastSpace = str.lastIndexOf(" ", maxLength);
	const cut = lastSpace > 0 ? lastSpace : maxLength;
	return `${str.slice(0, cut).trimEnd()}${append}`;
}
/**
 * Split a string into non-empty segments using a separator.
 * - If the split produces more than `max` segments, the surplus segments are joined back together (using `separator`) into the final segment.
 *
 * @param str The string to split.
 * @param separator The separator passed to `String.prototype.split()`.
 * @param min Minimum number of segments required (defaults to `1`).
 * @param max Maximum number of segments returned (defaults to `Number.POSITIVE_INFINITY`).
 * @param caller Function attached to the error as `caller` (defaults to `splitString` itself).
 * @returns Array of the segments.
 * @throws ValueError If there are fewer than `min` segments, or any segment is empty.
 */
export function splitString(str, separator, min = 1, max = Number.POSITIVE_INFINITY, caller = splitString) {
	let segments = str.split(separator);
	if (segments.length > max) {
		// Keep the leading segments as-is and fold everything else into one trailing segment.
		const head = segments.slice(0, max - 1);
		const tail = segments.slice(max - 1).join(separator);
		segments = [...head, tail];
	}
	const tooFew = segments.length < min;
	const hasEmpty = segments.some(segment => !segment);
	if (tooFew || hasEmpty) {
		throw new ValueError(`Must be string with ${min ?? 0} to ${max ?? "∞"} non-empty segments separated by "${separator}"`, {
			received: str,
			caller,
		});
	}
	return segments;
}