@spacingbat3/lss
Version:
LSS: Literal String Sanitizer – sanitizes string based on specific inputs and tries to guess the accurate type (in TypeScript/Typed JS).
285 lines (263 loc) • 11.6 kB
text/typescript
/*------------------------------------------------------*
* Copyright (c) 2023 Dawid Papiewski "SpacingBat3". *
* *
* All rights reserved. Licensed under the ISC license. *
*------------------------------------------------------*/
/**
 * Character ranges (written as `start-end` triples) that the sanitizer
 * recognizes inside a charset string and that can be expanded to a valid
 * {@linkcode charset}. The array is frozen, so it cannot be altered at runtime.
 *
 * @since v1.0.0
 */
export const parseableRange = Object.freeze([
  "a-z",
  "A-Z",
  "0-9",
  "---"
] as const);
/**
 * A stand-in for `unknown` that spells out commonly-used value types
 * (`string`, `number`, `bigint`, `object`, `null` and `undefined`), so that a
 * generic parameter constrained by it can pick any of them.
 */
export type unknownLiteral =
  | string
  | number
  | bigint
  | object
  | null
  | undefined;
/**
 * Selects which side(s) of the string get trimmed. `null` disables trimming
 * altogether.
 *
 * @since v1.2.0
 */
export type trimMode =
  | "left"
  | "right"
  | "both"
  | null;
/**
 * Type-level counterpart of converting a value to a string: string types are
 * kept as-is, number types become their template-literal form and every other
 * type widens to `string`.
 */
type stringify<T> = (
  T extends string ? T : T extends number ? `${T}` : string
);
/**
* Splits `T` into the union of its individual chars by peeling off the first
* char and recursing on the remainder.
*
* - If `T` is an empty string, it resolves to `never`.
* - If `T` cannot be split into a first char and a remainder and is not the
*   empty string (the branch a non-literal `string` is expected to take), it
*   resolves to `T` itself.
*
* @template T - String type to be split into chars.
*/
export type charset<T extends string> = (
// C <- first char of T, R <- the remainder of T
T extends `${infer C extends string}${infer R extends string}` ? (
C | charset<R>
// Nothing left to split: the empty string contributes no chars.
) : T extends "" ? never : T
);
/**
 * Union of every lowercase letter of the English alphabet (`"a"` to `"z"`).
 */
type char =
  | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m"
  | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z";
/**
 * Union of the ten decimal digits (`"0"` to `"9"`).
 */
type digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9";
/**
 * Union of the range literals stored in the `parseableRange` constant,
 * derived from the value so both always stay in sync.
 */
type parseableRange = typeof parseableRange[number];
/**
 * Expands a {@link parseableRange} entry to the union of chars it stands for
 * (compare {@link charset}). A union of ranges expands to the union of the
 * individual expansions.
 */
type extendRange<T extends parseableRange> = {
  "a-z": char;
  "A-Z": Uppercase<char>;
  "0-9": digit;
  "---": "-";
}[T];
/**
* Resolves a charset string `T` (a mix of single chars and
* {@link parseableRange} entries) to the union of all chars it describes.
*
* - If `T` as a whole is a {@link parseableRange}, it is expanded directly.
* - Otherwise, if `T` starts with `a-z`, `A-Z` or `0-9`, that range is
*   expanded and the remainder is resolved recursively.
* - Otherwise the first char of `T` is taken literally and the remainder is
*   resolved recursively.
* - When no first char can be inferred from `T` (e.g. the empty string), it
*   resolves to `never`.
*
* @template T - Charset string to resolve.
*/
type range2charset<T extends string> = T extends parseableRange ? extendRange<T> : (
// T starts with the "a-z" range: expand it, then resolve the remainder (R).
T extends `${infer F extends "a-z"}${string}` ? (
T extends `${F}${infer R extends string}` ? extendRange<F>|range2charset<R> : never
// T starts with the "A-Z" range.
) : T extends `${infer F extends "A-Z"}${string}` ? (
T extends `${F}${infer R extends string}` ? extendRange<F>|range2charset<R> : never
// T starts with the "0-9" range.
) : T extends `${infer F extends "0-9"}${string}` ? (
T extends `${F}${infer R extends string}` ? extendRange<F>|range2charset<R> : never
// Any other char: take it literally and continue with the remainder.
) : T extends `${infer F extends string}${infer R extends string}` ? (
charset<F>|range2charset<R>
// Nothing left to resolve (empty string).
) : never
);
/**
* Normalizes the letter case of `T` depending on which English letters are
* written in the charset string `C` (the string is inspected as-is, ranges
* are not expanded):
*
* - `C` contains no lowercase letter: resolves to `Uppercase<T>`,
* - `C` contains a lowercase letter but no uppercase one: resolves to
*   `Lowercase<T>`,
* - `C` contains both: `T` is left unchanged.
*
* @template T - String to normalize.
* @template C - Charset string deciding the target case.
*/
type _case<T extends string,C extends string> = (
// Charset contains any lowercase letter from English alphabet?
C extends `${string}${char}${string}` ? (
// Charset contains any upperCase English letter?
C extends `${string}${Uppercase<char>}${string}` ? T : Lowercase<T>
) : Uppercase<T>
);
/**
* A generic type which walks the literal string `T` char by char and swaps
* every char outside of the `C` charset (as resolved by
* {@link range2charset}) for the `R` replacement character; chars within the
* charset are kept. It is meant to mirror what the runtime function does when
* combined with {@link _trimMode}.
*
* @template T - String to process.
* @template C - Charset string listing the allowed chars.
* @template R - Replacement put in place of each disallowed char.
*/
type _replace<T extends string,C extends string,R extends string> = (
// F <- T[0], S <- T[1..N]
T extends `${infer F extends string}${infer S extends string}` ? (
// Leave valid chars as-is:
F extends range2charset<C> ? (
`${F}${_replace<S,C,R>}`
// Replace invalid chars with replacement character:
) : `${R}${_replace<S,C,R>}`
// Original type on empty or non-literal strings.
) : T
);
/**
* A helper generic type which trims the string by removing leading chars
* outside of the range. It stops at the first char that belongs to
* {@link range2charset} of `C` and resolves to the string starting there;
* when no char can be split off anymore, what is left of `T` is returned.
*
* @template T - String to trim.
* @template C - Charset string listing the allowed chars.
*/
type _trimLeft<T extends string,C extends string> = (
// F <- first char, R <- remainder
T extends `${infer F extends string}${infer R extends string}` ? (
// First valid char found: keep the string from here; else drop F and retry.
F extends range2charset<C> ? T : _trimLeft<R,C>
) : T
);
/**
* A helper generic type which trims the string by removing trailing chars
* outside of the range. It does not respect the left side of the string and
* assumes that the first out-of-range char it finds begins the invalid right
* side: the leading run of in-range chars is kept and everything from that
* char onwards resolves to an empty string, even if in-range chars follow.
*
* @template T - String to trim.
* @template C - Charset string listing the allowed chars.
*/
type _trimRightPhase2<T extends string,C extends string> = (
// F <- first char, R <- remainder
T extends `${infer F extends string}${infer R extends string}` ? (
// Keep F while it is valid; cut the string at the first invalid char.
F extends range2charset<C> ? `${F}${_trimRightPhase2<R,C>}` : ""
) : T
);
/**
* A helper generic type which trims the string by removing trailing chars
* outside of the range. Unlike {@link _trimRightPhase2}, it leaves leading
* chars untouched: chars are copied verbatim up to and including the first
* in-range char, and only the remainder is handed to
* {@link _trimRightPhase2} (which cuts it at its first out-of-range char).
*
* @template T - String to trim.
* @template C - Charset string listing the allowed chars.
*/
type _trimRight<T extends string,C extends string> = (
// F <- first char, R <- remainder
T extends `${infer F extends string}${infer R extends string}` ? (
// Valid char: switch to phase 2 for the rest; invalid char: copy and go on.
F extends range2charset<C> ? `${F}${_trimRightPhase2<R,C>}` : `${F}${_trimRight<R,C>}`
) : T
);
/**
* Trims a string based on the {@link trimMode} (`M`) parameter. It is meant to
* provide the same logic as the runtime function:
*
* - `"right"` resolves to {@link _trimRight},
* - `"both"` applies {@link _trimLeft} first and {@link _trimRightPhase2} to
*   its result,
* - `"left"` resolves to {@link _trimLeft},
* - `null` leaves `T` untouched,
* - anything else resolves to `never`.
*
* @template T - A value to be trimmed.
* @template C - A charset used during sanitization.
* @template M - A way of trimming the string.
*/
type _trimMode<T extends string,C extends string,M extends trimMode> = (
M extends "right" ? _trimRight<T,C> : M extends "both" ? (
// Leading invalid chars are gone after _trimLeft, hence phase 2 directly.
_trimRightPhase2<_trimLeft<T,C>,C>
) : M extends "left" ? _trimLeft<T,C> : M extends null ? T : never
);
/**
* Ensures given string is a *char* (i.e. has `length === 1`). It resolves to
* `T` for a single-char string and to `never` both for strings with multiple
* characters and for strings no first char can be inferred from (such as an
* empty string).
*
* @template T - any `string`.
*/
type ensureChar<T extends string> = (
// P <- first char of T
T extends `${infer P extends string}${string}` ? (
// T is a char only if the whole string equals its first char.
T extends P ? T : never
) : never
);
/**
 * Filters the empty string out of `T`: resolves to `never` for `""` and to
 * `T` itself for anything else.
 */
type ensureNonEmpty<T extends string> = Exclude<T, "">;
/**
* Resolves to the last character of the given string by repeatedly dropping
* the leading part of the string until the remainder is empty. A type that
* cannot be split this way resolves to itself.
*
* @template T - String to take the last char from.
*/
type lastChar<T extends string> = (
// R <- T without its leading part
T extends `${string}${infer R extends string}` ? (
// Nothing is left after the leading part: T itself is the last char.
R extends "" ? T : lastChar<R>
) : T
);
/**
* Collects the range-like groups (`x-y` triples around a hyphen) found in the
* given string as a union type, so they can be checked against
* {@link parseableRange}. Resolves to `never` when the string contains no
* such group.
*
* For each match, `S` is the text before the first hyphen, `E` is the single
* char right after it and `R` is the rest, which is scanned recursively. The
* group is built from the last char of `S`, the hyphen and `E`; if `S` is
* empty (the hyphen has no char before it), no group is emitted for that
* hyphen.
*
* @template T - Charset string to scan.
*/
type charGroups<T extends string> = (
T extends `${infer S extends string}-${infer E extends string}${infer R extends string}` ? (
S extends "" ? charGroups<R> : `${lastChar<S>}-${E}`|charGroups<R>
) : never
);
/**
* Transforms {@link sanitizeLiteral} function parameters to provide an expected
* result type for a given set of literals:
*
* - nullish `V` resolves to `V` itself,
* - `R` that is not a single char (see {@link ensureChar}) resolves to
*   `never`,
* - `C` containing a range group outside of {@link parseableRange} (see
*   {@link charGroups}) resolves to `never`,
* - `V` being a `string` or `number` is stringified, case-normalized, trimmed
*   and has its invalid chars replaced, in that order; an empty result
*   resolves to `never`,
* - any other `V` resolves to `string`.
*
* @template V - Value to be sanitized.
* @template C - A charset to be used for sanitization.
* @template R - A replacement character to be used for sanitization.
* @template M - A range of value trimming.
*/
export type sanitizeResult<V,C extends string,R extends string, M extends trimMode> = (
// Nullish values pass through; the replacement has to be exactly one char.
V extends null|undefined ? V : R extends ensureChar<R> ? (
// Every range group in the charset has to be a supported one.
charGroups<C> extends parseableRange ? V extends string|number ? (
// stringify -> _case -> _trimMode -> _replace -> ensureNonEmpty
ensureNonEmpty<_replace<_trimMode<_case<stringify<V>,C>,C,M>,C,R>>
) : string : never
) : never
);
/**
 * A type-safe string sanitizer supporting any set of chars while being capable
 * of calculating the expected result as a static type if a literal is provided
 * as a value. Predicts the accurate output based on input types or errors
 * (`never`) if the function is guaranteed to fail at runtime.
 *
 * @remarks
 *
 * It is designed to be rather performant at runtime (it uses [`RegExp`]
 * under-the-hood), however be aware of long compilation times as TypeScript
 * will have to do heavy calculations for the function result in case of
 * complex strings and character sets (a lot of operations are done in a
 * char-by-char manner, using `infer` and type recursion — there's a real
 * chance that for very long literal strings TypeScript will just give up at
 * calculations and end compilation with an error!).
 *
 * The value is only modified when it contains a character outside of both the
 * charset and the replacement, when it starts with the replacement character
 * or — for the `right` and `both` trim modes — when it ends with the
 * replacement character. In that case it is, in order: converted to lowercase
 * (charset without `A-Z` letters) or uppercase (charset without `a-z`
 * letters), trimmed according to `trimMode` and finally has every remaining
 * invalid character replaced.
 *
 * [`RegExp`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp "RegExp – JavaScript | MDN"
 *
 * @privateRemarks
 *
 * This function began, for me and anyone digging into the source code, as a
 * resource for learning advanced TypeScript manipulations on string literals.
 * I will also use it for my own personal projects.
 *
 * @param value - Value to sanitize. Should be a *non-nullish* `string`; other non-nullish values are converted with `String()`.
 * @param charset - A string that represents a set of characters. For ranges, only values from {@linkcode parseableRange} are valid.
 * @param replacement - A `char` (i.e. `string` with `length === 1`) which should replace invalid characters inside the string.
 * @param trimMode - Defines how the string should be trimmed. Defaults to `left` (compatibility reasons). In `right` mode a string without any valid character is not trimmed at all.
 *
 * @returns - Original {@link value} for nullish values, sanitized string for anything else.
 * @throws - [`TypeError`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypeError "TypeError – JavaScript | MDN") for unresolveable {@link charset} or invalid {@link trimMode}, [`RangeError`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RangeError "RangeError – JavaScript | MDN") for non-char values in {@link replacement} and [`Error`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Error "Error – JavaScript | MDN") for {@link value} which cannot be sanitized to the expected {@link charset} (i.e. the result would be an empty string).
 *
 * @example
 *
 * // (const) regular: "fooBar3"
 * const regular = "fooBar3" as const;
 * // (const) mod1: "FOOBAR3"
 * const mod1 = sanitizeLiteral(regular,"A-Z0-9");
 * // (const) mod2: "foobarz"
 * const mod2 = sanitizeLiteral(regular,"a-z","z");
 * // (const) mod3: "oo_ar3"
 * const mod3 = sanitizeLiteral(regular,"acdeghijklmnopqrstuvwxyz0-9","_");
 *
 * @since v1.0.0
 */
export function sanitizeLiteral<V extends unknownLiteral,C extends string = "a-z0-9",R extends string = "-",M extends trimMode = "left">(value:V, charset="a-z0-9" as C, replacement="-" as R,trimMode="left" as M): sanitizeResult<V,C,R,M> {
  // Nullish values are handed back untouched.
  if(value === null || value === undefined)
    return value as sanitizeResult<V,C,R,M>;

  // Every "x-y" triple found in the charset has to be one of the supported
  // ranges (the same literals as listed in `parseableRange`).
  const supportedRanges = ["a-z","A-Z","0-9","---"];
  const rangeGroups: string[] = charset.match(/([^])-([^])/gm) ?? [];
  if(rangeGroups.some(group => !supportedRanges.includes(group)))
    throw new TypeError(`Unrecognized charset: "${charset}"!`);
  if(replacement.length !== 1)
    throw new RangeError("Parameter 'replacement' should be a valid character");

  // Validate the trim mode up-front, so an invalid mode is reported no matter
  // whether the value happens to need any sanitization.
  const trimsLeft = trimMode === "left" || trimMode === "both";
  const trimsRight = trimMode === "right" || trimMode === "both";
  if(trimMode !== null && !trimsLeft && !trimsRight)
    throw new TypeError(`Invalid trim mode: "${String(trimMode)}"`);

  // Escape whatever could terminate, negate or escape out of a character
  // class. The replacement needs it as well: a lone "\" would otherwise
  // swallow the closing bracket and make the expression invalid.
  const escapedCharset = charset.replace(/([\]^\\])/g,"\\$1");
  const escapedReplacement = replacement.replace(/[\]^\\-]/g,"\\$&");
  const regexp = {
    /** Matches a single character that belongs to the charset. */
    valid: new RegExp(`[${escapedCharset}]`),
    /** Matches characters that are neither in the charset nor the replacement. */
    invalid: new RegExp(`[^${escapedCharset}${escapedReplacement}]`,"g")
  };

  let valueString: string = typeof value === "string" ? value : String(value);

  // `search()` ignores the global flag / `lastIndex`, so the shared regular
  // expression stays safe to reuse for the replacement below.
  const needsSanitizing = valueString.search(regexp.invalid) !== -1
    || valueString.startsWith(replacement)
    || (trimsRight && valueString.endsWith(replacement));

  if(needsSanitizing) {
    // Try to convert string to uppercase or lowercase based on charset.
    if(!/[A-Z]/.test(charset))
      valueString = valueString.toLowerCase();
    else if(!/[a-z]/.test(charset))
      valueString = valueString.toUpperCase();

    // Trim string based on the trimMode.
    if(trimsLeft) {
      const firstValid = valueString.search(regexp.valid);
      // Without any valid character everything counts as leading garbage
      // (`slice(-1)` would wrongly keep the last character instead).
      valueString = firstValid === -1 ? "" : valueString.slice(firstValid);
    }
    if(trimsRight) {
      // Walk back by UTF-16 code units, the same unit `slice()` operates on,
      // so characters outside of the BMP are never cut in half.
      let end = valueString.length;
      while(end > 0 && !regexp.valid.test(valueString.charAt(end-1)))
        end--;
      // No valid character at all: leave the string as it is, it will be
      // fully covered by the replacement below.
      if(end > 0)
        valueString = valueString.slice(0,end);
    }

    // Replace the rest with the replacement character.
    valueString = valueString.replace(regexp.invalid,replacement);
  }

  // Do not accept the empty strings
  if(valueString.length === 0)
    throw new Error("Parameter 'value' is not sanitizable!");
  return valueString as sanitizeResult<V,C,R,M>;
}
export default sanitizeLiteral;