@dcoffey/espells
Version:
Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.
283 lines • 8.81 kB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { iterate } from "iterare";
import { CONSTANTS as C } from "./constants.js";
/**
 * Tagged-template helper that builds a `RegExp` from a `/source/flags`
 * style literal.
 *
 * The raw template text is used (via `String.raw`), so backslashes in the
 * template are passed to the `RegExp` constructor as written.
 *
 * @example
 *
 * ```ts
 * const regex = re`/abc/g`
 * ```
 *
 * @throws {SyntaxError} If the template text is not matched by
 *   `C.SPLIT_REGEX_REGEX`.
 */
export function re(strings, ...keys) {
    const literal = String.raw(strings, ...keys);
    const match = C.SPLIT_REGEX_REGEX.exec(literal);
    if (!match) {
        throw new SyntaxError();
    }
    // Capture group 2 is the pattern source, group 3 the flags; either may
    // be absent, in which case an empty string is used.
    const source = match[2] === undefined ? "" : match[2];
    const flags = match[3] === undefined ? "" : match[3];
    return new RegExp(source, flags);
}
/**
 * Null-safe membership test: checks whether a `Set`, `Array`, or `string`
 * contains the given `string`.
 *
 * Returns `false` when either argument is `undefined` or `null`.
 *
 * @param value - The value to look for.
 * @param container - The container of strings (or another string).
 */
export function includes(value, container) {
    // `== null` matches exactly `null` and `undefined`.
    if (value == null || container == null) {
        return false;
    }
    const isSequence = typeof container === "string" || Array.isArray(container);
    // Strings and arrays expose `includes`; anything else is treated as a
    // Set-like object exposing `has`.
    return isSequence ? container.includes(value) : container.has(value);
}
/**
 * Escapes every character of `str` that has a special meaning in a
 * `RegExp` pattern by prefixing it with a backslash.
 *
 * @param str - The string to escape.
 */
export function escapeRegExp(str) {
    const specialCharacters = /[.*+?^${}()|\[\]\\]/g;
    // `$&` in the replacement stands for the matched character itself.
    const escaped = str.replace(specialCharacters, "\\$&");
    return escaped;
}
/**
 * Returns a copy of `str` in which the characters between `from` and `to`
 * are swapped out for `sub`.
 *
 * @param str - The string which should have a range inside of it replaced.
 * @param from - The start of the replacement range.
 * @param to - The end of the replacement range (exclusive).
 * @param sub - The replacement/substitute string.
 */
export function replaceRange(str, from, to, sub) {
    const head = str.substring(0, from);
    const tail = str.substring(to);
    return head + sub + tail;
}
/**
 * Converts a string to upper case.
 *
 * @param str - The string to uppercase.
 * @param locale - Optional locale (or list of locales). When given, the
 *   locale-specific case mapping is used; otherwise the default mapping is.
 *   This usually won't be needed, as JS tries to account for
 *   non-ASCII/Latin text when handling casing.
 */
export function uppercase(str, locale) {
    if (locale) {
        return str.toLocaleUpperCase(locale);
    }
    return str.toUpperCase();
}
/**
 * Converts a string to lower case.
 *
 * @param str - The string to lowercase.
 * @param locale - Optional locale (or list of locales). When given, the
 *   locale-specific case mapping is used; otherwise the default mapping is.
 *   This usually won't be needed, as JS tries to account for
 *   non-ASCII/Latin text when handling casing.
 */
export function lowercase(str, locale) {
    if (locale) {
        return str.toLocaleLowerCase(locale);
    }
    return str.toLowerCase();
}
/**
 * Titlecases a string: the whole string is lowercased and its first
 * character is then replaced by the uppercased first character of the
 * original.
 *
 * An empty string is returned unchanged.
 *
 * @param str - The string to titlecase.
 * @param locale - Uses a locale, or a list of locales, case mapping if
 *   provided. This usually won't be needed, as JS tries to account for
 *   non-ASCII/Latin text when handling casing.
 */
export function titlecase(str, locale) {
    // `str[0]` is `undefined` for an empty string, which would make the
    // uppercasing step throw; there is nothing to capitalize anyway.
    if (str.length === 0) {
        return str;
    }
    return replaceRange(lowercase(str, locale), 0, 1, uppercase(str[0], locale));
}
/**
 * Checks whether a string is already in titlecase, i.e. whether
 * titlecasing it leaves it unchanged.
 *
 * @param str - The string to check.
 * @param locale - Uses a locale, or a list of locales, case mapping if
 *   provided. This usually won't be needed, as JS tries to account for
 *   non-ASCII/Latin text when handling casing.
 */
export function isTitlecased(str, locale) {
    const titled = titlecase(str, locale);
    return str === titled;
}
/**
 * Checks whether a string is entirely uppercase, i.e. whether uppercasing
 * it leaves it unchanged.
 *
 * @param str - The string to check.
 * @param locale - Uses a locale, or a list of locales, case mapping if
 *   provided. This usually won't be needed, as JS tries to account for
 *   non-ASCII/Latin text when handling casing.
 */
export function isUppercased(str, locale) {
    const upper = uppercase(str, locale);
    return str === upper;
}
/**
 * Checks whether a string is entirely lowercase, i.e. whether lowercasing
 * it leaves it unchanged.
 *
 * @param str - The string to check.
 * @param locale - Uses a locale, or a list of locales, case mapping if
 *   provided. This usually won't be needed, as JS tries to account for
 *   non-ASCII/Latin text when handling casing.
 */
export function isLowercased(str, locale) {
    const lower = lowercase(str, locale);
    return str === lower;
}
/**
 * Reverses a string.
 *
 * The string is reversed code point by code point, so characters outside
 * the Basic Multilingual Plane (stored as surrogate pairs) stay intact
 * instead of being split into invalid halves. Combining sequences are not
 * treated specially.
 *
 * @param str - The string to reverse.
 */
export function reverse(str) {
    // Spreading a string iterates by code point, unlike `split("")` which
    // splits by UTF-16 code unit.
    return [...str].reverse().join("");
}
/**
 * Splits a line into its parts, using `C.SPLIT_LINE_REGEX` as the
 * separator.
 *
 * @param line - The line to split.
 */
export function split(line) {
    const parts = line.split(C.SPLIT_LINE_REGEX);
    return parts;
}
/**
 * Returns true if the given string is exactly 3 characters long and all
 * three characters are the same.
 *
 * @param s - The string to check.
 */
export function isTriplet(s) {
    if (s.length !== 3) {
        return false;
    }
    const first = s[0];
    return [s[1], s[2]].every(ch => ch === first);
}
/**
 * Builds a new set holding every element of `a` that is also present in
 * `b`.
 *
 * @param a - The first set; its elements are the candidates.
 * @param b - The second set; used for the membership test via `has`.
 */
export function intersect(a, b) {
    const shared = iterate(a).filter(element => b.has(element));
    return shared.toSet();
}
/**
 * Appends the elements of `b` to those of `a` and returns the result as
 * the same kind of container as `a`: a string, a `Set`, or an array.
 *
 * @param a - The first iterable; decides the type of the result.
 * @param b - The iterable whose elements are appended.
 * @throws {TypeError} If `a` is not a string, a `Set`, or an array.
 */
export function concat(a, b) {
    // Build the chained iterator first; the checks below only decide how
    // it is collected.
    const chained = iterate(a).concat(b);
    if (Array.isArray(a)) {
        return chained.toArray();
    }
    if (a instanceof Set) {
        return chained.toSet();
    }
    if (typeof a === "string") {
        return chained.join("");
    }
    throw new TypeError("Unknown iterable given!");
}
// https://gist.github.com/cybercase/db7dde901d7070c98c48#gistcomment-3718142
/**
 * Lazily yields the cartesian product of the given iterables as arrays,
 * one element per iterable. The first iterable varies fastest.
 *
 * Yields nothing when called without arguments or when any of the
 * iterables is empty.
 *
 * Every iterable is restarted via `[Symbol.iterator]()` each time it is
 * exhausted, so the inputs must be re-iterable (arrays, strings, sets);
 * a one-shot iterator cannot be replayed.
 *
 * @param iterables - The iterables to combine.
 */
export function* product(...iterables) {
    if (iterables.length === 0) {
        return;
    }
    const iterators = iterables.map(it => it[Symbol.iterator]());
    const results = iterators.map(it => it.next());
    // An empty input makes the whole product empty. Without this guard an
    // empty iterable in a non-first position would leak one tuple
    // containing `undefined` before the loop noticed it was exhausted.
    if (results.some(result => result.done)) {
        return;
    }
    // Cycle through iterators, odometer style: `i` is the position that is
    // currently being advanced.
    for (let i = 0;;) {
        if (results[i].done) {
            // Reset the current iterator
            iterators[i] = iterables[i][Symbol.iterator]();
            results[i] = iterators[i].next();
            // Advance and exit if we've reached the end
            if (++i >= iterators.length) {
                return;
            }
        }
        else {
            yield results.map(({ value }) => value);
            i = 0;
        }
        results[i] = iterators[i].next();
    }
}
/**
 * Returns true if the given iterable yields at least one element,
 * whatever its value. Iteration stops after the first element.
 *
 * @param gen - The iterable to probe.
 */
export function any(gen) {
    let yielded = false;
    for (const _element of gen) {
        yielded = true;
        break;
    }
    return yielded;
}
/**
 * Limits an iterator to the specified count.
 *
 * At most `n` elements are pulled from `gen`: iteration stops right after
 * the `n`-th element has been yielded, so no extra element is consumed
 * from the source.
 *
 * @param gen - The iterator to limit.
 * @param n - The number of elements to limit to. Zero or a negative
 *   number yields nothing.
 */
export function* limit(gen, n) {
    // Nothing to yield; return before touching the source at all.
    if (n <= 0) {
        return;
    }
    let count = 0;
    for (const x of gen) {
        yield x;
        // Check after yielding so the source is never advanced past the
        // last element that is actually handed out.
        if (++count >= n) {
            break;
        }
    }
}
/**
 * Returns the number of characters common between two strings, in both
 * type and position.
 *
 * Both strings are compared code point by code point, so characters
 * stored as surrogate pairs count as a single position in each string.
 *
 * @param s1 - The first string.
 * @param s2 - The second string.
 */
export function commonCharacters(s1, s2) {
    // Split both strings the same way; indexing `s2` by UTF-16 code unit
    // while iterating `s1` by code point would misalign the positions.
    const other = [...s2];
    return [...s1].filter((ch, index) => ch === other[index]).length;
}
/**
 * Returns the amount of characters in common between the left-sides of
 * two strings, i.e. the length of their longest common prefix.
 *
 * When one string is a prefix of the other (including when both are
 * equal), the length of the shorter string is returned.
 *
 * @param s1 - The first string.
 * @param s2 - The second string.
 */
export function leftCommonSubstring(s1, s2) {
    const shortest = Math.min(s1.length, s2.length);
    for (let i = 0; i < shortest; i++) {
        if (s1[i] !== s2[i]) {
            return i;
        }
    }
    // No mismatch within the shorter string: all of it is shared.
    return shortest;
}
/**
 * Returns the number of ngrams of `s1` that are in `s2`. Higher is better.
 *
 * For every ngram size from 1 up to `max`, each substring of `s1` of that
 * size is looked up anywhere in `s2`; a hit adds one to the score.
 *
 * @param max - The `n` in `ngram`: the largest ngram size to try.
 * @param s1 - String to compare against `s2`.
 * @param s2 - String to compare against `s1`. An empty `s2` scores 0.
 * @param weighted - Reduce score depending on number ngrams *not contained*
 *   in `s2`; a missing ngram at the very start or end of `s1` is penalized
 *   twice.
 * @param anyMismatch - Reduce score if the strings differ in length by more
 *   than 2. Ignored when `longerIsWorse` is set.
 * @param longerIsWorse - Reduce score when `s2` is more than 2 longer than
 *   `s1`.
 */
export function ngram(max, s1, s2, weighted = false, anyMismatch = false, longerIsWorse = false) {
    const l1 = s1.length;
    const l2 = s2.length;
    if (l2 === 0) {
        return 0;
    }
    let nscore = 0;
    // Sizes start at 1: a zero-length ngram is the empty string, which is
    // trivially contained in every `s2`.
    for (let size = 1; size <= max; size++) {
        let ns = 0;
        // Last start position at which a full ngram of this size fits.
        const lastPos = l1 - size;
        for (let pos = 0; pos <= lastPos; pos++) {
            if (s2.includes(s1.slice(pos, pos + size))) {
                ns++;
            }
            else if (weighted) {
                ns--;
                // Extra penalty for a miss at either edge of `s1`.
                if (pos === 0 || pos === lastPos) {
                    ns--;
                }
            }
        }
        nscore += ns;
        // With fewer than two hits at this size, larger sizes cannot add
        // anything worthwhile (unweighted mode only).
        if (ns < 2 && !weighted) {
            break;
        }
    }
    let penalty = 0;
    if (longerIsWorse) {
        penalty = l2 - l1 - 2;
    }
    else if (anyMismatch) {
        penalty = Math.abs(l2 - l1) - 2;
    }
    return penalty > 0 ? nscore - penalty : nscore;
}
/**
 * Length of the "longest common subsequence" in two strings.
 *
 * Returns 0 when either string is empty.
 *
 * @param a - The first string.
 * @param b - The second string.
 */
export function lcslen(a, b) {
    const m = a.length;
    const n = b.length;
    // table[i][j] holds the LCS length of the first `i` characters of `a`
    // and the first `j` characters of `b`; row 0 and column 0 stay 0.
    const table = Array.from({ length: m + 1 }, () => Array.from({ length: n + 1 }, () => 0));
    for (let i = 0; i < m; i++) {
        for (let j = 0; j < n; j++) {
            table[i + 1][j + 1] = a[i] === b[j]
                ? table[i][j] + 1
                : Math.max(table[i + 1][j], table[i][j + 1]);
        }
    }
    // The answer for the full strings is the bottom-right cell.
    return table[m][n];
}
//# sourceMappingURL=util.js.map