UNPKG

cm-tarnation

Version:

An alternative parser for CodeMirror 6

253 lines (216 loc) 7.58 kB
/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ import { Input, NodeProp } from "@lezer/common" import type { Regex } from "./grammar/definition" export interface SearchOpts { /** Starting minimum index for the search. */ min?: number /** Starting maximum index for the search. */ max?: number /** * If true, the search will return the closest index to the desired value * on failure. */ precise?: boolean } /** * Performs a binary search through an array. * * The comparator function should return -1 if undershooting the desired * value, +1 if overshooting, and 0 if the value was found. * * The comparator can also short-circuit the search by returning true or * false. Returning true is like returning a 0 (target found), but * returning false induces a null return. */ export function search<T, TR>( haystack: T[], target: TR, comparator: (element: T, target: TR) => number | boolean, { min = 0, max = haystack.length - 1, precise = true }: SearchOpts = {} ) { if (haystack.length === 0) return null let index = -1 while (min <= max) { index = min + ((max - min) >>> 1) const cmp = comparator(haystack[index], target) if (cmp === true || cmp === 0) return { element: haystack[index], index } if (cmp === false) return null if (cmp < 0) min = index + 1 else if (cmp > 0) max = index - 1 } if (index === -1) return null if (!precise) return { element: null, index } return null } /** Class that implements the Lezer `Input` interface using a normal string. */ export class StringInput implements Input { constructor(readonly string: string) {} get length() { return this.string.length } chunk(from: number) { return this.string.slice(from) } readonly lineChunks = false read(from: number, to: number) { return this.string.slice(from, to) } } /** * Safely compiles a regular expression. * * @example * * ```ts * // returns null if features aren't supported (e.g. Safari) * const regex = re`/(?<=\d)\w+/d` * ``` */ export function re(str: TemplateStringsArray | string, forceFlags = "") { const input = typeof str === "string" ? str : str.raw[0] const split = /^!?\/([^]+)\/([^]*)$/.exec(input) if (!split || !split[1]) return null let [, src = "", flags = ""] = split if (forceFlags) flags = dedupe([...flags, ...forceFlags]).join("") try { return new RegExp(src, flags) } catch (err) { console.warn("cm-tarnation: Recovered from failed RegExp construction") console.warn("cm-tarnation: RegExp source:", input) console.warn(err) return null } } /** * Tests if the given string is a "RegExp string", as in it's in the format * of a native `RegExp` statement. */ export function isRegExpString(str: string): str is Regex { const split = /^!?\/([^]+)\/([^]*)$/.exec(str) if (!split || !split[1]) return false return true } /** Returns if the given `RegExp` has any remembered capturing groups. */ export function hasCapturingGroups(regexp: RegExp) { // give an alternative that always matches const always = new RegExp(`|${regexp.source}`) // ... which means we can use it to get a successful match, // regardless of the original regex. this is a bit of a hack, // but we can use this to detect capturing groups. return always.exec("")!.length > 1 } /** * Creates a lookbehind function from a `RegExp`. This function can only * test for a pattern's (non) existence, so no matches or capturing groups * are returned. * * @param pattern - A `RegExp` to be used as a pattern. * @param negative - Negates the pattern. */ export function createLookbehind(pattern: RegExp, negative?: boolean) { // can't be sticky, global, or multiline const flags = pattern.flags.replaceAll(/[ygm]/g, "") // regexp that can only match at the end of a string const regex = new RegExp(`(?:${pattern.source})$`, flags) return (str: string, pos: number) => { const clipped = str.slice(0, pos) const result = regex.test(clipped) return negative ? !result : result } } /** * A special per-node `NodeProp` used for describing nodes where a nested * parser will be embedded. */ export const EmbeddedParserProp = new NodeProp<string>() /** * Returns a completely concatenated `Int32Array` from a list of arrays. * * @param arrays - Arrays to concatenate. * @param length - If you know the length of the final array, you can pass * it here to avoid having the function calculate it. */ export function concatInt32Arrays(arrays: Int32Array[], length?: number) { let total = length ?? 0 if (!total) { for (let i = 0; i < arrays.length; i++) { total += arrays[i].length } } const result = new Int32Array(total) let offset = 0 for (let i = 0; i < arrays.length; i++) { result.set(arrays[i], offset) offset += arrays[i].length } return result } /** * Deduplicates an array. Does not mutate the original array. * * @param arr - The array to deduplicate. * @param insert - Additional values to insert into the array, if desired. */ export function dedupe<T extends any[]>(arr: T, ...insert: T) { return [...new Set([...arr, ...insert])] as T } /** Performance measuring utility. */ export function perfy(): () => number { const start = performance.now() return () => { return parseFloat((performance.now() - start).toFixed(4)) } } /** Removes all properties assigned to `undefined` in an object. */ export function removeUndefined<T>(obj: T) { // this wacky approach is faster as it avoids an iterator const keys = Object.keys(obj) as (keyof T)[] for (let i = 0; i < keys.length; i++) { if (obj[keys[i]] === undefined) delete obj[keys[i]] } return obj as { [K in keyof T]: Exclude<T[K], undefined> } } /** Takes a string and escapes any `RegExp` sensitive characters. */ export function escapeRegExp(str: string) { return str.replace(/[.*+?^${}()|\[\]\\]/g, "\\$&") } /** Creates a simple pseudo-random ID, with an optional prefix attached. */ export function createID(prefix = "") { const suffix = Math.abs(hash(Math.random() * 100 + prefix)) return `${prefix}-${suffix}` } /** Converts a string into an array of codepoints. */ export function toPoints(str: string) { const codes: number[] = [] for (let i = 0; i < str.length; i++) { codes.push(str.codePointAt(i)!) } return codes } /** * Checks an array of codepoints against a codepoint array or a string, * starting from a given position. */ export function pointsMatch(points: number[], str: string | number[], pos: number) { if (typeof str === "string") { for (let i = 0; i < points.length; i++) { if (points[i] !== str.codePointAt(pos + i)) return false } } else { for (let i = 0; i < points.length; i++) { if (points[i] !== str[pos + i]) return false } } return true } // https://gist.github.com/hyamamoto/fd435505d29ebfa3d9716fd2be8d42f0#gistcomment-2694461 /** Very quickly generates a (non-secure) hash from the given string. */ export function hash(s: string) { let h = 0 for (let i = 0; i < s.length; i++) { h = (Math.imul(31, h) + s.charCodeAt(i)) | 0 } return h }