UNPKG

minisearch

Version:

Tiny but powerful full-text search engine for browser and Node

425 lines (375 loc) 13.4 kB
/* eslint-disable no-labels */
import { TreeIterator, ENTRIES, KEYS, VALUES, LEAF } from './TreeIterator'
import fuzzySearch, { type FuzzyResults } from './fuzzySearch'
import type { RadixTree, Entry, Path } from './types'

/**
 * A class implementing the same interface as a standard JavaScript
 * [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map)
 * with string keys, but adding support for efficiently searching entries with
 * prefix or fuzzy search. This class is used internally by {@link MiniSearch}
 * as the inverted index data structure. The implementation is a radix tree
 * (compressed prefix tree).
 *
 * Since this class can be of general utility beyond _MiniSearch_, it is
 * exported by the `minisearch` package and can be imported (or required) as
 * `minisearch/SearchableMap`.
 *
 * @typeParam T  The type of the values stored in the map.
 */
export default class SearchableMap<T = any> {
  /**
   * Root node of the radix tree backing this map (or this view).
   *
   * @internal
   */
  _tree: RadixTree<T>

  /**
   * Prefix at which this instance is rooted (`''` for a top-level map).
   *
   * @internal
   */
  _prefix: string

  // Lazily computed number of entries; `undefined` means "not computed yet".
  // It is reset by every mutating method of THIS instance only.
  private _size: number | undefined = undefined

  /**
   * The constructor is normally called without arguments, creating an empty
   * map. In order to create a {@link SearchableMap} from an iterable or from an
   * object, check {@link SearchableMap.from} and {@link
   * SearchableMap.fromObject}.
   *
   * The constructor arguments are for internal use, when creating derived
   * mutable views of a map at a prefix.
   */
  constructor (tree: RadixTree<T> = new Map(), prefix = '') {
    this._tree = tree
    this._prefix = prefix
  }

  /**
   * Creates and returns a mutable view of this {@link SearchableMap},
   * containing only entries that share the given prefix.
   *
   * ### Usage:
   *
   * ```javascript
   * let map = new SearchableMap()
   * map.set("unicorn", 1)
   * map.set("universe", 2)
   * map.set("university", 3)
   * map.set("unique", 4)
   * map.set("hello", 5)
   *
   * let uni = map.atPrefix("uni")
   * uni.get("unique") // => 4
   * uni.get("unicorn") // => 1
   * uni.get("hello") // => undefined
   *
   * let univer = map.atPrefix("univer")
   * univer.get("unique") // => undefined
   * univer.get("universe") // => 2
   * univer.get("university") // => 3
   * ```
   *
   * NOTE(review): `get`, `has`, `set`, `delete`, `update` and `fetch` hand
   * `key` to the view's subtree unchanged, i.e. without removing the view's
   * prefix from it. The lookups in the example above should be confirmed
   * against the tests before being relied upon.
   *
   * When `prefix` ends in the middle of an edge label, the root of the
   * returned view is a newly created node that is not linked into the original
   * tree; it only references the existing subtree below that edge.
   *
   * @param prefix  The prefix
   * @return A {@link SearchableMap} representing a mutable view of the original
   * Map at the given prefix
   * @throws Error with message `Mismatched prefix` if `prefix` does not start
   * with the prefix of this map
   */
  atPrefix (prefix: string): SearchableMap<T> {
    if (!prefix.startsWith(this._prefix)) { throw new Error('Mismatched prefix') }

    const [node, path] = trackDown(this._tree, prefix.slice(this._prefix.length))

    if (node === undefined) {
      // No node sits exactly at `prefix`. The last path entry holds the node
      // where the descent stopped and the part of the prefix left unmatched:
      // look for an edge that extends that remainder.
      const [parentNode, key] = last(path)

      for (const k of parentNode!.keys()) {
        if (k !== LEAF && k.startsWith(key)) {
          const subtree = new Map()
          subtree.set(k.slice(key.length), parentNode!.get(k)!)
          return new SearchableMap(subtree, prefix)
        }
      }
    }

    // `node` may still be undefined here (nothing matches the prefix), in
    // which case the constructor default creates an empty tree.
    return new SearchableMap<T>(node, prefix)
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/clear
   */
  clear (): void {
    this._size = undefined
    this._tree.clear()
  }

  /**
   * Unlike `Map.prototype.delete`, this method does not return a value.
   *
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/delete
   * @param key  Key to delete
   */
  delete (key: string): void {
    this._size = undefined
    remove(this._tree, key)
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/entries
   * @return An iterator iterating through `[key, value]` entries.
   */
  entries () {
    return new TreeIterator(this, ENTRIES)
  }

  /**
   * Note that, differently from `Map.prototype.forEach`, the callback receives
   * the key as its first argument and the value as its second.
   *
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/forEach
   * @param fn  Iteration function
   */
  forEach (fn: (key: string, value: T, map: SearchableMap) => void): void {
    for (const [key, value] of this) {
      fn(key, value, this)
    }
  }

  /**
   * Returns a Map of all the entries that have a key within the given edit
   * distance from the search key. The keys of the returned Map are the matching
   * keys, while the values are two-element arrays where the first element is
   * the value associated to the key, and the second is the edit distance of the
   * key to the search key.
   *
   * ### Usage:
   *
   * ```javascript
   * let map = new SearchableMap()
   * map.set('hello', 'world')
   * map.set('hell', 'yeah')
   * map.set('ciao', 'mondo')
   *
   * // Get all entries that match the key 'hallo' with a maximum edit distance of 2
   * map.fuzzyGet('hallo', 2)
   * // => Map(2) { 'hello' => ['world', 1], 'hell' => ['yeah', 2] }
   *
   * // In the example, the "hello" key has value "world" and edit distance of 1
   * // (change "e" to "a"), the key "hell" has value "yeah" and edit distance of 2
   * // (change "e" to "a", delete "o")
   * ```
   *
   * @param key  The search key
   * @param maxEditDistance  The maximum edit distance (Levenshtein)
   * @return A Map of the matching keys to their value and edit distance
   */
  fuzzyGet (key: string, maxEditDistance: number): FuzzyResults<T> {
    return fuzzySearch<T>(this._tree, key, maxEditDistance)
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/get
   * @param key  Key to get
   * @return Value associated to the key, or `undefined` if the key is not
   * found.
   */
  get (key: string): T | undefined {
    const node = lookup<T>(this._tree, key)
    return node !== undefined ? node.get(LEAF) : undefined
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/has
   * @param key  Key
   * @return True if the key is in the map, false otherwise
   */
  has (key: string): boolean {
    const node = lookup(this._tree, key)
    return node !== undefined && node.has(LEAF)
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/keys
   * @return An `Iterable` iterating through keys
   */
  keys () {
    return new TreeIterator(this, KEYS)
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/set
   * @param key  Key to set
   * @param value  Value to associate to the key
   * @return The {@link SearchableMap} itself, to allow chaining
   * @throws Error if `key` is not a string
   */
  set (key: string, value: T): SearchableMap<T> {
    if (typeof key !== 'string') { throw new Error('key must be a string') }
    this._size = undefined
    const node = createPath(this._tree, key)
    node.set(LEAF, value)
    return this
  }

  /**
   * The number of entries, computed by iterating over the whole map the first
   * time it is requested and cached until this instance is mutated.
   *
   * The cache is reset only by the mutating methods of this instance: changes
   * made through a different {@link SearchableMap} instance sharing the same
   * tree (see {@link SearchableMap.atPrefix}) do not reset it.
   *
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/size
   */
  get size (): number {
    // Compare against `undefined` instead of relying on truthiness, so that a
    // cached count of 0 (empty map) is reused too.
    if (this._size !== undefined) { return this._size }

    // Count into a local and publish at the end, so that an exception thrown
    // while iterating cannot leave a partial count in the cache.
    let count = 0
    const iter = this.entries()
    while (!iter.next().done) count += 1

    this._size = count
    return count
  }

  /**
   * Updates the value at the given key using the provided function. The function
   * is called with the current value at the key, and its return value is used as
   * the new value to be set.
   *
   * ### Example:
   *
   * ```javascript
   * // Increment the current value by one
   * searchableMap.update('somekey', (currentValue) => currentValue == null ? 0 : currentValue + 1)
   * ```
   *
   * If the value at the given key is or will be an object, it might not require
   * re-assignment. In that case it is better to use `fetch()`, because it is
   * faster.
   *
   * @param key  The key to update
   * @param fn  The function used to compute the new value from the current one
   * @return The {@link SearchableMap} itself, to allow chaining
   * @throws Error if `key` is not a string
   */
  update (key: string, fn: (value: T | undefined) => T): SearchableMap<T> {
    if (typeof key !== 'string') { throw new Error('key must be a string') }
    this._size = undefined
    const node = createPath(this._tree, key)
    node.set(LEAF, fn(node.get(LEAF)))
    return this
  }

  /**
   * Fetches the value of the given key. If the value does not exist, calls the
   * given function to create a new value, which is inserted at the given key
   * and subsequently returned.
   *
   * A stored value of `undefined` is treated as missing: `initial` is called
   * and its result replaces it.
   *
   * ### Example:
   *
   * ```javascript
   * const map = searchableMap.fetch('somekey', () => new Map())
   * map.set('foo', 'bar')
   * ```
   *
   * @param key  The key to update
   * @param initial  A function that creates a new value if the key does not exist
   * @return The existing or new value at the given key
   * @throws Error if `key` is not a string
   */
  fetch (key: string, initial: () => T): T {
    if (typeof key !== 'string') { throw new Error('key must be a string') }
    this._size = undefined
    const node = createPath(this._tree, key)

    let value = node.get(LEAF)
    if (value === undefined) {
      node.set(LEAF, value = initial())
    }

    return value
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/values
   * @return An `Iterable` iterating through values.
   */
  values () {
    return new TreeIterator(this, VALUES)
  }

  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/@@iterator
   */
  [Symbol.iterator] () {
    return this.entries()
  }

  /**
   * Creates a {@link SearchableMap} from an `Iterable` of entries
   *
   * @param entries  Entries to be inserted in the {@link SearchableMap}
   * @return A new {@link SearchableMap} with the given entries
   */
  static from<T = any> (entries: Iterable<Entry<T>> | Entry<T>[]) {
    const tree = new SearchableMap()
    for (const [key, value] of entries) {
      tree.set(key, value)
    }
    return tree
  }

  /**
   * Creates a {@link SearchableMap} from the iterable properties of a JavaScript object
   *
   * @param object  Object of entries for the {@link SearchableMap}
   * @return A new {@link SearchableMap} with the given entries
   */
  static fromObject<T = any> (object: { [key: string]: T }) {
    return SearchableMap.from<T>(Object.entries(object))
  }
}

/**
 * Descends `tree` along `key`, at each node following the edge whose label is
 * a prefix of the part of the key still to be matched, and records every step.
 *
 * @param tree  Node to start from
 * @param key  Key (or remainder of a key) to follow
 * @param path  Accumulator of `[node, edge]` steps; it is mutated in place
 * @return A tuple of the node reached once the whole key is consumed, or
 * `undefined` if no edge matches at some point, and the path of steps taken.
 * When the descent fails, the last path entry is the node where it stopped
 * paired with the unmatched remainder of the key.
 */
const trackDown = <T = any>(tree: RadixTree<T> | undefined, key: string, path: Path<T> = []): [RadixTree<T> | undefined, Path<T>] => {
  if (key.length === 0 || tree == null) { return [tree, path] }

  for (const k of tree.keys()) {
    if (k !== LEAF && key.startsWith(k)) {
      path.push([tree, k]) // performance: update in place
      return trackDown(tree.get(k)!, key.slice(k.length), path)
    }
  }

  path.push([tree, key]) // performance: update in place
  return [undefined, path]
}

/**
 * Returns the node reached by following `key` from `tree`, or `undefined` if
 * at some node no edge label is a prefix of the remaining key.
 *
 * @param tree  Node to start from
 * @param key  Key (or remainder of a key) to follow
 */
const lookup = <T = any>(tree: RadixTree<T>, key: string): RadixTree<T> | undefined => {
  if (key.length === 0 || tree == null) { return tree }

  for (const k of tree.keys()) {
    if (k !== LEAF && key.startsWith(k)) {
      return lookup(tree.get(k)!, key.slice(k.length))
    }
  }

  return undefined
}

// Create a path in the radix tree for the given key, and returns the deepest
// node. This function is in the hot path for indexing. It avoids unnecessary
// string operations and recursion for performance.
/**
 * @param node  Root of the (sub)tree in which the path is created; nodes along
 * the path are modified in place
 * @param key  Key for which a path must exist
 * @return The node sitting exactly at `key`. The caller is responsible for
 * storing the value under `LEAF` in it.
 */
const createPath = <T = any>(node: RadixTree<T>, key: string): RadixTree<T> => {
  const keyLength = key.length

  // `pos` is the number of characters of `key` consumed so far. Each pass of
  // the outer loop moves one level down the tree.
  outer: for (let pos = 0; node && pos < keyLength;) {
    for (const k of node.keys()) {
      // Check whether this key is a candidate: the first characters must match.
      if (k !== LEAF && key[pos] === k[0]) {
        const len = Math.min(keyLength - pos, k.length)

        // Advance offset to the point where key and k no longer match.
        let offset = 1
        while (offset < len && key[pos + offset] === k[offset]) ++offset

        const child = node.get(k)!
        if (offset === k.length) {
          // The existing key is shorter than the key we need to create.
          node = child
        } else {
          // Partial match: we need to insert an intermediate node to contain
          // both the existing subtree and the new node.
          const intermediate = new Map()
          intermediate.set(k.slice(offset), child)
          node.set(key.slice(pos, pos + offset), intermediate)
          node.delete(k)
          node = intermediate
        }

        pos += offset
        // The node was modified while iterating over its keys: restart the
        // scan on the node one level down instead of resuming this iterator.
        continue outer
      }
    }

    // Create a final child node to contain the final suffix of the key.
    const child = new Map()
    node.set(key.slice(pos), child)
    return child
  }

  return node
}

/**
 * Removes the entry at `key` from `tree`, then restores the compressed shape
 * of the tree: nodes left empty are pruned, and a node left with a single
 * child is merged with that child. Does nothing if no node sits at `key`.
 *
 * @param tree  Root of the tree
 * @param key  Key to remove
 */
const remove = <T = any>(tree: RadixTree<T>, key: string): void => {
  const [node, path] = trackDown(tree, key)

  if (node === undefined) { return }
  node.delete(LEAF)

  if (node.size === 0) {
    cleanup(path)
  } else if (node.size === 1) {
    const [childKey, childNode] = node.entries().next().value!
    merge(path, childKey as string, childNode as RadixTree<T>)
  }
}

/**
 * Prunes the edge described by the last entry of `path`, and keeps pruning
 * upwards while the parent nodes are left empty. If a parent is left with a
 * single entry that is not a leaf value, it is merged with that child.
 *
 * @param path  Steps from the root to the node being pruned; not modified
 */
const cleanup = <T = any>(path: Path<T>): void => {
  // Walk the path bottom-up by index rather than recursing on a fresh copy of
  // the path at every level; a copy is made only if a merge is required.
  for (let depth = path.length; depth > 0; depth--) {
    const [node, edge] = path[depth - 1]
    node!.delete(edge)

    // The parent became empty too: prune it from its own parent.
    if (node!.size === 0) { continue }

    if (node!.size === 1) {
      const [onlyKey, onlyValue] = node!.entries().next().value!
      // A lone leaf value must stay where it is; a lone subtree is merged
      // into the edge leading to this node.
      if (onlyKey !== LEAF) {
        merge(path.slice(0, depth - 1), onlyKey as string, onlyValue as RadixTree<T>)
      }
    }

    return
  }
}

/**
 * Replaces the edge described by the last entry of `path` with a longer edge,
 * labelled with the old label followed by `key`, that points to `value`.
 * Does nothing if `path` is empty.
 *
 * @param path  Steps from the root to the node being merged away
 * @param key  Label of the only remaining edge of that node
 * @param value  Subtree that edge points to
 */
const merge = <T = any>(path: Path<T>, key: string, value: RadixTree<T>): void => {
  if (path.length === 0) { return }

  const [node, nodeKey] = last(path)
  node!.set(nodeKey + key, value)
  node!.delete(nodeKey)
}

/**
 * Returns the last element of `array`. Callers are expected to pass a
 * non-empty array: for an empty one the result is `undefined`.
 */
const last = <T = any>(array: T[]): T => {
  return array[array.length - 1]
}