UNPKG

slimsearch

Version:

Tiny but powerful full-text search engine for browser and Node

1,334 lines (1,321 loc) 59 kB
/**
 * Branded empty-string type. In a {@link RadixTree} it is the key whose entry
 * holds a value of type `T` rather than a child tree.
 */
type LeafType = "" & {
  readonly __tag: unique symbol;
};

/**
 * Radix tree node: a `Map` whose {@link LeafType} key maps to a value and
 * whose other string keys map to child trees.
 */
interface RadixTree<T> extends Map<string, T | RadixTree<T>> {
  get(key: LeafType): T | undefined;
  get(key: string): RadixTree<T> | undefined;
  set(key: LeafType, value: T): this;
  set(key: string, value: RadixTree<T>): this;
}

/** A `[key, value]` pair. */
type Entry<T> = [string, T];

/** A list of `[tree node, string]` pairs. */
type Path<T> = [RadixTree<T>, string][];

/** A `[value, edit distance]` pair, as returned by fuzzy lookups. */
type FuzzyResult<T> = [T, number];

/** Fuzzy lookup results, keyed by the matching key. */
type FuzzyResults<T> = Map<string, FuzzyResult<T>>;

/** Maps each iteration kind to the type of item it yields. */
interface Iterators<T> {
  ENTRIES: Entry<T>;
  KEYS: string;
  VALUES: T;
}

/** The iteration kinds: `"ENTRIES"`, `"KEYS"` or `"VALUES"`. */
type Kind<T> = keyof Iterators<T>;

/** The item type yielded for the iteration kind `K`. */
type Result<T, K extends keyof Iterators<T>> = Iterators<T>[K];

/** A list of tree nodes, each paired with a list of keys. */
type IteratorPath<T> = {
  node: RadixTree<T>;
  keys: string[];
}[];

/** The minimal shape a {@link TreeIterator} needs: a tree and a prefix. */
interface IterableSet<T> {
  _tree: RadixTree<T>;
  _prefix: string;
}

/**
 * @private
 */
declare class TreeIterator<T, K extends Kind<T>>
  implements Iterator<Result<T, K>>
{
  set: IterableSet<T>;
  _type: K;
  _path: IteratorPath<T>;
  constructor(set: IterableSet<T>, type: K);
  next(): IteratorResult<Result<T, K>>;
  dive(): IteratorResult<Result<T, K>>;
  backtrack(): void;
  key(): string;
  value(): T;
  result(): Result<T, K>;
  [Symbol.iterator](): this;
}

/**
 * A class implementing the same interface as a standard JavaScript
 * [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map)
 * with string keys, but adding support for efficiently searching entries with
 * prefix or fuzzy search. This class is used internally by {@link SearchIndex} as
 * the inverted index data structure. The implementation is a radix tree
 * (compressed prefix tree).
 *
 * Since this class can be of general utility beyond _SlimSearch_, it is
 * exported by the `slimsearch` package and can be imported (or required) as
 * `slimsearch/SearchableMap`.
 *
 * @typeParam Value The type of the values stored in the map.
*/ declare class SearchableMap<Value = any> { /** * @ignore */ _tree: RadixTree<Value>; /** * @ignore */ _prefix: string; private _size; /** * The constructor is normally called without arguments, creating an empty * map. In order to create a {@link SearchableMap} from an iterable or from an * object, check {@link SearchableMap.from} and {@link SearchableMap.fromObject}. * * The constructor arguments are for internal use, when creating derived * mutable views of a map at a prefix. */ constructor(tree?: RadixTree<Value>, prefix?: string); /** * Creates and returns a mutable view of this {@link SearchableMap}, containing only * entries that share the given prefix. * * ### Usage: * * ```js * const map = new SearchableMap() * map.set("unicorn", 1) * map.set("universe", 2) * map.set("university", 3) * map.set("unique", 4) * map.set("hello", 5) * * const uni = map.atPrefix("uni") * uni.get("unique") // => 4 * uni.get("unicorn") // => 1 * uni.get("hello") // => undefined * * const univer = map.atPrefix("univer") * univer.get("unique") // => undefined * univer.get("universe") // => 2 * univer.get("university") // => 3 * ``` * * @param prefix The prefix * @return A {@link SearchableMap} representing a mutable view of the original Map at the given prefix */ atPrefix(prefix: string): SearchableMap<Value>; /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/clear */ clear(): void; /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/delete * @param key Key to delete */ delete(key: string): void; /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/entries * @return An iterator iterating through `[key, value]` entries. 
*/ entries(): TreeIterator<Value, "ENTRIES">; /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/forEach * @param fn Iteration function */ forEach(fn: (key: string, value: Value, map: SearchableMap) => void): void; /** * Returns a Map of all the entries that have a key within the given edit * distance from the search key. The keys of the returned Map are the matching * keys, while the values are two-element arrays where the first element is * the value associated to the key, and the second is the edit distance of the * key to the search key. * * ### Usage: * * ```js * const map = new SearchableMap() * map.set('hello', 'world') * map.set('hell', 'yeah') * map.set('ciao', 'mondo') * * // Get all entries that match the key 'hallo' with a maximum edit distance of 2 * map.fuzzyGet('hallo', 2) * // => Map(2) { 'hello' => ['world', 1], 'hell' => ['yeah', 2] } * * // In the example, the "hello" key has value "world" and edit distance of 1 * // (change "e" to "a"), the key "hell" has value "yeah" and edit distance of 2 * // (change "e" to "a", delete "o") * ``` * * @param key The search key * @param maxEditDistance The maximum edit distance (Levenshtein) * @return A Map of the matching keys to their value and edit distance */ fuzzyGet(key: string, maxEditDistance: number): FuzzyResults<Value>; /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/get * @param key Key to get * @return Value associated to the key, or `undefined` if the key is not * found. 
*/ get(key: string): Value | undefined; /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/has * @param key Key * @return True if the key is in the map, false otherwise */ has(key: string): boolean; /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/keys * @return An `Iterable` iterating through keys */ keys(): TreeIterator<Value, "KEYS">; /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/set * @param key Key to set * @param value Value to associate to the key * @return The {@link SearchableMap} itself, to allow chaining */ set(key: string, value: Value): this; /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/size */ get size(): number; /** * Updates the value at the given key using the provided function. The function * is called with the current value at the key, and its return value is used as * the new value to be set. * * ### Example: * * ```js * // Increment the current value by one * searchableMap.update('somekey', (currentValue) => currentValue == null ? 0 : currentValue + 1) * ``` * * If the value at the given key is or will be an object, it might not require * re-assignment. In that case it is better to use `fetch()`, because it is * faster. * * @param key The key to update * @param fn The function used to compute the new value from the current one * @return The {@link SearchableMap} itself, to allow chaining */ update(key: string, fn: (value: Value | undefined) => Value): this; /** * Fetches the value of the given key. If the value does not exist, calls the * given function to create a new value, which is inserted at the given key * and subsequently returned. 
* * ### Example: * * ```js * const map = searchableMap.fetch('somekey', () => new Map()) * map.set('foo', 'bar') * ``` * * @param key The key to update * @param initial A function that creates a new value if the key does not exist * @return The existing or new value at the given key */ fetch(key: string, initial: () => Value): Value; /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/values * @return An `Iterable` iterating through values. */ values(): TreeIterator<Value, "VALUES">; /** * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/@@iterator */ [Symbol.iterator](): TreeIterator<Value, "ENTRIES">; /** * Creates a {@link SearchableMap} from an `Iterable` of entries * * @param entries Entries to be inserted in the {@link SearchableMap} * @return A new {@link SearchableMap} with the given entries */ static from<T = any>(entries: Iterable<Entry<T>> | Entry<T>[]): SearchableMap<T>; /** * Creates a {@link SearchableMap} from the iterable properties of a JavaScript object * * @param object Object of entries for the {@link SearchableMap} * @return A new {@link SearchableMap} with the given entries */ static fromObject<T = any>(object: Record<string, T>): SearchableMap<T>; } declare const WILDCARD: unique symbol; type LowercaseCombinationOperator = "or" | "and" | "and_not"; type CombinationOperator = LowercaseCombinationOperator | Uppercase<LowercaseCombinationOperator> | Capitalize<LowercaseCombinationOperator>; type SerializedIndexEntry = Record<string, number>; /** * Parameters of the BM25+ scoring algorithm. Customizing these is almost never * necessary, and fine-tuning them requires an understanding of the BM25 scoring * model. 
* * Some information about BM25 (and BM25+) can be found at these links: * * - https://en.wikipedia.org/wiki/Okapi_BM25 * - https://opensourceconnections.com/blog/2015/10/16/bm25-the-next-generation-of-lucene-relevation/ */ interface BM25Params { /** Term frequency saturation point. * * Recommended values are between `1.2` and `2`. Higher values increase the * difference in score between documents with higher and lower term * frequencies. Setting this to `0` or a negative value is invalid. Defaults * to `1.2` */ k: number; /** * Length normalization impact. * * Recommended values are around `0.75`. Higher values increase the weight * that field length has on scoring. Setting this to `0` (not recommended) * means that the field length has no effect on scoring. Negative values are * invalid. Defaults to `0.7`. */ b: number; /** * BM25+ frequency normalization lower bound (usually called δ). * * Recommended values are between `0.5` and `1`. Increasing this parameter * increases the minimum relevance of one occurrence of a search term * regardless of its (possibly very long) field length. Negative values are * invalid. Defaults to `0.5`. */ d: number; } /** * Match information for a search result. It is a key-value object where keys * are terms that matched, and values are the list of fields that the term was * found in. */ type MatchInfo = Record<string, string[]>; /** * Type of the search results. Each search result indicates the document ID, the * terms that matched, the match information, the score, and all the stored * fields. * * @typeParam ID The type of id being indexed. * @typeParam Index The type of the documents being indexed. */ type SearchResult<ID = any, Index extends Record<string, any> = Record<never, never>> = Index & { /** * The document ID */ id: ID; /** * List of document terms that matched. For example, if a prefix search for * `"moto"` matches `"motorcycle"`, `terms` will contain `"motorcycle"`. 
*/ terms: string[]; /** * List of query terms that matched. For example, if a prefix search for * `"moto"` matches `"motorcycle"`, `queryTerms` will contain `"moto"`. */ queryTerms: string[]; /** * Score of the search results */ score: number; /** * Match information, see {@link MatchInfo} */ match: MatchInfo; }; /** * Search options to customize the search behavior. * * @typeParam ID The type of id being indexed. * @typeParam Index The type of the documents being indexed. */ interface SearchOptions<ID = any, Index extends Record<string, any> = Record<never, never>> { /** * Names of the fields to search in. If omitted, all fields are searched. */ fields?: string[]; /** * Function used to filter search results, for example on the basis of stored * fields. It takes as argument each search result and should return a boolean * to indicate if the result should be kept or not. */ filter?: (result: SearchResult<ID, Index>) => boolean; /** * Key-value object of field names to boosting values. By default, fields are * assigned a boosting factor of 1. If one assigns to a field a boosting value * of 2, a result that matches the query in that field is assigned a score * twice as high as a result matching the query in another field, all else * being equal. */ boost?: Record<string, number>; /** * Function to calculate a boost factor for each term. * * This function, if provided, is called for each query term (as split by * `tokenize` and processed by `processTerm`). The arguments passed to the * function are the query term, the positional index of the term in the query, * and the array of all query terms. It is expected to return a numeric boost * factor for the term. A factor lower than 1 reduces the importance of the * term, a factor greater than 1 increases it. A factor of exactly 1 is * neutral, and does not affect the term's importance. 
*/ boostTerm?: (term: string, i: number, terms: string[]) => number; /** * Relative weights to assign to prefix search results and fuzzy search * results. Exact matches are assigned a weight of 1. */ weights?: { fuzzy?: number; prefix?: number; }; /** * Function to calculate a boost factor for documents. It takes as arguments * the document ID, and a term that matches the search in that document, and * the value of the stored fields for the document (if any). It should return * a boosting factor: a number higher than 1 increases the computed score, a * number lower than 1 decreases the score, and a falsy value skips the search * result completely. */ boostDocument?: (documentId: ID, term: string, storedFields?: Index) => number; /** * Controls whether to perform prefix search. It can be a simple boolean, or a * function. * * If a boolean is passed, prefix search is performed if true. * * If a function is passed, it is called upon search with a search term, the * positional index of that search term in the tokenized search query, and the * tokenized search query. The function should return a boolean to indicate * whether to perform prefix search for that search term. */ prefix?: boolean | ((term: string, index: number, terms: string[]) => boolean); /** * Controls whether to perform fuzzy search. It can be a simple boolean, or a * number, or a function. * * If a boolean is given, fuzzy search with a default fuzziness parameter is * performed if true. * * If a number higher or equal to 1 is given, fuzzy search is performed, with * a maximum edit distance (Levenshtein) equal to the number. * * If a number between 0 and 1 is given, fuzzy search is performed within a * maximum edit distance corresponding to that fraction of the term length, * approximated to the nearest integer. For example, 0.2 would mean an edit * distance of 20% of the term length, so 1 character in a 5-characters term. 
* The calculated fuzziness value is limited by the `maxFuzzy` option, to * prevent slowdown for very long queries. * * If a function is passed, the function is called upon search with a search * term, a positional index of that term in the tokenized search query, and * the tokenized search query. It should return a boolean or a number, with * the meaning documented above. */ fuzzy?: boolean | number | ((term: string, index: number, terms: string[]) => boolean | number); /** * Controls the maximum fuzziness when using a fractional fuzzy value. * Very high edit distances usually don't produce meaningful results, * but can excessively impact search performance. * * @default 6 */ maxFuzzy?: number; /** * The operand to combine partial results for each term. By default it is * "OR", so results matching _any_ of the search terms are returned by a * search. If "AND" is given, only results matching _all_ the search terms are * returned by a search. */ combineWith?: CombinationOperator; /** * Function to tokenize the search query. By default, the same tokenizer used * for indexing is used also for search. */ tokenize?: (text: string) => string[]; /** * Function to process or normalize terms in the search query. By default, the * same term processor used for indexing is used also for search. */ processTerm?: (term: string) => string | string[] | null | undefined | false; /** * BM25+ algorithm parameters. Customizing these is almost never necessary, * and fine-tuning them requires an understanding of the BM25 scoring model. In * most cases, it is best to omit this option to use defaults, and instead use * boosting to tweak scoring for specific use cases. */ bm25?: BM25Params; } /** * Configuration options passed to the {@link SearchIndex} constructor * * @typeParam ID The type of id being indexed. * @typeParam Document The type of documents being indexed. * @typeParam Index The type of the documents being indexed. 
*/ interface SearchIndexOptions<ID = any, Document = any, Index extends Record<string, any> = Record<never, never>> { /** * Names of the document fields to be indexed. */ fields: string[]; /** * Name of the ID field, uniquely identifying a document. */ idField?: string; /** * Names of fields to store, so that search results would include them. By * default none, so results would only contain the id field. */ storeFields?: string[]; /** * Function used to extract the value of each field in documents. By default, * the documents are assumed to be plain objects with field names as keys, * but by specifying a custom `extractField` function one can completely * customize how the fields are extracted. * * The function takes as arguments the document, and the name of the field to * extract from it. It should return the field value as a string. */ extractField?: (document: Document, fieldName: string) => string; /** * Function used to split a field value into individual terms to be indexed. * The default tokenizer separates terms by space or punctuation, but a * custom tokenizer can be provided for custom logic. * * The function takes as arguments string to tokenize, and the name of the * field it comes from. It should return the terms as an array of strings. * When used for tokenizing a search query instead of a document field, the * `fieldName` is undefined. */ tokenize?: (text: string, fieldName?: string) => string[]; /** * Function used to process a term before indexing or search. This can be * used for normalization (such as stemming). By default, terms are * downcased, and otherwise no other normalization is performed. * * The function takes as arguments a term to process, and the name of the * field it comes from. It should return the processed term as a string, or a * falsy value to reject the term entirely. * * It can also return an array of strings, in which case each string in the * returned array is indexed as a separate term. 
*/ processTerm?: (term: string, fieldName?: string) => string | string[] | null | undefined | false; /** * Function called to log messages. Arguments are a log level ('debug', * 'info', 'warn', or 'error'), a log message, and an optional string code * that identifies the reason for the log. * * The default implementation uses `console`, if defined. */ logger?: (level: LogLevel, message: string, code?: string) => void; /** * If `true` (the default), vacuuming is performed automatically as soon as * {@link discard} is called a certain number of times, cleaning up * obsolete references from the index. If `false`, no automatic vacuuming is * performed. Custom settings controlling auto vacuuming thresholds, as well * as batching behavior, can be passed as an object (see the * {@link AutoVacuumOptions} type). */ autoVacuum?: boolean | AutoVacuumOptions; /** * Default search options (see the {@link SearchOptions} type and the * {@link search} method for details) */ searchOptions?: SearchOptions<ID, Index>; /** * Default auto suggest options (see the {@link SearchOptions} type and the * {@link autoSuggest} method for details) */ autoSuggestOptions?: SearchOptions<ID, Index>; } type LogLevel = "debug" | "info" | "warn" | "error"; /** * The type of auto-suggestions */ interface Suggestion { /** * The suggestion */ suggestion: string; /** * Suggestion as an array of terms */ terms: string[]; /** * Score for the suggestion */ score: number; } /** * Object format of search index when serialized * * @typeParam Index The type of the documents being indexed. 
*/ interface IndexObject<Index extends Record<string, any> = Record<never, never>> { documentCount: number; nextId: number; documentIds: Record<string, any>; fieldIds: Record<string, number>; fieldLength: Record<string, number[]>; averageFieldLength: number[]; storedFields: Record<string, Index>; dirtCount?: number; index: [string, Record<string, SerializedIndexEntry>][]; version: number; } /** * @typeParam ID The type of id being indexed. * @typeParam Index The type of the documents being indexed. */ interface QueryCombination<ID = any, Index extends Record<string, any> = Record<never, never>> extends SearchOptions<ID, Index> { queries: Query[]; } /** * Wildcard query, used to match all terms */ type Wildcard = typeof WILDCARD; /** * Search query expression, either a query string or an expression tree * combining several queries with a combination of AND or OR. */ type Query = QueryCombination | string | Wildcard; /** * Options to control vacuuming behavior. * * Vacuuming cleans up document references made obsolete by * {@link discard} from the index. On large indexes, vacuuming is * potentially costly, because it has to traverse the whole inverted index. * Therefore, in order to dilute this cost so it does not negatively affects the * application, vacuuming is performed in batches, with a delay between each * batch. These options are used to configure the batch size and the delay * between batches. */ interface VacuumOptions { /** * Size of each vacuuming batch (the number of terms in the index that will be * traversed in each batch). * * @default 1000 */ batchSize?: number; /** * Wait time between each vacuuming batch in milliseconds. * * @default 10 */ batchWait?: number; } /** * Sets minimum thresholds for `dirtCount` and `dirtFactor` that trigger an * automatic vacuuming. */ interface VacuumConditions { /** * Minimum `dirtCount` (number of discarded documents since the last vacuuming) * under which auto vacuum is not triggered. 
* * @default 20 */ minDirtCount?: number; /** * Minimum `dirtFactor` (proportion of discarded documents over the total) * under which auto vacuum is not triggered. * * @default 0.1 */ minDirtFactor?: number; } /** * Options to control auto vacuum behavior. When discarding a document with * {@link discard}, a vacuuming operation is automatically started if the * `dirtCount` and `dirtFactor` are above the `minDirtCount` and `minDirtFactor` * thresholds defined by this configuration. See {@link VacuumConditions} for * details on these. * * Also, `batchSize` and `batchWait` can be specified, controlling batching * behavior (see {@link VacuumOptions}). */ type AutoVacuumOptions = VacuumOptions & VacuumConditions; interface SearchOptionsWithDefaults<ID = any, Index extends Record<string, any> = Record<string, never>> extends SearchOptions<ID, Index> { boost: Record<string, number>; weights: { fuzzy: number; prefix: number; }; prefix: boolean | ((term: string, index: number, terms: string[]) => boolean); fuzzy: boolean | number | ((term: string, index: number, terms: string[]) => boolean | number); maxFuzzy: number; combineWith: CombinationOperator; bm25: BM25Params; } type DocumentTermFrequencies = Map<number, number>; interface OptionsWithDefaults<ID = any, Document = any, Index extends Record<string, any> = Record<string, never>> extends Omit<SearchIndexOptions<ID, Document, Index>, "processTerm" | "tokenize"> { storeFields: string[]; idField: string; extractField: (document: Document, fieldName: string) => string; tokenize: (text: string, fieldName: string) => string[]; processTerm: (term: string, fieldName: string) => string | string[] | null | undefined | false; logger: (level: LogLevel, message: string, code?: string) => void; autoVacuum: false | AutoVacuumOptions; searchOptions: SearchOptionsWithDefaults<ID, Index>; autoSuggestOptions: SearchOptions<ID, Index>; } type FieldTermData = Map<number, DocumentTermFrequencies>; /** * A class to represent search index * * 
### Basic example: * * ```js * const documents = [ * { * id: 1, * title: 'Moby Dick', * text: 'Call me Ishmael. Some years ago...', * category: 'fiction' * }, * { * id: 2, * title: 'Zen and the Art of Motorcycle Maintenance', * text: 'I can see by my watch...', * category: 'fiction' * }, * { * id: 3, * title: 'Neuromancer', * text: 'The sky above the port was...', * category: 'fiction' * }, * { * id: 4, * title: 'Zen and the Art of Archery', * text: 'At first sight it must seem...', * category: 'non-fiction' * }, * // ...and more * ] * * // Create a search engine that indexes the 'title' and 'text' fields for * // full-text search. Search results will include 'title' and 'category' (plus the * // id field, that is always stored and returned) * const searchIndex = createIndex({ * fields: ['title', 'text'], * storeFields: ['title', 'category'] * }) * * // Add documents to the index * addAll(searchIndex, documents) * * // Search for documents: * const results = search(searchIndex, 'zen art motorcycle') * // => [ * // { id: 2, title: 'Zen and the Art of Motorcycle Maintenance', category: 'fiction', score: 2.77258 }, * // { id: 4, title: 'Zen and the Art of Archery', category: 'non-fiction', score: 1.38629 } * // ] * ``` * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the documents being indexed. 
* */ declare class SearchIndex<ID = any, Document = any, Index extends Record<string, any> = Record<never, never>> { /** * @ignore */ _options: OptionsWithDefaults<ID, Document, Index>; /** * @ignore */ _index: SearchableMap<FieldTermData>; /** * @ignore */ _documentCount: number; /** * @ignore */ _documentIds: Map<number, ID>; /** * @ignore */ _idToShortId: Map<ID, number>; /** * @ignore */ _fieldIds: Record<string, number>; /** * @ignore */ _fieldLength: Map<number, number[]>; /** * @ignore */ _avgFieldLength: number[]; /** * @ignore */ _nextId: number; /** * @ignore */ _storedFields: Map<number, Index>; /** * @ignore */ _dirtCount: number; /** * @ignore */ _currentVacuum: Promise<void> | null; /** * @ignore */ _enqueuedVacuum: Promise<void> | null; /** * @ignore */ _enqueuedVacuumConditions: VacuumConditions | undefined; /** * @param options The options for the search index */ constructor(options: SearchIndexOptions<ID, Document, Index>); /** * Is `true` if a vacuuming operation is ongoing, `false` otherwise */ get isVacuuming(): boolean; /** * The number of documents discarded since the most recent vacuuming */ get dirtCount(): number; /** * A number between 0 and 1 giving an indication about the proportion of * documents that are discarded, and can therefore be cleaned up by vacuuming. * A value close to 0 means that the index is relatively clean, while a higher * value means that the index is relatively dirty, and vacuuming could release * memory. */ get dirtFactor(): number; /** * Total number of documents available to search */ get documentCount(): number; /** * Number of terms in the index */ get termCount(): number; /** * Allows serialization of the index to JSON, to possibly store it and later * deserialize it with {@link loadJSONIndex} or {@link loadJSONIndexAsync}. 
* * Normally one does not directly call this method, but rather call the * standard JavaScript `JSON.stringify()` passing the {@link SearchIndex} instance, * and JavaScript will internally call this method. Upon deserialization, one * must pass to {@link loadJSONIndex} or {@link loadJSONIndexAsync} the same options used to create the original * instance that was serialized. * * ### Usage: * * ```js * // Serialize the index: * let searchIndex = createIndex({ fields: ['title', 'text'] }) * addAll(searchIndex, documents) * const json = JSON.stringify(index) * * // Later, to deserialize it: * searchIndex = loadJSONIndex(json, { fields: ['title', 'text'] }) * ``` * * @return A plain-object serializable representation of the search index. */ toJSON(): IndexObject<Index>; /** * @ignore */ private addFields; } /** * Adds a document to the index * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the documents being indexed. * * @param searchIndex The search index * @param document The document to be indexed */ declare const add: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, document: Document) => void; /** * Adds all the given documents to the index * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the documents being indexed. * * @param searchIndex The search index * @param documents An array of documents to be indexed */ declare const addAll: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, documents: readonly Document[]) => void; /** * Adds all the given documents to the index asynchronously. * * Returns a promise that resolves (to `undefined`) when the indexing is done. 
* This method is useful when index many documents, to avoid blocking the main * thread. The indexing is performed asynchronously and in chunks. * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the documents being indexed. * * @param searchIndex The search index * @param documents An array of documents to be indexed * @param options Configuration options * @return A promise resolving when the indexing is done */ declare const addAllAsync: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, documents: readonly Document[], options?: { chunkSize?: number; }) => Promise<void>; /** * Provides suggestions for the given search query. * * The result is a list of suggested modified search queries, derived from the * given search query, each with a relevance score, sorted by descending score. * * By default, it uses the same options used for search, except that by * default it performs prefix search on the last term of the query, and * combines terms with `'AND'` (requiring all query terms to match). Custom * options can be passed as the third argument. Defaults can be changed by * passing an `autoSuggestOptions` option when initializing the index.
* * ### Basic usage: * * ```js * // Get suggestions for 'neuro': * autoSuggest(searchIndex, 'neuro') * // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 0.46240 } ] * ``` * * ### Multiple words: * * ```js * // Get suggestions for 'zen ar': * autoSuggest(searchIndex, 'zen ar') * // => [ * // { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 }, * // { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 } * // ] * ``` * * ### Fuzzy suggestions: * * ```js * // Correct spelling mistakes using fuzzy search: * autoSuggest(searchIndex, 'neromancer', { fuzzy: 0.2 }) * // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 1.03998 } ] * ``` * * ### Filtering: * * ```js * // Get suggestions for 'zen ar', but only within the 'fiction' category * // (assuming that 'category' is a stored field): * autoSuggest(searchIndex, 'zen ar', { * filter: (result) => result.category === 'fiction' * }) * // => [ * // { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 }, * // { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 } * // ] * ``` * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the stored fields of the indexed documents. * * @param searchIndex The search index * @param queryString Query string to be expanded into suggestions * @param options Search options. The supported options and default values * are the same as for the `search` function, except that by default prefix * search is performed on the last term in the query, and terms are combined * with `'AND'`. * @return An array of suggestions, sorted by descending relevance score.
*/ declare const autoSuggest: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, queryString: string, options?: SearchOptions<ID, Index>) => Suggestion[]; /** * Returns the default value of an option. It will throw an error if no option * with the given name exists. * * ### Usage: * * ```js * // Get default tokenizer * getDefaultValue('tokenize') * * // Get default term processor * getDefaultValue('processTerm') * * // Unknown options will throw an error * getDefaultValue('notExisting') * // => throws 'SlimSearch: unknown option "notExisting"' * ``` * * @param optionName Name of the option * @return The default value of the given option * @throws If no option with the given name exists */ declare const getDefaultValue: (optionName: string) => unknown; /** * Returns `true` if a document with the given ID is present in the index and * available for search, `false` otherwise. * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the stored fields of the indexed documents. * * @param searchIndex The search index * @param id The document ID * @return Whether the document is present in the index and available for search */ declare const has: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, id: ID) => boolean; /** * Returns the stored fields (as configured in the `storeFields` constructor * option) for the given document ID. Returns `undefined` if the document is * not present in the index. * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the stored fields of the indexed documents.
* * @param searchIndex The search index * @param id The document ID * @returns The stored fields of the document, or `undefined` if it is not in the index */ declare const getStoredFields: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, id: ID) => Index | undefined; /** * Creates a search index with the given options. * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the stored fields of the indexed documents. * * @param options Configuration options * @returns An instance of SearchIndex with the given options * * ### Examples: * * ```js * // Create a search engine that indexes the 'title' and 'text' fields of your * // documents: * const searchIndex = createIndex({ fields: ['title', 'text'] }) * ``` * * ### ID Field: * * ```js * // Your documents are assumed to include a unique 'id' field, but if you want * // to use a different field for document identification, you can set the * // 'idField' option: * const searchIndex = createIndex({ idField: 'key', fields: ['title', 'text'] }) * ``` * * ### Options and defaults: * * ```js * // The full set of options (here with their default value) is: * const searchIndex = createIndex({ * // idField: field that uniquely identifies a document * idField: 'id', * * // extractField: function used to get the value of a field in a document. * // By default, it assumes the document is a flat object with field names as * // property keys and field values as string property values, but custom logic * // can be implemented by setting this option to a custom extractor function. * extractField: (document, fieldName) => document[fieldName], * * // tokenize: function used to split fields into individual terms. By * // default, it is also used to tokenize search queries, unless a specific * // `tokenize` search option is supplied. When tokenizing an indexed field, * // the field name is passed as the second argument.
* tokenize: (string, _fieldName) => string.split(SPACE_OR_PUNCTUATION), * * // processTerm: function used to process each tokenized term before * // indexing. It can be used for stemming and normalization. Return a falsy * // value in order to discard a term. By default, it is also used to process * // search queries, unless a specific `processTerm` option is supplied as a * // search option. When processing a term from an indexed field, the field * // name is passed as the second argument. * processTerm: (term, _fieldName) => term.toLowerCase(), * * // searchOptions: default search options, see the `search` function for * // details * searchOptions: undefined, * * // fields: document fields to be indexed. Mandatory, but not set by default * fields: undefined, * * // storeFields: document fields to be stored and returned as part of the * // search results. * storeFields: [] * }) * ``` */ declare const createIndex: <ID, Document, Index extends Record<string, any> = Record<never, never>>(options: SearchIndexOptions<ID, Document, Index>) => SearchIndex<ID, Document, Index>; /** * Instantiates a SearchIndex instance from a JS Object. * It should be given the same options originally used when serializing the index. * * ### Usage: * * ```js * // If the index was serialized with: * let index = createIndex({ fields: ['title', 'text'] }) * * addAll(index, documents) * * const json = index.toJSON() * // It can later be loaded like this: * index = loadIndex(json, { fields: ['title', 'text'] }) * ``` * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the stored fields of the indexed documents. * * @param indexObject index object * @param options configuration options, same as the constructor * @return An instance of SearchIndex deserialized from the given JS object.
*/ declare const loadIndex: <ID, Document, Index extends Record<string, any> = Record<never, never>>(indexObject: IndexObject<Index>, options: SearchIndexOptions<ID, Document, Index>) => SearchIndex<ID, Document, Index>; /** * Async equivalent of {@link loadIndex} * * This function is an alternative to {@link loadIndex} that returns * a promise, and loads the index in batches, leaving pauses between them to avoid * blocking the main thread. It tends to be slower than the synchronous * version, but does not block the main thread, so it can be a better choice * when deserializing very large indexes. * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the stored fields of the indexed documents. * * @param indexObject index object * @param options configuration options, same as the constructor * @return A Promise that will resolve to an instance of SearchIndex deserialized from the given index object. */ declare const loadIndexAsync: <ID, Document, Index extends Record<string, any> = Record<never, never>>(indexObject: IndexObject<Index>, options: SearchIndexOptions<ID, Document, Index>) => Promise<SearchIndex<ID, Document, Index>>; /** * Deserializes a JSON index (serialized with `JSON.stringify(index)`) * and instantiates a SearchIndex instance. It should be given the same options * originally used when serializing the index. * * ### Usage: * * ```js * // If the index was serialized with: * let index = createIndex({ fields: ['title', 'text'] }) * * addAll(index, documents) * * const json = JSON.stringify(index) * // It can later be deserialized like this: * index = loadJSONIndex(json, { fields: ['title', 'text'] }) * ``` * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the stored fields of the indexed documents.
* * @param json JSON-serialized index * @param options configuration options, same as the constructor * @return An instance of SearchIndex deserialized from the given JSON. */ declare const loadJSONIndex: <ID, Document, Index extends Record<string, any> = Record<never, never>>(json: string, options: SearchIndexOptions<ID, Document, Index>) => SearchIndex<ID, Document, Index>; /** * Async equivalent of {@link loadJSONIndex} * * This function is an alternative to {@link loadJSONIndex} that returns * a promise, and loads the index in batches, leaving pauses between them to avoid * blocking the main thread. It tends to be slower than the synchronous * version, but does not block the main thread, so it can be a better choice * when deserializing very large indexes. * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the stored fields of the indexed documents. * * @param json JSON-serialized index * @param options configuration options, same as the constructor * @return A Promise that will resolve to an instance of SearchIndex deserialized from the given JSON. */ declare const loadJSONIndexAsync: <ID, Document, Index extends Record<string, any> = Record<never, never>>(json: string, options: SearchIndexOptions<ID, Document, Index>) => Promise<SearchIndex<ID, Document, Index>>; /** * Search for documents matching the given search query. * * The result is a list of scored document IDs matching the query, sorted by * descending score, and each including data about which terms were matched and * in which fields. * * ### Basic usage: * * ```js * // Search for "zen art motorcycle" with default options: terms have to match * // exactly, and individual terms are joined with OR * search(searchIndex, 'zen art motorcycle') * // => [ { id: 2, score: 2.77258, match: { ... } }, { id: 4, score: 1.38629, match: { ...
} } ] * ``` * * ### Restrict search to specific fields: * * ```js * // Search only in the 'title' field * search(searchIndex, 'zen', { fields: ['title'] }) * ``` * * ### Field boosting: * * ```js * // Boost a field * search(searchIndex, 'zen', { boost: { title: 2 } }) * ``` * * ### Prefix search: * * ```js * // Search for "moto" with prefix search (it will match documents * // containing terms that start with "moto" or "neuro") * search(searchIndex, 'moto neuro', { prefix: true }) * ``` * * ### Fuzzy search: * * ```js * // Search for "ismael" with fuzzy search (it will match documents containing * // terms similar to "ismael", with a maximum edit distance of 0.2 times * // term.length, rounded to the nearest integer) * search(searchIndex, 'ismael', { fuzzy: 0.2 }) * ``` * * ### Combining strategies: * * ```js * // Mix of exact match, prefix search, and fuzzy search * search(searchIndex, 'ismael mob', { * prefix: true, * fuzzy: 0.2 * }) * ``` * * ### Advanced prefix and fuzzy search: * * ```js * // Perform fuzzy and prefix search depending on the search term. Here * // performing prefix and fuzzy search only on terms longer than 3 characters * search(searchIndex, 'ismael mob', { * prefix: term => term.length > 3, * fuzzy: term => term.length > 3 ? 0.2 : null * }) * ``` * * ### Combine with AND: * * ```js * // Combine search terms with AND (to match only documents that contain both * // "motorcycle" and "art") * search(searchIndex, 'motorcycle art', { combineWith: 'AND' }) * ``` * * ### Combine with AND_NOT: * * There is also an AND_NOT combinator, that finds documents that match the * first term, but do not match any of the other terms. This combinator is * rarely useful with simple queries, and is meant to be used with advanced * query combinations (see later for more details).
* * ### Filtering results: * * ```js * // Filter only results in the 'fiction' category (assuming that 'category' * // is a stored field) * search(searchIndex, 'motorcycle art', { * filter: (result) => result.category === 'fiction' * }) * ``` * * ### Wildcard query * * Searching for an empty string (assuming the default tokenizer) returns no * results. Sometimes though, one needs to match all documents, like in a * "wildcard" search. This is possible by passing the special value * `WILDCARD` as the query: * * ```javascript * // Return search results for all documents * search(index, WILDCARD) * ``` * * Note that search options such as `filter` and `boostDocument` are still * applied, influencing which results are returned, and their order: * * ```javascript * // Return search results for all documents in the 'fiction' category * search(index, WILDCARD, { * filter: (result) => result.category === 'fiction' * }) * ``` * * ### Advanced combination of queries: * * It is possible to combine different subqueries with OR, AND, and AND_NOT, * and even with different search options, by passing a query expression * tree object as the `query` argument, instead of a string. * * ```js * // Search for documents that contain "zen" and ("motorcycle" or "archery") * search(searchIndex, { * combineWith: 'AND', * queries: [ * 'zen', * { * combineWith: 'OR', * queries: ['motorcycle', 'archery'] * } * ] * }) * * // Search for documents that contain ("apple" or "pear") but not "juice" and * // not "tree" * search(searchIndex, { * combineWith: 'AND_NOT', * queries: [ * { * combineWith: 'OR', * queries: ['apple', 'pear'] * }, * 'juice', * 'tree' * ] * }) * ``` * * Each node in the expression tree can be either a string, or an object that * supports all `SearchOptions` fields, plus a `queries` array field for * subqueries.
* * Note that, while this can become complicated to do by hand for complex or * deeply nested queries, it provides a formalized expression tree API for * external libraries that implement a parser for custom query languages. * * @typeParam ID The id type of the documents being indexed. * @typeParam Document The type of the documents being indexed. * @typeParam Index The type of the documents being indexed. * * @param searchIndex Search Index * @param query Search query * @param searchOptions Search options. Each option, if not given, defaults to the corresponding value of `searchOptions` given to the constructor, or to the library default. */ declare const search: <ID, Document, Index extends Record<string, any> = Partial<Document>>(searchIndex: SearchIndex<ID, Document, Index>, query: Query, searchOptions?: SearchOptions<ID, Index>) => SearchResult<ID, Index>[]; /** * Discards the document with the given ID, so it won't appear in sear