slimsearch
Version:
Tiny but powerful full-text search engine for browser and Node
1,334 lines (1,321 loc) • 59 kB
TypeScript
/**
 * Branded empty-string key type.
 *
 * {@link RadixTree} overloads `get` and `set` on this type: the `LeafType` key
 * addresses a value of type `T`, while any other `string` key addresses a
 * child tree.
 */
type LeafType = "" & { readonly __tag: unique symbol };
/**
* Node of the radix tree: a `Map` with string keys whose entries are either a
* stored value or a child tree.
*
* The overloads narrow the inherited `Map` accessors by key type: the
* {@link LeafType} key reads or writes a value of type `T`, while any other
* `string` key reads or writes a child {@link RadixTree}.
*
* @typeParam T The type of the values stored in the tree.
*/
interface RadixTree<T> extends Map<string, T | RadixTree<T>> {
get(key: LeafType): T | undefined;
get(key: string): RadixTree<T> | undefined;
set(key: LeafType, value: T): this;
set(key: string, value: RadixTree<T>): this;
}
/**
 * A `[key, value]` pair, with a string key.
 *
 * @typeParam T The type of the value.
 */
type Entry<T> = [key: string, value: T];
/**
 * A list of `[tree node, string]` pairs.
 *
 * NOTE(review): presumably the nodes traversed in the radix tree together with
 * the key followed from each of them — confirm against the implementation.
 *
 * @typeParam T The type of the values stored in the tree.
 */
type Path<T> = Array<[RadixTree<T>, string]>;
/**
 * A single fuzzy match: the value associated to the matching key, followed by
 * the edit distance of that key from the search key.
 *
 * @typeParam T The type of the value.
 */
type FuzzyResult<T> = [value: T, distance: number];
/**
* Map of matching keys to their {@link FuzzyResult}. This is the return type
* of {@link SearchableMap.fuzzyGet}.
*
* @typeParam T The type of the values.
*/
type FuzzyResults<T> = Map<string, FuzzyResult<T>>;
/**
* Lookup table from an iteration kind to the type of the items it yields:
* `ENTRIES` yields `[key, value]` entries, `KEYS` yields string keys, and
* `VALUES` yields values.
*
* @typeParam T The type of the values.
*/
interface Iterators<T> {
ENTRIES: Entry<T>;
KEYS: string;
VALUES: T;
}
/**
* The available iteration kinds, i.e. the keys of {@link Iterators}:
* `"ENTRIES"`, `"KEYS"` or `"VALUES"`.
*/
type Kind<T> = keyof Iterators<T>;
/**
* The type of the items yielded when iterating with kind `K`, as looked up in
* {@link Iterators}.
*
* @typeParam T The type of the values.
* @typeParam K The iteration kind.
*/
type Result<T, K extends keyof Iterators<T>> = Iterators<T>[K];
/**
 * Traversal state of a {@link TreeIterator} (the type of its `_path` member):
 * a list of tree nodes, each paired with a list of keys.
 *
 * NOTE(review): presumably `keys` are the keys of `node` that are still to be
 * visited — confirm against the implementation.
 *
 * @typeParam T The type of the values stored in the tree.
 */
type IteratorPath<T> = Array<{
  node: RadixTree<T>;
  keys: string[];
}>;
/**
* Minimal shape that a {@link TreeIterator} is constructed from: a radix tree
* and a prefix. {@link SearchableMap} declares both of these members.
*
* @typeParam T The type of the values stored in the tree.
*/
interface IterableSet<T> {
_tree: RadixTree<T>;
_prefix: string;
}
/**
* Iterator over the contents of an {@link IterableSet} (a radix tree and a
* prefix).
*
* The `K` type parameter selects what `next()` yields, following
* {@link Iterators}: `[key, value]` entries, keys, or values. The iterator is
* also iterable, as `[Symbol.iterator]()` is typed to return `this`.
*
* @typeParam T The type of the values stored in the tree.
* @typeParam K The iteration kind: `"ENTRIES"`, `"KEYS"` or `"VALUES"`.
*
* @private
*/
declare class TreeIterator<T, K extends Kind<T>> implements Iterator<Result<T, K>> {
set: IterableSet<T>;
_type: K;
_path: IteratorPath<T>;
constructor(set: IterableSet<T>, type: K);
next(): IteratorResult<Result<T, K>>;
dive(): IteratorResult<Result<T, K>>;
backtrack(): void;
key(): string;
value(): T;
result(): Result<T, K>;
[Symbol.iterator](): this;
}
/**
 * A class closely modelled on the standard JavaScript
 * [`Map`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map)
 * with string keys, but adding support for efficiently searching entries with
 * prefix or fuzzy search. This class is used internally by {@link SearchIndex} as
 * the inverted index data structure. The implementation is a radix tree
 * (compressed prefix tree).
 *
 * Two signatures differ from `Map`: {@link SearchableMap.delete} returns
 * `void` instead of a boolean, and the callback given to
 * {@link SearchableMap.forEach} receives the key before the value.
 *
 * Since this class can be of general utility beyond _SlimSearch_, it is
 * exported by the `slimsearch` package and can be imported (or required) as
 * `slimsearch/SearchableMap`.
 *
 * @typeParam Value The type of the values stored in the map.
 */
declare class SearchableMap<Value = any> {
  /**
   * @ignore
   */
  _tree: RadixTree<Value>;
  /**
   * @ignore
   */
  _prefix: string;
  private _size;
  /**
   * The constructor is normally called without arguments, creating an empty
   * map. In order to create a {@link SearchableMap} from an iterable or from an
   * object, check {@link SearchableMap.from} and {@link SearchableMap.fromObject}.
   *
   * The constructor arguments are for internal use, when creating derived
   * mutable views of a map at a prefix.
   */
  constructor(tree?: RadixTree<Value>, prefix?: string);
  /**
   * Creates and returns a mutable view of this {@link SearchableMap}, containing only
   * entries that share the given prefix.
   *
   * ### Usage:
   *
   * ```js
   * const map = new SearchableMap()
   * map.set("unicorn", 1)
   * map.set("universe", 2)
   * map.set("university", 3)
   * map.set("unique", 4)
   * map.set("hello", 5)
   *
   * const uni = map.atPrefix("uni")
   * uni.get("unique") // => 4
   * uni.get("unicorn") // => 1
   * uni.get("hello") // => undefined
   *
   * const univer = map.atPrefix("univer")
   * univer.get("unique") // => undefined
   * univer.get("universe") // => 2
   * univer.get("university") // => 3
   * ```
   *
   * @param prefix The prefix
   * @return A {@link SearchableMap} representing a mutable view of the original Map at the given prefix
   */
  atPrefix(prefix: string): SearchableMap<Value>;
  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/clear
   */
  clear(): void;
  /**
   * Note that, unlike `Map.prototype.delete`, this method is declared to
   * return `void`: it does not report whether the key was present.
   *
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/delete
   * @param key Key to delete
   */
  delete(key: string): void;
  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/entries
   * @return An iterator iterating through `[key, value]` entries.
   */
  entries(): TreeIterator<Value, "ENTRIES">;
  /**
   * Note that, unlike `Map.prototype.forEach`, the callback is declared as
   * `(key, value, map)`: the key comes before the value.
   *
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/forEach
   * @param fn Iteration function, receiving the key, the value, and a
   * {@link SearchableMap} holding the same `Value` type
   */
  forEach(fn: (key: string, value: Value, map: SearchableMap<Value>) => void): void;
  /**
   * Returns a Map of all the entries that have a key within the given edit
   * distance from the search key. The keys of the returned Map are the matching
   * keys, while the values are two-element arrays where the first element is
   * the value associated to the key, and the second is the edit distance of the
   * key to the search key.
   *
   * ### Usage:
   *
   * ```js
   * const map = new SearchableMap()
   * map.set('hello', 'world')
   * map.set('hell', 'yeah')
   * map.set('ciao', 'mondo')
   *
   * // Get all entries that match the key 'hallo' with a maximum edit distance of 2
   * map.fuzzyGet('hallo', 2)
   * // => Map(2) { 'hello' => ['world', 1], 'hell' => ['yeah', 2] }
   *
   * // In the example, the "hello" key has value "world" and edit distance of 1
   * // (change "e" to "a"), the key "hell" has value "yeah" and edit distance of 2
   * // (change "e" to "a", delete "o")
   * ```
   *
   * @param key The search key
   * @param maxEditDistance The maximum edit distance (Levenshtein)
   * @return A Map of the matching keys to their value and edit distance
   */
  fuzzyGet(key: string, maxEditDistance: number): FuzzyResults<Value>;
  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/get
   * @param key Key to get
   * @return Value associated to the key, or `undefined` if the key is not
   * found.
   */
  get(key: string): Value | undefined;
  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/has
   * @param key Key
   * @return True if the key is in the map, false otherwise
   */
  has(key: string): boolean;
  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/keys
   * @return An `Iterable` iterating through keys
   */
  keys(): TreeIterator<Value, "KEYS">;
  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/set
   * @param key Key to set
   * @param value Value to associate to the key
   * @return The {@link SearchableMap} itself, to allow chaining
   */
  set(key: string, value: Value): this;
  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/size
   */
  get size(): number;
  /**
   * Updates the value at the given key using the provided function. The function
   * is called with the current value at the key, and its return value is used as
   * the new value to be set.
   *
   * ### Example:
   *
   * ```js
   * // Increment the current value by one
   * searchableMap.update('somekey', (currentValue) => currentValue == null ? 0 : currentValue + 1)
   * ```
   *
   * If the value at the given key is or will be an object, it might not require
   * re-assignment. In that case it is better to use `fetch()`, because it is
   * faster.
   *
   * @param key The key to update
   * @param fn The function used to compute the new value from the current one
   * @return The {@link SearchableMap} itself, to allow chaining
   */
  update(key: string, fn: (value: Value | undefined) => Value): this;
  /**
   * Fetches the value of the given key. If the value does not exist, calls the
   * given function to create a new value, which is inserted at the given key
   * and subsequently returned.
   *
   * ### Example:
   *
   * ```js
   * const map = searchableMap.fetch('somekey', () => new Map())
   * map.set('foo', 'bar')
   * ```
   *
   * @param key The key to fetch
   * @param initial A function that creates a new value if the key does not exist
   * @return The existing or new value at the given key
   */
  fetch(key: string, initial: () => Value): Value;
  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/values
   * @return An `Iterable` iterating through values.
   */
  values(): TreeIterator<Value, "VALUES">;
  /**
   * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map/@@iterator
   */
  [Symbol.iterator](): TreeIterator<Value, "ENTRIES">;
  /**
   * Creates a {@link SearchableMap} from an `Iterable` of entries
   *
   * @param entries Entries to be inserted in the {@link SearchableMap}
   * @return A new {@link SearchableMap} with the given entries
   */
  static from<T = any>(entries: Iterable<Entry<T>> | Entry<T>[]): SearchableMap<T>;
  /**
   * Creates a {@link SearchableMap} from the iterable properties of a JavaScript object
   *
   * @param object Object of entries for the {@link SearchableMap}
   * @return A new {@link SearchableMap} with the given entries
   */
  static fromObject<T = any>(object: Record<string, T>): SearchableMap<T>;
}
/**
* Unique symbol whose type is exposed as {@link Wildcard}, one of the
* accepted forms of a {@link Query}.
*/
declare const WILDCARD: unique symbol;
/**
 * The combination operators, spelled in lowercase.
 */
type LowercaseCombinationOperator =
  | "or"
  | "and"
  | "and_not";
/**
 * A combination operator in any of the accepted spellings: lowercase
 * (e.g. `"and_not"`), uppercase (e.g. `"AND_NOT"`), or with only the first
 * letter capitalized (e.g. `"And_not"`).
 */
type CombinationOperator =
  | LowercaseCombinationOperator
  | Uppercase<LowercaseCombinationOperator>
  | Capitalize<LowercaseCombinationOperator>;
/**
* Plain-object record of numbers keyed by string, used for the entries of
* `index` in {@link IndexObject}.
*
* NOTE(review): presumably the JSON form of `DocumentTermFrequencies`, with
* the numeric keys turned into strings — confirm against the serializer.
*/
type SerializedIndexEntry = Record<string, number>;
/**
* Parameters of the BM25+ scoring algorithm. Customizing these is almost never
* necessary, and fine-tuning them requires an understanding of the BM25 scoring
* model.
*
* All three parameters are required members of this object.
*
* Some information about BM25 (and BM25+) can be found at these links:
*
* - https://en.wikipedia.org/wiki/Okapi_BM25
* - https://opensourceconnections.com/blog/2015/10/16/bm25-the-next-generation-of-lucene-relevation/
*/
interface BM25Params {
/**
* Term frequency saturation point.
*
* Recommended values are between `1.2` and `2`. Higher values increase the
* difference in score between documents with higher and lower term
* frequencies. Setting this to `0` or a negative value is invalid. Defaults
* to `1.2`.
*/
k: number;
/**
* Length normalization impact.
*
* Recommended values are around `0.75`. Higher values increase the weight
* that field length has on scoring. Setting this to `0` (not recommended)
* means that the field length has no effect on scoring. Negative values are
* invalid. Defaults to `0.7`.
*/
b: number;
/**
* BM25+ frequency normalization lower bound (usually called δ).
*
* Recommended values are between `0.5` and `1`. Increasing this parameter
* increases the minimum relevance of one occurrence of a search term
* regardless of its (possibly very long) field length. Negative values are
* invalid. Defaults to `0.5`.
*/
d: number;
}
/**
* Match information for a search result. It is a key-value object where keys
* are terms that matched, and values are the list of fields that the term was
* found in.
*
* For example, `{ motorcycle: ["title", "text"] }` says that the term
* `"motorcycle"` was found in the `title` and `text` fields.
*/
type MatchInfo = Record<string, string[]>;
/**
* Type of the search results. Each search result indicates the document ID, the
* terms that matched, the match information, the score, and all the stored
* fields.
*
* @typeParam ID The type of id being indexed.
* @typeParam Index The type of the stored fields, which are merged into each
* search result.
*/
type SearchResult<ID = any, Index extends Record<string, any> = Record<never, never>> = Index & {
/**
* The document ID
*/
id: ID;
/**
* List of document terms that matched. For example, if a prefix search for
* `"moto"` matches `"motorcycle"`, `terms` will contain `"motorcycle"`.
*/
terms: string[];
/**
* List of query terms that matched. For example, if a prefix search for
* `"moto"` matches `"motorcycle"`, `queryTerms` will contain `"moto"`.
*/
queryTerms: string[];
/**
* Score of the search result
*/
score: number;
/**
* Match information, see {@link MatchInfo}
*/
match: MatchInfo;
};
/**
* Search options to customize the search behavior.
*
* @typeParam ID The type of id being indexed.
* @typeParam Index The type of the stored fields, as received by the `filter`
* (within each search result) and `boostDocument` callbacks.
*/
interface SearchOptions<ID = any, Index extends Record<string, any> = Record<never, never>> {
/**
* Names of the fields to search in. If omitted, all fields are searched.
*/
fields?: string[];
/**
* Function used to filter search results, for example on the basis of stored
* fields. It takes as argument each search result and should return a boolean
* to indicate if the result should be kept or not.
*/
filter?: (result: SearchResult<ID, Index>) => boolean;
/**
* Key-value object of field names to boosting values. By default, fields are
* assigned a boosting factor of 1. If one assigns to a field a boosting value
* of 2, a result that matches the query in that field is assigned a score
* twice as high as a result matching the query in another field, all else
* being equal.
*/
boost?: Record<string, number>;
/**
* Function to calculate a boost factor for each term.
*
* This function, if provided, is called for each query term (as split by
* `tokenize` and processed by `processTerm`). The arguments passed to the
* function are the query term, the positional index of the term in the query,
* and the array of all query terms. It is expected to return a numeric boost
* factor for the term. A factor lower than 1 reduces the importance of the
* term, a factor greater than 1 increases it. A factor of exactly 1 is
* neutral, and does not affect the term's importance.
*/
boostTerm?: (term: string, i: number, terms: string[]) => number;
/**
* Relative weights to assign to prefix search results and fuzzy search
* results. Exact matches are assigned a weight of 1.
*/
weights?: {
fuzzy?: number;
prefix?: number;
};
/**
* Function to calculate a boost factor for documents. It takes as arguments
* the document ID, and a term that matches the search in that document, and
* the value of the stored fields for the document (if any). It should return
* a boosting factor: a number higher than 1 increases the computed score, a
* number lower than 1 decreases the score, and a falsy value skips the search
* result completely.
*/
boostDocument?: (documentId: ID, term: string, storedFields?: Index) => number;
/**
* Controls whether to perform prefix search. It can be a simple boolean, or a
* function.
*
* If a boolean is passed, prefix search is performed if true.
*
* If a function is passed, it is called upon search with a search term, the
* positional index of that search term in the tokenized search query, and the
* tokenized search query. The function should return a boolean to indicate
* whether to perform prefix search for that search term.
*/
prefix?: boolean | ((term: string, index: number, terms: string[]) => boolean);
/**
* Controls whether to perform fuzzy search. It can be a simple boolean, or a
* number, or a function.
*
* If a boolean is given, fuzzy search with a default fuzziness parameter is
* performed if true.
*
* If a number higher or equal to 1 is given, fuzzy search is performed, with
* a maximum edit distance (Levenshtein) equal to the number.
*
* If a number between 0 and 1 is given, fuzzy search is performed within a
* maximum edit distance corresponding to that fraction of the term length,
* approximated to the nearest integer. For example, 0.2 would mean an edit
* distance of 20% of the term length, so 1 character in a 5-character term.
* The calculated fuzziness value is limited by the `maxFuzzy` option, to
* prevent slowdown for very long queries.
*
* If a function is passed, the function is called upon search with a search
* term, a positional index of that term in the tokenized search query, and
* the tokenized search query. It should return a boolean or a number, with
* the meaning documented above.
*/
fuzzy?: boolean | number | ((term: string, index: number, terms: string[]) => boolean | number);
/**
* Controls the maximum fuzziness when using a fractional fuzzy value.
* Very high edit distances usually don't produce meaningful results,
* but can excessively impact search performance.
*
* @default 6
*/
maxFuzzy?: number;
/**
* The operator to combine partial results for each term. By default it is
* "OR", so results matching _any_ of the search terms are returned by a
* search. If "AND" is given, only results matching _all_ the search terms are
* returned by a search.
*
* The {@link CombinationOperator} type also accepts "AND_NOT", and each
* operator can be written in lowercase, uppercase, or capitalized form.
*/
combineWith?: CombinationOperator;
/**
* Function to tokenize the search query. By default, the same tokenizer used
* for indexing is used also for search.
*/
tokenize?: (text: string) => string[];
/**
* Function to process or normalize terms in the search query. By default, the
* same term processor used for indexing is used also for search.
*/
processTerm?: (term: string) => string | string[] | null | undefined | false;
/**
* BM25+ algorithm parameters. Customizing these is almost never necessary,
* and fine-tuning them requires an understanding of the BM25 scoring model. In
* most cases, it is best to omit this option to use defaults, and instead use
* boosting to tweak scoring for specific use cases.
*/
bm25?: BM25Params;
}
/**
* Configuration options passed to the {@link SearchIndex} constructor
*
* @typeParam ID The type of id being indexed.
* @typeParam Document The type of documents being indexed.
* @typeParam Index The type of the stored fields, as used by the default
* search and auto suggest options.
*/
interface SearchIndexOptions<ID = any, Document = any, Index extends Record<string, any> = Record<never, never>> {
/**
* Names of the document fields to be indexed.
*/
fields: string[];
/**
* Name of the ID field, uniquely identifying a document.
*/
idField?: string;
/**
* Names of fields to store, so that search results would include them. By
* default none, so results would only contain the id field.
*/
storeFields?: string[];
/**
* Function used to extract the value of each field in documents. By default,
* the documents are assumed to be plain objects with field names as keys,
* but by specifying a custom `extractField` function one can completely
* customize how the fields are extracted.
*
* The function takes as arguments the document, and the name of the field to
* extract from it. It should return the field value as a string.
*/
extractField?: (document: Document, fieldName: string) => string;
/**
* Function used to split a field value into individual terms to be indexed.
* The default tokenizer separates terms by space or punctuation, but a
* custom tokenizer can be provided for custom logic.
*
* The function takes as arguments the string to tokenize, and the name of the
* field it comes from. It should return the terms as an array of strings.
* When used for tokenizing a search query instead of a document field, the
* `fieldName` is undefined.
*/
tokenize?: (text: string, fieldName?: string) => string[];
/**
* Function used to process a term before indexing or search. This can be
* used for normalization (such as stemming). By default, terms are
* downcased, and otherwise no other normalization is performed.
*
* The function takes as arguments a term to process, and the name of the
* field it comes from. It should return the processed term as a string, or a
* falsy value to reject the term entirely.
*
* It can also return an array of strings, in which case each string in the
* returned array is indexed as a separate term.
*/
processTerm?: (term: string, fieldName?: string) => string | string[] | null | undefined | false;
/**
* Function called to log messages. Arguments are a log level ('debug',
* 'info', 'warn', or 'error'), a log message, and an optional string code
* that identifies the reason for the log.
*
* The default implementation uses `console`, if defined.
*/
logger?: (level: LogLevel, message: string, code?: string) => void;
/**
* If `true` (the default), vacuuming is performed automatically as soon as
* {@link discard} is called a certain number of times, cleaning up
* obsolete references from the index. If `false`, no automatic vacuuming is
* performed. Custom settings controlling auto vacuuming thresholds, as well
* as batching behavior, can be passed as an object (see the
* {@link AutoVacuumOptions} type).
*/
autoVacuum?: boolean | AutoVacuumOptions;
/**
* Default search options (see the {@link SearchOptions} type and the
* {@link search} method for details)
*/
searchOptions?: SearchOptions<ID, Index>;
/**
* Default auto suggest options (see the {@link SearchOptions} type and the
* {@link autoSuggest} method for details)
*/
autoSuggestOptions?: SearchOptions<ID, Index>;
}
/**
 * Severity levels passed as the first argument of the `logger` option.
 */
type LogLevel =
  | "debug"
  | "info"
  | "warn"
  | "error";
/**
* The type of auto-suggestions: a suggested search query together with its
* terms and its score.
*/
interface Suggestion {
/**
* The suggestion, as a string
*/
suggestion: string;
/**
* Suggestion as an array of terms
*/
terms: string[];
/**
* Score for the suggestion
*/
score: number;
}
/**
* Object format of search index when serialized. This is the type returned by
* the `toJSON()` method of {@link SearchIndex}.
*
* @typeParam Index The type of the stored fields (see `storedFields`).
*/
interface IndexObject<Index extends Record<string, any> = Record<never, never>> {
documentCount: number;
nextId: number;
documentIds: Record<string, any>;
fieldIds: Record<string, number>;
fieldLength: Record<string, number[]>;
averageFieldLength: number[];
storedFields: Record<string, Index>;
dirtCount?: number;
index: [string, Record<string, SerializedIndexEntry>][];
version: number;
}
/**
* A query expression made of several sub-queries. Since it extends
* {@link SearchOptions}, it can also carry search options (such as
* `combineWith`) alongside the `queries` list.
*
* @typeParam ID The type of id being indexed.
* @typeParam Index The type of the stored fields (see {@link SearchOptions}).
*/
interface QueryCombination<ID = any, Index extends Record<string, any> = Record<never, never>> extends SearchOptions<ID, Index> {
queries: Query[];
}
/**
* Wildcard query, used to match all terms. It is the type of the unique
* `WILDCARD` symbol.
*/
type Wildcard = typeof WILDCARD;
/**
 * Search query expression. It is one of: a query string, an expression tree
 * ({@link QueryCombination}) combining several queries with a combination of
 * AND or OR, or the {@link Wildcard} symbol.
 */
type Query =
  | QueryCombination
  | string
  | Wildcard;
/**
* Options to control vacuuming behavior.
*
* Vacuuming cleans up document references made obsolete by
* {@link discard} from the index. On large indexes, vacuuming is
* potentially costly, because it has to traverse the whole inverted index.
* Therefore, in order to dilute this cost so it does not negatively affect the
* application, vacuuming is performed in batches, with a delay between each
* batch. These options are used to configure the batch size and the delay
* between batches.
*/
interface VacuumOptions {
/**
* Size of each vacuuming batch (the number of terms in the index that will be
* traversed in each batch).
*
* @default 1000
*/
batchSize?: number;
/**
* Wait time between each vacuuming batch in milliseconds.
*
* @default 10
*/
batchWait?: number;
}
/**
* Sets minimum thresholds for `dirtCount` and `dirtFactor` that trigger an
* automatic vacuuming. Both thresholds are optional.
*/
interface VacuumConditions {
/**
* Minimum `dirtCount` (number of discarded documents since the last vacuuming)
* under which auto vacuum is not triggered.
*
* @default 20
*/
minDirtCount?: number;
/**
* Minimum `dirtFactor` (proportion of discarded documents over the total)
* under which auto vacuum is not triggered.
*
* @default 0.1
*/
minDirtFactor?: number;
}
/**
* Options to control auto vacuum behavior. When discarding a document with
* {@link discard}, a vacuuming operation is automatically started if the
* `dirtCount` and `dirtFactor` are above the `minDirtCount` and `minDirtFactor`
* thresholds defined by this configuration. See {@link VacuumConditions} for
* details on these.
*
* Also, `batchSize` and `batchWait` can be specified, controlling batching
* behavior (see {@link VacuumOptions}).
*
* Every property of this type is optional.
*/
type AutoVacuumOptions = VacuumOptions & VacuumConditions;
/**
 * {@link SearchOptions} after defaults have been applied: `boost`, `weights`
 * (with both `fuzzy` and `prefix`), `prefix`, `fuzzy`, `maxFuzzy`,
 * `combineWith` and `bm25` are required here instead of optional.
 *
 * The default for `Index` is `Record<never, never>`, the same default used by
 * {@link SearchOptions}, so that omitting the type arguments means the same
 * thing on both types.
 *
 * @typeParam ID The type of id being indexed.
 * @typeParam Index The type of the stored fields (see {@link SearchOptions}).
 */
interface SearchOptionsWithDefaults<
  ID = any,
  Index extends Record<string, any> = Record<never, never>
> extends SearchOptions<ID, Index> {
  boost: Record<string, number>;
  weights: {
    fuzzy: number;
    prefix: number;
  };
  prefix: boolean | ((term: string, index: number, terms: string[]) => boolean);
  fuzzy: boolean | number | ((term: string, index: number, terms: string[]) => boolean | number);
  maxFuzzy: number;
  combineWith: CombinationOperator;
  bm25: BM25Params;
}
/**
* Map from a number to a number.
*
* NOTE(review): presumably keyed by the numeric short id of a document, with
* the frequency of a term in that document as value — confirm against the
* indexing code.
*/
type DocumentTermFrequencies = Map<number, number>;
/**
 * {@link SearchIndexOptions} after defaults have been applied. This is the
 * type of the `_options` member of {@link SearchIndex}.
 *
 * Every option that is optional on {@link SearchIndexOptions} is required
 * here. `tokenize` and `processTerm` are omitted from the base type and
 * redeclared with a required `fieldName` parameter, and `autoVacuum` is either
 * `false` or an options object (never `true`).
 *
 * The default for `Index` is `Record<never, never>`, the same default used by
 * {@link SearchIndexOptions}, so that omitting the type arguments means the
 * same thing on both types.
 *
 * @typeParam ID The type of id being indexed.
 * @typeParam Document The type of documents being indexed.
 * @typeParam Index The type of the stored fields.
 */
interface OptionsWithDefaults<
  ID = any,
  Document = any,
  Index extends Record<string, any> = Record<never, never>
> extends Omit<SearchIndexOptions<ID, Document, Index>, "processTerm" | "tokenize"> {
  storeFields: string[];
  idField: string;
  extractField: (document: Document, fieldName: string) => string;
  tokenize: (text: string, fieldName: string) => string[];
  processTerm: (term: string, fieldName: string) => string | string[] | null | undefined | false;
  logger: (level: LogLevel, message: string, code?: string) => void;
  autoVacuum: false | AutoVacuumOptions;
  searchOptions: SearchOptionsWithDefaults<ID, Index>;
  autoSuggestOptions: SearchOptions<ID, Index>;
}
/**
* The type of the values stored in the `_index` map of {@link SearchIndex}:
* a map from a number to {@link DocumentTermFrequencies}.
*
* NOTE(review): presumably keyed by numeric field id (see `_fieldIds`) —
* confirm against the indexing code.
*/
type FieldTermData = Map<number, DocumentTermFrequencies>;
/**
* A class representing a search index
*
* ### Basic example:
*
* ```js
* const documents = [
* {
* id: 1,
* title: 'Moby Dick',
* text: 'Call me Ishmael. Some years ago...',
* category: 'fiction'
* },
* {
* id: 2,
* title: 'Zen and the Art of Motorcycle Maintenance',
* text: 'I can see by my watch...',
* category: 'fiction'
* },
* {
* id: 3,
* title: 'Neuromancer',
* text: 'The sky above the port was...',
* category: 'fiction'
* },
* {
* id: 4,
* title: 'Zen and the Art of Archery',
* text: 'At first sight it must seem...',
* category: 'non-fiction'
* },
* // ...and more
* ]
*
* // Create a search engine that indexes the 'title' and 'text' fields for
* // full-text search. Search results will include 'title' and 'category' (plus the
* // id field, that is always stored and returned)
* const searchIndex = createIndex({
* fields: ['title', 'text'],
* storeFields: ['title', 'category']
* })
*
* // Add documents to the index
* addAll(searchIndex, documents)
*
* // Search for documents:
* const results = search(searchIndex, 'zen art motorcycle')
* // => [
* // { id: 2, title: 'Zen and the Art of Motorcycle Maintenance', category: 'fiction', score: 2.77258 },
* // { id: 4, title: 'Zen and the Art of Archery', category: 'non-fiction', score: 1.38629 }
* // ]
* ```
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept for each document.
*
*/
declare class SearchIndex<ID = any, Document = any, Index extends Record<string, any> = Record<never, never>> {
/**
* @ignore
*/
_options: OptionsWithDefaults<ID, Document, Index>;
/**
* @ignore
*/
_index: SearchableMap<FieldTermData>;
/**
* @ignore
*/
_documentCount: number;
/**
* @ignore
*/
_documentIds: Map<number, ID>;
/**
* @ignore
*/
_idToShortId: Map<ID, number>;
/**
* @ignore
*/
_fieldIds: Record<string, number>;
/**
* @ignore
*/
_fieldLength: Map<number, number[]>;
/**
* @ignore
*/
_avgFieldLength: number[];
/**
* @ignore
*/
_nextId: number;
/**
* @ignore
*/
_storedFields: Map<number, Index>;
/**
* @ignore
*/
_dirtCount: number;
/**
* @ignore
*/
_currentVacuum: Promise<void> | null;
/**
* @ignore
*/
_enqueuedVacuum: Promise<void> | null;
/**
* @ignore
*/
_enqueuedVacuumConditions: VacuumConditions | undefined;
/**
* @param options The options for the search index
*/
constructor(options: SearchIndexOptions<ID, Document, Index>);
/**
* Is `true` if a vacuuming operation is ongoing, `false` otherwise
*/
get isVacuuming(): boolean;
/**
* The number of documents discarded since the most recent vacuuming
*/
get dirtCount(): number;
/**
* A number between 0 and 1 giving an indication about the proportion of
* documents that are discarded, and can therefore be cleaned up by vacuuming.
* A value close to 0 means that the index is relatively clean, while a higher
* value means that the index is relatively dirty, and vacuuming could release
* memory.
*/
get dirtFactor(): number;
/**
* Total number of documents available to search
*/
get documentCount(): number;
/**
* Number of terms in the index
*/
get termCount(): number;
/**
* Allows serialization of the index to JSON, to possibly store it and later
* deserialize it with {@link loadJSONIndex} or {@link loadJSONIndexAsync}.
*
* Normally one does not directly call this method, but rather call the
* standard JavaScript `JSON.stringify()` passing the {@link SearchIndex} instance,
* and JavaScript will internally call this method. Upon deserialization, one
* must pass to {@link loadJSONIndex} or {@link loadJSONIndexAsync} the same options used to create the original
* instance that was serialized.
*
* ### Usage:
*
* ```js
* // Serialize the index:
* let searchIndex = createIndex({ fields: ['title', 'text'] })
* addAll(searchIndex, documents)
* const json = JSON.stringify(searchIndex)
*
* // Later, to deserialize it:
* searchIndex = loadJSONIndex(json, { fields: ['title', 'text'] })
* ```
*
* @return A plain-object serializable representation of the search index.
*/
toJSON(): IndexObject<Index>;
/**
* @ignore
*/
private addFields;
}
/**
* Adds a document to the index
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param searchIndex The search index
* @param document The document to be indexed
*/
declare const add: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, document: Document) => void;
/**
* Adds all the given documents to the index
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param searchIndex The search index
* @param documents An array of documents to be indexed
*/
declare const addAll: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, documents: readonly Document[]) => void;
/**
* Adds all the given documents to the index asynchronously.
*
* Returns a promise that resolves (to `undefined`) when the indexing is done.
* This method is useful when indexing many documents, to avoid blocking the
* main thread. The indexing is performed asynchronously and in chunks.
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param searchIndex The search index
* @param documents An array of documents to be indexed
* @param options Optional configuration options. The only supported option is
* `chunkSize` (presumably the number of documents indexed per chunk — TODO
* confirm against the implementation).
* @return A promise resolving when the indexing is done
*/
declare const addAllAsync: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, documents: readonly Document[], options?: {
chunkSize?: number;
}) => Promise<void>;
/**
* Provide suggestions for the given search query
*
* The result is a list of suggested modified search queries, derived from the
* given search query, each with a relevance score, sorted by descending score.
*
* By default, it uses the same options used for search, except that by
* default it performs prefix search on the last term of the query, and
* combines terms with `'AND'` (requiring all query terms to match). Custom
* options can be passed as the third argument. Defaults can be changed by
* passing an `autoSuggestOptions` option when initializing the index.
*
* ### Basic usage:
*
* ```js
* // Get suggestions for 'neuro':
* autoSuggest(searchIndex, 'neuro')
* // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 0.46240 } ]
* ```
*
* ### Multiple words:
*
* ```js
* // Get suggestions for 'zen ar':
* autoSuggest(searchIndex, 'zen ar')
* // => [
* // { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 },
* // { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 }
* // ]
* ```
*
* ### Fuzzy suggestions:
*
* ```js
* // Correct spelling mistakes using fuzzy search:
* autoSuggest(searchIndex, 'neromancer', { fuzzy: 0.2 })
* // => [ { suggestion: 'neuromancer', terms: [ 'neuromancer' ], score: 1.03998 } ]
* ```
*
* ### Filtering:
*
* ```js
* // Get suggestions for 'zen ar', but only within the 'fiction' category
* // (assuming that 'category' is a stored field):
* autoSuggest(searchIndex, 'zen ar', {
* filter: (result) => result.category === 'fiction'
* })
* // => [
* // { suggestion: 'zen archery art', terms: [ 'zen', 'archery', 'art' ], score: 1.73332 },
* // { suggestion: 'zen art', terms: [ 'zen', 'art' ], score: 1.21313 }
* // ]
* ```
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param searchIndex The search index
* @param queryString Query string to be expanded into suggestions
* @param options Search options. The supported options and default values
* are the same as for the `search` method, except that by default prefix
* search is performed on the last term in the query, and terms are combined
* with `'AND'`.
* @return An array of suggestions sorted by descending relevance score.
*/
declare const autoSuggest: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, queryString: string, options?: SearchOptions<ID, Index>) => Suggestion[];
/**
* Returns the default value of an option. It will throw an error if no option
* with the given name exists.
*
* ### Usage:
*
* ```js
* // Get default tokenizer
* getDefaultValue('tokenize')
*
* // Get default term processor
* getDefaultValue('processTerm')
*
* // Unknown options will throw an error
* getDefaultValue('notExisting')
* // => throws 'SlimSearch: unknown option "notExisting"'
* ```
*
* @param optionName Name of the option
* @return The default value of the given option. It is typed as `unknown`, so
* callers must narrow it before use.
* @throws If no option with the given name exists.
*/
declare const getDefaultValue: (optionName: string) => unknown;
/**
* Returns `true` if a document with the given ID is present in the index and
* available for search, `false` otherwise
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param searchIndex The search index
* @param id The document ID
* @return `true` if the document is present and available for search, `false` otherwise
*/
declare const has: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, id: ID) => boolean;
/**
* Returns the stored fields (as configured in the `storeFields` constructor
* option) for the given document ID. Returns `undefined` if the document is
* not present in the index.
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param searchIndex The search index
* @param id The document ID
* @returns The stored fields of the document, or `undefined` if the document
* is not present in the index
*/
declare const getStoredFields: <ID, Document, Index extends Record<string, any> = Record<never, never>>(searchIndex: SearchIndex<ID, Document, Index>, id: ID) => Index | undefined;
/**
* Create search index with given options
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param options Configuration options
* @returns An instance of SearchIndex with given options
*
* ### Examples:
*
* ```js
* // Create a search engine that indexes the 'title' and 'text' fields of your
* // documents:
* const searchIndex = createIndex({ fields: ['title', 'text'] })
* ```
*
* ### ID Field:
*
* ```js
* // Your documents are assumed to include a unique 'id' field, but if you want
* // to use a different field for document identification, you can set the
* // 'idField' option:
* const searchIndex = createIndex({ idField: 'key', fields: ['title', 'text'] })
* ```
*
* ### Options and defaults:
*
* ```js
* // The full set of options (here with their default value) is:
* const searchIndex = createIndex({
* // idField: field that uniquely identifies a document
* idField: 'id',
*
* // extractField: function used to get the value of a field in a document.
* // By default, it assumes the document is a flat object with field names as
* // property keys and field values as string property values, but custom logic
* // can be implemented by setting this option to a custom extractor function.
* extractField: (document, fieldName) => document[fieldName],
*
* // tokenize: function used to split fields into individual terms. By
* // default, it is also used to tokenize search queries, unless a specific
* // `tokenize` search option is supplied. When tokenizing an indexed field,
* // the field name is passed as the second argument.
* tokenize: (string, _fieldName) => string.split(SPACE_OR_PUNCTUATION),
*
* // processTerm: function used to process each tokenized term before
* // indexing. It can be used for stemming and normalization. Return a falsy
* // value in order to discard a term. By default, it is also used to process
* // search queries, unless a specific `processTerm` option is supplied as a
* // search option. When processing a term from an indexed field, the field
* // name is passed as the second argument.
* processTerm: (term, _fieldName) => term.toLowerCase(),
*
* // searchOptions: default search options, see the `search` method for
* // details
* searchOptions: undefined,
*
* // fields: document fields to be indexed. Mandatory, but not set by default
* fields: undefined,
*
* // storeFields: document fields to be stored and returned as part of the
* // search results.
* storeFields: []
* })
* ```
*/
declare const createIndex: <ID, Document, Index extends Record<string, any> = Record<never, never>>(options: SearchIndexOptions<ID, Document, Index>) => SearchIndex<ID, Document, Index>;
/**
* Instantiates a SearchIndex instance from a JS Object.
* It should be given the same options originally used when serializing the index.
*
* ### Usage:
*
* ```js
* // If the index was serialized with:
* let index = createIndex({ fields: ['title', 'text'] })
*
* addAll(index, documents)
*
* const json = index.toJSON()
* // It can later be loaded like this:
* index = loadIndex(json, { fields: ['title', 'text'] })
* ```
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param indexObject index object
* @param options configuration options, same as the constructor
* @return An instance of SearchIndex deserialized from the given JS object.
*/
declare const loadIndex: <ID, Document, Index extends Record<string, any> = Record<never, never>>(indexObject: IndexObject<Index>, options: SearchIndexOptions<ID, Document, Index>) => SearchIndex<ID, Document, Index>;
/**
* Async equivalent of {@link loadIndex}
*
* This function is an alternative to {@link loadIndex} that returns
* a promise, and loads the index in batches, leaving pauses between them to avoid
* blocking the main thread. It tends to be slower than the synchronous
* version, but does not block the main thread, so it can be a better choice
* when deserializing very large indexes.
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param indexObject index object
* @param options configuration options, same as the constructor
* @return A Promise that will resolve to an instance of SearchIndex deserialized from the given JS object.
*/
declare const loadIndexAsync: <ID, Document, Index extends Record<string, any> = Record<never, never>>(indexObject: IndexObject<Index>, options: SearchIndexOptions<ID, Document, Index>) => Promise<SearchIndex<ID, Document, Index>>;
/**
* Deserializes a JSON index (serialized with `JSON.stringify(index)`)
* and instantiates a SearchIndex instance. It should be given the same options
* originally used when serializing the index.
*
* ### Usage:
*
* ```js
* // If the index was serialized with:
* let index = createIndex({ fields: ['title', 'text'] })
*
* addAll(index, documents)
*
* const json = JSON.stringify(index)
* // It can later be deserialized like this:
* index = loadJSONIndex(json, { fields: ['title', 'text'] })
* ```
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param json JSON-serialized index
* @param options configuration options, same as the constructor
* @return An instance of SearchIndex deserialized from the given JSON.
*/
declare const loadJSONIndex: <ID, Document, Index extends Record<string, any> = Record<never, never>>(json: string, options: SearchIndexOptions<ID, Document, Index>) => SearchIndex<ID, Document, Index>;
/**
* Async equivalent of {@link loadJSONIndex}
*
* This function is an alternative to {@link loadJSONIndex} that returns
* a promise, and loads the index in batches, leaving pauses between them to avoid
* blocking the main thread. It tends to be slower than the synchronous
* version, but does not block the main thread, so it can be a better choice
* when deserializing very large indexes.
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param json JSON-serialized index
* @param options configuration options, same as the constructor
* @return A Promise that will resolve to an instance of SearchIndex deserialized from the given JSON.
*/
declare const loadJSONIndexAsync: <ID, Document, Index extends Record<string, any> = Record<never, never>>(json: string, options: SearchIndexOptions<ID, Document, Index>) => Promise<SearchIndex<ID, Document, Index>>;
/**
* Search for documents matching the given search query.
*
* The result is a list of scored document IDs matching the query, sorted by
* descending score, and each including data about which terms were matched and
* in which fields.
*
* ### Basic usage:
*
* ```js
* // Search for "zen art motorcycle" with default options: terms have to match
* // exactly, and individual terms are joined with OR
* search(searchIndex, 'zen art motorcycle')
* // => [ { id: 2, score: 2.77258, match: { ... } }, { id: 4, score: 1.38629, match: { ... } } ]
* ```
*
* ### Restrict search to specific fields:
*
* ```js
* // Search only in the 'title' field
* search(searchIndex, 'zen', { fields: ['title'] })
* ```
*
* ### Field boosting:
*
* ```js
* // Boost a field
* search(searchIndex, 'zen', { boost: { title: 2 } })
* ```
*
* ### Prefix search:
*
* ```js
* // Search for "moto neuro" with prefix search (it will match documents
* // containing terms that start with "moto" or "neuro")
* search(searchIndex, 'moto neuro', { prefix: true })
* ```
*
* ### Fuzzy search:
*
* ```js
* // Search for "ismael" with fuzzy search (it will match documents containing
* // terms similar to "ismael", with a maximum edit distance of
* // 0.2 * term.length, rounded to the nearest integer)
* search(searchIndex, 'ismael', { fuzzy: 0.2 })
* ```
*
* ### Combining strategies:
*
* ```js
* // Mix of exact match, prefix search, and fuzzy search
* search(searchIndex, 'ismael mob', {
* prefix: true,
* fuzzy: 0.2
* })
* ```
*
* ### Advanced prefix and fuzzy search:
*
* ```js
* // Perform fuzzy and prefix search depending on the search term. Here
* // performing prefix and fuzzy search only on terms longer than 3 characters
* search(searchIndex, 'ismael mob', {
* prefix: term => term.length > 3,
* fuzzy: term => term.length > 3 ? 0.2 : null
* })
* ```
*
* ### Combine with AND:
*
* ```js
* // Combine search terms with AND (to match only documents that contain both
* // "motorcycle" and "art")
* search(searchIndex, 'motorcycle art', { combineWith: 'AND' })
* ```
*
* ### Combine with AND_NOT:
*
* There is also an AND_NOT combinator, that finds documents that match the
* first term, but do not match any of the other terms. This combinator is
* rarely useful with simple queries, and is meant to be used with advanced
* query combinations (see later for more details).
*
* ### Filtering results:
*
* ```js
* // Filter only results in the 'fiction' category (assuming that 'category'
* // is a stored field)
* search(searchIndex, 'motorcycle art', {
* filter: (result) => result.category === 'fiction'
* })
* ```
*
* ### Wildcard query
*
* Searching for an empty string (assuming the default tokenizer) returns no
* results. Sometimes though, one needs to match all documents, like in a
* "wildcard" search. This is possible by passing the special value
* `WILDCARD` as the query:
*
* ```javascript
* // Return search results for all documents
* search(searchIndex, WILDCARD)
* ```
*
* Note that search options such as `filter` and `boostDocument` are still
* applied, influencing which results are returned, and their order:
*
* ```javascript
* // Return search results for all documents in the 'fiction' category
* search(searchIndex, WILDCARD, {
* filter: (result) => result.category === 'fiction'
* })
* ```
*
* ### Advanced combination of queries:
*
* It is possible to combine different subqueries with OR, AND, and AND_NOT,
* and even with different search options, by passing a query expression
* tree object as the query argument, instead of a string.
*
* ```js
* // Search for documents that contain "zen" and ("motorcycle" or "archery")
* search(searchIndex, {
* combineWith: 'AND',
* queries: [
* 'zen',
* {
* combineWith: 'OR',
* queries: ['motorcycle', 'archery']
* }
* ]
* })
*
* // Search for documents that contain ("apple" or "pear") but not "juice" and
* // not "tree"
* search(searchIndex, {
* combineWith: 'AND_NOT',
* queries: [
* {
* combineWith: 'OR',
* queries: ['apple', 'pear']
* },
* 'juice',
* 'tree'
* ]
* })
* ```
*
* Each node in the expression tree can be either a string, or an object that
* supports all `SearchOptions` fields, plus a `queries` array field for
* subqueries.
*
* Note that, while this can become complicated to do by hand for complex or
* deeply nested queries, it provides a formalized expression tree API for
* external libraries that implement a parser for custom query languages.
*
* @typeParam ID The id type of the documents being indexed.
* @typeParam Document The type of the documents being indexed.
* @typeParam Index The type of the stored fields kept in the index for each document.
*
* @param searchIndex Search Index
* @param query Search query
* @param searchOptions Search options. Each option, if not given, defaults to the corresponding value of `searchOptions` given to the constructor, or to the library default.
* @return A list of search results matching the query, sorted by descending score.
*/
declare const search: <ID, Document, Index extends Record<string, any> = Partial<Document>>(searchIndex: SearchIndex<ID, Document, Index>, query: Query, searchOptions?: SearchOptions<ID, Index>) => SearchResult<ID, Index>[];
/**
* Discards the document with the given ID, so it won't appear in sear