UNPKG

@jmaitrehenry/elastic-builder

Version:

A JavaScript implementation of the elasticsearch Query DSL

elastic-builder.js.org

jmaitrehenry/elastic-builder

281 lines (260 loc) • 11.5 kB

JavaScript

'use strict';

const isNil = require('lodash.isnil');

const {
    consts: { SMOOTHING_MODEL_SET },
    util: { recursiveToJSON, invalidParam }
} = require('../core');

const AnalyzedSuggesterBase = require('./analyzed-suggester-base');

const ES_REF_URL =
    'https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html';

// Reporter for bad `smoothing` arguments, built once from the shared
// `invalidParam` helper so every rejection carries the same reference URL,
// parameter name and set of allowed values.
const invalidSmoothingModeParam = invalidParam(
    ES_REF_URL,
    'smoothing',
    SMOOTHING_MODEL_SET
);

/**
 * The phrase suggester adds additional logic on top of the `term` suggester
 * to select entire corrected phrases instead of individual tokens weighted
 * based on `ngram-language` models. In practice this suggester will be able
 * to make better decisions about which tokens to pick based on co-occurrence
 * and frequencies.
 *
 * [Elasticsearch reference](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html)
 *
 * @example
 * const suggest = esb.phraseSuggester(
 *     'simple_phrase',
 *     'title.trigram',
 *     'noble prize'
 * )
 *     .size(1)
 *     .gramSize(3)
 *     .directGenerator(esb.directGenerator('title.trigram').suggestMode('always'))
 *     .highlight('<em>', '</em>');
 *
 * @param {string} name The name of the Suggester, an arbitrary identifier
 * @param {string=} field The field to fetch the candidate suggestions from.
 * @param {string=} txt A string to get suggestions for.
 *
 * @throws {Error} if `name` is empty
 *
 * @extends AnalyzedSuggesterBase
 */
class PhraseSuggester extends AnalyzedSuggesterBase {
    // eslint-disable-next-line require-jsdoc
    constructor(name, field, txt) {
        super('phrase', name, field, txt);
    }

    /**
     * Sets max size of the n-grams (shingles) in the `field`. If the field
     * doesn't contain n-grams (shingles) this should be omitted or set to `1`.
     *
     * Note: Elasticsearch tries to detect the gram size based on
     * the specified `field`. If the field uses a `shingle` filter the `gram_size`
     * is set to the `max_shingle_size` if not explicitly set.
     *
     * @param {number} size Max size of the n-grams (shingles) in the `field`.
     * @returns {PhraseSuggester} returns `this` so that calls can be chained.
     */
    gramSize(size) {
        this._suggestOpts.gram_size = size;
        return this;
    }

    /**
     * Sets the likelihood of a term being misspelled even if the term exists
     * in the dictionary. The default is `0.95` corresponding to 5% of the
     * real words are misspelled.
     *
     * @param {number} factor Likelihood of a term being misspelled. Defaults to `0.95`
     * @returns {PhraseSuggester} returns `this` so that calls can be chained.
     */
    realWordErrorLikelihood(factor) {
        this._suggestOpts.real_word_error_likelihood = factor;
        return this;
    }

    /**
     * Sets the confidence level, which defines a factor applied to the input
     * phrase's score and is used as a threshold for other suggest candidates.
     * Only candidates that score higher than the threshold will be included
     * in the result. For instance a confidence level of `1.0` will only return
     * suggestions that score higher than the input phrase. If set to `0.0`
     * the top N candidates are returned. The default is `1.0`.
     *
     * @param {number} level Factor applied to the input phrases score, used as
     * a threshold for other suggest candidates.
     * @returns {PhraseSuggester} returns `this` so that calls can be chained.
     */
    confidence(level) {
        this._suggestOpts.confidence = level;
        return this;
    }

    /**
     * Sets the maximum percentage of the terms that at most considered to be
     * misspellings in order to form a correction. This method accepts a float
     * value in the range `[0..1)` as a fraction of the actual query terms or a
     * number `>=1` as an absolute number of query terms. The default is set
     * to `1.0` which corresponds to that only corrections with at most
     * 1 misspelled term are returned. Note that setting this too high can
     * negatively impact performance. Low values like 1 or 2 are recommended
     * otherwise the time spent in suggest calls might exceed the time spent
     * in query execution.
     *
     * @param {number} limit The maximum percentage of the terms that at most considered
     * to be misspellings in order to form a correction.
     * @returns {PhraseSuggester} returns `this` so that calls can be chained.
     */
    maxErrors(limit) {
        this._suggestOpts.max_errors = limit;
        return this;
    }

    /**
     * Sets the separator that is used to separate terms in the bigram field.
     * If not set the whitespace character is used as a separator.
     *
     * @param {string} sep The separator that is used to separate terms in the
     * bigram field.
     * @returns {PhraseSuggester} returns `this` so that calls can be chained.
     */
    separator(sep) {
        this._suggestOpts.separator = sep;
        return this;
    }

    /**
     * Sets up suggestion highlighting. If not provided then no `highlighted` field
     * is returned. If provided must contain exactly `pre_tag` and `post_tag` which
     * are wrapped around the changed tokens. If multiple tokens in a row are changed
     * the entire phrase of changed tokens is wrapped rather than each token.
     *
     * @param {string} preTag Pre-tag to wrap token
     * @param {string} postTag Post-tag to wrap token
     * @returns {PhraseSuggester} returns `this` so that calls can be chained.
     */
    highlight(preTag, postTag) {
        this._suggestOpts.highlight = { pre_tag: preTag, post_tag: postTag };
        return this;
    }

    /**
     * Checks each suggestion against the specified `query` to prune suggestions
     * for which no matching docs exist in the index. The collate query for
     * a suggestion is run only on the local shard from which the suggestion
     * has been generated from. The `query` must be specified, and it is run
     * as a [`template` query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-template-query.html).
     *
     * The current suggestion is automatically made available as the
     * `{{suggestion}}` variable, which should be used in your query.
     * Additionally, you can specify a `prune` to control if all phrase
     * suggestions will be returned, when set to `true` the suggestions will
     * have an additional option `collate_match`, which will be true if matching
     * documents for the phrase was found, `false` otherwise. The default value
     * for prune is `false`.
     *
     * @example
     * const suggest = esb.phraseSuggester('simple_phrase', 'title.trigram')
     *     .size(1)
     *     .directGenerator(
     *         esb.directGenerator('title.trigram')
     *             .suggestMode('always')
     *             .minWordLength(1)
     *     )
     *     .collate({
     *         query: {
     *             inline: {
     *                 match: {
     *                     '{{field_name}}': '{{suggestion}}'
     *                 }
     *             }
     *         },
     *         params: { field_name: 'title' },
     *         prune: true
     *     });
     *
     * @param {Object} opts The options for `collate`. Can include the following:
     *   - `query`: The `query` to prune suggestions for which
     *      no matching docs exist in the index. It is run as a `template` query.
     *   - `params`: The parameters to be passed to the template. The suggestion
     *      value will be added to the variables you specify.
     *   - `prune`: When set to `true`, the suggestions will
     *      have an additional option `collate_match`, which will be true if matching
     *      documents for the phrase was found, `false` otherwise. The default value
     *      for prune is `false`.
     * @returns {PhraseSuggester} returns `this` so that calls can be chained.
     */
    collate(opts) {
        // Add an instance check here?
        // I wanted to use `SearchTemplate` here since the syntax is deceptively
        // similar. But not quite the same.
        // Adding a builder object called collate doesn't make sense either.
        // The options object is stored as given, without validation or copying.
        this._suggestOpts.collate = opts;
        return this;
    }

    /**
     * Sets the smoothing model to balance weight between infrequent grams
     * (grams (shingles) are not existing in the index) and frequent grams
     * (appear at least once in the index).
     *
     * Three possible values can be specified:
     *   - `stupid_backoff`: a simple backoff model that backs off to lower order
     *     n-gram models if the higher order count is 0 and discounts the lower order
     *     n-gram model by a constant factor. The default `discount` is `0.4`.
     *     Stupid Backoff is the default model
     *   - `laplace`: a smoothing model that uses an additive smoothing where a
     *     constant (typically `1.0` or smaller) is added to all counts to balance weights,
     *     The default `alpha` is `0.5`.
     *   - `linear_interpolation`: a smoothing model that takes the weighted mean of the
     *     unigrams, bigrams and trigrams based on user supplied weights (lambdas).
     *     Linear Interpolation doesn't have any default values.
     *     All parameters (`trigram_lambda`, `bigram_lambda`, `unigram_lambda`)
     *     must be supplied.
     *
     * The value is matched case-insensitively and stored lower-cased.
     *
     * @param {string} model One of `stupid_backoff`, `laplace`, `linear_interpolation`
     * @returns {PhraseSuggester} returns `this` so that calls can be chained.
     * @throws {Error} if `model` is nil, is not a string, or is not a member of
     * `SMOOTHING_MODEL_SET`. The rejection is delegated to the reporter built by
     * `invalidParam` from `../core`, which is expected to throw.
     */
    smoothing(model) {
        // Route nil AND non-string values through the parameter reporter so
        // callers get the descriptive "invalid smoothing" error instead of an
        // unrelated TypeError from calling `toLowerCase` on a non-string.
        if (isNil(model) || typeof model !== 'string') {
            invalidSmoothingModeParam(model);
        }

        const modelLower = model.toLowerCase();
        if (!SMOOTHING_MODEL_SET.has(modelLower)) {
            invalidSmoothingModeParam(model);
        }

        this._suggestOpts.smoothing = modelLower;
        return this;
    }

    /**
     * Sets the given list of candidate generators which produce a list of possible terms
     * per term in the given text. Each of the generators in the list are
     * called per term in the original text.
     *
     * The output of the generators is subsequently scored in combination with the
     * candidates from the other terms to form suggestion candidates.
     *
     * @example
     * const suggest = esb.phraseSuggester('simple_phrase', 'title.trigram')
     *     .size(1)
     *     .directGenerator([
     *         esb.directGenerator('title.trigram').suggestMode('always'),
     *         esb.directGenerator('title.reverse')
     *             .suggestMode('always')
     *             .preFilter('reverse')
     *             .postFilter('reverse')
     *     ]);
     *
     * @param {Array<DirectGenerator>|DirectGenerator} dirGen Array of `DirectGenerator`
     * instances or a single instance of `DirectGenerator`
     * @returns {PhraseSuggester} returns `this` so that calls can be chained.
     */
    directGenerator(dirGen) {
        // TODO: Do instance checks on `dirGen`
        // A single generator is wrapped so the stored value is always an array.
        this._suggestOpts.direct_generator = Array.isArray(dirGen)
            ? dirGen
            : [dirGen];
        return this;
    }

    /**
     * Override default `toJSON` to return DSL representation for the `phrase suggester`
     *
     * @override
     * @returns {Object} returns an Object which maps to the elasticsearch DSL
     */
    toJSON() {
        return recursiveToJSON(this._body);
    }
}

module.exports = PhraseSuggester;