@jmaitrehenry/elastic-builder
Version:
A JavaScript implementation of the elasticsearch Query DSL
281 lines (260 loc) • 11.5 kB
JavaScript
'use strict';
const isNil = require('lodash.isnil');
const {
consts: { SMOOTHING_MODEL_SET },
util: { recursiveToJSON, invalidParam }
} = require('../core');
const AnalyzedSuggesterBase = require('./analyzed-suggester-base');
const ES_REF_URL =
'https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html';
const invalidSmoothingModeParam = invalidParam(
ES_REF_URL,
'smoothing',
SMOOTHING_MODEL_SET
);
/**
* The phrase suggester adds additional logic on top of the `term` suggester
* to select entire corrected phrases instead of individual tokens weighted
* based on `ngram-language` models. In practice this suggester will be able
* to make better decisions about which tokens to pick based on co-occurrence
* and frequencies.
*
* [Elasticsearch reference](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html)
*
* @example
* const suggest = esb.phraseSuggester(
* 'simple_phrase',
* 'title.trigram',
* 'noble prize'
* )
* .size(1)
* .gramSize(3)
* .directGenerator(esb.directGenerator('title.trigram').suggestMode('always'))
* .highlight('<em>', '</em>');
*
* @param {string} name The name of the Suggester, an arbitrary identifier
* @param {string=} field The field to fetch the candidate suggestions from.
* @param {string=} txt A string to get suggestions for.
*
* @throws {Error} if `name` is empty
*
* @extends AnalyzedSuggesterBase
*/
class PhraseSuggester extends AnalyzedSuggesterBase {
// eslint-disable-next-line require-jsdoc
constructor(name, field, txt) {
super('phrase', name, field, txt);
}
/**
* Sets max size of the n-grams (shingles) in the `field`. If the field
* doesn't contain n-grams (shingles) this should be omitted or set to `1`.
*
* Note: Elasticsearch tries to detect the gram size based on
* the specified `field`. If the field uses a `shingle` filter the `gram_size`
* is set to the `max_shingle_size` if not explicitly set.
* @param {number} size Max size of the n-grams (shingles) in the `field`.
* @returns {PhraseSuggester} returns `this` so that calls can be chained.
*/
gramSize(size) {
this._suggestOpts.gram_size = size;
return this;
}
/**
* Sets the likelihood of a term being a misspelled even if the term exists
* in the dictionary. The default is `0.95` corresponding to 5% of the
* real words are misspelled.
*
* @param {number} factor Likelihood of a term being misspelled. Defaults to `0.95`
* @returns {PhraseSuggester} returns `this` so that calls can be chained.
*/
realWordErrorLikelihood(factor) {
this._suggestOpts.real_word_error_likelihood = factor;
return this;
}
/**
* Sets the confidence level defines a factor applied to the input phrases score
* which is used as a threshold for other suggest candidates. Only candidates
* that score higher than the threshold will be included in the result.
* For instance a confidence level of `1.0` will only return suggestions
* that score higher than the input phrase. If set to `0.0` the top N candidates
* are returned. The default is `1.0`.
*
* @param {number} level Factor applied to the input phrases score, used as
* a threshold for other suggest candidates.
* @returns {PhraseSuggester} returns `this` so that calls can be chained.
*/
confidence(level) {
this._suggestOpts.confidence = level;
return this;
}
/**
* Sets the maximum percentage of the terms that at most considered to be
* misspellings in order to form a correction. This method accepts a float
* value in the range `[0..1)` as a fraction of the actual query terms or a
* number `>=1` as an absolute number of query terms. The default is set
* to `1.0` which corresponds to that only corrections with at most
* 1 misspelled term are returned. Note that setting this too high can
* negatively impact performance. Low values like 1 or 2 are recommended
* otherwise the time spend in suggest calls might exceed the time spend
* in query execution.
*
* @param {number} limit The maximum percentage of the terms that at most considered
* to be misspellings in order to form a correction.
* @returns {PhraseSuggester} returns `this` so that calls can be chained.
*/
maxErrors(limit) {
this._suggestOpts.max_errors = limit;
return this;
}
/**
* Sets the separator that is used to separate terms in the bigram field.
* If not set the whitespace character is used as a separator.
*
* @param {string} sep The separator that is used to separate terms in the
* bigram field.
* @returns {PhraseSuggester} returns `this` so that calls can be chained.
*/
separator(sep) {
this._suggestOpts.separator = sep;
return this;
}
/**
* Sets up suggestion highlighting. If not provided then no `highlighted` field
* is returned. If provided must contain exactly `pre_tag` and `post_tag` which
* are wrapped around the changed tokens. If multiple tokens in a row are changed
* the entire phrase of changed tokens is wrapped rather than each token.
*
* @param {string} preTag Pre-tag to wrap token
* @param {string} postTag Post-tag to wrap token
* @returns {PhraseSuggester} returns `this` so that calls can be chained.
*/
highlight(preTag, postTag) {
this._suggestOpts.highlight = { pre_tag: preTag, post_tag: postTag };
return this;
}
/**
* Checks each suggestion against the specified `query` to prune suggestions
* for which no matching docs exist in the index. The collate query for
* a suggestion is run only on the local shard from which the suggestion
* has been generated from. The `query` must be specified, and it is run
* as a [`template` query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-template-query.html).
*
* The current suggestion is automatically made available as the
* `{{suggestion}}` variable, which should be used in your query.
* Additionally, you can specify a `prune` to control if all phrase
* suggestions will be returned, when set to `true` the suggestions will
* have an additional option `collate_match`, which will be true if matching
* documents for the phrase was found, `false` otherwise. The default value
* for prune is `false`.
*
* @example
* const suggest = esb.phraseSuggester('simple_phrase', 'title.trigram')
* .size(1)
* .directGenerator(
* esb.directGenerator('title.trigram')
* .suggestMode('always')
* .minWordLength(1)
* )
* .collate({
* query: {
* inline: {
* match: {
* '{{field_name}}': '{{suggestion}}'
* }
* }
* },
* params: { field_name: 'title' },
* prune: true
* });
*
* @param {Object} opts The options for `collate`. Can include the following:
* - `query`: The `query` to prune suggestions for which
* no matching docs exist in the index. It is run as a `template` query.
* - `params`: The parameters to be passed to the template. The suggestion
* value will be added to the variables you specify.
* - `prune`: When set to `true`, the suggestions will
* have an additional option `collate_match`, which will be true if matching
* documents for the phrase was found, `false` otherwise. The default value
* for prune is `false`.
* @returns {PhraseSuggester} returns `this` so that calls can be chained.
*/
collate(opts) {
// Add an instance check here?
// I wanted to use `SearchTemplate` here since the syntaqx is deceptively
// similar. But not quite the same.
// Adding a builder object called collate doesn't make sense either.
this._suggestOpts.collate = opts;
return this;
}
/**
* Sets the smoothing model to balance weight between infrequent grams
* (grams (shingles) are not existing in the index) and frequent grams
* (appear at least once in the index).
*
* Three possible values can be specified:
* - `stupid_backoff`: a simple backoff model that backs off to lower order
* n-gram models if the higher order count is 0 and discounts the lower order
* n-gram model by a constant factor. The default `discount` is `0.4`.
* Stupid Backoff is the default model
* - `laplace`: a smoothing model that uses an additive smoothing where a
* constant (typically `1.0` or smaller) is added to all counts to balance weights,
* The default `alpha` is `0.5`.
* - `linear_interpolation`: a smoothing model that takes the weighted mean of the
* unigrams, bigrams and trigrams based on user supplied weights (lambdas).
* Linear Interpolation doesn’t have any default values.
* All parameters (`trigram_lambda`, `bigram_lambda`, `unigram_lambda`)
* must be supplied.
*
* @param {string} model One of `stupid_backoff`, `laplace`, `linear_interpolation`
* @returns {PhraseSuggester} returns `this` so that calls can be chained.
*/
smoothing(model) {
if (isNil(model)) invalidSmoothingModeParam(model);
const modelLower = model.toLowerCase();
if (!SMOOTHING_MODEL_SET.has(modelLower)) {
invalidSmoothingModeParam(model);
}
this._suggestOpts.smoothing = modelLower;
return this;
}
/**
* Sets the given list of candicate generators which produce a list of possible terms
* per term in the given text. Each of the generators in the list are
* called per term in the original text.
*
* The output of the generators is subsequently scored in combination with the
* candidates from the other terms to for suggestion candidates.
*
* @example
* const suggest = esb.phraseSuggester('simple_phrase', 'title.trigram')
* .size(1)
* .directGenerator([
* esb.directGenerator('title.trigram').suggestMode('always'),
* esb.directGenerator('title.reverse')
* .suggestMode('always')
* .preFilter('reverse')
* .postFilter('reverse')
* ]);
*
* @param {Array<DirectGenerator>|DirectGenerator} dirGen Array of `DirectGenerator`
* instances or a single instance of `DirectGenerator`
* @returns {PhraseSuggester} returns `this` so that calls can be chained.
*/
directGenerator(dirGen) {
// TODO: Do instance checks on `dirGen`
this._suggestOpts.direct_generator = Array.isArray(dirGen)
? dirGen
: [dirGen];
return this;
}
/**
* Override default `toJSON` to return DSL representation for the `phrase suggester`
*
* @override
* @returns {Object} returns an Object which maps to the elasticsearch DSL
*/
toJSON() {
return recursiveToJSON(this._body);
}
}
module.exports = PhraseSuggester;