@jmaitrehenry/elastic-builder
Version:
A JavaScript implementation of the elasticsearch Query DSL
230 lines (208 loc) • 8.74 kB
JavaScript
'use strict';
const isNil = require('lodash.isnil');
const {
consts: { SUGGEST_MODE_SET },
util: { invalidParam }
} = require('../core');
const ES_REF_URL =
'https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html#_direct_generators';
const invalidSuggestModeParam = invalidParam(
ES_REF_URL,
'suggest_mode',
SUGGEST_MODE_SET
);
/**
* The `phrase` suggester uses candidate generators to produce a list of possible
* terms per term in the given text. A single candidate generator is similar
* to a `term` suggester called for each individual term in the text. The output
* of the generators is subsequently scored in combination with the candidates
* from the other terms to for suggestion candidates.
*
* The Phrase suggest API accepts a list of generators under the key `direct_generator`
* each of the generators in the list are called per term in the original text.
*
* [Elasticsearch reference](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html#_direct_generators)
*
* @param {string=} field The field to fetch the candidate suggestions from.
*/
class DirectGenerator {
// eslint-disable-next-line require-jsdoc
constructor(field) {
this._body = {};
if (!isNil(field)) this._body.field = field;
}
/**
* Sets field to fetch the candidate suggestions from. This is a required option
* that either needs to be set globally or per suggestion.
*
* @param {string} field a valid field name
* @returns {DirectGenerator} returns `this` so that calls can be chained
*/
field(field) {
this._body.field = field;
return this;
}
/**
* Sets the number of suggestions to return (defaults to `5`).
*
* @param {number} size
* @returns {DirectGenerator} returns `this` so that calls can be chained.
*/
size(size) {
this._body.size = size;
return this;
}
/**
* Sets the suggest mode which controls what suggestions are included
* or controls for what suggest text terms, suggestions should be suggested.
* All values other than `always` can be thought of as an optimization to
* generate fewer suggestions to test on each shard and are not rechecked
* when combining the suggestions generated on each shard. Thus `missing`
* will generate suggestions for terms on shards that do not contain them
* even other shards do contain them. Those should be filtered out
* using `confidence`.
*
* Three possible values can be specified:
* - `missing`: Only provide suggestions for suggest text terms that
* are not in the index. This is the default.
* - `popular`: Only suggest suggestions that occur in more docs
* than the original suggest text term.
* - `always`: Suggest any matching suggestions based on terms in the suggest text.
*
* @param {string} mode Can be `missing`, `popular` or `always`
* @returns {DirectGenerator} returns `this` so that calls can be chained.
* @throws {Error} If `mode` is not one of `missing`, `popular` or `always`.
*/
suggestMode(mode) {
if (isNil(mode)) invalidSuggestModeParam(mode);
const modeLower = mode.toLowerCase();
if (!SUGGEST_MODE_SET.has(modeLower)) {
invalidSuggestModeParam(mode);
}
this._body.suggest_mode = modeLower;
return this;
}
/**
* Sets the maximum edit distance candidate suggestions can have
* in order to be considered as a suggestion. Can only be a value
* between 1 and 2. Any other value result in an bad request
* error being thrown. Defaults to 2.
*
* @param {number} maxEdits Value between 1 and 2. Defaults to 2.
* @returns {DirectGenerator} returns `this` so that calls can be chained.
*/
maxEdits(maxEdits) {
this._body.max_edits = maxEdits;
return this;
}
/**
* Sets the number of minimal prefix characters that must match in order
* to be a candidate suggestions. Defaults to 1.
*
* Increasing this number improves spellcheck performance.
* Usually misspellings don't occur in the beginning of terms.
*
* @param {number} len The number of minimal prefix characters that must match in order
* to be a candidate suggestions. Defaults to 1.
* @returns {DirectGenerator} returns `this` so that calls can be chained.
*/
prefixLength(len) {
this._body.prefix_length = len;
return this;
}
/**
* Sets the minimum length a suggest text term must have in order to be included.
* Defaults to 4.
*
* @param {number} len The minimum length a suggest text term must have in order
* to be included. Defaults to 4.
* @returns {DirectGenerator} returns `this` so that calls can be chained.
*/
minWordLength(len) {
this._body.min_word_length = len;
return this;
}
/**
* Sets factor that is used to multiply with the `shards_size` in order to inspect
* more candidate spell corrections on the shard level.
* Can improve accuracy at the cost of performance. Defaults to 5.
*
* @param {number} maxInspections Factor used to multiple with `shards_size` in
* order to inspect more candidate spell corrections on the shard level.
* Defaults to 5
* @returns {DirectGenerator} returns `this` so that calls can be chained.
*/
maxInspections(maxInspections) {
this._body.max_inspections = maxInspections;
return this;
}
/**
* Sets the minimal threshold in number of documents a suggestion should appear in.
* This can be specified as an absolute number or as a relative percentage of
* number of documents. This can improve quality by only suggesting high
* frequency terms. Defaults to 0f and is not enabled. If a value higher than 1
* is specified then the number cannot be fractional. The shard level document
* frequencies are used for this option.
*
* @param {number} limit Threshold in number of documents a suggestion
* should appear in. Defaults to 0f and is not enabled.
* @returns {DirectGenerator} returns `this` so that calls can be chained.
*/
minDocFreq(limit) {
this._body.min_doc_freq = limit;
return this;
}
/**
* Sets the maximum threshold in number of documents a suggest text token can
* exist in order to be included. Can be a relative percentage number (e.g 0.4)
* or an absolute number to represent document frequencies. If an value higher
* than 1 is specified then fractional can not be specified. Defaults to 0.01f.
* This can be used to exclude high frequency terms from being spellchecked.
* High frequency terms are usually spelled correctly on top of this also
* improves the spellcheck performance. The shard level document frequencies are
* used for this option.
*
* @param {number} limit Maximum threshold in number of documents a suggest text
* token can exist in order to be included. Defaults to 0.01f.
* @returns {DirectGenerator} returns `this` so that calls can be chained.
*/
maxTermFreq(limit) {
this._body.max_term_freq = limit;
return this;
}
/**
* Sets the filter (analyzer) that is applied to each of the tokens passed to this
* candidate generator. This filter is applied to the original token before
* candidates are generated.
*
* @param {string} filter a filter (analyzer) that is applied to each of the
* tokens passed to this candidate generator.
* @returns {DirectGenerator} returns `this` so that calls can be chained.
*/
preFilter(filter) {
this._body.pre_filter = filter;
return this;
}
/**
* Sets the filter (analyzer) that is applied to each of the generated tokens
* before they are passed to the actual phrase scorer.
*
* @param {string} filter a filter (analyzer) that is applied to each of the
* generated tokens before they are passed to the actual phrase scorer.
* @returns {DirectGenerator} returns `this` so that calls can be chained.
*/
postFilter(filter) {
this._body.post_filter = filter;
return this;
}
/**
* Override default `toJSON` to return DSL representation for the `direct_generator`
*
* @override
* @returns {Object} returns an Object which maps to the elasticsearch DSL
*/
toJSON() {
return this._body;
}
}
module.exports = DirectGenerator;