elastic-builder
Version:
A JavaScript implementation of the elasticsearch Query DSL
329 lines (287 loc) • 14 kB
JavaScript
'use strict';
var _classCallCheck2 = require('babel-runtime/helpers/classCallCheck');
var _classCallCheck3 = _interopRequireDefault(_classCallCheck2);
var _createClass2 = require('babel-runtime/helpers/createClass');
var _createClass3 = _interopRequireDefault(_createClass2);
var _possibleConstructorReturn2 = require('babel-runtime/helpers/possibleConstructorReturn');
var _possibleConstructorReturn3 = _interopRequireDefault(_possibleConstructorReturn2);
var _inherits2 = require('babel-runtime/helpers/inherits');
var _inherits3 = _interopRequireDefault(_inherits2);
/**
 * Babel interop helper that normalises a required module so that callers
 * can always read its `default` property.
 *
 * @param {*} obj The value returned by `require`.
 * @returns {Object} `obj` itself when it is truthy and flagged with a truthy
 * `__esModule` property, otherwise a new object `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  var isEsModule = Boolean(obj) && Boolean(obj.__esModule);
  if (isEsModule) {
    return obj;
  }
  // Anything else (CommonJS export, primitive, null/undefined) gets wrapped.
  return { default: obj };
}
var isNil = require('lodash.isnil');
var _require = require('../core'),
SMOOTHING_MODEL_SET = _require.consts.SMOOTHING_MODEL_SET,
_require$util = _require.util,
recursiveToJSON = _require$util.recursiveToJSON,
invalidParam = _require$util.invalidParam;
var AnalyzedSuggesterBase = require('./analyzed-suggester-base');
// Elasticsearch reference page for the phrase suggester; handed to
// `invalidParam` below as the documentation link for bad arguments.
var ES_REF_URL = 'https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html';
// Reporter for rejected `smoothing` arguments, built by `invalidParam` from
// the reference URL, the parameter name and the set of accepted models.
// `PhraseSuggester#smoothing` calls it with the offending value.
// NOTE(review): presumably throws - confirm against the `../core` util module.
var invalidSmoothingModeParam = invalidParam(ES_REF_URL, 'smoothing', SMOOTHING_MODEL_SET);
/**
* The phrase suggester adds additional logic on top of the `term` suggester
* to select entire corrected phrases instead of individual tokens weighted
* based on `ngram-language` models. In practice this suggester will be able
* to make better decisions about which tokens to pick based on co-occurrence
* and frequencies.
*
* [Elasticsearch reference](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-suggesters-phrase.html)
*
* @example
* const suggest = esb.phraseSuggester(
* 'simple_phrase',
* 'title.trigram',
* 'noble prize'
* )
* .size(1)
* .gramSize(3)
* .directGenerator(esb.directGenerator('title.trigram').suggestMode('always'))
* .highlight('<em>', '</em>');
*
* @param {string} name The name of the Suggester, an arbitrary identifier
* @param {string=} field The field to fetch the candidate suggestions from.
* @param {string=} txt A string to get suggestions for.
*
* @throws {Error} if `name` is empty
*
* @extends AnalyzedSuggesterBase
*/
var PhraseSuggester = function (_AnalyzedSuggesterBas) {
  (0, _inherits3.default)(PhraseSuggester, _AnalyzedSuggesterBas);

  // Forwards the fixed suggester type `'phrase'` together with `name`,
  // `field` and `txt` to the parent constructor.
  // eslint-disable-next-line require-jsdoc
  function PhraseSuggester(name, field, txt) {
    (0, _classCallCheck3.default)(this, PhraseSuggester);
    return (0, _possibleConstructorReturn3.default)(
      this,
      (PhraseSuggester.__proto__ || Object.getPrototypeOf(PhraseSuggester)).call(this, 'phrase', name, field, txt)
    );
  }

  (0, _createClass3.default)(PhraseSuggester, [
    {
      /**
       * Sets max size of the n-grams (shingles) in the `field`. If the field
       * doesn't contain n-grams (shingles) this should be omitted or set to `1`.
       *
       * Note: Elasticsearch tries to detect the gram size based on
       * the specified `field`. If the field uses a `shingle` filter the `gram_size`
       * is set to the `max_shingle_size` if not explicitly set.
       *
       * @param {number} size Max size of the n-grams (shingles) in the `field`.
       * @returns {PhraseSuggester} returns `this` so that calls can be chained.
       */
      key: 'gramSize',
      value: function gramSize(size) {
        this._suggestOpts.gram_size = size;
        return this;
      }
    },
    {
      /**
       * Sets the likelihood of a term being misspelled even if the term exists
       * in the dictionary. The default is `0.95`, corresponding to 5% of the
       * real words being misspelled.
       *
       * @param {number} factor Likelihood of a term being misspelled. Defaults to `0.95`
       * @returns {PhraseSuggester} returns `this` so that calls can be chained.
       */
      key: 'realWordErrorLikelihood',
      value: function realWordErrorLikelihood(factor) {
        this._suggestOpts.real_word_error_likelihood = factor;
        return this;
      }
    },
    {
      /**
       * Sets the confidence level, a factor applied to the input phrase's score
       * which is used as a threshold for other suggest candidates. Only candidates
       * that score higher than the threshold will be included in the result.
       * For instance a confidence level of `1.0` will only return suggestions
       * that score higher than the input phrase. If set to `0.0` the top N candidates
       * are returned. The default is `1.0`.
       *
       * @param {number} level Factor applied to the input phrase's score, used as
       * a threshold for other suggest candidates.
       * @returns {PhraseSuggester} returns `this` so that calls can be chained.
       */
      key: 'confidence',
      value: function confidence(level) {
        this._suggestOpts.confidence = level;
        return this;
      }
    },
    {
      /**
       * Sets the maximum percentage of the terms that can at most be considered
       * misspellings in order to form a correction. This method accepts a float
       * value in the range `[0..1)` as a fraction of the actual query terms or a
       * number `>=1` as an absolute number of query terms. The default is set
       * to `1.0`, which means that only corrections with at most
       * 1 misspelled term are returned. Note that setting this too high can
       * negatively impact performance. Low values like 1 or 2 are recommended,
       * otherwise the time spent in suggest calls might exceed the time spent
       * in query execution.
       *
       * @param {number} limit The maximum percentage of the terms that can at most
       * be considered misspellings in order to form a correction.
       * @returns {PhraseSuggester} returns `this` so that calls can be chained.
       */
      key: 'maxErrors',
      value: function maxErrors(limit) {
        this._suggestOpts.max_errors = limit;
        return this;
      }
    },
    {
      /**
       * Sets the separator that is used to separate terms in the bigram field.
       * If not set the whitespace character is used as a separator.
       *
       * @param {string} sep The separator that is used to separate terms in the
       * bigram field.
       * @returns {PhraseSuggester} returns `this` so that calls can be chained.
       */
      key: 'separator',
      value: function separator(sep) {
        this._suggestOpts.separator = sep;
        return this;
      }
    },
    {
      /**
       * Sets up suggestion highlighting. If not provided then no `highlighted` field
       * is returned. If provided must contain exactly `pre_tag` and `post_tag` which
       * are wrapped around the changed tokens. If multiple tokens in a row are changed
       * the entire phrase of changed tokens is wrapped rather than each token.
       *
       * @param {string} preTag Pre-tag to wrap token
       * @param {string} postTag Post-tag to wrap token
       * @returns {PhraseSuggester} returns `this` so that calls can be chained.
       */
      key: 'highlight',
      value: function highlight(preTag, postTag) {
        this._suggestOpts.highlight = { pre_tag: preTag, post_tag: postTag };
        return this;
      }
    },
    {
      /**
       * Checks each suggestion against the specified `query` to prune suggestions
       * for which no matching docs exist in the index. The collate query for
       * a suggestion is run only on the local shard from which the suggestion
       * has been generated. The `query` must be specified, and it is run
       * as a [`template` query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-template-query.html).
       *
       * The current suggestion is automatically made available as the
       * `{{suggestion}}` variable, which should be used in your query.
       * Additionally, you can specify a `prune` to control if all phrase
       * suggestions will be returned, when set to `true` the suggestions will
       * have an additional option `collate_match`, which will be true if matching
       * documents for the phrase were found, `false` otherwise. The default value
       * for prune is `false`.
       *
       * @example
       * const suggest = esb.phraseSuggester('simple_phrase', 'title.trigram')
       *     .size(1)
       *     .directGenerator(
       *         esb.directGenerator('title.trigram')
       *             .suggestMode('always')
       *             .minWordLength(1)
       *     )
       *     .collate({
       *         query: {
       *             inline: {
       *                 match: {
       *                     '{{field_name}}': '{{suggestion}}'
       *                 }
       *             }
       *         },
       *         params: { field_name: 'title' },
       *         prune: true
       *     });
       *
       * @param {Object} opts The options for `collate`. Can include the following:
       *   - `query`: The `query` to prune suggestions for which
       *      no matching docs exist in the index. It is run as a `template` query.
       *   - `params`: The parameters to be passed to the template. The suggestion
       *      value will be added to the variables you specify.
       *   - `prune`: When set to `true`, the suggestions will
       *      have an additional option `collate_match`, which will be true if matching
       *      documents for the phrase were found, `false` otherwise. The default value
       *      for prune is `false`.
       * @returns {PhraseSuggester} returns `this` so that calls can be chained.
       */
      key: 'collate',
      value: function collate(opts) {
        // Add an instance check here?
        // I wanted to use `SearchTemplate` here since the syntax is deceptively
        // similar. But not quite the same.
        // Adding a builder object called collate doesn't make sense either.
        // `opts` is stored as given, without validation or copying.
        this._suggestOpts.collate = opts;
        return this;
      }
    },
    {
      /**
       * Sets the smoothing model to balance weight between infrequent grams
       * (grams (shingles) that do not exist in the index) and frequent grams
       * (appear at least once in the index).
       *
       * Three possible values can be specified:
       *   - `stupid_backoff`: a simple backoff model that backs off to lower order
       *     n-gram models if the higher order count is 0 and discounts the lower order
       *     n-gram model by a constant factor. The default `discount` is `0.4`.
       *     Stupid Backoff is the default model
       *   - `laplace`: a smoothing model that uses an additive smoothing where a
       *     constant (typically `1.0` or smaller) is added to all counts to balance weights,
       *     The default `alpha` is `0.5`.
       *   - `linear_interpolation`: a smoothing model that takes the weighted mean of the
       *     unigrams, bigrams and trigrams based on user supplied weights (lambdas).
       *     Linear Interpolation doesn't have any default values.
       *     All parameters (`trigram_lambda`, `bigram_lambda`, `unigram_lambda`)
       *     must be supplied.
       *
       * The value is matched case-insensitively and stored lower-cased. Rejected
       * values (nil, values without a `toLowerCase` method, or names missing from
       * `SMOOTHING_MODEL_SET`) are passed to `invalidSmoothingModeParam`, which
       * presumably throws - confirm against the `../core` util module.
       *
       * @param {string} model One of `stupid_backoff`, `laplace`, `linear_interpolation`
       * @returns {PhraseSuggester} returns `this` so that calls can be chained.
       */
      key: 'smoothing',
      value: function smoothing(model) {
        // Besides nil, reject anything that cannot be lower-cased (numbers,
        // booleans, plain objects) so callers get the descriptive parameter
        // error instead of "model.toLowerCase is not a function".
        if (isNil(model) || typeof model.toLowerCase !== 'function') {
          invalidSmoothingModeParam(model);
        }

        var modelLower = model.toLowerCase();
        if (!SMOOTHING_MODEL_SET.has(modelLower)) {
          invalidSmoothingModeParam(model);
        }

        this._suggestOpts.smoothing = modelLower;
        return this;
      }
    },
    {
      /**
       * Sets the given list of candidate generators which produce a list of possible terms
       * per term in the given text. Each of the generators in the list is
       * called per term in the original text.
       *
       * The output of the generators is subsequently scored in combination with the
       * candidates from the other terms to form suggestion candidates.
       *
       * @example
       * const suggest = esb.phraseSuggester('simple_phrase', 'title.trigram')
       *     .size(1)
       *     .directGenerator([
       *         esb.directGenerator('title.trigram').suggestMode('always'),
       *         esb.directGenerator('title.reverse')
       *             .suggestMode('always')
       *             .preFilter('reverse')
       *             .postFilter('reverse')
       *     ]);
       *
       * @param {Array<DirectGenerator>|DirectGenerator} dirGen Array of `DirectGenerator`
       * instances or a single instance of `DirectGenerator`
       * @returns {PhraseSuggester} returns `this` so that calls can be chained.
       */
      key: 'directGenerator',
      value: function directGenerator(dirGen) {
        // TODO: Do instance checks on `dirGen`
        // A single generator is wrapped in an array; an array is stored by
        // reference (not copied).
        this._suggestOpts.direct_generator = Array.isArray(dirGen) ? dirGen : [dirGen];
        return this;
      }
    },
    {
      /**
       * Override default `toJSON` to return DSL representation for the `phrase suggester`
       *
       * @override
       * @returns {Object} returns an Object which maps to the elasticsearch DSL
       */
      key: 'toJSON',
      value: function toJSON() {
        return recursiveToJSON(this._body);
      }
    }
  ]);

  return PhraseSuggester;
}(AnalyzedSuggesterBase);
module.exports = PhraseSuggester;