UNPKG

@committed/trie-search

Version:

A trie implementation that maps keys to objects for rapid retrieval by phrases. Most common use will be for typeahead searches.

505 lines (403 loc) 14 kB
'use strict';

Object.defineProperty(exports, '__esModule', { value: true });

/** Returns `ex.default` when `ex` is an object carrying a `default` key, otherwise `ex` itself. */
function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; }

var get = _interopDefault(require('lodash.get'));
var HashArray = _interopDefault(require('hasharray'));

/** Transpiler helper: `Object.assign` with an own-property copy loop as fallback. */
function _extends() {
  _extends = Object.assign || function (target) {
    for (var i = 1; i < arguments.length; i++) {
      var source = arguments[i];

      for (var key in source) {
        if (Object.prototype.hasOwnProperty.call(source, key)) {
          target[key] = source[key];
        }
      }
    }

    return target;
  };

  return _extends.apply(this, arguments);
}

/** Transpiler helper: shallow copy of `source`'s own keys, minus those listed in `excluded`. */
function _objectWithoutPropertiesLoose(source, excluded) {
  if (source == null) return {};
  var target = {};
  var sourceKeys = Object.keys(source);
  var key, i;

  for (i = 0; i < sourceKeys.length; i++) {
    key = sourceKeys[i];
    if (excluded.indexOf(key) >= 0) continue;
    target[key] = source[key];
  }

  return target;
}

/** Transpiler helper: converts strings, Maps, Sets, `arguments` and typed arrays to plain arrays. */
function _unsupportedIterableToArray(o, minLen) {
  if (!o) return;
  if (typeof o === "string") return _arrayLikeToArray(o, minLen);
  var n = Object.prototype.toString.call(o).slice(8, -1);
  if (n === "Object" && o.constructor) n = o.constructor.name;
  if (n === "Map" || n === "Set") return Array.from(o);
  if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen);
}

/** Transpiler helper: copies the first `len` entries of an array-like into a new array. */
function _arrayLikeToArray(arr, len) {
  if (len == null || len > arr.length) len = arr.length;

  for (var i = 0, arr2 = new Array(len); i < len; i++) arr2[i] = arr[i];

  return arr2;
}

/** Transpiler helper: returns a `next()`-style function used to drive `for...of` loops. */
function _createForOfIteratorHelperLoose(o, allowArrayLike) {
  var it;

  if (typeof Symbol === "undefined" || o[Symbol.iterator] == null) {
    if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") {
      if (it) o = it;
      var i = 0;
      return function () {
        if (i >= o.length) return { done: true };
        return { done: false, value: o[i++] };
      };
    }

    throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
  }

  it = o[Symbol.iterator]();
  return it.next.bind(it);
}

// Default upper bound on the number of cached `_get` results (see `cleanCache`).
var MAX_CACHE_SIZE = 64;
// Matches strings that are empty or consist solely of whitespace.
var IS_WHITESPACE = /^[\s]*$/;
// Default `expandRegexes`: each accented character found in a key produces an
// additional key in which that character is replaced by `alternate`.
var DEFAULT_INTERNATIONALIZE_EXPAND_REGEXES = [
  { regex: /[åäàáâãæ]/gi, alternate: "a" },
  { regex: /[èéêë]/gi, alternate: "e" },
  { regex: /[ìíîï]/gi, alternate: "i" },
  { regex: /[òóôõö]/gi, alternate: "o" },
  { regex: /[ùúûü]/gi, alternate: "u" },
  { regex: /[æ]/gi, alternate: "ae" }
];

/**
 * Returns `original` with the text starting at `index` replaced by `replacement`.
 *
 * @param {number} index - Position of the first character to replace.
 * @param {string} original - String to operate on.
 * @param {string} replacement - Text inserted at `index`.
 * @param {number} [removedLength=1] - Number of characters of `original` that
 *   are dropped. This must be the length of the matched text, not of the
 *   replacement; otherwise a longer replacement (e.g. "ae" for "æ") would eat
 *   the characters that follow the match.
 * @returns {string} The new string.
 */
var replaceCharAt = function replaceCharAt(index, original, replacement, removedLength) {
  var removed = removedLength == null ? 1 : removedLength;
  return original.substr(0, index) + replacement + original.substr(index + removed);
};

var TrieSearch = /*#__PURE__*/function () {
  /**
   * Trie that maps string keys (taken from object fields) to the objects themselves.
   *
   * @param {string|Array} [keyFields] - Field name, or list of field names /
   *   paths, whose values are indexed by `add`.
   * @param {Object} [options] - Overrides merged over `TrieSearch.DEFAULT_OPTIONS`.
   */
  function TrieSearch(keyFields, options) {
    if (options === void 0) {
      options = {};
    }

    this.options = _extends({}, TrieSearch.DEFAULT_OPTIONS, options);
    this.keyFields = keyFields ? keyFields instanceof Array ? keyFields : [keyFields] : [];
    this.root = {};
    this.size = 0;

    if (this.options.cache) {
      this.getCache = new HashArray("key");
    }

    // Lookups split phrases the same way insertions do unless told otherwise.
    if (this.options.splitOnGetRegEx === undefined) {
      this.options.splitOnGetRegEx = this.options.splitOnRegEx;
    }
  }

  /**
   * Rebuilds a trie from the structure produced by `toJson`.
   * Only `keyFields`, `options` and `root` are read from `json`.
   */
  TrieSearch.fromJson = function fromJson(json) {
    var ts = new TrieSearch(json.keyFields, json.options);
    ts.root = json.root;
    return ts;
  };

  /** Resolves a property path on `obj` by delegating to `lodash.get`. */
  TrieSearch.deepLookup = function deepLookup(obj, keys) {
    return get(obj, keys);
  };

  var _proto = TrieSearch.prototype;

  /**
   * Indexes `obj` under the values of its key fields.
   *
   * @param {Object} obj - Item to index.
   * @param {Array} [customKeys] - Key fields to use instead of `this.keyFields`.
   */
  _proto.add = function add(obj, customKeys) {
    if (this.options.cache) {
      this.clearCache();
    }

    var keyFields;

    // Someone might have called add via an array forEach where the second param is a number
    if (customKeys == null || typeof customKeys === "number") {
      keyFields = this.keyFields;
    } else {
      keyFields = customKeys;
    }

    for (var _iterator = _createForOfIteratorHelperLoose(keyFields), _step; !(_step = _iterator()).done;) {
      var key = _step.value;
      var val = void 0;

      if (key instanceof Array) {
        val = TrieSearch.deepLookup(obj, key);
      } else {
        // @ts-ignore
        val = obj[key];
      }

      // Falsy field values are not indexed.
      if (!val) continue;
      val = val.toString();
      var expandedValues = this.expandString(val);

      for (var v = 0; v < expandedValues.length; v++) {
        var expandedValue = expandedValues[v];
        this.map(expandedValue, obj);
      }
    }
  };

  /**
   * Produces the list of keys to index for `value`: the value itself plus, for
   * every match of every `expandRegexes` entry, a copy of the ORIGINAL value in
   * which that single match is replaced by the entry's `alternate`.
   *
   * @param {string} value - Key text to expand.
   * @returns {string[]} `value` followed by its alternates.
   */
  _proto.expandString = function expandString(value) {
    var values = [value];
    var expandRegexes = this.options.expandRegexes;

    if (expandRegexes && expandRegexes.length) {
      for (var i = 0; i < expandRegexes.length; i++) {
        var er = expandRegexes[i];
        var regex = er.regex;
        var match;

        // Global regexes keep scan state between calls; always start from 0.
        regex.lastIndex = 0;

        while ((match = regex.exec(value)) !== null) {
          values.push(replaceCharAt(match.index, value, er.alternate, match[0].length));

          // A non-global regex returns the same match forever; stop after one.
          if (!regex.global) break;

          // A zero-length match does not advance lastIndex; step manually.
          if (match[0].length === 0) regex.lastIndex++;
        }
      }
    }

    return values;
  };

  /** Calls `add` for every element of `arr`. */
  _proto.addAll = function addAll(arr, customKeys) {
    for (var i = 0; i < arr.length; i++) {
      this.add(arr[i], customKeys);
    }
  };

  /** Empties the trie. The result cache is dropped too so stale matches are not served. */
  _proto.reset = function reset() {
    this.root = {};
    this.size = 0;

    if (this.options.cache) {
      this.clearCache();
    }
  };

  /** Replaces the result cache with a fresh, empty one. */
  _proto.clearCache = function clearCache() {
    this.getCache = new HashArray("key");
  };

  /** Removes cache entries from the front of the list until at most `maxCacheSize` remain. */
  _proto.cleanCache = function cleanCache() {
    while (this.getCache.all.length > this.options.maxCacheSize) {
      this.getCache.remove(this.getCache.all[0]);
    }
  };

  /**
   * Inserts `value` into the trie under `key`.
   *
   * When `splitOnRegEx` matches the key, each non-blank piece is mapped
   * separately; the unsplit key is additionally inserted only when
   * `insertFullUnsplitKey` is set.
   *
   * @param {string} key - Key text.
   * @param {*} value - Item stored at the node reached by `key`.
   */
  _proto.map = function map(key, value) {
    var splitOnRegEx = this.options.splitOnRegEx;
    var keyNeedsSplit = false;

    if (splitOnRegEx) {
      // The default split regex is global and shared, so `test` resumes from a
      // stale `lastIndex` and can miss a separator. Reset before and after.
      splitOnRegEx.lastIndex = 0;
      keyNeedsSplit = splitOnRegEx.test(key);
      splitOnRegEx.lastIndex = 0;
    }

    if (keyNeedsSplit) {
      var phrases = key.split(splitOnRegEx);
      var emptySplitMatch = phrases.filter(function (p) {
        return IS_WHITESPACE.test(p);
      });
      var selfMatch = phrases.filter(function (p) {
        return p === key;
      });
      var selfIsOnlyMatch = selfMatch.length + emptySplitMatch.length === phrases.length;

      // There is an edge case that a RegEx with a positive lookahead like:
      //  /?=[A-Z]/ // Split on capital letters for a camelcase sentence
      // Will then match again when we call map, creating an infinite stack loop.
      if (!selfIsOnlyMatch) {
        for (var i = 0, l = phrases.length; i < l; i++) {
          if (!IS_WHITESPACE.test(phrases[i])) {
            this.map(phrases[i], value);
          }
        }

        if (!this.options.insertFullUnsplitKey) {
          return;
        }
      }
    }

    if (this.options.cache) this.clearCache();

    if (this.options.keepAll) {
      this.indexed = this.indexed || new HashArray([this.options.keepAllKey]);
      this.indexed.add(value);
    }

    if (this.options.ignoreCase) {
      key = key.toLowerCase();
    }

    var keyArr = this.keyToArr(key),
        self = this;
    insert(keyArr, value, this.root);

    // Walks/creates one node per entry of `keyArr` and appends `value` to the
    // final node's "value" list. `size` counts newly created nodes.
    function insert(keyArr, value, node) {
      if (keyArr.length == 0) {
        node["value"] = node["value"] || [];
        node["value"].push(value);
        return;
      }

      var k = keyArr.shift();
      if (!node[k]) self.size++;
      node[k] = node[k] || {}; // @ts-ignore

      insert(keyArr, value, node[k]);
    }
  };

  /**
   * Adds every `key -> obj[key]` pair of a plain object as an item of the form
   * `{ _key_: key, [valueField]: obj[key] }`, indexed on `_key_`.
   * used for testing - this not type safe and breaks generics
   */
  _proto.addFromObject = function addFromObject(obj, valueField) {
    if (this.options.cache) this.clearCache();
    valueField = valueField || "value";
    if (this.keyFields.indexOf("_key_") == -1) this.keyFields.push("_key_");

    for (var key in obj) {
      var o = {
        _key_: key
      }; // @ts-ignore

      o[valueField] = obj[key]; // @ts-ignore

      this.add(o);
    }
  };

  /**
   * Splits a key into trie path segments: one per character, except that with
   * `options.min > 1` the first `min` characters form a single segment. Keys
   * shorter than `min` yield an empty path.
   */
  _proto.keyToArr = function keyToArr(key) {
    var keyArr;

    if (this.options.min && this.options.min > 1) {
      if (key.length < this.options.min) return [];
      keyArr = [key.substr(0, this.options.min)];
      keyArr = keyArr.concat(key.substr(this.options.min).split(""));
    } else {
      keyArr = key.split("");
    }

    return keyArr;
  };

  /** Returns the trie node reached by `key`, or `undefined` when there is none. */
  _proto.findNode = function findNode(key) {
    if (this.options.min && this.options.min > 0 && key.length < this.options.min) return undefined;
    return f(this.keyToArr(key), this.root);

    function f(keyArr, node) {
      if (!node) return undefined;
      if (keyArr.length == 0) return node;
      var k = keyArr.shift(); // @ts-ignore

      return f(keyArr, node[k]);
    }
  };

  /** Builds the cache key for a phrase: the phrase, suffixed with `_<limit>` when a limit is given. */
  _proto._getCacheKey = function _getCacheKey(phrase, limit) {
    var cacheKey = phrase;

    if (limit) {
      cacheKey = phrase + "_" + limit;
    }

    return cacheKey;
  };

  /**
   * Looks up a single phrase. The phrase is split with `splitOnGetRegEx`; for
   * each word the values stored at or below the word's node are collected, and
   * the per-word collections are combined with HashArray's `intersection`.
   *
   * @param {string} phrase - Search text.
   * @param {number} [limit] - Stop collecting once this many items were gathered for a word.
   * @returns {Array} Matching items (possibly served from the cache).
   */
  _proto._get = function _get(phrase, limit) {
    phrase = this.options.ignoreCase ? phrase.toLowerCase() : phrase;
    var c, node;
    if (this.options.cache && (c = this.getCache.get(this._getCacheKey(phrase, limit)))) return c.value;
    var ret = undefined,
        haKeyFields = this.options.indexField ? [this.options.indexField] : this.keyFields,
        words = this.options.splitOnGetRegEx ? phrase.split(this.options.splitOnGetRegEx) : [phrase];

    for (var w = 0, l = words.length; w < l; w++) {
      // Words shorter than the configured minimum do not constrain the result.
      if (this.options.min && words[w].length < this.options.min) continue;
      var temp = new HashArray(haKeyFields);
      if (node = this.findNode(words[w])) aggregate(node, temp);
      ret = ret ? ret.intersection(temp) : temp;
    }

    var v = ret ? ret.all : [];

    if (this.options.cache) {
      var cacheKey = this._getCacheKey(phrase, limit);

      this.getCache.add({
        key: cacheKey,
        value: v
      });
      this.cleanCache();
    }

    return v;

    // Depth-first collection of every "value" list at or below `node` into `ha`,
    // stopping as soon as `limit` items have been gathered.
    function aggregate(node, ha) {
      if (limit && ha.all.length === limit) {
        return;
      }

      if (node.value && node.value.length) {
        if (!limit || ha.all.length + node.value.length < limit) {
          ha.addAll(node.value);
        } else {
          // Limit is less than the number of entries in the node.value + ha combined
          ha.addAll(node.value.slice(0, limit - ha.all.length));
          return;
        }
      }

      for (var k in node) {
        if (limit && ha.all.length === limit) {
          return;
        }

        if (k != "value") {
          // @ts-ignore
          aggregate(node[k], ha);
        }
      }
    }
  };

  /**
   * Looks up one or more phrases.
   *
   * Without a reducer, the matches of all phrases are added to one HashArray
   * and its contents are returned. With a reducer, the reducer is called as
   * `reducer(accumulator, phrase, matches, trie)` for each phrase and the final
   * accumulator is returned.
   *
   * @param {string|string[]} phrases - Phrase or list of phrases.
   * @param {Function} [reducer] - Combines per-phrase matches; requires `idFieldOrFunction`.
   * @param {number} [limit] - Passed through to each per-phrase lookup.
   * @returns {Array} Matching items; an empty array when nothing was looked up.
   * @throws {Error} When a reducer is supplied but `idFieldOrFunction` is not configured.
   */
  _proto.get = function get(phrases, reducer, limit) {
    var _accumulator;

    var haKeyFields = this.options.indexField ? [this.options.indexField] : this.keyFields;
    var ret = undefined;
    var accumulator = undefined;

    if (reducer && !this.options.idFieldOrFunction) {
      throw new Error("To use the accumulator, you must specify and idFieldOrFunction");
    }

    phrases = phrases instanceof Array ? phrases : [phrases];

    for (var i = 0, l = phrases.length; i < l; i++) {
      var matches = this._get(phrases[i], limit);

      if (reducer) {
        accumulator = reducer(accumulator, phrases[i], matches, this);
      } else {
        ret = ret ? ret.addAll(matches) : new HashArray(haKeyFields).addAll(matches);
      }
    }

    if (!reducer) {
      // `ret` is still undefined when `phrases` was an empty array.
      return ret ? ret.all : [];
    }

    return (_accumulator = accumulator) != null ? _accumulator : [];
  };

  /**
   * Returns the id of `item` according to `options.idFieldOrFunction`.
   *
   * @throws {Error} When `idFieldOrFunction` is not configured.
   */
  _proto.getId = function getId(item) {
    var idFieldOrFunction = this.options.idFieldOrFunction;

    if (idFieldOrFunction == null) {
      throw new Error("To use the accumulator, you must specify and idFieldOrFunction");
    }

    return typeof idFieldOrFunction === "function" ? idFieldOrFunction(item) : // @ts-ignore
    item[idFieldOrFunction];
  };

  /**
   * Exports `keyFields`, the serialisable options and the trie root.
   *
   * @throws {Error} When `expandRegexes`, `idFieldOrFunction`, `splitOnGetRegEx`
   *   or `splitOnRegEx` differ from their defaults, since those are omitted
   *   from the export.
   */
  _proto.toJson = function toJson() {
    if (this.options.expandRegexes !== TrieSearch.DEFAULT_OPTIONS.expandRegexes) {
      throw new Error("JSON export of non-default expandRegexes option not supported");
    }

    if (this.options.idFieldOrFunction !== TrieSearch.DEFAULT_OPTIONS.idFieldOrFunction) {
      throw new Error("JSON export of non-default idFieldOrFunction option not supported");
    }

    if ( // NOTE: default value of splitOnGetRegEx is splitOnRegEx
    this.options.splitOnGetRegEx !== TrieSearch.DEFAULT_OPTIONS.splitOnRegEx) {
      throw new Error("JSON export of non-default splitOnGetRegEx option not supported");
    }

    if (this.options.splitOnRegEx !== TrieSearch.DEFAULT_OPTIONS.splitOnRegEx) {
      throw new Error("JSON export of non-default splitOnRegEx option not supported");
    }

    var _this$options = this.options,
        options = _objectWithoutPropertiesLoose(_this$options, ["expandRegexes", "idFieldOrFunction", "splitOnGetRegEx", "splitOnRegEx"]);

    return {
      keyFields: this.keyFields,
      options: options,
      root: this.root
    };
  };

  return TrieSearch;
}();

TrieSearch.DEFAULT_OPTIONS = {
  cache: true,
  expandRegexes: DEFAULT_INTERNATIONALIZE_EXPAND_REGEXES,
  idFieldOrFunction: undefined,
  ignoreCase: true,
  insertFullUnsplitKey: false,
  keepAll: false,
  keepAllKey: "id",
  maxCacheSize: MAX_CACHE_SIZE,
  splitOnRegEx: /\s/g
};

/**
 * Reducer for `TrieSearch#get`. The first call (accumulator undefined) returns
 * `matches` unchanged; later calls return the items whose id (per
 * `trie.getId`) reaches a count of two across `accumulator` and `matches`.
 *
 * @param {Array|undefined} accumulator - Result of the previous call.
 * @param {string} _phrase - Current phrase (unused).
 * @param {Array} matches - Items matched by the current phrase.
 * @param {TrieSearch} trie - Trie supplying `getId`.
 * @returns {Array} The reduced item list.
 */
TrieSearch.UNION_REDUCER = function (accumulator, _phrase, matches, trie) {
  if (accumulator === undefined) {
    return matches;
  }

  // Prototype-less counter so ids such as "constructor" or "toString" do not
  // collide with inherited Object.prototype members.
  var map = Object.create(null);
  var id;
  var maxLength = Math.max(accumulator.length, matches.length);
  var results = [];
  var l = 0;

  // One loop, O(N) for max length of accumulator or matches.
  for (var i = 0; i < maxLength; i++) {
    if (i < accumulator.length) {
      id = trie.getId(accumulator[i]);
      map[id] = map[id] ? map[id] : 0;
      map[id]++;

      if (map[id] === 2) {
        results[l++] = accumulator[i];
      }
    }

    if (i < matches.length) {
      id = trie.getId(matches[i]);
      map[id] = map[id] ? map[id] : 0;
      map[id]++;

      if (map[id] === 2) {
        results[l++] = matches[i];
      }
    }
  }

  return results;
};

exports.TrieSearch = TrieSearch;
//# sourceMappingURL=trie-search.cjs.development.js.map