UNPKG

dawg-set

Version:

JavaScript directed acyclic word graph (DAWG)

473 lines (394 loc) 15.5 kB
/**
 * Directed Acyclic Word Graph (DAWG)
 *
 * Based on https://gist.github.com/smhanov/94230b422c2100ae4218
 */
"use strict";

// Resolve the CommonJS export object defensively so that evaluating this file
// where `exports` is not defined does not throw a ReferenceError.
const moduleExports = typeof exports !== 'undefined' ? exports : {};
Object.defineProperty(moduleExports, "__esModule", { value: true });

const slice = Array.prototype.slice;

/**
 * Build a reducer that appends one path element to an accumulated value.
 *
 * @param {string|Function} joiner Separator string, or a custom
 *   `(acc, current) => next` reducer which is returned unchanged.
 * @returns {Function} Reducer. For string joiners, a `null` accumulator means
 *   "nothing accumulated yet", so no separator is emitted before the first element.
 */
const join = (joiner) =>
  typeof joiner === 'function'
    ? joiner
    : (acc, current) => (acc === null ? '' : acc + joiner) + current;

/**
 * Create a graph node with no outgoing edges.
 *
 * @param {number} id Identifier used when encoding parent nodes.
 */
const createEmptyNode = (id) => ({ edges: new Map(), final: false, id });

/**
 * View `path` as something indexable with a numeric `length`.
 *
 * Strings and array-likes are returned as-is, so strings are addressed by
 * UTF-16 code unit exactly as `add` stores them. Any other iterable is
 * materialized into an array.
 */
const toIndexable = (path) =>
  typeof path === 'string' || (path != null && typeof path.length === 'number')
    ? path
    : Array.from(path);

/* Traversal
 ******************************************************************************/
const DONE = Object.freeze({ done: true, value: undefined });

/**
 * Depth-first iterator over every final node reachable from `root`.
 *
 * Children are visited in edge insertion order, and a final node is yielded
 * before any of its descendants.
 */
class DawgIterator {
  /**
   * @param root Node to start from; a falsy root yields nothing.
   * @param initial Accumulated value associated with `root`.
   * @param {Function} joinFn `(acc, edge) => next` used to extend the value.
   */
  constructor(root, initial, joinFn) {
    // Linked-list stack of nodes still to visit.
    this.stack = root ? { node: root, value: initial, rest: null } : null;
    this.join = joinFn;
  }

  /**
   * @returns {{done: boolean, value: *}} Standard iterator result.
   */
  next() {
    while (this.stack) {
      const head = this.stack;

      // Splice the children in directly after `head`, keeping edge order,
      // then drop `head` from the stack.
      let tail = head;
      for (const [edge, child] of head.node.edges) {
        tail.rest = { node: child, value: this.join(head.value, edge), rest: tail.rest };
        tail = tail.rest;
      }
      this.stack = head.rest;

      // Return a proper iterator result rather than the internal stack cell.
      if (head.node.final) return { done: false, value: head.value };
    }
    return DONE;
  }

  [Symbol.iterator]() {
    return this;
  }
}

/* Dawg
 ******************************************************************************/
const lexicographicalCompare = (a, b) => a.localeCompare(b);

/**
 * Produce a string that identifies a node by its finality and its outgoing
 * `(label, child id)` pairs; nodes with equal keys are merged by `_minimize`.
 *
 * Labels are length-prefixed so that labels containing `_` or digits cannot
 * make two different edge sets encode to the same key.
 */
const encode = (node) => {
  let key = String(+node.final);
  for (const [label, child] of node.edges) {
    const text = `${label}`;
    key += `_${text.length}:${text}_${child.id}`;
  }
  return key;
};

const DEFAULT_OPTIONS = Object.freeze({
  /**
   * Default ordering: `localeCompare` for strings, element-wise
   * `localeCompare` followed by length for array-like paths.
   *
   * Note that paths this comparator reports as equal (result `0`) are
   * treated as duplicates by `add` and are not inserted.
   */
  compare(a, b) {
    if (typeof a === 'string') return lexicographicalCompare(a, b);

    const lenA = a.length;
    const lenB = b.length;
    for (let i = 0; i < lenA && i < lenB; ++i) {
      const r = lexicographicalCompare(a[i], b[i]);
      if (r !== 0) return r;
    }
    return lenA - lenB;
  }
});

/**
 * Set of paths (strings or array-likes of elements) stored as a minimized
 * directed acyclic word graph.
 */
class Dawg {
  /**
   * @param paths Optional iterable of paths to insert into the new DAWG.
   * @param options Optional settings. `options.compare(a, b)` overrides the
   *   ordering used to validate insertion order.
   */
  constructor(paths, options) {
    // Caller options must override the defaults, not the other way round.
    this._options = Object.assign({}, DEFAULT_OPTIONS, options);
    if (typeof this._options.compare !== 'function') {
      this._options.compare = DEFAULT_OPTIONS.compare;
    }

    this._count = 0;
    this._id = 0;
    this._root = createEmptyNode(this._id++);
    this._previous = "";
    // Stack of [parent, label, child] triples along the most recent path
    // that have not been checked for duplicates yet.
    this._uncheckedNodes = [];
    // encode(node) -> canonical node.
    this._minimizedNodes = new Map();
    this._finalized = false;

    if (paths) {
      for (const p of paths) this.add(p);
    }
  }

  /**
   * Get the number of entries in the DAWG.
   */
  count() {
    return this._count;
  }

  /**
   * Add an entry to the DAWG.
   *
   * Entries must be added in lexicographic order (as defined by
   * `options.compare`) and cannot be added after the DAWG is finalized.
   * A path that compares equal to the previously added one is ignored.
   *
   * @param path Path components to add (string or array-like).
   * @returns {Dawg} `this`, for chaining.
   * @throws {Error} If the DAWG is finalized, if `path` sorts before the
   *   previous path, or if inserting it would overwrite an existing branch.
   */
  add(path) {
    if (this._finalized) throw new Error("Dawg finalized, cannot insert new entries");

    const order = this._options.compare(path, this._previous);
    if (order < 0) throw new Error("Paths must be inserted in lexograpic order");
    if (order === 0) return this;

    const previous = this._previous;
    const limit = Math.min(path.length, previous.length);
    let commonPrefix = 0;
    while (commonPrefix < limit && path[commonPrefix] === previous[commonPrefix]) {
      ++commonPrefix;
    }

    this._minimize(commonPrefix);

    const unchecked = this._uncheckedNodes;
    let node = unchecked.length === 0 ? this._root : unchecked[unchecked.length - 1][2];

    // The branch node is the only pre-existing node on the new suffix. If it
    // already has an edge for the first new element, an earlier entry with
    // that prefix would be silently dropped, so refuse instead.
    if (commonPrefix < path.length && node.edges.has(path[commonPrefix])) {
      throw new Error("Paths sharing a prefix must be inserted consecutively");
    }

    for (let i = commonPrefix; i < path.length; ++i) {
      const letter = path[i];
      const nextNode = createEmptyNode(this._id++);
      node.edges.set(letter, nextNode);
      unchecked.push([node, letter, nextNode]);
      node = nextNode;
    }

    // Do not count the same entry twice if a custom comparator lets an
    // element-wise identical path through.
    if (!node.final) {
      node.final = true;
      ++this._count;
    }
    this._previous = path;
    return this;
  }

  /**
   * Walk `path` from the root.
   *
   * @returns {Array} `[node, path.length]` when every element has a matching
   *   edge, otherwise `[null, 0]`.
   */
  _findNode(path) {
    const elements = toIndexable(path);
    let node = this._root;
    for (let i = 0; i < elements.length; ++i) {
      node = node.edges.get(elements[i]);
      if (!node) return [null, 0];
    }
    return [node, elements.length];
  }

  /**
   * Find the length of the longest match for `path` in the dawg.
   *
   * @returns a number in `[0, path.length]` indicating the longest match.
   *
   * @see match For actually getting the longest match
   */
  longest(path) {
    // Index-based walk so strings are read by code unit, matching `add`.
    const elements = toIndexable(path);
    let best = 0;
    let node = this._root;
    for (let i = 0; i < elements.length; ++i) {
      node = node.edges.get(elements[i]);
      if (!node) break;
      if (node.final) best = i + 1;
    }
    return best;
  }

  /**
   * Does an exact entry for `path` exist in the DAWG?
   */
  has(path) {
    // Check finality of the reached node so an empty path is not reported
    // as present just because zero elements trivially match.
    const node = this._findNode(path)[0];
    return Boolean(node) && node.final;
  }

  /**
   * Return the path for the longest match in the dawg.
   *
   * @param path Path to match against the stored entries.
   * @param {string} joiner Separator placed between matched elements.
   * @returns {string} The matched elements joined with `joiner`.
   *
   * @see longest
   */
  match(path, joiner = '') {
    return slice.call(path, 0, this.longest(path)).join(joiner);
  }

  /**
   * Mark the current DAWG as complete.
   *
   * Prevents further modifications.
   */
  finalize() {
    if (!this._finalized) {
      this._minimize(0);
      this._minimizedNodes = null;
      this._uncheckedNodes = null;
      this._finalized = true;
    }
    return this;
  }

  /**
   * Get an iterator to all paths in the dawg.
   *
   * Yields arrays of raw path elements, not the joined strings.
   */
  paths() {
    // Wrap the element so an array-valued element is not flattened.
    return new DawgIterator(this._root, [], (acc, current) => acc.concat([current]));
  }

  /**
   * Get an iterator to all values in the dawg.
   *
   * @param joiner String or function used to join paths together into strings.
   */
  values(joiner = '') {
    return new DawgIterator(this._root, null, join(joiner));
  }

  /**
   * Get an iterator to all values starting with `path` in the dawg.
   *
   * @param path Prefix that yielded values must start with.
   * @param joiner String or function used to join paths together into strings.
   *
   * @see values
   */
  valuesStartingWith(path, joiner = '') {
    const found = this._findNode(path);
    const root = found[0];
    const index = found[1];
    const joinFn = join(joiner);
    const prefix = slice
      .call(path, 0, index)
      .reduce((acc, current) => joinFn(acc, current), null);
    return new DawgIterator(root, prefix, joinFn);
  }

  /**
   * Pop unchecked nodes until only `downTo` remain, replacing each popped
   * child with an already-registered equivalent node when one exists.
   */
  _minimize(downTo) {
    const unchecked = this._uncheckedNodes;
    while (unchecked.length > downTo) {
      const entry = unchecked.pop();
      const parent = entry[0];
      const letter = entry[1];
      const child = entry[2];

      const key = encode(child);
      const existing = this._minimizedNodes.get(key);
      if (existing) {
        parent.edges.set(letter, existing);
      } else {
        this._minimizedNodes.set(key, child);
      }
    }
  }
}

moduleExports.default = Dawg;

Dawg.prototype[Symbol.iterator] = Dawg.prototype.values;

/**
 * Create a finalized DAWG from an iterable.
 *
 * @see new Dawg()
 */
const from = Dawg.from = function (paths, options) {
  return new Dawg(paths, options).finalize();
};
moduleExports.from = from;