dawg-set
Version:
Javascript directed acyclic word graph (DAWG)
268 lines (232 loc) • 7.02 kB
JavaScript
/**
* Directed Acyclic Word Graph (DAWG)
*
* Based on https://gist.github.com/smhanov/94230b422c2100ae4218
*/
"use strict";
const slice = Array.prototype.slice;
const join = (joiner) =>
typeof joiner === 'function'
? joiner
: (acc, current) => (acc === null ? '' : acc + joiner) + current;
const createEmptyNode = id => ({
edges: new Map(),
final: false,
id: id
});
/* Traversal
******************************************************************************/
const DONE = { done: true };
const Iterator = function (root, initial, join) {
// Linked list stack of nodes to visit.
this.stack = root && { node: root, value: initial, rest: null };
this.join = join;
};
Iterator.prototype.next = function () {
while (this.stack) {
const head = this.stack;
// Replace head of stack with visits to child edges of head node, then
// remove head.
let r = head;
for (let [edge, value] of head.node.edges)
r = r.rest = ({ node: value, value: this.join(head.value, edge), rest: r.rest });
this.stack = this.stack.rest;
if (head.node.final)
return head;
}
return DONE;
};
Iterator.prototype[Symbol.iterator] = function () {
return this;
};
/* Dawg
******************************************************************************/
const lexicographicalCompare = (a, b) =>
a.localeCompare(b);
const encode = node => {
let s = +node.final;
for (let [k, v] of node.edges)
s += `_${k}_${v.id}`;
return s;
};
const DEFAULT_OPTIONS = {
compare(a, b) {
if (typeof a === 'string')
return lexicographicalCompare(a, b);
const lenA = a.length;
const lenB = b.length;
for (let i = 0; i < lenA && i < lenB; ++i) {
const r = lexicographicalCompare(a[i], b[i]);
if (r !== 0)
return r;
}
return lenA - lenB;
}
};
/**
*
*/
export default class Dawg {
/**
* @param paths Optional iterable of paths to insert into the new DAWG.
*/
constructor(paths, options) {
this._options = Object.assign({}, options, DEFAULT_OPTIONS);
this._count = 0;
this._id = 0;
this._root = createEmptyNode(this._id++);
this._previous = "";
this._uncheckedNodes = []
this._minimizedNodes = {};
if (paths)
for (let p of paths)
this.add(p);
}
/**
* Get the number of entries in the DAWG.
*/
count() {
return this._count;
}
/**
* Add an entry to the DAWG.
*
* Entries must be added in lexographic order and cannot be added after
* the DAWG is marked as finalized.
*
* @param path Path components to add.
*/
add(path) {
if (this._finalized)
throw new Error("Dawg finalized, cannot insert new entries");
const order = this._options.compare(path, this._previous);
if (order < 0)
throw new Error("Paths must be inserted in lexograpic order");
if (order === 0)
return this;
let commonPrefix = 0;
for (; commonPrefix < path.length && commonPrefix < this._previous.length; ++commonPrefix)
if (path[commonPrefix] !== this._previous[commonPrefix])
break
this._minimize(commonPrefix)
let node = this._uncheckedNodes.length === 0 ? this._root : this._uncheckedNodes[this._uncheckedNodes.length - 1][2];
for (let letter of slice.call(path, commonPrefix)) {
let nextNode = createEmptyNode(this._id++);
node.edges.set(letter, nextNode);
this._uncheckedNodes.push([node, letter, nextNode]);
node = nextNode;
}
node.final = true;
this._previous = path;
++this._count;
return this;
}
_findNode(path) {
let node = this._root;
let i = 0;
for (let x of path) {
if (!node)
return [null, 0];
node = node.edges.get(x);
++i;
}
return [node, i];
}
/**
* Find the length of the longest match for `path` in the dawg.
*
* @returns a number in `[0, path.length]` indicating the longest match.
*
* @see match For actually getting the longest match
*/
longest(path) {
let best = 0;
let node = this._root;
if (node) {
let i = 0;
for (let x of path) {
node = node.edges.get(x);
if (!node)
break;
++i;
if (node.final)
best = i;
}
}
return best;
}
/**
* Does an exact entry for `path` exists in the DAWG?
*/
has(path) {
return this.longest(path) === path.length;
}
/**
* Return the path for the longest match in the dawg.
*
* @see longest
*/
match(path, joiner = '') {
return slice.call(path, 0, this.longest(path)).join('');
}
/**
* Mark the current DAWG as complete.
*
* Prevents further modifications.
*/
finalize() {
if (!this._finalized) {
this._minimize(0);
this._minimizedNodes = null;
this._uncheckedNodes = null;
this._finalized = true;
}
return this;
}
/**
* Get an iterator to all paths in the dawg.
*
* Yields arrays of raw path elements, not the joined strings.
*/
paths() {
return new Iterator(this._root, [], (acc, current) => acc.concat(current));
}
/**
* Get an iterator to all values in the dawg.
*
* @param joiner String or function used to join paths together into strings.
*/
values(joiner = '') {
return new Iterator(this._root, null, join(joiner));
}
/**
* Get an iterator to all values starting with `path` in the dawg.
*
* @see values
*/
valuesStartingWith(path, joiner = '') {
const [root, index] = this._findNode(path);
const joinFn = join(joiner);
return new Iterator(root, slice.call(path, 0, index).reduce(joinFn, null), joinFn);
}
_minimize(downTo) {
for (let i = 0, len = this._uncheckedNodes.length - downTo; i < len; ++i) {
const [parent, letter, child] = this._uncheckedNodes.pop();
const key = encode(child);
const existing = this._minimizedNodes[key];
if (existing) {
parent.edges.set(letter, existing);
} else {
this._minimizedNodes[key] = child;
}
}
}
}
Dawg.prototype[Symbol.iterator] = Dawg.prototype.values;
/**
* Create a finalized DAWG from an iterable.
*
* @see new Dawg()
*/
export const from = Dawg.from = (paths, options) =>
new Dawg(paths, options).finalize();