bst-typed
Version:
Binary Search Tree
695 lines (612 loc) • 20 kB
text/typescript
/**
* data-structure-typed
*
* @author Pablo Zeng
* @copyright Copyright (c) 2022 Pablo Zeng <zrwusa@gmail.com>
* @license MIT License
*/
import type { ElementCallback, TrieOptions } from '../../types';
import { IterableElementBase } from '../base';
/**
* Node used by Trie to store one character and its children.
* @remarks Time O(1), Space O(1)
*/
export class TrieNode {
/**
* Create a Trie node with a character key.
* @remarks Time O(1), Space O(1)
* @returns New TrieNode instance.
*/
constructor(key: string) {
this._key = key;
this._isEnd = false;
this._children = new Map<string, TrieNode>();
}
protected _key: string;
/**
* Get the character key of this node.
* @remarks Time O(1), Space O(1)
* @returns Character key string.
*/
get key(): string {
return this._key;
}
/**
* Set the character key of this node.
* @remarks Time O(1), Space O(1)
* @param value - New character key.
* @returns void
*/
set key(value: string) {
this._key = value;
}
protected _children: Map<string, TrieNode>;
/**
* Get the child map of this node.
* @remarks Time O(1), Space O(1)
* @returns Map from character to child node.
*/
get children(): Map<string, TrieNode> {
return this._children;
}
/**
* Replace the child map of this node.
* @remarks Time O(1), Space O(1)
* @param value - New map of character → node.
* @returns void
*/
set children(value: Map<string, TrieNode>) {
this._children = value;
}
protected _isEnd: boolean;
/**
* Check whether this node marks the end of a word.
* @remarks Time O(1), Space O(1)
* @returns True if this node ends a word.
*/
get isEnd(): boolean {
return this._isEnd;
}
/**
* Mark this node as the end of a word or not.
* @remarks Time O(1), Space O(1)
* @param value - Whether this node ends a word.
* @returns void
*/
set isEnd(value: boolean) {
this._isEnd = value;
}
}
/**
* Prefix tree (Trie) for fast prefix queries and word storage.
* @remarks Time O(1), Space O(1)
* @template R
* 1. Node Structure: Each node in a Trie represents a string (or a part of a string). The root node typically represents an empty string.
* 2. Child Node Relationship: Each node's children represent the strings that can be formed by adding one character to the string at the current node. For example, if a node represents the string 'ca', one of its children might represent 'cat'.
* 3. Fast Retrieval: Trie allows retrieval in O(m) time complexity, where m is the length of the string to be searched.
* 4. Space Efficiency: Trie can store a large number of strings very space-efficiently, especially when these strings share common prefixes.
* 5. Autocomplete and Prediction: Trie can be used for implementing autocomplete and word prediction features, as it can quickly find all strings with a common prefix.
* 6. Sorting: Trie can be used to sort a set of strings in alphabetical order.
* 7. String Retrieval: For example, searching for a specific string in a large set of strings.
* 8. Autocomplete: Providing recommended words or phrases as a user types.
* 9. Spell Check: Checking the spelling of words.
* 10. IP Routing: Used in certain types of IP routing algorithms.
* 11. Text Word Frequency Count: Counting and storing the frequency of words in a large amount of text data.
* @example
* // Autocomplete: Prefix validation and checking
* const autocomplete = new Trie<string>(['gmail.com', 'gmail.co.nz', 'gmail.co.jp', 'yahoo.com', 'outlook.com']);
*
* // Get all completions for a prefix
* const gmailCompletions = autocomplete.getWords('gmail');
* console.log(gmailCompletions); // ['gmail.com', 'gmail.co.nz', 'gmail.co.jp']
* @example
* // File System Path Operations
* const fileSystem = new Trie<string>([
* '/home/user/documents/file1.txt',
* '/home/user/documents/file2.txt',
* '/home/user/pictures/photo.jpg',
* '/home/user/pictures/vacation/',
* '/home/user/downloads'
* ]);
*
* // Find common directory prefix
* console.log(fileSystem.getLongestCommonPrefix()); // '/home/user/'
*
* // List all files in a directory
* const documentsFiles = fileSystem.getWords('/home/user/documents/');
* console.log(documentsFiles); // ['/home/user/documents/file1.txt', '/home/user/documents/file2.txt']
* @example
* // Autocomplete: Basic word suggestions
* // Create a trie for autocomplete
* const autocomplete = new Trie<string>([
* 'function',
* 'functional',
* 'functions',
* 'class',
* 'classes',
* 'classical',
* 'closure',
* 'const',
* 'constructor'
* ]);
*
* // Test autocomplete with different prefixes
* console.log(autocomplete.getWords('fun')); // ['functional', 'functions', 'function']
* console.log(autocomplete.getWords('cla')); // ['classes', 'classical', 'class']
* console.log(autocomplete.getWords('con')); // ['constructor', 'const']
*
* // Test with non-matching prefix
* console.log(autocomplete.getWords('xyz')); // []
* @example
* // Dictionary: Case-insensitive word lookup
* // Create a case-insensitive dictionary
* const dictionary = new Trie<string>([], { caseSensitive: false });
*
* // Add words with mixed casing
* dictionary.add('Hello');
* dictionary.add('WORLD');
* dictionary.add('JavaScript');
*
* // Test lookups with different casings
* console.log(dictionary.has('hello')); // true
* console.log(dictionary.has('HELLO')); // true
* console.log(dictionary.has('Hello')); // true
* console.log(dictionary.has('javascript')); // true
* console.log(dictionary.has('JAVASCRIPT')); // true
* @example
* // IP Address Routing Table
* // Add IP address prefixes and their corresponding routes
* const routes = {
* '192.168.1': 'LAN_SUBNET_1',
* '192.168.2': 'LAN_SUBNET_2',
* '10.0.0': 'PRIVATE_NETWORK_1',
* '10.0.1': 'PRIVATE_NETWORK_2'
* };
*
* const ipRoutingTable = new Trie<string>(Object.keys(routes));
*
* // Check IP address prefix matching
* console.log(ipRoutingTable.hasPrefix('192.168.1')); // true
* console.log(ipRoutingTable.hasPrefix('192.168.2')); // true
*
* // Validate IP address belongs to subnet
* const ip = '192.168.1.100';
* const subnet = ip.split('.').slice(0, 3).join('.');
* console.log(ipRoutingTable.hasPrefix(subnet)); // true
*/
export class Trie<R = any> extends IterableElementBase<string, R> {
/**
* Create a Trie and optionally bulk-insert words.
* @remarks Time O(totalChars), Space O(totalChars)
* @param [words] - Iterable of strings (or raw records if toElementFn is provided).
* @param [options] - Options such as toElementFn and caseSensitive.
* @returns New Trie instance.
*/
constructor(words: Iterable<string> | Iterable<R> = [], options?: TrieOptions<R>) {
super(options);
if (options) {
const { caseSensitive } = options;
if (caseSensitive !== undefined) this._caseSensitive = caseSensitive;
}
if (words) {
this.addMany(words);
}
}
protected _size: number = 0;
/**
* Get the number of stored words.
* @remarks Time O(1), Space O(1)
* @returns Word count.
*/
get size(): number {
return this._size;
}
protected _caseSensitive: boolean = true;
/**
* Get whether comparisons are case-sensitive.
* @remarks Time O(1), Space O(1)
* @returns True if case-sensitive.
*/
get caseSensitive(): boolean {
return this._caseSensitive;
}
protected _root: TrieNode = new TrieNode('');
/**
* Get the root node.
* @remarks Time O(1), Space O(1)
* @returns Root TrieNode.
*/
get root() {
return this._root;
}
/**
* (Protected) Get total count for base class iteration.
* @remarks Time O(1), Space O(1)
* @returns Total number of elements.
*/
protected get _total() {
return this._size;
}
/**
* Insert one word into the trie.
* @remarks Time O(L), Space O(L)
* @param word - Word to insert.
* @returns True if the word was newly added.
*/
add(word: string): boolean {
word = this._caseProcess(word);
let cur = this.root;
let isNewWord = false;
for (const c of word) {
let nodeC = cur.children.get(c);
if (!nodeC) {
nodeC = new TrieNode(c);
cur.children.set(c, nodeC);
}
cur = nodeC;
}
if (!cur.isEnd) {
isNewWord = true;
cur.isEnd = true;
this._size++;
}
return isNewWord;
}
/**
* Insert many words from an iterable.
* @remarks Time O(ΣL), Space O(ΣL)
* @param words - Iterable of strings (or raw records if toElementFn is provided).
* @returns Array of per-word 'added' flags.
*/
addMany(words: Iterable<string> | Iterable<R>): boolean[] {
const ans: boolean[] = [];
for (const word of words) {
if (this.toElementFn) {
ans.push(this.add(this.toElementFn(word as R)));
} else {
ans.push(this.add(word as string));
}
}
return ans;
}
/**
* Check whether a word exists.
* @remarks Time O(L), Space O(1)
* @param word - Word to search for.
* @returns True if present.
*/
override has(word: string): boolean {
word = this._caseProcess(word);
let cur = this.root;
for (const c of word) {
const nodeC = cur.children.get(c);
if (!nodeC) return false;
cur = nodeC;
}
return cur.isEnd;
}
/**
* Check whether the trie is empty.
* @remarks Time O(1), Space O(1)
* @returns True if size is 0.
*/
isEmpty(): boolean {
return this._size === 0;
}
/**
* Remove all words and reset to a fresh root.
* @remarks Time O(1), Space O(1)
* @returns void
*/
clear(): void {
this._size = 0;
this._root = new TrieNode('');
}
/**
* Delete one word if present.
* @remarks Time O(L), Space O(1)
* @param word - Word to delete.
* @returns True if a word was removed.
*/
delete(word: string): boolean {
word = this._caseProcess(word);
let isDeleted = false;
const dfs = (cur: TrieNode, i: number): boolean => {
const char = word[i];
const child = cur.children.get(char);
if (child) {
if (i === word.length - 1) {
if (child.isEnd) {
if (child.children.size > 0) {
child.isEnd = false;
} else {
cur.children.delete(char);
}
isDeleted = true;
return true;
}
return false;
}
const res = dfs(child, i + 1);
if (res && !cur.isEnd && child.children.size === 0) {
cur.children.delete(char);
return true;
}
return false;
}
return false;
};
dfs(this.root, 0);
if (isDeleted) {
this._size--;
}
return isDeleted;
}
/**
* Compute the height (max depth) of the trie.
* @remarks Time O(N), Space O(H)
* @returns Maximum depth from root to a leaf.
*/
getHeight(): number {
const startNode = this.root;
let maxDepth = 0;
if (startNode) {
const bfs = (node: TrieNode, level: number) => {
if (level > maxDepth) {
maxDepth = level;
}
const { children } = node;
if (children) {
for (const child of children.entries()) {
bfs(child[1], level + 1);
}
}
};
bfs(startNode, 0);
}
return maxDepth;
}
/**
* Check whether input is a proper prefix of at least one word.
* @remarks Time O(L), Space O(1)
* @param input - String to test as prefix.
* @returns True if input is a prefix but not a full word.
*/
hasPurePrefix(input: string): boolean {
input = this._caseProcess(input);
let cur = this.root;
for (const c of input) {
const nodeC = cur.children.get(c);
if (!nodeC) return false;
cur = nodeC;
}
return !cur.isEnd;
}
/**
* Check whether any word starts with input.
* @remarks Time O(L), Space O(1)
* @param input - String to test as prefix.
* @returns True if input matches a path from root.
*/
hasPrefix(input: string): boolean {
input = this._caseProcess(input);
let cur = this.root;
for (const c of input) {
const nodeC = cur.children.get(c);
if (!nodeC) return false;
cur = nodeC;
}
return true;
}
/**
* Check whether the trie’s longest common prefix equals input.
* @remarks Time O(min(H,L)), Space O(1)
* @param input - Candidate longest common prefix.
* @returns True if input equals the common prefix.
*/
hasCommonPrefix(input: string): boolean {
input = this._caseProcess(input);
let commonPre = '';
const dfs = (cur: TrieNode) => {
commonPre += cur.key;
if (commonPre === input) return;
if (cur.isEnd) return;
if (cur && cur.children && cur.children.size === 1) dfs(Array.from(cur.children.values())[0]);
else return;
};
dfs(this.root);
return commonPre === input;
}
/**
* Return the longest common prefix among all words.
* @remarks Time O(H), Space O(1)
* @returns The longest common prefix string.
*/
getLongestCommonPrefix(): string {
let commonPre = '';
const dfs = (cur: TrieNode) => {
commonPre += cur.key;
if (cur.isEnd) return;
if (cur && cur.children && cur.children.size === 1) dfs(Array.from(cur.children.values())[0]);
else return;
};
dfs(this.root);
return commonPre;
}
/**
* Collect words under a prefix up to a maximum count.
* @remarks Time O(K·L), Space O(K·L)
* @param [prefix] - Prefix to match; default empty string for root.
* @param [max] - Maximum number of words to return; default is Number.MAX_SAFE_INTEGER.
* @param [isAllWhenEmptyPrefix] - When true, collect from root even if prefix is empty.
* @returns Array of collected words (at most max).
*/
getWords(prefix = '', max = Number.MAX_SAFE_INTEGER, isAllWhenEmptyPrefix = false): string[] {
prefix = this._caseProcess(prefix);
const words: string[] = [];
let found = 0;
function dfs(node: TrieNode, word: string) {
for (const char of node.children.keys()) {
const charNode = node.children.get(char);
if (charNode !== undefined) {
dfs(charNode, word.concat(char));
}
}
if (node.isEnd) {
if (found > max - 1) return;
words.push(word);
found++;
}
}
let startNode = this.root;
if (prefix) {
for (const c of prefix) {
const nodeC = startNode.children.get(c);
if (nodeC) {
startNode = nodeC;
} else {
return [];
}
}
}
if (isAllWhenEmptyPrefix || startNode !== this.root) dfs(startNode, prefix);
return words;
}
/**
* Deep clone this trie by iterating and inserting all words.
* @remarks Time O(ΣL), Space O(ΣL)
* @returns A new trie with the same words and options.
*/
clone(): this {
const next = this._createInstance();
for (const x of this) next.add(x);
return next;
}
/**
* Filter words into a new trie of the same class.
* @remarks Time O(ΣL), Space O(ΣL)
* @param predicate - Predicate (word, index, trie) → boolean to keep word.
* @param [thisArg] - Value for `this` inside the predicate.
* @returns A new trie containing words that satisfy the predicate.
*/
filter(predicate: ElementCallback<string, R, boolean>, thisArg?: any): this {
const results = this._createInstance();
let index = 0;
for (const word of this) {
if (predicate.call(thisArg, word, index, this)) {
results.add(word);
}
index++;
}
return results;
}
map<RM>(callback: ElementCallback<string, R, string>, options?: TrieOptions<RM>, thisArg?: any): Trie<RM>;
/**
* Map words into a new trie (possibly different record type).
* @remarks Time O(ΣL), Space O(ΣL)
* @template EM
* @template RM
* @param callback - Mapping function (word, index, trie) → newWord (string).
* @param [options] - Options for the output trie (e.g., toElementFn, caseSensitive).
* @param [thisArg] - Value for `this` inside the callback.
* @returns A new Trie constructed from mapped words.
*/
map<EM, RM>(
callback: ElementCallback<string, R, EM>,
options?: TrieOptions<RM>,
thisArg?: any
): IterableElementBase<EM, RM>;
map<EM, RM>(callback: ElementCallback<string, R, EM>, options?: TrieOptions<RM>, thisArg?: any): any {
const newTrie = this._createLike<RM>([], options);
let i = 0;
for (const x of this) {
const v = thisArg === undefined ? callback(x, i++, this) : callback.call(thisArg, x, i++, this);
if (typeof v !== 'string') {
throw new TypeError(`Trie.map callback must return string; got ${typeof v}`);
}
newTrie.add(v);
}
return newTrie;
}
/**
* Map words into a new trie of the same element type.
* @remarks Time O(ΣL), Space O(ΣL)
* @param callback - Mapping function (word, index, trie) → string.
* @param [thisArg] - Value for `this` inside the callback.
* @returns A new trie with mapped words.
*/
mapSame(callback: ElementCallback<string, R, string>, thisArg?: any): this {
const next = this._createInstance();
let i = 0;
for (const key of this) {
const mapped = thisArg === undefined ? callback(key, i++, this) : callback.call(thisArg, key, i++, this);
next.add(mapped);
}
return next;
}
/**
* (Protected) Create an empty instance of the same concrete class.
* @remarks Time O(1), Space O(1)
* @param [options] - Options forwarded to the constructor.
* @returns An empty like-kind trie instance.
*/
protected _createInstance(options?: TrieOptions<R>): this {
const Ctor: any = this.constructor;
const next: any = new Ctor([], {
toElementFn: this.toElementFn,
caseSensitive: this.caseSensitive,
...(options ?? {})
});
return next as this;
}
/**
* (Protected) Create a like-kind trie and seed it from an iterable.
* @remarks Time O(ΣL), Space O(ΣL)
* @template RM
* @param [elements] - Iterable used to seed the new trie.
* @param [options] - Options forwarded to the constructor.
* @returns A like-kind Trie instance.
*/
protected _createLike<RM>(elements: Iterable<string> | Iterable<RM> = [], options?: TrieOptions<RM>): Trie<RM> {
const Ctor: any = this.constructor;
return new Ctor(elements, options) as Trie<RM>;
}
/**
* (Protected) Spawn an empty like-kind trie instance.
* @remarks Time O(1), Space O(1)
* @template RM
* @param [options] - Options forwarded to the constructor.
* @returns An empty like-kind Trie instance.
*/
protected _spawnLike<RM>(options?: TrieOptions<RM>): Trie<RM> {
return this._createLike<RM>([], options);
}
/**
* (Protected) Iterate all words in lexicographic order of edges.
* @remarks Time O(ΣL), Space O(H)
* @returns Iterator of words.
*/
protected *_getIterator(): IterableIterator<string> {
function* _dfs(node: TrieNode, path: string): IterableIterator<string> {
if (node.isEnd) {
yield path;
}
for (const [char, childNode] of node.children) {
yield* _dfs(childNode, path + char);
}
}
yield* _dfs(this.root, '');
}
/**
* (Protected) Normalize a string according to case sensitivity.
* @remarks Time O(L), Space O(L)
* @param str - Input string to normalize.
* @returns Normalized string based on caseSensitive.
*/
protected _caseProcess(str: string) {
if (!this._caseSensitive) {
str = str.toLowerCase();
}
return str;
}
}