UNPKG

@huggingface/transformers

Version:

State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!

262 lines • 9.19 kB
/**
 * @file Custom data structures.
 *
 * These are only used internally, meaning an end-user shouldn't
 * need to access anything here.
 *
 * This is a type-declaration file: it describes the shapes of the classes
 * only, and contains no implementation.
 *
 * @module utils/data-structures
 */
/**
 * Efficient heap-based implementation of a priority queue.
 * It uses an array-based binary heap, where the root is at index `0`, and the
 * children of node `i` are located at indices `2i + 1` and `2i + 2`, respectively.
 *
 * Adapted from the following sources:
 * - https://stackoverflow.com/a/42919752/13989043 (original)
 * - https://github.com/belladoreai/llama-tokenizer-js (minor improvements)
 */
export class PriorityQueue {
    /**
     * Create a new PriorityQueue.
     * @param {function(any, any): boolean} [comparator] Comparator function to determine priority. Defaults to a MaxHeap.
     * @param {number} [maxSize] Optional size limit for the queue.
     * NOTE(review): presumably caps how many elements the heap retains; the default and the
     * eviction behavior are not visible from this declaration — confirm against the implementation.
     */
    constructor(comparator?: (arg0: any, arg1: any) => boolean, maxSize?: number);
    /** Backing array of the binary heap (root at index `0`, as described on the class). */
    _heap: any[];
    /** Comparator used to order elements; presumably the one given to the constructor — TODO confirm. */
    _comparator: (arg0: any, arg1: any) => boolean;
    /** Size limit; presumably the `maxSize` given to the constructor — TODO confirm. */
    _maxSize: number;
    /**
     * The size of the queue.
     */
    get size(): number;
    /**
     * Check if the queue is empty.
     * @returns {boolean} `true` if the queue is empty, `false` otherwise.
     */
    isEmpty(): boolean;
    /**
     * Return the element with the highest priority in the queue.
     * @returns {any} The highest priority element in the queue.
     */
    peek(): any;
    /**
     * Add one or more elements to the queue.
     * @param {...any} values The values to push into the queue.
     * @returns {number} The new size of the queue.
     */
    push(...values: any[]): number;
    /**
     * Add multiple elements to the queue.
     * Takes a single array, whereas `push` takes the values as separate arguments.
     * @param {any[]} values The values to push into the queue.
     * @returns {number} The new size of the queue.
     */
    extend(values: any[]): number;
    /**
     * Remove and return the element with the highest priority in the queue.
     * @returns {any} The element with the highest priority in the queue.
     */
    pop(): any;
    /**
     * Replace the element with the highest priority in the queue with a new value.
     * @param {*} value The new value.
     * @returns {*} The replaced value.
     */
    replace(value: any): any;
    /**
     * Compute the index for the parent of the node at index `i`.
     * @param {number} i The index of the node to get the parent of.
     * @returns {number} The index of the parent node.
     * @private
     */
    private _parent;
    /**
     * Compute the index for the left child of the node at index `i`.
     * @param {number} i The index of the node to get the left child of.
     * @returns {number} The index of the left child.
     * @private
     */
    private _left;
    /**
     * Compute the index for the right child of the node at index `i`.
     * @param {number} i The index of the node to get the right child of.
     * @returns {number} The index of the right child.
     * @private
     */
    private _right;
    /**
     * Check if the element at index `i` is greater than the element at index `j`.
     * @param {number} i The index of the first element to compare.
     * @param {number} j The index of the second element to compare.
     * @returns {boolean} `true` if the element at index `i` is greater than the element at index `j`, `false` otherwise.
     * @private
     */
    private _greater;
    /**
     * Swap the elements at indices `i` and `j`.
     * @param {number} i The index of the first element to swap.
     * @param {number} j The index of the second element to swap.
     * @private
     */
    private _swap;
    /**
     * Maintain the heap property by updating positions in the heap,
     * starting at the last element and moving up the heap.
     * @private
     */
    private _siftUp;
    /**
     * Helper function to sift up from a given node.
     * NOTE(review): unlike the sibling helpers, this member is declared without the
     * `private` modifier, so it is part of the public type surface.
     * @param {number} node The index of the node to start sifting up from.
     */
    _siftUpFrom(node: number): void;
    /**
     * Maintain the heap property by updating positions in the heap,
     * starting at the first element and moving down the heap.
     * @private
     */
    private _siftDown;
    /**
     * Get the index of the smallest element in the heap. Since we use an array-based heap,
     * the index can be computed without needing to traverse the heap.
     * @private
     */
    private _smallest;
}
/**
 * A trie structure to efficiently store and search for strings.
 */
export class CharTrie {
    /** Root node of the trie. */
    root: CharTrieNode;
    /**
     * Adds one or more `texts` to the trie.
     * @param {string[]} texts The strings to add to the trie.
     */
    extend(texts: string[]): void;
    /**
     * Adds text to the trie.
     * @param {string} text The string to add to the trie.
     */
    push(text: string): void;
    /**
     * Searches the trie for all strings with a common prefix of `text`.
     * NOTE(review): the wording below is ambiguous about direction — it may be that the
     * yielded strings are stored entries which are prefixes of `text`, rather than entries
     * that start with `text`. Confirm against the implementation.
     * @param {string} text The common prefix to search for.
     * @yields {string} Each string in the trie that has `text` as a prefix.
     */
    commonPrefixSearch(text: string): Generator<string, void, unknown>;
}
/**
 * A lattice data structure to be used for tokenization.
 */
export class TokenLattice {
    /**
     * Creates a new TokenLattice instance.
     *
     * @param {string} sentence The input sentence to be tokenized.
     * @param {number} bosTokenId The beginning-of-sequence token ID.
     * @param {number} eosTokenId The end-of-sequence token ID.
     */
    constructor(sentence: string, bosTokenId: number, eosTokenId: number);
    /** Presumably the input sentence split into individual characters — TODO confirm. */
    chars: string[];
    /** Presumably the number of entries in `chars` — TODO confirm. */
    len: number;
    /** The beginning-of-sequence token ID. */
    bosTokenId: number;
    /** The end-of-sequence token ID. */
    eosTokenId: number;
    /** The token nodes held by this lattice. */
    nodes: TokenLatticeNode[];
    /** Presumably the nodes grouped by the position at which they begin — TODO confirm. */
    beginNodes: any[][];
    /** Presumably the nodes grouped by the position at which they end — TODO confirm. */
    endNodes: any[][];
    /**
     * Inserts a new token node into the token lattice.
     *
     * @param {number} pos The starting position of the token.
     * @param {number} length The length of the token.
     * @param {number} score The score of the token.
     * @param {number} tokenId The token ID of the token.
     */
    insert(pos: number, length: number, score: number, tokenId: number): void;
    /**
     * Implements the Viterbi algorithm to compute the most likely sequence of tokens.
     *
     * @returns {TokenLatticeNode[]} The most likely sequence of tokens.
     */
    viterbi(): TokenLatticeNode[];
    /**
     * Get the text piece for a single lattice node.
     * @param {TokenLatticeNode} node The node to get the piece for.
     * @returns {string} The piece for `node`; presumably the part of the sentence
     * covered by the node's `pos` and `length` — TODO confirm.
     */
    piece(node: TokenLatticeNode): string;
    /**
     * @returns {string[]} The most likely sequence of tokens.
     */
    tokens(): string[];
    /**
     * @returns {number[]} The most likely sequence of token ids.
     */
    tokenIds(): number[];
}
/**
 * A data structure which uses a trie to split a string into tokens based on a dictionary.
 * It can also use a regular expression to preprocess the input text before splitting.
 * NOTE(review): no regular-expression parameter or member appears in this declaration;
 * confirm whether the sentence above still applies.
 *
 * NOTE: To ensure multi-byte characters are handled correctly, we operate at byte-level instead of character-level.
 */
export class DictionarySplitter {
    /**
     * @param {string[]} dictionary The dictionary of words to use for splitting.
     */
    constructor(dictionary: string[]);
    /** The trie used for splitting; presumably the root node returned by `_buildTrie` — TODO confirm. */
    trie: any;
    /**
     * Builds a trie from the given dictionary.
     * @param {string[]} dictionary The dictionary of words to build the trie from.
     * @returns {Object} The root node of the trie.
     * @private
     */
    private _buildTrie;
    /**
     * Splits the input text into tokens based on the dictionary.
     * @param {string} text The input text to split.
     * @returns {string[]} An array of tokens.
     */
    split(text: string): string[];
}
/**
 * Represents a node in a character trie.
 * Declared but not exported: it is only reachable through `CharTrie`.
 */
declare class CharTrieNode {
    /**
     * Returns a new `CharTrieNode` instance with default values.
     * @returns {CharTrieNode} A new `CharTrieNode` instance with `isLeaf` set to `false` and an empty `children` map.
     */
    static default(): CharTrieNode;
    /**
     * Create a new CharTrieNode.
     * @param {boolean} isLeaf Whether the node is a leaf node or not.
     * @param {Map<string, CharTrieNode>} children A map containing the node's children, where the key is a character and the value is a `CharTrieNode`.
     */
    constructor(isLeaf: boolean, children: Map<string, CharTrieNode>);
    /** Whether the node is a leaf node or not. */
    isLeaf: boolean;
    /** The node's children, keyed by character. */
    children: Map<string, CharTrieNode>;
}
/**
 * A node in a token lattice for a given sentence.
 * Declared but not exported: it is only reachable through `TokenLattice`.
 */
declare class TokenLatticeNode {
    /**
     * Represents a node in a token lattice for a given sentence.
     * @param {number} tokenId The ID of the token associated with this node.
     * @param {number} nodeId The ID of this node.
     * @param {number} pos The starting position of the token in the sentence.
     * @param {number} length The length of the token.
     * @param {number} score The score associated with the token.
     */
    constructor(tokenId: number, nodeId: number, pos: number, length: number, score: number);
    /** The ID of the token associated with this node. */
    tokenId: number;
    /** The ID of this node. */
    nodeId: number;
    /** The starting position of the token in the sentence. */
    pos: number;
    /** The length of the token. */
    length: number;
    /** The score associated with the token. */
    score: number;
    /** Presumably the preceding node on the best path found so far — TODO confirm. */
    prev: any;
    /** Presumably the accumulated score of the best path ending at this node — TODO confirm. */
    backtraceScore: number;
    /**
     * Returns a clone of this node.
     * @returns {TokenLatticeNode} A clone of this node.
     */
    clone(): TokenLatticeNode;
}
export {};
//# sourceMappingURL=data-structures.d.ts.map