UNPKG

finitedomain

Version:

A fast feature rich finite domain solver

558 lines (494 loc) 17.6 kB
// NOTE: THIS IS NOT A GENERIC TRIE IMPLEMENTATION
// It's specifically geared towards the use within finitedomain
// Input strings are assumed to be limited to ascii 32-131 (see trie_add)

import {
  ASSERT,
  THROW,
} from './helpers';

// BODY_START

const TRIE_ROOT_OFFSET = 0;
const TRIE_BUCKET_COUNT = 10; // 10 digits
const TRIE_NODE_SIZE = TRIE_BUCKET_COUNT + 1; // inc value

const TRIE_INITIAL_SIZE = 16 * 1024;
const TRIE_MINIMAL_GROWTH = 4 * 1024;

const TRIE_KEY_NOT_FOUND = -1;

const TRIE_EMPTY = undefined;
const TRIE_DEFAULT_SIZE = undefined;
const TRIE_8_BIT = 8;
const TRIE_16_BIT = 16;
const TRIE_32_BIT = 32;
const TRIE_64_BIT = 64;
const TRIE_DEFAULT_BITS = undefined;

// Every trie node occupies TRIE_NODE_SIZE (11) cells: 10 jump cells (one per
// decimal digit) followed by 1 leaf value cell. The width of a cell is
// `trie.bits`; every cell must be wide enough to hold both the largest node
// address in the buffer and the largest stored leaf (which is `value + 1`).

/**
 * Create a new trie and, optionally, initialize it
 * with given values as keys and their index as value.
 * Check `trie_add` for assumed key composition restrictions
 *
 * @param {string[]} [valuesByIndex] If exists, adds all values in array as keys, index as values
 * @param {number} [initialLength] Hint to help control memory consumption for large/small tries. This length is in cells, not bytes. (byteLength=length*(bitsize/8)). Positive hints smaller than one node are raised to TRIE_NODE_SIZE.
 * @param {number} [initialBitsize] Hint to set bitsize explicitly. One of: 8 16 32 64
 * @returns {$trie}
 */
function trie_create(valuesByIndex, initialLength, initialBitsize) {
  let size = (initialLength | 0) || TRIE_INITIAL_SIZE;
  // it's possible the constant is not yet initialized due to minification;
  // dont initialize a trie in module global space
  if (!size) THROW('fixme');
  // the root node lives at offset 0 and must fit in the buffer. typed arrays
  // silently drop out-of-bounds writes so a too-small buffer would lose data.
  // (negative hints are left alone; the typed array constructor rejects them)
  if (size > 0 && size < TRIE_NODE_SIZE) size = TRIE_NODE_SIZE;
  // given bitsize might be lower than max address, ignore it in that case
  const bits = Math.max(trie_getValueBitsize(size), (initialBitsize | 0));
  const buf = trie_createBuffer(size, bits);

  // have to use a wrapper because the buffer ref may change when it grows
  // otherwise we could just store the meta data inside the buffer. but at
  // least this is easier to read :)
  const trie = {
    _class: '$trie',
    buffer: buf,
    bits: bits, // 8 16 32 (64?)
    lastNode: TRIE_ROOT_OFFSET, // pointer to last node in the buffer
    count: 0, // number of keys in the Trie

    // __REMOVE_BELOW_FOR_DIST__
    // debug stats... any use should be wrapped in ASSERT so that it's use gets removed in a dist
    _mallocs: '' + buf.length, // malloc steps in a string
    _adds: 0, // number of trie_add calls
    _addSteps: 0, // sum of steps taken in all trie_add calls
    _hass: 0, // number of trie_has calls
    _gets: 0, // number of trie_get calls (and also contains has)
    _getSteps: 0, // sum of steps for all gets on this trie
    // __REMOVE_ABOVE_FOR_DIST__
  };

  if (valuesByIndex) {
    for (let i = 0, n = valuesByIndex.length; i < n; ++i) {
      trie_add(trie, valuesByIndex[i], i);
    }
  }

  return trie;
}

/**
 * Create a buffer
 *
 * @param {number} size Length of the buffer in cells, not bytes (!)
 * @param {number} bits One of: 8 16 32 64
 * @returns {TypedArray}
 */
function trie_createBuffer(size, bits) {
  switch (bits) {
    case TRIE_8_BIT:
      return new Uint8Array(size);
    case TRIE_16_BIT:
      return new Uint16Array(size);
    case TRIE_32_BIT:
      return new Uint32Array(size);
    case TRIE_64_BIT:
      return new Float64Array(size); // let's hope not ;)
  }
  THROW('Unsupported bit size');
}

/**
 * Reserve a part of the Trie memory to represent a node in the Trie.
 *
 * In this particular implementation nodes are of fixed width. It's
 * a field of 10 address cells and one value cell.
 *
 * Address cells point to other nodes. If zero, there is none (because
 * that would be the root node) and a search ends in not found.
 *
 * Value cells that are zero (default) are also "not found".
 *
 * @param {$trie} trie
 * @returns {number} offset (in cells) of the newly reserved node
 */
function trie_addNode(trie) {
  const newNodePtr = trie.lastNode + TRIE_NODE_SIZE;
  trie.lastNode = newNodePtr;
  // technically the `while` is valid (instead of an `if`) but only
  // if the buffer could grow by a smaller amount than the node size...
  // note: buffer.length is cell size, buffer.byteLength is byte size. we want cells here.
  while (newNodePtr + TRIE_NODE_SIZE >= trie.buffer.length) trie_grow(trie);
  return newNodePtr;
}

/**
 * Allocate more size for this Trie
 *
 * Basically creates a new buffer with a larger size and then copies
 * the current buffer into it. If the new size exceeds the max size
 * of the current type (16bit/32bit) then the buffer is converted to
 * a bigger bit size automagically.
 * The trie buffer reference will be updated with the new buffer
 *
 * @param {$trie} trie
 */
function trie_grow(trie) {
  const len = trie.buffer.length; // cell size! not byte size.
  let newSize = ~~(len * 1.1); // grow by 10% (an arbitrary number)
  if (len + TRIE_MINIMAL_GROWTH > newSize) newSize = TRIE_MINIMAL_GROWTH + len;

  trie_malloc(trie, newSize);
}

/**
 * Allocate space for a Trie and copy given Trie to it.
 * Will grow bitsize if required, but never shrink it.
 * (Bitsize must grow if cell size exceeds certain thresholds
 * because otherwise we can't address all cells in the buffer)
 *
 * @param {$trie} trie
 * @param {number} size Cell size, not byte size
 */
function trie_malloc(trie, size) {
  // make sure addressing fits
  const newBits = trie_getValueBitsize(size);

  // dont shrink bit size even if length would allow it; "large" _values_ may require it
  // (our tries dont need to shrink)
  trie.bits = Math.max(trie.bits, newBits);

  const nbuf = trie_createBuffer(size, trie.bits);
  nbuf.set(trie.buffer, 0);
  ASSERT(trie._mallocs += ' ' + nbuf.length);
  trie.buffer = nbuf;
}

/**
 * Return the cell width in bits to fit given value.
 * For example, numbers below 256 can be represented in
 * 8 bits but numbers above it will need at least 16 bits.
 * Max is 64 but you can't pass on larger numbers in JS, anyways :)
 *
 * @param {number} value
 * @returns {number}
 */
function trie_getValueBitsize(value) {
  if (value < 0x100) return TRIE_8_BIT;
  else if (value < 0x10000) return TRIE_16_BIT;
  else if (value < 0x100000000) return TRIE_32_BIT;
  else return TRIE_64_BIT;
}

/**
 * Add a key/value pair
 *
 * Note: keys and values are of limited structure
 *
 * The key must be a string of ascii in range of 32-131.
 * This key is hashed by turning each character into its
 * ascii ordinal value (minus 32), stringifying it padded
 * with zero, and hashing each of the two resulting digits.
 * This way we can guarantee that each node in the Trie only
 * requires 10 places (one for each digit) plus a value.
 * That makes reads super fast.
 *
 * @param {$trie} trie
 * @param {string} key
 * @param {number} value Any unsigned integer; the buffer is widened when `value + 1` does not fit the current cell width
 * @returns {number} previous value, or -1 if there wasn't any
 */
function trie_add(trie, key, value) {
  ASSERT(++trie._adds);
  trie_ensureValueFits(trie, value);
  return _trie_add(trie, TRIE_ROOT_OFFSET, key, 0, key.length, value);
}

/**
 * Recursively find the place to add the key. If
 * the trail runs cold, pave it. Clobbers existing
 * values (though in our implementation that currently
 * shouldn't really happen...)
 *
 * @param {$trie} trie
 * @param {number} offset
 * @param {string} key
 * @param {number} index Current index of the key being walked
 * @param {number} len Cache of key.length
 * @param {number} value Unsigned integer; caller must have made sure `value + 1` fits a cell
 * @returns {number} the old value, or not found
 */
function _trie_add(trie, offset, key, index, len, value) {
  ASSERT(++trie._addSteps);

  ASSERT(offset >= 0, 'OFFSET_UNSIGNED');
  ASSERT(typeof key === 'string', 'STRING_KEY');
  ASSERT(index >= 0, 'INDEX_UNSIGNED');
  ASSERT(key.length === len, 'KEY_LEN');
  ASSERT(value >= 0, 'VALUE_UNSIGNED');

  // dont create next path part if it would create a leaf node
  if (index >= len) {
    const buf = trie.buffer;
    const valuePtr = offset + TRIE_BUCKET_COUNT;
    const curValue = buf[valuePtr];
    if (!curValue) ++trie.count;
    buf[valuePtr] = value + 1; // 0 is reserved to mean "unused"
    return curValue - 1;
  }

  // char codes 32..131 map to 0..99: exactly two decimal digits.
  // the low digit is walked first, then the high digit.
  const c = key.charCodeAt(index) - 32;

  offset = _trie_pavePath(trie, offset, c % 10);
  offset = _trie_pavePath(trie, offset, Math.floor(c / 10));

  return _trie_add(trie, offset, key, index + 1, len, value);
}

/**
 * Add a key/value pair
 *
 * This adds a value under a key that is a number. This
 * way reads and writes take `ceil(log(n)/log(10))` steps.
 * Eg. as many steps as digits in the decimal number.
 *
 * @param {$trie} trie
 * @param {number} key Assumes an unsigned int
 * @param {number} value Any unsigned integer; the buffer is widened when `value + 1` does not fit the current cell width
 * @returns {number} previous value, or -1 if there wasn't any
 */
function trie_addNum(trie, key, value) {
  ASSERT(++trie._adds);
  trie_ensureValueFits(trie, value);
  // key is offset by one so that walking always ends on key === 0
  return _trie_addNum(trie, TRIE_ROOT_OFFSET, key + 1, value);
}

/**
 * Recursively find the place to add the key. If
 * the trail runs cold, pave it. Clobbers existing
 * values (though in our implementation that currently
 * shouldn't really happen...)
 *
 * @param {$trie} trie
 * @param {number} offset
 * @param {number} key Assumes an unsigned int >0
 * @param {number} value Unsigned integer; caller must have made sure `value + 1` fits a cell
 * @returns {number} the old value, or not found
 */
function _trie_addNum(trie, offset, key, value) {
  ASSERT(++trie._addSteps);

  ASSERT(offset >= 0, 'OFFSET_UNSIGNED');
  ASSERT(typeof key === 'number', 'NUMBER_KEY');
  ASSERT(value >= 0, 'VALUE_UNSIGNED');

  if (key === 0) {
    const buf = trie.buffer;
    const valuePtr = offset + TRIE_BUCKET_COUNT;
    const curValue = buf[valuePtr];
    if (!curValue) ++trie.count;
    buf[valuePtr] = value + 1; // 0 is reserved to mean "unused"
    return curValue - 1;
  }

  offset = _trie_pavePath(trie, offset, key % 10);
  key = Math.floor(key / 10);

  return _trie_addNum(trie, offset, key, value);
}

/**
 * Make sure the Trie can hold a value of given magnitude.
 * If the current bitsize of the trie is too small it will
 * grow the buffer to accommodate the larger size.
 *
 * @param {$trie} trie
 * @param {number} value
 */
function trie_ensureValueFits(trie, value) {
  // leafs store `value + 1` (zero means "unused") so that is the number that
  // has to fit in a cell. sizing for `value` alone would wrap e.g. 255 in an
  // 8 bit trie (or 65535 in a 16 bit trie) around to 0, losing the key.
  const bitsNeeded = trie_getValueBitsize(value + 1);
  if (bitsNeeded > trie.bits) {
    trie.bits = bitsNeeded;
    trie_malloc(trie, trie.buffer.length); // note: length = cell size, byteLength = byte size. we mean cell here.
  }
}

/**
 * One step of writing a value. Offset should be a node, if
 * the digit has no address yet create it. If a node needs
 * to be created the buffer may be grown to fit the new node.
 * It will return the pointer of the (possibly new) next
 * node for given digit.
 *
 * @param {$trie} trie
 * @param {number} offset Start of a node
 * @param {number} digit Zero through nine
 * @returns {number} new address
 */
function _trie_pavePath(trie, offset, digit) {
  offset += digit;
  let ptr = trie.buffer[offset];
  if (!ptr) {
    ptr = trie_addNode(trie);
    // read trie.buffer again; trie_addNode may have replaced the buffer
    trie.buffer[offset] = ptr;
  }
  return ptr;
}

/**
 * Find the value for given key. See trie_add for more details.
 *
 * @param {$trie} trie
 * @param {string} key
 * @returns {number} -1 if not found, >= 0 otherwise
 */
function trie_get(trie, key) {
  ASSERT(++trie._gets);
  return _trie_get(trie, TRIE_ROOT_OFFSET, key, 0, key.length);
}

/**
 * Recursive function to search for key
 *
 * @param {$trie} trie
 * @param {number} offset Start of a node
 * @param {string} key
 * @param {number} index Current index of the key being walked
 * @param {number} len Cache of key.length
 * @returns {number} -1 if not found or >= 0 otherwise
 */
function _trie_get(trie, offset, key, index, len) {
  ASSERT(++trie._getSteps);

  ASSERT(offset >= 0, 'OFFSET_UNSIGNED');
  ASSERT(typeof key === 'string', 'STRING_KEY', key);
  ASSERT(index >= 0, 'INDEX_UNSIGNED');
  ASSERT(key.length === len, 'KEY_LEN', key);

  const buf = trie.buffer;

  if (index >= len) {
    const valuePtr = offset + TRIE_BUCKET_COUNT;
    return buf[valuePtr] - 1;
  }

  // char codes 32..131 map to 0..99: exactly two decimal digits.
  // the low digit is walked first, then the high digit.
  const c = key.charCodeAt(index) - 32;

  offset = buf[offset + (c % 10)];
  if (!offset) return TRIE_KEY_NOT_FOUND;

  offset = buf[offset + Math.floor(c / 10)];
  if (!offset) return TRIE_KEY_NOT_FOUND;

  return _trie_get(trie, offset, key, index + 1, len);
}

/**
 * See trie_get for more details
 *
 * @param {$trie} trie
 * @param {string} key
 * @returns {boolean}
 */
function trie_has(trie, key) {
  ASSERT(++trie._hass);
  return trie_get(trie, key) !== TRIE_KEY_NOT_FOUND;
}

/**
 * Find the value for given number key.
 * See trie_addNum for more details.
 *
 * @param {$trie} trie
 * @param {number} key Assumed to be an unsigned int >=0
 * @returns {number} -1 if not found, >= 0 otherwise
 */
function trie_getNum(trie, key) {
  ASSERT(++trie._gets);
  // same +1 key offset as trie_addNum
  return _trie_getNum(trie, TRIE_ROOT_OFFSET, key + 1);
}

/**
 * Recursive function to search for number key
 *
 * @param {$trie} trie
 * @param {number} offset Start of a node
 * @param {number} key Assumed to be an unsigned int >=0
 * @returns {number} -1 if not found or >= 0 otherwise
 */
function _trie_getNum(trie, offset, key) {
  ASSERT(++trie._getSteps);

  ASSERT(offset >= 0, 'OFFSET_UNSIGNED');
  ASSERT(typeof key === 'number', 'NUMBER_KEY');

  const buf = trie.buffer;

  if (key === 0) {
    const valuePtr = offset + TRIE_BUCKET_COUNT;
    return buf[valuePtr] - 1;
  }

  offset = buf[offset + (key % 10)];
  if (!offset) return TRIE_KEY_NOT_FOUND;

  key = Math.floor(key / 10);

  return _trie_getNum(trie, offset, key);
}

/**
 * See trie_getNum for more details
 *
 * @param {$trie} trie
 * @param {number} key Assumed to be unsigned int >= 0
 * @returns {boolean}
 */
function trie_hasNum(trie, key) {
  ASSERT(++trie._hass);
  return trie_getNum(trie, key) !== TRIE_KEY_NOT_FOUND;
}

/**
 * Human readable yay. Does not log, only returns a debug string.
 *
 * Note: patches a few missing ES6 builtins on native prototypes as a
 * side effect. Intended for debugging only.
 *
 * @param {$trie} trie
 * @param {boolean} [skipBuffer=false] When set, the per-node buffer dump is omitted
 * @returns {string}
 */
function _trie_debug(trie, skipBuffer) {
  /* eslint no-extend-native: "off" */
  let buf = trie.buffer;

  let lastNode = trie.lastNode;

  // patch some es6 stuff for debugging. note: dont do this in prod, it may slow stuff down.
  if (!String.prototype.padStart) {
    String.prototype.padStart = function(n, c) {
      let s = this;
      if (this.length < n) for (let i = 0; i < n - this.length; ++i) s = c + s;
      return s;
    };
  }
  if (!String.prototype.padEnd) {
    String.prototype.padEnd = function(n, c) {
      let s = this;
      if (this.length < n) for (let i = 0; i < n - this.length; ++i) s = s + c;
      return s;
    };
  }
  if (!Array.from) {
    Array.from = function(a) {
      return [].concat.call(a);
    };
  }

  // if one doesnt support them, they probably all dont.
  if (!Uint8Array.prototype.slice) {
    Uint8Array.prototype.slice = Uint16Array.prototype.slice = Uint32Array.prototype.slice = Float64Array.prototype.slice = Array.prototype.slice;
  }

  // format a byte count with a unit, truncated to two decimals
  function bytes(b) {
    if (b < 1024) return b + ' b';
    b /= 1024;
    if (b < 1024) return ~~(b * 100) / 100 + ' kb';
    b /= 1024;
    if (b < 1024) return ~~(b * 100) / 100 + ' mb';
    b /= 1024;
    return ~~(b * 100) / 100 + ' gb';
  }

  let pad = 20;
  let npad = 6;
  let s = '' +
    '\n' +
    '###\n' +
    'Key count:'.padEnd(pad, ' ') + trie.count + '\n' +
    'Node count:'.padEnd(pad, ' ') + ((lastNode / TRIE_NODE_SIZE) + 1) + ' (' + (((lastNode / TRIE_NODE_SIZE) + 1) / trie.count) + ' nodes per key)\n' +
    'Buffer cell length:'.padEnd(pad, ' ') + buf.length + '\n' +
    'Buffer byte length:'.padEnd(pad, ' ') + buf.byteLength + '\n' +
    'Bit size:'.padEnd(pad, ' ') + trie.bits + '\n' +
    'Node len:'.padEnd(pad, ' ') + TRIE_NODE_SIZE + '\n' +
    'Node size:'.padEnd(pad, ' ') + TRIE_NODE_SIZE + '\n' +
    'Last Node:'.padEnd(pad, ' ') + lastNode + '\n' +
    'Used space:'.padEnd(pad, ' ') + (lastNode + TRIE_NODE_SIZE) + ' cells, ' + bytes((lastNode + TRIE_NODE_SIZE) * (trie.bits >> 3)) + '\n' +
    'Unused space:'.padEnd(pad, ' ') + (buf.length - (lastNode + TRIE_NODE_SIZE)) + ' cells, ' + bytes((buf.length - (lastNode + TRIE_NODE_SIZE)) * (trie.bits >> 3)) + '\n' +
    // __REMOVE_BELOW_FOR_DIST__
    'Mallocs:'.padEnd(pad, ' ') + trie._mallocs + '\n' +
    'trie_adds:'.padEnd(pad, ' ') + trie._adds + '\n' +
    'Avg key distance:'.padEnd(pad, ' ') + (trie._addSteps / trie._adds) + '\n' +
    'trie_hass:'.padEnd(pad, ' ') + trie._hass + '\n' +
    'trie_gets:'.padEnd(pad, ' ') + trie._gets + '\n' +
    'Avg get distance:'.padEnd(pad, ' ') + trie._getSteps + ' -> ' + (trie._getSteps / trie._gets) + '\n' +
    // __REMOVE_ABOVE_FOR_DIST__
    '\n';

  if (!skipBuffer) {
    s += 'ptr \\ key= 0 1 2 3 4 5 6 7 8 9 -> value\n\n';

    // one output row per node: the ten jump cells, then the leaf value cell
    let ptr = TRIE_ROOT_OFFSET;
    while (ptr <= lastNode) {
      s += String(ptr).padStart(npad, ' ') + ': ' + Array.from(buf.slice(ptr, ptr + TRIE_NODE_SIZE - 1)).map(n => String(n).padStart(npad, ' ')).join(', ') + ' -> ' + String(buf[ptr + TRIE_NODE_SIZE - 1]).padStart(npad, ' ') + '\n';
      ptr += TRIE_NODE_SIZE;
    }
  }

  s += '###\n\n';

  return s;
}

// BODY_STOP

export {
  TRIE_8_BIT,
  TRIE_16_BIT,
  TRIE_32_BIT,
  TRIE_64_BIT,
  TRIE_DEFAULT_BITS,
  TRIE_DEFAULT_SIZE,
  TRIE_INITIAL_SIZE,
  TRIE_KEY_NOT_FOUND,
  TRIE_MINIMAL_GROWTH,
  TRIE_NODE_SIZE,
  TRIE_EMPTY,

  trie_add,
  trie_addNum,
  trie_create,
  _trie_debug,
  trie_get,
  trie_getNum,
  trie_getValueBitsize,
  trie_has,
  trie_hasNum,
};