UNPKG

dropflow

Version:

A small CSS2 document renderer built from specifications

212 lines (211 loc) 9.16 kB
// entities
//
// This should only be used by gen.js: it uses nodejs imports
//
// Copyright (c) Felix Böhm
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// THIS IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import { ok, strictEqual } from 'assert';

/**
 * Builds a trie from a map of keys to values, then collapses structurally
 * identical sub-tries so that they are shared.
 *
 * Each node is a plain object with an optional `value` (copied from
 * `map[key]` at the node reached by the last character of `key`) and an
 * optional `next`, a `Map` from UTF-16 code unit to child node.
 *
 * @param {Object} map - Own enumerable keys are inserted into the trie.
 *   An empty-string key contributes nothing.
 * @returns {{next: Map}} The root node. After merging, the result may be a
 *   DAG rather than a tree: equal nodes are the same object.
 */
export function getTrie(map) {
  const trie = new Map();
  const root = { next: trie };

  for (const key of Object.keys(map)) {
    // Walk (and create as needed) one node per code unit of the key.
    let lastMap = trie;
    for (let i = 0; i < key.length; i++) {
      const char = key.charCodeAt(i);
      const next = lastMap.get(char) ?? {};
      lastMap.set(char, next);
      if (i === key.length - 1) {
        next.value = map[key];
      } else {
        lastMap = next.next ??= new Map();
      }
    }
  }

  /**
   * Deep structural equality of two trie nodes: same value, and the same
   * children under the same characters in the same insertion order.
   */
  function isEqual(node1, node2) {
    if (node1 === node2) return true;
    if (node1.value !== node2.value) return false;
    // Identical `next` here means both are undefined (or the same Map).
    if (node1.next === node2.next) return true;
    if (
      node1.next == null ||
      node2.next == null ||
      node1.next.size !== node2.next.size
    ) {
      return false;
    }
    const next1 = Array.from(node1.next);
    const next2 = Array.from(node2.next);
    return next1.every(([char1, child1], idx) => {
      const [char2, child2] = next2[idx];
      return char1 === char2 && isEqual(child1, child2);
    });
  }

  /**
   * Breadth-first walk from `node`; every child that is structurally equal
   * to an already-seen node is replaced by that earlier node.
   */
  function mergeDuplicates(node) {
    const nodes = [node];
    // `nodes` grows while we iterate, which is what makes this a BFS.
    for (let nodeIdx = 0; nodeIdx < nodes.length; nodeIdx++) {
      const { next } = nodes[nodeIdx];
      if (!next) continue;
      for (const [char, child] of next) {
        const idx = nodes.findIndex((seen) => isEqual(seen, child));
        if (idx > -1) {
          next.set(char, nodes[idx]);
        } else {
          nodes.push(child);
        }
      }
    }
  }

  mergeDuplicates(root);
  return root;
}

/**
 * Number of bits needed to represent a non-negative integer.
 *
 * `Math.ceil(Math.log2(num))` is NOT that number: it is one too small for
 * exact powers of two (4 -> 2, 65536 -> 16) and -Infinity for 0, which let
 * out-of-range values slip through the bit-width assertions below.
 *
 * @param {number} num - A non-negative integer.
 * @returns {number} 0 for 0, otherwise floor(log2(num)) + 1. NaN or
 *   negative input yields NaN, so any `<=` / `>` comparison on it is false.
 */
function binaryLength(num) {
  if (num === 0) return 0;
  return Math.floor(Math.log2(num)) + 1;
}

/**
 * Encodes the trie in binary form, as an array of 16-bit unsigned integers.
 *
 * Every node starts with one header word, laid out as written by the code
 * below:
 *
 * - bits 14-15: value marker. 0 = no value; 1 = the value is a single code
 *   unit stored inline in the low 14 bits (only for nodes without
 *   branches); otherwise `value.length + 1`, with the value's code units
 *   following the header word.
 * - bits 7-13: branch count (dictionary) or jump table length.
 * - bits 0-6: jump table offset, or the character of a single branch.
 *
 * Branches can be:
 *  1. A single branch: the matching character is stored in the header and
 *     the destination node follows directly.
 *  2. A dictionary: two successive tables, characters and destination
 *     pointers. Characters have to be binary-searched to get the index of
 *     the destination pointer.
 *  3. A jump table: one slot per character in the covered range, holding
 *     the destination pointer + 1 (so an empty slot, 0, decodes to -1).
 *
 * @param {{value?: string, next?: Map}} trie - Root node, e.g. from `getTrie`.
 * @param {number} [maxJumpTableOverhead=2] - A jump table is used when
 *   `(last char - first char + 1) / branch count` does not exceed this.
 * @returns {number[]} The encoded trie; every entry fits in a UInt16.
 * @throws {AssertionError} If any field does not fit its bit width.
 */
export function encodeTrie(trie, maxJumpTableOverhead = 2) {
  const encodeCache = new Map();
  const enc = [];

  function encodeNode(node) {
    // Cache nodes, as merged tries share nodes (and we may revisit them).
    const cached = encodeCache.get(node);
    if (cached != null) return cached;

    const startIndex = enc.length;
    encodeCache.set(node, startIndex);
    const nodeIdx = enc.push(0) - 1;

    if (node.value != null) {
      let valueLength = 0;
      /*
       * If we don't have a branch and the value is short (a single code
       * unit that fits in 14 bits), we can store the value in the node.
       */
      if (
        node.next ||
        node.value.length > 1 ||
        binaryLength(node.value.charCodeAt(0)) > 14
      ) {
        valueLength = node.value.length;
      }
      // Add 1 to the value length, to signal that we have a value.
      valueLength += 1;
      ok(binaryLength(valueLength) <= 2, 'Too many bits for value length');
      enc[nodeIdx] |= valueLength << 14;

      if (valueLength === 1) {
        enc[nodeIdx] |= node.value.charCodeAt(0);
      } else {
        for (let i = 0; i < node.value.length; i++) {
          enc.push(node.value.charCodeAt(i));
        }
      }
    }

    if (node.next) addBranches(node.next, nodeIdx);

    strictEqual(nodeIdx, startIndex, 'Has expected location');
    return startIndex;
  }

  function addBranches(next, nodeIdx) {
    const branches = Array.from(next.entries());
    // Sort branches ASC by key
    branches.sort(([a], [b]) => a - b);

    // The branch count lives in the 7 bits between the value marker and
    // the low 7 bits of the header word.
    ok(binaryLength(branches.length) <= 7, 'Too many bits for branches');

    // If we only have a single branch, we can write the next value directly
    if (branches.length === 1 && !encodeCache.has(branches[0][1])) {
      const [char, child] = branches[0];
      ok(binaryLength(char) <= 7, 'Too many bits for single char');
      enc[nodeIdx] |= char;
      encodeNode(child);
      return;
    }

    const branchIndex = enc.length;

    /*
     * If we have (nearly) consecutive branches, we can write them as a
     * jump table. First, we determine how much space the jump table adds
     * relative to the number of branches; if that exceeds
     * `maxJumpTableOverhead`, skip it.
     */
    const jumpOffset = branches[0][0];
    const jumpEndValue = branches[branches.length - 1][0];
    const jumpTableLength = jumpEndValue - jumpOffset + 1;
    const jumpTableOverhead = jumpTableLength / branches.length;

    if (jumpTableOverhead <= maxJumpTableOverhead) {
      ok(
        binaryLength(jumpOffset) <= 16,
        `Offset ${jumpOffset} too large at ${binaryLength(jumpOffset)}`
      );
      ok(
        binaryLength(jumpTableLength) <= 7,
        `Too many bits (${binaryLength(jumpTableLength)}) for branches`
      );
      // Write the length of the adjusted table, plus jump offset
      enc[nodeIdx] |= (jumpTableLength << 7) | jumpOffset;

      // Reserve space for the jump table
      for (let i = 0; i < jumpTableLength; i++) enc.push(0);

      // Write the jump table
      for (const [char, child] of branches) {
        const index = char - jumpOffset;
        // Write all values + 1, so 0 will result in a -1 when decoding
        enc[branchIndex + index] = encodeNode(child) + 1;
      }
      return;
    }

    enc[nodeIdx] |= branches.length << 7;
    enc.push(
      ...branches.map(([char]) => char),
      // Reserve space for destinations, using a value that is out of bounds
      ...branches.map(() => Number.MAX_SAFE_INTEGER)
    );
    strictEqual(
      enc.length,
      branchIndex + branches.length * 2,
      'Did not reserve enough space'
    );

    // Encode the branches
    branches.forEach(([val, child], idx) => {
      ok(val < 128, 'Branch value too large');
      const currentIndex = branchIndex + branches.length + idx;
      strictEqual(
        enc[currentIndex - branches.length],
        val,
        'Should have the value as the first element'
      );
      strictEqual(
        enc[currentIndex],
        Number.MAX_SAFE_INTEGER,
        'Should have the placeholder as the second element'
      );
      const offset = encodeNode(child);
      ok(binaryLength(offset) <= 16, 'Too many bits for offset');
      enc[currentIndex] = offset;
    });
  }

  encodeNode(trie);

  // Make sure that every value fits in a UInt16
  ok(
    enc.every(
      (val) => typeof val === 'number' && val >= 0 && binaryLength(val) <= 16
    ),
    'Too many bits'
  );

  return enc;
}