UNPKG

reign

Version:

A persistent, typed-objects implementation.

645 lines (557 loc) 20.9 kB
/* @flow */ import Backing from "backing"; import { $Backing, $Address, } from "../symbols"; import {forceInline} from "../performance"; import type {Realm} from "../"; export type StringPool = { size: uint32; hash (input: string): uint32; get (input: string): float64; add (input: string): float64; remove (input: string): boolean; unref (address: float64): boolean; has (input: string): boolean; }; const HEADER_SIZE = 16; const ARRAY_POINTER_OFFSET = 0; const ARRAY_LENGTH_OFFSET = 8; const CARDINALITY_OFFSET = 12; const INITIAL_BUCKET_COUNT = 4096; const TYPE_ASCII = 'TYPE_ASCII'; const TYPE_CHAR_ARRAY = 'TYPE_CHAR_ARRAY'; const STRING_LENGTH_OFFSET = 0; const STRING_HASH_OFFSET = 4; const STRING_HEADER_SIZE = 8; const STRING_DATA_OFFSET = STRING_HEADER_SIZE; type PossibleTypes = 'TYPE_ASCII'|'TYPE_CHAR_ARRAY'; export function make (realm: Realm, poolPointerAddress: float64): StringPool { const {TypeClass, StringType, backing} = realm; class StringPool { constructor (input?: Backing, address?: float64) { trace: `Creating string pool.`; if (!(input instanceof Backing)) { input = backing; address = backing.calloc(HEADER_SIZE); createPool(backing, address); } // @flowIssue 252 this[$Backing] = input; // @flowIssue 252 this[$Address] = address; } get size (): uint32 { // @flowIssue 252 return getCardinality(this[$Backing], this[$Address]); } /** * Return the hash code for the given string. */ hash (input: string): uint32 { return hashString(input); } /** * Gets the address of the given string, if it exists, otherwise 0. */ get (input: string): float64 { let hash = 0x811c9dc5; let allAscii = true; for (let i = 0; i < input.length; i++) { const code: uint16 = input.charCodeAt(i); if (code > 127) { allAscii = false; } hash ^= code; hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); } hash = hash >>> 0; // @flowIssue 252 const backing = this[$Backing]; trace: `Looking up hash ${hash}`; // @flowIssue 252 return lookupString(backing, this[$Address], input, hash, allAscii); } /** * Adds the given string to the pool if it does not already exist, and returns its address. * Note that adding a string to the pool will increment the string's reference count by 1. */ add (input: string): float64 { trace: `Adding ${input} to the pool.`; // @flowIssue 252 return createString(this[$Backing], this[$Address], ''+input); } /** * Remove the given string from the pool *if* its reference count is 1, * otherwise decrement the reference count by one. * * Returns `true` if the string was actually removed, otherwise `false`. */ remove (input: string): boolean { let hash = 0x811c9dc5; let allAscii = true; for (let i = 0; i < input.length; i++) { const code: uint16 = input.charCodeAt(i); if (code > 127) { allAscii = false; } hash ^= code; hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); } hash = hash >>> 0; // @flowIssue 252 return removeString(this[$Backing], this[$Address], input, hash, allAscii); } /** * Decrement the reference count of a string at the given address. */ unref (address: float64): boolean { // @flowIssue 252 return unrefString(this[$Backing], this[$Address], address); } /** * Determines whether the given string exists in the pool. */ has (input: string): boolean { let hash = 0x811c9dc5; let allAscii = true; for (let i = 0; i < input.length; i++) { const code: uint16 = input.charCodeAt(i); if (code > 127) { allAscii = false; } hash ^= code; hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); } hash = hash >>> 0; // @flowIssue 252 return lookupString(this[$Backing], this[$Address], input, hash, allAscii) !== 0; } } /** * Create a string pool at the given address. */ function createPool (backing: Backing, address: float64): void { trace: `Creating a new string pool at ${address}.`; const pointerArrayLength = INITIAL_BUCKET_COUNT; const pointerArrayAddress = backing.calloc(pointerArrayLength * 8); setArrayAddress(backing, address, pointerArrayAddress); setArrayLength(backing, address, pointerArrayLength); setCardinality(backing, address, 0); } function getArrayAddress (backing: Backing, address: float64): float64 { return backing.getFloat64(address); } function setArrayAddress (backing: Backing, address: float64, value: float64): void { backing.setFloat64(address, value); } function getArrayLength (backing: Backing, address: float64): float64 { return backing.getUint32(address + ARRAY_LENGTH_OFFSET); } function setArrayLength (backing: Backing, address: float64, value: float64): void { backing.setUint32(address + ARRAY_LENGTH_OFFSET, value); } function getCardinality (backing: Backing, address: float64): uint32 { return backing.getUint32(address + CARDINALITY_OFFSET); } function setCardinality (backing: Backing, address: float64, value: uint32): void { backing.setUint32(address + CARDINALITY_OFFSET, value); } /** * Read the hash for the given string. */ function getStringHash (backing: Backing, address: float64): uint32 { return backing.getUint32(address + STRING_HASH_OFFSET); } forceInline(getStringHash); /** * Write the hash for the given string. */ function setStringHash (backing: Backing, address: float64, hash: uint32): void { return backing.setUint32(address + STRING_HASH_OFFSET, hash); } forceInline(setStringHash); /** * Read the number of characters in the string at the given address. */ function getNumberOfCharacters (backing: Backing, address: float64): uint32 { return Math.abs(backing.getInt32(address)); // STRING_LENGTH_OFFSET === 0 so no need to add. } forceInline(getNumberOfCharacters); /** * Read the string at the given address. */ function getString (backing: Backing, address: float64): string { if (address === 0) { return ''; } const arena = backing.arenaFor(address); let offset: uint32 = backing.offsetFor(address); const length: int32 = arena.int32Array[offset >> 2]; if (length < 0) { offset = (offset + STRING_DATA_OFFSET) >> 1; return String.fromCharCode(...arena.uint16Array.slice(offset, offset + Math.abs(length))); } else { offset = (offset + STRING_DATA_OFFSET); return String.fromCharCode(...arena.uint8Array.slice(offset, offset + Math.abs(length))); } } forceInline(getString); /** * Check that the string stored at the given address matches the given input + hash. */ function checkEqual (backing: Backing, address: float64, input: string, hash: uint32, allAscii: boolean): boolean { assert: Math.floor(address) === address; trace: `Checking address ${address} vs ${input} (${hash}).`; if (getStringHash(backing, address) !== hash) { trace: `Hash does not match ${hash}.`; return false; } let length = backing.getInt32(address); assert: Math.floor(length) === length && Math.abs(length) < Math.pow(2, 31); trace: `Got raw string length ${length} ${Math.floor(length)}`; if (length < 0) { if (allAscii) { return false; } length = -length; if (length !== input.length) { return false; } const arena = backing.arenaFor(address); const chars: Uint16Array = arena.uint16Array; const offset = (backing.offsetFor(address + STRING_HEADER_SIZE)) >> 1; for (let i = 0; i < length; i++) { if (input.charCodeAt(i) !== chars[offset + i]) { return false; } } return true; } else { if (!allAscii) { return false; } else if (length !== input.length) { return false; } const arena = backing.arenaFor(address); const chars: Uint8Array = arena.uint8Array; const offset = backing.offsetFor(address + STRING_HEADER_SIZE); for (let i = 0; i < length; i++) { if (input.charCodeAt(i) !== chars[offset + i]) { return false; } } return true; } } forceInline(checkEqual); /** * Store the given string, intern it and return the address. * If a string already exists in the pool, the existing string's address * will be returned and no duplicate will be created. */ function createString (backing: Backing, poolAddress: float64, input: string): float64 { let hash = 0x811c9dc5; let allAscii = true; const length = input.length; for (let i = 0; i < length; i++) { const code: uint16 = input.charCodeAt(i); if (code > 127) { allAscii = false; } hash ^= code; hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); } hash = hash >>> 0; trace: `Got hash ${hash} for ${allAscii ? 'ascii' : 'multi-byte'} string of ${length} characters.`; return lookupOrInsertString(backing, poolAddress, input, hash, allAscii); } forceInline(createString); /** * Store the given raw string and return the address. * The string will NOT be interned. */ function createRawString (backing: Backing, input: string): float64 { let hash = 0x811c9dc5; let allAscii = true; const length = input.length; for (let i = 0; i < length; i++) { const code: uint16 = input.charCodeAt(i); if (code > 127) { allAscii = false; } hash ^= code; hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); } hash = hash >>> 0; trace: `Got hash ${hash} for ${allAscii ? 'ascii' : 'multi-byte'} string of ${length} characters.`; return storeString(backing, input, hash, allAscii); } forceInline(createRawString); /** * Returns the hash for the given string. */ function hashString (input: string): uint32 { let hash = 0x811c9dc5; for (let i = 0; i < input.length; i++) { hash ^= input.charCodeAt(i); hash += (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); } return hash >>> 0; } forceInline(hashString); /** * Return the appropriate bucket for the given input + hash. */ function probe (backing: Backing, poolAddress: float64, input: string, hash: uint32, allAscii: boolean): float64 { const pointerArrayLength = getArrayLength(backing, poolAddress); const pointerArrayAddress = getArrayAddress(backing, poolAddress); trace: `Probing for hash ${hash}`; const arena = backing.arenaFor(pointerArrayAddress); const float64Array = arena.float64Array; assert: float64Array.length > 0; const startOffset = backing.offsetFor(pointerArrayAddress) >> 3; let index = (hash & (pointerArrayLength - 1)); let offset = startOffset + index; let address: float64 = 0; while ((address = float64Array[offset]) !== 0 && !checkEqual(backing, address, input, hash, allAscii)) { index++; if (index >= pointerArrayLength) { index = 0; } offset = startOffset + index; } return arena.startAddress + (offset << 3); } forceInline(probe); /** * Find the address of the string for the given input + hash, or 0 if it does not exist. */ function lookupString (backing: Backing, poolAddress: float64, input: string, hash: uint32, allAscii: boolean): float64 { return backing.getFloat64(probe(backing, poolAddress, input, hash, allAscii)); } forceInline(lookupString); /** * Find the address of the string for the given input + hash, or create it if it does not exist. */ function lookupOrInsertString (backing: Backing, poolAddress: float64, input: string, hash: uint32, allAscii: boolean): float64 { const pointerAddress: float64 = probe(backing, poolAddress, input, hash, allAscii); let address: float64 = backing.getFloat64(pointerAddress); if (address !== 0) { trace: `String already exists at ${address}, incrementing ref count.`; backing.gc.ref(address); return address; } trace: `No entry found for input ${input}, inserting one.`; address = storeString(backing, input, hash, allAscii); backing.setFloat64(pointerAddress, address); const size = getCardinality(backing, poolAddress) + 1; setCardinality(backing, poolAddress, size); const pointerArrayLength = getArrayLength(backing, poolAddress); if (size + (size >> 2) >= pointerArrayLength) { trace: `Growing the hash map because we reached >= 80% occupancy.`; resize(backing, poolAddress); } return address; } forceInline(lookupOrInsertString); /** * Store a string and return its address. */ function storeString (backing: Backing, input: string, hash: uint32, allAscii: boolean): float64 { if (allAscii) { return storeAsciiString(backing, input, hash); } else { return storeMultibyteString(backing, input, hash); } } forceInline(storeString); function storeAsciiString (backing: Backing, input: string, hash: uint32): float64 { const length = input.length; trace: `Storing an ascii string of ${length} character(s): ${JSON.stringify(input)}`; const byteLength = length + STRING_HEADER_SIZE; const address: float64 = backing.gc.alloc(byteLength, 0, 1); backing.setInt32(address, length); backing.setUint32(address + STRING_HASH_OFFSET, hash); const offset = backing.offsetFor(address + STRING_DATA_OFFSET); const chars: Uint8Array = backing.arenaFor(address).uint8Array; for (let i = 0; i < length; i++) { chars[offset + i] = input.charCodeAt(i); } return address; } forceInline(storeAsciiString); function storeMultibyteString (backing: Backing, input: string, hash: uint32): float64 { const length = input.length; const byteLength = length + length + STRING_HEADER_SIZE; const address: float64 = backing.gc.alloc(byteLength, 0, 1); backing.setInt32(address, -length); backing.setUint32(address + STRING_HASH_OFFSET, hash); const offset = backing.offsetFor(address + STRING_DATA_OFFSET) >> 1; const chars: Uint16Array = backing.arenaFor(address).uint16Array; for (let i = 0; i < length; i++) { chars[offset + i] = input.charCodeAt(i); } return address; } forceInline(storeMultibyteString); /** * Decrement the reference count for the given string and remove it from the pool if appropriate. */ function unrefString (backing: Backing, poolAddress: float64, target: float64): boolean { const hash = getStringHash(backing, target); const pointerArrayLength = getArrayLength(backing, poolAddress); const pointerArrayAddress = getArrayAddress(backing, poolAddress); trace: `Probing for hash ${hash}`; const arena = backing.arenaFor(pointerArrayAddress); const float64Array = arena.float64Array; assert: float64Array.length > 0; const startOffset = backing.offsetFor(pointerArrayAddress) >> 3; let index = (hash & (pointerArrayLength - 1)); let offset = startOffset + index; let address: float64 = 0; while ((address = float64Array[offset]) !== 0 && address !== target) { index++; if (index >= pointerArrayLength) { index = 0; } offset = startOffset + index; } const p = arena.startAddress + (offset << 3); return removeStringByPointer(backing, poolAddress, p); } forceInline(unrefString); /** * Remove the given input + hash from the hash map. */ function removeString (backing: Backing, poolAddress: float64, input: string, hash: uint32, allAscii: boolean): boolean { let p: float64 = probe(backing, poolAddress, input, hash, allAscii); return removeStringByPointer(backing, poolAddress, p); } forceInline(removeString); /** * Remove the string at the given address from the hash map. */ function removeStringByPointer (backing: Backing, poolAddress: float64, p: float64): boolean { const address = backing.getFloat64(p); if (address === 0) { // Item does not exist. return false; } if (backing.gc.unref(address) > 0) { // Item has other references return false; } const pointerArrayLength = getArrayLength(backing, poolAddress); const pointerArrayAddress = getArrayAddress(backing, poolAddress); const end = pointerArrayAddress + (pointerArrayLength * 8); let q: float64 = p; while (true) { // Move q to the next entry q = q + 8; if (q === end) { q = pointerArrayAddress; } const qPointer = backing.getFloat64(q); // All entries between p and q have their initial position between p and q // and the entry p can be cleared without breaking the search for these // entries. if (qPointer === 0) { break; } const qHash = getStringHash(backing, qPointer); // Find the initial position for the entry at position q. const r: float64 = pointerArrayAddress + ((qHash & (pointerArrayLength - 1)) * 8); // If the entry at position q has its initial position outside the range // between p and q it can be moved forward to position p and will still be // found. There is now a new candidate entry for clearing. if ((q > p && (r <= p || r > q)) || (q < p && (r <= p && r > q))) { backing.copy(p, q, 8); p = q; } } // Clear the entry which is allowed to be emptied. setStringHash(backing, backing.getFloat64(p), 0); setCardinality(backing, poolAddress, getCardinality(backing, poolAddress) - 1); return true; } forceInline(removeStringByPointer); function resize (backing: Backing, poolAddress: float64): void { const pointerArrayLength = getArrayLength(backing, poolAddress); const pointerArrayAddress = getArrayAddress(backing, poolAddress); trace: `Resizing string pool to ${pointerArrayLength * 2} buckets.`; const newPointerArrayLength = pointerArrayLength * 2; const newPointerArrayAddress = backing.calloc(newPointerArrayLength * 8); setArrayAddress(backing, poolAddress, newPointerArrayAddress); setArrayLength(backing, poolAddress, newPointerArrayLength); const newOffset = backing.offsetFor(newPointerArrayAddress) / 8; const newPointers = backing.arenaFor(newPointerArrayAddress).float64Array; const oldOffset = backing.offsetFor(pointerArrayAddress) / 8; const oldPointers = backing.arenaFor(pointerArrayAddress).float64Array; for (let oldIndex = 0; oldIndex < pointerArrayLength; oldIndex++) { const address = oldPointers[oldOffset + oldIndex]; if (address === 0) { continue; } const hash: uint32 = getStringHash(backing, address); let targetIndex = (hash & (newPointerArrayLength - 1)); let offset = newOffset + targetIndex; let pointer = 0; while (newPointers[offset] !== 0) { targetIndex++; if (targetIndex >= newPointerArrayLength) { targetIndex = 0; } offset = newOffset + targetIndex; } newPointers[offset] = address; } backing.free(pointerArrayAddress); } { const address = backing.getFloat64(poolPointerAddress); if (address === 0) { trace: `Found no existing string pool, creating a new one.`; const pool = new StringPool(); // @flowIssue 252 backing.setFloat64(poolPointerAddress, pool[$Address]); return pool; } else { trace: `Loading an existing string pool from ${address}.`; return new StringPool(backing, address); } } } function randomAsciiString (): string { const length: uint32 = Math.floor(Math.random() * 255); const chars: uint8[] = new Array(length); let seed = Math.round(Math.random() * 100000); for (let i = 0; i < length; i++) { seed = (seed + (i * 333)) % 127; if (seed < 32) { seed += 32; } chars[i] = seed; } return String.fromCharCode(...chars); } function randomMultiByteString (): string { const length: uint32 = Math.floor(Math.random() * 255); const chars: uint16[] = new Array(length); let seed = Math.round(Math.random() * 100000); for (let i = 0; i < length; i++) { seed = (seed + (i * 333)) % 512; if (seed < 32) { seed += 32; } chars[i] = seed; } return String.fromCharCode(...chars); }