UNPKG

sc4

Version:

A command line utility for automating SimCity 4 modding tasks & modifying savegames

314 lines (313 loc) 11.9 kB
// Parameters below are tuned for optimal balance between the probability of
// hash collisions and memory consumption. For the type mask, it turns out that
// with 256, we get a perfect hash function for all the types that are commonly
// found in game assets - i.e. *not* within savegames. We don't bother too much
// about collisions for savegames though, as there are only limited collisions
// in savegames. However, for stuff like plugin indexing for example, the amount
// of exemplars is huge, so there it's crucial to avoid the collision detection!
const BUCKETS_TYPE = 0x100;

// Sentinel used in the temporary linked lists to denote "no entry".
const NONE = 0xffffffff;

// # generateMap(entries, size, hash, label)
// Builds a serialized hash index for a flat array of TGIs.
// - entries: Uint32Array holding [type, group, instance] triplets.
// - size: the amount of buckets. Must be a power of 2, as "size - 1" is used
//   as a bit mask on the hash values.
// - hash: function (entries, offset) => integer, where offset is the position
//   of the *type* of the TGI inside entries.
// - label: prefix for the performance marks that are set while building.
// Returns a Uint32Array laid out as
//   [size, ...size bucket offsets, ...(length, ...pointers) per bucket]
// where a pointer is the index of a TGI, i.e. its offset in entries / 3.
function generateMap(entries, size, hash, label = '') {

	// We will first generate the hash for every TGI and then create the linked
	// lists for every bucket. We need two things for this:
	// 1. A Uint32Array that contains a tuple of the pointer to the first and
	//    last element of the linked list.
	// 2. Another Uint32Array that contains, for every TGI pointer, the pointer
	//    to the next element in the linked list.
	performance.mark(`${label}:start`);
	performance.mark(`${label}:hash:start`);
	const mask = size - 1;
	const nEntries = entries.length / 3;
	const firstLastTuples = new Uint32Array(2 * size).fill(NONE);
	const nextList = new Uint32Array(nEntries).fill(NONE);
	for (let i = 0, iii = 0; i < nEntries; i++, iii += 3) {
		const hashValue = hash(entries, iii) & mask;
		const bucketIndex = (hashValue << 1) >>> 0;
		if (firstLastTuples[bucketIndex] === NONE) {
			// First entry of this bucket: it is both the head and the tail.
			firstLastTuples[bucketIndex] = i;
		} else {
			// Append to the tail so that insertion order is preserved.
			const prevLast = firstLastTuples[bucketIndex + 1];
			nextList[prevLast] = i;
		}
		firstLastTuples[bucketIndex + 1] = i;
	}
	performance.mark(`${label}:hash:end`);

	// Now build up the actual index. The size of it is known upfront:
	// - "1" slot for the bucket size
	// - "size" slots that point to the start of every bucket
	// - "size" slots for the length value of every bucket
	// - "entries" slots for every pointer to an entry
	performance.mark(`${label}:serialize:start`);
	const output = new Uint32Array(1 + 2 * size + nEntries);
	output[0] = size;
	const pointers = output.subarray(1, 1 + size);
	const buckets = output.subarray(1 + size);
	for (let i = 0, currentOffset = 0; i < size; i++) {

		// We will now fill up the bucket from the linked list that we've built
		// up. The first slot of a bucket holds its length, the pointers follow.
		const lengthOffset = currentOffset;
		let count = 0;
		let next = firstLastTuples[(i << 1) >>> 0];
		let j = lengthOffset + 1;
		while (next !== NONE) {
			count++;
			buckets[j++] = next;
			next = nextList[next];
		}
		buckets[lengthOffset] = count;
		pointers[i] = currentOffset;
		currentOffset += count + 1;
	}
	performance.mark(`${label}:serialize:end`);
	performance.mark(`${label}:end`);
	return output;

}

// # find(index, hash)
// Finds all pointers - with potential collisions - for the given hash. The
// index is a Uint32Array as returned by generateMap(). The result is a
// subarray *view* on the index, so it must not be modified by the caller.
function find(index, hash) {
	const size = index[0];
	const mask = size - 1;
	const bucketIndex = hash & mask;
	// Bucket offsets are relative to the bucket area, which starts right after
	// the size slot and the "size" offset slots.
	const ptr = 1 + size + index[1 + bucketIndex];
	const length = index[ptr];
	const start = ptr + 1;
	return index.subarray(start, start + length);
}

// # getPerformanceLabel(name, instance)
// Helper for generating unique labels for measuring build performance. The
// module-level counter below is incremented for every index that gets built,
// which keeps the performance marks of different indices apart.
let instance = 0;
function getPerformanceLabel(name, instance) {
	return `${name}${instance}`;
}

// # Index
// An index over a list of TGIs that allows looking up entries by type, by
// type & instance, and by type, group & instance. All lookups return
// *pointers*, i.e. the position of the TGI in the list the index was built
// from.
export default class Index {

	// Sequence number used for the performance labels. It stays 0 for indices
	// that were restored with fromBuffer(), as those were never built here.
	instance = 0;

	// Flat Uint32Array of [type, group, instance] triplets.
	tgis;

	// The serialized hash maps by type, by type & instance and by TGI.
	t;
	ti;
	tgi;

	// ## constructor(opts)
	// Accepts the already built parts: { instance?, tgis, t, ti, tgi }.
	constructor(opts) {
		this.instance = opts.instance ?? 0;
		this.tgis = opts.tgis;
		this.t = opts.t;
		this.ti = opts.ti;
		this.tgi = opts.tgi;
	}

	// ## fromEntries(entries)
	// Builds an index from an array of objects that have numeric "type",
	// "group" and "instance" properties.
	static fromEntries(entries) {
		instance++;
		const tgis = new Uint32Array(3 * entries.length);
		let offset = 0;
		for (const entry of entries) {
			tgis[offset++] = entry.type;
			tgis[offset++] = entry.group;
			tgis[offset++] = entry.instance;
		}
		const t = generateMap(
			tgis,
			BUCKETS_TYPE,
			hashType,
			getPerformanceLabel('t', instance),
		);

		// The bucket size for our TGI index depends on the size of the tgis. We
		// aim for a filling degree of 0.75.
		const amount = tgis.length / 3;
		const buckets = nextPowerOf2(amount / 0.75);
		const tgi = generateMap(
			tgis,
			buckets,
			hashTypeGroupInstance,
			getPerformanceLabel('tgi', instance),
		);

		// Same for ti.
		const ti = generateMap(
			tgis,
			buckets,
			hashTypeInstance,
			getPerformanceLabel('ti', instance),
		);
		return new Index({ instance, tgis, t, ti, tgi });
	}

	// ## findType(type)
	// Finds the *pointers* - i.e. indices - to all entries with the given Type
	// ID. Returns a plain array, which is empty if nothing was found.
	findType(type) {
		const hash = hash32to16(type);
		const pointers = find(this.t, hash);
		const filtered = [];
		for (let i = 0; i < pointers.length; i++) {
			const ptr = pointers[i];
			// A bucket may hold colliding types, so verify the actual value.
			if (equalsType(this.tgis, 3 * ptr, type)) filtered.push(ptr);
		}
		return filtered;
	}

	// ## findTGI(type, group, instance)
	// Finds the *pointers* - i.e. indices - to all entries with the given TGI.
	findTGI(type, group, instance) {
		const hash = hash96to32(type, group, instance);
		const pointers = find(this.tgi, hash);
		const filtered = [];
		for (let i = 0; i < pointers.length; i++) {
			const ptr = pointers[i];
			if (equalsTGI(this.tgis, 3 * ptr, type, group, instance)) {
				filtered.push(ptr);
			}
		}
		return filtered;
	}

	// ## findTI(type, instance)
	// Finds the *pointers* - i.e. indices - to all entries with the given TI.
	// We're not sure whether we actually need this, as the game only seems to
	// look for stuff by TGI, so perhaps we can get rid of this.
	findTI(type, instance) {
		const hash = hash64to32(type, instance);
		const pointers = find(this.ti, hash);
		const filtered = [];
		for (let i = 0; i < pointers.length; i++) {
			const ptr = pointers[i];
			if (equalsTI(this.tgis, 3 * ptr, type, instance)) {
				filtered.push(ptr);
			}
		}
		return filtered;
	}

	// ## getPerformanceLabel(name)
	// Returns the label under which the performance marks for the map with the
	// given name ("t", "ti" or "tgi") of this index were set.
	getPerformanceLabel(name) {
		return getPerformanceLabel(name, this.instance);
	}

	// ## getStats()
	// Returns a bunch of stats about the index, useful for debugging &
	// profiling purposes. The "performance" list of a map only contains the
	// measurements for which performance marks actually exist. For an index
	// restored with fromBuffer() no marks were ever set, so the list is empty
	// in that case instead of performance.measure() throwing.
	getStats() {
		const indices = { t: this.t, ti: this.ti, tgi: this.tgi };
		const measurements = [
			['Total build time', undefined],
			['Insert values', 'hash'],
			['Serialize buffer', 'serialize'],
		];
		return {
			size: this.tgis.length / 3,
			indices: Object.entries(indices).map(([name, index]) => {
				const size = index[0];
				const pointers = index.subarray(1, 1 + size);
				const buckets = index.subarray(1 + size);
				// Every pointer refers to the length slot of its bucket.
				const empty = pointers.reduce(
					(mem, ptr) => mem + (buckets[ptr] === 0 ? 1 : 0),
					0,
				);
				const label = this.getPerformanceLabel(name);
				return {
					name,
					buckets: size,
					byteLength: index.byteLength,
					fillingDegree: 1 - empty / size,
					performance: measurements
						.filter(([, sublabel]) => hasMarks(label, sublabel))
						.map(([title, sublabel]) => {
							const entry = measure(title, label, sublabel);
							return {
								name: entry.name,
								duration: entry.duration,
							};
						}),
				};
			}),
		};
	}

	// ## serialize()
	// Serializes the index to a Uint8Array. The tgis and the three maps are
	// written one after another, each prefixed with its length (in 32-bit
	// words).
	serialize() {
		const { tgis, t, ti, tgi } = this;
		const words = tgis.length + t.length + ti.length + tgi.length + 4;
		const output = new ArrayBuffer(Uint32Array.BYTES_PER_ELEMENT * words);
		let i = 0;
		const set = (arr) => {
			const target = new Uint32Array(output, i, arr.length + 1);
			target[0] = arr.length;
			target.set(arr, 1);
			i += target.byteLength;
		};
		set(tgis);
		set(t);
		set(ti);
		set(tgi);
		return new Uint8Array(output);
	}

	// ## fromBuffer({ buffer, byteOffset })
	// Restores an index from the output of serialize(). Accepts any view on an
	// ArrayBuffer, e.g. a Uint8Array. The data is copied, so the restored
	// index does not keep referencing the given buffer.
	static fromBuffer({ buffer, byteOffset }) {
		let i = 0;
		const get = () => {
			const sizeof = Uint32Array.BYTES_PER_ELEMENT;
			const start = byteOffset + i;
			const length = new DataView(buffer, start).getUint32(0, true);
			// Slicing copies the bytes, which also guarantees that the new
			// Uint32Array is properly aligned.
			const copy = new Uint32Array(
				buffer.slice(start + sizeof, start + sizeof * (1 + length)),
			);
			i += sizeof * (length + 1);
			return copy;
		};
		const tgis = get();
		const t = get();
		const ti = get();
		const tgi = get();
		return new Index({ tgis, t, ti, tgi });
	}

}

// # markNames(label, sublabel)
// Returns the names of the start and end mark that generateMap() sets for the
// given label and optional sublabel.
function markNames(label, sublabel) {
	const sub = sublabel ? `:${sublabel}` : '';
	return [`${label}${sub}:start`, `${label}${sub}:end`];
}

// # hasMarks(label, sublabel)
// Checks whether both the start and end mark exist in the performance
// timeline, as performance.measure() throws when a mark is missing.
function hasMarks(label, sublabel) {
	return markNames(label, sublabel).every((mark) => {
		return performance.getEntriesByName(mark, 'mark').length > 0;
	});
}

// # measure(name, label, sublabel)
// Creates a performance measure with the given name between the start and end
// mark of the given label. Throws if the marks have not been set.
function measure(name, label, sublabel) {
	const [start, end] = markNames(label, sublabel);
	return performance.measure(name, start, end);
}

// # hash32to16(x)
// Hashes a 32-bit integer to a smaller integer. The multiplier is carefully
// chosen to spread out the bits as much as possible. Note that, despite the
// name, the result can be up to 19 bits wide: callers are expected to mask it
// with the bucket size. The multiplication is deliberately a plain floating
// point multiplication: replacing it with Math.imul() would change the hash
// values and hence invalidate indices that were serialized earlier.
function hash32to16(x) {
	return ((x * 2654435761) >>> 13);
}

// # hashType()
// The hash function for the type indexing, but which operates by accepting the
// TGI array and the offset of the TGI in that array.
function hashType(entries, index) {
	return hash32to16(entries[index]);
}

// # equalsType()
// Checks whether the Type ID of the TGI at the given offset equals the given
// type.
function equalsType(entries, ptr, type) {
	return entries[ptr] === type;
}

// # hash96to32(t, g, i)
// Hashes 3 32-bit integers to a 32-bit integer. Optimized for generating as
// many unique hashes as possible for TGIs.
function hash96to32(t, g, i) {
	// Mix the type first, and then fold it into both the group and instance.
	const type = Math.imul(t, 2654435761) ^ (t >> 5);
	let group = Math.imul(g ^ type, 0x9E3779B9);
	let inst = Math.imul(i ^ type, 0x85EBCA6B);
	group ^= group >> 16;
	inst ^= inst >> 13;
	// ">>> 0" converts the signed result to an unsigned 32-bit integer.
	return (group ^ inst) >>> 0;
}

// # hashTypeGroupInstance()
// Same as hashType, but now hashes the entire TGI.
function hashTypeGroupInstance(entries, index) {
	return hash96to32(entries[index], entries[index + 1], entries[index + 2]);
}

// # equalsTGI()
// Checks whether the TGI at the given offset equals the given type, group and
// instance.
function equalsTGI(entries, ptr, t, g, i) {

	// Note: groups are more likely to differ, so we use that first. Slight
	// performance optimization, lol.
	return (
		entries[ptr + 1] === g &&
		entries[ptr + 2] === i &&
		entries[ptr] === t
	);

}

// # hash64to32(t, i)
// Hashes 2 32-bit integers to a 32-bit integer. Optimized for generating as
// many unique hashes as possible for TIs.
function hash64to32(t, i) {
	// Scramble the type and fold in the instance. ">>> 0" converts the signed
	// result of the bitwise operations to an unsigned 32-bit integer.
	return ((Math.imul(t, 2654435761) ^ (t >> 5)) ^ i) >>> 0;
}

// # hashTypeInstance()
// Same as hashType, but now hashes the Type and Instance ID of the TGI that
// starts at the given offset in the TGI array. The group is ignored.
function hashTypeInstance(entries, index) {
	return hash64to32(entries[index], entries[index + 2]);
}

// # equalsTI()
// Checks whether the TGI at the given offset has the given Type and Instance
// ID. The instance is compared first.
function equalsTI(entries, ptr, t, i) {
	return entries[ptr + 2] === i && entries[ptr] === t;
}

// # nextPowerOf2(n)
// Finds the smallest power of 2 that is greater than or equal to n, used to
// automatically calculate the bucket size. Returns 1 for anything below 1.
// Fractional values are rounded up first: without that the bitwise operations
// would truncate them, so that for example 8.5 would yield 8, which is
// *smaller* than what was asked for. As the bit twiddling works on 32-bit
// integers, values above 2^31 are not supported.
function nextPowerOf2(n) {
	if (n < 1) return 1;
	// Subtract 1 so that exact powers of 2 map onto themselves, then smear the
	// highest set bit over all lower bits.
	let bits = Math.ceil(n) - 1;
	bits |= bits >> 1;
	bits |= bits >> 2;
	bits |= bits >> 4;
	bits |= bits >> 8;
	bits |= bits >> 16;
	return bits + 1;
}