json-as
Version:
The only JSON library you'll need for AssemblyScript with SIMD and SWAR
197 lines (172 loc) • 6.16 kB
text/typescript
import { bs } from "../../../lib/as-bs";
import { BACK_SLASH } from "../../custom/chars";
import { DESERIALIZE_ESCAPE_TABLE, ESCAPE_HEX_TABLE } from "../../globals/tables";
import { hex4_to_u16_swar } from "../../util/swar";
// @ts-ignore: decorator allowed
@lazy const SPLAT_5C = i16x8.splat(0x5C); // \
// Overflow Pattern for Unicode Escapes (READ)
// \u0001 0 \u0001__| + 0
// -\u0001 2 -\u0001_| + 0
// --\u0001 4 --\u0001| + 0
// ---\u0001 6 ---\u000|1 + 2
// ----\u0001 8 ----\u00|01 + 4
// -----\u0001 10 -----\u0|001 + 6
// ------\u0001 12 ------\u|0001 + 8
// -------\u0001 14 -------\|u0001 + 10
// Formula: overflow = max(0, lane - 4)
// Overflow Pattern for Unicode Escapes (WRITE)
// * = escape, _ = empty
// \u0001 0 *_______| - 14
// -\u0001 2 -*______| - 12
// --\u0001 4 --*_____| - 10
// ---\u0001 6 ---*____| - 8
// ----\u0001 8 ----*___| - 6
// -----\u0001 10 -----*__| - 4
// ------\u0001 12 ------*_| - 2
// -------\u0001 14 -------*| + 0
// Formula: overflow = lane - 14
// Overflow pattern for Short Escapes (READ)
// \n------ 0 \n------| - 12
// -\n----- 2 -\n-----| - 10
// --\n---- 4 --\n----| - 8
// ---\n--- 6 ---\n---| - 6
// ----\n-- 8 ----\n--| - 4
// -----\n- 10 -----\n-| - 2
// ------\n 12 ------\n| + 0
// -------\n 14 -------\|n + 2
// Formula: overflow = lane - 12
// Overflow pattern for Short Escapes (WRITE)
// * = escape, _ = empty
// \n------ 0 *_______| - 14
// -\n----- 2 -*______| - 12
// --\n---- 4 --*_____| - 10
// ---\n--- 6 ---*____| - 8
// ----\n-- 8 ----*___| - 6
// -----\n- 10 -----*__| - 4
// ------\n 12 ------*_| - 2
// -------\n 14 -------*| + 0
// Formula: overflow = lane - 14
/**
* Deserializes strings back into into their original form using SIMD operations
* @param src string to deserialize
* @param dst buffer to write to
* @returns number of bytes written
*/
// todo: optimize and stuff. it works, its not pretty. ideally, i'd like this to be (nearly) branchless
export function deserializeString_SIMD(srcStart: usize, srcEnd: usize): string {
// Strip quotes
srcStart += 2;
srcEnd -= 2;
const srcEnd16 = srcEnd - 16;
bs.ensureSize(u32(srcEnd - srcStart));
while (srcStart < srcEnd16) {
const block = load<v128>(srcStart);
store<v128>(bs.offset, block);
const eq5C = i16x8.eq(load<v128>(srcStart), SPLAT_5C);
let mask = i16x8.bitmask(eq5C);
// Early exit
if (mask === 0) {
srcStart += 16;
bs.offset += 16;
continue;
}
let srcChg: usize = 0;
let lastLane: usize = 0;
do {
const laneIdx = usize(ctz(mask) << 1); // 0 2 4 6 8 10 12 14
mask &= mask - 1;
const srcIdx = srcStart + laneIdx;
const code = load<u16>(srcIdx, 2);
bs.offset += laneIdx - lastLane;
// Hot path (negative bias)
if (code !== 0x75) {
// Short escapes (\n \t \" \\)
const escaped = load<u16>(DESERIALIZE_ESCAPE_TABLE + code);
mask &= mask - i32(escaped === 0x5C);
store<u16>(bs.offset, escaped);
store<v128>(bs.offset, load<v128>(srcIdx, 4), 2);
const l6 = usize(laneIdx === 14);
// bs.offset -= (1 - l6) << 1;
bs.offset += 2;
srcStart += l6 << 1;
lastLane = laneIdx + 4;
continue;
}
// Unicode escape (\uXXXX)
const block = load<u64>(srcIdx, 4); // XXXX
const escaped = hex4_to_u16_swar(block);
store<u16>(bs.offset, escaped);
store<u64>(bs.offset, load<u64>(srcIdx, 12), 2);
bs.offset += 2;
if (laneIdx >= 6) {
srcStart += laneIdx - 4;
}
lastLane = laneIdx + 12;
} while (mask !== 0);
if (lastLane < 16) {
bs.offset += 16 - lastLane;
}
srcStart += 16 + srcChg;
}
while (srcStart < srcEnd) {
const block = load<u16>(srcStart);
store<u16>(bs.offset, block);
srcStart += 2;
// Early exit
if (block !== 0x5C) {
bs.offset += 2;
continue;
}
const code = load<u16>(srcStart);
if (code !== 0x75) {
// Short escapes (\n \t \" \\)
const block = load<u16>(srcStart);
const escape = load<u16>(DESERIALIZE_ESCAPE_TABLE + block);
store<u16>(bs.offset, escape);
srcStart += 2;
} else {
// Unicode escape (\uXXXX)
const block = load<u64>(srcStart, 2); // XXXX
const escaped = hex4_to_u16_swar(block);
store<u16>(bs.offset, escaped);
srcStart += 10;
}
bs.offset += 2;
}
return bs.out<string>();
}
/**
* Computes a per-lane mask identifying UTF-16 code units whose **low byte**
* is the ASCII backslash (`'\\'`, 0x5C).
*
* The mask is produced in two stages:
* 1. Detects bytes equal to 0x5C using a SWAR equality test.
* 2. Clears matches where 0x5C appears in the **high byte** of a UTF-16 code unit,
* ensuring only valid low-byte backslashes are reported.
*
* Each matching lane sets itself to 0x80.
*/
// @ts-ignore: decorator
@inline function backslash_mask(block: u64): u64 {
const b = block ^ 0x005C_005C_005C_005C;
const backslash_mask = (b - 0x0001_0001_0001_0001) & ~b & 0x0080_0080_0080_0080;
const high_byte_mask =
~(((block - 0x0100_0100_0100_0100) & ~block & 0x8000_8000_8000_8000)
^ 0x8000_8000_8000_8000) >> 8;
return backslash_mask & high_byte_mask;
}
/**
* Computes a per-lane mask identifying UTF-16 code units whose **low byte**
* is the ASCII backslash (`'\\'`, 0x5C).
*
* Each matching lane sets itself to 0x80.
*
* WARNING: The low byte of a code unit *may* be a backslash, thus triggering false positives!
* This is useful for a hot path where it is possible to detect the false positive scalarly.
*/
// @ts-ignore: decorator
@inline function backslash_mask_unsafe(block: u64): u64 {
const b = block ^ 0x005C_005C_005C_005C;
const backslash_mask = (b - 0x0001_0001_0001_0001) & ~b & 0x0080_0080_0080_0080;
return backslash_mask;
}