assemblyscript
Version:
A TypeScript-like language for WebAssembly.
276 lines (247 loc) • 7.96 kB
text/typescript
import { E_URI_MALFORMED } from "./error";
import { CharCode } from "./string";
// Truncated boolean lookup table that helps us quickly determine
// whether a char needs to be escaped for URIs (RFC 2396).
// Only the 94 slots for char codes 33..126 are stored, so slot 0 belongs to
// char code 33; a value of 1 marks a char that must be escaped.
// NOTE(review): presumably passed as the `table` argument of `encode`, which
// indexes its table with `c - 33` — confirm against the callers.
// @ts-ignore: decorator
@lazy export const URI_UNSAFE = memory.data<u8>([
/* skipped: 32 + 1 head slots (char codes 0..32) that are always '1'
*/ 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, /*
skipped: 128 + 1 tail slots (char codes 127..255) that are always '1' */
]);
// Truncated boolean lookup table that helps us quickly determine
// whether a char needs to be escaped for URLs (RFC 3986).
// Only the 94 slots for char codes 33..126 are stored, so slot 0 belongs to
// char code 33; a value of 1 marks a char that must be escaped.
// NOTE(review): presumably passed as the `table` argument of `encode`, which
// indexes its table with `c - 33` — confirm against the callers.
// @ts-ignore: decorator
@lazy export const URL_UNSAFE = memory.data<u8>([
/* skipped: 32 + 1 head slots (char codes 0..32) that are always '1'
*/ 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, /*
skipped: 128 + 1 tail slots (char codes 127..255) that are always '1' */
]);
// Truncated boolean lookup table for detecting the reserved chars: ;/?:@&=+$,#
// Only the 30 slots for char codes 35..64 are stored, so the table is indexed
// with `ch - 35` (see `isReserved`); a value of 1 marks a reserved char.
// @ts-ignore: decorator
@lazy export const URI_RESERVED = memory.data<u8>([
/* skipped: 32 + 3 head slots (char codes 0..34) that are always '0'
*/ 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1,
1, /* skipped: 191 tail slots (char codes 65..255) that are always '0' */
]);
/**
 * Percent-encodes a UTF-16 string into a newly allocated string buffer.
 *
 * Runs of printable ASCII chars that `table` marks as safe are copied
 * verbatim; every other code point is converted to UTF-8 and each resulting
 * byte is written as `%XX` (three UTF-16 code units, i.e. 6 bytes).
 *
 * @param src   Pointer to the first UTF-16 code unit of the input.
 * @param len   Input length in UTF-16 code units.
 * @param table Pointer to a byte table indexed by `charCode - 33` (char codes
 *              33..126); a non-zero entry means the char must be escaped.
 * @returns `src` itself when `len` is 0, otherwise a pointer to a new buffer
 *          (allocated with the `String` class id) holding the encoded text.
 * @throws URIError if the input contains an unpaired surrogate.
 */
export function encode(src: usize, len: usize, table: usize): usize {
  if (!len) return src;

  // `offset` and `outSize` are byte counts; start with one output unit per input unit.
  let i: usize = 0, offset: usize = 0, outSize = len << 1;
  let dst = __new(outSize, idof<String>());

  while (i < len) {
    let org = i;
    let c: u32, c1: u32;
    // Fast scan: advance while the chars are printable ASCII (33..126)
    // that the table marks as safe to copy without escaping.
    do {
      c = <u32>load<u16>(src + (i << 1));
      // Unsigned compare: true only for 33 <= c <= 126.
      if (c - 33 < 94) {
        if (load<u8>(table + (c - 33))) break;
      } else break;
    } while (++i < len);

    // If a safe run was found, copy it as is.
    if (i > org) {
      let size = i - org << 1;
      if (offset + size > outSize) {
        outSize = offset + size;
        dst = __renew(dst, outSize);
      }
      // TODO: should we optimize for short cases like 2 byte size?
      memory.copy(
        dst + offset,
        src + (org << 1),
        size
      );
      offset += size;
      // Done if the safe run reached the end of the input.
      if (i >= len) break;
    }

    // Here `i < len` and `c` is the unsafe code unit at index `i`.
    // Decode UTF-16, rejecting unpaired surrogates.
    if (c >= 0xD800) {
      // A trail surrogate without a preceding lead surrogate.
      if (c >= 0xDC00 && c <= 0xDFFF) {
        throw new URIError(E_URI_MALFORMED);
      }
      if (c <= 0xDBFF) {
        // A lead surrogate must be followed by a trail surrogate. Advance
        // first and then bounds-check, so a lead surrogate at the very end of
        // the input is rejected instead of reading past the string.
        if (++i >= len) {
          throw new URIError(E_URI_MALFORMED);
        }
        c1 = <u32>load<u16>(src + (i << 1));
        if (c1 < 0xDC00 || c1 > 0xDFFF) {
          throw new URIError(E_URI_MALFORMED);
        }
        c = (((c & 0x3FF) << 10) | (c1 & 0x3FF)) + 0x10000;
      }
    }

    // Reserve room for the worst case: 1 escaped byte for ASCII, otherwise
    // up to 4 escaped UTF-8 bytes, 6 output bytes each.
    let estSize = offset + (c < 0x80 ? 1 * 6 : 4 * 6);
    if (estSize > outSize) {
      // Double the estimate, except for single-unit inputs where the
      // estimate already covers the worst case.
      outSize = len > 1 ? estSize << 1 : estSize;
      dst = __renew(dst, outSize);
    }

    if (c < 0x80) {
      // Escape an unsafe ASCII code point.
      storeHex(dst, offset, c);
      offset += 6;
    } else {
      // Escape a non-ASCII code point as its UTF-8 byte sequence.
      if (c < 0x800) {
        storeHex(dst, offset, (c >> 6) | 0xC0);
        offset += 6;
      } else {
        if (c < 0x10000) {
          storeHex(dst, offset, (c >> 12) | 0xE0);
          offset += 6;
        } else {
          storeHex(dst, offset, (c >> 18) | 0xF0);
          offset += 6;
          storeHex(dst, offset, (c >> 12 & 0x3F) | 0x80);
          offset += 6;
        }
        storeHex(dst, offset, (c >> 6 & 0x3F) | 0x80);
        offset += 6;
      }
      storeHex(dst, offset, (c & 0x3F) | 0x80);
      offset += 6;
    }
    ++i;
  }

  // Shrink the output buffer to the bytes actually written.
  if (outSize > offset) {
    dst = __renew(dst, offset);
  }
  return dst;
}
/**
 * Decodes `%XX` escape sequences in a UTF-16 string into a newly allocated
 * string buffer.
 *
 * Chars other than `%` are copied verbatim. Each escape is read as one byte;
 * bytes >= 0x80 start a UTF-8 sequence whose continuation bytes must also be
 * escaped, and the resulting code point is written back as UTF-16.
 *
 * @param src       Pointer to the first UTF-16 code unit of the input.
 * @param len       Input length in UTF-16 code units.
 * @param component When false, escapes that decode to a reserved char (see
 *                  `isReserved`) are left in their escaped form.
 * @returns `src` itself when `len` is 0, otherwise a pointer to a new buffer
 *          (allocated with the `String` class id) holding the decoded text.
 * @throws URIError on a truncated or non-hex escape, or on an invalid,
 *         overlong, out-of-range or surrogate UTF-8 encoded code point.
 */
export function decode(src: usize, len: usize, component: bool): usize {
if (!len) return src;
let i: usize = 0, offset: usize = 0, ch: u32 = 0;
// the output is never longer than the input (asserted below)
let dst = __new(len << 1, idof<String>());
while (i < len) {
let org = i;
// scan forward to the next '%' (or to the end of the input)
while (i < len && (ch = load<u16>(src + (i << 1))) != CharCode.PERCENT) i++;
// copy the run of plain chars that precedes it
if (i > org) {
let size = i - org << 1;
// TODO: should we optimize for short cases like 2 byte size?
memory.copy(
dst + offset,
src + (org << 1),
size
);
offset += size;
if (i >= len) break;
}
// decode the two hex digits that must follow the '%'
if (
i + 2 >= len ||
ch != CharCode.PERCENT ||
(ch = loadHex(src, i + 1 << 1)) == -1
) throw new URIError(E_URI_MALFORMED);
i += 3;
if (ch < 0x80) {
if (!component && isReserved(ch)) {
// keep the escape: emit the '%' itself and rewind so that the two
// hex digits are copied as plain chars on the next iteration
ch = CharCode.PERCENT;
i -= 2;
}
} else {
// decode UTF-8 sequence
let nb = utf8LenFromUpperByte(ch);
// smallest code point that needs nb bytes: 2 => 0x80, 3 => 0x800, 4 => 0x10000
// NOTE(review): the original comment claims `_ => -1` for any other nb — confirm
let lo: u32 = 1 << (17 * nb >> 2) - 1;
// payload mask of the lead byte: 2 => 31, 3 => 15, 4 => 7, _ => 0
ch &= nb ? (0x80 >> nb) - 1 : 0;
// NOTE(review): nb is u32, so for nb == 0 the pre-decrement wraps around and
// the loop only ends via the throw or the break below — verify this is intended
while (--nb != 0) {
let c1: u32;
// decode the next escaped byte
if (
i + 2 >= len ||
load<u16>(src + (i << 1)) != CharCode.PERCENT ||
(c1 = loadHex(src, i + 1 << 1)) == -1
) throw new URIError(E_URI_MALFORMED);
i += 3;
// not a continuation byte (10xxxxxx): zero ch so the check below rejects it
if ((c1 & 0xC0) != 0x80) {
ch = 0;
break;
}
ch = (ch << 6) | (c1 & 0x3F);
}
// reject overlong encodings, invalid lead bytes, code points above
// 0x10FFFF and surrogates, none of which map to valid UTF-16
if (ch < lo || lo == -1 || ch > 0x10FFFF || (ch >= 0xD800 && ch < 0xE000)) {
throw new URIError(E_URI_MALFORMED);
}
// encode UTF16
if (ch >= 0x10000) {
ch -= 0x10000;
// `lo` / `hi` are the low and high halves of the 32-bit store below:
// `lo` holds the 0xD800-based surrogate, `hi` the 0xDC00-based one
let lo = ch >> 10 | 0xD800;
let hi = (ch & 0x03FF) | 0xDC00;
store<u32>(dst + offset, lo | (hi << 16));
offset += 4;
continue;
}
}
store<u16>(dst + offset, ch);
offset += 2;
}
assert(offset <= (len << 1));
// shrink output string buffer if necessary
if ((len << 1) > offset) {
dst = __renew(dst, offset);
}
return dst;
}
/**
 * Writes the three UTF-16 code units `%XX` at `dst + offset`, where `XX` is
 * the low byte of `ch` as two uppercase hexadecimal digits.
 */
function storeHex(dst: usize, offset: usize, ch: u32): void {
  // ASCII codes of the digits '0'..'9' followed by 'A'..'F'.
  // @ts-ignore: decorator
  const DIGITS = memory.data<u8>([
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46
  ]);
  let out = dst + offset;
  let upperDigit = <u32>load<u8>(DIGITS + ((ch >> 4) & 0x0F));
  let lowerDigit = <u32>load<u8>(DIGITS + (ch & 0x0F));
  // '%' goes into the first code unit ...
  store<u16>(out, CharCode.PERCENT, 0);
  // ... and both digits follow, packed into a single 32-bit store.
  store<u32>(out, upperDigit | (lowerDigit << 16), 2);
}
/**
 * Reads two UTF-16 code units at byte position `src + offset` and interprets
 * them as a two-digit hexadecimal number.
 *
 * @returns the value 0..255, or -1 if either code unit is not a hex digit.
 */
function loadHex(src: usize, offset: usize): u32 {
  let ptr = src + offset;
  let upper = <u32>load<u16>(ptr, 0);
  let lower = <u32>load<u16>(ptr, 2);
  let bothHex = isHex(upper) && isHex(lower);
  return bothHex
    ? (fromHex(upper) << 4) | fromHex(lower)
    : -1;
}
// Converts a hexadecimal digit char code ('0'-'9', 'a'-'f', 'A'-'F') to its
// numeric value. The result is only meaningful for chars accepted by `isHex`.
// @ts-ignore: decorator
@inline function fromHex(ch: u32): u32 {
  // Setting bit 5 folds 'A'-'F' onto 'a'-'f' and leaves '0'-'9' unchanged.
  let folded = ch | 32;
  // Modulo 39 maps '0'..'9' to 9..18 and 'a'..'f' to 19..24.
  return folded % 39 - 9;
}
// Returns the length in bytes (2, 3 or 4) of the UTF-8 sequence introduced by
// the lead byte `c0`, or 0 when `c0` lies outside 0xC0..0xF7.
// @ts-ignore: decorator
@inline function utf8LenFromUpperByte(c0: u32): u32 {
  // Equivalent to:
  //   0xC0..0xDF => 2
  //   0xE0..0xEF => 3
  //   0xF0..0xF7 => 4
  //   otherwise  => 0
  if (c0 - 0xC0 < 56) {
    // Move the byte to the top of the word; the number of leading one bits
    // (leading zeros of the complement) is the sequence length.
    return clz(~(c0 << 24));
  }
  return 0;
}
// Tells whether `ch` is flagged as reserved in the URI_RESERVED table, which
// only covers char codes 35..64; anything outside that range is not reserved.
// @ts-ignore: decorator
@inline function isReserved(ch: u32): bool {
  if (ch - 35 < 30) {
    return <bool>load<u8>(URI_RESERVED + (ch - 35));
  }
  return false;
}
// Tells whether `ch` is a hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'.
// @ts-ignore: decorator
@inline function isHex(ch: u32): bool {
  // Decimal digit?
  if (ch - CharCode._0 < 10) return true;
  // Otherwise fold to lower case and test for 'a'..'f'.
  return (ch | 32) - CharCode.a < 6;
}