assemblyscript

import { E_URI_MALFORMED } from "./error";
import { CharCode } from "./string";

// Truncated boolean lookup table used to quickly determine whether a
// character needs to be escaped for URIs (RFC 2396).
// The table starts at char code 33 ('!') and ends at 126 ('~'); `encode`
// treats every code unit outside of that range as unsafe without a lookup.
// @ts-ignore: decorator
@lazy export const URI_UNSAFE = memory.data<u8>([
/* skip 32 + 1 always set to '1' head slots
  */ 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
  /* skip 128 + 1 always set to '1' tail slots */
]);

// Truncated boolean lookup table used to quickly determine whether a
// character needs to be escaped for URLs (RFC 3986).
// Same layout as URI_UNSAFE: the first slot corresponds to char code 33.
// @ts-ignore: decorator
@lazy export const URL_UNSAFE = memory.data<u8>([
/* skip 32 + 1 always set to '1' head slots
  */ 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
  /* skip 128 + 1 always set to '1' tail slots */
]);

// Truncated boolean lookup table for determining reserved chars: ;/?:@&=+$,#
// The first slot corresponds to char code 35 ('#'), the last one to 64 ('@').
// @ts-ignore: decorator
@lazy export const URI_RESERVED = memory.data<u8>([
  /* skip 32 + 3 always set to '0' head slots
    */ 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1,
  1, /* skip 191 always set to '0' tail slots */
]);

// Percent-encodes a UTF-16 string.
//
//  src   - pointer to the first UTF-16 code unit of the input
//  len   - input length in UTF-16 code units
//  table - pointer to a byte table indexed by `charCode - 33`; a non-zero
//          entry marks the character as unsafe (see URI_UNSAFE / URL_UNSAFE)
//
// Returns `src` itself for an empty input, otherwise a pointer to a newly
// allocated buffer holding the encoded UTF-16 output.
// Throws URIError(E_URI_MALFORMED) if the input contains an unpaired surrogate.
export function encode(src: usize, len: usize, table: usize): usize {
  if (!len) return src;

  // `offset` and `outSize` are byte counts, `i` counts UTF-16 code units
  let i: usize = 0, offset: usize = 0, outSize = len << 1;
  let dst = __new(outSize, idof<String>());

  while (i < len) {
    let org = i;
    let c: u32, c1: u32;
    // fast scan: advance while chars are printable ASCII
    // and safe to copy without escaping.
    do {
      c = <u32>load<u16>(src + (i << 1));
      // is it printable ASCII and safe?
      // (unsigned wrap-around makes this a single check for 33 <= c <= 126)
      if (c - 33 < 94) { // 127 - 33
        if (load<u8>(table + (c - 33))) break;
      } else break;
    } while (++i < len);

    // if we found a run of safe chars just copy it without encoding
    if (i > org) {
      let size = i - org << 1;
      if (offset + size > outSize) {
        outSize = offset + size;
        dst = __renew(dst, outSize);
      }
      // TODO: should we optimize for short cases like 2 byte size?
      memory.copy(
        dst + offset,
        src + (org << 1),
        size
      );
      offset += size;
      // done if we reached the end of the input string
      if (i >= len) break;
    }

    // decode UTF-16 while checking for unpaired surrogates
    if (c >= 0xD800) {
      // a low surrogate that is not preceded by a high surrogate
      if (c >= 0xDC00 && c <= 0xDFFF) {
        throw new URIError(E_URI_MALFORMED);
      }
      if (c <= 0xDBFF) {
        // A high surrogate must be followed by a low surrogate that is still
        // part of the input. Advance first and verify the bound, so a high
        // surrogate at the very end is rejected instead of reading past the
        // end of the source buffer.
        if (++i >= len) {
          throw new URIError(E_URI_MALFORMED);
        }
        c1 = <u32>load<u16>(src + (i << 1));
        if (c1 < 0xDC00 || c1 > 0xDFFF) {
          throw new URIError(E_URI_MALFORMED);
        }
        c = (((c & 0x3FF) << 10) | (c1 & 0x3FF)) + 0x10000;
      }
    }

    // each escaped byte ("%XX") occupies 3 UTF-16 code units = 6 bytes;
    // reserve room for 1 escaped byte (ASCII) or up to 4 (UTF-8 sequence)
    let estSize = offset + (c < 0x80 ? 1 * 6 : 4 * 6);
    if (estSize > outSize) {
      // double the estimated size, but only for inputs longer than one
      // code unit, since the estimate already covers the worst case
      outSize = len > 1 ? estSize << 1 : estSize;
      dst = __renew(dst, outSize);
    }

    if (c < 0x80) {
      // encode ASCII unsafe code point
      storeHex(dst, offset, c);
      offset += 6;
    } else {
      // encode non-ASCII code point as an escaped UTF-8 sequence
      if (c < 0x800) {
        storeHex(dst, offset, (c >> 6) | 0xC0);
        offset += 6;
      } else {
        if (c < 0x10000) {
          storeHex(dst, offset, (c >> 12) | 0xE0);
          offset += 6;
        } else {
          storeHex(dst, offset, (c >> 18) | 0xF0);
          offset += 6;
          storeHex(dst, offset, (c >> 12 & 0x3F) | 0x80);
          offset += 6;
        }
        storeHex(dst, offset, (c >> 6 & 0x3F) | 0x80);
        offset += 6;
      }
      storeHex(dst, offset, (c & 0x3F) | 0x80);
      offset += 6;
    }
    ++i;
  }
  // shrink output string buffer if necessary
  if (outSize > offset) {
    dst = __renew(dst, offset);
  }
  return dst;
}

// Decodes a percent-encoded UTF-16 string.
//
//  src       - pointer to the first UTF-16 code unit of the input
//  len       - input length in UTF-16 code units
//  component - when false, escapes that decode to a reserved character
//              (see URI_RESERVED) are kept in their escaped form
//
// Returns `src` itself for an empty input, otherwise a pointer to a newly
// allocated buffer holding the decoded UTF-16 output.
// Throws URIError(E_URI_MALFORMED) on truncated or non-hex escapes and on
// escape sequences that do not form an acceptable UTF-8 encoded code point.
export function decode(src: usize, len: usize, component: bool): usize {
  if (!len) return src;

  let i: usize = 0, offset: usize = 0, ch: u32 = 0;
  // the decoded output is never longer than the input
  let dst = __new(len << 1, idof<String>());

  while (i < len) {
    let org = i;
    // skip ahead to the next '%' (or the end of input)
    while (i < len && (ch = load<u16>(src + (i << 1))) != CharCode.PERCENT) i++;

    // copy the unescaped run verbatim
    if (i > org) {
      let size = i - org << 1;
      // TODO: should we optimize for short cases like 2 byte size?
      memory.copy(
        dst + offset,
        src + (org << 1),
        size
      );
      offset += size;
      if (i >= len) break;
    }

    // decode hex: requires '%' plus two hex digits within the input
    if (
      i + 2 >= len ||
      ch != CharCode.PERCENT ||
      (ch = loadHex(src, i + 1 << 1)) == -1
    ) throw new URIError(E_URI_MALFORMED);

    i += 3;
    if (ch < 0x80) {
      if (!component && isReserved(ch)) {
        // keep the escape as is: emit '%' now and rewind so that the two
        // hex digits are copied verbatim by the next iteration
        ch = CharCode.PERCENT;
        i -= 2;
      }
    } else {
      // decode UTF-8 sequence
      let nb = utf8LenFromUpperByte(ch);
      // smallest code point that needs `nb` bytes (used to reject overlong
      // forms): 2 => 0x80, 3 => 0x800, 4 => 0x10000.
      // For nb == 0 (invalid lead byte) the shift count underflows;
      // presumably this yields 0x80000000 (shift count taken modulo 32),
      // which no decoded value can reach, so such input is rejected below.
      let lo: u32 = 1 << (17 * nb >> 2) - 1;
      // payload mask of the lead byte: 2 => 31, 3 => 15, 4 => 7, _ => 0
      ch &= nb ? (0x80 >> nb) - 1 : 0;

      while (--nb != 0) {
        let c1: u32;
        // decode hex of the next continuation byte
        if (
          i + 2 >= len ||
          load<u16>(src + (i << 1)) != CharCode.PERCENT ||
          (c1 = loadHex(src, i + 1 << 1)) == -1
        ) throw new URIError(E_URI_MALFORMED);

        i += 3;
        // not a continuation byte (10xxxxxx): force the range check to fail
        if ((c1 & 0xC0) != 0x80) {
          ch = 0;
          break;
        }
        ch = (ch << 6) | (c1 & 0x3F);
      }

      // check that the UTF-8 code point maps to a valid UTF-16 encoding
      if (ch < lo || lo == -1 || ch > 0x10FFFF || (ch >= 0xD800 && ch < 0xE000)) {
        throw new URIError(E_URI_MALFORMED);
      }

      // encode UTF-16: supplementary code points become a surrogate pair
      if (ch >= 0x10000) {
        ch -= 0x10000;
        let lo = ch >> 10 | 0xD800;
        let hi = (ch & 0x03FF) | 0xDC00;
        // write both surrogates with a single 32-bit store
        store<u32>(dst + offset, lo | (hi << 16));
        offset += 4;
        continue;
      }
    }
    store<u16>(dst + offset, ch);
    offset += 2;
  }

  assert(offset <= (len << 1));
  // shrink output string buffer if necessary
  if ((len << 1) > offset) {
    dst = __renew(dst, offset);
  }
  return dst;
}

// Writes the three UTF-16 code units "%XX" at `dst + offset`, where XX are
// the upper-case hex digits of the low 8 bits of `ch`.
function storeHex(dst: usize, offset: usize, ch: u32): void {
  // @ts-ignore: decorator
  const HEX_CHARS = memory.data<u8>([
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46
  ]);

  store<u16>(dst + offset, CharCode.PERCENT, 0); // %
  store<u32>(
    dst + offset,
    <u32>load<u8>(HEX_CHARS + (ch >> 4 & 0x0F)) |
    <u32>load<u8>(HEX_CHARS + (ch & 0x0F)) << 16,
    2
  ); // XX
}

// Reads two UTF-16 code units at `src + offset` (a byte offset) and returns
// the byte value they spell in hex, or -1 if either one is not a hex digit.
function loadHex(src: usize, offset: usize): u32 {
  let c0 = <u32>load<u16>(src + offset, 0);
  let c1 = <u32>load<u16>(src + offset, 2);
  return isHex(c0) && isHex(c1)
    ? fromHex(c0) << 4 | fromHex(c1)
    : -1;
}

// Converts the char code of a hex digit to its numeric value.
// Only meaningful for inputs accepted by `isHex`:
// '0'..'9' (48..57) % 39 - 9 => 0..9, ('a'..'f' | 32) (97..102) % 39 - 9 => 10..15
// @ts-ignore: decorator
@inline function fromHex(ch: u32): u32 {
  return (ch | 32) % 39 - 9;
}

// Returns the length in bytes of a UTF-8 sequence given its lead byte,
// or 0 if the byte is not a valid multi-byte lead byte.
// @ts-ignore: decorator
@inline function utf8LenFromUpperByte(c0: u32): u32 {
  // same as
  // if (c0 - 0xC0 <= 0xDF - 0xC0) return 2;
  // if (c0 - 0xE0 <= 0xEF - 0xE0) return 3;
  // if (c0 - 0xF0 <= 0xF7 - 0xF0) return 4;
  // return 0;
  return c0 - 0xC0 < 56
    ? clz(~(c0 << 24))
    : 0;
}

// Tests whether `ch` is one of the reserved chars listed in URI_RESERVED.
// @ts-ignore: decorator
@inline function isReserved(ch: u32): bool {
  // the table covers char codes 35..64 only
  return ch - 35 < 30
    ? <bool>load<u8>(URI_RESERVED + (ch - 35))
    : false;
}

// Tests whether `ch` is the char code of a hex digit (0-9, a-f, A-F).
// @ts-ignore: decorator
@inline function isHex(ch: u32): bool {
  // `| 32` folds upper-case letters onto their lower-case counterparts
  return (ch - CharCode._0 < 10) || ((ch | 32) - CharCode.a < 6);
}