UNPKG

cbor-edn

Version:

Parse CBOR Extended Diagnostic Notation as defined by [draft-ietf-cbor-edn-literals-16](https://www.ietf.org/archive/id/draft-ietf-cbor-edn-literals-16.html) and some CBOR working group discussions.

327 lines (326 loc) 10.1 kB
import { CUSTOM_APP_TAG, ELLIPSE_TAG, IPV4_TAG, IPV6_TAG, MT, } from './constants.js'; import { Tag, encode } from 'cbor2'; import { base64UrlToBytes, hexToU8, u8concat } from 'cbor2/utils'; import { ByteTree } from './byteTree.js'; import { numToBytes } from './spec.js'; export const EDN_EMBEDDED_RANGES = Symbol('EDN_EMBEDDED_RANGES'); const TD = new TextDecoder('utf-8', { fatal: true, }); const TE = new TextEncoder(); function customApp(chunk) { if (!chunk.prefix) { throw new Error('Invalid prefix'); } return new ByteTree(encode(new Tag(CUSTOM_APP_TAG, [ chunk.prefix, TD.decode(chunk.str), ]))); } /** * Capture the rules about concatenated bstr's, tstr's, ellipsis, and * app-strings. * * @param chunks The chunks to be combined. * @param opts Options for UTF8-checking. * @returns Corresponding bytes. * @throws On invalid combinations. */ export function combineStrings(chunks, opts) { // Collapse app-strings as if they were inline. const s = chunks.flat(); const [first] = s; const ret = [first]; /* The mode of the list is the MT of the first non-ellipsis item in the list. If the mode is not tstr or bstr, there can be only one non-ellipsis item. */ const elided = s.some(x => x instanceof ByteTree); const fne = s.find(x => !(x instanceof ByteTree)); const mode = fne?.mt ?? MT.ELLIPSIS; // ?? Only found ellipses let found = !(first instanceof ByteTree) && (first.mt === MT.CUSTOM); /* Coalesce adjacent items of the same type, if possible. Possible: - Ellipsis <- Ellipsis - tstr <- tstr - tstr <- bstr (check UTF8) - bstr <- bstr */ for (let i = 1; i < s.length; i++) { const si = s[i]; const last = ret[ret.length - 1]; if (si instanceof ByteTree) { // Only ellipses will have a raw ByteTree if (!(last instanceof ByteTree)) { ret.push(si); } // Else two ellipses in a row. Ignore the second one. } else if (si.mt === MT.CUSTOM) { if ((mode !== MT.CUSTOM) || found) { throw new Error('Cannot concat custom app-string'); } ret.push(si); found = true; } else if (si.mt === MT.UTF8_STRING) { if (mode !== MT.UTF8_STRING) { throw new Error('Invalid concat, str in non-str mode'); } if (last instanceof ByteTree) { // Last was ellipsis ret.push(si); } else { last.str = new ByteTree(last.str, si.str); } } else if (last instanceof ByteTree) { // Last was ellipsis si.mt = mode; ret.push(si); } else { last.str = new ByteTree(last.str, si.str); } } // Output if (elided) { if (ret.length === 1) { // Only one ... return ret[0]; } // Output 888(ret) return new ByteTree(numToBytes({ int: ELLIPSE_TAG }, null, MT.TAG), numToBytes({ int: ret.length }, null, MT.ARRAY), ret.map(x => { if (x instanceof ByteTree) { return x; } if (mode === MT.CUSTOM) { return customApp(x); } if ((mode === MT.UTF8_STRING) && opts?.validateUTF8) { // Thows if invalid UTF-8. const st = (x.str instanceof ByteTree) ? x.str.bytes() : x.str; TD.decode(st); } if (mode === MT.ENCODED_BYTES) { return new ByteTree(x.str); } return new ByteTree(numToBytes({ int: x.str.length }, x.spec, mode), x.str); })); } // If not elided, we'll have coalesced to exactly one entry. const x = ret[0]; if (mode === MT.CUSTOM) { return customApp(x); } if ((mode === MT.UTF8_STRING) && opts?.validateUTF8) { const st = (x.str instanceof ByteTree) ? x.str.bytes() : x.str; // Thows if invalid UTF-8. TD.decode(st); } if (mode === MT.ENCODED_BYTES) { return new ByteTree(x.str); } const bt = new ByteTree(numToBytes({ int: x.str.length }, x.spec, mode), x.str); bt.mt = mode; if (x.spec === '') { bt.push(new Uint8Array([0xff])); } return bt; } /** * Convert string to pre-encoded Uint8Array. * * @param prefix DT or dt. * @param dt ISO date string. * @returns Obj ready for processing with combineStrings. */ export function encodeDate(prefix, dt) { const d = new Date(dt); const tm = d.getTime() / 1000; const str = Number.isSafeInteger(tm) ? numToBytes({ int: tm }) : numToBytes({ float: tm }); if (prefix === 'dt') { return { mt: MT.ENCODED_BYTES, str, }; } return { mt: MT.ENCODED_BYTES, str: new ByteTree(numToBytes({ int: 1 }, null, MT.TAG), str).bytes(), }; } /** * Coalesce all valid bytes together into chunks, and all adjacent ellipses * into a single ellipsis. * * @param _prefix Ignored. * @param chunks Chunks. * @returns Coalesced array. */ export function encodeHex(_prefix, chunks) { const str = chunks; // SQS str:(@(hex_byte / ellipsis) SQS)* if (str.length === 0) { return [{ mt: MT.BYTE_STRING, str: new Uint8Array() }]; } const bytesAndEllipses = str.reduce((t, v) => { if (t.length) { const last = t[t.length - 1]; if ((typeof v === 'string') && (typeof last === 'string')) { t[t.length - 1] += v; return t; } else if ((v instanceof ByteTree) && (last instanceof ByteTree)) { // E.g. h'... ...' return t; } } t.push(v); return t; }, []); return bytesAndEllipses.map(v => ((typeof v === 'string') ? { mt: MT.BYTE_STRING, str: hexToU8(v) } : v)); } /** * Given an IPv4 or IPv6 address, possible with a mask, trim the address * if needed. * * @param prefix IP or ip. * @param str Array of [IPbytes, number]. * @returns Chunk with either the address or [mask, address] pre-encoded, * as well as v set. */ export function encodeIP(prefix, str) { const [addr, mask] = str; let inside = null; if (mask) { // Trim length of bytes to mask bits const numBytes = Math.ceil(mask / 8); let bytes = addr.bytes.slice(0, numBytes); const lastByte = bytes[bytes.length - 1]; if (lastByte !== 0) { const numBits = mask % 8; if (numBits) { bytes[bytes.length - 1] = (lastByte >> numBits) << numBits; } } let count = bytes.length; for (let i = bytes.length - 1; i >= 0; i--) { if (bytes[i] === 0) { count = i; } else { break; } } bytes = bytes.slice(0, count); inside = { mt: MT.ENCODED_BYTES, str: encode([mask, bytes]), v: addr.v, }; } else { inside = { mt: MT.ENCODED_BYTES, str: encode(addr.bytes), v: addr.v, }; } if (prefix === 'ip') { return inside; } return { mt: MT.ENCODED_BYTES, str: new ByteTree(numToBytes({ int: (addr.v === 4) ? IPV4_TAG : IPV6_TAG }, null, MT.TAG), inside.str).bytes(), }; } /** * Convert the parsed version of an IPv6 address into a single buffer. * * @param bytes Array of '::', bytes as Uint8Array, or an IPv6 address as * IPbytes. * @returns IPbytes with v:6. */ export function encodeIPv6(bytes) { const bf = bytes.map(b => (((typeof b !== 'string') && ('bytes' in b)) ? b.bytes : b)); // Position of the :: let cc = -1; const byteCount = bf.reduce((t, v, i) => { if (v instanceof Uint8Array) { t += v.length; } else { cc = i; } return t; }, 0); if (cc >= 0) { bf[cc] = hexToU8(''.padStart((16 - byteCount) * 2, '0')); } return { bytes: u8concat(bf), v: 6, }; } function encodeB64(_prefix, b64) { const b = b64.flat(Infinity).join(''); return base64UrlToBytes(b); } const knownTypes = new Map(); /** * Register an app-string decoder. * * @param prefix The string before the first squote. * @param fun Function to process the string. */ export function registerAppString(prefix, fun) { if (fun) { knownTypes.set(prefix, fun); } else { knownTypes.delete(prefix); } } registerAppString('h', () => ['app_string_h', encodeHex]); registerAppString('b64', () => ['app_string_b64', encodeB64]); registerAppString('dt', () => ['date_time', encodeDate]); registerAppString('DT', () => ['date_time', encodeDate]); registerAppString('ip', () => ['app_string_ip', encodeIP]); registerAppString('IP', () => ['app_string_ip', encodeIP]); // Removed from draft. Add back later if desired. // registerAppString( // 'b32', // (_p: string, str: string) => [null, base32.decode(str)] // ); // registerAppString( // 'h32', // (_p: string, str: string) => [null, base32hex.decode(str)] // ); /** * Two-step processing for app-string plugins. If the first step returns null * for the first item in the array, decoding happens all in the first step. * Otherwise, return `[grammarRuleName, callback]`. The named grammar * rule will be called, and the results passed to `callback(prefix, results)`. * * @param prefix Results of app_prefix. * @param str The matched sqstr, with squotes unescaped. * @returns One of the possible approaches. */ export function parseAppString(prefix, str) { const fun = knownTypes.get(prefix); if (!fun) { return [ null, { mt: MT.CUSTOM, prefix, str: TE.encode(str), }, ]; } return fun(prefix, str); }