binary-structures
Version:
Yet another declarative binary packer/parser, but built for modern browsers.
600 lines (529 loc) • 24.3 kB
text/typescript
import 'improved-map';
import {
Bits_Sizes,
Uint_Sizes,
Int_Sizes,
Float_Sizes,
Size,
Serializer,
Deserializer,
uint_pack,
int_pack,
float_pack,
uint_parse,
int_parse,
float_parse,
utf8_pack,
utf8_parse,
hex_buffer
} from './serialization';
export type Primitive = number | string | ArrayBuffer;
export type Mapped<T> = Map<string, T>;
export type Encoded_Map = Mapped<any>;
export type Encoded_Array = Array<any>;
export type Encoded = Primitive | Encoded_Map | Encoded_Array;
export type Map_Context = Encoded_Map;
export type Array_Context = Encoded_Array;
export type Context = Encoded;
export const Parent = Symbol("Parent");
export interface Contextualized<P> {
[Parent]?: P;
}
const set_context = <E extends (Encoded_Map | Encoded_Array), C>(data: E, context?: C): Context_Type<E, C> => {
if ( context !== undefined ) {
( data as Context_Type<E, C> )[Parent] = context;
}
return data as Context_Type<E, C>;
};
const remove_context = <T extends (Encoded_Map | Encoded_Array), C>(data: T, delete_flag: boolean): T => {
if ( delete_flag ) {
delete ( data as Context_Type<T, C> )[Parent];
}
return data;
};
/* Context needs to be imposed at the Struct level to support Repeat & Byte_Buffer */
export type Context_Type<E extends Encoded, C> = E & Contextualized<C>;
export type Context_Map<Encoded, Context> = Context_Type<Mapped<Encoded>, Context>;
export type Context_Array<Encoded, Context> = Context_Type<Array<Encoded>, Context>;
/* Used by Embed */
export type Context_Iterable<Encoded, Context> = Context_Map<Encoded, Context> | Context_Array<Encoded, Context>;
/* These functions provided by library consumer to convert data to usable structures. */
export type Encoder<Decoded, E extends Encoded, Context> = (decoded: Decoded, context?: Context) => E;
export type Decoder<E extends Encoded, Decoded, Context> = (encoded: E, context?: Context) => Decoded;
export interface Transcoders<E extends Encoded, Decoded, Context> {
encode?: Encoder<Decoded, E, Context>;
decode?: Decoder<E, Decoded, Context>;
little_endian?: boolean;
}
export const inspect_transcoder = <T>(data: T, context?: any): T => {
console.log({ data, context });
return data
};
export const inspect = {
encode: inspect_transcoder,
decode: inspect_transcoder,
};
/** A function to fetch the data to be packed.
* It is provided by the code handling the input data and called by the packer function to fetch the data to pack.
*/
export interface Fetcher<Decoded> {
(): Decoded;
}
/** A function to deliver the parsed result to the correct place.
* It is provided by the code managing the results container and called by the parser function with the parsed data.
*/
export interface Deliver<Decoded> {
(data: Decoded): void;
}
export interface Parse_Options<Context> {
byte_offset?: number;
little_endian?: boolean;
context?: Context;
}
export interface Pack_Options<C> extends Parse_Options<C> {
data_view?: DataView;
}
export interface Packed {
buffer: ArrayBuffer;
size: Size; /* In Bytes */
}
export interface Parsed<Decoded> {
data: Decoded;
size: Size; /* In Bytes */
}
export interface Packer<Decoded, Context> {
(source: Decoded | Fetcher<Decoded>, options?: Pack_Options<Context>): Packed;
}
export interface Parser<Decoded, Context> {
(data_view: DataView, options?: Parse_Options<Context>, deliver?: Deliver<Decoded>): Parsed<Decoded>;
}
/* Explicitly imposing that, for custom Transcoders, output format from deserialization must match input format to serialization. */
export interface Struct<Decoded, Context> {
pack: Packer<Decoded, Context>;
parse: Parser<Decoded, Context>;
}
/* Called by pack */
interface fetch_and_encode<D, E extends Encoded, C> {
source: Fetcher<D | E> | D | E;
encode?: Encoder<D, E, C>;
context?: C;
}
const fetch_and_encode = <D, E extends Encoded, C>({ source, encode, context }: fetch_and_encode<D, E, C>): E => {
let decoded;
if ( typeof source === 'function' ) {
decoded = source();
} else {
decoded = source as D | E;
}
if ( typeof encode === 'function' ) {
return encode(decoded as D, context);
} else {
return decoded as E;
}
};
/* Called by parse */
interface decode_and_deliver<E extends Encoded, D, C> {
encoded: E | D;
decode?: Decoder<E, D, C>;
context?: C;
deliver?: Deliver<D>;
}
const decode_and_deliver = <E extends Encoded, D, C>({ encoded, decode, context, deliver }: decode_and_deliver<E, D, C>): D => {
let decoded;
if ( typeof decode === 'function' ) {
decoded = decode(encoded as E, context);
} else {
decoded = encoded as D;
}
if ( typeof deliver === 'function' ) {
deliver(decoded);
}
return decoded;
};
const factory = <E extends number | string>(serializer: Serializer<E>, deserializer: Deserializer<E>, verify_size: (bits: number) => boolean) => {
return ( <D, C>(bits: number, transcoders: Transcoders<E, D, C> = {}): Struct<D, C> => {
if ( !verify_size(bits) ) {
throw new Error(`Invalid size: ${bits}`);
}
const { encode, decode, little_endian: LE } = transcoders;
const pack: Packer<D, C> = (source, options = {}) => {
const { data_view = new DataView(new ArrayBuffer(Math.ceil(bits / 8))), byte_offset = 0, little_endian = LE, context } = options;
const encoded = fetch_and_encode({ source, encode, context }) as E;
const size = ( serializer(encoded, { bits, data_view, byte_offset, little_endian }) / 8 );
return { size, buffer: data_view.buffer };
};
const parse: Parser<D, C> = (data_view, options = {}, deliver) => {
const { byte_offset = 0, little_endian = LE, context } = options;
const encoded = deserializer({ bits, data_view, byte_offset, little_endian });
const data = decode_and_deliver({ encoded, context, decode, deliver }) as D;
return { data, size: bits / 8 };
};
return { pack, parse };
} );
};
export const Bits = factory(uint_pack, uint_parse, (s) => Bits_Sizes.includes(s));
export const Uint = factory(uint_pack, uint_parse, (s) => Uint_Sizes.includes(s));
export const Int = factory(int_pack, int_parse, (s) => Int_Sizes.includes(s));
export const Float = factory(float_pack, float_parse, (s) => Float_Sizes.includes(s));
export const Utf8 = factory(utf8_pack, utf8_parse, (s) => s % 8 === 0 && s >= 0);
export type Numeric<C> = number | { bits?: number, bytes?: number } | ((context?: C) => number);
const numeric = <C>(n: Numeric<C>, context?: C, type: 'b' | 'B' = 'B'): number => {
if ( typeof n === 'object' ) {
let { bits = 0, bytes = 0 } = n;
n = type === 'B' ? bits / 8 + bytes : bits + bytes * 8;
} else if ( typeof n === 'function' ) {
n = n(context);
} else if ( typeof n !== 'number' ) {
throw new Error(`Invalid numeric input ${n}`);
}
if ( n < 0 ) {
throw new Error(`Invalid size: ${n} bytes`);
}
return n;
};
/** Byte_Buffer doesn't do any serialization, but just copies bytes to/from an ArrayBuffer that's a subset of the
* serialized buffer. Byte_Buffer only works on byte-aligned data.
*
* @param {Numeric} length
* @param {Transcoders<ArrayBuffer, any>} transcoders
*/
export const Byte_Buffer = <D, C>(length: Numeric<C>, transcoders: Transcoders<ArrayBuffer, D, C> = {}) => {
const { encode, decode } = transcoders;
const pack = (source: D | Fetcher<D>, options: Pack_Options<C> = {}): Packed => {
const { data_view, byte_offset = 0, context } = options;
const size = numeric(length, context);
const buffer = fetch_and_encode({ source, encode, context });
if ( size !== buffer.byteLength ) {
throw new Error(`Length miss-match. Expected length: ${size}, actual bytelength: ${buffer.byteLength}`)
}
if ( data_view === undefined ) {
return { size, buffer }
}
new Uint8Array(buffer).forEach((value, index) => {
data_view.setUint8(byte_offset + index, value);
});
return { size, buffer: data_view.buffer }
};
const parse = (data_view: DataView, options: Parse_Options<C> = {}, deliver?: Deliver<D>) => {
const { byte_offset = 0, context } = options;
const size = numeric(length, context);
const buffer = data_view.buffer.slice(byte_offset, byte_offset + size);
const data = decode_and_deliver({ encoded: buffer, context, decode, deliver });
return { data, size };
};
return { pack, parse }
};
export const Padding = <C>(bits: Numeric<C>, transcoders: Transcoders<number, any, C> = {}): Struct<any, C> => {
const { encode, decode } = transcoders;
const pack: Packer<any, C> = (source, options = {}) => {
let { data_view, byte_offset = 0, context } = options;
const size = numeric(bits, context, 'b') as number;
if ( data_view === undefined) {
data_view = new DataView(new ArrayBuffer(Math.ceil(size / 8)));
}
if ( encode !== undefined ) {
let fill: number = encode(null, options.context);
let i = 0;
while ( i < Math.floor(size / 8) ) {
data_view.setUint8(byte_offset + i, fill);
fill >>= 8;
i++;
}
const remainder = size % 8;
if ( remainder ) {
data_view.setUint8(byte_offset + i, fill & ( 2 ** remainder - 1 ))
}
}
return { size: size / 8, buffer: data_view.buffer }
};
const parse: Parser<any, C> = (data_view, options = {}, deliver) => {
const { context } = options;
const size = numeric(bits, context, 'b') as number;
let data: any = null;
if ( decode !== undefined ) {
data = decode(data, context);
if ( deliver !== undefined ) {
deliver(data);
}
}
return { size: size / 8, data };
};
return { pack, parse }
};
/* Allow Symbols once TypesScript adds support */
export type Chooser<C> = (context?: C) => number | string;
export interface Choices<D, C> {
[choice: number]: Struct<D, C>;
[choice: string]: Struct<D, C>;
}
export interface Branch<D, C> {
chooser: Chooser<C>;
choices: Choices<D, C>;
default_choice?: Struct<D, C>;
}
export const Branch = <D, C>({ chooser, choices, default_choice }: Branch<D, C>): Struct<D, C> => {
const choose = (source?: C): Struct<D, C> => {
let choice = chooser(source);
if ( choices.hasOwnProperty(choice) ) {
return choices[choice];
} else {
if ( default_choice !== undefined ) {
return default_choice;
} else {
throw new Error(`Choice ${choice} not in ${Object.keys(choices)}`);
}
}
};
const pack: Packer<D, C> = (source, options = {}) => {
return choose(options.context).pack(source, options);
};
const parse: Parser<D, C> = (data_view, options = {}, deliver) => {
return choose(options.context).parse(data_view, options, deliver);
};
return { parse, pack };
};
export const Embed = <D, C extends Context_Iterable<D, S>, S>(embedded: Struct<Context_Iterable<D, S>, S> | Struct<D, C>): Struct<Context_Iterable<D, S> | D, C> => {
const pack = (source: Fetcher<D>, { byte_offset, data_view, little_endian, context }: Pack_Options<C> = {}): Packed => {
if ( context !== undefined ) {
const parent = context[Parent];
if ( embedded instanceof Array ) {
return ( embedded as Binary_Array<D, Context_Array<D, S>, S> )
.pack( context as Context_Array<D, S>, { byte_offset, data_view, little_endian, context: parent }, source);
} else if ( embedded instanceof Map ) {
return ( embedded as Binary_Map<D, Context_Map<D, S>, S> )
.pack(context as Context_Map<D, S>, { byte_offset, data_view, little_endian, context: parent }, context as Context_Map<D, S>);
}
}
return ( embedded as Struct<D, C> ).pack(source, { byte_offset, data_view, little_endian, context });
};
const parse = (data_view: DataView, { byte_offset, little_endian, context }: Parse_Options<C> = {}, deliver?: Deliver<D>): Parsed<Context_Iterable<D, S> | D> => {
if ( context !== undefined ) {
const parent = context[Parent];
if ( embedded instanceof Array ) {
return ( embedded as Binary_Array<D, Context_Array<D, S>, S> )
.parse(data_view, { byte_offset, little_endian, context: parent }, undefined, context as Context_Array<D, S>);
} else if ( embedded instanceof Map ) {
return ( embedded as Binary_Map<D, Context_Map<D, S>, S> )
.parse(data_view, { byte_offset, little_endian, context: parent }, undefined, context as Context_Map<D, S>);
}
}
return ( embedded as Struct<D, C> ).parse(data_view, { byte_offset, little_endian, context }, deliver);
};
return { pack, parse }
};
const concat_buffers = (packed: Packed[], byte_length: number) => {
const data_view = new DataView(new ArrayBuffer(Math.ceil(byte_length)));
let byte_offset = 0;
for ( const { size, buffer } of packed ) {
/* Copy all the data from the returned buffers into one grand buffer. */
const bytes = Array.from(new Uint8Array(buffer as ArrayBuffer));
/* Create a Byte Array with the appropriate number of Uint(8)s, possibly with a trailing Bits. */
const array = Binary_Array();
for ( let i = 0; i < Math.floor(size); i++ ) {
array.push(Uint(8));
}
if ( size % 1 ) {
array.push(Bits(( size % 1 ) * 8));
}
/* Pack the bytes into the buffer */
array.pack(bytes, { data_view, byte_offset });
byte_offset += size;
}
return data_view;
};
export type Map_Item<I> = Struct<I, Mapped<I>>;
export type Map_Iterable<I> = Array<[string, Map_Item<I>]>;
export type Map_Transcoders<I, D, C> = Transcoders<Mapped<I>, D, C>;
export interface Binary_Map<I, D, C> extends Mapped<Map_Item<I>>, Struct<D, C> {
pack: (source: D | Fetcher<D>, options?: Pack_Options<C>, encoded?: Context_Map<I, C>) => Packed;
parse: (data_view: DataView, options?: Parse_Options<C>, deliver?: Deliver<D>, results?: Context_Map<I, C>) => Parsed<D>;
}
export function Binary_Map<I, D, C>(transcoders: Map_Transcoders<I, D, C> | Map_Iterable<I> = {}, iterable?: Map_Iterable<I> | Map_Transcoders<I, D, C>) {
if ( transcoders instanceof Array ) {
[transcoders, iterable] = [iterable as Map_Transcoders<I, D, C>, transcoders as Map_Iterable<I>];
}
const { encode, decode, little_endian: LE } = transcoders;
const map = new Map(( iterable || [] ) as Map_Iterable<I>) as Binary_Map<I, D, C>;
map.pack = (source, options = {}, encoded) => {
const packed: Packed[] = [];
let { data_view, byte_offset = 0, little_endian = LE, context } = options;
if ( encoded === undefined ) {
encoded = fetch_and_encode({ source, encode, context });
set_context(encoded, context);
}
/* Need to return a function to the `pack` chain to enable Embed with value checking. */
const fetcher = (key: string) => () => {
const value = encoded!.get(key);
if ( value === undefined ) {
throw new Error(`Insufficient data for serialization: ${key} not in ${encoded}`)
}
return value;
};
let offset = 0;
for ( const [key, item] of map ) {
const { size, buffer } = item.pack(fetcher(key), { data_view, byte_offset: data_view === undefined ? 0 : byte_offset + offset, little_endian, context: encoded });
if ( data_view === undefined ) {
packed.push({ size, buffer });
}
offset += size;
}
if ( data_view === undefined ) {
data_view = concat_buffers(packed, offset);
}
return { size: offset, buffer: data_view.buffer };
};
map.parse = (data_view, options = {}, deliver, results) => {
const { byte_offset = 0, little_endian = LE, context } = options;
let remove_parent_symbol = false;
if ( results === undefined ) {
results = set_context(new Map() as Mapped<I>, context);
remove_parent_symbol = true;
}
let offset = 0;
for ( const [key, item] of map ) {
const { data, size } = item.parse(data_view, { byte_offset: byte_offset + offset, little_endian, context: results }, (data) => results!.set(key, data));
offset += size;
}
const data = decode_and_deliver<Mapped<I>, D, C>({ encoded: results, decode, context, deliver });
remove_context(results, remove_parent_symbol);
return { data, size: offset };
};
return map;
}
export namespace Binary_Map {
export let object_encoder = (obj: any) => Map.fromObject(obj);
export let object_decoder = (map: Map<any, any>) => map.toObject();
export let object_transcoders = {encode: Binary_Map.object_encoder, decode: Binary_Map.object_decoder};
}
/* This would be much cleaner if JavaScript had interfaces. Or I could make everything subclass Struct... */
const extract_array_options = <Items, Transcoders>(elements: Array<Items | Transcoders> = []) => {
if ( elements.length > 0 ) {
const first = elements[0];
if ( !first.hasOwnProperty('pack') && !first.hasOwnProperty('parse') ) {
return elements.shift() as Transcoders;
}
const last = elements[elements.length - 1];
if ( !last.hasOwnProperty('pack') && !last.hasOwnProperty('parse') ) {
return elements.pop() as Transcoders;
}
}
return {} as Transcoders;
};
export type Array_Item<I> = Struct<I, Array<I>>;
export type Array_Transcoders<I, D, C> = Transcoders<Array<I>, D, C>;
export interface Binary_Array<I, D, C> extends Array<Array_Item<I>>, Struct<D, C> {
pack: (source: D | Fetcher<D>, options?: Pack_Options<C>, fetcher?: Fetcher<I>) => Packed;
__pack_loop: (fetcher: Fetcher<I>, options: Pack_Options<Array<I>>, store: (result: Packed) => void, parent?: C) => number;
parse: (data_view: DataView, options?: Parse_Options<C>, deliver?: Deliver<D>, results?: Context_Array<I, C>) => Parsed<D>;
__parse_loop: (data_view: DataView, options: Parse_Options<Context_Array<I, C>>, deliver: Deliver<I>, parent?: C) => number;
}
export const Binary_Array = <I, D, C>(...elements: Array<Array_Transcoders<I, D, C> | Array_Item<I>>): Binary_Array<I, D, C> => {
const { encode, decode, little_endian: LE } = extract_array_options(elements) as Array_Transcoders<I, D, C>;
const array = new Array(...elements as Array<Array_Item<I>>) as Binary_Array<I, D, C>;
array.pack = (source, options = {}, fetcher) => {
let { data_view, byte_offset = 0, little_endian = LE, context } = options;
const encoded = fetch_and_encode({ source, encode, context });
const packed: Packed[] = [];
if ( fetcher === undefined ) {
set_context(encoded, context);
const iterator = encoded[Symbol.iterator]();
fetcher = () => {
const value = iterator.next().value;
if ( value === undefined ) {
throw new Error(`Insufficient data for serialization: ${encoded}`)
}
return value;
}
}
const store = (result: Packed) => {
if ( data_view === undefined ) {
packed.push(result);
}
};
const size = array.__pack_loop(fetcher, { data_view, byte_offset, little_endian, context: encoded }, store, context);
if ( data_view === undefined ) {
data_view = concat_buffers(packed, size);
}
return { size, buffer: data_view.buffer };
};
array.__pack_loop = (fetcher, { data_view, byte_offset = 0, little_endian, context }, store) => {
let offset = 0;
for ( const item of array ) {
const { size, buffer } = item.pack(fetcher, { data_view, byte_offset: data_view === undefined ? 0 : byte_offset + offset, little_endian, context });
store({ size, buffer });
offset += size;
}
return offset;
};
array.parse = (data_view, options = {}, deliver, results) => {
const { byte_offset = 0, little_endian = LE, context } = options;
let remove_parent_symbol = false;
if ( results === undefined ) {
results = set_context(new Array() as Array<I>, context);
remove_parent_symbol = true;
}
const size = array.__parse_loop(data_view, { byte_offset, little_endian, context: results }, (data: I) => results!.push(data), context);
const data = decode_and_deliver({ encoded: remove_context(results, remove_parent_symbol), context, decode, deliver });
return { data, size };
};
array.__parse_loop = (data_view, { byte_offset = 0, little_endian, context }, deliver) => {
let offset = 0;
for ( const item of array ) {
const { data, size } = item.parse(data_view, { byte_offset: byte_offset + offset, little_endian, context }, deliver);
offset += size;
}
return offset;
};
return array;
};
export interface Repeat_Options<I, D, C> extends Array_Transcoders<I, D, C> {
count?: Numeric<C>;
bytes?: Numeric<C>;
}
export const Repeat = <I, D, C>(...elements: Array<Repeat_Options<I, D, C> | Array_Item<I>>): Binary_Array<I, D, C> => {
const { count, bytes, encode, decode, little_endian } = extract_array_options(elements) as Repeat_Options<I, D, C>;
const array = Binary_Array<I, D, C>({ encode, decode, little_endian }, ...elements as Array<Array_Item<I>>);
const pack_loop = array.__pack_loop;
const parse_loop = array.__parse_loop;
array.__pack_loop = (fetcher, { data_view, byte_offset = 0, little_endian, context }, store, parent) => {
let offset = 0;
if ( count !== undefined ) {
const repeat = numeric(count, parent);
for ( let i = 0; i < repeat; i++ ) {
offset += pack_loop(fetcher, { data_view, byte_offset: byte_offset + offset, little_endian, context }, store);
}
} else if ( bytes !== undefined ) {
const repeat = numeric(bytes, parent);
while ( offset < repeat ) {
offset += pack_loop(fetcher, { data_view, byte_offset: byte_offset + offset, little_endian, context }, store);
}
if ( offset > repeat ) {
throw new Error(`Cannot pack into ${repeat} bytes.`);
}
} else {
throw new Error("One of count or bytes must specified in options.")
}
return offset;
};
array.__parse_loop = (data_view, { byte_offset = 0, little_endian, context }, deliver, parent) => {
let offset = 0;
if ( count !== undefined ) {
const repeat = numeric(count, parent);
for ( let i = 0; i < repeat; i++ ) {
offset += parse_loop(data_view, { byte_offset: byte_offset + offset, little_endian, context }, deliver);
}
} else if ( bytes !== undefined ) {
const repeat = numeric(bytes, parent);
while ( offset < repeat ) {
offset += parse_loop(data_view, { byte_offset: byte_offset + offset, little_endian, context }, deliver);
}
if ( offset > repeat ) {
throw new Error(`Cannot parse exactly ${repeat} bytes.`);
}
} else {
throw new Error("One of count or bytes must specified in options.")
}
return offset;
};
return array;
};