// @ipld/dag-json
// Version:
// JS implementation of DAG-JSON
// 292 lines (268 loc) • 8.35 kB
// JavaScript
/* eslint max-depth: ["error", 7] */
import { Token, Type } from 'cborg'
import * as cborgJson from 'cborg/json'
import { CID } from 'multiformats'
import { base64 } from 'multiformats/bases/base64'
/**
* @template T
* @typedef {import('multiformats/codecs/interface').ByteView<T>} ByteView
*/
/**
* @template T
* @typedef {import('multiformats/codecs/interface').ArrayBufferView<T>} ArrayBufferView
*/
/**
* @template T
* @typedef {import('multiformats').ToString<T>} ToString
*/
/**
* @typedef {import('cborg/interface').DecodeTokenizer} DecodeTokenizer
*/
/**
 * Normalise the input accepted by `decode()` into a `Uint8Array`.
 *
 * - A `Uint8Array` (or subclass) is returned untouched.
 * - An `ArrayBuffer` is wrapped in a `Uint8Array` covering the whole buffer.
 * - Any other view (`DataView`, `Uint16Array`, ...) is re-viewed as the exact
 *   byte range it covers, so its content is read byte-by-byte rather than
 *   element-by-element. No data is copied.
 * - Anything else is returned as-is.
 *
 * @template T
 * @param {ByteView<T> | ArrayBufferView<T>} buf
 * @returns {ByteView<T>}
 */
function toByteView (buf) {
  if (buf instanceof ArrayBuffer) {
    return new Uint8Array(buf, 0, buf.byteLength)
  }
  // Widen to `any`: the declared parameter type doesn't include the other view
  // types, but callers may still hand them in at runtime.
  /** @type {any} */
  const candidate = buf
  if (ArrayBuffer.isView(candidate) && !(candidate instanceof Uint8Array)) {
    return new Uint8Array(candidate.buffer, candidate.byteOffset, candidate.byteLength)
  }
  return buf
}
/**
 * Type encoder registered for every `Object` seen during encode. Objects that
 * are not CIDs yield `null`, which tells the encoder to process them as
 * ordinary maps. A CID is replaced by the token sequence for
 * `{"/":"<CidString>"}`, as required by the DAG-JSON spec.
 *
 * @param {any} obj
 * @returns {Token[]|null}
 */
function cidEncoder (obj) {
  // Cheap pre-filter before handing the object to CID.asCID()
  const mayBeCid = obj.asCID === obj || obj['/'] === obj.bytes
  if (!mayBeCid) {
    return null // any other kind of object
  }
  const cid = CID.asCID(obj)
  /* c8 ignore next 4 */
  // very unlikely case, and it'll probably throw a recursion error in cborg
  if (!cid) {
    return null
  }
  const cidString = cid.toString()
  const openMap = new Token(Type.map, Infinity, 1)
  const slashKey = new Token(Type.string, '/', 1)
  const cidValue = new Token(Type.string, cidString, cidString.length)
  const closeMap = new Token(Type.break, undefined, 1)
  return [openMap, slashKey, cidValue, closeMap]
}
/**
 * Type encoder for `Uint8Array` (and `Buffer`). The bytes are replaced by the
 * token sequence for `{"/":{"bytes":"<Base64ByteString>"}}`, as required by
 * the DAG-JSON spec.
 *
 * @param {Uint8Array} bytes
 * @returns {Token[]|null}
 */
function bytesEncoder (bytes) {
  const prefixed = base64.encode(bytes)
  const bytesString = prefixed.slice(1) // no mbase prefix

  const tokens = []
  tokens.push(new Token(Type.map, Infinity, 1)) // {
  tokens.push(new Token(Type.string, '/', 1)) // "/":
  tokens.push(new Token(Type.map, Infinity, 1)) // {
  tokens.push(new Token(Type.string, 'bytes', 5)) // "bytes":
  tokens.push(new Token(Type.string, bytesString, bytesString.length)) // "<base64>"
  tokens.push(new Token(Type.break, undefined, 1)) // }
  tokens.push(new Token(Type.break, undefined, 1)) // }
  return tokens
}
/**
 * Type encoder for the typed arrays other than `Uint8Array` (and for
 * `DataView`): re-views exactly the byte range the object covers in its
 * underlying `ArrayBuffer` and hands that to `bytesEncoder()`.
 *
 * @param {Int8Array|Uint16Array|Int16Array|Uint32Array|Int32Array|Float32Array|Float64Array|Uint8ClampedArray|BigInt64Array|BigUint64Array} obj
 * @returns {Token[]|null}
 */
function taBytesEncoder (obj) {
  const { buffer, byteOffset, byteLength } = obj
  const rawBytes = new Uint8Array(buffer, byteOffset, byteLength)
  return bytesEncoder(rawBytes)
}
/**
 * Type encoder for plain `ArrayBuffer`s: views the whole buffer as a
 * `Uint8Array` and hands it to `bytesEncoder()`.
 *
 * @param {ArrayBuffer} ab
 * @returns {Token[]|null}
 */
function abBytesEncoder (ab) {
  const wholeBuffer = new Uint8Array(ab)
  return bytesEncoder(wholeBuffer)
}
// eslint-disable-next-line jsdoc/require-returns-check
/**
 * Type encoder for `undefined`. It never returns: any `undefined` encountered
 * while walking the object aborts the whole encode with an error.
 *
 * @returns {null}
 */
function undefinedEncoder () {
  const reason = '`undefined` is not supported by the IPLD Data Model and cannot be encoded'
  throw new Error(reason)
}
/**
 * Type encoder for `number`. Values the IPLD data model cannot represent
 * (`NaN`, `Infinity`, `-Infinity`) abort the encode with an error; every other
 * number yields `null` so the standard number encoder handles it.
 *
 * @param {number} num
 * @returns {null}
 */
function numberEncoder (num) {
  const isInfinite = num === Infinity || num === -Infinity
  if (isInfinite) {
    throw new Error('`Infinity` and `-Infinity` is not supported by the IPLD Data Model and cannot be encoded')
  }
  if (Number.isNaN(num)) {
    throw new Error('`NaN` is not supported by the IPLD Data Model and cannot be encoded')
  }
  return null // process with standard number encoder
}
// Every view type other than Uint8Array goes through taBytesEncoder, which
// re-views the underlying bytes before encoding them.
const byteViewEncoders = Object.fromEntries([
  'Int8Array',
  'Uint16Array',
  'Int16Array',
  'Uint32Array',
  'Int32Array',
  'Float32Array',
  'Float64Array',
  'Uint8ClampedArray',
  'BigInt64Array',
  'BigUint64Array',
  'DataView'
].map((typeName) => [typeName, taBytesEncoder]))

// Options handed to cborgJson.encode(): one encoder per type name.
const encodeOptions = {
  typeEncoders: {
    // CIDs are detected among plain objects
    Object: cidEncoder,
    // byte containers
    Buffer: bytesEncoder,
    Uint8Array: bytesEncoder,
    ...byteViewEncoders,
    ArrayBuffer: abBytesEncoder,
    // values that must be rejected or validated
    undefined: undefinedEncoder,
    number: numberEncoder
  }
}
/**
 * Tokenizer that recognises the DAG-JSON reserved map forms on top of the
 * cborg JSON tokenizer:
 *
 * - `{"/":"<string>"}` is emitted as TAG(42) followed by the string token
 * - `{"/":{"bytes":"<base64>"}}` is emitted as a single bytes token
 *
 * Tokens that were read ahead but turn out not to belong to one of those
 * forms are parked in `tokenBuffer` and replayed on later calls.
 *
 * @implements {DecodeTokenizer}
 */
class DagJsonTokenizer extends cborgJson.Tokenizer {
  /**
   * @param {Uint8Array} data
   * @param {object} [options]
   */
  constructor (data, options) {
    super(data, options)
    /**
     * Look-ahead tokens awaiting replay. Used as a stack, so tokens are pushed
     * in reverse of the order they must come back out.
     *
     * @type {Token[]}
     */
    this.tokenBuffer = []
  }

  /**
   * @returns {boolean} `true` only when nothing is parked and the underlying
   * tokenizer is finished too
   */
  done () {
    if (this.tokenBuffer.length !== 0) {
      return false
    }
    return super.done()
  }

  /**
   * Raw token source: parked tokens first, then the underlying tokenizer.
   *
   * @returns {Token}
   */
  _next () {
    if (this.tokenBuffer.length === 0) {
      return super.next()
    }
    // @ts-ignore https://github.com/Microsoft/TypeScript/issues/30406
    return this.tokenBuffer.pop()
  }

  /**
   * Implements rules outlined in https://github.com/ipld/specs/pull/356
   *
   * @returns {Token}
   */
  next () {
    const mapToken = this._next()
    if (mapToken.type !== Type.map) {
      return mapToken
    }

    const keyToken = this._next()
    const isSlashKey = keyToken.type === Type.string && keyToken.value === '/'
    if (!isSlashKey) {
      this.tokenBuffer.push(keyToken) // bail
      return mapToken
    }

    const valueToken = this._next()
    if (valueToken.type === Type.string) { // *must* be a CID
      const closer = this._next() // swallow the end-of-map token
      if (closer.type !== Type.break) {
        throw new Error('Invalid encoded CID form')
      }
      this.tokenBuffer.push(valueToken) // CID.parse will pick this up after our tag token
      return new Token(Type.tag, 42, 0)
    }
    if (valueToken.type !== Type.map) {
      this.tokenBuffer.push(valueToken, keyToken) // bail
      return mapToken
    }

    const innerKeyToken = this._next()
    const isBytesKey = innerKeyToken.type === Type.string && innerKeyToken.value === 'bytes'
    if (!isBytesKey) {
      this.tokenBuffer.push(innerKeyToken, valueToken, keyToken) // bail
      return mapToken
    }

    const innerValueToken = this._next()
    if (innerValueToken.type !== Type.string) {
      this.tokenBuffer.push(innerValueToken, innerKeyToken, valueToken, keyToken) // bail
      return mapToken
    }

    // *must* be Bytes: swallow two end-of-map tokens
    for (let remaining = 2; remaining > 0; remaining--) {
      const closer = this._next()
      if (closer.type !== Type.break) {
        throw new Error('Invalid encoded Bytes form')
      }
    }
    const bytes = base64.decode(`m${innerValueToken.value}`)
    return new Token(Type.bytes, bytes, innerValueToken.value.length)
  }
}
// Tag decoders indexed by tag number. The tokenizer emits
// TAG(42)STRING("bafy...") for an encoded CID, so the decoder registered for
// tag 42 only has to turn that STRING("bafy...") into a CID.
/** @type {import('cborg').TagDecoder[]} */
const tagDecoders = []
tagDecoders[42] = CID.parse

// Base options handed to cborgJson.decode()
const decodeOptions = {
  allowIndefinite: false,
  allowUndefined: false,
  allowNaN: false,
  allowInfinity: false,
  // ints outside of the safe-integer range come back as BigInt, which may
  // surprise users
  allowBigInt: true,
  strict: true,
  useMaps: false,
  rejectDuplicateMapKeys: true,
  tags: tagDecoders
}
// Codec name exported by this module
export const name = 'dag-json'
// Numeric codec code exported alongside `name`
// NOTE(review): presumably the multicodec table entry for dag-json — confirm against the table
export const code = 0x0129
/**
 * Encode a value as DAG-JSON bytes by running cborg's JSON encoder with this
 * module's `encodeOptions`.
 *
 * @template T
 * @param {T} node
 * @returns {ByteView<T>}
 */
export const encode = (node) => {
  const encoded = cborgJson.encode(node, encodeOptions)
  return encoded
}
/**
 * Decode DAG-JSON bytes into a value.
 *
 * The input is normalised with `toByteView()`, then decoded by cborg's JSON
 * decoder using a `DagJsonTokenizer` created for this call.
 *
 * @template T
 * @param {ByteView<T> | ArrayBufferView<T>} data
 * @returns {T}
 */
export const decode = (data) => {
  const buf = toByteView(data)
  // The tokenizer is stateful, so each call gets its own instance. The
  // per-call options are merged into a fresh object: writing the tokenizer
  // onto the shared `decodeOptions` would leave the last tokenizer (and the
  // buffer it reads) reachable from module state after the call.
  const options = {
    ...decodeOptions,
    tokenizer: new DagJsonTokenizer(buf, decodeOptions)
  }
  return cborgJson.decode(buf, options)
}
const utf8Decoder = new TextDecoder()
/**
 * Encode a value with `encode()` and return the resulting bytes decoded as a
 * UTF-8 string. Also exported under the name `stringify`.
 *
 * @template T
 * @param {T} node
 * @returns {ToString<T>}
 */
export const format = (node) => {
  const bytes = encode(node)
  return utf8Decoder.decode(bytes)
}
export { format as stringify }
const utf8Encoder = new TextEncoder()
/**
 * Encode a DAG-JSON string as UTF-8 bytes and decode it with `decode()`.
 *
 * @template T
 * @param {ToString<T>} data
 * @returns {T}
 */
export const parse = (data) => {
  const bytes = utf8Encoder.encode(data)
  return decode(bytes)
}