UNPKG

byte-encoder

Version:

Encode data to utf8 bytes. Browser or NodeJS.

418 lines (385 loc) 11.5 kB
import ByteView from 'byteview' class ByteEncoderIterator { #string #read = 0 #written = 0 constructor (string = '') { if (typeof string !== 'string') { throw new TypeError('Expected typeof "string". Recieved typeof: "' + typeof string + '"') } this.#string = string } get [Symbol.toStringTag] () { return 'ByteEncoder' } get read () { return this.#read } get written () { return this.#written } * [Symbol.iterator] () { let index = -1 let leadSurrogate = null const { length } = this.#string this.#read = 0 this.#written = 0 while (++index < length) { let codePoint = this.#string.charCodeAt(index) ++this.#read // is surrogate component if (codePoint > 0xD7FF && codePoint < 0xE000) { // if last char was not a lead if (leadSurrogate === null) { // no lead yet if (codePoint > 0xDBFF) { // unexpected trail this.#written += 3 yield * [0xEF, 0xBF, 0xBD] continue } else if (index + 1 === length) { // unpaired lead this.#written += 3 yield * [0xEF, 0xBF, 0xBD] continue } // valid lead leadSurrogate = codePoint continue } // 2 leads in a row if (codePoint < 0xDC00) { this.#written += 3 yield * [0xEF, 0xBF, 0xBD] leadSurrogate = codePoint continue } // valid surrogate pair codePoint = (leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00) + 0x10000 } else if (leadSurrogate) { // valid bmp char, but last char was a lead this.#written += 3 yield * [0xEF, 0xBF, 0xBD] continue } leadSurrogate = null // encode utf8 if (codePoint < 0x80) { ++this.#written yield codePoint } else if (codePoint < 0x800) { this.#written += 2 yield * [ codePoint >> 0x6 | 0xC0, codePoint & 0x3F | 0x80 ] } else if (codePoint < 0x10000) { this.#written += 3 yield * [ codePoint >> 0xC | 0xE0, codePoint >> 0x6 & 0x3F | 0x80, codePoint & 0x3F | 0x80 ] } else if (codePoint < 0x110000) { this.#written += 4 yield * [ codePoint >> 0x12 | 0xF0, codePoint >> 0xC & 0x3F | 0x80, codePoint >> 0x6 & 0x3F | 0x80, codePoint & 0x3F | 0x80 ] } else { throw new Error('Invalid code point') } } } } export default class ByteEncoder { #encoding static Iterator = ByteEncoderIterator constructor () { this.#encoding = 'utf-8' } get encoding () { return this.#encoding } /** * * @param {string} string * @returns {ByteView} */ encode (string = '') { if (typeof string !== 'string') { throw new TypeError('Expected type "string". Recieved type: "' + typeof string + '"') } let index = -1 let leadSurrogate = null const { length } = string const optimisticLength = length * 2 + 5 let byteView = new ByteView(optimisticLength) let read = 0 let written = 0 let offset = -1 while (++index < length) { let codePoint = string.charCodeAt(index) ++read // is surrogate component if (codePoint > 0xD7FF && codePoint < 0xE000) { // if last char was not a lead if (leadSurrogate === null) { // no lead yet if (codePoint > 0xDBFF) { // unexpected trail if (written + 3 >= byteView.length) { // reallocate ByteView byteView = ByteView.concat( [byteView], written + (length - (read - 1)) * 3 ) } byteView[++offset] = 0xEF byteView[++offset] = 0xBF byteView[++offset] = 0xBD written += 3 continue } else if (index + 1 === length) { // unpaired lead if (written + 3 >= byteView.length) { // reallocate ByteView byteView = ByteView.concat( [byteView], written + (length - (read - 1)) * 3 ) } byteView[++offset] = 0xEF byteView[++offset] = 0xBF byteView[++offset] = 0xBD written += 3 continue } // valid lead leadSurrogate = codePoint continue } // 2 leads in a row if (codePoint < 0xDC00) { if (written + 3 >= byteView.length) { // reallocate ByteView byteView = ByteView.concat( [byteView], written + (length - (read - 1)) * 3 ) } byteView[++offset] = 0xEF byteView[++offset] = 0xBF byteView[++offset] = 0xBD written += 3 leadSurrogate = codePoint continue } // valid surrogate pair codePoint = (leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00) + 0x10000 } else if (leadSurrogate) { // valid bmp char, but last char was a lead if (written + 3 >= byteView.length) { // reallocate ByteView byteView = ByteView.concat( [byteView], written + (length - (read - 1)) * 3 ) } byteView[++offset] = 0xEF byteView[++offset] = 0xBF byteView[++offset] = 0xBD written += 3 continue } leadSurrogate = null // encode utf8 if (codePoint < 0x80) { if (written + 1 >= byteView.length) { // reallocate ByteView byteView = ByteView.concat( [byteView], written + (length - (read - 1)) * 3 ) } byteView[++offset] = codePoint written += 1 } else if (codePoint < 0x800) { if (written + 2 >= byteView.length) { // reallocate ByteView byteView = ByteView.concat( [byteView], written + (length - (read - 1)) * 3 ) } byteView[++offset] = codePoint >> 0x6 | 0xC0 byteView[++offset] = codePoint & 0x3F | 0x80 written += 2 } else if (codePoint < 0x10000) { if (written + 3 >= byteView.length) { // reallocate ByteView byteView = ByteView.concat( [byteView], written + (length - (read - 1)) * 3 ) } byteView[++offset] = codePoint >> 0xC | 0xE0 byteView[++offset] = codePoint >> 0x6 & 0x3F | 0x80 byteView[++offset] = codePoint & 0x3F | 0x80 written += 3 } else if (codePoint < 0x110000) { if (written + 4 >= byteView.length) { // reallocate ByteView byteView = ByteView.concat( [byteView], written + (length - (read - 1)) * 3 ) } byteView[++offset] = codePoint >> 0x12 | 0xF0 byteView[++offset] = codePoint >> 0xC & 0x3F | 0x80 byteView[++offset] = codePoint >> 0x6 & 0x3F | 0x80 byteView[++offset] = codePoint & 0x3F | 0x80 written += 4 } else { throw new Error('Invalid code point') } } return byteView.slice(0, written) } /** * * @param {string} string * @param {ByteView | ArrayBufferView} byteView * @returns {{ written: number, read: number }} */ encodeInto (string, byteView) { if (typeof string !== 'string') { throw new TypeError('Expected type "string". Recieved type: "' + typeof string + '"') } if (!ByteView.isByteView(byteView) && !ByteView.isView(byteView) && !isBuffer(byteView)) { throw new TypeError('Expected instance of "ByteView | TypedArray | Buffer"') } let index = -1 let leadSurrogate = null const { length } = string const max = byteView.length let read = 0 let written = 0 let offset = -1 while (++index < length) { let codePoint = string.charCodeAt(index) if (written + 1 >= max) { return { written, read } } ++read // is surrogate component if (codePoint > 0xD7FF && codePoint < 0xE000) { // if last char was not a lead if (leadSurrogate === null) { // no lead yet if (codePoint > 0xDBFF) { // unexpected trail if (written + 3 >= max) { --read return { written, read } } byteView[++offset] = 0xEF byteView[++offset] = 0xBF byteView[++offset] = 0xBD written += 3 continue } else if (index + 1 === length) { // unpaired lead if (written + 3 >= max) { --read return { written, read } } byteView[++offset] = 0xEF byteView[++offset] = 0xBF byteView[++offset] = 0xBD written += 3 continue } // valid lead leadSurrogate = codePoint continue } // 2 leads in a row if (codePoint < 0xDC00) { if (written + 3 >= max) { --read return { written, read } } byteView[++offset] = 0xEF byteView[++offset] = 0xBF byteView[++offset] = 0xBD written += 3 leadSurrogate = codePoint continue } // valid surrogate pair codePoint = (leadSurrogate - 0xD800 << 10 | codePoint - 0xDC00) + 0x10000 } else if (leadSurrogate) { // valid bmp char, but last char was a lead if (written + 3 >= max) { --read return { written, read } } byteView[++offset] = 0xEF byteView[++offset] = 0xBF byteView[++offset] = 0xBD written += 3 continue } leadSurrogate = null // encode utf8 if (codePoint < 0x80) { if (written + 1 >= max) { --read return { written, read } } byteView[++offset] = codePoint written += 1 } else if (codePoint < 0x800) { if (written + 2 >= max) { --read return { written, read } } byteView[++offset] = codePoint >> 0x6 | 0xC0 byteView[++offset] = codePoint & 0x3F | 0x80 written += 2 } else if (codePoint < 0x10000) { if (written + 3 >= max) { --read return { written, read } } byteView[++offset] = codePoint >> 0xC | 0xE0 byteView[++offset] = codePoint >> 0x6 & 0x3F | 0x80 byteView[++offset] = codePoint & 0x3F | 0x80 written += 3 } else if (codePoint < 0x110000) { if (written + 4 >= max) { --read return { written, read } } byteView[++offset] = codePoint >> 0x12 | 0xF0 byteView[++offset] = codePoint >> 0xC & 0x3F | 0x80 byteView[++offset] = codePoint >> 0x6 & 0x3F | 0x80 byteView[++offset] = codePoint & 0x3F | 0x80 written += 4 } else { throw new Error('Invalid code point') } } return { written, read } } } function isBuffer (byteView) { return ( typeof Buffer !== 'undefined' && typeof Buffer.isBuffer === 'function' && Buffer.isBuffer(byteView) ) }