UNPKG

@cantoo/pdf-lib

Version:

Create and modify PDF files with JavaScript

1,443 lines (1,314 loc) 1.6 MB
/*
 * The `chars`, `lookup`, `encode`, and `decode` members of this file are
 * licensed under the following:
 *
 *     base64-arraybuffer
 *     https://github.com/niklasvh/base64-arraybuffer
 *
 *     Copyright (c) 2012 Niklas von Hertzen
 *     Licensed under the MIT license.
 *
 */
const chars$1 = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';

// Use a lookup table to find the index.
// Entries for characters outside the alphabet (including '=') stay 0.
const lookup$1 = new Uint8Array(256);
for (let i = 0; i < chars$1.length; i++) {
  lookup$1[chars$1.charCodeAt(i)] = i;
}

/**
 * Encodes a sequence of bytes as a padded base64 string.
 *
 * @param bytes An indexable collection of byte values (e.g. a Uint8Array).
 * @returns The base64 encoding of `bytes`, padded with `=` to a multiple of 4.
 */
const encodeToBase64 = (bytes) => {
  let base64 = '';
  const len = bytes.length;
  for (let i = 0; i < len; i += 3) {
    // Reads past the end of `bytes` yield `undefined`, which the bitwise
    // operators treat as 0; the surplus characters are replaced by padding
    // below.
    base64 += chars$1[bytes[i] >> 2];
    base64 += chars$1[((bytes[i] & 3) << 4) | (bytes[i + 1] >> 4)];
    base64 += chars$1[((bytes[i + 1] & 15) << 2) | (bytes[i + 2] >> 6)];
    base64 += chars$1[bytes[i + 2] & 63];
  }
  if (len % 3 === 2) {
    base64 = base64.substring(0, base64.length - 1) + '=';
  } else if (len % 3 === 1) {
    base64 = base64.substring(0, base64.length - 2) + '==';
  }
  return base64;
};

/**
 * Decodes a base64 string into bytes.
 *
 * @param base64 The base64 string to decode (no data URI prefix).
 * @returns A Uint8Array containing the decoded bytes.
 */
const decodeFromBase64$1 = (base64) => {
  // Every 4 characters encode 3 bytes; each trailing '=' removes one byte.
  // A fractional length is truncated by the Uint8Array constructor.
  let bufferLength = base64.length * 0.75;
  const len = base64.length;
  let i;
  let p = 0;
  let encoded1;
  let encoded2;
  let encoded3;
  let encoded4;
  if (base64[base64.length - 1] === '=') {
    bufferLength--;
    if (base64[base64.length - 2] === '=') {
      bufferLength--;
    }
  }
  const bytes = new Uint8Array(bufferLength);
  for (i = 0; i < len; i += 4) {
    encoded1 = lookup$1[base64.charCodeAt(i)];
    encoded2 = lookup$1[base64.charCodeAt(i + 1)];
    encoded3 = lookup$1[base64.charCodeAt(i + 2)];
    encoded4 = lookup$1[base64.charCodeAt(i + 3)];
    // Writes beyond `bytes.length` (from padding) are ignored by the
    // typed array.
    bytes[p++] = (encoded1 << 2) | (encoded2 >> 4);
    bytes[p++] = ((encoded2 & 15) << 4) | (encoded3 >> 2);
    bytes[p++] = ((encoded3 & 3) << 6) | (encoded4 & 63);
  }
  return bytes;
};

// This regex is designed to be as flexible as possible. It will parse certain
// invalid data URIs.
const DATA_URI_PREFIX_REGEX = /^(data)?:?([\w/+]+)?;?(charset=[\w-]+|base64)?.*,/i; /** * If the `dataUri` input is a data URI, then the data URI prefix must not be * longer than 100 characters, or this function will fail to decode it. * * @param dataUri a base64 data URI or plain base64 string * @returns a Uint8Array containing the decoded input */ const decodeFromBase64DataUri = (dataUri) => { const trimmedUri = dataUri.trim(); const prefix = trimmedUri.substring(0, 100); const res = prefix.match(DATA_URI_PREFIX_REGEX); // Assume it's not a data URI - just a plain base64 string if (!res) return decodeFromBase64$1(trimmedUri); // Remove the data URI prefix and parse the remainder as a base64 string const [fullMatch] = res; const data = trimmedUri.substring(fullMatch.length); return decodeFromBase64$1(data); }; const toCharCode = (character) => character.charCodeAt(0); const toCodePoint = (character) => character.codePointAt(0); const toHexStringOfMinLength = (num, minLength) => padStart$1(num.toString(16), minLength, '0').toUpperCase(); const toHexString = (num) => toHexStringOfMinLength(num, 2); const charFromCode = (code) => String.fromCharCode(code); const charFromHexCode = (hex) => charFromCode(parseInt(hex, 16)); const padStart$1 = (value, length, padChar) => { let padding = ''; for (let idx = 0, len = length - value.length; idx < len; idx++) { padding += padChar; } return padding + value; }; const stringAsByteArray = (str) => { const buffer = new Uint8Array(str.length); copyStringIntoBuffer(str, buffer, 0); return buffer; }; const copyStringIntoBuffer = (str, buffer, offset) => { const length = str.length; for (let idx = 0; idx < length; idx++) { buffer[offset++] = str.charCodeAt(idx); } return length; }; const addRandomSuffix = (prefix, suffixLength = 4) => `${prefix}-${Math.floor(Math.random() * Math.pow(10, suffixLength))}`; const escapeRegExp = (str) => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); const cleanText = (text) => 
text.replace(/\t|\u0085|\u2028|\u2029/g, ' ').replace(/[\b\v]/g, ''); const escapedNewlineChars = ['\\n', '\\f', '\\r', '\\u000B']; const newlineChars = ['\n', '\f', '\r', '\u000B']; // eslint-disable-next-line no-control-regex const isNewlineChar = (text) => /^[\n\f\r\u000B]$/.test(text); // eslint-disable-next-line no-control-regex const lineSplit = (text) => text.split(/[\n\f\r\u000B]/); const mergeLines = (text) => // eslint-disable-next-line no-control-regex text.replace(/[\n\f\r\u000B]/g, ' '); // JavaScript's String.charAt() method doesn work on strings containing UTF-16 // characters (with high and low surrogate pairs), such as 💩 (poo emoji). This // `charAtIndex()` function does. // // Credit: https://github.com/mathiasbynens/String.prototype.at/blob/master/at.js#L14-L48 const charAtIndex = (text, index) => { // Get the first code unit and code unit value const cuFirst = text.charCodeAt(index); let cuSecond; const nextIndex = index + 1; let length = 1; if ( // Check if it's the start of a surrogate pair. cuFirst >= 0xd800 && cuFirst <= 0xdbff && // high surrogate text.length > nextIndex // there is a next code unit ) { cuSecond = text.charCodeAt(nextIndex); if (cuSecond >= 0xdc00 && cuSecond <= 0xdfff) length = 2; // low surrogate } return [text.slice(index, index + length), length]; }; const charSplit = (text) => { const chars = []; for (let idx = 0, len = text.length; idx < len;) { const [c, cLen] = charAtIndex(text, idx); chars.push(c); idx += cLen; } return chars; }; const buildWordBreakRegex = (wordBreaks) => { const newlineCharUnion = escapedNewlineChars.join('|'); const escapedRules = ['$']; for (let idx = 0, len = wordBreaks.length; idx < len; idx++) { const wordBreak = wordBreaks[idx]; if (isNewlineChar(wordBreak)) { throw new TypeError(`\`wordBreak\` must not include ${newlineCharUnion}`); } escapedRules.push(wordBreak === '' ? '.' 
: escapeRegExp(wordBreak)); } const breakRules = escapedRules.join('|'); return new RegExp(`(${newlineCharUnion})|((.*?)(${breakRules}))`, 'gm'); }; const breakTextIntoLines = (text, wordBreaks, maxWidth, computeWidthOfText) => { const regex = buildWordBreakRegex(wordBreaks); const words = cleanText(text).match(regex); let currLine = ''; let currWidth = 0; const lines = []; const pushCurrLine = () => { if (currLine !== '') lines.push(currLine); currLine = ''; currWidth = 0; }; for (let idx = 0, len = words.length; idx < len; idx++) { const word = words[idx]; if (isNewlineChar(word)) { pushCurrLine(); } else { const width = computeWidthOfText(word); if (currWidth + width > maxWidth) pushCurrLine(); currLine += word; currWidth += width; } } pushCurrLine(); return lines; }; // See section "7.9.4 Dates" of the PDF specification const dateRegex = /^D:(\d\d\d\d)(\d\d)?(\d\d)?(\d\d)?(\d\d)?(\d\d)?([+\-Z])?(\d\d)?'?(\d\d)?'?$/; const parseDate = (dateStr) => { const match = dateStr.match(dateRegex); if (!match) return undefined; const [, year, month = '01', day = '01', hours = '00', mins = '00', secs = '00', offsetSign = 'Z', offsetHours = '00', offsetMins = '00',] = match; // http://www.ecma-international.org/ecma-262/5.1/#sec-15.9.1.15 const tzOffset = offsetSign === 'Z' ? 'Z' : `${offsetSign}${offsetHours}:${offsetMins}`; const date = new Date(`${year}-${month}-${day}T${hours}:${mins}:${secs}${tzOffset}`); return date; }; const findLastMatch = (value, regex) => { var _a; let position = 0; let lastMatch; while (position < value.length) { const match = value.substring(position).match(regex); if (!match) return { match: lastMatch, pos: position }; lastMatch = match; position += ((_a = match.index) !== null && _a !== void 0 ? 
_a : 0) + match[0].length; } return { match: lastMatch, pos: position }; }; const last = (array) => array[array.length - 1]; // export const dropLast = <T>(array: T[]): T[] => // array.slice(0, array.length - 1); const typedArrayFor = (value) => { if (value instanceof Uint8Array) return value; const length = value.length; const typedArray = new Uint8Array(length); for (let idx = 0; idx < length; idx++) { typedArray[idx] = value.charCodeAt(idx); } return typedArray; }; const mergeIntoTypedArray = (...arrays) => { const arrayCount = arrays.length; const typedArrays = []; for (let idx = 0; idx < arrayCount; idx++) { const element = arrays[idx]; typedArrays[idx] = element instanceof Uint8Array ? element : typedArrayFor(element); } let totalSize = 0; for (let idx = 0; idx < arrayCount; idx++) { totalSize += arrays[idx].length; } const merged = new Uint8Array(totalSize); let offset = 0; for (let arrIdx = 0; arrIdx < arrayCount; arrIdx++) { const arr = typedArrays[arrIdx]; for (let byteIdx = 0, arrLen = arr.length; byteIdx < arrLen; byteIdx++) { merged[offset++] = arr[byteIdx]; } } return merged; }; const mergeUint8Arrays = (arrays) => { let totalSize = 0; for (let idx = 0, len = arrays.length; idx < len; idx++) { totalSize += arrays[idx].length; } const mergedBuffer = new Uint8Array(totalSize); let offset = 0; for (let idx = 0, len = arrays.length; idx < len; idx++) { const array = arrays[idx]; mergedBuffer.set(array, offset); offset += array.length; } return mergedBuffer; }; const arrayAsString = (array) => { let str = ''; for (let idx = 0, len = array.length; idx < len; idx++) { str += charFromCode(array[idx]); } return str; }; const byAscendingId = (a, b) => a.id - b.id; const sortedUniq = (array, indexer) => { const uniq = []; for (let idx = 0, len = array.length; idx < len; idx++) { const curr = array[idx]; const prev = array[idx - 1]; if (idx === 0 || indexer(curr) !== indexer(prev)) { uniq.push(curr); } } return uniq; }; const isArrayEqual = (arr1, arr2) => { if 
(arr1.length !== arr2.length) { return false; } for (let i = 0, ii = arr1.length; i < ii; i++) { if (arr1[i] !== arr2[i]) { return false; } } return true; }; // Arrays and TypedArrays in JS both have .reverse() methods, which would seem // to negate the need for this function. However, not all runtimes support this // method (e.g. React Native). This function compensates for that fact. const reverseArray = (array) => { const arrayLen = array.length; for (let idx = 0, len = Math.floor(arrayLen / 2); idx < len; idx++) { const leftIdx = idx; const rightIdx = arrayLen - idx - 1; const temp = array[idx]; array[leftIdx] = array[rightIdx]; array[rightIdx] = temp; } return array; }; const sum = (array) => { let total = 0; for (let idx = 0, len = array.length; idx < len; idx++) { total += array[idx]; } return total; }; const range = (start, end) => { const arr = new Array(end - start); for (let idx = 0, len = arr.length; idx < len; idx++) { arr[idx] = start + idx; } return arr; }; const pluckIndices = (arr, indices) => { const plucked = new Array(indices.length); for (let idx = 0, len = indices.length; idx < len; idx++) { plucked[idx] = arr[indices[idx]]; } return plucked; }; const canBeConvertedToUint8Array = (input) => input instanceof Uint8Array || input instanceof ArrayBuffer || typeof input === 'string'; const toUint8Array = (input) => { if (typeof input === 'string') { return decodeFromBase64DataUri(input); } else if (input instanceof ArrayBuffer) { return new Uint8Array(input); } else if (input instanceof Uint8Array) { return input; } else { throw new TypeError('`input` must be one of `string | ArrayBuffer | Uint8Array`'); } }; // Precompute hex octets for best performance // Credit: https://stackoverflow.com/questions/40031688/javascript-arraybuffer-to-hex/40031979#40031979 const byteToHex = []; for (let byte = 0x00; byte <= 0xff; ++byte) { byteToHex[byte] = byte.toString(16).padStart(2, '0'); } const byteArrayToHexString = (array) => { const hexOctets = new 
Array(array.length); for (let idx = 0; idx < array.length; ++idx) { hexOctets[idx] = byteToHex[array[idx]]; } return hexOctets.join(''); }; /** * Returns a Promise that resolves after at least one tick of the * Macro Task Queue occurs. */ const waitForTick = () => new Promise((resolve) => { setTimeout(() => resolve(), 0); }); /** * Encodes a string to UTF-8. * * @param input The string to be encoded. * @param byteOrderMark Whether or not a byte order marker (BOM) should be added * to the start of the encoding. (default `true`) * @returns A Uint8Array containing the UTF-8 encoding of the input string. * * ----------------------------------------------------------------------------- * * JavaScript strings are composed of Unicode code points. Code points are * integers in the range 0 to 1,114,111 (0x10FFFF). When serializing a string, * it must be encoded as a sequence of words. A word is typically 8, 16, or 32 * bytes in size. As such, Unicode defines three encoding forms: UTF-8, UTF-16, * and UTF-32. These encoding forms are described in the Unicode standard [1]. * This function implements the UTF-8 encoding form. * * ----------------------------------------------------------------------------- * * In UTF-8, each code point is mapped to a sequence of 1, 2, 3, or 4 bytes. * Note that the logic which defines this mapping is slightly convoluted, and * not as straightforward as the mapping logic for UTF-16 or UTF-32. The UTF-8 * mapping logic is as follows [2]: * * • If a code point is in the range U+0000..U+007F, then view it as a 7-bit * integer: 0bxxxxxxx. Map the code point to 1 byte with the first high order * bit set to 0: * * b1=0b0xxxxxxx * * • If a code point is in the range U+0080..U+07FF, then view it as an 11-bit * integer: 0byyyyyxxxxxx. 
Map the code point to 2 bytes with the first 5 bits * of the code point stored in the first byte, and the last 6 bits stored in * the second byte: * * b1=0b110yyyyy b2=0b10xxxxxx * * • If a code point is in the range U+0800..U+FFFF, then view it as a 16-bit * integer, 0bzzzzyyyyyyxxxxxx. Map the code point to 3 bytes with the first * 4 bits stored in the first byte, the next 6 bits stored in the second byte, * and the last 6 bits in the third byte: * * b1=0b1110zzzz b2=0b10yyyyyy b3=0b10xxxxxx * * • If a code point is in the range U+10000...U+10FFFF, then view it as a * 21-bit integer, 0bvvvzzzzzzyyyyyyxxxxxx. Map the code point to 4 bytes with * the first 3 bits stored in the first byte, the next 6 bits stored in the * second byte, the next 6 bits stored in the third byte, and the last 6 bits * stored in the fourth byte: * * b1=0b11110xxx b2=0b10zzzzzz b3=0b10yyyyyy b4=0b10xxxxxx * * ----------------------------------------------------------------------------- * * It is important to note, when iterating through the code points of a string * in JavaScript, that if a character is encoded as a surrogate pair it will * increase the string's length by 2 instead of 1 [4]. For example: * * ``` * > 'a'.length * 1 * > '💩'.length * 2 * > '語'.length * 1 * > 'a💩語'.length * 4 * ``` * * The results of the above example are explained by the fact that the * characters 'a' and '語' are not represented by surrogate pairs, but '💩' is. * * Because of this idiosyncrasy in JavaScript's string implementation and APIs, * we must "jump" an extra index after encoding a character as a surrogate * pair. In practice, this means we must increment the index of our for loop by * 2 if we encode a surrogate pair, and 1 in all other cases. 
* * ----------------------------------------------------------------------------- * * References: * - [1] https://www.unicode.org/versions/Unicode12.0.0/UnicodeStandard-12.0.pdf * 3.9 Unicode Encoding Forms - UTF-8 * - [2] http://www.herongyang.com/Unicode/UTF-8-UTF-8-Encoding.html * - [3] http://www.herongyang.com/Unicode/UTF-8-UTF-8-Encoding-Algorithm.html * - [4] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/length#Description * */ const utf8Encode = (input, byteOrderMark = true) => { const encoded = []; if (byteOrderMark) encoded.push(0xef, 0xbb, 0xbf); for (let idx = 0, len = input.length; idx < len;) { const codePoint = input.codePointAt(idx); // One byte encoding if (codePoint < 0x80) { const byte1 = codePoint & 0x7f; encoded.push(byte1); idx += 1; } // Two byte encoding else if (codePoint < 0x0800) { const byte1 = ((codePoint >> 6) & 0x1f) | 0xc0; const byte2 = (codePoint & 0x3f) | 0x80; encoded.push(byte1, byte2); idx += 1; } // Three byte encoding else if (codePoint < 0x010000) { const byte1 = ((codePoint >> 12) & 0x0f) | 0xe0; const byte2 = ((codePoint >> 6) & 0x3f) | 0x80; const byte3 = (codePoint & 0x3f) | 0x80; encoded.push(byte1, byte2, byte3); idx += 1; } // Four byte encoding (surrogate pair) else if (codePoint < 0x110000) { const byte1 = ((codePoint >> 18) & 0x07) | 0xf0; const byte2 = ((codePoint >> 12) & 0x3f) | 0x80; const byte3 = ((codePoint >> 6) & 0x3f) | 0x80; const byte4 = ((codePoint >> 0) & 0x3f) | 0x80; encoded.push(byte1, byte2, byte3, byte4); idx += 2; } // Should never reach this case else throw new Error(`Invalid code point: 0x${toHexString(codePoint)}`); } return new Uint8Array(encoded); }; /** * Encodes a string to UTF-16. * * @param input The string to be encoded. * @param byteOrderMark Whether or not a byte order marker (BOM) should be added * to the start of the encoding. (default `true`) * @returns A Uint16Array containing the UTF-16 encoding of the input string. 
* * ----------------------------------------------------------------------------- * * JavaScript strings are composed of Unicode code points. Code points are * integers in the range 0 to 1,114,111 (0x10FFFF). When serializing a string, * it must be encoded as a sequence of words. A word is typically 8, 16, or 32 * bytes in size. As such, Unicode defines three encoding forms: UTF-8, UTF-16, * and UTF-32. These encoding forms are described in the Unicode standard [1]. * This function implements the UTF-16 encoding form. * * ----------------------------------------------------------------------------- * * In UTF-16, each code point is mapped to one or two 16-bit integers. The * UTF-16 mapping logic is as follows [2]: * * • If a code point is in the range U+0000..U+FFFF, then map the code point to * a 16-bit integer with the most significant byte first. * * • If a code point is in the range U+10000..U+10000, then map the code point * to two 16-bit integers. The first integer should contain the high surrogate * and the second integer should contain the low surrogate. Both surrogates * should be written with the most significant byte first. * * ----------------------------------------------------------------------------- * * It is important to note, when iterating through the code points of a string * in JavaScript, that if a character is encoded as a surrogate pair it will * increase the string's length by 2 instead of 1 [4]. For example: * * ``` * > 'a'.length * 1 * > '💩'.length * 2 * > '語'.length * 1 * > 'a💩語'.length * 4 * ``` * * The results of the above example are explained by the fact that the * characters 'a' and '語' are not represented by surrogate pairs, but '💩' is. * * Because of this idiosyncrasy in JavaScript's string implementation and APIs, * we must "jump" an extra index after encoding a character as a surrogate * pair. In practice, this means we must increment the index of our for loop by * 2 if we encode a surrogate pair, and 1 in all other cases. 
* * ----------------------------------------------------------------------------- * * References: * - [1] https://www.unicode.org/versions/Unicode12.0.0/UnicodeStandard-12.0.pdf * 3.9 Unicode Encoding Forms - UTF-8 * - [2] http://www.herongyang.com/Unicode/UTF-16-UTF-16-Encoding.html * - [3] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/length#Description * */ const utf16Encode = (input, byteOrderMark = true) => { const encoded = []; if (byteOrderMark) encoded.push(0xfeff); for (let idx = 0, len = input.length; idx < len;) { const codePoint = input.codePointAt(idx); // Two byte encoding if (codePoint < 0x010000) { encoded.push(codePoint); idx += 1; } // Four byte encoding (surrogate pair) else if (codePoint < 0x110000) { encoded.push(highSurrogate(codePoint), lowSurrogate(codePoint)); idx += 2; } // Should never reach this case else throw new Error(`Invalid code point: 0x${toHexString(codePoint)}`); } return new Uint16Array(encoded); }; /** * Returns `true` if the `codePoint` is within the * Basic Multilingual Plane (BMP). Code points inside the BMP are not encoded * with surrogate pairs. * @param codePoint The code point to be evaluated. * * Reference: https://en.wikipedia.org/wiki/UTF-16#Description */ const isWithinBMP = (codePoint) => codePoint >= 0 && codePoint <= 0xffff; /** * Returns `true` if the given `codePoint` is valid and must be represented * with a surrogate pair when encoded. * @param codePoint The code point to be evaluated. 
* * Reference: https://en.wikipedia.org/wiki/UTF-16#Description */ const hasSurrogates = (codePoint) => codePoint >= 0x010000 && codePoint <= 0x10ffff; // From Unicode 3.0 spec, section 3.7: // http://unicode.org/versions/Unicode3.0.0/ch03.pdf const highSurrogate = (codePoint) => Math.floor((codePoint - 0x10000) / 0x400) + 0xd800; // From Unicode 3.0 spec, section 3.7: // http://unicode.org/versions/Unicode3.0.0/ch03.pdf const lowSurrogate = (codePoint) => ((codePoint - 0x10000) % 0x400) + 0xdc00; var ByteOrder; (function (ByteOrder) { ByteOrder["BigEndian"] = "BigEndian"; ByteOrder["LittleEndian"] = "LittleEndian"; })(ByteOrder || (ByteOrder = {})); const REPLACEMENT = '�'.codePointAt(0); /** * Decodes a Uint8Array of data to a string using UTF-16. * * Note that this function attempts to recover from erronous input by * inserting the replacement character (�) to mark invalid code points * and surrogate pairs. * * @param input A Uint8Array containing UTF-16 encoded data * @param byteOrderMark Whether or not a byte order marker (BOM) should be read * at the start of the encoding. (default `true`) * @returns The decoded string. */ const utf16Decode = (input, byteOrderMark = true) => { // Need at least 2 bytes of data in UTF-16 encodings if (input.length <= 1) return String.fromCodePoint(REPLACEMENT); const byteOrder = byteOrderMark ? readBOM(input) : ByteOrder.BigEndian; // Skip byte order mark if needed let idx = byteOrderMark ? 
2 : 0; const codePoints = []; while (input.length - idx >= 2) { const first = decodeValues(input[idx++], input[idx++], byteOrder); if (isHighSurrogate(first)) { if (input.length - idx < 2) { // Need at least 2 bytes left for the low surrogate that is required codePoints.push(REPLACEMENT); } else { const second = decodeValues(input[idx++], input[idx++], byteOrder); if (isLowSurrogate(second)) { codePoints.push(first, second); } else { // Low surrogates should always follow high surrogates codePoints.push(REPLACEMENT); } } } else if (isLowSurrogate(first)) { // High surrogates should always come first since `decodeValues()` // accounts for the byte ordering idx += 2; codePoints.push(REPLACEMENT); } else { codePoints.push(first); } } // There shouldn't be extra byte(s) left over if (idx < input.length) codePoints.push(REPLACEMENT); return String.fromCodePoint(...codePoints); }; /** * Returns `true` if the given `codePoint` is a high surrogate. * @param codePoint The code point to be evaluated. * * Reference: https://en.wikipedia.org/wiki/UTF-16#Description */ const isHighSurrogate = (codePoint) => codePoint >= 0xd800 && codePoint <= 0xdbff; /** * Returns `true` if the given `codePoint` is a low surrogate. * @param codePoint The code point to be evaluated. * * Reference: https://en.wikipedia.org/wiki/UTF-16#Description */ const isLowSurrogate = (codePoint) => codePoint >= 0xdc00 && codePoint <= 0xdfff; /** * Decodes the given utf-16 values first and second using the specified * byte order. * @param first The first byte of the encoding. * @param second The second byte of the encoding. * @param byteOrder The byte order of the encoding. * Reference: https://en.wikipedia.org/wiki/UTF-16#Examples */ const decodeValues = (first, second, byteOrder) => { // Append the binary representation of the preceding byte by shifting the // first one 8 to the left and than applying a bitwise or-operator to append // the second one. 
if (byteOrder === ByteOrder.LittleEndian) return (second << 8) | first; if (byteOrder === ByteOrder.BigEndian) return (first << 8) | second; throw new Error(`Invalid byteOrder: ${byteOrder}`); }; /** * Returns whether the given array contains a byte order mark for the * UTF-16BE or UTF-16LE encoding. If it has neither, BigEndian is assumed. * * Reference: https://en.wikipedia.org/wiki/Byte_order_mark#UTF-16 * * @param bytes The byte array to be evaluated. */ // prettier-ignore const readBOM = (bytes) => (hasUtf16BigEndianBOM(bytes) ? ByteOrder.BigEndian : hasUtf16LittleEndianBOM(bytes) ? ByteOrder.LittleEndian : ByteOrder.BigEndian); const hasUtf16BigEndianBOM = (bytes) => bytes[0] === 0xfe && bytes[1] === 0xff; const hasUtf16LittleEndianBOM = (bytes) => bytes[0] === 0xff && bytes[1] === 0xfe; const hasUtf16BOM = (bytes) => hasUtf16BigEndianBOM(bytes) || hasUtf16LittleEndianBOM(bytes); // tslint:disable radix /** * Converts a number to its string representation in decimal. This function * differs from simply converting a number to a string with `.toString()` * because this function's output string will **not** contain exponential * notation. * * Credit: https://stackoverflow.com/a/46545519 */ const numberToString = (num) => { let numStr = String(num); if (Math.abs(num) < 1.0) { const e = parseInt(num.toString().split('e-')[1]); if (e) { const negative = num < 0; if (negative) num *= -1; num *= Math.pow(10, e - 1); numStr = '0.' + new Array(e).join('0') + num.toString().substring(2); if (negative) numStr = '-' + numStr; } } else { let e = parseInt(num.toString().split('+')[1]); if (e > 20) { e -= 20; num /= Math.pow(10, e); numStr = num.toString() + new Array(e + 1).join('0'); } } return numStr; }; const sizeInBytes = (n) => Math.ceil(n.toString(2).length / 8); /** * Converts a number into its constituent bytes and returns them as * a number[]. * * Returns most significant byte as first element in array. 
It may be necessary * to call .reverse() to get the bits in the desired order. * * Example: * bytesFor(0x02A41E) => [ 0b10, 0b10100100, 0b11110 ] * * Credit for algorithm: https://stackoverflow.com/a/1936865 */ const bytesFor = (n) => { const bytes = new Uint8Array(sizeInBytes(n)); for (let i = 1; i <= bytes.length; i++) { bytes[i - 1] = n >> ((bytes.length - i) * 8); } return bytes; }; const error = (msg) => { throw new Error(msg); }; var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {}; function getDefaultExportFromCjs (x) { return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? x['default'] : x; } var common = {}; (function (exports) { var TYPED_OK = (typeof Uint8Array !== 'undefined') && (typeof Uint16Array !== 'undefined') && (typeof Int32Array !== 'undefined'); function _has(obj, key) { return Object.prototype.hasOwnProperty.call(obj, key); } exports.assign = function (obj /*from1, from2, from3, ...*/) { var sources = Array.prototype.slice.call(arguments, 1); while (sources.length) { var source = sources.shift(); if (!source) { continue; } if (typeof source !== 'object') { throw new TypeError(source + 'must be non-object'); } for (var p in source) { if (_has(source, p)) { obj[p] = source[p]; } } } return obj; }; // reduce buffer size, avoiding mem copy exports.shrinkBuf = function (buf, size) { if (buf.length === size) { return buf; } if (buf.subarray) { return buf.subarray(0, size); } buf.length = size; return buf; }; var fnTyped = { arraySet: function (dest, src, src_offs, len, dest_offs) { if (src.subarray && dest.subarray) { dest.set(src.subarray(src_offs, src_offs + len), dest_offs); return; } // Fallback to ordinary array for (var i = 0; i < len; i++) { dest[dest_offs + i] = src[src_offs + i]; } }, // Join array of chunks to single array. 
flattenChunks: function (chunks) { var i, l, len, pos, chunk, result; // calculate data length len = 0; for (i = 0, l = chunks.length; i < l; i++) { len += chunks[i].length; } // join chunks result = new Uint8Array(len); pos = 0; for (i = 0, l = chunks.length; i < l; i++) { chunk = chunks[i]; result.set(chunk, pos); pos += chunk.length; } return result; } }; var fnUntyped = { arraySet: function (dest, src, src_offs, len, dest_offs) { for (var i = 0; i < len; i++) { dest[dest_offs + i] = src[src_offs + i]; } }, // Join array of chunks to single array. flattenChunks: function (chunks) { return [].concat.apply([], chunks); } }; // Enable/Disable typed arrays use, for testing // exports.setTyped = function (on) { if (on) { exports.Buf8 = Uint8Array; exports.Buf16 = Uint16Array; exports.Buf32 = Int32Array; exports.assign(exports, fnTyped); } else { exports.Buf8 = Array; exports.Buf16 = Array; exports.Buf32 = Array; exports.assign(exports, fnUntyped); } }; exports.setTyped(TYPED_OK); } (common)); var deflate$4 = {}; var deflate$3 = {}; var trees$1 = {}; // (C) 1995-2013 Jean-loup Gailly and Mark Adler // (C) 2014-2017 Vitaly Puzrin and Andrey Tupitsin // // This software is provided 'as-is', without any express or implied // warranty. In no event will the authors be held liable for any damages // arising from the use of this software. // // Permission is granted to anyone to use this software for any purpose, // including commercial applications, and to alter it and redistribute it // freely, subject to the following restrictions: // // 1. The origin of this software must not be misrepresented; you must not // claim that you wrote the original software. If you use this software // in a product, an acknowledgment in the product documentation would be // appreciated but is not required. // 2. Altered source versions must be plainly marked as such, and must not be // misrepresented as being the original software. // 3. 
This notice may not be removed or altered from any source distribution. /* eslint-disable space-unary-ops */ var utils$6 = common; /* Public constants ==========================================================*/ /* ===========================================================================*/ //var Z_FILTERED = 1; //var Z_HUFFMAN_ONLY = 2; //var Z_RLE = 3; var Z_FIXED$1 = 4; //var Z_DEFAULT_STRATEGY = 0; /* Possible values of the data_type field (though see inflate()) */ var Z_BINARY = 0; var Z_TEXT = 1; //var Z_ASCII = 1; // = Z_TEXT var Z_UNKNOWN$1 = 2; /*============================================================================*/ function zero$1(buf) { var len = buf.length; while (--len >= 0) { buf[len] = 0; } } // From zutil.h var STORED_BLOCK = 0; var STATIC_TREES = 1; var DYN_TREES = 2; /* The three kinds of block type */ var MIN_MATCH$1 = 3; var MAX_MATCH$1 = 258; /* The minimum and maximum match lengths */ // From deflate.h /* =========================================================================== * Internal compression state. 
*/ var LENGTH_CODES$1 = 29; /* number of length codes, not counting the special END_BLOCK code */ var LITERALS$1 = 256; /* number of literal bytes 0..255 */ var L_CODES$1 = LITERALS$1 + 1 + LENGTH_CODES$1; /* number of Literal or Length codes, including the END_BLOCK code */ var D_CODES$1 = 30; /* number of distance codes */ var BL_CODES$1 = 19; /* number of codes used to transfer the bit lengths */ var HEAP_SIZE$1 = 2 * L_CODES$1 + 1; /* maximum heap size */ var MAX_BITS$1 = 15; /* All codes must not exceed MAX_BITS bits */ var Buf_size = 16; /* size of bit buffer in bi_buf */ /* =========================================================================== * Constants */ var MAX_BL_BITS = 7; /* Bit length codes must not exceed MAX_BL_BITS bits */ var END_BLOCK = 256; /* end of block literal code */ var REP_3_6 = 16; /* repeat previous bit length 3-6 times (2 bits of repeat count) */ var REPZ_3_10 = 17; /* repeat a zero length 3-10 times (3 bits of repeat count) */ var REPZ_11_138 = 18; /* repeat a zero length 11-138 times (7 bits of repeat count) */ /* eslint-disable comma-spacing,array-bracket-spacing */ var extra_lbits = /* extra bits for each length code */ [0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0]; var extra_dbits = /* extra bits for each distance code */ [0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13]; var extra_blbits = /* extra bits for each bit length code */ [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7]; var bl_order = [16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15]; /* eslint-enable comma-spacing,array-bracket-spacing */ /* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. */ /* =========================================================================== * Local data. These are initialized only once. 
*/ // We pre-fill arrays with 0 to avoid uninitialized gaps var DIST_CODE_LEN = 512; /* see definition of array dist_code below */ // !!!! Use flat array instead of structure, Freq = i*2, Len = i*2+1 var static_ltree = new Array((L_CODES$1 + 2) * 2); zero$1(static_ltree); /* The static literal tree. Since the bit lengths are imposed, there is no * need for the L_CODES extra codes used during heap construction. However * The codes 286 and 287 are needed to build a canonical tree (see _tr_init * below). */ var static_dtree = new Array(D_CODES$1 * 2); zero$1(static_dtree); /* The static distance tree. (Actually a trivial tree since all codes use * 5 bits.) */ var _dist_code = new Array(DIST_CODE_LEN); zero$1(_dist_code); /* Distance codes. The first 256 values correspond to the distances * 3 .. 258, the last 256 values correspond to the top 8 bits of * the 15 bit distances. */ var _length_code = new Array(MAX_MATCH$1 - MIN_MATCH$1 + 1); zero$1(_length_code); /* length code for each normalized match length (0 == MIN_MATCH) */ var base_length = new Array(LENGTH_CODES$1); zero$1(base_length); /* First normalized length for each code (0 = MIN_MATCH) */ var base_dist = new Array(D_CODES$1); zero$1(base_dist); /* First normalized distance for each code (0 = distance of 1) */ function StaticTreeDesc(static_tree, extra_bits, extra_base, elems, max_length) { this.static_tree = static_tree; /* static tree or NULL */ this.extra_bits = extra_bits; /* extra bits for each code or NULL */ this.extra_base = extra_base; /* base index for extra_bits */ this.elems = elems; /* max number of elements in the tree */ this.max_length = max_length; /* max bit length for the codes */ // show if `static_tree` has data or dummy - needed for monomorphic objects this.has_stree = static_tree && static_tree.length; } var static_l_desc; var static_d_desc; var static_bl_desc; function TreeDesc(dyn_tree, stat_desc) { this.dyn_tree = dyn_tree; /* the dynamic tree */ this.max_code = 0; /* largest code 
with non zero frequency */ this.stat_desc = stat_desc; /* the corresponding static tree */ } function d_code(dist) { return dist < 256 ? _dist_code[dist] : _dist_code[256 + (dist >>> 7)]; } /* =========================================================================== * Output a short LSB first on the stream. * IN assertion: there is enough room in pendingBuf. */ function put_short(s, w) { // put_byte(s, (uch)((w) & 0xff)); // put_byte(s, (uch)((ush)(w) >> 8)); s.pending_buf[s.pending++] = (w) & 0xff; s.pending_buf[s.pending++] = (w >>> 8) & 0xff; } /* =========================================================================== * Send a value on a given number of bits. * IN assertion: length <= 16 and value fits in length bits. */ function send_bits(s, value, length) { if (s.bi_valid > (Buf_size - length)) { s.bi_buf |= (value << s.bi_valid) & 0xffff; put_short(s, s.bi_buf); s.bi_buf = value >> (Buf_size - s.bi_valid); s.bi_valid += length - Buf_size; } else { s.bi_buf |= (value << s.bi_valid) & 0xffff; s.bi_valid += length; } } function send_code(s, c, tree) { send_bits(s, tree[c * 2]/*.Code*/, tree[c * 2 + 1]/*.Len*/); } /* =========================================================================== * Reverse the first len bits of a code, using straightforward code (a faster * method would use a table) * IN assertion: 1 <= len <= 15 */ function bi_reverse(code, len) { var res = 0; do { res |= code & 1; code >>>= 1; res <<= 1; } while (--len > 0); return res >>> 1; } /* =========================================================================== * Flush the bit buffer, keeping at most 7 bits in it. 
*/ function bi_flush(s) { if (s.bi_valid === 16) { put_short(s, s.bi_buf); s.bi_buf = 0; s.bi_valid = 0; } else if (s.bi_valid >= 8) { s.pending_buf[s.pending++] = s.bi_buf & 0xff; s.bi_buf >>= 8; s.bi_valid -= 8; } } /* =========================================================================== * Compute the optimal bit lengths for a tree and update the total bit length * for the current block. * IN assertion: the fields freq and dad are set, heap[heap_max] and * above are the tree nodes sorted by increasing frequency. * OUT assertions: the field len is set to the optimal bit length, the * array bl_count contains the frequencies for each bit length. * The length opt_len is updated; static_len is also updated if stree is * not null. */ function gen_bitlen(s, desc) // deflate_state *s; // tree_desc *desc; /* the tree descriptor */ { var tree = desc.dyn_tree; var max_code = desc.max_code; var stree = desc.stat_desc.static_tree; var has_stree = desc.stat_desc.has_stree; var extra = desc.stat_desc.extra_bits; var base = desc.stat_desc.extra_base; var max_length = desc.stat_desc.max_length; var h; /* heap index */ var n, m; /* iterate over the tree elements */ var bits; /* bit length */ var xbits; /* extra bits */ var f; /* frequency */ var overflow = 0; /* number of elements with bit length too large */ for (bits = 0; bits <= MAX_BITS$1; bits++) { s.bl_count[bits] = 0; } /* In a first pass, compute the optimal bit lengths (which may * overflow in the case of the bit length tree). 
*/ tree[s.heap[s.heap_max] * 2 + 1]/*.Len*/ = 0; /* root of the heap */ for (h = s.heap_max + 1; h < HEAP_SIZE$1; h++) { n = s.heap[h]; bits = tree[tree[n * 2 + 1]/*.Dad*/ * 2 + 1]/*.Len*/ + 1; if (bits > max_length) { bits = max_length; overflow++; } tree[n * 2 + 1]/*.Len*/ = bits; /* We overwrite tree[n].Dad which is no longer needed */ if (n > max_code) { continue; } /* not a leaf node */ s.bl_count[bits]++; xbits = 0; if (n >= base) { xbits = extra[n - base]; } f = tree[n * 2]/*.Freq*/; s.opt_len += f * (bits + xbits); if (has_stree) { s.static_len += f * (stree[n * 2 + 1]/*.Len*/ + xbits); } } if (overflow === 0) { return; } // Trace((stderr,"\nbit length overflow\n")); /* This happens for example on obj2 and pic of the Calgary corpus */ /* Find the first bit length which could increase: */ do { bits = max_length - 1; while (s.bl_count[bits] === 0) { bits--; } s.bl_count[bits]--; /* move one leaf down the tree */ s.bl_count[bits + 1] += 2; /* move one overflow item as its brother */ s.bl_count[max_length]--; /* The brother of the overflow item also moves one step up, * but this does not affect bl_count[max_length] */ overflow -= 2; } while (overflow > 0); /* Now recompute all bit lengths, scanning in increasing frequency. * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all * lengths instead of fixing only the wrong ones. This idea is taken * from 'ar' written by Haruhiko Okumura.) */ for (bits = max_length; bits !== 0; bits--) { n = s.bl_count[bits]; while (n !== 0) { m = s.heap[--h]; if (m > max_code) { continue; } if (tree[m * 2 + 1]/*.Len*/ !== bits) { // Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); s.opt_len += (bits - tree[m * 2 + 1]/*.Len*/) * tree[m * 2]/*.Freq*/; tree[m * 2 + 1]/*.Len*/ = bits; } n--; } } } /* =========================================================================== * Generate the codes for a given tree and bit counts (which need not be * optimal). 
* IN assertion: the array bl_count contains the bit length statistics for * the given tree and the field len is set for all tree elements. * OUT assertion: the field code is set for all tree elements of non * zero code length. */ function gen_codes(tree, max_code, bl_count) // ct_data *tree; /* the tree to decorate */ // int max_code; /* largest code with non zero frequency */ // ushf *bl_count; /* number of codes at each bit length */ { var next_code = new Array(MAX_BITS$1 + 1); /* next code value for each bit length */ var code = 0; /* running code value */ var bits; /* bit index */ var n; /* code index */ /* The distribution counts are first used to generate the code values * without bit reversal. */ for (bits = 1; bits <= MAX_BITS$1; bits++) { next_code[bits] = code = (code + bl_count[bits - 1]) << 1; } /* Check that the bit counts in bl_count are consistent. The last code * must be all ones. */ //Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1, // "inconsistent bit counts"); //Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); for (n = 0; n <= max_code; n++) { var len = tree[n * 2 + 1]/*.Len*/; if (len === 0) { continue; } /* Now reverse the bits */ tree[n * 2]/*.Code*/ = bi_reverse(next_code[len]++, len); //Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", // n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1)); } } /* =========================================================================== * Initialize the various 'constant' tables. 
*/ function tr_static_init() { var n; /* iterates over tree elements */ var bits; /* bit counter */ var length; /* length value */ var code; /* code value */ var dist; /* distance index */ var bl_count = new Array(MAX_BITS$1 + 1); /* number of codes at each bit length for an optimal tree */ // do check in _tr_init() //if (static_init_done) return; /* For some embedded targets, global variables are not initialized: */ /*#ifdef NO_INIT_GLOBAL_POINTERS static_l_desc.static_tree = static_ltree; static_l_desc.extra_bits = extra_lbits; static_d_desc.static_tree = static_dtree; static_d_desc.extra_bits = extra_dbits; static_bl_desc.extra_bits = extra_blbits; #endif*/ /* Initialize the mapping length (0..255) -> length code (0..28) */ length = 0; for (code = 0; code < LENGTH_CODES$1 - 1; code++) { base_length[code] = length; for (n = 0; n < (1 << extra_lbits[code]); n++) { _length_code[length++] = code; } } //Assert (length == 256, "tr_static_init: length != 256"); /* Note that the length 255 (match length 258) can be represented * in two different ways: code 284 + 5 bits or code 285, so we * overwrite length_code[255] to use the best encoding: */ _length_code[length - 1] = code; /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ dist = 0; for (code = 0; code < 16; code++) { base_dist[code] = dist; for (n = 0; n < (1 << extra_dbits[code]); n++) { _dist_code[dist++] = code; } } //Assert (dist == 256, "tr_static_init: dist != 256"); dist >>= 7; /* from now on, all distances are divided by 128 */ for (; code < D_CODES$1; code++) { base_dist[code] = dist << 7; for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) { _dist_code[256 + dist++] = code; } } //Assert (dist == 256, "tr_static_init: 256+dist != 512"); /* Construct the codes of the static literal tree */ for (bits = 0; bits <= MAX_BITS$1; bits++) { bl_count[bits] = 0; } n = 0; while (n <= 143) { static_ltree[n * 2 + 1]/*.Len*/ = 8; n++; bl_count[8]++; } while (n <= 255) { static_ltree[n * 2 + 1]/*.Len*/ = 9; 
n++; bl_count[9]++; } while (n <= 279) { static_ltree[n * 2 + 1]/*.Len*/ = 7; n++; bl_count[7]++; } while (n <= 287) { static_ltree[n * 2 + 1]/*.Len*/ = 8; n++; bl_count[8]++; } /* Codes 286 and 287 do not exist, but we must include them in the * tree construction to get a canonical Huffman tree (longest code * all ones) */ gen_codes(static_ltree, L_CODES$1 + 1, bl_count); /* The static distance tree is trivial: */ for (n = 0; n < D_CODES$1; n++) { static_dtree[n * 2 + 1]/*.Len*/ = 5; static_dtree[n * 2]/*.Code*/ = bi_reverse(n, 5); } // Now data ready and we can init static trees static_l_desc = new StaticTreeDesc(static_ltree, extra_lbits, LITERALS$1 + 1, L_CODES$1, MAX_BITS$1); static_d_desc = new StaticTreeDesc(static_dtree, extra_dbits, 0, D_CODES$1, MAX_BITS$1); static_bl_desc = new StaticTreeDesc(new Array(0), extra_blbits, 0, BL_CODES$1, MAX_BL_BITS); //static_init_done = true; } /* =========================================================================== * Initialize a new block. */ function init_block(s) { var n; /* iterates over tree elements */ /* Initialize the trees. */ for (n = 0; n < L_CODES$1; n++) { s.dyn_ltree[n * 2]/*.Freq*/ = 0; } for (n = 0; n < D_CODES$1; n++) { s.dyn_dtree[n * 2]/*.Freq*/ = 0; } for (n = 0; n < BL_CODES$1; n++) { s.bl_tree[n * 2]/*.Freq*/ = 0; } s.dyn_ltree[END_BLOCK * 2]/*.Freq*/ = 1; s.opt_len = s.static_len = 0; s.last_lit = s.matches = 0; } /* =========================================================================== * Flush the bit buffer and align the output on a byte boundary */ function bi_windup(s) { if (s.bi_valid > 8) { put_short(s, s.bi_buf); } else if (s.bi_valid > 0) { //put_byte(s, (Byte)s->bi_buf); s.pending_buf[s.pending++] = s.bi_buf; } s.bi_buf = 0; s.bi_valid = 0; } /* =========================================================================== * Copy a stored block, storing first the length and its * one's complement if requested. */ function copy_block(s, buf