UNPKG

@adguard/agtree

Version:
79 lines (77 loc) 2.87 kB
/*
 * AGTree v3.4.3 (build date: Thu, 11 Dec 2025 13:43:19 GMT)
 * (c) 2025 Adguard Software Ltd.
 * Released under the MIT license
 * https://github.com/AdguardTeam/tsurlfilter/tree/master/packages/agtree#readme
 */
/* eslint-disable no-param-reassign */
/* eslint-disable no-bitwise */
/**
 * @file Utility for encoding strings to byte sequences.
 */

/**
 * Checks if the given code point is an ASCII code point.
 *
 * @param codePoint Code point to check.
 * @returns `true` if the given code point is an ASCII code point, `false` otherwise.
 * @see {@link https://infra.spec.whatwg.org/#ascii-code-point}
 */
const isAsciiCodePoint = (codePoint) => {
    return codePoint >= 0x0000 && codePoint <= 0x007F;
};

/**
 * Marker bits of the lead byte for 2-, 3- and 4-byte UTF-8 sequences,
 * indexed by `bytesNeeded - 2`.
 */
const OFFSETS = [0xC0, 0xE0, 0xF0];

/**
 * U+FFFD REPLACEMENT CHARACTER, emitted in place of unpaired surrogates.
 */
const REPLACEMENT_CODE_POINT = 0xFFFD;

/**
 * Encodes a JavaScript (UTF-16) string into a UTF-8 byte sequence according to the WHATWG spec,
 * mirroring the behavior of `TextEncoder.prototype.encodeInto`.
 *
 * Encoding stops as soon as the next code point does not fit entirely into the remaining
 * space of the buffer; a code point is never written partially.
 * Unpaired surrogates cannot be represented in UTF-8, so they are encoded as U+FFFD.
 *
 * @param str String to encode.
 * @param buffer Buffer to write the encoded bytes to (an array-like with a `length`).
 * @returns Object with two counters: `read` is the number of UTF-16 code units consumed from `str`,
 * `written` is the number of bytes stored in `buffer`.
 * @see {@link https://encoding.spec.whatwg.org/#utf-8-encoder}
 * @note Bytes written may be larger than the number of code units read, but never smaller.
 * For example, the string '你好' has a length of 2, but its byte representation has a length of 6.
 */
const encodeIntoPolyfill = (str, buffer) => {
    const { length } = buffer;
    let read = 0;
    let written = 0;

    while (read < str.length) {
        let codePoint = str.codePointAt(read) || 0;

        // Astral code points occupy a surrogate pair, i.e. two UTF-16 code units
        const unitsConsumed = codePoint >= 0x10000 ? 2 : 1;

        // `codePointAt` yields the raw surrogate value for an unpaired surrogate;
        // encoding it as-is would produce invalid UTF-8, so substitute U+FFFD
        if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
            codePoint = REPLACEMENT_CODE_POINT;
        }

        if (isAsciiCodePoint(codePoint)) {
            if (written >= length) {
                // Stop if no space in buffer
                break;
            }

            buffer[written] = codePoint;
            written += 1;
            read += 1;
            continue;
        }

        let bytesNeeded = 4;
        if (codePoint <= 0x07FF) {
            bytesNeeded = 2;
        } else if (codePoint <= 0xFFFF) {
            bytesNeeded = 3;
        }

        if (written + bytesNeeded > length) {
            // Stop if no space for the whole encoding
            break;
        }

        // Lead byte: marker bits plus the most significant payload bits
        let count = bytesNeeded - 1;
        buffer[written] = (codePoint >> (6 * count)) + OFFSETS[bytesNeeded - 2];
        written += 1;

        // Continuation bytes: 0b10xxxxxx, six payload bits each, most significant first
        while (count > 0) {
            count -= 1;
            buffer[written] = 0x80 | ((codePoint >> (6 * count)) & 0x3F);
            written += 1;
        }

        read += unitsConsumed;
    }

    return { read, written };
};

export { encodeIntoPolyfill };