UNPKG

@davidcal/fec-raptorq

Version:

Node.js wrapper for RaptorQ forward error correction

59 lines (49 loc) 1.65 kB
const encoder = new TextEncoder();

// Lenient decoder: malformed sequences become U+FFFD instead of throwing.
const decoder = new TextDecoder("utf-8", {
	fatal: false,
	ignoreBOM: false,
});

// Strict decoder: used for the final decode of the prefix already proven valid.
const fatal_decoder = new TextDecoder("utf-8", {
	fatal: true,
	ignoreBOM: false,
});

/**
 * Counts how many leading bytes of `bytes` form valid UTF-8.
 *
 * Works by leniently decoding the input, re-encoding the result and finding the
 * first position where the re-encoded bytes diverge from the input.
 *
 * @param {Uint8Array} bytes - Candidate UTF-8 data.
 * @returns {number} Length of the longest valid UTF-8 prefix, in bytes.
 */
const count_valid_utf8_prefix = (bytes) => {
	const re_encoded = encoder.encode(decoder.decode(bytes));

	// With `ignoreBOM: false` the decoder drops a leading byte order mark, so the
	// re-encoded bytes start 3 bytes "later" than the input. Compensate for that,
	// otherwise every BOM-prefixed input would be reported as invalid at byte 0.
	const has_bom = true
		&& bytes.length >= 3
		&& bytes[0] === 0xEF
		&& bytes[1] === 0xBB
		&& bytes[2] === 0xBF;
	const offset = has_bom ? 3 : 0;

	for (let i = 0; i < re_encoded.length; i++) {
		const position = offset + i;

		if (bytes[position] !== re_encoded[i]) {
			return position;
		}

		// A plain byte-for-byte comparison is not enough: an invalid sequence that
		// starts with 0xEF is replaced by U+FFFD, which encodes as 0xEF 0xBF 0xBD,
		// so its first byte still "matches". Whenever the re-encoded data holds a
		// replacement character, the input must contain the exact same three bytes.
		const is_replacement_character = true
			&& re_encoded[i] === 0xEF
			&& re_encoded[i + 1] === 0xBF
			&& re_encoded[i + 2] === 0xBD;

		if (is_replacement_character && (bytes[position + 1] !== 0xBF || bytes[position + 2] !== 0xBD)) {
			return position;
		}
	}

	// No divergence found: the whole input (including empty input) is valid.
	return bytes.length;
};

/**
 * @stability 1 - experimental
 *
 * Decodes a Uint8Array containing UTF-8 data into a string until it hits an invalid byte sequence.
 *
 * Returns an object containing the maximal valid `string` and the corresponding `num_valid_bytes`
 * taken from the original input. A leading byte order mark is counted in `num_valid_bytes` but,
 * as usual for `TextDecoder` with `ignoreBOM: false`, is not part of `string`.
 *
 * @param {Uint8Array} bytes - Data that starts with (possibly zero bytes of) valid UTF-8.
 * @returns {{ string: string, num_valid_bytes: number }} The decoded prefix and its byte length.
 */
export const utf8_decode_maximally = (bytes) => {
	const num_valid_bytes = count_valid_utf8_prefix(bytes);

	return {
		string: fatal_decoder.decode(bytes.subarray(0, num_valid_bytes)),
		num_valid_bytes,
	};
};

export const utf8DecodeMaximally = utf8_decode_maximally;