UNPKG

shaka-player

Version:
167 lines (141 loc) 5.95 kB
/*! @license * Shaka Player * Copyright 2016 Google LLC * SPDX-License-Identifier: Apache-2.0 */ describe('StringUtils', () => { describe('with TextDecoder', () => { if (window.TextDecoder) { defineStringUtilTests(); } }); describe('without TextDecoder', () => { let originalTextDecoder; beforeAll(() => { originalTextDecoder = window.TextDecoder; window['TextDecoder'] = null; }); afterAll(() => { window.TextDecoder = originalTextDecoder; }); defineStringUtilTests(); }); }); function defineStringUtilTests() { const StringUtils = shaka.util.StringUtils; it('parses fromUTF8', () => { // This is 4 Unicode characters, the last will be split into a surrogate // pair. const arr = [0x46, 0xe2, 0x82, 0xac, 0x20, 0xf0, 0x90, 0x8d, 0x88]; expect(StringUtils.fromUTF8(new Uint8Array(arr))) .toBe('F\u20ac \ud800\udf48'); }); it('won\'t break if given cut-off UTF8 character', () => { const arr1 = [0x53, 0x61, 0x6e, 0x20, 0x4a, 0x6f, 0x73, 0xc3, 0xa9]; expect(StringUtils.fromUTF8(new Uint8Array(arr1))) .toBe('San Jos\u00E9'); // This array contains the first half of a 2-byte UTF8 character // (0xc3 0xa9 = é). The half-character is stranded at the very end of the // string. const arr = [0x53, 0x61, 0x6e, 0x20, 0x4a, 0x6f, 0x73, 0xc3]; expect(StringUtils.fromUTF8(new Uint8Array(arr))) .toBe('San Jos\uFFFD'); }); it('won\'t break if given an invalid UTF-8 sequence', () => { // 0xe9 0x33 0x33 is an invalid UTF-8 sequence. const arr = [0x4a, 0x6f, 0x73, 0xE9, 0x33, 0x33, 0x20, 0x53, 0x61, 0x6e]; expect(StringUtils.fromUTF8(new Uint8Array(arr))) .toBe('Jos\uFFFD33 San'); }); it('can handle an 8-byte character', () => { // This is the UTF-8 encoding of the US flag emoji. // It decodes into two Unicode codepoints, which becomes 4 JavaScript // UTF-16 characters. const arr = [0xf0, 0x9f, 0x87, 0xba, 0xf0, 0x9f, 0x87, 0xb8]; expect(StringUtils.fromUTF8(new Uint8Array(arr))) .toBe('\uD83C\uDDFA\uD83C\uDDF8'); }); it('strips the BOM in fromUTF8', () => { // This is 4 Unicode characters, the last will be split into a surrogate // pair. const arr = [0xef, 0xbb, 0xbf, 0x74, 0x65, 0x78, 0x74]; const ContentType = shaka.util.ManifestParserUtils.ContentType; expect(StringUtils.fromUTF8(new Uint8Array(arr))).toBe(ContentType.TEXT); }); it('parses fromUTF16 big-endian', () => { // This is big-endian pairs of 16-bit numbers. This translates into 3 // Unicode characters where the last is split into a surrogate pair. const arr = [0x00, 0x46, 0x38, 0x01, 0xd8, 0x01, 0xdc, 0x37]; expect(StringUtils.fromUTF16(new Uint8Array(arr), false)) .toBe('F\u3801\ud801\udc37'); }); it('parses fromUTF16 little-endian', () => { // This is little-endian pairs of 16-bit numbers. This translates into 3 // Unicode characters where the last is split into a surrogate pair. const arr = [0x46, 0x00, 0x01, 0x38, 0x01, 0xd8, 0x37, 0xdc]; expect(StringUtils.fromUTF16(new Uint8Array(arr), true)) .toBe('F\u3801\ud801\udc37'); }); describe('fromBytesAutoDetect', () => { it('detects UTF-8 BOM', () => { const arr = [0xef, 0xbb, 0xbf, 0x46, 0x6f, 0x6f]; expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo'); }); it('detects UTF-16 BE BOM', () => { const arr = [0xfe, 0xff, 0x00, 0x46, 0x00, 0x6f, 0x00, 0x6f]; expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo'); }); it('detects UTF-16 LE BOM', () => { const arr = [0xff, 0xfe, 0x46, 0x00, 0x6f, 0x00, 0x6f, 0x00]; expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo'); }); it('guesses UTF-8', () => { const arr = [0x46, 0x6f, 0x6f]; expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo'); }); it('guesses UTF-16 BE', () => { const arr = [0x00, 0x46, 0x00, 0x6f, 0x00, 0x6f]; expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo'); }); it('guesses UTF-16 LE', () => { const arr = [0x46, 0x00, 0x6f, 0x00, 0x6f, 0x00]; expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo'); }); it('fails if unable to guess', () => { const expected = shaka.test.Util.jasmineError(new shaka.util.Error( shaka.util.Error.Severity.CRITICAL, shaka.util.Error.Category.TEXT, shaka.util.Error.Code.UNABLE_TO_DETECT_ENCODING)); const arr = [0x01, 0x02, 0x03, 0x04]; expect(() => StringUtils.fromBytesAutoDetect(new Uint8Array(arr))) .toThrow(expected); }); }); it('converts toUTF8', () => { const str = 'Xe\u4524\u1952'; const arr = [0x58, 0x65, 0xe4, 0x94, 0xa4, 0xe1, 0xa5, 0x92]; const buffer = StringUtils.toUTF8(str); expect(shaka.util.BufferUtils.toUint8(buffer)) .toEqual(new Uint8Array(arr)); }); it('converts toUTF16-LE', () => { const str = 'Xe\u4524\u1952'; const arr = [0x58, 0, 0x65, 0, 0x24, 0x45, 0x52, 0x19]; const buffer = StringUtils.toUTF16(str, /* littleEndian= */ true); expect(shaka.util.BufferUtils.toUint8(buffer)) .toEqual(new Uint8Array(arr)); }); it('converts toUTF16-BE', () => { const str = 'Xe\u4524\u1952'; const arr = [0, 0x58, 0, 0x65, 0x45, 0x24, 0x19, 0x52]; const buffer = StringUtils.toUTF16(str, /* littleEndian= */ false); expect(shaka.util.BufferUtils.toUint8(buffer)) .toEqual(new Uint8Array(arr)); }); it('does not cause stack overflow, #335', () => { const buffer = new Uint8Array(8e5); // Well above arg count limit. expect(StringUtils.fromUTF8(buffer).length).toBe(buffer.byteLength); expect(StringUtils.fromUTF16(buffer, true).length) .toBe(buffer.byteLength / 2); }); }