UNPKG

molstar

Version:

A comprehensive macromolecular library.

github.com/molstar/molstar

molstar/molstar

135 lines (134 loc) • 4.86 kB

JavaScript

/**
 * Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * Adapted from https://github.com/rcsb/mmtf-javascript
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 * @author David Sehnal <david.sehnal@gmail.com>
 */
import { ChunkedBigString, MAX_STRING_LENGTH } from './string-like';

/**
 * Encode `str` as UTF-8 and store the bytes into `data`, starting at index `offset`.
 *
 * A valid surrogate pair in `str` is written as a single four-byte sequence.
 * An unpaired surrogate is written as a three-byte sequence.
 * The function does not check the capacity of `data`; use `utf8ByteCount` to size it.
 *
 * @param {ArrayLike<number>} data - writable, index-addressable byte buffer
 * @param {number} offset - index in `data` of the first byte to write
 * @param {string} str - text to encode
 * @throws {Error} if a code point outside the Unicode range is encountered
 */
export function utf8Write(data, offset, str) {
    for (let i = 0, l = str.length; i < l; i++) {
        // codePointAt merges a surrogate pair starting at `i` into one code point,
        // which charCodeAt cannot do (it only ever yields values below 0x10000).
        const codePoint = str.codePointAt(i);

        // One byte of UTF-8
        if (codePoint < 0x80) {
            data[offset++] = codePoint & 0x7f;
            continue;
        }

        // Two bytes of UTF-8
        if (codePoint < 0x800) {
            data[offset++] = ((codePoint >>> 6) & 0x1f) | 0xc0;
            data[offset++] = (codePoint & 0x3f) | 0x80;
            continue;
        }

        // Three bytes of UTF-8
        if (codePoint < 0x10000) {
            data[offset++] = ((codePoint >>> 12) & 0x0f) | 0xe0;
            data[offset++] = ((codePoint >>> 6) & 0x3f) | 0x80;
            data[offset++] = (codePoint & 0x3f) | 0x80;
            continue;
        }

        // Four bytes of UTF-8
        if (codePoint < 0x110000) {
            data[offset++] = ((codePoint >>> 18) & 0x07) | 0xf0;
            data[offset++] = ((codePoint >>> 12) & 0x3f) | 0x80;
            data[offset++] = ((codePoint >>> 6) & 0x3f) | 0x80;
            data[offset++] = (codePoint & 0x3f) | 0x80;
            // The code point occupied two UTF-16 units; skip the low surrogate.
            i++;
            continue;
        }

        throw new Error('bad codepoint ' + codePoint);
    }
}

/**
 * Lookup table of single-character strings for every code point that fits in
 * one or two UTF-8 bytes (U+0000..U+07FF), used by the fallback decoder.
 */
const __chars = function () {
    const data = [];
    for (let i = 0; i < 0x800; i++) data[i] = String.fromCharCode(i);
    return data;
}();

/** Throw an `Error` with the given message (usable in expression position). */
function throwError(err) {
    throw new Error(err);
}

/**
 * Fallback UTF-8 decoder used when `TextDecoder` is not available.
 *
 * Decoded characters are collected in a fixed-size chunk that is joined and
 * flushed every 512 characters, and the flushed pieces are joined at the end.
 * Continuation bytes are not validated.
 *
 * @param {ArrayLike<number>} data - bytes to decode
 * @param {number} offset - index of the first byte to decode
 * @param {number} length - number of bytes to decode
 * @returns {string} the decoded text
 * @throws {Error} on a byte that cannot start a UTF-8 sequence, or on a
 *   four-byte sequence that encodes a value above U+10FFFF
 */
function _utf8Read(data, offset, length) {
    const chars = __chars;
    const chunkSize = 512;
    const chunk = [];
    const parts = [];
    let chunkOffset = 0;

    for (let i = offset, end = offset + length; i < end; i++) {
        const byte = data[i];
        if ((byte & 0x80) === 0x00) {
            // One byte character
            chunk[chunkOffset++] = chars[byte];
        } else if ((byte & 0xe0) === 0xc0) {
            // Two byte character: the lead byte carries 5 payload bits (mask 0x1f)
            chunk[chunkOffset++] = chars[((byte & 0x1f) << 6) | (data[++i] & 0x3f)];
        } else if ((byte & 0xf0) === 0xe0) {
            // Three byte character
            chunk[chunkOffset++] = String.fromCharCode(
                ((byte & 0x0f) << 12) | ((data[++i] & 0x3f) << 6) | (data[++i] & 0x3f)
            );
        } else if ((byte & 0xf8) === 0xf0) {
            // Four byte character: lies above U+FFFF, so it needs fromCodePoint
            // (fromCharCode would truncate the value to 16 bits).
            const codePoint = ((byte & 0x07) << 18) | ((data[++i] & 0x3f) << 12)
                | ((data[++i] & 0x3f) << 6) | (data[++i] & 0x3f);
            if (codePoint > 0x10ffff) throwError('Invalid code point ' + codePoint.toString(16));
            chunk[chunkOffset++] = String.fromCodePoint(codePoint);
        } else throwError('Invalid byte ' + byte.toString(16));

        if (chunkOffset === chunkSize) {
            parts[parts.length] = chunk.join('');
            chunkOffset = 0;
        }
    }

    // `chunk` may still hold stale entries from an earlier fill; only the first
    // `chunkOffset` entries belong to the current, partially filled chunk.
    if (chunkOffset > 0) {
        parts[parts.length] = chunk.slice(0, chunkOffset).join('');
    }
    return parts.join('');
}

/** Shared native decoder, or `undefined` when the runtime has no `TextDecoder`. */
const utf8Decoder = (typeof TextDecoder !== 'undefined') ? new TextDecoder() : undefined;

/**
 * Decode UTF8 data. Return as primitive `string` type, or fail if the result is longer than MAX_STRING_LENGTH.
 *
 * Uses the native `TextDecoder` when available (in which case `data` must
 * provide `subarray`), otherwise the built-in fallback decoder.
 *
 * @param {Uint8Array} data - bytes to decode
 * @param {number} [offset=0] - index of the first byte to decode
 * @param {number} [length=data.length] - number of bytes to decode
 * @returns {string} the decoded text
 */
export function utf8Read(data, offset = 0, length = data.length) {
    if (utf8Decoder) {
        // Only create a view when a proper sub-range was requested.
        const input = (offset || length !== data.length) ? data.subarray(offset, offset + length) : data;
        return utf8Decoder.decode(input);
    } else {
        return _utf8Read(data, offset, length);
    }
}

/**
 * Decode UTF8 data, potentially exceeding MAX_STRING_LENGTH.
 * Return as primitive `string` if possible; or as `ChunkedBigString` if the result is longer than MAX_STRING_LENGTH.
 *
 * @param {Uint8Array} data - bytes to decode
 * @param {number} [offset=0] - index of the first byte to decode
 * @param {number} [length=data.length] - number of bytes to decode
 * @returns {string | ChunkedBigString} the decoded text
 */
export function utf8ReadLong(data, offset = 0, length = data.length) {
    // The decoded string never has more UTF-16 units than the input has bytes,
    // so short inputs can always be decoded directly.
    if (length <= MAX_STRING_LENGTH) {
        return utf8Read(data, offset, length);
    }
    const out = ChunkedBigString.fromUtf8Data(data, offset, offset + length);
    return out.length <= MAX_STRING_LENGTH ? out.toString() : out;
}

/**
 * Count the number of bytes `utf8Write` produces for `str`.
 *
 * @param {string} str - text to measure
 * @returns {number} the UTF-8 encoded length of `str` in bytes
 * @throws {Error} if a code point outside the Unicode range is encountered
 */
export function utf8ByteCount(str) {
    let count = 0;
    for (let i = 0, l = str.length; i < l; i++) {
        const codePoint = str.codePointAt(i);
        if (codePoint < 0x80) {
            count += 1;
            continue;
        }
        if (codePoint < 0x800) {
            count += 2;
            continue;
        }
        if (codePoint < 0x10000) {
            count += 3;
            continue;
        }
        if (codePoint < 0x110000) {
            count += 4;
            // Skip the low surrogate of the pair that formed this code point.
            i++;
            continue;
        }
        throwError('bad codepoint ' + codePoint);
    }
    return count;
}