UNPKG

molstar

Version:

A comprehensive macromolecular library.

191 lines (190 loc) 9.35 kB
/** * Copyright (c) 2025 mol* contributors, licensed under MIT, See LICENSE file for more info. * * @author Adam Midlik <midlik@gmail.com> */ import { utf8Read } from './utf8'; export const StringLike = { /** Return true if `obj` is instance of `StringLike` */ is(obj) { return typeof obj.charCodeAt === 'function'; // a bit hacky }, /** Try to convert `StringLike` to a primitive `string`. Might fail if the content is longer that max allowed string length. */ toString(str) { try { return str.toString(); } catch (err) { throw new Error(`Failed to convert StringLike object into string. This might be because the length ${str.length} exceeds maximum allowed string length ${MAX_STRING_LENGTH}. (${err})`); } }, }; /** Maximum allowed string length (might be bigger for some engines, but in Chrome 136 and Node 22 it is this). */ export const MAX_STRING_LENGTH = 536870888; /** Binary logarithm of default string chunk size for `ChunkedBigString`. (string chunk size is chosen to be a power of 2, so we can use faster bit shift operator instead of integer division) */ const DEFAULT_LOG_STRING_CHUNK_SIZE = 28; // 2**28 is the largest power of 2 which is <= MAX_STRING_LENGTH /** Implementation of `CustomString`, based on an array of fixed-length strings (chunks). 
/* (this marker terminates the doc comment that is still open at the end of the previous line) */
/**
 * String-like container whose content is stored as an array of primitive strings (chunks).
 * Every chunk except possibly the last one holds exactly `STRING_CHUNK_SIZE` characters
 * (`_append` maintains this), so a global character index maps to a chunk number with a
 * bit shift and to an offset inside that chunk with a bit mask.
 */
export class ChunkedBigString {
    /** Total number of characters (string length units) appended so far. */
    get length() {
        return this._length;
    }

    /**
     * @param logStringChunkSize Binary logarithm of the chunk size (chunk size is `2 ** logStringChunkSize`).
     */
    constructor(logStringChunkSize = DEFAULT_LOG_STRING_CHUNK_SIZE) {
        this._chunks = [];
        this._length = 0;
        this._tmpArray = [];
        this.STRING_CHUNK_SIZE = 2 ** logStringChunkSize;
        this.STRING_CHUNK_SHIFT = logStringChunkSize;
        this.STRING_CHUNK_MASK = 2 ** logStringChunkSize - 1;
    }

    /** Create instance holding the content of a single primitive string. */
    static fromString(content, logStringChunkSize = DEFAULT_LOG_STRING_CHUNK_SIZE) {
        const out = new ChunkedBigString(logStringChunkSize);
        out._append(content);
        return out;
    }

    /** Create instance holding the concatenation of an iterable of primitive strings. */
    static fromStrings(content, logStringChunkSize = DEFAULT_LOG_STRING_CHUNK_SIZE) {
        const out = new ChunkedBigString(logStringChunkSize);
        for (const inputChunk of content) {
            out._append(inputChunk);
        }
        return out;
    }

    /**
     * Create instance from UTF8 data. (Do not call directly, prefer `utf8ReadLong` in utf8.ts.)
     * Decodes bytes `start` (inclusive) to `end` (exclusive) of `data` piecewise with `utf8Read`.
     * @throws Error when a whole buffer chunk consists of continuation bytes only.
     */
    static fromUtf8Data(data, start = 0, end = data.length, logStringChunkSize = DEFAULT_LOG_STRING_CHUNK_SIZE) {
        const bufferChunkSize = 2 ** logStringChunkSize; // n bytes will always decode to <=n characters
        const stringChunks = [];
        let readStart = start;
        while (readStart < end) {
            let readEnd = Math.min(readStart + bufferChunkSize, end);
            if (readEnd < end) {
                // This is buffer chunk boundary, adjust to avoid cutting multi-byte characters
                while ((data[readEnd] & 0xC0) === 0x80) { // Byte after the cut is a continuation byte (10xxxxxx)
                    readEnd--;
                    if (readEnd === readStart) {
                        throw new Error('Input is rubbish, no UTF-8 character start found in a chunk');
                    }
                }
            } // Else this is the end of the read region, let default error handling do its job
            stringChunks.push(utf8Read(data, readStart, readEnd - readStart));
            readStart = readEnd;
        }
        return ChunkedBigString.fromStrings(stringChunks, logStringChunkSize);
    }

    /** Append a primitive string, topping up the last chunk first and then adding new chunks. */
    _append(inputChunk) {
        const chunkSize = this.STRING_CHUNK_SIZE;
        const nChunks = this._chunks.length;
        // Reopen the last chunk only if it is not full yet
        const tail = (nChunks === 0 || this._chunks[nChunks - 1].length === chunkSize) ? '' : this._chunks.pop();
        let inputPtr = chunkSize - tail.length;
        this._chunks.push(tail + inputChunk.substring(0, inputPtr)); // .substring() clamps inputPtr > inputChunk.length
        while (inputPtr < inputChunk.length) {
            this._chunks.push(inputChunk.substring(inputPtr, inputPtr + chunkSize)); // .substring() clamps the end as well
            inputPtr += chunkSize;
        }
        this._length += inputChunk.length;
    }

    /** Index of the chunk containing character `index`. */
    _getChunkIndex(index) {
        return index >>> this.STRING_CHUNK_SHIFT; // equivalent to `Math.floor(index / STRING_CHUNK_SIZE)`
    }

    /** Offset of character `index` within its chunk. */
    _getIndexInChunk(index) {
        return index & this.STRING_CHUNK_MASK; // equivalent to `index % STRING_CHUNK_SIZE`
    }

    _isOutOfRange(index) {
        return index < 0 || index >= this.length;
    }

    /**
     * Normalize an index argument to an integer in `[0, length]`:
     * `null`/`undefined` become `fallback`, fractions are truncated, NaN and negatives become 0.
     */
    _toIndex(value, fallback) {
        const raw = (value === null || value === undefined) ? fallback : value;
        const truncated = Math.trunc(raw);
        return truncated > 0 ? Math.min(truncated, this.length) : 0; // the comparison is false for NaN
    }

    /** Character at `index` (negative `index` counts from the end), or `undefined` when out of range. */
    at(index) {
        if (-this.length <= index && index < 0) {
            return this.at(index + this.length);
        }
        return this.charAt(index) || undefined;
    }

    /** Character at `index`, or empty string when out of range. */
    charAt(index) {
        if (this._isOutOfRange(index)) return '';
        const chunk = this._chunks[this._getChunkIndex(index)];
        return chunk[this._getIndexInChunk(index)];
    }

    /** Char code at `index`, or NaN when out of range. */
    charCodeAt(index) {
        if (this._isOutOfRange(index)) return NaN;
        const chunk = this._chunks[this._getChunkIndex(index)];
        return chunk.charCodeAt(this._getIndexInChunk(index));
    }

    /**
     * Return characters from `start` (inclusive) to `end` (exclusive) as a primitive string.
     * Both bounds are clamped to `[0, length]` and swapped if `start > end`.
     * @throws Error when the requested range is longer than `MAX_STRING_LENGTH`.
     */
    substring(start, end) {
        let start_ = this._toIndex(start, 0);
        let end_ = this._toIndex(end, this.length);
        if (start_ > end_) {
            const swapped = start_;
            start_ = end_;
            end_ = swapped;
        }
        if (start_ === end_) {
            return '';
        }
        if (end_ - start_ > MAX_STRING_LENGTH) {
            throw new Error(`Trying to create get a substring longer (${end_ - start_}) than maximum allowed string length (${MAX_STRING_LENGTH}).`);
        }
        const iFirstChunk = this._getChunkIndex(start_);
        const indexInChunkFrom = this._getIndexInChunk(start_);
        // Locate the last character that is actually included (`end_ - 1`), not `end_` itself:
        // when `end_` lies on a chunk boundary, chunk `end_ >>> shift` contributes nothing
        // and does not even exist when `end_ === length`.
        const iLastChunk = this._getChunkIndex(end_ - 1);
        const indexInChunkTo = this._getIndexInChunk(end_ - 1) + 1;
        if (iFirstChunk === iLastChunk) {
            return this._chunks[iFirstChunk].substring(indexInChunkFrom, indexInChunkTo);
        }
        const parts = this._getTmpArray();
        parts.push(this._chunks[iFirstChunk].substring(indexInChunkFrom, this.STRING_CHUNK_SIZE));
        for (let iChunk = iFirstChunk + 1; iChunk < iLastChunk; iChunk++) {
            parts.push(this._chunks[iChunk]);
        }
        parts.push(this._chunks[iLastChunk].substring(0, indexInChunkTo));
        return parts.join('');
    }

    /** Return the shared scratch array, emptied. */
    _getTmpArray() {
        while (this._tmpArray.length) this._tmpArray.pop(); // this seems to be faster than `this._tmpArray.length = 0` for short arrays
        return this._tmpArray;
    }

    /**
     * Index of the first occurrence of `searchString` at or after `position`, or -1 if there is none.
     * @throws Error when `searchString` is longer than `STRING_CHUNK_SIZE`.
     */
    indexOf(searchString, position = 0) {
        if (searchString.length > this.STRING_CHUNK_SIZE) {
            throw new Error('NotImplementedError: indexOf is only implemented for searchString shorter than STRING_CHUNK_SIZE');
            // In real use-cases STRING_CHUNK_SIZE is big and it doesn't make sense to search for such long substrings.
        }
        const from = position < 0 ? 0 : position;
        const iFirstChunk = this._getChunkIndex(from);
        const nChunks = this._chunks.length;
        for (let iChunk = iFirstChunk; iChunk < nChunks; iChunk++) {
            const chunk = this._chunks[iChunk];
            const chunkOffset = iChunk * this.STRING_CHUNK_SIZE;
            const positionInChunk = iChunk === iFirstChunk ? this._getIndexInChunk(from) : 0;
            // Try to find the whole substring in this chunk
            const foundInside = chunk.indexOf(searchString, positionInChunk);
            if (foundInside >= 0) return chunkOffset + foundInside;
            // Try to find the substring overflowing to the next chunk (assumes searchString.length <= STRING_CHUNK_SIZE)
            if (iChunk !== nChunks - 1) {
                const start = Math.max(this.STRING_CHUNK_SIZE - searchString.length + 1, positionInChunk);
                const aroundBoundary = chunk.substring(start, undefined) + this._chunks[iChunk + 1].substring(0, searchString.length - 1);
                const foundAcross = aroundBoundary.indexOf(searchString);
                if (foundAcross >= 0) return chunkOffset + start + foundAcross;
            }
        }
        return -1;
    }

    /** True if `searchString` occurs at or after `position` (see `indexOf`). */
    includes(searchString, position = 0) {
        return this.indexOf(searchString, position) >= 0;
    }

    /**
     * True if the content at `position` begins with `searchString`.
     * `position` is clamped to `[0, length]`, like in `String.prototype.startsWith`.
     * @throws Error when `searchString` is longer than `STRING_CHUNK_SIZE`.
     */
    startsWith(searchString, position = 0) {
        if (searchString.length > this.STRING_CHUNK_SIZE) {
            throw new Error('NotImplementedError: startsWith is only implemented for searchString shorter than STRING_CHUNK_SIZE');
            // In real use-cases STRING_CHUNK_SIZE is big and it doesn't make sense to search for such long substrings.
        }
        const from = this._toIndex(position, 0);
        return this.substring(from, from + searchString.length) === searchString;
    }

    /**
     * Join all chunks into one primitive string.
     * @throws Error when joining fails (e.g. the result would exceed the engine's string length limit).
     */
    toString() {
        try {
            return this._chunks.join('');
        }
        catch (err) {
            throw new Error(`Failed to convert StringLike object into string. This might be because the length ${this.length} exceeds maximum allowed string length ${MAX_STRING_LENGTH}. (${err})`);
        }
    }
}