molstar
Version:
A comprehensive macromolecular library.
191 lines (190 loc) • 9.35 kB
JavaScript
/**
* Copyright (c) 2025 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Adam Midlik <midlik@gmail.com>
*/
import { utf8Read } from './utf8';
/** Helper functions for values that behave like strings (primitive `string` or a custom string-like object). */
export const StringLike = {
    /**
     * Return true if `obj` is instance of `StringLike`.
     * The check is duck-typed: anything exposing a `charCodeAt` function qualifies.
     * `null` and `undefined` are never `StringLike` (returns false instead of throwing).
     * @param obj Value to test.
     * @returns `true` if `obj` has a callable `charCodeAt`, `false` otherwise.
     */
    is(obj) {
        // Guard against null/undefined first, property access on them would throw a TypeError
        return obj !== null && obj !== undefined && typeof obj.charCodeAt === 'function'; // a bit hacky
    },
    /**
     * Try to convert `StringLike` to a primitive `string`.
     * Might fail if the content is longer than max allowed string length.
     * @param str String-like value to convert.
     * @returns Result of `str.toString()`.
     * @throws {Error} If `str.toString()` throws; the message reports `str.length` and the original error.
     */
    toString(str) {
        try {
            return str.toString();
        }
        catch (err) {
            throw new Error(`Failed to convert StringLike object into string. This might be because the length ${str.length} exceeds maximum allowed string length ${MAX_STRING_LENGTH}. (${err})`);
        }
    },
};
/**
 * Maximum allowed string length, i.e. 536870888 (= 2**29 - 24).
 * Some engines might allow more, but this is the value in Chrome 136 and Node 22.
 */
export const MAX_STRING_LENGTH = 2 ** 29 - 24;
/**
 * Binary logarithm of default string chunk size for `ChunkedBigString`.
 * The chunk size is a power of 2, so a bit shift can be used instead of integer division.
 * Evaluates to 28: `31 - Math.clz32(x)` is the position of the highest set bit of `x`,
 * so 2**28 is the largest power of 2 which is <= MAX_STRING_LENGTH.
 */
const DEFAULT_LOG_STRING_CHUNK_SIZE = 31 - Math.clz32(MAX_STRING_LENGTH);
/**
 * Implementation of `CustomString`, based on an array of fixed-length strings (chunks).
 *
 * Every chunk except possibly the last one holds exactly `STRING_CHUNK_SIZE` characters,
 * so a character position maps to a chunk by bit shift and to an offset within the chunk by bit mask.
 */
export class ChunkedBigString {
    /** Total number of characters stored across all chunks. */
    get length() {
        return this._length;
    }

    /**
     * Create an empty instance.
     * @param logStringChunkSize Binary logarithm of the chunk size (chunk size is `2 ** logStringChunkSize`).
     */
    constructor(logStringChunkSize = DEFAULT_LOG_STRING_CHUNK_SIZE) {
        this._chunks = [];
        this._length = 0;
        this._tmpArray = []; // scratch array reused by `substring` when joining pieces of several chunks
        this.STRING_CHUNK_SIZE = 2 ** logStringChunkSize;
        this.STRING_CHUNK_SHIFT = logStringChunkSize;
        this.STRING_CHUNK_MASK = 2 ** logStringChunkSize - 1;
    }

    /**
     * Create instance holding the content of a single string.
     * @param content String to store.
     * @param logStringChunkSize Binary logarithm of the chunk size.
     */
    static fromString(content, logStringChunkSize = DEFAULT_LOG_STRING_CHUNK_SIZE) {
        const out = new ChunkedBigString(logStringChunkSize);
        out._append(content);
        return out;
    }

    /**
     * Create instance holding the concatenation of several strings.
     * @param content Iterable of strings, appended in iteration order.
     * @param logStringChunkSize Binary logarithm of the chunk size.
     */
    static fromStrings(content, logStringChunkSize = DEFAULT_LOG_STRING_CHUNK_SIZE) {
        const out = new ChunkedBigString(logStringChunkSize);
        for (const inputChunk of content) {
            out._append(inputChunk);
        }
        return out;
    }

    /**
     * Create instance from UTF8 data. (Do not call directly, prefer `utf8ReadLong` in utf8.ts.)
     * @param data Indexable byte buffer with UTF-8 encoded text.
     * @param start Index of the first byte to read.
     * @param end Index after the last byte to read.
     * @param logStringChunkSize Binary logarithm of the chunk size.
     * @throws {Error} If a whole buffer chunk contains no UTF-8 character start byte.
     */
    static fromUtf8Data(data, start = 0, end = data.length, logStringChunkSize = DEFAULT_LOG_STRING_CHUNK_SIZE) {
        const bufferChunkSize = 2 ** logStringChunkSize; // n bytes will always decode to <=n characters
        const stringChunks = [];
        let readStart = start;
        while (readStart < end) {
            let readEnd = Math.min(readStart + bufferChunkSize, end);
            if (readEnd < end) {
                // This is buffer chunk boundary, adjust to avoid cutting multi-byte characters
                while ((data[readEnd] & 0xC0) === 0x80) { // Byte after the cut is a continuation byte (10xxxxxx)
                    readEnd--;
                    if (readEnd === readStart)
                        throw new Error('Input is rubbish, no UTF-8 character start found in a chunk');
                }
            } // Else this is the end of the read region, let default error handling do its job
            const stringChunk = utf8Read(data, readStart, readEnd - readStart);
            stringChunks.push(stringChunk);
            readStart = readEnd;
        }
        return ChunkedBigString.fromStrings(stringChunks, logStringChunkSize);
    }

    /**
     * Append a string to the end of the stored content, re-chunking it so that
     * all chunks but the last one stay exactly `STRING_CHUNK_SIZE` long.
     * @param inputChunk String to append (any length).
     */
    _append(inputChunk) {
        const chunkSize = this.STRING_CHUNK_SIZE;
        // Take back the last chunk if it is not full yet, so it can be topped up
        const tail = (this._chunks.length === 0 || this._chunks[this._chunks.length - 1].length === chunkSize) ? '' : this._chunks.pop();
        let inputPtr = chunkSize - tail.length;
        this._chunks.push(tail + inputChunk.substring(0, inputPtr)); // Assuming .substring() deals with inputPtr > inputChunk.length
        while (inputPtr < inputChunk.length) {
            this._chunks.push(inputChunk.substring(inputPtr, inputPtr + chunkSize)); // Assuming .substring() deals with inputPtr + chunkSize > inputChunk.length
            inputPtr += chunkSize;
        }
        this._length += inputChunk.length;
    }

    /** Index of the chunk containing character position `index`. */
    _getChunkIndex(index) {
        return index >>> this.STRING_CHUNK_SHIFT; // equivalent to `Math.floor(index / STRING_CHUNK_SIZE)`
    }

    /** Offset of character position `index` within its chunk. */
    _getIndexInChunk(index) {
        return index & this.STRING_CHUNK_MASK; // equivalent to `index % STRING_CHUNK_SIZE`
    }

    /** Return true if `index` is not a valid character position. */
    _isOutOfRange(index) {
        return index < 0 || index >= this.length;
    }

    /**
     * Return the character at `index`; negative indices in range `[-length, 0)` count from the end.
     * @returns The character, or `undefined` if the index is out of range.
     */
    at(index) {
        if (-this.length <= index && index < 0) {
            return this.at(index + this.length);
        }
        return this.charAt(index) || undefined;
    }

    /**
     * Return the character at `index`.
     * @returns The character, or empty string if the index is out of range.
     */
    charAt(index) {
        if (this._isOutOfRange(index))
            return '';
        const iChunk = this._getChunkIndex(index);
        const indexInChunk = this._getIndexInChunk(index);
        return this._chunks[iChunk][indexInChunk];
    }

    /**
     * Return the UTF-16 code unit at `index`.
     * @returns The code unit, or `NaN` if the index is out of range.
     */
    charCodeAt(index) {
        if (this._isOutOfRange(index))
            return NaN;
        const iChunk = this._getChunkIndex(index);
        const indexInChunk = this._getIndexInChunk(index);
        return this._chunks[iChunk].charCodeAt(indexInChunk);
    }

    /**
     * Return the part of the content between `start` and `end` as a primitive string.
     * Both bounds are clamped to `[0, length]` and swapped if `start > end`.
     * @param start Start position (inclusive), defaults to 0.
     * @param end End position (exclusive), defaults to `length`.
     * @throws {Error} If the requested substring is longer than `MAX_STRING_LENGTH`.
     */
    substring(start, end) {
        const start_ = Math.min(Math.max(start !== null && start !== void 0 ? start : 0, 0), this.length);
        const end_ = Math.min(Math.max(end !== null && end !== void 0 ? end : this.length, 0), this.length);
        if (start_ > end_) {
            return this.substring(end_, start_);
        }
        if (start_ === end_) {
            return '';
        }
        if (end_ - start_ > MAX_STRING_LENGTH) {
            throw new Error(`Trying to create get a substring longer (${end_ - start_}) than maximum allowed string length (${MAX_STRING_LENGTH}).`);
        }
        const iFirstChunk = this._getChunkIndex(start_);
        const indexInChunkFrom = this._getIndexInChunk(start_);
        const iLastChunk = this._getChunkIndex(end_);
        const indexInChunkTo = this._getIndexInChunk(end_);
        if (iFirstChunk === iLastChunk) {
            return this._chunks[iFirstChunk].substring(indexInChunkFrom, indexInChunkTo);
        }
        const out = this._getTmpArray();
        out.push(this._chunks[iFirstChunk].substring(indexInChunkFrom, this.STRING_CHUNK_SIZE));
        for (let iChunk = iFirstChunk + 1; iChunk < iLastChunk; iChunk++) {
            out.push(this._chunks[iChunk]);
        }
        // When `end_` falls exactly on a chunk boundary, nothing is taken from chunk `iLastChunk`.
        // That chunk does not even exist if `end_ === length` and the last chunk is full, so it must not be touched.
        if (indexInChunkTo > 0) {
            out.push(this._chunks[iLastChunk].substring(0, indexInChunkTo));
        }
        return out.join('');
    }

    /** Return the shared scratch array, emptied. The returned array is reused by subsequent calls. */
    _getTmpArray() {
        while (this._tmpArray.length)
            this._tmpArray.pop(); // this seems to be faster than `this._tmpArray.length = 0` for short arrays
        return this._tmpArray;
    }

    /**
     * Return the position of the first occurrence of `searchString` at or after `position`.
     * @param searchString String to look for, must not be longer than `STRING_CHUNK_SIZE`.
     * @param position Position to start searching from (negative values are treated as 0).
     * @returns Position of the first occurrence, or -1 if not found.
     * @throws {Error} If `searchString` is longer than `STRING_CHUNK_SIZE`.
     */
    indexOf(searchString, position = 0) {
        if (searchString.length > this.STRING_CHUNK_SIZE) {
            throw new Error('NotImplementedError: indexOf is only implemented for searchString shorter than STRING_CHUNK_SIZE');
            // In real use-cases STRING_CHUNK_SIZE is big and it doesn't make sense to search for such long substrings.
        }
        if (position < 0)
            position = 0;
        const iFirstChunk = this._getChunkIndex(position);
        for (let iChunk = iFirstChunk; iChunk < this._chunks.length; iChunk++) {
            const chunk = this._chunks[iChunk];
            const positionInChunk = iChunk === iFirstChunk ? this._getIndexInChunk(position) : 0;
            // Try to find the whole substring in this chunk
            const found = chunk.indexOf(searchString, positionInChunk);
            if (found >= 0)
                return iChunk * this.STRING_CHUNK_SIZE + found;
            // Try to find the substring overflowing to the next chunk (assumes searchString.length <= STRING_CHUNK_SIZE)
            if (iChunk !== this._chunks.length - 1) {
                // Only start positions too close to the chunk end for a whole in-chunk match need to be examined here
                const start = Math.max(this.STRING_CHUNK_SIZE - searchString.length + 1, positionInChunk);
                const aroundBoundary = chunk.substring(start, undefined) + this._chunks[iChunk + 1].substring(0, searchString.length - 1);
                const foundAcrossBoundary = aroundBoundary.indexOf(searchString);
                if (foundAcrossBoundary >= 0)
                    return iChunk * this.STRING_CHUNK_SIZE + start + foundAcrossBoundary;
            }
        }
        return -1;
    }

    /**
     * Return true if `searchString` occurs at or after `position`.
     * @throws {Error} If `searchString` is longer than `STRING_CHUNK_SIZE` (thrown by `indexOf`).
     */
    includes(searchString, position = 0) {
        return this.indexOf(searchString, position) >= 0;
    }

    /**
     * Return true if the content at `position` begins with `searchString`.
     * @param searchString String to compare with, must not be longer than `STRING_CHUNK_SIZE`.
     * @param position Position where the comparison starts (negative values are treated as 0, as in `String.prototype.startsWith`).
     * @throws {Error} If `searchString` is longer than `STRING_CHUNK_SIZE`.
     */
    startsWith(searchString, position = 0) {
        if (searchString.length > this.STRING_CHUNK_SIZE) {
            throw new Error('NotImplementedError: startsWith is only implemented for searchString shorter than STRING_CHUNK_SIZE');
            // In real use-cases STRING_CHUNK_SIZE is big and it doesn't make sense to search for such long substrings.
        }
        // Clamp before computing the end, otherwise a negative position would shrink the compared window
        const from = position < 0 ? 0 : position;
        return this.substring(from, from + searchString.length) === searchString;
    }

    /**
     * Convert the whole content into a primitive string.
     * @throws {Error} If joining the chunks fails; the message reports the length and the original error.
     */
    toString() {
        try {
            return this._chunks.join('');
        }
        catch (err) {
            throw new Error(`Failed to convert StringLike object into string. This might be because the length ${this.length} exceeds maximum allowed string length ${MAX_STRING_LENGTH}. (${err})`);
        }
    }
}