/**
* Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* mostly from https://github.com/dsehnal/CIFTools.js
* @author David Sehnal <david.sehnal@gmail.com>
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.TokenBuilder = exports.trimStr = exports.Tokenizer = void 0;
var tslib_1 = require("tslib");
var mol_task_1 = require("../../../../mol-task");
function Tokenizer(data) {
return {
data: data,
position: 0,
length: data.length,
lineNumber: 1,
tokenStart: 0,
tokenEnd: 0
};
}
exports.Tokenizer = Tokenizer;
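// Example (illustrative, not part of the original source): a Tokenizer is a
// plain mutable state object over a string buffer.
//
//   var state = Tokenizer('data_block\nloop_\n');
//   // state.position === 0, state.lineNumber === 1,
//   // state.tokenStart === 0, state.tokenEnd === 0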
(function (Tokenizer) {
function getTokenString(state) {
return state.data.substring(state.tokenStart, state.tokenEnd);
}
Tokenizer.getTokenString = getTokenString;
/** Resets the state */
function reset(state) {
state.position = 0;
state.lineNumber = 1;
state.tokenStart = 0;
state.tokenEnd = 0;
}
Tokenizer.reset = reset;
/**
* Eat everything until a newline occurs.
*/
function eatLine(state) {
var data = state.data;
while (state.position < state.length) {
switch (data.charCodeAt(state.position)) {
case 10: // \n
state.tokenEnd = state.position;
++state.position;
++state.lineNumber;
return true;
case 13: // \r
state.tokenEnd = state.position;
++state.position;
++state.lineNumber;
if (data.charCodeAt(state.position) === 10) {
++state.position;
}
return true;
default:
++state.position;
break;
}
}
state.tokenEnd = state.position;
return state.tokenStart !== state.tokenEnd;
}
Tokenizer.eatLine = eatLine;
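// Example (illustrative): eatLine consumes up to the next line break, records
// the token bounds excluding the terminator, and treats \r\n as one break.
//
//   var state = Tokenizer('first\r\nsecond');
//   Tokenizer.eatLine(state);
//   // state.tokenEnd === 5, state.position === 7, state.lineNumber === 2
//   // Tokenizer.getTokenString(state) => 'first' (tokenStart is still 0)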
/** Sets the current token start to the current position */
function markStart(state) {
state.tokenStart = state.position;
}
Tokenizer.markStart = markStart;
/** Sets the current token start to current position and moves to the next line. */
function markLine(state) {
state.tokenStart = state.position;
return eatLine(state);
}
Tokenizer.markLine = markLine;
/** Advance the state and return line as string. */
function readLine(state) {
markLine(state);
return getTokenString(state);
}
Tokenizer.readLine = readLine;
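// Example (illustrative): readLine marks and returns one line at a time.
//
//   var state = Tokenizer('foo\nbar');
//   Tokenizer.readLine(state); // => 'foo'
//   Tokenizer.readLine(state); // => 'bar'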
/** Advance the state and return trimmed line as string. */
function readLineTrim(state) {
markLine(state);
var position = state.position;
trim(state, state.tokenStart, state.tokenEnd);
state.position = position;
return getTokenString(state);
}
Tokenizer.readLineTrim = readLineTrim;
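// Example (illustrative): readLineTrim strips leading/trailing spaces and tabs
// from the marked token while keeping the position just past the line break.
//
//   var state = Tokenizer('  key value  \nnext');
//   Tokenizer.readLineTrim(state); // => 'key value'
//   // state.position === 14, i.e. pointing at 'next'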
function readLinesChunk(state, count, tokens) {
var read = 0;
for (var i = 0; i < count; i++) {
if (!markLine(state))
return read;
TokenBuilder.addUnchecked(tokens, state.tokenStart, state.tokenEnd);
read++;
}
return read;
}
/** Advance the state by the given number of lines and return line starts/ends as tokens. */
function markLines(state, count) {
var lineTokens = TokenBuilder.create(state.data, count * 2);
readLinesChunk(state, count, lineTokens);
return lineTokens;
}
Tokenizer.markLines = markLines;
/** Advance the state by the given number of lines and return them */
function readLines(state, count) {
var ret = [];
for (var i = 0; i < count; i++) {
ret.push(Tokenizer.readLine(state));
}
return ret;
}
Tokenizer.readLines = readLines;
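// Example (illustrative): readLines returns strings, while markLines returns a
// compact Tokens object where line i spans [indices[2 * i], indices[2 * i + 1])
// in the source string.
//
//   var state = Tokenizer('a\nb\nc');
//   Tokenizer.readLines(state, 2); // => ['a', 'b']
//
//   var tokens = Tokenizer.markLines(Tokenizer('a\nb\nc'), 3);
//   // tokens.count === 3; tokens.indices => [0, 1, 2, 3, 4, 5, ...]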
/** Advance the state by the given number of lines and return line starts/ends as tokens. */
function readLinesAsync(state, count, ctx, initialLineCount) {
if (initialLineCount === void 0) { initialLineCount = 100000; }
return (0, tslib_1.__awaiter)(this, void 0, void 0, function () {
var length, lineTokens, linesAlreadyRead;
return (0, tslib_1.__generator)(this, function (_a) {
switch (_a.label) {
case 0:
length = state.length;
lineTokens = TokenBuilder.create(state.data, count * 2);
linesAlreadyRead = 0;
return [4 /*yield*/, (0, mol_task_1.chunkedSubtask)(ctx, initialLineCount, state, function (chunkSize, state) {
var linesToRead = Math.min(count - linesAlreadyRead, chunkSize);
readLinesChunk(state, linesToRead, lineTokens);
linesAlreadyRead += linesToRead;
return linesToRead;
}, function (ctx, state) { return ctx.update({ message: 'Parsing...', current: state.position, max: length }); })];
case 1:
_a.sent();
return [2 /*return*/, lineTokens];
}
});
});
}
Tokenizer.readLinesAsync = readLinesAsync;
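// Example (illustrative sketch, assuming the mol-task Task API): readLinesAsync
// runs the chunked read inside a RuntimeContext so progress can be reported.
//
//   var mol_task = require('../../../../mol-task');
//   var data = 'a\nb\nc\n';
//   mol_task.Task.create('Parse', function (ctx) {
//       return Tokenizer.readLinesAsync(Tokenizer(data), 1000, ctx);
//   }).run().then(function (tokens) { /* tokens.count <= 1000 */ });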
function readAllLines(data) {
var state = Tokenizer(data);
var tokens = TokenBuilder.create(state.data, Math.max(data.length / 80, 2));
while (markLine(state)) {
TokenBuilder.add(tokens, state.tokenStart, state.tokenEnd);
}
return tokens;
}
Tokenizer.readAllLines = readAllLines;
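// Example (illustrative): tokenize a whole buffer, then slice out each line.
//
//   var tokens = Tokenizer.readAllLines('a\nbb\nccc');
//   for (var i = 0; i < tokens.count; i++) {
//       var line = tokens.data.substring(tokens.indices[2 * i], tokens.indices[2 * i + 1]);
//   }
//   // lines: 'a', 'bb', 'ccc'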
function readLinesChunkChecked(state, count, tokens) {
var read = 0;
for (var i = 0; i < count; i++) {
if (!markLine(state))
return read;
TokenBuilder.add(tokens, state.tokenStart, state.tokenEnd);
read++;
}
return read;
}
function readAllLinesAsync(data, ctx, chunkSize) {
if (chunkSize === void 0) { chunkSize = 100000; }
return (0, tslib_1.__awaiter)(this, void 0, void 0, function () {
var state, tokens;
return (0, tslib_1.__generator)(this, function (_a) {
switch (_a.label) {
case 0:
state = Tokenizer(data);
tokens = TokenBuilder.create(state.data, Math.max(data.length / 80, 2));
return [4 /*yield*/, (0, mol_task_1.chunkedSubtask)(ctx, chunkSize, state, function (chunkSize, state) {
readLinesChunkChecked(state, chunkSize, tokens);
return state.position < state.length ? chunkSize : 0;
}, function (ctx, state) { return ctx.update({ message: 'Parsing...', current: state.position, max: state.length }); })];
case 1:
_a.sent();
return [2 /*return*/, tokens];
}
});
});
}
Tokenizer.readAllLinesAsync = readAllLinesAsync;
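// Example (illustrative sketch, assuming the mol-task Task API): same as
// readAllLines, but chunked so the UI can stay responsive on large inputs.
//
//   var mol_task = require('../../../../mol-task');
//   mol_task.Task.create('Parse', function (ctx) {
//       return Tokenizer.readAllLinesAsync('a\nb\nc', ctx);
//   }).run().then(function (tokens) { /* tokens.count === 3 */ });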
/**
* Eat everything until a whitespace/newline occurs.
*/
function eatValue(state) {
while (state.position < state.length) {
switch (state.data.charCodeAt(state.position)) {
case 9: // \t
case 10: // \n
case 13: // \r
case 32: // ' '
state.tokenEnd = state.position;
return;
default:
++state.position;
break;
}
}
state.tokenEnd = state.position;
}
Tokenizer.eatValue = eatValue;
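// Example (illustrative): markStart + eatValue extracts one whitespace-delimited
// field, which is how CIF-style values are scanned.
//
//   var state = Tokenizer('ATOM 12');
//   Tokenizer.markStart(state);
//   Tokenizer.eatValue(state);
//   Tokenizer.getTokenString(state); // => 'ATOM'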
/**
* Skips all the whitespace - space, tab, newline, CR
* Handles incrementing line count.
*/
function skipWhitespace(state) {
var prev = -1;
while (state.position < state.length) {
var c = state.data.charCodeAt(state.position);
switch (c) {
case 9: // '\t'
case 32: // ' '
prev = c;
++state.position;
break;
case 10: // \n
// handle \r\n
if (prev !== 13) {
++state.lineNumber;
}
prev = c;
++state.position;
break;
case 13: // \r
prev = c;
++state.position;
++state.lineNumber;
break;
default:
return prev;
}
}
return prev;
}
Tokenizer.skipWhitespace = skipWhitespace;
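// Example (illustrative): skipWhitespace advances past blanks, counts line
// breaks (treating \r\n as a single break), and returns the char code of the
// last whitespace it saw, or -1 if none.
//
//   var state = Tokenizer(' \t\r\nvalue');
//   Tokenizer.skipWhitespace(state); // => 10
//   // state.position === 4, state.lineNumber === 2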
/** Trims spaces and tabs */
function trim(state, start, end) {
var data = state.data;
var s = start, e = end - 1;
var c = data.charCodeAt(s);
while ((c === 9 || c === 32) && s <= e)
c = data.charCodeAt(++s);
c = data.charCodeAt(e);
while ((c === 9 || c === 32) && e >= s)
c = data.charCodeAt(--e);
state.tokenStart = s;
state.tokenEnd = e + 1;
state.position = end;
return state;
}
Tokenizer.trim = trim;
})(Tokenizer || (Tokenizer = {}));
exports.Tokenizer = Tokenizer;
function trimStr(data, start, end) {
var s = start, e = end - 1;
var c = data.charCodeAt(s);
while ((c === 9 || c === 32) && s <= e)
c = data.charCodeAt(++s);
c = data.charCodeAt(e);
while ((c === 9 || c === 32) && e >= s)
c = data.charCodeAt(--e);
return data.substring(s, e + 1);
}
exports.trimStr = trimStr;
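// Example (illustrative): trimStr trims spaces and tabs from a substring range
// without touching any tokenizer state.
//
//   trimStr('  pad  ', 0, 7); // => 'pad'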
var TokenBuilder;
(function (TokenBuilder) {
function resize(builder) {
// scale the size using golden ratio, because why not.
var newBuffer = new Uint32Array((1.61 * builder.indices.length) | 0);
newBuffer.set(builder.indices);
builder.indices = newBuffer;
builder.indicesLenMinus2 = (newBuffer.length - 2) | 0;
}
function add(tokens, start, end) {
var builder = tokens;
if (builder.offset > builder.indicesLenMinus2) {
resize(builder);
}
builder.indices[builder.offset++] = start;
builder.indices[builder.offset++] = end;
tokens.count++;
}
TokenBuilder.add = add;
function addToken(tokens, tokenizer) {
add(tokens, tokenizer.tokenStart, tokenizer.tokenEnd);
}
TokenBuilder.addToken = addToken;
function addUnchecked(tokens, start, end) {
tokens.indices[tokens.offset++] = start;
tokens.indices[tokens.offset++] = end;
tokens.count++;
}
TokenBuilder.addUnchecked = addUnchecked;
function create(data, size) {
size = Math.max(10, size);
return {
data: data,
indicesLenMinus2: (size - 2) | 0,
count: 0,
offset: 0,
indices: new Uint32Array(size)
};
}
TokenBuilder.create = create;
})(TokenBuilder = exports.TokenBuilder || (exports.TokenBuilder = {}));
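// Example (illustrative): TokenBuilder stores token bounds as flat start/end
// pairs in a Uint32Array, growing the buffer by ~1.61x when it fills up.
//
//   var tokens = TokenBuilder.create('hello world', 4);
//   TokenBuilder.add(tokens, 0, 5);
//   TokenBuilder.add(tokens, 6, 11);
//   // tokens.count === 2, tokens.indices => [0, 5, 6, 11, ...]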
//# sourceMappingURL=tokenizer.js.map