molstar

A comprehensive macromolecular library.

"use strict"; /** * Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info. * * mostly from https://github.com/dsehnal/CIFTools.js * @author David Sehnal <david.sehnal@gmail.com> * @author Alexander Rose <alexander.rose@weirdbyte.de> */ Object.defineProperty(exports, "__esModule", { value: true }); exports.TokenBuilder = exports.trimStr = exports.Tokenizer = void 0; var tslib_1 = require("tslib"); var mol_task_1 = require("../../../../mol-task"); function Tokenizer(data) { return { data: data, position: 0, length: data.length, lineNumber: 1, tokenStart: 0, tokenEnd: 0 }; } exports.Tokenizer = Tokenizer; (function (Tokenizer) { function getTokenString(state) { return state.data.substring(state.tokenStart, state.tokenEnd); } Tokenizer.getTokenString = getTokenString; /** Resets the state */ function reset(state) { state.position = 0; state.lineNumber = 1; state.tokenStart = 0; state.tokenEnd = 0; } Tokenizer.reset = reset; /** * Eat everything until a newline occurs. */ function eatLine(state) { var data = state.data; while (state.position < state.length) { switch (data.charCodeAt(state.position)) { case 10: // \n state.tokenEnd = state.position; ++state.position; ++state.lineNumber; return true; case 13: // \r state.tokenEnd = state.position; ++state.position; ++state.lineNumber; if (data.charCodeAt(state.position) === 10) { ++state.position; } return true; default: ++state.position; break; } } state.tokenEnd = state.position; return state.tokenStart !== state.tokenEnd; } Tokenizer.eatLine = eatLine; /** Sets the current token start to the current position */ function markStart(state) { state.tokenStart = state.position; } Tokenizer.markStart = markStart; /** Sets the current token start to current position and moves to the next line. */ function markLine(state) { state.tokenStart = state.position; return eatLine(state); } Tokenizer.markLine = markLine; /** Advance the state and return line as string. */ function readLine(state) { markLine(state); return getTokenString(state); } Tokenizer.readLine = readLine; /** Advance the state and return trimmed line as string. */ function readLineTrim(state) { markLine(state); var position = state.position; trim(state, state.tokenStart, state.tokenEnd); state.position = position; return getTokenString(state); } Tokenizer.readLineTrim = readLineTrim; function readLinesChunk(state, count, tokens) { var read = 0; for (var i = 0; i < count; i++) { if (!markLine(state)) return read; TokenBuilder.addUnchecked(tokens, state.tokenStart, state.tokenEnd); read++; } return read; } /** Advance the state by the given number of lines and return them*/ function markLines(state, count) { var lineTokens = TokenBuilder.create(state.data, count * 2); readLinesChunk(state, count, lineTokens); return lineTokens; } Tokenizer.markLines = markLines; /** Advance the state by the given number of lines and return them */ function readLines(state, count) { var ret = []; for (var i = 0; i < count; i++) { ret.push(Tokenizer.readLine(state)); } return ret; } Tokenizer.readLines = readLines; /** Advance the state by the given number of lines and return line starts/ends as tokens. 
    /** Advance the state by the given number of lines and return line starts/ends as tokens. */
    function readLinesAsync(state, count, ctx, initialLineCount) {
        if (initialLineCount === void 0) { initialLineCount = 100000; }
        return (0, tslib_1.__awaiter)(this, void 0, void 0, function () {
            var length, lineTokens, linesAlreadyRead;
            return (0, tslib_1.__generator)(this, function (_a) {
                switch (_a.label) {
                    case 0:
                        length = state.length;
                        lineTokens = TokenBuilder.create(state.data, count * 2);
                        linesAlreadyRead = 0;
                        return [4 /*yield*/, (0, mol_task_1.chunkedSubtask)(ctx, initialLineCount, state, function (chunkSize, state) {
                                var linesToRead = Math.min(count - linesAlreadyRead, chunkSize);
                                readLinesChunk(state, linesToRead, lineTokens);
                                linesAlreadyRead += linesToRead;
                                return linesToRead;
                            }, function (ctx, state) { return ctx.update({ message: 'Parsing...', current: state.position, max: length }); })];
                    case 1:
                        _a.sent();
                        return [2 /*return*/, lineTokens];
                }
            });
        });
    }
    Tokenizer.readLinesAsync = readLinesAsync;
    function readAllLines(data) {
        var state = Tokenizer(data);
        var tokens = TokenBuilder.create(state.data, Math.max(data.length / 80, 2));
        while (markLine(state)) {
            TokenBuilder.add(tokens, state.tokenStart, state.tokenEnd);
        }
        return tokens;
    }
    Tokenizer.readAllLines = readAllLines;
    function readLinesChunkChecked(state, count, tokens) {
        var read = 0;
        for (var i = 0; i < count; i++) {
            if (!markLine(state))
                return read;
            TokenBuilder.add(tokens, state.tokenStart, state.tokenEnd);
            read++;
        }
        return read;
    }
    function readAllLinesAsync(data, ctx, chunkSize) {
        if (chunkSize === void 0) { chunkSize = 100000; }
        return (0, tslib_1.__awaiter)(this, void 0, void 0, function () {
            var state, tokens;
            return (0, tslib_1.__generator)(this, function (_a) {
                switch (_a.label) {
                    case 0:
                        state = Tokenizer(data);
                        tokens = TokenBuilder.create(state.data, Math.max(data.length / 80, 2));
                        return [4 /*yield*/, (0, mol_task_1.chunkedSubtask)(ctx, chunkSize, state, function (chunkSize, state) {
                                readLinesChunkChecked(state, chunkSize, tokens);
                                return state.position < state.length ? chunkSize : 0;
                            }, function (ctx, state) { return ctx.update({ message: 'Parsing...', current: state.position, max: state.length }); })];
                    case 1:
                        _a.sent();
                        return [2 /*return*/, tokens];
                }
            });
        });
    }
    Tokenizer.readAllLinesAsync = readAllLinesAsync;
    /**
     * Eat everything until a whitespace/newline occurs.
     */
    function eatValue(state) {
        while (state.position < state.length) {
            switch (state.data.charCodeAt(state.position)) {
                case 9: // \t
                case 10: // \n
                case 13: // \r
                case 32: // ' '
                    state.tokenEnd = state.position;
                    return;
                default:
                    ++state.position;
                    break;
            }
        }
        state.tokenEnd = state.position;
    }
    Tokenizer.eatValue = eatValue;
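    /*
     * Example (illustrative, not part of the original file): the async variants
     * report progress through a mol-task RuntimeContext. A minimal sketch,
     * assuming the Task API exported by mol-task and a previously loaded string
     * `hugeFileString` (hypothetical name):
     *
     *   var Task = require('../../../../mol-task').Task;
     *   var Tokenizer = require('./tokenizer').Tokenizer;
     *   var task = Task.create('Tokenize', function (ctx) {
     *       return Tokenizer.readAllLinesAsync(hugeFileString, ctx);
     *   });
     *   task.run().then(function (tokens) { console.log(tokens.count + ' lines'); });
     */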
    /**
     * Skips all the whitespace - space, tab, newline, CR.
     * Handles incrementing line count.
     */
    function skipWhitespace(state) {
        var prev = -1;
        while (state.position < state.length) {
            var c = state.data.charCodeAt(state.position);
            switch (c) {
                case 9: // '\t'
                case 32: // ' '
                    prev = c;
                    ++state.position;
                    break;
                case 10: // \n
                    // handle \r\n
                    if (prev !== 13) {
                        ++state.lineNumber;
                    }
                    prev = c;
                    ++state.position;
                    break;
                case 13: // \r
                    prev = c;
                    ++state.position;
                    ++state.lineNumber;
                    break;
                default:
                    return prev;
            }
        }
        return prev;
    }
    Tokenizer.skipWhitespace = skipWhitespace;
    /** Trims spaces and tabs */
    function trim(state, start, end) {
        var data = state.data;
        var s = start, e = end - 1;
        var c = data.charCodeAt(s);
        while ((c === 9 || c === 32) && s <= e)
            c = data.charCodeAt(++s);
        c = data.charCodeAt(e);
        while ((c === 9 || c === 32) && e >= s)
            c = data.charCodeAt(--e);
        state.tokenStart = s;
        state.tokenEnd = e + 1;
        state.position = end;
        return state;
    }
    Tokenizer.trim = trim;
})(Tokenizer || (Tokenizer = {}));
exports.Tokenizer = Tokenizer;
function trimStr(data, start, end) {
    var s = start, e = end - 1;
    var c = data.charCodeAt(s);
    while ((c === 9 || c === 32) && s <= e)
        c = data.charCodeAt(++s);
    c = data.charCodeAt(e);
    while ((c === 9 || c === 32) && e >= s)
        c = data.charCodeAt(--e);
    return data.substring(s, e + 1);
}
exports.trimStr = trimStr;
var TokenBuilder;
(function (TokenBuilder) {
    function resize(builder) {
        // scale the size using golden ratio, because why not
        var newBuffer = new Uint32Array((1.61 * builder.indices.length) | 0);
        newBuffer.set(builder.indices);
        builder.indices = newBuffer;
        builder.indicesLenMinus2 = (newBuffer.length - 2) | 0;
    }
    function add(tokens, start, end) {
        var builder = tokens;
        if (builder.offset > builder.indicesLenMinus2) {
            resize(builder);
        }
        builder.indices[builder.offset++] = start;
        builder.indices[builder.offset++] = end;
        tokens.count++;
    }
    TokenBuilder.add = add;
    function addToken(tokens, tokenizer) {
        add(tokens, tokenizer.tokenStart, tokenizer.tokenEnd);
    }
    TokenBuilder.addToken = addToken;
    function addUnchecked(tokens, start, end) {
        tokens.indices[tokens.offset++] = start;
        tokens.indices[tokens.offset++] = end;
        tokens.count++;
    }
    TokenBuilder.addUnchecked = addUnchecked;
    function create(data, size) {
        size = Math.max(10, size);
        return {
            data: data,
            indicesLenMinus2: (size - 2) | 0,
            count: 0,
            offset: 0,
            indices: new Uint32Array(size)
        };
    }
    TokenBuilder.create = create;
})(TokenBuilder = exports.TokenBuilder || (exports.TokenBuilder = {}));
//# sourceMappingURL=tokenizer.js.map
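/*
 * Example (illustrative, not part of the original file): consuming a token
 * list. Tokens are flat [start, end) index pairs into the original string,
 * so the i-th line is recovered with substring or trimStr:
 *
 *   var tok = require('./tokenizer');
 *   var tokens = tok.Tokenizer.readAllLines('  a \n\tb ');
 *   for (var i = 0; i < tokens.count; i++) {
 *       var s = tokens.indices[2 * i], e = tokens.indices[2 * i + 1];
 *       console.log(tok.trimStr(tokens.data, s, e));  // => 'a', then 'b'
 *   }
 */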