// molstar
// Version:
// A comprehensive macromolecular library.
// 255 lines • 8.94 kB
// JavaScript
/**
* Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { __awaiter, __generator } from "tslib";
// import { Column } from 'mol-data/db'
import { TokenBuilder, Tokenizer } from '../common/text/tokenizer';
import * as Data from './data-model';
import { Field } from './field';
import { ReaderResult as Result } from '../result';
import { Task, chunkedSubtask, } from '../../../mol-task';
/**
 * Builds the mutable state object that is threaded through every parsing step.
 *
 * @param data Raw CSV text; also handed to `Tokenizer` to create the cursor.
 * @param runtimeCtx Task runtime context, kept for later progress updates.
 * @param opts Completed parser options. Only the char code of the first
 *   character of `quote`, `comment` and `delimiter` is retained.
 * @returns State with zeroed counters, empty token/column lists and
 *   `tokenType` preset to End (2).
 */
function State(data, runtimeCtx, opts) {
    var firstCharCode = function (text) { return text.charCodeAt(0); };
    return {
        data: data,
        tokenizer: Tokenizer(data),
        tokenType: 2 /* End */,
        runtimeCtx: runtimeCtx,
        tokens: [],
        fieldCount: 0,
        recordCount: 0,
        columnCount: 0,
        columnNames: [],
        quoteCharCode: firstCharCode(opts.quote),
        commentCharCode: firstCharCode(opts.comment),
        delimiterCharCode: firstCharCode(opts.delimiter),
        noColumnNamesRecord: opts.noColumnNames
    };
}
/**
 * Eats an unquoted value: everything until a delimiter or newline occurs.
 *
 * Expects `state.tokenStart` and `state.tokenEnd` to both sit on the first
 * character of the value. On return `state.tokenEnd` is one past the last
 * non-blank character, i.e. trailing spaces/tabs are trimmed while blanks
 * inside the value are kept. The terminating delimiter or newline character
 * is consumed (`state.position` ends up just behind it).
 *
 * @param state Tokenizer cursor (`data`, `position`, `length`, `tokenEnd`).
 * @param delimiterCharCode Char code of the field delimiter.
 * @returns `true` when a newline occurs after the value; `undefined` when the
 *   value ends at a delimiter or at the end of the input.
 */
function eatValue(state, delimiterCharCode) {
    while (state.position < state.length) {
        var c = state.data.charCodeAt(state.position);
        ++state.position;
        switch (c) {
            case 10: // \n
            case 13: // \r
                return true;
            case delimiterCharCode:
                return;
            case 9: // \t
            case 32: // ' '
                // Possibly trailing whitespace: do not extend the token yet.
                break;
            default:
                // position is already one past `c`, so this also pulls in any
                // blanks seen since the previous non-blank character. Merely
                // counting non-blank characters would cut "a b" down to "a ".
                state.tokenEnd = state.position;
                break;
        }
    }
}
/**
 * Eats a quoted value. Can contain a newline.
 *
 * Embedded quotes are represented by a pair of quote characters:
 * - ""xx"" => "xx"
 * The token range reported through `tokenStart`/`tokenEnd` excludes the
 * enclosing quotes but still contains embedded pairs verbatim; no unescaping
 * happens here.
 *
 * If the closing quote is missing, the token runs to the end of the input and
 * `tokenStart` is left on the opening quote.
 *
 * @param state Tokenizer cursor positioned on the opening quote.
 * @param quoteCharCode Char code of the quote character.
 * @param delimiterCharCode Char code of the field delimiter.
 * @returns The result of `skipEmpty` after the closing quote (`true` when a
 *   newline follows the quoted value); `undefined` for an unterminated quote.
 */
function eatQuoted(state, quoteCharCode, delimiterCharCode) {
    ++state.position; // step over the opening quote
    while (state.position < state.length) {
        var c = state.data.charCodeAt(state.position);
        if (c === quoteCharCode) {
            var next = state.data.charCodeAt(state.position + 1);
            if (next !== quoteCharCode) {
                // Closing quote: get rid of the quotes.
                state.tokenStart++;
                state.tokenEnd = state.position;
                ++state.position;
                return skipEmpty(state, delimiterCharCode);
            }
            // Embedded quote pair: step over its first half here and its
            // second half below, so the second half is not mistaken for the
            // closing quote on the next iteration.
            ++state.position;
        }
        ++state.position;
    }
    state.tokenEnd = state.position;
}
/**
 * Advances past blanks (tab, space) and delimiter characters.
 *
 * Stops on the first other character without consuming it.
 * NOTE(review): every delimiter in the run is skipped, not just one, so empty
 * fields directly after a quoted value are not reported as tokens.
 *
 * @param state Tokenizer cursor (`data`, `position`, `length`).
 * @param delimiterCharCode Char code of the field delimiter.
 * @returns `true` when the character stopped on is a newline (\n or \r),
 *   `false` for any other character, `undefined` when the input is exhausted.
 */
function skipEmpty(state, delimiterCharCode) {
    for (; state.position < state.length; ++state.position) {
        var code = state.data.charCodeAt(state.position);
        var skippable = code === 9 || code === 32 || code === delimiterCharCode;
        if (!skippable) {
            return code === 10 || code === 13;
        }
    }
}
/**
 * Advances past tabs, spaces and line breaks, keeping `state.lineNumber`
 * up to date. Stops on the first other character without consuming it.
 *
 * A "\r\n" pair counts as a single line break: "\r" always bumps the line
 * number, "\n" only when it does not directly follow a "\r".
 *
 * @param state Tokenizer cursor (`data`, `position`, `length`, `lineNumber`).
 */
function skipWhitespace(state) {
    var previous = -1;
    for (; state.position < state.length; ++state.position) {
        var code = state.data.charCodeAt(state.position);
        if (code === 13) { // \r
            ++state.lineNumber;
        }
        else if (code === 10) { // \n
            // the \r of a \r\n pair has already been counted
            if (previous !== 13) {
                ++state.lineNumber;
            }
        }
        else if (code !== 9 && code !== 32) { // neither \t nor ' '
            return;
        }
        previous = code;
    }
}
/**
 * Advances to the next line break (\n or \r) or to the end of the input.
 * The line break itself is not consumed.
 *
 * @param state Tokenizer cursor (`data`, `position`, `length`).
 */
function skipLine(state) {
    for (; state.position < state.length; ++state.position) {
        var code = state.data.charCodeAt(state.position);
        var isLineBreak = code === 10 || code === 13; // \n or \r
        if (isLineBreak) {
            return;
        }
    }
}
/**
 * Moves to the next token and records its kind in `state.tokenType`
 * (0 = Value, 1 = Comment, 2 = End).
 *
 * Leading whitespace and line breaks are skipped first; `tokenStart` and
 * `tokenEnd` of the tokenizer are then reset to the current position before
 * the token is consumed.
 *
 * @param state Parser state.
 * @returns `false` at the end of the input, `undefined` for a comment line,
 *   otherwise whatever `eatQuoted`/`eatValue` report, i.e. `true` when a
 *   newline follows the value, indicating a full record.
 */
function moveNextInternal(state) {
    var cursor = state.tokenizer;
    skipWhitespace(cursor);
    if (cursor.position >= cursor.length) {
        state.tokenType = 2 /* End */;
        return false;
    }
    cursor.tokenStart = cursor.position;
    cursor.tokenEnd = cursor.position;
    var first = state.data.charCodeAt(cursor.position);
    // The comment marker is tested before the quote marker.
    if (first === state.commentCharCode) {
        state.tokenType = 1 /* Comment */;
        skipLine(cursor);
        return;
    }
    state.tokenType = 0 /* Value */;
    if (first === state.quoteCharCode) {
        return eatQuoted(cursor, state.quoteCharCode, state.delimiterCharCode);
    }
    return eatValue(cursor, state.delimiterCharCode);
}
/**
 * Moves to the next token that is not a comment.
 *
 * @param state Parser state; `state.tokenType` reflects the token stopped on.
 * @returns The result of the last `moveNextInternal` call: `true` when a
 *   newline follows the value, i.e. indicating a full record.
 */
function moveNext(state) {
    var endsRecord;
    do {
        endsRecord = moveNextInternal(state);
    } while (state.tokenType === 1 /* Comment */);
    return endsRecord;
}
/**
 * Stores fields until `chunkSize` records have been completed or the input
 * is exhausted.
 *
 * On entry the tokenizer already holds a pending token (tokenized but not yet
 * stored). Each iteration stores the pending token into the builder of its
 * column (`fieldCount % columnCount`) and then tokenizes the next one.
 *
 * A record is counted when the token closing it is tokenized. A last record
 * that is not followed by a newline never produces such a token, so it is
 * counted when the end of the input is reached instead.
 *
 * @param chunkSize Maximum number of records to complete in this call.
 * @param state Parser state.
 * @returns Number of records counted by this call; 0 once the input has ended.
 */
function readRecordsChunk(chunkSize, state) {
    if (state.tokenType === 2 /* End */)
        return 0;
    var counter = 0;
    var tokens = state.tokens, tokenizer = state.tokenizer;
    // Did the pending token close its record (and was therefore counted already)?
    // - First call (nothing stored yet): the caller bumped recordCount from 0
    //   exactly when the first token closed a record.
    // - Later calls: the previous call only stops early right after a
    //   record-closing token was tokenized.
    var pendingClosesRecord = state.fieldCount > 0 || state.recordCount > 0;
    while (state.tokenType === 0 /* Value */ && counter < chunkSize) {
        TokenBuilder.add(tokens[state.fieldCount % state.columnCount], tokenizer.tokenStart, tokenizer.tokenEnd);
        ++state.fieldCount;
        var storedClosedRecord = pendingClosesRecord;
        pendingClosesRecord = !!moveNext(state);
        if (pendingClosesRecord) {
            ++state.recordCount;
            ++counter;
        }
        else if (state.tokenType === 2 /* End */ && !storedClosedRecord) {
            // Input ended without a trailing newline: the field just stored
            // belongs to a final record that has not been counted yet.
            ++state.recordCount;
            ++counter;
        }
    }
    return counter;
}
/**
 * Tokenizes the first data field and then hands the remaining work to
 * `chunkedSubtask`, which drives `readRecordsChunk` with an initial chunk
 * size of 100000 and a progress callback.
 *
 * @param state Parser state.
 * @returns Whatever `chunkedSubtask` returns.
 */
function readRecordsChunks(state) {
    // The first field may already close a record (newline right after it).
    if (moveNext(state)) {
        ++state.recordCount;
    }
    var reportProgress = function (ctx, current) {
        return ctx.update({
            message: 'Parsing...',
            current: current.tokenizer.position,
            max: current.data.length
        });
    };
    return chunkedSubtask(state.runtimeCtx, 100000, state, readRecordsChunk, reportProgress);
}
/**
 * Registers a new column: the text of the current token becomes its name and
 * a fresh token builder is created to collect its fields.
 *
 * The builder is created with a size hint of one entry per 80 characters of
 * input.
 *
 * @param state Parser state; `columnNames` and `tokens` each grow by one.
 */
function addColumn(state) {
    var cursor = state.tokenizer;
    var name = Tokenizer.getTokenString(cursor);
    var sizeHint = state.data.length / 80;
    state.columnNames.push(name);
    state.tokens.push(TokenBuilder.create(cursor.data, sizeHint));
}
/**
 * Reads the first record and registers one column per field in it.
 *
 * When `state.noColumnNamesRecord` is set, the first record is data rather
 * than a header: the columns are renamed to their index ('0', '1', ...) and
 * the tokenizer is reset so that the record is read again as data.
 *
 * Input that ends before any newline (including empty input) is handled: the
 * fields seen so far become the columns, and empty input yields no columns.
 *
 * @param state Parser state; fills `columnNames`, `tokens` and `columnCount`.
 */
function init(state) {
    var newRecord = moveNext(state);
    // Also stop at the end of the input: moveNext keeps returning false there,
    // so looping on `!newRecord` alone would never terminate when the first
    // line has no trailing newline.
    while (!newRecord && state.tokenType !== 2 /* End */) {
        addColumn(state);
        newRecord = moveNext(state);
    }
    // A token is still pending only if it closed the record; at the end of
    // the input there is nothing left to add.
    if (newRecord) {
        addColumn(state);
    }
    state.columnCount = state.columnNames.length;
    if (state.noColumnNamesRecord) {
        state.columnNames.forEach(function (x, i, arr) { return arr[i] = i + ''; });
        Tokenizer.reset(state.tokenizer);
    }
}
/**
 * Runs the full read: sets up the columns, reads all records and wraps the
 * collected tokens of each column in a `Field`.
 *
 * @param state Parser state.
 * @returns Promise of the `Data.CsvTable` built from the record count, the
 *   column names and a prototype-less name -> field map.
 */
function handleRecords(state) {
    return __awaiter(this, void 0, void 0, function () {
        var fieldsByName, names, col;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    init(state);
                    return [4 /*yield*/, readRecordsChunks(state)];
                case 1:
                    _a.sent();
                    // No prototype, so column names cannot collide with Object members.
                    fieldsByName = Object.create(null);
                    names = state.columnNames;
                    for (col = 0; col < state.columnCount; ++col) {
                        fieldsByName[names[col]] = Field(state.tokens[col]);
                    }
                    return [2 /*return*/, Data.CsvTable(state.recordCount, names, fieldsByName)];
            }
        });
    });
}
/**
 * Parses `data` inside a running task: creates the parser state, reports the
 * initial progress, reads all records and wraps the table in a successful
 * reader result.
 *
 * @param data Raw CSV text.
 * @param ctx Task runtime context used for progress updates.
 * @param opts Completed parser options (see `State`).
 * @returns Promise of `Result.success` holding the `Data.CsvFile`.
 */
function parseInternal(data, ctx, opts) {
    return __awaiter(this, void 0, void 0, function () {
        var parserState, csvTable;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    parserState = State(data, ctx, opts);
                    ctx.update({ message: 'Parsing...', current: 0, max: data.length });
                    return [4 /*yield*/, handleRecords(parserState)];
                case 1:
                    csvTable = _a.sent();
                    return [2 /*return*/, Result.success(Data.CsvFile(csvTable))];
            }
        });
    });
}
/**
 * Creates the 'Parse CSV' task for the given text.
 *
 * @param data Raw CSV text.
 * @param opts Optional overrides for the defaults `quote: '"'`,
 *   `comment: '#'`, `delimiter: ','` and `noColumnNames: false`.
 * @returns Task created via `Task.create`; running it resolves to the result
 *   of `parseInternal`.
 */
export function parseCsv(data, opts) {
    var _this = this;
    var defaults = { quote: '"', comment: '#', delimiter: ',', noColumnNames: false };
    var completeOpts = Object.assign({}, defaults, opts);
    var run = function (ctx) {
        return __awaiter(_this, void 0, void 0, function () {
            return __generator(this, function (_a) {
                switch (_a.label) {
                    case 0: return [4 /*yield*/, parseInternal(data, ctx, completeOpts)];
                    case 1: return [2 /*return*/, _a.sent()];
                }
            });
        });
    };
    return Task.create('Parse CSV', run);
}
//# sourceMappingURL=parser.js.map