molstar
A comprehensive macromolecular library.
/**
* Copyright (c) 2017 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseCsv = void 0;
var tslib_1 = require("tslib");
// import { Column } from 'mol-data/db'
var tokenizer_1 = require("../common/text/tokenizer");
var Data = (0, tslib_1.__importStar)(require("./data-model"));
var field_1 = require("./field");
var result_1 = require("../result");
var mol_task_1 = require("../../../mol-task");
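/**
 * Creates the parser state: a Tokenizer over the input plus bookkeeping for
 * the current token type, token builders, record/column counts and the
 * configured quote/comment/delimiter character codes.
 */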
function State(data, runtimeCtx, opts) {
var tokenizer = (0, tokenizer_1.Tokenizer)(data);
return {
data: data,
tokenizer: tokenizer,
tokenType: 2 /* End */,
runtimeCtx: runtimeCtx,
tokens: [],
fieldCount: 0,
recordCount: 0,
columnCount: 0,
columnNames: [],
quoteCharCode: opts.quote.charCodeAt(0),
commentCharCode: opts.comment.charCodeAt(0),
delimiterCharCode: opts.delimiter.charCodeAt(0),
noColumnNamesRecord: opts.noColumnNames
};
}
/**
* Eat everything until a delimiter or newline occurs.
* Ignores whitespace at the end of the value, i.e. trim right.
* Returns true when a newline occurs after the value.
*/
function eatValue(state, delimiterCharCode) {
while (state.position < state.length) {
var c = state.data.charCodeAt(state.position);
++state.position;
switch (c) {
case 10: // \n
case 13: // \r
return true;
case delimiterCharCode:
return;
case 9: // \t
case 32: // ' '
break;
default:
++state.tokenEnd;
break;
}
}
}
/**
* Eats a quoted value. Can contain a newline.
* Returns true when a newline occurs after the quoted value.
*
* Embedded quotes are represented by a pair of double quotes:
* - ""xx"" => "xx"
*/
function eatQuoted(state, quoteCharCode, delimiterCharCode) {
++state.position;
while (state.position < state.length) {
var c = state.data.charCodeAt(state.position);
if (c === quoteCharCode) {
var next = state.data.charCodeAt(state.position + 1);
if (next !== quoteCharCode) {
// get rid of the quotes.
state.tokenStart++;
state.tokenEnd = state.position;
++state.position;
return skipEmpty(state, delimiterCharCode);
}
}
++state.position;
}
state.tokenEnd = state.position;
}
/**
* Skips empty chars.
* Returns true when the current char is a newline.
*/
function skipEmpty(state, delimiterCharCode) {
while (state.position < state.length) {
var c = state.data.charCodeAt(state.position);
if (c !== 9 && c !== 32 && c !== delimiterCharCode) { // \t or ' '
return c === 10 || c === 13; // \n or \r
}
++state.position;
}
}
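/**
 * Skips tabs, spaces and newlines, incrementing the line number
 * (a \r\n pair is counted as a single line break).
 */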
function skipWhitespace(state) {
var prev = -1;
while (state.position < state.length) {
var c = state.data.charCodeAt(state.position);
switch (c) {
case 9: // '\t'
case 32: // ' '
prev = c;
++state.position;
break;
case 10: // \n
// handle \r\n
if (prev !== 13) {
++state.lineNumber;
}
prev = c;
++state.position;
break;
case 13: // \r
prev = c;
++state.position;
++state.lineNumber;
break;
default:
return;
}
}
}
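/**
 * Advances the position to the end of the current line without consuming
 * the newline character.
 */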
function skipLine(state) {
while (state.position < state.length) {
var c = state.data.charCodeAt(state.position);
if (c === 10 || c === 13)
return; // \n or \r
++state.position;
}
}
/**
* Move to the next token.
* Returns true when the current char is a newline, i.e. indicating a full record.
*/
function moveNextInternal(state) {
var tokenizer = state.tokenizer;
skipWhitespace(tokenizer);
if (tokenizer.position >= tokenizer.length) {
state.tokenType = 2 /* End */;
return false;
}
tokenizer.tokenStart = tokenizer.position;
tokenizer.tokenEnd = tokenizer.position;
var c = state.data.charCodeAt(tokenizer.position);
switch (c) {
case state.commentCharCode:
state.tokenType = 1 /* Comment */;
skipLine(tokenizer);
break;
case state.quoteCharCode:
state.tokenType = 0 /* Value */;
return eatQuoted(tokenizer, state.quoteCharCode, state.delimiterCharCode);
default:
state.tokenType = 0 /* Value */;
return eatValue(tokenizer, state.delimiterCharCode);
}
}
/**
* Moves to the next non-comment token/line.
* Returns true when the current char is a newline, i.e. indicating a full record.
*/
function moveNext(state) {
var newRecord = moveNextInternal(state);
while (state.tokenType === 1 /* Comment */) {
newRecord = moveNextInternal(state);
}
return newRecord;
}
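/**
 * Reads up to chunkSize records, appending each value token to the token
 * builder of its column (fieldCount modulo columnCount).
 * Returns the number of records read in this chunk.
 */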
function readRecordsChunk(chunkSize, state) {
if (state.tokenType === 2 /* End */)
return 0;
var counter = 0;
var newRecord;
var tokens = state.tokens, tokenizer = state.tokenizer;
while (state.tokenType === 0 /* Value */ && counter < chunkSize) {
tokenizer_1.TokenBuilder.add(tokens[state.fieldCount % state.columnCount], tokenizer.tokenStart, tokenizer.tokenEnd);
++state.fieldCount;
newRecord = moveNext(state);
if (newRecord) {
++state.recordCount;
++counter;
}
}
return counter;
}
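/**
 * Reads all remaining records as a chunked subtask so that parsing progress
 * can be reported through the runtime context.
 */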
function readRecordsChunks(state) {
var newRecord = moveNext(state);
if (newRecord)
++state.recordCount;
return (0, mol_task_1.chunkedSubtask)(state.runtimeCtx, 100000, state, readRecordsChunk, function (ctx, state) { return ctx.update({ message: 'Parsing...', current: state.tokenizer.position, max: state.data.length }); });
}
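/**
 * Registers the current token as a column name and allocates a token builder
 * for that column (initial size estimated as data.length / 80).
 */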
function addColumn(state) {
state.columnNames.push(tokenizer_1.Tokenizer.getTokenString(state.tokenizer));
state.tokens.push(tokenizer_1.TokenBuilder.create(state.tokenizer.data, state.data.length / 80));
}
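/**
 * Reads the header record to determine the column names and count.
 * When noColumnNames is set, the names are replaced by indices and the
 * tokenizer is rewound so the first record is treated as data.
 */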
function init(state) {
var newRecord = moveNext(state);
while (!newRecord) {
addColumn(state);
newRecord = moveNext(state);
}
addColumn(state);
state.columnCount = state.columnNames.length;
if (state.noColumnNamesRecord) {
state.columnNames.forEach(function (x, i, arr) { return arr[i] = i + ''; });
tokenizer_1.Tokenizer.reset(state.tokenizer);
}
}
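/**
 * Parses the header and all records, then assembles the resulting CsvTable
 * (async/await down-leveled by tslib into a generator state machine).
 */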
function handleRecords(state) {
return (0, tslib_1.__awaiter)(this, void 0, void 0, function () {
var columns, i;
return (0, tslib_1.__generator)(this, function (_a) {
switch (_a.label) {
case 0:
init(state);
return [4 /*yield*/, readRecordsChunks(state)];
case 1:
_a.sent();
columns = Object.create(null);
for (i = 0; i < state.columnCount; ++i) {
columns[state.columnNames[i]] = (0, field_1.Field)(state.tokens[i]);
}
return [2 /*return*/, Data.CsvTable(state.recordCount, state.columnNames, columns)];
}
});
});
}
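/**
 * Creates the parser state, parses the input and wraps the resulting CsvFile
 * in a successful ReaderResult.
 */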
function parseInternal(data, ctx, opts) {
return (0, tslib_1.__awaiter)(this, void 0, void 0, function () {
var state, table, result;
return (0, tslib_1.__generator)(this, function (_a) {
switch (_a.label) {
case 0:
state = State(data, ctx, opts);
ctx.update({ message: 'Parsing...', current: 0, max: data.length });
return [4 /*yield*/, handleRecords(state)];
case 1:
table = _a.sent();
result = Data.CsvFile(table);
return [2 /*return*/, result_1.ReaderResult.success(result)];
}
});
});
}
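/**
 * Creates a 'Parse CSV' Task for the given data. Supplied options are merged
 * with the defaults: quote '"', comment '#', delimiter ',', noColumnNames false.
 */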
function parseCsv(data, opts) {
var _this = this;
var completeOpts = Object.assign({}, { quote: '"', comment: '#', delimiter: ',', noColumnNames: false }, opts);
return mol_task_1.Task.create('Parse CSV', function (ctx) { return (0, tslib_1.__awaiter)(_this, void 0, void 0, function () {
return (0, tslib_1.__generator)(this, function (_a) {
switch (_a.label) {
case 0: return [4 /*yield*/, parseInternal(data, ctx, completeOpts)];
case 1: return [2 /*return*/, _a.sent()];
}
});
}); });
}
exports.parseCsv = parseCsv;
//# sourceMappingURL=parser.js.map
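/**
 * A minimal usage sketch of driving this parser. The accessors on the parsed
 * table (`columnNames`, `getColumn`) and on the reader result (`isError`,
 * `result`), as well as `rowCount`/`value` on the returned column, are
 * assumed from the neighbouring data-model, result and mol-data modules and
 * may differ.
 */
var parseCsvExample = function () {
    var csv = 'id,name\n1,water\n2,heme\n';
    // parseCsv returns a mol-task Task; run() executes it and resolves to a ReaderResult.
    return parseCsv(csv, { delimiter: ',' }).run().then(function (parsed) {
        if (parsed.isError) throw new Error('CSV parsing failed'); // assumed ReaderResult shape
        var table = parsed.result.table; // assumed CsvFile shape: { table }
        console.log(table.columnNames); // expected: ['id', 'name']
        var names = table.getColumn('name'); // assumed Column<string> accessor
        if (names) console.log(names.rowCount, names.value(0)); // expected: 2 'water'
    });
};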