UNPKG

gtf-nostream

Version:

utilities to read GTF data

252 lines 10.5 kB
//@ts-nocheck
import * as GTF from './util';

// Maps a reference attribute name to the feature-line property that collects
// the features referring to it through that attribute.
const containerAttributes = {
    Parent: 'child_features',
    Derives_from: 'derived_features',
};

/**
 * Incremental, line-at-a-time GTF parser.
 *
 * Lines are fed in through `addLine()`. Feature lines are buffered and linked
 * to the features they reference; completed items are handed to the callbacks
 * supplied to the constructor. Call `finish()` after the last line.
 *
 * A "feature" is an array of one or more feature-line objects as returned by
 * `GTF.parseFeature`, each given `child_features` and `derived_features`
 * arrays by this class.
 */
export default class Parser {
    /**
     * @param {object} [args] - parser options; every member is optional
     * @param {Function} [args.featureCallback] - called with each emitted feature
     * @param {Function} [args.endCallback] - called with no arguments by `finish()`
     * @param {Function} [args.commentCallback] - called with `{ comment }` items
     * @param {Function} [args.errorCallback] - called by `_parseError()` with a
     *   `"<lineNumber>: <message>"` string
     * @param {Function} [args.directiveCallback] - called with parsed directives
     * @param {Function} [args.sequenceCallback] - stored on the instance; not
     *   invoked anywhere in this class
     * @param {number} [args.bufferSize=1000] - maximum number of top-level
     *   features kept buffered before the oldest ones are emitted
     */
    constructor(args = {}) {
        // eslint-disable-next-line @typescript-eslint/no-empty-function
        const nullFunc = () => { };
        Object.assign(this, {
            featureCallback: args.featureCallback || nullFunc,
            endCallback: args.endCallback || nullFunc,
            commentCallback: args.commentCallback || nullFunc,
            errorCallback: args.errorCallback || nullFunc,
            directiveCallback: args.directiveCallback || nullFunc,
            sequenceCallback: args.sequenceCallback || nullFunc,
            // compared against `undefined` (not falsiness) so an explicit 0 is kept
            bufferSize: args.bufferSize === undefined ? 1000 : args.bufferSize,
            // features that we have to keep on hand for now because they
            // might be referenced by something else
            _underConstructionTopLevel: [],
            // index of buffered features by ID
            _underConstructionById: {},
            // { id: { 'attrname,toId': true } } -- references already attached
            _completedReferences: {},
            // features that reference something we have not seen yet
            // structured as:
            // {  'some_id' : {
            //      'Parent' : [ orphans that have a Parent attr referencing it ],
            //      'Derives_from' : [ orphans that have a Derives_from attr referencing it ],
            //    }
            // }
            _underConstructionOrphans: {},
            // if this is true, the parser ignores the
            // rest of the lines in the file.
            eof: false,
            lineNumber: 0,
        });
    }

    /**
     * Consume one line of input.
     *
     * - a line whose first non-blank character is not `#` or `>` is buffered
     *   as a feature line
     * - `###` emits every buffered feature
     * - `##...` is parsed with `GTF.parseDirective` and emitted
     * - any other run of `#` is emitted as a comment
     * - blank lines are ignored
     *
     * @param {string} line
     * @throws {Error} when the line matches none of the cases above
     */
    addLine(line) {
        if (this.eof) {
            // we are done, ignore this line
            return;
        }
        this.lineNumber += 1;

        if (/^\s*[^#\s>]/.test(line)) {
            // feature line, most common case
            this._bufferLine(line);
            return;
        }

        const match = /^\s*(#+)(.*)/.exec(line);
        if (match) {
            // directive or comment
            const [, hashsigns, contents] = match;
            if (hashsigns.length === 3) {
                // sync directive, all forward-references are resolved.
                this._emitAllUnderConstructionFeatures();
            } else if (hashsigns.length === 2) {
                const directive = GTF.parseDirective(line);
                this._emitItem(directive);
            } else {
                // strip the whitespace that follows the hash sign(s)
                this._emitItem({ comment: contents.replace(/\s*/, '') });
            }
        } else if (/^\s*$/.test(line)) {
            // blank line, do nothing
        } else {
            // it's a parse error
            const errLine = line.replace(/\r?\n?$/g, '');
            throw new Error(`GTF parse error. Cannot parse '${errLine}'.`);
        }
    }

    /**
     * Dispatch an item to the matching callback: features (array-like, truthy
     * first element) go to `featureCallback`, items with a truthy `directive`
     * to `directiveCallback`, items with a truthy `comment` to
     * `commentCallback`. Anything else is dropped.
     * @private
     */
    _emitItem(i) {
        if (i[0]) {
            this.featureCallback(i);
        } else if (i.directive) {
            this.directiveCallback(i);
        } else if (i.comment) {
            this.commentCallback(i);
        }
    }

    /**
     * Signal end of input: emit everything still buffered, then call
     * `endCallback`.
     * @throws {Error} when buffered features reference IDs that never appeared
     */
    finish() {
        this._emitAllUnderConstructionFeatures();
        this.endCallback();
    }

    /**
     * Emit the oldest buffered top-level features until the buffer, plus
     * `additionalItemCount` items about to be added, fits in `bufferSize`.
     * @param {number} [additionalItemCount=0]
     * @private
     */
    _enforceBufferSizeLimit(additionalItemCount = 0) {
        // Drop the ID index entries for an emitted feature and, recursively,
        // for everything attached to it.
        // NOTE(review): this keys on `attributes.ID`, while _bufferLine indexes
        // features by transcript_id / line number -- confirm whether index
        // entries are ever actually released for GTF input.
        const _unbufferItem = item => {
            const firstLine = item && item[0];
            const ids = firstLine && firstLine.attributes && firstLine.attributes.ID;
            if (!(ids && ids[0])) {
                return;
            }
            ids.forEach(id => {
                delete this._underConstructionById[id];
                delete this._completedReferences[id];
            });
            item.forEach(i => {
                if (i.child_features) {
                    i.child_features.forEach(c => {
                        _unbufferItem(c);
                    });
                }
                if (i.derived_features) {
                    i.derived_features.forEach(d => {
                        _unbufferItem(d);
                    });
                }
            });
        };

        // The emptiness check matters: when additionalItemCount alone exceeds
        // bufferSize (e.g. bufferSize 0) there is nothing left to evict, and
        // shift() would hand `undefined` to _emitItem, which throws.
        while (this._underConstructionTopLevel.length > 0 &&
            this._underConstructionTopLevel.length + additionalItemCount > this.bufferSize) {
            const item = this._underConstructionTopLevel.shift();
            this._emitItem(item);
            _unbufferItem(item);
        }
    }

    /**
     * return all under-construction features, called when we know
     * there will be no additional data to attach to them
     * @throws {Error} when orphaned references remain after emitting
     * @private
     */
    _emitAllUnderConstructionFeatures() {
        this._underConstructionTopLevel.forEach(this._emitItem.bind(this));
        this._underConstructionTopLevel = [];
        this._underConstructionById = {};
        this._completedReferences = {};

        // if we have any orphans hanging around still, this is a
        // problem. die with a parse error
        const hasOrphans = Object.values(this._underConstructionOrphans)
            .some(entry => Object.keys(entry).length);
        if (hasOrphans) {
            throw new Error(`some features reference other features that do not exist in the file (or in the same '###' scope). ${JSON.stringify(this._underConstructionOrphans)}`);
        }
    }

    /**
     * do the right thing with a newly-parsed feature line: parse it, work out
     * its IDs and references, buffer it and link it to related features.
     * @param {string} line
     * @private
     */
    _bufferLine(line) {
        const featureLine = GTF.parseFeature(line);
        featureLine.child_features = [];
        featureLine.derived_features = [];
        // featureLine._lineNumber = this.lineNumber //< debugging aid

        // no such thing as unique ID in GTF. make one up.
        const featureNumber = this.lineNumber;
        // trying to support the Cufflinks convention of adding a transcript line
        const isTranscript = featureLine.featureType === 'transcript';

        // NOTE: a feature is an arrayref of one or more feature lines.
        // Transcripts are identified by transcript_id; every other line is
        // identified by its line number and treats transcript_id as its parent.
        const ids = isTranscript
            ? featureLine.attributes.transcript_id || []
            : [featureNumber];
        const parents = isTranscript
            ? []
            : featureLine.attributes.transcript_id || [];
        const derives = featureLine.attributes.Derives_from || [];

        if (!ids.length && !parents.length && !derives.length) {
            // if it has no IDs and does not refer to anything, we can just
            // output it
            this._emitItem([featureLine]);
            return;
        }

        // Build the text of a synthetic 'transcript' line from a copy of the
        // given feature line.
        function createTranscript(feature) {
            const result = JSON.parse(JSON.stringify(feature));
            result.featureType = 'transcript';
            return GTF.formatFeature(result);
        }

        // make sure every referenced parent transcript exists before linking
        parents.forEach(parent => {
            const underConst = this._underConstructionById[parent];
            if (!underConst) {
                this._bufferLine(createTranscript(featureLine));
            }
        });

        let feature;
        ids.forEach(id => {
            const existing = this._underConstructionById[id];
            if (existing) {
                // another location line of a feature we already know
                existing.push(featureLine);
                feature = existing;
            } else {
                // haven't seen it yet, so buffer it so we can attach
                // child features to it
                feature = [featureLine];
                this._enforceBufferSizeLimit(1);
                if (!parents.length && !derives.length) {
                    this._underConstructionTopLevel.push(feature);
                }
                this._underConstructionById[id] = feature;
                // see if we have anything buffered that refers to it
                this._resolveReferencesTo(feature, id);
            }
        });

        // try to resolve all its references
        this._resolveReferencesFrom(feature || [featureLine], { Parent: parents, Derives_from: derives }, ids);
    }

    /**
     * Attach any orphans that were waiting for `id` to every location line of
     * `feature`, and remove them from the orphan table.
     * @private
     */
    _resolveReferencesTo(feature, id) {
        const references = this._underConstructionOrphans[id];
        if (!references) {
            return;
        }
        Object.keys(references).forEach(attrname => {
            const pname = containerAttributes[attrname] || attrname.toLowerCase();
            // Capture the list once and delete only after every location has
            // received it; deleting inside the loop left `undefined` to be
            // spread on the second location, which throws.
            const orphans = references[attrname];
            feature.forEach(loc => {
                loc[pname].push(...orphans);
            });
            delete references[attrname];
        });
    }

    /**
     * Stop parsing (sets `eof`) and report `message`, prefixed with the
     * current line number, to `errorCallback`.
     * @private
     */
    _parseError(message) {
        this.eof = true;
        this.errorCallback(`${this.lineNumber}: ${message}`);
    }

    /**
     * Link `feature` to each feature it references. Referenced features that
     * are already buffered are widened to cover `feature` and receive it in
     * the matching container; references to unseen IDs are recorded as orphans.
     * @param {Array} feature - the referring feature
     * @param {Object<string, Array>} references - attribute name -> referenced IDs
     * @param {Array} ids - IDs of the referring feature
     * @private
     */
    _resolveReferencesFrom(feature, references, ids) {
        // Set obj[slot1][slot2] = true and return whether it was already set.
        function postSet(obj, slot1, slot2) {
            let subObj = obj[slot1];
            if (!subObj) {
                subObj = {};
                obj[slot1] = subObj;
            }
            const returnVal = subObj[slot2] || false;
            subObj[slot2] = true;
            return returnVal;
        }

        // Grow the parent's first location so it spans the child's first location.
        function expandFeature(parentFeature, childFeature) {
            parentFeature[0].start = Math.min(parentFeature[0].start, childFeature[0].start);
            parentFeature[0].end = Math.max(parentFeature[0].end, childFeature[0].end);
        }

        Object.entries(references).forEach(([attrname, toIds]) => {
            let pname;
            toIds.forEach(toId => {
                const otherFeature = this._underConstructionById[toId];
                if (otherFeature) {
                    expandFeature(otherFeature, feature);
                    if (!pname) {
                        pname = containerAttributes[attrname] || attrname.toLowerCase();
                    }
                    // filter (not some) on purpose: postSet must run for every
                    // id so each one is marked as completed
                    const alreadyLinked = ids.filter(id => postSet(this._completedReferences, id, `${attrname},${toId}`)).length;
                    if (!alreadyLinked) {
                        otherFeature.forEach(location => {
                            location[pname].push(feature);
                        });
                    }
                } else {
                    // referenced feature not seen yet: park this one as an orphan
                    if (!this._underConstructionOrphans[toId]) {
                        this._underConstructionOrphans[toId] = {};
                    }
                    if (!this._underConstructionOrphans[toId][attrname]) {
                        this._underConstructionOrphans[toId][attrname] = [];
                    }
                    this._underConstructionOrphans[toId][attrname].push(feature);
                }
            });
        });
    }
}
//# sourceMappingURL=parse.js.map