UNPKG

@gmod/gff

Version:

read and write GFF3 data as streams

335 lines 13.6 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.FASTAParser = void 0; const GFF3 = __importStar(require("./util")); const containerAttributes = { Parent: 'child_features', Derives_from: 'derived_features', }; class FASTAParser { constructor(seqCallback) { this.seqCallback = seqCallback; this.currentSequence = undefined; } addLine(line) { const defMatch = /^>\s*(\S+)\s*(.*)/.exec(line); if (defMatch) { this._flush(); this.currentSequence = { id: defMatch[1], sequence: '' }; if (defMatch[2]) this.currentSequence.description = defMatch[2].trim(); } else if (this.currentSequence && /\S/.test(line)) { this.currentSequence.sequence += line.replace(/\s/g, ''); } } _flush() { if (this.currentSequence) this.seqCallback(this.currentSequence); } finish() { this._flush(); } } exports.FASTAParser = FASTAParser; class Parser { constructor(args) { this.fastaParser = undefined; // if this is true, the parser ignores the // rest of the lines in the file. currently // set when the file switches over to FASTA this.eof = false; this.lineNumber = 0; // features that we have to keep on hand for now because they // might be referenced by something else this._underConstructionTopLevel = []; // index of the above by ID this._underConstructionById = {}; this._completedReferences = {}; // features that reference something we have not seen yet // structured as: // { 'some_id' : { // 'Parent' : [ orphans that have a Parent attr referencing it ], // 'Derives_from' : [ orphans that have a Derives_from attr referencing it ], // } // } this._underConstructionOrphans = {}; // eslint-disable-next-line @typescript-eslint/no-empty-function const nullFunc = () => { }; this.featureCallback = args.featureCallback || nullFunc; this.endCallback = args.endCallback || nullFunc; this.commentCallback = args.commentCallback || nullFunc; this.errorCallback = args.errorCallback || nullFunc; this.directiveCallback = args.directiveCallback || nullFunc; this.sequenceCallback = args.sequenceCallback || nullFunc; this.disableDerivesFromReferences = args.disableDerivesFromReferences || false; // number of lines to buffer this.bufferSize = args.bufferSize === undefined ? 1000 : args.bufferSize; } addLine(line) { // if we have transitioned to a fasta section, just delegate to that parser if (this.fastaParser) { this.fastaParser.addLine(line); return; } if (this.eof) { // otherwise, if we are done, ignore this line return; } this.lineNumber += 1; if (/^\s*[^#\s>]/.test(line)) { // feature line, most common case this._bufferLine(line); return; } const match = /^\s*(#+)(.*)/.exec(line); if (match) { // directive or comment const [, hashsigns] = match; let [, , contents] = match; if (hashsigns.length === 3) { // sync directive, all forward-references are resolved. this._emitAllUnderConstructionFeatures(); } else if (hashsigns.length === 2) { const directive = GFF3.parseDirective(line); if (directive) { if (directive.directive === 'FASTA') { this._emitAllUnderConstructionFeatures(); this.eof = true; this.fastaParser = new FASTAParser(this.sequenceCallback); } else { this._emitItem(directive); } } } else { contents = contents.replace(/\s*/, ''); this._emitItem({ comment: contents }); } } else if (/^\s*$/.test(line)) { // blank line, do nothing } else if (/^\s*>/.test(line)) { // implicit beginning of a FASTA section this._emitAllUnderConstructionFeatures(); this.eof = true; this.fastaParser = new FASTAParser(this.sequenceCallback); this.fastaParser.addLine(line); } else { // it's a parse error const errLine = line.replace(/\r?\n?$/g, ''); throw new Error(`GFF3 parse error. Cannot parse '${errLine}'.`); } } finish() { this._emitAllUnderConstructionFeatures(); if (this.fastaParser) this.fastaParser.finish(); this.endCallback(); } _emitItem(i) { if (Array.isArray(i)) this.featureCallback(i); else if ('directive' in i) this.directiveCallback(i); else if ('comment' in i) this.commentCallback(i); } _enforceBufferSizeLimit(additionalItemCount = 0) { const _unbufferItem = (item) => { if (item && Array.isArray(item) && item[0].attributes && item[0].attributes.ID && item[0].attributes.ID[0]) { const ids = item[0].attributes.ID; ids.forEach((id) => { delete this._underConstructionById[id]; delete this._completedReferences[id]; }); item.forEach((i) => { if (i.child_features) i.child_features.forEach((c) => _unbufferItem(c)); if (i.derived_features) i.derived_features.forEach((d) => _unbufferItem(d)); }); } }; while (this._underConstructionTopLevel.length + additionalItemCount > this.bufferSize) { const item = this._underConstructionTopLevel.shift(); if (item) { this._emitItem(item); _unbufferItem(item); } } } /** * return all under-construction features, called when we know * there will be no additional data to attach to them */ _emitAllUnderConstructionFeatures() { this._underConstructionTopLevel.forEach(this._emitItem.bind(this)); this._underConstructionTopLevel = []; this._underConstructionById = {}; this._completedReferences = {}; // if we have any orphans hanging around still, this is a // problem. die with a parse error if (Array.from(Object.values(this._underConstructionOrphans)).length) { throw new Error(`some features reference other features that do not exist in the file (or in the same '###' scope). ${Object.keys(this._underConstructionOrphans)}`); } } // do the right thing with a newly-parsed feature line _bufferLine(line) { var _a, _b, _c; const rawFeatureLine = GFF3.parseFeature(line); const featureLine = Object.assign(Object.assign({}, rawFeatureLine), { child_features: [], derived_features: [] }); // featureLine._lineNumber = this.lineNumber //< debugging aid // NOTE: a feature is an arrayref of one or more feature lines. const ids = ((_a = featureLine.attributes) === null || _a === void 0 ? void 0 : _a.ID) || []; const parents = ((_b = featureLine.attributes) === null || _b === void 0 ? void 0 : _b.Parent) || []; const derives = this.disableDerivesFromReferences ? [] : ((_c = featureLine.attributes) === null || _c === void 0 ? void 0 : _c.Derives_from) || []; if (!ids.length && !parents.length && !derives.length) { // if it has no IDs and does not refer to anything, we can just // output it this._emitItem([featureLine]); return; } let feature = undefined; ids.forEach((id) => { const existing = this._underConstructionById[id]; if (existing) { // another location of the same feature if (existing[existing.length - 1].type !== featureLine.type) { this._parseError(`multi-line feature "${id}" has inconsistent types: "${featureLine.type}", "${existing[existing.length - 1].type}"`); } existing.push(featureLine); feature = existing; } else { // haven't seen it yet, so buffer it so we can attach // child features to it feature = [featureLine]; this._enforceBufferSizeLimit(1); if (!parents.length && !derives.length) { this._underConstructionTopLevel.push(feature); } this._underConstructionById[id] = feature; // see if we have anything buffered that refers to it this._resolveReferencesTo(feature, id); } }); // try to resolve all its references this._resolveReferencesFrom(feature || [featureLine], { Parent: parents, Derives_from: derives }, ids); } _resolveReferencesTo(feature, id) { const references = this._underConstructionOrphans[id]; // references is of the form // { // 'Parent' : [ orphans that have a Parent attr referencing this feature ], // 'Derives_from' : [ orphans that have a Derives_from attr referencing this feature ], // } if (!references) return; feature.forEach((loc) => { loc.child_features.push(...references.Parent); }); feature.forEach((loc) => { loc.derived_features.push(...references.Derives_from); }); delete this._underConstructionOrphans[id]; } _parseError(message) { this.eof = true; this.errorCallback(`${this.lineNumber}: ${message}`); } _resolveReferencesFrom(feature, references, ids) { // this is all a bit more awkward in javascript than it was in perl function postSet(obj, slot1, slot2) { let subObj = obj[slot1]; if (!subObj) { subObj = {}; obj[slot1] = subObj; } const returnVal = subObj[slot2] || false; subObj[slot2] = true; return returnVal; } references.Parent.forEach((toId) => { const otherFeature = this._underConstructionById[toId]; if (otherFeature) { const pname = containerAttributes.Parent; if (!ids.filter((id) => postSet(this._completedReferences, id, `Parent,${toId}`)).length) { otherFeature.forEach((location) => { location[pname].push(feature); }); } } else { let ref = this._underConstructionOrphans[toId]; if (!ref) { ref = { Parent: [], Derives_from: [], }; this._underConstructionOrphans[toId] = ref; } ref.Parent.push(feature); } }); references.Derives_from.forEach((toId) => { const otherFeature = this._underConstructionById[toId]; if (otherFeature) { const pname = containerAttributes.Derives_from; if (!ids.filter((id) => postSet(this._completedReferences, id, `Derives_from,${toId}`)).length) { otherFeature.forEach((location) => { location[pname].push(feature); }); } } else { let ref = this._underConstructionOrphans[toId]; if (!ref) { ref = { Parent: [], Derives_from: [], }; this._underConstructionOrphans[toId] = ref; } ref.Derives_from.push(feature); } }); } } exports.default = Parser; //# sourceMappingURL=parse.js.map