@gmod/gff
Version:
read and write GFF3 data as streams
335 lines • 13.6 kB
JavaScript
"use strict";
// Compiler-emitted re-export helper: exposes property `key` of `source` on
// `target` under the name `alias` (defaulting to `key`). When property
// descriptors are available the binding is "live": unless the source
// descriptor can be reused as-is, a getter is installed that reads
// `source[key]` on every access.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportedName = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        // The source descriptor is reused only when it is an accessor on an
        // ES module, or a data property that is neither writable nor
        // configurable; anything else is wrapped in a forwarding getter.
        let reusable = false;
        if (descriptor) {
            reusable = 'get' in descriptor
                ? Boolean(source.__esModule)
                : !(descriptor.writable || descriptor.configurable);
        }
        if (!reusable) {
            descriptor = {
                enumerable: true,
                get: function () {
                    return source[key];
                },
            };
        }
        Object.defineProperty(target, exportedName, descriptor);
    }
    : function (target, source, key, alias) {
        // Fallback without descriptors: copy the current value once.
        const exportedName = alias === undefined ? key : alias;
        target[exportedName] = source[key];
    });
// Compiler-emitted helper: stores `value` as the `default` export of the
// namespace object `namespace`. Uses a property definition when available,
// otherwise a plain assignment.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (namespace, value) {
            Object.defineProperty(namespace, "default", { enumerable: true, value: value });
        };
    }
    return function (namespace, value) {
        namespace["default"] = value;
    };
})();
// Compiler-emitted helper implementing `import * as ns from '...'` for
// CommonJS modules. A module already flagged `__esModule` is returned
// untouched; anything else is wrapped in a fresh namespace object that
// re-exports every own enumerable property except `default`, and whose
// `default` is the module itself.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    const namespace = {};
    if (mod != null) {
        for (const key in mod) {
            const isOwnNamedExport = key !== "default" && Object.prototype.hasOwnProperty.call(mod, key);
            if (isOwnNamedExport) {
                __createBinding(namespace, mod, key);
            }
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.FASTAParser = void 0;
const GFF3 = __importStar(require("./util"));
// Maps a reference attribute name to the property of the *referenced*
// feature's lines that collects the features pointing at it: features with
// a `Parent` attribute are stored in the target's `child_features`, features
// with a `Derives_from` attribute in the target's `derived_features`.
const containerAttributes = {
Parent: 'child_features',
Derives_from: 'derived_features',
};
/**
 * Minimal line-oriented FASTA parser.
 *
 * Lines are fed one at a time through `addLine`. Each time a record is
 * complete (a new `>` definition line arrives, or `finish` is called) the
 * accumulated record is handed to `seqCallback` as
 * `{ id, sequence, description? }`.
 */
class FASTAParser {
    /**
     * @param {function(object): void} seqCallback - invoked once per
     *   completed sequence record.
     */
    constructor(seqCallback) {
        this.seqCallback = seqCallback;
        this.currentSequence = undefined;
    }
    /**
     * Consume one line of FASTA text.
     *
     * A definition line (`>id description`) emits the record collected so
     * far and starts a new one. Any other non-blank line is appended to the
     * current record with all whitespace removed. Sequence lines seen before
     * the first definition line are ignored.
     *
     * @param {string} line
     */
    addLine(line) {
        const defMatch = /^>\s*(\S+)\s*(.*)/.exec(line);
        if (defMatch) {
            this._flush();
            this.currentSequence = { id: defMatch[1], sequence: '' };
            if (defMatch[2]) {
                this.currentSequence.description = defMatch[2].trim();
            }
        }
        else if (this.currentSequence && /\S/.test(line)) {
            this.currentSequence.sequence += line.replace(/\s/g, '');
        }
    }
    /**
     * Emit the pending record, if any.
     *
     * The pending record is cleared *before* the callback runs, so each
     * record is delivered exactly once even if `finish` is called repeatedly.
     */
    _flush() {
        const pending = this.currentSequence;
        if (pending) {
            this.currentSequence = undefined;
            this.seqCallback(pending);
        }
    }
    /** Signal end of input and emit the last pending record. */
    finish() {
        this._flush();
    }
}
exports.FASTAParser = FASTAParser;
/**
 * Streaming, line-oriented GFF3 parser.
 *
 * Lines are fed in through `addLine`. Feature lines are buffered so that
 * `Parent` / `Derives_from` references (including forward references) can be
 * resolved into nested `child_features` / `derived_features` arrays. Buffered
 * features are flushed to `featureCallback` on a `###` sync directive, when
 * the buffer limit is exceeded, when a FASTA section starts, and on `finish`.
 *
 * A "feature" is an array of one or more feature-line objects that share
 * the same ID.
 */
class Parser {
    /**
     * @param {object} args
     * @param {function(Array): void} [args.featureCallback] - receives each
     *   completed top-level feature.
     * @param {function(): void} [args.endCallback] - called at the end of
     *   `finish`.
     * @param {function(object): void} [args.commentCallback] - receives
     *   `{ comment }` objects.
     * @param {function(string): void} [args.errorCallback] - receives
     *   `"<lineNumber>: <message>"` strings for recoverable parse errors.
     * @param {function(object): void} [args.directiveCallback] - receives
     *   parsed directive objects.
     * @param {function(object): void} [args.sequenceCallback] - receives
     *   FASTA records once the file switches to a FASTA section.
     * @param {boolean} [args.disableDerivesFromReferences=false] - when
     *   true, `Derives_from` attributes are not used for nesting.
     * @param {number} [args.bufferSize=1000] - maximum number of top-level
     *   features kept buffered at once.
     */
    constructor(args) {
        this.fastaParser = undefined;
        // if this is true, the parser ignores the rest of the lines in the
        // file. set when the file switches over to FASTA, and when a parse
        // error has been reported
        this.eof = false;
        this.lineNumber = 0;
        // features that we have to keep on hand for now because they
        // might be referenced by something else
        this._underConstructionTopLevel = [];
        // The three lookup tables below are keyed by IDs taken verbatim from
        // the input, so they are created without a prototype: otherwise an ID
        // such as "constructor" or "__proto__" would resolve to an inherited
        // Object.prototype member and be mistaken for a buffered feature.
        // index of the above by ID
        this._underConstructionById = Object.create(null);
        this._completedReferences = Object.create(null);
        // features that reference something we have not seen yet
        // structured as:
        // { 'some_id' : {
        //     'Parent' : [ orphans that have a Parent attr referencing it ],
        //     'Derives_from' : [ orphans that have a Derives_from attr referencing it ],
        //   }
        // }
        this._underConstructionOrphans = Object.create(null);
        // eslint-disable-next-line @typescript-eslint/no-empty-function
        const nullFunc = () => { };
        this.featureCallback = args.featureCallback || nullFunc;
        this.endCallback = args.endCallback || nullFunc;
        this.commentCallback = args.commentCallback || nullFunc;
        this.errorCallback = args.errorCallback || nullFunc;
        this.directiveCallback = args.directiveCallback || nullFunc;
        this.sequenceCallback = args.sequenceCallback || nullFunc;
        this.disableDerivesFromReferences =
            args.disableDerivesFromReferences || false;
        // number of lines to buffer
        this.bufferSize = args.bufferSize === undefined ? 1000 : args.bufferSize;
    }
    /**
     * Consume one line of input.
     *
     * @param {string} line
     * @throws {Error} if the line cannot be classified, or if a flush
     *   triggered by this line finds unresolved references.
     */
    addLine(line) {
        // if we have transitioned to a fasta section, just delegate to that parser
        if (this.fastaParser) {
            this.fastaParser.addLine(line);
            return;
        }
        if (this.eof) {
            // otherwise, if we are done, ignore this line
            return;
        }
        this.lineNumber += 1;
        if (/^\s*[^#\s>]/.test(line)) {
            // feature line, most common case
            this._bufferLine(line);
            return;
        }
        const match = /^\s*(#+)(.*)/.exec(line);
        if (match) {
            // directive or comment
            const [, hashsigns] = match;
            let [, , contents] = match;
            if (hashsigns.length === 3) {
                // sync directive, all forward-references are resolved.
                this._emitAllUnderConstructionFeatures();
            }
            else if (hashsigns.length === 2) {
                const directive = GFF3.parseDirective(line);
                if (directive) {
                    if (directive.directive === 'FASTA') {
                        this._emitAllUnderConstructionFeatures();
                        this.eof = true;
                        this.fastaParser = new FASTAParser(this.sequenceCallback);
                    }
                    else {
                        this._emitItem(directive);
                    }
                }
            }
            else {
                // one hash sign, or more than three: an ordinary comment
                contents = contents.replace(/\s*/, '');
                this._emitItem({ comment: contents });
            }
        }
        else if (/^\s*$/.test(line)) {
            // blank line, do nothing
        }
        else if (/^\s*>/.test(line)) {
            // implicit beginning of a FASTA section
            this._emitAllUnderConstructionFeatures();
            this.eof = true;
            this.fastaParser = new FASTAParser(this.sequenceCallback);
            this.fastaParser.addLine(line);
        }
        else {
            // it's a parse error
            const errLine = line.replace(/\r?\n?$/g, '');
            throw new Error(`GFF3 parse error. Cannot parse '${errLine}'.`);
        }
    }
    /**
     * Signal end of input: flush all buffered features, finish the FASTA
     * section if one was started, then invoke `endCallback`.
     *
     * @throws {Error} if unresolved references remain.
     */
    finish() {
        this._emitAllUnderConstructionFeatures();
        if (this.fastaParser)
            this.fastaParser.finish();
        this.endCallback();
    }
    /**
     * Route an item to the matching callback: arrays are features, objects
     * with a `directive` key are directives, objects with a `comment` key
     * are comments.
     */
    _emitItem(i) {
        if (Array.isArray(i))
            this.featureCallback(i);
        else if ('directive' in i)
            this.directiveCallback(i);
        else if ('comment' in i)
            this.commentCallback(i);
    }
    /**
     * Emit the oldest buffered top-level features until the buffer, plus
     * `additionalItemCount` items about to be added, fits in `bufferSize`.
     */
    _enforceBufferSizeLimit(additionalItemCount = 0) {
        // forget an emitted feature and, recursively, everything nested in it
        const _unbufferItem = (item) => {
            if (item &&
                Array.isArray(item) &&
                item[0].attributes &&
                item[0].attributes.ID &&
                item[0].attributes.ID[0]) {
                const ids = item[0].attributes.ID;
                ids.forEach((id) => {
                    delete this._underConstructionById[id];
                    delete this._completedReferences[id];
                });
                item.forEach((i) => {
                    if (i.child_features)
                        i.child_features.forEach((c) => _unbufferItem(c));
                    if (i.derived_features)
                        i.derived_features.forEach((d) => _unbufferItem(d));
                });
            }
        };
        while (this._underConstructionTopLevel.length + additionalItemCount >
            this.bufferSize) {
            const item = this._underConstructionTopLevel.shift();
            if (item) {
                this._emitItem(item);
                _unbufferItem(item);
            }
        }
    }
    /**
     * return all under-construction features, called when we know
     * there will be no additional data to attach to them
     *
     * @throws {Error} if some features still reference IDs that were
     *   never seen.
     */
    _emitAllUnderConstructionFeatures() {
        this._underConstructionTopLevel.forEach((item) => this._emitItem(item));
        this._underConstructionTopLevel = [];
        this._underConstructionById = Object.create(null);
        this._completedReferences = Object.create(null);
        // if we have any orphans hanging around still, this is a
        // problem. die with a parse error
        if (Object.keys(this._underConstructionOrphans).length) {
            throw new Error(`some features reference other features that do not exist in the file (or in the same '###' scope). ${Object.keys(this._underConstructionOrphans)}`);
        }
    }
    // do the right thing with a newly-parsed feature line
    _bufferLine(line) {
        const rawFeatureLine = GFF3.parseFeature(line);
        const featureLine = Object.assign(Object.assign({}, rawFeatureLine), { child_features: [], derived_features: [] });
        // featureLine._lineNumber = this.lineNumber //< debugging aid
        // NOTE: a feature is an arrayref of one or more feature lines.
        const attributes = featureLine.attributes;
        const attributeValues = (name) => (attributes === null || attributes === undefined ? undefined : attributes[name]) || [];
        const ids = attributeValues('ID');
        const parents = attributeValues('Parent');
        const derives = this.disableDerivesFromReferences
            ? []
            : attributeValues('Derives_from');
        if (!ids.length && !parents.length && !derives.length) {
            // if it has no IDs and does not refer to anything, we can just
            // output it
            this._emitItem([featureLine]);
            return;
        }
        let feature = undefined;
        ids.forEach((id) => {
            const existing = this._underConstructionById[id];
            if (existing) {
                // another location of the same feature
                if (existing[existing.length - 1].type !== featureLine.type) {
                    this._parseError(`multi-line feature "${id}" has inconsistent types: "${featureLine.type}", "${existing[existing.length - 1].type}"`);
                }
                existing.push(featureLine);
                feature = existing;
            }
            else {
                // haven't seen it yet, so buffer it so we can attach
                // child features to it
                feature = [featureLine];
                this._enforceBufferSizeLimit(1);
                if (!parents.length && !derives.length) {
                    this._underConstructionTopLevel.push(feature);
                }
                this._underConstructionById[id] = feature;
                // see if we have anything buffered that refers to it
                this._resolveReferencesTo(feature, id);
            }
        });
        // try to resolve all its references
        this._resolveReferencesFrom(feature || [featureLine], { Parent: parents, Derives_from: derives }, ids);
    }
    /**
     * Attach every orphan that was waiting for `id` to the newly seen
     * `feature`, then drop the orphan record.
     */
    _resolveReferencesTo(feature, id) {
        const references = this._underConstructionOrphans[id];
        // references is of the form
        // {
        //   'Parent' : [ orphans that have a Parent attr referencing this feature ],
        //   'Derives_from' : [ orphans that have a Derives_from attr referencing this feature ],
        // }
        if (!references)
            return;
        feature.forEach((loc) => {
            loc.child_features.push(...references.Parent);
        });
        feature.forEach((loc) => {
            loc.derived_features.push(...references.Derives_from);
        });
        delete this._underConstructionOrphans[id];
    }
    /**
     * Report a recoverable parse error through `errorCallback` and stop
     * consuming further lines.
     */
    _parseError(message) {
        this.eof = true;
        this.errorCallback(`${this.lineNumber}: ${message}`);
    }
    /**
     * Resolve the outgoing references of `feature`.
     *
     * @param {Array} feature - the feature holding the references.
     * @param {{Parent: string[], Derives_from: string[]}} references - IDs
     *   the feature points at, per attribute.
     * @param {string[]} ids - the feature's own IDs.
     */
    _resolveReferencesFrom(feature, references, ids) {
        // all Parent references are handled before any Derives_from reference
        references.Parent.forEach((toId) => {
            this._linkReference(feature, 'Parent', toId, ids);
        });
        references.Derives_from.forEach((toId) => {
            this._linkReference(feature, 'Derives_from', toId, ids);
        });
    }
    /**
     * Link `feature` to the feature with ID `toId` through `attrName`
     * ('Parent' or 'Derives_from'). If the target has not been seen yet,
     * `feature` is recorded as an orphan waiting for it.
     */
    _linkReference(feature, attrName, toId, ids) {
        const target = this._underConstructionById[toId];
        if (!target) {
            let waiting = this._underConstructionOrphans[toId];
            if (!waiting) {
                waiting = {
                    Parent: [],
                    Derives_from: [],
                };
                this._underConstructionOrphans[toId] = waiting;
            }
            waiting[attrName].push(feature);
            return;
        }
        const containerName = containerAttributes[attrName];
        const referenceKey = `${attrName},${toId}`;
        // `filter` (not `some`) on purpose: the reference must be marked as
        // completed for every one of the feature's IDs, with no short-circuit.
        const alreadyLinked = ids.filter((id) => this._markReferenceCompleted(id, referenceKey));
        if (!alreadyLinked.length) {
            target.forEach((location) => {
                location[containerName].push(feature);
            });
        }
    }
    /**
     * Record that the feature `id` has had `referenceKey` resolved.
     *
     * @returns {boolean} whether it had already been recorded before this
     *   call.
     */
    _markReferenceCompleted(id, referenceKey) {
        let completed = this._completedReferences[id];
        if (!completed) {
            completed = Object.create(null);
            this._completedReferences[id] = completed;
        }
        const wasCompleted = completed[referenceKey] || false;
        completed[referenceKey] = true;
        return wasCompleted;
    }
}
exports.default = Parser;
//# sourceMappingURL=parse.js.map