// @gmod/gff — read and write GFF3 data as streams
// JavaScript, 237 lines, 7.9 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.GFFFormattingTransformer = exports.GFFTransformer = void 0;
exports.parseStringSync = parseStringSync;
exports.formatSync = formatSync;
const parse_1 = require("./parse");
const util_1 = require("./util");
/**
 * Shared option handling for the parse entry points.
 *
 * Layers the caller-supplied options over the parser defaults and returns the
 * merged result as a new object; the input object is not modified.
 *
 * @param options - Caller-supplied parse options (may be empty or undefined)
 * @returns a fresh options object with every default filled in
 */
function _processParseOptions(options) {
    const defaults = {
        parseFeatures: true,
        parseDirectives: false,
        parseSequences: true,
        parseComments: false,
        bufferSize: Infinity,
        disableDerivesFromReferences: false,
        // Default error handling: surface parse problems as exceptions.
        errorCallback: (err) => {
            throw new Error(err);
        },
    };
    // Later spreads win, so anything the caller set overrides the default.
    return { ...defaults, ...options };
}
/**
 * Parse a stream of text data into a stream of feature, directive, comment,
 * and sequence objects.
 *
 * Exposes `transform` / `flush` methods that receive a stream controller,
 * which is the shape a `TransformStream` transformer is expected to have.
 * Incoming binary chunks are decoded to text, split into lines, and fed to a
 * GFF3 parser; parsed items are enqueued on the controller.
 */
class GFFTransformer {
    // Streaming text decoder; keeps partial multi-byte sequences between chunks.
    decoder;
    // The line-oriented GFF3 parser that does the actual parsing.
    parser;
    // Trailing, possibly incomplete line carried over from the previous chunk.
    lastString = '';
    // Which kinds of parsed items are forwarded to the output.
    parseFeatures;
    parseDirectives;
    parseComments;
    parseSequences;
    // Optional user-supplied error handler (undefined when not provided).
    errorCallback;
    /**
     * Options for how the text stream is parsed
     * @param options - Parser options
     */
    constructor(options) {
        this.decoder = new TextDecoder();
        const processedOptions = _processParseOptions(options ?? {});
        const { bufferSize, disableDerivesFromReferences } = processedOptions;
        this.parser = new parse_1.GFF3Parser({ bufferSize, disableDerivesFromReferences });
        this.parseFeatures = processedOptions.parseFeatures;
        this.parseDirectives = processedOptions.parseDirectives;
        this.parseComments = processedOptions.parseComments;
        this.parseSequences = processedOptions.parseSequences;
        // Read from the raw options on purpose: the processed options always
        // carry a throwing default, but when the caller gave no handler the
        // stream should be errored through the controller instead.
        this.errorCallback = options?.errorCallback;
    }
    /**
     * Build the callback set handed to the parser for a given controller.
     * The error callback is always present; an item callback is only added
     * for the item kinds that were enabled in the options, and each one
     * simply enqueues the parsed item on the controller.
     *
     * @param controller - Stream controller that receives parsed items
     * @returns the callbacks object to pass to the parser
     */
    makeCallbacks(controller) {
        const callbacks = {
            errorCallback: this.emitErrorMessage.bind(this, controller),
        };
        const enqueue = (item) => {
            controller.enqueue(item);
        };
        if (this.parseFeatures) {
            callbacks.featureCallback = enqueue;
        }
        if (this.parseDirectives) {
            callbacks.directiveCallback = enqueue;
        }
        if (this.parseComments) {
            callbacks.commentCallback = enqueue;
        }
        if (this.parseSequences) {
            callbacks.sequenceCallback = enqueue;
        }
        return callbacks;
    }
    /**
     * Report a parser error: to the user's handler when one was supplied,
     * otherwise by erroring the stream via the controller.
     *
     * @param controller - Stream controller to error when no handler is set
     * @param errorMessage - Error message reported by the parser
     */
    emitErrorMessage(controller, errorMessage) {
        if (this.errorCallback) {
            this.errorCallback(errorMessage);
        }
        else {
            controller.error(errorMessage);
        }
    }
    /**
     * Decode one chunk, split it into lines and feed every complete line to
     * the parser. The final (possibly incomplete) line is kept for the next
     * call.
     *
     * @param chunk - Binary chunk of GFF3 text
     * @param controller - Stream controller that receives parsed items
     */
    transform(chunk, controller) {
        // Decode the current chunk to string and prepend the leftover text
        const text = `${this.lastString}${this.decoder.decode(chunk, {
            stream: true,
        })}`;
        // Extract lines from chunk
        const lines = text.split(/\r\n|[\r\n]/g);
        // Save last line, as it might be incomplete
        this.lastString = lines.pop() || '';
        if (lines.length === 0) {
            return;
        }
        // The callbacks only depend on the controller, so build them once per
        // chunk rather than once per line.
        const callbacks = this.makeCallbacks(controller);
        for (const line of lines) {
            this.parser.addLine(line, callbacks);
        }
    }
    /**
     * Finish the stream: drain the decoder, feed any remaining partial line
     * to the parser, and tell the parser that input has ended.
     *
     * @param controller - Stream controller that receives parsed items
     */
    flush(controller) {
        const callbacks = this.makeCallbacks(controller);
        // decode() with no input flushes whatever the decoder still buffers
        this.lastString = `${this.lastString}${this.decoder.decode()}`;
        if (this.lastString) {
            this.parser.addLine(this.lastString, callbacks);
            this.lastString = '';
        }
        this.parser.finish(callbacks);
    }
}
exports.GFFTransformer = GFFTransformer;
/**
 * Parse a complete GFF3 document held in memory and collect the results.
 *
 * The string is split on CRLF, CR or LF and each line is handed to a GFF3
 * parser; every item kind enabled in the options is appended, in the order the
 * parser reports it, to the returned array. An empty (or otherwise falsy)
 * input yields an empty array without creating a parser.
 *
 * @param str - GFF3 string
 * @param inputOptions - Parsing options
 * @returns array of parsed features, directives, comments and/or sequences
 */
function parseStringSync(str, inputOptions) {
    if (!str) {
        return [];
    }
    const opts = _processParseOptions(inputOptions ?? {});
    const collected = [];
    const collect = (...args) => collected.push(...args);
    // Always report errors; only register the item callbacks that were asked for.
    const callbacks = { errorCallback: opts.errorCallback };
    const optionalCallbacks = [
        ['featureCallback', opts.parseFeatures],
        ['directiveCallback', opts.parseDirectives],
        ['commentCallback', opts.parseComments],
        ['sequenceCallback', opts.parseSequences],
    ];
    for (const [callbackName, enabled] of optionalCallbacks) {
        if (enabled) {
            callbacks[callbackName] = collect;
        }
    }
    // The whole input is available up front, so the buffer is left unbounded.
    const parser = new parse_1.GFF3Parser({
        disableDerivesFromReferences: opts.disableDerivesFromReferences || false,
        bufferSize: Infinity,
    });
    for (const line of str.split(/\r\n|[\r\n]/)) {
        parser.addLine(line, callbacks);
    }
    parser.finish(callbacks);
    return collected;
}
/**
 * Render an array of GFF3 items (features, directives, comments, sequences)
 * as one GFF3 string. No synchronization (###) marks are inserted.
 *
 * Items that carry a `sequence` property are held back and written after all
 * other items, preceded by a single `##FASTA` line; the relative order inside
 * each group is preserved.
 *
 * @param items - Array of features, directives, comments and/or sequences
 * @returns the formatted GFF3
 */
function formatSync(items) {
    // Partition: FASTA records go last, everything else keeps its position.
    const fastaRecords = [];
    const annotations = [];
    items.forEach((item) => {
        const bucket = 'sequence' in item ? fastaRecords : annotations;
        bucket.push(item);
    });
    // An array item is formatted to an array of strings, which is flattened
    // into one string here.
    const renderAnnotation = (entry) => {
        const rendered = (0, util_1.formatItem)(entry);
        return Array.isArray(entry) ? rendered.join('') : rendered;
    };
    const body = annotations.map(renderAnnotation).join('');
    if (fastaRecords.length === 0) {
        return body;
    }
    const fasta = fastaRecords.map(util_1.formatSequence).join('');
    return `${body}##FASTA\n${fasta}`;
}
/**
 * Transform a stream of features, directives, comments and/or sequences into a
 * stream of GFF3 text.
 *
 * Inserts synchronization (###) marks automatically while annotation lines are
 * being written. Once a sequence item has switched the output to the FASTA
 * section, no further sync marks are emitted, because everything after
 * `##FASTA` is sequence data.
 */
class GFFFormattingTransformer {
    // Number of newlines emitted since the last sync mark (or since start).
    linesSinceLastSyncMark = 0;
    // Becomes true after the first chunk has been written.
    haveWeEmittedData = false;
    // True once `##FASTA` has been written; stays true for the rest of the stream.
    fastaMode = false;
    // Threshold of emitted lines after which a sync mark is inserted.
    minLinesBetweenSyncMarks;
    // Whether to prepend `##gff-version 3` when the stream does not start with one.
    insertVersionDirective;
    /**
     * Options for how the output text stream is formatted
     * @param options - Formatter options (`minSyncLines`,
     * `insertVersionDirective`)
     */
    constructor(options = {}) {
        // `||` is kept deliberately for backward compatibility: a missing or
        // zero `minSyncLines` both fall back to 100.
        this.minLinesBetweenSyncMarks = options.minSyncLines || 100;
        // Enabled unless explicitly set to `false`.
        this.insertVersionDirective = options.insertVersionDirective !== false;
    }
    /**
     * Format one item (or array of items) and enqueue the resulting text,
     * adding the version directive, the `##FASTA` separator and sync marks
     * where appropriate.
     *
     * @param chunk - Item or array of items to format
     * @param controller - Stream controller that receives the GFF3 text
     */
    transform(chunk, controller) {
        // if we have not emitted anything yet, and this first chunk is not a
        // gff-version directive, emit one
        if (!this.haveWeEmittedData &&
            this.insertVersionDirective &&
            !('directive' in chunk && chunk.directive === 'gff-version')) {
            controller.enqueue('##gff-version 3\n');
        }
        // if it's a sequence chunk coming down, emit a FASTA directive and change
        // to FASTA mode
        if ('sequence' in chunk && !this.fastaMode) {
            controller.enqueue('##FASTA\n');
            this.fastaMode = true;
        }
        const str = Array.isArray(chunk)
            ? chunk.map((c) => (0, util_1.formatItem)(c)).join('')
            : (0, util_1.formatItem)(chunk);
        controller.enqueue(str);
        // Sync marks only make sense between annotation lines; inside the
        // FASTA section a `###` line would end up in the sequence data.
        if (!this.fastaMode) {
            if (this.linesSinceLastSyncMark >= this.minLinesBetweenSyncMarks) {
                controller.enqueue('###\n');
                this.linesSinceLastSyncMark = 0;
            }
            else {
                // count the number of newlines in this chunk
                this.linesSinceLastSyncMark += str.split('\n').length - 1;
            }
        }
        this.haveWeEmittedData = true;
    }
}
exports.GFFFormattingTransformer = GFFFormattingTransformer;
//# sourceMappingURL=api.js.map