@gmod/gff
Version:
read and write GFF3 data as streams
442 lines (415 loc) • 14.7 kB
text/typescript
import { GFF3Parser, ParseCallbacks } from './parse'
import {
formatItem,
formatSequence,
GFF3Comment,
GFF3Directive,
GFF3Feature,
GFF3Sequence,
GFF3Item,
} from './util'
/**
 * Parser options accepted by {@link GFFTransformer} and
 * {@link parseStringSync}. Every field is optional; omitted fields take the
 * defaults noted below.
 */
export interface ParseOptions {
/**
 * Set to true to disable resolving "derives from" references between
 * features, default false. The value is passed through to the parser.
 */
disableDerivesFromReferences?: boolean
/** Whether to emit parsed features, default true */
parseFeatures?: boolean
/** Whether to emit parsed directives, default false */
parseDirectives?: boolean
/** Whether to emit parsed comments, default false */
parseComments?: boolean
/** Whether to emit parsed sequences, default true */
parseSequences?: boolean
/**
 * Maximum number of GFF3 lines to buffer, default Infinity. Only the
 * streaming transformer forwards this value to the parser; the synchronous
 * string parser always uses Infinity.
 */
bufferSize?: number
}
/** Parse options with every field resolved to a concrete value. */
type ParseOptionsProcessed = Required<ParseOptions>

/**
 * Shared argument processing for the parse routines: fills in the default for
 * every option the caller did not supply.
 *
 * Each field is defaulted individually with `??` rather than by spreading the
 * caller's object over the defaults, so a key that is present but explicitly
 * `undefined` (e.g. `{ parseFeatures: undefined }`) still receives its
 * default instead of overwriting it.
 *
 * @param options - Caller-supplied (possibly partial) parse options
 * @returns A fully populated options object
 */
function _processParseOptions(options: ParseOptions): ParseOptionsProcessed {
  return {
    parseFeatures: options.parseFeatures ?? true,
    parseDirectives: options.parseDirectives ?? false,
    parseSequences: options.parseSequences ?? true,
    parseComments: options.parseComments ?? false,
    bufferSize: options.bufferSize ?? Infinity,
    disableDerivesFromReferences:
      options.disableDerivesFromReferences ?? false,
  }
}
/**
 * Parse a stream of text data into a stream of feature, directive, comment,
 * and sequence objects.
 *
 * Intended for use as the transformer of a `TransformStream`: it receives
 * `Uint8Array` chunks, decodes them as text, splits the text into lines and
 * feeds each line to a `GFF3Parser`. Which kinds of item are enqueued
 * downstream is controlled by the `parse*` options; the default of the `T`
 * type parameter mirrors those options so the output item type narrows
 * accordingly.
 */
export class GFFTransformer<
  O extends ParseOptions,
  T = O extends { parseFeatures: true }
    ? O extends { parseSequences: true }
      ? O extends { parseDirectives: true }
        ? O extends { parseComments: true }
          ? GFF3Item
          : GFF3Feature | GFF3Sequence | GFF3Directive
        : O extends { parseComments: true }
          ? GFF3Feature | GFF3Sequence | GFF3Comment
          : GFF3Feature | GFF3Sequence
      : O extends { parseSequences: false }
        ? O extends { parseDirectives: true }
          ? O extends { parseComments: true }
            ? GFF3Feature | GFF3Directive | GFF3Comment
            : GFF3Feature | GFF3Directive
          : O extends { parseComments: true }
            ? GFF3Feature | GFF3Comment
            : GFF3Feature
        : O extends { parseDirectives: true }
          ? O extends { parseComments: true }
            ? GFF3Item
            : GFF3Feature | GFF3Sequence | GFF3Directive
          : O extends { parseComments: true }
            ? GFF3Feature | GFF3Sequence | GFF3Comment
            : GFF3Feature | GFF3Sequence
    : O extends { parseFeatures: false }
      ? O extends { parseSequences: true }
        ? O extends { parseDirectives: true }
          ? O extends { parseComments: true }
            ? GFF3Sequence | GFF3Directive | GFF3Comment
            : GFF3Sequence | GFF3Directive
          : O extends { parseComments: true }
            ? GFF3Sequence | GFF3Comment
            : GFF3Sequence
        : O extends { parseSequences: false }
          ? O extends { parseDirectives: true }
            ? O extends { parseComments: true }
              ? GFF3Directive | GFF3Comment
              : GFF3Directive
            : O extends { parseComments: true }
              ? GFF3Comment
              : never
          : O extends { parseDirectives: true }
            ? O extends { parseComments: true }
              ? GFF3Sequence | GFF3Directive | GFF3Comment
              : GFF3Sequence | GFF3Directive
            : O extends { parseComments: true }
              ? GFF3Sequence | GFF3Comment
              : GFF3Sequence
      : O extends { parseSequences: true }
        ? O extends { parseDirectives: true }
          ? O extends { parseComments: true }
            ? GFF3Item
            : GFF3Feature | GFF3Sequence | GFF3Directive
          : O extends { parseComments: true }
            ? GFF3Feature | GFF3Sequence | GFF3Comment
            : GFF3Feature | GFF3Sequence
        : O extends { parseSequences: false }
          ? O extends { parseDirectives: true }
            ? O extends { parseComments: true }
              ? GFF3Feature | GFF3Directive | GFF3Comment
              : GFF3Feature | GFF3Directive
            : O extends { parseComments: true }
              ? GFF3Feature | GFF3Comment
              : GFF3Feature
          : O extends { parseDirectives: true }
            ? O extends { parseComments: true }
              ? GFF3Item
              : GFF3Feature | GFF3Sequence | GFF3Directive
            : O extends { parseComments: true }
              ? GFF3Feature | GFF3Sequence | GFF3Comment
              : GFF3Feature | GFF3Sequence,
> implements Transformer<Uint8Array, T>
{
  /** Line terminators recognised in the input: CRLF, lone CR or lone LF. */
  private static readonly lineBreak = /\r\n|[\r\n]/

  private decoder: TextDecoder
  private parser: GFF3Parser
  /**
   * Text received so far that has not yet been handed to the parser: the
   * trailing partial line, possibly followed by a held-back `\r`.
   */
  private lastString = ''
  private parseFeatures: boolean
  private parseDirectives: boolean
  private parseComments: boolean
  private parseSequences: boolean

  /**
   * Options for how the text stream is parsed
   * @param options - Parser options
   */
  constructor(options?: O) {
    this.decoder = new TextDecoder()
    const processedOptions = _processParseOptions(options ?? {})
    const { bufferSize, disableDerivesFromReferences } = processedOptions
    this.parser = new GFF3Parser({ bufferSize, disableDerivesFromReferences })
    this.parseFeatures = processedOptions.parseFeatures
    this.parseDirectives = processedOptions.parseDirectives
    this.parseComments = processedOptions.parseComments
    this.parseSequences = processedOptions.parseSequences
  }

  /**
   * Build the parser callbacks for one `transform`/`flush` call. A callback is
   * registered only for the item kinds enabled in the options; every
   * registered callback enqueues the item on `controller`.
   */
  private makeCallbacks(controller: TransformStreamDefaultController<T>) {
    const callbacks: ParseCallbacks = {
      errorCallback: this.emitErrorMessage.bind(this, controller),
    }
    if (this.parseFeatures) {
      callbacks.featureCallback = (item: GFF3Feature) => {
        controller.enqueue(item as T)
      }
    }
    if (this.parseDirectives) {
      callbacks.directiveCallback = (item: GFF3Directive) => {
        controller.enqueue(item as T)
      }
    }
    if (this.parseComments) {
      callbacks.commentCallback = (item: GFF3Comment) => {
        controller.enqueue(item as T)
      }
    }
    if (this.parseSequences) {
      callbacks.sequenceCallback = (item: GFF3Sequence) => {
        controller.enqueue(item as T)
      }
    }
    return callbacks
  }

  /** Put the stream into the errored state with the parser's message. */
  private emitErrorMessage(
    controller: TransformStreamDefaultController<T>,
    errorMessage: string,
  ) {
    controller.error(errorMessage)
  }

  /**
   * Decode one chunk, split it into lines and feed every complete line to the
   * parser. The trailing partial line is kept for the next call.
   *
   * @param chunk - Raw bytes of GFF3 text
   * @param controller - Stream controller that receives the parsed items
   */
  transform(
    chunk: Uint8Array,
    controller: TransformStreamDefaultController<T>,
  ) {
    // Decode the current chunk to string and prepend the leftover text
    const text = `${this.lastString}${this.decoder.decode(chunk, {
      stream: true,
    })}`
    // A trailing '\r' may be the first half of a CRLF pair whose '\n' arrives
    // in the next chunk. Hold it back so the pair is not read as two line
    // breaks (which would inject a spurious empty line).
    const endsWithCarriageReturn = text.endsWith('\r')
    const lines = (endsWithCarriageReturn ? text.slice(0, -1) : text).split(
      GFFTransformer.lineBreak,
    )
    // Save last line, as it might be incomplete
    const partialLine = lines.pop() ?? ''
    this.lastString = endsWithCarriageReturn ? `${partialLine}\r` : partialLine
    if (lines.length === 0) {
      return
    }
    // The callbacks only close over the controller, so build them once per
    // chunk rather than once per line
    const callbacks = this.makeCallbacks(controller)
    for (const line of lines) {
      this.parser.addLine(line, callbacks)
    }
  }

  /**
   * Called when the input is exhausted: drains the decoder, parses whatever
   * text is still pending and tells the parser that the input has ended.
   *
   * @param controller - Stream controller that receives the parsed items
   */
  flush(controller: TransformStreamDefaultController<T>) {
    const callbacks = this.makeCallbacks(controller)
    const remaining = `${this.lastString}${this.decoder.decode()}`
    this.lastString = ''
    if (remaining) {
      // The pending text can still contain a held-back '\r', so split it the
      // same way as in transform()
      const lines = remaining.split(GFFTransformer.lineBreak)
      const finalLine = lines.pop()
      for (const line of lines) {
        this.parser.addLine(line, callbacks)
      }
      // An empty final segment only means the text ended with a line break
      if (finalLine) {
        this.parser.addLine(finalLine, callbacks)
      }
    }
    this.parser.finish(callbacks)
  }
}
/**
 * Parse a complete GFF3 document held in a string, synchronously, and collect
 * the parsed items into an array.
 *
 * Only the item kinds enabled in the options are collected. The whole string
 * is parsed with an unlimited parser buffer; any `bufferSize` option is not
 * used here.
 *
 * @param str - GFF3 string
 * @param inputOptions - Parsing options
 * @returns array of parsed features, directives, comments and/or sequences;
 * an empty array when `str` is empty
 * @throws Error carrying the parser's message when the parser reports an
 * error
 */
export function parseStringSync<O extends ParseOptions>(
  str: string,
  inputOptions?: O,
): O extends { parseFeatures: true }
  ? O extends { parseSequences: true }
    ? O extends { parseDirectives: true }
      ? O extends { parseComments: true }
        ? GFF3Item[]
        : (GFF3Feature | GFF3Sequence | GFF3Directive)[]
      : O extends { parseComments: true }
        ? (GFF3Feature | GFF3Sequence | GFF3Comment)[]
        : (GFF3Feature | GFF3Sequence)[]
    : O extends { parseSequences: false }
      ? O extends { parseDirectives: true }
        ? O extends { parseComments: true }
          ? (GFF3Feature | GFF3Directive | GFF3Comment)[]
          : (GFF3Feature | GFF3Directive)[]
        : O extends { parseComments: true }
          ? (GFF3Feature | GFF3Comment)[]
          : GFF3Feature[]
      : O extends { parseDirectives: true }
        ? O extends { parseComments: true }
          ? GFF3Item[]
          : (GFF3Feature | GFF3Sequence | GFF3Directive)[]
        : O extends { parseComments: true }
          ? (GFF3Feature | GFF3Sequence | GFF3Comment)[]
          : (GFF3Feature | GFF3Sequence)[]
  : O extends { parseFeatures: false }
    ? O extends { parseSequences: true }
      ? O extends { parseDirectives: true }
        ? O extends { parseComments: true }
          ? (GFF3Sequence | GFF3Directive | GFF3Comment)[]
          : (GFF3Sequence | GFF3Directive)[]
        : O extends { parseComments: true }
          ? (GFF3Sequence | GFF3Comment)[]
          : GFF3Sequence[]
      : O extends { parseSequences: false }
        ? O extends { parseDirectives: true }
          ? O extends { parseComments: true }
            ? (GFF3Directive | GFF3Comment)[]
            : GFF3Directive[]
          : O extends { parseComments: true }
            ? GFF3Comment[]
            : never[]
        : O extends { parseDirectives: true }
          ? O extends { parseComments: true }
            ? (GFF3Sequence | GFF3Directive | GFF3Comment)[]
            : (GFF3Sequence | GFF3Directive)[]
          : O extends { parseComments: true }
            ? (GFF3Sequence | GFF3Comment)[]
            : GFF3Sequence[]
    : O extends { parseSequences: true }
      ? O extends { parseDirectives: true }
        ? O extends { parseComments: true }
          ? GFF3Item[]
          : (GFF3Feature | GFF3Sequence | GFF3Directive)[]
        : O extends { parseComments: true }
          ? (GFF3Feature | GFF3Sequence | GFF3Comment)[]
          : (GFF3Feature | GFF3Sequence)[]
      : O extends { parseSequences: false }
        ? O extends { parseDirectives: true }
          ? O extends { parseComments: true }
            ? (GFF3Feature | GFF3Directive | GFF3Comment)[]
            : (GFF3Feature | GFF3Directive)[]
          : O extends { parseComments: true }
            ? (GFF3Feature | GFF3Comment)[]
            : GFF3Feature[]
        : O extends { parseDirectives: true }
          ? O extends { parseComments: true }
            ? GFF3Item[]
            : (GFF3Feature | GFF3Sequence | GFF3Directive)[]
          : O extends { parseComments: true }
            ? (GFF3Feature | GFF3Sequence | GFF3Comment)[]
            : (GFF3Feature | GFF3Sequence)[] {
  // Nothing to parse: skip option processing and parser construction
  if (!str) {
    return [] as any
  }

  const resolved = _processParseOptions(inputOptions ?? {})

  // Every enabled callback appends to the same result array
  const collected: GFF3Item[] = []
  const collect = collected.push.bind(collected)

  const callbacks: ParseCallbacks = {
    // Surface parser errors as exceptions to the synchronous caller
    errorCallback: (message: string) => {
      throw new Error(message)
    },
  }
  if (resolved.parseFeatures) {
    callbacks.featureCallback = collect
  }
  if (resolved.parseDirectives) {
    callbacks.directiveCallback = collect
  }
  if (resolved.parseComments) {
    callbacks.commentCallback = collect
  }
  if (resolved.parseSequences) {
    callbacks.sequenceCallback = collect
  }

  // The whole document is already in memory, so the buffer is unbounded
  const parser = new GFF3Parser({
    disableDerivesFromReferences:
      resolved.disableDerivesFromReferences || false,
    bufferSize: Infinity,
  })

  const lines = str.split(/\r\n|[\r\n]/)
  for (const line of lines) {
    parser.addLine(line, callbacks)
  }
  parser.finish(callbacks)

  // The conditional return type cannot be derived from the runtime flags
  return collected as any
}
/**
 * Serialize an array of GFF3 items (features, directives, comments and
 * sequences) to a single GFF3 string. No synchronization (###) marks are
 * inserted.
 *
 * All non-sequence items are written first, in their input order. If any
 * sequences are present they follow as one block introduced by a `##FASTA`
 * line, regardless of where they appeared in the input.
 *
 * @param items - Array of features, directives, comments and/or sequences
 * @returns the formatted GFF3
 */
export function formatSync(items: GFF3Item[]): string {
  // Partition the input: sequences go to the trailing FASTA section
  const sequenceItems: GFF3Sequence[] = []
  const annotationItems: (GFF3Feature | GFF3Directive | GFF3Comment)[] = []
  for (const item of items) {
    if ('sequence' in item) {
      sequenceItems.push(item)
    } else {
      annotationItems.push(item)
    }
  }

  const annotationText = annotationItems
    .map((item) =>
      Array.isArray(item) ? formatItem(item).join('') : formatItem(item),
    )
    .join('')

  if (sequenceItems.length === 0) {
    return annotationText
  }
  const fastaText = sequenceItems.map(formatSequence).join('')
  return `${annotationText}##FASTA\n${fastaText}`
}
/** Formatter options accepted by {@link GFFFormattingTransformer}. */
export interface FormatOptions {
/**
 * The minimum number of lines to emit between sync (###) directives, default
 * 100. Lines are counted as newline characters in the formatted output.
 */
minSyncLines?: number
/**
 * Whether to insert a `##gff-version 3` directive at the beginning of a
 * formatted stream if the first item is not already a gff-version directive,
 * default true. Only an explicit `false` turns this off.
 */
insertVersionDirective?: boolean
}
/**
 * Transform a stream of features, directives, comments and/or sequences into a
 * stream of GFF3 text.
 *
 * Inserts synchronization (###) marks automatically while emitting
 * annotation items. Once the first sequence item arrives the output switches
 * to a `##FASTA` section, after which no further sync marks are written.
 */
export class GFFFormattingTransformer implements Transformer<GFF3Item, string> {
  /** Newlines written since the last sync mark (or since the start). */
  linesSinceLastSyncMark = 0
  /** True once at least one item has been written. */
  haveWeEmittedData = false
  /** True once the `##FASTA` directive has been written. */
  fastaMode = false
  minLinesBetweenSyncMarks: number
  insertVersionDirective: boolean

  /**
   * Options for how the output text stream is formatted
   * @param options - Formatter options
   */
  constructor(options: FormatOptions = {}) {
    // `??` rather than `||` so that an explicit 0 is honoured instead of
    // silently falling back to the default
    this.minLinesBetweenSyncMarks = options.minSyncLines ?? 100
    this.insertVersionDirective = options.insertVersionDirective !== false
  }

  /**
   * Format one item and enqueue its text, preceded by a version or FASTA
   * directive and followed by a sync mark where appropriate.
   *
   * @param chunk - The item to format
   * @param controller - Stream controller that receives the GFF3 text
   */
  transform(
    chunk: GFF3Item,
    controller: TransformStreamDefaultController<string>,
  ) {
    // if we have not emitted anything yet, and this first chunk is not a
    // gff-version directive, emit one
    const isVersionDirective =
      'directive' in chunk && chunk.directive === 'gff-version'
    if (
      !this.haveWeEmittedData &&
      this.insertVersionDirective &&
      !isVersionDirective
    ) {
      controller.enqueue('##gff-version 3\n')
    }

    // if it's a sequence chunk coming down, emit a FASTA directive and change
    // to FASTA mode
    if ('sequence' in chunk && !this.fastaMode) {
      controller.enqueue('##FASTA\n')
      this.fastaMode = true
    }

    const str = Array.isArray(chunk)
      ? chunk.map((c) => formatItem(c)).join('')
      : formatItem(chunk)
    controller.enqueue(str)
    this.haveWeEmittedData = true

    // Everything after ##FASTA is sequence data; a '###' line written there
    // would land inside the FASTA section, so stop emitting sync marks
    if (this.fastaMode) {
      return
    }

    if (this.linesSinceLastSyncMark >= this.minLinesBetweenSyncMarks) {
      controller.enqueue('###\n')
      this.linesSinceLastSyncMark = 0
    } else {
      // count the number of newlines in this chunk
      let count = 0
      for (const character of str) {
        if (character === '\n') {
          count += 1
        }
      }
      this.linesSinceLastSyncMark += count
    }
  }
}