UNPKG

@gmod/indexedfasta

Version:

read indexed fasta and bgzipped fasta formats

163 lines 5.68 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const generic_filehandle2_1 = require("generic-filehandle2"); function _faiOffset(offset, lineBytes, lineLength, pos) { return offset + lineBytes * Math.floor(pos / lineLength) + (pos % lineLength); } async function readFAI(fai, opts = {}) { const decoder = new TextDecoder('utf8'); const text = decoder.decode((await fai.readFile(opts))); const names = []; const offsets = []; const lengths = []; const lineLengths = []; const lineBytes = []; const nameToIndex = {}; let lineStart = 0; const len = text.length; let idx = 0; while (lineStart < len) { let lineEnd = text.indexOf('\n', lineStart); if (lineEnd === -1) { lineEnd = len; } let line = text.slice(lineStart, lineEnd); if (line.endsWith('\r')) { line = line.slice(0, -1); } line = line.trim(); lineStart = lineEnd + 1; if (line.length === 0) { continue; } const tab1 = line.indexOf('\t'); const tab2 = line.indexOf('\t', tab1 + 1); const tab3 = line.indexOf('\t', tab2 + 1); const tab4 = line.indexOf('\t', tab3 + 1); const name = line.slice(0, tab1); if (name.startsWith('>')) { throw new Error('found > in sequence name, might have supplied FASTA file for the FASTA index'); } names.push(name); lengths.push(+line.slice(tab1 + 1, tab2)); offsets.push(+line.slice(tab2 + 1, tab3)); lineLengths.push(+line.slice(tab3 + 1, tab4)); lineBytes.push(+line.slice(tab4 + 1)); nameToIndex[name] = idx; idx++; } return { names, nameToIndex, offsets, lengths, lineLengths, lineBytes }; } class IndexedFasta { constructor({ fasta, fai, path, faiPath, }) { if (fasta) { this.fasta = fasta; } else if (path) { this.fasta = new generic_filehandle2_1.LocalFile(path); } else { throw new Error('Need to pass filehandle for fasta or path to localfile'); } if (fai) { this.fai = fai; } else if (faiPath) { this.fai = new generic_filehandle2_1.LocalFile(faiPath); } else if (path) { this.fai = new generic_filehandle2_1.LocalFile(`${path}.fai`); } else { throw new Error('Need to pass filehandle for or path to localfile'); } } async _getIndexes(opts) { if (!this.indexes) { this.indexes = readFAI(this.fai, opts).catch((e) => { this.indexes = undefined; throw e; }); } return this.indexes; } /** * @returns array of string sequence names that are present in the index, in * which the array index indicates the sequence ID, and the value is the * sequence name */ async getSequenceNames(opts) { return (await this._getIndexes(opts)).names; } /** * @returns object mapping sequence names to their lengths */ async getSequenceSizes(opts) { const idx = await this._getIndexes(opts); if (!idx.sizesCache) { const sizes = {}; for (let i = 0; i < idx.names.length; i++) { sizes[idx.names[i]] = idx.lengths[i]; } idx.sizesCache = sizes; } return idx.sizesCache; } /** * @returns the length of the given sequence, or undefined if not found */ async getSequenceSize(seqName, opts) { const idx = await this._getIndexes(opts); const i = idx.nameToIndex[seqName]; return i !== undefined ? idx.lengths[i] : undefined; } /** * @param name * * @returns true if the file contains the given reference sequence name */ async hasReferenceSequence(name, opts) { return (await this._getIndexes(opts)).nameToIndex[name] !== undefined; } /** * @param seqName * @param min * @param max */ async getResiduesByName(seqName, min, max, opts) { const idx = await this._getIndexes(opts); const i = idx.nameToIndex[seqName]; if (i === undefined) { return undefined; } return this._fetchFromIndex(idx.offsets[i], idx.lineBytes[i], idx.lineLengths[i], idx.lengths[i], min, max, opts); } //alias for getResiduesByName async getSequence(seqName, min, max, opts) { return this.getResiduesByName(seqName, min, max, opts); } async _fetchFromIndex(offset, lineBytes, lineLength, seqLength, min = 0, max, opts) { let end = max; if (min < 0) { throw new TypeError('regionStart cannot be less than 0'); } if (end === undefined || end > seqLength) { end = seqLength; } if (min >= end) { return ''; } const position = _faiOffset(offset, lineBytes, lineLength, min); const readlen = _faiOffset(offset, lineBytes, lineLength, end) - position; const decoder = new TextDecoder('utf8'); const seq = decoder .decode(await this.fasta.read(readlen, position, opts)) .replace(/\s+/g, ''); if (/[^\x20-\x7e]/.test(seq.slice(0, 1000))) { throw new Error('Non-ASCII characters detected in sequence. The file may be gzip compressed. Use BgzipIndexedFasta for bgzip files, or decompress the file.'); } return seq; } } exports.default = IndexedFasta; //# sourceMappingURL=indexedFasta.js.map