@gmod/indexedfasta
Version:
read indexed fasta and bgzipped fasta formats
163 lines • 5.68 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
const generic_filehandle2_1 = require("generic-filehandle2");
/**
 * Convert a 0-based residue position within a sequence into a byte position
 * in the FASTA file, using that sequence's .fai index fields.
 *
 * @param offset - byte position added as the base (the index's offset column)
 * @param lineBytes - bytes per sequence line, line terminator included
 * @param lineLength - residues per sequence line
 * @param pos - 0-based residue position within the sequence
 * @returns `offset` plus the bytes of every complete line before `pos`, plus
 *   the column of `pos` within its own line
 */
function _faiOffset(offset, lineBytes, lineLength, pos) {
  // Number of whole lines that precede the line containing `pos`.
  const precedingLines = Math.floor(pos / lineLength);
  // Column of `pos` inside its own line.
  const columnInLine = pos % lineLength;
  return offset + lineBytes * precedingLines + columnInLine;
}
/**
 * Read and parse a FASTA index (.fai) file.
 *
 * Every non-blank line is read as tab-separated columns in the order:
 * name, sequence length, byte offset, residues per line, bytes per line.
 *
 * @param fai - filehandle whose `readFile(opts)` resolves to the raw index
 *   bytes
 * @param opts - passed through unchanged to `fai.readFile`
 * @returns parallel arrays `names`, `offsets`, `lengths`, `lineLengths` and
 *   `lineBytes` (array position is the sequence ID), plus `nameToIndex`, a
 *   prototype-less dictionary from sequence name to that position
 * @throws Error when a name starts with `>`, which suggests the FASTA itself
 *   was supplied in place of its index
 */
async function readFAI(fai, opts = {}) {
  const decoder = new TextDecoder('utf8');
  const text = decoder.decode(await fai.readFile(opts));
  const names = [];
  const offsets = [];
  const lengths = [];
  const lineLengths = [];
  const lineBytes = [];
  // No prototype: sequence names come from the file, so a lookup of
  // "toString" or "constructor" must not resolve to an inherited
  // Object.prototype member, and a sequence called "__proto__" must be
  // stored like any other key.
  const nameToIndex = Object.create(null);
  const len = text.length;
  let lineStart = 0;
  let idx = 0;
  while (lineStart < len) {
    let lineEnd = text.indexOf('\n', lineStart);
    if (lineEnd === -1) {
      // Final line without a trailing newline.
      lineEnd = len;
    }
    // trim() also removes the '\r' left behind by CRLF line endings.
    const line = text.slice(lineStart, lineEnd).trim();
    lineStart = lineEnd + 1;
    if (line.length === 0) {
      continue;
    }
    // Locate column boundaries by index instead of split() so no
    // intermediate array is allocated per line.
    const tab1 = line.indexOf('\t');
    const tab2 = line.indexOf('\t', tab1 + 1);
    const tab3 = line.indexOf('\t', tab2 + 1);
    const tab4 = line.indexOf('\t', tab3 + 1);
    const name = line.slice(0, tab1);
    if (name.startsWith('>')) {
      throw new Error('found > in sequence name, might have supplied FASTA file for the FASTA index');
    }
    names.push(name);
    lengths.push(+line.slice(tab1 + 1, tab2));
    offsets.push(+line.slice(tab2 + 1, tab3));
    lineLengths.push(+line.slice(tab3 + 1, tab4));
    lineBytes.push(+line.slice(tab4 + 1));
    // If a name repeats, the last occurrence wins.
    nameToIndex[name] = idx;
    idx++;
  }
  return { names, nameToIndex, offsets, lengths, lineLengths, lineBytes };
}
/**
 * Random-access reader for a FASTA file that has an accompanying .fai index.
 *
 * The index is read lazily on first use and cached on the instance.
 */
class IndexedFasta {
  /**
   * @param args.fasta - filehandle for the FASTA file; takes precedence over
   *   `path`
   * @param args.fai - filehandle for the index; takes precedence over
   *   `faiPath`
   * @param args.path - local path to the FASTA file, used when `fasta` is
   *   not given
   * @param args.faiPath - local path to the index; when neither `fai` nor
   *   `faiPath` is given, `${path}.fai` is used
   * @throws Error when no FASTA source, or no index source, can be determined
   */
  constructor({ fasta, fai, path, faiPath, }) {
    if (fasta) {
      this.fasta = fasta;
    }
    else if (path) {
      this.fasta = new generic_filehandle2_1.LocalFile(path);
    }
    else {
      throw new Error('Need to pass filehandle for fasta or path to localfile');
    }
    if (fai) {
      this.fai = fai;
    }
    else if (faiPath) {
      this.fai = new generic_filehandle2_1.LocalFile(faiPath);
    }
    else if (path) {
      this.fai = new generic_filehandle2_1.LocalFile(`${path}.fai`);
    }
    else {
      throw new Error('Need to pass filehandle for fai or path to localfile');
    }
  }
  /**
   * Load the parsed index, caching the in-flight promise so concurrent
   * callers share one read. A failed read clears the cache so a later call
   * can retry.
   */
  async _getIndexes(opts) {
    if (!this.indexes) {
      this.indexes = readFAI(this.fai, opts).catch((e) => {
        this.indexes = undefined;
        throw e;
      });
    }
    return this.indexes;
  }
  /**
   * Resolve a sequence name to its position in the parsed index arrays.
   *
   * Only own properties of `nameToIndex` count, so names that collide with
   * inherited object members ("toString", "constructor", ...) are reported
   * as absent instead of yielding a function.
   *
   * @returns the numeric sequence ID, or undefined if the name is not indexed
   */
  _lookupIndex(idx, seqName) {
    return Object.prototype.hasOwnProperty.call(idx.nameToIndex, seqName)
      ? idx.nameToIndex[seqName]
      : undefined;
  }
  /**
   * @returns array of string sequence names that are present in the index, in
   * which the array index indicates the sequence ID, and the value is the
   * sequence name
   */
  async getSequenceNames(opts) {
    return (await this._getIndexes(opts)).names;
  }
  /**
   * @returns object mapping sequence names to their lengths; built once and
   * cached on the parsed index
   */
  async getSequenceSizes(opts) {
    const idx = await this._getIndexes(opts);
    if (!idx.sizesCache) {
      const sizes = {};
      for (let i = 0; i < idx.names.length; i++) {
        sizes[idx.names[i]] = idx.lengths[i];
      }
      idx.sizesCache = sizes;
    }
    return idx.sizesCache;
  }
  /**
   * @returns the length of the given sequence, or undefined if not found
   */
  async getSequenceSize(seqName, opts) {
    const idx = await this._getIndexes(opts);
    const i = this._lookupIndex(idx, seqName);
    return i !== undefined ? idx.lengths[i] : undefined;
  }
  /**
   * @param name
   *
   * @returns true if the file contains the given reference sequence name
   */
  async hasReferenceSequence(name, opts) {
    const idx = await this._getIndexes(opts);
    return this._lookupIndex(idx, name) !== undefined;
  }
  /**
   * Fetch residues of a named sequence.
   *
   * @param seqName
   * @param min - 0-based inclusive start; defaults to 0
   * @param max - 0-based exclusive end; defaults to (and is clamped to) the
   *   sequence length
   *
   * @returns the residues as a string, or undefined if `seqName` is not in
   * the index
   */
  async getResiduesByName(seqName, min, max, opts) {
    const idx = await this._getIndexes(opts);
    const i = this._lookupIndex(idx, seqName);
    if (i === undefined) {
      return undefined;
    }
    return this._fetchFromIndex(idx.offsets[i], idx.lineBytes[i], idx.lineLengths[i], idx.lengths[i], min, max, opts);
  }
  // alias for getResiduesByName
  async getSequence(seqName, min, max, opts) {
    return this.getResiduesByName(seqName, min, max, opts);
  }
  /**
   * Read the residues in [min, max) of one sequence from the FASTA file.
   *
   * @param offset - index offset column for the sequence
   * @param lineBytes - bytes per sequence line, terminator included
   * @param lineLength - residues per sequence line
   * @param seqLength - total residues in the sequence
   * @param min - 0-based inclusive start
   * @param max - 0-based exclusive end; clamped to `seqLength`
   * @returns the residues with all whitespace removed, or '' for an empty
   *   range
   * @throws TypeError when `min` is negative
   * @throws Error when the bytes read contain characters outside printable
   *   ASCII
   */
  async _fetchFromIndex(offset, lineBytes, lineLength, seqLength, min = 0, max, opts) {
    let end = max;
    if (min < 0) {
      throw new TypeError('regionStart cannot be less than 0');
    }
    if (end === undefined || end > seqLength) {
      end = seqLength;
    }
    if (min >= end) {
      return '';
    }
    const position = _faiOffset(offset, lineBytes, lineLength, min);
    // The byte span between the two positions includes any line terminators
    // crossed; they are stripped after decoding.
    const readlen = _faiOffset(offset, lineBytes, lineLength, end) - position;
    const decoder = new TextDecoder('utf8');
    const seq = decoder
      .decode(await this.fasta.read(readlen, position, opts))
      .replace(/\s+/g, '');
    // Only the first 1000 characters are inspected to keep the check cheap.
    if (/[^\x20-\x7e]/.test(seq.slice(0, 1000))) {
      throw new Error('Non-ASCII characters detected in sequence. The file may be gzip compressed. Use BgzipIndexedFasta for bgzip files, or decompress the file.');
    }
    return seq;
  }
}

/**
 * Map a 0-based residue position to a byte position in the FASTA file.
 * Declared only if the surrounding file has not already provided it.
 */
// Expose the IndexedFasta class as this CommonJS module's `default` export.
exports.default = IndexedFasta;
//# sourceMappingURL=indexedFasta.js.map