@gmod/indexedfasta
Version:
read indexed fasta and bgzipped fasta formats
138 lines • 4.39 kB
JavaScript
import { LocalFile } from 'generic-filehandle2';
function _faiOffset(idx, pos) {
return (idx.offset +
idx.lineBytes * Math.floor(pos / idx.lineLength) +
(pos % idx.lineLength));
}
async function readFAI(fai, opts = {}) {
const decoder = new TextDecoder('utf8');
return Object.fromEntries(decoder
.decode((await fai.readFile(opts)))
.split(/\r?\n/)
.map(r => r.trim())
.filter(f => !!f)
.map(line => line.split('\t'))
.map(row => {
if (row[0]?.startsWith('>')) {
throw new Error('found > in sequence name, might have supplied FASTA file for the FASTA index');
}
return [
row[0],
{
name: row[0],
length: +row[1],
start: 0,
end: +row[1],
offset: +row[2],
lineLength: +row[3],
lineBytes: +row[4],
},
];
}));
}
export default class IndexedFasta {
constructor({ fasta, fai, path, faiPath, }) {
if (fasta) {
this.fasta = fasta;
}
else if (path) {
this.fasta = new LocalFile(path);
}
else {
throw new Error('Need to pass filehandle for fasta or path to localfile');
}
if (fai) {
this.fai = fai;
}
else if (faiPath) {
this.fai = new LocalFile(faiPath);
}
else if (path) {
this.fai = new LocalFile(`${path}.fai`);
}
else {
throw new Error('Need to pass filehandle for or path to localfile');
}
}
async _getIndexes(opts) {
if (!this.indexes) {
this.indexes = readFAI(this.fai, opts).catch((e) => {
this.indexes = undefined;
throw e;
});
}
return this.indexes;
}
/**
* @returns array of string sequence names that are present in the index, in
* which the array index indicates the sequence ID, and the value is the
* sequence name
*/
async getSequenceNames(opts) {
return Object.keys(await this._getIndexes(opts));
}
/**
* @returns array of string sequence names that are present in the index, in
* which the array index indicates the sequence ID, and the value is the
* sequence name
*/
async getSequenceSizes(opts) {
const returnObject = {};
const idx = await this._getIndexes(opts);
for (const val of Object.values(idx)) {
returnObject[val.name] = val.length;
}
return returnObject;
}
/**
* @returns array of string sequence names that are present in the index, in
* which the array index indicates the sequence ID, and the value is the
* sequence name
*/
async getSequenceSize(seqName, opts) {
const idx = await this._getIndexes(opts);
return idx[seqName]?.length;
}
/**
* @param name
*
* @returns true if the file contains the given reference sequence name
*/
async hasReferenceSequence(name, opts) {
return !!(await this._getIndexes(opts))[name];
}
/**
* @param seqName
* @param min
* @param max
*/
async getResiduesByName(seqName, min, max, opts) {
const indexEntry = (await this._getIndexes(opts))[seqName];
return indexEntry
? this._fetchFromIndexEntry(indexEntry, min, max, opts)
: undefined;
}
//alias for getResiduesByName
async getSequence(seqName, min, max, opts) {
return this.getResiduesByName(seqName, min, max, opts);
}
async _fetchFromIndexEntry(indexEntry, min = 0, max, opts) {
let end = max;
if (min < 0) {
throw new TypeError('regionStart cannot be less than 0');
}
if (end === undefined || end > indexEntry.length) {
end = indexEntry.length;
}
if (min >= end) {
return '';
}
const position = _faiOffset(indexEntry, min);
const readlen = _faiOffset(indexEntry, end) - position;
const decoder = new TextDecoder('utf8');
return decoder
.decode(await this.fasta.read(readlen, position, opts))
.replace(/\s+/g, '');
}
}
//# sourceMappingURL=indexedFasta.js.map