UNPKG

genomic-reader

Version:

A TypeScript library for reading BigWig, BigBed, 2bit, and BAM files. It supports streaming and works in the browser or on Node.js.

165 lines 7.51 kB
"use strict";
// Compiler-emitted helper: drives a generator function as a promise-returning
// coroutine (each `yield` behaves like `await`). Kept verbatim.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.readBam = exports.BamReader = exports.BamAlignmentFlag = exports.isFlagged = void 0;
const BamHeaderReader_1 = require("./BamHeaderReader");
const BamIndexReader_1 = require("./BamIndexReader");
const Bgzf_1 = require("./Bgzf");
const BinaryParser_1 = require("../util/BinaryParser");

/**
 * Tests whether any bit of `flag` is set in `bitwiseFlags`.
 *
 * @param {number} bitwiseFlags - Combined flag bits (e.g. an alignment's `flags`).
 * @param {number} flag - Bit mask to test, typically a `BamAlignmentFlag` value.
 * @returns {boolean} `true` when `bitwiseFlags & flag` is non-zero.
 */
function isFlagged(bitwiseFlags, flag) {
    return !!(bitwiseFlags & flag);
}
exports.isFlagged = isFlagged;

/**
 * Bit masks for the `flags` field of an alignment, for use with `isFlagged`.
 * Built as a two-way map: name -> value and value -> name.
 *
 * NOTE: the key `SECONDARY_ALIGNMNET` is misspelled; it is part of the exported
 * API and is intentionally left as-is so existing callers keep working.
 */
var BamAlignmentFlag;
(function (BamAlignmentFlag) {
    BamAlignmentFlag[BamAlignmentFlag["READ_PAIRED"] = 1] = "READ_PAIRED";
    BamAlignmentFlag[BamAlignmentFlag["PROPER_PAIR"] = 2] = "PROPER_PAIR";
    BamAlignmentFlag[BamAlignmentFlag["READ_UNMAPPED"] = 4] = "READ_UNMAPPED";
    BamAlignmentFlag[BamAlignmentFlag["MATE_UNMAPPED"] = 8] = "MATE_UNMAPPED";
    BamAlignmentFlag[BamAlignmentFlag["READ_STRAND"] = 16] = "READ_STRAND";
    BamAlignmentFlag[BamAlignmentFlag["MATE_STRAND"] = 32] = "MATE_STRAND";
    BamAlignmentFlag[BamAlignmentFlag["FIRST_OF_PAIR"] = 64] = "FIRST_OF_PAIR";
    BamAlignmentFlag[BamAlignmentFlag["SECOND_OF_PAIR"] = 128] = "SECOND_OF_PAIR";
    BamAlignmentFlag[BamAlignmentFlag["SECONDARY_ALIGNMNET"] = 256] = "SECONDARY_ALIGNMNET";
    BamAlignmentFlag[BamAlignmentFlag["READ_FAILS_VENDOR_QUALITY_CHECK"] = 512] = "READ_FAILS_VENDOR_QUALITY_CHECK";
    BamAlignmentFlag[BamAlignmentFlag["DUPLICATE_READ"] = 1024] = "DUPLICATE_READ";
    BamAlignmentFlag[BamAlignmentFlag["SUPPLEMENTARY_ALIGNMENT"] = 2048] = "SUPPLEMENTARY_ALIGNMENT";
})(BamAlignmentFlag = exports.BamAlignmentFlag || (exports.BamAlignmentFlag = {}));

// Maps the low 4 bits of a raw CIGAR value to its operation character.
const CIGAR_DECODER = "MIDNSHP=X";
// CIGAR operations that advance the offset into the read sequence.
const SEQ_CONSUMING_CIGAR_OPS = "MIS=X";
// CIGAR operations that add to the alignment's length on the reference.
const REF_CONSUMING_CIGAR_OPS = "MDN=X";
// Maps a 4-bit packed base code to its character.
const SEQ_DECODER = "=ACMGRSVTWYHKDBN";

/**
 * Reads alignments from a BAM file through a pair of data loaders, loading
 * the index and header on first use and caching them on the instance.
 */
class BamReader {
    /**
     * @param bamDataLoader - Loader for the BAM file; passed to the header
     *   reader and to `readBam`, which calls `load(start, size)` on it.
     * @param bamIndexDataLoader - Loader for the BAM index; passed to `readBamIndex`.
     */
    constructor(bamDataLoader, bamIndexDataLoader) {
        this.bamDataLoader = bamDataLoader;
        this.bamIndexDataLoader = bamIndexDataLoader;
        this.indexData = undefined;
        this.headerData = undefined;
    }

    /**
     * Returns the parsed index, reading it only if it has not been cached yet.
     *
     * @returns {Promise<*>} The value produced by `readBamIndex`.
     */
    getIndexData() {
        return __awaiter(this, void 0, void 0, function* () {
            if (this.indexData === undefined) {
                this.indexData = yield BamIndexReader_1.readBamIndex(this.bamIndexDataLoader);
            }
            return this.indexData;
        });
    }

    /**
     * Returns the parsed header, reading it only if it has not been cached yet.
     *
     * @returns {Promise<*>} The value produced by `readBamHeaderData`.
     */
    getHeaderData() {
        return __awaiter(this, void 0, void 0, function* () {
            if (this.headerData === undefined) {
                this.headerData = yield BamHeaderReader_1.readBamHeaderData(this.bamDataLoader);
            }
            return this.headerData;
        });
    }

    /**
     * Reads the alignments for a region.
     *
     * @param {string} chr - Chromosome name, looked up in the header's `chromToId`.
     * @param {number} start - Region start, compared against record positions.
     * @param {number} end - Region end, compared against record positions.
     * @returns {Promise<Array<Object>>} Alignments as produced by `readBam`.
     */
    read(chr, start, end) {
        return __awaiter(this, void 0, void 0, function* () {
            const indexData = yield this.getIndexData();
            const headerData = yield this.getHeaderData();
            const refId = headerData.chromToId[chr];
            const chunks = BamIndexReader_1.blocksForRange(indexData.refData[refId], start, end);
            return yield readBam(this.bamDataLoader, chunks, refId, chr, start, end);
        });
    }
}
exports.BamReader = BamReader;

/**
 * Loads, decompresses and parses each chunk in turn, concatenating the
 * alignments that fall within the requested region.
 *
 * @param bamDataLoader - Loader exposing `load(start, size)`.
 * @param chunks - Iterable of chunks, each with `start` / `end` objects
 *   carrying `blockPosition` and (for `start`) `dataPosition`.
 * @param {number} refId - Reference id that records must match.
 * @param {string} chr - Chromosome name copied onto each returned alignment.
 * @param {number} start - Region start.
 * @param {number} end - Region end.
 * @returns {Promise<Array<Object>>} Matching alignments, in chunk order.
 */
function readBam(bamDataLoader, chunks, refId, chr, start, end) {
    return __awaiter(this, void 0, void 0, function* () {
        const alignments = [];
        for (const chunk of chunks) {
            // Load from the chunk's start block up to 64 KiB (1 << 16) past the
            // position of its end block — presumably so the whole end block is
            // included; confirm against the BGZF block size limit.
            const bufSize = chunk.end.blockPosition + (1 << 16) - chunk.start.blockPosition;
            const chunkBytes = yield bamDataLoader.load(chunk.start.blockPosition, bufSize);
            const unzippedChunk = Bgzf_1.bgzfUnzip(chunkBytes);
            // Skip to the chunk's starting offset within the decompressed data.
            const chunkAlignments = readBamFeatures(unzippedChunk.slice(chunk.start.dataPosition), refId, chr, start, end);
            for (const alignment of chunkAlignments) {
                alignments.push(alignment);
            }
        }
        return alignments;
    });
}
exports.readBam = readBam;

/**
 * Parses consecutive alignment records out of decompressed BAM data and
 * returns those on `refId` that are not entirely outside [bpStart, bpEnd]
 * (judged by `pos > bpEnd` and `pos + seqLen < bpStart`).
 *
 * Parsing stops at the first record that does not fit completely in
 * `blocksData`.
 *
 * @param blocksData - Decompressed data; must expose `byteLength` and be
 *   accepted by `BinaryParser`.
 * @param {number} refId - Reference id that records must match.
 * @param {string} chr - Chromosome name copied onto each returned alignment.
 * @param {number} bpStart - Region start.
 * @param {number} bpEnd - Region end.
 * @returns {Array<Object>} Alignments with `chr`, `start`, `flags`, `strand`,
 *   `readName`, `cigarOps`, `templateLength`, `mappingQuality`, `seq`,
 *   `phredQualities` and `lengthOnRef`.
 */
function readBamFeatures(blocksData, refId, chr, bpStart, bpEnd) {
    const parser = new BinaryParser_1.BinaryParser(blocksData);
    const alignments = [];
    // Every record begins with a 4-byte size prefix; stop once fewer than 4
    // bytes remain instead of attempting to read a partial prefix.
    while (blocksData.byteLength - parser.position >= 4) {
        const blockSize = parser.getInt();
        const blockEnd = parser.position + blockSize;
        if (blockEnd > blocksData.byteLength) {
            // The record is truncated in this buffer.
            break;
        }
        const blockRefID = parser.getInt();
        const pos = parser.getInt();
        const readNameLen = parser.getByte();
        const mappingQuality = parser.getByte();
        parser.getUShort(); // "bin" field: unused, read only to advance the cursor
        const numCigarOps = parser.getUShort();
        const flags = parser.getUShort();
        const strand = !isFlagged(flags, BamAlignmentFlag.READ_STRAND);
        const seqLen = parser.getInt();
        parser.getInt(); // mate reference index: unused, read only to advance
        parser.getInt(); // mate position: unused, read only to advance
        const templateLen = parser.getInt();
        const readName = parser.getString(readNameLen);

        const isOutsideRegion = blockRefID === -1
            || refId !== blockRefID
            || pos > bpEnd
            || pos + seqLen < bpStart;
        if (isOutsideRegion) {
            parser.position = blockEnd;
            continue;
        }

        const cigarOps = [];
        let seqOffset = 0;
        let lengthOnRef = 0;
        for (let i = 0; i < numCigarOps; i++) {
            const rawCigar = parser.getUInt();
            // Unsigned shift: the value is read as unsigned, and a signed
            // shift would yield a negative length when the top bit is set.
            const opLen = rawCigar >>> 4;
            const op = CIGAR_DECODER.charAt(rawCigar & 0xf);
            cigarOps.push({ opLen, op, seqOffset });
            if (SEQ_CONSUMING_CIGAR_OPS.includes(op)) {
                seqOffset += opLen;
            }
            if (REF_CONSUMING_CIGAR_OPS.includes(op)) {
                lengthOnRef += opLen;
            }
        }

        // Bases are packed two per byte, high nibble first. The byte count
        // must be an integer: without flooring, an even seqLen would read one
        // byte too many and shift every quality value that follows.
        const seqBytes = Math.floor((seqLen + 1) / 2);
        const seqChars = [];
        for (let i = 0; i < seqBytes; i++) {
            const seqByte = parser.getByte();
            seqChars.push(SEQ_DECODER.charAt((seqByte & 0xf0) >> 4));
            seqChars.push(SEQ_DECODER.charAt(seqByte & 0x0f));
        }
        // Drop the padding nibble decoded for odd-length sequences.
        const sequence = seqChars.slice(0, seqLen).join('');

        const phredQualities = [];
        for (let i = 0; i < seqLen; i++) {
            phredQualities.push(parser.getByte());
        }

        alignments.push({
            chr: chr,
            start: pos,
            flags: flags,
            strand: strand,
            readName: readName,
            cigarOps: cigarOps,
            templateLength: templateLen,
            mappingQuality: mappingQuality,
            seq: sequence,
            phredQualities: phredQualities,
            lengthOnRef: lengthOnRef
        });
        // Jump past whatever remains of the record (anything after the qualities).
        parser.position = blockEnd;
    }
    return alignments;
}
//# sourceMappingURL=BamReader.js.map