UNPKG

@gmod/bam

Version:

Parser for BAM and BAM index (bai) files

439 lines 15.1 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const constants_1 = __importDefault(require("./constants")); const SEQRET_DECODER = '=ACMGRSVTWYHKDBN'.split(''); const CIGAR_DECODER = 'MIDNSHP=X???????'.split(''); class BamRecord { fileOffset; bytes; #dataView; constructor(args) { this.bytes = args.bytes; this.fileOffset = args.fileOffset; this.#dataView = new DataView(this.bytes.byteArray.buffer); } get byteArray() { return this.bytes.byteArray; } get flags() { return ((this.#dataView.getInt32(this.bytes.start + 16, true) & 0xffff0000) >> 16); } get ref_id() { return this.#dataView.getInt32(this.bytes.start + 4, true); } get start() { return this.#dataView.getInt32(this.bytes.start + 8, true); } get end() { return this.start + this.length_on_ref; } get id() { return this.fileOffset; } get mq() { const mq = (this.bin_mq_nl & 0xff00) >> 8; return mq === 255 ? undefined : mq; } get score() { return this.mq; } get qual() { if (this.isSegmentUnmapped()) { return; } const p = this.b0 + this.read_name_length + this.num_cigar_ops * 4 + this.num_seq_bytes; return this.byteArray.subarray(p, p + this.seq_length); } get strand() { return this.isReverseComplemented() ? -1 : 1; } get b0() { return this.bytes.start + 36; } get name() { let str = ''; for (let i = 0; i < this.read_name_length - 1; i++) { str += String.fromCharCode(this.byteArray[this.b0 + i]); } return str; } get tags() { let p = this.b0 + this.read_name_length + this.num_cigar_ops * 4 + this.num_seq_bytes + this.seq_length; const blockEnd = this.bytes.end; const tags = {}; while (p < blockEnd) { const tag = String.fromCharCode(this.byteArray[p], this.byteArray[p + 1]); const type = String.fromCharCode(this.byteArray[p + 2]); p += 3; if (type === 'A') { tags[tag] = String.fromCharCode(this.byteArray[p]); p += 1; } else if (type === 'i') { tags[tag] = this.#dataView.getInt32(p, true); p += 4; } else if (type === 'I') { tags[tag] = this.#dataView.getUint32(p, true); p += 4; } else if (type === 'c') { tags[tag] = this.#dataView.getInt8(p); p += 1; } else if (type === 'C') { tags[tag] = this.#dataView.getUint8(p); p += 1; } else if (type === 's') { tags[tag] = this.#dataView.getInt16(p, true); p += 2; } else if (type === 'S') { tags[tag] = this.#dataView.getUint16(p, true); p += 2; } else if (type === 'f') { tags[tag] = this.#dataView.getFloat32(p, true); p += 4; } else if (type === 'Z' || type === 'H') { const value = []; while (p <= blockEnd) { const cc = this.byteArray[p++]; if (cc !== 0) { value.push(String.fromCharCode(cc)); } else { break; } } tags[tag] = value.join(''); } else if (type === 'B') { const cc = this.byteArray[p++]; const Btype = String.fromCharCode(cc); const limit = this.#dataView.getInt32(p, true); p += 4; if (Btype === 'i') { if (tag === 'CG') { const value = []; for (let k = 0; k < limit; k++) { const cigop = this.#dataView.getInt32(p, true); const lop = cigop >> 4; const op = CIGAR_DECODER[cigop & 0xf]; value.push(lop + op); p += 4; } tags[tag] = value.join(''); } else { const value = []; for (let k = 0; k < limit; k++) { value.push(this.#dataView.getInt32(p, true)); p += 4; } tags[tag] = value; } } else if (Btype === 'I') { if (tag === 'CG') { const value = []; for (let k = 0; k < limit; k++) { const cigop = this.#dataView.getUint32(p, true); const lop = cigop >> 4; const op = CIGAR_DECODER[cigop & 0xf]; value.push(lop + op); p += 4; } tags[tag] = value.join(''); } else { const value = []; for (let k = 0; k < limit; k++) { value.push(this.#dataView.getUint32(p, true)); p += 4; } tags[tag] = value; } } else if (Btype === 's') { const value = []; for (let k = 0; k < limit; k++) { value.push(this.#dataView.getInt16(p, true)); p += 2; } tags[tag] = value; } else if (Btype === 'S') { const value = []; for (let k = 0; k < limit; k++) { value.push(this.#dataView.getUint16(p, true)); p += 2; } tags[tag] = value; } else if (Btype === 'c') { const value = []; for (let k = 0; k < limit; k++) { value.push(this.#dataView.getInt8(p)); p += 1; } tags[tag] = value; } else if (Btype === 'C') { const value = []; for (let k = 0; k < limit; k++) { value.push(this.#dataView.getUint8(p)); p += 1; } tags[tag] = value; } else if (Btype === 'f') { const value = []; for (let k = 0; k < limit; k++) { value.push(this.#dataView.getFloat32(p, true)); p += 4; } tags[tag] = value; } } else { console.error('Unknown BAM tag type', type); break; } } return tags; } /** * @returns {boolean} true if the read is paired, regardless of whether both * segments are mapped */ isPaired() { return !!(this.flags & constants_1.default.BAM_FPAIRED); } /** @returns {boolean} true if the read is paired, and both segments are mapped */ isProperlyPaired() { return !!(this.flags & constants_1.default.BAM_FPROPER_PAIR); } /** @returns {boolean} true if the read itself is unmapped; conflictive with isProperlyPaired */ isSegmentUnmapped() { return !!(this.flags & constants_1.default.BAM_FUNMAP); } /** @returns {boolean} true if the read itself is unmapped; conflictive with isProperlyPaired */ isMateUnmapped() { return !!(this.flags & constants_1.default.BAM_FMUNMAP); } /** @returns {boolean} true if the read is mapped to the reverse strand */ isReverseComplemented() { return !!(this.flags & constants_1.default.BAM_FREVERSE); } /** @returns {boolean} true if the mate is mapped to the reverse strand */ isMateReverseComplemented() { return !!(this.flags & constants_1.default.BAM_FMREVERSE); } /** @returns {boolean} true if this is read number 1 in a pair */ isRead1() { return !!(this.flags & constants_1.default.BAM_FREAD1); } /** @returns {boolean} true if this is read number 2 in a pair */ isRead2() { return !!(this.flags & constants_1.default.BAM_FREAD2); } /** @returns {boolean} true if this is a secondary alignment */ isSecondary() { return !!(this.flags & constants_1.default.BAM_FSECONDARY); } /** @returns {boolean} true if this read has failed QC checks */ isFailedQc() { return !!(this.flags & constants_1.default.BAM_FQCFAIL); } /** @returns {boolean} true if the read is an optical or PCR duplicate */ isDuplicate() { return !!(this.flags & constants_1.default.BAM_FDUP); } /** @returns {boolean} true if this is a supplementary alignment */ isSupplementary() { return !!(this.flags & constants_1.default.BAM_FSUPPLEMENTARY); } get cigarAndLength() { if (this.isSegmentUnmapped()) { return { length_on_ref: 0, CIGAR: '', }; } const numCigarOps = this.num_cigar_ops; let p = this.b0 + this.read_name_length; const CIGAR = []; // check for CG tag by inspecting whether the CIGAR field contains a clip // that consumes entire seqLen let cigop = this.#dataView.getInt32(p, true); let lop = cigop >> 4; let op = CIGAR_DECODER[cigop & 0xf]; if (op === 'S' && lop === this.seq_length) { // if there is a CG the second CIGAR field will be a N tag the represents // the length on ref p += 4; cigop = this.#dataView.getInt32(p, true); lop = cigop >> 4; op = CIGAR_DECODER[cigop & 0xf]; if (op !== 'N') { console.warn('CG tag with no N tag'); } return { CIGAR: this.tags.CG, length_on_ref: lop, }; } else { let lref = 0; for (let c = 0; c < numCigarOps; ++c) { cigop = this.#dataView.getInt32(p, true); lop = cigop >> 4; op = CIGAR_DECODER[cigop & 0xf]; CIGAR.push(lop + op); // soft clip, hard clip, and insertion don't count toward the length on // the reference if (op !== 'H' && op !== 'S' && op !== 'I') { lref += lop; } p += 4; } return { CIGAR: CIGAR.join(''), length_on_ref: lref, }; } } get length_on_ref() { return this.cigarAndLength.length_on_ref; } get CIGAR() { return this.cigarAndLength.CIGAR; } get num_cigar_ops() { return this.flag_nc & 0xffff; } get read_name_length() { return this.bin_mq_nl & 0xff; } get num_seq_bytes() { return (this.seq_length + 1) >> 1; } get seq() { const p = this.b0 + this.read_name_length + this.num_cigar_ops * 4; const seqBytes = this.num_seq_bytes; const len = this.seq_length; const buf = []; let i = 0; for (let j = 0; j < seqBytes; ++j) { const sb = this.byteArray[p + j]; buf.push(SEQRET_DECODER[(sb & 0xf0) >> 4]); i++; if (i < len) { buf.push(SEQRET_DECODER[sb & 0x0f]); i++; } } return buf.join(''); } // adapted from igv.js get pair_orientation() { if (!this.isSegmentUnmapped() && !this.isMateUnmapped() && this.ref_id === this.next_refid) { const s1 = this.isReverseComplemented() ? 'R' : 'F'; const s2 = this.isMateReverseComplemented() ? 'R' : 'F'; let o1 = ' '; let o2 = ' '; if (this.isRead1()) { o1 = '1'; o2 = '2'; } else if (this.isRead2()) { o1 = '2'; o2 = '1'; } const tmp = []; const isize = this.template_length; if (isize > 0) { tmp[0] = s1; tmp[1] = o1; tmp[2] = s2; tmp[3] = o2; } else { tmp[2] = s1; tmp[3] = o1; tmp[0] = s2; tmp[1] = o2; } return tmp.join(''); } return undefined; } get bin_mq_nl() { return this.#dataView.getInt32(this.bytes.start + 12, true); } get flag_nc() { return this.#dataView.getInt32(this.bytes.start + 16, true); } get seq_length() { return this.#dataView.getInt32(this.bytes.start + 20, true); } get next_refid() { return this.#dataView.getInt32(this.bytes.start + 24, true); } get next_pos() { return this.#dataView.getInt32(this.bytes.start + 28, true); } get template_length() { return this.#dataView.getInt32(this.bytes.start + 32, true); } toJSON() { const data = {}; for (const k of Object.keys(this)) { if (k.startsWith('_') || k === 'bytes') { continue; } // @ts-ignore data[k] = this[k]; } return data; } } exports.default = BamRecord; function cacheGetter(ctor, prop) { const desc = Object.getOwnPropertyDescriptor(ctor.prototype, prop); if (!desc) { throw new Error('OH NO, NO PROPERTY DESCRIPTOR'); } // eslint-disable-next-line @typescript-eslint/unbound-method const getter = desc.get; if (!getter) { throw new Error('OH NO, NOT A GETTER'); } Object.defineProperty(ctor.prototype, prop, { get() { const ret = getter.call(this); Object.defineProperty(this, prop, { value: ret }); return ret; }, }); } cacheGetter(BamRecord, 'tags'); cacheGetter(BamRecord, 'cigarAndLength'); cacheGetter(BamRecord, 'seq'); cacheGetter(BamRecord, 'qual'); //# sourceMappingURL=record.js.map