UNPKG

@gmod/bam

Version:

Parser for BAM and BAM index (bai) files

478 lines (424 loc) 12.3 kB
import Constants from './constants' const SEQRET_DECODER = '=ACMGRSVTWYHKDBN'.split('') const CIGAR_DECODER = 'MIDNSHP=X???????'.split('') interface Bytes { start: number end: number byteArray: Uint8Array } export default class BamRecord { public fileOffset: number private bytes: Bytes #dataView: DataView constructor(args: { bytes: Bytes; fileOffset: number }) { this.bytes = args.bytes this.fileOffset = args.fileOffset this.#dataView = new DataView(this.bytes.byteArray.buffer) } get byteArray() { return this.bytes.byteArray } get flags() { return ( (this.#dataView.getInt32(this.bytes.start + 16, true) & 0xffff0000) >> 16 ) } get ref_id() { return this.#dataView.getInt32(this.bytes.start + 4, true) } get start() { return this.#dataView.getInt32(this.bytes.start + 8, true) } get end() { return this.start + this.length_on_ref } get id() { return this.fileOffset } get mq() { const mq = (this.bin_mq_nl & 0xff00) >> 8 return mq === 255 ? undefined : mq } get score() { return this.mq } get qual() { if (this.isSegmentUnmapped()) { return } const p = this.b0 + this.read_name_length + this.num_cigar_ops * 4 + this.num_seq_bytes return this.byteArray.subarray(p, p + this.seq_length) } get strand() { return this.isReverseComplemented() ? -1 : 1 } get b0() { return this.bytes.start + 36 } get name() { let str = '' for (let i = 0; i < this.read_name_length - 1; i++) { str += String.fromCharCode(this.byteArray[this.b0 + i]!) } return str } get tags() { let p = this.b0 + this.read_name_length + this.num_cigar_ops * 4 + this.num_seq_bytes + this.seq_length const blockEnd = this.bytes.end const tags = {} as Record<string, unknown> while (p < blockEnd) { const tag = String.fromCharCode( this.byteArray[p]!, this.byteArray[p + 1]!, ) const type = String.fromCharCode(this.byteArray[p + 2]!) p += 3 if (type === 'A') { tags[tag] = String.fromCharCode(this.byteArray[p]!) p += 1 } else if (type === 'i') { tags[tag] = this.#dataView.getInt32(p, true) p += 4 } else if (type === 'I') { tags[tag] = this.#dataView.getUint32(p, true) p += 4 } else if (type === 'c') { tags[tag] = this.#dataView.getInt8(p) p += 1 } else if (type === 'C') { tags[tag] = this.#dataView.getUint8(p) p += 1 } else if (type === 's') { tags[tag] = this.#dataView.getInt16(p, true) p += 2 } else if (type === 'S') { tags[tag] = this.#dataView.getUint16(p, true) p += 2 } else if (type === 'f') { tags[tag] = this.#dataView.getFloat32(p, true) p += 4 } else if (type === 'Z' || type === 'H') { const value = [] while (p <= blockEnd) { const cc = this.byteArray[p++]! if (cc !== 0) { value.push(String.fromCharCode(cc)) } else { break } } tags[tag] = value.join('') } else if (type === 'B') { const cc = this.byteArray[p++]! const Btype = String.fromCharCode(cc) const limit = this.#dataView.getInt32(p, true) p += 4 if (Btype === 'i') { if (tag === 'CG') { const value = [] for (let k = 0; k < limit; k++) { const cigop = this.#dataView.getInt32(p, true) const lop = cigop >> 4 const op = CIGAR_DECODER[cigop & 0xf]! value.push(lop + op) p += 4 } tags[tag] = value.join('') } else { const value = [] for (let k = 0; k < limit; k++) { value.push(this.#dataView.getInt32(p, true)) p += 4 } tags[tag] = value } } else if (Btype === 'I') { if (tag === 'CG') { const value = [] for (let k = 0; k < limit; k++) { const cigop = this.#dataView.getUint32(p, true) const lop = cigop >> 4 const op = CIGAR_DECODER[cigop & 0xf]! value.push(lop + op) p += 4 } tags[tag] = value.join('') } else { const value = [] for (let k = 0; k < limit; k++) { value.push(this.#dataView.getUint32(p, true)) p += 4 } tags[tag] = value } } else if (Btype === 's') { const value = [] for (let k = 0; k < limit; k++) { value.push(this.#dataView.getInt16(p, true)) p += 2 } tags[tag] = value } else if (Btype === 'S') { const value = [] for (let k = 0; k < limit; k++) { value.push(this.#dataView.getUint16(p, true)) p += 2 } tags[tag] = value } else if (Btype === 'c') { const value = [] for (let k = 0; k < limit; k++) { value.push(this.#dataView.getInt8(p)) p += 1 } tags[tag] = value } else if (Btype === 'C') { const value = [] for (let k = 0; k < limit; k++) { value.push(this.#dataView.getUint8(p)) p += 1 } tags[tag] = value } else if (Btype === 'f') { const value = [] for (let k = 0; k < limit; k++) { value.push(this.#dataView.getFloat32(p, true)) p += 4 } tags[tag] = value } } else { console.error('Unknown BAM tag type', type) break } } return tags } /** * @returns {boolean} true if the read is paired, regardless of whether both * segments are mapped */ isPaired() { return !!(this.flags & Constants.BAM_FPAIRED) } /** @returns {boolean} true if the read is paired, and both segments are mapped */ isProperlyPaired() { return !!(this.flags & Constants.BAM_FPROPER_PAIR) } /** @returns {boolean} true if the read itself is unmapped; conflictive with isProperlyPaired */ isSegmentUnmapped() { return !!(this.flags & Constants.BAM_FUNMAP) } /** @returns {boolean} true if the read itself is unmapped; conflictive with isProperlyPaired */ isMateUnmapped() { return !!(this.flags & Constants.BAM_FMUNMAP) } /** @returns {boolean} true if the read is mapped to the reverse strand */ isReverseComplemented() { return !!(this.flags & Constants.BAM_FREVERSE) } /** @returns {boolean} true if the mate is mapped to the reverse strand */ isMateReverseComplemented() { return !!(this.flags & Constants.BAM_FMREVERSE) } /** @returns {boolean} true if this is read number 1 in a pair */ isRead1() { return !!(this.flags & Constants.BAM_FREAD1) } /** @returns {boolean} true if this is read number 2 in a pair */ isRead2() { return !!(this.flags & Constants.BAM_FREAD2) } /** @returns {boolean} true if this is a secondary alignment */ isSecondary() { return !!(this.flags & Constants.BAM_FSECONDARY) } /** @returns {boolean} true if this read has failed QC checks */ isFailedQc() { return !!(this.flags & Constants.BAM_FQCFAIL) } /** @returns {boolean} true if the read is an optical or PCR duplicate */ isDuplicate() { return !!(this.flags & Constants.BAM_FDUP) } /** @returns {boolean} true if this is a supplementary alignment */ isSupplementary() { return !!(this.flags & Constants.BAM_FSUPPLEMENTARY) } get cigarAndLength() { if (this.isSegmentUnmapped()) { return { length_on_ref: 0, CIGAR: '', } } const numCigarOps = this.num_cigar_ops let p = this.b0 + this.read_name_length const CIGAR = [] // check for CG tag by inspecting whether the CIGAR field contains a clip // that consumes entire seqLen let cigop = this.#dataView.getInt32(p, true) let lop = cigop >> 4 let op = CIGAR_DECODER[cigop & 0xf] if (op === 'S' && lop === this.seq_length) { // if there is a CG the second CIGAR field will be a N tag the represents // the length on ref p += 4 cigop = this.#dataView.getInt32(p, true) lop = cigop >> 4 op = CIGAR_DECODER[cigop & 0xf] if (op !== 'N') { console.warn('CG tag with no N tag') } return { CIGAR: this.tags.CG as string, length_on_ref: lop, } } else { let lref = 0 for (let c = 0; c < numCigarOps; ++c) { cigop = this.#dataView.getInt32(p, true) lop = cigop >> 4 op = CIGAR_DECODER[cigop & 0xf]! CIGAR.push(lop + op) // soft clip, hard clip, and insertion don't count toward the length on // the reference if (op !== 'H' && op !== 'S' && op !== 'I') { lref += lop } p += 4 } return { CIGAR: CIGAR.join(''), length_on_ref: lref, } } } get length_on_ref() { return this.cigarAndLength.length_on_ref } get CIGAR() { return this.cigarAndLength.CIGAR } get num_cigar_ops() { return this.flag_nc & 0xffff } get read_name_length() { return this.bin_mq_nl & 0xff } get num_seq_bytes() { return (this.seq_length + 1) >> 1 } get seq() { const p = this.b0 + this.read_name_length + this.num_cigar_ops * 4 const seqBytes = this.num_seq_bytes const len = this.seq_length const buf = [] let i = 0 for (let j = 0; j < seqBytes; ++j) { const sb = this.byteArray[p + j]! buf.push(SEQRET_DECODER[(sb & 0xf0) >> 4]) i++ if (i < len) { buf.push(SEQRET_DECODER[sb & 0x0f]) i++ } } return buf.join('') } // adapted from igv.js get pair_orientation() { if ( !this.isSegmentUnmapped() && !this.isMateUnmapped() && this.ref_id === this.next_refid ) { const s1 = this.isReverseComplemented() ? 'R' : 'F' const s2 = this.isMateReverseComplemented() ? 'R' : 'F' let o1 = ' ' let o2 = ' ' if (this.isRead1()) { o1 = '1' o2 = '2' } else if (this.isRead2()) { o1 = '2' o2 = '1' } const tmp = [] const isize = this.template_length if (isize > 0) { tmp[0] = s1 tmp[1] = o1 tmp[2] = s2 tmp[3] = o2 } else { tmp[2] = s1 tmp[3] = o1 tmp[0] = s2 tmp[1] = o2 } return tmp.join('') } return undefined } get bin_mq_nl() { return this.#dataView.getInt32(this.bytes.start + 12, true) } get flag_nc() { return this.#dataView.getInt32(this.bytes.start + 16, true) } get seq_length() { return this.#dataView.getInt32(this.bytes.start + 20, true) } get next_refid() { return this.#dataView.getInt32(this.bytes.start + 24, true) } get next_pos() { return this.#dataView.getInt32(this.bytes.start + 28, true) } get template_length() { return this.#dataView.getInt32(this.bytes.start + 32, true) } toJSON() { const data: Record<string, any> = {} for (const k of Object.keys(this)) { if (k.startsWith('_') || k === 'bytes') { continue } // @ts-ignore data[k] = this[k] } return data } } function cacheGetter<T>(ctor: { prototype: T }, prop: keyof T): void { const desc = Object.getOwnPropertyDescriptor(ctor.prototype, prop) if (!desc) { throw new Error('OH NO, NO PROPERTY DESCRIPTOR') } // eslint-disable-next-line @typescript-eslint/unbound-method const getter = desc.get if (!getter) { throw new Error('OH NO, NOT A GETTER') } Object.defineProperty(ctor.prototype, prop, { get() { const ret = getter.call(this) Object.defineProperty(this, prop, { value: ret }) return ret }, }) } cacheGetter(BamRecord, 'tags') cacheGetter(BamRecord, 'cigarAndLength') cacheGetter(BamRecord, 'seq') cacheGetter(BamRecord, 'qual')