higlass
Version:
HiGlass Hi-C / genomic / large data viewer
388 lines (343 loc) • 9.94 kB
JavaScript
import genbankParser from 'genbank-parser';
import slugid from 'slugid';
import decompress from '../utils/decompress';
/** @import { AbstractDataFetcher } from '../types' */
/** @typedef {{ start: number, end: number, type: 'filler', strand: "+" | "-" }} FillerSegment */
/**
 * Merge nearby segments into coarse "filler" segments.
 *
 * Accepts any list whose elements carry { start, end } fields and returns a
 * new list of filler segments. A segment is fused into the running filler
 * when its start is less than `MAX_DIST_BETWEEN / scale` past the running
 * end; otherwise the running filler is emitted and a new one is started.
 *
 * Segments are processed in ascending `start` order. The input does not have
 * to be pre-sorted: a sorted copy is made and the caller's list is left
 * untouched.
 *
 * @param {ArrayLike<{ start: number, end: number }>} segments
 * @param {number} scale Divisor applied to the merge distance; a larger
 *   value means only closer segments are merged.
 * @param {"+" | "-"} strand Strand label stamped on every returned segment.
 * @returns {Array<FillerSegment>} Collapsed segments, ordered by start.
 */
function collapse(segments, scale, strand) {
  // the maximum distance we allow between segments before collapsing them
  const MAX_DIST_BETWEEN = 5;

  // no segments in, no segments out
  if (!segments.length) {
    return [];
  }

  // The sweep below only works on start-ordered input. Sorting a copy keeps
  // the result correct for callers that hand us an unordered list and avoids
  // reordering their array.
  const ordered = Array.from(segments).sort((a, b) => a.start - b.start);
  const maxGap = MAX_DIST_BETWEEN / scale;

  /** @type {Array<FillerSegment>} */
  const collapsed = [];

  // start with the first segment
  let currStart = ordered[0].start;
  let currEnd = ordered[0].end;

  for (let i = 1; i < ordered.length; i++) {
    const segment = ordered[i];
    if (segment.start < currEnd + maxGap) {
      // within merging distance -- extend the current filler (a segment
      // nested inside the current one must not shrink it, hence max)
      currEnd = Math.max(currEnd, segment.end);
    } else {
      // outside of the merging distance: emit the current filler and start
      // a new one from this segment
      collapsed.push({
        type: 'filler',
        start: currStart,
        end: currEnd,
        strand,
      });
      currStart = segment.start;
      currEnd = segment.end;
    }
  }

  // add the final segment
  collapsed.push({
    type: 'filler',
    start: currStart,
    end: currEnd,
    strand,
  });

  return collapsed;
}
/**
 * Randomly reorder an array in place (Fisher-Yates, driven by Math.random).
 *
 * @template T
 * @param {Array<T>} a The array whose items are shuffled.
 * @returns {Array<T>} The same array instance, now shuffled.
 */
function shuffle(a) {
  let remaining = a.length;
  // Walk from the last slot down to index 1, swapping each slot with a
  // randomly chosen slot at or before it.
  while (remaining > 1) {
    remaining -= 1;
    const pick = Math.floor(Math.random() * (remaining + 1));
    [a[remaining], a[pick]] = [a[pick], a[remaining]];
  }
  return a;
}
/** @typedef {import('genbank-parser').ParsedGenbank["features"][number]} GenbankFeature */
/**
* @typedef FillerGeneAnnotation
* @prop {number} xStart
* @prop {number} xEnd
* @prop {'+' | '-'} strand
* @prop {string} uid
* @prop {'filler'} type
* @prop {[]} fields
*/
/**
* @typedef CompleteGeneAnnotation
* @prop {number} xStart
* @prop {number} xEnd
* @prop {'+' | '-'} strand
* @prop {number} chrOffset
* @prop {number} importance
* @prop {string} uid
* @prop {string=} type
* @prop {[
* chr: 'chrom',
* start: number,
* end: number,
* name: string,
* importance: number,
* strand: '+' | '-',
* _unknown0: string,
* _unknown1: string,
* type: string,
* name: string,
* start: string,
* end: string,
* start: string,
* end: string,
* ]} fields
*/
/** @typedef {CompleteGeneAnnotation | FillerGeneAnnotation} GeneAnnotation */
/**
 * Type guard: true when the value's `type` field is the string 'filler'.
 *
 * @param {GenbankFeature | FillerSegment} x
 * @returns {x is FillerSegment}
 */
function isFillerSegment(x) {
  const { type } = x;
  return type === 'filler';
}
/**
 * Build a higlass gene annotation from a genbank feature or from a filler
 * segment.
 *
 * Every annotation gets a fresh uid. Filler segments keep their own strand
 * label and carry an empty `fields` list. For genbank features a strand
 * value of 1 becomes '+' and anything else becomes '-', and `importance`
 * is the feature length (end - start).
 *
 * @param {GenbankFeature | FillerSegment} gb
 * @returns {GeneAnnotation}
 */
function genbankFeatureToHiGlassGeneAnnotation(gb) {
  const uid = slugid.nice();
  const { start, end } = gb;

  if (isFillerSegment(gb)) {
    // this is annotation that was generated by collapsing genes and is
    // only meant to show that there is something there.
    return {
      xStart: start,
      xEnd: end,
      strand: gb.strand,
      fields: [],
      type: 'filler',
      uid,
    };
  }

  const strand = gb.strand === 1 ? '+' : '-';
  const length = end - start;
  const startText = start.toString();
  const endText = end.toString();

  return {
    xStart: start,
    xEnd: end,
    strand,
    chrOffset: 0,
    importance: length,
    uid,
    type: gb.type,
    fields: [
      'chrom',
      start,
      end,
      gb.name,
      length,
      strand,
      '',
      '',
      gb.type,
      gb.name,
      startText,
      endText,
      startText,
      endText,
    ],
  };
}
/**
 * Parse genbank text and extract the displayable features of its first
 * record.
 *
 * Features of type 'source' are dropped. The remaining features are returned
 * in random order (see `shuffle`); callers that need positional order must
 * sort them themselves. The feature list inside the returned `json` is not
 * modified, because filtering produces a separate array.
 *
 * @param {string} gbText
 * @returns {{
 *   json: import('genbank-parser').ParsedGenbank[],
 *   features: GenbankFeature[],
 * }}
 * @throws {Error} If the parser finds no record in `gbText`.
 */
function gbToJsonAndFeatures(gbText) {
  const gbJson = genbankParser(gbText);

  // Fail with a clear message rather than a TypeError on `gbJson[0]`.
  if (!gbJson.length) {
    throw new Error('GenBank text contained no records');
  }

  // No sort here: the list is shuffled right away, so any ordering applied
  // beforehand would be discarded.
  const features = shuffle(
    gbJson[0].features.filter((f) => f.type !== 'source'),
  );

  return { json: gbJson, features };
}
/**
* @typedef GenbankDataConfig
* @prop {string=} url
* @prop {string=} text
*/
/**
* @typedef {Array<GeneAnnotation> & { tilePositionId?: string }} GenbankTile
*/
/**
 * Data fetcher that serves gene-annotation tiles from a genbank record.
 *
 * The record is supplied either inline (`dataConfig.text`) or by URL
 * (`dataConfig.url`). A URL ending in `.gz` is passed through `decompress`
 * with `format: 'gzip'` before being read as text. Loading and parsing start
 * in the constructor; every method waits on the resulting promise.
 *
 * @implements {AbstractDataFetcher<GenbankTile, GenbankDataConfig>}
 */
class GBKDataFetcher {
  /**
   * @param {GenbankDataConfig} dataConfig
   * @throws {Error} If neither `url` nor `text` is set.
   */
  constructor(dataConfig) {
    /** @type {GenbankDataConfig} */
    this.dataConfig = dataConfig;
    /** @type {string} */
    this.trackUid = slugid.nice();
    /** @type {string} */
    this.errorTxt = '';
    /** @type {boolean} */
    this.tilesetInfoLoading = false;

    /** @type {Promise<string>} */
    let textPromise;
    if (dataConfig.url) {
      const { url } = dataConfig;
      const isGzipped = url.endsWith('.gz');
      textPromise = fetch(url, {
        mode: 'cors',
        redirect: 'follow',
        method: 'GET',
      }).then((originalResponse) => {
        // fetch() only rejects on network failure; without this check an
        // HTTP error page would be handed to the genbank parser.
        if (!originalResponse.ok) {
          throw new Error(
            `Failed to fetch ${url}: ${originalResponse.status} ${originalResponse.statusText}`,
          );
        }
        const normalizedResponse = isGzipped
          ? decompress(originalResponse, { format: 'gzip' })
          : originalResponse;
        return normalizedResponse.text();
      });
    } else if (dataConfig.text) {
      textPromise = Promise.resolve(dataConfig.text);
    } else {
      throw new Error('No data or URL specified');
    }

    /** @type {Promise<ReturnType<typeof gbToJsonAndFeatures>>} */
    this.gbDataPromise = textPromise.then((text) => gbToJsonAndFeatures(text));
  }

  /**
   * Describe the tileset derived from the first genbank record.
   *
   * On failure the promise resolves to `undefined` and, if a callback was
   * given, it is called with an `{ error }` object instead.
   *
   * @param {import('../types').HandleTilesetInfoFinished} [callback]
   * @returns {Promise<import('../types').LegacyTilesetInfo | undefined>}
   */
  tilesetInfo(callback) {
    this.tilesetInfoLoading = true;

    return this.gbDataPromise
      .then((gbData) => {
        this.tilesetInfoLoading = false;

        const TILE_SIZE = 1024;
        const { size } = gbData.json[0];

        /** @satisfies {import('../types').LegacyTilesetInfo} */
        const retVal = {
          name: `genbank-${this.trackUid}`,
          tile_size: TILE_SIZE,
          // A record shorter than one tile would otherwise produce a
          // negative (or -0) zoom level; zoom 0 already covers it.
          max_zoom: Math.max(
            0,
            Math.ceil(Math.log(size / TILE_SIZE) / Math.log(2)),
          ),
          max_width: size,
          min_pos: [0],
          max_pos: [size],
        };

        if (callback) {
          callback(retVal);
        }

        return retVal;
      })
      .catch((err) => {
        this.tilesetInfoLoading = false;

        if (callback) {
          callback({
            error: `Error parsing genbank: ${err}`,
          });
        }

        return undefined;
      });
  }

  /**
   * Fetch several tiles at once. Tile ids have the form `"<zoom>.<position>"`;
   * ids whose zoom or position is not an integer are skipped with a warning.
   *
   * @param {(tiles: Record<string, GenbankTile>) => void} receivedTiles
   *   Called once with all fetched tiles, keyed by tile id.
   * @param {string[]} tileIds
   * @returns {Promise<Record<string, GenbankTile>>}
   */
  async fetchTilesDebounced(receivedTiles, tileIds) {
    /** @type {string[]} */
    const validTileIds = [];
    /** @type {Promise<GeneAnnotation[]>[]} */
    const tilePromises = [];

    for (const tileId of tileIds) {
      const parts = tileId.split('.');
      const z = Number.parseInt(parts[0], 10);
      const x = Number.parseInt(parts[1], 10);

      if (Number.isNaN(x) || Number.isNaN(z)) {
        console.warn('Invalid tile zoom or position:', z, x);
        continue;
      }

      validTileIds.push(tileId);
      tilePromises.push(this.tile(z, x));
    }

    const values = await Promise.all(tilePromises);

    /** @type {Record<string, GenbankTile>} */
    const tiles = {};
    for (let i = 0; i < values.length; i++) {
      const validTileId = validTileIds[i];
      tiles[validTileId] = values[i];
      tiles[validTileId].tilePositionId = validTileId;
    }

    receivedTiles(tiles);
    return tiles;
  }

  /**
   * Build the annotations for one tile: up to TILE_CAPACITY features that
   * touch the tile, followed by the collapsed filler segments for the plus
   * and minus strands.
   *
   * @param {number} z Zoom level.
   * @param {number} x Tile position at that zoom level.
   * @returns {Promise<GeneAnnotation[]>}
   * @throws {Error} If the tileset info could not be computed.
   */
  async tile(z, x) {
    const [tsInfo, gbData] = await Promise.all([
      this.tilesetInfo(),
      this.gbDataPromise,
    ]);

    if (!tsInfo) {
      throw new Error('No tileset info');
    }

    const tileWidth = +tsInfo.max_width / 2 ** +z;

    // get the bounds of the tile
    const minX = tsInfo.min_pos[0] + x * tileWidth;
    const maxX = tsInfo.min_pos[0] + (x + 1) * tileWidth;

    // collapse() documents start-sorted input, but gbData.features is kept
    // in shuffled order. filter() returns a fresh array, so sorting it
    // leaves the shared feature list untouched.
    const filtered = gbData.features
      .filter((v) => v.end > minX && v.start < maxX)
      .sort((a, b) => a.start - b.start);

    const scaleFactor = 1024 / 2 ** (tsInfo.max_zoom - z);

    /** @type {Array<FillerSegment>} */
    const collapsedPlus = collapse(
      filtered.filter((v) => v.strand === 1),
      scaleFactor,
      '+',
    );
    /** @type {Array<FillerSegment>} */
    const collapsedMinus = collapse(
      filtered.filter((v) => v.strand !== 1),
      scaleFactor,
      '-',
    );

    /** @type {Array<GenbankFeature | FillerSegment>} */
    const values = [];
    const TILE_CAPACITY = 20;

    // fill the tile with entries that are within it; this walks the stored
    // (shuffled) list rather than the sorted copy, and stops at capacity
    for (const feature of gbData.features) {
      if (values.length >= TILE_CAPACITY) {
        break;
      }
      if (feature.end >= minX && feature.start <= maxX) {
        values.push(feature);
      }
    }

    return [values, collapsedPlus, collapsedMinus].flatMap((v) =>
      v.map(genbankFeatureToHiGlassGeneAnnotation),
    );
  }
}
export default GBKDataFetcher;