/*
 * higlass
 * HiGlass Hi-C / genomic / large data viewer
 * 589 lines (530 loc) • 20.2 kB
 * JavaScript
 */
import { scaleLinear } from 'd3-scale';
import slugid from 'slugid';
import DenseDataExtrema1D from '../utils/DenseDataExtrema1D';
import DenseDataExtrema2D from '../utils/DenseDataExtrema2D';
import assert from '../utils/assert';
import dictValues from '../utils/dict-values';
import maxNonZero from '../utils/max-non-zero';
import minNonZero from '../utils/min-non-zero';
// Utils
import tts from '../utils/trim-trailing-slash';
// Services
import * as tileProxy from '../services/tile-proxy';
import { isResolutionsTilesetInfo } from '../utils/type-guards';
/** @import { PubSub } from 'pub-sub-es' */
/** @import { TilesetInfo, AbstractDataFetcher, TileSource, DataConfig, HandleTilesetInfoFinished } from '../types' */
/** @import { CompletedTileData, TileResponse } from '../services/worker' */
/**
* @typedef Tile
* @property {number} min_value
* @property {number} max_value
* @property {DenseDataExtrema1D | DenseDataExtrema2D} denseDataExtrema
* @property {number} minNonZero
* @property {number} maxNonZero
* @property {Array<number> | Float32Array} dense
* @property {string} dtype
* @property {string} server
* @property {number[]} tilePos
* @property {string} tilePositionId
* @property {string} tilesetUid
* @property {number} zoomLevel
*/
/** @typedef {Pick<Tile, 'zoomLevel' | 'tilePos' | 'tilePositionId'>} DividedTileA */
/** @typedef {Pick<Tile, 'zoomLevel' | 'tilePos' | 'tilePositionId' | 'dense' | 'denseDataExtrema' | 'minNonZero' | 'maxNonZero'>} DividedTileB */
/** @typedef {DividedTileA | DividedTileB} DividedTile */
/** @typedef {Omit<DataConfig, 'children'> & { children?: DataFetcher[], tilesetUid?: string, tilesetInfo: TilesetInfo }} ResolvedDataConfig */
/**
 * Type guard that narrows an array to a pair.
 *
 * @template T
 * @param {Array<T>} x - The array to inspect.
 * @returns {x is [T, T]} `true` only when `x` holds exactly two elements.
 */
function isTuple(x) {
  const { length: elementCount } = x;
  return elementCount === 2;
}
/**
 * Build the tile source a `DataFetcher` uses when the caller does not supply
 * one. Tile and tileset-info requests are delegated to the `tileProxy`
 * service; tileset registration is a POST to `<server>/register_url/`.
 *
 * @param {PubSub} pubSub - Forwarded to every `tileProxy` call.
 * @returns {TileSource<Tile>}
 */
function createDefaultTileSource(pubSub) {
  return {
    // Resolve with whatever the tile proxy returns for the request.
    fetchTiles: async (request) => {
      /** @type {Record<string, Tile>} */
      // @ts-expect-error - TODO: Need to resolve these types together
      const fetched = await tileProxy.fetchTilesDebounced(request, pubSub);
      return fetched;
    },
    // Adapt the callback-style `trackInfo` API to a promise.
    fetchTilesetInfo: ({ server, tilesetUid }) =>
      new Promise((onInfo, onError) => {
        tileProxy.trackInfo(server, tilesetUid, onInfo, onError, pubSub);
      }),
    // POST the file description as JSON and hand back the `fetch` promise.
    registerTileset: ({ server, url, filetype, coordSystem }) => {
      const endpoint = `${tts(server)}/register_url/`;
      const body = JSON.stringify({ fileurl: url, filetype, coordSystem });
      const headers = { 'Content-Type': 'application/json; charset=utf-8' };
      return fetch(endpoint, { method: 'POST', body, headers });
    },
  };
}
/**
 * Resolves tileset infos and tiles for a data config, either directly through
 * a tile source or by combining the results of child fetchers (e.g. dividing
 * one dataset by another).
 *
 * @implements {AbstractDataFetcher<Tile | DividedTile, ResolvedDataConfig>}
 */
export default class DataFetcher {
  /**
   * @param {DataConfig} dataConfig - Description of the data to fetch. When it
   *   is missing, an error is logged and the instance is left without
   *   `dataConfig`, `uuid` and `pubSub`.
   * @param {PubSub} pubSub - Event bus handed to the default tile source and
   *   to child fetchers.
   * @param {TileSource<Tile>} [tileSource] - Tile source to use instead of the
   *   default one built by `createDefaultTileSource`.
   */
  constructor(dataConfig, pubSub, tileSource) {
    this._tileSource = tileSource || createDefaultTileSource(pubSub);
    /** @type {boolean} */
    this.tilesetInfoLoading = true;

    if (!dataConfig) {
      // Trevor: This should probably throw?
      console.error('No dataconfig provided');
      return;
    }

    // Deep-copy the config so that replacing `children` with DataFetcher
    // instances below does not modify the caller's object.
    /** @type {ResolvedDataConfig} */
    this.dataConfig = JSON.parse(JSON.stringify(dataConfig));
    /** @type {string} */
    this.uuid = slugid.nice();
    /** @type {PubSub} */
    this.pubSub = pubSub;

    if (dataConfig.children) {
      // convert each child config into its own fetcher
      this.dataConfig.children = dataConfig.children.map(
        (c) => new DataFetcher(c, pubSub),
      );
    }
  }

  /**
   * We don't have a tilesetUid for this track. But we do have a url, filetype
   * and server. Using these, we can use the server to fulfill tile requests
   * from this dataset.
   *
   * @param {object} opts
   * @param {string} opts.server - The server api location (e.g. 'localhost:8000/api/v1')
   * @param {string} opts.url - The location of the data file (e.g. 'encode.org/my.file.bigwig')
   * @param {string} opts.filetype - The type of file being served (e.g. 'bigwig')
   * @param {string=} opts.coordSystem - The coordinate system being served (e.g. 'hg38')
   * @returns The result of the tile source's `registerTileset` call.
   */
  async registerFileUrl({ server, url, filetype, coordSystem }) {
    return this._tileSource.registerTileset({
      server,
      url,
      filetype,
      coordSystem,
    });
  }

  /**
   * Obtain tileset infos for all of the tilesets listed.
   *
   * If the data config has a server, url and filetype, the file is first
   * registered with the server and the returned uid is stored as
   * `dataConfig.tilesetUid`. A registration failure is only logged; `finished`
   * is not called in that case.
   *
   * @param {HandleTilesetInfoFinished} finished - A callback that will be called
   *   with the tileset info (see `tilesetInfoAfterRegister`).
   * @returns A promise that settles once the tileset info request has been
   *   started. It does not wait for `finished` to be called.
   */
  tilesetInfo(finished) {
    // if this track has a url, server and filetype
    // then we need to register those with the server
    const { server, url, filetype, coordSystem } = this.dataConfig;

    if (server && url && filetype) {
      return this.registerFileUrl({ server, url, filetype, coordSystem })
        .then((data) => data.json())
        .then((data) => {
          this.dataConfig.tilesetUid = data.uid;
          this.tilesetInfoAfterRegister(finished);
        })
        .catch((rejected) => {
          console.error('Error registering url', rejected);
        });
    }

    return new Promise((resolve) => {
      this.tilesetInfoAfterRegister(finished);
      // Settle like the registration branch does; previously this promise
      // stayed pending forever.
      resolve(undefined);
    });
  }

  /**
   * Obtain tileset infos for all of the tilesets listed
   *
   * If there is more than one tileset info, this function
   * should (not currently implemented) check if the tileset
   * infos have the same dimensions and then return a common
   * one.
   *
   * @param {HandleTilesetInfoFinished} finished - A callback that will be called
   *   when all tileset infos are loaded. It receives `null` when the config
   *   has neither children nor both a server and a tilesetUid, and
   *   `{ error }` when the request fails.
   */
  tilesetInfoAfterRegister(finished) {
    if (!this.dataConfig.children) {
      // this data source has no children so we
      // just need to retrieve one tileset info
      const { server, tilesetUid } = this.dataConfig;
      if (!server || !tilesetUid) {
        console.warn(
          'No dataConfig children, server or tilesetUid:',
          this.dataConfig,
        );
        finished(null);
      } else {
        this._tileSource
          .fetchTilesetInfo({ server, tilesetUid })
          .then((tilesetInfo) => {
            // tileset infos are indexed by tilesetUids, we can just resolve
            // that here before passing it back to the track
            this.dataConfig.tilesetInfo = tilesetInfo[tilesetUid];
            finished(tilesetInfo[tilesetUid], tilesetUid);
          })
          .catch((error) => {
            this.tilesetInfoLoading = false;
            finished({ error });
          });
      }
    } else {
      // this data source has children, so we need to wait to get
      // all of their tileset infos in order to return them to the track
      const promises = this.dataConfig.children.map(
        (x) =>
          /** @type {Promise<TilesetInfo>} */
          new Promise((resolve) => {
            x.tilesetInfo(resolve);
          }),
      );

      Promise.all(promises).then((values) => {
        // this is where we should check if all the children's tileset
        // infos match; for now only the first child's info is reported
        finished(values[0]);
      });
    }
  }

  /**
   * @param {string} tilesetUid - Uid of the tileset on the server
   * @param {string} tileId - The tileId of the tile
   * @returns {string} The full tile id that the server will parse.
   *
   * @example
   * ```javascript
   * // returns 'xyxx.0.0.0'
   * fullTileId('xyxx', '0.0.0');
   * ```
   */
  fullTileId(tilesetUid, tileId) {
    return `${tilesetUid}.${tileId}`;
  }

  /**
   * Fetch a set of tiles.
   *
   * Because the track shouldn't care about tileset ids, the tile ids
   * should just include positions and any necessary transforms.
   *
   * Section configs ('horizontal-section' / 'vertical-section') are delegated
   * to `fetchHorizontalSection`. A config without children is fetched
   * directly from the tile source. Otherwise every child is fetched and the
   * results are combined ('divided') or the first child's tiles are returned.
   *
   * @param {(tiles: Record<string, DividedTile | Tile>) => void} receivedTiles - A function to call once the tiles have been fetched
   * @param {string[]} tileIds - The tile ids to fetch
   * @returns {Promise<Record<string, DividedTile | Tile>>} The same tiles that
   *   are passed to `receivedTiles`, keyed by the requested tile ids.
   */
  fetchTilesDebounced(receivedTiles, tileIds) {
    if (this.dataConfig.type === 'horizontal-section') {
      return this.fetchHorizontalSection(receivedTiles, tileIds);
    }
    if (this.dataConfig.type === 'vertical-section') {
      return this.fetchHorizontalSection(receivedTiles, tileIds, true);
    }

    if (
      !this.dataConfig.children &&
      this.dataConfig.tilesetUid &&
      this.dataConfig.server
    ) {
      // no children, just return the fetched tiles as is
      const promise = this._tileSource.fetchTiles({
        id: slugid.nice(),
        server: this.dataConfig.server,
        tileIds: tileIds.map((x) => `${this.dataConfig.tilesetUid}.${x}`),
        options: this.dataConfig.options,
      });

      return /** @type {Promise<Record<string, Tile>>} */ (promise).then(
        (returnedTiles) => {
          const tilesetUid = dictValues(returnedTiles)[0].tilesetUid;
          /** @type {Record<string, Tile>} */
          const newTiles = {};

          // Re-key the tiles by the ids the track asked for (without the
          // tileset uid prefix) and remember that id on each tile.
          for (let i = 0; i < tileIds.length; i++) {
            const fullTileId = this.fullTileId(tilesetUid, tileIds[i]);

            returnedTiles[fullTileId].tilePositionId = tileIds[i];
            newTiles[tileIds[i]] = returnedTiles[fullTileId];
          }

          receivedTiles(newTiles);
          return newTiles;
        },
      );
    }

    // multiple child tracks, need to wait for all of them to
    // fetch their data before returning to the parent
    /** @type {Promise<Record<string, DividedTile | Tile>>[]} Tiles */
    const promises =
      this.dataConfig.children?.map(
        (x) =>
          /** @type {Promise<Record<string, Tile | DividedTile>>} */
          new Promise((resolve) => {
            x.fetchTilesDebounced(resolve, tileIds);
          }),
      ) ?? [];

    return Promise.all(promises).then((returnedTiles) => {
      // if we're trying to divide two datasets,
      if (this.dataConfig.type === 'divided' && isTuple(returnedTiles)) {
        const newTiles = this.makeDivided(returnedTiles, tileIds);
        receivedTiles(newTiles);
        return newTiles;
      }

      // assume we're just returning raw tiles
      console.warn(
        'Unimplemented dataConfig type. Returning first data source.',
        this.dataConfig,
      );
      receivedTiles(returnedTiles[0]);
      return returnedTiles[0];
    });
  }

  /**
   * Return an array consisting of the division of the numerator
   * array by the denominator array
   *
   * @param {ArrayLike<number>} numeratorData - An array of numerical values
   * @param {ArrayLike<number>} denominatorData - An array of numerical values
   *
   * @returns {Float32Array} An array with the length of `numeratorData`
   *   holding numerator / denominator; entries whose denominator is exactly 0
   *   are NaN.
   */
  divideData(numeratorData, denominatorData) {
    const result = new Float32Array(numeratorData.length);

    for (let i = 0; i < result.length; i++) {
      if (denominatorData[i] === 0.0) result[i] = Number.NaN;
      else result[i] = numeratorData[i] / denominatorData[i];
    }

    return result;
  }

  /*
   * Take a horizontal slice across the returned tiles at the
   * given position.
   *
   * Not implemented: takes no arguments and always returns null.
   *
   * @param {list} returnedTiles: The tiles returned from a fetch request
   * @param {Number} sliceYPos: The y position across which to slice
   */
  horizontalSlice(/* returnedTiles, sliceYPos */) {
    return null;
  }

  /**
   * Extract a slice from a matrix at a given position.
   *
   * @param {Array<number> | Float32Array} inputData - An array containing a matrix stored row-wise
   * @param {Array<number>} arrayShape - The shape of the array as
   *   [rows, columns], e.g. [256,256].
   * @param {number} sliceIndex - The index across which to take the slice
   * @param {number=} axis - The axis along which to take the slice. Falsy
   *   (the default) returns row `sliceIndex`; any truthy value returns column
   *   `sliceIndex`.
   * @returns {Array<number> | Float32Array} an array corresponding to a slice
   *   of this matrix. Rows are copies of the same type as `inputData`;
   *   columns are plain arrays with one entry per row.
   */
  extractDataSlice(inputData, arrayShape, sliceIndex, axis) {
    const [rowCount, columnCount] = arrayShape;

    if (!axis) {
      return inputData.slice(
        columnCount * sliceIndex,
        columnCount * (sliceIndex + 1),
      );
    }

    // In a row-major matrix, consecutive entries of one column are
    // `columnCount` elements apart and there is one entry per row. The
    // previous implementation swapped the two dimensions, which was only
    // correct for square matrices.
    const column = new Array(rowCount);
    for (
      let row = 0, i = sliceIndex;
      row < rowCount && i < inputData.length;
      row++, i += columnCount
    ) {
      column[row] = inputData[i];
    }

    return column;
  }

  /**
   * Fetch a horizontal section of a 2D dataset
   *
   * The requested 1D tile ids (`zoom.x`) are translated to 2D tile ids
   * (`zoom.x.y`, sorted so the smaller position comes first), the 2D tiles
   * are fetched, and one row or column of each tile is returned as a 1D tile.
   *
   * @param {(tiles: Record<string, Tile>) => void} receivedTiles - A function to call once the tiles have been fetched
   * @param {string[]} tileIds - The tile ids to fetch
   * @param {boolean=} vertical - Whether to fetch a vertical section
   * @returns {Promise<Record<string, Tile>>} The sliced tiles keyed by the
   *   requested tile ids.
   */
  fetchHorizontalSection(receivedTiles, tileIds, vertical = false) {
    // We want to take a horizontal section of a 2D dataset
    // that means that a 1D track is requesting data from a 2D source
    // because the 1D track only requests 1D tiles, we need to calculate
    // the 2D tile from which to take the slice
    /** @type {string[]} */
    const newTileIds = [];
    /** @type {boolean[]} */
    const mirrored = [];

    const { slicePos, tilesetInfo, server } = this.dataConfig;
    // NOTE(review): this is a truthiness check, so a slicePos of 0 is
    // rejected as well — confirm whether 0 is a valid slice position.
    assert(slicePos, 'No slice position in dataConfig.');
    assert(server, 'No server in dataConfig.');
    assert(tilesetInfo, 'No tilesetInfo in dataConfig.');

    for (const tileId of tileIds) {
      const parts = tileId.split('.');
      const zoomLevel = +parts[0];
      const xTilePos = +parts[1];

      // this is a dummy scale that we'll use to fetch tile positions
      // along the y-axis of the 2D dataset (we already have the x positions
      // from the track that is querying this data)
      const scale = scaleLinear().domain([slicePos, slicePos]);

      // there's two different ways of calculating tile positions
      // this needs to be consolidated into one function eventually
      let yTiles = [];

      if (isResolutionsTilesetInfo(tilesetInfo)) {
        const sortedResolutions = tilesetInfo.resolutions
          .map((x) => +x)
          .sort((a, b) => b - a);

        yTiles = tileProxy.calculateTilesFromResolution(
          sortedResolutions[zoomLevel],
          scale,
          tilesetInfo.min_pos[vertical ? 1 : 0],
          tilesetInfo.max_pos[vertical ? 1 : 0],
        );
      } else {
        yTiles = tileProxy.calculateTiles(
          zoomLevel,
          scale,
          tilesetInfo.min_pos[vertical ? 1 : 0],
          tilesetInfo.max_pos[vertical ? 1 : 0],
          tilesetInfo.max_zoom,
          tilesetInfo.max_width,
        );
      }

      const sortedPosition = [xTilePos, yTiles[0]].sort((a, b) => a - b);

      // make note of whether we reversed the x and y tile positions
      if (sortedPosition[0] === xTilePos) {
        mirrored.push(false);
      } else {
        mirrored.push(true);
      }

      const newTileId = `${zoomLevel}.${sortedPosition[0]}.${sortedPosition[1]}`;
      newTileIds.push(newTileId);
      // we may need to add something about the data transform
    }

    // actually fetch the new tileIds
    const promise = this._tileSource.fetchTiles({
      id: slugid.nice(),
      server: server,
      tileIds: newTileIds.map((x) => `${this.dataConfig.tilesetUid}.${x}`),
    });

    return promise.then((returnedTiles) => {
      // we've received some new tiles, but they're 2D
      // we need to extract the row corresponding to the data we need
      const tilesetUid = dictValues(returnedTiles)[0].tilesetUid;
      /** @type {Record<string, Tile>} */
      const newTiles = {};

      for (let i = 0; i < newTileIds.length; i++) {
        const parts = newTileIds[i].split('.');
        const zoomLevel = +parts[0];
        const xTilePos = +parts[1];
        const yTilePos = +parts[2];

        const sliceIndex = tileProxy.calculateTileAndPosInTile(
          tilesetInfo,
          // @ts-expect-error - This is undefined for legacy tilesets, but
          // `calculateTileAndPosInTile` ignores this argument with `resolutions`.
          // We should probably refactor `calculateTileAndPosInTile` to just take
          // the `tilesetInfo` object.
          tilesetInfo.max_width,
          tilesetInfo.min_pos[1],
          zoomLevel,
          +slicePos,
        )[1];

        const fullTileId = this.fullTileId(tilesetUid, newTileIds[i]);
        const tile = returnedTiles[fullTileId];

        let dataSlice = null;

        if (xTilePos === yTilePos) {
          // this is a tile along the diagonal that we have to mirror:
          // the row and the column at the slice index are summed
          dataSlice = this.extractDataSlice(tile.dense, [256, 256], sliceIndex);
          const mirroredDataSlice = this.extractDataSlice(
            tile.dense,
            [256, 256],
            sliceIndex,
            1,
          );
          for (let j = 0; j < dataSlice.length; j++) {
            dataSlice[j] += mirroredDataSlice[j];
          }
        } else if (mirrored[i]) {
          // this tile is in the upper right triangle but the data is only available for
          // the lower left so we have to mirror it
          dataSlice = this.extractDataSlice(
            tile.dense,
            [256, 256],
            sliceIndex,
            1,
          );
        } else {
          dataSlice = this.extractDataSlice(tile.dense, [256, 256], sliceIndex);
        }

        const newTile = {
          // @ts-expect-error - this is ok because float32 array can be spread as number[]
          min_value: Math.min.apply(null, dataSlice),
          // @ts-expect-error - this is ok because float32 array can be spread as number[]
          max_value: Math.max.apply(null, dataSlice),
          denseDataExtrema: new DenseDataExtrema1D(dataSlice),
          minNonZero: minNonZero(dataSlice),
          maxNonZero: maxNonZero(dataSlice),
          dense: dataSlice,
          dtype: tile.dtype,
          server: tile.server,
          tilePos: mirrored[i] ? [yTilePos] : [xTilePos],
          tilePositionId: tileIds[i],
          tilesetUid,
          zoomLevel: tile.zoomLevel,
        };

        newTiles[tileIds[i]] = newTile;
      }

      receivedTiles(newTiles);
      return newTiles;
    });
  }

  /**
   * Divide the tiles of the first tileset by the matching tiles of the second.
   *
   * Zoom level and tile position are taken from the first (numerator) tile.
   * When either tile has no `dense` data — including when the second tileset
   * or one of its tiles is missing — the resulting tile carries only
   * `zoomLevel`, `tilePos` and `tilePositionId`.
   *
   * @typedef {{ zoomLevel: number, tilePos: number[], dense?: ArrayLike<number> }} Dividable
   * @param {[Record<string, Dividable>, Record<string, Dividable>]} returnedTiles
   * @param {string[]} tileIds
   * @returns {Record<string, DividedTile>} Divided tiles keyed by tile id
   *   (without the tileset uid).
   */
  makeDivided(returnedTiles, tileIds) {
    if (returnedTiles.length < 2) {
      console.warn(
        'Only one tileset specified for a divided datafetcher:',
        this.dataConfig,
      );
    }

    const [numeratorTiles, denominatorTiles] = returnedTiles;

    /** @type {Record<string, DividedTile>} */
    const newTiles = {};

    for (const tileId of tileIds) {
      const { zoomLevel, tilePos, dense: denseA } = numeratorTiles[tileId];
      // The denominator set (or this tile in it) may be absent; treat that
      // as "nothing to divide by" instead of throwing after the warning above.
      const denseB = denominatorTiles?.[tileId]?.dense;

      /** @type {DividedTile} */
      let newTile = {
        zoomLevel,
        tilePos,
        tilePositionId: tileId,
      };

      if (denseA && denseB) {
        const newData = this.divideData(denseA, denseB);

        // 2D tiles have a two-element tile position, 1D tiles a single one
        const dde =
          tilePos.length === 2
            ? new DenseDataExtrema2D(newData)
            : new DenseDataExtrema1D(newData);

        newTile = {
          dense: newData,
          denseDataExtrema: dde,
          minNonZero: minNonZero(newData),
          maxNonZero: maxNonZero(newData),
          zoomLevel,
          tilePos,
          tilePositionId: tileId,
        };
      }

      // returned ids will be indexed by the tile id and won't include the
      // tileset uid
      newTiles[tileId] = newTile;
    }

    return newTiles;
  }
}