ucsc-xena-client
Version:
UCSC Xena Client. Functional genomics visualizations.
539 lines (461 loc) • 18.7 kB
JavaScript
/*eslint camelcase: 0, no-multi-spaces: 0, no-mixed-spaces-and-tabs: 0 */
;
// Shallow-merge helper. Uses the native Object.assign when it exists;
// otherwise copies each source's own enumerable properties onto the target,
// left to right, and returns the target.
var _extends = Object.assign || function (target) {
    for (var argIndex = 1; argIndex < arguments.length; argIndex += 1) {
        var source = arguments[argIndex];
        for (var key in source) {
            // Skip properties inherited through the prototype chain.
            if (!Object.prototype.hasOwnProperty.call(source, key)) {
                continue;
            }
            target[key] = source[key];
        }
    }
    return target;
};
// Array-destructuring helper, called as _slicedToArray(arr, i).
// - A real array is returned unchanged (not copied).
// - Any other iterable is drained into a new array; when `i` is truthy,
//   collection stops as soon as `i` items have been gathered.
// - Anything else throws a TypeError.
// If iteration stops before the iterator reports done (early break or an
// exception), the iterator's "return" method is called when present, and an
// error caught during iteration is rethrown afterwards.
var _slicedToArray = function () { function sliceIterator(arr, i) { var _arr = []; var _n = true; var _d = false; var _e = undefined; try { for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) { _arr.push(_s.value); if (i && _arr.length === i) break; } } catch (err) { _d = true; _e = err; } finally { try { if (!_n && _i["return"]) _i["return"](); } finally { if (_d) throw _e; } } return _arr; } return function (arr, i) { if (Array.isArray(arr)) { return arr; } else if (Symbol.iterator in Object(arr)) { return sliceIterator(arr, i); } else { throw new TypeError("Invalid attempt to destructure non-iterable instance"); } }; }();
// Spread helper: always hands back a fresh array. Real arrays are copied
// index by index; everything else is delegated to Array.from.
function _toConsumableArray(arr) {
    if (!Array.isArray(arr)) {
        return Array.from(arr);
    }
    var copy = Array(arr.length);
    for (var pos = 0; pos < arr.length; pos += 1) {
        copy[pos] = arr[pos];
    }
    return copy;
}
var Rx = require('./rx');
var _ = require('./underscore_ext');
var _require = require('./permuteCase'),
permuteCase = _require.permuteCase,
permuteBitCount = _require.permuteBitCount,
prefixBitLimit = _require.prefixBitLimit;
// Load all query files as a map of strings.
var qs = require('./loadXenaQueries');
// Limit passed to prefixBitLimit by prefixPatterns when building the
// case-permuted patterns for a prefix search.
var maxPermute = 7; // max number of chars to permute for case-insensitive match
///////////////////////////////////////////////////////
// support for hg18/GRCh36, hg19/GRCh37, hg38/GRCh38, mm9, mm10
// Xena refGene is the composite gene model we build, NOT literally "refGene annotation"
// Assembly name -> {host, name} of the composite gene-model dataset to query.
// Alias assemblies (e.g. hg19/GRCh37) point at the same dataset name but get
// their own entry object.
var refGene = (function () {
    var hub = 'https://reference.xenahubs.net';
    // Build a fresh {host, name} record for one dataset on the reference hub.
    function entry(name) {
        return { host: hub, name: name };
    }
    return {
        hg18: entry('refgene_good_hg18'),
        GRCh36: entry('refgene_good_hg18'),
        hg19: entry('gencode_good_hg19_V24lift37'),
        GRCh37: entry('gencode_good_hg19_V24lift37'),
        hg38: entry('gencode_good_hg38'),
        GRCh38: entry('gencode_good_hg38'),
        mm9: entry('refgene_good_mm9'),
        mm10: entry('gencode_good_mm10')
    };
})();
// support for hg18/GRCh36, hg19/GRCh37, hg38/GRCh38
// Assembly name -> {host, name} of the transcript dataset to query.
// Alias assemblies share a dataset name but get their own entry object.
var transcript = (function () {
    var hub = 'https://reference.xenahubs.net';
    // Build a fresh {host, name} record for one dataset on the reference hub.
    function entry(name) {
        return { host: hub, name: name };
    }
    return {
        hg18: entry('refGene_hg18'),
        GRCh36: entry('refGene_hg18'),
        hg19: entry('wgEncodeGencodeBasicV24lift37'),
        GRCh37: entry('wgEncodeGencodeBasicV24lift37'),
        hg38: entry('wgEncodeGencodeBasicV24'),
        GRCh38: entry('wgEncodeGencodeBasicV24')
    };
})();
///////////////////////////////////////////////////////
// Serialization helpers
// Decode the JSON text held in an ajax result's `response` field.
var jsonResp = function jsonResp(xhr) {
    var body = xhr.response;
    return JSON.parse(body);
};
/**
 * Serialize a value as a double-quoted string literal for a query.
 *
 * null and undefined become the bare token `nil`. Any other value is
 * coerced to a string; backslashes and double quotes inside it are
 * backslash-escaped so the value cannot terminate the literal early.
 * NOTE(review): assumes the server-side reader accepts \" and \\ inside
 * string literals (Clojure/EDN-style escaping) - confirm against the hub.
 *
 * @param {*} s value to serialize
 * @returns {string} `nil`, or the escaped value wrapped in double quotes
 */
var quote = function quote(s) {
    if (s == null) {
        return 'nil';
    }
    // '' + s keeps the same string coercion the plain concatenation used.
    var escaped = ('' + s).replace(/["\\]/g, '\\$&');
    return '"' + escaped + '"';
};
// Render a value by calling its own toString method (throws for
// null/undefined, which have none).
var toString = function toString(x) {
    var text = x.toString();
    return text;
};
// Join the elements of a list with single spaces. The formatter is chosen
// by looking only at the first element: a number selects toString for the
// whole list, anything else selects quote.
var sep = function sep(l) {
    var render;
    if (typeof _.get(l, 0) === 'number') {
        render = toString;
    } else {
        render = quote;
    }
    return _.map(l, render).join(' ');
};
// Format a list as a bracketed, space-separated vector: [a b c].
var arrayfmt = function arrayfmt(l) {
    var inner = sep(l);
    return '[' + inner + ']';
};
// Map anything that coerces to NaN onto null; pass every other value
// through unchanged. The global (coercing) isNaN is used, so the string
// "NaN", undefined, and other non-numeric strings all become null.
var nanstr = function nanstr(v) {
    if (isNaN(v)) {
        return null;
    }
    return v;
};
///////////////////////////////////////////////////////
// Decode a dsID (a JSON string with `host` and `name` fields) into the
// pair [host, name].
function parseDsID(dsID) {
    var parsed = JSON.parse(dsID);
    return [parsed.host, parsed.name];
}
// Heuristic dsID check: true when the first element/character is '{'.
var isDsID = function isDsID(x) {
    var first = x[0];
    return first === '{';
};
// Adapt a function whose leading parameters are (host, dataset, ...) so it
// can also be called as (dsID, ...). The first argument is tested with the
// isDsID heuristic; a dsID is expanded to [host, name] before the call,
// anything else is passed through as-is. Remaining arguments are forwarded
// unchanged.
var dsIDFn = function dsIDFn(fn) {
    return function (hostOrDsID) {
        var rest = Array.prototype.slice.call(arguments, 1);
        var lead;
        if (isDsID(hostOrDsID)) {
            lead = _toConsumableArray(parseDsID(hostOrDsID));
        } else {
            lead = [hostOrDsID];
        }
        return fn.apply(undefined, lead.concat(rest));
    };
};
///////////////////////////////////////////////////////
// Transforms of responses from the xena server.
// Build a name -> label map from a feature list. The label is the
// feature's longtitle when it is truthy, otherwise its name.
function indexFeatures(features) {
    var pairs = _.map(features, function (f) {
        var label = f.longtitle || f.name;
        return [f.name, label];
    });
    return _.object(pairs);
}
// Build a name -> codes map. A truthy `code` string is split on tabs into
// a list; a falsy `code` is stored unchanged.
function indexCodes(codes) {
    var pairs = _.map(codes, function (row) {
        var code = row.code;
        return [row.name, code ? code.split('\t') : code];
    });
    return _.object(pairs);
}
// Normalize a list of dataset records returned by `host`. For each record:
// parse its `text` JSON (falling back to {} when that parses to a falsy
// value), overlay the record's other fields, then attach a dsID, a label
// (label, else name) and the parsed `pmtext` (null when pmtext is falsy).
function datasetListTransform(host, list) {
    return _.map(list, function (ds) {
        var meta = JSON.parse(ds.text) || {};
        var probemapMeta = ds.pmtext ? JSON.parse(ds.pmtext) : null;
        // merge curated fields over raw metadata
        // XXX note that we're case sensitive on raw metadata
        var merged = _.extend(meta, _.dissoc(ds, 'text'));
        var derived = {
            dsID: JSON.stringify({ host: host, name: merged.name }),
            label: merged.label || merged.name,
            probemapMeta: probemapMeta
        };
        return _.extend(merged, derived);
    });
}
// XXX Can't rewrite :samples in sparseData until we can do 'distinct', or 'keys' for category fields, on the server.
// XXX "position", "position (2)" is really horrible, in refGeneExons. Need
// better naming for position fields. Might want to allow renaming fields,
// with [:old-name :new-name] Also, cds doesn't really need to be indexed.
// XXX Should we write a compact collection type, where columns are in typed
// arrays? Maybe with codes? Or run-length encoding?
// Index a list of feature rows by their `name` field. When names repeat,
// the last row wins.
function indexFeatureDetail(features) {
    var byName = {};
    return _.reduce(features, function (index, feature) {
        index[feature.name] = feature;
        return index;
    }, byName);
}
// Rename the fields of each collated mutation row to the client's naming.
// The two VAF fields go through nanstr, so values that coerce to NaN
// become null.
function mutationAttrs(list) {
    return _.map(list, function (row) {
        var attrs = {};
        attrs.sample = row.sampleID;
        attrs.chr = row.position.chrom;
        attrs.start = row.position.chromstart;
        attrs.end = row.position.chromend;
        // Only the first entry of row.genes is kept.
        attrs.gene = _.getIn(row, ['genes', 0]);
        attrs.reference = row.ref;
        attrs.alt = row.alt;
        attrs.altGene = row.altGene;
        attrs.effect = row.effect;
        attrs.aminoAcid = row['amino-acid'];
        attrs.rnaVaf = nanstr(row['rna-vaf']);
        attrs.dnaVaf = nanstr(row['dna-vaf']);
        return attrs;
    });
}
// {field: [value, ...], ...} -> [{field: value, ...}, ...]
/**
 * Turn a column-oriented table into a list of row objects.
 *
 * The row count is taken from the first column; every row object gets one
 * property per column, read at that row's index. A table with no columns
 * yields an empty list instead of throwing.
 *
 * @param {Object} rows map of field name to an array of values
 * @returns {Array<Object>} one object per row
 */
function collateRows(rows) {
    var keys = Object.keys(rows);
    // No columns means no rows; without this guard the length lookup on
    // the (missing) first column would throw.
    if (keys.length === 0) {
        return [];
    }
    var count = rows[keys[0]].length;
    var out = [];
    for (var i = 0; i < count; i++) {
        var row = {};
        for (var k = 0; k < keys.length; k++) {
            row[keys[k]] = rows[keys[k]][i];
        }
        out.push(row);
    }
    return out;
}
// {:sampleid ["id0", "id1", ...], chromstart: [123, 345...], ...}
// Convert a sparse-data response into {rows, samplesInResp}: resp.rows is
// collated into row objects and renamed via mutationAttrs; resp.samples is
// de-duplicated.
function indexMutations(resp) {
    // XXX The query for samples is returning every row in the dataset,
    // rather than distinct sampleIDs from the dataset. We need a
    // 'distinct' function for xena-query.
    var collated = collateRows(resp.rows);
    var result = {};
    result.rows = mutationAttrs(collated);
    result.samplesInResp = _.uniq(resp.samples); // XXX rename this after deprecating samples
    return result;
}
// Rename the fields of each collated segment row to the client's naming.
// `value` goes through nanstr, so values that coerce to NaN become null.
var segmentedAttrs = function segmentedAttrs(list) {
    return _.map(list, function (row) {
        var attrs = {};
        attrs.sample = row.sampleID;
        attrs.start = row.position.chromstart;
        attrs.end = row.position.chromend;
        attrs.value = nanstr(row.value);
        return attrs;
    });
};
// Convert a segmented-data response into {rows, samplesInResp}: resp.rows
// is collated into row objects and renamed via segmentedAttrs;
// resp.samples is de-duplicated.
function indexSegmented(resp) {
    // XXX The query for samples is returning every row in the dataset,
    // rather than distinct sampleIDs from the dataset. We need a
    // 'distinct' function for xena-query.
    var collated = collateRows(resp.rows);
    var result = {};
    result.rows = segmentedAttrs(collated);
    result.samplesInResp = _.uniq(resp.samples); // XXX rename this after deprecating samples
    return result;
}
/**
 * Line up case-insensitive matches with the names that were asked for.
 *
 * For each entry of `input`, returns the entry of `matches` that equals it
 * ignoring case, or undefined when there is none. If several matches
 * differ only by case, the last one wins. A missing (null/undefined) list
 * is treated as empty.
 *
 * @param {Array<string>} input requested names, in the order to return
 * @param {Array<string>} matches names found, in their stored case
 * @returns {Array<string|undefined>} same length and order as `input`
 */
function alignMatches(input, matches) {
    // Null-prototype index: a requested name such as "constructor" or
    // "__proto__" must not resolve to a member inherited from
    // Object.prototype.
    var index = Object.create(null);
    (matches || []).forEach(function (m) {
        index[m.toLowerCase()] = m;
    });
    return (input || []).map(function (g) {
        return index[g.toLowerCase()];
    });
}
// Parse a comma-separated list of integers. One trailing comma, if
// present, is dropped first; each remaining field is parsed base-10.
function splitExon(s) {
    var fields = s.replace(/,$/, '').split(',');
    var toInt = _.partial(parseInt, _, 10);
    return _.map(fields, toInt);
}
// Reshape one collated refGene row into the gene model used by the client.
// Transcript bounds come from `position`, coding bounds from
// `position (2)`; exon starts/ends are parsed from comma-separated strings.
function refGeneAttrs(row) {
    var attrs = {};
    attrs.name2 = row.name2[0];
    attrs.strand = row.position.strand;
    attrs.txStart = row.position.chromstart;
    attrs.txEnd = row.position.chromend;
    attrs.chrom = row.position.chrom;
    attrs.cdsStart = row['position (2)'].chromstart; // XXX ouch: position (2)
    attrs.cdsEnd = row['position (2)'].chromend;
    attrs.exonCount = row.exonCount;
    attrs.exonStarts = splitExon(row.exonStarts);
    attrs.exonEnds = splitExon(row.exonEnds);
    return attrs;
}
// Index a column-oriented refGene response by its name2 column: keys are
// the entries of resp.name2, values the reshaped rows at the same index.
function indexRefGene(resp) {
    var names = resp.name2;
    var genes = _.map(collateRows(resp), refGeneAttrs);
    return _.object(names, genes);
}
// Reshape one collated transcript row into the model used by the client.
// Transcript bounds come from `position`, coding bounds from
// `position (2)`; exon starts/ends are parsed from comma-separated strings.
function transcriptAttrs(row) {
    var attrs = {};
    attrs.name = row.name;
    attrs.strand = row.position.strand;
    attrs.txStart = row.position.chromstart;
    attrs.txEnd = row.position.chromend;
    attrs.chrom = row.position.chrom;
    attrs.cdsStart = row['position (2)'].chromstart; // XXX ouch: position (2)
    attrs.cdsEnd = row['position (2)'].chromend;
    attrs.exonCount = row.exonCount;
    attrs.exonStarts = splitExon(row.exonStarts);
    attrs.exonEnds = splitExon(row.exonEnds);
    return attrs;
}
// Collate a column-oriented transcript response and reshape every row.
function indexTranscripts(resp) {
    var rows = collateRows(resp);
    return rows.map(function (row) {
        return transcriptAttrs(row);
    });
}
// Generate sql patterns for case-insensitive match of a prefix, by
// permuting the characters having case, up to the character limit
// 'maxPermute'. Each permutation gets a trailing '%' wildcard.
// The results have to be filtered, since they may contain spurious matches.
var prefixPatterns = function prefixPatterns(prefix) {
    var limited = prefixBitLimit(maxPermute, prefix);
    var variants = permuteCase(limited);
    return variants.map(function (g) {
        return g + '%';
    });
};
// Build a filter that keeps only the strings starting with `prefix`,
// compared case-insensitively.
var filterByPrefix = function filterByPrefix(prefix) {
    return function (list) {
        var lcPrefix = prefix.toLowerCase();
        var width = lcPrefix.length;
        return list.filter(function (m) {
            return m.toLowerCase().slice(0, width) === lcPrefix;
        });
    };
};
////////////////////////////////////////////////////
// Query marshalling and dispatch
// Build the ajax settings for POSTing a query string to a host's /data/
// endpoint as plain text.
function xenaPost(host, query) {
    var request = {};
    request.crossDomain = true;
    request.headers = { 'Content-Type': 'text/plain' };
    request.url = host + '/data/';
    request.body = query;
    // rxjs 5 defaults to 'json', which will cause the browser to parse
    // the response before it gets to us. That would be fine, except it's
    // not well supported cross-browser. In particular, it fails in
    // phantom 1.9 and IE. If removing this, also remove the JSON.parse
    // from jsonResp.
    request.responseType = 'text';
    request.method = 'POST';
    return request;
}
// Serialize one query parameter: strings are quoted, arrays are formatted
// as a bracketed vector, null/undefined become 'nil', and any other value
// is returned untouched.
function marshallParam(p) {
    if (_.isString(p)) {
        return quote(p);
    }
    if (_.isArray(p)) {
        // XXX arrayfmt picks number vs. string formatting from the first
        // element only, so mixed arrays are not handled.
        return arrayfmt(p);
    }
    if (p == null) {
        return 'nil';
    }
    return p;
}
// Marshall the parameters and build the lisp call form:
// (queryFn param1 param2 ...)
function xenaCall(queryFn) {
    var params = Array.prototype.slice.call(arguments, 1);
    var rendered = params.map(marshallParam).join(' ');
    return '(' + queryFn + ' ' + rendered + ')';
}
// Given a query, a host, and parameters, marshall the parameters into a
// call form, dispatch it as a POST to the host, and return an observable
// of the JSON-decoded response.
function doPost(query, host) {
    var params = Array.prototype.slice.call(arguments, 2);
    var call = xenaCall.apply(undefined, [query].concat(params));
    var request = xenaPost(host, call);
    return Rx.Observable.ajax(request).map(jsonResp);
}
// Create POST methods for all of the xena queries: each entry of `qs`
// becomes a function that forwards its arguments to doPost after the query.
var queryPosts = _.mapObject(qs, function (query) {
    return function () {
        var args = Array.prototype.slice.call(arguments);
        return doPost.apply(undefined, [query].concat(args));
    };
});
////////////////////////////////////////////////////
// Extend POST methods
// Wrap selected POST methods so callers get indexed/normalized results, and
// so some parameters are pre-processed before the query is sent. Returns
// postMethods merged with the wrapped versions.
function transformPOSTMethods(postMethods) {
    // We frequently want to index or normalize the returned data. For the
    // common case where we only need to map a transform over the response,
    // this applies mapFn to the result of the POST method, which is called
    // with whatever arguments the wrapper received.
    function mapResponse(mapFn) {
        return function (postFn) {
            return function () {
                var result = postFn.apply(undefined, arguments);
                return result.map(mapFn);
            };
        };
    }
    // Response mapper that runs datasetListTransform for a given host.
    function listTransformFor(host) {
        return function (resp) {
            return datasetListTransform(host, resp);
        };
    }
    // Response mapper that lines matches up with the requested names.
    function alignTo(requested) {
        return function (list) {
            return alignMatches(requested, list);
        };
    }
    function lowerCase(s) {
        return s.toLowerCase();
    }
    // Transforms that we apply to the POST methods, to make them easier to use.
    var mapFns = {
        allFieldMetadata: mapResponse(indexFeatureDetail),
        featureList: mapResponse(indexFeatures),
        fieldCodes: mapResponse(indexCodes),
        fieldMetadata: mapResponse(indexFeatureDetail),
        geneTranscripts: mapResponse(indexTranscripts),
        refGeneExons: mapResponse(indexRefGene),
        refGeneRange: mapResponse(indexRefGene),
        segmentedDataRange: mapResponse(indexSegmented),
        // The next three apply a transform that requires the 'host' parameter.
        datasetList: function datasetList(postFn) {
            return function (host, cohort) {
                return postFn(host, cohort).map(listTransformFor(host));
            };
        },
        datasetMetadata: function datasetMetadata(postFn) {
            return function (host, dataset) {
                return postFn(host, dataset).map(listTransformFor(host));
            };
        },
        probemapList: function probemapList(postFn) {
            return function (host) {
                return postFn(host).map(listTransformFor(host));
            };
        },
        sparseData: mapResponse(indexMutations),
        sparseDataRange: mapResponse(indexMutations),
        // Generate case permutations of the gene parameter
        sparseDataMatchField: function sparseDataMatchField(postFn) {
            return function (host, field, dataset, genes) {
                var variants = _.flatmap(genes, permuteCase);
                return postFn(host, field, dataset, variants).map(alignTo(genes));
            };
        },
        // Generate case permutations of the prefix, then drop spurious hits
        sparseDataMatchPartialField: function sparseDataMatchPartialField(postFn) {
            return function (host, field, dataset, prefix, limit) {
                var patterns = prefixPatterns(prefix);
                return postFn(host, field, dataset, patterns, limit).map(filterByPrefix(prefix));
            };
        },
        // Convert the gene parameter to lower-case, for matching
        sparseDataMatchFieldSlow: function sparseDataMatchFieldSlow(postFn) {
            return function (host, field, dataset, genes) {
                var lowered = genes.map(function (g) {
                    return lowerCase(g);
                });
                return postFn(host, field, dataset, lowered).map(alignTo(genes));
            };
        },
        // Convert fields to lower-case, for matching, and apply a transform
        // that requires the 'fields' parameter.
        matchFields: function matchFields(postFn) {
            return function (host, dataset, fields) {
                var lowered = _.map(fields, function (f) {
                    return lowerCase(f);
                });
                return postFn(host, dataset, lowered).map(alignTo(fields));
            };
        }
    };
    // Apply each transform to the POST method of the same name.
    var wrapped = _.mapObject(mapFns, function (transform, name) {
        return transform(postMethods[name]);
    });
    return _.merge(postMethods, wrapped);
}
//queryPosts = transformPOSTMethods(queryPosts);
////////////////////////////////////////////////////
// Wrap POST methods so they will take either a dsID, or
// (host, name) as the first parameters. Only the methods named below are
// wrapped; the result is postMethods merged with the wrapped versions.
function wrapDsIDParams(postMethods) {
    var dsIDFns = [
        'allFieldMetadata',
        'datasetSamples',
        'datasetFieldExamples',
        'datasetField',
        'datasetProbeValues',
        'datasetProbeSignature',
        'datasetGeneProbesValues',
        'datasetChromProbeValues',
        'datasetGeneProbeAvg',
        'datasetMetadata',
        'featureList',
        'fieldCodes',
        'maxRange',
        'refGeneExons',
        'refGenePosition',
        'refGeneRange',
        'matchFields',
        'segmentedDataRange',
        'segmentedDataExamples',
        'sparseData',
        'sparseDataRange',
        'sparseDataExamples'
    ];
    var selected = _.pick(postMethods, dsIDFns);
    var dsIDFnPosts = _.mapObject(selected, dsIDFn);
    return _.merge(postMethods, dsIDFnPosts);
}
////////////////////////////////////////////////////
// Apply transforms.
// First add the response transforms, then the dsID-aware wrappers on top.
queryPosts = transformPOSTMethods(queryPosts);
queryPosts = wrapDsIDParams(queryPosts);
////////////////////////////////////////////////////
// Derived queries
// Local aliases for the transformed POST methods used by the derived
// queries below.
var _queryPosts = queryPosts;
var datasetMetadata = _queryPosts.datasetMetadata;
var refGenePosition = _queryPosts.refGenePosition;
var refGeneExons = _queryPosts.refGeneExons;
var refGeneRange = _queryPosts.refGeneRange;
// Override sparseDataMatchField to dispatch to the 'Slow' version
// if necessary: when any gene has a permuteBitCount above 7, the
// lower-casing query is used instead of the case-permuting one.
// Curried, with `field` first.
// NOTE(review): 7 equals maxPermute; presumably the same limit - consider
// sharing the constant.
var sparseDataMatchField = _.curry(function (field, host, dataset, genes) {
    var widest = _.max(_.map(genes, permuteBitCount));
    var post = widest > 7 ? queryPosts.sparseDataMatchFieldSlow : queryPosts.sparseDataMatchField;
    return post(host, field, dataset, genes);
});
// Look up gene strand from refGene, using the assembly specified
// in the probemap metadata ('hg19' when the metadata has none). Accepts
// either (host, probemap, gene) or (dsID, gene).
var probemapGeneStrand = dsIDFn(function (host, probemap, gene) {
    return datasetMetadata(host, probemap).flatMap(function (_ref) {
        // Only the first metadata record is consulted.
        var first = _slicedToArray(_ref, 1)[0];
        var assembly = first.assembly;
        var reference = refGene[assembly || 'hg19'];
        return refGenePosition(reference.host, reference.name, gene);
    }).map(function (_ref3) {
        return _ref3.strand;
    });
});
// case-insensitive gene lookup: resolve the stored spelling of each gene
// via the name2 field, drop the ones that did not match, then fetch exons
// for the rest. Accepts either (host, dataset, genes) or (dsID, genes).
var refGeneExonCase = dsIDFn(function (host, dataset, genes) {
    var matched = sparseDataMatchField('name2', host, dataset, genes);
    return matched.flatMap(function (caseGenes) {
        var found = _.filter(caseGenes, _.identity);
        return refGeneExons(host, dataset, found);
    });
});
// test if host is up: POST '(+ 1 2)' and emit true only when the response
// parses to 3. Emits false instead if nothing arrives within 5000 ms or
// if the request (or parsing) errors.
function testHost(host) {
    var probe = Rx.Observable.ajax(xenaPost(host, '(+ 1 2)'));
    var answered = probe.map(function (s) {
        return !!(s.response && 3 === JSON.parse(s.response));
    });
    return answered.timeoutWith(5000, Rx.Observable.of(false)).catch(function () {
        return Rx.Observable.of(false);
    });
}
// URLs of the cohort metadata JSON files (in the ucscXena/cohortMetaData
// repository) that are fetched for the fetchCohort* exports.
var cohortMetaURL = "https://raw.githubusercontent.com/ucscXena/cohortMetaData/master/xenacohort_tag.json";
var cohortPreferredURL = "https://raw.githubusercontent.com/ucscXena/cohortMetaData/master/defaultDataset.json";
var cohortPhenotypeURL = "https://raw.githubusercontent.com/ucscXena/cohortMetaData/master/defaultPhenotype.json";
// Build an observable for a cross-domain GET of `url` with a 'json'
// response type, mapped to the `response` field of the result.
var fetchJSON = function fetchJSON(url) {
    var request = {
        url: url,
        method: 'GET',
        responseType: 'json',
        crossDomain: true
    };
    var pickResponse = function (xhr) {
        return xhr.response;
    };
    return Rx.Observable.ajax(request).map(pickResponse);
};
// Public API: every (transformed) query POST method, plus the derived
// queries, helpers and reference tables below. Entries listed here take
// precedence over same-named query POST methods.
var extraExports = {
    // derived query posts
    probemapGeneStrand: probemapGeneStrand,
    refGeneExonCase: refGeneExonCase,
    refGeneRange: refGeneRange,
    sparseDataMatchGenes: dsIDFn(sparseDataMatchField('genes')),
    // helpers:
    parseDsID: parseDsID,
    nanstr: nanstr,
    xenaPost: xenaPost,
    testHost: testHost,
    // reference
    refGene: refGene,
    transcript: transcript,
    // cohort meta: these are the values returned by fetchJSON, built once
    // at module load, not functions.
    fetchCohortMeta: fetchJSON(cohortMetaURL),
    fetchCohortPreferred: fetchJSON(cohortPreferredURL),
    fetchCohortPhenotype: fetchJSON(cohortPhenotypeURL)
};
module.exports = _extends({}, queryPosts, extraExports);