UNPKG

ucsc-xena-client

Version:

UCSC Xena Client. Functional genomics visualizations.

539 lines (461 loc) 18.7 kB
/*eslint camelcase: 0, no-multi-spaces: 0, no-mixed-spaces-and-tabs: 0 */
'use strict';

// Transpiler-emitted helper (appears to be Babel output): Object.assign when
// available, otherwise a fallback that copies the own enumerable properties
// of each later argument onto the first argument and returns it.
var _extends = Object.assign || function (target) {
    for (var argIdx = 1; argIdx < arguments.length; argIdx++) {
        var source = arguments[argIdx];
        for (var prop in source) {
            if (Object.prototype.hasOwnProperty.call(source, prop)) {
                target[prop] = source[prop];
            }
        }
    }
    return target;
};

// Transpiler-emitted helper backing array destructuring. Arrays are returned
// as-is; other iterables are drained into a new array (stopping after `i`
// items when `i` is truthy); anything else raises a TypeError.
var _slicedToArray = (function () {
    // Pull values out of an iterable. If the loop stops early (break or
    // exception) the iterator's "return" method is invoked when present, and
    // any error caught during iteration is re-thrown afterwards.
    function sliceIterator(arr, i) {
        var collected = [];
        var finished = true;
        var failed = false;
        var failure = undefined;
        try {
            for (var iter = arr[Symbol.iterator](), step; !(finished = (step = iter.next()).done); finished = true) {
                collected.push(step.value);
                if (i && collected.length === i) break;
            }
        } catch (err) {
            failed = true;
            failure = err;
        } finally {
            try {
                if (!finished && iter["return"]) iter["return"]();
            } finally {
                if (failed) throw failure;
            }
        }
        return collected;
    }

    return function (arr, i) {
        if (Array.isArray(arr)) {
            return arr;
        }
        if (Symbol.iterator in Object(arr)) {
            return sliceIterator(arr, i);
        }
        throw new TypeError("Invalid attempt to destructure non-iterable instance");
    };
}());

// Transpiler-emitted helper backing spread: element-wise copy for arrays,
// Array.from() for everything else.
function _toConsumableArray(arr) {
    if (!Array.isArray(arr)) {
        return Array.from(arr);
    }
    var copy = Array(arr.length);
    for (var idx = 0; idx < arr.length; idx++) {
        copy[idx] = arr[idx];
    }
    return copy;
}

var Rx = require('./rx');
var _ = require('./underscore_ext');

var _require = require('./permuteCase'),
    permuteCase = _require.permuteCase,
    permuteBitCount = _require.permuteBitCount,
    prefixBitLimit = _require.prefixBitLimit;

// Load all query files as a map of strings.
var qs = require('./loadXenaQueries');

var maxPermute = 7; // max number of chars to permute for case-insensitive match

///////////////////////////////////////////////////////
// support for hg18/GRCh36, hg19/GRCh37, hg38/GRCh38, mm9, mm10
// Xena refGene is the composite gene model we build, NOT literally "refGene annotation"
// Each entry names the hub (host) and dataset (name) holding that assembly's gene model.
var refGene = {
    hg18: { host: 'https://reference.xenahubs.net', name: 'refgene_good_hg18' },
    GRCh36: { host: 'https://reference.xenahubs.net', name: 'refgene_good_hg18' },
    hg19: { host: 'https://reference.xenahubs.net', name: 'gencode_good_hg19_V24lift37' },
    GRCh37: { host: 'https://reference.xenahubs.net', name: 'gencode_good_hg19_V24lift37' },
    hg38: { host: 'https://reference.xenahubs.net', name: 'gencode_good_hg38' },
    GRCh38: { host: 'https://reference.xenahubs.net', name: 'gencode_good_hg38' },
    mm9: { host: 'https://reference.xenahubs.net', name: 'refgene_good_mm9' },
    mm10: { host: 'https://reference.xenahubs.net', name: 'gencode_good_mm10' }
};

// support for hg18/GRCh36, hg19/GRCh37, hg38/GRCh38
var transcript = {
    hg18: { host: 'https://reference.xenahubs.net', name: 'refGene_hg18' },
    GRCh36: { host: 'https://reference.xenahubs.net', name: 'refGene_hg18' },
    hg19: { host: 'https://reference.xenahubs.net', name: 'wgEncodeGencodeBasicV24lift37' },
    GRCh37: { host: 'https://reference.xenahubs.net', name: 'wgEncodeGencodeBasicV24lift37' },
    hg38: { host: 'https://reference.xenahubs.net', name: 'wgEncodeGencodeBasicV24' },
    GRCh38: { host: 'https://reference.xenahubs.net', name: 'wgEncodeGencodeBasicV24' }
};

///////////////////////////////////////////////////////
// Serialization helpers

// Parse the text body of an ajax response as JSON.
var jsonResp = function jsonResp(xhr) {
    return JSON.parse(xhr.response);
};

// Render a value as a double-quoted string literal for a query form; null and
// undefined become the symbol 'nil'. Backslashes and double quotes inside the
// value are backslash-escaped, so a value containing either character can no
// longer terminate the literal early or be misread as an escape sequence.
// NOTE(review): assumes the server reads string literals with Clojure/edn
// escaping rules -- confirm against the hub implementation.
var quote = function quote(s) {
    if (s == null) {
        return 'nil';
    }
    return '"' + ('' + s).replace(/[\\"]/g, '\\$&') + '"';
};

var toString = function toString(x) {
    return x.toString();
};

// Space-separate the elements of a list. The first element decides the
// rendering for the whole list: numbers are written bare, anything else is
// written through quote().
var sep = function sep(l) {
    var render = typeof _.get(l, 0) === 'number' ? toString : quote;
    return _.map(l, render).join(' ');
};

// Render a list as a bracketed vector literal, e.g. ["a" "b"] or [1 2].
var arrayfmt = function arrayfmt(l) {
    return '[' + sep(l) + ']';
};

// Map NaN-like values to null, passing everything else through unchanged.
// This uses the coercing global isNaN on purpose: undefined and non-numeric
// strings (such as "NaN") also become null.
var nanstr = function nanstr(v) {
    return isNaN(v) ? null : v;
};

///////////////////////////////////////////////////////

// Split a dsID (a JSON string with 'host' and 'name' keys) into [host, name].
function parseDsID(dsID) {
    var parsed = JSON.parse(dsID);
    return [parsed.host, parsed.name];
}

// Heuristic: a dsID is a JSON object string, so it starts with '{'.
var isDsID = function isDsID(x) {
    return x[0] === '{';
};

// Transform a function taking initial parameters (host, dataset, ...) to
// optionally take (dsID, ...) instead. Uses a heuristic check for dsID,
// and marshalls the parameters as necessary.
var dsIDFn = function dsIDFn(fn) {
    return function (hostOrDsID) {
        var rest = Array.prototype.slice.call(arguments, 1);
        var leading = isDsID(hostOrDsID) ? parseDsID(hostOrDsID) : [hostOrDsID];
        return fn.apply(undefined, leading.concat(rest));
    };
};

///////////////////////////////////////////////////////
// Transforms of responses from the xena server.

// Index features by name; the value is the longtitle, falling back to the name.
function indexFeatures(features) {
    var pairs = _.map(features, function (f) {
        return [f.name, f.longtitle || f.name];
    });
    return _.object(pairs);
}

// Index code lists by field name. A truthy 'code' string is split on tabs;
// a falsy 'code' is passed through as-is.
function indexCodes(codes) {
    var pairs = _.map(codes, function (row) {
        return [row.name, row.code && row.code.split('\t')];
    });
    return _.object(pairs);
}

// Normalize dataset rows from the given host: parse the JSON in 'text' (and
// 'pmtext' when present), overlay the row's own fields on the parsed
// metadata, and attach dsID, label and probemapMeta.
function datasetListTransform(host, list) {
    return _.map(list, function (ds) {
        var text = JSON.parse(ds.text) || {},
            pmtext = ds.pmtext ? JSON.parse(ds.pmtext) : null;

        // merge curated fields over raw metadata
        // XXX note that we're case sensitive on raw metadata
        var merged = _.extend(text, _.dissoc(ds, 'text'));

        return _.extend(merged, {
            dsID: JSON.stringify({ host: host, name: merged.name }),
            label: merged.label || merged.name,
            probemapMeta: pmtext
        });
    });
}

// XXX Can't rewrite :samples in sparseData until we can do 'distinct', or 'keys' for category fields, on the server.
// XXX "position", "position (2)" is really horrible, in refGeneExons. Need
// better naming for position fields. Might want to allow renaming fields,
// with [:old-name :new-name] Also, cds doesn't really need to be indexed.

// XXX Should we write a compact collection type, where columns are in typed
// arrays? Maybe with codes? Or run-length encoding?

// Index full feature rows by their 'name' property.
function indexFeatureDetail(features) {
    return _.reduce(features, function (byName, row) {
        byName[row.name] = row;
        return byName;
    }, {});
}

// Rename the fields of collated sparse (mutation) rows to the client's
// attribute names. The two VAF fields go through nanstr.
function mutationAttrs(list) {
    return _.map(list, function (row) {
        var pos = row.position;
        return {
            "sample": row.sampleID,
            "chr": pos.chrom,
            "start": pos.chromstart,
            "end": pos.chromend,
            "gene": _.getIn(row, ['genes', 0]),
            "reference": row.ref,
            "alt": row.alt,
            "altGene": row.altGene,
            "effect": row.effect,
            "aminoAcid": row['amino-acid'],
            "rnaVaf": nanstr(row['rna-vaf']),
            "dnaVaf": nanstr(row['dna-vaf'])
        };
    });
}

// {field: [value, ...], ...} -> [{field: value, ...}, ...]
// The row count is taken from the first column. A response with no columns
// at all (empty object, null or undefined) collates to an empty list rather
// than throwing on the missing first column.
function collateRows(rows) {
    var keys = _.keys(rows);
    if (keys.length === 0) {
        return [];
    }
    return _.times(rows[keys[0]].length, function (i) {
        var values = _.map(keys, function (k) {
            return rows[k][i];
        });
        return _.object(keys, values);
    });
}

// {:sampleid ["id0", "id1", ...], chromstart: [123, 345...], ...}
function indexMutations(resp) {
    // XXX The query for samples is returning every row in the dataset,
    // rather than distinct sampleIDs from the dataset. We need a
    // 'distinct' function for xena-query.
    var rows = mutationAttrs(collateRows(resp.rows));
    return {
        rows: rows,
        samplesInResp: _.uniq(resp.samples) // XXX rename this after deprecating samples
    };
}

// Rename the fields of collated segmented rows; 'value' goes through nanstr.
var segmentedAttrs = function segmentedAttrs(list) {
    return _.map(list, function (row) {
        var pos = row.position;
        return {
            "sample": row.sampleID,
            "start": pos.chromstart,
            "end": pos.chromend,
            "value": nanstr(row.value)
        };
    });
};

function indexSegmented(resp) {
    // XXX The query for samples is returning every row in the dataset,
    // rather than distinct sampleIDs from the dataset. We need a
    // 'distinct' function for xena-query.
    var rows = segmentedAttrs(collateRows(resp.rows));
    return {
        rows: rows,
        samplesInResp: _.uniq(resp.samples) // XXX rename this after deprecating samples
    };
}

// For each input name, return the server match that equals it ignoring case,
// or undefined when there is none. The result is parallel to 'input'.
function alignMatches(input, matches) {
    var lowered = _.map(matches, function (g) {
        return g.toLowerCase();
    });
    var index = _.object(lowered, matches);
    return _.map(input, function (g) {
        return index[g.toLowerCase()];
    });
}

// Parse a comma-separated list of base-10 integers, ignoring one trailing comma.
function splitExon(s) {
    var fields = s.replace(/,$/, '').split(',');
    return _.map(fields, function (field) {
        return parseInt(field, 10);
    });
}

// Shape one collated refGene row into the client's gene-model record.
function refGeneAttrs(row) {
    var tx = row.position,
        cds = row['position (2)']; // XXX ouch: position (2)
    return {
        name2: row.name2[0],
        strand: tx.strand,
        txStart: tx.chromstart,
        txEnd: tx.chromend,
        chrom: tx.chrom,
        cdsStart: cds.chromstart,
        cdsEnd: cds.chromend,
        exonCount: row.exonCount,
        exonStarts: splitExon(row.exonStarts),
        exonEnds: splitExon(row.exonEnds)
    };
}

// Index gene-model records by the response's name2 column.
function indexRefGene(resp) {
    return _.object(resp.name2, _.map(collateRows(resp), refGeneAttrs));
}

// Shape one collated transcript row into the client's transcript record.
function transcriptAttrs(row) {
    var tx = row.position,
        cds = row['position (2)']; // XXX ouch: position (2)
    return {
        name: row.name,
        strand: tx.strand,
        txStart: tx.chromstart,
        txEnd: tx.chromend,
        chrom: tx.chrom,
        cdsStart: cds.chromstart,
        cdsEnd: cds.chromend,
        exonCount: row.exonCount,
        exonStarts: splitExon(row.exonStarts),
        exonEnds: splitExon(row.exonEnds)
    };
}

// Collate a column-oriented transcript response into a list of records.
function indexTranscripts(resp) {
    return collateRows(resp).map(transcriptAttrs);
}

// Generate sql patterns for case-insensitive match of a prefix, by
// permuting the characters having case, up to the character limit 'maxPermute'.
// The results have to be filtered, since they may contain spurious matches.
// Build the LIKE-style patterns for a prefix: every case permutation that
// permuteCase yields for the prefix after prefixBitLimit has capped it at
// maxPermute, each with a trailing '%' wildcard.
var prefixPatterns = function prefixPatterns(prefix) {
    var variants = permuteCase(prefixBitLimit(maxPermute, prefix));
    return variants.map(function (variant) {
        return variant + '%';
    });
};

// Returns a filter that keeps only the strings starting with 'prefix',
// compared case-insensitively.
var filterByPrefix = function filterByPrefix(prefix) {
    return function (list) {
        var wanted = prefix.toLowerCase();
        return list.filter(function (candidate) {
            var foundAt = candidate.toLowerCase().indexOf(wanted);
            return foundAt === 0;
        });
    };
};

////////////////////////////////////////////////////
// Query marshalling and dispatch

// Build the ajax settings for POSTing a query string to a hub's /data/ endpoint.
function xenaPost(host, query) {
    return {
        crossDomain: true,
        headers: { 'Content-Type': 'text/plain' },
        url: host + '/data/',
        body: query,
        // rxjs 5 defaults to 'json', which will cause the browser to parse
        // the response before it gets to us. That would be fine, except it's
        // not well supported cross-browser. In particular, it fails in
        // phantom 1.9 and IE. If removing this, also remove the JSON.parse
        // from jsonResp.
        responseType: 'text',
        method: 'POST'
    };
}

// Render a single call parameter: strings are quoted, arrays become vector
// literals, null/undefined become 'nil', and anything else is passed through.
function marshallParam(p) {
    if (_.isString(p)) {
        return quote(p);
    }
    if (_.isArray(p)) {
        // Element rendering is decided by arrayfmt/sep.
        return arrayfmt(p);
    }
    if (p == null) {
        return 'nil';
    }
    return p;
}

// marshall parameters and build the lisp call form
function xenaCall(queryFn) {
    var params = Array.prototype.slice.call(arguments, 1);
    var rendered = params.map(marshallParam).join(' ');
    return '(' + queryFn + ' ' + rendered + ')';
}

// Given a host, query, and parameters, marshall the parameters and dispatch a
// POST, returning an observable.
function doPost(query, host) {
    var params = Array.prototype.slice.call(arguments, 2);
    var form = xenaCall.apply(undefined, [query].concat(params));
    return Rx.Observable.ajax(xenaPost(host, form)).map(jsonResp);
}

// Create POST methods for all of the xena queries.
// One method per entry in 'qs'. Each method forwards its arguments to doPost
// after the query text, so callers pass (host, ...params).
var queryPosts = _.mapObject(qs, function (query) {
    return function () {
        var callArgs = [query];
        callArgs.push.apply(callArgs, arguments);
        return doPost.apply(undefined, callArgs);
    };
});

////////////////////////////////////////////////////
// Extend POST methods

// Wrap selected POST methods so that their parameters are normalized and/or
// their responses are indexed before reaching the caller. Returns the result
// of merging the wrapped methods over 'postMethods'.
function transformPOSTMethods(postMethods) {

    // We frequently want to index or normalize the returned data. For the
    // common case where we only need to map a transform over the response,
    // this function will apply the transform mapFn to the POST method.
    var mapResponse = function mapResponse(mapFn) {
        return function (postFn) {
            return function () {
                var response = postFn.apply(undefined, arguments);
                return response.map(mapFn);
            };
        };
    };

    // Response transform shared by the methods that need the 'host' parameter.
    var withHost = function withHost(host) {
        return function (resp) {
            return datasetListTransform(host, resp);
        };
    };

    // Lower-case a single name, for matching.
    var lowerCase = function lowerCase(s) {
        return s.toLowerCase();
    };

    // Transforms that we apply to the POST methods, to make them easier to use.
    var mapFns = {
        allFieldMetadata: mapResponse(indexFeatureDetail),
        featureList: mapResponse(indexFeatures),
        fieldCodes: mapResponse(indexCodes),
        fieldMetadata: mapResponse(indexFeatureDetail),
        geneTranscripts: mapResponse(indexTranscripts),
        refGeneExons: mapResponse(indexRefGene),
        refGeneRange: mapResponse(indexRefGene),
        segmentedDataRange: mapResponse(indexSegmented),

        // Apply a transform that requires the 'host' parameter
        datasetList: function datasetList(postFn) {
            return function (host, cohort) {
                return postFn(host, cohort).map(withHost(host));
            };
        },

        // Apply a transform that requires the 'host' parameter
        datasetMetadata: function datasetMetadata(postFn) {
            return function (host, dataset) {
                return postFn(host, dataset).map(withHost(host));
            };
        },

        // Apply a transform that requires the 'host' parameter
        probemapList: function probemapList(postFn) {
            return function (host) {
                return postFn(host).map(withHost(host));
            };
        },

        sparseData: mapResponse(indexMutations),
        sparseDataRange: mapResponse(indexMutations),

        // Generate case permutations of the gene parameter
        sparseDataMatchField: function sparseDataMatchField(postFn) {
            return function (host, field, dataset, genes) {
                var permuted = _.flatmap(genes, permuteCase);
                return postFn(host, field, dataset, permuted).map(function (list) {
                    return alignMatches(genes, list);
                });
            };
        },

        // Generate case permutations of the gene parameter
        sparseDataMatchPartialField: function sparseDataMatchPartialField(postFn) {
            return function (host, field, dataset, prefix, limit) {
                var patterns = prefixPatterns(prefix);
                return postFn(host, field, dataset, patterns, limit).map(filterByPrefix(prefix));
            };
        },

        // Convert the gene parameter to lower-case, for matching
        sparseDataMatchFieldSlow: function sparseDataMatchFieldSlow(postFn) {
            return function (host, field, dataset, genes) {
                var lowered = genes.map(function (g) {
                    return lowerCase(g);
                });
                return postFn(host, field, dataset, lowered).map(function (list) {
                    return alignMatches(genes, list);
                });
            };
        },

        // Convert fields to lower-case, for matching, and apply a transform that
        // requires the 'fields' parameter.
        matchFields: function matchFields(postFn) {
            return function (host, dataset, fields) {
                var lowered = _.map(fields, function (f) {
                    return lowerCase(f);
                });
                return postFn(host, dataset, lowered).map(function (list) {
                    return alignMatches(fields, list);
                });
            };
        }
    };

    // Apply each transform to the POST method of the same name.
    var mapFnPosts = _.mapObject(mapFns, function (transform, name) {
        return transform(postMethods[name]);
    });

    return _.merge(postMethods, mapFnPosts);
}

////////////////////////////////////////////////////
// Wrap POST methods so they will take either a dsID, or
// (host, name) as the first parameters.
function wrapDsIDParams(postMethods) { var dsIDFns = ['allFieldMetadata', 'datasetSamples', 'datasetFieldExamples', 'datasetField', 'datasetProbeValues', 'datasetProbeSignature', 'datasetGeneProbesValues', 'datasetChromProbeValues', 'datasetGeneProbeAvg', 'datasetMetadata', 'featureList', 'fieldCodes', 'maxRange', 'refGeneExons', 'refGenePosition', 'refGeneRange', 'matchFields', 'segmentedDataRange', 'segmentedDataExamples', 'sparseData', 'sparseDataRange', 'sparseDataExamples'], dsIDFnPosts = _.mapObject(_.pick(postMethods, dsIDFns), dsIDFn); return _.merge(postMethods, dsIDFnPosts); } //////////////////////////////////////////////////// // Apply transforms. queryPosts = wrapDsIDParams(transformPOSTMethods(queryPosts)); //////////////////////////////////////////////////// // Derived queries var _queryPosts = queryPosts, datasetMetadata = _queryPosts.datasetMetadata, refGenePosition = _queryPosts.refGenePosition, refGeneExons = _queryPosts.refGeneExons, refGeneRange = _queryPosts.refGeneRange; // Override sparseDataMatchField to dispatch to the 'Slow' version // if necessary. var sparseDataMatchField = _.curry(function (field, host, dataset, genes) { return (_.max(_.map(genes, permuteBitCount)) > 7 ? 
queryPosts.sparseDataMatchFieldSlow : queryPosts.sparseDataMatchField)(host, field, dataset, genes); }); // Look up gene strand from refGene, using the assembly specified // in the probemap metadata var probemapGeneStrand = dsIDFn(function (host, probemap, gene) { return datasetMetadata(host, probemap).flatMap(function (_ref) { var _ref2 = _slicedToArray(_ref, 1), assembly = _ref2[0].assembly; var _refGene = refGene[assembly || 'hg19'], host = _refGene.host, name = _refGene.name; return refGenePosition(host, name, gene); }).map(function (_ref3) { var strand = _ref3.strand; return strand; }); }); // case-insensitive gene lookup var refGeneExonCase = dsIDFn(function (host, dataset, genes) { return sparseDataMatchField('name2', host, dataset, genes).flatMap(function (caseGenes) { return refGeneExons(host, dataset, _.filter(caseGenes, _.identity)); }); }); // test if host is up function testHost(host) { return Rx.Observable.ajax(xenaPost(host, '(+ 1 2)')).map(function (s) { return !!(s.response && 3 === JSON.parse(s.response)); }).timeoutWith(5000, Rx.Observable.of(false)).catch(function () { return Rx.Observable.of(false); }); } var cohortMetaURL = "https://raw.githubusercontent.com/ucscXena/cohortMetaData/master/xenacohort_tag.json"; var cohortPreferredURL = "https://raw.githubusercontent.com/ucscXena/cohortMetaData/master/defaultDataset.json"; var cohortPhenotypeURL = "https://raw.githubusercontent.com/ucscXena/cohortMetaData/master/defaultPhenotype.json"; var fetchJSON = function fetchJSON(url) { return Rx.Observable.ajax({ url: url, method: 'GET', responseType: 'json', crossDomain: true }).map(function (xhr) { return xhr.response; }); }; module.exports = _extends({}, queryPosts, { // derived query posts probemapGeneStrand: probemapGeneStrand, refGeneExonCase: refGeneExonCase, refGeneRange: refGeneRange, sparseDataMatchGenes: dsIDFn(sparseDataMatchField('genes')), // helpers: parseDsID: parseDsID, nanstr: nanstr, xenaPost: xenaPost, testHost: testHost, // reference 
refGene: refGene, transcript: transcript, // cohort meta fetchCohortMeta: fetchJSON(cohortMetaURL), fetchCohortPreferred: fetchJSON(cohortPreferredURL), fetchCohortPhenotype: fetchJSON(cohortPhenotypeURL) });