UNPKG

ucsc-xena-client

Version:

UCSC Xena Client. Functional genomics visualizations.

290 lines (251 loc) 8.82 kB
/*eslint-disable camelcase */
'use strict';

var jStat = require('jStat').jStat,
	_ = require('./underscore_ext'),
	linearAlgebra = require('linear-algebra')(),
	Matrix = linearAlgebra.Matrix;

var reduce = _.reduce,
	map = _.map,
	groupBy = _.groupBy,
	sortBy = _.sortBy,
	last = _.last,
	uniq = _.uniq,
	pluck = _.pluck,
	filter = _.filter;

// Extract the 'tte' property from every element of x.
function pluckTte(x) {
	return pluck(x, 'tte');
}

// kaplan-meier
// See http://en.wikipedia.org/wiki/Kaplan%E2%80%93Meier_estimator
//
// tte  time to exit (event or censor), one entry per subject
// ev   parallel to tte; truthy if the exit is an event
//
// Returns one row per distinct exit time, in ascending time order
// (censor-only times included):
//   t    : the exit time
//   e    : true if at least one event happened at t, false if only censors
//   s    : the survival probability from t=0 to t (i.e. the end of the time interval)
//   n    : number at risk just before t
//   d    : number of events at t
//   rate : d / n, the chance of an event within the interval; null when only censors
function compute(tte, ev) {
	var collated = map(tte, function (time, idx) {
			return { tte: time, ev: ev[idx] };
		}),
		ordered = sortBy(collated, 'tte'),               // sort and collate
		times = uniq(pluckTte(ordered), true),           // unique tte (input is sorted)
		byTime = groupBy(ordered, function (exit) {      // group by common time of exit
			return exit.tte;
		}),
		atRisk = ordered.length,                         // n_i, starts with everybody
		survival = 1,                                    // running product of (1 - d_i / n_i)
		curve = [],
		k, atTime, deaths, hazard;

	function isEvent(exit) {
		return exit.ev;
	}

	for (k = 0; k < times.length; k++) {
		atTime = byTime[times[k]];
		deaths = filter(atTime, isEvent).length;

		if (deaths) { // there were events at this t_i
			hazard = deaths / atRisk;
			survival = survival * (1 - hazard);
			curve.push({
				t: atTime[0].tte,
				e: true,
				s: survival,
				n: atRisk,
				d: deaths,
				rate: hazard
			});
		} else { // only censors: the curve stays flat
			curve.push({
				t: atTime[0].tte,
				e: false,
				s: survival,
				n: atRisk,
				d: deaths,
				rate: null
			});
		}

		// everybody exiting at this time (event or censor) leaves the risk set
		atRisk -= atTime.length;
	}

	return curve;
}

// log-rank test of the difference between KM plots
//
// http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3059453/
// a good article to understand KM and comparing KM plots using log-rank test,
// they used the pearson chisquared test to compute test statistics
// sum of (O-E)^2/E
//
// http://oto.sagepub.com/content/143/3/331.long
// a good article to understand KM and comparing KM plots using log-rank test and hazard ratio test
// they also used the pearson chisquared test to compute test statistics
//
// http://www.ncbi.nlm.nih.gov/pmc/articles/PMC403858/
// introduce pearson chi-square to compute logrank statistics, however mentioned there is another way
//
// https://stat.ethz.ch/education/semesters/ss2011/seminar/contents/presentation_2.pdf
// introduce the other way
//
// http://ssp.unl.edu/Log%20Rank%20Test%20For%20More%20Than%202%20Groups.pdf
// gives basic idea of the "other" way
// (O-E)^2/V V is variance for two groups and covariance for multiple groups
//
// https://cran.r-project.org/web/packages/survival/survival.pdf
// R use (O-E)^2/V V is variance for two groups and covariance for multiple groups
//
// https://github.com/CamDavidsonPilon/lifelines/blob/master/lifelines/statistics.py
// python implementation, identical results to R
//
// covariance calculation
https://books.google.com/books?id=nPkjIEVY-CsC&pg=PA451&lpg=PA451&dq=multivariate+hypergeometric+distribution+covariance&source=bl&ots=yoieGfA4bu&sig=dhRcSYKcYiqLXBPZWOaqzciViMs&hl=en&sa=X&ved=0CEQQ6AEwBmoVChMIkqbU09SuyAIVgimICh0J3w1x#v=onepage&q=multivariate%20hypergeometric%20distribution%20covariance&f=false //https://plot.ly/ipython-notebooks/survival-analysis-r-vs-python/#Using-R // R online tutorial // chisquare distribution at // https://github.com/jstat/jstat/blob/master/src/distribution.js // testing jStat accuracy: http://www.socscistatistics.com/pvalues/chidistribution.aspx // p value = 1- jStat.chisquare.cdf(x, dof ); -- x is chisquare statistics, dof is degree of freedom // for comparing two plots, the dof is n-1 = 1, comparing three plots dof = n-1 = 2 // given a theoretical survival curve (si), and tte + ev ( tte and ev is the data ), // compute the expected total number of events // report observed n events, expected n events. pearson's chi-square component (O-E)^2/E function expectedObservedEventNumber(si, tte, ev) { var exits = sortBy(map(tte, function (x, i) { return { tte: x, ev: ev[i] }; }), 'tte'), // sort and collate uexits = _.uniq(_.pluck(exits, 'tte'), true), // unique tte gexits = groupBy(exits, function (x) { return x.tte; }), // group by common time of exit data = reduce(uexits, function (a, tte) { // sorted by time stats from the input data as in tte,ev var group = gexits[tte], l = last(a) || { n: exits.length, e: 0 }, events = filter(group, function (x) { return x.ev; }); a.push({ n: l.n - l.e, // at risk e: group.length, // number exiting d: events.length, // number events (death) t: group[0].tte // time }); return a; }, []), expectedNumber, observedNumber, dataByTimeTable = []; si = si.filter(function (item) { //only keep the curve where there is an event if (item.e) { return true; } else { return false; } }); expectedNumber = reduce(si, function (memo, item) { var pointerInData = _.find(data, function (x) { if (x.t === item.t) { 
return true; } if (x.t > item.t) { return true; } return false; }); if (pointerInData) { var expected = pointerInData.n * item.rate; dataByTimeTable.push(pointerInData); return memo + expected; } else { return memo; } }, 0); observedNumber = filter(ev, function (x) { return x === 1; }).length; //1 is the internal xena converted code for EVENT return { expected: expectedNumber, observed: observedNumber, dataByTimeTable: dataByTimeTable, timeNumber: dataByTimeTable.length }; } function logranktest(allGroupsRes, groupsTte, groupsEv) { var KM_stats, pValue, dof, // degree of freedom i, j, //groups t, //timeIndex O_E_table = [], O_minus_E_vector = [], O_minus_E_vector_minus1, // O-E and O-E drop the last element vv = [], vv_minus1, //covariant matrix and covraiance matrix drops the last row and column N, //total number of samples Ki, Kj, // at risk number from each group n; //total observed _.each(groupsTte, function (groupTte, i) { var group = { tte: groupTte, ev: groupsEv[i] }, r = expectedObservedEventNumber(allGroupsRes, group.tte, group.ev); //console.log(group.name, group.tte.length, r.observed, r.expected, // (r.observed-r.expected)*(r.observed-r.expected)/r.expected, r.timeNumber); if (r.expected) { O_E_table.push(r); O_minus_E_vector.push(r.observed - r.expected); } }); dof = O_E_table.length - 1; // logrank stats covariance matrix vv for (i = 0; i < O_E_table.length; i++) { vv.push([]); for (j = 0; j < O_E_table.length; j++) { vv[i].push(0); } } for (i = 0; i < O_E_table.length; i++) { for (j = i; j < O_E_table.length; j++) { for (t = 0; t < allGroupsRes.length; t++) { N = allGroupsRes[t].n; n = allGroupsRes[t].d; if (t < O_E_table[i].timeNumber && t < O_E_table[j].timeNumber) { Ki = O_E_table[i].dataByTimeTable[t].n; Kj = O_E_table[j].dataByTimeTable[t].n; // 
https://books.google.com/books?id=nPkjIEVY-CsC&pg=PA451&lpg=PA451&dq=multivariate+hypergeometric+distribution+covariance&source=bl&ots=yoieGfA4bu&sig=dhRcSYKcYiqLXBPZWOaqzciViMs&hl=en&sa=X&ved=0CEQQ6AEwBmoVChMIkqbU09SuyAIVgimICh0J3w1x#v=onepage&q=multivariate%20hypergeometric%20distribution%20covariance&f=false // when N==1: only 1 subject, no variance if (i !== j && N !== 1) { vv[i][j] -= n * Ki * Kj * (N - n) / (N * N * (N - 1)); vv[j][i] = vv[i][j]; } else { //i==j if (N !== 1) { vv[i][i] += n * Ki * (N - Ki) * (N - n) / (N * N * (N - 1)); } } } } } } O_minus_E_vector_minus1 = O_minus_E_vector.slice(0, O_minus_E_vector.length - 1); vv_minus1 = vv.slice(0, vv.length - 1); for (i = 0; i < vv_minus1.length; i++) { vv_minus1[i] = vv_minus1[i].slice(0, vv_minus1[i].length - 1); } var vv_minus1_copy = vv_minus1.slice(0, vv_minus1.length); for (i = 0; i < vv_minus1.length; i++) { vv_minus1_copy[i] = vv_minus1[i].slice(0, vv_minus1[i].length); } if (dof > 0) { var m = new Matrix([O_minus_E_vector_minus1]), m_T = new Matrix([O_minus_E_vector_minus1]).trans(), vv_minus1_inv = new Matrix(jStat.inv(vv_minus1_copy)), mfinal = m.dot(vv_minus1_inv).dot(m_T); KM_stats = mfinal.data[0][0]; pValue = 1 - jStat.chisquare.cdf(KM_stats, dof); } return { dof: dof, KM_stats: KM_stats, pValue: pValue }; } module.exports = { compute: compute, logranktest: logranktest };