/*
 * @plantinformatics/vcf-genotype-brapi
 * Version: (not specified)
 * Client and server functions to access genotype data from VCF via a custom web API and BrAPI.
 * Package listing details: 730 lines (671 loc), 27.9 kB, JavaScript.
 */
import { promisify } from 'util';
import intervalTree1d from 'interval-tree-1d';
import * as childProcessProgressive from '@plantinformatics/child-process-progressive/dist/child-process-progressive.mjs';
//------------------------------------------------------------------------------
/** Convert string representations of boolean and Number values, in the named
 * fields of an object, to native values. The object is modified in place.
 *
 * Objects passed as remoteMethod params carry these values as strings, e.g.
 * vcfGenotypeLookup() : preArgs : {... , requestInfo: 'false', requestFormat: 'Numerical', requestSamplesAll: 'true', snpPolymorphismFilter: 'false'}
 * Block.blockFeaturesCounts : userOptions : { ... mafThreshold : '0', ... }
 * JSON.parse() handles both the boolean and the numeric case.
 *
 * Originally named parseBooleanFields(), (added in pretzel in 011deb32, minor edit in 339ab17b) in lb4app/lb3app/common/utilities/json-text.js.
 *
 * @param object  object whose fields are converted
 * @param fieldNames  array of field names to convert. Fields whose value is
 * not a string (including absent fields) are left unchanged.
 * Only name fields which hold JSON text : JSON.parse() throws SyntaxError for
 * other strings, e.g. 'Numerical'.
 */
function parseStringFields(object, fieldNames) {
  for (const name of fieldNames) {
    const text = object[name];
    if (typeof text !== 'string') {
      continue;
    }
    object[name] = JSON.parse(text);
  }
}
//------------------------------------------------------------------------------
/** Add the members of each source object in m to the namespace object n, then
 * freeze and return n.
 *
 * For each key of a source which is not 'default' and not already in n :
 * if the source defines the key with a getter, that property descriptor is
 * reused; otherwise an enumerable getter is defined which reads the current
 * value from the source, so n reflects later changes to the source.
 *
 * @param n  namespace object to extend; it is frozen by this call
 * @param m  array of source values; falsy values, strings and arrays are skipped
 * @return n
 */
function _mergeNamespaces(n, m) {
  for (const source of m) {
    const mergeable = source && typeof source !== 'string' && !Array.isArray(source);
    if (!mergeable) {
      continue;
    }
    for (const key of Object.keys(source)) {
      if (key === 'default' || key in n) {
        continue;
      }
      const descriptor = Object.getOwnPropertyDescriptor(source, key);
      const liveGetter = {
        enumerable: true,
        get() { return source[key]; },
      };
      Object.defineProperty(n, key, descriptor.get ? descriptor : liveGetter);
    }
  }
  return Object.freeze(n);
}
/** Export object of the bundled interval-bins code; binEvenLengthRound and
 * binBoundaries are attached to it below. */
var src = {};

/*----------------------------------------------------------------------------*/

/* global require */
/* global exports */
/* global process */

/*----------------------------------------------------------------------------*/

/** Calculate the bin size for even-sized bins to span the given interval.
 * The bin size is rounded to be a multiple of a power of 10, only the first
 * digit is non-zero : it is 1, 2 or 5.
 * Used in @see binBoundaries().
 *
 * @param interval  [start, end]; the direction may be reversed (end < start),
 * which could occur with only -ve features in block.
 * @param nBins  number of bins wanted; must be > 0
 * @return lengthRounded, which is > 0, or undefined if interval is not a
 * 2-element array or nBins is not > 0.
 * If the bin length cannot be calculated (zero-length or non-numeric interval)
 * the result is 0.1.
 */
function binEvenLengthRound$1(interval, nBins) {
  let lengthRounded;
  if (interval && (interval.length === 2) && (nBins > 0)) {
    const
      intervalLength = Math.abs(interval[1] - interval[0]),
      binLength = intervalLength / nBins,
      /** decimal exponent of binLength */
      digits = Math.floor(Math.log10(binLength)),
      eN1 = Math.exp(digits * Math.log(10)),
      mantissa = binLength / eN1,
      /** choose 1 2 or 5 as the first digit of the bin size. */
      m1 = mantissa > 5 ? 5 : (mantissa > 2 ? 2 : 1);
    if (digits >= 0) {
      lengthRounded = Math.round(m1 * eN1);
    } else if (Number.isFinite(digits)) {
      /* For e.g. digits===-1, eN1 is 0.09999999999999998, and (m1 * eN1) is
       * 0.4999999999999999 which would round down to 0.  So instead construct
       * the decimal text of m1 x 10^digits and let Number() convert it,
       * so .round() is not required.
       * The previous form built '0.' + zeros + '1' with a zero count of
       * (1 + digits), which is never positive here, so the result was always
       * m1 * 0.1, i.e. too large for digits <= -2.
       */
      lengthRounded = Number(m1 + 'e' + digits);
    } else {
      /* digits is -Infinity (zero-length interval) or NaN (non-numeric
       * interval).  0.1 is the value which was returned in these cases. */
      lengthRounded = 0.1;
    }

    console.log('binEvenLengthRound', interval, nBins, intervalLength, binLength, digits, eN1, mantissa, m1, lengthRounded);
  }
  return lengthRounded;
}
var binEvenLengthRound_1 = src.binEvenLengthRound = binEvenLengthRound$1;

/** Generate an array of even-sized bin boundaries to span the given interval.
 * Used for mongo aggregation pipeline : $bucket : boundaries.
 *
 * The first boundary is the multiple of the bin size at or before
 * interval[0] (in the direction of the interval); boundaries are added until
 * one reaches or passes interval[1].
 *
 * @param interval  [start, end]. If end < start the boundaries are generated
 * in descending order.  A zero-length interval produces 2 boundaries.
 * @param lengthRounded  bin size, e.g. from binEvenLengthRound(); its sign is
 * ignored
 * @return array of boundaries, or undefined if lengthRounded is falsy.
 */
function binBoundaries$1(interval, lengthRounded) {
  let b;
  if (lengthRounded) {
    const
      [start, end] = interval,
      size = Math.abs(lengthRounded),
      direction = Math.sign(end - start),
      reverse = direction < 0,
      /** Step towards interval[1]; stepping by +size for a reversed interval
       * would move away from the end and never terminate. */
      step = reverse ? -size : size,
      align = reverse ? Math.ceil : Math.floor,
      /** true while location has not yet reached end. */
      beforeEnd = reverse ?
        function (a, z) { return a > z; } :
        function (a, z) { return a < z; };

    let location = align(start / size) * size;
    b = [location];
    do {
      location += step;
      b.push(location);
    }
    while (beforeEnd(location, end));
    console.log('binBoundaries', direction, b.length, location, b[0], b[b.length-1]);
  }
  return b;
}
var binBoundaries_1 = src.binBoundaries = binBoundaries$1;
/** Namespace object for the interval-bins functions defined above.
 * binBoundaries and binEvenLengthRound are listed as named members and the src
 * object as default; _mergeNamespaces() is also given [src], from which it adds
 * any key not already present, and it returns the object frozen.
 */
var intervalBins = /*#__PURE__*/_mergeNamespaces({
__proto__: null,
binBoundaries: binBoundaries_1,
binEvenLengthRound: binEvenLengthRound_1,
default: src
}, [src]);
/* Module-level bindings used by the functions below.  The commented-out
 * import / require lines record earlier ways of obtaining the same names.
 * The console.log() calls run once, when the module is loaded.
 */
// import { promisifyFn as promisify } from './promisify';
// const util = require('util');
// const util = {promisify}; // require('util');
/** Constructor function of interval-tree-1d, under the name used by vcfToSummary. */
const createIntervalTree = intervalTree1d;
// import createIntervalTree from "interval-tree-1d";
console.log('createIntervalTree', createIntervalTree, intervalTree1d);
console.log('childProcessProgressive', childProcessProgressive);
/** ErrorStatus is read from the default export of child-process-progressive
 * (.errorStatus); it is constructed with (statusCode, text) in
 * vcfGenotypeFeaturesCounts(). */
const { ErrorStatus } = childProcessProgressive.default.errorStatus; // = require('./errorStatus.js');
/*
function childProcess() { }
function dataOutReplyClosureLimit() { }
function dataReduceClosure() { }
function stringCountString() { }
*/
/** Functions read from the default export of child-process-progressive
 * (.childProcess).  stringCountString is not referenced in this file section. */
const /*import*/ {
childProcess, dataOutReplyClosureLimit, dataOutReplyClosure, dataReduceClosure,
stringCountString,
} = childProcessProgressive.default.childProcess;
console.log('interval-bins', intervalBins);
/** The bin functions, under the names used by vcfToSummary. */
const /* import*/ { binEvenLengthRound, binBoundaries } = intervalBins; // from 'interval-bins';
//------------------------------------------------------------------------------
/** Run vcfGenotypeLookup.bash via childProcess(), non-progressive, passing the
 * reply to cb via dataOutReplyClosure(cb).
 *
 * @param command  first param of the script
 * @param datasetId, scope  2nd and 3rd params of the script
 * @param preArgs  optional array of further params, appended after the
 * (empty) isecFlags and isecDatasetIds params
 * @param cb  node-style callback; also passed to childProcess()
 */
function callOut(command, datasetId, scope, preArgs, cb) {
  const
    scriptName = 'vcfGenotypeLookup.bash',
    postData = '',
    useFile = false,
    fileName = undefined,
    isecFlags = '',
    isecDatasetIds = '',
    fixedParams = [command, datasetId, scope, isecFlags, isecDatasetIds],
    moreParams = fixedParams.concat(preArgs || []),
    dataOutCb = dataOutReplyClosure(cb),
    progressive = false;
  childProcess(
    scriptName, postData, useFile, fileName,
    moreParams, dataOutCb, cb, progressive);
}
/** Promise form of callOut() : (command, datasetId, scope, preArgs) -> promise. */
const callOutP = promisify(callOut);
/** List the sample names of a VCF dataset chromosome, optionally filtered to
 * the samples whose genotypes match the given SNP conditions.
 *
 * Without filter, the list is requested with command 'query' and -l.
 * With filter, the features are grouped by matchRef, and 1 'filter_samples'
 * call is made per non-empty group, starting with the group of features[0].
 * The samples reported are those which matched at every SNP queried.
 *
 * @param datasetId  passed to vcfGenotypeLookup.bash
 * @param scope  chromosome name; also the prefix of each region scope:position
 * @param filter  optional : { matchHet, features : [ {position, matchRef}, ... ] }
 * matchHet, position and matchRef may be given as strings; they are parsed in
 * place, i.e. filter is modified.  features is expected to be non-empty and
 * each matchRef to be true or false.
 * @return promise yielding text :
 * with filter : the matching sample names joined with '\n';
 * without filter : the output of the 'query' -l call.
 */
function vcfGenotypeSamplesFiltered(datasetId, scope, filter) {
  const fnName = 'vcfGenotypeSamplesFiltered';
  if (! filter) {
    // -l, --list-samples: list sample names and exit
    return callOutP('query', datasetId, scope, /*preArgs*/ ['-l']);
  }

  parseStringFields(filter, ['matchHet']);
  const matchHet = filter.matchHet;
  filter.features.forEach(f => parseStringFields(f, ['position', 'matchRef']));
  /** The purpose of allowing the caller to nominate the first SNP to filter
   * on, by listing it first, is that the 2nd query can be limited to the
   * result of the first, i.e. filteredSamples; see comment below. */
  const refFirst = filter.features[0].matchRef;
  /** arrays of feature positions, keyed by matchRef : .false is Alt, .true is Ref */
  const groupedFilters = filter.features.reduce((grouped, feature) => {
    grouped[feature.matchRef].push(feature.position);
    return grouped;
  }, {true : [], false : []});
  const first = groupedFilters[refFirst];

  /** @return regexp to be used by grep. '.' will match | / etc */
  function refToGenotype(matchRef, matchHet) {
    const
      /** map {false,true} -> {1,0} */
      value = + ! matchRef,
      /** vcfGenotypeLookup.bash uses pattern as gtMatch in : '\t'"$gtMatch"'$' */
      pattern = matchHet ?
        '.*' + value + '.*' :
        value + '.' + value;
    return pattern;
  }

  /** Request the (sample, genotype) rows which match, for the positions in group.
   * @return promise yielding the text output. */
  function groupCall(group, matchRef) {
    const
      /** GT= is recognised by vcfGenotypeLookup.bash to set gtMatch.
       * similar to --include 'GT="1/1"' but that filters SNPs not samples. */
      include = 'GT=' + refToGenotype(matchRef, matchHet),
      regions = group.map(position => scope + ':' + position).join(','),
      preArgs = ['-r'].concat(regions).concat([include]),
      p = callOutP('filter_samples', datasetId, scope, preArgs);
    console.log(fnName, preArgs.join(' '));
    return p;
  }

  /** Number of matches per sample name, accumulated over both queries.
   * A Map is used because sample names are arbitrary text : with a plain
   * object, a name such as 'constructor' or 'toString' finds the inherited
   * Object.prototype member instead of undefined, and is never counted. */
  const counts = new Map();
  function countSamples(sampleNames) {
    sampleNames.forEach(name => counts.set(name, (counts.get(name) ?? 0) + 1));
  }

  /** Number of SNPs queried so far; filter requires counts to match this. */
  let nSNPs = first.length;

  /** @return promise yielding array of the sample names which matched at every
   * SNP queried so far, including this group. */
  function query(group, matchRef) {
    return groupCall(group, matchRef)
      .then(samplesValues => {
        /* split on (tab genotype newline), then
         * trim off the '' created from trailing (... newline).
         */
        const matchedSamples = samplesValues.split(/\t...\n/g);
        if (matchedSamples.at(-1) === '') {
          matchedSamples.pop();
        }
        countSamples(matchedSamples);
        /** Sample names will appear multiple times, once for each SNP genotype which they match.
         * Array.from(new Set( )) preserves order, which is preferable for GUI consistency. */
        const uniqSamples = Array.from(new Set(matchedSamples));
        // next : >= nSNPs - allowMissing
        const filteredSamples = uniqSamples.filter(s => counts.get(s) === nSNPs);
        console.log(fnName, filteredSamples.length, nSNPs, uniqSamples.length, matchedSamples.length);
        return filteredSamples;
      });
  }

  /** Map the array of sample names to the result format of the existing
   * genotypeSamples endpoint. */
  function samplesToResult(samples) {
    return samples.join('\n');
  }

  return query(first, refFirst)
    .then(firstSamples => {
      /** if filteredSamples.length < 100 it might be a good optimisation to
       * narrow the 2nd query to filteredSamples. See comment re. refFirst. */
      const
        secondMatch = ! refFirst,
        second = groupedFilters[secondMatch];
      nSNPs += second.length;
      return second.length ?
        query(second, secondMatch).then(samplesToResult) :
        samplesToResult(firstSamples);
    });
}
/** Request 'haplotypes_samples' from vcfGenotypeLookup.bash for the given
 * positions of 1 chromosome.
 *
 * If handling multiple Blocks (chromosomes) of a dataset then this might use
 * 1 call per chr and combine results. (related : vcfGenotypeSamplesFiltered()
 * combines results from multiple calls).
 *
 * Also, this function could get the list of sample names and map the sample
 * numbers in the result to names, but that is easily done in the frontend,
 * which keeps the reply small.
 *
 * @param datasetId  passed to vcfGenotypeLookup.bash
 * @param scope  chromosome name; also the prefix of each region scope:position
 * @param positions  array of strings; each is passed through JSON.parse(),
 * which sanitises the values somewhat, and throws if the text is not JSON.
 * @return promise yielding the output of the call
 */
function vcfGenotypeHaplotypesSamples(datasetId, scope, positions) {
  const fnName = 'vcfGenotypeHaplotypesSamples';
  const parsedPositions = positions.map(text => JSON.parse(text));
  /** Extract from vcfGenotypeSamplesFiltered() : groupCall, with group -> positions */
  const regionList = parsedPositions
    .map(position => scope + ':' + position)
    .join(',');
  const preArgs = ['-r'].concat(regionList);
  const promise = callOutP('haplotypes_samples', datasetId, scope, preArgs);
  console.log(fnName, preArgs.join(' '));
  return promise;
}
//------------------------------------------------------------------------------
/** Construct the parameter list for vcfGenotypeLookup.bash from the request
 * options, and run it via childProcess(), progressive.
 *
 * @param datasetDir name of directory containing the VCF dataset
 * @param scope e.g. '1A'; identifies the vcf file, i.e. datasetId/scope.vcf.gz
 * scope===undefined or null signifies that all scopes of the dataset should be searched;
 * in that case preArgs.datasetVcfFile is expected.
 * @param preArgs_ options which determine the command and its params.
 * See comment in frontend/app/services/auth.js : vcfGenotypeLookup()
 * .isecFlags and .isecDatasetIds are passed as separate positional params;
 * the remainder select the command and the option list.
 * @param nLines if defined, limit the output to nLines.
 * @param dataOutCb passed to childProcess() - see comment there.
 * If undefined, then dataOutReplyClosureLimit(cb, lineFilter, nLines) is used.
 * @param cb passed to childProcess()
 */
function vcfGenotypeLookup(datasetDir, scope, preArgs_, nLines, dataOutCb, cb) {
  const fnName = 'vcfGenotypeLookup';
  const { isecFlags, isecDatasetIds, ...preArgs } = preArgs_ || {};
  const { headerOnly, requestFormat, samples } = preArgs;

  /** snpPolymorphismFilter is not applicable if SNPList because if the
   * number of samples requested is <=1 then every row appears homozygous.
   */
  const snpPolymorphismFilter = ! preArgs.SNPList && preArgs.snpPolymorphismFilter;
  /** These parameters are supported by view only, not query, so if
   * present then view | query will be used.
   */
  const viewRequired = snpPolymorphismFilter || preArgs.mafThreshold ||
    preArgs.featureCallRateThreshold ||
    preArgs.minAlleles !== undefined || preArgs.maxAlleles !== undefined ||
    preArgs.typeSNP !== undefined;

  let command;
  if (headerOnly) {
    command = 'view';
  } else if (preArgs.SNPList) {
    command = viewRequired ? 'counts_view' : 'counts_query';
  } else if (requestFormat) {
    command = viewRequired ? 'view_query' : 'query';
  } else {
    command = 'view';
  }

  /* isec is only meaningful with >1 datasets. The caller
   * vcfGenotypeLookupDataset() only passes isecDatasetIds when
   * isecDatasetIds.length > 1
   */
  /** The ids are joined with '!', which is split in vcfGenotypeLookup.bash with tr '!' ' ' */
  const isecDatasetIdsText = Array.isArray(isecDatasetIds) ?
    isecDatasetIds.join('!') : isecDatasetIds;

  /** The params passed to spawn (node:child_process) are passed as options.args
   * to ChildProcess.spawn (node:internal/child_process) which calls
   * spawn(options) which converts non-strings to strings, e.g. arrays are
   * joined with ',' into a single string. undefined -> 'undefined'.
   */
  let moreParams = [
    command, datasetDir, scope || preArgs.datasetVcfFile,
    isecFlags || '', isecDatasetIdsText || ''];
  if (scope) {
    moreParams.push('-r', preArgs.region);
  } else {
    moreParams.push('', '');
  }

  if (requestFormat) {
    /** from BCFTOOLS(1) :
     *   bcftools view : -h, --header-only; -H, --no-header
     *   bcftools query : -H, --print-header
     * headerOnly implies command==='view' i.e. -h
     * When ! headerOnly, the header is required;
     * * for view : --with-header is default
     * * for query : use -H
     */
    let headerOption;
    if (headerOnly) {
      headerOption = '-h';
    } else if (command === 'view') {
      headerOption = '';
    } else {
      headerOption = '-H';
    }

    /** from BCFTOOLS(1) :
     * %GT Genotype (e.g. 0/1)
     * %TGT Translated genotype (e.g. C/A)
     */
    const formatGT = (requestFormat === 'CATG') ? '%TGT' : '%GT';
    /** All of INFO/ is requested, so preArgs.requestInfo is not consulted.
     * Note that %INFO produces a column header '(null)' instead of 'INFO';
     * this is handled in addFeaturesJson() in frontend/app/utils/data/vcf-feature.js.
     */
    const formatChromosome = scope ? '' : '%CHROM\t';
    const format = `${formatChromosome}%ID\t%POS\t%REF\t%ALT\t%INFO[\t${formatGT}]\n`;

    /** Params passed to query if view|query is used, otherwise to command. */
    moreParams.push('-queryStart', headerOption, '-f', format, '-queryEnd');

    if (preArgs.snpNames?.length) {
      moreParams = moreParams.concat('-snpsStart', preArgs.snpNames, '-snpsEnd');
    }
    if (headerOnly) {
      moreParams.push('--force-samples');
    }
    /** default is no het filter, i.e. false */
    if (snpPolymorphismFilter) {
      moreParams.push('--genotype', 'het');
    }

    /** Just 1 --include or --exclude is permitted, so the MAF and call rate
     * conditions are combined into 1 expression. */
    const includeConditions = [];
    /** The MAF condition is omitted when it would not filter anything, i.e.
     * >= 0, or <= 0.5 when .mafUpper. */
    const mafNoFilter = preArgs.mafUpper ? 0.5 : 0;
    if ((preArgs.mafThreshold !== undefined) && (preArgs.mafThreshold !== mafNoFilter)) {
      /** Related : mafThresholdText() (components/panel/manage-genotype.js) */
      const comparison = preArgs.mafUpper ? '<=' : '>=';
      includeConditions.push('INFO/MAF' + comparison + preArgs.mafThreshold);
    }
    if (preArgs.featureCallRateThreshold) {
      /** INFO/F_MISSING is the converse of call rate, so the threshold is
       * expressed as a limit on INFO/F_MISSING. */
      includeConditions.push('INFO/F_MISSING < ' + (1 - preArgs.featureCallRateThreshold));
    }
    if (includeConditions.length) {
      moreParams.push('--include', includeConditions.join(' && ')); // aka. -i
    }

    [
      ['--min-alleles', preArgs.minAlleles],
      ['--max-alleles', preArgs.maxAlleles],
    ].forEach(([option, value]) => {
      if (value !== undefined) {
        moreParams.push(option, value);
      }
    });
    if (preArgs.typeSNP) {
      moreParams.push('--types', 'snps');
    }
  }

  if (samples?.length) {
    /** newline-separated names -> comma-separated */
    const samplesJoined = samples.trimEnd().replaceAll('\n', ',');
    moreParams.push('-s', samplesJoined);
  } else if (! preArgs.requestSamplesAll) {
    // There is not an option for 0 samples, except via using an empty file :
    moreParams.push('-S', '/dev/null');
  }

  /** avoid tracing samples : only the leading params are logged. */
  console.log(fnName, datasetDir, preArgs.region, requestFormat, samples?.length, moreParams.slice(0, 9+3));

  const outCb = dataOutCb || dataOutReplyClosureLimit(cb, /*lineFilter*/ null, nLines);
  childProcess(
    'vcfGenotypeLookup.bash',
    /* postData */ '',
    /* useFile */ false,
    /* fileName */ undefined,
    moreParams,
    outCb, cb, /*progressive*/ true);
}
/** Count the SNPs of a VCF block within interval, in even-sized bins.
 * The SNP list is requested via vcfGenotypeLookup() and the positions are
 * accumulated into bins by vcfToSummary as the output is received.
 * header comment copied from block-features.js : blockFeaturesCounts()
 *
 * @param block  { id, name, datasetId }; .name is the scope (chromosome) and
 * .datasetId is the VCF dataset directory
 * @param interval  [start, end], numbers.  If start > end the 2 values are
 * swapped, in place, i.e. the caller's array is modified.
 * @param nBins  number of bins wanted, default 10
 * @param isZoomed  not used by this function
 * @param userOptions  optional; each defined value is copied into the
 * preArgs passed to vcfGenotypeLookup(), e.g. mafThreshold.
 * @param cb  passed to vcfGenotypeLookup()
 * @return an ErrorStatus (400) if interval is not a 2-element array, otherwise
 * undefined.
 * NOTE(review): in the error case cb is not called; confirm that callers
 * check the return value.
 */
function vcfGenotypeFeaturesCounts(
  block, interval, nBins = 10, isZoomed, userOptions, cb) {
  const fnName = 'vcfGenotypeFeaturesCounts';

  // default interval can be the whole domain of the block
  if (! interval || interval.length !== 2) {
    const errorText = 'Interval is required. ' + JSON.stringify(interval);
    return new ErrorStatus(400, errorText);
  }

  if (interval[0] > interval[1]) {
    console.warn(fnName, 'reverse interval', interval, block.id);
    const swap = interval[0];
    interval[0] = interval[1];
    interval[1] = swap;
  }

  const
    scope = block.name,
    datasetDir = block.datasetId,
    // may be able to omit domainInteger if ! isZoomed
    domainInteger = interval.map((d) => d.toFixed(0)),
    region = scope + ':' + domainInteger.join('-'),
    preArgs = {region, samples : null, requestFormat : 'CATG', SNPList : true},
    /* The params are passed by name so that the default value of nBins
     * applies; the arguments object holds the values as passed, i.e.
     * undefined when nBins is omitted. */
    summary = new vcfToSummary(block, interval, nBins);

  if (userOptions) {
    Object.entries(userOptions).forEach(([key, value]) => {
      if (value !== undefined) {
        preArgs[key] = value;
      }
    });
  }

  /** Receive 1 chunk of output text; text === undefined signifies the end of
   * the output, and the summary is returned.
   * @throws error, if given */
  function sumCb(error, text) {
    let result;
    if (error) {
      throw error;
    } else if (text === undefined) {
      result = summary.summarise();
    } else {
      summary.accumulateChunk(text);
    }
    return result;
  }

  const dataOutCb = dataReduceClosure(sumCb);
  vcfGenotypeLookup(
    datasetDir, scope,
    preArgs, /*nLines*/undefined, dataOutCb, cb
  );
  /* vcfGenotypeLookup() includes %REF\t%ALT, which could be omitted in this case. */

  return undefined;
}
/** Key of the count which is stored on each bin interval array. */
const symbolCount = Symbol.for('count');

/** Count 1 complete line of output in the bin(s) which contain its position.
 * The first line of the output is the header line and is skipped, as are
 * lines starting with '#', empty lines, and lines without a numeric position.
 * @param summary  a vcfToSummary
 * @param line  text of 1 line, without the newline. Tab-separated; the
 * position is the 2nd column.
 */
function vcfToSummaryCountLine(summary, line) {
  if (! summary.headerSkipped) {
    summary.headerSkipped = true;
    return;
  }
  if ((line === '') || line.startsWith('#')) {
    return;
  }
  const position = +line.split('\t')[1];
  if (! Number.isFinite(position)) {
    return;
  }
  /* The visitor returns undefined, as the original visitor did.
   * NOTE(review): the bins are [start, end] pairs which share boundary values;
   * confirm whether a position equal to a boundary is visited in both bins. */
  summary.summaryTree.queryInterval(position, position, (bin) => {
    bin[symbolCount]++;
  });
}

/** Accumulate the positions in the text output of a VCF SNP list request
 * into counts in even-sized bins.
 * Usage : construct, accumulateChunk() for each chunk of output, summarise().
 */
class vcfToSummary {
  /**
   * @param block  not used
   * @param interval domain of the bins, [start, end]
   * @param nBins  number of bins wanted, passed to binEvenLengthRound()
   */
  constructor(block, interval, nBins) {
    const
      lengthRounded = binEvenLengthRound(interval, nBins),
      boundaries = binBoundaries(interval, lengthRounded),
      /** map the boundaries into interval [start, end] pairs : n boundaries
       * delimit n-1 bins.  (Previously .slice(1, boundaries.length-1) was
       * applied after skipping the first boundary, which omitted the last bin.) */
      intervals = boundaries.slice(1).map((end, i) => [boundaries[i], end]);
    intervals.forEach((bin) => { bin[symbolCount] = 0; });
    // set up bins and interval tree
    this.summaryTree = createIntervalTree(intervals);
    /** Text after the last newline of the chunks received so far, i.e. a line
     * which may be incomplete. */
    this.pendingLine = '';
    /** true after the header line has been skipped. */
    this.headerSkipped = false;
  }

  /** Count the positions of the complete lines in the text received so far.
   * @param text  next chunk of output. It has \n and \t, column format e.g. :
   * # [1]ID	[2]POS	[3]REF	[4]ALT
   * scaffold38755_1190119	1190119	C	T
   * The first line of the first chunk is the header line.  The last line of a
   * chunk may be incomplete, so it is saved and prepended to the next chunk.
   */
  accumulateChunk(text) {
    const lines = ((this.pendingLine ?? '') + text).split('\n');
    this.pendingLine = lines.pop();
    lines.forEach((line) => vcfToSummaryCountLine(this, line));
  }

  /** Count the final line, if the output did not end with a newline, and
   * report the bins.
   * @return summary array, sorted by bin start, in the same format as
   * block-features.js : blockFeaturesCounts(), @see vcfGenotypeFeaturesCounts()
   * i.e. [{_id : bin start, count, idWidth : [bin length]}, ...]
   */
  summarise() {
    if (this.pendingLine) {
      const lastLine = this.pendingLine;
      this.pendingLine = '';
      vcfToSummaryCountLine(this, lastLine);
    }
    const
      summaryArray = this.summaryTree.intervals
        .sort((a, b) => a[0] - b[0])
        .map(
          (interval) =>
            ({
              _id : interval[0],
              count : interval[symbolCount],
              idWidth : [interval[1] - interval[0]]
            }));
    return summaryArray;
  }
}
//------------------------------------------------------------------------------
/** Get the status of .vcf.gz files for this dataset, by running
 * vcfGenotypeLookup.bash with command 'status', non-progressive.
 * Related : vcfGenotypeFeaturesCounts().
 * @param datasetDir  name of directory containing the VCF dataset
 * @param cb  node-style callback; receives (null, text) where text is the
 * combined output converted to a string.  Also passed to childProcess().
 */
function vcfGenotypeFeaturesCountsStatus(datasetDir, cb) {
  const
    scope = '',
    isecFlags = '',
    isecDatasetIds = '',
    moreParams = ['status', datasetDir, scope, isecFlags, isecDatasetIds];

  /** Receive the combined result (progressive===false).
   * For non-progressive (expect that the result is in a single chunk) could use
   * dataReduceClosure() to catenate chunks.
   * @param combined Buffer
   * @param cb  called with (null, text)
   */
  const dataOutCb = (combined, cb) => {
    cb(null, combined.toString());
  };

  childProcess(
    'vcfGenotypeLookup.bash',
    /* postData */ '',
    /* useFile */ false,
    /* fileName */ undefined,
    moreParams,
    dataOutCb, cb, /*progressive*/ false);
}
/** Promise form of vcfGenotypeFeaturesCountsStatus() : (datasetDir) -> promise yielding text. */
const vcfGenotypeFeaturesCountsStatusP = promisify(vcfGenotypeFeaturesCountsStatus);
/** Check if base VCF and SNPLists are installed for any VCF datasets in datasets.
 * The requirement for SNPLists is only applied if the base VCF is large
 * (size >= 100e6).
 * vcfGenotypeLookup.{bash,Makefile} will automatically generate
 * .MAF.SNPList.vcf.gz if it is not present.
 * If the size of the base .vcf.gz is such that this will take > ~5mins then
 * require the user to install this .MAF.SNPList.vcf.gz before uploading the VCF
 * worksheet.
 * @param datasets  array of { name, tags, blocks : [{name}, ...] }.  A dataset
 * is a VCF dataset if its tags include 'VCF'; .blocks is read only for those.
 * @param status  not used
 * @return an array of promises, 1 per dataset, in the same order.  Each
 * yields true for a dataset which is not VCF or is installed, and false for a
 * VCF dataset which has a block with a large base VCF and no .MAF.SNPList.
 */
function checkVCFsAreInstalled(datasets, status) {
  const fnName = 'checkVCFsAreInstalled';

  /** @return truthy if the base VCF of the block is small, or its size is
   * not known, or the status lists a .MAF.SNPList file for the block. */
  function blockIsOk(chrStatus, block) {
    const
      fileStatus = chrStatus[block.name],
      /** size and time of chr base .vcf.gz e.g. ' 354566 Sep 12 16:20' */
      sizeTime = fileStatus?.[''],
      sizeMatch = sizeTime?.match(/^ *([0-9]+)/),
      small = ! sizeMatch || (+sizeMatch[1] < 100e6);
    return small || fileStatus['.MAF.SNPList'];
  }

  return datasets.map(dataset => {
    console.log(fnName, dataset.name, dataset.tags);
    if (! dataset.tags?.includes('VCF')) {
      return Promise.resolve(true);
    }
    return vcfGenotypeFeaturesCountsStatusP(dataset.name)
      .then(vcfStatus => {
        const
          chrStatus = statusToObj(vcfStatus),
          notInstalled = dataset.blocks.filter(block => ! blockIsOk(chrStatus, block));
        console.log(dataset.name, notInstalled, chrStatus, vcfStatus);
        return ! notInstalled.length;
      });
  });
}
//------------------------------------------------------------------------------
/** Construct a mapping from chr name to the suffixes of the available .vcf.gz
 * files for that chromosome.
 * extract from frontend/app/utils/data/vcf-files.js : statusToMatrix()
 *
 * @param vcfStatus  text, 1 line per file : size and time, a space, then the
 * file name, e.g. ' 354566 Sep 12 16:20 1A.MAF.vcf.gz'.  Lines which do not
 * match are ignored.
 * @return summary object[chrName][colName] -> sizeTime,
 * where chrName is the part of the file name before the first '.', colName
 * is the text between chrName and .vcf.gz, plus the text after .vcf.gz,
 * e.g. '' for the base .vcf.gz, '.csi', '.MAF.SNPList'.
 */
function statusToObj(vcfStatus) {
  const summary = {};
  for (const line of vcfStatus.split('\n')) {
    const matched = line.match(/(.*) ([^.]+)(.*).vcf.gz(.*)/);
    if (! matched) {
      continue;
    }
    const [, sizeTime, chrName, suffix, csi] = matched;
    const colName = suffix + csi; // .replaceAll('.', unicodeDot),
    if (! summary[chrName]) {
      summary[chrName] = {};
    }
    summary[chrName][colName] = sizeTime;
  }
  return summary;
}
//------------------------------------------------------------------------------
/** Frozen namespace object of the VCF genotype functions defined above. */
var vcfGenotype = /*#__PURE__*/Object.freeze({
__proto__: null,
checkVCFsAreInstalled: checkVCFsAreInstalled,
vcfGenotypeFeaturesCounts: vcfGenotypeFeaturesCounts,
vcfGenotypeFeaturesCountsStatus: vcfGenotypeFeaturesCountsStatus,
vcfGenotypeHaplotypesSamples: vcfGenotypeHaplotypesSamples,
vcfGenotypeLookup: vcfGenotypeLookup,
vcfGenotypeSamplesFiltered: vcfGenotypeSamplesFiltered
});
/** Default export of the module : { vcfGenotype }. */
var main_node = { vcfGenotype };
export { main_node as default };