UNPKG

@plantinformatics/vcf-genotype-brapi

Version:

Client and server functions to access genotype data from VCF via a custom web API and BrAPI

1,283 lines (1,160 loc) 45.1 kB
import { promisify } from 'util';
import intervalTree1d from 'interval-tree-1d';
import * as childProcessProgressive from '@plantinformatics/child-process-progressive/dist/child-process-progressive.mjs';
import { create } from 'flat-cache';

//------------------------------------------------------------------------------

/** Handle string representations of boolean and Number args in objects passed
 * as remoteMethod params.  The boolean args are being received as strings, e.g.
 * vcfGenotypeLookup() : preArgs : {... , requestInfo: 'false',
 *   requestSamplesAll: 'true', snpPolymorphismFilter: 'false'}
 * Parse these to convert to native values; JSON.parse() works for numeric
 * values also, e.g. Block.blockFeaturesCounts : userOptions : { mafThreshold : '0' }
 *
 * Originally named parseBooleanFields() in
 * lb4app/lb3app/common/utilities/json-text.js.
 *
 * @param {object} object  modified in place
 * @param {string[]} fieldNames  names of the fields of object to parse, if they
 * are strings.
 * @throws {SyntaxError} from JSON.parse() if a string field is not valid JSON.
 */
function parseStringFields(object, fieldNames) {
  fieldNames.forEach(fieldName => {
    if (typeof object[fieldName] === 'string') {
      /** trace confirmed this is used for : snpPolymorphismFilter,
       * mafThreshold, featureCallRateThreshold in blockFeaturesCounts.
       */
      object[fieldName] = JSON.parse(object[fieldName]);
    }
  });
}

//------------------------------------------------------------------------------

/** Bundler helper : copy the enumerable own keys (other than 'default') of
 * each namespace object in m onto n as getters, unless n already has the key,
 * then freeze and return n.
 */
function _mergeNamespaces(n, m) {
  m.forEach(function (e) {
    e && typeof e !== 'string' && !Array.isArray(e) && Object.keys(e).forEach(function (k) {
      if (k !== 'default' && !(k in n)) {
        var d = Object.getOwnPropertyDescriptor(e, k);
        Object.defineProperty(n, k, d.get ? d : {
          enumerable: true,
          get: function () { return e[k]; }
        });
      }
    });
  });
  return Object.freeze(n);
}

var src = {};

/*----------------------------------------------------------------------------*/

/** Calculate the bin size for even-sized bins to span the given interval.
 * The bin size is rounded to be a multiple of a power of 10, only the first
 * digit is non-zero (1, 2 or 5).
 * Used in @see binBoundaries().
 * @param {number[]} interval  [start, end]; may be in reverse direction.
 * @param {number} nBins  desired number of bins, > 0
 * @return lengthRounded, or undefined if the arguments are not usable.
 */
function binEvenLengthRound$1(interval, nBins) {
  let lengthRounded;
  if (interval && (interval.length === 2) && (nBins > 0)) {
    /** handle -ve interval direction - could occur with only -ve features in block. */
    const
    intervalLength = Math.abs(interval[1] - interval[0]),
    binLength = intervalLength / nBins,
    digits = Math.floor(Math.log10(binLength));
    let eN1 = Math.exp(digits * Math.log(10));
    const
    mantissa = binLength / eN1,
    /** choose 1 2 or 5 as the first digit of the bin size. */
    m1 = mantissa > 5 ? 5 : (mantissa > 2 ? 2 : 1);
    if (digits >= 0) {
      lengthRounded = Math.round(m1 * eN1);
    } else {
      /** for e.g. digits===-1, eN1 is 0.09999999999999998, and (m1 * eN1)
       * would round down to 0.  So instead construct the power of 10 as a
       * decimal string : digits===-1 -> '0.1', digits===-2 -> '0.01', ...
       * The number of zeros after the point is (-1 - digits).
       * (Previously .substr(0, 1+digits) was used, which has a negative length
       * for digits < -1 and so always produced '0.1'.)
       * digits is -Infinity / NaN for a zero-length or invalid interval; in
       * that case fall back to '0.1', as before.
       */
      const nZeros = Number.isFinite(digits) ? -1 - digits : 0;
      eN1 = '0.' + '0'.repeat(nZeros) + '1';
      lengthRounded = m1 * Number(eN1);
    }

    console.log('binEvenLengthRound', interval, nBins, intervalLength, binLength, digits, eN1, mantissa, m1, lengthRounded);
  }
  return lengthRounded;
}
var binEvenLengthRound_1 = src.binEvenLengthRound = binEvenLengthRound$1;

/** Generate an array of even-sized bin boundaries to span the given interval.
 * Used for mongo aggregation pipeline : $bucket : boundaries.
 * The boundaries are multiples of lengthRounded; the first is at or before
 * interval[0] and the last is at or beyond interval[1], stepping in the
 * direction of the interval (so they are descending for a reverse interval).
 * @param {number[]} interval  [start, end]
 * @param {number} lengthRounded  bin size, from binEvenLengthRound()
 * @return array of boundaries, or undefined if lengthRounded is falsey.
 */
function binBoundaries$1(interval, lengthRounded) {
  let b;
  if (lengthRounded) {
    const
    [start, end] = interval,
    direction = Math.sign(end - start),
    reverse = direction < 0,
    binLength = Math.abs(lengthRounded),
    /** Step towards end.  Previously the step was always +lengthRounded, so a
     * reverse or zero-length interval never reached its end condition. */
    step = reverse ? -binLength : binLength,
    beforeEnd = reverse ?
      (location) => location > end :
      (location) => location < end;
    let location = (reverse ? Math.ceil : Math.floor)(start / binLength) * binLength;
    b = [location];
    do {
      location += step;
      b.push(location);
    } while (beforeEnd(location));
    console.log('binBoundaries', direction, b.length, location, b[0], b[b.length-1]);
  }
  return b;
}
var binBoundaries_1 = src.binBoundaries = binBoundaries$1;

var intervalBins = /*#__PURE__*/_mergeNamespaces({
  __proto__: null,
  binBoundaries: binBoundaries_1,
  binEvenLengthRound: binEvenLengthRound_1,
  default: src
}, [src]);

const createIntervalTree = intervalTree1d;

console.log('createIntervalTree', createIntervalTree, intervalTree1d);
console.log('childProcessProgressive', childProcessProgressive);

const { ErrorStatus } = childProcessProgressive.default.errorStatus;

const /*import*/ {
  childProcess,
  dataOutReplyClosureLimit,
  dataOutReplyClosure,
  dataReduceClosure,
  stringCountString,
} = childProcessProgressive.default.childProcess;
console.log('interval-bins', intervalBins); const /* import*/ { binEvenLengthRound, binBoundaries } = intervalBins; // from 'interval-bins'; //------------------------------------------------------------------------------ function callOut(command, datasetId, scope, preArgs, cb) { childProcess( 'vcfGenotypeLookup.bash', /* postData */ '', /* useFile */ false, /* fileName */ undefined, /* moreParams */ [command, datasetId, scope, /*isecFlags*/ '', /*isecDatasetIds*/''].concat(preArgs || []), dataOutReplyClosure(cb), cb, /*progressive*/ false); } const callOutP = promisify(callOut); function vcfGenotypeSamplesFiltered(datasetId, scope, filter) { const fnName = 'vcfGenotypeSamplesFiltered'; /** result */ let promise; if (filter) { parseStringFields(filter, ['matchHet']); const matchHet = filter.matchHet; filter.features.forEach(f => parseStringFields(f, ['position', 'matchRef'])); /** The purpose of allowing the caller to nominate the first SNP to filter * on, by listing it first, is that the 2nd query can be limited to the * result of the first, i.e. filteredSamples; see comment below. */ const refFirst = filter.features[0].matchRef; /** array of feature positions. index is matchRef, i.e. [0] is Alt, [1] is Ref */ const groupedFilters = filter.features.reduce((grouped, feature) => { grouped[feature.matchRef].push(feature.position); return grouped; }, {true : [], false : []}); const first = groupedFilters[refFirst]; /** @return regexp to be used by grep. '.' will match | / etc */ function refToGenotype(matchRef, matchHet) { const /** map {false,true} -> {1,0} */ value = + ! matchRef, /** vcfGenotypeLookup.bash uses pattern as gtMatch in : '\t'"$gtMatch"'$' */ pattern = matchHet ? '.*' + value + '.*' : value + '.' + value; return pattern; } function groupCall(group, matchRef) { const /** GT= is recognised by vcfGenotypeLookup.bash to set gtMatch. * similar to --include 'GT="1/1"' but that filters SNPs not samples. 
*/ include = 'GT=' + refToGenotype(matchRef, matchHet), regions = group.map(position => scope + ':' + position).join(','), preArgs = ['-r'].concat(regions).concat([include]), p = callOutP('filter_samples', datasetId, scope, preArgs); console.log(fnName, preArgs.join(' ')); return p; } const counts = {}; function countSamples(a) { a.forEach(s => { if (counts[s] == undefined) { counts[s] = 1; } else { counts[s]++; } }); } /** Number of SNPs queried so far; filter requires counts to match this. */ let nSNPs = first.length; function query(group, matchRef) { const promise = groupCall(group, matchRef) .then(samplesValues => { // samplesValues.replaceAll(/\t.../g, ''); /* split on (tab genotype newline), then * trim off the '' created from trailing (... newline). */ /** Sample names which matched in the first query. */ const matchedSamples = samplesValues.split(/\t...\n/g); if (matchedSamples.at(-1) === '') { matchedSamples.pop(); } /* count and filter for those with count === #SNPs in first query */ countSamples(matchedSamples); /** Sample names will appear multiple times, once for each SNP genotype which they match. * Array.from(new Set( )) preserves order, which is preferable for GUI consistency. */ const uniqSamples = Array.from(new Set(matchedSamples)); /* first.length is the number of SNPs in the first query, * i.e. groupedFilters[refFirst] */ // next : >= nSNPs - allowMissing const filteredSamples = uniqSamples.filter(s => counts[s] === nSNPs); console.log(fnName, filteredSamples.length, nSNPs, uniqSamples.length, matchedSamples.length); return filteredSamples; }); return promise; } /** Map the array of sample names to the result format of the existing * genotypeSamples endpoint. */ function samplesToResult(samples) { return samples.join('\n'); } promise = query(first, refFirst) .then(firstSamples => { /** if filteredSamples.length < 100 it might be a good optimisation to * narrow the 2nd query to filteredSamples. See comment re. refFirst. */ const secondMatch = ! 
refFirst, second = groupedFilters[secondMatch]; nSNPs += second.length; let result; if (! second.length) { result = samplesToResult(firstSamples); } else { result = query(second, secondMatch) .then(samples => samplesToResult(samples)); } return result; }); } else { // -l, --list-samples: list sample names and exit promise = callOutP('query', datasetId, scope, /*preArgs*/ ['-l']); } return promise; } function vcfGenotypeHaplotypesSamples(datasetId, scope, positions) { const fnName = 'vcfGenotypeHaplotypesSamples'; /** The positions are received as strings, and can be used in that form; * JSON.parse() sanitises the values somewhat. */ positions = positions.map(p => JSON.parse(p)); function positionsCall(positions) { /** Extract from vcfGenotypeSamplesFiltered() : groupCall, with group -> positions */ const regions = positions.map(position => scope + ':' + position).join(','), preArgs = ['-r'].concat(regions), p = callOutP('haplotypes_samples', datasetId, scope, preArgs); console.log(fnName, preArgs.join(' ')); return p; } /** If handling multiple Blocks (chromosomes) of a dataset then this might use * 1 call per chr and combine results. (related : vcfGenotypeSamplesFiltered() * combines results from multiple calls). * * Also, this function could get the list of sample names and map the sample * numbers in the result to names, but that is easily done in the frontend, * which keeps the reply small. */ const promise = positionsCall(positions); return promise; } //------------------------------------------------------------------------------ /** * @param datasetDir name of directory containing the VCF dataset * @param scope e.g. '1A'; identifies the vcf file, i.e. datasetId/scope.vcf.gz * scope===undefined or null signifies that all scopes of the dataset should be searched. * @param preArgs args to be inserted in command line, additional to the datasetDir / vcf dir name. 
* See comment in frontend/app/services/auth.js : vcfGenotypeLookup() * @param nLines if defined, limit the output to nLines. * @param dataOutCb passed to childProcess() - see comment there. * If undefined, then dataOutReplyClosureLimit(cb, lineFilter, nLines) is used. * @param cb */ function vcfGenotypeLookup(datasetDir, scope, preArgs_, nLines, dataOutCb, cb) { /** Split out the optional parameters which are passed as separate params for * processing separately to the remainder of preArgs, which are inserted as a * list into the command. */ let {isecFlags, isecDatasetIds, ... preArgs} = preArgs_ || {}; const fnName = 'vcfGenotypeLookup', headerOnly = preArgs.headerOnly, /** snpPolymorphismFilter is not applicable if SNPList because if the * number of samples requested is <=1 then every row appears homozygous. */ snpPolymorphismFilter = ! preArgs.SNPList && preArgs.snpPolymorphismFilter, /** These parameters are supported by view only, not query, so if * present then view | query will be used. * In that case moreParams will be passed to view, and paramsForQuery * will be passed to query. */ viewRequired = snpPolymorphismFilter || preArgs.mafThreshold || preArgs.featureCallRateThreshold || preArgs.minAlleles !== undefined || preArgs.maxAlleles !== undefined || preArgs.typeSNP !== undefined, command = headerOnly ? 'view' : preArgs.SNPList ? (viewRequired ? 'counts_view' : 'counts_query') : preArgs.requestFormat ? (viewRequired ? 'view_query' : 'query') : 'view'; /* isec is only meaningful with >1 datasets. The caller * vcfGenotypeLookupDataset() only passes isecDatasetIds when * isecDatasetIds.length > 1 */ let isecDatasetIdsText = isecDatasetIds; if (Array.isArray(isecDatasetIds) /*&& (isecDatasetIds.length > 1)*/) { /** this is split in vcfGenotypeLookup.bash with tr '!' 
' ' */ const datasetIdsSeparator = '!'; isecDatasetIdsText = isecDatasetIds.join(datasetIdsSeparator); } /** The params passed to spawn (node:child_process) are passed as options.args * to ChildProcess.spawn (node:internal/child_process) which calls * spawn(options) which converts non-strings to strings, e.g. arrays are * joined with ',' into a single string. undefined -> 'undefined'. * * If scope is undefined then preArgs.datasetVcfFile is expected. */ let moreParams = [ command, datasetDir, scope || preArgs.datasetVcfFile, isecFlags || '', isecDatasetIdsText || ''], regionParams = scope ? ['-r', preArgs.region] : ['', '']; moreParams = moreParams.concat(regionParams); /** from BCFTOOLS(1) : bcftools view [OPTIONS] file.vcf.gz [REGION [...]] -h, --header-only output the VCF header only -H, --no-header suppress the header in VCF output bcftools query [OPTIONS] file.vcf.gz [file.vcf.gz [...]] -H, --print-header print header * headerOnly implies command==='view' i.e. -h * When ! headerOnly, the header is required; * * for view : --with-header is default * * for query : use -H */ const headerOption = headerOnly ? /*command===view*/'-h' : (command === 'view') ? '' : '-H'; if (preArgs.requestFormat) { const /** from BCFTOOLS(1) : * %GT Genotype (e.g. 0/1) * %TGT Translated genotype (e.g. C/A) */ formatGT = (preArgs.requestFormat === 'CATG') ? '%TGT' : '%GT'; /** now INFO/MAF is added if not present, by * vcfGenotypeLookup.{bash,Makefile} : dbName2Vcf() / %.MAF.vcf.gz * So requestInfo means just 'request INFO/tSNP' - no longer needed because * to enable SNP filters to be applied in frontend also, request all of INFO/ * (until eb969a33 just INFO/MAF and INFO/tSNP were requested) * Note that %INFO produces a column header '(null)' instead of 'INFO'; * this is handled in addFeaturesJson() in frontend/app/utils/data/vcf-feature.js. */ preArgs.requestInfo; const formatChromosome = scope ? 
'' : '%CHROM\t', format = formatChromosome + '%ID\t%POS' + '\t%REF\t%ALT' + '\t%INFO' + '[\t' + formatGT + ']\n'; /** Params passed to query if view|query is used, otherwise to command. */ const paramsForQuery = ['-queryStart', headerOption, '-f', format, '-queryEnd']; moreParams = moreParams.concat(paramsForQuery); if (preArgs.snpNames?.length) { const snpNames = ['-snpsStart'].concat(preArgs.snpNames).concat(['-snpsEnd']); moreParams = moreParams.concat(snpNames); } if (headerOnly) { moreParams.push('--force-samples'); } /** default is no het filter, i.e. false */ if (snpPolymorphismFilter) { moreParams.push('--genotype'); moreParams.push('het'); } /** Just 1 --include or --exclude is permitted, so combine these * mafThreshold and featureCallRateThreshold into 1 condition. */ const includeConditions = []; const mafThresholdMax = 0.5; /** default is no MAF filter, i.e. >= 0, (0 <= MAF <= 0.5) * Also omit when condition is <= 0.5 (i.e. .mafUpper && .mafThreshold === mafThresholdMax). */ if ((preArgs.mafThreshold !== undefined) && (preArgs.mafThreshold !== (preArgs.mafUpper ? mafThresholdMax : 0))) { const /** --min-af and --max-af uses "INFO/AC and INFO/AN when * available or FORMAT/GT" quoting BCFTOOLS(1), whereas * --include MAF< / > may utilise INFO/MAF for example ? not clear so using INFO/MAF. * Related : mafThresholdText() (components/panel/manage-genotype.js) */ afOption = 'INFO/MAF' + (preArgs.mafUpper ? '<=' : '>=') + preArgs.mafThreshold; includeConditions.push(afOption); } if (preArgs.featureCallRateThreshold) { const /** equivalent to INFO/CR : * N_PASS(GT!="./.")/N_SAMPLES * F_PASS(GT!="./.") * INFO/F_MISSING is converse of INFO/CR, so the following expression is * equivalent to : INFO/CR >= .featureCallRateThreshold */ fcrOption = 'INFO/F_MISSING < ' + (1 - preArgs.featureCallRateThreshold); includeConditions.push(fcrOption); } if (includeConditions.length) { moreParams.push('--include'); // aka. 
-i moreParams.push(includeConditions.join(' && ')); } if (preArgs.minAlleles !== undefined) { moreParams.push('--min-alleles'); moreParams.push(preArgs.minAlleles); } if (preArgs.maxAlleles !== undefined) { moreParams.push('--max-alleles'); moreParams.push(preArgs.maxAlleles); } if (preArgs.typeSNP) { moreParams.push("--types"); moreParams.push("snps"); } } const samples = preArgs.samples; if (samples?.length) { const samplesJoined = samples .trimEnd(/\n/) .replaceAll('\n', ','); moreParams = moreParams.concat('-s', samplesJoined); } else if (preArgs.requestSamplesAll) ; else { // There is not an option for 0 samples, except via using an empty file : moreParams = moreParams.concat('-S', '/dev/null'); } /** avoid tracing samples, and moreParams[9] which is the samples. */ console.log(fnName, datasetDir, preArgs.region, preArgs.requestFormat, samples?.length, moreParams.slice(0, 9+3)); if (! dataOutCb) { const lineFilter = null; dataOutCb = dataOutReplyClosureLimit(cb, lineFilter, nLines); } childProcess( 'vcfGenotypeLookup.bash', /* postData */ '', /* useFile */ false, /* fileName */ undefined, moreParams, dataOutCb, cb, /*progressive*/ true); } function vcfGenotypeFeaturesCounts( block, interval, nBins = 10, isZoomed, userOptions, cb) { // header comment copied from block-features.js : blockFeaturesCounts() const fnName = 'vcfGenotypeFeaturesCounts'; let result; // default interval can be the whole domain of the block if (! interval || interval.length !== 2) { const errorText = 'Interval is required. ' + JSON.stringify(interval), error = new ErrorStatus(400, errorText); result = error; } else { if (interval[0] > interval[1]) { console.warn(fnName, 'reverse interval', interval, block.id); let swap = interval[0]; interval[0] = interval[1]; interval[1] = swap; } const scope = block.name, datasetDir = block.datasetId, // may be able to omit domainInteger if ! 
isZoomed domainInteger = interval.map((d) => d.toFixed(0)), region = scope + ':' + domainInteger.join('-'), preArgs = {region, samples : null, requestFormat : 'CATG', SNPList : true}, // arguments 1-3 are used : block, interval, nBins summary = new vcfToSummary(...arguments); if (userOptions) { Object.entries(userOptions).forEach(([key, value]) => { if (value !== undefined) { preArgs[key] = value; } }); } function sumCb(error, text) { let result; if (error) { throw error; } else if (text === undefined) { result = summary.summarise(); } else { summary.accumulateChunk(text); } return result; } const [blockArg, ...intervalArgs] = arguments; const dataOutCb = dataReduceClosure(sumCb); vcfGenotypeLookup( datasetDir, scope, preArgs, /*nLines*/undefined, dataOutCb, cb ); /* vcfGenotypeLookup() includes %REF\t%ALT, which could be omitted in this case. */ } return result; } const symbolCount = Symbol.for('count'); class vcfToSummary { /** * @param interval domainInteger */ constructor(block, interval, nBins) { const lengthRounded = binEvenLengthRound(interval, nBins), boundaries = binBoundaries(interval, lengthRounded), /** map the boundaries into interval [start, end] pairs. */ intervals = boundaries.map((b, i, a) => (i ? [a[i-1], b] : undefined)) .slice(1, boundaries.length-1); intervals.forEach((interval) => interval[Symbol.for('count')] = 0); // console.log(fnName, block.id, lengthRounded, boundaries, intervals); // set up bins and interval tree this.summaryTree = createIntervalTree(intervals); } } vcfToSummary.prototype.accumulateChunk = function (text) { /** text has \n and \t, column format e.g. : * # [1]ID [2]POS [3]REF [4]ALT * scaffold38755_1190119 1190119 C T */ text.split('\n') .forEach((line, i) => { /* first line of first chunk is header line, for subsequent chunks match /^#/ * last line of chunk may be incomplete - save it to prepend to first line of next chunk. 
*/ // skip header line if (i) { // add line to interval of summaryTree; const cols = line.split('\t'), position = +cols[1]; this.summaryTree.queryInterval(position, position, addToInterval); function addToInterval(interval) { interval[symbolCount]++; } } }); }; /** * @return summary array, in the same format as block-features.js : * blockFeaturesCounts(), @see vcfGenotypeFeaturesCounts() */ vcfToSummary.prototype.summarise = function() { const summaryArray = this.summaryTree.intervals .sort((a, b) => a[0] - b[0]) .map( (interval) => ({ _id : interval[0], count : interval[symbolCount], idWidth : [interval[1] - interval[0]] })); return summaryArray; }; //------------------------------------------------------------------------------ /** Get the status of .vcf.gz files for this dataset. * Related : vcfGenotypeFeaturesCounts(). */ function vcfGenotypeFeaturesCountsStatus(datasetDir, cb) { const command = 'status', moreParams = [ command, datasetDir, /*scope*/'', /*isecFlags*/'', /*isecDatasetIds*/'']; /** Receive the combined result (progressive===false). * For non-progressive (expect that the result is in a single chunk) could use * dataReduceClosure() to catenate chunks. * @param combined Buffer */ function dataOutCb(combined, cb) { // console.log(fnName, 'dataOutCb', combined); const text = combined.toString(); cb(null, text); } childProcess( 'vcfGenotypeLookup.bash', /* postData */ '', /* useFile */ false, /* fileName */ undefined, moreParams, dataOutCb, cb, /*progressive*/ false); } const vcfGenotypeFeaturesCountsStatusP = promisify(vcfGenotypeFeaturesCountsStatus); /** Check if base VCF and SNPLists are installed for any VCF datasets in datasets. * The requirement for SNPLists is only applied if the base VCF is large. * vcfGenotypeLookup.{bash,Makefile} will automatically generate * .MAF.SNPList.vcf.gz if it is not present. 
* If the size of the base .vcf.gz is such that this will take > ~5mins then * require the user to install this .MAF.SNPList.vcf.gz before uploading the VCF * worksheet. * @return a promise yielding datasets status, with VCF datasets which are not * installed having status falsey */ function checkVCFsAreInstalled(datasets, status) { const fnName = 'checkVCFsAreInstalled', checkPs = datasets.map(dataset => { console.log(fnName, dataset.name, dataset.tags); const isVCF = dataset.tags?.includes('VCF'), checkP = ! isVCF ? Promise.resolve(true) : vcfGenotypeFeaturesCountsStatusP(dataset.name) .then(vcfStatus => { const status = statusToObj(vcfStatus), notInstalled = dataset.blocks.filter(block => { const chrName = block.name, s = status[chrName], /** size and time of chr base .vcf.gz e.g. ' 354566 Sep 12 16:20' */ sizeTime = s?.[''] , sizeMatch = sizeTime?.match(/^ *([0-9]+)/), small = ! sizeMatch || (+sizeMatch[1] < 100e6), ok = small || s['.MAF.SNPList']; return ! ok; }); console.log(dataset.name, notInstalled, status, vcfStatus); return ! notInstalled.length; }); return checkP; }); return checkPs; } //------------------------------------------------------------------------------ /** Construct a mapping from chr name to a list of suffixes of available .vcf.gz * files for that chromosome. */ function statusToObj(vcfStatus) { const /** extract from frontend/app/utils/data/vcf-files.js : statusToMatrix() */ a = vcfStatus.split('\n'), /** collated into a summary object[chrName][colName] -> sizeTime * This has the same information as map; combined with cols[] this enables * producing a matrix with sorted column names. 
*/ summary = a.reduce((s, line) => { const m = line.match(/(.*) ([^.]+)(.*).vcf.gz(.*)/); if (m) { const [whole, sizeTime, chrName, suffix, csi] = m, colName = (suffix + csi); // .replaceAll('.', unicodeDot), s[chrName] || (s[chrName] = {}); s[chrName][colName] = sizeTime; } return s; }, {}); return summary; } //------------------------------------------------------------------------------ var vcfGenotype = /*#__PURE__*/Object.freeze({ __proto__: null, checkVCFsAreInstalled: checkVCFsAreInstalled, vcfGenotypeFeaturesCounts: vcfGenotypeFeaturesCounts, vcfGenotypeFeaturesCountsStatus: vcfGenotypeFeaturesCountsStatus, vcfGenotypeHaplotypesSamples: vcfGenotypeHaplotypesSamples, vcfGenotypeLookup: vcfGenotypeLookup, vcfGenotypeSamplesFiltered: vcfGenotypeSamplesFiltered }); /** Generated by ChatGPT from this prompt : ### For a JavaScript web application, show how functions making web API requests can be written to be used either in the frontend (web browser) or backend (node.js server). --- To write functions for making web API requests that can be used in both the frontend (web browser) and backend (Node.js server), you can use the Fetch API, which is available in browsers, and you can also use it in Node.js by using the `node-fetch` library. */ /* global require */ // defined in Node.js server only // Import fetch depending on the environment // const fetch = typeof window === 'undefined' ? require('node-fetch') : window.fetch; let fetch$1 = typeof window === 'undefined' ? import('./index-BSNIxF6l.js').then(module => fetch$1 = module.default) : window.fetch; /** * Make an API request. * @param {string} url - The API endpoint. * @param {Object} options - Fetch options (method, headers, body, etc.). * @returns {Promise<Object>} - The response data. */ async function apiRequest(url, options = {}) { try { const response = await fetch$1(url, options); if (!response.ok) { throw new Error(`HTTP error! 
Status: ${response.status}`); } const data = await response.json(); return data; } catch (error) { console.error('API request failed:', error); throw error; } } //------------------------------------------------------------------------------ var apiRequest$1 = /*#__PURE__*/Object.freeze({ __proto__: null, apiRequest: apiRequest }); /** Generated by ChatGPT ### In JavaScript, show a wrapper functionality for caching data either on the web-browser frontend web-app (using IndexedDB) or in the backend (Node.js server). Include JSDocs comments in the function header comments. --- Here’s a simple wrapper functionality for caching data using IndexedDB on the frontend or storing data in memory on a Node.js backend: ### Frontend (IndexedDB) */ /** * A simple wrapper for caching data in IndexedDB. * @class CacheWrapper */ let CacheWrapper$1 = class CacheWrapper { constructor(dbName, storeName) { this.dbName = dbName; this.storeName = storeName; this.db = null; this.init(); } async init() { this.db = await new Promise((resolve, reject) => { const request = indexedDB.open(this.dbName, 1); request.onupgradeneeded = (event) => { const db = event.target.result; db.createObjectStore(this.storeName); }; request.onsuccess = (event) => resolve(event.target.result); request.onerror = () => reject('Database error'); }); } /** * Caches data in IndexedDB. * @param {string} key - The key to store the data under. * @param {any} value - The value to store. * @returns {Promise<void>} */ async set(key, value) { const transaction = this.db.transaction([this.storeName], 'readwrite'); const store = transaction.objectStore(this.storeName); return new Promise((resolve, reject) => { const request = store.put(value, key); request.onsuccess = () => resolve(); request.onerror = () => reject('Storage error'); }); } /** * Retrieves cached data from IndexedDB. * @param {string} key - The key to retrieve the data for. 
* @returns {Promise<any>} */ async get(key) { const transaction = this.db.transaction([this.storeName]); const store = transaction.objectStore(this.storeName); return new Promise((resolve, reject) => { const request = store.get(key); request.onsuccess = (event) => {console.log('get', event, event.target); resolve(event.target.result ? event.target.result/*.value*/ : null); }; request.onerror = () => reject('Retrieval error'); }); } }; var cacheBrowser = /*#__PURE__*/Object.freeze({ __proto__: null, CacheWrapper: CacheWrapper$1 }); // old API is available via require() // let flatCache = require('flat-cache'); /* global require */ /* global exports */ /*----------------------------------------------------------------------------*/ /** function API signature generated by ChatGPT, see ./cache-browser.js */ /** Implementation copied from pretzel/lb4app/lb3app/common/utilities/results-cache.js */ /** * File-based persistent cache for Node.js, implemented using flat-cache. * @class CacheWrapper */ class CacheWrapper { /** If cache is not initialised, initialise it, calling .load(). * The constructor signature provides the same interface as * cache-browser.js. * * Default cacheDir is .cache in server cwd, i.e. lb4app/lb3app/.cache/ * (previously : /app/node_modules/flat-cache/.cache) * Default cacheId is cache1 (i.e. file .cache/cache1) * Cache format is json. * @param {string} dbName * @param {string} storeName */ constructor(dbName, storeName) { { const cacheId = storeName, cacheDir = './Cache/' + dbName; /* old API : cache = flatCache.load(cacheName); * cache = new FlatCache(); cache.load(cacheName); * Unlike create(), .load() does not set the given name as cacheId * (although the function header comment says : If specified `cacheDir` * will be used as the directory to persist the data to. refn : * node_modules/flat-cache/dist/index.js ) */ this.cache = create({ cacheId, cacheDir }); } } /** * Caches data in a file using flat-cache. 
* @param {string} key - The key to store the data under. * @param {any} value - The value to store. */ set(key, value) { this.cache.setKey(key, value); /** https://github.com/royriojas/flat-cache#readme : "Non visited * keys are removed when cache.save() is called" if noPrune is not * true */ this.cache.save(/*noPrune*/ true); } /** * Retrieves cached data from flat-cache. * @param {string} key - The key to retrieve the data for. * @returns {any|null} The cached value or null if not found. */ get(key) { let cacheContent = this.cache.getKey(key); return cacheContent; } } var cacheNode = /*#__PURE__*/Object.freeze({ __proto__: null, CacheWrapper: CacheWrapper }); //------------------------------------------------------------------------------ /** Reduce the array to a promise; map each array element to a promise using * elt2PromiseFn, in series (not in parallel). * @param array * @param elt2PromiseFn (previousResult, element) -> promise * @param starting_promise Start after this initial promise yields * Defaults to Promise.resolve() if undefined. */ function reduceInSeries(array, elt2PromiseFn, starting_promise) { /** based on ensureCounts() in lb4app/lb3app/common/utilities/block-features.js * and also https://stackoverflow.com/a/21372567 user663031 * @param previousP head of chain of promises * @param previous result value yielded by previousP */ const promise = array.reduce( (previousP, currentElement) => previousP.then( (previous) => elt2PromiseFn(previous, currentElement)), starting_promise ?? Promise.resolve()); return promise; } //------------------------------------------------------------------------------ /** Map array to an array of promises, which are the results of calling * elt2PromiseFn() on each element of the array, in series. * elt2PromiseFn() returns a promise. * The result promise is used to perform each call to elt2PromiseFn() in series * instead of in parallel. 
* @param {Array} array * @param {function(any, number): Promise} elt2PromiseFn * The signature of elt2PromiseFn() is (element, i) => promise. */ function mapInSeries(array, elt2PromiseFn) { let promise = Promise.resolve(); /** Wrap elt2PromiseFn() to provide series execution. */ function seriesWrap(elt, i) { promise = promise.then(result => elt2PromiseFn(elt, i)); return promise; } const allP = Promise.all(array.map(seriesWrap)); return allP; } //------------------------------------------------------------------------------ var promises = /*#__PURE__*/Object.freeze({ __proto__: null, mapInSeries: mapInSeries, reduceInSeries: reduceInSeries }); //------------------------------------------------------------------------------ /* let mapInSeries; import('@plantinformatics/vcf-genotype-brapi/dist/vcf-genotype-brapi-node.mjs').then(vcfGenotypeBrapi => { const promises = vcfGenotypeBrapi.default.promises; console.log('vcfGenotypeBrapi', vcfGenotypeBrapi, 'promises', promises); mapInSeries = promises.mapInSeries; }); */ //------------------------------------------------------------------------------ /** Base of web API endpoint URLs of IPK PanBARLEX */ const domainIpk = 'ipk-gatersleben.de'; const baseUrl = 'https://panbarlex.' + domainIpk; /** Import fetch depending on the environment */ // const fetch = isNodeJs ? require('node-fetch') : window.fetch; let cache; //------------------------------------------------------------------------------ /** There are distinct cache implementations used : * - cache-node.js backend Node.js * - cache-browser.js frontend browser * These have the same signature `CacheWrapper`, so this library can use either * via this abstract interface. * Packaging both of these for frontend and backend is currently problematic, so * instead require the calling app to pass in the implementation to be used. 
* * @param {CacheWrapper} cacheWrapper */ function init(cacheWrapper, fetch_) { cache = new cacheWrapper('IPK', 'PanBARLEX'); fetch = fetch_; } //------------------------------------------------------------------------------ /** Lookup key in the response cache; if present return it, otherwise fetch(url) * and store the response in cache. * @param {string} url * @param {string} key * @return {Promise<string>} response, from cache or API */ function fromCacheOrFetch(url, key) { const fnName = 'fromCacheOrFetch', responseP = cache?.get(key) .then(response => { // This indicates if cache hit or miss. console.log(fnName, key, !!response, response?.length || response?.clusterMembers?.length); if (response) { return response; } else { /** promise yielding API response */ const apiP = fetch(url) .then(response => response.json()) .then(response => (cache?.set(key, response), response)); return apiP; } }); return responseP; } //------------------------------------------------------------------------------ /** Ids of the gene clusters shown in Known Genes page : * https://panbarlex.ipk-gatersleben.de/#known-genes * From index.js. 
*/ const clusterIds = [ /** duplicates : BarleyCDS90_21807, BarleyCDS90_06263 */ 'BarleyCDS90_26655', 'BarleyCDS90_11894', 'BarleyCDS90_28638', 'BarleyCDS90_21807', // 'BarleyCDS90_21807', 'BarleyCDS90_16242', 'BarleyCDS90_29093', 'BarleyCDS90_32122', 'BarleyCDS90_03650', 'BarleyCDS90_12108', 'BarleyCDS90_20032', 'BarleyCDS90_08746', 'BarleyCDS90_26304', 'BarleyCDS90_13218', 'BarleyCDS90_32282', 'BarleyCDS90_02101', 'BarleyCDS90_23360', 'BarleyCDS90_12730', 'BarleyCDS90_26113', 'BarleyCDS90_12265', 'BarleyCDS90_18400', 'BarleyCDS90_12590', 'BarleyCDS90_04005', 'BarleyCDS90_22416', 'BarleyCDS90_21767', 'BarleyCDS90_27197', 'BarleyCDS90_21150', 'BarleyCDS90_27024', 'BarleyCDS90_05674', 'BarleyCDS90_24106', 'BarleyCDS90_19421', 'BarleyCDS90_20558', 'BarleyCDS90_06263', 'BarleyCDS90_14810', 'BarleyCDS90_30630', 'BarleyCDS90_05066', 'BarleyCDS90_11183', 'BarleyCDS90_14602', 'BarleyCDS90_04855', 'BarleyCDS90_18169', 'BarleyCDS90_03514', 'BarleyCDS90_20637', 'BarleyCDS90_12606', 'BarleyCDS90_16788', 'BarleyCDS90_03661', 'BarleyCDS90_20867', 'BarleyCDS90_03660', 'BarleyCDS90_10622', 'BarleyCDS90_02817', 'BarleyCDS90_20076', 'BarleyCDS90_26502', 'BarleyCDS90_22983', 'BarleyCDS90_26445', 'BarleyCDS90_05119', 'BarleyCDS90_03849', 'BarleyCDS90_02298', 'BarleyCDS90_28839', 'BarleyCDS90_16461', 'BarleyCDS90_12637', 'BarleyCDS90_14064', 'BarleyCDS90_13238', 'BarleyCDS90_07253', 'BarleyCDS90_06263', 'BarleyCDS90_18064', 'BarleyCDS90_32523', 'BarleyCDS90_05952', 'BarleyCDS90_01082', 'BarleyCDS90_07326', 'BarleyCDS90_14480', 'BarleyCDS90_09159', 'BarleyCDS90_06461', 'BarleyCDS90_02227', 'BarleyCDS90_12379', 'BarleyCDS90_04098', 'BarleyCDS90_13747', 'BarleyCDS90_18436', 'BarleyCDS90_07497', 'BarleyCDS90_11112', 'BarleyCDS90_15689', 'BarleyCDS90_17088', 'BarleyCDS90_30284', 'BarleyCDS90_32261', 'BarleyCDS90_19260', 'BarleyCDS90_28657', 'BarleyCDS90_03727', 'BarleyCDS90_27400', 'BarleyCDS90_25598', 'BarleyCDS90_22388', 'BarleyCDS90_28381', 'BarleyCDS90_13798', 'BarleyCDS90_23167', 
'BarleyCDS90_22569', 'BarleyCDS90_28347', 'BarleyCDS90_09215', 'BarleyCDS90_10442', 'BarleyCDS90_16674', 'BarleyCDS90_05804', 'BarleyCDS90_06164', 'BarleyCDS90_07756', 'BarleyCDS90_19605', 'BarleyCDS90_13788', 'BarleyCDS90_25906', 'BarleyCDS90_17853', ]; /** Request data for each of the gene clusters in clusterIds[]. * @return {Promise<Array>} promise yielding an array of the responses. */ function getKnownGenes() { const /** Option to send requests in series instead of parallel, to reduce peak * server load. */ inSeries = true, allP = inSeries ? mapInSeries(clusterIds, getGene) : Promise.all(clusterIds.map(getGene)); return allP; } /** Request data for a gene cluster, which includes the projections to * reference assemblies. * * @param {string} clusterId gene cluster id to request projections for. * @return {Promise<Object>} response data */ async function getGene(clusterId) { const fnName = 'getGene'; /** Usage with reduceInSeries() instead of mapInSeries() is recorded in comments in 0eced61. */ const key = 'sequence_clusters/' + clusterId, url = baseUrl + '/' + key; let response = await cache?.get(key); console.log(fnName, clusterId, !!response, response?.clusterMembers?.length); if (! response) { /** promise yields response */ response = await fetch(url) .then(response => response.json()) .then(response => (cache?.set(key, response), response)); } return response; } //------------------------------------------------------------------------------ /** Extract attributes from the response to /sequence_clusters/ * to be loaded in Pretzel as a Feature. 
* @param {object} gene result of /sequence_clusters/<clusterId> * @return {object} Feature to be pushed to store */ function geneToFeature(gene) { const fnName = 'geneToFeature', {clusterMembers, genes, samples, ...feature} = gene, proj = clusterMembers.findBy('sampleId', 'MOREX'); if (proj.genes.length !== 1) { console.log(fnName, proj.genes); } if (proj.genes.length) { const g = proj.genes[0].feature; feature.blockId = g.seqid; // blockNames.find(g.seqid); feature.value = [g.start, g.end]; feature._id = g.featureId; } feature.name = feature.descriptions.join(', '); // clusterId; return feature; } /** Get the chromosome names and sizes for referenceAssemblyName. * @param {string} referenceAssemblyName sampleId, e.g. 'Morex' * @return {Promise<Array<object>>} promise yielding array of /contig_length/ results */ function getChromosomes(referenceAssemblyName) { const key = 'configuration', requestURL = 'https://divbrowse.' + domainIpk + '/barley_pangenome_v2/' + key, chrsP = fromCacheOrFetch(requestURL, key).then(configuration => /** configuration contains [start,end] of 1 assembly, so if * referenceAssemblyName matches that then contig_length is not * required. */ Promise.all(configuration.chromosomes.map( chr => getcontig_length(referenceAssemblyName, chr.id) ))); return chrsP; } /** Get contig_length of contigId (chromosome) for referenceAssemblyName. * @param {string} referenceAssemblyName sampleId, e.g. 'Morex' * @param {string} contigId e.g. 'chr6H' * @return {Promise<object>} promise yielding a /contig_length/ result */ function getcontig_length(referenceAssemblyName, contigId) { const queryParameters = { sampleId : referenceAssemblyName, contigId, }, /** e.g. 'sampleId=Morex&contigId=chr6H' */ paramsText = Object.entries(queryParameters) .map(keyValuePair => keyValuePair.join('=')) .join('&'), key = 'contig_length?' 
+ paramsText, requestURL = baseUrl + '/' + key, responseP = fromCacheOrFetch(requestURL, key); // {"sampleId":"Morex","contigId":"chr6H","length":561794515} return responseP; } //------------------------------------------------------------------------------ var ipkPanbarlexServer = /*#__PURE__*/Object.freeze({ __proto__: null, clusterIds: clusterIds, domainIpk: domainIpk, fromCacheOrFetch: fromCacheOrFetch, geneToFeature: geneToFeature, getChromosomes: getChromosomes, getGene: getGene, getKnownGenes: getKnownGenes, getcontig_length: getcontig_length, init: init }); var main_node = { vcfGenotype, apiRequest: apiRequest$1, cacheBrowser, cacheNode, ipkPanbarlexServer, promises, }; export { main_node as default };