UNPKG

@plantinformatics/vcf-genotype-brapi

Version:

Client and server functions to access genotype data from VCF via a custom web API and BrAPI

1,343 lines (1,190 loc) 96.2 kB
import { pick } from 'lodash/object.js'; import { chunk } from 'lodash/array.js'; import BrAPI from '@solgenomics/brapijs'; //------------------------------------------------------------------------------ /** intervalOutside and intervalMerge() are copied from pretzel/frontend/app/utils/interval-calcs.js 199fa9b4 */ //------------------------------------------------------------------------------ /* global d3 */ const intervalLimit = [d3.min, d3.max], /** Choose the outside values, as with d3.extent() * true if value a is outside the domain limit b. */ intervalOutside = [(a, b) => (a < b), (a, b) => (a > b), ]; /** Merge the given interval v into the domain, so that the result domain * contains the interval. * * Used within .reduce(), e.g. : * intervals.reduce(intervalMerge, []); * @param domain result of merging the intervals. * form is [min, max]. * @param v a single interval (feature value). can be either direction, i.e. doesn't assume f[0] < f[1] * @see intervalExtent() */ function intervalMerge(domain, v) { // let v = f.get('valueOrdered'); [0, 1].forEach(function (i) { /** the limit value of the interval v, in the direction i. * The result domain is ordered [min, max] whereas the input values v are * not; this translates the unordered value to the ordered result. */ let limit = intervalLimit[i](v); if ((domain[i] === undefined) || intervalOutside[i](limit, domain[i])) domain[i] = limit; }); return domain; } // import { get as Ember_get, set as Ember_set } from '@ember/object'; /* function Ember_get(object, fieldName) { return object[fieldName]; } function Ember_set(object, fieldName, value) { object[fieldName] = value; } */ let Ember_get, Ember_set; function setFrameworkFunctions$1(functions) { Ember_get = functions.Ember_get; Ember_set = functions.Ember_set; console.log('setFrameworkFunctions', 'Ember_get', Ember_get, 'Ember_set', Ember_set); } //------------------------------------------------------------------------------ const dLog$2 = console.debug; const trace = 1; //------------------------------------------------------------------------------ /** number of columns in the vcf output before the first sample column. */ const nColumnsBeforeSamples = 9; /** Copied from components/panel/manage-genotype.js */ const callRateSymbol = Symbol.for('callRate'); //------------------------------------------------------------------------------ /** map from vcf column name to Feature field name. */ const vcfColumn2Feature = { 'CHROM' : 'blockId', 'POS' : 'value', 'ID' : '_name', 'REF' : 'values.ref', 'ALT' : 'values.alt', }; //------------------------------------------------------------------------------ /** Map the column name '(null)' to 'INFO' * * Using --format %INFO outputs the whole of the INFO value; with the column * header name '(null)' * (when using e.g. %INFO/MAF, the column header name is the sub-field name, i.e. 'MAF') */ function columnNameINFOFix(columnNames) { columnNames = columnNames.map(name => name == '(null)' ? 'INFO' : name); return columnNames; } //------------------------------------------------------------------------------ /** * @return true if value is 0, 1, 2, or 0/0, 0/1, 1/0, 1/1, * @param value is defined, and is a string * It is assumed to be well-formed - only the first char is checked. */ function gtValueIsNumeric(value) { const char = value[0]; return ['0', '1', '2'].includes(char); } //------------------------------------------------------------------------------ /** Convert punctuation in datasetId to underscore, to sanitize it and enable * use of the result as a CSS class name. * * Used in genotype table column headers for the dataset colour rectangle (border-left). * This is in support of selecting dataset colour using datasetId instead of * hard-wiring it onto every element; this will support future plans for user * editing of dataset colour. */ function datasetId2Class(datasetId) { const className = datasetId.replaceAll(/[ -,.-/:-?\[-^`{-~]/g, '_'); return className; } // ----------------------------------------------------------------------------- /** If block is Germinate and block._meta.linkageGroupName is defined, insert * linkageGroupName into requestOptions, for use with URL path parameter /chromosome/ * by utils/data/germinate.js : callsetsCalls(), via * germinate-genotype.js : germinateGenotypeLookup() */ function addGerminateOptions(requestOptions, block) { if (block?.hasTag('Germinate') && block._meta?.linkageGroupName) { requestOptions.linkageGroupName = block._meta.linkageGroupName; } return requestOptions; } //------------------------------------------------------------------------------ /** Request featuresCounts (histograms) for all blocks (chromosomes) of the * given dataset. * @param auth service for sending API requests * @param datasetId * @param genotypeSNPFilters current user-controlled thresholds for SNP filters * controls.genotypeSNPFilters */ function getDatasetFeaturesCounts(auth, datasetId, genotypeSNPFilters) { const promise = auth.getDatasetFeaturesCounts(datasetId, genotypeSNPFilters); return promise; } //------------------------------------------------------------------------------ /** Lookup the genotype for the selected samples in the interval of the brushed block. * The server store to add the features to is derived from * vcfGenotypeLookupDataset() param blockV, from brushedOrViewedVCFBlocksVisible, * which matches vcfDatasetId : scope * @param auth auth service for ajax * @param samples to request, may be undefined or [] * Not used if requestSamplesAll * @param domainInteger [start,end] of interval, where start and end are integer values * domainInteger is not applicable if scope is undefined, so this parameter is * used in that case to carry {datasetVcfFiles, snpNames} from genotype-search. * @param requestOptions : * {requestFormat, requestSamplesAll, headerOnly}, * . requestFormat 'CATG' (%TGT) or 'Numerical' (%GT for 01) * . headerOnly true means -h (--header-only), otherwise -H (--no-header) * . linkageGroupName defined if isGerminate * * @param vcfDatasetId id of VCF dataset to lookup * @param scope chromosome, e.g. 1A, or chr1A - match %CHROM chromosome in .vcf.gz file * scope===undefined signifies to search across all scopes of the dataset; * in this case preArgs.region is passed undefined. * @param rowLimit */ function vcfGenotypeLookup(auth, samples, domainInteger, requestOptions, vcfDatasetId, scope, rowLimit) { const fnName = 'vcfGenotypeLookup', region = scope && (scope + ':' + domainInteger.join('-')), requestFormat = requestOptions.requestFormat, /** this dataset has tSNP in INFO field */ requestInfo = requestFormat && (vcfDatasetId === 'Triticum_aestivum_IWGSC_RefSeq_v1.0_vcf_data'), preArgs = Object.assign({ region, samples, requestInfo }, requestOptions); if (! scope) { const searchScope = domainInteger; // preArgs.datasetVcfFiles = searchScope.datasetVcfFiles; preArgs.snpNames = searchScope.snpNames; // actually genotype-search.selectedFeaturesText } /** Noted in vcfGenotypeLookup.bash : When requestOptions.isecDatasetIds is given, * -R is used, so -r is not given, i.e. preArgs.region is not used. */ // parent is .referenceDatasetName /* reply time is generally too quick to see the non-zero count, so to see the * count in operation use +2 here. */ auth.apiStatsCount(fnName, 1); /** Currently passing datasetId as param 'parent', until requirements evolve. * The VCF dataset directories are just a single level in $vcfDir; * it may be desirable to interpose a parent level, e.g. * vcf/ * Triticum_aestivum_IWGSC_RefSeq_v1.0/ * Triticum_aestivum_IWGSC_RefSeq_v1.0_vcf_data * It's not necessary because datasetId is unique. * (also the directory name could be e.g. lookupDatasetId ._meta.vcfFilename instead of the default datasetId). */ const textP = auth.vcfGenotypeLookup(vcfDatasetId, scope, preArgs, rowLimit, {} ) .then( (textObj) => { /* Result from Pretzel API endpoint is vcfGenotypeLookup is {text}; * result from Germinate is an array, recognised by vcf-feature.js : resultIsGerminate(). */ const text = textObj.text || textObj; auth.apiStatsCount(fnName, -1); return text; }); return textP; } //------------------------------------------------------------------------------ /* sample data : * ------------------------------------- * default output format : ##fileformat=VCFv4.1 ##FILTER=<ID=PASS,Description="All filters passed"> ##phasing=none ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data"> ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype as 0/1"> #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT ExomeCapture-DAS5-003227 ExomeCapture-DAS5-002775 ExomeCapture-DAS5-002986 chr1A 327382120 scaffold22435_31704476 G A 100 PASS AC=3;AN=6;NS=616;MAF=0.418019;AC_Het=233;tSNP=.;pass=no;passRelaxed=no;selected=no GT:GL:DP 1/0:-7.65918,-2.74391e-08,-7.48455:6 1/0:-5.41078,-0.00397816,-2.1981:3 1/0:-4.50477,-1.46346e-05,-10.5809:6 * ------------------------------------- * requestFormat === 'CATG' : formatArgs = '-H -f "%ID\t%POS[\t%TGT]\n"' : # [1]ID [2]POS [3]ExomeCapture-DAS5-002978:GT [4]ExomeCapture-DAS5-003024:GT [5]ExomeCapture-DAS5-003047:GT [6]ExomeC scaffold38755_709316 709316 C/C C/T C/C C/C C/C ./. C/C C/C C/C C/T C/C C/C C/C C/C C/T C/C C/C C/C C/C C/T C/C C/C C * ------------------------------------- * requestFormat === 'Numerical' : formatArgs = '-H -f "%ID\t%POS[\t%GT]\n"' : # [1]ID [2]POS [3]ExomeCapture-DAS5-002978:GT [4]ExomeCapture-DAS5-003024:GT [5]ExomeCapture-DAS5-003047:GT [6]ExomeC scaffold38755_709316 709316 0/0 0/1 0/0 0/0 0/0 ./. 0/0 0/0 0/0 0/1 0/0 0/0 0/0 0/0 0/1 0/0 0/0 0/0 0/0 0/1 0/0 0/0 0 */ /** Parse VCF output and add features to block. * @return * { createdFeatures : array of created Features, * sampleNames : array of sample names, * resultBlocks : blocks of the result rows, in the case of [genotype-search], otherwise undefined * } * * @param block view dataset block for corresponding scope (chromosome) * In the case of [genotype-search] all scopes (chromosomes) of the dataset are searched, * and block is dataset * @param requestFormat 'CATG', 'Numerical', ... * @param replaceResults true means remove previous results for this block from block.features[] and selectedFeatures. * @param selectedService if defined then update selectedFeatures * @param text result from bcftools request */ function addFeaturesJson(block, requestFormat, replaceResults, selectedService, text) { /** true if block is given; otherwise determine block of each row, from CHROM column. */ const blockGiven = block.constructor.modelName === 'block'; /** If ! blockGiven, collate the blocks of the result rows. */ const resultBlocks = blockGiven ? undefined : new Map(); let dataset; if (! blockGiven) { if (block.constructor.modelName !== 'dataset') { dLog$2(fnName, blockGiven, block.constructor.modelName, block?.id); } else { dataset = block; block = undefined; } } const fnName = 'addFeaturesJson'; dLog$2(fnName, blockGiven, block?.id, block?.mapName, text.length); /** optional : add fileformat, FILTER, phasing, INFO, FORMAT to block meta * read #CHROM or '# [1]ID' column headers as feature field names * parse /^[^#]/ (chr) lines into features, add to block */ let createdFeatures = [], /** if the output is truncated by rowLimit aka nLines, the last line will not * have a trailing \n, and is discarded. If incomplete lines were not * discarded, values.length may be < 4, and feature.value may be undefined. */ lines = text.split('\n'), meta = {}, /** true if column is genotype format value. */ columnIsGT, columnNames, sampleNames; dLog$2(fnName, lines.length); if (text && text.length && (text.charAt(text.length-1) !== '\n')) { dLog$2(fnName, 'discarding incomplete last line', lines[lines.length-1]); lines.splice(-1, 1); } /* If block is not given, could remove its feature and selected features when it is seen in the results. * i.e. factor this to a function and call it when a new block is seen in the results. */ if (replaceResults && blockGiven) { if (selectedService) { const selectedFeatures = selectedService.selectedFeatures; // let mapChrName = Ember_get(block, 'brushName'); /* remove features of block from createdFeatures, i.e. matching Chromosome : mapChrName * If the user has renewed the axis brush, then selectedFeatures will not * contain any features from selectionFeature in previous result; in that * case this has no effect and none is required. * If the user send a new request with e.g. changed samples, then this would apply. * This can also be moved to selectedService. */ let blockSelectedFeatures = selectedFeatures.filter((f) => f.feature.get('blockId.id') === block.id); if (blockSelectedFeatures.length) { selectedFeatures.removeObjects(blockSelectedFeatures); } } if (block.get('features.length')) { // alternative : block.set('features', Ember_A()); block.features.removeAt(0, block.get('features.length')); } } if (selectedService) { selectedService.selectedFeaturesUpdateIndex(); } lines.forEach((l, lineNum) => { if (l.startsWith('##')) { const nameVal = l.match(/^##([^=]+)=(.*)/); if (nameVal.length > 2) { /** ##INFO and ##FORMAT are duplicated : could .match(/.*ID=(.+),(.+)>/) and use ID to store [2] in meta.{INFO,FORMAT}.<ID> * ##bcftools_{viewVersion,viewCommand} are also duplicated, the last pair generated this output so it is of more interest. */ meta[nameVal[1]] = nameVal[2]; } } else if (l.startsWith('#CHROM')) { // Column header row output by bcftools view columnNames = l.slice(1).split('\t'); columnNames = columnNameINFOFix(columnNames); sampleNames = columnNames.slice(nColumnsBeforeSamples); // from columnNames.slice(0,9), appended tSNP. const nonSampleFields = ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT', 'tSNP']; columnIsGT = columnNames.map(c => nonSampleFields.includes(c)); } else if (l.startsWith('# [1]') || l.startsWith('#[1]')) { // expect ID or CHROM // Column header row output by bcftools query // # [1]ID [2]POS [3]ExomeCapture-DAS5-002978:GT [4]ExomeCapture-DAS5-003024:GT [5]ExomeCapture-DAS5-003047:GT [6]ExomeC /* between versions 1.9 and 1.19 of bcftools, this changed '# [1]ID' to '#[1]ID' * 1.9 is current on centos (2024Jan). */ columnIsGT = l .split(/\t\[[0-9]+\]/) .map((name) => name.endsWith(':GT')); // trim off :GT, and split at 'tab[num]' columnNames = l .replaceAll(':GT', '') .split(/\t\[[0-9]+\]/); columnNames[0] = columnNames[0].replace(/^# ?\[1\]/, ''); columnNames = columnNameINFOFix(columnNames); // nColumnsBeforeSamples is 2 or 3 in this case : skip (CHROM,) ID, POS. const posColumn = columnNames.indexOf('POS'); sampleNames = columnNames.slice(posColumn + 1); // skip the (null) / INFO column name // (2 for REF, ALT) sampleNames.splice(2, 1); } else if (columnNames && l.length) { const values = l.split('\t'); let feature = values.reduce((f, value, i) => { const fieldName = columnNames[i]; let fieldNameF; /* vcfColumn2Feature[] provides Feature field name corresponding to the * column name, for the common columns; for other cases this is * overridden in the switch default. */ fieldNameF = vcfColumn2Feature[fieldName]; /** maybe handle samples differently, e.g. Feature.values.samples: [] * if (i > nColumnsBeforeSamples) { ... } else */ switch (fieldName) { case 'CHROM' : // Update required : now Block.name may be e.g. 'Chr1A' - can compare value with .name instead of trimming off /^chr/. let scope = value.replace(/^chr/, ''); if (! blockGiven) { block = dataset.blocks.findBy('name', value); if (! block) { dLog$2(fnName, i, value, 'not in', dataset.blocks.mapBy('name'), dataset.blocks.mapBy('scope')); } else { resultBlocks.has(block) || resultBlocks.set(block, []); value = block; } } else if (scope !== block.scope) { dLog$2(fnName, value, scope, block.scope, fieldName, i); value = null; } else { value = block; } break; case 'POS' : value = parseNumber(value); f['value_0'] = value; value = [ value ]; break; case 'ID' : case 'REF' : case 'ALT' : break; case 'INFO' : // (null) fieldNameF = 'values.' + fieldName; const infoEntries = value.split(';').map(kv => kv.split('=')); value = Object.fromEntries(infoEntries); if (value.MAF) { value.MAF = parseNumber(value.MAF); } if (value.tSNP) { if ((value.tSNP === '.') || (value.tSNP === '')) { delete value.tSNP; } else { value.tSNP = parseNumber(value.tSNP); } } parseNumberFields(value); break; default : fieldNameF = 'values.' + fieldName; value = parseNumber(value); } if (! fieldNameF) { dLog$2(fnName, fieldName, value, i); } else { /** match values. and meta. */ let prefix = fieldNameF.match(/^([^.]+)\..*/); prefix = prefix && prefix[1]; if (prefix) { /** replace A/A with A, 1/1 with 2 (i.e. x/y -> x+y). */ if (columnIsGT[i]) { let match = value.match(/^(\w)[|/](\w)$/); if (! match) ; else if (requestFormat === 'Numerical') { // +"0" + "0" is "00", so the + + is required. value = '' + (+match[1] + +match[2]); } else /* CATG */ if (match[1] === match[2]) { value = match[1]; } } if (! f[prefix]) { f[prefix] = {}; } if (fieldName.match(/\./)) { // Ember_set() interprets dot in field name, so use [] = f[prefix][fieldName] = value; } else { /* could also use Ember_set() when ! prefix. */ Ember_set(f, fieldNameF, value); } /* These will not be needed after changing references to e.g. * feature.values.MAF to feature.values.INFO.MAF, which is * equivalent and replaces it. */ if (value.MAF !== undefined) { f.values.MAF = value.MAF; } if (value.tSNP !== undefined) { f.values.tSNP = value.tSNP; } } else { f[fieldNameF] = value; } } return f; }, {}); // or EmberObject.create({value : []}); if (! blockGiven && block) { const featuresDomain = resultBlocks.get(block); intervalMerge(featuresDomain, feature.value); } /* CHROM column is present in default format, and omitted when -f is used * i.e. 'CATG', 'Numerical', so in this case set .blockId here. */ if (requestFormat) { feature.blockId = block; } /** based on similar : components/table-brushed.js : afterPaste() */ /** If it is required for vcfFeatures2MatrixView() to create displayData * without creating model:Feature in the Ember data store, the following * part can factor out as a separate function, returning an array of * native JS objects at this point, and passing those to the 2nd function * for creation of model:Feature */ if (feature.blockId && feature.value?.length && feature._name) { // trace level is e.g. 0,1,2,3; the number of rows displayed will be e.g. 0,2,4,8. if ((lineNum < (1 << trace))) { dLog$2(fnName, 'newFeature', feature); } // in this case feature.blockId is block let store = feature.blockId.get('store'); /** name is used in CSS selector, e.g. in utils/draw/axis.js : * axisFeatureCircles_selectOne{,InAxis}(), and . and : are not valid * for that use. */ const separator = '_'; if (feature._name === '.') { // Use chr:position:ref:alt, with separator in place of ':' feature._name = block.name + separator + feature.value[0]; ['ref', 'alt'].forEach(a => { const value = feature.values[a]; if (value) { feature._name += separator + value; } }); } /* Previously sanitized feature._name using datasetId2Class(), but it is * desired to retain the '.' which may appear in SNP names in VCF files. * Before use DOM element id / class, they are sanitized via * eltClassName() in axisFeatureCircles_eltId(). */ // .id is used by axisFeatureCircles_eltId(). // ._name may be also added to other blocks. /* append .value[0] to handle datasets with duplicate .name in 1 chr * This could be optional - done just when * (existingFeature.get('value.0') !== feature.value[0]) */ feature.id = block.id + '_' + feature._name + '_' + feature.value[0]; let existingFeature = store.peekRecord('feature', feature.id); if (existingFeature) { mergeFeatureValues(existingFeature, feature); feature = existingFeature; // this is included in createdFeatures, since it is a result from the current request. } else { // Replace Ember.Object() with models/feature. feature = store.createRecord('feature', feature); /** fb is a Proxy */ let fb = feature.get('blockId'); if (fb.then) { fb.then((b) => feature.set('blockId', b)); } } let mapChrName = Ember_get(feature, 'blockId.brushName'); if (selectedService) { selectedService.selectedFeaturesMergeFeature(mapChrName, feature); } /* vcfFeatures2MatrixView() uses createdFeatures to populate * displayData; it could be renamed to resultFeatures; the * feature is added to createdFeatures regardless of * existingFeature. */ createdFeatures.push(feature); // block may be undefined if CHROM is not in dataset.blocks[] // If existingFeature then addObject(feature) is a no-op. if (block && (replaceResults || ! existingFeature)) { block.features.addObject(feature); } } } }); /* in the case of [genotype-search], this is just the block of the last row. * - collate blocks and update each */ if (block) { blockEnsureFeatureCount(block); block.addFeaturePositions(createdFeatures); } if (! columnNames || ! sampleNames) { dLog$2(fnName, lines.length, text.length); } let result = {createdFeatures, sampleNames, resultBlocks}; return result; } //------------------------------------------------------------------------------ /** If block.featureCount is undefined, then it can be set from block.features.length. * This is used when features are added from genotype calls received from VCF or Germinate. * The features received are likely only a small part of the chromosome, so the * count is just a lower bound. Also it is likely that block.featureCount will * be defined from received blockFeaturesCounts; this is just a fall-back. * (possibly the first vcf result may arrive before blockFeaturesCounts if * blocks are viewed from URL) */ function blockEnsureFeatureCount(block) { const featuresLength = block.get('features.length'); if ((block.get('featureCount') ?? 0) < featuresLength) { block.set('featureCount', featuresLength); } } // ----------------------------------------------------------------------------- /** Merge feature.values into existingFeature.values */ function mergeFeatureValues(existingFeature, feature) { Object.entries(feature.values).forEach((e) => { if (existingFeature.values[e[0]] !== e[1]) { existingFeature.values[e[0]] = e[1]; } }); } //------------------------------------------------------------------------------ /** @return true if the genotypeLookup API result is from Germinate, * false if VCF, from bcftools */ function resultIsGerminate(data) { return Array.isArray(data); } /** Parse Germinate genotype calls result and add features to block. * @return * { createdFeatures : array of created Features, * sampleNames : array of sample names } * * @param block view dataset block for corresponding scope (chromosome) * @param requestFormat 'CATG', 'Numerical', ... * Unlike bcftools, Germinate probably sends results only in CATG (nucleotide) * format, which is the format it uses for upload and storage in HDF. * @param replaceResults true means remove previous results for this block from block.features[] and selectedFeatures. * @param selectedService if defined then update selectedFeatures * @param data result from Germinate callsets/<datasetDbId>/calls request * @param options { nSamples } */ function addFeaturesGerminate(block, requestFormat, replaceResults, selectedService, data, options) { const fnName = 'addFeaturesGerminate'; dLog$2(fnName, block.id, block.mapName, data.length); if (replaceResults) { dLog$2(fnName, 'replaceResults not implemented'); } const store = block.get('store'), columnNames = data.mapBy('callSetName').uniq(), sampleNames = columnNames, createdFeatures = data.map((call, i) => { const f = {values : {}}; /* Will lookup f.value in block.features interval tree, * and if found, merge with existing feature - factor out use of * mergeFeatureValues() in addFeaturesJson(). * Using createdFeatures.push(feature) instead of =data.map() */ // call.callSetDbId identifies sample name : callSetName // previously seeing in results : 'CnullT' - this is now fixed in java. const genotypeValue = call.genotypeValue; f.values[call.callSetName] = genotypeValue; let {markerName, positionText} = variantNameSplit(call.variantName, i < 5), position = +positionText; if (isNaN(position)) { // handle Oct19 format : dbid_mapid_ exome SNP name e.g. 6_20_6_scaffold77480, or? scaffold72661_85293-85293.0 markerName = positionText; } else { f.value_0 = position; f.value = [position]; } let feature = f; /** sampleID corresponds to callSetName, so exclude it from the feature name/id */ const [datasetID, sampleID] = call.callSetDbId.split('-'); /* .id is unique per genotype table row; for 1 feature per cell, append : + '_' + call.callSetName */ feature._name = markerName; feature.id = block.id + '_' + datasetID + '_' + markerName; feature = featureMergeOrCreate(store, block, feature); return feature; }); dLog$2(fnName, data.length, columnNames.length); featureUpdateSelectedAndBlock(selectedService, block, createdFeatures); let result = {createdFeatures, sampleNames}; return result; } function featureMergeOrCreate(store, block, feature) { /** used in addFeaturesGerminate() and addFeaturesBrapi() */ let existingFeature = store.peekRecord('feature', feature.id); if (existingFeature) { mergeFeatureValues(existingFeature, feature); feature = existingFeature; // this is included in createdFeatures, since it is a result from the current request. // as noted in addFeaturesJson(), can rename to resultFeatures. } else { // addFeaturesJson() uses feature.blockId - not sure if that is applicable feature.blockId = block; // Replace Ember.Object() with models/feature. feature = store.createRecord('feature', feature); const server = block.server; if (! feature.value) { brapiGetVariantPosition(server, feature); } } return feature; } function featureUpdateSelectedAndBlock(selectedService, block, createdFeatures) { /** used in addFeaturesGerminate() and addFeaturesBrapi() */ if (selectedService) { const feature = createdFeatures[0], mapChrName = feature?.get('blockId.brushName'); // selectedService = feature?.get('blockId.axis.selected'); selectedService.selectedFeaturesUpdateIndex(); createdFeatures.forEach( feature => selectedService.selectedFeaturesMergeFeature(mapChrName, feature)); } // createRecord() connects to block OK, so this is not required : // createdFeatures.forEach(feature => { // block.features.addObject(feature); blockEnsureFeatureCount(block); block.addFeaturePositions(createdFeatures); } //------------------------------------------------------------------------------ /** Split the variantName from either Germinate or Spark server into component elements. * @param variantName * @param traceUnmatched enable tracing of failure to parse variantName * @return {markerName, positionText} */ function variantNameSplit(variantName, traceUnmatched) { const fnName = 'variantNameSplit'; /** Germinate : * "variantName": "m2-23.0" * m2-23.0 => m2 is marker name and 23.0 is its position * Some of the marker names contain '-', e.g. 'scaffold77480-1_24233-24233.0' * so instead of split('-'), use .match(/(.+) ... ) which is greedy. * * Spark server : e.g. "variantName":"Chr1A_4188418" */ let match, wholeString, markerName, positionText; if ((match = variantName.match(/(.+)-(.+)/))) { // Germinate [wholeString, markerName, positionText] = match; } else if ((match = variantName.match(/(.+)_(.+)/))) { // Spark server let chrName; [wholeString, chrName, positionText] = match; // markerName is used to make feature .id and ._name unique markerName = positionText; } else if (traceUnmatched) { dLog$2(fnName, variantName, 'not matched'); } return {markerName, positionText}; } /** @return true if the genotypeLookup API result is from Brapi, * false if VCF, from bcftools, or Germinate. * Related : resultIsGerminate(). */ function resultIsBrapi(data) { return ! resultIsGerminate(data) && (typeof data === 'object'); } /** Parse Brapi genotype calls result and add features to block. * Params are the same as addFeaturesGerminate(), except for data. * @return * { createdFeatures : array of created Features, * sampleNames : array of sample names } * @param requestFormat 'CATG', 'Numerical', ... * Not used; BrAPI "GT" returns Numerical format. * refn : dataMatrixAbbreviations and dataMatrixNames in https://brapigenotyping21.docs.apiary.io/#/reference/allele-matrix/get-allelematrix * @param data result from BrAPI allelematrix request * {callSetDbIds, dataMatrices, variantDbIds, ... } */ function addFeaturesBrapi(block, requestFormat, replaceResults, selectedService, data, options) { const fnName = 'addFeaturesBrapi'; dLog$2(fnName, block.id, block.mapName, data.callSetDbIds?.length, data.variantDbIds?.length, data.dataMatrices?.length); if (replaceResults) { dLog$2(fnName, 'replaceResults not implemented'); } const store = block.get('store'), columnNames = data.callSetDbIds, sampleNames = columnNames, dataset = block.get('datasetId'), samples = dataset.get('samples'), samplesById = Object.fromEntries(samples.map(s => [s.sampleDbId, s])), createdFeatures = data.variantDbIds.map((variantDbId, variantIndex) => { const /** only .dataMatrices[0] is handled; [0] should be the data type requested * by .dataMatrixAbbreviations / dataMatrixNames, and this function will * request 'GT'. */ row = data.dataMatrices[0].dataMatrix[variantIndex], entries = data.callSetDbIds.map((callSetDbId, sampleIndex) => [samplesById[callSetDbId].sampleName, row[sampleIndex]]), values = Object.fromEntries(entries), f = {values}; let feature = f; feature._name = variantDbId; feature.id = variantDbId; feature = featureMergeOrCreate(store, block, feature); return feature; }); dLog$2(fnName, data.dataMatrices?.length, data.callSetDbIds?.length, columnNames.length); featureUpdateSelectedAndBlock(selectedService, block, createdFeatures); let result = {createdFeatures, sampleNames}; return result; } // export { brapiGetVariantPosition } function brapiGetVariantPosition(server, feature) { const variantDbId = feature.id, variantsP = server.variants([variantDbId]).then(data => { const /** data is response.result.data[] */ d = data[0], values = feature.values, info = d.additionalInfo; feature.value = [+d.start]; feature.value_0 = feature.value[0]; if (d.end !== undefined) { feature.value[1] = +d.end; } if (d.referenceBases) { values.ref = d.referenceBases; } if (d.alternateBases) { values.alt = d.alternateBases.join(','); } if (info !== undefined) { values.INFO = info; const valuesAdd = pick(info, ['MAF', 'tSNP']); Object.assign(values, valuesAdd); if (info.AC && info.AN && +info.AN) { feature[callRateSymbol] = +info.AC / +info.AN; } } // dLog(fnName, feature); }); return variantsP; } // ----------------------------------------------------------------------------- /** Convert numeric value from string to number. * If given value is not numeric, return the param. * Related : parseNumberFields(), parseBooleanFields(); * @param text * @return text unchanged if it is not numeric */ function parseNumber(text) { const number = Number(text), result = isNaN(number) ? text : number; return result; } /** Convert numeric string values in object to number. */ function parseNumberFields(obj) { /** Convert numeric strings to numbers. */ Object.entries(obj).forEach(([k, v]) => { const number = Number(v); if (! isNaN(number)) { obj[k] = number; } }); } var vcfFeature = /*#__PURE__*/Object.freeze({ __proto__: null, addFeaturesBrapi: addFeaturesBrapi, addFeaturesGerminate: addFeaturesGerminate, addFeaturesJson: addFeaturesJson, addGerminateOptions: addGerminateOptions, datasetId2Class: datasetId2Class, getDatasetFeaturesCounts: getDatasetFeaturesCounts, gtValueIsNumeric: gtValueIsNumeric, parseNumber: parseNumber, parseNumberFields: parseNumberFields, resultIsBrapi: resultIsBrapi, resultIsGerminate: resultIsGerminate, setFrameworkFunctions: setFrameworkFunctions$1, variantNameSplit: variantNameSplit, vcfGenotypeLookup: vcfGenotypeLookup }); // import { mapInSeries } from './promises'; // .js /** * @file genolink-passport.js * * Provides functions to access the Passport Data Retrieval API from Genesys. * This API endpoint allows users to retrieve passport data using either a list of * accession numbers, genotype IDs, or both. */ /** * @typedef {Object} PassportDataQuery * @property {Array<string>} [accessionNumbers] - An array of accession numbers. * @property {Array<string>} [genotypeIds] - An array of genotype IDs. * @property {Array<string>} [selectFields] - An array of Passport data field names; possible values are in passportFieldNames[]. */ /** * Fetch passport data from the Genesys API. * * Update : some of the given accessionNumbers or genotypeIds may not be present * in database. These will be omitted from the response. * To handle this : * * - "accessionNumber" should be added to selectFields if not present, to enable * the Genolink backend to map accessions to their corresponding genotype IDs. * It should be filtered out of the output if it was not present in * selectFields. * * - genotypeID is added to the output, and should be filtered out of the output * * - for genotypeIds (or accessionNumbers) which are not in the output, add an * object with an empty string for each of selectFields. * * There is an example of this in the header comment of the following function, * which performs those output filtering steps, @see fillInMissingData(). * * Looking at the 2 uses of this function : * - datasetGetPassportData() (manage-genotype.js) does not require filter * "accessionNumber" and "genotypeID" to be filtered out of the output. * - selectedSamplesGetPassport() (genotype-samples.js) will output those fields * if they are not filtered out, but in some cases including genotypeID may be * desired. * * @param {PassportDataQuery} query - Query parameters. * @param {Array<string>} [query.accessionNumbers] - An array of accession numbers. * @param {Array<string>} [query.genotypeIds] - An array of genotype IDs. * @param {Array<string>} [query.selectFields] - An array of Passport data field names. * If not provided, the default is to request all passport data, i.e. all fields. * @param {string} baseUrl - The base URL of the API (e.g., "https://genolink.plantinformatics.io"). * @returns {Promise<any>} - Resolves with the JSON response from the API. * Update : {Array<Promise<any>>} */ function getPassportData({accessionNumbers = [], genotypeIds = [], selectFields = [] }, baseUrl) { const accessionNumbersIsKey = accessionNumbers.length > 0, keyName = accessionNumbersIsKey ? 'accessionNumbers' : 'genotypeIds', keys = accessionNumbersIsKey ? accessionNumbers : genotypeIds, /** default page size of Genolink * By using (<=) 100, it is not necessary to use &p= &l= */ pageLength = 100, chunks = chunk(keys, pageLength), elt2PromiseFn = (keyschunk, i) => getPassportDataChunk({[keyName] : keyschunk, selectFields}, baseUrl), response = chunks.map(elt2PromiseFn); // or mapInSeries(keys, elt2PromiseFn) // caller e.g. : [].concat(responses); return response; } async function getPassportDataChunk({ accessionNumbers = [], genotypeIds = [], selectFields = [] }, baseUrl, page, pageLength) { let url = new URL("/api/genesys/accession/query", baseUrl); const /** selectFields === [] means all fields are selected. */ accessionNumberAdded = selectFields.length && !selectFields.includes("accessionNumber"), selectFieldsAN = accessionNumberAdded ? selectFields.concat("accessionNumber") : selectFields; // If any selectFields are defined, pass them as query params in the URL. if (selectFieldsAN.length) { url += '?select=' + selectFieldsAN.join(','); // Probably required iff the full list of keys is sent in each request. // + '&p=' + page + '&l=' + pageLength; } // Prepare the request payload. Only include keys that have values. const payload = {}; if (accessionNumbers.length > 0) { payload.accessionNumbers = accessionNumbers; } if (genotypeIds.length > 0) { payload.genotypeIds = genotypeIds; } const options = { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(payload) }; try { const response = await fetch(url.toString(), options); if (!response.ok) { throw new Error(`Error fetching passport data: ${response.status} ${response.statusText}`); } return await response.json() .then(data => fillInMissingData(accessionNumbers, genotypeIds, selectFields, selectFieldsAN, accessionNumberAdded, data)); } catch (error) { console.error("Error in getPassportData:", error.message, url, payload, error); throw error; } } /** Filter the output of getPassportData(), to fill in missing data. * * The first 3 parameters are the same as getPassportData(). * @param {Array<string>} accessionNumbers - An array of accession numbers. * @param {Array<string>} genotypeIds - An array of genotype IDs. * @param {Array<string>} selectFields - An array of Passport data field names. * If not provided, the default is to request all passport data, i.e. all fields. * * @param {Array<string>} selectFields - An array of Passport data field names. * This is the same as selectFields if selectFields includes or implies "accessionNumber"; * otherwise it is a copy of selectFields, with "accessionNumber" appended. * * @param {object} data response from API request. * * --- * Example : selectFields : [ "accessionNumber", "countryOfOrigin.codeNum" ] input : ```json { "genotypeIds": ["AGG240WHEA2-B00003-1-09", "AGG5259WHEA1-B00003-1-06", "AGG_missing_data_ID"] } ``` API response: ```json { "content": [ { "accessionNumber": "AGG 240 WHEA", "countryOfOrigin.codeNum": "380", "genotypeID": "AGG240WHEA2-B00003-1-09" }, { "accessionNumber": "AGG 5259 WHEA", "countryOfOrigin.codeNum": "364", "genotypeID": "AGG5259WHEA1-B00003-1-06" } ], } ``` desired output : ```json [ { "accessionNumber": "AGG 240 WHEA", "countryOfOrigin.codeNum": "380" }, { "accessionNumber": "AGG 5259 WHEA", "countryOfOrigin.codeNum": "364" }, { "accessionNumber": "", "countryOfOrigin.codeNum": "" }, ], */ function fillInMissingData(accessionNumbers, genotypeIds, selectFields, selectFieldsAN, accessionNumberAdded, data) { // set up test case // genotypeIds.push("missingDataKeyId"); const /** One of accessionNumbers and genotypeIds is [], and the other is an array * of ID strings. * Notice that the capitalisation of the query 'genotypeIds' is different to the * field name in the response 'genotypeID'. */ keyName = accessionNumbers?.length ? "accessionNumber" : "genotypeID", /** Convert the output data.content[] to a map to enable it to be converted to * a parallel array. */ map = data.content.reduce((m, d) => { const key = d[keyName], {genotypeID, accessionNumber, ...rest} = d; m[key] = d; return m; }, {}), keys = accessionNumbers?.length ? accessionNumbers : genotypeIds, /** If a key does not have a response, create an empty response with a field * for each of selectFields. */ parallel = keys.map(key => map[key] || Object.fromEntries(selectFields.map(s => [s, ''])) ); /* console.log( fnName, accessionNumbers, genotypeIds, selectFields, accessionNumberAdded, data, keyName, map, keys, parallel); */ /** From the original response only .content is used; if other parts are * needed then it can be copied with : * Object.assign(Object.assign({}, data), {content : parallel}) * Update : return just the array, to enable easier concat() of chunks; * currently no field other than .content is required. */ return parallel; // {content : }; } /** * Convenience function to query passport data using only accession numbers. * * @param {Array<string>} accessionNumbers - An array of accession numbers. * @param {string} baseUrl - The base URL of the API. * @returns {Promise<any>} - Resolves with the JSON response from the API. */ async function getPassportDataByAccessionNumbers(accessionNumbers, baseUrl) { return getPassportData({ accessionNumbers }, baseUrl); } /** * Convenience function to query passport data using only genotype IDs. * * @param {Array<string>} genotypeIds - An array of genotype IDs. * @param {string} baseUrl - The base URL of the API. * @returns {Promise<any>} - Resolves with the JSON response from the API. */ async function getPassportDataByGenotypeIds(genotypeIds, baseUrl) { return getPassportData({ genotypeIds }, baseUrl); } // Example usage: // (async () => { // const baseUrl = "https://genolink.plantinformatics.io"; // // Using genotypeIds only // try { // const resultByGenotype = await getPassportDataByGenotypeIds( // ["AGG240WHEA2-B00003-1-09", "AGG5259WHEA1-B00003-1-06"], // baseUrl // ); // console.log("Result by genotype IDs:", resultByGenotype); // } catch (err) { // console.error(err); // } // // Using accessionNumbers only // try { // const resultByAccession = await getPassportDataByAccessionNumbers( // ["AGG 1 WHEA", "AGG 480 WHEA"], // baseUrl // ); // console.log("Result by accession numbers:", resultByAccession); // } catch (err) { // console.error(err); // } // // Using both // try { // const resultByBoth = await getPassportData( // { // accessionNumbers: ["AGG 1 WHEA", "AGG 480 WHEA"], // genotypeIds: ["AGG240WHEA2-B00003-1-09"] // }, // baseUrl // ); // console.log("Result using both fields:", resultByBoth); // } catch (err) { // console.error(err); // } // })(); //------------------------------------------------------------------------------ const passportFieldNames = [ "accessionName", "accessionNumber", "acquisitionDate", "aliases", "countryOfOrigin.name", "crop.name", "cropName", "doi", "donorCode", "donorName", "genus", "instituteCode", "institute.fullName", "institute.id", "institute.owner.createdDate", "institute.owner.lastModifiedDate", "institute.owner.name", "lastModifiedDate", "sampStat", "taxonomy.grinTaxonomySpecies.id", "taxonomy.grinTaxonomySpecies.name", "taxonomy.grinTaxonomySpecies.name", "taxonomy.grinTaxonomySpecies.speciesName", "taxonomy.taxonName", "uuid", ]; //------------------------------------------------------------------------------ var genolinkPassport = /*#__PURE__*/Object.freeze({ __proto__: null, getPassportData: getPassportData, getPassportDataByAccessionNumbers: getPassportDataByAccessionNumbers, getPassportDataByGenotypeIds: getPassportDataByGenotypeIds, getPassportDataChunk: getPassportDataChunk, passportFieldNames: passportFieldNames }); //------------------------------------------------------------------------------ /** Base of web API endpoint URLs of IPK PanBARLEX */ const baseUrl$1 = 'https://panbarlex.ipk-gatersleben.de'; //------------------------------------------------------------------------------ /** * Represents an interval selected ("brushed") on a chromosome axis by the user. * @typedef {object} Interval * @property {string} genotype - The genotype to fetch data for. * @property {string} contig - The contig to fetch data for. * @property {number} start - The starting position of the data range. * @property {number} end - The ending position of the data range. */ /** * Fetches dot plot data from the web API. * * Explanation : * This function accepts parameters for `genotype`, `contig`, `start`, and `end`, allowing you to fetch different data sets based on user input or other dynamic values. * It constructs the request body based on these parameters and performs the fetch operation. * It checks if the response is OK and then parses the JSON data if it is. * * Error Handling : * The function includes error handling to log any issues with the fetch operation. * * The first version of this function was partially generated by ChatGPT based * on /dotplot network request recorded by Web Inspector `Copy as Fetch`. * * @param {Interval[]} intervals - An array of objects each containing the parameters * - {genotype, contig, start, end} * * @returns {Promise<Object>} The JSON response from the API containing the dot plot data. * @throws {Error} Will throw an error if the fetch operation fails or the response is not OK. */ async function fetchDotPlotData(intervals) { const url = baseUrl$1 + '/assemblies/dotplot', entries = intervals.slice(0, 2) .map((interval, i) => [["1st", "2nd"][i] , interval]), params = Object.fromEntries(entries), body = JSON.stringify(params); try { const response = await fetch(url, { method: "POST", // Method is required body: body, // Request body mode: "cors" // CORS mode }); if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`); } const data = await response.json(); return data; } catch (error) { console.error("Error fetching dot plot data:", error); throw error; // Rethrow so the caller can handle it } } //------------------------------------------------------------------------------ var ipkPanbarlexBrowser = /*#__PURE__*/Object.freeze({ __proto__: null, fetchDotPlotData: fetchDotPlotData }); //------------------------------------------------------------------------------ /** Reduce the array to a promise; map each array element to a promise using * elt2PromiseFn, in series (not in parallel). * @param array * @param elt2PromiseFn (previousResult, element) -> promise * @param starting_promise Start after this initial promise yields * Defaults to Promise.resolve() if undefined. */ function reduceInSeries(array, elt2PromiseFn, starting_promise) { /** based on ensureCounts() in lb4app/lb3app/common/utilities/block-features.js * and also https://stackoverflow.com/a/21372567 user663031 * @param previousP head of chain of promises * @param previous result value yielded by previousP */ const promise = array.reduce( (previousP, currentElement) => previousP.then( (previous) => elt2PromiseFn(previous, currentElement)), Promise.resolve()); return promise; } //-----------------------------------------------------------------------------