@gmod/jbrowse
Version:
JBrowse - client-side genome browser
793 lines (705 loc) • 30.2 kB
JavaScript
define(
[
'dojo/_base/declare',
'dojo/_base/array',
'dojo/data/util/simpleFetch',
'JBrowse/Util',
'JBrowse/Digest/Crc32'
],
function( declare, array, simpleFetch, Util, Crc32 ) {
var dojof = Util.dojof;
var Meta = declare( null,
/**
* @lends JBrowse.Store.TrackMetaData.prototype
*/
{
_noDataValue: '(no data)',
/**
* Data store for track metadata, supporting faceted
* (parameterized) searching. Keeps all of the track metadata,
* and the indexes thereof, in memory.
* @constructs
* @param args.trackConfigs {Array} array of track configuration
* @param args.indexFacets {Function|Array|String}
* @param args.onReady {Function}
* @param args.metadataStores {Array[dojox.data]}
*/
constructor: function( args ) {
// set up our facet name discrimination: what facets we will
// actually provide search on
var non_facet_attrs = ['conf'];
this._filterFacet = (function() {
var filter = args.indexFacets || function() {return true;};
// if we have a non-function filter, coerce to an array,
// then convert that array to a function
if( typeof filter == 'string' )
filter = [filter];
if( dojo.isArray( filter ) ) {
var oldfilter = filter;
filter = function( facetName) {
return dojo.some( oldfilter, function(fn) {
return facetName == fn.toLowerCase();
});
};
}
var ident_facets = this.getIdentityAttributes();
return function(facetName) {
return (
// always index ident facets
dojo.some( ident_facets, function(n) { return n == facetName; } )
// otherwise, must pass the user filter AND not be one of our explicitly-blocked attrs
|| filter(facetName)
&& ! dojo.some( non_facet_attrs, function(a) { return a == facetName;})
);
};
}).call(this);
// set up our onReady callbacks to fire once the data is
// loaded
if( ! dojo.isArray( args.onReady ) ){
this.onReadyFuncs = args.onReady ? [ args.onReady ] : [];
} else {
this.onReadyFuncs = dojo.clone(args.onReady);
}
// interpret the track configurations themselves as a metadata store
this._indexItems(
{
store: this,
items: dojo.map( args.trackConfigs,
dojo.hitch( this, '_trackConfigToItem' ) )
}
);
// fetch and index all the items from each of the stores
var stores_fetched_count = 0;
// filter out empty metadata store entries
args.metadataStores = dojo.filter( args.metadataStores, function(s) { return s; } );
if( ! args.metadataStores || ! args.metadataStores.length ) {
// if we don't actually have any stores besides the track
// confs, we're ready now.
this._finishLoad();
} else {
// index the track metadata from each of the stores
var storeFetchFinished = dojo.hitch( this, function() {
if( ++stores_fetched_count == args.metadataStores.length )
this._finishLoad();
});
dojo.forEach( args.metadataStores, function(store) {
store.fetch({
scope: this,
onComplete: dojo.hitch( this, function(items) {
// build our indexes
this._indexItems({ store: store, items: items, supplementalOnly: true });
// if this is the last store to be fetched, call
// our onReady callbacks
storeFetchFinished();
}),
onError: function(e) {
console.error(e, e.stack);
storeFetchFinished();
}
});
},this);
}
// listen for track-editing commands and update our track metadata accordingly
args.browser.subscribe( '/jbrowse/v1/c/tracks/new',
dojo.hitch( this, 'addTracks' ));
args.browser.subscribe( '/jbrowse/v1/c/tracks/replace', dojo.hitch( this, function( trackConfigs ) {
this.deleteTracks( trackConfigs, 'no events' );
this.addTracks( trackConfigs, 'no events' );
}));
args.browser.subscribe( '/jbrowse/v1/c/tracks/delete',
dojo.hitch( this, 'deleteTracks' ));
},
/**
* Convert a track config object into a data store item.
*/
_trackConfigToItem: function( conf ) {
var metarecord = dojo.clone( conf.metadata || {} );
metarecord.label = conf.label;
metarecord.key = conf.key;
metarecord.conf = conf;
metarecord['track type'] = conf.type;
if( conf.category )
metarecord.category = conf.category;
return metarecord;
},
// map of special comparator functions for certain metadata items
comparatorMap: {
// for category metadata, split on "/" and compare
"category": function(a,b) {
var acs = (a||'Uncategorized').split(/\s*\/\s*/);
var bcs = (b||'Uncategorized').split(/\s*\/\s*/);
var ac, bc, compresult;
while( (ac=acs.shift()) && (bc=bcs.shift()) ) {
if(( compresult = ac.localeCompare( bc ) ))
return compresult;
}
return 0;
}
},
addTracks: function( trackConfigs, suppressEvents ) {
if( trackConfigs.length ) {
// clear the query cache
delete this.previousQueryFingerprint;
delete this.previousResults;
}
array.forEach( trackConfigs, function( conf ) {
// insert in the indexes
this._indexItems({
store: this,
items: [ this._trackConfigToItem( conf ) ]
});
var name = conf.label;
var item = this.fetchItemByIdentity( name );
if( ! item )
console.error( 'failed to add '+name+' track to track metadata store', conf );
else if( ! suppressEvents )
this.onNew( item );
},this );
},
deleteTracks: function( trackConfigs, suppressEvents ) {
if( trackConfigs.length ) {
// clear the query cache
delete this.previousQueryFingerprint;
delete this.previousResults;
}
// we don't actually delete things, we just mark them as
// deleted and filter out deleted ones when returning results.
array.forEach( trackConfigs, function( conf ) {
var name = conf.label;
var item = this.fetchItemByIdentity( name );
if( item ) {
item.DELETED = true;
if( ! suppressEvents )
this.onDelete( item );
}
},this);
},
/**
* Set the store's state to be ready (i.e. loaded), and calls all
* our onReady callbacks.
* @private
*/
_finishLoad: function() {
// sort the facet names
this.facets.sort();
// calculate the average bucket size for each facet index
dojo.forEach( dojof.values( this.facetIndexes.byName ), function(bucket) {
bucket.avgBucketSize = bucket.itemCount / bucket.bucketCount;
});
// calculate the rank of the facets: make an array of
// facet names sorted by bucket size, descending
this.facetIndexes.facetRank = dojo.clone(this.facets).sort(dojo.hitch(this,function(a,b){
return this.facetIndexes.byName[a].avgBucketSize - this.facetIndexes.byName[b].avgBucketSize;
}));
// sort the facet indexes by ident, so that we can do our
// kind-of-efficient N-way merging when querying. also,
// uniqify them by identity.
var itemSortFunction = dojo.hitch( this, '_itemSortFunc' );
dojo.forEach( dojof.values( this.facetIndexes.byName ), function( facetIndex ) {
dojo.forEach( dojof.values( facetIndex.byValue ), function( valueIndex ) {
var uniqueItems = [];
var seen = {};
//NOTE: the first record loaded with a given identity always wins
array.forEach( valueIndex.items, function( item ) {
var id = this.getIdentity( item );
if( ! seen[id] ) {
seen[id] = true;
uniqueItems.push( item );
}
},this);
valueIndex.items = uniqueItems.sort( itemSortFunction );
},this);
},this);
this.ready = true;
this._onReady();
},
_itemSortFunc: function(a,b) {
var ai = this.getIdentity(a),
bi = this.getIdentity(b);
return ai == bi ? 0 :
ai > bi ? 1 :
ai < bi ? -1 : 0;
},
_indexItems: function( args ) {
// get our (filtered) list of facets we will index for
var store = args.store,
items = args.items;
var storeAttributes = {};
// convert the items to a uniform format
items = dojo.map( items, function( item ) {
var itemattrs = store.getAttributes(item);
//convert the item into a uniform data format of plain objects
var newitem = {};
dojo.forEach( itemattrs, function(attr) {
// stores sometimes emit undef attributes >:-{
if( ! attr )
return;
var lcattr = attr.toLowerCase();
storeAttributes[lcattr] = true;
newitem[lcattr] = store.getValue(item,attr);
});
return newitem;
},
this
);
// merge them with any existing records, filtering out ones
// that should be ignored if we were passed
// 'supplementalOnly', and update the identity index
this.identIndex = this.identIndex || {};
items = (function() {
var seenInThisStore = {};
return dojo.map( items, function(item) {
// merge the new item attributes with any existing
// record for this item
var ident = this.getIdentity(item);
var existingItem = this.identIndex[ ident ];
if( existingItem && existingItem.DELETED )
delete existingItem.DELETED;
// skip this item if we have already
// seen it from this store, or if we
// are supplementalOnly and it
// does not already exist
if( seenInThisStore[ident] || args.supplementalOnly && !existingItem) {
return null;
}
seenInThisStore[ident] = true;
return this.identIndex[ ident ] = dojo.mixin( existingItem || {}, item );
},
this
);
}).call(this);
// filter out nulls
items = dojo.filter( items, function(i) { return i;});
// update our facet list to include any new attrs these
// items have
var store_facets = dojof.keys( storeAttributes );
var new_facets = this._addFacets( dojof.keys( storeAttributes ) );
var use_facets = array.filter( this.facets, function(f) { return f in storeAttributes; } );
// initialize indexes for any new facets
this.facetIndexes = this.facetIndexes || { itemCount: 0, bucketCount: 0, byName: {} };
dojo.forEach( new_facets, function(facet) {
if( ! this.facetIndexes.byName[facet] ) {
this.facetIndexes.bucketCount++;
this.facetIndexes.byName[facet] = { itemCount: 0, bucketCount: 0, byValue: {} };
}
}, this);
// now update the indexes with the new data
if( use_facets.length ) {
var gotDataForItem = {};
dojo.forEach( use_facets, function(f){ gotDataForItem[f] = {};});
dojo.forEach( items, function( item ) {
this.facetIndexes.itemCount++;
dojo.forEach( use_facets, function( facet ) {
var value = this.getValue( item, facet, undefined );
if( typeof value == 'undefined' )
return;
gotDataForItem[facet][this.getIdentity(item)] = 1;
this._indexItem( facet, value, item );
},this);
}, this);
// index the items that do not have data for this facet
dojo.forEach( use_facets, function(facet) {
dojo.forEach( dojof.values( this.identIndex ), function(item) {
if( ! gotDataForItem[facet][this.getIdentity(item)] ) {
this._indexItem( facet, this._noDataValue, item );
}
},this);
},this);
}
},
/**
* Add an item to the indexes for the given facet name and value.
* @private
*/
_indexItem: function( facet, value, item ) {
var facetValues = this.facetIndexes.byName[facet];
var bucket = facetValues.byValue[value];
if( !bucket ) {
bucket = facetValues.byValue[value] = { itemCount: 0, items: [] };
facetValues.bucketCount++;
}
bucket.itemCount++;
facetValues.itemCount++;
bucket.items.push(item);
},
/**
* Given an array of string facet names, add records for them,
* initializing the necessary data structures.
* @private
* @returns {Array[String]} facet names that did not already exist
*/
_addFacets: function( facetNames ) {
var old_facets = this.facets || [];
var seen = {};
this.facets = dojo.filter(
old_facets.concat( facetNames ),
function(facetName) {
var take = this._filterFacet(facetName) && !seen[facetName];
seen[facetName] = true;
return take;
},
this
);
return this.facets.slice( old_facets.length );
},
/**
* Get the number of items that matched the most recent query.
* @returns {Number} the item count, or undefined if there has not
* been any query so far.
*/
getCount: function() {
return this._fetchCount;
},
/**
* @param facetName {String} facet name
* @returns {Object}
*/
getFacetCounts: function( facetName ) {
var context = this._fetchFacetCounts[ facetName ] || this._fetchFacetCounts[ '__other__' ];
return context ? context[facetName] : undefined;
},
/**
* Get an array of the text names of the facets that are defined
* in this track metadata.
* @param callback {Function} called as callback( [facet,facet,...] )
*/
getFacetNames: function( callback ) {
return this.facets;
},
/**
* Get an Array of the distinct values for a given facet name.
* @param facetName {String} the facet name
* @returns {Array} distinct values for that facet
*/
getFacetValues: function( facetName ) {
var index = this.facetIndexes.byName[facetName];
if( !index )
return [];
return dojof.keys( index.byValue );
},
/**
* Get statistics about the facet with the given name.
* @returns {Object} as: <code>{ itemCount: ##, bucketCount: ##, avgBucketSize: ## }</code>
*/
getFacetStats: function( facetName ) {
var index = this.facetIndexes.byName[facetName];
if( !index ) return {};
var stats = {};
dojo.forEach( ['itemCount','bucketCount','avgBucketSize'],
function(attr) { stats[attr] = index[attr]; }
);
return stats;
},
// dojo.data.api.Read support
getValue: function( i, attr, defaultValue ) {
var v = i[attr];
return typeof v == 'undefined' ? defaultValue : v;
},
getValues: function( i, attr ) {
var a = [ i[attr] ];
return typeof a[0] == 'undefined' ? [] : a;
},
getAttributes: function(item) {
return dojof.keys( item );
},
hasAttribute: function(item,attr) {
return item.hasOwnProperty(attr);
},
containsValue: function(item, attribute, value) {
return item[attribute] == value;
},
isItem: function(item) {
return typeof item == 'object' && typeof item.label == 'string';
},
isItemLoaded: function() {
return this.ready;
},
loadItem: function( args ) {
},
getItem: function( label ) {
if( this.ready )
return this.identIndex[label];
else
return null;
},
// used by the dojo.data.util.simpleFetch mixin to implement fetch()
_fetchItems: function( keywordArgs, findCallback, errorCallback ) {
if( ! this.ready ) {
this.onReady( dojo.hitch( this, '_fetchItems', keywordArgs, findCallback, errorCallback ) );
return;
}
var query = dojo.clone( keywordArgs.query || {} );
// coerce query arguments to arrays if they are not already arrays
dojo.forEach( dojof.keys( query ), function(qattr) {
if( ! dojo.isArray( query[qattr] ) ) {
query[qattr] = [ query[qattr] ];
}
},this);
var results;
var queryFingerprint = Crc32.objectFingerprint( query );
if( queryFingerprint == this.previousQueryFingerprint ) {
results = this.previousResults;
} else {
this.previousQueryFingerprint = queryFingerprint;
this.previousResults = results = this._doQuery( query );
}
// and finally, hand them to the finding callback
findCallback(results,keywordArgs);
this.onFetchSuccess();
},
/**
* @private
*/
_doQuery: function( /**Object*/ query ) {
var textFilter = this._compileTextFilter( query.text );
delete query.text;
// algorithm pseudocode:
//
// * for each individual facet, get a set of tracks that
// matches its selected values. sort each set by the
// track's unique identifier.
// * while still need to go through all the items in the filtered sets:
// - if all the facets have the same track first in their sorted set:
// add it to the core result set.
// count it in the global counts
// - if all the facets *but one* have the same track first:
// this track will need to be counted in the
// 'leave-out' counts for the odd facet out. count it.
// - shift the lowest-labeled track off of whatever facets have it at the front
var results = []; // array of items that completely match the query
// construct the filtered sets (arrays of items) for each of
// our search criteria
var filteredSets = [];
if( textFilter ) {
filteredSets.push(
this._filterDeleted(
array.filter( dojof.values( this.identIndex ), textFilter )
).sort( dojo.hitch(this,'_itemSortFunc') )
);
filteredSets[0].facetName = 'Contains text';
}
filteredSets.push.apply( filteredSets,
dojo.map( dojof.keys( query ), function( facetName ) {
var values = query[facetName];
var items = [];
if( ! this.facetIndexes.byName[facetName] ) {
console.error( "No facet defined with name '"+facetName+"'." );
throw "No facet defined with name '"+facetName+"', faceted search failed.";
}
dojo.forEach( values, function(value) {
var idx = this.facetIndexes.byName[facetName].byValue[value] || {};
items.push.apply( items, this._filterDeleted( idx.items || [] ) );
},this);
items.facetName = facetName;
items.sort( dojo.hitch( this, '_itemSortFunc' ));
return items;
},this)
);
dojo.forEach( filteredSets, function(s) {
s.myOffset = 0;
s.topItem = function() { return this[this.myOffset]; };
s.shift = function() { this.myOffset++; };
});
// init counts
var facetMatchCounts = {};
if( ! filteredSets.length ) {
results = this._filterDeleted( dojof.values( this.identIndex ) );
} else {
// calculate how many item records total we need to go through
var leftToProcess = 0;
dojo.forEach( filteredSets,
function(s) { leftToProcess += s.length;} );
// do a sort of N-way merge of the filtered sets
while( leftToProcess ) {
// look at the top of each of our sets, seeing what items
// we have there. group the sets by the identity of their
// topmost item.
var setsByTopIdent = {}, uniqueIdents = [], ident, item;
dojo.forEach(filteredSets, function(set,i) {
item = set.topItem();
ident = item ? this.getIdentity( item ) : '(at end of set)';
if( setsByTopIdent[ ident ] ) {
setsByTopIdent[ ident ].push( set );
} else {
setsByTopIdent[ ident ] = [set];
uniqueIdents.push( ident );
}
},this);
if( uniqueIdents.length == 1 ) {
// each of our matched sets has the same item at the
// top. this means it is part of the core result set.
results.push( item );
} else {
// ident we are operating on is always the
// lexically-first one that is not the end-of-set
// marker
uniqueIdents.sort();
var leftOutIndex;
if( uniqueIdents[0] == '(at end of set)' ) {
ident = uniqueIdents[1];
leftOutIndex = 0;
} else {
ident = uniqueIdents[0];
leftOutIndex = 1;
}
ident = uniqueIdents[0] == '(at end of set)' ? uniqueIdents[1] : uniqueIdents[0];
if( uniqueIdents.length == 2
&& setsByTopIdent[ ident ].length == filteredSets.length - 1 ) {
// all of the matched sets except one has the same
// item on top, and it is the lowest-labeled item
var leftOutSet = setsByTopIdent[ uniqueIdents[ leftOutIndex ] ][0];
this._countItem( facetMatchCounts, setsByTopIdent[ident][0].topItem(), leftOutSet.facetName );
}
}
dojo.forEach( setsByTopIdent[ ident ], function(s) { s.shift(); leftToProcess--; });
}
}
// each of the leave-one-out count sets needs to also have the
// core result set counted in it, and also make a counting set
// for the core result set (used by __other__ facets not
// involved in the query)
dojo.forEach( dojof.keys(facetMatchCounts).concat( ['__other__'] ), function(category) {
dojo.forEach( results, function(item) {
this._countItem( facetMatchCounts, item, category);
},this);
},this);
// in the case of just one filtered set, the 'leave-one-out'
// count for it is actually the count of all results, so we
// need to make a special little count of that attribute for
// the global result set.
if( filteredSets.length == 1 ) {
dojo.forEach( dojof.values( this.identIndex ), function(item) {
this._countItem( facetMatchCounts, item, filteredSets[0].facetName );
},this);
}
this._fetchFacetCounts = facetMatchCounts;
this._fetchCount = results.length;
return results;
},
_countItem: function( facetMatchCounts, item, facetName ) {
var facetEntry = facetMatchCounts[facetName];
if( !facetEntry ) facetEntry = facetMatchCounts[facetName] = {};
var facets = facetName == '__other__' ? this.facets : [facetName];
dojo.forEach( facets, function(attrName) {
var value = this.getValue( item, attrName, this._noDataValue );
var attrEntry = facetEntry[attrName];
if( !attrEntry ) {
attrEntry = facetEntry[attrName] = {};
attrEntry[value] = 0;
}
attrEntry[value] = ( attrEntry[value] || 0 ) + 1;
},this);
},
onReady: function( scope, func ) {
scope = scope || dojo.global;
func = dojo.hitch( scope, func );
if( ! this.ready ) {
this.onReadyFuncs.push( func );
return;
} else {
func();
}
},
/**
* Event hook called once when the store is initialized and has
* an initial set of data loaded.
*/
_onReady: function() {
dojo.forEach( this.onReadyFuncs || [], function(func) {
func.call();
});
},
/**
* Event hook called after a fetch has been successfully completed
* on this store.
*/
onFetchSuccess: function() {
},
/**
* Event hook called when there are new items in the store.
*/
onNew: function( item ) {
},
/**
* Event hook called when something is deleted from the store.
*/
onDelete: function( item ) {
},
/**
* Event hook called when one or more items in the store have changed their values.
*/
onSet: function( item, attribute, oldvalue, newvalue ) {
},
_filterDeleted: function( items ) {
return array.filter( items, function(i) {
return ! i.DELETED;
});
},
/**
* Compile a text search string into a function that tests whether
* a given piece of text matches that search string.
* @private
*/
_compileTextFilter: function( textString ) {
if( textString === undefined )
return null;
// parse out words and quoted words, and convert each into a regexp
var rQuotedWord = /\s*["']([^"']+)["']\s*/g;
var rWord = /(\S+)/g;
var parseWord = function() {
var word = rQuotedWord.exec( textString ) || rWord.exec( textString );
if( word ) {
word = word[1];
var lastIndex = Math.max( rQuotedWord.lastIndex, rWord.lastIndex );
rWord.lastIndex = rQuotedWord.lastIndex = lastIndex;
}
return word;
};
var wordREs = [];
var currentWord;
while( (currentWord = parseWord()) ) {
// escape regex control chars, and convert glob-like chars to
// their regex equivalents
currentWord = dojo.regexp.escapeString( currentWord, '*?' )
.replace(/\*/g,'.+')
.replace(/ /g,'\\s+')
.replace(/\?/g,'.');
wordREs.push( new RegExp(currentWord,'i') );
}
// return a function that takes on item and returns true if it
// matches the text filter
return dojo.hitch(this, function(item) {
return dojo.some( this.facets, function(facetName) {
var text = this.getValue( item, facetName );
return array.every( wordREs, function(re) { return re.test(text); } );
},this);
});
},
getFeatures: function() {
return {
'dojo.data.api.Read': true,
'dojo.data.api.Identity': true,
'dojo.data.api.Notification': true
};
},
close: function() {},
getLabel: function(i) {
return this.getValue(i,'key',undefined);
},
getLabelAttributes: function(i) {
return ['key'];
},
// dojo.data.api.Identity support
getIdentityAttributes: function() {
return ['label'];
},
getIdentity: function(i) {
return this.getValue(i, 'label', undefined);
},
fetchItemByIdentity: function(id) {
return this.identIndex[id];
}
});
dojo.extend( Meta, simpleFetch );
return Meta;
});