UNPKG

labo-components

Version:
601 lines (509 loc) 19.4 kB
import MetadataSchemaUtil from '../../util/MetadataSchemaUtil';
import ElasticsearchDataUtil from '../../util/ElasticsearchDataUtil';
import RegexUtil from '../../util/RegexUtil';

/*
TODO:
- think about how to generate facets automatically
- create a separate component for searching in fragments
- component with audio player
- properly integrate the play-out of a fragment (e.g. from the word cloud to the player)
- make sure the config 'knows' which kind of view it should generate data for
*/

/**
 * Base class for each collection configuration.
 *
 * Holds the collection metadata (ES stats, field descriptions) and exposes
 * default implementations that collection-specific sub classes can override.
 */
export default class CollectionConfig {

    /**
     * Requires the output of [SEARCH_API]/api/v1/collections/show_stats?collectionId=[ID]
     *
     * @param {*} clientId - stored as-is; used when building personal collection index names
     * @param {*} user - stored as-is
     * @param {string} collectionId - based on the ES index/alias name
     * @param {Object} [collectionMetadata] - contains ES stats, field descriptions & collection_metadata (CKAN)
     */
    constructor(clientId, user, collectionId, collectionMetadata) {
        this.clientId = clientId;
        this.user = user;
        this.collectionId = collectionId; //based on the ES index/alias name
        this.collectionMetadata = collectionMetadata; //contains ES stats, field descriptions & collection_metadata (CKAN)

        this.docType = null;
        this.stringFields = [];
        this.textFields = [];
        this.dateFields = [];
        this.nonAnalyzedFields = [];
        this.keywordFields = [];
        this.longFields = [];
        this.doubleFields = [];
        this.nestedFields = [];

        const stats = collectionMetadata ? collectionMetadata.collection_statistics : null;
        if (Array.isArray(stats)) {
            //the preferred doc type is the one with the highest doc_count (first one wins on a tie)
            let preferred = null;
            stats.forEach(dt => {
                if (!dt) {
                    return; //skip empty entries instead of crashing on them
                }
                if (preferred == null || preferred.doc_count < dt.doc_count) {
                    preferred = dt;
                }
            });

            //an empty statistics list leaves docType null and the field lists empty
            if (preferred) {
                this.docType = preferred.doc_type;

                //extract the field info
                const fields = preferred.fields;
                if (fields) {
                    //merged in getStringFields(). ES5 uses 'text' and older versions only use 'string'
                    this.stringFields = fields['string'];
                    this.textFields = fields['text'];

                    //merged in getNonAnalyzedFields()
                    this.nonAnalyzedFields = fields['not_analyzed'];
                    if (fields['keyword']) {
                        this.keywordFields = fields['keyword'];
                    }

                    this.dateFields = fields['date'];
                    this.longFields = fields['long'];
                    this.doubleFields = fields['double'];
                    this.nestedFields = fields['nested'];
                }
            }
        }
    }

    /** @returns {Array} empty by default */
    getCollectionMediaTypes() {
        return [];
    }

    /**
     * Important for generating replacable PIDs for playable content; should override!
     *
     * @returns {string} 'http://' + the collection ID (dashes replaced by dots) + '.clariah.nl#'
     */
    getNamespace() {
        return 'http://' + this.collectionId.replace(/-/g, '.') + '.clariah.nl#';
    }

    //TODO see if this is necessary or we just directly access the global variable
    getCollectionId() {
        return this.collectionId;
    }

    /**
     * @returns {?string} the title from the collection metadata (may be undefined when the
     * metadata has no title) or null when there is no collection metadata at all
     */
    getCollectionTitle() {
        if (this.collectionMetadata) {
            return this.collectionMetadata.title;
        }
        return null;
    }

    getCollectionMetadata() {
        return this.collectionMetadata;
    }

    //TODO this will become a much more important function later on
    //FIXME make the difference between CKAN / workspace API versions of the collection metadata less weird
    /**
     * Resolution order: collectionMetadata.index, then the personal collection index name
     * ('pc__[clientId]__[user]__[id]') when the metadata has a user and an id, and finally the
     * collection ID.
     *
     * @returns {string} the name of the index to search in
     */
    getSearchIndex() {
        const metadata = this.collectionMetadata;
        if (!metadata) {
            return this.collectionId;
        }
        if (metadata.index) {
            return metadata.index;
        }
        if (metadata.user && metadata.id) { //TODO check this on the server (user & id)
            return 'pc__' + this.clientId + '__' + metadata.user + '__' + metadata.id;
        }
        //never hand out undefined: fall back to the collection ID
        return this.collectionId;
    }

    getImageBaseUrl() {
        return null;
    }

    getVideoBaseUrl() {
        return null;
    }

    getAudioBaseUrl() {
        return null;
    }

    requiresPlayoutAccess() {
        return false;
    }

    //by default anonymous users are able to do anything with the collections that are granted for them
    //only in rare cases parts of a collection are closed off, therefore the "prohibition approach" is chosen
    getAnonymousUserRestrictions() {
        return {
            prohibitThumbnails : false, // show thumbnails in search results
            prohibitPlayout : false // playing content
        };
    }

    hideOffAirContent() {
        return false;
    }

    getThumbnailContentServerId() {
        return null;
    }

    /** @returns {?string} the doc_type of the preferred collection statistics entry (see constructor) */
    getDocumentType() {
        return this.docType;
    }

    //CURRENT this gets the layers from additional indices with the __[LAYER NAME] suffix
    /**
     * @returns {Array} the collection ID followed by the `collection` property of each entry in
     * the metadata's collection_annotation_indices (if any)
     */
    getCollectionIndices() {
        const indices = [this.getCollectionId()];
        const stats = this.getCollectionMetadata();
        if (stats && stats['collection_annotation_indices']) {
            return indices.concat(
                stats['collection_annotation_indices'].map(i => i.collection)
            );
        }
        return indices;
    }

    //used whenever you want to search by default in nested documents (only when no field categories/clusters are selected)
    //should return an object like this: {path : 'layer__asr', fields : ['layer__asr.words']}
    getNestedSearchLayers() {
        return null;
    }

    //the nested path used for forming the ES query in the search API
    getFragmentPath() {
        return null;
    }

    //which of the fragment fields are text fields and suitable for match queries?
    getFragmentTextFields() {
        return null;
    }

    //by default enable & disable when a fragment path is set
    includeMediaObjects(searchTerm=null) {
        //always include media objects when there is no search term
        if (!searchTerm) {
            return true;
        }
        //otherwise look if there is a fragment path, if so EXCLUDE media objects
        //(so only documents of the doctype media_fragment are returned)
        return this.getFragmentPath() == null;
    }

    //by default never return the children of nested documents
    includeFragmentChildren() {
        return false;
    }

    /** @returns {?Array} the 'string' and 'text' fields merged, or null when there are none */
    getStringFields() {
        let tmp = [];
        if (this.stringFields) {
            tmp = tmp.concat(this.stringFields);
        }
        if (this.textFields) {
            tmp = tmp.concat(this.textFields);
        }
        return tmp.length > 0 ? tmp : null;
    }

    getTextFields() {
        return this.textFields;
    }

    getDateFields() {
        return this.dateFields;
    }

    /** @returns {?Array} the 'not_analyzed' and 'keyword' fields merged, or null when there are none */
    getNonAnalyzedFields() {
        let tmp = [];
        if (this.nonAnalyzedFields) {
            tmp = tmp.concat(this.nonAnalyzedFields);
        }
        if (this.keywordFields) {
            tmp = tmp.concat(this.keywordFields);
        }
        return tmp.length > 0 ? tmp : null;
    }

    getKeywordFields() {
        return this.keywordFields;
    }

    /**
     * Checks if the field is a keyword field and makes sure to return the matched keyword field name.
     *
     * @param {string} fieldName
     * @returns {?string} fieldName when it is itself listed as a keyword field, fieldName + '.keyword'
     * when some keyword field name contains it, otherwise null
     */
    getMatchedKeywordField(fieldName) {
        if (!fieldName) {
            return null;
        }
        const keywordFields = this.getKeywordFields() || [];

        //an exact match always wins, regardless of the order of the keyword field list
        if (keywordFields.indexOf(fieldName) !== -1) {
            return fieldName;
        }
        const kwMatch = keywordFields.find(kw => kw.indexOf(fieldName) !== -1);
        return kwMatch ? fieldName + '.keyword' : null;
    }

    /**
     * Used by the collection analyzer (field analysis pull down).
     *
     * @returns {?Array<Object>} objects shaped like {id, type, keywordMultiField, title[, nested]},
     * or null when no fields are known
     */
    getAllFields() {
        const tmp = [];
        const addFields = (fieldNames, type) => {
            if (fieldNames) {
                fieldNames.forEach(f => {
                    tmp.push({id : f, type : type, keywordMultiField : false, title : this.toPrettyFieldName(f)});
                });
            }
        };

        addFields(this.dateFields, 'date');
        addFields(this.stringFields, 'text');
        addFields(this.textFields, 'text');
        addFields(this.longFields, 'numeric');
        addFields(this.doubleFields, 'numeric');

        //mark all the nested fields
        if (this.nestedFields) {
            tmp.forEach(f => {
                if (this.nestedFields.indexOf(f.id) !== -1) {
                    f.nested = true;
                }
            });
        }

        //mark all the fields that are a multi-field keyword field
        if (this.keywordFields) {
            tmp.forEach(f => {
                if (this.keywordFields.indexOf(f.id + '.keyword') !== -1) {
                    f.keywordMultiField = true;
                }
            });
        }
        if (this.nonAnalyzedFields) {
            tmp.forEach(f => {
                if (this.nonAnalyzedFields.indexOf(f.id + '.raw') !== -1) {
                    f.keywordMultiField = true;
                }
            });
        }

        //finally add all the pure keyword fields
        if (this.keywordFields) {
            addFields(this.keywordFields.filter(f => f.indexOf('.keyword') === -1), 'keyword');
        }

        return tmp.length > 0 ? tmp : null;
    }

    usesLayeredModel() {
        return false;
    }

    //simply return the first date field by default (this function is used by QueryBuilder)
    getPreferredDateField() {
        const dfs = this.getDateFields();
        if (dfs && dfs.length > 0) {
            return dfs[0];
        }
        return null;
    }

    /**
     * Should return an array of formatted date strings.
     *
     * @param {string|Object} date
     * @returns {?Array} for strings: everything but digits and dashes is stripped (and cut to 10
     * characters when the string starts with a digit); objects are wrapped as-is; otherwise null
     */
    getFormattedDates(date) {
        if (date && typeof(date) === 'string') {
            const stripped = date.replace(/[^0-9-]/g, '');
            return [date.match(/^\d/) ? stripped.substring(0, 10) : stripped];
        } else if (date && typeof(date) === 'object') {
            return [date];
        }
        return null;
    }

    //if the data has translations within its metadata
    getPreferredLanguage() {
        return null;
    }

    //Try to generate at least some date facets to be able to draw a timeline
    //TODO the queryDataFormat can be detected from a retrieved date (implement this somewhere)
    getFacets() {
        return ElasticsearchDataUtil.extractFacetsFromStats(this.dateFields, this.stringFields);
    }

    /**
     * Returns the list of facets for the AggregationCreator, sorted by label.
     * Uses the keyword fields and falls back to getNonAnalyzedFields() when there are none.
     *
     * @param {boolean} allowHeavyFacets - not used here; should be used in the sub classes to
     * filter out collection-specific facets that can freeze up the UI, because of too many results
     * @returns {?Array<{value: string, label: string}>}
     */
    getFacetSelectionList(allowHeavyFacets) {
        let fields = this.getKeywordFields();
        if (!fields || fields.length === 0) {
            //an empty keyword list is truthy, so check the length explicitly before falling back
            fields = this.getNonAnalyzedFields() || fields;
        }
        if (!fields) {
            return null;
        }
        return fields.map(f => {
            return {
                value : f,
                label : this.toPrettyFieldName(f)
            };
        }).sort((a, b) => {
            return a.label > b.label ? 1 : a.label < b.label ? -1 : 0;
        });
    }

    //enables the user to narrow down full-text search to certain parts of the top-level metadata (e.g. search only in titles)
    getMetadataFieldCategories() {
        return null;
    }

    /*
    Function for (LD) entity lookup functions: expects an object like this:
    {
        person : { //currently only person entities are supported
            autocompleteConfig : {
                'fieldClusters' : ["production-person", "guest-person", "discussed-person"], //for which field category clusters autocomplete is offered
                'autocompleteVocabulary' : 'GTAA', //vocabulary to use for auto-complete
                'autocompleteParams' : {'ConceptScheme' : 'http://data.beeldengeluid.nl/gtaa/Persoonsnamen'} //any other parameters, in this case the concept scheme within the vocabulary is specified
            },
            fetchEntityDetails : (entityUri, callback) => {
                SearchAPI.grlc(
                    'BENG-PERSON-LD',
                    'getPersonDataForGTAAId',
                    {gtaa: entityUri}, //TODO make sure the calling code abstracts the gtaa ID to entity ID
                    NISV_GTAA_WIKIDATA.formatPersonDetails,
                    callback
                );
            },
            fetchEntitiesInResource : (resourceUri, callback) => {
                SearchAPI.grlc(
                    'BENG-LD',
                    'getPersonsAndRolesForProgramAndParts',
                    {resource: resourceUri}, //TODO make sure the calling code abstracts the gtaa ID to entity ID
                    NISV_GTAA_WIKIDATA.formatPersonsInResource,
                    callback
                );
            }
        }
    }
    */
    getEntityConfig() {
        return null;
    }

    //gets the resource's URI in the linked data store
    getResourceUri(resourceId) {
        return "";
    }

    //TODO also fetch some data if there is no structured data
    //TODO check if this super function is actually used or always overridden
    /**
     * Builds the item detail object for a search/document API result.
     *
     * @param {Object} result - the raw result
     * @param {?string} [currentDateField] - date field to prefer when the mapped resource has no date
     * @returns {?Object} the mapped resource, or null when the result could not be formatted
     */
    getItemDetailData(result, currentDateField=null) {
        //first flatten the pure ES response
        let mappedResource = this.formatSearchResult(result);
        if (!mappedResource) {
            return null;
        }

        //then fetch any data that can be fetched from known schemas (DIDL, DC, ...)
        const structuredData = MetadataSchemaUtil.extractStructuredData(result);
        if (structuredData) {
            //properties of the flattened result take precedence over the structured data
            mappedResource = Object.assign(structuredData, mappedResource);
        }

        //if there are no title and date try to fetch them via the ES stats or the raw data itself
        if (mappedResource.title == null) {
            if (result.title) {
                mappedResource.title = result.title;
            } else if (this.stringFields != null && this.stringFields.length > 0) {
                mappedResource.title = result[this.stringFields[0]];
            } else {
                mappedResource.title = '<No title available>';
            }
        }
        if (mappedResource.description == null && result.description) {
            mappedResource.description = result.description;
        }
        if (mappedResource.posterURL == null && result.posterURL) {
            mappedResource.posterURL = result.posterURL;
        }
        if (mappedResource.playableContent == null && result.playableContent) {
            mappedResource.playableContent = result.playableContent;
        }
        if (mappedResource.date == null) {
            if (currentDateField && result[currentDateField]) {
                //TODO nested fields can't be found in this way!! fix this
                mappedResource.date = result[currentDateField];
                mappedResource.dateField = currentDateField;
            } else if (this.dateFields != null && this.dateFields.length > 0) {
                mappedResource.date = result[this.dateFields[0]];
                mappedResource.dateField = this.dateFields[0];
            } else {
                mappedResource.date = '<No date available>';
                mappedResource.dateField = '<None available>';
            }
        }

        //NOTE: the raw data is currently not attached (previously: mappedResource.rawData = result)
        return mappedResource;
    }

    //determines, based on an item processed by getItemDetailData(), if it is playable (overridden by KB only)
    isResourcePlayable(mappedResource) {
        return mappedResource.playable === true || (
            mappedResource.playableContent && mappedResource.playableContent.length > 0
        );
    }

    //set the number of extra highlights to add to the search snippet
    getNumberOfHighlightsToDisplay = () => 2;

    //gets the fields for showing highlights in search snippets, in order of preference
    getPreferredHighlightFields = () => [];

    //gets the fields that should never be highlighted
    getForbiddenHighlightFields = () => [];

    __getSearchReferences(result) {
        //Returns an object for the metadata table with keys for the left column of the table and a
        // list of {searchTerm, linkText} objects as value for displaying links in the right column
        // See EYEDesmetFilmConfig for an example
        return null;
    }

    //TODO change this to a more index/db agnostic function. Also change the name to formatResource()
    /**
     * Flattens a search or document API result into a plain object.
     *
     * @param {Object} result
     * @returns {?Object} a deep (JSON) copy of result._source (or of result itself when there is
     * no _source) extended with highlights and id/index/type/score properties; null for empty input
     */
    formatSearchResult(result) {
        if (!result) {
            return null;
        }
        const dataRoot = result._source ? result._source : result; //either coming from search or document API
        const formattedResult = JSON.parse(JSON.stringify(dataRoot));

        //NOTE: the raw server data is currently not added (previously as the rawData prop)

        //add the highlights (only when results come from the search API; never via the document API)
        if (result.highlights) {
            formattedResult.highlights = result.highlights;
        }

        //add the basic fields required by many components (TODO do this on the server)
        formattedResult.resourceId = result._id;
        formattedResult.index = result._index;
        formattedResult.docType = result._type;
        formattedResult.collectionId = this.getSearchIndex();

        //THIS SHOULD BE REMOVED AFTER ALL COLLECTION CONFIGS ARE UPDATED
        formattedResult._id = result._id;
        formattedResult._score = result._score;
        formattedResult._type = result._type;
        formattedResult._index = result._index;

        return formattedResult;
    }

    //this function is used by the UI components to prettify an index field (taken from collection_statistics)
    toPrettyFieldName(esFieldName) {
        const fieldDescription = this._getFieldDescription(esFieldName);
        if (fieldDescription && fieldDescription.en_label) {
            return fieldDescription.en_label;
        }
        return this.formatIndexFieldName(esFieldName);
    }

    //standard way to get the field description (with en_label, description) for an index field, don't override
    _getFieldDescription(esFieldName) {
        if (!esFieldName || !this.collectionMetadata) {
            return null;
        }
        const normalizedField = ElasticsearchDataUtil.normalizeKeywordField(esFieldName);
        const descriptions = this.collectionMetadata.field_descriptions;
        if (descriptions && descriptions[normalizedField]) {
            return descriptions[normalizedField];
        }
        return null;
    }

    /**
     * Default way to format an index field into something more readable.
     * Output shape: [leaf][ => origin][ *][ (PREFERRED)], where ' *' marks a raw/keyword sub field
     * and ' (PREFERRED)' marks the preferred date field.
     *
     * @param {string} esFieldName
     * @returns {string} the formatted name (empty input is returned unchanged)
     */
    formatIndexFieldName(esFieldName) {
        if (!esFieldName) {
            return esFieldName;
        }

        //first split the field based on a dot and remove the namespaces
        const parts = esFieldName.split('.').map(field => field.substring(field.indexOf(":") + 1));

        //if the last part is called raw or keyword (ES reserved names), drop it; but never drop the
        //only part, otherwise a field that is literally called 'raw' or 'keyword' has no leaf left
        let isKeywordField = false;
        const last = parts[parts.length - 1];
        if (parts.length > 1 && (last === 'raw' || last === 'keyword')) {
            isKeywordField = true;
            parts.pop();
        }

        let leaf = parts.pop();

        // move @ to end of fieldname
        if (leaf.substring(0, 1) === '@') {
            leaf = leaf.substring(1) + '@';
        }

        let origin = parts.join(".");
        if (origin) {
            //only strip '@graph' when it is the leading path element
            if (origin === '@graph') {
                origin = '';
            } else if (origin.startsWith('@graph.')) {
                origin = origin.substring('@graph.'.length);
            }
            if (origin.length > 0 && leaf !== 'value@') {
                origin = ' => ' + origin;
            }
        }
        leaf = leaf === 'value@' ? '' : leaf; //always remove value@, since it is not nice to show

        // Add preferred date label
        const postFix = this.getPreferredDateField() === esFieldName ? " (PREFERRED)" : "";

        return leaf + origin + (isKeywordField ? ' *' : '') + postFix;
    }

    //maps a LD predicate to an ES field name
    predicateToIndexField = p => p;

    //used to prevent graphs to blow up in case the minimum date is really low (because of incorrect data)
    getMinimumYear = () => 1600;

    getMaximumYear = () => -1;

    getFieldsToExclude = () => null;

    getHighlightFields = () => ['*']; //provide an array with ES field names to highlight

    /* ---------------------------------- COLLECTION-SPECIFIC STATIC TEXTS -------------------------------- */

    //returns the static text for the search snippet or the highlight overview in the quick viewer
    getMatchingTermsMsg(numHits, forSnippet) {
        if (forSnippet) {
            return numHits > 0 ?
                (numHits + " match" + (numHits === 1 ? "" : "es") + " in archival metadata") :
                ' No matches in the archival metadata, matching terms found in the automatic enrichments';
        }
        return numHits <= 0 ?
            "No matching terms found in the archival metadata, matching terms may be in the automatic annotations (visible when going to the Resource's full view)." :
            'Matching terms in archival metadata';
    }

    //use the search term to find matching media fragments within the active media object
    //populate activeMediaObject.mediaFragments with an array of fragments to make it work
    findMatchingMediaFragments = (resource, searchTerm, activeMediaObject=null) => {
        return null;
    };
}