dashboards_mirror_survey_stack

Library for retrieving, reorganizing, and keeping SurveyStack data up to date in a MongoDB mirror, as well as serving it efficiently.

// To initialize an on-demand materialized view, fill a dashboardDocument (similar to the model) and
// upload it into the dashboardsMetadata collection using uploadDashboardDocument. Afterwards,
// updateCollectionAndRetrieveDeltas will read from the source and create the materialized view,
// including the indices needed to guarantee that the merging keeps working correctly in the future.
// Add the exported endpoint to the express server: it will update the view and return the newer
// information, the intention being that it can be merged into an already downloaded version which
// has all the data except the latest entries.

const { MongoClient, ObjectId } = require('mongodb'),
      asyncHandler = require('express-async-handler'),
      { createAllViews } = require('./viewCreator.js');

// Adds a strategy to deal with RegExp values when turning JSON into a string.
// If any value is a RegExp, it is stored as an object with the following format:
// /[0-9]+/g -> { parseAsRegexp: true, expression: '[0-9]+', flags: 'g' }
function regexpReplacer(key, value) {
  if (value instanceof RegExp) {
    return { parseAsRegexp: true, expression: value.source, flags: value.flags };
  } else {
    return value;
  }
}

// Parses RegExp values stored with the strategy used by regexpReplacer,
// interpreting /[0-9]+/ as new RegExp('[0-9]+').
function regexpReviver(key, value) {
  if (typeof value == 'object' && value) {
    if (value.parseAsRegexp) {
      return new RegExp(value.expression, value.flags);
    } else {
      return value;
    }
  } else {
    return value;
  }
}

exports.regexpReplacer = regexpReplacer;
exports.regexpReviver = regexpReviver;

/**
 * Dashboard documents contain all information about a set of documents that are grouped together.
 * A dashboard can either source a survey from SurveyStack submissions (in which case it uses the
 * survey _id attribute to fetch entries) or a collection inside the `dashboards` database.
 *
 * @param {DashboardDocument} newData -- Document describing the collection to be uploaded.
 * @param {string} targetDB
 * @param {URL} mongoURI
 * @throws {results}
 */
async function uploadDashboardDocument(newData, targetDB, targetColl = "dashboardsMetadata", mongoURI) {
  const client = new MongoClient(mongoURI, {});
  let operation;
  try {
    await client.connect();
    operation = await client
      .db(targetDB)
      .collection(targetColl)
      .findOneAndReplace(
        { dashboardName: newData.dashboardName },
        newData,
        { upsert: true }
      );
  } catch (error) {
    throw error;
  } finally {
    await client.close();
  }
  return operation;
}

/**
 * Uploads data into a collection.
 * @param {array} newData
 * @param {string} targetDB
 * @param {URL} mongoURI
 * @throws {}
 */
async function uploadData(newData, targetDB = "test", targetColl, mongoURI) {
  const client = new MongoClient(mongoURI, {});
  newData.forEach(datum => { datum.meta = {}; });
  try {
    await client.connect();
    await client
      .db(targetDB)
      .collection(targetColl)
      .insertMany(newData);
  } catch (error) {
    throw error;
  } finally {
    await client.close();
  }
}
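// Usage sketch (illustrative only, not part of the exported API): round-tripping an aggregation
// stage that contains a RegExp through JSON using the replacer/reviver pair above.
// const stage = { $match: { farmos_url: /[a-z]+\.farmos\.net/ } };
// const serialized = JSON.stringify(stage, regexpReplacer);
// // -> {"$match":{"farmos_url":{"parseAsRegexp":true,"expression":"[a-z]+\\.farmos\\.net","flags":""}}}
// const restored = JSON.parse(serialized, regexpReviver);
// // restored.$match.farmos_url is a RegExp again and can be used inside a pipeline document.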
/**
 * Ensures an index exists and creates it otherwise.
 * @param {} mongoClient
 * @param {} targetDB
 * @param {} targetCollection
 * @param {} indexAttribute
 * @param {} uniqueIndex
 * @returns {}
 */
async function checkOrCreateIndex({ mongoClient, targetDB, targetCollection, indexAttribute, uniqueIndex }) {
  let output = {};
  let collection = mongoClient
    .db(targetDB)
    .collection(targetCollection);
  let indices = await collection
    .listIndexes()
    .toArray();
  let indexExists = indices.find(ind => Object.keys(ind.key)[0] == indexAttribute);
  output.indexExists = indexExists ? true : false;
  if (!indexExists) {
    let indexCreationObject = {};
    indexCreationObject[indexAttribute] = 1;
    output.indexCreation = await collection.createIndex(indexCreationObject, { unique: uniqueIndex });
  }
  return output;
}

/**
 * Creates or updates all entities in an array on the target collection.
 * @param {string} mongoKey - The address/key for the mongodb.
 * @param {Array} dataArray - Array containing data.
 * @param {string} targetDB - Db in which the target collection exists.
 * @param {string} targetCollection - Collection in which to merge the new data.
 * @param {string} indexAttribute - Attribute to be used as unique index.
 * @returns {object} - Returns the mongo response, detailing the amount of changed documents, etc.
 */
async function upsertArrayIntoCollection({ mongoKey, dataArray, targetDB, targetCollection, indexAttribute }) {
  const client = new MongoClient(mongoKey);
  await client.connect();
  let result;
  let indexCreation;
  let success = true;
  try {
    // Prepare all upsert operations to be sent in bulk.
    let uploads = dataArray.map(entity => {
      let filterObject = {};
      filterObject[indexAttribute] = entity[indexAttribute];
      let operation = {
        updateOne: {
          filter: filterObject,
          update: { $set: entity },
          upsert: true
        },
      };
      return operation;
    });
    await client.connect();
    // Perform the bulk operation.
    result = await client
      .db(targetDB)
      .collection(targetCollection)
      .bulkWrite(uploads, { ordered: false });
    // Ensure the index exists and create it if not.
    indexCreation = await checkOrCreateIndex({
      mongoClient: client,
      targetDB: targetDB,
      targetCollection: targetCollection,
      indexAttribute: indexAttribute,
      uniqueIndex: true
    });
  } catch (e) {
    result = { error: e };
    success = false;
  }
  await client.close();
  return { success: success, bulkOperationResults: result, indexCreation: indexCreation };
}

exports.upsertArrayIntoCollection = upsertArrayIntoCollection;

/**
 * Registers the latest occasion on which the dashboard has been updated.
 * @param {mongodb.client} client -- an active mongodb client object
 * @param {string} dashboardID -- name of the dashboard
 * @param {string} targetDB
 * @throws {}
 */
async function registerUpdateDate(client, dashboardID, targetDB) {
  const collection = client
    .db(targetDB)
    .collection("dashboardsMetadata");
  // console.log( `registering date for ${dashboardID}` );
  try {
    const updatePointer = await collection.updateOne(
      { dashboardName: dashboardID },
      { $set: { updateTime: new Date() } },
      { upsert: true }
    );
    // console.log( updatePointer );
  } catch (error) {
    throw error;
  }
}
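// Usage sketch (illustrative; the connection string, collection and attribute names below are
// assumptions, not fixed by the library): bulk-upserting an array keyed on a unique attribute.
// upsertArrayIntoCollection({
//   mongoKey: 'mongodb://localhost:27017',
//   dataArray: [
//     { sample_id: 'A-001', status: 'received' },
//     { sample_id: 'A-002', status: 'shipped' }
//   ],
//   targetDB: 'dashboards',
//   targetCollection: 'sample_status',
//   indexAttribute: 'sample_id'
// }).then( outcome => console.log(outcome.bulkOperationResults, outcome.indexCreation) );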
/**
 * Checks for the latest update of the dashboard collection, gets changes in the source collection
 * based on modification date, and merges the changes into the dashboard collection.
 * If the collection doesn't exist yet, it will first create the unique index required to perform the merge.
 * @param {mongodb.client} client
 * @param {DashboardDocument} dashboardDocument
 * @returns {}
 */
async function updateDashboard(client, dashboardDocument) {
  // console.log( "dashboardDocument" );
  // console.log( dashboardDocument );
  let targetCollection = dashboardDocument.customMergeStage
    ? JSON.parse(dashboardDocument.customMergeStage, regexpReviver).$merge.into.coll
    : dashboardDocument.targetCollection;
  let collectionsInfo = await client
    .db(dashboardDocument.targetDatabase)
    .listCollections()
    .toArray();
  let collectionExists = collectionsInfo
    .map(coll => coll.name)
    .includes(targetCollection);
  // console.log( `collection exists: ${collectionExists}` );
  if (dashboardDocument.indexDefiningAttributes) {
    let indexDescription = {};
    // console.log(`adding collection and index on ${targetCollection}`);
    dashboardDocument
      .indexDefiningAttributes
      .forEach(attr => indexDescription[attr] = 1);
    // console.log("index description");
    // console.log( indexDescription );
    // TODO first check if the index already exists
    let indexCreation = await client
      .db(dashboardDocument.targetDatabase)
      .collection(targetCollection)
      .createIndex(indexDescription, { unique: true });
    // console.log("index creation");
    // console.log(indexCreation);
  }
  const updatePipeline = dashboardDocument.updatePipeline();
  // if (['sample_id_into_lab_id', 'sample_id_orphans',].includes( dashboardDocument.dashboardName )) {
  //   console.log("pipeline stages");
  //   updatePipeline.forEach( d => console.log( JSON.stringify(d) ) );
  //   console.log( updatePipeline );
  // }
  // TODO check and remove. Doing this here makes no sense, as the data won't be downloaded otherwise into the server.
  // The newest entries need to be fetched by each client, based on the latest time at which they updated the local version of the file.
  // let entries = await client
  //   .db(dashboardDocument.source.database)
  //   .collection(dashboardDocument.source.collection)
  //   .aggregate([ updatePipeline[0], updatePipeline[1], updatePipeline[2] ])
  //   .toArray();
  // console.log("entries");
  // console.log(entries[0]);
  let mergePointer = await client
    .db(dashboardDocument.source.database)
    .collection(dashboardDocument.source.collection)
    .aggregate(updatePipeline, { allowDiskUse: true })
    .toArray();
  // console.log('update merge pointer');
  // console.log(mergePointer);
  // return entries;
}

/**
 * Downloads all entries for which the `dateModified` attribute is later than the `latestCheckTime` parameter.
 * @param {mongodb.client} client -- Active mongodb session object.
 * @param {DashboardDocument} dashboardDocument -- Document containing instructions.
 * @param {Date} latestCheckTime -- Date after which entries are selected.
 * @returns {}
 */
async function getNewerDeltas(client, dashboardDocument, latestCheckTime) {
  // console.log(`latest check for ${dashboardDocument.targetDatabase}:${dashboardDocument.targetCollection} is ${latestCheckTime}`);
  let entries = await client
    .db(dashboardDocument.targetDatabase)
    .collection(dashboardDocument.targetCollection)
    .find({ 'dateModified': { '$gte': new Date(latestCheckTime) } })
    .toArray();
  // console.log(`${entries.length} entries`);
  return entries;
}
/**
 * Builds the update aggregation of the dashboard and delivers the object that represents it. It is used inside "updateDashboard".
 * Some steps that are common to almost all aggregations we need are automatically added here, instead of existing independently on each survey.
 * These filter out archived submissions, match the adequate survey and, if provided, ask only for submissions after a certain date.
 * When in use, the system keeps track of the latest consultation in "updateTime" and retrieves only what's dated after this check.
 * This can be bypassed for testing by providing a date through "replaceDateModifiedWith".
 * @param {} dashboardName - Plain-text name of the dashboard, as informed in its dashboardsMetadata defining document.
 * @param {} dashboardDocument - Document defining a dashboard. If empty, a dashboard named `dashboardName` will be retrieved from the `dashboardsMetadata` db.
 * @param {} mongoURI - Mongo connection URL.
 * @param {} replaceDateModifiedWith - Instead of retrieving only entries created after the latest check, retrieve after an arbitrary date. Note that an early enough date is the same as bypassing.
 * @returns {}
 */
async function retrieveDashboardAggregation(dashboardName, dashboardDocument, mongoURI, replaceDateModifiedWith = false, targetDB) {
  const client = new MongoClient(mongoURI, {});
  let updatePipeline = [];
  try {
    await client.connect();
    // The option of fetching the dashboard description here allows for easier testing,
    // but it is generally needed up in the pipeline if working with the actual API.
    if (dashboardDocument.dashboardName !== dashboardName) {
      dashboardDocument = await client
        .db(targetDB)
        .collection("dashboardsMetadata")
        .findOne({ dashboardName: dashboardName });
    }
    // Get just the entries related to the survey we want, in case it is part of surveystack-production/submissions.
    if (dashboardDocument.source.isSurveyStack) {
      let source = dashboardDocument.source.surveyID;
      let id_match = Array.isArray(source)
        ? source.map(sid => new ObjectId(sid))
        : [new ObjectId(source)];
      updatePipeline.push({
        $match: {
          'meta.survey.id': { $in: id_match },
          'meta.archived': { $ne: true },
          'meta.dateModified': { $gte: new Date(dashboardDocument.updateTime) }
        }
      });
    } else if (!dashboardDocument.doNotFilterByDate) {
      updatePipeline.push({
        $match: {
          // This attribute is flattened up in the processed dashboards.
          'meta.archived': { $ne: true },
        }
      });
    } else {
      updatePipeline.push({
        $match: {
          // This attribute is flattened up in the processed dashboards.
          'dateModified': { $gte: new Date(dashboardDocument.updateTime) },
          'meta.archived': { $ne: true },
        }
      });
    }
    const genericMergeStage = {
      $merge: {
        into: {
          db: dashboardDocument.targetDatabase,
          coll: dashboardDocument.targetCollection
            ? dashboardDocument.targetCollection
            : dashboardDocument.dashboardName
        },
        on: dashboardDocument.indexDefiningAttributes
          ? dashboardDocument.indexDefiningAttributes
          : ["_id"]
      }
    };
    updatePipeline.push(...JSON.parse(dashboardDocument.aggregationDocument, regexpReviver));
    updatePipeline.push(...[{
      $merge: dashboardDocument.customMergeStage
        ? JSON.parse(dashboardDocument.customMergeStage, regexpReviver).$merge
        : genericMergeStage.$merge,
    }]);
  } catch (e) {
    console.log(e);
  }
  return updatePipeline;
}
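// Usage sketch (illustrative; the URI and database names are assumptions): build the update
// pipeline for a dashboard registered in dashboardsMetadata and inspect its stages before
// letting the $merge stage write anything.
// retrieveDashboardAggregation(
//   'rfc_planting',              // dashboardName, as stored in dashboardsMetadata
//   {},                          // empty document, so it is fetched from the control collection
//   'mongodb://localhost:27017',
//   false,                       // keep the date filter based on the stored updateTime
//   'dashboards'                 // targetDB holding dashboardsMetadata
// ).then( pipeline => pipeline.forEach( stage => console.log( JSON.stringify(stage, regexpReplacer) ) ) );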
/**
 * Applies an aggregation over a DB, mainly used for testing.
 * @param {} aggregation
 * @param {} sourceDB
 * @param {} sourceCol
 * @param {} mongoURI
 * @param {} avoidMerging - Typically we don't want to store the results, so we remove the '$merge' stage.
 * @returns {}
 */
async function applyAggregation(aggregation, sourceDB, sourceCol, mongoURI, avoidMerging = true) {
  const client = new MongoClient(mongoURI, {});
  let entries = [];
  if (avoidMerging) {
    aggregation = aggregation.filter(stage => !stage.$merge);
  }
  try {
    await client.connect();
    entries = await client
      .db(sourceDB)
      .collection(sourceCol)
      .aggregate(aggregation)
      .toArray();
  } catch (e) {
  }
  return entries;
}

exports.retrieveDashboardAggregation = retrieveDashboardAggregation;
exports.applyAggregation = applyAggregation;
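// Usage sketch (illustrative, continuing the example above; the URI and names are assumptions):
// run the generated pipeline with applyAggregation inside an async context. With avoidMerging
// left at true the $merge stage is stripped, so the view is not written and results are only returned.
// const mongoURI = 'mongodb://localhost:27017';
// const pipeline = await retrieveDashboardAggregation('rfc_planting', {}, mongoURI, false, 'dashboards');
// const preview = await applyAggregation(pipeline, 'surveystack-production', 'submissions', mongoURI, true);
// console.log(`${preview.length} documents would be merged`);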
/**
 * Applies the aggregation described by the dashboard document, merges the new results in, and optionally returns the newer entries.
 * @param {} dashboardName
 * @param {} dashboardDocument
 * @param {} mongoURI
 * @param {} latestCheckTime
 * @param {} retrieveDeltas
 * @param {} controlDB
 * @param {} controlCollection
 * @returns {}
 * @throws {}
 */
async function updateCollectionAndRetrieveDeltas({ dashboardName, dashboardDocument, mongoURI, latestCheckTime = undefined, retrieveDeltas = true, controlDB, controlCollection = "dashboardsMetadata" }) {
  const client = new MongoClient(mongoURI, {});
  let entries;
  let date = latestCheckTime instanceof Date ? latestCheckTime : new Date(latestCheckTime);
  try {
    await client.connect();
    // The option of fetching the dashboard description here allows for easier testing,
    // but it is generally needed up in the pipeline if working with the actual API.
    if (dashboardDocument.dashboardName !== dashboardName) {
      dashboardDocument = await client
        .db(controlDB)
        .collection("dashboardsMetadata")
        .findOne({ dashboardName: dashboardName });
    }
    await updateDashboard(client, dashboardDocument);
    if (retrieveDeltas) {
      // console.log(`latest check is ${date}`);
      // console.log('retrieving deltas');
      entries = await getNewerDeltas(client, dashboardDocument, date);
      // console.log( `entries length is ${entries.length} -updateCollectionAndRetrieveDeltas-` );
    } else {
      // console.log('deltas not retrieved');
    }
    await registerUpdateDate(client, dashboardName, controlDB);
  } catch (error) {
    throw error;
  } finally {
    await client.close();
  }
  return entries;
}

// TODO replace fetch with axios
/*
 * Triggers the update of the materialized view and retrieves the added and modified entries,
 * based on the 'meta.dateModified' attribute for submissions entries and `dateModified` for
 * collections that already exist in the dashboards database.
 * Expects collectionName, which should be a collection that exists in the dashboardsMetadata
 * collection, and userid to determine the access level.
 */
// function addUpdatesPath(app, path = "/static/updates/", mongoURI) {
//   app.post( path, asyncHandler(async (req, res) => {
//     const { collectionName, userid, latestCheckTime, retrieveDeltas } = req.body;
//     console.log(`updating ${collectionName}`);
//     const client = new MongoClient(mongoURI, {});
//     let entries;
//     try {
//       await client.connect();
//       let dashboardDocument = await client
//         .db("dashboards")
//         .collection("dashboardsMetadata")
//         .findOne({ dashboardName: collectionName });
//       let permissionNeeded = `${dashboardDocument.readableTo}@/${dashboardDocument.source.group}`;
//       console.log(`user id is ${userid}`);
//       console.log(`retrieving ${collectionName}`);
//       let roles;
//       if ( userid.length == 24 ) {
//         roles = await fetch( `https://app.surveystack.io/api/roles?userId=${userid}` )
//           .then( privileges => { return privileges.json(); } );
//         // console.log('roles are');
//         // console.log(roles);
//       } else {
//         roles = [];
//       }
//       if ( roles.find( role => role == permissionNeeded ) || dashboardDocument.readableTo == "public" ) {
//         console.log('adequate permissions');
//         entries = await updateCollectionAndRetrieveDeltas( collectionName, dashboardDocument, mongoURI, latestCheckTime, retrieveDeltas )
//           .catch( e => console.error(`failure retrieving ${collectionName}`) );
//         console.log( `retrieving ${ entries ? entries.length : 0 } objects` );
//       } else {
//         console.log('not adequate permissions');
//         entries = [];
//         res.status(403);
//       }
//     } catch(error) {
//       console.log( e => console.error(`error attempting to retrieve ${ collectionName }`) );
//     } finally {
//       await client.close();
//       res.json(entries);
//     }
//   }) );
// }

// let rootUri = 'mongodb://localhost:27017';
// destroyDataExplorerCollections( rootUri ).catch( console.error );
// rebuildDataExplorerCols( rootUri );

exports.uploadDashboardDocument = uploadDashboardDocument;
exports.uploadData = uploadData;
exports.updateCollectionAndRetrieveDeltas = updateCollectionAndRetrieveDeltas;
// exports.addUpdatesPath = addUpdatesPath;

// let plantingSurveyObject = {
//   dashboardName: "rfc_planting",
//   updateTime: new Date("2020-08-30T04:28:39.222Z"),
//   rebuildTime: new Date("2020-08-30T04:28:39.222Z"),
//   readableTo: "public",
//   source: {
//     database: "surveystack-production",
//     collection: "submissions",
//     isSurveyStack: true,
//     surveyID: [
//       // BI
//       "611aceeb7a10b50001a86ce3",
//       // France
//       "6123f586cf1bb700017dfdb1"
//     ],
//     group: "real-food-campaign"
//   },
//   customMergeStage: JSON.stringify( planting.mergeStage, edge.regexpReplacer ),
//   targetDatabase: "dashboards",
//   aggregationDocument: JSON.stringify( planting.aggregation, edge.regexpReplacer ),
//   description: "Survey containing detailed cultivation procedures.",
//   indexDefiningAttributes: [ 'farmos_url', 'field_name', "sample_type" ],
// };

class SurveySource {
  constructor({ database, collection, isSurveyStack, surveyID, group }) {
    this.database = database;
    this.collection = collection;
    this.isSurveyStack = isSurveyStack;
    this.surveyID = surveyID;
    this.group = group;
  }
}
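// Usage sketch (illustrative only; the route path, database name and request shape are assumptions,
// and unlike the commented-out addUpdatesPath above it performs no permission check): exposing the
// delta update through express with the asyncHandler already required at the top of this file.
// function addSimpleUpdatesPath(app, mongoURI) {
//   app.post('/static/updates/', asyncHandler(async (req, res) => {
//     const { collectionName, latestCheckTime, retrieveDeltas } = req.body;
//     const entries = await updateCollectionAndRetrieveDeltas({
//       dashboardName: collectionName,
//       dashboardDocument: {},      // fetched from dashboardsMetadata inside the function
//       mongoURI: mongoURI,
//       latestCheckTime: latestCheckTime,
//       retrieveDeltas: retrieveDeltas,
//       controlDB: 'dashboards'
//     });
//     res.json(entries || []);
//   }));
// }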
/**
 * @param {} dashboardName
 * @param {string} readableTo -- Group that has access to the data, or "public" for everyone, or "private" for nobody.
 * @param {} source
 * @param {integer} stageOrder -- Aggregations will be executed following a stage order, allowing control of data concurrency.
 * @param {} customMergeStage -- Use a merge stage other than the one the library generates.
 * @param {} targetDatabase
 * @param {} aggregationDocument
 * @param {} description
 * @param {} indexDefiningAttributes
 * @param {string} controlCollection -- Collection in which control data is stored, such as the last update date for each aggregation. A document describing this object will be stored there, for each data source/aggregation in the pipeline.
 * @param {string} dashboardGroup -- Group of dashboards the dashboard informed by this data source belongs to.
 * @param {string} dashboard -- Dashboard informed by this data source.
 */
class SurveyDefinition {
  constructor({
    dashboardName,
    readableTo = "private",
    source,
    stageOrder,
    customMergeStage,
    targetDatabase,
    targetCollection,
    aggregationDocument,
    description,
    indexDefiningAttributes,
    controlCollection = "dashboardsMetadata",
    dashboardGroup,
    dashboard,
  }) {
    this.dashboardName = dashboardName;
    this.readableTo = readableTo;
    this.source = source;
    this.customMergeStage = customMergeStage;
    this.targetDatabase = targetDatabase;
    this.targetCollection = targetCollection;
    this.aggregationDocument = aggregationDocument;
    this.description = description;
    this.indexDefiningAttributes = indexDefiningAttributes;
    this.controlCollection = controlCollection;
    this.dashboardGroup = dashboardGroup;
    this.dashboard = dashboard;
    this.stageOrder = stageOrder;
    // Initialize times.
    this.updateTime = new Date("2020-01-01");
    this.rebuildTime = new Date("2020-01-01");
    // Check that targetCollection is the same as the target of the merge stage, if there is a customMergeStage.
    let mergeStageTargetCollection = this.customMergeStage
      ? JSON.parse(this.customMergeStage, regexpReviver).$merge.into.coll
      : false;
    if (mergeStageTargetCollection && mergeStageTargetCollection !== targetCollection) {
      throw new Error(`Target collection of custom merge stage is ${mergeStageTargetCollection} while the target collection mentioned as this object's targetCollection attribute is ${this.targetCollection}. Both need to be the same.`);
    }
  }

  /**
   * Inserts a control document into the control collection, adding this aggregation to the process.
   * @param {} connectionURL
   */
  uploadDocument(connectionURL) {
    uploadDashboardDocument(this, this.targetDatabase, this.controlCollection, connectionURL);
  }

  updatePipeline() {
    let targetCollection = this.customMergeStage
      ? JSON.parse(this.customMergeStage, regexpReviver).$merge.into.coll
      : this.targetCollection;
    const updatePipeline = [];
    // Get just the entries related to the survey we want, in case it is part of surveystack-production/submissions.
    if (this.source.isSurveyStack) {
      let source = this.source.surveyID;
      let id_match = Array.isArray(source)
        ? source.map(sid => new ObjectId(sid))
        : [new ObjectId(source)];
      updatePipeline.push({
        $match: {
          'meta.survey.id': { $in: id_match },
          'meta.archived': { $ne: true },
          'meta.dateModified': { $gte: new Date(this.updateTime) }
        }
      });
    } else if (!this.doNotFilterByDate) {
      updatePipeline.push({
        $match: {
          // This attribute is flattened up in the processed dashboards.
          'meta.archived': { $ne: true },
        }
      });
    } else {
      updatePipeline.push({
        $match: {
          // This attribute is flattened up in the processed dashboards.
          'dateModified': { $gte: new Date(this.updateTime) },
          'meta.archived': { $ne: true },
        }
      });
    }
    const genericMergeStage = {
      $merge: {
        into: {
          db: this.targetDatabase,
          coll: this.targetCollection ? this.targetCollection : this.dashboardName
        },
        on: this.indexDefiningAttributes ? this.indexDefiningAttributes : ["_id"]
      }
    };
    const removeUID = { $project: { _id: 0 } };
    updatePipeline.push(...JSON.parse(this.aggregationDocument, regexpReviver));
    if (this.customMergeStage) {
      updatePipeline.push(JSON.parse(this.customMergeStage, regexpReviver));
    } else if (this.indexDefiningAttributes) {
      updatePipeline.push(removeUID, genericMergeStage);
    } else {
      updatePipeline.push(genericMergeStage);
    }
    return updatePipeline;
  }
}

exports.SurveySource = SurveySource;
exports.SurveyDefinition = SurveyDefinition;

/**
 * Provided with an array of SurveyDefinition objects, executes them asynchronously, grouping them into stages according to their stageOrder attribute.
 * @param {} stage
 * @param {} getData
 * @param {} surveyObjectsArray
 * @param {} connectionURL
 * @returns {}
 */
async function aggregationStage({ stage = 0, getData = false, surveyObjectsArray = surveyDocuments, connectionURL }) {
  let surveyObjects = surveyObjectsArray.filter(d => d.stageOrder == stage);
  console.log(`About to perform ${surveyObjects.length} aggregations.`);
  let transactionPromises = surveyObjects.map(async survey => {
    console.log(`Retrieving ${survey.dashboardName}`);
    let errors = [];
    let data = {};
    let entries = await updateCollectionAndRetrieveDeltas({
      dashboardName: survey.dashboardName,
      dashboardDocument: survey,
      mongoURI: connectionURL,
      latestCheckTime: new Date("2020-07-30T04:28:39.222Z"),
      retrieveDeltas: getData,
      controlCollection: survey.controlCollection,
      controlDB: survey.targetDatabase
    }).catch(e => errors.push({ error: e, dashboard: survey.dashboardName }));
    if (getData) {
      data = entries;
    }
    return { data: data, errors: errors, name: survey.dashboardName };
  });
  let errors = await Promise.all(transactionPromises).then(array => {
    return array.flatMap(d => d.errors);
  });
  let output = {
    processed: surveyObjects.map(obj => obj.dashboardName),
    getData: getData,
    errors: errors
  };
  if (getData) {
    let allData = {};
    let data = await Promise.all(transactionPromises).then(array => {
      array.forEach(d => allData[d.name] = d.data);
    });
    output.data = allData;
  }
  console.log("ERRORS");
  console.log(errors);
  return output;
}

exports.aggregationStage = aggregationStage;
exports.createAllViews = createAllViews;
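// Usage sketch (illustrative; the connection URL, survey ids and attribute names are taken from the
// commented-out plantingSurveyObject example above and are not fixed by the library): define a data
// source and its dashboard, register the control document, then run stage 0 of the pipeline. The
// aggregationDocument here is a minimal assumed aggregation that just flattens meta.dateModified,
// which is the attribute the delta retrieval expects on processed dashboards.
// const connectionURL = 'mongodb://localhost:27017';
// const plantingSource = new SurveySource({
//   database: 'surveystack-production',
//   collection: 'submissions',
//   isSurveyStack: true,
//   surveyID: [ '611aceeb7a10b50001a86ce3', '6123f586cf1bb700017dfdb1' ],
//   group: 'real-food-campaign'
// });
// const plantingDefinition = new SurveyDefinition({
//   dashboardName: 'rfc_planting',
//   readableTo: 'public',
//   source: plantingSource,
//   stageOrder: 0,
//   targetDatabase: 'dashboards',
//   targetCollection: 'rfc_planting',
//   aggregationDocument: JSON.stringify([ { $set: { dateModified: '$meta.dateModified' } } ], regexpReplacer),
//   description: 'Survey containing detailed cultivation procedures.',
//   indexDefiningAttributes: [ 'farmos_url', 'field_name', 'sample_type' ]
// });
// plantingDefinition.uploadDocument(connectionURL);
// aggregationStage({
//   stage: 0,
//   getData: false,
//   surveyObjectsArray: [ plantingDefinition ],
//   connectionURL: connectionURL
// }).then( summary => console.log(summary.processed, summary.errors) );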