UNPKG

joola.io.engine

Version:
645 lines (569 loc) 22.5 kB
/** * joola.io * * Copyright Joola Smart Solutions, Ltd. <info@joo.la> * * Licensed under GNU General Public License 3.0 or later. * Some rights reserved. See LICENSE, AUTHORS. * * @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+> */ var fs = require('fs'), utils = require('../shared/utils'), _datasources = require('../objects/datasources'), _datatables = require('../objects/datatables'), connector = require('../connectors/connector'), _mongo = require('mongodb'), path = require('path'); exports.minCacheDate = function (datatable, callback) { joola.logger.debug('Checking min cache date on [' + datatable.id + ']...'); var connstring = datatable.caching.system.url; joola.logger.silly('Connecting to mongo @ ' + connstring + '...'); //noinspection JSPotentiallyInvalidConstructorUsage var mongo = new _mongo.MongoClient.connect(connstring, {w: 1}, function (err, db) { if (err) return callback(err); db.createCollection(datatable.id, null, function (err, collection) { if (err) { db.close(); return callback(err); } collection.find().sort({date: 1}).limit(1).toArray(function (err, result) { db.close(); if (err) return callback(err); if (result && result[0] && result[0].date) return callback(null, new Date(result[0].date)); return callback(null); }); }); }); }; exports.maxCacheDate = function (datatable, callback) { var connstring = datatable.caching.system.url; joola.logger.silly('Connecting to mongo @ ' + connstring + '...'); //noinspection JSPotentiallyInvalidConstructorUsage var mongo = new _mongo.MongoClient.connect(connstring, {w: 1}, function (err, db) { if (err) return callback(err); db.createCollection(datatable.id, null, function (err, collection) { if (err) { db.close(); return callback(err); } collection.find().sort({date: -1}).limit(1).toArray(function (err, result) { db.close(); if (err) return callback(err); if (result && result[0] && result[0].date) return callback(null, new Date(result[0].date).fixDate(true, false)); return callback(null, new Date()); }); }); }); }; exports.maxNotHandledCacheDate = function (datatable, callback) { joola.logger.debug('Checking max not handled cache date on [' + datatable.id + ']...'); var connstring = datatable.caching.system.url; joola.logger.silly('Connecting to mongo @ ' + connstring + '...'); //noinspection JSPotentiallyInvalidConstructorUsage var mongo = new _mongo.MongoClient.connect(connstring, {w: 1}, function (err, db) { db.createCollection(datatable.id, null, function (err, collection) { if (err) return callback(err); collection.find({handled: false}).sort({date: -1}).limit(1).toArray(function (err, result) { db.close(); if (err) return callback(null); if (result && result.length > 0) { joola.logger.debug('Found max not handled cache date [' + datatable.id + ']: ' + new Date(result[0].date)); return callback(new Date(result[0].date)); } else return callback(null); }); }); }); }; exports.minNotHandledCacheDate = function (datatable, callback) { joola.logger.debug('Checking min not handled cache date on [' + datatable.id + ']...'); var connstring = datatable.caching.system.url; joola.logger.silly('Connecting to mongo @ ' + connstring + '...'); //noinspection JSPotentiallyInvalidConstructorUsage var mongo = new _mongo.MongoClient.connect(connstring, {w: 1}, function (err, db) { db.createCollection(datatable.id, null, function (err, collection) { if (err) return callback(err); collection.find({handled: false}).sort({date: 1}, null, null).limit(1).toArray(function (err, result) { db.close(); if (err) return callback(null); if (result && result.length > 0) { joola.logger.debug('Found min not handled cache date [' + datatable.id + ']: ' + new Date(result[0].date)); return callback(new Date(result[0].date)); } else return callback(null); }); }); }); }; function verifyCollection(db, datatable, query, data, callback) { //noinspection JSUnresolvedFunction joola.logger.silly('Creating collection [' + datatable.id + ']...'); db.createCollection(datatable.id, null, function (err, collection) { if (err) throw err; //noinspection JSUnresolvedFunction joola.logger.silly('...Created collection [' + datatable.id + ']'); var indexKey = {key: 1}; verifyIndex(indexKey, collection, datatable, query, data, callback); }); } function verifyIndex(indexKey, collection, datatable, query, data, callback) { var _indexKey = JSON.stringify(indexKey).replace(/"/g, '').replace(/:/g, '').replace(/ /g, '').replace(/\./g, '').replace(/,/g, '').replace('{', '').replace('}', ''); joola.logger.debug('Verifying index [' + _indexKey + ']...'); collection.ensureIndex(indexKey, { unique: true }, function (err) { joola.logger.debug('... Index verfieid [' + _indexKey + ']' + (err ? ' Error: ' + err : '')); indexKey = {'date': 1}; collection.ensureIndex(indexKey, { unique: false }, function () { processRows(collection, datatable, query, data, callback); }); }); } function processRows(collection, datatable, query, data, callback) { //noinspection JSUnresolvedFunction joola.logger.silly('Processing rows, total count: ' + data.rows.length); if (data.rows.length == 0) { joola.logger.debug('Nothing to do here.'); return callback(); } var chunks = chunk(data.rows, datatable.caching.chunkSize); joola.logger.debug('Broke ' + data.rows.length + ' rows into ' + chunks.length + ' chunks of data [chunkSize:' + datatable.caching.chunkSize + '].'); data = null; var $options = {w: 1}; var callsb = []; _.each(chunks, function (chunk) { joola.logger.debug('Processing chunk...'); var callb = function (callback) { try { var worker = path.resolve(__dirname + '/fork_processDocument'); var dimensions = _datatables.basequery(datatable).dimensionsUsed; var metrics = _datatables.basequery(datatable).metricsUsed; var done = function (err) { if (err) console.log(err); return callback(null); }; var counter = 0; var args = {dimensions: dimensions, metrics: metrics, chunk: chunk}; var childProcess = require('child_process').fork(worker); childProcess.on('message', function (message) { if (message.type == 'init') childProcess.send(require('JASON').stringify(args)); else if (message.type == 'document') { var document = message.document; var insertDocument = function (callback) { document.date = new Date(document.date); collection.insert(document, $options, function (err) { if (counter % 100 == 0) joola.logger.silly('Document saved [' + counter + '/' + chunk.length + '].'); if (err) { if (err.message.indexOf('E11000') != -1) { //TODO: check if we need to update docs in this case. if (counter % 50 == 0) joola.logger.warn('Found a duplicate document, id: ' + document._id + ', key: ' + document.key); } else joola.logger.error('Failed to save document [' + counter + '/' + chunk.length + ', ' + document.key + ']: ' + err); } else { //document saved successfully. // var filename = 'missed_' + new Date().getTime().toString() + '_' + counter; //fs.writeFile('/tmp/missed/' + filename, JSON.stringify(document)); // console.log(document); } counter++; if (counter == chunk.length) { childProcess.kill(); return done(); } else return callback(); }); }; insertDocument(function () { }); } }); } catch (ex) { console.log(ex); console.log(ex.stack); return callback(ex); } }; callsb.push(callb); }); chunks = null; fork(callsb, function () { callsb = null; joola.logger.debug('Finished parsing documents.'); /* exports.minNotHandledCacheDate(datatable, function (firstDate) { exports.maxNotHandledCacheDate(datatable, function (lastDate) { //TODO: If we extend on both sides, we should not mark as handled the entire lot. //crunch(datatable, query, collection, db, function () { // joola.logger.debug('Finished saving source data [' + firstDate + ']-[' + lastDate + '].'); if (!query.balanced) { collection.update( {handled: false},//, date: {$lte: $enddate, $gte: $startdate}}, {$set: {handled: true}}, {w: 1, multi: true, verbose: true}, function (err) { if (err) throw err; joola.logger.debug('Processed rows marked as handled.'); try { if (firstDate) { joola.logger.debug('First date: ' + firstDate); //firstDate.fixDate(true, true); //joola.logger.debug('Fixed first date: ' + firstDate); } else joola.logger.debug('Missing first date'); } catch (ex) { joola.logger.debug('Failed to fixdate on firstdate: ' + firstDate + ', ex:' + ex.message); } try { if (lastDate) { joola.logger.debug('Last date: ' + lastDate); //lastDate.fixDate(true, true); //joola.logger.debug('Fixed last date: ' + lastDate); } else joola.logger.debug('Missing last date'); } catch (ex) { joola.logger.debug('Failed to fixdate on lastdate: ' + lastDate + ', ex:' + ex.message); } return callback(null, firstDate, lastDate); }); } else return callback(null, firstDate, lastDate); }); });*/ return callback(null); }); } exports.saveData = function (query, datatable, data, callback) { var connstring = datatable.caching.system.url; joola.logger.silly('Connecting to mongo @ ' + connstring + '...'); //noinspection JSPotentiallyInvalidConstructorUsage var mongo = new _mongo.MongoClient.connect(connstring, {w: 1}, function (err, db) { if (err) return callback(err); joola.logger.debug('Connected to mongo @ ' + connstring); verifyCollection(db, datatable, query, data, function (err, firstDate, lastDate) { db.close(); return callback(err, firstDate, lastDate); }); }); }; exports.fetch = function (query, callback) { var connstring = query.datatable.caching.system.url; joola.logger.silly('Connecting to mongo @ ' + connstring + '...'); //noinspection JSPotentiallyInvalidConstructorUsage var mongo = new _mongo.MongoClient.connect(connstring, {w: 1}, function (err, db) { if (err) return callback(err); joola.logger.silly('Verifying collection [' + query.datatable.id + '.' + query.resolution + ']...'); db.createCollection(query.datatable.id, {strict: false}, function (err, collection) { var processFetch = function (collection) { var fields = {}; _.each(query.dimensions, function (d) { fields[d.name] = 1; }); joola.logger.debug('Executing fetch from [' + query.datatable.id + '], range [' + utils.formatDate(query.startdate, 'yyyy-mm-dd hh:nn:ss') + ']-[' + utils.formatDate(query.enddate, 'yyyy-mm-dd hh:nn:ss') + ']...'); var ds = _datasources.get(query.datatable.datasourceid); var dbquery = connector.createQuery(); query.datatable.datasource = ds; query.datatable.query = _datatables.basequery(query.datatable); var basequery = query.datatable.query; dbquery.sql = basequery.sql; dbquery.datasource = ds; dbquery.enddate = new Date(query.enddate).fixDate(); dbquery.startdate = new Date(query.startdate).fixDate(); dbquery.resolution = query.resolution; dbquery._query = query; try { var manager = require('./manager'); //avoid circular requirement //manager.cacheTable(query.datatable, ce.clone(dbquery), function () { var groupBy = {}; _.each(query.dimensions, function (d) { if (d.type == 'date') { switch (query.resolution) { case 'second': groupBy['date'] = '$timebucket.second'; break; case 'minute': groupBy['date'] = '$timebucket.minute'; break; case 'hour': groupBy['date'] = '$timebucket.hour'; break; case 'day': groupBy['date'] = '$timebucket.day'; break; case 'week': groupBy['date'] = '$timebucket.day'; break; case 'month': groupBy['date'] = '$timebucket.month'; break; case 'year': groupBy['date'] = '$timebucket.year'; break; default: break; } } else groupBy[d.name] = '$' + d.name; }); var $group = {}; $group._id = groupBy; if (!query.distinctCount) { $group.rowcount = {$sum: 1}; _.each(query.metrics, function (m) { var exist = _.find(query.datatable.metrics, function (m) { return _.find(query.metrics, function (m2) { return m2.id == m.id; }) }); if (exist) { if (m.aggregation == 'avg') $group[m.name] = {$avg: '$' + m.name}; else $group[m.name] = {$sum: '$' + m.name}; } }); } else { } var $sortkey = {}; if (query.sortKey) { if ($group[query.sortKey.name]) $sortkey[query.sortKey.name] = (query.sortDir == 'DESC' ? -1 : 1); else { $sortkey['_id.' + query.sortKey.name] = (query.sortDir == 'DESC' ? -1 : 1); } } else { if (query.distinctCount) { if (Object.keys($group._id).length > 1) { if ($group._id.date) $sortkey['_id.date'] = (query.sortDir == 'DESC' ? -1 : 1); else $sortkey['_id'] = (query.sortDir == 'DESC' ? -1 : 1); } else $sortkey['_id'] = (query.sortDir == 'DESC' ? -1 : 1); } else { $sortkey['_id'] = (query.sortDir == 'DESC' ? -1 : 1); } } var $match = { date: { $gte: new Date(query.startdate), $lte: new Date(query.enddate)} }; _.each(query.filters, function (filter) { if (filter.dimension.type != 'date') { if (filter.operator == '=') $match[filter.dimension.name] = filter.value; else if (filter.operator == '*=') $match[filter.dimension.name] = eval('/' + filter.value + '/gi'); else $match[filter.dimension.name] = filter.value; } }); //$match['handled'] = true; if (!query.distinctCount) { //console.log('match', $match); //console.log('g', $group); //console.log('sort', $sortkey); joola.logger.debug('Executing mongodb aggregate [' + query.startdate + ']-[' + query.enddate + ']...'); collection.aggregate( {$match: $match}, {$group: $group}, {$sort: $sortkey}, //{$limit: 500}, function (err, result) { db.close(); if (!err) { joola.logger.debug('Found [' + result.length + ' documents] in cached collection [' + query.datatable.id + '.' + query.resolution + ']'); function countKeys(obj) { return Object.keys(obj).length; } if (result.length == 1) { var r = result[0]; if (countKeys(r._id) == 0) { joola.logger.debug('Found a missing dimension in collection.'); r._id = $group._id; _.each(r._id, function (value, key) { r._id[key] = '(not set)'; }); } } return callback(err, result, query); } else return callback(err, null, query); }); } else { joola.logger.debug('Executing mongodb distinct aggregate [' + query.startdate + ']-[' + query.enddate + ']...'); groupBy = {}; $group = {}; var $unwind = {}; _.each(query.dimensions, function (d) { if (d.type == 'date') { switch (query.resolution) { case 'second': groupBy['date'] = '$timebucket.second'; break; case 'minute': groupBy['date'] = '$timebucket.minute'; break; case 'hour': groupBy['date'] = '$timebucket.hour'; break; case 'day': groupBy['date'] = '$timebucket.day'; break; case 'week': groupBy['date'] = '$timebucket.day'; break; case 'month': groupBy['date'] = '$timebucket.month'; break; case 'year': groupBy['date'] = '$timebucket.year'; break; default: break; } } else if (d.id != query.metrics[0].dimension) { groupBy[d.name] = '$' + d.name; } else { $group[d.name] = {'$addToSet': '$' + d.name}; $match[d.name] = {$ne: null}; $unwind = '$' + d.name; } }); $group._id = groupBy; // var $group2 = { _id: '$_id' }; $group2[query.metrics[0].name] = {$sum: 1}; //console.log('m', $match); //console.log('g1', $group); //console.log('un', $unwind); //console.log('g2', $group2); $sortkey = {}; if (query.sortKey) { if ($group[query.sortKey.name]) $sortkey[query.sortKey.name] = (query.sortDir == 'DESC' ? -1 : 1); else { $sortkey[ query.sortKey.name] = (query.sortDir == 'DESC' ? -1 : 1); } } else { if (query.distinctCount) { if (Object.keys($group._id).length > 1) { if ($group._id.date) $sortkey['_id.date'] = (query.sortDir == 'DESC' ? -1 : 1); else $sortkey['_id'] = (query.sortDir == 'DESC' ? -1 : 1); } else $sortkey['_id'] = (query.sortDir == 'DESC' ? -1 : 1); } else { $sortkey['_id'] = (query.sortDir == 'DESC' ? -1 : 1); } } //console.log('m', $match); //console.log('g', $group); //console.log('u', $unwind); //console.log('g2', $group2); //console.log('sort', $sortkey); collection.aggregate( {$match: $match}, {$group: $group}, {$unwind: $unwind}, {$group: $group2}, {$sort: $sortkey}, //{$limit: 500}, function (err, result) { db.close(); if (!err) { joola.logger.debug('Found [' + result.length + ' documents] in cached collection [' + query.datatable.id + '.' + query.resolution + ']'); // console.log(result); function countKeys(obj) { return Object.keys(obj).length; } if (result.length == 1) { var r = result[0]; if (countKeys(r._id) == 0) { joola.logger.debug('Found a missing dimension in collection.'); r._id = $group._id; _.each(r._id, function (value, key) { r._id[key] = '(not set)'; }) } } return callback(err, result, query); } else { return callback(err, null, query); } } ); } //}); } catch (err) { joola.logger.error('Error while calling cacheTable: ' + ex.message); return callback(err); } }; if (err) { db.collection(query.datatable.id, function (err, collection) { if (err) throw err; processFetch(collection); }); } else { processFetch(collection); } }); }); }; exports.flushCache = function (datatable, callback) { var connstring = datatable.caching.system.url; joola.logger.silly('Flushing mongo @ ' + connstring + '...'); var mongo = new _mongo.MongoClient.connect(connstring, {w: 1}, function (err, db) { if (err) throw err; joola.logger.warn('Flushed mongo @ ' + connstring); db.dropDatabase(function (err) { return callback(err); }); }); };