dataflo.ws

Zero-code JSON config-based dataflow engine for Node, PhoneGap and browser.

var EventEmitter = require ('events').EventEmitter,
  crypto  = require ('crypto'),
  util    = require ('util'),
  urlUtil = require ('url'),
  spawn   = require ('child_process').spawn,
  mongo   = require ('mongodb'),
  task    = require ('./base');

/**
 * @author
 * @docauthor
 * @class task.mongoRequest
 * @extends task.task
 *
 * A class for creating MongoDB-related tasks.
 *
 * To use, set {@link #className} to `"mongoRequest"`.
 *
 * ### Example
 *
 *     {
 *         flows: [{
 *             url: "/entity/suggest",
 *             tasks: [{
 *                 functionName: "parseFilter",
 *                 url: "{$request.url}",
 *                 produce: "data.suggest"
 *             }, {
 *                 className: "mongoRequest",
 *                 connector: "mongo",
 *                 collection: "messages",
 *                 filter: "{$data.suggest.tag}",
 *                 produce: "data.records"
 *             }, {
 *                 className: "renderTask",
 *                 type: "json",
 *                 data: "{$data.records}",
 *                 output: "{$response}"
 *             }]
 *         }]
 *     }
 *
 * @cfg {String} connector (required) The **config name** of the connector
 * in the project configuration object, or a prepared **MongoDB.Db connection**.
 *
 * @cfg {String} collection (required) The collection name in MongoDB.
 *
 * @cfg {String} [method="run"] The name of the method to be called
 * after the task requirements are satisfied.
 *
 * Possible values:
 *
 * - `run`, selects from the DB
 * - `insert`, inserts into the DB
 * - `update`, updates records in the DB
 * - `remove`, removes records from the DB
 *
 * @cfg {String} filter (required) The name of the property of the dataflow
 * instance, or the identifier of an object with filter fields for the `select`,
 * `insert` or `update` methods (see {@link #method}). The filter can be a mongo
 * ObjectID, an ObjectID array (in which case mongo is queried with {$in: []})
 * or a real {@link http://www.mongodb.org/display/DOCS/Querying mongo query}.
 */
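// The `filter` config above accepts several shapes. A minimal sketch
// (illustrative values only, not taken from any real project config):
//
//   filter: "4fa1b2c3d4e5f60718293a4b"                    // single ObjectID hex string
//   filter: ["4fa1b2c3…", "4fa1b2c4…"]                    // array of ids, queried with {$in: []}
//   filter: { tag: "news", created: { $gt: 1300000000 } } // raw mongo query object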
var mongoRequestTask = module.exports = function (config) {

  this.timestamp = true;
  this.insertingSafe = false;

  /* aliases */
  this.find = this.run;

  this.init (config);

};

mongo.Db.prototype.open = function (callback) {
  var self = this;

  if (self._state == 'connected') {
    return callback (null, self);
  }

  // Set the status of the server
  if (this.openCalled) self._state = 'connecting';

  // Set up connections
  if (self.serverConfig instanceof mongo.Server || self.serverConfig instanceof mongo.ReplSet) {

    if (!this._openCallbacks) this._openCallbacks = [];
    if (callback) this._openCallbacks.push (callback);

    if (!this.openCalled) self.serverConfig.connect (self, {firstCall: true}, function (err, result) {
      if (err != null) {
        // Return error from connection
        self.emit ('error', err);
        self._openCallbacks.map (function (item) { item (err, null); });
        self._openCallbacks = [];
        return;
      }

      // Set the status of the server
      self._state = 'connected';

      // Callback
      self.emit ('open', self);
      self._openCallbacks.map (function (item) { item (null, self); });
      self._openCallbacks = [];
      return;
    });

    // Set that db has been opened
    this.openCalled = true;

  } else {
    var err = new Error ("Server parameter must be of type Server or ReplSet");
    self.emit ('error', err);
    return callback (err, null);
  }
};

util.inherits (mongoRequestTask, task);

util.extend (mongoRequestTask.prototype, {

  _log: function () {
    var self = this;
    if (self.verbose) {
      console.log.apply (console, arguments);
    }
  },
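  // A sketch of the connector entry _getConnector expects to find in
  // project.config.db (field names inferred from the code below; a real
  // project config may carry additional driver options):
  //
  //   "mongo": {
  //     "host": "127.0.0.1",
  //     "port": 27017,
  //     "database": "mydb",
  //     "options": { "journal": true }
  //   }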
  // private method to get the connector
  _getConnector: function () {
    // connector is a real connector object
    if (!this.connector.substring && this.connector.open)
      return this.connector;

    // get the connector from the project cache if it has already been created
    if (project.connectors[this.connector]) {
      return project.connectors[this.connector];
    }

    // otherwise create the connector from the project config and add it to project.connectors
    var connectorConfig = project.config.db[this.connector];
    var connOptions;

    if (!connectorConfig.options) connectorConfig.options = {};
    connOptions = connectorConfig.options;

    /* if (!connOptions.hasOwnProperty('native_parser')) connOptions['native_parser'] = true; */

    if (!connOptions.hasOwnProperty('journal') || !connOptions.hasOwnProperty('w') || !connOptions.hasOwnProperty('fsync'))
      connOptions['journal'] = true;

    // create connector
    if (this.verbose) {
      console.log ("new mongo connector:", connectorConfig);
    }

    var connector = new mongo.Db (
      connectorConfig.database,
      new mongo.Server (connectorConfig.host, connectorConfig.port),
      connOptions
    );

    project.connectors[this.connector] = connector;
    project.connections[this.connector] = {};

    return connector;
  },

  // private method to open a collection
  _openCollection: function (cb) {
    var self = this;

    // get db client
    var client = self._getConnector ();

    if (this.verbose) console.log (
      'checking project.connections', self.connector, self.collection,
      project.connections[self.connector][self.collection] === void 0 ? 'not cached' : 'cached'
    );

    // check whether the collection is already cached;
    // if so, return the cached collection through the callback
    if (project.connections[self.connector][self.collection]) {
      cb.call (self, false, project.connections[self.connector][self.collection]);
      return;
    }

    // otherwise open the db connection
    client.open (function (err, p_client) {
      // get collection
      client.collection (self.collection, function (err, collection) {
        if (err) {
          console.log (err);
        } else {
          // add to the collections cache
          if (self.verbose) console.log ('storing project.connections', self.connector, self.collection);
          project.connections[self.connector][self.collection] = collection;
        }
        cb.call (self, err, collection);
      });
    });
  },

  objectId: function () {
    this.completed (this._objectId (this.id));
  },

  // private method to create an ObjectID
  _objectId: function (hexString) {
    if (!hexString) return null;

    var ObjectID = project.connectors[this.connector].bson_serializer.ObjectID;

    if (hexString.constructor === ObjectID) return hexString;

    var id;
    try {
      id = new ObjectID (hexString);
    } catch (e) {
      console.error (hexString);
      id = hexString.toString ();
    }
    if (this.verbose) console.log ('ObjectID', id);
    return id;
  },
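  // run() honours an optional pager and sort. An illustrative task config
  // (field names as read by run() below; values are made up):
  //
  //   {
  //     className: "mongoRequest",
  //     connector: "mongo",
  //     collection: "messages",
  //     pager: { limit: 25, page: 2, sort: { created: -1 } },
  //     produce: "data.records"
  //   }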
  // actually, it's a fetch function
  run: function () {
    var self = this;

    if (this.verbose) self.emit ('log', 'run called');

    // primarily used by Ext.data.Store, so we need to return {data: []}

    // open collection
    self._openColOrFail (function (collection) {

      var filter = self.filter,
          options = self.options || {},
          sort = self.sort || (self.pager && self.pager.sort) || {};

      if (self.pager) {
        if (self.pager.limit) {
          options.limit = self.pager.limit;
          options.page = self.pager.page || 0;
          //options.skip = self.pager.start || 0;
          options.skip = self.pager.start || options.limit * options.page;
        }
        if (!filter) filter = self.pager.filter;
      }

      options.sort = sort;

      if (self.verbose) console.log ("collection.find >> ", self.collection, filter, options);

      // find by filter or all records
      if (filter) {
        if (filter.constructor === Array)
          filter = {_id: {'$in': filter}};

        // filter is a string
        if (filter.constructor === String) {
          filter = {_id: self._objectId (filter)};
        // filter is a hash
        } else if (filter._id) {
          // filter._id is a string
          if (filter._id.constructor === String)
            filter._id = self._objectId (filter._id);
          // filter._id is a hash with a $in quantifier
          if (filter._id['$in']) {
            filter._id['$in'] = filter._id['$in'].map (function (id) {
              return self._objectId (id);
            });
          }
        }
      }

      // remap options fields
      if (options.fields) {
        var fields = options.fields,
            include = fields["$inc"],
            exclude = fields["$exc"];
        delete fields.$inc;
        delete fields.$exc;
        if (include) {
          include.map (function (field) { fields[field] = 1; });
        } else if (exclude) {
          exclude.map (function (field) { fields[field] = 0; });
        }
      }

      if (self.verbose) console.log ("collection.find", self.collection, filter, options);

      var cursor = collection.find (filter, options);

      cursor.toArray (function (err, docs) {
        if (self.verbose) console.log ("findResult", docs && docs.length || 0);

        if (docs) {
          docs.map (function (item) {
            if (self.verbose) console.log (item._id);
            if (self.mapping) {
              self.mapFields (item);
            }
          });
        }

        cursor.count (function (err, n) {
          self.completed ({
            success: (err == null),
            total: n || 0,
            err: err,
            data: docs
          });
          if (!err && 0 == n) {
            self.empty ();
          }
        });
      });
    });
  },
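  // insert() can optionally deduplicate before writing. A hedged sketch of
  // the relevant task options (names taken from the code below; values are
  // illustrative):
  //
  //   insertingSafe: true,                   // look up existing records first
  //   unique: ["slug"],                      // compare new docs to stored docs by these fields
  //   forceUpdate: true,                     // update matched records instead of skipping them
  //   updateData: { $set: { seen: true } }   // optional explicit update document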
  insert: function () {
    var self = this;

    if (!self.data) self.data = {};

    if (self.verbose) {
      self.emit ('log', 'insert called ' + self.data);
    }

    self._openCollection (function (err, collection) {

      if (self.data.constructor != Array) {
        self.data = [self.data];
      }

      var docsId = [];

      self.data = self.data.map (function (item) {
        var clone = util.extend (true, {}, item);
        if (self.timestamp) {
          clone.created = clone.updated = ~~(new Date().getTime()/1000);
        }
        if (clone._id == null || clone._id == '') {
          delete clone._id;
        } else {
          docsId.push (clone._id);
        }
        return clone;
      });

      /* MODIFIED: optionally check if records are already in the collection by self.filter, otherwise by _id
       * if records are found:
       *   if self.forceUpdate is true or updateData is provided: update records using updateData or data
       * if records are not found: insert
       */
      var filter = self.filter || {_id: {$in: docsId}};

      self._log ('Filter: ', filter, ', Update: ', self.updateData);

      if (self.insertingSafe) {
        // find any records already stored in db
        self._log ('insertingSafe data = ', self.data);

        collection.find (filter).toArray (function (err, alreadyStoredDocs) {
          self._log ('Already stored: ', alreadyStoredDocs.length, ' docs');

          if (alreadyStoredDocs.length > 0 && (self.forceUpdate || self.updateData)) {
            var updateData = self.updateData || self.data;
            self._log ('Updating @filter: ', filter, ' with: ', updateData);

            if (self.emulate) {
              console.log ('EMULATION: Update');
              self.completed ({
                success: true,
                total: alreadyStoredDocs.length,
                err: null,
                data: []
              });
              if (0 == alreadyStoredDocs.length) {
                self.empty ();
              }
              return;
            }

            collection.update (filter, updateData, { safe: true }, Boolean);

            self._log (alreadyStoredDocs);

            self.completed ({
              success: true,
              total: alreadyStoredDocs.length,
              err: false,
              data: alreadyStoredDocs
            });
            if (0 == alreadyStoredDocs.length) {
              self.empty ();
            }
            return;

          } else {
            // build the list of new records
            self._log ('Really inserting. Creating dataToInsert with unique = ', self.unique);
            var dataToInsert = [];

            /* if a self.unique array is provided, its fields are used to check whether a doc is already in the collection;
             * a doc is skipped only if all unique fields of the new doc are equal to the same fields of an old doc.
             *
             * if self.unique is not provided, the check is done by _id
             */
            if (alreadyStoredDocs.length == 0) {
              self.data.map (function (item) { dataToInsert.push (item); });
            } else {
              if (!self.unique) {
                var alreadyStoredDocsIds = {};
                alreadyStoredDocs.map (function (item) {
                  alreadyStoredDocsIds[item._id] = true;
                });
                self.data.map (function (item) {
                  if (!alreadyStoredDocsIds[item._id]) dataToInsert.push (item);
                });
              } else {
                var unique = self.unique;
                if (!(unique instanceof Array)) unique = [unique];
                dataToInsert = self.data.filter (function (item) {
                  var uniqueField;
                  for (var k = 0; k < alreadyStoredDocs.length; k++) {
                    for (var l = 0; l < unique.length; l++) {
                      uniqueField = unique[l];
                      if (alreadyStoredDocs[k][uniqueField] != item[uniqueField])
                        return true;
                    }
                  }
                  return false;
                });
              }
            }

            if (dataToInsert.length == 0) {
              self._log ('Nothing to insert');
              self.completed ({
                success: (err == null),
                total: alreadyStoredDocs.length,
                err: err || null,
                data: alreadyStoredDocs
              });
              if (!err && 0 == alreadyStoredDocs.length) {
                self.empty ();
              }
              return;
            }

            self._log ('Perform insert of ', dataToInsert.length, ' items', dataToInsert);

            if (self.emulate) {
              console.log ('EMULATION: Insert Safe');
              self.completed ({
                success: true,
                total: 1,
                err: null,
                data: []
              });
              return;
            }

            collection.insert (dataToInsert, {safe: true}, function (err, docs) {
              if (docs) docs.map (function (item) {
                if (self.mapping) {
                  self.mapFields (item);
                }
              });

              self._log ('inserted ', docs, err);

              var insertedRecords = alreadyStoredDocs.concat (docs);

              self.completed ({
                success: (err == null),
                total: (insertedRecords && insertedRecords.length) || 0,
                err: err || null,
                data: insertedRecords
              });
              if (!err && 0 == insertedRecords.length) {
                self.empty ();
              }
            });
          }
        }); // collection.find(filter).toArray

      } else {

        if (self.emulate) {
          console.log ('EMULATION: Insert');
          self.completed ({
            success: true,
            total: 1,
            err: null,
            data: []
          });
          return;
        }

        collection.insert (self.data, {safe: true}, function (err, docs) {
          // TODO: check two parallel tasks: if one of them is completed, then the dataflow must be completed (for example mongo & ldap tasks)
          if (self.verbose) console.log ('collection.insert', docs, err);

          if (docs) docs.map (function (item) {
            if (self.mapping) {
              self.mapFields (item);
            }
          });

          if (err) {
            console.error (err);
          }

          self.completed ({
            success: (err == null),
            total: (docs && docs.length) || 0,
            err: err || null,
            data: docs
          });
          if (!err && 0 == docs.length) {
            self.empty ();
          }
        });
      }
    });
  },

  /**
   * Params:
   *
   * @cfg {Object} criteria - object used to select the object being updated (see MongoDB docs).
   *
   * @cfg {Array} criteriaFields - an array of field names from which criteriaObj
   * will be constructed. This parameter is for updating many records.
   *
   * @cfg {Array | Object} data - main data container.
   *
   * @cfg {Object} modify - object {operation: [fieldName], ...} for modifying data,
   * e.g. {$push: ['comment'], $set: ['title']}
   *
   * @cfg {Array} options (upsert, multi, safe)
   */
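  // An illustrative update task config for the options documented above
  // (collection and field names are made up):
  //
  //   {
  //     className: "mongoRequest",
  //     connector: "mongo",
  //     collection: "posts",
  //     method: "update",
  //     data: "{$data.post}",
  //     criteriaFields: ["slug"],
  //     modify: { $set: ["title"], $push: ["comment"] }
  //   }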
  update: function () {
    var self = this,
      options = self.options || {},
      idList,
      total = 0,
      success = 0,
      failed = 0,
      criteriaFields = self.criteriaFields || ["_id"];

    var callback = function (err) {
      if (idList.length > 1) {
        // many records
        total++;
        if (err) {
          failed++;
        } else {
          success++;
        }
        if (total == idList.length) {
          if (total == success) {
            if (self.verbose) self.emit ('log', 'Updated IDs', idList);
            self.completed ({ _id: { $in: idList } });
          } else {
            self.failed ({
              msg: 'Not all records updated',
              failed: failed,
              total: total,
              success: success
            });
          }
        }
      } else {
        // single object
        if (err) {
          self.failed (err);
        } else {
          self.completed ({ _id: idList[0] });
          if (0 == idList.length) {
            self.empty ();
          }
        }
      }
    };

    if (self.verbose) self.emit ('log', 'update called ', self.data);

    self._openCollection (function (err, collection) {

      // wrap a single record into an array
      if (self.data.constructor != Array) {
        self.data = [self.data];
      }

      idList = self.data.map (function (item) {
        if (item._id || self.criteria || options.upsert) {
          // clone before update
          var set = util.extend (true, {}, item);
          delete set._id;

          // criteriaObj
          var criteriaObj;
          if (!self.criteria) {
            // by default select by _id, or by the defined first-level fields
            criteriaObj = {};
            criteriaFields.forEach (function (fieldName) {
              if (fieldName == "_id") {
                if (item.hasOwnProperty (fieldName)) criteriaObj[fieldName] = self._objectId (item[fieldName]);
              } else {
                if (set.hasOwnProperty (fieldName)) criteriaObj[fieldName] = set[fieldName];
              }
            });
          } else {
            criteriaObj = self.criteria;
          }

          // newObj
          var newObj;
          if (self.modify) {
            newObj = {};
            var modify = self.modify;
            for (var m in modify) {
              newObj[m] = {};
              modify[m].map (function (field) {
                newObj[m][field] = set[field];
                delete set[field];
              });
            }
            if (!('$set' in modify)) {
              newObj.$set = set;
            }
          } else {
            newObj = (self.replace) ? (set) : ({$set: set});
          }

          // set timestamp
          if (self.timestamp) {
            var timestamp = ~~(new Date().getTime()/1000);
            if (newObj.$set)
              newObj.$set.updated = timestamp;
            else
              newObj.updated = timestamp;
          }

          // safe
          options.safe = true;

          // show input params
          if (self.verbose) console.log ('collection.update ', criteriaObj, newObj, options);

          // do update
          collection.update (criteriaObj, newObj, options, callback);

          // return the id for the map operation
          return item._id;
        } else {
          // something is wrong. this shouldn't happen
          self.emit ('log', 'strange things with _id: "' + item._id + '"');
        }
        return null;
      });
    });
  },

  remove: function () {
    var self = this, ids;

    self.options = self.options || { safe: true };

    if (self.verbose) {
      self.emit ('log', 'remove called ', self.data);
    }

    if (self.data.constructor != Array) {
      self.data = [self.data];
    }

    ids = self.data.filter (function (item) {
      return null != item._id;
    });

    if (self.data.length != ids.length && ids.length == 0) {
      ids = self.data.filter (function (id) {
        return null != id;
      }).map (function (id) {
        return self._objectId (id);
      });
    } else {
      ids = ids.map (function (item) {
        return self._objectId (item._id);
      });
    }

    self._openCollection (function (err, collection) {
      if (self.verbose) {
        console.log ('remove by filter', { _id: { $in: ids } });
      }
      collection.remove ({ _id: { $in: ids } }, self.options, function (err, records) {
        self.completed ({
          err: err,
          success: err == null,
          total: records.length,
          data: records
        });
      });
    });
  },

  removeAll: function () {
    var self = this;

    self.options = self.options || { safe: true };

    if (self.verbose) {
      self.emit ('log', 'removeAll');
    }

    self._openCollection (function (err, collection) {
      collection.remove ({}, self.options, function (err, records) {
        self.completed ({
          err: err,
          success: err == null,
          total: records.length,
          data: records
        });
      });
    });
  },

  emitError: function (e) {
    if (e) {
      this.state = 5;
      this.emit ('error', e);
      this.cancel ();
      return true;
    } else {
      return false;
    }
  },

  readGridFS: function () {
    var self = this;
    this.openGridFS ('r', function (gs) {
      gs.read (function (err, data) {
        if (err) {
          self.failed (err);
        } else {
          self.completed (data);
        }
      });
    });
  },

  pipeGridFS: function () {
    var self = this;
    var toStream = this.toStream;
    this.openGridFS ('r', function (gs) {
      var stream = gs.stream (true);
      stream.on ('end', function () {
        self.completed (stream);
      });
      stream.on ('error', function (err) {
        self.failed (err);
      });
      stream.pipe (toStream);
    });
  },

  writeGridFS: function () {
    var self = this;
    var data = this.fileData;
    this.openGridFS ('w', function (gs) {
      gs.write (data, function (err) {
        if (err) {
          self.failed (err);
        } else {
          gs.close (function (err, result) {
            if (err) {
              self.failed (err);
            } else {
              self.completed (result);
            }
          });
        }
      });
    });
  },

  openGridFS: function (mode, cb) {
    var self = this;
    var options = this.options;
    var fileName = this.fileName;
    this.connector = 'mongo';
    var db = this._getConnector ();
    db.open (function (err, db) {
      var gs = new mongo.GridStore (db, fileName, mode, options);
      gs.open (function (err, gs) {
        if (err) {
          self.failed (err);
        } else {
          cb (gs);
        }
      });
    });
  },

  createDbRef: function () {
    var self = this;
    var DBRef = project.connectors[this.connector].bson_serializer.DBRef;
    var data = this.data;
    var colName = this.refCollection;

    var createRef = function (item) {
      return new DBRef (colName, self._objectId (item._id));
    };

    try {
      if (data instanceof Array) {
        var refs = data.map (createRef);
      } else {
        refs = createRef (data);
      }
      this.completed (refs);
    } catch (e) {
      this.failed (e);
    }
  },

  /**
   * Run a group command across a collection
   *
   * @param {Object|Array|Function|Code} keys an object, array or function expressing the keys to group by.
   * @param {Object} condition an optional condition that must be true for a row to be considered.
   * @param {Object} initial initial value of the aggregation counter object.
   * @param {Function|Code} reduce the reduce function aggregates (reduces) the objects iterated.
   * @param {Function|Code} finalize an optional function to be run on each item in the result set just before the item is returned.
   * @param {Boolean} command specify if you wish to run using the internal group command or using eval, default is true.
   * @param {Object} [options] additional options during update.
   * @param {Function} callback returns the results.
   * @return {null}
   * @api public
   * @group(keys, condition, initial, reduce, finalize, command, options, callback)
   */
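  // An illustrative group task config for the parameters documented above
  // (field names are made up; keys/condition/initial/reduce are passed
  // straight to collection.group by the method below):
  //
  //   {
  //     className: "mongoRequest",
  //     connector: "mongo",
  //     collection: "messages",
  //     method: "group",
  //     keys: { author: 1 },
  //     condition: { published: true },
  //     initial: { count: 0 },
  //     reduce: function (doc, out) { out.count++; }
  //   }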
  group: function () {
    var self = this;
    self._openColOrFail (function (collection) {
      collection.group (self.keys, self.condition, self.initial, self.reduce, self._onResult.bind (self));
    });
  },

  /**
   * Run Map Reduce across a collection. Be aware that the inline option for out will return an array of results, not a collection.
   *
   * Options
   * - **out** {Object, default:*{inline:1}*}, sets the output target for the map reduce job. *{inline:1} | {replace:'collectionName'} | {merge:'collectionName'} | {reduce:'collectionName'}*
   * - **query** {Object}, query filter object.
   * - **sort** {Object}, sorts the input objects using this key. Useful for optimization, like sorting by the emit key for fewer reduces.
   * - **limit** {Number}, number of objects to return from collection.
   * - **keeptemp** {Boolean, default:false}, keep temporary data.
   * - **finalize** {Function | String}, finalize function.
   * - **scope** {Object}, can pass in variables that can be accessed from map/reduce/finalize.
   * - **jsMode** {Boolean, default:false}, it is possible to make the execution stay in JS. Provided in MongoDB > 2.0.X.
   * - **verbose** {Boolean, default:false}, provide statistics on job execution time.
   * - **readPreference** {String, only for inline results}, the preferred read preference (Server.PRIMARY, Server.PRIMARY_PREFERRED, Server.SECONDARY, Server.SECONDARY_PREFERRED, Server.NEAREST).
   *
   * @param {Function|String} map the mapping function.
   * @param {Function|String} reduce the reduce function.
   * @param {Objects} [options] options for the map reduce job.
   * @return {Objects} returns the result of the map reduce job, (error, results, [stats])
   */
  mapReduce: function () {
    var self = this;
    var options = self.options || {};
    options.out = { inline: 1 }; // override any external out definition
    self._openColOrFail (function (collection) {
      collection.mapReduce (self.map, self.reduce, options, self._onResult.bind (self));
    });
  },

  _openColOrFail: function (callback) {
    this._openCollection (function (err, collection) {
      if (err) {
        this.failed (err);
      } else {
        callback.call (this, collection);
      }
    });
  },

  _onResult: function (err, data) {
    if (err) {
      this.failed (err);
    } else {
      this.completed ({
        success: true,
        err: data && data.errmsg,
        data: data,
        total: data ? data.length : 0
      });
      if (!data || 0 == data.length) {
        this.empty ();
      }
    }
  },

  aggregate: function () {
    this._openColOrFail (function (collection) {
      collection.aggregate (this.params, this._onResult.bind (this));
    });
  },

  GET: function () {
    this.run ();
  },

  POST: function () {
    this._openColOrFail (function (collection) {
      collection.update (
        this.criteria || {},
        this.data || {},
        this.options || {},
        this._onResult.bind (this)
      );
    });
  },

  PUT: function () {
    this._openColOrFail (function (collection) {
      collection.insert (
        this.data || {},
        this.options || {},
        this._onResult.bind (this)
      );
    });
  }
});
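// An illustrative mapReduce task config (field names are made up; map and
// reduce are handed straight to collection.mapReduce, and out is always
// forced to { inline: 1 } by the method above):
//
//   {
//     className: "mongoRequest",
//     connector: "mongo",
//     collection: "messages",
//     method: "mapReduce",
//     map: function () { emit (this.author, 1); },
//     reduce: function (key, values) { return values.length; },
//     options: { query: { published: true } }
//   }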