UNPKG

midas-core

Version:

Enrich data with APIs

308 lines (236 loc) 11.7 kB
'use strict';

Object.defineProperty(exports, "__esModule", { value: true });
exports.default = undefined;

/**
 * Wrap a CommonJS export so it can be consumed through `.default`
 * regardless of whether the module was authored as an ES module.
 */
function _interopRequireDefault(obj) {
  return obj && obj.__esModule ? obj : { default: obj };
}

var _Loader2 = require('./Loader');
var _Loader3 = _interopRequireDefault(_Loader2);
var _flat = require('flat');
var _flat2 = _interopRequireDefault(_flat);
var _sqlstring = require('sqlstring');
var _sqlstring2 = _interopRequireDefault(_sqlstring);
var _promise = require('mysql2/promise');
var _promise2 = _interopRequireDefault(_promise);
var _crypto = require('crypto');
var _crypto2 = _interopRequireDefault(_crypto);
var _csvjson = require('csvjson');
var _csvjson2 = _interopRequireDefault(_csvjson);
var _fsExtra = require('fs-extra');
var _fsExtra2 = _interopRequireDefault(_fsExtra);
var _path = require('path');
var _path2 = _interopRequireDefault(_path);

// Kept from the original module; not referenced by the code below.
var Json2csvParser = require('json2csv').Parser;

/**
 * Loader that writes an array of records into a MySQL table.
 *
 * The target table is created on first use (one varchar(255) column per
 * flattened key of the first record, plus an auto-increment `_id_midas`
 * primary key). On later runs, missing columns are added before the data is
 * bulk-loaded through a temporary table and merged with
 * INSERT ... ON DUPLICATE KEY UPDATE.
 *
 * NOTE(review): connections obtained from the pool are finished with
 * `connection.close()`, exactly as in the original code. Confirm whether
 * `release()` is what is actually intended for pooled connections.
 */
class MySQLLoader extends _Loader3.default {
  /**
   * @param {object} config - Loader configuration. After the base class
   *   constructor has run, `this.config._target` is read for `host`, `port`,
   *   `user`, `password`, `database` (and later `table`).
   */
  constructor(config) {
    super(config);
    const target = this.config._target;
    this.db_config = {
      connectionLimit: 10,
      host: target.host,
      port: target.port,
      user: target.user,
      password: target.password,
      database: target.database
    };
    this.pool = _promise2.default.createPool(this.db_config);
  }

  /**
   * Obtain a connection from the pool.
   * @returns {Promise<object>} Whatever `pool.getConnection()` resolves to.
   */
  async _get_connection() {
    return this.pool.getConnection();
  }

  /**
   * Normalise a record key into a column name: trimmed, lower-cased, spaces
   * replaced by underscores, special characters escaped by sqlstring, and
   * all single quotes and backticks removed.
   *
   * Backticks are stripped because every call site wraps the result in
   * backticks; a backtick inside the name would terminate the quoted
   * identifier early.
   *
   * @param {string} name
   * @returns {string}
   */
  _escape_column_name(name) {
    const normalized = name.trim().toLowerCase().replace(/ /g, '_').replace(/'/g, '');
    return _sqlstring2.default.escape(normalized).replace(/['`]/g, '');
  }

  /**
   * Add one varchar(255) column per entry of `columns` to `table_name`.
   *
   * @param {string} table_name
   * @param {Iterable<string>} columns - Array or Set of raw column names.
   * @returns {Promise<object|null>} The query result, or `null` when there is
   *   nothing to add (previously an empty collection produced a malformed
   *   ALTER TABLE statement).
   */
  async _create_columns(table_name, columns) {
    const additions = Array.from(columns || [], (column) => {
      return 'ADD COLUMN `' + this._escape_column_name(column) + '` varchar(255)';
    });
    if (additions.length === 0) {
      return null;
    }

    const statement = 'ALTER TABLE ' + _sqlstring2.default.escapeId(table_name) + ' ' + additions.join(', ');
    const connection = await this._get_connection();
    try {
      return await connection.query(statement);
    } finally {
      connection.close();
    }
  }

  /**
   * Look up the column names of a table in INFORMATION_SCHEMA.
   *
   * @param {string} database
   * @param {string} table_name
   * @returns {Promise<Set<string>|Array>} A Set of column names when the
   *   table exists; an empty Array when it does not (or when the driver
   *   returns an unexpected shape). The mixed return shape is kept for
   *   backward compatibility; wrap the result in `new Set(...)` to treat
   *   both cases uniformly.
   */
  async _get_column_names_from_table(database, table_name) {
    const escaped_database = _sqlstring2.default.escape(database);
    const escaped_table = _sqlstring2.default.escape(table_name);

    const column_statement = [
      'SELECT `COLUMN_NAME`',
      'FROM `INFORMATION_SCHEMA`.`COLUMNS`',
      'WHERE `TABLE_SCHEMA`= ' + escaped_database,
      'AND `TABLE_NAME`= ' + escaped_table
    ].join('\n\t\t');
    const existence_statement = 'SELECT EXISTS (' + column_statement + ') AS `table_exists`';

    const connection = await this._get_connection();
    try {
      // query() resolves to [rows, fields]; the single row carries the flag.
      const existence = await connection.query(existence_statement);
      const table_exists = Boolean(Number(existence[0][0].table_exists));
      if (!table_exists) {
        return [];
      }

      const column_names = await connection.query(column_statement);
      if (column_names.length < 1 || !Array.isArray(column_names[0])) {
        return [];
      }
      return new Set(column_names[0].map((row) => row.COLUMN_NAME));
    } finally {
      connection.close();
    }
  }

  /**
   * Create a table (if it does not exist yet) with one varchar(255) column
   * per key plus an `_id_midas` auto-increment primary key.
   *
   * @param {object|string[]} obj - A plain object (its keys become columns)
   *   or an array of column names.
   * @param {string|null} [name=null] - Desired table name; a random name
   *   prefixed with `p` is generated when omitted.
   * @returns {Promise<string>} The table name actually used: non-word
   *   characters removed and truncated to 32 characters.
   */
  async _create_table_from_object(obj, name = null) {
    const keys = (!Array.isArray(obj) && typeof obj === 'object') ? Object.keys(obj) : obj;

    if (name === null) {
      name = 'p' + _crypto2.default.randomBytes(32).toString('hex');
    }
    // The original code caps names at 32 characters; the cap is kept so that
    // existing table names do not change.
    name = name.replace(/\W/g, '').slice(0, 32);

    const column_definitions = ['_id_midas INT NOT NULL AUTO_INCREMENT PRIMARY KEY'];
    keys.forEach((key) => {
      const column = this._escape_column_name(key);
      // `_id_midas` is reserved for the primary key; a data key that maps to
      // it is skipped rather than declared twice.
      if (column === '_id_midas') {
        return;
      }
      column_definitions.push('`' + column + '` varchar(255)');
    });

    const statement = 'CREATE TABLE IF NOT EXISTS ' + _sqlstring2.default.escapeId(name) +
      ' (' + column_definitions.join(',') + ');';

    const connection = await this._get_connection();
    try {
      await connection.execute(statement);
    } finally {
      connection.close();
    }
    return name;
  }

  /**
   * Bulk-load `data` into `table_name`, updating rows whose key already
   * exists. The data is written to a CSV file, loaded into a temporary table
   * created LIKE the target, and then merged with
   * INSERT ... SELECT ... ON DUPLICATE KEY UPDATE.
   * See https://stackoverflow.com/questions/15271202/mysql-load-data-infile-with-on-duplicate-key-update
   *
   * The CSV file is removed and the connection closed even when one of the
   * statements fails.
   *
   * @param {object[]} data
   * @param {string} table_name
   * @returns {Promise<void>}
   */
  async _load_data_in_file_on_duplicate_key_update(data, table_name) {
    const sql = _sqlstring2.default;
    const connection = await this._get_connection();
    let file_path = null;

    try {
      // Random 32-character name for the staging table.
      const temporary_table_name = _crypto2.default.randomBytes(16).toString('hex');
      const temporary_table = sql.escapeId(temporary_table_name);
      const target_table = sql.escapeId(table_name);

      await connection.execute('CREATE TEMPORARY TABLE ' + temporary_table + ' LIKE ' + target_table);

      // TODO drop indexes

      file_path = await this._write_data_to_csv(data);
      const column_names = await this._get_column_names_from_table(this.config._target.database, table_name);

      let temp_c = [...column_names].map((column) => '`' + this._escape_column_name(column) + '`');
      let no_id = '';

      // When the records carry no _id_midas, leave it out of the column list
      // and let MySQL assign it.
      if (data.length > 0 && typeof data[0]['_id_midas'] === 'undefined') {
        temp_c = temp_c.filter((column) => column !== '`_id_midas`');
        no_id = 'SET _id_midas = NULL;';
      }

      // The path is escaped as a SQL string literal so that backslashes
      // (e.g. Windows paths) reach the server intact.
      const infile_statement = [
        'LOAD DATA LOCAL INFILE ' + sql.escape(file_path) + ' INTO TABLE ' + temporary_table,
        'FIELDS TERMINATED BY \',\'',
        'OPTIONALLY ENCLOSED BY \'"\'',
        'LINES TERMINATED BY \'\r\n\'',
        'IGNORE 1 LINES',
        '(' + temp_c.join(',') + ')',
        no_id
      ].join('\n');
      await connection.query(infile_statement);

      const assignments = temp_c.map((column) => column + ' = VALUES(' + column + ')').join(',');
      const insert_statement = 'INSERT INTO ' + target_table +
        '\nSELECT * FROM ' + temporary_table +
        '\nON DUPLICATE KEY UPDATE ' + assignments + ';';
      await connection.execute(insert_statement);

      await connection.execute('DROP TEMPORARY TABLE ' + temporary_table);
    } finally {
      try {
        if (file_path !== null) {
          await _fsExtra2.default.remove(file_path);
        }
      } finally {
        connection.close();
      }
    }
  }

  /**
   * Serialise `data` to CSV (flattened, CRLF line endings) in a uniquely
   * named file next to this module.
   *
   * @param {object[]} data
   * @returns {Promise<string>} Path of the file that was written.
   * @throws Propagates any serialisation or file-system error. (The original
   *   resolved with the error object, which the caller then used as a path.)
   */
  async _write_data_to_csv(data) {
    const json2csv = require('json2csv').parse;
    const data_string = json2csv(data, { flatten: true, eol: '\r\n' });
    const random_hash = _crypto2.default.randomBytes(32).toString('hex');
    const file_path = __dirname + _path2.default.sep + 'TEMP_MIDAS_TABLE_FILE__' + random_hash;
    await _fsExtra2.default.outputFile(file_path, data_string);
    return file_path;
  }

  /**
   * Load `data` into the configured target table, creating the table or any
   * missing columns first. The column layout is derived from the flattened
   * first record.
   *
   * @param {object[]} data
   * @returns {Promise<boolean>} `true` once the data has been loaded;
   *   `false` when `data` is empty or missing (nothing to do).
   */
  async load(data) {
    if (!data || data.length === 0) {
      return false;
    }

    const flattened = (0, _flat2.default)(data[0]);
    const target = this.config._target;

    // The lookup yields an empty Array for a missing table and a Set
    // otherwise; a Set copy lets both cases be handled the same way.
    const existing_columns = new Set(await this._get_column_names_from_table(target.database, target.table));

    let table_name;
    if (existing_columns.size === 0) {
      table_name = await this._create_table_from_object(flattened, target.table);
    } else {
      const wanted = Object.keys(flattened).map((key) => this._escape_column_name(key));
      const columns_to_create = new Set(wanted.filter((column) => !existing_columns.has(column)));
      if (columns_to_create.size > 0) {
        await this._create_columns(target.table, columns_to_create);
      }
      table_name = target.table;
    }

    await this._load_data_in_file_on_duplicate_key_update(data, table_name);
    return true;
  }
}

exports.default = MySQLLoader;