midas-core
Version:
Enrich data with APIs
308 lines (236 loc) • 11.7 kB
JavaScript
;
// Mark this CommonJS module as a transpiled ES module; helpers such as
// _interopRequireDefault check `__esModule` to decide whether to wrap the export.
Object.defineProperty(exports, "__esModule", {
value: true
});
// Placeholder only: the real default export is assigned at the end of the file,
// once the MySQLLoader class has been defined.
exports.default = undefined;
/**
 * Symbol-aware replacement for the `typeof` operator.
 *
 * When the runtime reports `typeof Symbol.iterator === "symbol"` the native
 * operator is used directly; otherwise values whose constructor is `Symbol`
 * (except `Symbol.prototype` itself) are reported as "symbol".
 *
 * @param {*} obj - Any value.
 * @returns {string} The type name of `obj`.
 */
var _typeof = (function () {
  var runtimeHasRealSymbols = typeof Symbol === "function" && typeof Symbol.iterator === "symbol";

  if (runtimeHasRealSymbols) {
    return function (obj) {
      return typeof obj;
    };
  }

  return function (obj) {
    var isPolyfilledSymbol = obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype;
    return isPolyfilledSymbol ? "symbol" : typeof obj;
  };
})();
/**
 * Installs prototype and static members on a constructor function.
 *
 * Each entry of `protoProps` / `staticProps` is a property descriptor that
 * additionally carries a `key`. Descriptors are mutated in place: they are
 * forced configurable, default to non-enumerable, and are made writable when
 * they hold a `value`.
 *
 * @param {Function} Constructor - Constructor to decorate.
 * @param {Array<Object>} [protoProps] - Descriptors for `Constructor.prototype`.
 * @param {Array<Object>} [staticProps] - Descriptors for `Constructor` itself.
 * @returns {Function} The same `Constructor`.
 */
var _createClass = (function () {
  function installMembers(owner, members) {
    var position = 0;
    while (position < members.length) {
      var member = members[position];
      member.enumerable = member.enumerable || false;
      member.configurable = true;
      if ("value" in member) {
        member.writable = true;
      }
      Object.defineProperty(owner, member.key, member);
      position += 1;
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) {
      installMembers(Constructor.prototype, protoProps);
    }
    if (staticProps) {
      installMembers(Constructor, staticProps);
    }
    return Constructor;
  };
})();
var _Loader2 = require('./Loader');
var _Loader3 = _interopRequireDefault(_Loader2);
var _flat = require('flat');
var _flat2 = _interopRequireDefault(_flat);
var _sqlstring = require('sqlstring');
var _sqlstring2 = _interopRequireDefault(_sqlstring);
var _promise = require('mysql2/promise');
var _promise2 = _interopRequireDefault(_promise);
var _crypto = require('crypto');
var _crypto2 = _interopRequireDefault(_crypto);
var _csvjson = require('csvjson');
var _csvjson2 = _interopRequireDefault(_csvjson);
var _fsExtra = require('fs-extra');
var _fsExtra2 = _interopRequireDefault(_fsExtra);
var _path = require('path');
var _path2 = _interopRequireDefault(_path);
/**
 * Normalises a required module so its default export is always at `.default`.
 *
 * @param {*} obj - Result of a `require` call.
 * @returns {*} `obj` itself when it is truthy and flagged `__esModule`,
 *   otherwise a wrapper `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { default: obj };
}
/**
 * Produces a fresh array from an array or any value `Array.from` accepts.
 *
 * @param {Array|Iterable|ArrayLike} arr - Source collection.
 * @returns {Array} A new array; for array input every index is copied
 *   explicitly, so holes become own `undefined` entries.
 */
function _toConsumableArray(arr) {
  if (!Array.isArray(arr)) {
    return Array.from(arr);
  }

  var copy = Array(arr.length);
  var position = 0;
  while (position < arr.length) {
    copy[position] = arr[position];
    position += 1;
  }
  return copy;
}
/**
 * Guards a constructor against being invoked without `new`.
 *
 * @param {*} instance - The `this` value seen inside the constructor.
 * @param {Function} Constructor - The constructor being executed.
 * @throws {TypeError} When `instance` is not an instance of `Constructor`.
 */
function _classCallCheck(instance, Constructor) {
  var constructedWithNew = instance instanceof Constructor;
  if (constructedWithNew) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
/**
 * Picks the value a derived constructor should use as its instance.
 *
 * @param {Object} self - The derived constructor's `this`.
 * @param {*} call - Whatever the parent constructor call returned.
 * @returns {*} `call` when it is a truthy object or function, otherwise `self`.
 * @throws {ReferenceError} When `self` is falsy.
 */
function _possibleConstructorReturn(self, call) {
  if (!self) {
    throw new ReferenceError("this hasn't been initialised - super() hasn't been called");
  }

  var returnedKind = typeof call;
  if (call && (returnedKind === "object" || returnedKind === "function")) {
    return call;
  }
  return self;
}
/**
 * Wires up prototype-based inheritance between two constructors.
 *
 * Replaces `subClass.prototype` with an object that delegates to
 * `superClass.prototype` (or to nothing when `superClass` is null) and that
 * carries a non-enumerable `constructor` pointing back at `subClass`. When a
 * super class is given, `subClass` also delegates to it so static members are
 * inherited.
 *
 * @param {Function} subClass - Constructor that should inherit.
 * @param {Function|null} superClass - Constructor to inherit from, or null.
 * @throws {TypeError} When `superClass` is neither a function nor null.
 */
function _inherits(subClass, superClass) {
  var superIsUsable = typeof superClass === "function" || superClass === null;
  if (!superIsUsable) {
    throw new TypeError("Super expression must either be null or a function, not " + typeof superClass);
  }

  var constructorSlot = {
    value: subClass,
    enumerable: false,
    writable: true,
    configurable: true
  };
  subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: constructorSlot });

  if (!superClass) {
    return;
  }
  if (Object.setPrototypeOf) {
    Object.setPrototypeOf(subClass, superClass);
  } else {
    subClass.__proto__ = superClass;
  }
}
var Json2csvParser = require('json2csv').Parser;
/**
 * Loader that writes records into a MySQL table.
 *
 * `load(data)` creates the target table when it does not exist (or adds
 * missing columns when it does), dumps the records to a temporary CSV file,
 * bulk-loads that file into a temporary table and finally upserts the rows
 * into the target table.
 *
 * NOTE(review): connections taken from the pool are finished with `close()`,
 * as in the original code. With mysql2 pools `release()` may have been the
 * intended call — confirm before changing.
 */
var MySQLLoader = function (_Loader) {
  _inherits(MySQLLoader, _Loader);

  /**
   * @param {Object} config - Loader configuration, forwarded to the base
   *   Loader constructor. `this.config._target` is read right afterwards for
   *   host, port, user, password and database, so the base class is assumed
   *   to store the configuration on `this.config` — confirm in ./Loader.
   */
  function MySQLLoader(config) {
    _classCallCheck(this, MySQLLoader);

    var _this = _possibleConstructorReturn(this, (MySQLLoader.__proto__ || Object.getPrototypeOf(MySQLLoader)).call(this, config));

    var target = _this.config._target;
    _this.db_config = {
      connectionLimit: 10,
      host: target.host,
      port: target.port,
      user: target.user,
      password: target.password,
      database: target.database
    };
    _this.pool = _promise2.default.createPool(_this.db_config);
    return _this;
  }

  _createClass(MySQLLoader, [{
    key: '_get_connection',

    /**
     * Obtains a connection from the pool.
     *
     * @returns {Promise<Object>} Whatever `pool.getConnection()` resolves to.
     */
    value: async function _get_connection() {
      return this.pool.getConnection();
    }
  }, {
    key: '_escape_column_name',

    /**
     * Turns an arbitrary key into the column name used by this loader:
     * trimmed, lower-cased, spaces replaced by underscores, single quotes
     * removed, then passed through sqlstring's `escape` with the quotes it
     * produces stripped again.
     *
     * @param {string} name - Raw key / column name.
     * @returns {string} Normalised column name (without surrounding backticks).
     */
    value: function _escape_column_name(name) {
      var normalised = name.trim().toLowerCase().replace(/ /g, '_').replace(/'/g, '');
      return _sqlstring2.default.escape(normalised).replace(/'/g, '');
    }
  }, {
    key: '_create_columns',

    /**
     * Adds `varchar(255)` columns to an existing table.
     *
     * @param {string} table_name - Table to alter.
     * @param {Array<string>|Set<string>} columns - Column names to add.
     * @returns {Promise<*>} The query result, or `null` when there is nothing
     *   to add (no statement is sent in that case).
     */
    value: async function _create_columns(table_name, columns) {
      var _this2 = this;

      var clauses = Array.from(columns || []).map(function (column) {
        return 'ADD COLUMN `' + _this2._escape_column_name(column) + '` varchar(255)';
      });

      // With no columns the original statement lost the closing backtick of the
      // table identifier and failed; there is simply nothing to do.
      if (clauses.length === 0) {
        return null;
      }

      var statement = 'ALTER TABLE ' + _sqlstring2.default.escapeId(table_name) + ' ' + clauses.join(', ');
      var connection = await this._get_connection();
      try {
        return await connection.query(statement);
      } finally {
        // Always hand the connection back, even when the query fails.
        connection.close();
      }
    }
  }, {
    key: '_get_column_names_from_table',

    /**
     * Reads the column names of a table from INFORMATION_SCHEMA.
     *
     * @param {string} database - Schema name.
     * @param {string} table_name - Table name.
     * @returns {Promise<Array|Set<string>>} An empty array when no columns are
     *   found (table missing), otherwise a Set of column names. The mixed
     *   return type is kept for backward compatibility.
     */
    value: async function _get_column_names_from_table(database, table_name) {
      var column_statement = 'SELECT `COLUMN_NAME`\n\t\tFROM `INFORMATION_SCHEMA`.`COLUMNS`\n\t\tWHERE `TABLE_SCHEMA`= ' + _sqlstring2.default.escape(database) + '\n\t\tAND `TABLE_NAME`= ' + _sqlstring2.default.escape(table_name);

      var connection = await this._get_connection();
      var response;
      try {
        // A single query is enough: an empty row list means the table does not exist.
        response = await connection.query(column_statement);
      } finally {
        connection.close();
      }

      var rows = Array.isArray(response) && Array.isArray(response[0]) ? response[0] : [];
      if (rows.length === 0) {
        return [];
      }
      return new Set(rows.map(function (row) {
        return row.COLUMN_NAME;
      }));
    }
  }, {
    key: '_create_table_from_object',

    /**
     * Creates a table (if it does not exist yet) with an auto-increment
     * `_id_midas` primary key plus one `varchar(255)` column per key.
     *
     * @param {Object|Array<string>} obj - Plain object whose keys become
     *   columns, or an array of column names.
     * @param {?string} [name=null] - Desired table name; a random name
     *   starting with `p` is generated when omitted.
     * @returns {Promise<string>} The table name actually used (non-word
     *   characters removed, truncated to 32 characters).
     */
    value: async function _create_table_from_object(obj, name) {
      var _this3 = this;

      var is_plain_object = !Array.isArray(obj) && (typeof obj === 'undefined' ? 'undefined' : _typeof(obj)) === 'object';
      var keys = is_plain_object ? Object.keys(obj) : obj;

      if (name === undefined || name === null) {
        name = 'p' + _crypto2.default.randomBytes(32).toString('hex');
      }
      // Keep only word characters and cap the length at 32 characters.
      name = name.replace(/\W/g, '').slice(0, 32);

      var column_definitions = ['_id_midas INT NOT NULL AUTO_INCREMENT PRIMARY KEY'];
      // `_id_midas` is reserved for the primary key; also skip keys that
      // normalise to an already-emitted column, which would make CREATE TABLE fail.
      var seen = new Set(['_id_midas']);
      keys.forEach(function (key) {
        var column = _this3._escape_column_name(key);
        if (seen.has(column)) {
          return;
        }
        seen.add(column);
        column_definitions.push('`' + column + '` varchar(255)');
      });

      var statement = 'CREATE TABLE IF NOT EXISTS ' + _sqlstring2.default.escapeId(name) + ' (' + column_definitions.join(',') + ');';
      var connection = await this._get_connection();
      try {
        await connection.execute(statement);
      } finally {
        connection.close();
      }
      return name;
    }

    // check https://stackoverflow.com/questions/15271202/mysql-load-data-infile-with-on-duplicate-key-update

  }, {
    key: '_load_data_in_file_on_duplicate_key_update',

    /**
     * Bulk-upserts `data` into `table_name`: the records are written to a CSV
     * file, loaded into a temporary table shaped like the target, and copied
     * over with `INSERT ... ON DUPLICATE KEY UPDATE`.
     *
     * The temporary file is removed and the connection closed even when one
     * of the statements fails.
     *
     * @param {Array<Object>} data - Records to load.
     * @param {string} table_name - Target table.
     * @returns {Promise<void>}
     */
    value: async function _load_data_in_file_on_duplicate_key_update(data, table_name) {
      var _this4 = this;

      var target_table = _sqlstring2.default.escapeId(table_name);
      var temporary_table = _sqlstring2.default.escapeId(_crypto2.default.randomBytes(32).toString('hex').slice(0, 32));
      var file_path = null;

      var connection = await this._get_connection();
      try {
        await connection.execute('CREATE TEMPORARY TABLE ' + temporary_table + ' LIKE ' + target_table);
        // TODO drop indexes

        file_path = await this._write_data_to_csv(data);
        var column_names = await this._get_column_names_from_table(this.config._target.database, table_name);

        var temp_c = Array.from(column_names).map(function (column) {
          return '`' + _this4._escape_column_name(column) + '`';
        });

        var no_id = '';
        // Without an `_id_midas` value in the data, leave that column out of
        // the load list and let the statement set it to NULL instead.
        if (data.length > 0 && typeof data[0]['_id_midas'] === 'undefined') {
          temp_c = temp_c.filter(function (e) {
            return e !== '`_id_midas`';
          });
          no_id = 'SET _id_midas = NULL;';
        }

        // The path is escaped so backslashes or quotes in it cannot break the statement.
        var infile_statement = ['LOAD DATA LOCAL INFILE ' + _sqlstring2.default.escape(file_path) + ' INTO TABLE ' + temporary_table, 'FIELDS TERMINATED BY \',\'', 'OPTIONALLY ENCLOSED BY \'"\'', 'LINES TERMINATED BY \'\r\n\'', 'IGNORE 1 LINES', '(' + temp_c.join(',') + ')', no_id].join('\n');
        await connection.query(infile_statement);

        var assignments = temp_c.map(function (column) {
          return column + ' = VALUES(' + column + ')';
        }).join(',');
        var insert_statement = 'INSERT INTO ' + target_table + '\nSELECT * FROM ' + temporary_table + '\nON DUPLICATE KEY UPDATE ' + assignments + ';';
        await connection.execute(insert_statement);

        await connection.execute('DROP TEMPORARY TABLE ' + temporary_table);
      } finally {
        try {
          if (file_path !== null) {
            await _fsExtra2.default.remove(file_path);
          }
        } finally {
          connection.close();
        }
      }
    }
  }, {
    key: '_write_data_to_csv',

    /**
     * Serialises `data` to CSV (flattened, CRLF line endings) in a randomly
     * named file next to this module.
     *
     * @param {Array<Object>} data - Records to serialise.
     * @returns {Promise<string>} Path of the file that was written.
     * @throws Propagates conversion and file-system errors instead of
     *   resolving with the error object, which callers would otherwise
     *   mistake for a file path.
     */
    value: async function _write_data_to_csv(data) {
      var json2csv = require('json2csv').parse;

      var data_string = json2csv(data, { flatten: true, eol: '\r\n' });
      var random_hash = _crypto2.default.randomBytes(32).toString('hex');
      var file_path = _path2.default.join(__dirname, 'TEMP_MIDAS_TABLE_FILE__' + random_hash);
      await _fsExtra2.default.outputFile(file_path, data_string);
      return file_path;
    }
  }, {
    key: 'load',

    /**
     * Loads records into the configured target table
     * (`config._target.database` / `config._target.table`).
     *
     * The columns are derived from the flattened first record only.
     *
     * @param {Array<Object>} data - Records to load.
     * @returns {Promise<boolean>} `true` once the data has been loaded,
     *   `false` when `data` is empty and nothing was done.
     */
    value: async function load(data) {
      var _this5 = this;

      // Nothing to derive columns from; previously this crashed on data[0].
      if (!data || !data.length) {
        return false;
      }

      var target = this.config._target;
      var flattened = (0, _flat2.default)(data[0]);
      var column_names = Array.from(await this._get_column_names_from_table(target.database, target.table));

      var table_name;
      if (column_names.length === 0) {
        // Table does not exist yet: create it from the first record.
        table_name = await this._create_table_from_object(flattened, target.table);
      } else {
        // Table exists: add whichever columns the data needs but the table lacks.
        // Compare lower-cased because normalised names are lower-case.
        var existing = new Set(column_names.map(function (column) {
          return String(column).toLowerCase();
        }));
        var columns_to_create = new Set(Object.keys(flattened).map(function (key) {
          return _this5._escape_column_name(key);
        }).filter(function (column) {
          return !existing.has(column);
        }));
        await this._create_columns(target.table, columns_to_create);
        table_name = target.table;
      }

      await this._load_data_in_file_on_duplicate_key_update(data, table_name);
      return true;
    }
  }]);

  return MySQLLoader;
}(_Loader3.default);
// Expose the MySQLLoader class as this module's default export.
exports.default = MySQLLoader;