UNPKG

target-clickhouse

Version:
326 lines 17.6 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __generator = (this && this.__generator) || function (thisArg, body) { var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g; return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g; function verb(n) { return function (v) { return step([n, v]); }; } function step(op) { if (f) throw new TypeError("Generator is already executing."); while (_) try { if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t; if (y = 0, t) op = [op[0] & 2, t.value]; switch (op[0]) { case 0: case 1: t = op; break; case 4: _.label++; return { value: op[1], done: false }; case 5: _.label++; y = op[1]; op = [0]; continue; case 7: op = _.ops.pop(); _.trys.pop(); continue; default: if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; } if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; } if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; } if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; } if (t[2]) _.ops.pop(); _.trys.pop(); continue; } op = body.call(thisArg, _); } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; } if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true }; } }; var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) { if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) { if (ar || !(i in from)) { if (!ar) ar = Array.prototype.slice.call(from, 0, i); ar[i] = from[i]; } } return to.concat(ar || Array.prototype.slice.call(from)); }; exports.__esModule = true; var ono_1 = require("ono"); var singer_node_1 = require("singer-node"); var jsonSchemaInspector_1 = require("./jsonSchemaInspector"); var utils_1 = require("./utils"); var RecordProcessor_1 = require("./RecordProcessor"); var jsonSchemaTranslator_1 = require("./jsonSchemaTranslator"); var DeletedRecordProcessor_1 = require("./DeletedRecordProcessor"); var metaRepresentsReplacingMergeTree = function (meta) { return meta.pkMappings.length > 0; }; var StreamProcessor = (function () { function StreamProcessor(clickhouse, meta, startedClean, config, maxVer, recordProcessor, deletedRecordProcessor, noPendingRows, cleaningValues) { if (recordProcessor === void 0) { recordProcessor = new RecordProcessor_1["default"](meta, clickhouse, { batchSize: config.batch_size, translateValues: config.translate_values, autoEndTimeoutMs: (config.insert_stream_timeout_sec - 5) * 1000 }); } if (deletedRecordProcessor === void 0) { deletedRecordProcessor = new DeletedRecordProcessor_1["default"](meta, clickhouse, { batchSize: config.deletion_batch_size, translateValues: config.translate_values }); } if (noPendingRows === void 0) { noPendingRows = 0; } if (cleaningValues === void 0) { cleaningValues = []; } this.clickhouse = clickhouse; this.meta = meta; this.startedClean = startedClean; this.maxVer = maxVer; this.recordProcessor = recordProcessor; this.deletedRecordProcessor = deletedRecordProcessor; this.noPendingRows = noPendingRows; this.cleaningValues = cleaningValues; } StreamProcessor.createStreamProcessor = function (ch, meta, config, cleanFirst, existingTables) { return __awaiter(this, void 0, void 0, function () { var streamProcessor, rootAlreadyExists, _a, _b, _c; return __generator(this, function (_d) { switch (_d.label) { case 0: streamProcessor = new StreamProcessor(ch, meta, cleanFirst, config, 0); if (!cleanFirst) return [3, 2]; return [4, streamProcessor.clearTables()]; case 1: _d.sent(); rootAlreadyExists = false; return [3, 3]; case 2: rootAlreadyExists = existingTables.some(function (table) { return meta.sqlTableName === (0, jsonSchemaInspector_1.escapeIdentifier)(table); }); _d.label = 3; case 3: if (!rootAlreadyExists) return [3, 5]; return [4, (0, jsonSchemaTranslator_1.updateSchema)(meta, ch, existingTables)]; case 4: _d.sent(); return [3, 7]; case 5: (0, singer_node_1.log_info)("[".concat(meta.prop, "]: creating tables")); return [4, Promise.all((0, jsonSchemaTranslator_1.translateCH)(ch.getDatabase(), meta, true).map(ch.runQuery.bind(ch)))]; case 6: _d.sent(); _d.label = 7; case 7: _a = streamProcessor; if (!(cleanFirst || !metaRepresentsReplacingMergeTree(meta))) return [3, 8]; _b = streamProcessor.maxVer; return [3, 10]; case 8: _c = Number; return [4, ch.runQuery("SELECT max(_ver)\n FROM ".concat(meta.sqlTableName))]; case 9: _b = _c.apply(void 0, [(_d.sent()).data[0][0]]); _d.label = 10; case 10: _a.maxVer = _b; (0, singer_node_1.log_info)("[".concat(meta.prop, "]: initial max version is [").concat(streamProcessor.maxVer, "]")); return [2, streamProcessor]; } }); }); }; StreamProcessor.prototype.clearTables = function () { return __awaiter(this, void 0, void 0, function () { var queries; var _this = this; return __generator(this, function (_a) { switch (_a.label) { case 0: queries = buildDropTablesQueries(this.meta); return [4, Promise.all(queries.map(function (query) { return _this.clickhouse.runQuery(query); }))]; case 1: _a.sent(); return [2]; } }); }); }; StreamProcessor.prototype.processRecord = function (record, messageCount, abort) { return __awaiter(this, void 0, void 0, function () { var cleaningValue; return __generator(this, function (_a) { switch (_a.label) { case 0: if (!!this.startedClean) return [3, 2]; cleaningValue = this.meta.cleaningColumn && record[this.meta.cleaningColumn]; if (!(cleaningValue && !this.cleaningValues.includes(cleaningValue))) return [3, 2]; return [4, this.deleteCleaningValue(cleaningValue)]; case 1: _a.sent(); this.cleaningValues.push(cleaningValue); _a.label = 2; case 2: this.recordProcessor.pushRecord(record, abort, this.maxVer, undefined, undefined, undefined, messageCount); this.maxVer++; this.noPendingRows++; return [2]; } }); }); }; StreamProcessor.prototype.processDeletedRecord = function (record) { return __awaiter(this, void 0, void 0, function () { return __generator(this, function (_a) { switch (_a.label) { case 0: return [4, this.deletedRecordProcessor.pushDeletedRecord(record)]; case 1: _a.sent(); return [2]; } }); }); }; StreamProcessor.prototype.commitPendingChanges = function () { return __awaiter(this, void 0, void 0, function () { return __generator(this, function (_a) { switch (_a.label) { case 0: if (!(this.noPendingRows > 0)) return [3, 2]; (0, singer_node_1.log_info)("[".concat(this.meta.prop, "]: ending batch ingestion for ").concat(this.noPendingRows, " rows")); return [4, this.recordProcessor.endIngestion()]; case 1: _a.sent(); this.noPendingRows = 0; this.maxVer++; _a.label = 2; case 2: return [4, this.deletedRecordProcessor.deleteBufferedData()]; case 3: _a.sent(); return [2]; } }); }); }; StreamProcessor.prototype.finalizeProcessing = function () { return __awaiter(this, void 0, void 0, function () { var err_1; var _this = this; return __generator(this, function (_a) { switch (_a.label) { case 0: _a.trys.push([0, 2, , 3]); return [4, this.commitPendingChanges()]; case 1: _a.sent(); return [3, 3]; case 2: err_1 = _a.sent(); throw (0, ono_1.ono)(err_1, "could not save new records"); case 3: (0, singer_node_1.log_info)("[".concat(this.meta.prop, "]: finalizing processing")); if (!!this.startedClean) return [3, 8]; if (!this.isReplacingMergeTree()) return [3, 6]; (0, singer_node_1.log_info)("[".concat(this.meta.prop, "]: removing root duplicates")); return [4, this.clickhouse.runQuery("OPTIMIZE TABLE ".concat(this.meta.sqlTableName, " FINAL"))]; case 4: _a.sent(); if (!this.recordProcessor.hasChildren) return [3, 6]; (0, singer_node_1.log_info)("[".concat(this.meta.prop, "]: removing children orphans")); return [4, Promise.all(this.meta.children.map(function (child) { return _this.deleteChildDuplicates(child); }))]; case 5: _a.sent(); _a.label = 6; case 6: (0, singer_node_1.log_info)("[".concat(this.meta.prop, "]: ensuring PK integrity is maintained")); return [4, this.assertPKIntegrity(this.meta)]; case 7: _a.sent(); _a.label = 8; case 8: return [2]; } }); }); }; StreamProcessor.prototype.deleteCleaningValue = function (value) { return __awaiter(this, void 0, void 0, function () { var cleaningColumnMeta, resolvedValue, query; var _this = this; return __generator(this, function (_a) { switch (_a.label) { case 0: if (!this.meta.cleaningColumn) { (0, singer_node_1.log_warning)("[".concat(this.meta.prop, "]: unexpected request to clean values: cleaning column undefined")); return [2]; } cleaningColumnMeta = this.meta.simpleColumnMappings .concat(this.meta.pkMappings) .find(function (column) { return column.prop === _this.meta.cleaningColumn; }); if (!cleaningColumnMeta) { throw new Error("[".concat(this.meta.prop, "] could not resolve cleaning column meta (looking for ").concat(this.meta.cleaningColumn, ")")); } if (!cleaningColumnMeta.valueTranslator) { throw new Error("[".concat(this.meta.prop, "] could not be used as cleaning column as it do not have a translator")); } resolvedValue = cleaningColumnMeta.valueTranslator(value); (0, singer_node_1.log_info)("[".concat(this.meta.prop, "]: cleaning column: deleting based on ").concat(resolvedValue)); query = "\n ALTER TABLE ".concat(this.meta.sqlTableName, "\n DELETE\n WHERE `").concat(this.meta.cleaningColumn, "` = '").concat((0, utils_1.escapeValue)(value), "'"); return [4, this.clickhouse.runQuery(query)]; case 1: _a.sent(); return [2]; } }); }); }; StreamProcessor.prototype.deleteChildDuplicates = function (currentNode) { return __awaiter(this, void 0, void 0, function () { var query; return __generator(this, function (_a) { switch (_a.label) { case 0: query = "\n ALTER TABLE ".concat(currentNode.sqlTableName, "\n DELETE\n WHERE (").concat(this.meta.pkMappings .map(function (pk) { return (0, jsonSchemaInspector_1.escapeIdentifier)((0, jsonSchemaInspector_1.formatRootPKColumn)(pk.prop)); }) .concat(["_root_ver"]) .join(","), ") NOT IN (SELECT ").concat(this.meta.pkMappings .map(function (elem) { return elem.sqlIdentifier; }) .concat(["_ver"]) .join(","), " FROM ").concat(this.meta.sqlTableName, ")"); return [4, this.clickhouse.runQuery(query)]; case 1: _a.sent(); return [4, Promise.all(currentNode.children.map(this.deleteChildDuplicates.bind(this)))]; case 2: _a.sent(); return [2]; } }); }); }; StreamProcessor.prototype.isReplacingMergeTree = function () { return metaRepresentsReplacingMergeTree(this.meta); }; StreamProcessor.prototype.assertPKIntegrity = function (meta) { return __awaiter(this, void 0, void 0, function () { var pks, query, result; var _this = this; return __generator(this, function (_a) { switch (_a.label) { case 0: return [4, Promise.all(meta.children.map(function (child) { return _this.assertPKIntegrity(child); }))]; case 1: _a.sent(); if (meta.pkMappings.length === 0) { return [2]; } pks = meta.pkMappings.map(function (elem) { return elem.sqlIdentifier; }).join(","); query = "\n SELECT ".concat(pks, "\n FROM (SELECT ").concat(pks, ", ROW_NUMBER() OVER (PARTITION BY ").concat(pks, ") AS row_number FROM ").concat(meta.sqlTableName, ")\n WHERE row_number > 1 LIMIT 1"); return [4, this.clickhouse.runQuery(query)]; case 2: result = _a.sent(); if (result.rows > 0) { throw (0, ono_1.ono)("Duplicate key on table %s, data: %j, aborting process", meta.sqlTableName, result.data); } return [2]; } }); }); }; return StreamProcessor; }()); exports["default"] = StreamProcessor; var buildDropTablesQueries = function (meta) { return __spreadArray([ "DROP TABLE IF EXISTS ".concat(meta.sqlTableName) ], meta.children.flatMap(buildDropTablesQueries), true); }; //# sourceMappingURL=StreamProcessor.js.map