target-clickhouse
Version:
A Singer target for Clickhouse
326 lines • 17.6 kB
JavaScript
"use strict";
/**
 * Compiler-emitted async helper (an existing `this.__awaiter` is reused when present).
 * Runs `generator` and returns a promise for its final value, awaiting each yielded value.
 *
 * @param thisArg `this` value applied to the generator function.
 * @param _arguments Arguments applied to the generator function; `[]` is used when falsy.
 * @param P Promise constructor to use; replaced by the global `Promise` when falsy.
 * @param generator Generator function whose yielded values are awaited one at a time.
 * @returns A `P` instance resolved with the generator's return value, or rejected with
 *          whatever the generator throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Values that are not already `P` instances are wrapped in a `P` resolved with them.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a fulfilled value; anything it throws rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw a rejection reason into the generator so its own try/catch gets a chance to handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// A finished generator resolves the outer promise; otherwise wait on the yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// `generator` is rebound from the generator function to the object it returns, then started.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
/**
 * Compiler-emitted helper that emulates a generator object with a label-driven state
 * machine (an existing `this.__generator` is reused when present).
 *
 * `body` is called repeatedly with the state object `_` and must return an instruction
 * array `[opcode, operand]`; the loop in `step` interprets those instructions.
 *
 * @param thisArg `this` value used when calling `body`.
 * @param body State-machine function; it reads `_.label` / `_.sent()` and returns the next instruction.
 * @returns An object exposing `next`, `throw` and `return` (plus `Symbol.iterator` when available).
 */
var __generator = (this && this.__generator) || function (thisArg, body) {
// `_` is the machine state: current label, `sent()` (returns the last received value, or throws
// it when the received opcode has bit 1 set), the stack of try entries and the stack of saved ops.
// `f` is the "currently executing" flag, `y` a delegated iterator, `t` a scratch slot.
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
// next / throw / return enter `step` with opcodes 0 / 1 / 2 respectively.
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
// Guard against re-entrant calls while the body is running.
if (f) throw new TypeError("Generator is already executing.");
while (_) try {
// While a delegated iterator `y` is set, forward the request to its matching method
// and hand its result straight back as long as it is not done.
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
// The delegated iterator finished: drop it and continue with its final value.
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
// 0 / 1: stash the incoming op in `t` so that `_.sent()` can return or throw it.
case 0: case 1: t = op; break;
// 4: advance the label and hand `op[1]` to the consumer as a not-done result.
case 4: _.label++; return { value: op[1], done: false };
// 5: advance the label and start delegating to the iterator in `op[1]`.
case 5: _.label++; y = op[1]; op = [0]; continue;
// 7: restore the op saved on `_.ops` and drop the current try entry.
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// No active try entry and the op is 6 or 2: shut the machine down (`_ = 0`).
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
// 3: jump to label `op[1]`, allowed when there is no try entry or the target lies between t[0] and t[3].
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
// 6 raised before label t[1]: continue at t[1] and keep the op in `t` for `_.sent()`.
// NOTE(review): t[1] / t[2] look like the catch and finally labels of the try entry — confirm against the emitter.
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
// Otherwise, if label t[2] has not been reached yet, go there and save the op for a later opcode 7.
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
op = body.call(thisArg, _);
// An exception thrown by `body` becomes opcode 6; the flag and scratch slot are always reset.
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
// Machine stopped: opcodes matching `& 5` throw their operand, anything else is a done result.
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
/**
 * Compiler-emitted spread helper (an existing `this.__spreadArray` is reused when present).
 * Returns a new array made of `to` followed by the elements of `from`.
 *
 * @param to Array that provides the leading elements; `concat` is called on it.
 * @param from Array or array-like whose elements are appended.
 * @param pack When truthy (or when only two arguments are passed) holes in `from`
 *             are turned into explicit `undefined` entries before concatenating.
 * @returns The concatenated array.
 */
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
    var dense;
    if (pack || arguments.length === 2) {
        var total = from.length;
        for (var idx = 0; idx < total; idx++) {
            // Start a dense copy at the first hole; until then `from` can be used as is.
            if (dense === undefined && !(idx in from)) {
                dense = Array.prototype.slice.call(from, 0, idx);
            }
            // Once a dense copy exists every remaining index is written explicitly.
            if (dense !== undefined) {
                dense[idx] = from[idx];
            }
        }
    }
    return to.concat(dense || Array.prototype.slice.call(from));
};
exports.__esModule = true;
var ono_1 = require("ono");
var singer_node_1 = require("singer-node");
var jsonSchemaInspector_1 = require("./jsonSchemaInspector");
var utils_1 = require("./utils");
var RecordProcessor_1 = require("./RecordProcessor");
var jsonSchemaTranslator_1 = require("./jsonSchemaTranslator");
var DeletedRecordProcessor_1 = require("./DeletedRecordProcessor");
/**
 * Tells whether `meta` declares at least one primary-key mapping; this file uses
 * that as the signal that the table is handled as a ReplacingMergeTree.
 *
 * @param meta Table metadata exposing a `pkMappings` array.
 * @returns `true` when `meta.pkMappings` is non-empty.
 */
var metaRepresentsReplacingMergeTree = function (meta) {
    var primaryKeyCount = meta.pkMappings.length;
    return primaryKeyCount > 0;
};
/**
 * Coordinates the ingestion of a single stream: table set-up, record versioning,
 * buffered deletions, and the clean-up / primary-key checks run at the end.
 */
var StreamProcessor = (function () {
    /**
     * @param clickhouse Client object; this class calls `runQuery` on it (and `getDatabase` in the factory).
     * @param meta Table metadata (`prop`, `sqlTableName`, `pkMappings`, `simpleColumnMappings`, `children`, optional `cleaningColumn`).
     * @param startedClean When truthy, per-record cleaning and the final dedup / integrity steps are skipped.
     * @param config Only read to build the default processors (`batch_size`, `deletion_batch_size`,
     *               `translate_values`, `insert_stream_timeout_sec`).
     * @param maxVer Version counter handed to `pushRecord` and incremented after every record.
     * @param recordProcessor Optional; defaults to a new RecordProcessor built from `config`.
     * @param deletedRecordProcessor Optional; defaults to a new DeletedRecordProcessor built from `config`.
     * @param noPendingRows Optional count of rows pushed since the last commit; defaults to 0.
     * @param cleaningValues Optional list of cleaning-column values already deleted; defaults to `[]`.
     */
    function StreamProcessor(clickhouse, meta, startedClean, config, maxVer, recordProcessor, deletedRecordProcessor, noPendingRows, cleaningValues) {
        if (recordProcessor === undefined) {
            recordProcessor = new RecordProcessor_1["default"](meta, clickhouse, {
                batchSize: config.batch_size,
                translateValues: config.translate_values,
                // The stream is auto-ended 5 seconds before the configured insert timeout.
                autoEndTimeoutMs: (config.insert_stream_timeout_sec - 5) * 1000
            });
        }
        if (deletedRecordProcessor === undefined) {
            deletedRecordProcessor = new DeletedRecordProcessor_1["default"](meta, clickhouse, {
                batchSize: config.deletion_batch_size,
                translateValues: config.translate_values
            });
        }
        if (noPendingRows === undefined) {
            noPendingRows = 0;
        }
        if (cleaningValues === undefined) {
            cleaningValues = [];
        }
        this.clickhouse = clickhouse;
        this.meta = meta;
        this.startedClean = startedClean;
        this.maxVer = maxVer;
        this.recordProcessor = recordProcessor;
        this.deletedRecordProcessor = deletedRecordProcessor;
        this.noPendingRows = noPendingRows;
        this.cleaningValues = cleaningValues;
    }

    /**
     * Builds a processor, prepares its tables and initialises its version counter.
     *
     * With `cleanFirst` the tables are dropped and recreated. Otherwise the schema is
     * updated when the root table is listed in `existingTables`, and created when it is not.
     *
     * @param ch Client object exposing `runQuery` and `getDatabase`.
     * @param meta Root table metadata.
     * @param config Target configuration forwarded to the constructor.
     * @param cleanFirst Whether to drop the stream's tables before ingesting.
     * @param existingTables Names of tables already present; only read when `cleanFirst` is falsy
     *                       or when the schema is updated.
     * @returns Promise of the ready-to-use StreamProcessor.
     */
    StreamProcessor.createStreamProcessor = async function (ch, meta, config, cleanFirst, existingTables) {
        const processor = new StreamProcessor(ch, meta, cleanFirst, config, 0);

        let rootAlreadyExists;
        if (cleanFirst) {
            await processor.clearTables();
            rootAlreadyExists = false;
        } else {
            rootAlreadyExists = existingTables.some((table) => meta.sqlTableName === (0, jsonSchemaInspector_1.escapeIdentifier)(table));
        }

        if (rootAlreadyExists) {
            await (0, jsonSchemaTranslator_1.updateSchema)(meta, ch, existingTables);
        } else {
            (0, singer_node_1.log_info)(`[${meta.prop}]: creating tables`);
            const createQueries = (0, jsonSchemaTranslator_1.translateCH)(ch.getDatabase(), meta, true);
            // NOTE(review): `map` also passes the index and the array to the bound `runQuery`;
            // kept as is — confirm `runQuery` ignores extra arguments.
            await Promise.all(createQueries.map(ch.runQuery.bind(ch)));
        }

        let initialMaxVer;
        if (cleanFirst || !metaRepresentsReplacingMergeTree(meta)) {
            initialMaxVer = processor.maxVer;
        } else {
            // Tables with primary keys continue from the highest version already stored.
            const versionResult = await ch.runQuery(`SELECT max(_ver)\n FROM ${meta.sqlTableName}`);
            initialMaxVer = Number(versionResult.data[0][0]);
        }
        processor.maxVer = initialMaxVer;

        (0, singer_node_1.log_info)(`[${meta.prop}]: initial max version is [${processor.maxVer}]`);
        return processor;
    };

    /**
     * Drops the root table and every descendant table (`DROP TABLE IF EXISTS`), in parallel.
     * @returns Promise settled once every drop query has completed.
     */
    StreamProcessor.prototype.clearTables = async function () {
        const dropQueries = buildDropTablesQueries(this.meta);
        await Promise.all(dropQueries.map((query) => this.clickhouse.runQuery(query)));
    };

    /**
     * Hands one record to the record processor, stamping it with the current version.
     *
     * When the run did not start clean and the record carries a not-yet-seen value in the
     * cleaning column, rows holding that value are deleted first.
     *
     * @param record Record to ingest.
     * @param messageCount Forwarded as the last argument of `pushRecord`.
     * @param abort Forwarded as the second argument of `pushRecord`.
     */
    StreamProcessor.prototype.processRecord = async function (record, messageCount, abort) {
        if (!this.startedClean) {
            const cleaningValue = this.meta.cleaningColumn && record[this.meta.cleaningColumn];
            if (cleaningValue && !this.cleaningValues.includes(cleaningValue)) {
                await this.deleteCleaningValue(cleaningValue);
                this.cleaningValues.push(cleaningValue);
            }
        }
        this.recordProcessor.pushRecord(record, abort, this.maxVer, undefined, undefined, undefined, messageCount);
        this.maxVer++;
        this.noPendingRows++;
    };

    /**
     * Hands a deleted record to the deleted-record processor.
     * @param record Record describing the deletion.
     */
    StreamProcessor.prototype.processDeletedRecord = async function (record) {
        await this.deletedRecordProcessor.pushDeletedRecord(record);
    };

    /**
     * Ends the current ingestion batch when rows are pending (resetting the pending
     * counter and bumping the version), then applies the buffered deletions.
     */
    StreamProcessor.prototype.commitPendingChanges = async function () {
        if (this.noPendingRows > 0) {
            (0, singer_node_1.log_info)(`[${this.meta.prop}]: ending batch ingestion for ${this.noPendingRows} rows`);
            await this.recordProcessor.endIngestion();
            this.noPendingRows = 0;
            this.maxVer++;
        }
        await this.deletedRecordProcessor.deleteBufferedData();
    };

    /**
     * Commits whatever is pending and, unless the run started clean, deduplicates the
     * root table, removes orphaned child rows and verifies primary-key uniqueness.
     *
     * @throws An `ono` error wrapping the cause when the commit fails, or the error raised
     *         by `assertPKIntegrity` when duplicates remain.
     */
    StreamProcessor.prototype.finalizeProcessing = async function () {
        try {
            await this.commitPendingChanges();
        } catch (commitError) {
            throw (0, ono_1.ono)(commitError, "could not save new records");
        }

        (0, singer_node_1.log_info)(`[${this.meta.prop}]: finalizing processing`);
        if (this.startedClean) {
            return;
        }

        if (this.isReplacingMergeTree()) {
            (0, singer_node_1.log_info)(`[${this.meta.prop}]: removing root duplicates`);
            await this.clickhouse.runQuery(`OPTIMIZE TABLE ${this.meta.sqlTableName} FINAL`);
            if (this.recordProcessor.hasChildren) {
                (0, singer_node_1.log_info)(`[${this.meta.prop}]: removing children orphans`);
                await Promise.all(this.meta.children.map((child) => this.deleteChildDuplicates(child)));
            }
        }

        (0, singer_node_1.log_info)(`[${this.meta.prop}]: ensuring PK integrity is maintained`);
        await this.assertPKIntegrity(this.meta);
    };

    /**
     * Deletes every root-table row whose cleaning column equals `value`.
     * Logs a warning and does nothing when no cleaning column is configured.
     *
     * @param value Cleaning-column value to purge.
     * @throws Error when the cleaning column cannot be found among the simple / PK
     *         mappings, or when that mapping has no `valueTranslator`.
     */
    StreamProcessor.prototype.deleteCleaningValue = async function (value) {
        if (!this.meta.cleaningColumn) {
            (0, singer_node_1.log_warning)(`[${this.meta.prop}]: unexpected request to clean values: cleaning column undefined`);
            return;
        }

        const candidateColumns = this.meta.simpleColumnMappings.concat(this.meta.pkMappings);
        const cleaningColumnMeta = candidateColumns.find((column) => column.prop === this.meta.cleaningColumn);
        if (!cleaningColumnMeta) {
            throw new Error(`[${this.meta.prop}] could not resolve cleaning column meta (looking for ${this.meta.cleaningColumn})`);
        }
        if (!cleaningColumnMeta.valueTranslator) {
            throw new Error(`[${this.meta.prop}] could not be used as cleaning column as it do not have a translator`);
        }

        const resolvedValue = cleaningColumnMeta.valueTranslator(value);
        (0, singer_node_1.log_info)(`[${this.meta.prop}]: cleaning column: deleting based on ${resolvedValue}`);
        // NOTE(review): the translated value is only logged; the DELETE filters on the raw,
        // escaped `value` — confirm this is intended.
        const query = `\n ALTER TABLE ${this.meta.sqlTableName}\n DELETE\n WHERE \`${this.meta.cleaningColumn}\` = '${(0, utils_1.escapeValue)(value)}'`;
        await this.clickhouse.runQuery(query);
    };

    /**
     * Deletes rows of `currentNode` whose (root PK columns, `_root_ver`) tuple has no matching
     * (PK columns, `_ver`) row in the root table, then does the same for each of its children.
     *
     * @param currentNode Child table metadata (`sqlTableName`, `children`).
     */
    StreamProcessor.prototype.deleteChildDuplicates = async function (currentNode) {
        const childTable = currentNode.sqlTableName;
        const childKeyColumns = this.meta.pkMappings
            .map((pk) => (0, jsonSchemaInspector_1.escapeIdentifier)((0, jsonSchemaInspector_1.formatRootPKColumn)(pk.prop)))
            .concat(["_root_ver"])
            .join(",");
        const rootKeyColumns = this.meta.pkMappings
            .map((pk) => pk.sqlIdentifier)
            .concat(["_ver"])
            .join(",");
        const rootTable = this.meta.sqlTableName;

        const query = `\n ALTER TABLE ${childTable}\n DELETE\n WHERE (${childKeyColumns}) NOT IN (SELECT ${rootKeyColumns} FROM ${rootTable})`;
        await this.clickhouse.runQuery(query);
        await Promise.all(currentNode.children.map(this.deleteChildDuplicates.bind(this)));
    };

    /**
     * @returns Whether this processor's root metadata declares primary keys.
     */
    StreamProcessor.prototype.isReplacingMergeTree = function () {
        return metaRepresentsReplacingMergeTree(this.meta);
    };

    /**
     * Checks `meta`'s children first, then `meta` itself, for rows sharing the same primary key.
     * Tables without primary-key mappings are not queried.
     *
     * @param meta Table metadata to verify.
     * @throws An `ono` error when the duplicate-detection query reports at least one row.
     */
    StreamProcessor.prototype.assertPKIntegrity = async function (meta) {
        await Promise.all(meta.children.map((child) => this.assertPKIntegrity(child)));
        if (meta.pkMappings.length === 0) {
            return;
        }

        const pks = meta.pkMappings.map((pk) => pk.sqlIdentifier).join(",");
        const query = `\n SELECT ${pks}\n FROM (SELECT ${pks}, ROW_NUMBER() OVER (PARTITION BY ${pks}) AS row_number FROM ${meta.sqlTableName})\n WHERE row_number > 1 LIMIT 1`;
        const result = await this.clickhouse.runQuery(query);
        if (result.rows > 0) {
            throw (0, ono_1.ono)("Duplicate key on table %s, data: %j, aborting process", meta.sqlTableName, result.data);
        }
    };

    return StreamProcessor;
}());
exports["default"] = StreamProcessor;
/**
 * Lists the `DROP TABLE IF EXISTS` statements for `meta`'s table followed by those
 * of all of its descendants, depth first.
 *
 * @param meta Table metadata exposing `sqlTableName` and a `children` array.
 * @returns Array of SQL statements, the table's own drop first.
 */
var buildDropTablesQueries = function (meta) {
    var dropOwnTable = "DROP TABLE IF EXISTS ".concat(meta.sqlTableName);
    var dropDescendantTables = meta.children.flatMap(buildDropTablesQueries);
    return __spreadArray([dropOwnTable], dropDescendantTables, true);
};
//# sourceMappingURL=StreamProcessor.js.map