target-clickhouse
Version:
A Singer target for Clickhouse
266 lines • 14.2 kB
JavaScript
"use strict";
// Async-function runtime helper: drives `generator` (a generator function
// invoked with `thisArg` and `_arguments`) to completion, treating every
// yielded value as something to wait for, and returns a promise for the
// generator's return value. `P` is the promise constructor to use; the native
// Promise is used when it is falsy. An already defined `this.__awaiter` wins.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Wrap plain values so that `.then` can be called on anything yielded.
    function toPromise(value) {
        if (value instanceof P) {
            return value;
        }
        return new P(function (resolve) { resolve(value); });
    }
    return new P(function (resolve, reject) {
        // Settle the outer promise once the generator is done, otherwise wait
        // for the yielded value and resume with its outcome.
        function settleOrWait(result) {
            if (result.done) {
                resolve(result.value);
            }
            else {
                toPromise(result.value).then(onFulfilled, onRejected);
            }
        }
        // Resume the generator through `method` ("next" or "throw"); anything
        // thrown while resuming or scheduling rejects the outer promise.
        function resume(method, payload) {
            try {
                settleOrWait(generator[method](payload));
            }
            catch (e) {
                reject(e);
            }
        }
        function onFulfilled(value) { resume("next", value); }
        function onRejected(reason) { resume("throw", reason); }
        // From here on `generator` is the iterator, not the generator function.
        generator = generator.apply(thisArg, _arguments || []);
        settleOrWait(generator.next());
    });
};
// Downlevel generator runtime helper. Wraps `body` (a state machine that
// switches on `_.label`) in an object exposing next/throw/return, plus
// Symbol.iterator when Symbol exists. An already defined `this.__generator`
// wins over this implementation.
// `body` is invoked as body.call(thisArg, _) and returns an instruction array
// [opcode, operand]. Opcodes handled below: 0 next, 1 throw, 2 return,
// 3 jump to label, 4 yield, 5 delegate to another iterator, 6 error,
// 7 resume a deferred instruction.
var __generator = (this && this.__generator) || function (thisArg, body) {
// `_` is the state object handed to `body`. `sent()` returns the operand of the
// instruction stored in `t`, or throws it when that opcode has bit 0 set.
// f: "currently executing" flag, y: delegate iterator, t: scratch, g: result.
var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
// Builds the public method for opcode n (0 = next, 1 = throw, 2 = return).
function verb(n) { return function (v) { return step([n, v]); }; }
function step(op) {
// Re-entrancy guard: `f` is set while running and cleared in the `finally` below.
if (f) throw new TypeError("Generator is already executing.");
// `_` is set to 0 to leave the loop once the generator has finished.
while (_) try {
// While a delegate iterator `y` is active, forward the operation to it and
// hand back its result as long as it is not done.
if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
// Delegate finished: drop it and continue with its final value.
if (y = 0, t) op = [op[0] & 2, t.value];
switch (op[0]) {
// next / throw: remember the instruction so that `_.sent()` can read it.
case 0: case 1: t = op; break;
// yield: advance the label and hand the operand to the caller.
case 4: _.label++; return { value: op[1], done: false };
// delegate: make the operand the active inner iterator and start it.
case 5: _.label++; y = op[1]; op = [0]; continue;
// resume the instruction deferred on `_.ops` and leave the current try region.
case 7: op = _.ops.pop(); _.trys.pop(); continue;
default:
// No enclosing try region and the op is an error (6) or a return (2): finish.
if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
// Jump (3): allowed when there is no try region or the target lies strictly
// between t[0] and t[3] of the innermost one.
if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
// Error raised before label t[1] (presumably the catch handler): jump there
// and keep the error in `t` so that `_.sent()` returns it.
if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
// Otherwise run label t[2] first (presumably the finally handler) and defer
// the current instruction on `_.ops`.
if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
if (t[2]) _.ops.pop();
_.trys.pop(); continue;
}
op = body.call(thisArg, _);
// Anything thrown by `body` is converted into an error instruction.
} catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
// Loop left: throw pending errors, otherwise report completion.
if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
}
};
// Array-spread runtime helper: returns a new array made of `to` followed by
// the elements of the array-like `from`. When `pack` is truthy, or the helper
// is called with exactly two arguments, holes in `from` are filled with
// `undefined`; otherwise `from` is copied as-is. An already defined
// `this.__spreadArray` wins over this implementation.
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
    var dense;
    if (pack || arguments.length === 2) {
        var count = from.length;
        for (var idx = 0; idx < count; idx++) {
            // Nothing to do until the first missing index is found.
            if (!dense && (idx in from)) {
                continue;
            }
            // First hole: start a dense copy from the elements seen so far.
            if (!dense) {
                dense = Array.prototype.slice.call(from, 0, idx);
            }
            dense[idx] = from[idx];
        }
    }
    if (dense) {
        return to.concat(dense);
    }
    return to.concat(Array.prototype.slice.call(from));
};
exports.__esModule = true;
exports.processStream = void 0;
var readline = require("readline");
var singer_node_1 = require("singer-node");
var ClickhouseConnection_1 = require("./ClickhouseConnection");
var jsonSchemaInspector_1 = require("./jsonSchemaInspector");
var StreamProcessor_1 = require("./StreamProcessor");
var jsonSchemaTranslator_1 = require("./jsonSchemaTranslator");
var promise_pool_1 = require("@supercharge/promise-pool");
var forAwaitOnMacroTaskQueue_1 = require("./forAwaitOnMacroTaskQueue");
/**
 * Handles a schema message for one stream and resolves with its stream processor.
 *
 * Builds the stream metadata from the message. When `meta.prop` is listed in
 * `state.streamsToReplace`, the queries returned by `dropStreamTablesQueries`
 * are run first. Mutates `state`: the replaced stream is removed from
 * `streamsToReplace` and `existingTables` is refreshed from `ch.listTables()`.
 *
 * @param msg schema message; reads `stream`, `schema`, `keyProperties`,
 *   `cleaningColumn`, `allKeyProperties` and `cleanFirst`
 * @param config target configuration; `subtable_separator` is read here and the
 *   whole object is forwarded to the stream processor factory
 * @param ch connection object providing `runQuery` and `listTables`
 * @param state object holding `streamsToReplace` and `existingTables`
 * @returns promise resolving to the value produced by
 *   `StreamProcessor.createStreamProcessor`
 */
function processSchemaMessage(msg, config, ch, state) {
return __awaiter(this, void 0, void 0, function () {
var meta, streamToReplaceIndex, _a, streamProcessor, _b;
return __generator(this, function (_c) {
switch (_c.label) {
case 0:
meta = (0, jsonSchemaInspector_1.buildMeta)(new jsonSchemaInspector_1.JsonSchemaInspectorContext(msg.stream, msg.schema, msg.keyProperties, config.subtable_separator, undefined, undefined, undefined, msg.cleaningColumn, msg.allKeyProperties));
streamToReplaceIndex = state.streamsToReplace.indexOf(meta.prop);
// Stream not marked for replacement: skip straight to processor creation.
if (!(streamToReplaceIndex > -1)) return [3, 3];
(0, singer_node_1.log_info)("[".concat(meta.prop, "]: dropping root and children tables"));
// All drop queries are started at once and awaited together.
return [4, Promise.all((0, jsonSchemaTranslator_1.dropStreamTablesQueries)(meta).map(function (query) { return ch.runQuery(query); }))];
case 1:
_c.sent();
// Remove the entry so a later schema message for the same stream does not drop again.
state.streamsToReplace.splice(streamToReplaceIndex, 1);
_a = state;
return [4, ch.listTables()];
case 2:
// Refresh the table list after the drop.
_a.existingTables = _c.sent();
_c.label = 3;
case 3: return [4, StreamProcessor_1["default"].createStreamProcessor(ch, meta, config, msg.cleanFirst, state.existingTables)];
case 4:
streamProcessor = _c.sent();
_b = state;
// Refresh the table list again once the processor has been created.
return [4, ch.listTables()];
case 5:
_b.existingTables = _c.sent();
return [2, streamProcessor];
}
});
});
}
/**
 * Tells whether an existing table no longer belongs to any active stream.
 *
 * A table counts as active when its name equals an active name or starts with
 * an active name followed by `subtableSeparator`. Tables whose names start with
 * the connection's dropped or archived prefix are never reported.
 *
 * @param table name of the table to check
 * @param activeStreams names of the streams that are still active
 * @param subtableSeparator separator placed between a stream name and the rest
 *   of a child table name
 * @param extraActiveTables additional names to treat as active; may be omitted
 * @returns true when the table matches no active name and carries neither the
 *   dropped nor the archived prefix
 */
function tableShouldBeDropped(table, activeStreams, subtableSeparator, extraActiveTables) {
    // `concat(undefined)` appends a literal `undefined` element, which would
    // make the prefix test below match any table named "undefined<separator>…".
    // A missing list is therefore treated as empty.
    var activeTables = activeStreams.concat(extraActiveTables || []);
    var doesMatchAnActiveStream = activeTables.some(function (activeTable) {
        return table === activeTable ||
            table.startsWith(activeTable + subtableSeparator);
    });
    var isAlreadyDropped = table.startsWith(ClickhouseConnection_1["default"].droppedTablePrefix);
    var isArchived = table.startsWith(ClickhouseConnection_1["default"].archivedTablePrefix);
    return !doesMatchAnActiveStream && !isAlreadyDropped && !isArchived;
}
/**
 * Handles an active-streams message: lists the existing tables and calls
 * `renameObsoleteColumn` for every table that `tableShouldBeDropped` reports,
 * given `msg.streams`, `config.subtable_separator` and
 * `config.extra_active_tables`.
 *
 * NOTE(review): a new ClickhouseConnection is created here instead of reusing
 * the one the caller already holds — confirm this is intended.
 * NOTE(review): the connection method is named `renameObsoleteColumn` but is
 * given a table name — verify against ClickhouseConnection.
 *
 * @param msg message whose `streams` property lists the active stream names
 * @param config target configuration, also passed to the connection constructor
 * @returns promise resolving to undefined once every per-table call has settled
 *   successfully
 */
function processActiveSchemasMessage(msg, config) {
return __awaiter(this, void 0, void 0, function () {
var ch, tables;
var _this = this;
return __generator(this, function (_a) {
switch (_a.label) {
case 0:
ch = new ClickhouseConnection_1["default"](config);
return [4, ch.listTables()];
case 1:
tables = _a.sent();
// One async task per table, all started at once and awaited together.
return [4, Promise.all(tables.map(function (table) { return __awaiter(_this, void 0, void 0, function () {
return __generator(this, function (_a) {
if (tableShouldBeDropped(table, msg.streams, config.subtable_separator, config.extra_active_tables)) {
return [2, ch.renameObsoleteColumn(table)];
}
// Table is still active (or already dropped/archived): nothing to do.
return [2];
});
}); }))];
case 2:
_a.sent();
return [2];
}
});
});
}
/**
 * Parses one input line with `parse_message` and dispatches on the message type.
 *
 * - schema: commits pending changes of an existing processor for the stream,
 *   then stores the processor returned by `processSchemaMessage` in
 *   `streamProcessors` under `msg.stream`.
 * - record / deletedRecord: forwards `msg.record` to the stream's processor.
 * - state: commits pending changes of all processors, then writes
 *   `JSON.stringify(msg.value)` with `write_line`.
 * - activeStreams: delegates to `processActiveSchemasMessage`.
 * - anything else (including a missing message): logs a warning.
 *
 * @param line raw input line
 * @param config target configuration
 * @param ch connection object forwarded to `processSchemaMessage`
 * @param streamProcessors Map from stream name to stream processor; updated on
 *   schema messages
 * @param state processing state forwarded to `processSchemaMessage`
 * @param lineCount line counter supplied by the caller; used in the warning and
 *   passed to `processRecord`
 * @param interrupt callback forwarded to `processRecord`
 * @returns promise resolving to undefined
 * @throws Error (as a rejection) when a record or deleted-record message arrives
 *   for a stream without a processor
 */
function processLine(line, config, ch, streamProcessors, state, lineCount, interrupt) {
return __awaiter(this, void 0, void 0, function () {
var msg, _a, _b, _c, _d;
return __generator(this, function (_e) {
switch (_e.label) {
case 0:
msg = (0, singer_node_1.parse_message)(line);
// Null-safe read of the type: a null/undefined message falls through to the
// "not handled" branch (label 13).
_a = msg === null || msg === void 0 ? void 0 : msg.type;
switch (_a) {
case singer_node_1.MessageType.schema: return [3, 1];
case singer_node_1.MessageType.record: return [3, 5];
case singer_node_1.MessageType.deletedRecord: return [3, 7];
case singer_node_1.MessageType.state: return [3, 9];
case singer_node_1.MessageType.activeStreams: return [3, 11];
}
return [3, 13];
case 1:
// Schema message: flush the previous processor of this stream, if any, before
// it gets replaced.
if (!streamProcessors.has(msg.stream)) return [3, 3];
return [4, streamProcessors.get(msg.stream).commitPendingChanges()];
case 2:
_e.sent();
_e.label = 3;
case 3:
(0, singer_node_1.log_info)("[".concat(msg.stream, "]: Received schema message."));
_c = (_b = streamProcessors).set;
_d = [msg.stream];
return [4, processSchemaMessage(msg, config, ch, state)];
case 4:
// Equivalent to streamProcessors.set(msg.stream, <awaited processor>).
_c.apply(_b, _d.concat([_e.sent()]));
return [3, 14];
case 5:
// Record message: requires a processor created by an earlier schema message.
if (!streamProcessors.has(msg.stream)) {
throw new Error("Record message received before Schema is defined");
}
return [4, streamProcessors.get(msg.stream).processRecord(msg.record, lineCount, interrupt)];
case 6:
_e.sent();
return [3, 14];
case 7:
// Deleted-record message: same precondition and error text as a record message.
if (!streamProcessors.has(msg.stream)) {
throw new Error("Record message received before Schema is defined");
}
return [4, streamProcessors.get(msg.stream).processDeletedRecord(msg.record)];
case 8:
_e.sent();
return [3, 14];
case 9:
// State message: every processor commits before the state value is written out.
(0, singer_node_1.log_info)("Received state message. Commit pending changes...");
return [4, Promise.all(Array.from(streamProcessors.values())
.map(function (processor) { return processor.commitPendingChanges(); }))];
case 10:
_e.sent();
(0, singer_node_1.write_line)(JSON.stringify(msg.value));
return [3, 14];
case 11: return [4, processActiveSchemasMessage(msg, config)];
case 12:
_e.sent();
return [3, 14];
case 13:
// Unknown type: log the line counter and the first 50 characters of the line.
(0, singer_node_1.log_warning)("Message type not handled at line ".concat(lineCount, " starting with [").concat(line.substring(0, 50), "]"));
return [3, 14];
case 14: return [2];
}
});
});
}
/**
 * Mutable state shared while a stream is processed.
 *
 * @param streamsToReplace stored as-is on `this.streamsToReplace`
 * @param existingTables stored as-is on `this.existingTables`
 */
var ProcessingState = function ProcessingState(streamsToReplace, existingTables) {
    this.streamsToReplace = streamsToReplace;
    this.existingTables = existingTables;
};
/**
 * Reads `inputStream` line by line, processes each line with `processLine`,
 * then finalizes every stream processor that was created along the way.
 *
 * Lines are handled one after the other: each one starts only after the
 * previous line's promise has settled successfully.
 *
 * NOTE(review): the "error" listener throws from inside the event callback, not
 * from this function's promise — confirm that this is the intended way to fail.
 * NOTE(review): the final `rl.close()` is only reached on the success path; on
 * error paths the interface is closed only if `abort` was called.
 *
 * @param inputStream readable stream passed to `readline.createInterface`
 * @param config target configuration; `batch_size` is used as the concurrency
 *   of the finalization pool
 * @param streamsToReplace stream names to replace (defaults to an empty array);
 *   a copy is stored in the processing state, so the caller's array is not
 *   modified here
 * @returns promise resolving to undefined when all processors are finalized
 * @throws the error handed to `abort`, or any error raised while processing
 *   lines or finalizing (as a rejection)
 */
function processStream(inputStream, config, streamsToReplace) {
if (streamsToReplace === void 0) { streamsToReplace = []; }
return __awaiter(this, void 0, void 0, function () {
var ch, state, _a, _b, lineCount, encounteredErr, rl, abort, streamProcessors, processLinePromise;
var _this = this;
return __generator(this, function (_c) {
switch (_c.label) {
case 0:
ch = new ClickhouseConnection_1["default"](config);
_a = ProcessingState.bind;
// Copy of the caller's list, so the state works on its own array.
_b = [void 0, __spreadArray([], streamsToReplace, true)];
return [4, ch.listTables()];
case 1:
// Equivalent to new ProcessingState(<copied list>, <awaited table list>).
state = new (_a.apply(ProcessingState, _b.concat([_c.sent()])))();
lineCount = 0;
inputStream.on("error", function (err) {
(0, singer_node_1.log_fatal)("".concat(err.message));
throw new Error("READ ERROR ".concat(err));
});
rl = readline.createInterface({
input: inputStream
});
// Handed to processLine as `interrupt`: records the error, logs it and stops
// reading; the error is rethrown after the last line has been handled.
abort = function (err) {
encounteredErr = err;
(0, singer_node_1.log_error)(err.message);
(0, singer_node_1.log_info)("manually closing read stream");
rl.close();
};
streamProcessors = new Map();
processLinePromise = Promise.resolve();
return [4, (0, forAwaitOnMacroTaskQueue_1["default"])(rl[Symbol.asyncIterator](), function (line) { return __awaiter(_this, void 0, void 0, function () {
return __generator(this, function (_a) {
switch (_a.label) {
// Wait for the previous line before starting this one; the new promise is
// stored but not awaited here. `lineCount++` passes a zero-based counter.
case 0: return [4, processLinePromise];
case 1:
_a.sent();
processLinePromise = processLine(line, config, ch, streamProcessors, state, lineCount++, abort);
return [2];
}
});
}); })];
case 2:
_c.sent();
// The last line's promise has not been awaited by the loop yet.
return [4, processLinePromise];
case 3:
_c.sent();
(0, singer_node_1.log_info)("done reading lines");
if (encounteredErr) {
throw encounteredErr;
}
// Finalize all processors through a pool whose concurrency is config.batch_size;
// the error handler rethrows instead of collecting errors.
return [4, promise_pool_1.PromisePool
.withConcurrency(config.batch_size)["for"](streamProcessors.values())
.handleError(function (error) { return __awaiter(_this, void 0, void 0, function () {
return __generator(this, function (_a) {
throw error;
});
}); })
.process(function (processor) { return __awaiter(_this, void 0, void 0, function () { return __generator(this, function (_a) {
switch (_a.label) {
case 0: return [4, processor.finalizeProcessing()];
case 1: return [2, _a.sent()];
}
}); }); })];
case 4:
_c.sent();
rl.close();
return [2];
}
});
});
}
exports.processStream = processStream;
//# sourceMappingURL=processStream.js.map