UNPKG

target-clickhouse

Version:
266 lines 14.2 kB
"use strict";
// ---------------------------------------------------------------------------
// TypeScript-emitted runtime helpers (ES5 target). They implement async
// functions as explicit state machines. Reading guide for the opcodes returned
// by the state-machine bodies below, as handled in __generator's `step`:
//   [4, p]  -> yield p (the awaited value is then read with `.sent()`)
//   [3, n]  -> jump to `case n`
//   [2, v]  -> return v (or undefined when v is omitted)
// ---------------------------------------------------------------------------
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
var __generator = (this && this.__generator) || function (thisArg, body) {
    var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;
    return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() { return this; }), g;
    function verb(n) { return function (v) { return step([n, v]); }; }
    function step(op) {
        if (f) throw new TypeError("Generator is already executing.");
        while (_) try {
            if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
            if (y = 0, t) op = [op[0] & 2, t.value];
            switch (op[0]) {
                case 0: case 1: t = op; break;
                case 4: _.label++; return { value: op[1], done: false };
                case 5: _.label++; y = op[1]; op = [0]; continue;
                case 7: op = _.ops.pop(); _.trys.pop(); continue;
                default:
                    if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }
                    if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }
                    if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }
                    if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }
                    if (t[2]) _.ops.pop();
                    _.trys.pop(); continue;
            }
            op = body.call(thisArg, _);
        } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }
        if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };
    }
};
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
    if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
        if (ar || !(i in from)) {
            if (!ar) ar = Array.prototype.slice.call(from, 0, i);
            ar[i] = from[i];
        }
    }
    return to.concat(ar || Array.prototype.slice.call(from));
};
exports.__esModule = true;
exports.processStream = void 0;
var readline = require("readline");
var singer_node_1 = require("singer-node");
var ClickhouseConnection_1 = require("./ClickhouseConnection");
var jsonSchemaInspector_1 = require("./jsonSchemaInspector");
var StreamProcessor_1 = require("./StreamProcessor");
var jsonSchemaTranslator_1 = require("./jsonSchemaTranslator");
var promise_pool_1 = require("@supercharge/promise-pool");
var forAwaitOnMacroTaskQueue_1 = require("./forAwaitOnMacroTaskQueue");
/**
 * Handles one schema message and returns the stream processor created for it.
 *
 * Builds the stream metadata from the message. When `meta.prop` is listed in
 * `state.streamsToReplace`, the queries from `dropStreamTablesQueries(meta)`
 * are run first, the entry is removed from that list and
 * `state.existingTables` is refreshed. `state.existingTables` is refreshed
 * again after the stream processor has been created.
 *
 * @param msg    Parsed schema message (reads `stream`, `schema`,
 *               `keyProperties`, `cleaningColumn`, `allKeyProperties`, `cleanFirst`).
 * @param config Target configuration (reads `subtable_separator`).
 * @param ch     Connection used for `runQuery` and `listTables`.
 * @param state  Shared ProcessingState; mutated in place.
 * @returns Promise resolving to the created stream processor.
 */
function processSchemaMessage(msg, config, ch, state) {
    return __awaiter(this, void 0, void 0, function () {
        var meta, streamToReplaceIndex, _a, streamProcessor, _b;
        return __generator(this, function (_c) {
            switch (_c.label) {
                case 0:
                    meta = (0, jsonSchemaInspector_1.buildMeta)(new jsonSchemaInspector_1.JsonSchemaInspectorContext(msg.stream, msg.schema, msg.keyProperties, config.subtable_separator, undefined, undefined, undefined, msg.cleaningColumn, msg.allKeyProperties));
                    streamToReplaceIndex = state.streamsToReplace.indexOf(meta.prop);
                    // Stream not flagged for replacement: skip straight to processor creation.
                    if (!(streamToReplaceIndex > -1)) return [3, 3];
                    (0, singer_node_1.log_info)("[".concat(meta.prop, "]: dropping root and children tables"));
                    return [4, Promise.all((0, jsonSchemaTranslator_1.dropStreamTablesQueries)(meta).map(function (query) { return ch.runQuery(query); }))];
                case 1:
                    _c.sent();
                    // Remove the entry so a later schema message for the same stream does not drop again.
                    state.streamsToReplace.splice(streamToReplaceIndex, 1);
                    _a = state;
                    return [4, ch.listTables()];
                case 2:
                    _a.existingTables = _c.sent();
                    _c.label = 3;
                case 3: return [4, StreamProcessor_1["default"].createStreamProcessor(ch, meta, config, msg.cleanFirst, state.existingTables)];
                case 4:
                    streamProcessor = _c.sent();
                    _b = state;
                    return [4, ch.listTables()];
                case 5:
                    _b.existingTables = _c.sent();
                    return [2, streamProcessor];
            }
        });
    });
}
/**
 * Decides whether an existing table no longer belongs to any active stream.
 *
 * A table is considered active when it equals an active name or starts with
 * `<active name><subtableSeparator>`. Tables whose name starts with the
 * connection's dropped or archived prefix are never selected.
 *
 * @param {string}   table             Table name to test.
 * @param {string[]} activeStreams     Names of the active streams.
 * @param {string}   subtableSeparator Separator between a root table and its children.
 * @param {string[]} [extraActiveTables] Additional names to treat as active; may be omitted.
 * @returns {boolean} true when the table matches nothing active and is not already dropped/archived.
 */
function tableShouldBeDropped(table, activeStreams, subtableSeparator, extraActiveTables) {
    // `concat(undefined)` would append a literal `undefined` element, which the prefix
    // check below would then compare as the string "undefined" + separator. Fall back
    // to an empty list when no extra tables are provided.
    var activeTables = activeStreams.concat(extraActiveTables || []);
    var doesMatchAnActiveStream = activeTables.some(function (activeTable) {
        return table === activeTable || table.startsWith(activeTable + subtableSeparator);
    });
    var isAlreadyDropped = table.startsWith(ClickhouseConnection_1["default"].droppedTablePrefix);
    var isArchived = table.startsWith(ClickhouseConnection_1["default"].archivedTablePrefix);
    return !doesMatchAnActiveStream && !isAlreadyDropped && !isArchived;
}
/**
 * Handles an active-streams message: lists the tables through a newly created
 * connection and calls `renameObsoleteColumn(table)` on every table for which
 * `tableShouldBeDropped` returns true. All calls are awaited together.
 *
 * @param msg    Parsed message (reads `streams`).
 * @param config Target configuration (reads `subtable_separator`, `extra_active_tables`).
 * @returns Promise resolving once every selected table has been handled.
 */
function processActiveSchemasMessage(msg, config) {
    return __awaiter(this, void 0, void 0, function () {
        var ch, tables;
        var _this = this;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    ch = new ClickhouseConnection_1["default"](config);
                    return [4, ch.listTables()];
                case 1:
                    tables = _a.sent();
                    return [4, Promise.all(tables.map(function (table) { return __awaiter(_this, void 0, void 0, function () {
                            return __generator(this, function (_a) {
                                if (tableShouldBeDropped(table, msg.streams, config.subtable_separator, config.extra_active_tables)) {
                                    return [2, ch.renameObsoleteColumn(table)];
                                }
                                return [2];
                            });
                        }); }))];
                case 2:
                    _a.sent();
                    return [2];
            }
        });
    });
}
/**
 * Parses one input line and dispatches it according to its message type.
 *
 * - schema:        commits pending changes of an existing processor for the
 *                  stream (if any), then registers a new processor.
 * - record:        forwards the record to the stream's processor.
 * - deletedRecord: forwards the record to the processor's `processDeletedRecord`.
 * - state:         commits pending changes on every processor, then writes the
 *                  JSON-serialised state value with `write_line`.
 * - activeStreams: delegates to `processActiveSchemasMessage`.
 * - anything else: logs a warning with the line number and the first 50 characters.
 *
 * @param {string} line          Raw input line.
 * @param config                 Target configuration.
 * @param ch                     Connection passed on to schema handling.
 * @param {Map} streamProcessors Stream name -> processor; mutated on schema messages.
 * @param state                  Shared ProcessingState.
 * @param {number} lineCount     Index of the line, used for logging and passed to `processRecord`.
 * @param {Function} interrupt   Callback passed to `processRecord`.
 * @throws {Error} When a record or deleted-record message arrives for a stream with no processor.
 */
function processLine(line, config, ch, streamProcessors, state, lineCount, interrupt) {
    return __awaiter(this, void 0, void 0, function () {
        var msg, _a, _b, _c, _d;
        return __generator(this, function (_e) {
            switch (_e.label) {
                case 0:
                    msg = (0, singer_node_1.parse_message)(line);
                    _a = msg === null || msg === void 0 ? void 0 : msg.type;
                    switch (_a) {
                        case singer_node_1.MessageType.schema: return [3, 1];
                        case singer_node_1.MessageType.record: return [3, 5];
                        case singer_node_1.MessageType.deletedRecord: return [3, 7];
                        case singer_node_1.MessageType.state: return [3, 9];
                        case singer_node_1.MessageType.activeStreams: return [3, 11];
                    }
                    return [3, 13];
                case 1:
                    // Schema message: flush the previous processor for this stream, if there is one.
                    if (!streamProcessors.has(msg.stream)) return [3, 3];
                    return [4, streamProcessors.get(msg.stream).commitPendingChanges()];
                case 2:
                    _e.sent();
                    _e.label = 3;
                case 3:
                    (0, singer_node_1.log_info)("[".concat(msg.stream, "]: Received schema message."));
                    _c = (_b = streamProcessors).set;
                    _d = [msg.stream];
                    return [4, processSchemaMessage(msg, config, ch, state)];
                case 4:
                    // streamProcessors.set(msg.stream, <processor returned above>)
                    _c.apply(_b, _d.concat([_e.sent()]));
                    return [3, 14];
                case 5:
                    if (!streamProcessors.has(msg.stream)) {
                        throw new Error("Record message received before Schema is defined");
                    }
                    return [4, streamProcessors.get(msg.stream).processRecord(msg.record, lineCount, interrupt)];
                case 6:
                    _e.sent();
                    return [3, 14];
                case 7:
                    if (!streamProcessors.has(msg.stream)) {
                        throw new Error("Record message received before Schema is defined");
                    }
                    return [4, streamProcessors.get(msg.stream).processDeletedRecord(msg.record)];
                case 8:
                    _e.sent();
                    return [3, 14];
                case 9:
                    (0, singer_node_1.log_info)("Received state message. Commit pending changes...");
                    return [4, Promise.all(Array.from(streamProcessors.values())
                            .map(function (processor) { return processor.commitPendingChanges(); }))];
                case 10:
                    _e.sent();
                    // The state is written only after every processor has committed.
                    (0, singer_node_1.write_line)(JSON.stringify(msg.value));
                    return [3, 14];
                case 11: return [4, processActiveSchemasMessage(msg, config)];
                case 12:
                    _e.sent();
                    return [3, 14];
                case 13:
                    (0, singer_node_1.log_warning)("Message type not handled at line ".concat(lineCount, " starting with [").concat(line.substring(0, 50), "]"));
                    return [3, 14];
                case 14: return [2];
            }
        });
    });
}
/**
 * Mutable state shared across the processing of one input stream.
 *
 * @param {string[]} streamsToReplace Streams whose tables are dropped when their schema message arrives.
 * @param existingTables              Latest result of `listTables()`.
 */
var ProcessingState = (function () {
    function ProcessingState(streamsToReplace, existingTables) {
        this.streamsToReplace = streamsToReplace;
        this.existingTables = existingTables;
    }
    return ProcessingState;
}());
/**
 * Reads the input stream line by line, processes each line with
 * `processLine`, then finalizes every stream processor.
 *
 * Lines are handled strictly one after another: each callback first waits for
 * the previous line's promise before starting the next `processLine` call.
 * Once reading is done, `finalizeProcessing()` is run on every processor
 * through a PromisePool whose concurrency is `config.batch_size`; any error
 * raised there is rethrown.
 *
 * @param inputStream                  Readable stream handed to `readline.createInterface`.
 * @param config                       Target configuration (reads `batch_size`; also passed to the connection and to line handling).
 * @param {string[]} [streamsToReplace=[]] Streams to drop and recreate; copied, the caller's array is not mutated.
 * @returns {Promise<void>}
 * @throws The error passed to the abort callback during record processing, if any.
 */
function processStream(inputStream, config, streamsToReplace) {
    if (streamsToReplace === void 0) { streamsToReplace = []; }
    return __awaiter(this, void 0, void 0, function () {
        var ch, state, _a, _b, lineCount, encounteredErr, rl, abort, streamProcessors, processLinePromise;
        var _this = this;
        return __generator(this, function (_c) {
            switch (_c.label) {
                case 0:
                    ch = new ClickhouseConnection_1["default"](config);
                    _a = ProcessingState.bind;
                    _b = [void 0, __spreadArray([], streamsToReplace, true)];
                    return [4, ch.listTables()];
                case 1:
                    // new ProcessingState(<copy of streamsToReplace>, <tables listed above>)
                    state = new (_a.apply(ProcessingState, _b.concat([_c.sent()])))();
                    lineCount = 0;
                    inputStream.on("error", function (err) {
                        (0, singer_node_1.log_fatal)("".concat(err.message));
                        throw new Error("READ ERROR ".concat(err));
                    });
                    rl = readline.createInterface({
                        input: inputStream
                    });
                    // Passed to processLine as `interrupt`: records the error and stops reading.
                    abort = function (err) {
                        encounteredErr = err;
                        (0, singer_node_1.log_error)(err.message);
                        (0, singer_node_1.log_info)("manually closing read stream");
                        rl.close();
                    };
                    streamProcessors = new Map();
                    processLinePromise = Promise.resolve();
                    return [4, (0, forAwaitOnMacroTaskQueue_1["default"])(rl[Symbol.asyncIterator](), function (line) { return __awaiter(_this, void 0, void 0, function () {
                            return __generator(this, function (_a) {
                                switch (_a.label) {
                                    case 0: return [4, processLinePromise];
                                    case 1:
                                        _a.sent();
                                        // Not awaited here: the next callback (or the code after the loop) awaits it.
                                        processLinePromise = processLine(line, config, ch, streamProcessors, state, lineCount++, abort);
                                        return [2];
                                }
                            });
                        }); })];
                case 2:
                    _c.sent();
                    // Wait for the last line, which no subsequent callback awaited.
                    return [4, processLinePromise];
                case 3:
                    _c.sent();
                    (0, singer_node_1.log_info)("done reading lines");
                    if (encounteredErr) {
                        throw encounteredErr;
                    }
                    return [4, promise_pool_1.PromisePool
                            .withConcurrency(config.batch_size)["for"](streamProcessors.values())
                            .handleError(function (error) { return __awaiter(_this, void 0, void 0, function () {
                            return __generator(this, function (_a) {
                                throw error;
                            });
                        }); })
                            .process(function (processor) { return __awaiter(_this, void 0, void 0, function () {
                            return __generator(this, function (_a) {
                                switch (_a.label) {
                                    case 0: return [4, processor.finalizeProcessing()];
                                    case 1: return [2, _a.sent()];
                                }
                            });
                        }); })];
                case 4:
                    _c.sent();
                    rl.close();
                    return [2];
            }
        });
    });
}
exports.processStream = processStream;
//# sourceMappingURL=processStream.js.map