UNPKG

frolyk

Version:

Stream processing library for Kafka in Node

201 lines 10 kB
"use strict";
// TypeScript-emitted helper: drives a generator function as if it were an async function,
// resolving each yielded value as a promise and feeding the result back into the generator.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
// TypeScript-emitted helper: wraps a plain CommonJS export so it can be read through `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const events_1 = __importDefault(require("events"));
const local_1 = __importDefault(require("./assignment-contexts/local"));
const kafka_1 = __importDefault(require("./assignment-contexts/kafka"));
const kafkajs_1 = require("kafkajs");
const streams_1 = __importDefault(require("./streams"));
const offsets_1 = require("./offsets");
const highland_1 = __importDefault(require("highland"));
const lodash_flatmap_1 = __importDefault(require("lodash.flatmap"));
const v4_1 = __importDefault(require("uuid/v4"));

// Module-wide counter used to hand every Task instance a distinct numeric id.
let idSeq = 0;

/**
 * A task groups a set of topic sources and their processors under one consumer group.
 * It can either be driven by injected assignments (`inject`) or by a real Kafka consumer
 * (`start` / `stop`), in which case assignments are rebuilt on every group join.
 *
 * Events emitted on `task.events`: 'assignment-receive', 'session-stop', 'session-start',
 * 'stop' and 'error'.
 */
class Task {
    /**
     * @param {object} config
     * @param {*} config.group - consumer group; stringified into the kafkajs `groupId` on start.
     * @param {object} [config.connection] - kafkajs client options; required by `start()`.
     * @param {object} [config.consumer] - extra kafkajs consumer options.
     * @param {object} [config.admin] - kafkajs admin client options.
     */
    constructor({ group, connection, consumer, admin }) {
        this.id = idSeq++;
        this.events = new events_1.default();
        this.sources = [];
        this.group = group;
        this.options = { admin, connection, consumer };
        this.assignedContexts = [];
        // Incremented for every reassignment; lets a queued reassignment detect it is outdated.
        this.sessionSeq = 0;
        // Promise of the most recently requested reassignment; reassignments chain on it.
        this.reassigning = Promise.resolve();
    }

    /**
     * Registers a topic as a source of this task, or returns the already registered source.
     *
     * @param {string} topicName
     * @param {object} [options]
     * @param {*} [options.offsetReset] - defaults to `LogicalOffset.Latest`. Ignored when the
     *   topic was registered before, since the existing source is returned unchanged.
     * @returns {{topicName: string, processors: Function[], offsetReset: *}}
     */
    source(topicName, options = {}) {
        const { offsetReset } = Object.assign({ offsetReset: offsets_1.LogicalOffset.Latest }, options);
        const existingSource = this.sources.find(({ topicName: t }) => t === topicName);
        if (existingSource) {
            return existingSource;
        }
        const newSource = { topicName, processors: [], offsetReset };
        this.sources.push(newSource);
        return newSource;
    }

    /**
     * Attaches a processor setup function to a source created through this task.
     *
     * @param {{topicName: string}} source
     * @param {Function} setupProcessing
     * @returns {object} the task's own source record the processor was added to.
     * @throws {Error} when no source with that topic name was registered on this task.
     */
    processor(source, setupProcessing) {
        const existingSource = this.sources.find(({ topicName }) => source.topicName === topicName);
        if (!existingSource) {
            throw new Error('Source must be created through same task that processes it');
        }
        existingSource.processors.push(setupProcessing);
        return existingSource;
    }

    /**
     * Creates local assignment contexts for the given assignment(s) without touching Kafka.
     *
     * @param {{topic: string, partition: *}|Array<{topic: string, partition: *}>} assignments
     * @returns {Promise<*>} an array of contexts when an array was passed, otherwise the
     *   single context.
     */
    inject(assignments) {
        return __awaiter(this, void 0, void 0, function* () {
            const multiple = Array.isArray(assignments);
            const normalized = [].concat(assignments); // normalize to array
            const group = this.group;
            const contexts = yield Promise.all(normalized.map(({ topic, partition }) => __awaiter(this, void 0, void 0, function* () {
                const source = this.sources.find(({ topicName }) => topicName === topic);
                const assignment = { topic, partition, group };
                // Topics without a registered source still get a context, just with no processors.
                const processors = source ? source.processors : [];
                return yield local_1.default({ assignment, processors });
            })));
            return multiple ? contexts : contexts[0];
        });
    }

    /**
     * Connects to Kafka, subscribes to every registered source and starts streaming.
     * Sets `this.kafka`, `this.consumer`, `this.admin` and `this.streams`.
     *
     * @returns {Promise<void>}
     * @throws {Error} (as rejection) when the task has no connection options.
     */
    start() {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.options.connection) {
                throw new Error('Task must be configured with kafka connection options to start');
            }
            const connectionConfig = this.options.connection;
            const clientId = `frolyk-${v4_1.default()}`;
            // Options in connectionConfig win over the generated clientId.
            const kafka = this.kafka = new kafkajs_1.Kafka(Object.assign({ clientId }, connectionConfig));
            const consumerConfig = this.options.consumer || {};
            const adminConfig = this.options.admin || {};
            // groupId is applied last so consumer options cannot override the task's group.
            const consumer = this.consumer = kafka.consumer(Object.assign(Object.assign({}, consumerConfig), { groupId: `${this.group}` }));
            this.admin = kafka.admin(adminConfig);
            const streams = this.streams = streams_1.default(consumer);

            // Forward GROUP_JOIN events onto a plain EventEmitter, which is what gets handed to
            // highland below.
            const consumerEvents = new events_1.default();
            consumer.on(consumer.events.GROUP_JOIN, (...args) => consumerEvents.emit(consumer.events.GROUP_JOIN, ...args));
            // TODO: add handling of consumer crashes, fetches, stopping, disconnects, batch stats collection, etc.

            // Turn every group join into a flat list of { topic, partition } and hand it to
            // receiveAssignments.
            highland_1.default(consumer.events.GROUP_JOIN, consumerEvents, ({ payload: { memberAssignment } }) => {
                const topicNames = Object.keys(memberAssignment);
                const topicPartitions = topicNames.map((topic) => ({ topic, partitions: memberAssignment[topic] }));
                return lodash_flatmap_1.default(topicPartitions, ({ topic, partitions }) => {
                    return partitions.map((partition) => ({ topic, partition }));
                });
            }).each((newAssignments) => {
                this.receiveAssignments(newAssignments);
            });

            yield consumer.connect();
            // Subscriptions are issued one at a time, in source registration order.
            for (const { topicName: topic, offsetReset } of this.sources) {
                yield consumer.subscribe({ topic, fromBeginning: offsets_1.isEarliest(offsetReset) });
            }
            streams.start();
        });
    }

    /**
     * Stops all assigned contexts, disconnects the consumer and admin clients (when present)
     * and emits 'stop'.
     *
     * @returns {Promise<void>}
     */
    stop() {
        return __awaiter(this, void 0, void 0, function* () {
            const { admin, consumer, events } = this;
            yield this.stopAssignedContexts();
            if (consumer) {
                yield consumer.disconnect();
            }
            if (admin) {
                yield admin.disconnect();
            }
            events.emit('stop');
        });
    }

    /**
     * Entry point for new assignments: emits 'assignment-receive' and queues a reassignment.
     * A failing reassignment is reported through the 'error' event instead of rejecting.
     *
     * @param {Array<{topic: string, partition: *}>} newAssignments
     */
    receiveAssignments(newAssignments) {
        this.events.emit('assignment-receive');
        this.reassigning = this.reassign(newAssignments).catch((err) => {
            this.events.emit('error', err);
        });
    }

    /**
     * Replaces the current processing session with one built from `newAssignments`.
     * Waits for the previously queued reassignment first, and bails out when a newer
     * reassignment has been requested in the meantime.
     *
     * @param {Array<{topic: string, partition: *}>} newAssignments
     * @returns {Promise<void>}
     */
    reassign(newAssignments) {
        return __awaiter(this, void 0, void 0, function* () {
            const reassignmentSeq = ++this.sessionSeq;
            yield this.reassigning; // wait for previous reassigment to have finished first
            // TODO: Figure out how we can test this. The hard bit is figuring out how we can trigger
            // two additional rebalances while we're in the middle of setting a current one up.
            /* istanbul ignore next */
            if (reassignmentSeq !== this.sessionSeq) {
                // Additional reassignments were called since this one, while we were waiting for the last to finish,
                // so lets discard these outdated assignments.
                return;
            }
            const { admin, consumer, kafka, streams } = this;
            yield this.stopAssignedContexts();
            if (this.processingSession) {
                this.events.emit('session-stop');
            }
            // We're using Highland here to control concurrency, limiting ourselves to setting up 4 assignments
            // concurrently at any given time.
            const newSessionContexts = yield highland_1.default(newAssignments)
                // Assignments for topics this task has no source for are dropped.
                .filter(({ topic }) => !!this.sources.find(({ topicName }) => topicName === topic))
                .map(({ topic, partition }) => __awaiter(this, void 0, void 0, function* () {
                    const source = this.sources.find(({ topicName }) => topicName === topic);
                    const assignment = { topic, partition, group: this.group };
                    const { processors } = source;
                    const stream = streams.stream({ topic, partition });
                    return kafka_1.default({
                        assignment,
                        admin,
                        consumer,
                        createProducer: (...args) => kafka.producer(...args),
                        processors,
                        stream
                    });
                }))
                .map((awaiting) => highland_1.default(awaiting))
                .mergeWithLimit(4) // setup 4 assignments at once
                .collect()
                // TODO: add specific logging for failing of assignment setup
                .toPromise(Promise);
            // wait for all processing of previous session to have ended
            if (this.processingSession) {
                // A failed previous session was already reported through the 'error' event by the
                // handler attached when that session was created. Re-throwing it here would abort
                // this (and, because the rejected promise stays in place, every later)
                // reassignment, so we only wait for it to settle.
                yield this.processingSession.catch(() => { });
            }
            this.assignedContexts = newSessionContexts;
            // start processing for all assignments concurrently
            yield Promise.all(newSessionContexts.map((context) => context.start()));
            this.events.emit('session-start', reassignmentSeq);
            this.processingSession = highland_1.default(newSessionContexts)
                .map((context) => context.stream)
                .merge() // process all messages within a session at the same time
                .last() // hold on to last processed result
                .toPromise(Promise); // allow monitoring of when processing ends
            this.processingSession.catch((err) => {
                this.events.emit('error', err);
            });
        });
    }

    /**
     * Stops every currently assigned context and ends the stream obtained from
     * `this.streams` for that context's topic/partition.
     *
     * @returns {Promise<void>}
     */
    stopAssignedContexts() {
        return __awaiter(this, void 0, void 0, function* () {
            const { assignedContexts, streams } = this;
            yield Promise.all(assignedContexts.map((context) => __awaiter(this, void 0, void 0, function* () {
                const { topic, partition } = context;
                // The stream is looked up before the context is stopped, and only ended afterwards.
                const stream = streams.stream({ topic, partition });
                yield context.stop();
                stream.end();
            })));
        });
    }
}

/**
 * Factory for tasks; this is the module's default export.
 *
 * @param {object} config - passed straight to the Task constructor.
 * @returns {Task}
 */
function createTask(config) {
    return new Task(config);
}
exports.default = createTask;
//# sourceMappingURL=task.js.map