frolyk
Version:
Stream processing library for Kafka in Node
201 lines • 10 kB
JavaScript
;
// Runs a generator function as if it were an async function: every yielded value is
// awaited, its outcome is fed back into the generator, and the returned promise settles
// with the generator's return value (or with whatever it throws).
//   thisArg     - `this` binding for the generator function
//   _arguments  - argument list for the generator function (defaults to none)
//   P           - promise constructor to use (defaults to the global Promise)
//   generator   - the generator function to drive
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Pass instances of P through untouched; box anything else in an already-resolved P.
    function adopt(value) {
        if (value instanceof P) {
            return value;
        }
        return new P(function (resolve) { resolve(value); });
    }
    if (!P) {
        P = Promise;
    }
    return new P(function (resolve, reject) {
        // Resume the generator through `next` or `throw`; anything thrown while doing so
        // rejects the outer promise.
        function drive(method, input) {
            try {
                step(generator[method](input));
            }
            catch (e) {
                reject(e);
            }
        }
        function fulfilled(value) { drive("next", value); }
        function rejected(reason) { drive("throw", reason); }
        // Either finish with the generator's return value or wait on what it yielded.
        function step(result) {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(fulfilled, rejected);
        }
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
// Normalizes a required module so callers can always read `.default`: modules flagged
// with `__esModule` are returned as they are, anything else is wrapped as { default: mod }.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const events_1 = __importDefault(require("events"));
const local_1 = __importDefault(require("./assignment-contexts/local"));
const kafka_1 = __importDefault(require("./assignment-contexts/kafka"));
const kafkajs_1 = require("kafkajs");
const streams_1 = __importDefault(require("./streams"));
const offsets_1 = require("./offsets");
const highland_1 = __importDefault(require("highland"));
const lodash_flatmap_1 = __importDefault(require("lodash.flatmap"));
const v4_1 = __importDefault(require("uuid/v4"));
// Module-wide counter: each Task takes the current value as its `id` and then increments it.
var idSeq = 0;
/**
 * A stream-processing task bound to one consumer group.
 *
 * Sources (topics) and their processors are registered up front. `inject()` builds local
 * assignment contexts for them, while `start()` joins the consumer group through kafkajs
 * and (re)builds kafka-backed assignment contexts every time the group hands this member
 * a new assignment.
 *
 * Events emitted on `task.events`: 'assignment-receive', 'session-stop',
 * 'session-start' (with the session sequence number), 'stop' and 'error'.
 */
class Task {
    /**
     * @param {object} config
     * @param {*} config.group - consumer group; stringified into the kafkajs `groupId`
     *   and stamped on every assignment.
     * @param {object} [config.connection] - kafkajs client options; required by `start()`.
     * @param {object} [config.consumer] - extra options for `kafka.consumer()`.
     * @param {object} [config.admin] - options for `kafka.admin()`.
     */
    constructor({ group, connection, consumer, admin }) {
        this.id = idSeq++;
        this.events = new events_1.default();
        this.sources = [];
        this.group = group;
        this.options = {
            admin,
            connection,
            consumer
        };
        this.assignedContexts = [];
        this.sessionSeq = 0;
        this.reassigning = Promise.resolve();
    }
    /**
     * Registers a topic as a source, or returns the source already registered under that
     * name (in which case `options` are ignored).
     *
     * @param {string} topicName
     * @param {object} [options]
     * @param {*} [options.offsetReset] - defaults to `LogicalOffset.Latest`.
     * @returns {{topicName: string, processors: Array, offsetReset: *}}
     */
    source(topicName, options = {}) {
        // A destructuring default (rather than Object.assign) so that an explicitly passed
        // `offsetReset: undefined` still falls back to Latest; `|| {}` keeps `null` options working.
        const { offsetReset = offsets_1.LogicalOffset.Latest } = options || {};
        const existingSource = this.sources.find(({ topicName: t }) => {
            return t === topicName;
        });
        if (existingSource)
            return existingSource;
        const newSource = {
            topicName,
            processors: [],
            offsetReset
        };
        this.sources.push(newSource);
        return newSource;
    }
    /**
     * Attaches a processor setup function to a source previously created by `source()`.
     *
     * @param {object} source - matched against registered sources by `topicName`.
     * @param {Function} setupProcessing
     * @returns {object} the registered source the processor was added to.
     * @throws {Error} when no source with that topic name is registered on this task
     *   (including when `source` is missing altogether).
     */
    processor(source, setupProcessing) {
        const existingSource = source
            ? this.sources.find(({ topicName }) => source.topicName === topicName)
            : undefined;
        if (!existingSource) {
            throw new Error('Source must be created through same task that processes it');
        }
        existingSource.processors.push(setupProcessing);
        return existingSource;
    }
    /**
     * Builds local assignment contexts for the given assignment(s), wired to the processors
     * registered for each topic (none when the topic has no registered source).
     *
     * @param {{topic: string, partition: *}|Array<{topic: string, partition: *}>} assignments
     * @returns {Promise<*>} an array of contexts when given an array, otherwise one context.
     */
    inject(assignments) {
        return __awaiter(this, void 0, void 0, function* () {
            const multiple = Array.isArray(assignments);
            const normalized = [].concat(assignments); // normalize to array
            const group = this.group;
            const contexts = yield Promise.all(normalized.map(({ topic, partition }) => __awaiter(this, void 0, void 0, function* () {
                const source = this.sources.find(({ topicName }) => topicName === topic);
                const assignment = { topic, partition, group };
                const processors = source ? source.processors : [];
                return yield local_1.default({ assignment, processors });
            })));
            return multiple ? contexts : contexts[0];
        });
    }
    /**
     * Creates the kafkajs client, consumer and admin, subscribes to every registered source
     * and starts the message streams. Each GROUP_JOIN event from the consumer is turned into
     * a flat list of `{ topic, partition }` pairs and handed to `receiveAssignments()`.
     *
     * @returns {Promise<void>}
     * @throws {Error} (as a rejection) when the task was created without `connection` options.
     */
    start() {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this.options.connection) {
                throw new Error('Task must be configured with kafka connection options to start');
            }
            const connectionConfig = this.options.connection;
            const clientId = `frolyk-${v4_1.default()}`;
            const kafka = this.kafka = new kafkajs_1.Kafka(Object.assign({ clientId }, connectionConfig));
            const consumerConfig = this.options.consumer || {};
            const adminConfig = this.options.admin || {};
            const consumer = this.consumer = kafka.consumer(Object.assign(Object.assign({}, consumerConfig), { groupId: `${this.group}` }));
            // NOTE(review): the admin client is created here but never explicitly connected in
            // this class - confirm whoever uses it (the kafka assignment context?) connects it.
            this.admin = kafka.admin(adminConfig);
            const streams = this.streams = streams_1.default(consumer);
            // Re-emit consumer GROUP_JOIN events on a plain EventEmitter that Highland can wrap.
            const consumerEvents = new events_1.default();
            consumer.on(consumer.events.GROUP_JOIN, (...args) => consumerEvents.emit(consumer.events.GROUP_JOIN, ...args));
            // TODO: add handling of consumer crashes, fetches, stopping, disconnects, batch stats collection, etc.
            highland_1.default(consumer.events.GROUP_JOIN, consumerEvents, ({ payload: { memberAssignment } }) => {
                // memberAssignment maps topic name -> partitions; flatten it to one entry per partition.
                const topicNames = Object.keys(memberAssignment);
                const topicPartitions = topicNames.map((topic) => {
                    return { topic, partitions: memberAssignment[topic] };
                });
                return lodash_flatmap_1.default(topicPartitions, ({ topic, partitions }) => {
                    return partitions.map((partition) => ({ topic, partition }));
                });
            }).each((newAssignments) => {
                this.receiveAssignments(newAssignments);
            });
            yield consumer.connect();
            for (const { topicName: topic, offsetReset } of this.sources) {
                yield consumer.subscribe({ topic, fromBeginning: offsets_1.isEarliest(offsetReset) });
            }
            streams.start();
        });
    }
    /**
     * Stops all currently assigned contexts, disconnects the consumer and admin clients
     * (when `start()` created them) and finally emits 'stop'.
     *
     * @returns {Promise<void>}
     */
    stop() {
        return __awaiter(this, void 0, void 0, function* () {
            const { admin, consumer, events } = this;
            yield this.stopAssignedContexts();
            if (consumer)
                yield consumer.disconnect();
            if (admin)
                yield admin.disconnect();
            events.emit('stop');
        });
    }
    /**
     * Entry point for a new group assignment: emits 'assignment-receive' and kicks off a
     * reassignment, keeping its promise in `this.reassigning`. Failures are emitted as 'error'.
     *
     * @param {Array<{topic: string, partition: *}>} newAssignments
     */
    receiveAssignments(newAssignments) {
        this.events.emit('assignment-receive');
        this.reassigning = this.reassign(newAssignments).catch((err) => {
            this.events.emit('error', err);
        });
    }
    /**
     * Replaces the current processing session with one for `newAssignments`: stops the
     * assigned contexts, sets up a kafka context for every assignment whose topic has a
     * registered source, waits for the previous session to end, then starts the new contexts
     * and begins processing their streams.
     *
     * @param {Array<{topic: string, partition: *}>} newAssignments
     * @returns {Promise<void>}
     */
    reassign(newAssignments) {
        return __awaiter(this, void 0, void 0, function* () {
            const reassignmentSeq = ++this.sessionSeq;
            // This body runs synchronously up to the first yield, so `this.reassigning` is still
            // the previous reassignment here (receiveAssignments overwrites it only afterwards).
            yield this.reassigning; // wait for previous reassignment to have finished first
            // TODO: Figure out how we can test this. The hard bit is figuring out how we can trigger
            // two additional rebalances while we're in the middle of setting a current one up.
            /* istanbul ignore next */
            if (reassignmentSeq !== this.sessionSeq) {
                // Additional reassignments were called since this one, while we were waiting for the last to finish,
                // so lets discard these outdated assignments.
                return;
            }
            const { admin, consumer, kafka, streams } = this;
            yield this.stopAssignedContexts();
            if (this.processingSession) {
                this.events.emit('session-stop');
            }
            // We're using Highland here to control concurrency, limiting ourselves to setting up 4 assignments
            // concurrently at any given time.
            const newSessionContexts = yield highland_1.default(newAssignments)
                .filter(({ topic }) => !!this.sources.find(({ topicName }) => topicName === topic))
                .map(({ topic, partition }) => __awaiter(this, void 0, void 0, function* () {
                const source = this.sources.find(({ topicName }) => topicName === topic);
                const assignment = { topic, partition, group: this.group };
                const { processors } = source;
                const stream = streams.stream({ topic, partition });
                return kafka_1.default({
                    assignment,
                    admin,
                    consumer,
                    createProducer: (...args) => kafka.producer(...args),
                    processors,
                    stream
                });
            }))
                .map((awaiting) => highland_1.default(awaiting))
                .mergeWithLimit(4) // setup 4 assignments at once
                .collect()
                // TODO: add specific logging for failing of assignment setup
                .toPromise(Promise);
            // wait for all processing of previous session to have ended
            if (this.processingSession) {
                // Only wait for it to settle. A failure of the previous session was already
                // emitted as 'error' when it happened; re-throwing it here would abandon the
                // contexts just created and make every later reassignment fail the same way.
                yield this.processingSession.catch(() => { });
            }
            this.assignedContexts = newSessionContexts;
            // start processing for all assignments concurrently
            yield Promise.all(newSessionContexts.map((context) => context.start()));
            this.events.emit('session-start', reassignmentSeq);
            this.processingSession = highland_1.default(newSessionContexts)
                .map((context) => context.stream)
                .merge() // process all messages within a session at the same time
                .last() // hold on to last processed result
                .toPromise(Promise); // allow monitoring of when processing ends
            this.processingSession.catch((err) => {
                this.events.emit('error', err);
            });
        });
    }
    /**
     * Stops every context in `this.assignedContexts` and ends the stream of its
     * topic-partition. Once all of them stopped successfully the list is emptied, so a
     * repeated call (a second `stop()`, or a rebalance after a failed setup) does not stop
     * the same contexts again. If stopping fails the list is left intact for a retry.
     *
     * @returns {Promise<void>}
     */
    stopAssignedContexts() {
        return __awaiter(this, void 0, void 0, function* () {
            const { assignedContexts, streams } = this;
            yield Promise.all(assignedContexts.map((context) => __awaiter(this, void 0, void 0, function* () {
                const { topic, partition } = context;
                const stream = streams.stream({ topic, partition });
                yield context.stop();
                stream.end();
            })));
            // Only clear when nobody replaced the list while we were waiting.
            if (this.assignedContexts === assignedContexts) {
                this.assignedContexts = [];
            }
        });
    }
}
/**
 * Factory for Task instances; this function is what the module exports as its default.
 *
 * @param {object} config - handed to the Task constructor unchanged.
 * @returns {Task} the newly constructed task.
 */
function createTask(config) {
    const task = new Task(config);
    return task;
}
exports.default = createTask;
//# sourceMappingURL=task.js.map