UNPKG

kafka-streams

Version:
1,131 lines (969 loc) 29.1 kB
"use strict"; const EventEmitter = require("events"); const most = require("most"); const Promise = require("bluebird"); const uuid = require("uuid"); const debug = require("debug")("kafka-streams:streamdsl"); const KStorage = require("../KStorage.js"); const KafkaClient = require("../client/KafkaClient.js"); const { KeyCount, Sum, Max, Min } = require("../actions/index.js"); const { messageProduceHandle } = require("../messageProduceHandle.js"); const PRODUCE_TYPES = require("../produceTypes.js"); const NOOP = () => { }; const MESSAGE = "message"; const DEFAULT_AUTO_FLUSH_BUFFER_SIZE = 100; /** * Stream base class */ class StreamDSL { /** * Stream base class that wraps around a private most.js stream$ * and interacts with storages/actions and a kafka-client instance. * @param {string|Array<string>} topicName - can also be topics * @param {KStorage} storage * @param {KafkaClient} kafka * @param {boolean} isClone */ constructor(topicName, storage = null, kafka = null, isClone = false) { debug("stream-dsl from clone", isClone, "for", topicName); if (!isClone && (!kafka || !(kafka instanceof KafkaClient))) { throw new Error("kafka has to be an instance of KafkaClient."); } if (!storage || !(storage instanceof KStorage)) { throw new Error("storage hsa to be an instance of KStorage."); } //convenience if (!topicName) { this.noTopicProvided = true; } else { this.noTopicProvided = false; if (!Array.isArray(topicName)) { topicName = [topicName]; } } //no topics is allowed for produce only streams this.topicName = topicName || []; this.kafka = kafka; this.storage = storage; this.isClone = isClone; if (!(this.storage instanceof KStorage)) { throw new Error("storage must be an instance of KStorage."); } this._ee = new EventEmitter(); this.stream$ = most.fromEvent(MESSAGE, this._ee); this.produceAsTopic = false; this.outputTopicName = null; this.outputPartitionsCount = 1; this.produceType = PRODUCE_TYPES.SEND; this.produceVersion = 1; this.produceCompressionType = 0; this._kafkaStreams = null; this.PRODUCE_TYPES = PRODUCE_TYPES; this.DEFAULT_AUTO_FLUSH_BUFFER_SIZE = DEFAULT_AUTO_FLUSH_BUFFER_SIZE; } /** * dummy, should be overwritten */ start() { return Promise.reject("When inherting StreamDSL, the start method should be overwritten with connector logic."); } /** * returns a stats object with information * about the internal kafka clients * @returns {object} */ getStats() { return this.kafka ? this.kafka.getStats() : null } /** * returns the internal KStorage instance * @returns {KStorage} */ getStorage() { return this.storage; } /** * can be used to manually write message/events * to the internal stream$ * @param message {Object|Array<Object>} */ writeToStream(message) { if (!Array.isArray(message)) { return this._ee.emit("message", message); } message.forEach(_message => { this._ee.emit("message", _message); }); } /** * returns the internal most.js stream * @returns {Object} most.js stream */ getMost() { return this.stream$; } /** * returns a new most stream from the * given array * @param array * @returns {Stream<any>} */ getNewMostFrom(array = []) { return most.from(array); } /** * used to clone or during merges * resets the internal event emitter to the new stream * and replaces the internal stream with the merged new stream * @param newStream$ */ replaceInternalObservable(newStream$) { this._ee.removeAllListeners(MESSAGE); this._ee = new EventEmitter(); this.stream$ = most.merge(newStream$, most.fromEvent(MESSAGE, this._ee)); } /** * sets a handler for produce messages * (emits whenever kafka messages are produced/delivered) * events: produced, delivered * @param handler {module:events.internal} */ setProduceHandler(handler) { if (!handler || !(handler instanceof EventEmitter)) { throw new Error("ProduceHandler must be an instance of EventEmitter (events)."); } this.kafka.setProduceHandler(handler); } /** * creates (and returns) and sets a produce handler * for this stream instance * @returns {module:events.internal} */ createAndSetProduceHandler() { const ee = new EventEmitter(); this.setProduceHandler(ee); return ee; } /** * overwrites the internal kafkaStreams reference * @param reference */ setKafkaStreamsReference(reference) { this._kafkaStreams = reference; } /* * # # * ## ## * ### DSL ### * ## ## * # # */ /** * add more topic/s to the consumer * @param topicName {string|Array<string>} * @returns {StreamDSL} */ from(topicName) { if (!Array.isArray(topicName)) { topicName = [topicName]; } topicName.forEach(topic => { this.topicName.push(topic); }); if (this.noTopicProvided) { this.noTopicProvided = false; } return this; } /** * given a stream of promises, returns stream containing the fulfillment values * etl = Promise -> v * @param etl * @returns {StreamDSL} */ awaitPromises(etl) { this.stream$ = this.stream$.awaitPromises(etl); return this; } /** * simple synchronous map function * etl = v -> v2 * @param etl * @returns {StreamDSL} */ map(etl) { this.stream$ = this.stream$.map(etl); return this; } /** * map that expects etl to return a Promise * can be used to apply async maps to stream * etl = v -> Promise * @param etl * @returns {StreamDSL} */ asyncMap(etl) { this.stream$ = this.stream$.flatMap(value => most.fromPromise(etl(value))); return this; } /** * transform each etl in stream into a stream, * and then concatenate it onto the end of the resulting stream. * etl = v -> stream(v2) * @param etl * @returns {StreamDSL} */ concatMap(etl) { this.stream$ = this.stream$.concatMap(etl); return this; } /** * (do not use for side effects, * except for a closing operation at the end of the stream) * may not be used to chain * eff = v -> void * @param eff * @returns Promise{*} */ forEach(eff) { return this.stream$.forEach(eff); } /** * runs forEach on a multicast stream * you probably would not want to use this in production * @param eff * @param callback * @returns {StreamDSL} */ chainForEach(eff, callback = null) { this.stream$ = this.stream$.multicast(); this.stream$.forEach(eff).then(r => { if (callback) { callback(null, r); } }, e => { if (callback) { callback(e); } }); return this; } /** * (alternative to forEach if in the middle of a * stream operation chain) * use this for side-effects * errors in eff will break stream * @param eff */ tap(eff) { this.stream$ = this.stream$.tap(eff); return this; } /** * stream contains only events for which predicate * returns true * pred = v -> boolean * @param pred * @returns {StreamDSL} */ filter(pred) { this.stream$ = this.stream$.filter(pred); return this; } /** * will remove duplicate messages * be aware that this might take a lot of memory * @returns {StreamDSL} */ skipRepeats() { this.stream$ = this.stream$.skipRepeats(); return this; } /** * skips repeats per your definition * equals = (a,b) -> boolean * @param equals * @returns {StreamDSL} */ skipRepeatsWith(equals) { this.stream$ = this.stream$.skipRepeatsWith(equals); return this; } /** * skips the amount of messages * @param count * @returns {StreamDSL} */ skip(count) { this.stream$ = this.stream$.skip(count); return this; } /** * takes the first messages until count * and omits the rest * @param count * @returns {StreamDSL} */ take(count) { this.stream$ = this.stream$.take(count); return this; } multicast() { this.stream$ = this.stream$.multicast(); return this; } /** * easy string to array mapping * you can pass your delimiter * default is space * "bla blup" => ["bla", "blup"] * @param delimiter * @returns {StreamDSL} */ mapStringToArray(delimiter = " ") { return this.map(element => { if (!element || typeof element !== "string") { return element; } return element.split(delimiter); }); } /** * easy array to key-value object mapping * you can pass your own indices * default is 0,1 * ["bla", "blup"] => { key: "bla", value: "blup" } * @param keyIndex * @param valueIndex * @returns {StreamDSL} */ mapArrayToKV(keyIndex = 0, valueIndex = 1) { return this.map(element => { if (!Array.isArray(element)) { return element; } return { key: element[keyIndex], value: element[valueIndex] } }); } /** * easy string to key-value object mapping * you can pass your own delimiter and indices * default is " " and 0,1 * "bla blup" => { key: "bla", value: "blup" } * @param delimiter * @param keyIndex * @param valueIndex * @returns {StreamDSL} */ mapStringToKV(delimiter = " ", keyIndex = 0, valueIndex = 1) { this.mapStringToArray(delimiter); this.mapArrayToKV(keyIndex, valueIndex); return this; } /** * maps every stream event through JSON.parse * if its type is an object * (if parsing fails, the error object will be returned) * @returns {StreamDSL} */ mapJSONParse() { return this.map(string => { if (typeof string !== "string") { return string; } try { return JSON.parse(string); } catch (e) { return e; } }); } /** * maps every stream event through JSON.stringify * if its type is object * @returns {StreamDSL} */ mapStringify() { return this.map(object => { if (typeof object !== "object") { return object; } return JSON.stringify(object); }); } /** * maps an object type event with a Buffer key field * to an object event with a string key field * @returns {StreamDSL} */ mapBufferKeyToString() { return this.map(object => { if (typeof object !== "object" || !object.key) { return object; } if (Buffer.isBuffer(object.key)) { return object; } try { const key = object.key.toString("utf8"); if (key) { object.key = key; } } catch (_) { //empty } return object; }); } /** * maps an object type event with a Buffer value field * to an object event with a string value field * @returns {StreamDSL} */ mapBufferValueToString() { return this.map(object => { if (typeof object !== "object" || !object.value) { return object; } if (typeof object.value === "string") { return object; } try { const value = object.value.toString("utf8"); if (value) { object.value = value; } } catch (_) { //empty } return object; }); } /** * maps an object type event with a string value field * to an object event with (parsed) object value field * @returns {StreamDSL} */ mapStringValueToJSONObject() { return this.map(object => { if (typeof object !== "object" || !object.value) { return object; } if (typeof object.value === "object") { return object; } try { const value = JSON.parse(object.value); if (value) { object.value = value; } } catch (_) { //empty } return object; }); } /** * takes a buffer kafka message * and turns it into a json representation * buffer key -> string * buffer value -> string -> object * @returns {StreamDSL} */ mapJSONConvenience() { return this .mapBufferKeyToString() .mapBufferValueToString() .mapStringValueToJSONObject(); } /** * wraps an event value inside a kafka message object * the event value will be used as value of the kafka message * @param topic - optional * @returns {StreamDSL} */ wrapAsKafkaValue(topic = undefined) { return this.map(any => { return { opaqueKey: null, partitionKey: null, partition: null, key: null, value: any, topic }; }); } /** * maps every stream event's kafka message * right to its payload value * @returns {StreamDSL} */ mapWrapKafkaValue() { return this.map(message => { if (typeof message === "object" && typeof message.value !== "undefined") { return message.value; } return message; }); } /** * taps to the stream * counts messages and returns * callback once (when message count is reached) * with the current message at count * @param {number} count * @param {function} callback * @returns {StreamDSL} */ atThroughput(count = 1, callback) { let countState = 0; this.tap(message => { if (countState > count) { return; } countState++; if (count === countState) { callback(message); } }); return this; } /** * * default kafka format stringify * {} -> {payload, time, type, id} * getId can be a function to read the id from the message * e.g. getId = message -> message.id * @param type * @param getId * @returns {StreamDSL} */ mapToFormat(type = "unknown-publish", getId = null) { this.map(message => { const id = getId ? getId(message) : uuid.v4(); return { payload: message, time: (new Date()).toISOString(), type, id }; }); return this; } /** * default kafka format parser * {value: "{ payload: {} }" -> {} * @returns {StreamDSL} */ mapFromFormat() { this.map(message => { if (typeof message === "object") { return message.payload; } try { const object = JSON.parse(message); if (typeof object === "object") { return object.payload; } } catch (e) { //empty } return message; }); return this; } /** * maps elements into {time, value} objects * @param etl * @returns {StreamDSL} */ timestamp(etl) { if (!etl) { this.stream$ = this.stream$.timestamp(); return this; } this.stream$ = this.stream$.map(element => { return { time: etl(element), value: element }; }); return this; } /** * replace every element with the substitute value * @param substitute * @returns {StreamDSL} */ constant(substitute) { this.stream$ = this.stream$.constant(substitute); return this; } /** * mapping to incrementally accumulated results, * starting with the provided initial value. * @param eff * @param initial * @returns {StreamDSL} */ scan(eff, initial) { this.stream$ = this.stream$.scan(eff, initial); return this; } /** * slicing events from start ot end of index * @param start * @param end * @returns {StreamDSL} */ slice(start, end) { this.stream$ = this.stream$.slice(start, end); return this; } /** * contain events until predicate * returns false * m -> !!m * @param pred * @returns {StreamDSL} */ takeWhile(pred) { this.stream$ = this.stream$.takeWhile(pred); return this; } /** * contain events after predicate * returns false * @param pred * @returns {StreamDSL} */ skipWhile(pred) { this.stream$ = this.stream$.skipWhile(pred); return this; } /** * contain events until signal$ emits first event * signal$ must be a most stream instance * @param signal$ * @returns {StreamDSL} */ until(signal$) { this.stream$ = this.stream$.until(signal$); return this; } /** * contain all events after signal$ emits first event * signal$ must be a most stream instance * @param signal$ * @returns {StreamDSL} */ since(signal$) { this.stream$ = this.stream$.since(signal$); return this; } /** * Replace the end signal with a new stream returned by f. * Note that f must return a (most.js) stream. * @param f - function (must return a most stream) */ continueWith(f) { this.stream$ = this.stream$.continueWith(f); return this; } /** * reduce a stream to a single result * will return a promise * @param eff * @param initial * @returns Promise{*} */ reduce(eff, initial) { return this.stream$.reduce(eff, initial); } /** * runs reduce on a multicast stream * you probably would not want to use this in production * @param eff * @param initial * @param callback * @returns {StreamDSL} */ chainReduce(eff, initial, callback) { this.stream$ = this.stream$.multicast(); this.stream$.reduce(eff, initial).then(r => { if (callback) { callback(null, r); } }, e => { if (callback) { callback(e); } }); return this; } /** * drains the stream, equally to forEach * without iterator, returns a promise * @returns Promise{*} */ drain() { return this.stream$.drain(); } /** * limits rate events at most one per throttlePeriod * throttlePeriod = index count omit * @param throttlePeriod * @returns {StreamDSL} */ throttle(throttlePeriod) { this.stream$ = this.stream$.throttle(throttlePeriod); return this; } /** * delays every event in stream by given time * @param delayTime * @returns {StreamDSL} */ delay(delayTime) { this.stream$ = this.stream$.delay(delayTime); return this; } /** * wait for a burst of events and emit * only the last event * @param debounceTime * @returns {StreamDSL} */ debounce(debounceTime) { this.stream$ = this.stream$.debounce(debounceTime); return this; } /* * # # * ## ## * ### AGGREGATE ### * ## ## * # # */ /** * maps into counts per key * requires events to have a present key/value field * @param key * @param countFieldName * @returns {StreamDSL} */ countByKey(key = "key", countFieldName = "count") { const keyCount = new KeyCount(this.storage, key, countFieldName); this.asyncMap(keyCount.execute.bind(keyCount)); return this; } /** * maps into sums per key * requires events to have a present key/value field * @param key * @param fieldName * @param sumField * @returns {StreamDSL} */ sumByKey(key = "key", fieldName = "value", sumField = false) { const sum = new Sum(this.storage, key, fieldName, sumField); this.asyncMap(sum.execute.bind(sum)); return this; } /** * collects the smallest value * of the given field, will not alter * the events in the stream * use .getStorage().getMin() to get the * latest value which is stored * @param fieldName * @param minField * @returns {StreamDSL} */ min(fieldName = "value", minField = "min") { const min = new Min(this.storage, fieldName, minField); this.asyncMap(min.execute.bind(min)); return this; } /** * collects the greatest value * of the given field, will not alter * the events in the stream * use .getStorage().getMax() to get the * latest value which is stored * @param fieldName * @param maxField * @returns {StreamDSL} */ max(fieldName = "value", maxField = "max") { const max = new Max(this.storage, fieldName, maxField); this.asyncMap(max.execute.bind(max)); return this; } /* * # # * ## ## * ### JOINS ### * ## ## * # # */ /** * use this as base of a higher-order stream * and merge all child streams into a new stream * @private */ _join() { this.stream$ = most.join(this.stream$); return this; } /** * merge this stream with another, resulting a * stream with all elements from both streams * @param otherStream$ */ _merge(otherStream$) { this.stream$ = most.merge(this.stream$, otherStream$); return this; } /** * merge this stream with another stream * by combining (zipping) every event from each stream * to a single new event on the new stream * combine = (e1, e2) -> e1 + e2 * @param otherStream$ * @param combine */ _zip(otherStream$, combine) { this.stream$ = this.stream$.zip(combine, otherStream$); return this; } /** * merge this stream with another stream * by combining (while awaiting) every event from each stream * combine = (e1, e2) -> e1 + e2 * @param otherStream$ * @param combine * @returns {StreamDSL} * @private */ _combine(otherStream$, combine) { this.stream$ = this.stream$.combine(combine, otherStream$); return this; } /** * merge this stream with another on behalf of * a sample stream * combine = (e1, e2) -> e1 + e2 * @param sampleStream$ * @param otherStream$ * @param combine * @returns {StreamDSL} * @private */ _sample(sampleStream$, otherStream$, combine) { this.stream$ = sampleStream$.sample(combine, this.stream$, otherStream$); return this; } /* * # # * ## ## * ### OUTPUT ### * ## ## * # # */ /** * define an output topic * when passed to KafkaStreams this will trigger * the stream$ result to be produced to the given topic name * if the instance is a clone, this function call will have to setup a kafka producer * returns a promise * @param {string|Object} topic - optional (can also be an object, containing the same parameters as fields) * @param {number} outputPartitionsCount - optional * @param {string} produceType - optional * @param {number} version - optional * @param {number} compressionType - optional * @param {function} producerErrorCallback - optional * @param {Object} outputKafkaConfig - optional * @returns {Promise.<boolean>} */ to(topic = undefined, outputPartitionsCount = 1, produceType = "send", version = 1, compressionType = 0, producerErrorCallback = null, outputKafkaConfig = null) { return new Promise((resolve, reject) => { if (this.produceAsTopic) { return reject(new Error(".to() has already been called on this dsl instance.")); } this.produceAsTopic = true; //map object if first param is a config object if (topic && typeof topic === "object") { if (topic.outputPartitionsCount) { outputPartitionsCount = topic.outputPartitionsCount; } if (topic.produceType) { produceType = topic.produceType; } if (topic.version) { version = topic.version; } if (topic.compressionType) { compressionType = topic.compressionType; } if (topic.producerErrorCallback) { producerErrorCallback = topic.producerErrorCallback; } if (topic.outputKafkaConfig) { outputKafkaConfig = topic.outputKafkaConfig; } if (topic.topic) { topic = topic.topic; } } produceType = produceType || ""; produceType = produceType.toLowerCase(); const produceTypes = Object.keys(PRODUCE_TYPES).map(k => PRODUCE_TYPES[k]); if (produceTypes.indexOf(produceType) === -1) { return reject(new Error(`produceType must be a supported types: ${produceTypes.join(", ")}.`)); } this.outputTopicName = topic; this.outputPartitionsCount = outputPartitionsCount; this.produceType = produceType; this.produceVersion = version; this.produceCompressionType = compressionType; if (!this.isClone) { return resolve(true); } //this instance is a clone, meaning that it has been created //as the result of a KStream or KTable merge //which requires the creation of a Producer for .to() to work first if (!this.kafka || !this.kafka.setupProducer) { return reject(new Error("setting .to() on a cloned KStream requires a kafka client to injected during merge.")); } if (!this._kafkaStreams) { return reject(new Error("KafkaStreams reference missing on stream instance, failed to setup to(..)")); } const oldProducerErrorCallback = producerErrorCallback; producerErrorCallback = (error) => { if (oldProducerErrorCallback) { oldProducerErrorCallback(error); } this._kafkaStreams.emit("error", error); }; this.kafka.setupProducer(this.outputTopicName, this.outputPartitionsCount, resolve, producerErrorCallback || NOOP, outputKafkaConfig); this.forEach(message => { messageProduceHandle( this.kafka, message, this.outputTopicName, this.produceType, this.produceCompressionType, this.produceVersion, producerErrorCallback ); }); }); } } module.exports = StreamDSL;