/**
 * @aws-lambda-powertools/kafka
 *
 * Utility to easily handle message deserialization and parsing of Kafka events
 * in AWS Lambda functions.
 */

import { isNull, isRecord } from '@aws-lambda-powertools/commons/typeutils';
import { deserialize as deserializeJson } from './deserializer/json.js';
import { deserialize as deserializePrimitive } from './deserializer/primitive.js';
import {
  KafkaConsumerDeserializationError,
  KafkaConsumerError,
  KafkaConsumerMissingSchemaError,
  KafkaConsumerParserError,
} from './errors.js';

/**
 * Type guard to assert that the event is a valid {@link MSKEvent | `MSKEvent`}.
 *
 * @param event - The event to validate, expected to be an MSKEvent.
 */
const assertIsMSKEvent = (event) => {
  if (
    !isRecord(event) ||
    !isRecord(event.records) ||
    !Object.values(event.records).every((arr) => Array.isArray(arr))
  ) {
    throw new KafkaConsumerError('Event is not a valid MSKEvent. Expected an object with a "records" property.');
  }
  return true;
};

/**
 * Deserialize Kafka message headers from an array of header objects.
 *
 * It returns `null` if the headers are `null`, or an array of header objects
 * where each header value is decoded as a UTF-8 string.
 *
 * @param headers - An array of header objects, where each object maps header keys (string)
 * to header values (`number[]`), representing the raw bytes of each header value -
 * e.g. `[{ "headerKey": [104, 101, 108, 108, 111] }]`
 */
const deserializeHeaders = (headers) => {
  if (headers === null) {
    return null;
  }
  const result = [];
  for (const header of headers) {
    const entries = [];
    for (const [headerKey, headerValue] of Object.entries(header)) {
      entries.push([headerKey, Buffer.from(headerValue).toString('utf-8')]);
    }
    result.push(Object.fromEntries(entries));
  }
  return result;
};

/**
 * Deserialize a base64-encoded value using the provided schema configuration.
 *
 * It returns the deserialized value, which may be a string, object, or other type depending on the schema type.
 *
 * @param value - The base64-encoded string to deserialize.
 * @param config - The schema configuration to use for deserialization. See {@link SchemaConfigValue | `SchemaConfigValue`}.
 * If not provided, the value is decoded as a UTF-8 string.
 */
const deserialize = ({
  value,
  deserializer,
  config,
  schemaMetadata,
}) => {
  if (config === undefined) {
    return deserializer(value);
  }
  if (config.type === 'json') {
    return deserializer(value);
  }
  if (config.type === 'avro') {
    if (!config.schema) {
      throw new KafkaConsumerMissingSchemaError('Schema string is required for avro deserialization');
    }
    return deserializer(value, config.schema);
  }
  /* v8 ignore else -- @preserve */
  if (config.type === 'protobuf') {
    if (!config.schema) {
      throw new KafkaConsumerMissingSchemaError('Schema string is required for protobuf deserialization');
    }
    return deserializer(value, config.schema, schemaMetadata);
  }
};

/**
 * Get the deserializer function based on the provided type.
 *
 * @param type - The type of deserializer to use. Supported types are: `json`, `avro`, `protobuf`, or `undefined`.
 * If `undefined`, it defaults to deserializing as a primitive string.
 */
const getDeserializer = async (type) => {
  if (!type) {
    return deserializePrimitive;
  }
  if (type === 'json') {
    return deserializeJson;
  }
  if (type === 'protobuf') {
    const deserializer = await import('./deserializer/protobuf.js');
    return deserializer.deserialize;
  }
  if (type === 'avro') {
    const deserializer = await import('./deserializer/avro.js');
    return deserializer.deserialize;
  }
  throw new KafkaConsumerDeserializationError(`Unsupported deserialization type: ${type}.
Supported types are: json, avro, protobuf.`);
};
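
/*
 * Illustrative sketch (not part of the module) of how the config-driven
 * dispatch above behaves for a JSON-configured value. The payload is
 * hypothetical; based on its usage here, `deserializeJson` is expected to
 * base64-decode the value and parse it as JSON.
 *
 *   const value = Buffer.from(JSON.stringify({ id: 1 })).toString('base64');
 *   const deserializer = await getDeserializer('json');
 *   deserialize({ value, deserializer, config: { type: 'json' } });
 *   // -> { id: 1 }
 *
 * An unrecognized type such as 'xml' would instead make `getDeserializer`
 * throw a KafkaConsumerDeserializationError.
 */
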
/**
 * Parse a value against a provided schema using the `~standard` property for validation.
 *
 * @param value - The value to parse against the schema.
 * @param schema - The schema to validate against, which should be a {@link StandardSchemaV1 | `Standard Schema V1`} object.
 */
const parseSchema = (value, schema) => {
  const result = schema['~standard'].validate(value);
  /* v8 ignore next -- @preserve */
  if (result instanceof Promise)
    throw new KafkaConsumerParserError('Schema parsing supports only synchronous validation');
  if (result.issues) {
    throw new KafkaConsumerParserError('Schema validation failed', {
      cause: result.issues,
    });
  }
  return result.value;
};

/**
 * Deserialize a single record from an MSK event.
 *
 * @param record - A single record from the MSK event.
 * @param config - The schema configuration for deserializing the record's key and value.
 */
const deserializeRecord = async (record, config) => {
  const { key, value, headers, valueSchemaMetadata, keySchemaMetadata, ...rest } = record;
  const { key: keyConfig, value: valueConfig } = config || {};
  const deserializerKey = await getDeserializer(keyConfig?.type);
  const deserializerValue = await getDeserializer(valueConfig?.type);
  return {
    ...rest,
    get key() {
      if (key === undefined || key === '') {
        return undefined;
      }
      if (isNull(key)) return null;
      const deserializedKey = deserialize({
        value: key,
        deserializer: deserializerKey,
        config: keyConfig,
        schemaMetadata: keySchemaMetadata,
      });
      return keyConfig?.parserSchema
        ? parseSchema(deserializedKey, keyConfig.parserSchema)
        : deserializedKey;
    },
    originalKey: key,
    get value() {
      const deserializedValue = deserialize({
        value: value,
        deserializer: deserializerValue,
        config: valueConfig,
        schemaMetadata: valueSchemaMetadata,
      });
      return valueConfig?.parserSchema
        ? parseSchema(deserializedValue, valueConfig.parserSchema)
        : deserializedValue;
    },
    originalValue: value,
    get headers() {
      return deserializeHeaders(headers);
    },
    originalHeaders: headers,
    valueSchemaMetadata,
    keySchemaMetadata,
  };
};
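
/*
 * Illustrative sketch (not part of the module) of the record shape produced
 * by `deserializeRecord`. Note that `key`, `value`, and `headers` are lazy
 * getters, so deserialization and schema parsing only run when a property is
 * actually read, while `originalKey`/`originalValue`/`originalHeaders` keep
 * the raw payloads as received. `rawRecord` below is a hypothetical MSK
 * record.
 *
 *   const record = await deserializeRecord(rawRecord, {
 *     value: { type: 'json' },
 *   });
 *   record.originalValue; // untouched base64 string from the event
 *   record.value;         // decoded and parsed on first access
 *   record.headers;       // header byte arrays decoded to UTF-8 strings
 */
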
/**
 * Wrap a handler function to automatically deserialize and validate Kafka records from an MSK event.
 *
 * The returned function will:
 * - Deserialize the key and value of each record using the provided schema config.
 * - Validate the deserialized key and value using the provided parser schemas (any Standard Schema-compatible library, such as Zod), if provided.
 * - Replace the `records` property in the event with an array of deserialized and validated records.
 * - Call the original handler with the modified event and original context/arguments.
 *
 * @example
 * ```ts
 * import { kafkaConsumer } from '@aws-lambda-powertools/kafka';
 * import { z } from 'zod';
 *
 * const keySchema = z.string();
 * const valueSchema = z.object({
 *   id: z.number(),
 * });
 *
 * export const handler = kafkaConsumer<z.infer<typeof keySchema>, z.infer<typeof valueSchema>>(async (event, context) => {
 *   // event.records is now an array of deserialized and validated records
 *   for (const record of event.records) {
 *     console.log(record.key, record.value);
 *   }
 * }, {
 *   key: { type: 'json', parserSchema: keySchema },
 *   value: { type: 'json', parserSchema: valueSchema },
 * });
 * ```
 *
 * @typeParam K - Optional type of the deserialized key - defaults to `unknown`.
 * @typeParam V - Optional type of the deserialized value - defaults to `unknown`.
 *
 * @param handler - The original handler function to wrap. It should accept the deserialized event as its first argument.
 * @param config - The schema configuration for deserializing and validating record keys and values.
 */
const kafkaConsumer = (handler, config) => {
  return async (event, context) => {
    assertIsMSKEvent(event);
    const consumerRecords = [];
    for (const recordsArray of Object.values(event.records)) {
      for (const record of recordsArray) {
        consumerRecords.push(await deserializeRecord(record, config));
      }
    }
    return handler({
      ...event,
      records: consumerRecords,
    }, context);
  };
};

export { kafkaConsumer };
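
/*
 * Minimal usage sketch (assumes an ESM Lambda handler file). With no schema
 * config, `getDeserializer` falls back to the primitive deserializer, so keys
 * and values are exposed as UTF-8 strings.
 *
 *   import { kafkaConsumer } from '@aws-lambda-powertools/kafka';
 *
 *   export const handler = kafkaConsumer(async (event) => {
 *     for (const record of event.records) {
 *       console.log(record.value); // UTF-8 decoded string
 *     }
 *   });
 */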