UNPKG

@confluentinc/schemaregistry

Version:
462 lines (461 loc) 17.9 kB
"use strict";
// ---------------------------------------------------------------------------
// CommonJS interop helpers emitted by the TypeScript compiler. They are kept
// exactly as generated; only the layout has been restored.
// ---------------------------------------------------------------------------
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.AvroDeserializer = exports.AvroSerializer = exports.AVRO_TYPE = void 0;
const serde_1 = require("./serde");
const schemaregistry_client_1 = require("../schemaregistry-client");
const avsc_1 = __importStar(require("avsc"));
const lru_cache_1 = require("lru-cache");
const json_stringify_deterministic_1 = __importDefault(require("json-stringify-deterministic"));
/** Schema type identifier passed to the schema-id lookup helpers for Avro payloads. */
exports.AVRO_TYPE = "AVRO";
/**
 * AvroSerializer is used to serialize messages using Avro.
 */
class AvroSerializer extends serde_1.Serializer {
    /**
     * Create a new AvroSerializer.
     * @param client - the schema registry client
     * @param serdeType - the type of the serializer
     * @param conf - the serializer configuration
     * @param ruleRegistry - the rule registry
     */
    constructor(client, serdeType, conf, ruleRegistry) {
        super(client, serdeType, conf, ruleRegistry);
        // Cache of [avsc type, resolved references] tuples, keyed by schema text (see toType).
        this.schemaToTypeCache = new lru_cache_1.LRUCache({ max: this.conf.cacheCapacity ?? 1000 });
        this.fieldTransformer = async (ctx, fieldTransform, msg) => {
            return await this.fieldTransform(ctx, fieldTransform, msg);
        };
        // Every registered rule executor receives the client config and the rule config as a Map.
        for (const rule of this.ruleRegistry.getExecutors()) {
            rule.configure(client.config(), new Map(Object.entries(conf.ruleConfig ?? {})));
        }
    }
    /**
     * serialize is used to serialize a message using Avro.
     * @param topic - the topic to serialize the message for
     * @param msg - the message to serialize
     * @param headers - optional headers
     * @returns the result of serializeSchemaId for the encoded message bytes
     * @throws Error if the client is not initialized or the message is null/undefined
     * @throws SerializationError if the message does not match the resolved Avro type
     */
    async serialize(topic, msg, headers) {
        if (this.client == null) {
            throw new Error('client is not initialized');
        }
        if (msg == null) {
            throw new Error('message is empty');
        }
        let schema = undefined;
        // Don't derive the schema if it is being looked up in the following ways
        if (this.config().useSchemaId == null &&
            !this.config().useLatestVersion &&
            this.config().useLatestWithMetadata == null) {
            const avroSchema = AvroSerializer.messageToSchema(msg);
            schema = {
                schemaType: 'AVRO',
                schema: JSON.stringify(avroSchema),
            };
        }
        const [schemaId, info] = await this.getSchemaId(exports.AVRO_TYPE, topic, msg, schema);
        const [avroType, deps] = await this.toType(info);
        const subject = this.subjectName(topic, info);
        // WRITE rules run on the message object before it is validated and encoded.
        msg = await this.executeRules(subject, topic, schemaregistry_client_1.RuleMode.WRITE, null, info, msg, getInlineTags(info, deps));
        avroType.isValid(msg, {
            errorHook: (path, any, type) => {
                throw new serde_1.SerializationError(`Invalid message at ${path.join('.')}, expected ${type}, got ${(0, json_stringify_deterministic_1.default)(any)}`);
            },
        });
        // A top-level 'bytes' schema is passed through without Avro encoding.
        let msgBytes = avroType.typeName === 'bytes' ? msg : avroType.toBuffer(msg);
        // ENCODING-phase rules operate on the encoded bytes.
        msgBytes = await this.executeRulesWithPhase(subject, topic, schemaregistry_client_1.RulePhase.ENCODING, schemaregistry_client_1.RuleMode.WRITE, null, info, msgBytes, null);
        return this.serializeSchemaId(topic, msgBytes, schemaId, headers);
    }
    /**
     * Applies a field transform to a message using the Avro type of the rule context's target schema.
     * @param ctx - the rule context; ctx.target is the schema to resolve
     * @param fieldTransform - the transform applied to matching fields
     * @param msg - the message to transform
     */
    async fieldTransform(ctx, fieldTransform, msg) {
        const [schema] = await this.toType(ctx.target);
        return await transform(ctx, schema, msg, fieldTransform);
    }
    /**
     * Resolves schema info to an [avsc type, references] tuple via the module-level toType,
     * collecting references with this.resolveReferences.
     * @param info - the schema info
     */
    async toType(info) {
        return toType(this.client, this.conf, this, info, async (client, info) => {
            const deps = new Map();
            await this.resolveReferences(client, info, deps);
            return deps;
        });
    }
    /**
     * Infers an Avro type from a message value, assigning sequential names
     * (Enum1, Fixed1, Record1, ...) to enum, fixed and record schemas.
     * @param msg - the message to infer a schema from
     * @returns the avsc type produced by Type.forValue
     */
    static messageToSchema(msg) {
        let enumIndex = 1;
        let fixedIndex = 1;
        let recordIndex = 1;
        const namingHook = (avroSchema) => {
            const schema = avroSchema;
            switch (schema.type) {
                case 'enum':
                    schema.name = `Enum${enumIndex++}`;
                    break;
                case 'fixed':
                    schema.name = `Fixed${fixedIndex++}`;
                    break;
                case 'record':
                    schema.name = `Record${recordIndex++}`;
                    break;
                default:
            }
            // The hook only renames; it never supplies a type itself.
            return undefined;
        };
        return avsc_1.Type.forValue(msg, { typeHook: namingHook });
    }
}
exports.AvroSerializer = AvroSerializer;
/**
 * AvroDeserializer is used to deserialize messages using Avro.
 */
class AvroDeserializer extends serde_1.Deserializer {
    /**
     * Create a new AvroDeserializer.
     * @param client - the schema registry client
     * @param serdeType - the type of the deserializer
     * @param conf - the deserializer configuration
     * @param ruleRegistry - the rule registry
     */
    constructor(client, serdeType, conf, ruleRegistry) {
        super(client, serdeType, conf, ruleRegistry);
        // Cache of [avsc type, resolved references] tuples, keyed by schema text (see toType).
        this.schemaToTypeCache = new lru_cache_1.LRUCache({ max: this.conf.cacheCapacity ?? 1000 });
        this.fieldTransformer = async (ctx, fieldTransform, msg) => {
            return await this.fieldTransform(ctx, fieldTransform, msg);
        };
        // Every registered rule executor receives the client config and the rule config as a Map.
        for (const rule of this.ruleRegistry.getExecutors()) {
            rule.configure(client.config(), new Map(Object.entries(conf.ruleConfig ?? {})));
        }
    }
    /**
     * Deserializes a message.
     * @param topic - the topic
     * @param payload - the message payload
     * @param headers - optional headers
     * @returns the decoded message, or null when the payload is empty
     * @throws Error if the payload is not a Buffer
     */
    async deserialize(topic, payload, headers) {
        if (!Buffer.isBuffer(payload)) {
            throw new Error('Invalid buffer');
        }
        if (payload.length === 0) {
            return null;
        }
        const schemaId = new serde_1.SchemaId(exports.AVRO_TYPE);
        const [info, bytesRead] = await this.getWriterSchema(topic, payload, schemaId, headers);
        // Drop the bytes consumed while reading the writer schema id.
        payload = payload.subarray(bytesRead);
        const subject = this.subjectName(topic, info);
        // ENCODING-phase rules run on the raw bytes before decoding.
        payload = await this.executeRulesWithPhase(subject, topic, schemaregistry_client_1.RulePhase.ENCODING, schemaregistry_client_1.RuleMode.READ, null, info, payload, null);
        const readerMeta = await this.getReaderSchema(subject);
        let migrations = [];
        if (readerMeta != null) {
            migrations = await this.getMigrations(subject, info, readerMeta);
        }
        const [writer, deps] = await this.toType(info);
        let msg;
        if (migrations.length > 0) {
            // Decode with the writer schema, then let the migrations reshape the message.
            msg = writer.typeName === 'bytes' ? payload : writer.fromBuffer(payload);
            msg = await this.executeMigrations(migrations, subject, topic, msg);
        } else if (writer.typeName === 'bytes') {
            msg = payload;
        } else if (readerMeta != null) {
            const [reader] = await this.toType(readerMeta);
            // A resolver is only needed when reader and writer types differ.
            msg = reader.equals(writer)
                ? reader.fromBuffer(payload)
                : reader.fromBuffer(payload, reader.createResolver(writer));
        } else {
            msg = writer.fromBuffer(payload);
        }
        // READ rules are evaluated against the reader schema when one exists.
        const target = readerMeta ?? info;
        msg = await this.executeRules(subject, topic, schemaregistry_client_1.RuleMode.READ, null, target, msg, getInlineTags(info, deps));
        return msg;
    }
    /**
     * Applies a field transform to a message using the Avro type of the rule context's target schema.
     * @param ctx - the rule context; ctx.target is the schema to resolve
     * @param fieldTransform - the transform applied to matching fields
     * @param msg - the message to transform
     */
    async fieldTransform(ctx, fieldTransform, msg) {
        const [schema] = await this.toType(ctx.target);
        return await transform(ctx, schema, msg, fieldTransform);
    }
    /**
     * Resolves schema info to an [avsc type, references] tuple via the module-level toType,
     * collecting references with this.resolveReferences.
     * @param info - the schema info
     */
    async toType(info) {
        return toType(this.client, this.conf, this, info, async (client, info) => {
            const deps = new Map();
            await this.resolveReferences(client, info, deps);
            return deps;
        });
    }
}
exports.AvroDeserializer = AvroDeserializer;
/**
 * Builds (or fetches from the serde's cache) the avsc type for a schema.
 * @param client - the schema registry client, forwarded to refResolver
 * @param conf - the serde configuration; it is spread into the avsc options
 * @param serde - the owning serde; its schemaToTypeCache is read and written
 * @param info - the schema info; info.schema is the JSON schema text
 * @param refResolver - async function returning the referenced schemas (iterated with forEach)
 * @returns a tuple of [avsc type, referenced schemas]
 */
async function toType(client, conf, serde, info, refResolver) {
    const cacheKey = (0, json_stringify_deterministic_1.default)(info.schema);
    const cached = serde.schemaToTypeCache.get(cacheKey);
    if (cached != null) {
        return cached;
    }
    const deps = await refResolver(client, info);
    const addReferencedSchemas = (userHook) => (schema, opts) => {
        const hookOpts = opts;
        deps.forEach((depSchema, _name) => {
            // The hook on the options object is replaced by the user's hook before each
            // referenced schema is parsed with those same options.
            // NOTE(review): presumably this registers the references once and keeps this
            // wrapper from re-entering itself — confirm against avsc's handling of opts.
            hookOpts.typeHook = userHook;
            avsc_1.default.Type.forSchema(JSON.parse(depSchema), hookOpts);
        });
        if (userHook) {
            return userHook(schema, opts);
        }
        return;
    };
    const avroOpts = conf;
    const type = avsc_1.default.Type.forSchema(JSON.parse(info.schema), {
        ...avroOpts,
        typeHook: addReferencedSchemas(avroOpts?.typeHook),
    });
    serde.schemaToTypeCache.set(cacheKey, [type, deps]);
    return [type, deps];
}
/**
 * Recursively walks a message alongside its avsc type and applies fieldTransform to
 * leaf values whose field tags match the rule's tags (or to all leaves when the rule has no tags).
 * Maps and records are updated in place; arrays are rebuilt.
 * @param ctx - the rule context
 * @param schema - the avsc type describing msg
 * @param msg - the value to transform
 * @param fieldTransform - the transform to apply to matching leaf values
 * @returns the transformed value; msg itself when msg or schema is null/undefined
 */
async function transform(ctx, schema, msg, fieldTransform) {
    if (msg == null || schema == null) {
        return msg;
    }
    const fieldCtx = ctx.currentField();
    if (fieldCtx != null) {
        fieldCtx.type = getType(schema);
    }
    switch (schema.typeName) {
        case 'union:unwrapped':
        case 'union:wrapped': {
            const [subschema, unionValue] = resolveUnion(schema, msg);
            if (subschema == null) {
                return msg;
            }
            const submsg = await transform(ctx, subschema, unionValue, fieldTransform);
            if (schema.typeName === 'union:wrapped') {
                // Re-wrap the value under its branch name.
                return { [subschema.branchName]: submsg };
            }
            return submsg;
        }
        case 'array': {
            // Items are transformed concurrently.
            // NOTE(review): all items share ctx — confirm the rule context tolerates
            // interleaved enterField/leaveField calls for arrays of records.
            return await Promise.all(msg.map((item) => transform(ctx, schema.itemsType, item, fieldTransform)));
        }
        case 'map': {
            for (const key of Object.keys(msg)) {
                msg[key] = await transform(ctx, schema.valuesType, msg[key], fieldTransform);
            }
            return msg;
        }
        case 'record': {
            for (const field of schema.fields) {
                if (!(field.name in msg)) {
                    continue;
                }
                await transformField(ctx, schema, field, msg, fieldTransform);
            }
            return msg;
        }
        default: {
            if (fieldCtx != null) {
                const ruleTags = ctx.rule.tags ?? [];
                // A rule without tags applies to every field; otherwise at least one tag must match.
                if (ruleTags.length === 0 || !disjoint(new Set(ruleTags), fieldCtx.tags)) {
                    return await fieldTransform.transform(ctx, fieldCtx, msg);
                }
            }
            return msg;
        }
    }
}
/**
 * Transforms a single record field inside an enterField/leaveField scope on the rule context.
 * For rules of kind 'CONDITION' a falsy result raises RuleConditionError and the record is
 * left untouched; for other rules the field is overwritten with the transformed value.
 * @param ctx - the rule context
 * @param recordSchema - the avsc record type owning the field
 * @param field - the avsc field descriptor
 * @param record - the record object being transformed (mutated in place)
 * @param fieldTransform - the transform to apply
 * @throws RuleConditionError when a CONDITION rule evaluates to a falsy value
 */
async function transformField(ctx, recordSchema, field, record, fieldTransform) {
    const fullName = `${recordSchema.name}.${field.name}`;
    try {
        ctx.enterField(record, fullName, field.name, getType(field.type), null);
        const newVal = await transform(ctx, field.type, record[field.name], fieldTransform);
        if (ctx.rule.kind === 'CONDITION') {
            if (!newVal) {
                throw new serde_1.RuleConditionError(ctx.rule);
            }
        } else {
            record[field.name] = newVal;
        }
    } finally {
        ctx.leaveField();
    }
}
/**
 * Maps an avsc type (by its typeName) to the serde FieldType enumeration.
 * Unrecognized type names map to FieldType.NULL.
 * @param schema - the avsc type
 */
function getType(schema) {
    switch (schema.typeName) {
        case 'record':
            return serde_1.FieldType.RECORD;
        case 'enum':
            return serde_1.FieldType.ENUM;
        case 'array':
            return serde_1.FieldType.ARRAY;
        case 'map':
            return serde_1.FieldType.MAP;
        case 'union:unwrapped':
        case 'union:wrapped':
            return serde_1.FieldType.COMBINED;
        case 'fixed':
            return serde_1.FieldType.FIXED;
        case 'string':
            return serde_1.FieldType.STRING;
        case 'bytes':
            return serde_1.FieldType.BYTES;
        case 'int':
            return serde_1.FieldType.INT;
        case 'abstract:long':
        case 'long':
            return serde_1.FieldType.LONG;
        case 'float':
            return serde_1.FieldType.FLOAT;
        case 'double':
            return serde_1.FieldType.DOUBLE;
        case 'boolean':
            return serde_1.FieldType.BOOLEAN;
        case 'null':
            return serde_1.FieldType.NULL;
        default:
            return serde_1.FieldType.NULL;
    }
}
/**
 * Returns true when no element of slice1 is contained in map1.
 * @param slice1 - an iterable of values
 * @param map1 - a collection exposing has()
 */
function disjoint(slice1, map1) {
    for (const v of slice1) {
        if (map1.has(v)) {
            return false;
        }
    }
    return true;
}
/**
 * Finds the union branch that a message value belongs to.
 * For unwrapped unions the first branch whose isValid(msg) holds is chosen; for wrapped
 * unions the single key of msg is matched against each branch's branchName.
 * @param schema - the avsc union type
 * @param msg - the value to resolve
 * @returns [branch type, unwrapped value], or [null, msg] when no branch matches
 * @throws Error when a wrapped-union object does not have exactly one key
 */
function resolveUnion(schema, msg) {
    if (schema.typeName === 'union:unwrapped') {
        const branch = schema.types.find((candidate) => candidate.isValid(msg));
        if (branch !== undefined) {
            return [branch, msg];
        }
    } else if (schema.typeName === 'union:wrapped') {
        // typeof null is 'object'; guard so Object.keys is never handed null.
        if (msg != null && typeof msg === 'object') {
            const keys = Object.keys(msg);
            if (keys.length !== 1) {
                throw new Error('wrapped unions require a name/value pair with the name as the type name');
            }
            const name = keys[0];
            const branch = schema.types.find((candidate) => candidate.branchName === name);
            if (branch !== undefined) {
                return [branch, msg[name]];
            }
        }
    }
    return [null, msg];
}
/**
 * Collects the inline 'confluent:tags' of a schema and of all its referenced schemas.
 * @param info - the schema info; info.schema is parsed as JSON
 * @param deps - referenced schemas; each value is parsed as JSON
 * @returns a Map from '<record full name>.<field name>' to a Set of tags
 */
function getInlineTags(info, deps) {
    const inlineTags = new Map();
    getInlineTagsRecursively('', '', JSON.parse(info.schema), inlineTags);
    for (const depSchema of deps.values()) {
        getInlineTagsRecursively('', '', JSON.parse(depSchema), inlineTags);
    }
    return inlineTags;
}
// iterate over the object and get all properties named 'confluent:tags'
/**
 * Walks a parsed Avro schema and records the tags of every tagged record field.
 * @param ns - the namespace inherited from the enclosing schema ('' at the root)
 * @param name - the full name of the enclosing record ('' at the root)
 * @param schema - the parsed schema node (string, array, or object)
 * @param tags - the Map that receives '<record full name>.<field name>' -> Set of tags
 */
function getInlineTagsRecursively(ns, name, schema, tags) {
    if (schema == null || typeof schema === 'string') {
        return;
    }
    if (Array.isArray(schema)) {
        // A union: visit every branch.
        for (const branch of schema) {
            getInlineTagsRecursively(ns, name, branch, tags);
        }
        return;
    }
    if (typeof schema !== 'object') {
        return;
    }
    switch (schema['type']) {
        case 'array':
            getInlineTagsRecursively(ns, name, schema['items'], tags);
            break;
        case 'map':
            getInlineTagsRecursively(ns, name, schema['values'], tags);
            break;
        case 'record': {
            let recordNs = schema['namespace'];
            let recordName = schema['name'];
            if (recordNs === undefined) {
                // No explicit namespace: fall back to the namespace of the enclosing record's full name.
                recordNs = impliedNamespace(name);
            }
            if (recordNs == null) {
                recordNs = ns;
            }
            // Per the Avro naming rules a name that contains a dot is already a full name.
            // A raw prefix test is not enough: namespace 'com' with name 'computer' must
            // still become 'com.computer', which is the key transformField looks up.
            if (recordNs !== '' && !recordName.includes('.')) {
                recordName = `${recordNs}.${recordName}`;
            }
            for (const field of schema['fields']) {
                const fieldTags = field['confluent:tags'];
                const fieldName = field['name'];
                if (fieldTags !== undefined && fieldName !== undefined) {
                    tags.set(`${recordName}.${fieldName}`, new Set(fieldTags));
                }
                const fieldType = field['type'];
                if (fieldType !== undefined) {
                    getInlineTagsRecursively(recordNs, recordName, fieldType, tags);
                }
            }
            break;
        }
    }
}
/**
 * Returns the namespace part of a dotted full name (everything before the last dot),
 * or null when the name contains no dot.
 * @param name - a possibly dotted name
 */
function impliedNamespace(name) {
    const match = /^(.*)\.[^.]+$/.exec(name);
    return match ? match[1] : null;
}