UNPKG

@genkit-ai/compat-oai

Version:

Genkit AI framework plugin for OpenAI APIs.

285 lines 8.6 kB
"use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); var audio_exports = {}; __export(audio_exports, { RESPONSE_FORMAT_MEDIA_TYPES: () => RESPONSE_FORMAT_MEDIA_TYPES, SPEECH_MODEL_INFO: () => SPEECH_MODEL_INFO, SpeechConfigSchema: () => SpeechConfigSchema, TRANSCRIPTION_MODEL_INFO: () => TRANSCRIPTION_MODEL_INFO, TranscriptionConfigSchema: () => TranscriptionConfigSchema, compatOaiSpeechModelRef: () => compatOaiSpeechModelRef, compatOaiTranscriptionModelRef: () => compatOaiTranscriptionModelRef, defineCompatOpenAISpeechModel: () => defineCompatOpenAISpeechModel, defineCompatOpenAITranscriptionModel: () => defineCompatOpenAITranscriptionModel }); module.exports = __toCommonJS(audio_exports); var import_genkit = require("genkit"); const TRANSCRIPTION_MODEL_INFO = { supports: { media: true, output: ["text", "json"], multiturn: false, systemRole: false, tools: false } }; const SPEECH_MODEL_INFO = { supports: { media: false, output: ["media"], multiturn: false, systemRole: false, tools: false } }; const ChunkingStrategySchema = import_genkit.z.object({ type: import_genkit.z.string(), prefix_padding_ms: import_genkit.z.number().int().optional(), silence_duration_ms: import_genkit.z.number().int().optional(), threshold: import_genkit.z.number().min(0).max(1).optional() }); const TranscriptionConfigSchema = import_genkit.GenerationCommonConfigSchema.pick({ temperature: true }).extend({ chunking_strategy: import_genkit.z.union([import_genkit.z.literal("auto"), ChunkingStrategySchema]).optional(), include: import_genkit.z.array(import_genkit.z.any()).optional(), language: import_genkit.z.string().optional(), timestamp_granularities: import_genkit.z.array(import_genkit.z.enum(["word", "segment"])).optional(), response_format: import_genkit.z.enum(["json", "text", "srt", "verbose_json", "vtt"]).optional() // TODO stream support }); const SpeechConfigSchema = import_genkit.z.object({ voice: import_genkit.z.enum(["alloy", "echo", "fable", "onyx", "nova", "shimmer"]).default("alloy"), speed: import_genkit.z.number().min(0.25).max(4).optional(), response_format: import_genkit.z.enum(["mp3", "opus", "aac", "flac", "wav", "pcm"]).optional() }); const RESPONSE_FORMAT_MEDIA_TYPES = { mp3: "audio/mpeg", opus: "audio/opus", aac: "audio/aac", flac: "audio/flac", wav: "audio/wav", pcm: "audio/L16" }; function toTTSRequest(modelName, request, requestBuilder) { const { voice, version: modelVersion, temperature, maxOutputTokens, stopSequences, topK, topP, ...restOfConfig } = request.config ?? {}; let options = { model: modelVersion ?? modelName, input: new import_genkit.Message(request.messages[0]).text, voice: voice ?? "alloy" }; if (requestBuilder) { requestBuilder(request, options); } else { options = { ...options, ...restOfConfig // passthorugh rest of the config }; } for (const k in options) { if (options[k] === void 0) { delete options[k]; } } return options; } async function toGenerateResponse(response, responseFormat = "mp3") { const resultArrayBuffer = await response.arrayBuffer(); const resultBuffer = Buffer.from(new Uint8Array(resultArrayBuffer)); const mediaType = RESPONSE_FORMAT_MEDIA_TYPES[responseFormat]; return { message: { role: "model", content: [ { media: { contentType: mediaType, url: `data:${mediaType};base64,${resultBuffer.toString("base64")}` } } ] }, finishReason: "stop", raw: response }; } function defineCompatOpenAISpeechModel(params) { const { ai, name, client, modelRef: modelRef2, requestBuilder } = params; const modelName = name.substring(name.indexOf("/") + 1); return ai.defineModel( { name, apiVersion: "v2", ...modelRef2?.info, configSchema: modelRef2?.configSchema }, async (request, { abortSignal }) => { const ttsRequest = toTTSRequest(modelName, request, requestBuilder); const result = await client.audio.speech.create(ttsRequest, { signal: abortSignal }); return await toGenerateResponse(result, ttsRequest.response_format); } ); } function compatOaiSpeechModelRef(params) { const { name, info = SPEECH_MODEL_INFO, configSchema, config = void 0 } = params; return (0, import_genkit.modelRef)({ name, configSchema: configSchema || SpeechConfigSchema, info, config }); } function toSttRequest(modelName, request, requestBuilder) { const message = new import_genkit.Message(request.messages[0]); const media = message.media; if (!media?.url) { throw new Error("No media found in the request"); } const mediaBuffer = Buffer.from( media.url.slice(media.url.indexOf(",") + 1), "base64" ); const mediaFile = new File([mediaBuffer], "input", { type: media.contentType ?? media.url.slice("data:".length, media.url.indexOf(";")) }); const { temperature, version: modelVersion, maxOutputTokens, stopSequences, topK, topP, ...restOfConfig } = request.config ?? {}; let options = { model: modelVersion ?? modelName, file: mediaFile, prompt: message.text, temperature }; if (requestBuilder) { requestBuilder(request, options); } else { options = { ...options, ...restOfConfig // passthrough rest of the config }; } const outputFormat = request.output?.format; const customFormat = request.config?.response_format; if (outputFormat && customFormat) { if (outputFormat === "json" && customFormat !== "json" && customFormat !== "verbose_json") { throw new Error( `Custom response format ${customFormat} is not compatible with output format ${outputFormat}` ); } } if (outputFormat === "media") { throw new Error(`Output format ${outputFormat} is not supported.`); } options.response_format = customFormat || outputFormat || "text"; for (const k in options) { if (options[k] === void 0) { delete options[k]; } } return options; } function transcriptionToGenerateResponse(result) { return { message: { role: "model", content: [ { text: typeof result === "string" ? result : result.text } ] }, finishReason: "stop", raw: result }; } function defineCompatOpenAITranscriptionModel(params) { const { ai, name, client, modelRef: modelRef2, requestBuilder } = params; return ai.defineModel( { name, apiVersion: "v2", ...modelRef2?.info, configSchema: modelRef2?.configSchema }, async (request, { abortSignal }) => { const modelName = name.substring(name.indexOf("/") + 1); const params2 = toSttRequest(modelName, request, requestBuilder); const result = await client.audio.transcriptions.create( { ...params2, stream: false }, { signal: abortSignal } ); return transcriptionToGenerateResponse(result); } ); } function compatOaiTranscriptionModelRef(params) { const { name, info = TRANSCRIPTION_MODEL_INFO, configSchema, config = void 0 } = params; return (0, import_genkit.modelRef)({ name, configSchema: configSchema || TranscriptionConfigSchema, info, config }); } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { RESPONSE_FORMAT_MEDIA_TYPES, SPEECH_MODEL_INFO, SpeechConfigSchema, TRANSCRIPTION_MODEL_INFO, TranscriptionConfigSchema, compatOaiSpeechModelRef, compatOaiTranscriptionModelRef, defineCompatOpenAISpeechModel, defineCompatOpenAITranscriptionModel }); //# sourceMappingURL=audio.js.map