genkitx-openai

Firebase Genkit AI framework plugin for OpenAI APIs.
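For context, a minimal setup sketch. It assumes the package's openAI plugin export and Genkit's genkit() initializer; the environment variable name is a placeholder, so adjust it to your own configuration.

import { genkit } from "genkit";
import { openAI } from "genkitx-openai";

// Registering the plugin makes its models, including the speech-to-text
// models defined in whisper.mjs below, available to this Genkit instance.
const ai = genkit({
  plugins: [openAI({ apiKey: process.env.OPENAI_API_KEY })],
});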

The compiled speech-to-text module, whisper.mjs:
import { __async, __spreadProps, __spreadValues } from "./chunk-MLCSNVBT.mjs";
import { GenerationCommonConfigSchema, Message, z } from "genkit";
import { modelRef } from "genkit/model";

// Transcription-specific options layered on Genkit's common generation config.
const Whisper1ConfigSchema = GenerationCommonConfigSchema.extend({
  language: z.string().optional(),
  timestamp_granularities: z.array(z.enum(["word", "segment"])).optional(),
  response_format: z.enum(["json", "text", "srt", "verbose_json", "vtt"]).optional()
});

// Model reference for OpenAI's whisper-1 speech-to-text model.
const whisper1 = modelRef({
  name: "openai/whisper-1",
  info: {
    label: "OpenAI - Whisper",
    supports: {
      media: true,
      output: ["text", "json"],
      multiturn: false,
      systemRole: false,
      tools: false
    }
  },
  configSchema: Whisper1ConfigSchema
});

// Model reference for OpenAI's gpt-4o-transcribe model (same config schema as Whisper).
const gpt4oTranscribe = modelRef({
  name: "openai/gpt-4o-transcribe",
  info: {
    label: "OpenAI - GPT-4o Transcribe",
    supports: {
      media: true,
      output: ["text", "json"],
      multiturn: false,
      systemRole: false,
      tools: false
    }
  },
  configSchema: Whisper1ConfigSchema
});

// Converts a Genkit generate request (audio supplied as a base64 data URL)
// into parameters for OpenAI's audio transcriptions API.
function toWhisper1Request(request) {
  var _a, _b, _c, _d, _e, _f;
  const message = new Message(request.messages[0]);
  const media = message.media;
  if (!(media == null ? void 0 : media.url)) {
    throw new Error("No media found in the request");
  }
  const mediaBuffer = Buffer.from(
    media.url.slice(media.url.indexOf(",") + 1),
    "base64"
  );
  const mediaFile = new File([mediaBuffer], "input", {
    type: (_a = media.contentType) != null ? _a : media.url.slice("data:".length, media.url.indexOf(";"))
  });
  const options = {
    model: "whisper-1",
    file: mediaFile,
    prompt: message.text,
    temperature: (_b = request.config) == null ? void 0 : _b.temperature,
    language: (_c = request.config) == null ? void 0 : _c.language,
    timestamp_granularities: (_d = request.config) == null ? void 0 : _d.timestamp_granularities
  };
  const outputFormat = (_e = request.output) == null ? void 0 : _e.format;
  const customFormat = (_f = request.config) == null ? void 0 : _f.response_format;
  if (outputFormat && customFormat) {
    if (outputFormat === "json" && customFormat !== "json" && customFormat !== "verbose_json") {
      throw new Error(
        `Custom response format ${customFormat} is not compatible with output format ${outputFormat}`
      );
    }
  }
  if (outputFormat === "media") {
    throw new Error(`Output format ${outputFormat} is not supported.`);
  }
  options.response_format = customFormat || outputFormat || "text";
  // Drop undefined fields so they are not sent to the API.
  for (const k in options) {
    if (options[k] === void 0) {
      delete options[k];
    }
  }
  return options;
}

// Wraps the transcription result in Genkit's candidate/message response shape.
function toGenerateResponse(result) {
  return {
    candidates: [
      {
        index: 0,
        finishReason: "stop",
        message: {
          role: "model",
          content: [
            { text: typeof result === "string" ? result : result.text }
          ]
        }
      }
    ]
  };
}

// Speech-to-text models this module can register.
const SUPPORTED_STT_MODELS = {
  "gpt-4o-transcribe": gpt4oTranscribe,
  "whisper-1": whisper1
};

// Defines a Genkit model backed by OpenAI's audio transcriptions endpoint.
function sttModel(ai, name, client) {
  const modelId = `openai/${name}`;
  const model = SUPPORTED_STT_MODELS[name];
  if (!model) throw new Error(`Unsupported model: ${name}`);
  return ai.defineModel(
    __spreadProps(__spreadValues({ name: modelId }, model.info), {
      configSchema: model.configSchema
    }),
    (request) => __async(this, null, function* () {
      const params = toWhisper1Request(request);
      const result = yield client.audio.transcriptions.create(
        __spreadProps(__spreadValues({}, params), { stream: false })
      );
      return toGenerateResponse(result);
    })
  );
}

export {
  SUPPORTED_STT_MODELS,
  Whisper1ConfigSchema,
  gpt4oTranscribe,
  sttModel,
  whisper1
};
//# sourceMappingURL=whisper.mjs.map
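
A hedged usage sketch for the models above. It assumes the ai instance from the setup sketch, that whisper1 is re-exported from the package root, and a placeholder local audio file; toWhisper1Request() expects the audio as a base64 data URL and reads the MIME type from contentType or the data: prefix.

import { readFile } from "node:fs/promises";
import { whisper1 } from "genkitx-openai";

// "speech.mp3" is a hypothetical input file; encode it as a data URL so
// toWhisper1Request() can decode everything after the first comma.
const audio = await readFile("speech.mp3");
const { text } = await ai.generate({
  model: whisper1,
  prompt: [
    {
      media: {
        url: `data:audio/mp3;base64,${audio.toString("base64")}`,
        contentType: "audio/mp3",
      },
    },
  ],
  config: { language: "en", response_format: "text" },
});
console.log(text);

Note that when the request's output format is "json", toWhisper1Request() only accepts "json" or "verbose_json" as a custom response_format, and a "media" output format is rejected outright.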