UNPKG

@genkit-ai/compat-oai

Version:

Genkit AI framework plugin for OpenAI APIs.

1 lines 15.7 kB
{"version":3,"sources":["../src/audio.ts"],"sourcesContent":["/**\n * Copyright 2024 The Fire Company\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\nimport type {\n GenerateRequest,\n GenerateResponseData,\n Genkit,\n ModelReference,\n} from 'genkit';\nimport { GenerationCommonConfigSchema, Message, modelRef, z } from 'genkit';\nimport type { ModelAction, ModelInfo } from 'genkit/model';\nimport type OpenAI from 'openai';\nimport { Response } from 'openai/core.mjs';\nimport type {\n SpeechCreateParams,\n Transcription,\n TranscriptionCreateParams,\n} from 'openai/resources/audio/index.mjs';\n\nexport type SpeechRequestBuilder = (\n req: GenerateRequest,\n params: SpeechCreateParams\n) => void;\nexport type TranscriptionRequestBuilder = (\n req: GenerateRequest,\n params: TranscriptionCreateParams\n) => void;\n\nexport const TRANSCRIPTION_MODEL_INFO = {\n supports: {\n media: true,\n output: ['text', 'json'],\n multiturn: false,\n systemRole: false,\n tools: false,\n },\n};\n\nexport const SPEECH_MODEL_INFO: ModelInfo = {\n supports: {\n media: false,\n output: ['media'],\n multiturn: false,\n systemRole: false,\n tools: false,\n },\n};\n\nconst ChunkingStrategySchema = z.object({\n type: z.string(),\n prefix_padding_ms: z.number().int().optional(),\n silence_duration_ms: z.number().int().optional(),\n threshold: z.number().min(0).max(1.0).optional(),\n});\nexport const TranscriptionConfigSchema = GenerationCommonConfigSchema.pick({\n temperature: true,\n}).extend({\n chunking_strategy: z\n .union([z.literal('auto'), ChunkingStrategySchema])\n .optional(),\n include: z.array(z.any()).optional(),\n language: z.string().optional(),\n timestamp_granularities: z.array(z.enum(['word', 'segment'])).optional(),\n response_format: z\n .enum(['json', 'text', 'srt', 'verbose_json', 'vtt'])\n .optional(),\n // TODO stream support\n});\n\nexport const SpeechConfigSchema = z.object({\n voice: z\n .enum(['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'])\n .default('alloy'),\n speed: z.number().min(0.25).max(4.0).optional(),\n response_format: z\n .enum(['mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'])\n .optional(),\n});\n\n/**\n * Supported media formats for Audio generation\n */\nexport const RESPONSE_FORMAT_MEDIA_TYPES = {\n mp3: 'audio/mpeg',\n opus: 'audio/opus',\n aac: 'audio/aac',\n flac: 'audio/flac',\n wav: 'audio/wav',\n pcm: 'audio/L16',\n};\n\nfunction toTTSRequest(\n modelName: string,\n request: GenerateRequest,\n requestBuilder?: SpeechRequestBuilder\n): SpeechCreateParams {\n const {\n voice,\n version: modelVersion,\n temperature,\n maxOutputTokens,\n stopSequences,\n topK,\n topP,\n ...restOfConfig\n } = request.config ?? {};\n\n let options: SpeechCreateParams = {\n model: modelVersion ?? modelName,\n input: new Message(request.messages[0]).text,\n voice: voice ?? 'alloy',\n };\n if (requestBuilder) {\n requestBuilder(request, options);\n } else {\n options = {\n ...options,\n ...restOfConfig, // passthorugh rest of the config\n };\n }\n for (const k in options) {\n if (options[k] === undefined) {\n delete options[k];\n }\n }\n return options;\n}\n\nasync function toGenerateResponse(\n response: Response,\n responseFormat: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' | 'pcm' = 'mp3'\n): Promise<GenerateResponseData> {\n const resultArrayBuffer = await response.arrayBuffer();\n const resultBuffer = Buffer.from(new Uint8Array(resultArrayBuffer));\n const mediaType = RESPONSE_FORMAT_MEDIA_TYPES[responseFormat];\n return {\n message: {\n role: 'model',\n content: [\n {\n media: {\n contentType: mediaType,\n url: `data:${mediaType};base64,${resultBuffer.toString('base64')}`,\n },\n },\n ],\n },\n finishReason: 'stop',\n raw: response,\n };\n}\n\n/**\n * Method to define a new Genkit Model that is compatible with the Open AI Audio\n * API. \n *\n * These models are to be used to create audio speech from a given request.\n * @param params An object containing parameters for defining the OpenAI speech\n * model.\n * @param params.ai The Genkit AI instance.\n * @param params.name The name of the model.\n * @param params.client The OpenAI client instance.\n * @param params.modelRef Optional reference to the model's configuration and\n * custom options.\n\n * @returns the created {@link ModelAction}\n */\nexport function defineCompatOpenAISpeechModel<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n ai: Genkit;\n name: string;\n client: OpenAI;\n modelRef?: ModelReference<CustomOptions>;\n requestBuilder?: SpeechRequestBuilder;\n}): ModelAction {\n const { ai, name, client, modelRef, requestBuilder } = params;\n const modelName = name.substring(name.indexOf('/') + 1);\n\n return ai.defineModel(\n {\n name,\n apiVersion: 'v2',\n ...modelRef?.info,\n configSchema: modelRef?.configSchema,\n },\n async (request, { abortSignal }) => {\n const ttsRequest = toTTSRequest(modelName!, request, requestBuilder);\n const result = await client.audio.speech.create(ttsRequest, {\n signal: abortSignal,\n });\n return await toGenerateResponse(result, ttsRequest.response_format);\n }\n );\n}\n\n/** Speech generation ModelRef helper, with reasonable defaults for\n * OpenAI-compatible providers */\nexport function compatOaiSpeechModelRef<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n info?: ModelInfo;\n configSchema?: CustomOptions;\n config?: any;\n}) {\n const {\n name,\n info = SPEECH_MODEL_INFO,\n configSchema,\n config = undefined,\n } = params;\n return modelRef({\n name,\n configSchema: configSchema || (SpeechConfigSchema as any),\n info,\n config,\n });\n}\n\nfunction toSttRequest(\n modelName: string,\n request: GenerateRequest,\n requestBuilder?: TranscriptionRequestBuilder\n): TranscriptionCreateParams {\n const message = new Message(request.messages[0]);\n const media = message.media;\n if (!media?.url) {\n throw new Error('No media found in the request');\n }\n const mediaBuffer = Buffer.from(\n media.url.slice(media.url.indexOf(',') + 1),\n 'base64'\n );\n const mediaFile = new File([mediaBuffer], 'input', {\n type:\n media.contentType ??\n media.url.slice('data:'.length, media.url.indexOf(';')),\n });\n const {\n temperature,\n version: modelVersion,\n maxOutputTokens,\n stopSequences,\n topK,\n topP,\n ...restOfConfig\n } = request.config ?? {};\n\n let options: TranscriptionCreateParams = {\n model: modelVersion ?? modelName,\n file: mediaFile,\n prompt: message.text,\n temperature,\n };\n if (requestBuilder) {\n requestBuilder(request, options);\n } else {\n options = {\n ...options,\n ...restOfConfig, // passthrough rest of the config\n };\n }\n const outputFormat = request.output?.format as 'json' | 'text' | 'media';\n const customFormat = request.config?.response_format;\n if (outputFormat && customFormat) {\n if (\n outputFormat === 'json' &&\n customFormat !== 'json' &&\n customFormat !== 'verbose_json'\n ) {\n throw new Error(\n `Custom response format ${customFormat} is not compatible with output format ${outputFormat}`\n );\n }\n }\n if (outputFormat === 'media') {\n throw new Error(`Output format ${outputFormat} is not supported.`);\n }\n options.response_format = customFormat || outputFormat || 'text';\n for (const k in options) {\n if (options[k] === undefined) {\n delete options[k];\n }\n }\n return options;\n}\n\nfunction transcriptionToGenerateResponse(\n result: Transcription | string\n): GenerateResponseData {\n return {\n message: {\n role: 'model',\n content: [\n {\n text: typeof result === 'string' ? result : result.text,\n },\n ],\n },\n finishReason: 'stop',\n raw: result,\n };\n}\n\n/**\n * Method to define a new Genkit Model that is compatible with Open AI\n * Transcriptions API. \n *\n * These models are to be used to transcribe audio to text.\n *\n * @param params An object containing parameters for defining the OpenAI\n * transcription model.\n * @param params.ai The Genkit AI instance.\n * @param params.name The name of the model.\n * @param params.client The OpenAI client instance.\n * @param params.modelRef Optional reference to the model's configuration and\n * custom options.\n\n * @returns the created {@link ModelAction}\n */\nexport function defineCompatOpenAITranscriptionModel<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n ai: Genkit;\n name: string;\n client: OpenAI;\n modelRef?: ModelReference<CustomOptions>;\n requestBuilder?: TranscriptionRequestBuilder;\n}): ModelAction {\n const { ai, name, client, modelRef, requestBuilder } = params;\n\n return ai.defineModel(\n {\n name,\n apiVersion: 'v2',\n ...modelRef?.info,\n configSchema: modelRef?.configSchema,\n },\n async (request, { abortSignal }) => {\n const modelName = name.substring(name.indexOf('/') + 1);\n\n const params = toSttRequest(modelName!, request, requestBuilder);\n // Explicitly setting stream to false ensures we use the non-streaming overload\n const result = await client.audio.transcriptions.create(\n {\n ...params,\n stream: false,\n },\n { signal: abortSignal }\n );\n return transcriptionToGenerateResponse(result);\n }\n );\n}\n\n/** Transcription ModelRef helper, with reasonable defaults for\n * OpenAI-compatible providers */\nexport function compatOaiTranscriptionModelRef<\n CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n>(params: {\n name: string;\n info?: ModelInfo;\n configSchema?: CustomOptions;\n config?: any;\n}) {\n const {\n name,\n info = TRANSCRIPTION_MODEL_INFO,\n configSchema,\n config = undefined,\n } = params;\n return modelRef({\n name,\n configSchema: configSchema || (TranscriptionConfigSchema as any),\n info,\n config,\n });\n}\n"],"mappings":"AAsBA,SAAS,8BAA8B,SAAS,UAAU,SAAS;AAmB5D,MAAM,2BAA2B;AAAA,EACtC,UAAU;AAAA,IACR,OAAO;AAAA,IACP,QAAQ,CAAC,QAAQ,MAAM;AAAA,IACvB,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,OAAO;AAAA,EACT;AACF;AAEO,MAAM,oBAA+B;AAAA,EAC1C,UAAU;AAAA,IACR,OAAO;AAAA,IACP,QAAQ,CAAC,OAAO;AAAA,IAChB,WAAW;AAAA,IACX,YAAY;AAAA,IACZ,OAAO;AAAA,EACT;AACF;AAEA,MAAM,yBAAyB,EAAE,OAAO;AAAA,EACtC,MAAM,EAAE,OAAO;AAAA,EACf,mBAAmB,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAC7C,qBAAqB,EAAE,OAAO,EAAE,IAAI,EAAE,SAAS;AAAA,EAC/C,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,EAAE,IAAI,CAAG,EAAE,SAAS;AACjD,CAAC;AACM,MAAM,4BAA4B,6BAA6B,KAAK;AAAA,EACzE,aAAa;AACf,CAAC,EAAE,OAAO;AAAA,EACR,mBAAmB,EAChB,MAAM,CAAC,EAAE,QAAQ,MAAM,GAAG,sBAAsB,CAAC,EACjD,SAAS;AAAA,EACZ,SAAS,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS;AAAA,EACnC,UAAU,EAAE,OAAO,EAAE,SAAS;AAAA,EAC9B,yBAAyB,EAAE,MAAM,EAAE,KAAK,CAAC,QAAQ,SAAS,CAAC,CAAC,EAAE,SAAS;AAAA,EACvE,iBAAiB,EACd,KAAK,CAAC,QAAQ,QAAQ,OAAO,gBAAgB,KAAK,CAAC,EACnD,SAAS;AAAA;AAEd,CAAC;AAEM,MAAM,qBAAqB,EAAE,OAAO;AAAA,EACzC,OAAO,EACJ,KAAK,CAAC,SAAS,QAAQ,SAAS,QAAQ,QAAQ,SAAS,CAAC,EAC1D,QAAQ,OAAO;AAAA,EAClB,OAAO,EAAE,OAAO,EAAE,IAAI,IAAI,EAAE,IAAI,CAAG,EAAE,SAAS;AAAA,EAC9C,iBAAiB,EACd,KAAK,CAAC,OAAO,QAAQ,OAAO,QAAQ,OAAO,KAAK,CAAC,EACjD,SAAS;AACd,CAAC;AAKM,MAAM,8BAA8B;AAAA,EACzC,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,MAAM;AAAA,EACN,KAAK;AAAA,EACL,KAAK;AACP;AAEA,SAAS,aACP,WACA,SACA,gBACoB;AACpB,QAAM;AAAA,IACJ;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG;AAAA,EACL,IAAI,QAAQ,UAAU,CAAC;AAEvB,MAAI,UAA8B;AAAA,IAChC,OAAO,gBAAgB;AAAA,IACvB,OAAO,IAAI,QAAQ,QAAQ,SAAS,CAAC,CAAC,EAAE;AAAA,IACxC,OAAO,SAAS;AAAA,EAClB;AACA,MAAI,gBAAgB;AAClB,mBAAe,SAAS,OAAO;AAAA,EACjC,OAAO;AACL,cAAU;AAAA,MACR,GAAG;AAAA,MACH,GAAG;AAAA;AAAA,IACL;AAAA,EACF;AACA,aAAW,KAAK,SAAS;AACvB,QAAI,QAAQ,CAAC,MAAM,QAAW;AAC5B,aAAO,QAAQ,CAAC;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAEA,eAAe,mBACb,UACA,iBAAkE,OACnC;AAC/B,QAAM,oBAAoB,MAAM,SAAS,YAAY;AACrD,QAAM,eAAe,OAAO,KAAK,IAAI,WAAW,iBAAiB,CAAC;AAClE,QAAM,YAAY,4BAA4B,cAAc;AAC5D,SAAO;AAAA,IACL,SAAS;AAAA,MACP,MAAM;AAAA,MACN,SAAS;AAAA,QACP;AAAA,UACE,OAAO;AAAA,YACL,aAAa;AAAA,YACb,KAAK,QAAQ,SAAS,WAAW,aAAa,SAAS,QAAQ,CAAC;AAAA,UAClE;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,IACA,cAAc;AAAA,IACd,KAAK;AAAA,EACP;AACF;AAiBO,SAAS,8BAEd,QAMc;AACd,QAAM,EAAE,IAAI,MAAM,QAAQ,UAAAA,WAAU,eAAe,IAAI;AACvD,QAAM,YAAY,KAAK,UAAU,KAAK,QAAQ,GAAG,IAAI,CAAC;AAEtD,SAAO,GAAG;AAAA,IACR;AAAA,MACE;AAAA,MACA,YAAY;AAAA,MACZ,GAAGA,WAAU;AAAA,MACb,cAAcA,WAAU;AAAA,IAC1B;AAAA,IACA,OAAO,SAAS,EAAE,YAAY,MAAM;AAClC,YAAM,aAAa,aAAa,WAAY,SAAS,cAAc;AACnE,YAAM,SAAS,MAAM,OAAO,MAAM,OAAO,OAAO,YAAY;AAAA,QAC1D,QAAQ;AAAA,MACV,CAAC;AACD,aAAO,MAAM,mBAAmB,QAAQ,WAAW,eAAe;AAAA,IACpE;AAAA,EACF;AACF;AAIO,SAAS,wBAEd,QAKC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,OAAO;AAAA,IACP;AAAA,IACA,SAAS;AAAA,EACX,IAAI;AACJ,SAAO,SAAS;AAAA,IACd;AAAA,IACA,cAAc,gBAAiB;AAAA,IAC/B;AAAA,IACA;AAAA,EACF,CAAC;AACH;AAEA,SAAS,aACP,WACA,SACA,gBAC2B;AAC3B,QAAM,UAAU,IAAI,QAAQ,QAAQ,SAAS,CAAC,CAAC;AAC/C,QAAM,QAAQ,QAAQ;AACtB,MAAI,CAAC,OAAO,KAAK;AACf,UAAM,IAAI,MAAM,+BAA+B;AAAA,EACjD;AACA,QAAM,cAAc,OAAO;AAAA,IACzB,MAAM,IAAI,MAAM,MAAM,IAAI,QAAQ,GAAG,IAAI,CAAC;AAAA,IAC1C;AAAA,EACF;AACA,QAAM,YAAY,IAAI,KAAK,CAAC,WAAW,GAAG,SAAS;AAAA,IACjD,MACE,MAAM,eACN,MAAM,IAAI,MAAM,QAAQ,QAAQ,MAAM,IAAI,QAAQ,GAAG,CAAC;AAAA,EAC1D,CAAC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,SAAS;AAAA,IACT;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,GAAG;AAAA,EACL,IAAI,QAAQ,UAAU,CAAC;AAEvB,MAAI,UAAqC;AAAA,IACvC,OAAO,gBAAgB;AAAA,IACvB,MAAM;AAAA,IACN,QAAQ,QAAQ;AAAA,IAChB;AAAA,EACF;AACA,MAAI,gBAAgB;AAClB,mBAAe,SAAS,OAAO;AAAA,EACjC,OAAO;AACL,cAAU;AAAA,MACR,GAAG;AAAA,MACH,GAAG;AAAA;AAAA,IACL;AAAA,EACF;AACA,QAAM,eAAe,QAAQ,QAAQ;AACrC,QAAM,eAAe,QAAQ,QAAQ;AACrC,MAAI,gBAAgB,cAAc;AAChC,QACE,iBAAiB,UACjB,iBAAiB,UACjB,iBAAiB,gBACjB;AACA,YAAM,IAAI;AAAA,QACR,0BAA0B,YAAY,yCAAyC,YAAY;AAAA,MAC7F;AAAA,IACF;AAAA,EACF;AACA,MAAI,iBAAiB,SAAS;AAC5B,UAAM,IAAI,MAAM,iBAAiB,YAAY,oBAAoB;AAAA,EACnE;AACA,UAAQ,kBAAkB,gBAAgB,gBAAgB;AAC1D,aAAW,KAAK,SAAS;AACvB,QAAI,QAAQ,CAAC,MAAM,QAAW;AAC5B,aAAO,QAAQ,CAAC;AAAA,IAClB;AAAA,EACF;AACA,SAAO;AACT;AAEA,SAAS,gCACP,QACsB;AACtB,SAAO;AAAA,IACL,SAAS;AAAA,MACP,MAAM;AAAA,MACN,SAAS;AAAA,QACP;AAAA,UACE,MAAM,OAAO,WAAW,WAAW,SAAS,OAAO;AAAA,QACrD;AAAA,MACF;AAAA,IACF;AAAA,IACA,cAAc;AAAA,IACd,KAAK;AAAA,EACP;AACF;AAkBO,SAAS,qCAEd,QAMc;AACd,QAAM,EAAE,IAAI,MAAM,QAAQ,UAAAA,WAAU,eAAe,IAAI;AAEvD,SAAO,GAAG;AAAA,IACR;AAAA,MACE;AAAA,MACA,YAAY;AAAA,MACZ,GAAGA,WAAU;AAAA,MACb,cAAcA,WAAU;AAAA,IAC1B;AAAA,IACA,OAAO,SAAS,EAAE,YAAY,MAAM;AAClC,YAAM,YAAY,KAAK,UAAU,KAAK,QAAQ,GAAG,IAAI,CAAC;AAEtD,YAAMC,UAAS,aAAa,WAAY,SAAS,cAAc;AAE/D,YAAM,SAAS,MAAM,OAAO,MAAM,eAAe;AAAA,QAC/C;AAAA,UACE,GAAGA;AAAA,UACH,QAAQ;AAAA,QACV;AAAA,QACA,EAAE,QAAQ,YAAY;AAAA,MACxB;AACA,aAAO,gCAAgC,MAAM;AAAA,IAC/C;AAAA,EACF;AACF;AAIO,SAAS,+BAEd,QAKC;AACD,QAAM;AAAA,IACJ;AAAA,IACA,OAAO;AAAA,IACP;AAAA,IACA,SAAS;AAAA,EACX,IAAI;AACJ,SAAO,SAAS;AAAA,IACd;AAAA,IACA,cAAc,gBAAiB;AAAA,IAC/B;AAAA,IACA;AAAA,EACF,CAAC;AACH;","names":["modelRef","params"]}