ai-utils.js

Build AI applications, chatbots, and agents with JavaScript and TypeScript.

import z from "zod";
import { AbstractModel } from "../../model-function/AbstractModel.js";
import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
import { createJsonResponseHandler, createTextResponseHandler, postToApi, } from "../../util/api/postToApi.js";
import { failedOpenAICallResponseHandler } from "./OpenAIError.js";
/**
 * @see https://openai.com/pricing
 */
export const OPENAI_TRANSCRIPTION_MODELS = {
    "whisper-1": {
        costInMillicentsPerSecond: 10, // = 600 / 60,
    },
};
export const calculateOpenAITranscriptionCostInMillicents = ({ model, response, }) => {
    if (model !== "whisper-1") {
        return null;
    }
    const durationInSeconds = response.duration;
    return (Math.ceil(durationInSeconds) *
        OPENAI_TRANSCRIPTION_MODELS[model].costInMillicentsPerSecond);
};
/**
 * Create a transcription model that calls the OpenAI transcription API.
 *
 * @see https://platform.openai.com/docs/api-reference/audio/create
 *
 * @example
 * const data = await fs.promises.readFile("data/test.mp3");
 *
 * const { transcription } = await transcribe(
 *   new OpenAITranscriptionModel({ model: "whisper-1" }),
 *   {
 *     type: "mp3",
 *     data,
 *   }
 * );
 */
export class OpenAITranscriptionModel extends AbstractModel {
    constructor(settings) {
        super({ settings });
        Object.defineProperty(this, "provider", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "openai"
        });
    }
    get modelName() {
        return this.settings.model;
    }
    generateTranscriptionResponse(data, options) {
        return this.callAPI(data, {
            responseFormat: OpenAITranscriptionResponseFormat.verboseJson,
            functionId: options?.functionId,
            settings: options?.settings,
            run: options?.run,
        });
    }
    extractTranscriptionText(response) {
        return response.text;
    }
    get apiKey() {
        const apiKey = this.settings.apiKey ?? process.env.OPENAI_API_KEY;
        if (apiKey == null) {
            throw new Error(`OpenAI API key is missing. Pass it as an argument to the constructor or set it as an environment variable named OPENAI_API_KEY.`);
        }
        return apiKey;
    }
    async callAPI(data, options) {
        const run = options?.run;
        const settings = options?.settings;
        const responseFormat = options?.responseFormat;
        const callSettings = Object.assign({
            apiKey: this.apiKey,
        }, this.settings, settings, {
            abortSignal: run?.abortSignal,
            file: {
                name: `audio.${data.type}`,
                data: data.data,
            },
            responseFormat,
        });
        return callWithRetryAndThrottle({
            retry: this.settings.retry,
            throttle: this.settings.throttle,
            call: async () => callOpenAITranscriptionAPI(callSettings),
        });
    }
    withSettings(additionalSettings) {
        return new OpenAITranscriptionModel(Object.assign({}, this.settings, additionalSettings));
    }
}
/**
 * Call the OpenAI Transcription API to generate a transcription from an audio file.
 *
 * @see https://platform.openai.com/docs/api-reference/audio/create
 *
 * @example
 * const transcriptionResponse = await callOpenAITranscriptionAPI({
 *   apiKey: openAiApiKey,
 *   model: "whisper-1",
 *   file: {
 *     name: "audio.mp3",
 *     data: fileData, // Buffer
 *   },
 *   responseFormat: callOpenAITranscriptionAPI.responseFormat.json,
 * });
 */
async function callOpenAITranscriptionAPI({ baseUrl = "https://api.openai.com/v1", abortSignal, apiKey, model, file, prompt, responseFormat, temperature, language, }) {
    const formData = new FormData();
    formData.append("file", new Blob([file.data]), file.name);
    formData.append("model", model);
    if (prompt) {
        formData.append("prompt", prompt);
    }
    if (responseFormat) {
        formData.append("response_format", responseFormat.type);
    }
    if (temperature) {
        formData.append("temperature", temperature.toString());
    }
    if (language) {
        formData.append("language", language);
    }
    return postToApi({
        url: `${baseUrl}/audio/transcriptions`,
        apiKey,
        contentType: null,
        body: {
            content: formData,
            values: {
                model,
                prompt,
                response_format: responseFormat,
                temperature,
                language,
            },
        },
        failedResponseHandler: failedOpenAICallResponseHandler,
        successfulResponseHandler: responseFormat.handler,
        abortSignal,
    });
}
const openAITranscriptionJsonSchema = z.object({
    text: z.string(),
});
const openAITranscriptionVerboseJsonSchema = z.object({
    task: z.literal("transcribe"),
    language: z.string(),
    duration: z.number(),
    segments: z.array(z.object({
        id: z.number(),
        seek: z.number(),
        start: z.number(),
        end: z.number(),
        text: z.string(),
        tokens: z.array(z.number()),
        temperature: z.number(),
        avg_logprob: z.number(),
        compression_ratio: z.number(),
        no_speech_prob: z.number(),
        transient: z.boolean().optional(),
    })),
    text: z.string(),
});
export const OpenAITranscriptionResponseFormat = {
    json: {
        type: "json",
        handler: createJsonResponseHandler(openAITranscriptionJsonSchema),
    },
    verboseJson: {
        type: "verbose_json",
        handler: createJsonResponseHandler(openAITranscriptionVerboseJsonSchema),
    },
    text: {
        type: "text",
        handler: createTextResponseHandler(),
    },
    srt: {
        type: "srt",
        handler: createTextResponseHandler(),
    },
    vtt: {
        type: "vtt",
        handler: createTextResponseHandler(),
    },
};
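
A minimal usage sketch combining the exports above: it transcribes an audio file and estimates the call's cost with calculateOpenAITranscriptionCostInMillicents. The import path and the "data/test.mp3" fixture are assumptions for illustration; adjust them to how your project consumes the package.

import fs from "node:fs";
// Assumption: the package root re-exports these symbols; use the actual module path in your setup.
import { OpenAITranscriptionModel, calculateOpenAITranscriptionCostInMillicents } from "ai-utils.js";

const data = await fs.promises.readFile("data/test.mp3");

const model = new OpenAITranscriptionModel({ model: "whisper-1" });

// generateTranscriptionResponse requests the verbose_json response format,
// so the result includes the duration field that the cost estimate needs.
const response = await model.generateTranscriptionResponse({ type: "mp3", data });

console.log(model.extractTranscriptionText(response));
console.log(`estimated cost: ${calculateOpenAITranscriptionCostInMillicents({ model: "whisper-1", response })} millicents`);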