ai-utils.js

Build AI applications, chatbots, and agents with JavaScript and TypeScript.

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.OpenAITranscriptionResponseFormat = exports.OpenAITranscriptionModel = exports.calculateOpenAITranscriptionCostInMillicents = exports.OPENAI_TRANSCRIPTION_MODELS = void 0;
const zod_1 = __importDefault(require("zod"));
const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
const callWithRetryAndThrottle_js_1 = require("../../util/api/callWithRetryAndThrottle.cjs");
const postToApi_js_1 = require("../../util/api/postToApi.cjs");
const OpenAIError_js_1 = require("./OpenAIError.cjs");
/**
 * @see https://openai.com/pricing
 */
exports.OPENAI_TRANSCRIPTION_MODELS = {
    "whisper-1": {
        costInMillicentsPerSecond: 10, // = 600 / 60
    },
};
const calculateOpenAITranscriptionCostInMillicents = ({ model, response }) => {
    if (model !== "whisper-1") {
        return null;
    }
    const durationInSeconds = response.duration;
    return (Math.ceil(durationInSeconds) *
        exports.OPENAI_TRANSCRIPTION_MODELS[model].costInMillicentsPerSecond);
};
exports.calculateOpenAITranscriptionCostInMillicents = calculateOpenAITranscriptionCostInMillicents;
/**
 * Create a transcription model that calls the OpenAI transcription API.
 *
 * @see https://platform.openai.com/docs/api-reference/audio/create
 *
 * @example
 * const data = await fs.promises.readFile("data/test.mp3");
 *
 * const { transcription } = await transcribe(
 *   new OpenAITranscriptionModel({ model: "whisper-1" }),
 *   {
 *     type: "mp3",
 *     data,
 *   }
 * );
 */
class OpenAITranscriptionModel extends AbstractModel_js_1.AbstractModel {
    constructor(settings) {
        super({ settings });
        Object.defineProperty(this, "provider", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "openai",
        });
    }
    get modelName() {
        return this.settings.model;
    }
    generateTranscriptionResponse(data, options) {
        return this.callAPI(data, {
            responseFormat: exports.OpenAITranscriptionResponseFormat.verboseJson,
            functionId: options?.functionId,
            settings: options?.settings,
            run: options?.run,
        });
    }
    extractTranscriptionText(response) {
        return response.text;
    }
    get apiKey() {
        const apiKey = this.settings.apiKey ?? process.env.OPENAI_API_KEY;
        if (apiKey == null) {
            throw new Error(`OpenAI API key is missing. Pass it as an argument to the constructor or set it as an environment variable named OPENAI_API_KEY.`);
        }
        return apiKey;
    }
    async callAPI(data, options) {
        const run = options?.run;
        const settings = options?.settings;
        const responseFormat = options?.responseFormat;
        const callSettings = Object.assign({ apiKey: this.apiKey }, this.settings, settings, {
            abortSignal: run?.abortSignal,
            file: {
                name: `audio.${data.type}`,
                data: data.data,
            },
            responseFormat,
        });
        return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
            retry: this.settings.retry,
            throttle: this.settings.throttle,
            call: async () => callOpenAITranscriptionAPI(callSettings),
        });
    }
    withSettings(additionalSettings) {
        return new OpenAITranscriptionModel(Object.assign({}, this.settings, additionalSettings));
    }
}
exports.OpenAITranscriptionModel = OpenAITranscriptionModel;
/**
 * Call the OpenAI Transcription API to generate a transcription from an audio file.
 *
 * @see https://platform.openai.com/docs/api-reference/audio/create
 *
 * @example
 * const transcriptionResponse = await callOpenAITranscriptionAPI({
 *   apiKey: openAiApiKey,
 *   model: "whisper-1",
 *   file: {
 *     name: "audio.mp3",
 *     data: fileData, // Buffer
 *   },
 *   responseFormat: OpenAITranscriptionResponseFormat.json,
 * });
 */
async function callOpenAITranscriptionAPI({ baseUrl = "https://api.openai.com/v1", abortSignal, apiKey, model, file, prompt, responseFormat, temperature, language, }) {
    const formData = new FormData();
    formData.append("file", new Blob([file.data]), file.name);
    formData.append("model", model);
    if (prompt) {
        formData.append("prompt", prompt);
    }
    if (responseFormat) {
        formData.append("response_format", responseFormat.type);
    }
    if (temperature) {
        formData.append("temperature", temperature.toString());
    }
    if (language) {
        formData.append("language", language);
    }
    return (0, postToApi_js_1.postToApi)({
        url: `${baseUrl}/audio/transcriptions`,
        apiKey,
        contentType: null,
        body: {
            content: formData,
            values: {
                model,
                prompt,
                response_format: responseFormat,
                temperature,
                language,
            },
        },
        failedResponseHandler: OpenAIError_js_1.failedOpenAICallResponseHandler,
        successfulResponseHandler: responseFormat.handler,
        abortSignal,
    });
}
const openAITranscriptionJsonSchema = zod_1.default.object({
    text: zod_1.default.string(),
});
const openAITranscriptionVerboseJsonSchema = zod_1.default.object({
    task: zod_1.default.literal("transcribe"),
    language: zod_1.default.string(),
    duration: zod_1.default.number(),
    segments: zod_1.default.array(zod_1.default.object({
        id: zod_1.default.number(),
        seek: zod_1.default.number(),
        start: zod_1.default.number(),
        end: zod_1.default.number(),
        text: zod_1.default.string(),
        tokens: zod_1.default.array(zod_1.default.number()),
        temperature: zod_1.default.number(),
        avg_logprob: zod_1.default.number(),
        compression_ratio: zod_1.default.number(),
        no_speech_prob: zod_1.default.number(),
        transient: zod_1.default.boolean().optional(),
    })),
    text: zod_1.default.string(),
});
exports.OpenAITranscriptionResponseFormat = {
    json: {
        type: "json",
        handler: (0, postToApi_js_1.createJsonResponseHandler)(openAITranscriptionJsonSchema),
    },
    verboseJson: {
        type: "verbose_json",
        handler: (0, postToApi_js_1.createJsonResponseHandler)(openAITranscriptionVerboseJsonSchema),
    },
    text: {
        type: "text",
        handler: (0, postToApi_js_1.createTextResponseHandler)(),
    },
    srt: {
        type: "srt",
        handler: (0, postToApi_js_1.createTextResponseHandler)(),
    },
    vtt: {
        type: "vtt",
        handler: (0, postToApi_js_1.createTextResponseHandler)(),
    },
};
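/**
 * Worked cost example: `calculateOpenAITranscriptionCostInMillicents` bills
 * whisper-1 at 10 millicents per second, rounding the duration up to whole
 * seconds. A sketch, assuming a verbose_json response whose `duration` is
 * 90.4 seconds (the value is illustrative, not from a real API call):
 *
 * @example
 * const costInMillicents = calculateOpenAITranscriptionCostInMillicents({
 *   model: "whisper-1",
 *   response, // assumed verbose_json response with duration: 90.4
 * });
 * // Math.ceil(90.4) = 91 seconds * 10 millicents/second = 910 millicents,
 * // i.e. 0.91 cents, or roughly $0.0091 for the clip.
 */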
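/**
 * Response-format sketch: `generateTranscriptionResponse` always requests
 * verbose_json, but `callAPI` also accepts any other entry of
 * `OpenAITranscriptionResponseFormat`, such as `srt` for subtitles. A sketch,
 * assuming `data` holds an mp3 Buffer and OPENAI_API_KEY is set:
 *
 * @example
 * const model = new OpenAITranscriptionModel({ model: "whisper-1" });
 * const srtSubtitles = await model.callAPI(
 *   { type: "mp3", data },
 *   { responseFormat: OpenAITranscriptionResponseFormat.srt }
 * );
 */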
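/**
 * Settings sketch: `withSettings` merges additional settings into a new
 * OpenAITranscriptionModel instead of mutating the instance; `retry` and
 * `throttle` are the settings that `callAPI` consumes. `myRetryPolicy` and
 * `myThrottle` are assumed placeholders for the library's retry and throttle
 * objects:
 *
 * @example
 * const baseModel = new OpenAITranscriptionModel({ model: "whisper-1" });
 * const robustModel = baseModel.withSettings({
 *   retry: myRetryPolicy,
 *   throttle: myThrottle,
 * });
 */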