// ai-utils.js
// Build AI applications, chatbots, and agents with JavaScript and TypeScript.
import z from "zod";
import { AbstractModel } from "../../model-function/AbstractModel.js";
import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
import { createJsonResponseHandler, createTextResponseHandler, postToApi, } from "../../util/api/postToApi.js";
import { failedOpenAICallResponseHandler } from "./OpenAIError.js";
/**
 * @see https://openai.com/pricing
 */
export const OPENAI_TRANSCRIPTION_MODELS = {
    "whisper-1": {
        costInMillicentsPerSecond: 10, // $0.006 per minute = 600 millicents / 60 seconds
    },
};
export const calculateOpenAITranscriptionCostInMillicents = ({ model, response, }) => {
    if (model !== "whisper-1") {
        return null;
    }
    const durationInSeconds = response.duration;
    return (Math.ceil(durationInSeconds) *
        OPENAI_TRANSCRIPTION_MODELS[model].costInMillicentsPerSecond);
};
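// Usage sketch: the cost helper rounds the duration up to whole seconds and
// multiplies by the per-second rate, so a 61.3 second whisper-1 transcription
// costs Math.ceil(61.3) * 10 = 620 millicents (0.62 cents).
// `verboseJsonResponse` below is a placeholder for a parsed verbose_json response
// that includes `duration`; it is not defined in this module.
//
// const costInMillicents = calculateOpenAITranscriptionCostInMillicents({
//   model: "whisper-1",
//   response: verboseJsonResponse,
// });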
/**
 * Create a transcription model that calls the OpenAI transcription API.
 *
 * @see https://platform.openai.com/docs/api-reference/audio/create
 *
 * @example
 * const data = await fs.promises.readFile("data/test.mp3");
 *
 * const { transcription } = await transcribe(
 *   new OpenAITranscriptionModel({ model: "whisper-1" }),
 *   {
 *     type: "mp3",
 *     data,
 *   }
 * );
 */
export class OpenAITranscriptionModel extends AbstractModel {
    constructor(settings) {
        super({ settings });
        Object.defineProperty(this, "provider", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "openai"
        });
    }
    get modelName() {
        return this.settings.model;
    }
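    // The verbose JSON response format is requested so that the response carries
    // duration and segment metadata (used e.g. by
    // calculateOpenAITranscriptionCostInMillicents) in addition to the text.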
    generateTranscriptionResponse(data, options) {
        return this.callAPI(data, {
            responseFormat: OpenAITranscriptionResponseFormat.verboseJson,
            functionId: options?.functionId,
            settings: options?.settings,
            run: options?.run,
        });
    }
    extractTranscriptionText(response) {
        return response.text;
    }
    get apiKey() {
        const apiKey = this.settings.apiKey ?? process.env.OPENAI_API_KEY;
        if (apiKey == null) {
            throw new Error(`OpenAI API key is missing. Pass it as an argument to the constructor or set it as an environment variable named OPENAI_API_KEY.`);
        }
        return apiKey;
    }
    async callAPI(data, options) {
        const run = options?.run;
        const settings = options?.settings;
        const responseFormat = options?.responseFormat;
        const callSettings = Object.assign({
            apiKey: this.apiKey,
        }, this.settings, settings, {
            abortSignal: run?.abortSignal,
            file: {
                name: `audio.${data.type}`,
                data: data.data,
            },
            responseFormat,
        });
        return callWithRetryAndThrottle({
            retry: this.settings.retry,
            throttle: this.settings.throttle,
            call: async () => callOpenAITranscriptionAPI(callSettings),
        });
    }
    withSettings(additionalSettings) {
        return new OpenAITranscriptionModel(Object.assign({}, this.settings, additionalSettings));
    }
}
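// Usage sketch: `withSettings` returns a new model instance with the additional
// settings merged in, leaving the original instance unchanged. The `language` and
// `temperature` keys are settings that callOpenAITranscriptionAPI forwards to the API;
// the variable names are illustrative only.
//
// const baseModel = new OpenAITranscriptionModel({ model: "whisper-1" });
// const germanModel = baseModel.withSettings({ language: "de", temperature: 0 });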
/**
 * Call the OpenAI Transcription API to generate a transcription from an audio file.
 *
 * @see https://platform.openai.com/docs/api-reference/audio/create
 *
 * @example
 * const transcriptionResponse = await callOpenAITranscriptionAPI({
 *   apiKey: openAiApiKey,
 *   model: "whisper-1",
 *   file: {
 *     name: "audio.mp3",
 *     data: fileData, // Buffer
 *   },
 *   responseFormat: OpenAITranscriptionResponseFormat.json,
 * });
 */
async function callOpenAITranscriptionAPI({ baseUrl = "https://api.openai.com/v1", abortSignal, apiKey, model, file, prompt, responseFormat, temperature, language, }) {
    const formData = new FormData();
    formData.append("file", new Blob([file.data]), file.name);
    formData.append("model", model);
    if (prompt) {
        formData.append("prompt", prompt);
    }
    if (responseFormat) {
        formData.append("response_format", responseFormat.type);
    }
    if (temperature) {
        formData.append("temperature", temperature.toString());
    }
    if (language) {
        formData.append("language", language);
    }
    return postToApi({
        url: `${baseUrl}/audio/transcriptions`,
        apiKey,
        contentType: null,
        body: {
            content: formData,
            values: {
                model,
                prompt,
                response_format: responseFormat,
                temperature,
                language,
            },
        },
        failedResponseHandler: failedOpenAICallResponseHandler,
        successfulResponseHandler: responseFormat.handler,
        abortSignal,
    });
}
const openAITranscriptionJsonSchema = z.object({
    text: z.string(),
});
const openAITranscriptionVerboseJsonSchema = z.object({
    task: z.literal("transcribe"),
    language: z.string(),
    duration: z.number(),
    segments: z.array(z.object({
        id: z.number(),
        seek: z.number(),
        start: z.number(),
        end: z.number(),
        text: z.string(),
        tokens: z.array(z.number()),
        temperature: z.number(),
        avg_logprob: z.number(),
        compression_ratio: z.number(),
        no_speech_prob: z.number(),
        transient: z.boolean().optional(),
    })),
    text: z.string(),
});
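/**
 * Response formats supported by the transcription endpoint. Each entry pairs the
 * `response_format` value sent to the API with a handler that parses the response:
 * Zod-validated JSON for `json` and `verbose_json`, plain text for `text`, `srt`, and `vtt`.
 *
 * @example
 * // Sketch: selecting a format when calling the low-level API function.
 * // `openAiApiKey` and `fileData` are placeholders, not defined in this module.
 * const srtResponse = await callOpenAITranscriptionAPI({
 *   apiKey: openAiApiKey,
 *   model: "whisper-1",
 *   file: { name: "audio.mp3", data: fileData },
 *   responseFormat: OpenAITranscriptionResponseFormat.srt,
 * });
 */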
export const OpenAITranscriptionResponseFormat = {
    json: {
        type: "json",
        handler: createJsonResponseHandler(openAITranscriptionJsonSchema),
    },
    verboseJson: {
        type: "verbose_json",
        handler: createJsonResponseHandler(openAITranscriptionVerboseJsonSchema),
    },
    text: {
        type: "text",
        handler: createTextResponseHandler(),
    },
    srt: {
        type: "srt",
        handler: createTextResponseHandler(),
    },
    vtt: {
        type: "vtt",
        handler: createTextResponseHandler(),
    },
};