ai-utils.js
Version:
Build AI applications, chatbots, and agents with JavaScript and TypeScript.
197 lines (196 loc) • 6.54 kB
TypeScript
/// <reference types="node" resolution-mode="require"/>
import z from "zod";
import { AbstractModel } from "../../model-function/AbstractModel.js";
import { FunctionOptions } from "../../model-function/FunctionOptions.js";
import { TranscriptionModel, TranscriptionModelSettings } from "../../model-function/transcribe-audio/TranscriptionModel.js";
import { RetryFunction } from "../../util/api/RetryFunction.js";
import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
import { ResponseHandler } from "../../util/api/postToApi.js";
import { OpenAIModelSettings } from "./OpenAIModelSettings.js";
/**
 * Table of the OpenAI transcription models known to this module, keyed by
 * model name. Each entry declares a numeric `costInMillicentsPerSecond`.
 *
 * NOTE(review): the unit is presumably thousandths of a cent per second of
 * audio; the actual values live in the implementation file — confirm there.
 *
 * @see https://openai.com/pricing
 */
export declare const OPENAI_TRANSCRIPTION_MODELS: {
"whisper-1": {
costInMillicentsPerSecond: number;
};
};
/**
 * Union of the model names that appear as keys of
 * `OPENAI_TRANSCRIPTION_MODELS`. Derived with `keyof typeof`, so it follows
 * the table automatically when entries are added or removed.
 */
export type OpenAITranscriptionModelType = keyof typeof OPENAI_TRANSCRIPTION_MODELS;
/**
 * Cost calculation for a single transcription call.
 *
 * Takes one object argument with two properties:
 * - `model`: name of the transcription model.
 * - `response`: the verbose JSON response of the transcription call.
 *
 * @returns A number, or `null`.
 *   NOTE(review): the formula and the conditions under which `null` is
 *   returned are not visible in this declaration file. Presumably the result
 *   is in millicents and derived from the response duration and the model's
 *   `costInMillicentsPerSecond` — confirm against the implementation.
 */
export declare const calculateOpenAITranscriptionCostInMillicents: ({ model, response, }: {
model: OpenAITranscriptionModelType;
response: OpenAITranscriptionVerboseJsonResponse;
}) => number | null;
/**
 * Settings accepted by `OpenAITranscriptionModel`. Extends the generic
 * `TranscriptionModelSettings` with OpenAI-specific fields. Only `model` is
 * required; all other fields declared here are optional.
 */
export interface OpenAITranscriptionModelSettings extends TranscriptionModelSettings {
// Name of the transcription model to use (required).
model: OpenAITranscriptionModelType;
// NOTE(review): presumably overrides the default OpenAI API base URL — confirm in the implementation.
baseUrl?: string;
// NOTE(review): presumably the OpenAI API key; the fallback used when omitted is not visible here.
apiKey?: string;
// Optional retry strategy (see RetryFunction).
retry?: RetryFunction;
// Optional throttling strategy (see ThrottleFunction).
throttle?: ThrottleFunction;
}
/**
 * Audio payload passed to the OpenAI transcription model.
 *
 * - `type`: the audio format of `data`. `"m4a"` is the format name listed by
 *   the OpenAI audio API. The previously declared `"m3a"` is not an audio
 *   format and appears to be a misspelling of `"m4a"`; it is kept in the
 *   union only so that existing callers continue to type-check.
 * - `data`: the raw audio bytes as a Node.js `Buffer`.
 */
export type OpenAITranscriptionInput = {
  type: "mp3" | "mp4" | "mpeg" | "mpga" | "m4a" | "m3a" | "wav" | "webm";
  data: Buffer;
};
/**
 * Create a transcription model that calls the OpenAI transcription API.
 *
 * The class extends `AbstractModel` with `OpenAITranscriptionModelSettings`
 * and implements `TranscriptionModel`, taking `OpenAITranscriptionInput` as
 * input and producing `OpenAITranscriptionVerboseJsonResponse` as its raw
 * response type.
 *
 * @see https://platform.openai.com/docs/api-reference/audio/create
 *
 * @example
 * const data = await fs.promises.readFile("data/test.mp3");
 *
 * const { transcription } = await transcribe(
 *   new OpenAITranscriptionModel({ model: "whisper-1" }),
 *   {
 *     type: "mp3",
 *     data,
 *   }
 * );
 */
export declare class OpenAITranscriptionModel extends AbstractModel<OpenAITranscriptionModelSettings> implements TranscriptionModel<OpenAITranscriptionInput, OpenAITranscriptionVerboseJsonResponse, OpenAITranscriptionModelSettings> {
/** Creates a model instance from the given settings (`model` is required). */
constructor(settings: OpenAITranscriptionModelSettings);
/** Provider identifier; always the literal `"openai"`. */
readonly provider: "openai";
/** Name of the configured model; typed as the literal `"whisper-1"`. */
get modelName(): "whisper-1";
/**
 * Produces the verbose JSON transcription response for the given audio input.
 * `options` may carry partial per-call overrides of the model settings and
 * of `OpenAIModelSettings`.
 */
generateTranscriptionResponse(data: OpenAITranscriptionInput, options?: FunctionOptions<Partial<OpenAITranscriptionModelSettings & OpenAIModelSettings>>): PromiseLike<OpenAITranscriptionVerboseJsonResponse>;
/**
 * Returns a string extracted from a verbose JSON response.
 * NOTE(review): presumably the response's `text` field — confirm in the implementation.
 */
extractTranscriptionText(response: OpenAITranscriptionVerboseJsonResponse): string;
/** Private accessor for the API key; how it is resolved is not visible in this declaration. */
private get apiKey();
/**
 * Lower-level call whose result type `RESULT` is determined by the handler
 * of the required `responseFormat` option. Unlike
 * `generateTranscriptionResponse`, `options` is mandatory here because
 * `responseFormat` must be supplied.
 */
callAPI<RESULT>(data: OpenAITranscriptionInput, options: {
responseFormat: OpenAITranscriptionResponseFormatType<RESULT>;
} & FunctionOptions<Partial<OpenAITranscriptionModelSettings & OpenAIModelSettings>>): Promise<RESULT>;
/**
 * Returns a value of this model's own type for the given settings.
 * NOTE(review): the parameter is the full settings type (so `model` is
 * required), while per-call options elsewhere use `Partial<...>`; whether
 * the settings are merged with the current ones is not visible here — confirm.
 */
withSettings(additionalSettings: OpenAITranscriptionModelSettings): this;
}
/**
 * Zod object schema for the plain JSON transcription response: an object
 * with a single string field `text`. The schema type uses the `"strip"`
 * unknown-keys mode. Not exported; only the inferred type below it is.
 */
declare const openAITranscriptionJsonSchema: z.ZodObject<{
text: z.ZodString;
}, "strip", z.ZodTypeAny, {
text: string;
}, {
text: string;
}>;
/**
 * Type inferred from `openAITranscriptionJsonSchema`, i.e. `{ text: string }`.
 */
export type OpenAITranscriptionJsonResponse = z.infer<typeof openAITranscriptionJsonSchema>;
/**
 * Zod object schema for the verbose JSON transcription response.
 *
 * Top-level fields: `task` (the literal `"transcribe"`), `language` (string),
 * `duration` (number), `segments` (array of segment objects) and `text`
 * (string). Each segment has numeric `id`, `seek`, `start`, `end`,
 * `temperature`, `avg_logprob`, `compression_ratio` and `no_speech_prob`
 * fields, a string `text`, a numeric `tokens` array and an optional boolean
 * `transient`. Both object schemas use the `"strip"` unknown-keys mode.
 *
 * The type arguments after `z.ZodTypeAny` are the schema's output and input
 * shapes; they are identical here.
 *
 * NOTE(review): units of `duration`, `start` and `end` are presumably
 * seconds — confirm against the OpenAI API reference.
 */
declare const openAITranscriptionVerboseJsonSchema: z.ZodObject<{
task: z.ZodLiteral<"transcribe">;
language: z.ZodString;
duration: z.ZodNumber;
segments: z.ZodArray<z.ZodObject<{
id: z.ZodNumber;
seek: z.ZodNumber;
start: z.ZodNumber;
end: z.ZodNumber;
text: z.ZodString;
tokens: z.ZodArray<z.ZodNumber, "many">;
temperature: z.ZodNumber;
avg_logprob: z.ZodNumber;
compression_ratio: z.ZodNumber;
no_speech_prob: z.ZodNumber;
transient: z.ZodOptional<z.ZodBoolean>;
}, "strip", z.ZodTypeAny, {
text: string;
temperature: number;
id: number;
tokens: number[];
seek: number;
start: number;
end: number;
avg_logprob: number;
compression_ratio: number;
no_speech_prob: number;
transient?: boolean | undefined;
}, {
text: string;
temperature: number;
id: number;
tokens: number[];
seek: number;
start: number;
end: number;
avg_logprob: number;
compression_ratio: number;
no_speech_prob: number;
transient?: boolean | undefined;
}>, "many">;
text: z.ZodString;
}, "strip", z.ZodTypeAny, {
text: string;
segments: {
text: string;
temperature: number;
id: number;
tokens: number[];
seek: number;
start: number;
end: number;
avg_logprob: number;
compression_ratio: number;
no_speech_prob: number;
transient?: boolean | undefined;
}[];
task: "transcribe";
language: string;
duration: number;
}, {
text: string;
segments: {
text: string;
temperature: number;
id: number;
tokens: number[];
seek: number;
start: number;
end: number;
avg_logprob: number;
compression_ratio: number;
no_speech_prob: number;
transient?: boolean | undefined;
}[];
task: "transcribe";
language: string;
duration: number;
}>;
/**
 * Type inferred from `openAITranscriptionVerboseJsonSchema`: the verbose
 * transcription response with `task`, `language`, `duration`, `segments`
 * and `text`.
 */
export type OpenAITranscriptionVerboseJsonResponse = z.infer<typeof openAITranscriptionVerboseJsonSchema>;
/**
 * Pairs a response format name with the handler that turns the API response
 * into a value of type `T`.
 *
 * - `type`: one of the five format names in the union below.
 * - `handler`: a `ResponseHandler<T>`; `T` is the result type callers receive.
 */
export type OpenAITranscriptionResponseFormatType<T> = {
type: "json" | "text" | "srt" | "verbose_json" | "vtt";
handler: ResponseHandler<T>;
};
/**
 * Predefined response formats, one per supported format name. Each entry has
 * a literal `type` and a typed `handler`:
 *
 * - `json`: handler yields `{ text: string }`.
 * - `verboseJson`: `type` is `"verbose_json"` (note the key is camelCase
 *   while the type literal is snake_case); handler yields the verbose
 *   response shape with segments.
 * - `text`, `srt`, `vtt`: handlers yield a plain `string`.
 */
export declare const OpenAITranscriptionResponseFormat: {
json: {
type: "json";
handler: ResponseHandler<{
text: string;
}>;
};
verboseJson: {
type: "verbose_json";
handler: ResponseHandler<{
text: string;
segments: {
text: string;
temperature: number;
id: number;
tokens: number[];
seek: number;
start: number;
end: number;
avg_logprob: number;
compression_ratio: number;
no_speech_prob: number;
transient?: boolean | undefined;
}[];
task: "transcribe";
language: string;
duration: number;
}>;
};
text: {
type: "text";
handler: ResponseHandler<string>;
};
srt: {
type: "srt";
handler: ResponseHandler<string>;
};
vtt: {
type: "vtt";
handler: ResponseHandler<string>;
};
};
export {};