@genkit-ai/compat-oai
Version:
Genkit AI framework plugin for OpenAI APIs.
285 lines • 8.6 kB
JavaScript
;
// Short aliases for the Object reflection primitives used by the CommonJS
// interop helpers in this file.
const {
  defineProperty: __defProp,
  getOwnPropertyDescriptor: __getOwnPropDesc,
  getOwnPropertyNames: __getOwnPropNames
} = Object;
const { hasOwnProperty: __hasOwnProp } = Object.prototype;
/**
 * Defines one enumerable getter on `target` for every enumerable key of `all`,
 * using the function stored under that key as the getter.
 */
const __export = (target, all) => {
  for (const exportName in all) {
    const getter = all[exportName];
    __defProp(target, exportName, { get: getter, enumerable: true });
  }
};
/**
 * Mirrors the own properties of `from` onto `to` as getters that read through
 * to `from`. Keys already present on `to` and the key equal to `except` are
 * skipped. A copied key is enumerable when its source descriptor is missing
 * or is itself enumerable. Returns `to`.
 */
const __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};
/**
 * Creates a fresh object carrying a non-enumerable `__esModule: true` marker
 * and copies the properties of `mod` onto it via `__copyProps`.
 */
const __toCommonJS = (mod) => {
  const cjsExports = __defProp({}, "__esModule", { value: true });
  return __copyProps(cjsExports, mod);
};
// Namespace object that collects this module's public exports.
var audio_exports = {};
// Register every export as a getter, so each binding is read when it is
// accessed; the constants and functions named here are declared further down
// in this file.
__export(audio_exports, {
RESPONSE_FORMAT_MEDIA_TYPES: () => RESPONSE_FORMAT_MEDIA_TYPES,
SPEECH_MODEL_INFO: () => SPEECH_MODEL_INFO,
SpeechConfigSchema: () => SpeechConfigSchema,
TRANSCRIPTION_MODEL_INFO: () => TRANSCRIPTION_MODEL_INFO,
TranscriptionConfigSchema: () => TranscriptionConfigSchema,
compatOaiSpeechModelRef: () => compatOaiSpeechModelRef,
compatOaiTranscriptionModelRef: () => compatOaiTranscriptionModelRef,
defineCompatOpenAISpeechModel: () => defineCompatOpenAISpeechModel,
defineCompatOpenAITranscriptionModel: () => defineCompatOpenAITranscriptionModel
});
// Publish the namespace as the CommonJS exports, tagged with `__esModule`.
module.exports = __toCommonJS(audio_exports);
var import_genkit = require("genkit");
/**
 * Default model info for transcription models: media input is supported,
 * output is text or JSON, and multiturn, system role and tools are not
 * supported.
 */
const TRANSCRIPTION_MODEL_INFO = {
  supports: {
    media: true,
    output: [
      "text",
      "json",
    ],
    multiturn: false,
    systemRole: false,
    tools: false,
  },
};
/**
 * Default model info for speech models: no media input, output is media
 * only, and multiturn, system role and tools are not supported.
 */
const SPEECH_MODEL_INFO = {
  supports: {
    media: false,
    output: [
      "media",
    ],
    multiturn: false,
    systemRole: false,
    tools: false,
  },
};
/**
 * Object form of the transcription `chunking_strategy` option: a required
 * string `type` plus optional integer millisecond settings and an optional
 * `threshold` between 0 and 1.
 */
const ChunkingStrategySchema = (() => {
  const { z } = import_genkit;
  return z.object({
    type: z.string(),
    prefix_padding_ms: z.number().int().optional(),
    silence_duration_ms: z.number().int().optional(),
    threshold: z.number().min(0).max(1).optional()
  });
})();
/**
 * Config schema for transcription models: `temperature` picked from the
 * common generation config, extended with optional transcription settings.
 */
const TranscriptionConfigSchema = (() => {
  const { GenerationCommonConfigSchema, z } = import_genkit;
  const temperatureOnly = GenerationCommonConfigSchema.pick({
    temperature: true
  });
  return temperatureOnly.extend({
    // Either the literal "auto" or an explicit chunking configuration.
    chunking_strategy: z.union([z.literal("auto"), ChunkingStrategySchema]).optional(),
    include: z.array(z.any()).optional(),
    language: z.string().optional(),
    timestamp_granularities: z.array(z.enum(["word", "segment"])).optional(),
    response_format: z.enum(["json", "text", "srt", "verbose_json", "vtt"]).optional()
    // TODO stream support
  });
})();
/**
 * Config schema for speech models: `voice` (defaults to "alloy"), optional
 * `speed` between 0.25 and 4, and an optional audio `response_format`.
 */
const SpeechConfigSchema = (() => {
  const { z } = import_genkit;
  const voiceNames = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];
  const audioFormats = ["mp3", "opus", "aac", "flac", "wav", "pcm"];
  return z.object({
    voice: z.enum(voiceNames).default("alloy"),
    speed: z.number().min(0.25).max(4).optional(),
    response_format: z.enum(audioFormats).optional()
  });
})();
/**
 * Maps each speech `response_format` value to the media type used for the
 * returned audio.
 */
const RESPONSE_FORMAT_MEDIA_TYPES = {
  "mp3": "audio/mpeg",
  "opus": "audio/opus",
  "aac": "audio/aac",
  "flac": "audio/flac",
  "wav": "audio/wav",
  "pcm": "audio/L16",
};
/**
 * Builds the request options for the speech (text-to-speech) endpoint.
 *
 * @param {string} modelName - Model to use when the config has no `version`.
 * @param {object} request - Generate request; the text of `messages[0]`
 *   becomes the `input`, and `config.voice` falls back to "alloy".
 * @param {Function} [requestBuilder] - Optional hook invoked as
 *   `requestBuilder(request, options)` to fill in the options in place. When
 *   it is given, the remaining config keys are NOT copied automatically.
 * @returns {object} The options, with every `undefined`-valued key removed.
 */
function toTTSRequest(modelName, request, requestBuilder) {
  // Separate the keys handled explicitly (voice, version) and the common
  // generation settings that are not forwarded; the rest is passed through.
  const {
    voice,
    version,
    temperature,
    maxOutputTokens,
    stopSequences,
    topK,
    topP,
    ...passthroughConfig
  } = request.config ?? {};
  const baseOptions = {
    model: version ?? modelName,
    input: new import_genkit.Message(request.messages[0]).text,
    voice: voice ?? "alloy"
  };
  let ttsOptions;
  if (!requestBuilder) {
    // Pass through the rest of the config.
    ttsOptions = { ...baseOptions, ...passthroughConfig };
  } else {
    requestBuilder(request, baseOptions);
    ttsOptions = baseOptions;
  }
  // Drop keys explicitly set to undefined so they are not sent.
  for (const key of Object.keys(ttsOptions)) {
    if (ttsOptions[key] === undefined) {
      delete ttsOptions[key];
    }
  }
  return ttsOptions;
}
/**
 * Converts the binary response of the speech endpoint into a generate
 * response whose single content part is the audio encoded as a base64 data
 * URL.
 *
 * @param {{ arrayBuffer: () => Promise<ArrayBuffer> }} response - Response
 *   object exposing the audio bytes through `arrayBuffer()`.
 * @param {string} [responseFormat="mp3"] - Key into
 *   RESPONSE_FORMAT_MEDIA_TYPES used to pick the media type.
 * @returns {Promise<object>} Response with role "model", finish reason
 *   "stop", and the original `response` attached as `raw`.
 */
async function toGenerateResponse(response, responseFormat = "mp3") {
  // Buffer.from(arrayBuffer) wraps the received bytes without copying them;
  // going through an intermediate Uint8Array duplicated the whole payload.
  const audioBytes = Buffer.from(await response.arrayBuffer());
  const mediaType = RESPONSE_FORMAT_MEDIA_TYPES[responseFormat];
  const audioPart = {
    media: {
      contentType: mediaType,
      url: `data:${mediaType};base64,${audioBytes.toString("base64")}`
    }
  };
  return {
    message: {
      role: "model",
      content: [audioPart]
    },
    finishReason: "stop",
    raw: response
  };
}
/**
 * Registers a speech model with `ai.defineModel`.
 *
 * @param {object} params
 * @param {object} params.ai - Object whose `defineModel` registers the model.
 * @param {string} params.name - Model name; the part after the first "/"
 *   (or the whole name when there is no "/") is sent as the model name.
 * @param {object} params.client - Client exposing `audio.speech.create`.
 * @param {object} [params.modelRef] - Optional reference supplying `info`
 *   and `configSchema` for the model definition.
 * @param {Function} [params.requestBuilder] - Optional hook forwarded to
 *   `toTTSRequest`.
 * @returns {*} Whatever `ai.defineModel` returns.
 */
function defineCompatOpenAISpeechModel(params) {
  const { ai, name, client, modelRef: ref, requestBuilder } = params;
  const modelName = name.substring(name.indexOf("/") + 1);
  const modelOptions = {
    name,
    apiVersion: "v2",
    ...ref?.info,
    configSchema: ref?.configSchema
  };
  const generateSpeech = async (request, { abortSignal }) => {
    const ttsRequest = toTTSRequest(modelName, request, requestBuilder);
    const requestOptions = { signal: abortSignal };
    const audioResponse = await client.audio.speech.create(ttsRequest, requestOptions);
    const generated = await toGenerateResponse(audioResponse, ttsRequest.response_format);
    return generated;
  };
  return ai.defineModel(modelOptions, generateSpeech);
}
/**
 * Creates a model reference for a speech model.
 *
 * @param {object} params
 * @param {string} params.name - Model name.
 * @param {object} [params.info] - Model info; defaults to SPEECH_MODEL_INFO.
 * @param {object} [params.configSchema] - Config schema; SpeechConfigSchema
 *   is used when this is falsy.
 * @param {object} [params.config] - Optional config stored on the reference.
 * @returns {*} The result of genkit's `modelRef`.
 */
function compatOaiSpeechModelRef(params) {
  const { name, info = SPEECH_MODEL_INFO, configSchema, config } = params;
  const { modelRef } = import_genkit;
  const schema = configSchema || SpeechConfigSchema;
  return modelRef({
    name,
    configSchema: schema,
    info,
    config
  });
}
/**
 * Builds the request options for the transcription (speech-to-text) endpoint.
 *
 * The media of `request.messages[0]` must be a data URL: its payload is
 * base64-decoded and wrapped in a `File` named "input".
 *
 * @param {string} modelName - Model to use when the config has no `version`.
 * @param {object} request - Generate request carrying the audio media, an
 *   optional text prompt, `config`, and `output.format`.
 * @param {Function} [requestBuilder] - Optional hook invoked as
 *   `requestBuilder(request, options)` to fill in the options in place. When
 *   it is given, the remaining config keys are NOT copied automatically.
 * @returns {object} The options, with every `undefined`-valued key removed.
 *   `response_format` is always set: `config.response_format`, else
 *   `output.format`, else "text".
 * @throws {Error} When there is no media, when the media URL is not a data
 *   URL, when a JSON output format is combined with a non-JSON
 *   `response_format`, or when the output format is "media".
 */
function toSttRequest(modelName, request, requestBuilder) {
  const message = new import_genkit.Message(request.messages[0]);
  const media = message.media;
  if (!media?.url) {
    throw new Error("No media found in the request");
  }
  // Only inline data URLs can be decoded here. Any other URL used to be
  // base64-decoded as-is, silently producing a garbage file and MIME type.
  const payloadStart = media.url.indexOf(",");
  const isDataUrl = media.url.slice(0, "data:".length).toLowerCase() === "data:";
  if (!isDataUrl || payloadStart === -1) {
    throw new Error(
      "Media must be provided as a base64 data URL; other URL schemes are not supported"
    );
  }
  const mediaBuffer = Buffer.from(media.url.slice(payloadStart + 1), "base64");
  const mediaFile = new File([mediaBuffer], "input", {
    // Fall back to the MIME type embedded in the data URL header.
    type: media.contentType ?? media.url.slice("data:".length, media.url.indexOf(";"))
  });
  // Separate the keys handled explicitly (temperature, version) and the
  // common generation settings that are not forwarded; the rest is passed
  // through.
  const {
    temperature,
    version: modelVersion,
    maxOutputTokens,
    stopSequences,
    topK,
    topP,
    ...restOfConfig
  } = request.config ?? {};
  let options = {
    model: modelVersion ?? modelName,
    file: mediaFile,
    prompt: message.text,
    temperature
  };
  if (requestBuilder) {
    requestBuilder(request, options);
  } else {
    // Pass through the rest of the config.
    options = { ...options, ...restOfConfig };
  }
  const outputFormat = request.output?.format;
  const customFormat = request.config?.response_format;
  if (outputFormat && customFormat) {
    const isJsonCompatible = customFormat === "json" || customFormat === "verbose_json";
    if (outputFormat === "json" && !isJsonCompatible) {
      throw new Error(
        `Custom response format ${customFormat} is not compatible with output format ${outputFormat}`
      );
    }
  }
  if (outputFormat === "media") {
    throw new Error(`Output format ${outputFormat} is not supported.`);
  }
  options.response_format = customFormat || outputFormat || "text";
  // Drop keys explicitly set to undefined so they are not sent.
  for (const key of Object.keys(options)) {
    if (options[key] === undefined) {
      delete options[key];
    }
  }
  return options;
}
/**
 * Wraps a transcription result in a generate response.
 *
 * @param {string|{text: string}} result - Either the transcript itself or an
 *   object whose `text` property holds it.
 * @returns {object} Response with role "model", a single text part, finish
 *   reason "stop", and the untouched `result` attached as `raw`.
 */
function transcriptionToGenerateResponse(result) {
  let transcript;
  if (typeof result === "string") {
    transcript = result;
  } else {
    transcript = result.text;
  }
  const message = {
    role: "model",
    content: [{ text: transcript }]
  };
  return { message, finishReason: "stop", raw: result };
}
/**
 * Registers a transcription model with `ai.defineModel`.
 *
 * @param {object} params
 * @param {object} params.ai - Object whose `defineModel` registers the model.
 * @param {string} params.name - Model name; the part after the first "/"
 *   (or the whole name when there is no "/") is sent as the model name.
 * @param {object} params.client - Client exposing
 *   `audio.transcriptions.create`.
 * @param {object} [params.modelRef] - Optional reference supplying `info`
 *   and `configSchema` for the model definition.
 * @param {Function} [params.requestBuilder] - Optional hook forwarded to
 *   `toSttRequest`.
 * @returns {*} Whatever `ai.defineModel` returns.
 */
function defineCompatOpenAITranscriptionModel(params) {
  const { ai, name, client, modelRef: ref, requestBuilder } = params;
  const modelOptions = {
    name,
    apiVersion: "v2",
    ...ref?.info,
    configSchema: ref?.configSchema
  };
  const transcribe = async (request, { abortSignal }) => {
    const modelName = name.substring(name.indexOf("/") + 1);
    const sttRequest = toSttRequest(modelName, request, requestBuilder);
    // Streaming is always switched off, whatever the built request says.
    const body = { ...sttRequest, stream: false };
    const transcription = await client.audio.transcriptions.create(body, {
      signal: abortSignal
    });
    return transcriptionToGenerateResponse(transcription);
  };
  return ai.defineModel(modelOptions, transcribe);
}
/**
 * Creates a model reference for a transcription model.
 *
 * @param {object} params
 * @param {string} params.name - Model name.
 * @param {object} [params.info] - Model info; defaults to
 *   TRANSCRIPTION_MODEL_INFO.
 * @param {object} [params.configSchema] - Config schema;
 *   TranscriptionConfigSchema is used when this is falsy.
 * @param {object} [params.config] - Optional config stored on the reference.
 * @returns {*} The result of genkit's `modelRef`.
 */
function compatOaiTranscriptionModelRef(params) {
  const { name, info = TRANSCRIPTION_MODEL_INFO, configSchema, config } = params;
  const { modelRef } = import_genkit;
  const schema = configSchema || TranscriptionConfigSchema;
  return modelRef({
    name,
    configSchema: schema,
    info,
    config
  });
}
// Annotate the CommonJS export names for ESM import in node:
// The leading `0 &&` short-circuits, so this assignment never executes; the
// statement only spells out the export names in literal form. Keep its shape
// unchanged.
0 && (module.exports = {
RESPONSE_FORMAT_MEDIA_TYPES,
SPEECH_MODEL_INFO,
SpeechConfigSchema,
TRANSCRIPTION_MODEL_INFO,
TranscriptionConfigSchema,
compatOaiSpeechModelRef,
compatOaiTranscriptionModelRef,
defineCompatOpenAISpeechModel,
defineCompatOpenAITranscriptionModel
});
//# sourceMappingURL=audio.js.map