// @rocketnew/llm-sdk
// Version:
// Universal LLM SDK for JavaScript/TypeScript - OpenAI, Anthropic, Gemini, Perplexity and more
// 193 lines (190 loc) • 6.94 kB
// JavaScript
import {
BinaryResponseContent,
init_responses
} from "./chunk-W4XUZXUW.mjs";
import {
OPENAI_CHAT_COMPLETION_PARAMS,
__esm,
init_constants
} from "./chunk-DDRQKMV4.mjs";
// src/endpoints/speech/speech_to_completion_bridge/transformation.ts
var SpeechToCompletionBridgeTransformationHandler;
var init_transformation = __esm({
  "src/endpoints/speech/speech_to_completion_bridge/transformation.ts"() {
    "use strict";
    init_constants();
    init_responses();
    /**
     * Translates between a speech-style call (text in, audio out) and a chat
     * completion call that requests the "audio" modality.
     *
     * `transformRequest` builds the completion arguments; `transformResponse`
     * turns the completion result back into binary audio content.
     */
    SpeechToCompletionBridgeTransformationHandler = class {
      /**
       * Builds the argument object for a chat completion that should return audio.
       *
       * @param model Model identifier, forwarded as `model`.
       * @param input Text to speak; sent as the content of a single user message.
       * @param voice Voice to use. Only string voices are forwarded (as
       *   `audio.voice`); any other value is ignored.
       * @param optionalParams Caller options. Only keys listed in
       *   `OPENAI_CHAT_COMPLETION_PARAMS` are forwarded.
       * @param rocketllmParams Extra parameters spread over the values above, so
       *   they win on key collisions.
       * @param headers Forwarded as `headers`.
       * @param customLLMProvider Forwarded as `custom_llm_provider`.
       * @returns The assembled arguments with every `null`/`undefined` entry removed.
       */
      transformRequest(model, input, voice, optionalParams, rocketllmParams, headers, customLLMProvider) {
        const passedOptionalParams = {};
        for (const op of Object.keys(optionalParams)) {
          if (OPENAI_CHAT_COMPLETION_PARAMS.includes(op)) {
            passedOptionalParams[op] = optionalParams[op];
          }
        }
        if (typeof voice === "string") {
          const audio = { voice };
          // NOTE(review): if "response_format" is also listed in
          // OPENAI_CHAT_COMPLETION_PARAMS it is forwarded unchanged at the top
          // level as well as being copied here — confirm that is intended.
          if ("response_format" in optionalParams) {
            audio.format = optionalParams["response_format"];
          }
          passedOptionalParams["audio"] = audio;
        }
        const returnKwargs = {
          model,
          messages: [
            {
              role: "user",
              content: input
            }
          ],
          modalities: ["audio"],
          ...passedOptionalParams,
          ...rocketllmParams,
          // Placed after the spreads so these two always take precedence.
          headers,
          custom_llm_provider: customLLMProvider
        };
        // Drop null/undefined entries so they are not forwarded.
        return Object.fromEntries(
          Object.entries(returnKwargs).filter(([, value]) => value != null)
        );
      }
      /**
       * Reports whether a model name refers to a Gemini TTS model.
       *
       * @param model Model name; compared case-insensitively.
       * @returns `true` when the name contains both "gemini" and "tts"
       *   (which also covers names containing "preview-tts").
       */
      _isGeminiTTSModel(model) {
        const lower = model.toLowerCase();
        return lower.includes("gemini") && lower.includes("tts");
      }
      /**
       * Wraps raw 16-bit PCM samples in a 44-byte RIFF/WAVE header.
       *
       * @param pcmData Buffer of raw PCM bytes (its `length` is used as the data size).
       * @param sampleRate Samples per second written to the header. Defaults to 24000.
       * @param channels Number of channels written to the header. Defaults to 1.
       * @returns A new Buffer containing the header followed by `pcmData`.
       */
      _convertPcm16ToWav(pcmData, sampleRate = 24000, channels = 1) {
        const BITS_PER_SAMPLE = 16;
        const BYTES_PER_SAMPLE = BITS_PER_SAMPLE / 8;
        const HEADER_SIZE = 44;
        const blockAlign = channels * BYTES_PER_SAMPLE;
        const byteRate = sampleRate * blockAlign;
        const dataSize = pcmData.length;
        // RIFF chunk size excludes the 8 bytes of the "RIFF" tag and the size field itself.
        const riffChunkSize = HEADER_SIZE - 8 + dataSize;
        const wavHeader = Buffer.alloc(HEADER_SIZE);
        wavHeader.write("RIFF", 0, 4, "ascii");
        wavHeader.writeUInt32LE(riffChunkSize, 4);
        wavHeader.write("WAVE", 8, 4, "ascii");
        wavHeader.write("fmt ", 12, 4, "ascii");
        wavHeader.writeUInt32LE(16, 16); // size of the "fmt " chunk body
        wavHeader.writeUInt16LE(1, 20); // audio format 1 = uncompressed PCM
        wavHeader.writeUInt16LE(channels, 22);
        wavHeader.writeUInt32LE(sampleRate, 24);
        wavHeader.writeUInt32LE(byteRate, 28);
        wavHeader.writeUInt16LE(blockAlign, 32);
        wavHeader.writeUInt16LE(BITS_PER_SAMPLE, 34);
        wavHeader.write("data", 36, 4, "ascii");
        wavHeader.writeUInt32LE(dataSize, 40);
        return Buffer.concat([wavHeader, pcmData]);
      }
      /**
       * Converts a completion result carrying base64 audio into binary response content.
       *
       * Reads `choices[0].message.audio.data`, decodes it from base64 and, when
       * `modelResponse.model` names a Gemini TTS model, wraps the decoded bytes
       * in a WAV header.
       *
       * @param modelResponse Completion result with `choices` and optionally `model`.
       * @returns A `BinaryResponseContent` wrapping a synthesized 200 `Response`.
       * @throws Error when there are no choices, the first choice has no audio
       *   part, or the audio part has no `data` payload.
       */
      transformResponse(modelResponse) {
        const choices = modelResponse.choices;
        if (!choices || choices.length === 0) {
          throw new Error("No choices found in the response");
        }
        const audioPart = choices[0].message?.audio;
        if (audioPart == null) {
          throw new Error("No audio part found in the response");
        }
        const audioContent = audioPart.data;
        // Without this guard Buffer.from(undefined) fails with an opaque TypeError.
        if (audioContent == null) {
          throw new Error("No audio data found in the response");
        }
        let binaryData = Buffer.from(audioContent, "base64");
        const model = modelResponse.model || "";
        const isGeminiTTS = this._isGeminiTTSModel(model);
        if (isGeminiTTS) {
          // NOTE(review): presumably Gemini TTS returns headerless PCM16 at the
          // default 24 kHz mono — confirm against the provider.
          binaryData = this._convertPcm16ToWav(binaryData);
        }
        // NOTE(review): every non-Gemini response is labelled audio/mpeg regardless
        // of the requested audio format — confirm this matches what providers return.
        const contentType = isGeminiTTS ? "audio/wav" : "audio/mpeg";
        // A Buffer can be a view into a larger ArrayBuffer, so copy out only its own bytes.
        const bodyBuffer = binaryData.buffer.slice(
          binaryData.byteOffset,
          binaryData.byteOffset + binaryData.byteLength
        );
        const synthesizedResponse = new Response(bodyBuffer, {
          status: 200,
          headers: { "content-type": contentType }
        });
        return new BinaryResponseContent(synthesizedResponse);
      }
    };
  }
});
// src/endpoints/speech/speech_to_completion_bridge/handler.ts
var SpeechToCompletionBridgeHandler, speechToCompletionBridgeHandler;
var init_handler = __esm({
  "src/endpoints/speech/speech_to_completion_bridge/handler.ts"() {
    init_transformation();
    // Returns `value` when it is a string; otherwise throws an Error with `message`.
    const expectString = (value, message) => {
      if (typeof value !== "string") {
        throw new Error(message);
      }
      return value;
    };
    // Returns `value` when it is a non-null object; otherwise throws an Error with `message`.
    const expectObject = (value, message) => {
      if (value == null || typeof value !== "object") {
        throw new Error(message);
      }
      return value;
    };
    /**
     * Serves a speech request by validating its arguments, converting them to a
     * completion call, running that call and converting the result back.
     */
    SpeechToCompletionBridgeHandler = class {
      constructor() {
        this.transformationHandler = new SpeechToCompletionBridgeTransformationHandler();
      }
      /**
       * Checks the types of the speech arguments and returns them as one object.
       *
       * Checks run in this order: `model`, `customLLMProvider`, `input` (strings),
       * then `optionalParams`, `rocketllmParams`, `headers` (non-null objects).
       * `voice` is passed through unchecked.
       *
       * @param kwargs Object holding the arguments listed above.
       * @returns The validated arguments.
       * @throws Error "<name> is required" for the first argument that fails its check.
       */
      validateInputKwargs(kwargs) {
        const model = expectString(kwargs.model, "model is required");
        const customLLMProvider = expectString(
          kwargs.customLLMProvider,
          "customLLMProvider is required"
        );
        const input = expectString(kwargs.input, "input is required");
        const optionalParams = expectObject(kwargs.optionalParams, "optionalParams is required");
        const rocketllmParams = expectObject(kwargs.rocketllmParams, "rocketllmParams is required");
        const headers = expectObject(kwargs.headers, "headers is required");
        return {
          model,
          input,
          voice: kwargs.voice,
          optionalParams,
          rocketllmParams,
          headers,
          customLLMProvider
        };
      }
      /**
       * Runs a speech request through the completion entry point.
       *
       * @returns The transformed response produced by the transformation handler.
       * @throws Error when validation fails, or when the completion result is not
       *   an object containing a `choices` property.
       */
      async speech(model, input, voice, optionalParams, rocketllmParams, headers, customLLMProvider) {
        const checked = this.validateInputKwargs({
          model,
          input,
          voice,
          optionalParams,
          rocketllmParams,
          headers,
          customLLMProvider
        });
        const completionKwargs = this.transformationHandler.transformRequest(
          checked.model,
          checked.input,
          checked.voice,
          checked.optionalParams,
          checked.rocketllmParams,
          checked.headers,
          checked.customLLMProvider
        );
        // Loaded on demand, only once the request has been validated and built.
        const { completion: runCompletion } = await import("./main-V25EAFRC.mjs");
        const response = await runCompletion({ ...completionKwargs });
        if (!response || typeof response !== "object" || !("choices" in response)) {
          throw new Error(`Unmapped response type. Got: ${typeof response}`);
        }
        return this.transformationHandler.transformResponse(response);
      }
    };
    // Shared instance, exported alongside the class.
    speechToCompletionBridgeHandler = new SpeechToCompletionBridgeHandler();
  }
});
// Run the handler initializer now so the exported bindings below are assigned
// (assumes __esm returns a function that executes the wrapped module body — confirm).
init_handler();
export {
SpeechToCompletionBridgeHandler,
speechToCompletionBridgeHandler
};