UNPKG

@rocketnew/llm-sdk

Version:

Universal LLM SDK for JavaScript/TypeScript - OpenAI, Anthropic, Gemini, Perplexity and more

193 lines (190 loc) 6.94 kB
import { BinaryResponseContent, init_responses } from "./chunk-W4XUZXUW.mjs";
import { OPENAI_CHAT_COMPLETION_PARAMS, __esm, init_constants } from "./chunk-DDRQKMV4.mjs";

// src/endpoints/speech/speech_to_completion_bridge/transformation.ts
var SpeechToCompletionBridgeTransformationHandler;
var init_transformation = __esm({
  "src/endpoints/speech/speech_to_completion_bridge/transformation.ts"() {
    "use strict";
    init_constants();
    init_responses();

    /**
     * Translates a text-to-speech request into a chat-completion request with
     * audio output, and translates the chat-completion response back into a
     * binary audio response.
     */
    SpeechToCompletionBridgeTransformationHandler = class {
      /**
       * Build the keyword arguments for a chat-completion call that produces audio.
       *
       * @param {string} model - Model identifier, forwarded as `model`.
       * @param {string} input - Text to synthesise; sent as a single user message.
       * @param {*} voice - Only string voices are forwarded (as `audio.voice`);
       *   any other value is ignored.
       * @param {object} optionalParams - Caller options. Only keys listed in
       *   `OPENAI_CHAT_COMPLETION_PARAMS` are forwarded. `response_format`, when
       *   present and a string voice is given, is also copied to `audio.format`.
       * @param {object} rocketllmParams - Spread after the optional params, so its
       *   keys win on conflict.
       * @param {object} headers - Forwarded as `headers`.
       * @param {string} customLLMProvider - Forwarded as `custom_llm_provider`.
       * @returns {object} Request kwargs with every null/undefined value removed.
       */
      transformRequest(model, input, voice, optionalParams, rocketllmParams, headers, customLLMProvider) {
        const passedOptionalParams = {};
        for (const op of Object.keys(optionalParams)) {
          if (OPENAI_CHAT_COMPLETION_PARAMS.includes(op)) {
            passedOptionalParams[op] = optionalParams[op];
          }
        }

        // `typeof` already rules out null/undefined, so one check is enough.
        if (typeof voice === "string") {
          const audio = { voice };
          if ("response_format" in optionalParams) {
            audio["format"] = optionalParams["response_format"];
          }
          // Assigned after the loop so it replaces any caller-supplied `audio`.
          passedOptionalParams["audio"] = audio;
        }

        const returnKwargs = {
          model,
          messages: [
            {
              role: "user",
              content: input
            }
          ],
          modalities: ["audio"],
          ...passedOptionalParams,
          ...rocketllmParams,
          headers,
          custom_llm_provider: customLLMProvider
        };

        // Drop null/undefined entries so they are not sent downstream.
        return Object.fromEntries(
          Object.entries(returnKwargs).filter(([, value]) => value != null)
        );
      }

      /**
       * Report whether a model name contains both "gemini" and "tts"
       * (case-insensitive).
       *
       * @param {string} model - Model identifier.
       * @returns {boolean} True when both substrings are present.
       */
      _isGeminiTTSModel(model) {
        const lower = model.toLowerCase();
        // "preview-tts" contains "tts", so a single substring test covers both.
        return lower.includes("gemini") && lower.includes("tts");
      }

      /**
       * Prepend a 44-byte RIFF/WAVE header to raw 16-bit PCM samples.
       *
       * @param {Buffer} pcmData - Raw PCM payload; its byte length becomes the
       *   size of the `data` chunk.
       * @param {number} [sampleRate=24000] - Samples per second.
       * @param {number} [channels=1] - Number of interleaved channels.
       * @returns {Buffer} Header followed by the unmodified PCM bytes.
       */
      _convertPcm16ToWav(pcmData, sampleRate = 24e3, channels = 1) {
        const HEADER_SIZE = 44;
        const BITS_PER_SAMPLE = 16;
        const BYTES_PER_SAMPLE = BITS_PER_SAMPLE / 8;
        const PCM_FORMAT_TAG = 1;
        const FMT_CHUNK_SIZE = 16;

        const blockAlign = channels * BYTES_PER_SAMPLE;
        const byteRate = sampleRate * blockAlign;
        const dataSize = pcmData.length;
        // RIFF chunk size excludes the 8 bytes of "RIFF" + the size field itself.
        const fileSize = HEADER_SIZE - 8 + dataSize;

        const wavHeader = Buffer.alloc(HEADER_SIZE);
        wavHeader.write("RIFF", 0, 4, "ascii");
        wavHeader.writeUInt32LE(fileSize, 4);
        wavHeader.write("WAVE", 8, 4, "ascii");
        wavHeader.write("fmt ", 12, 4, "ascii");
        wavHeader.writeUInt32LE(FMT_CHUNK_SIZE, 16);
        wavHeader.writeUInt16LE(PCM_FORMAT_TAG, 20);
        wavHeader.writeUInt16LE(channels, 22);
        wavHeader.writeUInt32LE(sampleRate, 24);
        wavHeader.writeUInt32LE(byteRate, 28);
        wavHeader.writeUInt16LE(blockAlign, 32);
        wavHeader.writeUInt16LE(BITS_PER_SAMPLE, 34);
        wavHeader.write("data", 36, 4, "ascii");
        wavHeader.writeUInt32LE(dataSize, 40);
        return Buffer.concat([wavHeader, pcmData]);
      }

      /**
       * Convert a chat-completion response carrying base64 audio into a
       * `BinaryResponseContent`.
       *
       * The audio is read from `choices[0].message.audio.data`. When the response
       * model name matches `_isGeminiTTSModel`, the decoded bytes are wrapped in a
       * WAV header and served as `audio/wav`; otherwise they are served unchanged
       * as `audio/mpeg`.
       * NOTE(review): the WAV wrapping presumably exists because those models
       * return headerless PCM16 — confirm against the provider's documentation.
       *
       * @param {object} modelResponse - Chat-completion response object.
       * @returns {BinaryResponseContent} Wrapper around a synthesised 200 `Response`.
       * @throws {Error} When there are no choices, no audio part, or no audio data.
       */
      transformResponse(modelResponse) {
        const choices = modelResponse.choices;
        if (!choices || choices.length === 0) {
          throw new Error("No choices found in the response");
        }

        const audioPart = choices[0].message?.audio;
        if (audioPart == null) {
          throw new Error("No audio part found in the response");
        }

        const audioContent = audioPart.data;
        // Without this guard Buffer.from(undefined, "base64") throws an opaque
        // ERR_INVALID_ARG_TYPE TypeError that hides the real cause.
        if (audioContent == null) {
          throw new Error("No audio data found in the response");
        }

        let binaryData = Buffer.from(audioContent, "base64");
        const model = modelResponse.model || "";
        const isGeminiTTS = this._isGeminiTTSModel(model);
        if (isGeminiTTS) {
          binaryData = this._convertPcm16ToWav(binaryData);
        }
        const contentType = isGeminiTTS ? "audio/wav" : "audio/mpeg";

        // A Buffer may be a view into a larger shared ArrayBuffer, so copy out
        // exactly the bytes that belong to this Buffer.
        const bodyBuffer = binaryData.buffer.slice(
          binaryData.byteOffset,
          binaryData.byteOffset + binaryData.byteLength
        );
        const synthesizedResponse = new Response(bodyBuffer, {
          status: 200,
          headers: { "content-type": contentType }
        });
        return new BinaryResponseContent(synthesizedResponse);
      }
    };
  }
});

// src/endpoints/speech/speech_to_completion_bridge/handler.ts
var SpeechToCompletionBridgeHandler, speechToCompletionBridgeHandler;
var init_handler = __esm({
  "src/endpoints/speech/speech_to_completion_bridge/handler.ts"() {
    init_transformation();

    /**
     * Serves text-to-speech requests by delegating to the chat-completion entry
     * point and converting its audio output into a binary response.
     */
    SpeechToCompletionBridgeHandler = class {
      constructor() {
        this.transformationHandler = new SpeechToCompletionBridgeTransformationHandler();
      }

      /**
       * Check the types of the required arguments and return them in normalised form.
       *
       * `model`, `customLLMProvider` and `input` must be strings;
       * `optionalParams`, `rocketllmParams` and `headers` must be non-null
       * objects. `voice` is passed through unchecked.
       *
       * @param {object} kwargs - Raw arguments keyed by parameter name.
       * @returns {{model: string, input: string, voice: *, optionalParams: object,
       *   rocketllmParams: object, headers: object, customLLMProvider: string}}
       * @throws {Error} `"<name> is required"` for the first argument that is
       *   missing or of the wrong type.
       */
      validateInputKwargs(kwargs) {
        const {
          model,
          input,
          voice,
          optionalParams,
          rocketllmParams,
          headers,
          customLLMProvider
        } = kwargs;

        // Order matters: the first failing entry determines the error raised.
        const requirements = [
          ["model", model, "string"],
          ["customLLMProvider", customLLMProvider, "string"],
          ["input", input, "string"],
          ["optionalParams", optionalParams, "object"],
          ["rocketllmParams", rocketllmParams, "object"],
          ["headers", headers, "object"]
        ];
        for (const [name, value, expectedType] of requirements) {
          // `typeof null` is "object", hence the separate null check.
          if (value == null || typeof value !== expectedType) {
            throw new Error(`${name} is required`);
          }
        }

        return {
          model,
          input,
          voice,
          optionalParams,
          rocketllmParams,
          headers,
          customLLMProvider
        };
      }

      /**
       * Synthesise speech for `input` through the chat-completion endpoint.
       *
       * @param {string} model - Model identifier.
       * @param {string} input - Text to synthesise.
       * @param {*} voice - Voice selection; only string values are forwarded.
       * @param {object} optionalParams - Caller-supplied optional parameters.
       * @param {object} rocketllmParams - SDK-level parameters merged into the request.
       * @param {object} headers - Headers forwarded with the request.
       * @param {string} customLLMProvider - Provider identifier.
       * @returns {Promise<BinaryResponseContent>} The synthesised audio.
       * @throws {Error} On invalid arguments, on a completion result without a
       *   `choices` property, or on a response without usable audio.
       */
      async speech(model, input, voice, optionalParams, rocketllmParams, headers, customLLMProvider) {
        const validated = this.validateInputKwargs({
          model,
          input,
          voice,
          optionalParams,
          rocketllmParams,
          headers,
          customLLMProvider
        });
        const requestData = this.transformationHandler.transformRequest(
          validated.model,
          validated.input,
          validated.voice,
          validated.optionalParams,
          validated.rocketllmParams,
          validated.headers,
          validated.customLLMProvider
        );

        // Loaded on demand rather than at module top level.
        // NOTE(review): presumably to avoid a circular import with the main
        // bundle — confirm before hoisting.
        const { completion } = await import("./main-V25EAFRC.mjs");
        const result = await completion({ ...requestData });

        if (result && typeof result === "object" && "choices" in result) {
          return this.transformationHandler.transformResponse(result);
        }
        throw new Error(`Unmapped response type. Got: ${typeof result}`);
      }
    };

    speechToCompletionBridgeHandler = new SpeechToCompletionBridgeHandler();
  }
});
init_handler();
export { SpeechToCompletionBridgeHandler, speechToCompletionBridgeHandler };