inference-server
Libraries and a server for building AI applications. Adapters to various native bindings enable local inference. Integrate it with your application, or use it as a microservice.
import {
  ModelEngine,
  SpeechToTextTaskArgs,
  ToolDefinition,
} from '#package/types/index.js'
import { CustomEngine } from '#package/engines/index.js'

type EngineArgs = {
  speechToTextModel: string
  chatModel: string
  tools: Record<string, ToolDefinition>
}

// An experimental engine that forwards a transcription to a (function-calling) chat model.
export class VoiceFunctionCallEngine
  extends CustomEngine
  implements ModelEngine
{
  speechToTextModel: string
  chatModel: string
  tools: Record<string, ToolDefinition>

  constructor({ speechToTextModel, chatModel, tools }: EngineArgs) {
    super()
    this.speechToTextModel = speechToTextModel
    this.chatModel = chatModel
    this.tools = tools
  }
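
  // Transcribes the given audio using the configured speech-to-text model,
  // borrowing an instance from the pool and releasing it once the result is in.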
  async createTranscription(task: SpeechToTextTaskArgs) {
    const speechToTextModel = await this.pool.requestInstance({
      model: this.speechToTextModel,
    })
    const transcriptionTask = speechToTextModel.instance.processSpeechToTextTask({
      ...task,
      model: this.speechToTextModel,
    })
    const transcription = await transcriptionTask.result
    speechToTextModel.release()
    return transcription.text
  }
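
  // Handles an incoming speech-to-text task by transcribing the audio and
  // forwarding the resulting text to the chat model as a user message.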
  async processSpeechToTextTask(task: SpeechToTextTaskArgs) {
    // Run the transcription and the chat model acquisition in parallel.
    const [transcription, chatModel] = await Promise.all([
      this.createTranscription(task),
      this.pool.requestInstance({
        model: this.chatModel,
      }),
    ])
    const chatTask = chatModel.instance.processChatCompletionTask({
      onChunk: task.onChunk,
      model: this.chatModel,
      tools: this.tools ? { definitions: this.tools } : undefined,
      messages: [
        {
          role: 'user',
          content: transcription,
        },
      ],
    })
    const chatResponse = await chatTask.result
    chatModel.release()
    return {
      text: chatResponse.message.content,
    }
  }
}
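
// A minimal usage sketch: constructing the engine with two model IDs and a tool map.
// The model IDs below are hypothetical placeholders, and how the engine is then
// registered with the server depends on the package's configuration API, which is
// not shown here.
const voiceEngine = new VoiceFunctionCallEngine({
  speechToTextModel: 'whisper-base', // assumed speech-to-text model ID
  chatModel: 'functionary-7b', // assumed function-calling chat model ID
  tools: {}, // ToolDefinition entries for function calling would go here
})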