UNPKG

life

Version:

Life.js is the first fullstack framework to build agentic web applications. It is minimal, extensible, and typesafe. Well, everything you love.

1 lines 207 kB
{"version":3,"sources":["../agent/server/config.ts","../models/eou/providers/livekit/index.ts","../models/eou/base.ts","../models/eou/providers/turnsense/index.ts","../models/eou/index.ts","../models/llm/providers/mistral.ts","../models/llm/base.ts","../models/llm/providers/openai.ts","../models/llm/providers/xai.ts","../models/llm/index.ts","../models/stt/providers/deepgram.ts","../models/stt/base.ts","../models/stt/index.ts","../models/tts/providers/cartesia.ts","../shared/audio-chunk-to-ms.ts","../shared/weighted-average.ts","../models/tts/lib/speech-duration-tokenizer.ts","../models/tts/lib/hyphenator.ts","../models/tts/lib/punctuation.ts","../models/tts/lib/speech-tokenizer.ts","../shared/markdown/tree.ts","../shared/markdown/mdast-extensions/inline-action.ts","../shared/markdown/mdast-extensions/interrupted.ts","../shared/markdown/mdast-extensions/key.ts","../shared/markdown/repair.ts","../models/tts/lib/latex-to-speech.ts","../models/tts/base.ts","../models/tts/index.ts","../models/vad/providers/silero/index.ts","../models/vad/base.ts","../models/vad/index.ts","../shared/deep-merge.ts","../transport/providers/livekit/node.ts","../transport/config/node.ts"],"sourcesContent":["import { z } from \"zod\";\nimport { eouProviderConfig } from \"@/models/eou\";\nimport { llmProviderConfig } from \"@/models/llm\";\nimport { sttProviderConfig } from \"@/models/stt\";\nimport { ttsProviderConfig } from \"@/models/tts\";\nimport { vadProviderConfig } from \"@/models/vad\";\nimport { deepClone } from \"@/shared/deep-clone\";\nimport { deepMerge } from \"@/shared/deep-merge\";\nimport * as op from \"@/shared/operation\";\nimport { zodObjectWithTelemetry } from \"@/telemetry/helpers/zod\";\nimport type { TelemetryConsumer } from \"@/telemetry/types\";\nimport { transportNodeConfig } from \"@/transport/config/node\";\nimport { agentClientConfig } from \"../client/config\";\n\nexport const agentServerConfig = zodObjectWithTelemetry({\n schema: z.object({\n transport: transportNodeConfig.schema.prefault({ provider: \"livekit\" }),\n models: z\n .object({\n vad: vadProviderConfig.schema.prefault({ provider: \"silero\" }),\n stt: sttProviderConfig.schema.prefault({ provider: \"deepgram\" }),\n eou: eouProviderConfig.schema.prefault({ provider: \"livekit\" }),\n llm: llmProviderConfig.schema.prefault({ provider: \"openai\" }),\n tts: ttsProviderConfig.schema.prefault({ provider: \"cartesia\" }),\n })\n .prefault({}),\n telemetry: z\n .object({\n consumers: z.array(z.custom<TelemetryConsumer>()).prefault([]),\n })\n .prefault({}),\n experimental: z.object().prefault({}),\n }),\n toTelemetry: (config) => {\n config.transport = transportNodeConfig.toTelemetry(config.transport) as never;\n config.models.vad = vadProviderConfig.toTelemetry(config.models.vad) as never;\n config.models.stt = sttProviderConfig.toTelemetry(config.models.stt) as never;\n config.models.eou = eouProviderConfig.toTelemetry(config.models.eou) as never;\n config.models.llm = llmProviderConfig.toTelemetry(config.models.llm) as never;\n config.models.tts = ttsProviderConfig.toTelemetry(config.models.tts) as never;\n\n // Remember if telemetry consumers are defined and redact them (non-serializable)\n config.telemetry.hasCustomConsumers = Boolean(config.telemetry.consumers.length);\n config.telemetry.consumers = \"redacted\" as never;\n\n return config;\n },\n});\n\n/**\n * Used to define a global config in a `life.config.ts` file.\n * @param def - The config definition.\n * @returns The validated config.\n */\nexport function defineConfig(config: z.input<typeof agentServerConfig.schema>) {\n return config;\n}\n\nexport function prepareAgentConfig(\n agentConfig: z.input<typeof agentServerConfig.schema>,\n globalConfigs: z.input<typeof agentServerConfig.schema>[],\n) {\n // Obtain and validate the final config object\n const orderedGlobalConfigs = deepClone(globalConfigs).reverse();\n const mergedConfig = deepMerge(...orderedGlobalConfigs, agentConfig);\n const { error: errorConfig, data: serverConfig } =\n agentServerConfig.schema.safeParse(mergedConfig);\n if (errorConfig)\n return op.failure({\n code: \"Validation\",\n message: \"Invalid agent server config.\",\n cause: errorConfig,\n });\n\n // Produce the client-side subset config\n const { error: errorClientConfig, data: clientConfig } =\n agentClientConfig.schema.safeParse(serverConfig);\n if (errorClientConfig)\n return op.failure({\n code: \"Validation\",\n message: \"Invalid agent client config.\",\n cause: errorClientConfig,\n });\n\n return op.success({ server: serverConfig, client: clientConfig });\n}\n","import path from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport { InferenceSession, Tensor } from \"onnxruntime-node\";\nimport { z } from \"zod\";\nimport type { Message } from \"@/shared/messages\";\nimport { zodObjectWithTelemetry } from \"@/telemetry/helpers/zod\";\nimport { EOUBase } from \"../../base\";\n\nconst transformers = import(\"@huggingface/transformers\");\n\n// Config\nexport const livekitEOUConfig = zodObjectWithTelemetry({\n schema: z.object({\n provider: z.literal(\"livekit\"),\n quantized: z.boolean().prefault(true),\n /**\n * Quick benchmarks have shown that Livekit models are very optimized for multi-turn\n * inferences, the most balanced value considering inference time and accuracy was\n * in the 2-5 messages range for the quantized version. Carefully benchmark the change\n * if you consider increasing / decreasing this value outside of that range.\n */\n maxMessages: z.number().prefault(3),\n maxTokens: z.number().prefault(512),\n }),\n});\n\n// Model\ntype PreTrainedTokenizer = InstanceType<Awaited<typeof transformers>[\"PreTrainedTokenizer\"]>;\nexport class LivekitEOU extends EOUBase<typeof livekitEOUConfig.schema> {\n #_tokenizer?: PreTrainedTokenizer;\n #_session?: InferenceSession;\n\n constructor(config: z.input<typeof livekitEOUConfig.schema>) {\n super(livekitEOUConfig.schema, config);\n }\n\n // Get or create the ONNX inference session\n async #getSession(): Promise<InferenceSession> {\n if (this.#_session) return this.#_session;\n // Retrieve model path\n const currentDir = path.dirname(fileURLToPath(import.meta.url));\n const modelPath = path.join(\n currentDir,\n \"..\",\n \"models\",\n \"eou\",\n \"providers\",\n \"livekit\",\n this.config.quantized ? \"model-quantized.onnx\" : \"model.onnx\",\n );\n this.#_session = await InferenceSession.create(modelPath, {\n interOpNumThreads: 1,\n intraOpNumThreads: 1,\n executionMode: \"sequential\",\n });\n\n return this.#_session;\n }\n\n async #getTokenizer(): Promise<PreTrainedTokenizer> {\n if (this.#_tokenizer) return this.#_tokenizer;\n const { AutoTokenizer } = await transformers;\n this.#_tokenizer = await AutoTokenizer.from_pretrained(\"livekit/turn-detector\", {\n revision: \"v1.2.2-en\",\n });\n return this.#_tokenizer;\n }\n\n async #tokenize(text: string): Promise<bigint[]> {\n // Tokenize the provided text\n const tokenizer = await this.#getTokenizer();\n const inputs = await tokenizer(text, {\n add_special_tokens: false,\n truncation: false,\n return_tensors: \"np\",\n });\n\n // Extract tokens\n const tokens: bigint[] = Array.isArray(inputs.input_ids.data)\n ? inputs.input_ids.data\n : Array.from(inputs.input_ids.data);\n return tokens;\n }\n\n async #untokenize(tokens: bigint[]): Promise<string> {\n const tokenizer = await this.#getTokenizer();\n const text = tokenizer.decode(tokens);\n return text;\n }\n\n async #toLivekitMessages(messages: Message[]): Promise<string> {\n // Ensure last message is from user\n while (messages.length > 0 && messages.at(-1)?.role !== \"user\") {\n messages.pop();\n }\n\n // Tokenize recent messages\n const tokens = await this.#tokenize(\n messages\n .filter((m) => m.role === \"user\" || m.role === \"agent\")\n .slice(-this.config.maxMessages)\n .map(\n (m) =>\n `${m.role === \"user\" ? \"<|user|>\" : \"<|assistant|>\"} ${m.content.trim()} <|im_end|>`,\n )\n .join(\"\"),\n );\n\n // Remove the end token\n tokens.pop();\n\n // If the tokens are less than the max tokens, return them directly\n if (tokens.length <= this.config.maxTokens) return this.#untokenize(tokens);\n\n // Compute the roles tokens\n const userRoleToken = (await this.#tokenize(\"<|user|>\"))[0] as bigint;\n const agentRoleToken = (await this.#tokenize(\"<|assistant|>\"))[0] as bigint;\n const ellipsisToken = (await this.#tokenize(\"...\"))[0] as bigint;\n\n // Compute the kept and rest of tokens\n tokens.reverse();\n const keptTokens = tokens.slice(0, this.config.maxTokens - 3);\n const restTokens = tokens.slice(this.config.maxTokens - 3);\n\n // Append the ellipsis token to the kept tokens\n keptTokens.push(ellipsisToken);\n\n // Find the role of the truncated message\n let truncatedMessageRole: \"user\" | \"agent\" | undefined;\n for (const token of restTokens) {\n if (token === userRoleToken) {\n truncatedMessageRole = \"user\";\n break;\n } else if (token === agentRoleToken) {\n truncatedMessageRole = \"agent\";\n break;\n }\n }\n if (!truncatedMessageRole) throw new Error(\"Failed to find the role. Shouldn't happen.\");\n\n // Append the role token to the kept tokens\n keptTokens.push(truncatedMessageRole === \"user\" ? userRoleToken : agentRoleToken);\n\n // Reverse and return the tokens\n return this.#untokenize(keptTokens.reverse());\n }\n\n async predict(messages: Message[]): Promise<number> {\n try {\n if (!messages || messages.length === 0) return 0;\n\n const session = await this.#getSession();\n\n // Format and tokenize the conversation\n const livekitMessages = await this.#toLivekitMessages(messages);\n if (livekitMessages.length === 0) return 0;\n const tokens = await this.#tokenize(livekitMessages);\n if (tokens.length === 0) return 0;\n\n // Run inference\n const outputs = await session.run({\n input_ids: new Tensor(\"int64\", tokens, [1, tokens.length]),\n });\n\n // Extract and return the EOU probability\n const eouProbability = outputs.prob?.data[0];\n if (!eouProbability) return 0;\n return Number(eouProbability);\n } catch (error) {\n console.error(\"Livekit EOU error:\", error);\n return 0;\n }\n }\n}\n","import type { z } from \"zod\";\nimport type { Message } from \"@/shared/messages\";\n\nexport abstract class EOUBase<ConfigSchema extends z.ZodObject> {\n protected config: z.infer<ConfigSchema>;\n\n constructor(configSchema: ConfigSchema, config: Partial<z.infer<ConfigSchema>>) {\n this.config = configSchema.parse({ ...config });\n }\n\n abstract predict(messages: Message[]): Promise<number> | number;\n}\n","import path from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\nimport { InferenceSession, Tensor } from \"onnxruntime-node\";\nimport { z } from \"zod\";\nimport type { Message } from \"@/shared/messages\";\nimport { zodObjectWithTelemetry } from \"@/telemetry/helpers/zod\";\nimport { EOUBase } from \"../../base\";\n\nconst transformers = import(\"@huggingface/transformers\");\n\nconst MAX_TOKENS = 256; // Hardcoded in the model\n\n// Config\nexport const turnSenseEOUConfig = zodObjectWithTelemetry({\n schema: z.object({\n provider: z.literal(\"turnsense\"),\n quantized: z.boolean().prefault(true),\n /**\n * Quick benchmark have shown that Turnsense models are very optimized for single\n * message inferences, and their documentation shows single message inferences as\n * well. Hence why this value defaults to 1. Carefully benchmark the change if you\n * consider increasing this value.\n */\n maxMessages: z.number().prefault(1),\n }),\n});\n\n// Model\ntype PreTrainedTokenizer = InstanceType<Awaited<typeof transformers>[\"PreTrainedTokenizer\"]>;\nexport class TurnSenseEOU extends EOUBase<typeof turnSenseEOUConfig.schema> {\n #_tokenizer?: PreTrainedTokenizer;\n #_session?: InferenceSession;\n\n constructor(config: z.input<typeof turnSenseEOUConfig.schema>) {\n super(turnSenseEOUConfig.schema, config);\n }\n\n // Get or create the ONNX inference session\n async #getSession(): Promise<InferenceSession> {\n if (this.#_session) return this.#_session;\n // Retrieve model path\n const currentDir = path.dirname(fileURLToPath(import.meta.url));\n const modelPath = path.join(\n currentDir,\n \"..\",\n \"models\",\n \"eou\",\n \"providers\",\n \"turnsense\",\n this.config.quantized ? \"model-quantized.onnx\" : \"model.onnx\",\n );\n this.#_session = await InferenceSession.create(modelPath, {\n interOpNumThreads: 1,\n intraOpNumThreads: 1,\n executionMode: \"sequential\",\n });\n return this.#_session;\n }\n\n async #getTokenizer(): Promise<PreTrainedTokenizer> {\n if (this.#_tokenizer) return this.#_tokenizer;\n const { AutoTokenizer } = await transformers;\n this.#_tokenizer = await AutoTokenizer.from_pretrained(\"latishab/turnsense\");\n return this.#_tokenizer;\n }\n\n async #tokenize(text: string): Promise<{ tokens: bigint[]; attentionMask: bigint[] }> {\n // Tokenize the provided text\n const tokenizer = await this.#getTokenizer();\n const inputs = await tokenizer(text, {\n padding: \"max_length\",\n max_length: 256,\n truncation: true,\n truncation_side: \"left\",\n return_tensors: \"pt\",\n });\n\n // Extract the data arrays from the tokenizer output\n const inputIdsArray = Array.isArray(inputs.input_ids.data)\n ? inputs.input_ids.data\n : Array.from(inputs.input_ids.data);\n const attentionMaskArray = Array.isArray(inputs.attention_mask.data)\n ? inputs.attention_mask.data\n : Array.from(inputs.attention_mask.data);\n return { tokens: inputIdsArray, attentionMask: attentionMaskArray };\n }\n\n async #untokenize(tokens: bigint[]): Promise<string> {\n const tokenizer = await this.#getTokenizer();\n const text = tokenizer.decode(tokens);\n return text;\n }\n\n async #toTurnsenseMessages(messages: Message[]): Promise<string> {\n // Ensure last message is from user\n while (messages.length > 0 && messages.at(-1)?.role !== \"user\") {\n messages.pop();\n }\n\n // Tokenize recent messages\n const { tokens } = await this.#tokenize(\n messages\n .filter((m) => m.role === \"user\" || m.role === \"agent\")\n .slice(-this.config.maxMessages)\n .map(\n (m) =>\n `${m.role === \"user\" ? \"<|user|>\" : \"<|assistant|>\"} ${m.content.trim()} <|im_end|>`,\n )\n .join(\"\"),\n );\n\n // Remove the end token\n tokens.pop();\n\n // If the tokens are less than the max tokens, return them directly\n if (tokens.length <= MAX_TOKENS) return this.#untokenize(tokens);\n\n // Compute the roles tokens\n const userRoleToken = (await this.#tokenize(\"<|user|>\")).tokens[0] as bigint;\n const agentRoleToken = (await this.#tokenize(\"<|assistant|>\")).tokens[0] as bigint;\n const ellipsisToken = (await this.#tokenize(\"...\")).tokens[0] as bigint;\n\n // Compute the kept and rest of tokens\n tokens.reverse();\n const keptTokens = tokens.slice(0, MAX_TOKENS - 3);\n const restTokens = tokens.slice(MAX_TOKENS - 3);\n\n // Append the ellipsis token to the kept tokens\n keptTokens.push(ellipsisToken);\n\n // Find the role of the truncated message\n let truncatedMessageRole: \"user\" | \"agent\" | undefined;\n for (const token of restTokens) {\n if (token === userRoleToken) {\n truncatedMessageRole = \"user\";\n break;\n } else if (token === agentRoleToken) {\n truncatedMessageRole = \"agent\";\n break;\n }\n }\n if (!truncatedMessageRole) throw new Error(\"Failed to find the role. Shouldn't happen.\");\n\n // Append the role token to the kept tokens\n keptTokens.push(truncatedMessageRole === \"user\" ? userRoleToken : agentRoleToken);\n\n // Reverse and return the tokens\n return this.#untokenize(keptTokens.reverse());\n }\n\n async predict(messages: Message[]): Promise<number> {\n try {\n const session = await this.#getSession();\n\n // Format and tokenize the conversation\n const turnsenseMessages = await this.#toTurnsenseMessages(messages);\n if (turnsenseMessages.length === 0) return 0;\n const { tokens, attentionMask } = await this.#tokenize(turnsenseMessages);\n if (tokens.length === 0) return 0;\n\n // Run inference\n const outputs = await session.run({\n input_ids: new Tensor(\"int64\", tokens, [1, tokens.length]),\n attention_mask: new Tensor(\"int64\", attentionMask, [1, attentionMask.length]),\n });\n\n // Retrieve and return the EOU probability\n const probabilities = outputs.probabilities;\n if (!probabilities?.data || probabilities.data.length < 2) return 0;\n const eouProbability = probabilities.data[1];\n return typeof eouProbability === \"number\" ? eouProbability : 0;\n } catch (error) {\n console.error(\"TurnSense EOU error:\", error);\n return 0;\n }\n }\n}\n","import { zodUnionWithTelemetry } from \"@/telemetry/helpers/zod\";\nimport { LivekitEOU, livekitEOUConfig } from \"./providers/livekit\";\nimport { TurnSenseEOU, turnSenseEOUConfig } from \"./providers/turnsense\";\n\n// Providers\nexport const eouProviders = {\n turnsense: { class: TurnSenseEOU, configSchema: turnSenseEOUConfig.schema },\n livekit: { class: LivekitEOU, configSchema: livekitEOUConfig.schema },\n} as const;\n\nexport type EOUProvider = (typeof eouProviders)[keyof typeof eouProviders][\"class\"];\n\n// Config\nexport const eouProviderConfig = zodUnionWithTelemetry(\"provider\", [\n livekitEOUConfig,\n turnSenseEOUConfig,\n]);\n","import { Mistral } from \"@mistralai/mistralai\";\nimport type {\n AssistantMessage,\n SystemMessage,\n Tool,\n ToolMessage,\n UserMessage,\n} from \"@mistralai/mistralai/models/components\";\nimport { z } from \"zod\";\nimport type { Message } from \"@/shared/messages\";\nimport { zodObjectWithTelemetry } from \"@/telemetry/helpers/zod\";\nimport { LLMBase, type LLMGenerateMessageJob, type LLMGenerateObjectChunk } from \"../base\";\nimport type { LLMToolDefinition } from \"../resources\";\n\n// Define Mistral-specific message types with required role properties\ntype MistralUserMessage = UserMessage & { role: \"user\" };\ntype MistralAssistantMessage = AssistantMessage & { role: \"assistant\" };\ntype MistralSystemMessage = SystemMessage & { role: \"system\" };\ntype MistralToolMessage = ToolMessage & { role: \"tool\" };\ntype MistralMessage =\n | MistralUserMessage\n | MistralAssistantMessage\n | MistralSystemMessage\n | MistralToolMessage;\n\n// Config\nexport const mistralLLMConfig = zodObjectWithTelemetry({\n schema: z.object({\n provider: z.literal(\"mistral\"),\n apiKey: z.string().prefault(process.env.MISTRAL_API_KEY as string),\n model: z\n .enum([\n \"mistral-large-latest\",\n \"mistral-large-2411\",\n \"mistral-large-2407\",\n \"mistral-small-latest\",\n \"mistral-small-2501\",\n \"mistral-small-2503\",\n \"mistral-medium-latest\",\n \"mistral-medium-2505\",\n \"pixtral-large-latest\",\n \"pixtral-large-2411\",\n \"codestral-latest\",\n \"codestral-2501\",\n \"codestral-2405\",\n \"ministral-3b-latest\",\n \"ministral-8b-latest\",\n \"open-mistral-7b\",\n \"open-mixtral-8x7b\",\n \"open-mixtral-8x22b\",\n ])\n .prefault(\"mistral-small-latest\"),\n temperature: z.number().min(0).max(1).prefault(0.5),\n }),\n toTelemetry: (config) => {\n // Redact sensitive fields\n config.apiKey = \"redacted\" as never;\n\n return config;\n },\n});\n\n// Model\nexport class MistralLLM extends LLMBase<typeof mistralLLMConfig.schema> {\n readonly #client: Mistral;\n\n constructor(config: z.input<typeof mistralLLMConfig.schema>) {\n super(mistralLLMConfig.schema, config);\n this.#client = new Mistral({ apiKey: config.apiKey });\n }\n\n /**\n * Format conversion\n */\n\n #toMistralMessage(message: Message): MistralMessage {\n if (message.role === \"user\")\n return {\n role: \"user\",\n content: message.content,\n };\n\n if (message.role === \"agent\")\n return {\n role: \"assistant\",\n content: message.content,\n toolCalls: message.toolsRequests?.map((request) => ({\n type: \"function\",\n id: request.toolRequestId,\n function: {\n name: request.toolName,\n arguments: JSON.stringify(request.toolInput),\n },\n })),\n };\n\n if (message.role === \"system\")\n return {\n role: \"system\",\n content: message.content,\n };\n\n if (message.role === \"tool\")\n return {\n role: \"tool\",\n toolCallId: message.toolName,\n content: JSON.stringify(message.toolOutput),\n };\n\n return null as never;\n }\n\n #toMistralMessages(messages: Message[]): MistralMessage[] {\n return messages.map(this.#toMistralMessage.bind(this));\n }\n\n #toMistralTool(tool: LLMToolDefinition): Tool {\n return {\n type: \"function\",\n function: {\n name: tool.name,\n description: tool.description,\n parameters: z.toJSONSchema(tool.schema.input),\n },\n };\n }\n\n #toMistralTools(tools: LLMToolDefinition[]) {\n return tools.map(this.#toMistralTool);\n }\n\n /**\n * Generate a message with job management - returns jobId along with stream\n */\n async generateMessage(\n params: Parameters<typeof LLMBase.prototype.generateMessage>[0],\n ): Promise<LLMGenerateMessageJob> {\n // Create a new job\n const job = this.createGenerateMessageJob();\n\n // Prepare tools and messages in Mistral format\n const mistralTools = params.tools.length > 0 ? this.#toMistralTools(params.tools) : undefined;\n const mistralMessages = this.#toMistralMessages(params.messages);\n\n try {\n // Create the stream\n const stream = await this.#client.chat.stream({\n model: this.config.model,\n temperature: this.config.temperature,\n messages: mistralMessages,\n ...(mistralTools?.length ? { tools: mistralTools } : {}),\n });\n\n // Process the stream\n (async () => {\n let pendingToolCalls: Record<\n string,\n {\n id: string;\n name: string;\n arguments: string;\n }\n > = {};\n\n try {\n for await (const chunk of stream) {\n // Ignore chunks if job was cancelled\n if (job._abortController.signal.aborted) break;\n\n // Extract the choice and delta (if any)\n const choice = chunk.data.choices[0];\n if (!choice) throw new Error(\"No choice\");\n const delta = choice.delta;\n\n // Handle content tokens\n if (delta.content) {\n const content = delta.content;\n const contentString = typeof content === \"string\" ? content : JSON.stringify(content);\n job.stream.push({\n type: \"content\",\n content: contentString,\n });\n }\n\n // Handle tool calls tokens\n const toolCalls = delta.toolCalls;\n if (toolCalls) {\n for (const toolCall of toolCalls) {\n // Retrieve the tool call ID\n const id = toolCall.id ?? Object.keys(pendingToolCalls).at(-1);\n if (!id) throw new Error(\"No tool call ID\");\n\n // Ensure the tool call is tracked\n if (!pendingToolCalls[id]) {\n pendingToolCalls[id] = { id, name: \"\", arguments: \"\" };\n }\n\n // Compound name tokens\n if (toolCall.function?.name) {\n pendingToolCalls[id].name += toolCall.function.name;\n }\n\n // Compound arguments tokens\n if (toolCall.function?.arguments) {\n pendingToolCalls[id].arguments += toolCall.function.arguments;\n }\n }\n }\n\n // Handle finish reasons\n // - Tool calls completion\n if (choice.finishReason === \"tool_calls\") {\n job.stream.push({\n type: \"tools\",\n tools: Object.values(pendingToolCalls).map((toolCall) => ({\n toolRequestId: toolCall.id,\n toolName: toolCall.name,\n toolInput: JSON.parse(toolCall.arguments || \"{}\"),\n })),\n });\n pendingToolCalls = {};\n }\n\n // - End of stream\n if (choice.finishReason === \"stop\") job.stream.push({ type: \"end\" });\n }\n } catch (error) {\n job.stream.push({\n type: \"error\",\n error: error instanceof Error ? error.message : \"Unknown error\",\n });\n }\n })();\n\n // Return the job\n return job;\n } catch (error) {\n job.stream.push({\n type: \"error\",\n error: error instanceof Error ? error.message : \"Failed to create stream\",\n });\n return job;\n }\n }\n\n async generateObject<T extends z.ZodObject>(params: {\n messages: Message[];\n schema: T;\n }): Promise<LLMGenerateObjectChunk<T>> {\n try {\n // Prepare messages in Mistral format\n const mistralMessages = this.#toMistralMessages(params.messages);\n\n // Prepare JSON schema\n const jsonSchema = z.toJSONSchema(params.schema);\n\n // Generate the object using schema-enforced parse method\n // This uses Mistral's built-in schema validation with the Zod schema\n const response = await this.#client.chat.complete({\n model: this.config.model,\n messages: mistralMessages,\n temperature: this.config.temperature,\n\n responseFormat: {\n type: \"json_schema\",\n jsonSchema: { name: \"avc\", schemaDefinition: jsonSchema },\n },\n });\n\n // Extract parsed content from response - already validated by Mistral API\n const data = JSON.parse((response.choices?.[0]?.message?.content as string) || \"{}\");\n\n // Return the validated object (no additional validation needed)\n return { type: \"content\", data };\n } catch (error) {\n return {\n type: \"error\",\n error: error instanceof Error ? error.message : \"Failed to generate object\",\n };\n }\n }\n}\n","import type { z } from \"zod\";\nimport { AsyncQueue } from \"@/shared/async-queue\";\nimport type { AgentToolRequest, Message } from \"@/shared/messages\";\nimport { newId } from \"@/shared/prefixed-id\";\nimport type { LLMToolDefinition } from \"./resources\";\n\n// LLMBase.generateMessage()\nexport type LLMGenerateMessageChunk =\n | { type: \"content\"; content: string }\n | { type: \"tools\"; tools: AgentToolRequest[] }\n | { type: \"end\" }\n | { type: \"error\"; error: string };\n\nexport interface LLMGenerateMessageJob {\n id: string;\n cancel: () => void;\n stream: AsyncQueue<LLMGenerateMessageChunk>;\n _abortController: AbortController;\n}\n\n// LLMBase.generateObject()\nexport type LLMGenerateObjectChunk<T extends z.ZodObject> =\n | { type: \"content\"; data: z.infer<T> }\n | { type: \"error\"; error: string };\n\n/**\n * Base class for all LLMs providers.\n */\nexport abstract class LLMBase<ConfigSchema extends z.ZodObject> {\n config: z.infer<ConfigSchema>;\n\n constructor(configSchema: ConfigSchema, config: Partial<z.infer<ConfigSchema>>) {\n this.config = configSchema.parse({ ...config });\n }\n\n protected createGenerateMessageJob(): LLMGenerateMessageJob {\n const id = newId(\"job\");\n const stream = new AsyncQueue<LLMGenerateMessageChunk>();\n const _abortController = new AbortController();\n const job: LLMGenerateMessageJob = {\n id,\n stream,\n cancel: () => _abortController.abort(),\n _abortController,\n };\n return job;\n }\n\n // To be impemented by subclasses\n abstract generateMessage(params: {\n messages: Message[];\n tools: LLMToolDefinition[];\n }): Promise<LLMGenerateMessageJob>;\n\n abstract generateObject<T extends z.ZodObject>(params: {\n messages: Message[];\n schema: T;\n }): Promise<LLMGenerateObjectChunk<T>>;\n}\n","import { OpenAI } from \"openai\";\nimport type { ChatCompletionMessageParam } from \"openai/resources/index.js\";\nimport { z } from \"zod\";\nimport type { Message } from \"@/shared/messages\";\nimport { zodObjectWithTelemetry } from \"@/telemetry/helpers/zod\";\nimport { LLMBase, type LLMGenerateMessageJob, type LLMGenerateObjectChunk } from \"../base\";\nimport type { LLMToolDefinition } from \"../resources\";\n\n// Config\nexport const openAILLMConfig = zodObjectWithTelemetry({\n schema: z.object({\n provider: z.literal(\"openai\"),\n apiKey: z.string().prefault(process.env.OPENAI_API_KEY as string),\n model: z.enum([\"gpt-4o-mini\", \"gpt-4o\", \"gpt-5\", \"gpt-5-nano\"]).prefault(\"gpt-4o\"),\n temperature: z.number().min(0).max(2).prefault(1),\n }),\n toTelemetry: (config) => {\n // Redact sensitive fields\n config.apiKey = \"redacted\" as never;\n return config;\n },\n});\n\n// Model\nexport class OpenAILLM extends LLMBase<typeof openAILLMConfig.schema> {\n readonly #client: OpenAI;\n\n constructor(config: z.input<typeof openAILLMConfig.schema>) {\n super(openAILLMConfig.schema, config);\n this.#client = new OpenAI({ apiKey: config.apiKey });\n }\n\n /**\n * Format conversion\n */\n\n #toOpenAIMessage(message: Message): ChatCompletionMessageParam {\n if (message.role === \"user\") {\n return { role: \"user\", content: message.content };\n }\n\n if (message.role === \"agent\") {\n return {\n role: \"assistant\",\n content: message.content,\n ...(message.toolsRequests?.length\n ? {\n tool_calls: message.toolsRequests?.map((request) => ({\n id: request.toolRequestId,\n function: { name: request.toolName, arguments: JSON.stringify(request.toolInput) },\n type: \"function\",\n })),\n }\n : {}),\n };\n }\n\n if (message.role === \"system\") {\n return { role: \"system\", content: message.content };\n }\n\n if (message.role === \"tool\") {\n return {\n role: \"tool\",\n tool_call_id: message.toolName,\n content: JSON.stringify(message.toolOutput),\n };\n }\n\n return null as never;\n }\n\n #toOpenAIMessages(messages: Message[]): ChatCompletionMessageParam[] {\n return messages.map(this.#toOpenAIMessage);\n }\n\n #toOpenAITool(tool: LLMToolDefinition): OpenAI.Chat.Completions.ChatCompletionTool {\n return {\n type: \"function\",\n function: {\n name: tool.name,\n description: tool.description,\n parameters: z.toJSONSchema(tool.schema.input),\n },\n };\n }\n\n #toOpenAITools(tools: LLMToolDefinition[]): OpenAI.Chat.Completions.ChatCompletionTool[] {\n return tools.map(this.#toOpenAITool);\n }\n\n /**\n * Generate a message with job management - returns jobId along with stream\n */\n async generateMessage(\n params: Parameters<typeof LLMBase.prototype.generateMessage>[0],\n ): Promise<LLMGenerateMessageJob> {\n // Create a new job\n const job = this.createGenerateMessageJob();\n\n // Prepare tools and messages in OpenAI format\n const openaiTools = params.tools.length > 0 ? this.#toOpenAITools(params.tools) : undefined;\n const openaiMessages = this.#toOpenAIMessages(params.messages);\n\n // Prepare job stream\n const stream = await this.#client.chat.completions.create(\n {\n model: this.config.model,\n temperature: this.config.temperature,\n messages: openaiMessages,\n stream: true,\n ...(openaiTools?.length\n ? {\n tools: openaiTools,\n parallel_tool_calls: true,\n }\n : {}),\n },\n { signal: job._abortController.signal }, // Allows the stream to be cancelled\n );\n\n // Start streaming in the background (don't await)\n (async () => {\n let pendingToolCalls: Record<string, { id: string; name: string; arguments: string }> = {};\n\n for await (const chunk of stream) {\n // Ignore chunks if job was cancelled\n if (job._abortController.signal.aborted) break;\n\n // Extract the choice and delta (if any)\n const choice = chunk.choices[0];\n if (!choice) throw new Error(\"No choice\");\n const delta = choice.delta;\n\n // Handle content tokens\n if (delta.content) job.stream.push({ type: \"content\", content: delta.content });\n\n // Handle tool calls tokens\n if (delta.tool_calls) {\n for (const toolCall of delta.tool_calls) {\n // Retrieve the tool call ID\n const id = toolCall.id ?? Object.keys(pendingToolCalls).at(-1);\n if (!id) throw new Error(\"No tool call ID\");\n\n // Ensure the tool call is tracked\n if (!pendingToolCalls[id]) pendingToolCalls[id] = { id, name: \"\", arguments: \"\" };\n\n // Compound name tokens\n if (toolCall.function?.name) pendingToolCalls[id].name += toolCall.function.name;\n\n // Compound arguments tokens\n if (toolCall.function?.arguments)\n pendingToolCalls[id].arguments += toolCall.function.arguments;\n }\n }\n\n // Handle finish reasons\n // - Tool calls completion\n if (choice.finish_reason === \"tool_calls\") {\n job.stream.push({\n type: \"tools\",\n tools: Object.values(pendingToolCalls).map((toolCall) => ({\n toolRequestId: toolCall.id,\n toolName: toolCall.name,\n toolInput: JSON.parse(toolCall.arguments || \"{}\"),\n })),\n });\n pendingToolCalls = {};\n }\n\n // - End of stream\n if (choice.finish_reason === \"stop\") job.stream.push({ type: \"end\" });\n }\n })();\n\n // Return the job immediately\n return job;\n }\n\n async generateObject<T extends z.ZodObject>(params: {\n messages: Message[];\n schema: T;\n }): Promise<LLMGenerateObjectChunk<T>> {\n try {\n // Prepare messages in OpenAI format\n const openaiMessages = this.#toOpenAIMessages(params.messages);\n\n // Prepare JSON schema\n const jsonSchema = z.toJSONSchema(params.schema);\n\n // Generate the object\n const response = await this.#client.chat.completions.create({\n model: this.config.model,\n messages: openaiMessages,\n temperature: this.config.temperature,\n response_format: {\n type: \"json_schema\",\n json_schema: { name: \"avc\", schema: jsonSchema },\n },\n });\n\n // Parse the response\n const data = JSON.parse(response.choices[0]?.message?.content || \"{}\");\n\n // Return the object\n return { type: \"content\", data };\n } catch (error) {\n return {\n type: \"error\",\n error: error instanceof Error ? error.message : \"Failed to generate object\",\n };\n }\n }\n}\n","import { OpenAI } from \"openai\";\nimport type { ChatCompletionMessageParam } from \"openai/resources/index.js\";\nimport { z } from \"zod\";\nimport type { Message } from \"@/shared/messages\";\nimport { zodObjectWithTelemetry } from \"@/telemetry/helpers/zod\";\nimport { LLMBase, type LLMGenerateMessageJob, type LLMGenerateObjectChunk } from \"../base\";\nimport type { LLMToolDefinition } from \"../resources\";\n\nexport const xaiLLMConfig = zodObjectWithTelemetry({\n schema: z.object({\n provider: z.literal(\"xai\"),\n apiKey: z.string().prefault(process.env.XAI_API_KEY as string),\n model: z\n .enum([\n \"grok-3\",\n \"grok-3-fast\",\n \"grok-3-mini\",\n \"grok-3-mini-fast\",\n \"grok-2-1212\",\n \"grok-2-vision-1212\",\n \"grok-beta\",\n \"grok-vision-beta\",\n ])\n .prefault(\"grok-3-mini\"),\n temperature: z.number().min(0).max(2).prefault(0.5),\n }),\n toTelemetry: (config) => {\n // Redact sensitive fields\n config.apiKey = \"redacted\" as never;\n\n return config;\n },\n});\n\nexport class XaiLLM extends LLMBase<typeof xaiLLMConfig.schema> {\n readonly #client: OpenAI;\n\n constructor(config: z.input<typeof xaiLLMConfig.schema>) {\n super(xaiLLMConfig.schema, config);\n this.#client = new OpenAI({\n apiKey: config.apiKey,\n baseURL: \"https://api.x.ai/v1\",\n });\n }\n\n /**\n * Format conversion\n */\n\n #toOpenAIMessage(message: Message): ChatCompletionMessageParam {\n if (message.role === \"user\") {\n return { role: \"user\", content: message.content };\n }\n\n if (message.role === \"agent\") {\n return {\n role: \"assistant\",\n content: message.content,\n ...(message.toolsRequests?.length\n ? {\n tool_calls: message.toolsRequests?.map((request) => ({\n id: request.toolRequestId,\n function: { name: request.toolName, arguments: JSON.stringify(request.toolInput) },\n type: \"function\",\n })),\n }\n : {}),\n };\n }\n\n if (message.role === \"system\") {\n return { role: \"system\", content: message.content };\n }\n\n if (message.role === \"tool\") {\n return {\n role: \"tool\",\n tool_call_id: message.toolName,\n content: JSON.stringify(message.toolOutput),\n };\n }\n\n return null as never;\n }\n\n #toOpenAIMessages(messages: Message[]): ChatCompletionMessageParam[] {\n return messages.map(this.#toOpenAIMessage);\n }\n\n #toOpenAITool(tool: LLMToolDefinition): OpenAI.Chat.Completions.ChatCompletionTool {\n return {\n type: \"function\",\n function: {\n name: tool.name,\n description: tool.description,\n parameters: z.toJSONSchema(tool.schema.input),\n },\n };\n }\n\n #toOpenAITools(tools: LLMToolDefinition[]): OpenAI.Chat.Completions.ChatCompletionTool[] {\n return tools.map(this.#toOpenAITool);\n }\n\n /**\n * Generate a message with job management - returns jobId along with stream\n */\n async generateMessage(\n params: Parameters<typeof LLMBase.prototype.generateMessage>[0],\n ): Promise<LLMGenerateMessageJob> {\n // Create a new job\n const job = this.createGenerateMessageJob();\n\n // Prepare tools and messages in OpenAI format\n const openaiTools = params.tools.length > 0 ? this.#toOpenAITools(params.tools) : undefined;\n const openaiMessages = this.#toOpenAIMessages(params.messages);\n\n // Prepare job stream\n const stream = await this.#client.chat.completions.create(\n {\n model: this.config.model,\n temperature: this.config.temperature,\n messages: openaiMessages,\n stream: true,\n ...(openaiTools?.length\n ? {\n tools: openaiTools,\n parallel_tool_calls: true,\n }\n : {}),\n },\n { signal: job._abortController.signal }, // Allows the stream to be cancelled\n );\n\n // Start streaming in the background (don't await)\n (async () => {\n let pendingToolCalls: Record<string, { id: string; name: string; arguments: string }> = {};\n\n for await (const chunk of stream) {\n // Ignore chunks if job was cancelled\n if (job._abortController.signal.aborted) continue;\n\n // Extract the choice and delta (if any)\n const choice = chunk.choices[0];\n if (!choice) throw new Error(\"No choice\");\n const delta = choice.delta;\n\n // Handle content tokens\n if (delta.content) {\n job.stream.push({ type: \"content\", content: delta.content });\n continue;\n }\n\n // Handle tool calls tokens\n if (delta.tool_calls) {\n for (const toolCall of delta.tool_calls) {\n // Retrieve the tool call ID\n const id = toolCall.id ?? Object.keys(pendingToolCalls).at(-1);\n if (!id) throw new Error(\"No tool call ID\");\n\n // Ensure the tool call is tracked\n if (!pendingToolCalls[id]) pendingToolCalls[id] = { id, name: \"\", arguments: \"\" };\n\n // Compound name tokens\n if (toolCall.function?.name) pendingToolCalls[id].name += toolCall.function.name;\n\n // Compound arguments tokens\n if (toolCall.function?.arguments)\n pendingToolCalls[id].arguments += toolCall.function.arguments;\n }\n }\n\n // Handle tool call completion\n if (chunk.choices[0]?.finish_reason === \"tool_calls\") {\n job.stream.push({\n type: \"tools\",\n tools: Object.values(pendingToolCalls).map((toolCall) => ({\n toolRequestId: toolCall.id,\n toolName: toolCall.name,\n toolInput: JSON.parse(toolCall.arguments || \"{}\"),\n })),\n });\n pendingToolCalls = {};\n }\n\n // Handle end of stream\n if (chunk.choices[0]?.finish_reason === \"stop\") job.stream.push({ type: \"end\" });\n }\n })();\n\n // Return the job immediately\n return job;\n }\n\n async generateObject<T extends z.ZodObject>(params: {\n messages: Message[];\n schema: T;\n }): Promise<LLMGenerateObjectChunk<T>> {\n try {\n // Prepare messages in OpenAI format\n const openaiMessages = this.#toOpenAIMessages(params.messages);\n\n // Prepare JSON schema\n const jsonSchema = z.toJSONSchema(params.schema);\n\n // Generate the object\n const response = await this.#client.chat.completions.create({\n model: this.config.model,\n messages: openaiMessages,\n temperature: this.config.temperature,\n response_format: {\n type: \"json_schema\",\n json_schema: { name: \"avc\", schema: jsonSchema },\n },\n });\n\n // Parse the response\n const data = JSON.parse(response.choices[0]?.message?.content || \"{}\");\n\n // Return the object\n return { type: \"content\", data };\n } catch (error) {\n return {\n type: \"error\",\n error: error instanceof Error ? error.message : \"Failed to generate object\",\n };\n }\n }\n}\n","import { zodUnionWithTelemetry } from \"@/telemetry/helpers/zod\";\nimport { MistralLLM, mistralLLMConfig } from \"./providers/mistral\";\nimport { OpenAILLM, openAILLMConfig } from \"./providers/openai\";\nimport { XaiLLM, xaiLLMConfig } from \"./providers/xai\";\n\n// Providers\nexport const llmProviders = {\n mistral: { class: MistralLLM, configSchema: mistralLLMConfig },\n openai: { class: OpenAILLM, configSchema: openAILLMConfig },\n xai: { class: XaiLLM, configSchema: xaiLLMConfig },\n} as const;\n\nexport type LLMProvider = (typeof llmProviders)[keyof typeof llmProviders][\"class\"];\n\n// Config\nexport const llmProviderConfig = zodUnionWithTelemetry(\"provider\", [\n mistralLLMConfig,\n openAILLMConfig,\n xaiLLMConfig,\n]);\n","import {\n createClient,\n type DeepgramClient,\n type ListenLiveClient,\n type LiveTranscriptionEvent,\n LiveTranscriptionEvents,\n} from \"@deepgram/sdk\";\nimport { z } from \"zod\";\nimport { zodObjectWithTelemetry } from \"@/telemetry/helpers/zod\";\nimport { STTBase, type STTJob } from \"../base\";\n\n// Config\nexport const deepgramSTTConfig = zodObjectWithTelemetry({\n schema: z.object({\n provider: z.literal(\"deepgram\"),\n apiKey: z.string().prefault(process.env.DEEPGRAM_API_KEY as string),\n model: z\n .enum([\n \"nova-3\",\n \"nova-2\",\n \"nova-2-general\",\n \"nova-2-meeting\",\n \"nova-2-phonecall\",\n \"nova-2-voicemail\",\n \"nova-2-finance\",\n \"nova-2-conversationalai\",\n \"nova-2-video\",\n \"nova-2-medical\",\n \"nova-2-drivethru\",\n \"nova-2-automotive\",\n \"nova-2-atc\",\n \"nova\",\n \"nova-general\",\n \"nova-phonecall\",\n \"enhanced\",\n \"enhanced-general\",\n \"enhanced-meeting\",\n \"enhanced-phonecall\",\n \"enhanced-finance\",\n \"base\",\n \"base-general\",\n \"base-meeting\",\n \"base-phonecall\",\n \"base-voicemail\",\n \"base-finance\",\n \"base-conversationalai\",\n \"base-video\",\n \"whisper-tiny\",\n \"whisper-base\",\n \"whisper-small\",\n \"whisper-medium\",\n \"whisper-large\",\n ])\n .prefault(\"nova-2-general\"),\n language: z.string().prefault(\"en\"),\n }),\n toTelemetry: (config) => {\n // Redact sensitive fields\n config.apiKey = \"redacted\" as never;\n\n return config;\n },\n});\n\n// Model\nexport class DeepgramSTT extends STTBase<typeof deepgramSTTConfig.schema> {\n readonly #deepgram: DeepgramClient;\n readonly #jobsSockets: Map<string, ListenLiveClient> = new Map();\n\n constructor(config: z.input<typeof deepgramSTTConfig.schema>) {\n super(deepgramSTTConfig.schema, config);\n this.#deepgram = createClient(config.apiKey);\n }\n\n // biome-ignore lint/suspicious/useAwait: need async to match STTBase abstract method\n async generate(): Promise<STTJob> {\n // Create a new generation job\n const job = this.createGenerateJob();\n\n // Establish a new socket for the job\n const socket = this.#deepgram.listen.live({\n encoding: \"linear16\",\n sample_rate: 16_000,\n channels: 1,\n filler_words: true,\n numerals: true,\n punctuate: true,\n smart_format: true,\n endpointing: 0, // VAD is managed by the generation plugin\n no_delay: true,\n\n // Dynamic config\n model: this.config.model,\n language: this.config.language,\n });\n this.#jobsSockets.set(job.id, socket);\n\n // Push voice chunks as they arrive\n socket.on(LiveTranscriptionEvents.Transcript, (msg: LiveTranscriptionEvent) => {\n const text = msg.channel.alternatives[0]?.transcript;\n if (text) job.stream.push({ type: \"content\", textChunk: text });\n });\n\n // Handle job cancellation\n job._abortController.signal.addEventListener(\"abort\", () => {\n socket.requestClose();\n this.#jobsSockets.delete(job.id);\n });\n\n // Ensure the socket is kept alive until the job is cancelled\n setInterval(() => socket.keepAlive(), 1000);\n\n return job;\n }\n\n // biome-ignore lint/suspicious/useAwait: Need async to match STTBase abstract method\n protected async onVoiceInput(job: STTJob, pcm: Int16Array) {\n this.#jobsSockets.get(job.id)?.send(pcm.buffer);\n }\n}\n","import type { z } from \"zod\";\nimport { AsyncQueue } from \"@/shared/async-queue\";\nimport { newId } from \"@/shared/prefixed-id\";\n\n// STTBase.generate()\nexport type STTChunk =\n | { type: \"content\"; textChunk: string }\n | { type: \"end\" }\n | { type: \"error\"; error: string };\n\nexport type STTJob = {\n id: string;\n stream: AsyncQueue<STTChunk>;\n cancel: () => void;\n inputVoice: (chunk: Int16Array) => void;\n _abortController: AbortController;\n};\n\n/**\n * Base class for all STT providers.\n */\nexport abstract class STTBase<ConfigSchema extends z.ZodObject> {\n protected config: z.infer<ConfigSchema>;\n\n constructor(configSchema: ConfigSchema, config: Partial<z.infer<ConfigSchema>>) {\n this.config = configSchema.parse({ ...config });\n }\n\n protected createGenerateJob(): STTJob {\n const id = newId(\"job\");\n const stream = new AsyncQueue<STTChunk>();\n const _abortController = new AbortController();\n const job: STTJob = {\n id,\n stream,\n inputVoice: (pcm: Int16Array) => this.onVoiceInput(job, pcm),\n cancel: () => _abortController.abort(),\n _abortController,\n };\n return job;\n }\n\n // To be impemented by subclasses\n abstract generate(): Promise<STTJob>;\n protected abstract onVoiceInput(job: STTJob, pcm: Int16Array): Promise<void>;\n}\n","import { zodUnionWithTelemetry } from \"@/telemetry/helpers/zod\";\nimport { DeepgramSTT, deepgramSTTConfig } from \"./providers/deepgram\";\n\n// Providers\nexport const sttProviders = {\n deepgram: { class: DeepgramSTT, configSchema: deepgramSTTConfig },\n} as const;\n\nexport type STTProvider = (typeof sttProviders)[keyof typeof sttProviders][\"class\"];\n\n// Config\nexport const sttProviderConfig = zodUnionWithTelemetry(\"provider\", [deepgramSTTConfig]);\n","import { CartesiaClient } from \"@cartesia/cartesia-js\";\nimport type { StreamingResponse } from \"@cartesia/carte