inference-server
Libraries and server for building AI applications. Adapters for various native bindings enable local inference. Integrate it with your application, or run it as a microservice.
import fs from 'node:fs'
import path from 'node:path'
import {
  ChatHistoryItem,
  ChatModelResponse,
  LlamaTextJSON,
  ChatModelFunctionCall,
} from 'node-llama-cpp'
import { CompletionFinishReason, ChatMessage, ToolCallResult } from '#package/types/index.js'
import { flattenMessageTextContent } from '#package/lib/flattenMessageTextContent.js'
import { LlamaChatResult } from './types.js'

/**
 * Maps a node-llama-cpp stop reason to the completion finish reason used by this package.
 */
export function mapFinishReason(
  nodeLlamaCppFinishReason: LlamaChatResult['stopReason'],
): CompletionFinishReason {
  switch (nodeLlamaCppFinishReason) {
    case 'functionCalls':
      return 'toolCalls'
    case 'stopGenerationTrigger':
    case 'customStopTrigger':
      return 'stopTrigger'
    default:
      return nodeLlamaCppFinishReason
  }
}

/**
 * Appends a function (tool) call and its result to a copy of the given chat history,
 * creating a trailing model response item if one does not exist yet.
 */
export function addFunctionCallToChatHistory({
  chatHistory,
  functionName,
  functionDescription,
  callParams,
  callResult,
  rawCall,
  startsNewChunk,
}: {
  chatHistory: ChatHistoryItem[]
  functionName: string
  functionDescription?: string
  callParams: any
  callResult: ToolCallResult
  rawCall?: LlamaTextJSON
  startsNewChunk?: boolean
}) {
  const newChatHistory = chatHistory.slice()
  // Ensure the history ends with a model response item that the call can be attached to.
  if (newChatHistory.length === 0 || newChatHistory[newChatHistory.length - 1]!.type !== 'model')
    newChatHistory.push({
      type: 'model',
      response: [],
    })
  // Copy the last model response item and its response array before mutating them,
  // so the original chat history is left untouched.
  const lastModelResponseItem = newChatHistory[newChatHistory.length - 1] as ChatModelResponse
  const newLastModelResponseItem = { ...lastModelResponseItem }
  newChatHistory[newChatHistory.length - 1] = newLastModelResponseItem
  const modelResponse = newLastModelResponseItem.response.slice()
  newLastModelResponseItem.response = modelResponse
  const functionCall: ChatModelFunctionCall = {
    type: 'functionCall',
    name: functionName,
    description: functionDescription,
    params: callParams,
    result: typeof callResult === 'string' ? callResult : callResult.text,
    rawCall,
  }
  if (startsNewChunk)
    functionCall.startsNewChunk = true
  modelResponse.push(functionCall)
  return newChatHistory
}
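
// Illustrative usage sketch (assumed values, not part of this module): appending a
// completed tool call to an existing history before resuming generation. The function
// name, parameters, and result below are hypothetical.
//
//   const updatedHistory = addFunctionCallToChatHistory({
//     chatHistory,
//     functionName: 'getWeather',
//     callParams: { city: 'Berlin' },
//     callResult: 'Sunny, 24°C',
//   })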

/**
 * Converts ChatMessage objects into node-llama-cpp ChatHistoryItems,
 * merging any system messages into a single system prompt at the start.
 */
export function createChatMessageArray(
  messages: ChatMessage[],
): ChatHistoryItem[] {
  const items: ChatHistoryItem[] = []
  let systemPrompt: string | undefined
  for (const message of messages) {
    if (message.role === 'user') {
      items.push({
        type: 'user',
        text: flattenMessageTextContent(message.content),
      })
    } else if (message.role === 'assistant') {
      items.push({
        type: 'model',
        // Flatten to plain text, matching the user and system branches.
        response: [flattenMessageTextContent(message.content)],
      })
    } else if (message.role === 'system') {
      // Concatenate multiple system messages into one prompt.
      if (systemPrompt) {
        systemPrompt += '\n\n' + flattenMessageTextContent(message.content)
      } else {
        systemPrompt = flattenMessageTextContent(message.content)
      }
    }
  }
  if (systemPrompt) {
    items.unshift({
      type: 'system',
      text: systemPrompt,
    })
  }
  return items
}

/**
 * Reads all .gbnf grammar files in a directory and returns their contents,
 * keyed by file name without the extension.
 */
export async function readGBNFFiles(directoryPath: string) {
  const gbnfFiles = fs
    .readdirSync(directoryPath)
    .filter((f) => f.endsWith('.gbnf'))
  const fileContents = await Promise.all(
    gbnfFiles.map((file) =>
      fs.promises.readFile(path.join(directoryPath, file), 'utf-8'),
    ),
  )
  return gbnfFiles.reduce((acc, file, i) => {
    acc[path.basename(file, '.gbnf')] = fileContents[i]
    return acc
  }, {} as Record<string, string>)
}
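
// Illustrative usage sketch (assumed values, not part of this module): building a
// node-llama-cpp chat history from incoming messages, loading grammar files, and
// mapping a stop reason. The message contents and directory path are hypothetical.
//
//   const history = createChatMessageArray([
//     { role: 'system', content: 'You are a helpful assistant.' },
//     { role: 'user', content: 'Hello!' },
//   ])
//   const grammars = await readGBNFFiles('./grammars')
//   const finishReason = mapFinishReason('customStopTrigger') // -> 'stopTrigger'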