inference-server
Libraries and a server for building AI applications. Adapters to various native bindings enable local inference. Integrate it into your application, or run it as a standalone microservice.
JavaScript
import readline from 'node:readline'
import chalk from 'chalk'
import { ModelServer as InferenceServer } from '#package/index.js'
// A command-line chat example using the ModelServer.
const localModels = new InferenceServer({
  // log: 'info',
  models: {
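    // Each key is a model id, referenced below when submitting tasks.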
    'my-model': {
      task: 'text-completion',
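      // Keep one instance of the model loaded at all times.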
      minInstances: 1,
      url: 'https://huggingface.co/HuggingFaceTB/smollm-135M-instruct-v0.2-Q8_0-GGUF/blob/main/smollm-135m-instruct-add-basics-q8_0.gguf',
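      // Expected SHA-256 checksum of the downloaded model file.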
      sha256: 'a98d3857b95b96c156d954780d28f39dcb35b642e72892ee08ddff70719e6220',
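      // node-llama-cpp provides llama.cpp bindings for local GGUF inference.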
      engine: 'node-llama-cpp',
      // device: { gpu: false },
    },
  },
})
console.log('Initializing models...')
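// Resolves once the configured models are downloaded (if needed) and loaded.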
await localModels.start()
const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
})
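// Conversation history; user and assistant messages accumulate here.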
const messages = []
while (true) {
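  // Read one line of user input from the terminal.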
  const input = await new Promise((resolve) => {
    rl.question(chalk.bold(chalk.dim('user > ')), (input) => {
      resolve(input)
    })
  })
  messages.push({
    role: 'user',
    content: input,
  })
  process.stdout.write(chalk.bold(chalk.dim('model > ')))
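  // Stream the completion; onChunk is called with each piece of generated text.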
  const result = await localModels.processChatCompletionTask({
    model: 'my-model',
    messages,
    onChunk: (chunk) => {
      process.stdout.write(chunk.text)
    },
  })
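  // Append the assistant reply so the next turn includes the full conversation.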
  messages.push(result.message)
  process.stdout.write(' ' + chalk.dim(`[${result.finishReason}]`) + '\n')
}