inference-server

Libraries and a server for building AI applications. Adapters for various native bindings allow local inference. Integrate it with your application, or run it as a microservice.

import http from 'node:http'
import { InferenceServer } from '#package/index.js'

// A minimal chat server using the InferenceServer.
const localModels = new InferenceServer({
  log: 'info',
  concurrency: 2,
  models: {
    'phi3-mini-4k': {
      task: 'text-completion',
      url: 'https://gpt4all.io/models/gguf/Phi-3-mini-4k-instruct.Q4_0.gguf',
      engine: 'gpt4all',
      maxInstances: 2,
    },
  },
})
await localModels.start()

const httpServer = http.createServer((req, res) => {
  if (req.url === '/chat' && req.method === 'POST') {
    let body = ''
    req.on('data', (chunk) => {
      body += chunk.toString()
    })
    req.on('end', async () => {
      // Forward the parsed chat request to the inference server.
      const chatRequest = JSON.parse(body)
      const completion = await localModels.processChatCompletionTask(chatRequest)
      res.writeHead(200, { 'Content-Type': 'application/json' })
      res.end(JSON.stringify(completion, null, 2))
    })
  } else {
    res.writeHead(404, { 'Content-Type': 'text/plain' })
    res.end('Not found')
  }
})
httpServer.listen(3000).on('listening', () => {
  console.log('HTTP Server up')
})

/*
curl http://localhost:3000/chat \
  -H "Content-Type: application/json" \
  -d '{
    "model": "phi3-mini-4k",
    "messages": [
      { "role": "user", "content": "how to find my kernel version on linux?" }
    ]
  }'
*/

/*
{
  "finishReason": "eogToken",
  "message": {
    "role": "assistant",
    "content": "To find your kernel version on Linux, you can use the following methods: [...]"
  },
  "promptTokens": 10,
  "completionTokens": 344,
  "totalTokens": 354
}
*/
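
The same model can also be queried in-process, without the HTTP layer, when the server is embedded directly in an application. The following is a minimal sketch that reuses only the calls shown above (the InferenceServer constructor, start(), and processChatCompletionTask()) with the same model config; it is illustrative, not part of the example file.

// In-process usage of the same model, without the HTTP layer.
// A minimal sketch reusing only the API calls shown in the example above.
import { InferenceServer } from '#package/index.js'

const localModels = new InferenceServer({
  log: 'info',
  models: {
    'phi3-mini-4k': {
      task: 'text-completion',
      url: 'https://gpt4all.io/models/gguf/Phi-3-mini-4k-instruct.Q4_0.gguf',
      engine: 'gpt4all',
    },
  },
})
await localModels.start()

// Pass the chat request object directly instead of receiving it over HTTP.
const completion = await localModels.processChatCompletionTask({
  model: 'phi3-mini-4k',
  messages: [{ role: 'user', content: 'How do I find my kernel version on Linux?' }],
})
console.log(completion.message.content)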