inference-server
Version:
Libraries and a server for building AI applications. Adapters to various native bindings allow local inference. Integrate it with your application, or run it as a microservice.
94 lines (79 loc) • 2.64 kB
text/typescript
import { IncomingMessage, ServerResponse } from 'node:http'
import type { OpenAI } from 'openai'
import { EmbeddingParams } from '#package/types/index.js'
import { parseJSONRequestBody } from '#package/api/parseJSONRequestBody.js'
import { omitEmptyValues } from '#package/lib/util.js'
import { InferenceServer } from '#package/server.js'
// handler for v1/embeddings
// https://platform.openai.com/docs/api-reference/embeddings
type OpenAIEmbeddingsParams = OpenAI.EmbeddingCreateParams
/**
 * Creates an HTTP handler for the OpenAI-compatible `v1/embeddings` endpoint.
 * https://platform.openai.com/docs/api-reference/embeddings
 *
 * Validates the JSON request body, acquires a model instance from the
 * inference server, runs the embedding task, and writes an
 * `OpenAI.CreateEmbeddingResponse`-shaped JSON payload.
 *
 * @param inferenceServer - Server used to resolve models and run embedding tasks.
 * @returns An async `(req, res)` request handler.
 */
export function createEmbeddingsHandler(inferenceServer: InferenceServer) {
	return async (req: IncomingMessage, res: ServerResponse) => {
		// Small helper to keep the repeated error-response boilerplate in one place.
		const sendError = (status: number, message: string) => {
			res.writeHead(status, { 'Content-Type': 'application/json' })
			res.end(JSON.stringify({ error: message }))
		}

		let args: OpenAIEmbeddingsParams
		try {
			args = await parseJSONRequestBody(req)
		} catch (e) {
			console.error(e)
			sendError(400, 'Invalid request')
			return
		}
		// TODO ajv schema validation?
		if (!args.model || !args.input) {
			sendError(400, 'Invalid request')
			return
		}
		if (!inferenceServer.modelExists(args.model)) {
			sendError(400, 'Invalid model')
			return
		}
		// FIX: a non-string input is a client error, not a server error.
		// Previously this threw inside the try block below and surfaced as
		// a 500; reject with 400 before any inference work is started.
		const input = args.input
		if (typeof input !== 'string') {
			sendError(400, 'Input must be a string')
			return
		}

		// Abort in-flight inference if the client goes away.
		const controller = new AbortController()
		req.on('close', () => {
			console.debug('Client closed connection')
			controller.abort()
		})
		req.on('end', () => {
			console.debug('Client ended connection')
			controller.abort()
		})

		try {
			const embeddingsReq = omitEmptyValues<EmbeddingParams>({
				model: args.model,
				input,
			})
			const { instance, release } = await inferenceServer.requestInstance(
				embeddingsReq,
				controller.signal,
			)
			try {
				const task = instance.processEmbeddingTask(embeddingsReq)
				const result = await task.result
				const response: OpenAI.CreateEmbeddingResponse = {
					model: instance.modelId,
					object: 'list',
					data: result.embeddings.map((embedding, index) => ({
						embedding: Array.from(embedding),
						index,
						object: 'embedding',
					})),
					usage: {
						// Embeddings have no completion tokens, so total == prompt.
						prompt_tokens: result.inputTokens,
						total_tokens: result.inputTokens,
					},
				}
				res.writeHead(200, { 'Content-Type': 'application/json' })
				res.end(JSON.stringify(response, null, 2))
			} finally {
				// FIX: release() was previously skipped when the task threw or
				// the request was aborted, leaking the instance lease. Always
				// return the instance to the server.
				release()
			}
		} catch (err) {
			console.error(err)
			sendError(500, 'Internal server error')
		}
	}
}