/*
 * llmatic — use self-hosted LLMs with an OpenAI-compatible API.
 */
/* eslint-disable @typescript-eslint/naming-convention */
import type { Cradle } from "../container.ts";
import type { LlmAdapter } from "../llm-adapter.ts";
import type { OperationHandler } from "../operation-handler.ts";
import type { SseHelper } from "../sse-helper.ts";
import {
type ChoiceMessage,
type CreateChatCompletionOkResponse,
type CreateChatCompletionRequest,
Role,
} from "../types/create-chat-completion.ts";
import type { Choice } from "../types/create-chat-completion.ts";
import type { RouteHandlerMethod } from "fastify";
import shortUUID from "short-uuid";
/**
 * One streamed SSE choice: a regular {@link Choice} whose message arrives
 * incrementally as a partial `delta` (mirrors OpenAI's `chat.completion.chunk`
 * choice shape).
 */
type Chunk = Choice & {
delta: Partial<ChoiceMessage>;
};
/**
 * Fastify route handler for the OpenAI-compatible `createChatCompletion`
 * operation. Supports both streaming (SSE chunks) and non-streaming
 * (single aggregated JSON response) modes, delegating generation to the
 * injected {@link LlmAdapter}.
 */
export default class CreateChatCompletionHandler implements OperationHandler {
  operationId = "createChatCompletion";

  readonly #llmAdapter: LlmAdapter;
  readonly #sseHelper: SseHelper;

  constructor({ llmAdapter, sseHelper }: Cradle) {
    this.#llmAdapter = llmAdapter;
    this.#sseHelper = sseHelper;
  }

  /**
   * Handles a chat-completion request.
   *
   * Streaming (`stream: true`): each adapter callback is forwarded to the
   * client as an SSE `chat.completion.chunk`, terminated by `[DONE]`.
   * Non-streaming: deltas are accumulated per choice index and returned as
   * one `chat.completion` response. Usage counts are not tracked and are
   * reported as zeros.
   */
  handle: RouteHandlerMethod = async (request, reply) => {
    const body = request.body as CreateChatCompletionRequest;
    const {
      frequency_penalty,
      logit_bias,
      max_tokens,
      messages,
      model,
      n,
      presence_penalty,
      stop,
      stream,
      temperature,
      top_p,
    } = body;

    // Abort generation if the client disconnects mid-request.
    const abortController = new AbortController();
    request.raw.once("close", () => {
      if (request.raw.destroyed) {
        abortController.abort();
      }
    });

    const id = `chatcmpl-${shortUUID.generate()}`;
    // Per-choice token fragments, joined into full content at the end.
    const choiceTokens: string[][] = [];
    const choices: Choice[] = [];

    await this.#llmAdapter.createChatCompletion(
      {
        messages,
        model,
        frequencyPenalty: frequency_penalty,
        logitBias: logit_bias,
        maxTokens: max_tokens,
        n,
        presencePenalty: presence_penalty,
        // OpenAI allows `stop` as a string, an array, or absent; normalize
        // to a string array, dropping a nullish scalar.
        stop: (Array.isArray(stop) ? stop : [stop].filter(Boolean)) as string[],
        temperature,
        topP: top_p,
      },
      abortController.signal,
      ({ index, delta, finishReason }) => {
        if (stream) {
          this.#sseHelper.sse(
            reply,
            this.#createResponseChunk(id, model, {
              delta,
              index,
            }),
          );
          return;
        }

        // Non-streaming: lazily initialize the choice slot, then fold the
        // delta into it.
        choices[index] ??= {
          index,
          message: { role: Role.Assistant, content: "" },
        };
        choices[index].finish_reason = finishReason;
        if (delta.role) {
          // NOTE(review): this role is later overwritten with
          // Role.Assistant when the response is finalized below, so a
          // non-assistant delta role never reaches the caller.
          choices[index].message!.role = delta.role;
        }

        choiceTokens[index] ??= [];
        if (delta.content) {
          choiceTokens[index].push(delta.content);
        }
      },
    );

    if (stream) {
      this.#sseHelper.sse(reply, "[DONE]");
      reply.raw.end();
      return;
    }

    // Finalize each populated choice: assistant role + concatenated tokens.
    for (const [index, choice] of choices.entries()) {
      if (!choice) {
        continue;
      }

      choice.message!.role = Role.Assistant;
      choice.message!.content = choiceTokens[index].join("");
    }

    const response: CreateChatCompletionOkResponse = {
      ...this.#createResponse(id, model, choices),
      usage: {
        // Token accounting is not implemented; report zeros.
        completion_tokens: 0,
        prompt_tokens: 0,
        total_tokens: 0,
      },
    };

    return response;
  };

  /** Builds the final (non-streaming) `chat.completion` envelope. */
  #createResponse(
    id: string,
    model: string,
    choices: Choice[],
  ): CreateChatCompletionOkResponse {
    return {
      id,
      model,
      // `choices` is a sparse array indexed by choice index; drop holes.
      choices: choices.filter(Boolean),
      created: Math.floor(Date.now() / 1000),
      object: "chat.completion",
    };
  }

  /** Builds one SSE `chat.completion.chunk` envelope for a single delta. */
  #createResponseChunk(
    id: string,
    model: string,
    deltaChoice: Chunk,
  ): CreateChatCompletionOkResponse {
    return {
      id,
      model,
      choices: [deltaChoice],
      // Fix: OpenAI's `created` field is a Unix timestamp in SECONDS.
      // This previously sent `Date.now()` (milliseconds), inconsistent
      // with #createResponse and ~1000x too large for API clients.
      created: Math.floor(Date.now() / 1000),
      object: "chat.completion.chunk",
    };
  }
}