
node-llama-cpp

Run AI models locally on your machine with Node.js bindings for llama.cpp, and enforce a JSON schema on the model output at the generation level.

Llama3_1ChatWrapper.js
import { ChatWrapper } from "../ChatWrapper.js";
import { SpecialToken, LlamaText, SpecialTokensText } from "../utils/LlamaText.js";
import { ChatModelFunctionsDocumentationGenerator } from "./utils/ChatModelFunctionsDocumentationGenerator.js";
import { jsonDumps } from "./utils/jsonDumps.js";
import { isLlama3_2LightweightModel } from "./utils/isLlama3_2LightweightModel.js";
// source: https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1
export class Llama3_1ChatWrapper extends ChatWrapper {
    wrapperName = "Llama 3.1";
    cuttingKnowledgeDate;
    todayDate;
    noToolInstructions;
    /** @internal */ _specialTokensTextForPreamble;
    settings = {
        supportsSystemMessages: true,
        functions: {
            call: {
                optionalPrefixSpace: true,
                prefix: LlamaText(new SpecialTokensText("<function=")),
                paramsPrefix: LlamaText(new SpecialTokensText(">")),
                suffix: LlamaText(new SpecialTokensText("</function><|eom_id|>"))
            },
            result: {
                prefix: LlamaText(new SpecialTokensText("\n<|start_header_id|>ipython<|end_header_id|>\n\n")),
                suffix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n"))
            }
        }
    };
    constructor(options = {}) {
        super();
        const {
            cuttingKnowledgeDate = new Date("2023-12-01T00:00:00Z"),
            todayDate = () => new Date(),
            noToolInstructions = false,
            _specialTokensTextForPreamble = false
        } = options;
        this.cuttingKnowledgeDate = cuttingKnowledgeDate == null
            ? null
            : cuttingKnowledgeDate instanceof Function
                ? cuttingKnowledgeDate
                : new Date(cuttingKnowledgeDate);
        this.todayDate = todayDate == null
            ? null
            : todayDate instanceof Function
                ? todayDate
                : new Date(todayDate);
        this.noToolInstructions = noToolInstructions;
        this._specialTokensTextForPreamble = _specialTokensTextForPreamble;
    }
    addAvailableFunctionsSystemMessageToHistory(history, availableFunctions, { documentParams = true } = {}) {
        const availableFunctionNames = Object.keys(availableFunctions ?? {});
        if (availableFunctions == null || availableFunctionNames.length === 0)
            return history;
        const res = history.slice();
        const functionsSystemMessage = {
            type: "system",
            text: this.generateAvailableFunctionsSystemText(availableFunctions, { documentParams }).toJSON()
        };
        if (res.length >= 2 && res[0].type === "system" && res[1].type === "system")
            res.splice(1, 0, functionsSystemMessage);
        else
            res.unshift({
                type: "system",
                text: this.generateAvailableFunctionsSystemText(availableFunctions, { documentParams }).toJSON()
            });
        return res;
    }
    generateContextState({ chatHistory, availableFunctions, documentFunctionParams }) {
        const chatHistoryWithPreamble = this.prependPreambleToChatHistory(chatHistory);
        const historyWithFunctions = this.addAvailableFunctionsSystemMessageToHistory(chatHistoryWithPreamble, availableFunctions, {
            documentParams: documentFunctionParams
        });
        const resultItems = [];
        let systemTexts = [];
        let userTexts = [];
        let modelTexts = [];
        let currentAggregateFocus = null;
        const flush = () => {
            if (systemTexts.length > 0 || userTexts.length > 0 || modelTexts.length > 0)
                resultItems.push({
                    system: systemTexts.length === 0
                        ? null
                        : LlamaText.joinValues(resultItems.length === 0 && this._specialTokensTextForPreamble
                            ? LlamaText(new SpecialTokensText("\n\n"))
                            : "\n\n", systemTexts),
                    user: userTexts.length === 0
                        ? null
                        : LlamaText.joinValues("\n\n", userTexts),
                    model: modelTexts.length === 0
                        ? null
                        : LlamaText.joinValues("\n\n", modelTexts)
                });
            systemTexts = [];
            userTexts = [];
            modelTexts = [];
        };
        for (const item of historyWithFunctions) {
            if (item.type === "system") {
                if (currentAggregateFocus !== "system")
                    flush();
                currentAggregateFocus = "system";
                systemTexts.push(LlamaText.fromJSON(item.text));
            }
            else if (item.type === "user") {
                if (currentAggregateFocus !== "user")
                    flush();
                currentAggregateFocus = "user";
                userTexts.push(LlamaText(item.text));
            }
            else if (item.type === "model") {
                if (currentAggregateFocus !== "model")
                    flush();
                currentAggregateFocus = "model";
                modelTexts.push(this.generateModelResponseText(item.response));
            }
            else
                void item;
        }
        flush();
        const contextText = LlamaText(new SpecialToken("BOS"), resultItems.map((item, index) => {
            const isLastItem = index === resultItems.length - 1;
            const res = [];
            if (item.system != null) {
                res.push(LlamaText([
                    new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"),
                    item.system,
                    new SpecialToken("EOT")
                ]));
            }
            if (item.user != null) {
                res.push(LlamaText([
                    new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"),
                    item.user,
                    new SpecialToken("EOT")
                ]));
            }
            if (item.model != null) {
                res.push(LlamaText([
                    new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n"),
                    item.model,
                    isLastItem
                        ? LlamaText([])
                        : new SpecialToken("EOT")
                ]));
            }
            return LlamaText(res);
        }));
        return {
            contextText,
            stopGenerationTriggers: [
                LlamaText(new SpecialToken("EOS")),
                LlamaText(new SpecialToken("EOT")),
                LlamaText(new SpecialTokensText("<|eot_id|>")),
                LlamaText(new SpecialTokensText("<|end_of_text|>")),
                LlamaText("<|eot_id|>"),
                LlamaText("<|end_of_text|>")
            ]
        };
    }
    generateAvailableFunctionsSystemText(availableFunctions, { documentParams = true }) {
        const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions);
        if (!functionsDocumentationGenerator.hasAnyFunctions)
            return LlamaText([]);
        return LlamaText.joinValues("\n", [
            "You have access to the following functions:",
            "",
            functionsDocumentationGenerator.getLlama3_1FunctionSignatures({ documentParams }),
            "",
            "",
            "If you choose to call a function ONLY reply in the following format:",
            "<{start_tag}={function_name}>{parameters}{end_tag}",
            "where",
            "",
            "start_tag => `<function`",
            "parameters => a JSON dict with the function argument name as key and function argument value as value.",
            "end_tag => `</function>`",
            "",
            "Here is an example,",
            LlamaText([
                new SpecialTokensText("<function="),
                "example_function_name",
                new SpecialTokensText(">"),
                jsonDumps({ "example_name": "example_value" }),
                new SpecialTokensText("</function>")
            ]),
            "",
            "Reminder:",
            "- Function calls MUST follow the specified format",
            "- Only call one function at a time",
            "- Put the entire function call reply on one line",
            "- Always add your sources when using search results to answer the user query",
            "- After calling a function, the result will appear afterwards and is only visible to you",
            "- To make information visible to the user, you must include it in your response",
            "- Do not tell the user about the functions you are using",
            "- Only call functions when needed"
        ]);
    }
    prependPreambleToChatHistory(chatHistory) {
        const res = chatHistory.slice();
        const formatMonthDate = (date, timezone) => {
            const today = this.todayDate instanceof Function
                ? this.todayDate()
                : (this.todayDate ?? new Date());
            if (today.getUTCMonth() === date.getUTCMonth() && today.getUTCFullYear() === date.getUTCFullYear())
                return formatDate(date, timezone);
            const month = date.toLocaleDateString("en-US", { month: "long", timeZone: timezone });
            const year = date.toLocaleDateString("en-US", { year: "numeric", timeZone: timezone });
            return `${month} ${year}`;
        };
        const lines = [];
        if (this.cuttingKnowledgeDate != null) {
            const date = this.cuttingKnowledgeDate instanceof Function
                ? this.cuttingKnowledgeDate()
                : this.cuttingKnowledgeDate;
            lines.push(`Cutting Knowledge Date: ${formatMonthDate(date, "UTC")}`);
        }
        if (this.todayDate != null) {
            const date = this.todayDate instanceof Function
                ? this.todayDate()
                : this.todayDate;
            lines.push(`Today Date: ${formatDate(date, undefined)}`);
        }
        if (!this.noToolInstructions) {
            if (lines.length > 0)
                lines.push("");
            lines.push("# Tool Instructions");
            lines.push("- When looking for real time information use relevant functions if available");
            lines.push("");
            lines.push("");
        }
        if (lines.length > 0)
            res.unshift({
                type: "system",
                text: this._specialTokensTextForPreamble
                    ? LlamaText(new SpecialTokensText(lines.join("\n"))).toJSON()
                    : LlamaText.joinValues("\n", lines).toJSON()
            });
        return res;
    }
    /** @internal */
    static _checkModelCompatibility(options) {
        if (options.tokenizer != null) {
            const tokens = options.tokenizer("<|eom_id|>", true, "trimLeadingSpace");
            return tokens.length === 1 && options.tokenizer.isSpecialToken(tokens[0]) && !isLlama3_2LightweightModel(options);
        }
        return !isLlama3_2LightweightModel(options);
    }
    /** @internal */
    static _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate() {
        return [
            [{}, undefined, { functionCallMessageTemplate: "noJinja" }],
            [{ todayDate: null }, {}, { functionCallMessageTemplate: "noJinja" }],
            [{ cuttingKnowledgeDate: null }, {}, { functionCallMessageTemplate: "noJinja" }],
            [{ noToolInstructions: true }, {}, { functionCallMessageTemplate: "noJinja" }],
            [{ todayDate: null, cuttingKnowledgeDate: null }, {}, { functionCallMessageTemplate: "noJinja" }],
            [{ todayDate: null, cuttingKnowledgeDate: null, noToolInstructions: true }, {}, { functionCallMessageTemplate: "noJinja" }],
            [
                { todayDate: new Date("2024-07-26T00:00:00"), cuttingKnowledgeDate: null, noToolInstructions: true },
                {},
                { functionCallMessageTemplate: "noJinja" }
            ],
            [
                { todayDate: new Date("2024-07-26T00:00:00"), cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z"), noToolInstructions: true },
                { cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z") },
                {
                    additionalRenderParameters: { "date_string": formatDate(new Date("2024-07-26T00:00:00"), undefined) },
                    functionCallMessageTemplate: "noJinja"
                }
            ],
            [
                { todayDate: new Date("2024-07-26T00:00:00"), cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z"), noToolInstructions: true, _specialTokensTextForPreamble: true },
                { cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z") },
                {
                    additionalRenderParameters: { "date_string": formatDate(new Date("2024-07-26T00:00:00"), undefined) },
                    functionCallMessageTemplate: "noJinja"
                }
            ]
        ];
    }
}
function formatDate(date, timezone) {
    const day = date.toLocaleDateString("en-US", { day: "numeric", timeZone: timezone });
    const month = date.toLocaleDateString("en-US", { month: "short", timeZone: timezone });
    const year = date.toLocaleDateString("en-US", { year: "numeric", timeZone: timezone });
    return `${day} ${month} ${year}`;
}
//# sourceMappingURL=Llama3_1ChatWrapper.js.map
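Usage sketch

For context, below is a minimal sketch of how a wrapper like the one above is typically plugged into a chat session. It assumes the node-llama-cpp v3 API (getLlama, LlamaChatSession and its chatWrapper option, and the exported Llama3_1ChatWrapper class); the model path is a hypothetical placeholder, and the constructor options shown are the ones defined in this file (cuttingKnowledgeDate, todayDate, noToolInstructions). Treat it as illustrative rather than authoritative.

import { getLlama, LlamaChatSession, Llama3_1ChatWrapper } from "node-llama-cpp";

// Load a local GGUF model (hypothetical path; use any local Llama 3.1 Instruct model).
const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: "models/Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf"
});
const context = await model.createContext();

// Create a chat session and pass this wrapper explicitly instead of relying on
// automatic chat-wrapper resolution. The options mirror the constructor above.
const session = new LlamaChatSession({
    contextSequence: context.getSequence(),
    chatWrapper: new Llama3_1ChatWrapper({
        cuttingKnowledgeDate: new Date("2023-12-01T00:00:00Z"),
        todayDate: () => new Date(),
        noToolInstructions: false
    })
});

const answer = await session.prompt("What is the capital of France?");
console.log(answer);

The wrapper then produces the Llama 3.1 prompt format seen in generateContextState (the <|start_header_id|>…<|end_header_id|> headers, the preamble with the cutting-knowledge and today dates, and the function-calling instructions) whenever the session generates a response.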