node-llama-cpp
Run AI models locally on your machine with Node.js bindings for llama.cpp. Enforce a JSON schema on the model output at the generation level.
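For example, loading a Qwen model and chatting with it (a minimal sketch: the model path is a placeholder, and QwenChatWrapper is assumed to be exported from the package's main entry point like the other chat wrappers; normally the right wrapper is resolved automatically from the model, so passing it explicitly is optional):

import {getLlama, LlamaChatSession, QwenChatWrapper} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({modelPath: "path/to/qwen-model.gguf"}); // placeholder path
const context = await model.createContext();
const session = new LlamaChatSession({
    contextSequence: context.getSequence(),
    chatWrapper: new QwenChatWrapper() // normally resolved automatically
});

console.log(await session.prompt("Hi there"));

The source of QwenChatWrapper.js follows.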
import { ChatWrapper } from "../ChatWrapper.js";
import { isChatModelResponseSegment } from "../types.js";
import { LlamaText, SpecialToken, SpecialTokensText } from "../utils/LlamaText.js";
import { GgufArchitectureType } from "../gguf/types/GgufMetadataTypes.js";
import { ChatModelFunctionsDocumentationGenerator } from "./utils/ChatModelFunctionsDocumentationGenerator.js";
// source: https://huggingface.co/Qwen/Qwen2.5-14B-Instruct-1M/blob/main/tokenizer_config.json#L197
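// Chat wrapper for Qwen models: renders the conversation in the ChatML format
// (<|im_start|>role ... <|im_end|>), with Qwen's <tool_call>/<tool_response>
// function-calling syntax and <think>...</think> thought segments.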
export class QwenChatWrapper extends ChatWrapper {
    wrapperName = "Qwen";
    /** When `true` (the default), thought segments are stripped from all but the last model response when building the context. */
    keepOnlyLastThought;
    settings = {
        supportsSystemMessages: true,
        functions: {
            call: {
                optionalPrefixSpace: true,
                prefix: LlamaText("\n", new SpecialTokensText("<tool_call>"), '\n{"name": "'),
                paramsPrefix: '", "arguments": ',
                suffix: LlamaText("}\n", new SpecialTokensText("</tool_call>")),
                emptyCallParamsPlaceholder: {}
            },
            result: {
                prefix: LlamaText(new SpecialTokensText("\n<tool_response>\n")),
                suffix: LlamaText(new SpecialTokensText("\n</tool_response>"))
            },
            parallelism: {
                call: {
                    sectionPrefix: "",
                    sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n"))
                },
                result: {
                    sectionPrefix: LlamaText(new SpecialTokensText("<|im_start|>user")),
                    sectionSuffix: LlamaText(new SpecialTokensText("<|im_end|>\n<|im_start|>assistant\n"))
                }
            }
        },
        segments: {
            reiterateStackAfterFunctionCalls: true,
            thought: {
                prefix: LlamaText(new SpecialTokensText("<think>")),
                suffix: LlamaText(new SpecialTokensText("</think>"))
            }
        }
    };
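    // With these settings, a function call from the model is rendered like:
    //   <tool_call>
    //   {"name": "getTemperature", "arguments": {"city": "Paris"}}
    //   </tool_call>
    // (name and arguments here are illustrative), and the function's result is
    // sent back to the model wrapped in <tool_response>...</tool_response> tags.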
    constructor(options = {}) {
        super();
        const { keepOnlyLastThought = true } = options;
        this.keepOnlyLastThought = keepOnlyLastThought;
    }
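    // Renders the chat history into ChatML: consecutive system messages are
    // aggregated, and each message is wrapped in <|im_start|>role ... <|im_end|> tags.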
    generateContextState({ chatHistory, availableFunctions, documentFunctionParams }) {
        const historyWithFunctions = this.addAvailableFunctionsSystemMessageToHistory(chatHistory, availableFunctions, {
            documentParams: documentFunctionParams
        });
        const resultItems = [];
        let systemTexts = [];
        let userTexts = [];
        let modelTexts = [];
        let currentAggregateFocus = null;
        // Flushes the texts buffered for each role into a single result item
        function flush() {
            if (systemTexts.length > 0 || userTexts.length > 0 || modelTexts.length > 0)
                resultItems.push({
                    system: LlamaText.joinValues("\n\n", systemTexts),
                    user: LlamaText.joinValues("\n\n", userTexts),
                    model: LlamaText.joinValues("\n\n", modelTexts)
                });
            systemTexts = [];
            userTexts = [];
            modelTexts = [];
        }
        for (let i = 0; i < historyWithFunctions.length; i++) {
            const item = historyWithFunctions[i];
            const isLastItem = i === historyWithFunctions.length - 1;
            if (item.type === "system") {
                // Aggregate consecutive system messages into a single item
                if (currentAggregateFocus !== "system")
                    flush();
                currentAggregateFocus = "system";
                systemTexts.push(LlamaText.fromJSON(item.text));
            }
            else if (item.type === "user") {
                flush();
                currentAggregateFocus = null;
                userTexts.push(LlamaText(item.text));
            }
            else if (item.type === "model") {
                flush();
                currentAggregateFocus = null;
                // When keepOnlyLastThought is enabled, drop thought segments from all but the last model response
                modelTexts.push(this.generateModelResponseText((this.keepOnlyLastThought && !isLastItem)
                    ? item.response.filter((response) => (!isChatModelResponseSegment(response) || response.segmentType !== "thought"))
                    : item.response));
            }
            else
                void item;
        }
        flush();
        const contextText = LlamaText(resultItems.map(({ system, user, model }, index) => {
            const isLastItem = index === resultItems.length - 1;
            return LlamaText([
                (system.values.length === 0)
                    ? LlamaText([])
                    : LlamaText([
                        new SpecialTokensText("<|im_start|>system\n"),
                        system,
                        new SpecialTokensText("<|im_end|>\n")
                    ]),
                (user.values.length === 0)
                    ? LlamaText([])
                    : LlamaText([
                        new SpecialTokensText("<|im_start|>user\n"),
                        user,
                        new SpecialTokensText("<|im_end|>\n")
                    ]),
                (model.values.length === 0 && !isLastItem)
                    ? LlamaText([])
                    : LlamaText([
                        new SpecialTokensText("<|im_start|>assistant\n"),
                        model,
                        // The last assistant segment is left open so generation continues from it
                        isLastItem
                            ? LlamaText([])
                            : new SpecialTokensText("<|im_end|>\n")
                    ])
            ]);
        }));
        return {
            contextText,
            stopGenerationTriggers: [
                LlamaText(new SpecialToken("EOS")),
                LlamaText(new SpecialTokensText("<|im_end|>")),
                LlamaText("<|im_end|>")
            ]
        };
    }
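    // Builds the "# Tools" section of the system prompt, documenting the available
    // functions inside <tools>...</tools> tags in Qwen's expected format.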
    generateAvailableFunctionsSystemText(availableFunctions, { documentParams = true }) {
        const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions);
        if (!functionsDocumentationGenerator.hasAnyFunctions)
            return LlamaText([]);
        return LlamaText.joinValues("\n", [
            "# Tools",
            "",
            "You may call one or more functions to assist with the user query.",
            "",
            LlamaText("You are provided with function signatures within ", new SpecialTokensText("<tools></tools>"), " XML tags:"),
            LlamaText(new SpecialTokensText("<tools>")),
            functionsDocumentationGenerator.getQwenFunctionSignatures({ documentParams }),
            LlamaText(new SpecialTokensText("</tools>")),
            "",
            LlamaText("For each function call, return a json object with function name and arguments within ", new SpecialTokensText("<tool_call></tool_call>"), " XML tags:"),
            LlamaText(new SpecialTokensText("<tool_call>")),
            '{"name": <function-name>, "arguments": <args-json-object>}',
            LlamaText(new SpecialTokensText("</tool_call>"))
        ]);
    }
    /** @internal */
    static _checkModelCompatibility(options) {
        const architecture = options.fileInfo?.metadata.general.architecture;
        return architecture == null || architecture === GgufArchitectureType.qwen2;
    }
    /** @internal */
    static _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate() {
        return [
            [undefined, {}, { _requireFunctionCallSettingsExtraction: true }]
        ];
    }
}
//# sourceMappingURL=QwenChatWrapper.js.map
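In application code, the function-calling syntax defined above is handled by the wrapper; functions are passed to the chat session, which renders the calls and feeds the results back. A minimal sketch building on the session from the example above (the function name, params schema, and handler are illustrative):

import {defineChatSessionFunction} from "node-llama-cpp";

// Illustrative function; with QwenChatWrapper, a call to it is rendered as
// <tool_call>{"name": "getTemperature", "arguments": {"city": "..."}}</tool_call>
const functions = {
    getTemperature: defineChatSessionFunction({
        description: "Get the current temperature in a city",
        params: {
            type: "object",
            properties: {
                city: {type: "string"}
            }
        },
        handler({city}) {
            // A real handler would fetch actual data; this is a stub
            return {city, temperatureCelsius: 21};
        }
    })
};

const answer = await session.prompt("What's the temperature in Paris?", {functions});
console.log(answer);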