node-llama-cpp
Version:
Run AI models locally on your machine with node.js bindings for llama.cpp. Enforce a JSON schema on the model output on the generation level
462 lines • 25.5 kB
JavaScript
import path from "path";
import { fileURLToPath } from "url";
import process from "process";
import os from "os";
import fs from "fs-extra";
import chalk from "chalk";
import which from "which";
import { buildMetadataFileName, documentationPageUrls, llamaCppDirectory, llamaDirectory, llamaLocalBuildBinsDirectory, llamaPrebuiltBinsDirectory, llamaToolchainsDirectory } from "../../config.js";
import { convertBuildOptionsToBuildOptionsJSON } from "../types.js";
import { spawnCommand, SpawnError } from "../../utils/spawnCommand.js";
import { downloadCmakeIfNeeded, fixXpackPermissions, getCmakePath, hasBuiltinCmake } from "../../utils/cmake.js";
import { getConsoleLogPrefix } from "../../utils/getConsoleLogPrefix.js";
import { withLockfile } from "../../utils/withLockfile.js";
import { getModuleVersion } from "../../utils/getModuleVersion.js";
import { ensureLlamaCppRepoIsCloned, isLlamaCppRepoCloned } from "./cloneLlamaCppRepo.js";
import { getBuildFolderNameForBuildOptions } from "./getBuildFolderNameForBuildOptions.js";
import { setLastBuildInfo } from "./lastBuildInfo.js";
import { getPlatform } from "./getPlatform.js";
import { logDistroInstallInstruction } from "./logDistroInstallInstruction.js";
import { testCmakeBinary } from "./testCmakeBinary.js";
import { getCudaNvccPaths } from "./detectAvailableComputeLayers.js";
import { detectWindowsBuildTools } from "./detectBuildTools.js";
import { asyncSome } from "./asyncSome.js";
// Directory that contains this module; passed to `spawnCommand` when running the build scripts
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// CMake configuration type used for the build; also the name of the subfolder holding the compiled result
const buildConfigType = "Release";

// Custom CMake option names that rule out using LLVM on Windows when any of them is set
const requiresMsvcOnWindowsFlags = ["blas", "cann", "cuda", "hip", "kompute", "musa", "sycl", "opencl"].map(
    (backendName) => `GGML_${backendName.toUpperCase()}`
);
/**
 * Compile llama.cpp for the given build options into a folder under `llamaLocalBuildBinsDirectory`.
 *
 * The build runs the `cmake-js-llama` npm script (a `clean` followed by a `compile`) inside a
 * `withLockfile` call keyed on the output directory. After a successful compile it moves the build
 * files into the result directory, writes the build metadata file there, writes an empty
 * `buildDone.status` marker file in the output directory, and optionally calls `setLastBuildInfo`.
 *
 * When the build fails, platform/GPU specific hints are printed and, for some recognised failures,
 * the function calls itself again with an extra entry appended to `ignoreWorkarounds`.
 * If no retry succeeds, the original error is rethrown.
 *
 * @param buildOptions - this function reads `gpu`, `arch`, `platform`, `customCmakeOptions` and `progressLogs`
 * @param compileOptions - required object; each property is optional:
 *  - `nodeTarget` (default `process.version`): passed as `--runtime-version`, without a leading "v"
 *  - `updateLastBuildInfo` (default `true`): call `setLastBuildInfo` after a successful build
 *  - `includeBuildOptionsInBinaryFolderName` (default `true`): picks the folder name variant
 *    with or without the custom CMake options
 *  - `ensureLlamaCppRepoIsCloned` (default `false`): when `false`, a missing repo directory throws
 *  - `downloadCmakeIfNeeded` (default `false`)
 *  - `ignoreWorkarounds` (default `[]`): `"avoidWindowsLlvm"` disables the Windows LLVM build,
 *    `"singleBuildThread"` / `"reduceParallelBuildThreads"` lower the parallel build thread count,
 *    `"cudaArchitecture"` prevents another retry with a `CUDACXX` environment variable
 *  - `envVars` (default `process.env`): passed to `spawnCommand`
 *  - `ciMode` (default `false`): changes some CMake defaults and disables the retry without LLVM
 * @returns resolves with no value after a successful build (a successful retry's result is returned as is)
 * @throws the error of the first failed attempt when the build fails and no retry succeeds
 */
export async function compileLlamaCpp(buildOptions, compileOptions) {
const { nodeTarget = process.version, updateLastBuildInfo: updateLastBuildInfoArg = true, includeBuildOptionsInBinaryFolderName = true, ensureLlamaCppRepoIsCloned: ensureLlamaCppRepoIsClonedArg = false, downloadCmakeIfNeeded: downloadCmakeIfNeededArg = false, ignoreWorkarounds = [], envVars = process.env, ciMode = false } = compileOptions;
const platform = getPlatform();
const buildFolderName = await getBuildFolderNameForBuildOptions(buildOptions);
const finalBuildFolderName = includeBuildOptionsInBinaryFolderName
? buildFolderName.withCustomCmakeOptions
: buildFolderName.withoutCustomCmakeOptions;
// LLVM is considered on Windows only for builds without a GPU (or Vulkan on arm64), and only when
// the workaround isn't disabled, no custom toolchain file is given, no flag from `requiresMsvcOnWindowsFlags`
// is set, and the detected build tools include LLVM, Ninja and lib.exe
const useWindowsLlvm = (platform === "win" &&
(buildOptions.gpu === false ||
(buildOptions.gpu === "vulkan" && buildOptions.arch === "arm64") // Vulkan can't be compiled on Windows x64 with LLVM ATM
) &&
!ignoreWorkarounds.includes("avoidWindowsLlvm") &&
!buildOptions.customCmakeOptions.has("CMAKE_TOOLCHAIN_FILE") &&
!requiresMsvcOnWindowsFlags.some((flag) => buildOptions.customCmakeOptions.has(flag)))
? areWindowsBuildToolsCapableForLlvmBuild(await detectWindowsBuildTools())
: false;
const outDirectory = path.join(llamaLocalBuildBinsDirectory, finalBuildFolderName);
// "singleBuildThread" takes precedence over "reduceParallelBuildThreads"
let parallelBuildThreads = getParallelBuildThreadsToUse(platform, buildOptions.gpu, ciMode);
if (ignoreWorkarounds.includes("singleBuildThread"))
parallelBuildThreads = 1;
else if (ignoreWorkarounds.includes("reduceParallelBuildThreads"))
parallelBuildThreads = reduceParallelBuildThreads(parallelBuildThreads);
await fs.mkdirp(llamaLocalBuildBinsDirectory);
try {
// the whole build runs inside `withLockfile`, keyed on the output directory
await withLockfile({
resourcePath: outDirectory
}, async () => {
try {
if (ensureLlamaCppRepoIsClonedArg)
await ensureLlamaCppRepoIsCloned({ progressLogs: buildOptions.progressLogs });
else if (!(await isLlamaCppRepoCloned()))
throw new Error(`"${llamaCppDirectory}" directory does not exist`);
if (downloadCmakeIfNeededArg)
await downloadCmakeIfNeeded(buildOptions.progressLogs);
const cmakePathArgs = await getCmakePathArgs();
const cmakeGeneratorArgs = getCmakeGeneratorArgs(buildOptions.platform, buildOptions.arch, useWindowsLlvm);
const toolchainFile = await getToolchainFileForArch(buildOptions.arch, useWindowsLlvm);
// strip the leading "v" of a Node version string such as `process.version`
const runtimeVersion = nodeTarget.startsWith("v") ? nodeTarget.slice("v".length) : nodeTarget;
// work on a copy so the caller's `customCmakeOptions` map is not mutated
const cmakeCustomOptions = new Map(buildOptions.customCmakeOptions);
// toolchain options are kept in a separate map and appended after the custom options on the command line
const cmakeToolchainOptions = new Map();
if (!cmakeCustomOptions.has("GGML_BUILD_NUMBER"))
cmakeCustomOptions.set("GGML_BUILD_NUMBER", "1");
// these three options are always set, overriding any value given in the custom options
cmakeCustomOptions.set("CMAKE_CONFIGURATION_TYPES", buildConfigType);
cmakeCustomOptions.set("NLC_CURRENT_PLATFORM", platform + "-" + process.arch);
cmakeCustomOptions.set("NLC_TARGET_PLATFORM", buildOptions.platform + "-" + buildOptions.arch);
if (toolchainFile != null && !cmakeCustomOptions.has("CMAKE_TOOLCHAIN_FILE"))
cmakeToolchainOptions.set("CMAKE_TOOLCHAIN_FILE", toolchainFile);
if (toolchainFile != null &&
buildOptions.gpu === "vulkan" &&
(useWindowsLlvm || (platform === "win" && buildOptions.arch === "arm64")) &&
!cmakeCustomOptions.has("GGML_VULKAN_SHADERS_GEN_TOOLCHAIN"))
cmakeToolchainOptions.set("GGML_VULKAN_SHADERS_GEN_TOOLCHAIN", toolchainFile);
// Metal is explicitly turned off unless it was requested on macOS or set via the custom options
if (buildOptions.gpu === "metal" && process.platform === "darwin" && !cmakeCustomOptions.has("GGML_METAL"))
cmakeCustomOptions.set("GGML_METAL", "1");
else if (!cmakeCustomOptions.has("GGML_METAL"))
cmakeCustomOptions.set("GGML_METAL", "OFF");
if (buildOptions.gpu === "cuda" && !cmakeCustomOptions.has("GGML_CUDA"))
cmakeCustomOptions.set("GGML_CUDA", "1");
if (buildOptions.gpu === "vulkan" && !cmakeCustomOptions.has("GGML_VULKAN"))
cmakeCustomOptions.set("GGML_VULKAN", "1");
if (!cmakeCustomOptions.has("GGML_CCACHE"))
cmakeCustomOptions.set("GGML_CCACHE", "OFF");
if (buildOptions.platform === "win" && buildOptions.arch === "arm64" && !cmakeCustomOptions.has("GGML_OPENMP"))
cmakeCustomOptions.set("GGML_OPENMP", "OFF");
// with LLVM on Windows, OpenMP is turned off even when the custom options set it
if (useWindowsLlvm)
cmakeCustomOptions.set("GGML_OPENMP", "OFF");
// CI mode defaults: no OpenMP, and when native optimizations are not explicitly enabled,
// a non-native build with dynamically loaded backends (plus all CPU variants on x64)
if (ciMode) {
if (!cmakeCustomOptions.has("GGML_OPENMP"))
cmakeCustomOptions.set("GGML_OPENMP", "OFF");
if (!cmakeCustomOptions.has("GGML_NATIVE") || isCmakeValueOff(cmakeCustomOptions.get("GGML_NATIVE"))) {
cmakeCustomOptions.set("GGML_NATIVE", "OFF");
if (buildOptions.arch === "x64" && !cmakeCustomOptions.has("GGML_CPU_ALL_VARIANTS")) {
cmakeCustomOptions.set("GGML_CPU_ALL_VARIANTS", "ON");
cmakeCustomOptions.set("GGML_BACKEND_DL", "ON");
}
else if (!cmakeCustomOptions.has("GGML_BACKEND_DL"))
cmakeCustomOptions.set("GGML_BACKEND_DL", "ON");
}
}
// remove any previous output for this build folder before building
await fs.remove(outDirectory);
await spawnCommand("npm", [
"run", "-s", "cmake-js-llama", "--", "clean",
"--log-level", "warn",
"--out", path.relative(llamaDirectory, outDirectory),
...cmakePathArgs
], __dirname, envVars, buildOptions.progressLogs);
// every CMake option is passed as a `--CD<key>=<value>` argument
await spawnCommand("npm", [
"run", "-s", "cmake-js-llama", "--", "compile",
"--log-level", "warn",
"--config", buildConfigType,
"--arch=" + buildOptions.arch,
"--out", path.relative(llamaDirectory, outDirectory),
"--runtime-version=" + runtimeVersion,
"--parallel=" + parallelBuildThreads,
...cmakeGeneratorArgs,
...cmakePathArgs,
...([
...cmakeCustomOptions,
...cmakeToolchainOptions
].map(([key, value]) => "--CD" + key + "=" + value))
], __dirname, envVars, buildOptions.progressLogs);
const compiledResultDirPath = await moveBuildFilesToResultDir(outDirectory);
await fs.writeFile(path.join(compiledResultDirPath, buildMetadataFileName), JSON.stringify({
buildOptions: convertBuildOptionsToBuildOptionsJSON(buildOptions)
}), "utf8");
// empty marker file; `getLocalBuildBinaryPath` requires it to exist before returning the binary path
await fs.writeFile(path.join(outDirectory, "buildDone.status"), "", "utf8");
if (updateLastBuildInfoArg) {
await setLastBuildInfo({
folderName: finalBuildFolderName
});
}
}
finally {
// runs whether the build succeeded or failed
await fixXpackPermissions();
}
});
}
catch (err) {
// Print a hint for the most likely cause (only the first matching branch runs), retry where a
// workaround is available, and otherwise fall through to rethrow the original error
if (platform === "linux" && await which("make", { nothrow: true }) == null) {
console.info("\n" +
getConsoleLogPrefix(true) +
chalk.yellow('It seems that "make" is not installed in your system. Install it to resolve build issues'));
await logDistroInstallInstruction('To install "make", ', {
linuxPackages: { apt: ["make"], apk: ["make"] },
macOsPackages: { brew: ["make"] }
});
}
else if (platform === "linux" && !(await testCmakeBinary(await getCmakePath()))) {
console.info("\n" +
getConsoleLogPrefix(true) +
chalk.yellow('It seems that the used "cmake" doesn\'t work properly. Install it on your system to resolve build issues'));
await logDistroInstallInstruction('To install "cmake", ', {
linuxPackages: { apt: ["cmake"], apk: ["cmake"] },
macOsPackages: { brew: ["cmake"] }
});
}
else if (platform === "mac" && ((await which("clang", { nothrow: true })) == null || (err instanceof SpawnError &&
err.combinedStd.toLowerCase().includes('"/usr/bin/cc" is not able to compile a simple test program'))))
console.info("\n" +
getConsoleLogPrefix(true) +
chalk.yellow("It seems that Xcode command line tools are not installed in your system. Install it to resolve build issues\n") +
getConsoleLogPrefix(true) +
chalk.yellow('To install Xcode command line tools, run "xcode-select --install"'));
else if (buildOptions.gpu === "cuda") {
// CUDA compiler/architecture detection failure: retry once per nvcc path returned by `getCudaNvccPaths`,
// passing it as `CUDACXX`; "cudaArchitecture" is added so the retry doesn't enter this branch again
if (!ignoreWorkarounds.includes("cudaArchitecture") && (platform === "win" || platform === "linux") &&
err instanceof SpawnError && (err.combinedStd.toLowerCase().includes("Failed to detect a default CUDA architecture".toLowerCase()) || (err.combinedStd.toLowerCase().includes("Tell CMake where to find the compiler by setting either the environment".toLowerCase()) &&
err.combinedStd.toLowerCase().includes('variable "CUDACXX" or the CMake cache entry CMAKE_CUDA_COMPILER to the full'.toLowerCase())))) {
for (const nvccPath of await getCudaNvccPaths()) {
if (buildOptions.progressLogs)
console.info(getConsoleLogPrefix(true) + `Trying to compile again with "CUDACXX=${nvccPath}" environment variable`);
try {
return await compileLlamaCpp(buildOptions, {
...compileOptions,
envVars: {
...envVars,
CUDACXX: nvccPath
},
ignoreWorkarounds: [...ignoreWorkarounds, "cudaArchitecture"]
});
}
// this `err` shadows the outer one: the retry's error is only logged, and the next nvcc path is tried
catch (err) {
if (buildOptions.progressLogs)
console.error(getConsoleLogPrefix(true, false), err);
}
}
}
// Out-of-memory compiler failure: retry with fewer build threads, first with a reduced count
// and then (if that was already tried) with a single thread
else if ((!ignoreWorkarounds.includes("reduceParallelBuildThreads") || !ignoreWorkarounds.includes("singleBuildThread")) &&
(platform === "win" || platform === "linux") &&
err instanceof SpawnError &&
reduceParallelBuildThreads(parallelBuildThreads) !== parallelBuildThreads && (err.combinedStd.toLowerCase().includes("LLVM error : out of memory".toLowerCase()) ||
err.combinedStd.toLowerCase().includes("compiler is out of heap space".toLowerCase()))) {
if (buildOptions.progressLogs) {
if (ignoreWorkarounds.includes("reduceParallelBuildThreads"))
console.info(getConsoleLogPrefix(true) + "Trying to compile again with a single build thread");
else
console.info(getConsoleLogPrefix(true) + "Trying to compile again with reduced parallel build threads");
}
try {
return await compileLlamaCpp(buildOptions, {
...compileOptions,
ignoreWorkarounds: [
...ignoreWorkarounds,
ignoreWorkarounds.includes("reduceParallelBuildThreads")
? "singleBuildThread"
: "reduceParallelBuildThreads"
]
});
}
// shadows the outer `err`; the retry's error is only logged
catch (err) {
if (buildOptions.progressLogs)
console.error(getConsoleLogPrefix(true, false), err);
}
}
// reached whenever a CUDA build fails and no retry above returned successfully
console.info("\n" +
getConsoleLogPrefix(true) +
chalk.yellow("To resolve errors related to CUDA compilation, see the CUDA guide: ") +
documentationPageUrls.CUDA);
}
else if (buildOptions.gpu === "vulkan")
console.info("\n" +
getConsoleLogPrefix(true) +
chalk.yellow("To resolve errors related to Vulkan compilation, see the Vulkan guide: ") +
documentationPageUrls.Vulkan);
else if (useWindowsLlvm && !ciMode) {
if (buildOptions.progressLogs)
console.info(getConsoleLogPrefix(true) + "Trying to compile again without LLVM");
try {
return await compileLlamaCpp(buildOptions, {
...compileOptions,
ignoreWorkarounds: [...ignoreWorkarounds, "avoidWindowsLlvm"]
});
}
// shadows the outer `err`; the retry's error is only logged
catch (err) {
if (buildOptions.progressLogs)
console.error(getConsoleLogPrefix(true, false), err);
}
}
// no retry succeeded: surface the error of this attempt
throw err;
}
}
/**
 * Locate the addon binary of a local build.
 *
 * A local build is only considered usable when the `llama-addon.node` binary, the build metadata file
 * and the `buildDone.status` marker file all exist.
 *
 * @param folderName - name of the build folder inside `llamaLocalBuildBinsDirectory`
 * @returns the path of the addon binary, or `null` when any of the required files is missing
 */
export async function getLocalBuildBinaryPath(folderName) {
    const buildFolderPath = path.join(llamaLocalBuildBinsDirectory, folderName);
    const resultFolderPath = path.join(buildFolderPath, buildConfigType);
    const addonPath = path.join(resultFolderPath, "llama-addon.node");

    const requiredPaths = [
        addonPath,
        path.join(resultFolderPath, buildMetadataFileName),
        path.join(buildFolderPath, "buildDone.status")
    ];
    const existenceResults = await Promise.all(
        requiredPaths.map((requiredPath) => fs.pathExists(requiredPath))
    );

    return existenceResults.every(Boolean)
        ? addonPath
        : null;
}
/**
 * Read the build metadata file of a local build.
 *
 * @param folderName - name of the build folder inside `llamaLocalBuildBinsDirectory`
 * @returns the parsed JSON content of the build metadata file
 * @throws Error when the build metadata file does not exist
 */
export async function getLocalBuildBinaryBuildMetadata(folderName) {
    const metadataPath = path.join(llamaLocalBuildBinsDirectory, folderName, buildConfigType, buildMetadataFileName);
    const metadataFileExists = await fs.pathExists(metadataPath);

    if (metadataFileExists)
        return await fs.readJson(metadataPath);

    throw new Error(`Could not find build metadata file for local build "${folderName}"`);
}
/**
 * Find a prebuilt addon binary for the given build folder name.
 *
 * The folder inside `llamaPrebuiltBinsDirectory` is checked first; when it holds no usable binary,
 * the prebuilt binaries package matching `buildOptions` (if one can be resolved) is checked next.
 *
 * @param buildOptions - used to pick the prebuilt binaries package
 * @param folderName - name of the build folder to look for
 * @returns `{binaryPath, folderName, folderPath}` of the first match, or `null` when nothing is found
 */
export async function getPrebuiltBinaryPath(buildOptions, folderName) {
    const localFolderPath = path.join(llamaPrebuiltBinsDirectory, folderName);
    const localBinaryPath = await resolvePrebuiltBinaryPath(localFolderPath);
    if (localBinaryPath != null) {
        return {
            binaryPath: localBinaryPath,
            folderName,
            folderPath: localFolderPath
        };
    }

    const packageBinsDirectoryPath = await getPrebuiltBinariesPackageDirectoryForBuildOptions(buildOptions);
    if (packageBinsDirectoryPath == null)
        return null;

    const packageFolderPath = path.join(packageBinsDirectoryPath, folderName);
    const packageBinaryPath = await resolvePrebuiltBinaryPath(packageFolderPath);
    if (packageBinaryPath == null)
        return null;

    return {
        binaryPath: packageBinaryPath,
        folderName,
        folderPath: packageFolderPath
    };
}
/**
 * Read the build metadata file of a prebuilt binary folder.
 *
 * @param folderPath - folder that contains the build metadata file
 * @param folderName - build folder name; only used in the error message
 * @returns the parsed JSON content of the build metadata file
 * @throws Error when the build metadata file does not exist
 */
export async function getPrebuiltBinaryBuildMetadata(folderPath, folderName) {
    const metadataPath = path.join(folderPath, buildMetadataFileName);
    const metadataFileExists = await fs.pathExists(metadataPath);

    if (metadataFileExists)
        return await fs.readJson(metadataPath);

    throw new Error(`Could not find build metadata file for prebuilt build "${folderName}"`);
}
/**
 * Collect the build output files into the `<outDirectory>/<buildConfigType>` directory.
 *
 * Every item found in `<outDirectory>/bin` and `<outDirectory>/llama.cpp/bin` is copied into the result
 * directory without overwriting items that already exist there, and then `applyResultDirFixes` is run on it.
 *
 * @param outDirectory - the build output directory
 * @param canCreateReleaseDir - when `true`, a missing result directory is created if at least one of
 * the bin directories exists; when `false` (default), a missing result directory is an error
 * @returns the path of the result directory
 * @throws Error when the result directory is missing and cannot be created
 */
async function moveBuildFilesToResultDir(outDirectory, canCreateReleaseDir = false) {
    const candidateBinDirPaths = [
        path.join(outDirectory, "bin"),
        path.join(outDirectory, "llama.cpp", "bin")
    ];
    const resultDirPath = path.join(outDirectory, buildConfigType);

    const resultDirExists = await fs.pathExists(resultDirPath);
    if (!resultDirExists) {
        if (!canCreateReleaseDir)
            throw new Error(`Could not find ${buildConfigType} directory`);

        const hasAnyBinDir = await asyncSome(candidateBinDirPaths.map((dirPath) => fs.pathExists(dirPath)));
        if (!hasAnyBinDir)
            throw new Error(`Could not find ${buildConfigType} directory or any other output directory`);

        await fs.ensureDir(resultDirPath);
    }

    // bin directories are handled one after the other; the items of each directory are copied in parallel
    for (const binDirPath of candidateBinDirPaths) {
        if (!(await fs.pathExists(binDirPath)))
            continue;

        const entryNames = await fs.readdir(binDirPath);
        const copyJobs = entryNames.map((entryName) => {
            const sourcePath = path.join(binDirPath, entryName);
            const targetPath = path.join(resultDirPath, entryName);

            return fs.copy(sourcePath, targetPath, {
                overwrite: false
            });
        });
        await Promise.all(copyJobs);
    }

    await applyResultDirFixes(resultDirPath, path.join(outDirectory, "_temp"));
    return resultDirPath;
}
/**
 * Flatten a nested `<resultDirPath>/<buildConfigType>` directory into `resultDirPath`.
 *
 * When the nested directory exists, it is moved to `tempDirPath` (any existing content at that path is
 * removed first), its items are moved into `resultDirPath` with overwriting enabled, and the temporary
 * directory is removed. When the nested directory doesn't exist, nothing is changed.
 *
 * @param resultDirPath - the directory holding the build result
 * @param tempDirPath - a scratch path used while moving the nested items
 */
async function applyResultDirFixes(resultDirPath, tempDirPath) {
    const nestedResultDirPath = path.join(resultDirPath, buildConfigType);
    const hasNestedResultDir = await fs.pathExists(nestedResultDirPath);
    if (!hasNestedResultDir)
        return;

    await fs.remove(tempDirPath);
    await fs.move(nestedResultDirPath, tempDirPath);

    const entryNames = await fs.readdir(tempDirPath);
    const moveJobs = entryNames.map((entryName) => {
        const sourcePath = path.join(tempDirPath, entryName);
        const targetPath = path.join(resultDirPath, entryName);

        return fs.move(sourcePath, targetPath, {
            overwrite: true
        });
    });
    await Promise.all(moveJobs);

    await fs.remove(tempDirPath);
}
/**
 * Resolve the addon binary inside a prebuilt binary folder.
 *
 * @param prebuiltBinaryDirectoryPath - the folder expected to hold the binary and its build metadata file
 * @returns the path of `llama-addon.node` when both it and the build metadata file exist, otherwise `null`
 */
async function resolvePrebuiltBinaryPath(prebuiltBinaryDirectoryPath) {
    const addonPath = path.join(prebuiltBinaryDirectoryPath, "llama-addon.node");
    const metadataPath = path.join(prebuiltBinaryDirectoryPath, buildMetadataFileName);

    const existenceResults = await Promise.all(
        [addonPath, metadataPath].map((requiredPath) => fs.pathExists(requiredPath))
    );

    return existenceResults.every(Boolean)
        ? addonPath
        : null;
}
/**
 * Resolve the binaries directory of the prebuilt binaries package that matches the given build options.
 *
 * @param buildOptions - reads `platform`, `arch` and `gpu`
 * @returns `null` (synchronously) when no package matches the build options; otherwise a promise that
 * resolves to the package's binaries directory, or to `null` when the package cannot be imported,
 * exposes no binaries directory, or reports a package version different from this module's version
 */
function getPrebuiltBinariesPackageDirectoryForBuildOptions(buildOptions) {
    // Import a binaries package and return its binaries directory; any failure results in `null`
    async function loadBinsDirFromPackage(importBinariesPackage) {
        try {
            const [binariesPackage, ownModuleVersion] = await Promise.all([
                importBinariesPackage(),
                getModuleVersion()
            ]);
            const { binsDir: packageBinsDir, packageVersion: binariesPackageVersion } = binariesPackage?.getBinsDir?.() ?? {};

            const isUsable = packageBinsDir != null && binariesPackageVersion === ownModuleVersion;
            return isUsable
                ? packageBinsDir
                : null;
        }
        catch {
            return null;
        }
    }

    const { platform: targetPlatform, arch: targetArch, gpu: targetGpu } = buildOptions;

    // the import specifiers are kept as literal strings on purpose
    /* eslint-disable import/no-unresolved */
    switch (targetPlatform) {
        case "mac":
            if (targetArch === "arm64" && targetGpu === "metal")
                // @ts-ignore
                return loadBinsDirFromPackage(() => import("@node-llama-cpp/mac-arm64-metal"));
            if (targetArch === "x64" && targetGpu === false)
                // @ts-ignore
                return loadBinsDirFromPackage(() => import("@node-llama-cpp/mac-x64"));
            break;

        case "linux":
            switch (targetArch) {
                case "x64":
                    if (targetGpu === "cuda")
                        // @ts-ignore
                        return loadBinsDirFromPackage(() => import("@node-llama-cpp/linux-x64-cuda"));
                    if (targetGpu === "vulkan")
                        // @ts-ignore
                        return loadBinsDirFromPackage(() => import("@node-llama-cpp/linux-x64-vulkan"));
                    if (targetGpu === false)
                        // @ts-ignore
                        return loadBinsDirFromPackage(() => import("@node-llama-cpp/linux-x64"));
                    break;
                case "arm64":
                    // @ts-ignore
                    return loadBinsDirFromPackage(() => import("@node-llama-cpp/linux-arm64"));
                case "arm":
                    // @ts-ignore
                    return loadBinsDirFromPackage(() => import("@node-llama-cpp/linux-armv7l"));
            }
            break;

        case "win":
            switch (targetArch) {
                case "x64":
                    if (targetGpu === "cuda")
                        // @ts-ignore
                        return loadBinsDirFromPackage(() => import("@node-llama-cpp/win-x64-cuda"));
                    if (targetGpu === "vulkan")
                        // @ts-ignore
                        return loadBinsDirFromPackage(() => import("@node-llama-cpp/win-x64-vulkan"));
                    if (targetGpu === false)
                        // @ts-ignore
                        return loadBinsDirFromPackage(() => import("@node-llama-cpp/win-x64"));
                    break;
                case "arm64":
                    // @ts-ignore
                    return loadBinsDirFromPackage(() => import("@node-llama-cpp/win-arm64"));
            }
            break;
    }
    /* eslint-enable import/no-unresolved */

    return null;
}
/**
 * Build the `--cmake-path` command line arguments for the build script.
 *
 * @returns an empty array when `hasBuiltinCmake()` resolves to a truthy value or when no cmake path
 * can be resolved; otherwise `["--cmake-path", <path>]`
 */
async function getCmakePathArgs() {
    const usesBuiltinCmake = await hasBuiltinCmake();
    if (usesBuiltinCmake)
        return [];

    const resolvedCmakePath = await getCmakePath();
    return resolvedCmakePath == null
        ? []
        : ["--cmake-path", resolvedCmakePath];
}
/**
 * Find the CMake toolchain file to use for building for `targetArch` on the current host.
 *
 * The file is looked up in `llamaToolchainsDirectory` under the name
 * `<prefix><host platform>.host-<host arch>.target-<target arch>.cmake`, where the prefix is `llvm.`
 * for an x64 to x64 build on Windows with LLVM support, and empty otherwise.
 *
 * @param targetArch - the architecture to build for
 * @param windowsLlvmSupport - whether the build uses LLVM on Windows (default: `false`)
 * @returns the resolved path of the toolchain file, or `null` when no toolchain file is needed
 * (same host and target architecture, outside of the Windows cases above) or when the file doesn't exist
 */
async function getToolchainFileForArch(targetArch, windowsLlvmSupport = false) {
    const { platform: hostPlatform, arch: hostArch } = process;
    const isWindowsHost = hostPlatform === "win32";

    // a toolchain is needed to cross-compile to arm64 on Windows, and to compile on arm64 on Windows
    const isWindowsArm64Host = isWindowsHost && hostArch === "arm64";
    const useLlvmToolchain = Boolean(
        !isWindowsArm64Host && isWindowsHost && hostArch === "x64" && targetArch === "x64" && windowsLlvmSupport
    );

    if (!isWindowsArm64Host && !useLlvmToolchain && hostArch === targetArch)
        return null;

    const filenamePrefix = useLlvmToolchain
        ? "llvm."
        : "";
    const toolchainFilePath = path.join(
        llamaToolchainsDirectory,
        `${filenamePrefix}${hostPlatform}.host-${hostArch}.target-${targetArch}.cmake`
    );

    const toolchainFileExists = await fs.pathExists(toolchainFilePath);
    return toolchainFileExists
        ? path.resolve(toolchainFilePath)
        : null;
}
/**
 * Build the `--generator` command line arguments for the build script.
 *
 * The "Ninja Multi-Config" generator is selected when targeting Windows arm64, and when targeting
 * Windows x64 from an x64 host with LLVM support; no generator arguments are returned otherwise.
 *
 * @param targetPlatform - the platform to build for
 * @param targetArch - the architecture to build for
 * @param windowsLlvmSupport - whether the build uses LLVM on Windows
 * @returns the generator arguments, or an empty array
 */
function getCmakeGeneratorArgs(targetPlatform, targetArch, windowsLlvmSupport) {
    if (targetPlatform !== "win")
        return [];

    const targetsArm64 = targetArch === "arm64";
    const isLlvmX64Build = windowsLlvmSupport && process.arch === "x64" && targetArch === "x64";

    if (targetsArm64 || isLlvmX64Build)
        return ["--generator", "Ninja Multi-Config"];

    return [];
}
/**
 * Decide how many parallel build threads to use.
 *
 * With up to 4 CPUs, all of them are used (except for the CI workaround below).
 * With more CPUs, one is left free on Apple silicon Macs and two are left free elsewhere.
 *
 * @param platform - the current platform (`"win"`, `"linux"`, `"mac"`, ...)
 * @param gpu - the GPU type of the build (`"cuda"`, `"vulkan"`, `"metal"`, `false`, ...)
 * @param ciMode - whether the build runs in CI mode (default: `false`)
 * @returns the number of build threads; always at least 1
 */
function getParallelBuildThreadsToUse(platform, gpu, ciMode = false) {
    // `os.cpus()` can return an empty array when CPU information is unavailable;
    // treat that as a single CPU so the result is never 0
    const cpuCount = Math.max(1, os.cpus().length);

    if (ciMode && platform === "win" && gpu === "cuda" && cpuCount === 4)
        return 3; // workaround for `compiler is out of heap space` error on GitHub Actions on Windows when building with CUDA

    if (cpuCount <= 4)
        return cpuCount;

    if (platform === "mac" && process.arch === "arm64")
        return cpuCount - 1;

    return cpuCount - 2;
}
/**
 * Halve the number of parallel build threads.
 *
 * @param originalParallelBuildThreads - the current number of build threads
 * @returns half of the given number, rounded to the nearest integer, and never less than 1
 */
function reduceParallelBuildThreads(originalParallelBuildThreads) {
    const halvedThreads = Math.round(originalParallelBuildThreads / 2);

    return halvedThreads < 1
        ? 1
        : halvedThreads;
}
/**
 * Check whether a CMake option value is one of CMake's false constants.
 *
 * CMake treats `0`, `OFF`, `NO`, `FALSE`, `N`, `IGNORE`, `NOTFOUND`, the empty string and any value
 * ending with `-NOTFOUND` as false, and the named constants are case-insensitive, so values such as
 * `"off"` or `"False"` are recognised here as well.
 *
 * @param value - the option value to check
 * @returns `true` when the value is a string holding a CMake false constant, otherwise `false`
 */
function isCmakeValueOff(value) {
    if (typeof value !== "string")
        return false;

    const normalizedValue = value.toUpperCase();
    const falseConstants = ["", "0", "OFF", "NO", "FALSE", "N", "IGNORE", "NOTFOUND"];

    return falseConstants.includes(normalizedValue) || normalizedValue.endsWith("-NOTFOUND");
}
/**
 * Check whether the detected Windows build tools are enough for an LLVM build.
 *
 * @param detectedBuildTools - reads the `hasLlvm`, `hasNinja` and `hasLibExe` properties
 * @returns a truthy value only when all three properties are truthy
 */
function areWindowsBuildToolsCapableForLlvmBuild(detectedBuildTools) {
    const { hasLlvm, hasNinja, hasLibExe } = detectedBuildTools;

    return hasLlvm && hasNinja && hasLibExe;
}
//# sourceMappingURL=compileLLamaCpp.js.map