node-llama-cpp
Run AI models locally on your machine with Node.js bindings for llama.cpp. Enforce a JSON schema on the model output at the generation level.
import { EventRelay } from "lifecycle-utils";
import { LlamaModel, LlamaModelOptions } from "../evaluator/LlamaModel/LlamaModel.js";
import { GbnfJsonSchema } from "../utils/gbnfJson/types.js";
import { LlamaJsonSchemaGrammar } from "../evaluator/LlamaJsonSchemaGrammar.js";
import { LlamaGrammar, LlamaGrammarOptions } from "../evaluator/LlamaGrammar.js";
import { LlamaClasses } from "../utils/getLlamaClasses.js";
import { LlamaGpuType, LlamaLogLevel } from "./types.js";
export declare const LlamaLogLevelToAddonLogLevel: ReadonlyMap<LlamaLogLevel, number>;
export declare class Llama {
private _classes?;
readonly onDispose: EventRelay<void>;
private constructor();
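/**
* A sketch of explicitly disposing the instance; `[Symbol.asyncDispose]` allows the same via `await using`:
* ```typescript
* import {getLlama} from "node-llama-cpp";
*
* const llama = await getLlama();
* // ... use the instance ...
* await llama.dispose();
* console.log(llama.disposed); // true
* ```
*/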
dispose(): Promise<void>;
/** @hidden */
[Symbol.asyncDispose](): Promise<void>;
get disposed(): boolean;
get classes(): LlamaClasses;
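/**
* A sketch of inspecting the detected hardware capabilities on an existing `llama` instance:
* ```typescript
* console.log("GPU type:", llama.gpu);
* console.log("GPU offloading:", llama.supportsGpuOffloading);
* console.log("mmap:", llama.supportsMmap, "mlock:", llama.supportsMlock);
* console.log("CPU math cores:", llama.cpuMathCores);
* ```
*/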
get gpu(): LlamaGpuType;
get supportsGpuOffloading(): boolean;
get supportsMmap(): boolean;
get gpuSupportsMmap(): boolean;
get supportsMlock(): boolean;
/** The number of CPU cores that are useful for math */
get cpuMathCores(): number;
/**
* The maximum number of threads that can be used by the Llama instance.
*
* If set to `0`, the Llama instance will have no limit on the number of threads.
*
* See the `maxThreads` option of `getLlama` for more information.
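*
* @example
* A minimal sketch of capping the thread count on an existing instance (the same limit can be set via the `maxThreads` option of `getLlama`):
* ```typescript
* import {getLlama} from "node-llama-cpp";
*
* const llama = await getLlama();
* llama.maxThreads = 4; // cap evaluation work at 4 threads
* console.log(llama.maxThreads); // 4
* ```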
*/
get maxThreads(): number;
set maxThreads(value: number);
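/**
* A minimal sketch of routing llama.cpp logs through a custom logger
* (assumes `LlamaLogLevel` is exported from the package root and has a `warn` member):
* ```typescript
* import {getLlama, LlamaLogLevel} from "node-llama-cpp";
*
* const llama = await getLlama();
* llama.logLevel = LlamaLogLevel.warn;
* llama.logger = (level, message) => {
*     console.log(`[llama] [${level}] ${message}`);
* };
* ```
*/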
get logLevel(): LlamaLogLevel;
set logLevel(value: LlamaLogLevel);
get logger(): (level: LlamaLogLevel, message: string) => void;
set logger(value: (level: LlamaLogLevel, message: string) => void);
get buildType(): "localBuild" | "prebuilt";
get cmakeOptions(): Readonly<Record<string, string>>;
get llamaCppRelease(): {
readonly repo: string;
readonly release: string;
};
get systemInfo(): string;
/**
* VRAM padding used for memory size calculations, as these calculations are not always accurate.
* This is set by default to ensure stability, but can be configured when you call `getLlama`.
*
* See `vramPadding` on `getLlama` for more information.
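*
* @example
* A sketch assuming the `vramPadding` option of `getLlama` accepts a byte count:
* ```typescript
* import {getLlama} from "node-llama-cpp";
*
* const llama = await getLlama({vramPadding: 64 * 1024 * 1024});
* console.log(llama.vramPaddingSize);
* ```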
*/
get vramPaddingSize(): number;
/**
* The total amount of VRAM that is currently being used.
*
* `unifiedSize` represents the amount of VRAM that is shared between the CPU and GPU.
* On SoC devices, this is usually the same as `total`.
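*
* @example
* A sketch assuming an existing `llama` instance created with `getLlama`:
* ```typescript
* const {total, used, free, unifiedSize} = await llama.getVramState();
* console.log(`VRAM used: ${used}/${total}, free: ${free}, unified: ${unifiedSize}`);
* ```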
*/
getVramState(): Promise<{
total: number;
used: number;
free: number;
unifiedSize: number;
}>;
/**
* Get the state of the swap memory.
*
* **`maxSize`** - The maximum size of the swap memory that the system can allocate.
* If the swap size is dynamic (like on macOS), this will be `Infinity`.
*
* **`allocated`** - The total size allocated by the system for swap memory.
*
* **`used`** - The amount of swap memory that is currently being used from the `allocated` size.
*
* On Windows, this will return the info for the page file.
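*
* @example
* A sketch assuming an existing `llama` instance created with `getLlama`:
* ```typescript
* const {maxSize, allocated, used} = await llama.getSwapState();
* if (maxSize === Infinity)
*     console.log("Swap size is dynamic");
*
* console.log(`Swap used: ${used} out of ${allocated} allocated`);
* ```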
*/
getSwapState(): Promise<{
/**
* The maximum size of the swap memory that the system can allocate.
* If the swap size is dynamic (like on macOS), this will be `Infinity`.
*/
maxSize: number;
/** The total size allocated by the system for swap memory */
allocated: number;
/** The amount of swap memory that is currently being used from the `allocated` size */
used: number;
}>;
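/**
* A sketch of listing the detected GPU devices on an existing `llama` instance:
* ```typescript
* const deviceNames = await llama.getGpuDeviceNames();
* console.log("GPU devices:", deviceNames.join(", "));
* ```
*/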
getGpuDeviceNames(): Promise<string[]>;
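/**
* A minimal loading sketch; the GGUF path is hypothetical and `modelPath` is assumed to be part of `LlamaModelOptions`:
* ```typescript
* import {getLlama} from "node-llama-cpp";
*
* const llama = await getLlama();
* const model = await llama.loadModel({
*     modelPath: "models/my-model.gguf"
* });
* ```
*/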
loadModel(options: LlamaModelOptions): Promise<LlamaModel>;
/**
* @see [Using a JSON Schema Grammar](https://node-llama-cpp.withcat.ai/guide/grammar#json-schema) tutorial
* @see [Reducing Hallucinations When Using JSON Schema Grammar](https://node-llama-cpp.withcat.ai/guide/grammar#reducing-json-schema-hallucinations) tutorial
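*
* @example
* A sketch of constraining output to a simple object schema (the schema shape is illustrative only):
* ```typescript
* const grammar = await llama.createGrammarForJsonSchema({
*     type: "object",
*     properties: {
*         answer: {type: "string"}
*     }
* });
* // pass `grammar` to a prompt call, then parse the raw response text with `grammar.parse(...)`
* ```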
*/
createGrammarForJsonSchema<const T extends GbnfJsonSchema>(schema: Readonly<T>): Promise<LlamaJsonSchemaGrammar<T>>;
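/**
* A sketch assuming `"json"` is among the bundled grammar names accepted by `LlamaGrammar.getFor`:
* ```typescript
* const jsonGrammar = await llama.getGrammarFor("json");
* ```
*/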
getGrammarFor(type: Parameters<typeof LlamaGrammar.getFor>[1]): Promise<LlamaGrammar>;
/**
* @see [Using Grammar](https://node-llama-cpp.withcat.ai/guide/grammar) tutorial
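*
* @example
* A sketch assuming `LlamaGrammarOptions` accepts raw GBNF text via a `grammar` field:
* ```typescript
* const grammar = await llama.createGrammar({
*     grammar: 'root ::= "yes" | "no"'
* });
* ```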
*/
createGrammar(options: LlamaGrammarOptions): Promise<LlamaGrammar>;
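/**
* A sketch of restoring the default console logger on an existing `llama` instance:
* ```typescript
* llama.logger = Llama.defaultConsoleLogger;
* ```
*/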
static defaultConsoleLogger(level: LlamaLogLevel, message: string): void;
}