@socketsupply/socket
A Cross-Platform, Native Runtime for Desktop and Mobile Apps — Create apps using HTML, CSS, and JavaScript. Written from the ground up to be small and maintainable.
// @ts-check
/**
* @module ai
*
* Provides high-level classes for common AI tasks.
*
* If you download a model like `mistral-7b-openorca.Q4_0.gguf` from Hugging
* Face, you can construct an LLM instance in JavaScript with a prompt. Prompt
* syntax isn't rigid the way programming syntax is, so you'll usually want to
* read what the model's author has to say about prompting. For example, this
* guide may be worth reading:
*
* https://docs.mistral.ai/guides/prompting_capabilities
*
* Example usage:
*
* ```js
* import { LLM } from 'socket:ai'
*
* const llm = new LLM({
*   path: 'model.gguf',
*   prompt: '...' // insert your prompt here.
* })
*
* llm.on('end', () => {
*   // end of the token stream.
* })
*
* llm.on('data', data => {
*   // a new token has arrived in the token stream.
* })
* ```
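*
* The instance also emits `log` and `error` events, which you can use to
* surface diagnostics and failures. A minimal sketch, continuing the example
* above:
*
* ```js
* llm.on('log', message => {
*   // diagnostic output from the underlying model backend.
*   console.debug(message)
* })
*
* llm.on('error', err => {
*   // an error reported by the runtime for this model instance.
*   console.error(err)
* })
* ```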
*/
import { EventEmitter } from './events.js'
import { rand64 } from './crypto.js'
import process from './process.js'
import ipc from './ipc.js'
import gc from './gc.js'
// import this module's own exports so they can be re-exported as the default export below
import * as exports from './ai.js'
/**
* A class to interact with large language models (using llama.cpp)
*/
export class LLM extends EventEmitter {
/**
* Constructs an LLM instance. The options configure and control the behavior
* of the underlying large language model provided by llama.cpp.
* @param {Object} options - Configuration options for the LLM instance.
* @param {string} options.path - The file path to the model in .gguf format. This model file contains
* the weights and configuration necessary for initializing the language model.
* @param {string} options.prompt - The initial input text to the model, setting the context or query
* for generating responses. The model uses this as a starting point for text generation.
* @param {string} [options.id] - An optional unique identifier for this specific instance of the model,
* useful for tracking or referencing the model in multi-model setups.
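* @param {string} [options.antiprompt] - A sequence at which token generation should stop,
* passed through to the backend (often called a reverse prompt).
* @param {boolean} [options.conversation=false] - Enables the backend's conversation mode.
* @param {boolean} [options.chatml=false] - Formats the prompt using the ChatML template.
* @param {boolean} [options.instruct=false] - Runs the model in instruction mode.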
* @param {number} [options.n_ctx=1024] - The size of the context window: the maximum number of
* tokens the model can attend to at once, including the prompt. Larger values use more memory,
* and exceeding what the model was trained for may produce errors or degraded output.
* @param {number} [options.n_threads=8] - The number of threads allocated for the model's computation,
* affecting performance and speed of response generation.
* @param {number} [options.temp=1.1] - Sampling temperature controls the randomness of predictions.
* Higher values increase diversity, potentially at the cost of coherence.
* @param {number} [options.max_tokens=512] - The upper limit on the number of tokens that the model can generate
* in response to a single prompt. This prevents runaway generations.
* @param {number} [options.n_gpu_layers=32] - The number of model layers to offload to the GPU.
* Offloading more layers generally speeds up inference at the cost of GPU memory; it does not
* change the quality of the output.
* @param {number} [options.n_keep=0] - The number of tokens from the initial prompt to retain
* when the context window fills up and older tokens are evicted. Useful for keeping a system
* prompt in context.
* @param {number} [options.n_batch=0] - The batch size used when evaluating the prompt. Larger
* batches can speed up prompt processing by parallelizing computation, at the cost of memory.
* @param {number} [options.n_predict=0] - The maximum number of tokens to predict in a single
* generation pass, bounding the length of the generated text.
* @param {number} [options.grp_attn_n=0] - The group-attention factor used by the backend's
* self-extend mechanism for stretching the effective context window.
* @param {number} [options.grp_attn_w=0] - The group-attention width for the same mechanism,
* controlling how many positions are grouped together when extending context.
* @param {number} [options.seed=0] - A seed for the random number generator used in the model. Setting this ensures
* consistent results in model outputs, important for reproducibility in experiments.
* @param {number} [options.top_k=0] - Limits the model's output choices to the top 'k' most probable next words,
* reducing the risk of less likely, potentially nonsensical outputs.
* @param {number} [options.tok_p=0.0] - Top-p (nucleus) sampling threshold, filtering the token selection pool
* to only those whose cumulative probability exceeds this value, enhancing output relevance.
* @param {number} [options.min_p=0.0] - Sets a minimum probability filter for token generation, ensuring
* that generated tokens have at least this likelihood of being relevant or coherent.
* @param {number} [options.tfs_z=0.0] - The tail-free sampling parameter 'z', which trims
* low-probability tokens from the sampling distribution (a value of 1.0 disables it).
* @throws {Error} Throws an error if the model path is not provided, as the model cannot initialize without it.
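*
* A minimal construction sketch; the model file and sampling values below are
* illustrative, not recommendations:
*
* ```js
* const llm = new LLM({
*   path: 'mistral-7b-openorca.Q4_0.gguf', // a local .gguf model file
*   prompt: 'You are a helpful assistant.',
*   n_ctx: 2048, // context window of 2048 tokens
*   temp: 0.8,   // lower temperature for more predictable output
*   top_k: 40    // sample from the 40 most probable next tokens
* })
* ```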
*/
constructor (options = null) {
super()
options = { ...options }
if (!options.path) {
throw new Error('expected a path to a valid model (.gguf)')
}
this.path = options.path
this.prompt = options.prompt
this.id = options.id || rand64()
const opts = {
id: this.id,
path: this.path,
prompt: this.prompt,
// @ts-ignore
antiprompt: options.antiprompt,
// @ts-ignore
conversation: options.conversation === true,
// @ts-ignore
chatml: options.chatml === true,
// @ts-ignore
instruct: options.instruct === true,
n_ctx: options.n_ctx || 1024,
n_threads: options.n_threads || 8,
temp: options.temp || 1.1,
max_tokens: options.max_tokens || 512,
n_gpu_layers: options.n_gpu_layers || 32,
n_keep: options.n_keep || 0,
n_batch: options.n_batch || 0,
n_predict: options.n_predict || 0,
grp_attn_n: options.grp_attn_n || 0,
grp_attn_w: options.grp_attn_w || 0,
seed: options.seed || 0,
top_k: options.top_k || 0,
tok_p: options.tok_p || 0.0,
min_p: options.min_p || 0.0,
tfs_z: options.tfs_z || 0.0
}
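// listen for backend events delivered on the global 'data' event and
// re-emit the ones addressed to this instance.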
globalThis.addEventListener('data', event => {
// @ts-ignore
const detail = event.detail
const { err, data, source } = detail.params
if (err && BigInt(err.id) === this.id) {
return this.emit('error', err)
}
if (!data || BigInt(data.id) !== this.id) return
if (source === 'ai.llm.log') {
this.emit('log', data.message)
return
}
if (source === 'ai.llm.chat') {
if (data.complete) {
return this.emit('end')
}
this.emit('data', decodeURIComponent(data.token))
}
})
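// ask the native runtime to create the model instance for this id.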
ipc.request('ai.llm.create', opts)
.then((result) => {
if (result.err) {
this.emit('error', result.err)
}
}, (err) => {
this.emit('error', err)
})
}
/**
* Tell the LLM to stop after the next token.
* @returns {Promise<void>} A promise that resolves when the LLM stops.
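*
* A usage sketch, assuming `llm` is an instance of this class:
*
* ```js
* await llm.stop() // generation halts after the next token
* ```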
*/
async stop () {
return await ipc.request('ai.llm.stop', { id: this.id })
}
/**
* Implements the `gc.finalizer` protocol so the native model is destroyed
* when this instance is garbage collected.
* @ignore
*/
[gc.finalizer] (options) {
return {
args: [this.id, options],
async handle (id) {
if (process.env.DEBUG) {
console.warn('Closing LLM on garbage collection')
}
await ipc.request('ai.llm.destroy', { id }, options)
}
}
}
/**
* Send a message to the chat.
* @param {string} message - The message to send to the chat.
* @returns {Promise<any>} A promise that resolves with the response from the chat.
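*
* A usage sketch; the message text is illustrative:
*
* ```js
* llm.chat('What is the capital of France?')
* // tokens of the reply arrive via 'data' events, and 'end' fires when
* // the response is complete.
* ```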
*/
async chat (message) {
return await ipc.request('ai.llm.chat', { id: this.id, message })
}
}
export default exports