rawi: src/libs/providers/ollama/ollama-chat-settings.ts
{"version":3,"sources":["/home/mkabumattar/work/withrawi/rawi/dist/chunk-RYVLVSXY.cjs","../src/libs/providers/ollama/ollama-chat-settings.ts"],"names":["ollamaModelIds"],"mappings":"AAAA;AACA,wDAAwC,ICM3BA,CAAAA,CAAiBA,mBAAAA,CAAAA,cAAAA;ADL9B","file":"/home/mkabumattar/work/withrawi/rawi/dist/chunk-RYVLVSXY.cjs","sourcesContent":[null,"import type {OllamaChatModelId as _OllamaChatModelId} from './ollama-models-list.js';\nimport {ollamaModelIds as _ollamaModelIds} from './ollama-models-list.js';\n\nexport type OllamaChatModelId =\n | _OllamaChatModelId\n | (string & NonNullable<unknown>);\n\nexport const ollamaModelIds = _ollamaModelIds;\n\nexport interface OllamaChatSettings {\n /**\n * Until Ollama officially supports tool calling in streams, the provider can try to detect function calls. Enabled by\n * default to maintain backward compatibility, disable it if you encounter any issues.\n *\n * @deprecated Use `simulateStreaming` instead.\n */\n experimentalStreamTools?: boolean;\n\n /**\n * Enables the use of half-precision floating point values for key-value memory. This helps in optimizing memory usage. (Default: true)\n */\n f16Kv?: boolean;\n\n /**\n * If set to true, reduces the VRAM usage by trading off speed for memory. (Default: false)\n */\n lowVram?: boolean;\n\n /**\n * Sets which GPU is the main one.\n */\n mainGpu?: number;\n\n /**\n * Minimum cumulative probability for tokens to be considered. (Default: 0.0)\n */\n minP?: number;\n\n /**\n * Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n */\n mirostat?: 0 | 1 | 2;\n\n /**\n * Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will\n * result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1)\n */\n mirostatEta?: number;\n\n /**\n * Controls the balance between coherence and diversity of the output. A lower value will result in more focused and\n * coherent text. (Default: 5.0)\n */\n mirostatTau?: number;\n\n /**\n * Controls whether to use Non-Uniform Memory Access (NUMA) for more efficient memory management. (Default: false)\n */\n numa?: boolean;\n\n /**\n * Sets the number of batches to be processed. (Default: 512)\n */\n numBatch?: number;\n\n /**\n * Sets the size of the context window used to generate the next token. (Default: 2048)\n */\n numCtx?: number;\n\n /**\n * Controls the number of GPUs to use for the operation. (Default: -1, indicates that NumGPU should be set dynamically)\n */\n numGpu?: number;\n\n /**\n * Keeps a number of tokens from the context. Controls how many of the previous tokens are retained. (Default: 4)\n */\n numKeep?: number;\n\n /**\n * Controls the number of tokens to predict in a single generation. (Default: -1)\n */\n numPredict?: number;\n\n /**\n * Sets the number of CPU threads to use. (Default: 0, indicates let the runtime decide)\n */\n numThread?: number;\n\n /**\n * Penalizes the model for generating newline characters. If set to true, it discourages the model from generating too many newlines. (Default: true)\n */\n penalizeNewline?: boolean;\n\n /**\n * Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)\n */\n repeatLastN?: number;\n\n /**\n * Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly\n * , while a lower value (e.g., 0.9) will be more lenient. 
(Default: 1.1)\n */\n repeatPenalty?: number;\n\n /**\n * Whether to use structured outputs. Defaults to false.\n *\n * When enabled, tool calls and object generation will be strict and follow the provided schema.\n */\n structuredOutputs?: boolean;\n\n /**\n * Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0)\n * will reduce the impact more, while a value of 1.0 disables this setting. (default: 1)\n */\n tfsZ?: number;\n\n /**\n * Controls the \"typical\" sampling probability. (Default: 1.0)\n */\n typicalP?: number;\n\n /**\n * Locks the memory to prevent swapping, which can be useful for performance optimization. (Default: false)\n */\n useMlock?: boolean;\n\n /**\n * Enables memory mapping to reduce RAM usage. (Default: false)\n */\n useMmap?: boolean;\n\n /**\n * If true, the model will only load the vocabulary without performing further computation. (Default: false)\n */\n vocabOnly?: boolean;\n}\n"]}
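A minimal usage sketch follows. It assumes the module resolves via the relative import path used above; the model id and setting values are illustrative, and the provider call that would consume these objects is not part of this file. Note that the `string & NonNullable<unknown>` branch of the union keeps autocomplete for the known model ids while still accepting arbitrary strings.

import type {
  OllamaChatModelId,
  OllamaChatSettings,
} from './ollama-chat-settings.js';

// Any known model id autocompletes, but arbitrary strings are also
// accepted because of the `string & NonNullable<unknown>` union branch.
const modelId: OllamaChatModelId = 'llama3.2'; // illustrative id

// Illustrative settings: widen the context window, enable strict
// schema-following outputs, and relax the repetition penalty slightly.
// All fields are optional.
const settings: OllamaChatSettings = {
  numCtx: 8192,            // default is 2048
  structuredOutputs: true, // strict tool calls / object generation
  repeatPenalty: 1.05,     // default is 1.1; lower is more lenient
};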