aiwrapper

A Universal AI Wrapper for JavaScript & TypeScript

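The listing below is the package's compiled Ollama provider, ollama-lang.js. First, a minimal usage sketch; the direct class import is hypothetical (the published package may expose this provider through a different entry point), and the model, URL, and prompt values are examples:

import { OllamaLang } from "aiwrapper"; // hypothetical import path

const lang = new OllamaLang({
  model: "llama2:latest",        // default when omitted
  url: "http://localhost:11434", // default local Ollama endpoint
  maxTokens: 512,                // optional cap on the response length
});

// Streaming: the callback receives the partially filled result on every
// chunk; result.finished becomes true on the final callback.
const result = await lang.ask("Why is the sky blue?", (partial) => {
  console.log(partial.answer);
});
console.log(result.answer);   // final answer with <think> sections removed
console.log(result.thinking); // extracted <think> content, if any
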
import { LangResultWithMessages, LangResultWithString, LanguageProvider } from "../language-provider.js";
import { httpRequestWithRetry as fetch } from "../../http-request.js";
import { processResponseStream } from "../../process-response-stream.js";
import { models } from "aimodels";
import { calculateModelResponseTokens } from "../utils/token-calculator.js";

export class OllamaLang extends LanguageProvider {
    constructor(options) {
        const modelName = options.model || "llama2:latest";
        super(modelName);
        this._config = {
            model: modelName,
            systemPrompt: options.systemPrompt || "",
            maxTokens: options.maxTokens,
            baseURL: options.url || "http://localhost:11434",
        };
        // Model lookup is best-effort: if the model is missing from the
        // aimodels database we proceed anyway, so users can run any local
        // Ollama model without being blocked.
        this.modelInfo = models.id(modelName);
    }

    transformBody(body) {
        // This provider sends context_length instead of max_tokens
        if (body.max_tokens) {
            const { max_tokens, ...rest } = body;
            return { ...rest, context_length: max_tokens };
        }
        return body;
    }

    async ask(prompt, onResult) {
        const result = new LangResultWithString(prompt);
        // Try to get model info and calculate max tokens
        let requestMaxTokens = this._config.maxTokens;
        if (this.modelInfo) {
            requestMaxTokens = calculateModelResponseTokens(this.modelInfo, [{ role: "user", content: prompt }], this._config.maxTokens);
        }
        // Variables to track streaming state for thinking extraction
        let visibleContent = "";
        let openThinkTagIndex = -1;
        let pendingThinkingContent = "";
        const onData = (data) => {
            if (data.done) {
                // Final check for thinking content when streaming is complete
                const extracted = this.extractThinking(visibleContent);
                if (extracted.thinking) {
                    result.thinking = extracted.thinking;
                    result.answer = extracted.answer;
                }
                result.finished = true;
                onResult?.(result);
                return;
            }
            if (data.response) {
                const currentChunk = data.response;
                visibleContent += currentChunk;
                // Process the chunk for potential thinking content
                this.processChunkForThinking(currentChunk, visibleContent, result, openThinkTagIndex, pendingThinkingContent);
                // Update tracking variables based on current state
                openThinkTagIndex = visibleContent.lastIndexOf("<think>");
                if (openThinkTagIndex !== -1) {
                    const closeTagIndex = visibleContent.indexOf("</think>", openThinkTagIndex);
                    if (closeTagIndex === -1) {
                        // We have an open tag but no close tag yet
                        pendingThinkingContent = visibleContent.substring(openThinkTagIndex + 7); // +7 to skip "<think>"
                    }
                }
            }
            onResult?.(result);
        };
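        // Each streamed line from /api/generate is a JSON object shaped
        // roughly like { "response": "<partial text>", "done": false },
        // followed by a final object with "done": true (field names as
        // consumed by onData above; values here are illustrative).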
        const response = await fetch(`${this._config.baseURL}/api/generate`, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
            },
            body: JSON.stringify({
                model: this._config.model,
                prompt,
                stream: true,
                ...(requestMaxTokens && { num_predict: requestMaxTokens }),
            }),
        }).catch((err) => {
            throw new Error(err);
        });
        await processResponseStream(response, onData);
        // For the non-streaming case, perform final extraction
        if (!onResult) {
            const extracted = this.extractThinking(result.answer);
            if (extracted.thinking) {
                result.thinking = extracted.thinking;
                result.answer = extracted.answer;
            }
        }
        return result;
    }

    async chat(messages, onResult) {
        const result = new LangResultWithMessages(messages);
        // Try to get model info and calculate max tokens
        let requestMaxTokens = this._config.maxTokens;
        if (this.modelInfo) {
            requestMaxTokens = calculateModelResponseTokens(this.modelInfo, messages, this._config.maxTokens);
        }
        // Variables to track streaming state for thinking extraction
        let visibleContent = "";
        let openThinkTagIndex = -1;
        let pendingThinkingContent = "";
        const onData = (data) => {
            if (data.done) {
                // Final check for thinking content when streaming is complete
                const extracted = this.extractThinking(visibleContent);
                if (extracted.thinking) {
                    result.thinking = extracted.thinking;
                    result.answer = extracted.answer;
                }
                result.finished = true;
                onResult?.(result);
                return;
            }
            if (data.message && data.message.content) {
                const currentChunk = data.message.content;
                visibleContent += currentChunk;
                // Process the chunk for potential thinking content
                this.processChunkForThinking(currentChunk, visibleContent, result, openThinkTagIndex, pendingThinkingContent);
                // Update tracking variables based on current state
                openThinkTagIndex = visibleContent.lastIndexOf("<think>");
                if (openThinkTagIndex !== -1) {
                    const closeTagIndex = visibleContent.indexOf("</think>", openThinkTagIndex);
                    if (closeTagIndex === -1) {
                        // We have an open tag but no close tag yet
                        pendingThinkingContent = visibleContent.substring(openThinkTagIndex + 7); // +7 to skip "<think>"
                    }
                }
            }
            onResult?.(result);
        };
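        // Each streamed line from /api/chat carries the delta under
        // message.content, e.g. { "message": { "content": "<partial text>" },
        // "done": false }, ending with a { "done": true } line (illustrative
        // values; field names as consumed by onData above).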
        const response = await fetch(`${this._config.baseURL}/api/chat`, {
            method: "POST",
            headers: {
                "Content-Type": "application/json",
            },
            body: JSON.stringify({
                model: this._config.model,
                messages,
                stream: true,
                ...(requestMaxTokens && { num_predict: requestMaxTokens }),
            }),
        }).catch((err) => {
            throw new Error(err);
        });
        await processResponseStream(response, onData);
        // For the non-streaming case, perform final extraction
        if (!onResult) {
            const extracted = this.extractThinking(result.answer);
            if (extracted.thinking) {
                result.thinking = extracted.thinking;
                result.answer = extracted.answer;
            }
        }
        return result;
    }

    // Helper to extract thinking content from <think> tags
    extractThinking(content) {
        const thinkRegex = /<think>([\s\S]*?)<\/think>/g;
        const matches = content.match(thinkRegex);
        if (!matches || matches.length === 0) {
            return { thinking: "", answer: content };
        }
        // Extract thinking content
        const thinking = matches
            .map((match) => match.replace(/<think>|<\/think>/g, "").trim())
            .join("\n");
        // Remove thinking tags for a clean answer
        const answer = content.replace(thinkRegex, "").trim();
        return { thinking, answer };
    }

    // Process a chunk for thinking content during streaming.
    // (openTagIndex and pendingThinking are tracked by the callers but not
    // consulted here; the method re-derives its state from fullContent.)
    processChunkForThinking(currentChunk, fullContent, result, openTagIndex, pendingThinking) {
        // Check if we have a complete thinking section
        const extracted = this.extractThinking(fullContent);
        if (extracted.thinking) {
            // We have one or more complete thinking sections
            result.thinking = extracted.thinking;
            result.answer = extracted.answer;
            return;
        }
        // Check for partial thinking tags
        if (fullContent.includes("<think>")) {
            // We have at least an opening tag
            const lastOpenTagIndex = fullContent.lastIndexOf("<think>");
            const firstCloseTagIndex = fullContent.indexOf("</think>");
            if (firstCloseTagIndex === -1 || lastOpenTagIndex > firstCloseTagIndex) {
                // An open tag without a closing tag: everything from the open
                // tag to the end is treated as thinking so far
                const beforeThinkingContent = fullContent.substring(0, lastOpenTagIndex).trim();
                const potentialThinkingContent = fullContent.substring(lastOpenTagIndex + 7).trim(); // +7 to skip "<think>"
                result.thinking = potentialThinkingContent;
                result.answer = beforeThinkingContent;
                return;
            }
            // Both tags are present but the regex didn't match (shouldn't
            // happen, but just in case): extract the content manually
            const startIndex = fullContent.indexOf("<think>") + 7;
            const endIndex = fullContent.indexOf("</think>");
            if (startIndex < endIndex) {
                const thinkingContent = fullContent.substring(startIndex, endIndex).trim();
                const beforeThinking = fullContent.substring(0, fullContent.indexOf("<think>")).trim();
                const afterThinking = fullContent.substring(fullContent.indexOf("</think>") + 8).trim(); // +8 to skip "</think>"
                result.thinking = thinkingContent;
                result.answer = (beforeThinking + " " + afterThinking).trim();
            }
        } else {
            // No thinking tags yet, just update the answer
            result.answer = fullContent;
        }
    }
}
//# sourceMappingURL=ollama-lang.js.map
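
The <think> handling above targets reasoning models (for example, deepseek-r1 variants run under Ollama) that wrap their chain of thought in <think> tags. A small illustration of how the split surfaces on the result object; the import path is hypothetical as above, and the quoted model reply is invented for the example:

import { OllamaLang } from "aiwrapper"; // hypothetical import path

const lang = new OllamaLang({ model: "deepseek-r1:7b" });
const result = await lang.chat([
  { role: "user", content: "What is 2 + 2?" },
]);
// If the model replied "<think>simple arithmetic</think>The answer is 4.",
// extractThinking splits it so that:
//   result.thinking === "simple arithmetic"
//   result.answer   === "The answer is 4."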