UNPKG

@gguf/claw

Version:

WhatsApp gateway CLI (Baileys web) with Pi RPC agent

629 lines (622 loc) 22.3 kB
import { t as __exportAll } from "./rolldown-runtime-Cbj13DAv.js"; import { $ as getApiKeyForModel, H as resolveImplicitBedrockProvider, S as resolveOpenClawAgentDir, U as resolveImplicitCopilotProvider, V as normalizeProviders, W as resolveImplicitProviders, tt as requireApiKey } from "./auth-profiles-CYBuGiBb.js"; import { n as discoverModels, t as discoverAuthStorage } from "./pi-model-discovery-DzEIEgHL.js"; import { i as loadConfig } from "./config-CKLedg5Y.js"; import { k as sanitizeUserFacingText } from "./pi-embedded-helpers-DF8SAHU-.js"; import { n as resolveToolDisplay, t as formatToolDetail } from "./tool-display-DmgKs6-V.js"; import path from "node:path"; import fs from "node:fs/promises"; import { complete } from "@mariozechner/pi-ai"; //#region src/agents/models-config.ts const DEFAULT_MODE = "merge"; function isRecord$1(value) { return Boolean(value && typeof value === "object" && !Array.isArray(value)); } function mergeProviderModels(implicit, explicit) { const implicitModels = Array.isArray(implicit.models) ? implicit.models : []; const explicitModels = Array.isArray(explicit.models) ? explicit.models : []; if (implicitModels.length === 0) return { ...implicit, ...explicit }; const getId = (model) => { if (!model || typeof model !== "object") return ""; const id = model.id; return typeof id === "string" ? id.trim() : ""; }; const seen = new Set(explicitModels.map(getId).filter(Boolean)); const mergedModels = [...explicitModels, ...implicitModels.filter((model) => { const id = getId(model); if (!id) return false; if (seen.has(id)) return false; seen.add(id); return true; })]; return { ...implicit, ...explicit, models: mergedModels }; } function mergeProviders(params) { const out = params.implicit ? { ...params.implicit } : {}; for (const [key, explicit] of Object.entries(params.explicit ?? {})) { const providerKey = key.trim(); if (!providerKey) continue; const implicit = out[providerKey]; out[providerKey] = implicit ? mergeProviderModels(implicit, explicit) : explicit; } return out; } async function readJson(pathname) { try { const raw = await fs.readFile(pathname, "utf8"); return JSON.parse(raw); } catch { return null; } } async function ensureOpenClawModelsJson(config, agentDirOverride) { const cfg = config ?? loadConfig(); const agentDir = agentDirOverride?.trim() ? agentDirOverride.trim() : resolveOpenClawAgentDir(); const explicitProviders = cfg.models?.providers ?? {}; const providers = mergeProviders({ implicit: await resolveImplicitProviders({ agentDir }), explicit: explicitProviders }); const implicitBedrock = await resolveImplicitBedrockProvider({ agentDir, config: cfg }); if (implicitBedrock) { const existing = providers["amazon-bedrock"]; providers["amazon-bedrock"] = existing ? mergeProviderModels(implicitBedrock, existing) : implicitBedrock; } const implicitCopilot = await resolveImplicitCopilotProvider({ agentDir }); if (implicitCopilot && !providers["github-copilot"]) providers["github-copilot"] = implicitCopilot; if (Object.keys(providers).length === 0) return { agentDir, wrote: false }; const mode = cfg.models?.mode ?? DEFAULT_MODE; const targetPath = path.join(agentDir, "models.json"); let mergedProviders = providers; let existingRaw = ""; if (mode === "merge") { const existing = await readJson(targetPath); if (isRecord$1(existing) && isRecord$1(existing.providers)) mergedProviders = { ...existing.providers, ...providers }; } const normalizedProviders = normalizeProviders({ providers: mergedProviders, agentDir }); const next = `${JSON.stringify({ providers: normalizedProviders }, null, 2)}\n`; try { existingRaw = await fs.readFile(targetPath, "utf8"); } catch { existingRaw = ""; } if (existingRaw === next) return { agentDir, wrote: false }; await fs.mkdir(agentDir, { recursive: true, mode: 448 }); await fs.writeFile(targetPath, next, { mode: 384 }); return { agentDir, wrote: true }; } //#endregion //#region src/shared/text/reasoning-tags.ts const QUICK_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking|final)\b/i; const FINAL_TAG_RE = /<\s*\/?\s*final\b[^<>]*>/gi; const THINKING_TAG_RE = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^<>]*>/gi; function findCodeRegions(text) { const regions = []; for (const match of text.matchAll(/(^|\n)(```|~~~)[^\n]*\n[\s\S]*?(?:\n\2(?:\n|$)|$)/g)) { const start = (match.index ?? 0) + match[1].length; regions.push({ start, end: start + match[0].length - match[1].length }); } for (const match of text.matchAll(/`+[^`]+`+/g)) { const start = match.index ?? 0; const end = start + match[0].length; if (!regions.some((r) => start >= r.start && end <= r.end)) regions.push({ start, end }); } regions.sort((a, b) => a.start - b.start); return regions; } function isInsideCode(pos, regions) { return regions.some((r) => pos >= r.start && pos < r.end); } function applyTrim(value, mode) { if (mode === "none") return value; if (mode === "start") return value.trimStart(); return value.trim(); } function stripReasoningTagsFromText(text, options) { if (!text) return text; if (!QUICK_TAG_RE.test(text)) return text; const mode = options?.mode ?? "strict"; const trimMode = options?.trim ?? "both"; let cleaned = text; if (FINAL_TAG_RE.test(cleaned)) { FINAL_TAG_RE.lastIndex = 0; const finalMatches = []; const preCodeRegions = findCodeRegions(cleaned); for (const match of cleaned.matchAll(FINAL_TAG_RE)) { const start = match.index ?? 0; finalMatches.push({ start, length: match[0].length, inCode: isInsideCode(start, preCodeRegions) }); } for (let i = finalMatches.length - 1; i >= 0; i--) { const m = finalMatches[i]; if (!m.inCode) cleaned = cleaned.slice(0, m.start) + cleaned.slice(m.start + m.length); } } else FINAL_TAG_RE.lastIndex = 0; const codeRegions = findCodeRegions(cleaned); THINKING_TAG_RE.lastIndex = 0; let result = ""; let lastIndex = 0; let inThinking = false; for (const match of cleaned.matchAll(THINKING_TAG_RE)) { const idx = match.index ?? 0; const isClose = match[1] === "/"; if (isInsideCode(idx, codeRegions)) continue; if (!inThinking) { result += cleaned.slice(lastIndex, idx); if (!isClose) inThinking = true; } else if (isClose) inThinking = false; lastIndex = idx + match[0].length; } if (!inThinking || mode === "preserve") result += cleaned.slice(lastIndex); return applyTrim(result, trimMode); } //#endregion //#region src/agents/pi-embedded-utils.ts /** * Strip malformed Minimax tool invocations that leak into text content. * Minimax sometimes embeds tool calls as XML in text blocks instead of * proper structured tool calls. This removes: * - <invoke name="...">...</invoke> blocks * - </minimax:tool_call> closing tags */ function stripMinimaxToolCallXml(text) { if (!text) return text; if (!/minimax:tool_call/i.test(text)) return text; let cleaned = text.replace(/<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi, ""); cleaned = cleaned.replace(/<\/?minimax:tool_call>/gi, ""); return cleaned; } /** * Strip downgraded tool call text representations that leak into text content. * When replaying history to Gemini, tool calls without `thought_signature` are * downgraded to text blocks like `[Tool Call: name (ID: ...)]`. These should * not be shown to users. */ function stripDowngradedToolCallText(text) { if (!text) return text; if (!/\[Tool (?:Call|Result)/i.test(text)) return text; const consumeJsonish = (input, start, options) => { const { allowLeadingNewlines = false } = options ?? {}; let index = start; while (index < input.length) { const ch = input[index]; if (ch === " " || ch === " ") { index += 1; continue; } if (allowLeadingNewlines && (ch === "\n" || ch === "\r")) { index += 1; continue; } break; } if (index >= input.length) return null; const startChar = input[index]; if (startChar === "{" || startChar === "[") { let depth = 0; let inString = false; let escape = false; for (let i = index; i < input.length; i += 1) { const ch = input[i]; if (inString) { if (escape) escape = false; else if (ch === "\\") escape = true; else if (ch === "\"") inString = false; continue; } if (ch === "\"") { inString = true; continue; } if (ch === "{" || ch === "[") { depth += 1; continue; } if (ch === "}" || ch === "]") { depth -= 1; if (depth === 0) return i + 1; } } return null; } if (startChar === "\"") { let escape = false; for (let i = index + 1; i < input.length; i += 1) { const ch = input[i]; if (escape) { escape = false; continue; } if (ch === "\\") { escape = true; continue; } if (ch === "\"") return i + 1; } return null; } let end = index; while (end < input.length && input[end] !== "\n" && input[end] !== "\r") end += 1; return end; }; const stripToolCalls = (input) => { const markerRe = /\[Tool Call:[^\]]*\]/gi; let result = ""; let cursor = 0; for (const match of input.matchAll(markerRe)) { const start = match.index ?? 0; if (start < cursor) continue; result += input.slice(cursor, start); let index = start + match[0].length; while (index < input.length && (input[index] === " " || input[index] === " ")) index += 1; if (input[index] === "\r") { index += 1; if (input[index] === "\n") index += 1; } else if (input[index] === "\n") index += 1; while (index < input.length && (input[index] === " " || input[index] === " ")) index += 1; if (input.slice(index, index + 9).toLowerCase() === "arguments") { index += 9; if (input[index] === ":") index += 1; if (input[index] === " ") index += 1; const end = consumeJsonish(input, index, { allowLeadingNewlines: true }); if (end !== null) index = end; } if ((input[index] === "\n" || input[index] === "\r") && (result.endsWith("\n") || result.endsWith("\r") || result.length === 0)) { if (input[index] === "\r") index += 1; if (input[index] === "\n") index += 1; } cursor = index; } result += input.slice(cursor); return result; }; let cleaned = stripToolCalls(text); cleaned = cleaned.replace(/\[Tool Result for ID[^\]]*\]\n?[\s\S]*?(?=\n*\[Tool |\n*$)/gi, ""); return cleaned.trim(); } /** * Strip thinking tags and their content from text. * This is a safety net for cases where the model outputs <think> tags * that slip through other filtering mechanisms. */ function stripThinkingTagsFromText(text) { return stripReasoningTagsFromText(text, { mode: "strict", trim: "both" }); } function extractAssistantText(msg) { const isTextBlock = (block) => { if (!block || typeof block !== "object") return false; const rec = block; return rec.type === "text" && typeof rec.text === "string"; }; return sanitizeUserFacingText((Array.isArray(msg.content) ? msg.content.filter(isTextBlock).map((c) => stripThinkingTagsFromText(stripDowngradedToolCallText(stripMinimaxToolCallXml(c.text))).trim()).filter(Boolean) : []).join("\n").trim()); } function extractAssistantThinking(msg) { if (!Array.isArray(msg.content)) return ""; return msg.content.map((block) => { if (!block || typeof block !== "object") return ""; const record = block; if (record.type === "thinking" && typeof record.thinking === "string") return record.thinking.trim(); return ""; }).filter(Boolean).join("\n").trim(); } function formatReasoningMessage(text) { const trimmed = text.trim(); if (!trimmed) return ""; return `Reasoning:\n${trimmed.split("\n").map((line) => line ? `_${line}_` : line).join("\n")}`; } function splitThinkingTaggedText(text) { const trimmedStart = text.trimStart(); if (!trimmedStart.startsWith("<")) return null; const openRe = /<\s*(?:think(?:ing)?|thought|antthinking)\s*>/i; const closeRe = /<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/i; if (!openRe.test(trimmedStart)) return null; if (!closeRe.test(text)) return null; const scanRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi; let inThinking = false; let cursor = 0; let thinkingStart = 0; const blocks = []; const pushText = (value) => { if (!value) return; blocks.push({ type: "text", text: value }); }; const pushThinking = (value) => { const cleaned = value.trim(); if (!cleaned) return; blocks.push({ type: "thinking", thinking: cleaned }); }; for (const match of text.matchAll(scanRe)) { const index = match.index ?? 0; const isClose = Boolean(match[1]?.includes("/")); if (!inThinking && !isClose) { pushText(text.slice(cursor, index)); thinkingStart = index + match[0].length; inThinking = true; continue; } if (inThinking && isClose) { pushThinking(text.slice(thinkingStart, index)); cursor = index + match[0].length; inThinking = false; } } if (inThinking) return null; pushText(text.slice(cursor)); if (!blocks.some((b) => b.type === "thinking")) return null; return blocks; } function promoteThinkingTagsToBlocks(message) { if (!Array.isArray(message.content)) return; if (message.content.some((block) => block.type === "thinking")) return; const next = []; let changed = false; for (const block of message.content) { if (block.type !== "text") { next.push(block); continue; } const split = splitThinkingTaggedText(block.text); if (!split) { next.push(block); continue; } changed = true; for (const part of split) if (part.type === "thinking") next.push({ type: "thinking", thinking: part.thinking }); else if (part.type === "text") { const cleaned = part.text.trimStart(); if (cleaned) next.push({ type: "text", text: cleaned }); } } if (!changed) return; message.content = next; } function extractThinkingFromTaggedText(text) { if (!text) return ""; const scanRe = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi; let result = ""; let lastIndex = 0; let inThinking = false; for (const match of text.matchAll(scanRe)) { const idx = match.index ?? 0; if (inThinking) result += text.slice(lastIndex, idx); inThinking = !(match[1] === "/"); lastIndex = idx + match[0].length; } return result.trim(); } function extractThinkingFromTaggedStream(text) { if (!text) return ""; const closed = extractThinkingFromTaggedText(text); if (closed) return closed; const openRe = /<\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi; const closeRe = /<\s*\/\s*(?:think(?:ing)?|thought|antthinking)\s*>/gi; const openMatches = [...text.matchAll(openRe)]; if (openMatches.length === 0) return ""; const closeMatches = [...text.matchAll(closeRe)]; const lastOpen = openMatches[openMatches.length - 1]; const lastClose = closeMatches[closeMatches.length - 1]; if (lastClose && (lastClose.index ?? -1) > (lastOpen.index ?? -1)) return closed; const start = (lastOpen.index ?? 0) + lastOpen[0].length; return text.slice(start).trim(); } function inferToolMetaFromArgs(toolName, args) { return formatToolDetail(resolveToolDisplay({ name: toolName, args })); } //#endregion //#region src/agents/minimax-vlm.ts function coerceApiHost(params) { const env = params.env ?? process.env; const raw = params.apiHost?.trim() || env.MINIMAX_API_HOST?.trim() || params.modelBaseUrl?.trim() || "https://api.minimax.io"; try { return new URL(raw).origin; } catch {} try { return new URL(`https://${raw}`).origin; } catch { return "https://api.minimax.io"; } } function isRecord(value) { return Boolean(value && typeof value === "object" && !Array.isArray(value)); } function pickString(rec, key) { const v = rec[key]; return typeof v === "string" ? v : ""; } async function minimaxUnderstandImage(params) { const apiKey = params.apiKey.trim(); if (!apiKey) throw new Error("MiniMax VLM: apiKey required"); const prompt = params.prompt.trim(); if (!prompt) throw new Error("MiniMax VLM: prompt required"); const imageDataUrl = params.imageDataUrl.trim(); if (!imageDataUrl) throw new Error("MiniMax VLM: imageDataUrl required"); if (!/^data:image\/(png|jpeg|webp);base64,/i.test(imageDataUrl)) throw new Error("MiniMax VLM: imageDataUrl must be a base64 data:image/(png|jpeg|webp) URL"); const host = coerceApiHost({ apiHost: params.apiHost, modelBaseUrl: params.modelBaseUrl }); const url = new URL("/v1/coding_plan/vlm", host).toString(); const res = await fetch(url, { method: "POST", headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json", "MM-API-Source": "OpenClaw" }, body: JSON.stringify({ prompt, image_url: imageDataUrl }) }); const traceId = res.headers.get("Trace-Id") ?? ""; if (!res.ok) { const body = await res.text().catch(() => ""); const trace = traceId ? ` Trace-Id: ${traceId}` : ""; throw new Error(`MiniMax VLM request failed (${res.status} ${res.statusText}).${trace}${body ? ` Body: ${body.slice(0, 400)}` : ""}`); } const json = await res.json().catch(() => null); if (!isRecord(json)) { const trace = traceId ? ` Trace-Id: ${traceId}` : ""; throw new Error(`MiniMax VLM response was not JSON.${trace}`); } const baseResp = isRecord(json.base_resp) ? json.base_resp : {}; const code = typeof baseResp.status_code === "number" ? baseResp.status_code : -1; if (code !== 0) { const msg = (baseResp.status_msg ?? "").trim(); const trace = traceId ? ` Trace-Id: ${traceId}` : ""; throw new Error(`MiniMax VLM API error (${code})${msg ? `: ${msg}` : ""}.${trace}`); } const content = pickString(json, "content").trim(); if (!content) { const trace = traceId ? ` Trace-Id: ${traceId}` : ""; throw new Error(`MiniMax VLM returned no content.${trace}`); } return content; } //#endregion //#region src/agents/tools/image-tool.helpers.ts function decodeDataUrl(dataUrl) { const trimmed = dataUrl.trim(); const match = /^data:([^;,]+);base64,([a-z0-9+/=\r\n]+)$/i.exec(trimmed); if (!match) throw new Error("Invalid data URL (expected base64 data: URL)."); const mimeType = (match[1] ?? "").trim().toLowerCase(); if (!mimeType.startsWith("image/")) throw new Error(`Unsupported data URL type: ${mimeType || "unknown"}`); const b64 = (match[2] ?? "").trim(); const buffer = Buffer.from(b64, "base64"); if (buffer.length === 0) throw new Error("Invalid data URL: empty payload."); return { buffer, mimeType, kind: "image" }; } function coerceImageAssistantText(params) { const stop = params.message.stopReason; const errorMessage = params.message.errorMessage?.trim(); if (stop === "error" || stop === "aborted") throw new Error(errorMessage ? `Image model failed (${params.provider}/${params.model}): ${errorMessage}` : `Image model failed (${params.provider}/${params.model})`); if (errorMessage) throw new Error(`Image model failed (${params.provider}/${params.model}): ${errorMessage}`); const text = extractAssistantText(params.message); if (text.trim()) return text.trim(); throw new Error(`Image model returned no text (${params.provider}/${params.model}).`); } function coerceImageModelConfig(cfg) { const imageModel = cfg?.agents?.defaults?.imageModel; const primary = typeof imageModel === "string" ? imageModel.trim() : imageModel?.primary; const fallbacks = typeof imageModel === "object" ? imageModel?.fallbacks ?? [] : []; return { ...primary?.trim() ? { primary: primary.trim() } : {}, ...fallbacks.length > 0 ? { fallbacks } : {} }; } function resolveProviderVisionModelFromConfig(params) { const models = (params.cfg?.models?.providers?.[params.provider])?.models ?? []; const id = (((params.provider === "minimax" ? models.find((m) => (m?.id ?? "").trim() === "MiniMax-VL-01" && Array.isArray(m?.input) && m.input.includes("image")) : null) ?? models.find((m) => Boolean((m?.id ?? "").trim()) && m.input?.includes("image")))?.id ?? "").trim(); return id ? `${params.provider}/${id}` : null; } //#endregion //#region src/media-understanding/providers/image.ts var image_exports = /* @__PURE__ */ __exportAll({ describeImageWithModel: () => describeImageWithModel }); async function describeImageWithModel(params) { await ensureOpenClawModelsJson(params.cfg, params.agentDir); const authStorage = discoverAuthStorage(params.agentDir); const model = discoverModels(authStorage, params.agentDir).find(params.provider, params.model); if (!model) throw new Error(`Unknown model: ${params.provider}/${params.model}`); if (!model.input?.includes("image")) throw new Error(`Model does not support images: ${params.provider}/${params.model}`); const apiKey = requireApiKey(await getApiKeyForModel({ model, cfg: params.cfg, agentDir: params.agentDir, profileId: params.profile, preferredProfile: params.preferredProfile }), model.provider); authStorage.setRuntimeApiKey(model.provider, apiKey); const base64 = params.buffer.toString("base64"); if (model.provider === "minimax") return { text: await minimaxUnderstandImage({ apiKey, prompt: params.prompt ?? "Describe the image.", imageDataUrl: `data:${params.mime ?? "image/jpeg"};base64,${base64}`, modelBaseUrl: model.baseUrl }), model: model.id }; return { text: coerceImageAssistantText({ message: await complete(model, { messages: [{ role: "user", content: [{ type: "text", text: params.prompt ?? "Describe the image." }, { type: "image", data: base64, mimeType: params.mime ?? "image/jpeg" }], timestamp: Date.now() }] }, { apiKey, maxTokens: params.maxTokens ?? 512 }), provider: model.provider, model: model.id }), model: model.id }; } //#endregion export { stripThinkingTagsFromText as _, decodeDataUrl as a, extractAssistantText as c, extractThinkingFromTaggedText as d, formatReasoningMessage as f, stripMinimaxToolCallXml as g, stripDowngradedToolCallText as h, coerceImageModelConfig as i, extractAssistantThinking as l, promoteThinkingTagsToBlocks as m, image_exports as n, resolveProviderVisionModelFromConfig as o, inferToolMetaFromArgs as p, coerceImageAssistantText as r, minimaxUnderstandImage as s, describeImageWithModel as t, extractThinkingFromTaggedStream as u, ensureOpenClawModelsJson as v };