UNPKG

@deep-assistant/agent

Version:

A minimal, public domain AI CLI agent compatible with OpenCode's JSON interface. Bun-only runtime.

172 lines (151 loc) 5.07 kB
import z from "zod" import { Tool } from "./tool" import TurndownService from "turndown" import DESCRIPTION from "./webfetch.txt" const MAX_RESPONSE_SIZE = 5 * 1024 * 1024 // 5MB const DEFAULT_TIMEOUT = 30 * 1000 // 30 seconds const MAX_TIMEOUT = 120 * 1000 // 2 minutes export const WebFetchTool = Tool.define("webfetch", { description: DESCRIPTION, parameters: z.object({ url: z.string().describe("The URL to fetch content from"), format: z .enum(["text", "markdown", "html"]) .describe("The format to return the content in (text, markdown, or html)"), timeout: z.number().describe("Optional timeout in seconds (max 120)").optional(), }), async execute(params, ctx) { // Validate URL if (!params.url.startsWith("http://") && !params.url.startsWith("https://")) { throw new Error("URL must start with http:// or https://") } // No restrictions - unrestricted web fetch const timeout = Math.min((params.timeout ?? DEFAULT_TIMEOUT / 1000) * 1000, MAX_TIMEOUT) const controller = new AbortController() const timeoutId = setTimeout(() => controller.abort(), timeout) // Build Accept header based on requested format with q parameters for fallbacks let acceptHeader = "*/*" switch (params.format) { case "markdown": acceptHeader = "text/markdown;q=1.0, text/x-markdown;q=0.9, text/plain;q=0.8, text/html;q=0.7, */*;q=0.1" break case "text": acceptHeader = "text/plain;q=1.0, text/markdown;q=0.9, text/html;q=0.8, */*;q=0.1" break case "html": acceptHeader = "text/html;q=1.0, application/xhtml+xml;q=0.9, text/plain;q=0.8, text/markdown;q=0.7, */*;q=0.1" break default: acceptHeader = "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8" } const response = await fetch(params.url, { signal: AbortSignal.any([controller.signal, ctx.abort]), headers: { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", Accept: acceptHeader, "Accept-Language": "en-US,en;q=0.9", }, }) clearTimeout(timeoutId) if (!response.ok) { throw new Error(`Request failed with status code: ${response.status}`) } // Check content length const contentLength = response.headers.get("content-length") if (contentLength && parseInt(contentLength) > MAX_RESPONSE_SIZE) { throw new Error("Response too large (exceeds 5MB limit)") } const arrayBuffer = await response.arrayBuffer() if (arrayBuffer.byteLength > MAX_RESPONSE_SIZE) { throw new Error("Response too large (exceeds 5MB limit)") } const content = new TextDecoder().decode(arrayBuffer) const contentType = response.headers.get("content-type") || "" const title = `${params.url} (${contentType})` // Handle content based on requested format and actual content type switch (params.format) { case "markdown": if (contentType.includes("text/html")) { const markdown = convertHTMLToMarkdown(content) return { output: markdown, title, metadata: {}, } } return { output: content, title, metadata: {}, } case "text": if (contentType.includes("text/html")) { const text = await extractTextFromHTML(content) return { output: text, title, metadata: {}, } } return { output: content, title, metadata: {}, } case "html": return { output: content, title, metadata: {}, } default: return { output: content, title, metadata: {}, } } }, }) async function extractTextFromHTML(html: string) { let text = "" let skipContent = false const rewriter = new HTMLRewriter() .on("script, style, noscript, iframe, object, embed", { element() { skipContent = true }, text() { // Skip text content inside these elements }, }) .on("*", { element(element) { // Reset skip flag when entering other elements if (!["script", "style", "noscript", "iframe", "object", "embed"].includes(element.tagName)) { skipContent = false } }, text(input) { if (!skipContent) { text += input.text } }, }) .transform(new Response(html)) await rewriter.text() return text.trim() } function convertHTMLToMarkdown(html: string): string { const turndownService = new TurndownService({ headingStyle: "atx", hr: "---", bulletListMarker: "-", codeBlockStyle: "fenced", emDelimiter: "*", }) turndownService.remove(["script", "style", "meta", "link"]) return turndownService.turndown(html) }