@deep-assistant/agent
Version:
A minimal, public domain AI CLI agent compatible with OpenCode's JSON interface. Bun-only runtime.
172 lines (151 loc) • 5.07 kB
text/typescript
import z from "zod"
import { Tool } from "./tool"
import TurndownService from "turndown"
import DESCRIPTION from "./webfetch.txt"
/** Largest response body the tool accepts, in bytes (5 * 1024 * 1024). */
const MAX_RESPONSE_SIZE = 5 * 1024 ** 2
/** Timeout used when the caller does not pass one, in milliseconds (30 seconds). */
const DEFAULT_TIMEOUT = 30_000
/** Ceiling applied to any requested timeout, in milliseconds (2 minutes). */
const MAX_TIMEOUT = 120_000
/**
 * Reads a response body into memory, giving up as soon as more than
 * MAX_RESPONSE_SIZE bytes have arrived.
 *
 * Reading chunk by chunk (instead of `response.arrayBuffer()`) means a server that
 * omits or understates Content-Length cannot make us buffer an arbitrarily large body
 * before the size limit is enforced.
 *
 * @param response - Response whose body has not been consumed yet.
 * @returns The complete body bytes (empty when the response has no body).
 * @throws Error when the body exceeds the size limit.
 */
async function readBodyCapped(response: Response): Promise<Uint8Array> {
  const reader = response.body?.getReader()
  if (!reader) return new Uint8Array(0)

  const chunks: Uint8Array[] = []
  let received = 0
  while (true) {
    const result = await reader.read()
    if (result.done) break
    received += result.value.byteLength
    if (received > MAX_RESPONSE_SIZE) {
      // Best effort: stop the transfer, the remaining bytes are of no use.
      // A failure to cancel must not mask the size error below.
      await reader.cancel().catch(() => {})
      throw new Error("Response too large (exceeds 5MB limit)")
    }
    chunks.push(result.value)
  }

  const body = new Uint8Array(received)
  let offset = 0
  for (const chunk of chunks) {
    body.set(chunk, offset)
    offset += chunk.byteLength
  }
  return body
}

/**
 * Tool that downloads a URL and returns its content as text, markdown or raw HTML.
 *
 * HTML responses are converted to the requested format; any other content type is
 * returned unchanged. The request is aborted by `ctx.abort` or once the timeout
 * elapses, whichever comes first, and the timeout covers the body download as well
 * as the wait for response headers.
 *
 * @throws Error when the URL does not start with http:// or https://, when the
 *   response status is not OK, or when the body exceeds the 5MB limit. An aborted
 *   or timed-out request rejects with the error raised by `fetch`.
 */
export const WebFetchTool = Tool.define("webfetch", {
  description: DESCRIPTION,
  parameters: z.object({
    url: z.string().describe("The URL to fetch content from"),
    format: z
      .enum(["text", "markdown", "html"])
      .describe("The format to return the content in (text, markdown, or html)"),
    timeout: z.number().describe("Optional timeout in seconds (max 120)").optional(),
  }),
  async execute(params, ctx) {
    // Validate URL
    if (!params.url.startsWith("http://") && !params.url.startsWith("https://")) {
      throw new Error("URL must start with http:// or https://")
    }
    // No restrictions - unrestricted web fetch

    // params.timeout is in seconds; everything below works in milliseconds.
    const timeout = Math.min((params.timeout ?? DEFAULT_TIMEOUT / 1000) * 1000, MAX_TIMEOUT)
    const controller = new AbortController()
    const timeoutId = setTimeout(() => controller.abort(), timeout)

    // Build Accept header based on requested format with q parameters for fallbacks
    let acceptHeader: string
    switch (params.format) {
      case "markdown":
        acceptHeader = "text/markdown;q=1.0, text/x-markdown;q=0.9, text/plain;q=0.8, text/html;q=0.7, */*;q=0.1"
        break
      case "text":
        acceptHeader = "text/plain;q=1.0, text/markdown;q=0.9, text/html;q=0.8, */*;q=0.1"
        break
      case "html":
        acceptHeader = "text/html;q=1.0, application/xhtml+xml;q=0.9, text/plain;q=0.8, text/markdown;q=0.7, */*;q=0.1"
        break
      default:
        acceptHeader =
          "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8"
    }

    let response: Response
    let content: string
    try {
      response = await fetch(params.url, {
        signal: AbortSignal.any([controller.signal, ctx.abort]),
        headers: {
          "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
          Accept: acceptHeader,
          "Accept-Language": "en-US,en;q=0.9",
        },
      })
      if (!response.ok) {
        throw new Error(`Request failed with status code: ${response.status}`)
      }
      // Cheap early rejection when the server announces an oversized body.
      const contentLength = response.headers.get("content-length")
      if (contentLength && Number.parseInt(contentLength, 10) > MAX_RESPONSE_SIZE) {
        throw new Error("Response too large (exceeds 5MB limit)")
      }
      // The timer is still armed here, so a stalled download is aborted too.
      content = new TextDecoder().decode(await readBodyCapped(response))
    } finally {
      // Runs on success and on every failure path, so the timer never outlives the request.
      clearTimeout(timeoutId)
    }

    const contentType = response.headers.get("content-type") ?? ""
    const title = `${params.url} (${contentType})`
    // Media types are case-insensitive, so compare in lower case.
    const isHTML = contentType.toLowerCase().includes("text/html")

    // Only HTML needs converting; every other content type is passed through as-is.
    let output = content
    if (isHTML && params.format === "markdown") {
      output = convertHTMLToMarkdown(content)
    } else if (isHTML && params.format === "text") {
      output = await extractTextFromHTML(content)
    }

    return {
      output,
      title,
      metadata: {},
    }
  },
})
/**
 * Collects the text content of an HTML document, leaving out text that belongs to
 * script, style, noscript, iframe, object and embed elements.
 *
 * NOTE(review): the skip flag is only cleared when some other element opens, so text
 * that follows a skipped element inside the same parent looks like it is dropped as
 * well - confirm whether that is intended.
 *
 * @param html - HTML source to scan.
 * @returns The collected text with leading and trailing whitespace removed.
 */
async function extractTextFromHTML(html: string) {
  const ignoredTags = ["script", "style", "noscript", "iframe", "object", "embed"]
  const pieces: string[] = []
  let insideIgnored = false

  const transformed = new HTMLRewriter()
    .on(ignoredTags.join(", "), {
      element() {
        // From here on, text chunks are discarded until another element opens.
        insideIgnored = true
      },
      text() {
        // Nothing to collect for these elements.
      },
    })
    .on("*", {
      element(node) {
        if (ignoredTags.includes(node.tagName)) return
        // Any other element switches collection back on.
        insideIgnored = false
      },
      text(chunk) {
        if (insideIgnored) return
        pieces.push(chunk.text)
      },
    })
    .transform(new Response(html))

  // Draining the transformed response is what drives the handlers above.
  await transformed.text()
  return pieces.join("").trim()
}
/**
 * Converts an HTML string to markdown with Turndown, using ATX headings, "---"
 * rules, "-" bullets, fenced code blocks and "*" emphasis. script, style, meta and
 * link elements are removed from the output.
 *
 * @param html - HTML source to convert.
 * @returns The markdown rendering of `html`.
 */
function convertHTMLToMarkdown(html: string): string {
  const converter = new TurndownService({
    codeBlockStyle: "fenced",
    bulletListMarker: "-",
    emDelimiter: "*",
    headingStyle: "atx",
    hr: "---",
  })
  // remove() returns the service instance, so the conversion can be chained onto it.
  return converter.remove(["script", "style", "meta", "link"]).turndown(html)
}