/*
 * @centralinc/browseragent
 * Version: (not specified)
 * Browser automation agent using Computer Use with Playwright
 * 1,524 lines (1,505 loc) • 70.6 kB
 * Language: JavaScript
 */
// Module-interop runtime helpers. The double-underscore names and shapes look
// like bundler-generated output (presumably esbuild — TODO confirm); they are
// not hand-written application logic.

// Short aliases for the Object reflection primitives used below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Wraps a lazily-initialised module. `fn` is an object whose first own property
// is the module body; the returned `__init` runs that body on the first call
// (clearing `fn` to 0 so it cannot run again), stores the result in `res`, and
// returns `res` on every call.
var __esm = (fn, res) => function __init() {
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
// Defines one enumerable getter on `target` per key of `all`; each value in
// `all` is the getter function itself, so reads are resolved at access time.
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// Copies the own properties of `from` onto `to` as live getters. Keys already
// present on `to`, and the key equal to `except`, are skipped. Enumerability is
// taken from the source descriptor (`desc` is only used as a scratch variable).
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
// Builds a fresh object flagged with `__esModule: true` and mirrors every own
// property of `mod` onto it through __copyProps.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// tools/registry/registry.ts
// Namespace object for tools/registry/registry.ts. Each member is exposed as a
// getter (via __export), so it reads the current value of the binding — the
// class itself is only assigned once init_registry() has run.
var registry_exports = {};
__export(registry_exports, {
DefaultToolRegistry: () => DefaultToolRegistry,
getToolRegistry: () => getToolRegistry,
resetToolRegistry: () => resetToolRegistry
});
import { z as z2 } from "zod";
/**
 * Returns the shared tool registry, creating it on first use.
 *
 * @param {object} [config] - Passed to the DefaultToolRegistry constructor, but
 *   only when the shared instance does not exist yet; ignored afterwards.
 * @returns {object} The shared DefaultToolRegistry instance.
 */
function getToolRegistry(config) {
  if (globalRegistry) {
    return globalRegistry;
  }
  globalRegistry = new DefaultToolRegistry(config);
  return globalRegistry;
}
/**
 * Discards the shared registry so that the next getToolRegistry() call builds
 * a new one (and honours the config passed to that call).
 */
function resetToolRegistry() {
globalRegistry = null;
}
var DefaultToolRegistry, globalRegistry;
// Lazy initialiser for tools/registry/registry.ts: the first call assigns the
// DefaultToolRegistry class and resets the shared-instance slot.
var init_registry = __esm({
  "tools/registry/registry.ts"() {
    "use strict";
    /**
     * In-memory registry of tool capabilities keyed by "tool:method".
     *
     * Config fields read by this class: `includeInSystemPrompt` and
     * `validateInputs` (both default to true), `overrides` (per-key partial
     * capability objects merged over a registration) and `filter` (predicate;
     * capabilities for which it returns falsy are not stored).
     */
    DefaultToolRegistry = class {
      constructor(config = {}) {
        this.capabilities = /* @__PURE__ */ new Map();
        this.config = {
          includeInSystemPrompt: true,
          validateInputs: true,
          ...config
        };
      }
      /** Builds the map key for a tool/method pair. */
      getCapabilityKey(tool, method) {
        return `${tool}:${method}`;
      }
      /**
       * Adds a capability. Configured overrides are applied before the filter
       * runs, so the filter sees the final capability object.
       *
       * @throws {Error} When the same tool/method pair is already registered.
       */
      register(capability2) {
        const key = this.getCapabilityKey(capability2.tool, capability2.method);
        if (this.capabilities.has(key)) {
          throw new Error(`Capability '${key}' is already registered`);
        }
        const overrides = this.config.overrides;
        const override = overrides == null ? void 0 : overrides[key];
        if (override) {
          capability2 = {
            ...capability2,
            ...override
          };
        }
        if (this.config.filter && !this.config.filter(capability2)) {
          return;
        }
        this.capabilities.set(key, capability2);
      }
      /** Returns the capability for tool/method, or undefined when unknown. */
      get(tool, method) {
        return this.capabilities.get(this.getCapabilityKey(tool, method));
      }
      /** Returns every capability registered for `tool`. */
      getToolCapabilities(tool) {
        return Array.from(this.capabilities.values()).filter(
          (cap) => cap.tool === tool
        );
      }
      /** Returns all registered capabilities in registration order. */
      getAll() {
        return Array.from(this.capabilities.values());
      }
      /**
       * True unless the capability has `enabled === false`. Note that an
       * unregistered tool/method pair also yields true.
       */
      isEnabled(tool, method) {
        const capability2 = this.get(tool, method);
        return (capability2 == null ? void 0 : capability2.enabled) !== false;
      }
      /** Returns the distinct tool names that have at least one capability. */
      getToolNames() {
        const tools = /* @__PURE__ */ new Set();
        this.capabilities.forEach((cap) => tools.add(cap.tool));
        return Array.from(tools);
      }
      /**
       * Returns the field names of an object schema in declaration order.
       * zod v3 stores `_def.shape` as a function, so it must be called (or the
       * public `.shape` getter used) rather than passed to Object.keys directly.
       */
      getObjectSchemaKeys(schema) {
        const def = schema._def || {};
        let shape = schema.shape;
        if (shape == null) {
          shape = typeof def.shape === "function" ? def.shape() : def.shape;
        }
        return shape && typeof shape === "object" ? Object.keys(shape) : [];
      }
      /**
       * Validates call arguments against the capability's schema.
       *
       * - a single plain-object argument is parsed as-is;
       * - positional arguments for an object schema are mapped onto the
       *   schema's fields by position;
       * - anything else is parsed as the argument array (tuple schemas).
       *
       * @returns {{valid: boolean, errors?: string[]}}
       */
      validate(tool, method, args) {
        const capability2 = this.get(tool, method);
        if (!capability2) {
          return {
            valid: false,
            errors: [`Unknown capability: ${tool}:${method}`]
          };
        }
        if (!this.config.validateInputs) {
          return { valid: true };
        }
        try {
          const schema = capability2.schema;
          if (schema) {
            if (args.length === 1 && typeof args[0] === "object" && !Array.isArray(args[0])) {
              schema.parse(args[0]);
            } else if (schema._def && schema._def.typeName === "ZodObject" && args.length > 0) {
              const keys = this.getObjectSchemaKeys(schema);
              const obj = {};
              args.forEach((val, idx) => {
                const key = keys[idx];
                // Surplus positional arguments have no field to land in.
                if (key) {
                  obj[key] = val;
                }
              });
              schema.parse(obj);
            } else {
              schema.parse(args);
            }
          }
          return { valid: true };
        } catch (error) {
          if (error instanceof z2.ZodError) {
            return {
              valid: false,
              errors: error.errors.map((e) => `${e.path.join(".")}: ${e.message}`)
            };
          }
          return { valid: false, errors: [String(error)] };
        }
      }
      /**
       * Renders the prompt documentation for one tool; returns "" when the
       * tool has no capability that is not explicitly disabled.
       */
      generateToolDocs(tool) {
        const capabilities = this.getToolCapabilities(tool).filter(
          (cap) => cap.enabled !== false
        );
        if (capabilities.length === 0) {
          return "";
        }
        const sections = [
          `${tool.toUpperCase()} TOOL CAPABILITIES:`,
          `* You have access to a '${tool}' tool that provides the following capabilities:`
        ];
        capabilities.forEach((cap) => {
          sections.push(` - '${cap.method}': ${cap.description}`);
        });
        sections.push("");
        capabilities.forEach((cap) => {
          sections.push(this.generateCapabilityDoc(cap));
          sections.push("");
        });
        return sections.join("\n");
      }
      /** Concatenates generateToolDocs() for every registered tool. */
      generateAllDocs() {
        const tools = this.getToolNames();
        if (tools.length === 0) {
          return "";
        }
        const sections = [];
        tools.forEach((tool) => {
          const toolDocs = this.generateToolDocs(tool);
          if (toolDocs) {
            sections.push(toolDocs);
            sections.push("");
          }
        });
        return sections.join("\n").trim();
      }
      /** Turns a capability's multi-line `usage` text into a numbered list. */
      generateCapabilityDoc(capability2) {
        const lines = [];
        lines.push(`HOW TO USE ${capability2.method.toUpperCase()}:`);
        const usageLines = capability2.usage.split("\n").map((line) => line.trim()).filter(Boolean);
        usageLines.forEach((line, index) => {
          lines.push(`${index + 1}. ${line}`);
        });
        return lines.join("\n");
      }
    };
    globalRegistry = null;
  }
});
// agent.ts
import zodToJsonSchema from "zod-to-json-schema";
// loop.ts
import { Anthropic } from "@anthropic-ai/sdk";
import { DateTime } from "luxon";
import { Agent as HttpAgent } from "http";
import { Agent as HttpsAgent } from "https";
import { lookup } from "dns";
// tools/types/computer.ts
// String-valued enum of every action name the computer tool understands.
// Keys are the symbolic names; values are the strings used in tool input.
var Action = {
  MOUSE_MOVE: "mouse_move",
  LEFT_CLICK: "left_click",
  RIGHT_CLICK: "right_click",
  MIDDLE_CLICK: "middle_click",
  DOUBLE_CLICK: "double_click",
  TRIPLE_CLICK: "triple_click",
  LEFT_CLICK_DRAG: "left_click_drag",
  LEFT_MOUSE_DOWN: "left_mouse_down",
  LEFT_MOUSE_UP: "left_mouse_up",
  KEY: "key",
  TYPE: "type",
  HOLD_KEY: "hold_key",
  SCREENSHOT: "screenshot",
  CURSOR_POSITION: "cursor_position",
  SCROLL: "scroll",
  WAIT: "wait",
  EXTRACT_URL: "extract_url"
};
// tools/utils/keyboard.ts
/**
 * Translates key names as they appear in tool input (e.g. "ctrl", "Return",
 * "shift+Tab") into the key names passed to Playwright's keyboard API.
 * Lookups are case-insensitive; names that are not in either table are passed
 * through with their original casing, because Playwright key names such as
 * "Enter" or "Shift" are case-sensitive.
 */
var KeyboardUtils = class {
  /** Own-property lookup so that names like "constructor" never match. */
  static lookup(table, name) {
    return Object.prototype.hasOwnProperty.call(table, name) ? table[name] : void 0;
  }
  /**
   * @param {string} key
   * @returns {boolean} True when `key` names Control, Alt, Shift or Meta
   *   (directly or through an alias in modifierKeyMap).
   */
  static isModifierKey(key) {
    if (!key) return false;
    const normalizedKey = this.lookup(this.modifierKeyMap, key.toLowerCase()) || key;
    return ["Control", "Alt", "Shift", "Meta"].includes(normalizedKey);
  }
  /**
   * Maps one key name to its Playwright equivalent.
   *
   * @param {string} key
   * @returns {string} The mapped name, or `key` unchanged when unmapped.
   * @throws {Error} When `key` is empty or undefined.
   */
  static getPlaywrightKey(key) {
    if (!key) {
      throw new Error("Key cannot be undefined");
    }
    const normalizedKey = key.toLowerCase();
    const mapped = this.lookup(this.keyMap, normalizedKey);
    if (mapped !== void 0) {
      return mapped;
    }
    const modifier = this.lookup(this.modifierKeyMap, normalizedKey);
    if (modifier !== void 0) {
      return modifier;
    }
    return key;
  }
  /**
   * Splits a "+"-separated combination into Playwright key names.
   *
   * The combination is NOT lowercased up front: doing so turned pass-through
   * names such as "Enter" or "Shift" into "enter"/"shift", which Playwright
   * does not recognise. Case-insensitive matching happens per key instead.
   *
   * @param {string} combo - e.g. "ctrl+shift+Tab".
   * @returns {string[]} Keys in the order given.
   * @throws {Error} When the combination or any segment of it is empty.
   */
  static parseKeyCombination(combo) {
    if (!combo) {
      throw new Error("Key combination cannot be empty");
    }
    return combo.split("+").map((key) => {
      const trimmedKey = key.trim();
      if (!trimmedKey) {
        throw new Error("Invalid key combination: empty key");
      }
      return this.getPlaywrightKey(trimmedKey);
    });
  }
};
// Aliases and lowercase spellings of the four Playwright modifier keys.
// The canonical names are listed too so that "shift"/"SHIFT" resolve to "Shift".
KeyboardUtils.modifierKeyMap = {
  ctrl: "Control",
  control: "Control",
  alt: "Alt",
  shift: "Shift",
  command: "Meta",
  cmd: "Meta",
  win: "Meta",
  meta: "Meta"
};
// Essential key mappings for Playwright compatibility
KeyboardUtils.keyMap = {
  return: "Enter",
  enter: "Enter",
  space: " ",
  left: "ArrowLeft",
  right: "ArrowRight",
  up: "ArrowUp",
  down: "ArrowDown",
  home: "Home",
  end: "End",
  pageup: "PageUp",
  pagedown: "PageDown",
  delete: "Delete",
  backspace: "Backspace",
  tab: "Tab",
  esc: "Escape",
  escape: "Escape",
  insert: "Insert",
  super_l: "Meta",
  f1: "F1",
  f2: "F2",
  f3: "F3",
  f4: "F4",
  f5: "F5",
  f6: "F6",
  f7: "F7",
  f8: "F8",
  f9: "F9",
  f10: "F10",
  f11: "F11",
  f12: "F12"
};
// tools/types/base.ts
/**
 * Error type thrown by tools for invalid input or failed actions.
 * Identical to Error apart from `name`, which is set to "ToolError".
 */
var ToolError = class extends Error {
  constructor(message) {
    super(message);
    Object.assign(this, { name: "ToolError" });
  }
};
// Default execution settings; ComputerTool's constructor spreads each section
// and lets a caller-supplied section override individual fields.
var DEFAULT_EXECUTION_CONFIG = {
typing: {
// Any mode other than "fill" makes ComputerTool type through the keyboard API.
mode: "character-by-character",
// Passed as the `delay` option of page.keyboard.type (presumably milliseconds — confirm against Playwright).
characterDelay: 12,
// Passed to page.waitForTimeout after every keyboard action.
completionDelay: 100
},
screenshot: {
// Multiplied by 1e3 before being handed to setTimeout, i.e. expressed in seconds.
delay: 0.3,
// NOTE(review): `quality` is not read by any code visible in this chunk.
quality: "medium"
},
// NOTE(review): the `mouse` and `scrolling` sections are merged into the tool's
// config but are not read by any code visible in this chunk.
mouse: {
moveSpeed: "normal",
clickDelay: 50
},
scrolling: {
mode: "percentage",
percentage: 90,
overlap: 10
}
};
// tools/utils/validator.ts
/**
 * Static validators for computer-tool action parameters. Every failure is
 * reported as a ToolError whose message names the offending field.
 */
var ActionValidator = class {
  /**
   * @param {*} text - Value to check.
   * @param {boolean} required - Whether `undefined` is an error.
   * @param {string} action - Action name, used in the error message.
   * @throws {ToolError} When text is required but missing, or not a string.
   */
  static validateText(text, required, action) {
    if (required && text === void 0) {
      throw new ToolError(`text is required for ${action}`);
    }
    if (text !== void 0 && typeof text !== "string") {
      throw new ToolError(`text must be a string for ${action} (received ${typeof text})`);
    }
  }
  /**
   * @param {*} coordinate - Value to check; falsy values count as "absent".
   * @param {boolean} required - Whether an absent coordinate is an error.
   * @param {string} action - Action name, used in the error message.
   * @throws {ToolError} When required but absent, or present but malformed.
   */
  static validateCoordinate(coordinate, required, action) {
    if (required && !coordinate) {
      throw new ToolError(`coordinate is required for ${action}`);
    }
    if (coordinate) {
      this.validateAndGetCoordinates(coordinate);
    }
  }
  /**
   * Accepts numbers in the range 0..100 inclusive; NaN is rejected because it
   * would otherwise slip past both range comparisons.
   *
   * @throws {ToolError} When duration is not a number or is out of range.
   */
  static validateDuration(duration) {
    if (typeof duration !== "number" || Number.isNaN(duration)) {
      throw new ToolError(`duration must be a number (received ${String(duration)})`);
    }
    if (duration < 0) {
      throw new ToolError(`duration ${duration} must be non-negative`);
    }
    if (duration > 100) {
      throw new ToolError(`duration ${duration} is too long (maximum is 100)`);
    }
  }
  /**
   * @param {*} coordinate
   * @returns {number[]} The same array when it is a pair of finite,
   *   non-negative numbers.
   * @throws {ToolError} Otherwise.
   */
  static validateAndGetCoordinates(coordinate) {
    const shown = Array.isArray(coordinate) ? `[${coordinate.join(", ")}]` : String(coordinate);
    if (!Array.isArray(coordinate) || coordinate.length !== 2) {
      throw new ToolError(`coordinate ${shown} must be a tuple of length 2`);
    }
    if (!coordinate.every((i) => typeof i === "number" && Number.isFinite(i) && i >= 0)) {
      throw new ToolError(
        `coordinate ${shown} must be a tuple of non-negative numbers`
      );
    }
    return coordinate;
  }
  /**
   * Validates `params` for its action: text is mandatory for keyboard
   * actions, coordinate is mandatory for mouse actions, and duration is
   * checked for "hold_key" and "wait".
   *
   * @param {{action: string, text?: *, coordinate?: *, duration?: *}} params
   * @param {Set<string>} mouseActions
   * @param {Set<string>} keyboardActions
   * @throws {ToolError} On the first invalid field.
   */
  static validateActionParams(params, mouseActions, keyboardActions) {
    const { action, text, coordinate, duration } = params;
    this.validateText(text, keyboardActions.has(action), action);
    this.validateCoordinate(coordinate, mouseActions.has(action), action);
    if (action === "hold_key" /* HOLD_KEY */ || action === "wait" /* WAIT */) {
      this.validateDuration(duration);
    }
  }
};
// tools/computer.ts
/**
 * Executes computer-use actions (mouse, keyboard, scroll, wait, screenshot)
 * against a Playwright page.
 *
 * Most actions resolve to `{ base64Image }` with a fresh viewport screenshot;
 * "cursor_position" resolves to `{ output }`. Failures surface as ToolError.
 */
var ComputerTool = class {
  /**
   * @param {object} page - Playwright page to drive.
   * @param {string} [version="20250124"] - Tool version; "scroll" and "wait"
   *   are only accepted for "20250124".
   * @param {object} [config] - Partial execution config; each section is
   *   merged over DEFAULT_EXECUTION_CONFIG.
   */
  constructor(page, version = "20250124", config) {
    this.name = "computer";
    this.mouseActions = /* @__PURE__ */ new Set([
      "left_click" /* LEFT_CLICK */,
      "right_click" /* RIGHT_CLICK */,
      "middle_click" /* MIDDLE_CLICK */,
      "double_click" /* DOUBLE_CLICK */,
      "triple_click" /* TRIPLE_CLICK */,
      "mouse_move" /* MOUSE_MOVE */,
      "left_click_drag" /* LEFT_CLICK_DRAG */,
      "left_mouse_down" /* LEFT_MOUSE_DOWN */,
      "left_mouse_up" /* LEFT_MOUSE_UP */
    ]);
    this.keyboardActions = /* @__PURE__ */ new Set([
      "key" /* KEY */,
      "type" /* TYPE */,
      "hold_key" /* HOLD_KEY */
    ]);
    this.systemActions = /* @__PURE__ */ new Set([
      "screenshot" /* SCREENSHOT */,
      "cursor_position" /* CURSOR_POSITION */,
      "scroll" /* SCROLL */,
      "wait" /* WAIT */,
      "extract_url" /* EXTRACT_URL */
    ]);
    this.page = page;
    this.version = version;
    this.config = {
      typing: {
        ...DEFAULT_EXECUTION_CONFIG.typing,
        ...(config == null ? void 0 : config.typing) || {}
      },
      screenshot: {
        ...DEFAULT_EXECUTION_CONFIG.screenshot,
        ...(config == null ? void 0 : config.screenshot) || {}
      },
      mouse: {
        ...DEFAULT_EXECUTION_CONFIG.mouse,
        ...(config == null ? void 0 : config.mouse) || {}
      },
      scrolling: {
        ...DEFAULT_EXECUTION_CONFIG.scrolling,
        ...(config == null ? void 0 : config.scrolling) || {}
      }
    };
  }
  /** API tool type string matching this instance's version. */
  get apiType() {
    return this.version === "20241022" ? "computer_20241022" : "computer_20250124";
  }
  /** Tool definition sent to the model; the display size is fixed at 1280x720. */
  toParams() {
    const params = {
      name: this.name,
      type: this.apiType,
      display_width_px: 1280,
      display_height_px: 720,
      display_number: null
    };
    return params;
  }
  /**
   * Returns `value` when it is a finite, non-negative number and `fallback`
   * otherwise. Unlike `value || fallback`, an explicit 0 is honoured, so
   * callers can disable a delay.
   */
  resolveDelay(value, fallback) {
    return typeof value === "number" && Number.isFinite(value) && value >= 0 ? value : fallback;
  }
  /**
   * @param {string} action - A click-type mouse action.
   * @returns {"left"|"right"|"middle"}
   * @throws {ToolError} For actions that have no associated button.
   */
  getMouseButton(action) {
    switch (action) {
      case "left_click" /* LEFT_CLICK */:
      case "double_click" /* DOUBLE_CLICK */:
      case "triple_click" /* TRIPLE_CLICK */:
      case "left_click_drag" /* LEFT_CLICK_DRAG */:
      case "left_mouse_down" /* LEFT_MOUSE_DOWN */:
      case "left_mouse_up" /* LEFT_MOUSE_UP */:
        return "left";
      case "right_click" /* RIGHT_CLICK */:
        return "right";
      case "middle_click" /* MIDDLE_CLICK */:
        return "middle";
      default:
        throw new ToolError(`Invalid mouse action: ${action}`);
    }
  }
  /**
   * Moves the pointer to `coordinate`, performs the button action, then
   * returns a screenshot.
   *
   * NOTE(review): "mouse_move" has no button mapping, so it reaches the
   * ToolError in getMouseButton after the pointer has already moved, and
   * "left_click_drag" is executed as a plain click at `coordinate`.
   */
  async handleMouseAction(action, coordinate) {
    const [x, y] = ActionValidator.validateAndGetCoordinates(coordinate);
    await this.page.mouse.move(x, y);
    await this.page.waitForTimeout(20);
    if (action === "left_mouse_down" /* LEFT_MOUSE_DOWN */) {
      await this.page.mouse.down();
    } else if (action === "left_mouse_up" /* LEFT_MOUSE_UP */) {
      await this.page.mouse.up();
    } else {
      const button = this.getMouseButton(action);
      if (action === "double_click" /* DOUBLE_CLICK */) {
        await this.page.mouse.dblclick(x, y, { button });
      } else if (action === "triple_click" /* TRIPLE_CLICK */) {
        await this.page.mouse.click(x, y, { button, clickCount: 3 });
      } else {
        await this.page.mouse.click(x, y, { button });
      }
    }
    await this.page.waitForTimeout(100);
    return await this.screenshot();
  }
  /**
   * Runs "hold_key", "key" or "type", waits for the configured completion
   * delay, then returns a screenshot.
   *
   * @param {string} action
   * @param {string} text - Key name, "+"-separated combination, or text.
   * @param {number} [duration] - Hold time in seconds for "hold_key".
   */
  async handleKeyboardAction(action, text, duration) {
    const typingConfig = this.config.typing;
    if (action === "hold_key" /* HOLD_KEY */) {
      const key = KeyboardUtils.getPlaywrightKey(text);
      await this.page.keyboard.down(key);
      await new Promise((resolve) => setTimeout(resolve, duration * 1e3));
      await this.page.keyboard.up(key);
    } else if (action === "key" /* KEY */) {
      const keys = KeyboardUtils.parseKeyCombination(text);
      for (const key of keys) {
        await this.page.keyboard.down(key);
      }
      // Release in the opposite order to the presses.
      for (const key of keys.reverse()) {
        await this.page.keyboard.up(key);
      }
    } else if (typingConfig.mode === "fill") {
      try {
        const focusedElement = await this.page.locator(":focus").first();
        if (await focusedElement.count() > 0) {
          await focusedElement.fill(text);
        } else {
          await this.page.keyboard.type(text, { delay: 0 });
        }
      } catch {
        // Best effort: elements that cannot be filled still get the
        // text through plain keystrokes.
        await this.page.keyboard.type(text, { delay: 0 });
      }
    } else {
      await this.page.keyboard.type(text, {
        delay: this.resolveDelay(typingConfig.characterDelay, 12)
      });
    }
    const completionDelay = this.resolveDelay(typingConfig.completionDelay, 100);
    await this.page.waitForTimeout(completionDelay);
    return await this.screenshot();
  }
  /**
   * Waits for the configured screenshot delay (seconds) and captures the
   * viewport as PNG.
   *
   * @returns {Promise<{base64Image: string}>}
   * @throws {ToolError} When the capture fails.
   */
  async screenshot() {
    try {
      console.log("Starting screenshot...");
      const screenshotDelay = this.resolveDelay(this.config.screenshot.delay, 0.3);
      await new Promise(
        (resolve) => setTimeout(resolve, screenshotDelay * 1e3)
      );
      const screenshot = await this.page.screenshot({
        type: "png",
        fullPage: false
        // viewport only for speed
      });
      console.log("Screenshot taken, size:", screenshot.length, "bytes");
      return {
        base64Image: screenshot.toString("base64")
      };
    } catch (error) {
      throw new ToolError(`Failed to take screenshot: ${error}`);
    }
  }
  /**
   * Validates `params` and dispatches to the matching action handler.
   *
   * @param {object} params - Tool input; reads `action`, `text`,
   *   `coordinate`, `duration`, and the scroll fields in either spelling
   *   (`scrollDirection`/`scroll_direction`, `scrollAmount`/`scroll_amount`).
   * @returns {Promise<object>} `{ base64Image }` or `{ output }`.
   * @throws {ToolError} On invalid input or an unsupported action.
   */
  async call(params) {
    const {
      action,
      text,
      coordinate,
      scrollDirection: scrollDirectionParam,
      scroll_amount,
      scrollAmount,
      duration,
      ...kwargs
    } = params;
    ActionValidator.validateActionParams(
      params,
      this.mouseActions,
      this.keyboardActions
    );
    if (action === "screenshot" /* SCREENSHOT */) {
      return await this.screenshot();
    }
    if (action === "cursor_position" /* CURSOR_POSITION */) {
      const position = await this.page.evaluate(() => {
        const selection = window.getSelection();
        // getRangeAt(0) throws when nothing is selected, so check first and
        // let the caller report the failure as a ToolError.
        if (!selection || selection.rangeCount === 0) {
          return null;
        }
        const rect = selection.getRangeAt(0).getBoundingClientRect();
        return rect ? { x: rect.x, y: rect.y } : null;
      });
      if (!position) {
        throw new ToolError("Failed to get cursor position");
      }
      return { output: `X=${position.x},Y=${position.y}` };
    }
    if (action === "scroll" /* SCROLL */) {
      if (this.version !== "20250124") {
        throw new ToolError(`${action} is only available in version 20250124`);
      }
      let scrollDirection = scrollDirectionParam || kwargs.scroll_direction;
      let scrollAmountValue = scrollAmount || scroll_amount;
      // A negative amount means "the other way": flip down/right, and
      // default a missing direction to "up".
      if (typeof scrollAmountValue === "number" && scrollAmountValue < 0) {
        scrollAmountValue = Math.abs(scrollAmountValue);
        if (scrollDirection === "down" || scrollDirection === "right") {
          scrollDirection = scrollDirection === "down" ? "up" : "left";
        } else if (!scrollDirection) {
          scrollDirection = "up";
        }
      }
      if (!scrollDirection || !["up", "down", "left", "right"].includes(scrollDirection)) {
        throw new ToolError(
          `Scroll direction "${scrollDirection}" must be 'up', 'down', 'left', or 'right'`
        );
      }
      if (typeof scrollAmountValue !== "number" || scrollAmountValue < 0) {
        throw new ToolError(
          `Scroll amount "${scrollAmountValue}" must be a non-negative number`
        );
      }
      if (coordinate) {
        const [x, y] = ActionValidator.validateAndGetCoordinates(coordinate);
        await this.page.mouse.move(x, y);
        await this.page.waitForTimeout(100);
      }
      const pageDimensions = await this.page.evaluate(() => {
        return { h: window.innerHeight, w: window.innerWidth };
      });
      // The amount is treated as a percentage of the viewport, capped at 95%.
      const scrollFactor = Math.min(scrollAmountValue / 100, 0.95);
      if (scrollDirection === "down" || scrollDirection === "up") {
        const amount = pageDimensions.h * scrollFactor;
        await this.page.mouse.wheel(
          0,
          scrollDirection === "down" ? amount : -amount
        );
      } else {
        const amount = pageDimensions.w * scrollFactor;
        await this.page.mouse.wheel(
          scrollDirection === "right" ? amount : -amount,
          0
        );
      }
      await this.page.waitForTimeout(100);
      return await this.screenshot();
    }
    if (action === "wait" /* WAIT */) {
      if (this.version !== "20250124") {
        throw new ToolError(`${action} is only available in version 20250124`);
      }
      await new Promise((resolve) => setTimeout(resolve, duration * 1e3));
      return await this.screenshot();
    }
    if (this.mouseActions.has(action)) {
      if (!coordinate) {
        throw new ToolError(`coordinate is required for ${action}`);
      }
      return await this.handleMouseAction(action, coordinate);
    }
    if (this.keyboardActions.has(action)) {
      if (!text) {
        throw new ToolError(`text is required for ${action}`);
      }
      return await this.handleKeyboardAction(action, text, duration);
    }
    throw new ToolError(`Invalid action: ${action}`);
  }
};
// ComputerTool fixed to tool version "20241022" (apiType "computer_20241022";
// ComputerTool.call rejects "scroll" and "wait" for this version).
var ComputerTool20241022 = class extends ComputerTool {
constructor(page, config) {
super(page, "20241022", config);
}
};
// ComputerTool fixed to tool version "20250124" (apiType "computer_20250124").
var ComputerTool20250124 = class extends ComputerTool {
constructor(page, config) {
super(page, "20250124", config);
}
};
// tools/collection.ts
// Tool-group version selected when the caller does not specify one
// (presumably — the consumer of this constant is outside this chunk).
var DEFAULT_TOOL_VERSION = "computer_use_20250429";
// Each group pairs a tool-group version with the tool classes it provides and
// the beta flag string associated with it.
var TOOL_GROUPS = [
{
version: "computer_use_20241022",
tools: [ComputerTool20241022],
beta_flag: "computer-use-2024-10-22"
},
{
version: "computer_use_20250124",
tools: [ComputerTool20250124],
beta_flag: "computer-use-2025-01-24"
},
// 20250429 version inherits from 20250124
{
version: "computer_use_20250429",
tools: [ComputerTool20250124],
beta_flag: "computer-use-2025-01-24"
}
];
// Lookup table: version string -> its TOOL_GROUPS entry.
var TOOL_GROUPS_BY_VERSION = Object.fromEntries(
TOOL_GROUPS.map((group) => [group.version, group])
);
/**
 * Name-indexed set of tools with a single dispatch entry point.
 * Every tool must expose `name`, `toParams()` and `call(params)`.
 */
var ToolCollection = class {
  /** @param {...object} tools - Tool instances; a later duplicate name wins. */
  constructor(...tools) {
    const entries = tools.map((tool) => [tool.name, tool]);
    this.tools = new Map(entries);
  }
  /** Returns the API definition of every tool, in insertion order. */
  toParams() {
    const definitions = [];
    for (const tool of this.tools.values()) {
      definitions.push(tool.toParams());
    }
    return definitions;
  }
  /** Stores the page that run() injects into tool input as `_page`. */
  setPage(page) {
    this.page = page;
  }
  /**
   * Looks up `name`, checks the input shape for that kind of tool, and
   * invokes the tool with a shallow copy of the input (plus `_page` when a
   * page has been set).
   *
   * - "playwright": requires `method` and an `args` array;
   * - tools whose definition has `type === "custom"`: no extra checks;
   * - anything else: `action` must be one of the Action values.
   *
   * @throws {Error} When the tool is unknown or the input fails its check.
   */
  async run(name, toolInput) {
    const tool = this.tools.get(name);
    if (!tool) {
      throw new Error(`Tool ${name} not found`);
    }
    console.log(`\n=== Running tool: ${name} ===`);
    console.log("Input:", JSON.stringify(toolInput, null, 2));
    const toolCallParams = this.page ? { ...toolInput, _page: this.page } : { ...toolInput };
    const toolDef = tool.toParams();
    if (name === "playwright") {
      const hasMethod = Boolean(toolInput.method);
      if (!hasMethod || !Array.isArray(toolInput.args)) {
        throw new Error(
          `Invalid input for playwright tool: method and args are required`
        );
      }
    } else if (!("type" in toolDef && toolDef.type === "custom")) {
      const requested = toolInput.action;
      if (!requested || !Object.values(Action).includes(requested)) {
        throw new Error(
          `Invalid action ${requested} for tool ${name}`
        );
      }
    }
    return await tool.call(toolCallParams);
  }
};
// utils/message-processing.ts
/**
 * Converts the content blocks of an API response into blocks suitable for
 * sending back as message content.
 *
 * - non-empty text blocks are rebuilt with only `type`, `text`, `citations`
 *   (`citations` defaults to null);
 * - thinking blocks are copied with a missing `signature` replaced by "";
 * - every other block (including text blocks with empty text) is returned
 *   as the same object.
 *
 * @param {{content: object[]}} response
 * @returns {object[]}
 */
function responseToParams(response) {
  const converted = [];
  for (const block of response.content) {
    if (block.type === "text" && block.text) {
      converted.push({
        type: "text",
        text: block.text,
        citations: block.citations || null
      });
    } else if (block.type === "thinking") {
      const copy = { ...block };
      copy.thinking = block.thinking;
      copy.signature = block.signature || "";
      converted.push(copy);
    } else {
      converted.push(block);
    }
  }
  return converted;
}
/**
 * Removes the oldest screenshot images from tool_result blocks, in place, so
 * that roughly `imagesToKeep` of the most recent ones remain.
 *
 * Images are removed in multiples of `minRemovalThreshold`, so fewer images
 * than the raw excess may be dropped.
 *
 * @param {object[]} messages - Conversation history; mutated.
 * @param {number} imagesToKeep - Target image count; falsy disables pruning.
 * @param {number} [minRemovalThreshold] - Removal chunk size. A missing,
 *   non-finite or non-positive value is treated as 1; without this the
 *   removal count became NaN and nothing was ever pruned.
 */
function maybeFilterToNMostRecentImages(messages, imagesToKeep, minRemovalThreshold) {
  if (!imagesToKeep) return;
  const chunkSize = typeof minRemovalThreshold === "number" && Number.isFinite(minRemovalThreshold) && minRemovalThreshold > 0 ? minRemovalThreshold : 1;
  const isBlockOfType = (item, type) => typeof item === "object" && item !== null && item.type === type;
  const toolResultBlocks = messages.flatMap(
    (message) => Array.isArray(message == null ? void 0 : message.content) ? message.content : []
  ).filter((item) => isBlockOfType(item, "tool_result"));
  const totalImages = toolResultBlocks.reduce((count, toolResult) => {
    if (!Array.isArray(toolResult.content)) return count;
    return count + toolResult.content.filter((content) => isBlockOfType(content, "image")).length;
  }, 0);
  let imagesToRemove = Math.floor((totalImages - imagesToKeep) / chunkSize) * chunkSize;
  // Blocks are visited oldest first, so the most recent images survive.
  for (const toolResult of toolResultBlocks) {
    if (imagesToRemove <= 0) break;
    if (!Array.isArray(toolResult.content)) continue;
    toolResult.content = toolResult.content.filter((content) => {
      if (imagesToRemove > 0 && isBlockOfType(content, "image")) {
        imagesToRemove--;
        return false;
      }
      return true;
    });
  }
}
// Beta flag string associated with prompt caching.
var PROMPT_CACHING_BETA_FLAG = "prompt-caching-2024-07-31";
/**
 * Marks the three most recent user messages (those with array content) as
 * cache breakpoints by setting `cache_control: { type: "ephemeral" }` on
 * their last content block. The next older such message has `cache_control`
 * deleted from its last block, and the scan stops there.
 *
 * @param {object[]} messages - Conversation history; blocks are mutated.
 */
function injectPromptCaching(messages) {
  let budget = 3;
  let index = messages.length;
  while (index-- > 0) {
    const candidate = messages[index];
    if (!candidate || candidate.role !== "user" || !Array.isArray(candidate.content)) {
      continue;
    }
    const tail = candidate.content[candidate.content.length - 1];
    if (budget === 0) {
      // First message beyond the budget: clear its stale marker and stop.
      if (tail) {
        delete tail.cache_control;
      }
      return;
    }
    budget -= 1;
    if (tail) {
      tail.cache_control = { type: "ephemeral" };
    }
  }
}
/**
 * Shrinks `messages` in place to at most `maxMessages` entries, dropping the
 * oldest ones, and then normalises the block order of assistant messages.
 * Histories that already fit are left completely untouched.
 *
 * With `preserveSystemMessage`, the first user message (the original task)
 * is kept as the first entry, followed by the newest `maxMessages - 1`
 * messages. The previous implementation kept `messages[0]` regardless of its
 * role — possibly discarding the first user message — and left one message
 * too many.
 *
 * @param {object[]} messages - Conversation history; mutated.
 * @param {number} [maxMessages=20] - Upper bound on the resulting length.
 *   When preserving, the first user message is kept even if this is < 1.
 * @param {boolean} [preserveSystemMessage=true] - Keep the first user message.
 */
function truncateMessageHistory(messages, maxMessages = 20, preserveSystemMessage = true) {
  if (messages.length <= maxMessages) return;
  const firstUserIndex = preserveSystemMessage ? messages.findIndex((msg) => msg.role === "user") : -1;
  const tailLength = Math.max(maxMessages - 1, 0);
  const tailStart = messages.length - tailLength;
  if (firstUserIndex !== -1 && firstUserIndex < tailStart) {
    // Replace everything before the tail with the preserved message.
    messages.splice(0, tailStart, messages[firstUserIndex]);
  } else {
    // Nothing to preserve, or the first user message is already inside the
    // window that survives: simply drop the oldest entries.
    messages.splice(0, messages.length - maxMessages);
  }
  const pick = (blocks, ...types) => blocks.filter(
    (block) => typeof block === "object" && block !== null && types.includes(block.type)
  );
  for (const message of messages) {
    if (message.role === "assistant" && Array.isArray(message.content)) {
      // Thinking blocks first, then text, tool_use, tool_result. Blocks of
      // any other type are not carried over.
      message.content = [
        ...pick(message.content, "thinking", "redacted_thinking"),
        ...pick(message.content, "text"),
        ...pick(message.content, "tool_use"),
        ...pick(message.content, "tool_result")
      ];
    }
  }
}
/**
 * Repairs a (possibly truncated) history in place:
 *
 * 1. tool_result blocks whose `tool_use_id` has no matching tool_use block
 *    anywhere in the history are removed;
 * 2. assistant content is reordered to thinking, text, tool_use, tool_result
 *    (blocks of any other type are not carried over);
 * 3. a message whose content becomes empty through steps 1-2 is removed
 *    entirely, since an empty content array is not a valid message.
 *
 * Messages with non-array content are left untouched.
 *
 * @param {object[]} messages - Conversation history; mutated.
 */
function cleanMessageHistory(messages) {
  const isBlock = (block) => typeof block === "object" && block !== null;
  const pick = (blocks, ...types) => blocks.filter(
    (block) => isBlock(block) && types.includes(block.type)
  );
  const toolUseIds = /* @__PURE__ */ new Set();
  for (const message of messages) {
    if (!Array.isArray(message.content)) continue;
    for (const block of message.content) {
      if (isBlock(block) && block.type === "tool_use" && block.id) {
        toolUseIds.add(block.id);
      }
    }
  }
  // Walk backwards so that removing a message does not shift unvisited ones.
  for (let i = messages.length - 1; i >= 0; i--) {
    const message = messages[i];
    if (!Array.isArray(message.content)) continue;
    const originalLength = message.content.length;
    let cleanedContent = message.content.filter((block) => {
      if (isBlock(block) && block.type === "tool_result" && block.tool_use_id) {
        return toolUseIds.has(block.tool_use_id);
      }
      return true;
    });
    if (message.role === "assistant") {
      cleanedContent = [
        ...pick(cleanedContent, "thinking", "redacted_thinking"),
        ...pick(cleanedContent, "text"),
        ...pick(cleanedContent, "tool_use"),
        ...pick(cleanedContent, "tool_result")
      ];
    }
    if (cleanedContent.length === 0 && originalLength > 0) {
      messages.splice(i, 1);
      continue;
    }
    message.content = cleanedContent;
  }
}
// utils/tool-results.ts
/**
 * Wraps a tool's result in a `tool_result` content block.
 *
 * An `error` wins over everything else: the block then carries only the
 * error text and `is_error: true`. Otherwise `output` (as text) and
 * `base64Image` (as a PNG image) are included when present, in that order.
 *
 * @param {{error?: string, output?: string, base64Image?: string, system?: string}} result
 * @param {string} toolUseId - Id of the tool_use block being answered.
 * @returns {{type: "tool_result", content: object[], tool_use_id: string, is_error: boolean}}
 */
function makeApiToolResult(result, toolUseId) {
  const asText = (body) => ({
    type: "text",
    text: maybePrependSystemToolResult(result, body)
  });
  const isError = Boolean(result.error);
  const toolResultContent = [];
  if (isError) {
    toolResultContent.push(asText(result.error));
  } else {
    if (result.output) {
      toolResultContent.push(asText(result.output));
    }
    if (result.base64Image) {
      toolResultContent.push({
        type: "image",
        source: {
          type: "base64",
          media_type: "image/png",
          data: result.base64Image
        }
      });
    }
  }
  return {
    type: "tool_result",
    content: toolResultContent,
    tool_use_id: toolUseId,
    is_error: isError
  };
}
/**
 * Prefixes `resultText` with `<system>…</system>` and a newline when the
 * result carries a truthy `system` note; otherwise returns it unchanged.
 *
 * @param {{system?: string}} result
 * @param {string} resultText
 * @returns {string}
 */
function maybePrependSystemToolResult(result, resultText) {
  const note = result.system;
  return note ? `<system>${note}</system>\n${resultText}` : resultText;
}
// utils/logger.ts
/**
 * Console logger that prints one line per event:
 * `[ISO timestamp] TYPE: message | <JSON data>`. The data part is included
 * only when `includeData` is true and data is truthy; long strings and
 * inline image data are shortened first.
 */
var SimpleLogger = class {
  /** @param {boolean} [includeData=true] - Append the data payload to lines. */
  constructor(includeData = true) {
    this.includeData = includeData;
  }
  /** Formats and prints a single log line. */
  log(type, message, data) {
    const parts = [`[${(/* @__PURE__ */ new Date()).toISOString()}] ${type.toUpperCase()}: ${message}`];
    if (this.includeData && data) {
      parts.push(JSON.stringify(this.truncateScreenshots(data), null, 2));
    }
    console.log(parts.join(" | "));
  }
  /**
   * Returns a copy of `data` in which, at any depth, `data:image/` strings
   * longer than 100 characters are replaced by a size placeholder and other
   * strings longer than 500 characters are cut to 500 with a length note.
   * Non-object input is returned unchanged.
   */
  truncateScreenshots(data) {
    if (data === null || typeof data !== "object") {
      return data;
    }
    if (Array.isArray(data)) {
      return data.map((entry) => this.truncateScreenshots(entry));
    }
    const shortenString = (text) => {
      if (text.startsWith("data:image/") && text.length > 100) {
        // Everything after the first comma is the payload.
        const comma = text.indexOf(",");
        const prefix = comma === -1 ? text : text.slice(0, comma);
        const payloadLength = comma === -1 ? 0 : text.length - comma - 1;
        return `${prefix},<base64-image-data-${payloadLength}-bytes>`;
      }
      if (text.length > 500) {
        return `${text.substring(0, 500)}...<truncated-${text.length}-chars>`;
      }
      return text;
    };
    const result = {};
    for (const key of Object.keys(data)) {
      const value = data[key];
      switch (typeof value) {
        case "string":
          result[key] = shortenString(value);
          break;
        case "object":
          result[key] = this.truncateScreenshots(value);
          break;
        default:
          result[key] = value;
      }
    }
    return result;
  }
  // Convenience methods for common log types
  /** Logs the start of an agent run. */
  agentStart(query, model, options) {
    const text = `\u{1F916} Started execution: "${query}" (model: ${model})`;
    this.log("agent", text, options);
  }
  /** Logs a successful agent run; `duration` is in milliseconds. */
  agentComplete(query, duration, messageCount) {
    const seconds = (duration / 1e3).toFixed(2);
    const text = `\u2705 Completed: "${query}" in ${seconds}s (${messageCount} messages)`;
    this.log("agent", text);
  }
  /** Logs a failed agent run together with the error's stack. */
  agentError(query, error, duration) {
    const seconds = (duration / 1e3).toFixed(2);
    const text = `\u274C Failed: "${query}" after ${seconds}s - ${error.message}`;
    this.log("agent", text, { stack: error.stack });
  }
  /** Logs receipt of a model response. */
  llmResponse(stopReason, step, content) {
    const text = `\u{1F9E0} Response received [step ${step}] (stop: ${stopReason})`;
    this.log("llm", text, content);
  }
  /** Logs the start of a tool call. */
  toolStart(toolName, step, input) {
    this.log("tool", `\u{1F527} Starting ${toolName} [step ${step}]`, input);
  }
  /** Logs a completed tool call. */
  toolComplete(toolName, step, duration, output) {
    const text = `\u2705 Completed ${toolName} [step ${step}] (${duration}ms)`;
    this.log("tool", text, output);
  }
  /** Logs a failed tool call (message only, no data payload). */
  toolError(toolName, step, error, duration) {
    const text = `\u274C Failed ${toolName} [step ${step}] (${duration}ms) - ${error.message}`;
    this.log("tool", text);
  }
  /** Logs a control signal; "pause" and "resume" have their own icons. */
  signal(signal, step, reason) {
    let emoji = "\u{1F6D1}";
    if (signal === "pause") {
      emoji = "\u23F8\uFE0F";
    } else if (signal === "resume") {
      emoji = "\u25B6\uFE0F";
    }
    const reasonStr = reason ? ` (${reason})` : "";
    this.log("signal", `${emoji} ${signal.toUpperCase()} [step ${step}]${reasonStr}`);
  }
  /** Logs a debug message. */
  debug(message, data) {
    this.log("debug", `\u{1F41B} ${message}`, data);
  }
};
/**
 * Logger with the same method names as SimpleLogger whose methods all do
 * nothing; use it to silence logging without touching call sites.
 */
var NoOpLogger = class {
  log() {}
  agentStart() {}
  agentComplete() {}
  agentError() {}
  llmResponse() {}
  toolStart() {}
  toolComplete() {}
  toolError() {}
  signal() {}
  debug() {}
};
// utils/retry.ts
// Defaults for withRetry; caller-supplied config is spread over these values.
var DEFAULT_RETRY_CONFIG = {
// Number of retries after the first attempt (withRetry loops attempt 0..maxRetries).
maxRetries: 3,
// Delay before the first retry; passed to setTimeout.
initialDelayMs: 1e3,
// Upper bound applied to the computed backoff delay.
maxDelayMs: 3e4,
// Delay grows as initialDelayMs * backoffMultiplier^attempt.
backoffMultiplier: 2,
// NOTE(review): preferIPv4 is not read by any code visible in this chunk.
preferIPv4: false,
// Substrings looked for in an error's message or stack to decide whether to retry.
retryableErrors: [
"Connection error",
"ECONNREFUSED",
"ETIMEDOUT",
"ECONNRESET",
"ENOTFOUND",
"ENETUNREACH",
"EAI_AGAIN",
"socket hang up",
"read ECONNRESET"
]
};
/**
 * Decides whether an error should be retried by substring-matching its
 * `message` and `stack` against the given patterns.
 *
 * @param {*} error - Thrown value; anything that is not a non-null object
 *   is never retryable.
 * @param {string[]} retryableErrors - Substrings to look for.
 * @returns {boolean}
 */
function isRetryableError(error, retryableErrors) {
  if (!error || typeof error !== "object") {
    return false;
  }
  const message = error.message || "";
  const stack = error.stack || "";
  for (const pattern of retryableErrors) {
    if (message.includes(pattern) || stack.includes(pattern)) {
      return true;
    }
  }
  return false;
}
/**
 * Runs `fn`, retrying with exponential backoff while the failure is
 * retryable (per isRetryableError) and retries remain.
 *
 * `fn` is always attempted at least once. Config entries that are explicitly
 * `undefined` are ignored instead of overriding the defaults — previously
 * `{ maxRetries: undefined }` meant `fn` was never called and `undefined`
 * was thrown. A negative or non-numeric `maxRetries` is treated as 0.
 *
 * @param {() => Promise<*>} fn - Operation to run.
 * @param {object} [config] - Partial override of DEFAULT_RETRY_CONFIG.
 * @param {{debug: Function}} [logger] - Receives one debug entry per retry.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws The last error from `fn` when it is not retryable or retries ran out.
 */
async function withRetry(fn, config, logger) {
  const finalConfig = { ...DEFAULT_RETRY_CONFIG };
  for (const [key, value] of Object.entries(config || {})) {
    if (value !== void 0) {
      finalConfig[key] = value;
    }
  }
  if (typeof finalConfig.maxRetries !== "number" || !(finalConfig.maxRetries >= 0)) {
    finalConfig.maxRetries = 0;
  }
  let lastError;
  for (let attempt = 0; attempt <= finalConfig.maxRetries; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      const canRetry = attempt < finalConfig.maxRetries && isRetryableError(error, finalConfig.retryableErrors);
      if (!canRetry) {
        throw error;
      }
      const delay = Math.min(
        finalConfig.initialDelayMs * Math.pow(finalConfig.backoffMultiplier, attempt),
        finalConfig.maxDelayMs
      );
      logger == null ? void 0 : logger.debug(`Retry attempt ${attempt + 1}/${finalConfig.maxRetries} after ${delay}ms`, {
        error: error instanceof Error ? error.message : String(error),
        attempt: attempt + 1,
        delay
      });
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
  // Only reachable with a fractional maxRetries, where the last retry is
  // scheduled but the loop bound is exceeded before it runs.
  throw lastError;
}
// tools/playwright-capabilities.ts
import { z } from "zod";
// Method name -> capability definition for the "playwright" tool. The entries
// added below carry method, displayName, description, usage, schema and handler.
var PLAYWRIGHT_CAPABILITIES = /* @__PURE__ */ new Map();
// "goto": navigate the page to a URL, adding https:// when no protocol is given.
PLAYWRIGHT_CAPABILITIES.set("goto", {
  method: "goto",
  displayName: "Navigate to URL",
  description: "Navigate directly to any URL or website with optional wait condition",
  usage: `Use this to navigate to any website directly without using the URL bar
Call format: {"name": "playwright", "input": {"method": "goto", "args": ["url or domain", "waitUntil"]}}
- First argument: URL or domain (required)
- Second argument: waitUntil condition (optional, defaults to "networkidle")
Valid waitUntil options:
* "load" - wait for the load event (all resources loaded)
* "domcontentloaded" - wait for DOMContentLoaded event (DOM ready)
* "networkidle" - wait for no network activity for 500ms (default)
* "commit" - wait for network response and document starts loading
Examples:
- Basic navigation: ["https://example.com"]
- Fast navigation: ["https://example.com", "domcontentloaded"]
- Full load: ["https://example.com", "load"]
The tool will automatically add https:// if no protocol is specified
This is faster and more reliable than using ctrl+l and typing in the URL bar`,
  schema: z.union([
    z.tuple([z.string()]),
    z.tuple([z.string(), z.enum(["load", "domcontentloaded", "networkidle", "commit"])])
  ]),
  /**
   * @param {object} page - Playwright page.
   * @param {Array} args - [url, waitUntil?].
   * @returns {Promise<{output: string}>} Summary with the final URL and title.
   * @throws {Error} On a bad argument count, URL or waitUntil value.
   */
  handler: async (page, args) => {
    if (args.length < 1 || args.length > 2) {
      throw new Error("goto method requires 1-2 arguments: URL and optional waitUntil condition");
    }
    const url = args[0];
    const waitUntil = args[1];
    if (!url || typeof url !== "string") {
      throw new Error("URL must be a non-empty string");
    }
    const validWaitUntilOptions = ["load", "domcontentloaded", "networkidle", "commit"];
    if (waitUntil && !validWaitUntilOptions.includes(waitUntil)) {
      throw new Error(`Invalid waitUntil option: ${waitUntil}. Valid options: ${validWaitUntilOptions.join(", ")}`);
    }
    // `new URL("localhost:3000")` and `new URL("example.com:8080")` both
    // succeed — the host is parsed as a custom scheme — so a successful parse
    // cannot be used to detect a missing protocol. Only "scheme://" and a
    // few well-known slash-less schemes count as having one.
    const trimmedURL = url.trim();
    const hasProtocol = /^[a-zA-Z][a-zA-Z\d+.-]*:\/\//.test(trimmedURL) || /^(about|data|blob|file):/i.test(trimmedURL);
    let normalizedURL;
    try {
      normalizedURL = new URL(hasProtocol ? trimmedURL : `https://${trimmedURL}`).href;
    } catch {
      throw new Error(`Invalid URL format: ${url}`);
    }
    const actualWaitUntil = waitUntil || "networkidle";
    await page.goto(normalizedURL, {
      waitUntil: actualWaitUntil,
      timeout: 3e4
    });
    await page.waitForTimeout(1e3);
    const currentURL = page.url();
    const title = await page.title();
    return {
      output: `Successfully navigated to ${currentURL} (waitUntil: ${actualWaitUntil}). Page title: "${title}"`
    };
  }
});
// "extract_url": find the link behind a piece of visible text and report its URL.
PLAYWRIGHT_CAPABILITIES.set("extract_url", {
  method: "extract_url",
  displayName: "Extract URL",
  description: "Extract URLs from visible text, links, or buttons on the page",
  usage: `First, take a screenshot to see what's on the page
Identify the visible text of the link/button you want to extract the URL from
Call format: {"name": "playwright", "input": {"method": "extract_url", "args": ["exact visible text"]}}
The tool will search for the text and extract the associated URL`,
  schema: z.tuple([z.string()]),
  /**
   * Lookup order: the element with exactly this text (its own href), that
   * element's nearest ancestor anchor, then any anchor containing the text.
   *
   * @param {object} page - Playwright page.
   * @param {Array} args - [visibleText].
   * @returns {Promise<{output: string}>} The URL, resolved against the
   *   current page when the href is relative.
   * @throws {Error} On a bad argument count or when no URL is found.
   */
  handler: async (page, args) => {
    if (args.length !== 1) {
      throw new Error("extract_url method requires exactly one argument");
    }
    const selector = args[0];
    console.log(`
=== Extract URL: Looking for text: "${selector}" ===`);
    // The text is embedded in quoted selector strings, so quotes and
    // backslashes in it must be escaped or they end the string early.
    const quotedText = String(selector).replace(/[\\"]/g, "\\$&");
    let url = null;
    let elementInfo = "";
    const textElement = await page.locator(`text="${quotedText}"`).first();
    if (await textElement.count() > 0) {
      url = await textElement.getAttribute("href");
      if (!url) {
        const parentAnchor = await textElement.locator("xpath=ancestor::a[1]").first();
        if (await parentAnchor.count() > 0) {
          url = await parentAnchor.getAttribute("href");
          elementInfo = "parent anchor of element with exact text";
        }
      } else {
        elementInfo = "element with exact matching text";
      }
    }
    if (!url) {
      const anchorWithText = await page.locator(`a:has-text("${quotedText}")`).first();
      if (await anchorWithText.count() > 0) {
        url = await anchorWithText.getAttribute("href");
        elementInfo = "anchor tag with text";
      }
    }
    if (!url) {
      throw new Error(
        `Could not find any URL associated with text: "${selector}"`
      );
    }
    // hrefs without a scheme ("/path", "page.html", "//host/x", "?q=1") are
    // resolved against the current page; absolute URLs are reported as-is.
    if (!/^[a-zA-Z][a-zA-Z\d+.-]*:/.test(url)) {
      try {
        url = new URL(url, page.url()).href;
      } catch {
        // The page URL is not usable as a base: report the raw href rather
        // than failing the whole extraction.
      }
    }
    return {
      output: `Successfully extracted URL: ${url} (from ${elementInfo})`
    };
  }
});
PLAYWRIGHT_CAPABILITIES.set("scroll_to_text", {
  method: "scroll_to_text",
  displayName: "Scroll to Text",
  description: "Instantly scroll to specific text in dropdowns, lists, or on the page",
  usage: `When you need to find specific text in a dropdown/list, use this FIRST
Call format: {"name": "playwright", "input": {"method": "scroll_to_text", "args": ["exact text"]}}
Only provide the text you're looking for - no CSS selectors needed
This instantly scrolls the text into view without multiple attempts
If it fails, fall back to regular computer scroll`,
  schema: z.tuple([z.string()]),
  /**
   * Scroll the first text node under <body> that contains the target text
   * into the centre of the viewport.
   *
   * @param page Playwright page to operate on.
   * @param args Single-element array: the text to look for (substring match,
   *   case-sensitive).
   * @returns `{ output }` with a success message, or a "not found" message
   *   suggesting a manual scroll. A miss is reported, not thrown.
   * @throws Error when the text argument is missing or empty.
   */
  handler: async (page, args) => {
    const [targetText] = args;
    if (!targetText) {
      throw new Error("target_text argument is required");
    }
    console.log(`[scroll_to_text] Looking for text: "${targetText}"`);

    // The callback is handed to page.evaluate, so it must be self-contained:
    // everything it needs is passed in through the argument object.
    const scrolled = await page.evaluate(
      ({ targetText: needle }) => {
        // Text inside these elements has no layout box. Matching it would make
        // scrollIntoView a no-op while still reporting success, so skip it.
        const nonRenderedTags = ["SCRIPT", "STYLE", "NOSCRIPT"];
        const walker = document.createTreeWalker(
          document.body,
          NodeFilter.SHOW_TEXT,
          {
            acceptNode: (node) => {
              const parent = node.parentElement;
              if (!parent || nonRenderedTags.includes(parent.tagName.toUpperCase())) {
                return NodeFilter.FILTER_REJECT;
              }
              if (node.textContent && node.textContent.includes(needle)) {
                return NodeFilter.FILTER_ACCEPT;
              }
              return NodeFilter.FILTER_REJECT;
            }
          }
        );

        // Only the first match in document order is used.
        const firstMatch = walker.nextNode();
        const host = firstMatch ? firstMatch.parentElement : null;
        if (!host) {
          return { success: false, message: `Text "${needle}" not found` };
        }

        host.scrollIntoView({
          behavior: "smooth",
          block: "center",
          inline: "center"
        });
        return {
          success: true,
          message: `Scrolled to "${needle}"`
        };
      },
      { targetText }
    );

    if (!scrolled.success) {
      return {
        output: `${scrolled.message}. Consider using regular computer scroll instead.`
      };
    }

    // Fixed pause after requesting the smooth scroll before reporting success.
    await page.waitForTimeout(800);
    return {
      output: scrolled.message
    };
  }
});
/**
 * Return the capabilities currently held in PLAYWRIGHT_CAPABILITIES as a new
 * array, in the map's insertion order.
 */
function getPlaywrightCapabilities() {
  return [...PLAYWRIGHT_CAPABILITIES.values()];
}
/**
 * Render the system-prompt documentation for the 'playwright' tool.
 *
 * The output is a header, one summary bullet per capability, a blank line,
 * then a "HOW TO USE <METHOD>:" section per capability whose non-empty usage
 * lines are numbered from 1. Every section is followed by a blank line.
 *
 * @param capabilities Capabilities to document; when falsy, the result of
 *   getPlaywrightCapabilities() is used instead.
 * @returns The documentation as one newline-joined string.
 */
function generatePlaywrightDocs(capabilities) {
  const caps = capabilities || getPlaywrightCapabilities();
  const lines = [
    "PLAYWRIGHT TOOL CAPABILITIES:",
    "* You have access to a 'playwright' tool that provides browser automation capabilities:"
  ];

  // Summary list first, so the model sees every method name up front.
  for (const { method, description } of caps) {
    lines.push(` - '${method}': ${description}`);
  }
  lines.push("");

  for (const cap of caps) {
    lines.push(`HOW TO USE ${cap.method.toUpperCase()}:`);
    let step = 0;
    for (const usageLine of cap.usage.split("\n")) {
      // Blank usage lines are dropped and do not consume a step number.
      if (!usageLine) {
        continue;
      }
      step += 1;
      lines.push(`${step}. ${usageLine}`);
    }
    lines.push("");
  }

  return lines.join("\n");
}
// tools/playwright.ts
/**
 * Tool wrapper that exposes the registered playwright capabilities (plus any
 * per-instance additions or overrides) as a single 'playwright' tool.
 */
var PlaywrightTool = class {
  /**
   * @param page Page object passed as the first argument to every handler.
   * @param instanceCapabilities Extra capabilities for this instance; an entry
   *   whose `method` matches a registered capability replaces it.
   */
  constructor(page, instanceCapabilities = []) {
    this.name = "playwright";
    this.page = page;
    const overrides = Array.from(instanceCapabilities, (cap) => [cap.method, cap]);
    // Later entries win on duplicate keys, so overrides replace the defaults
    // while the shared PLAYWRIGHT_CAPABILITIES map itself is left untouched.
    this.capabilities = new Map([...PLAYWRIGHT_CAPABILITIES, ...overrides]);
  }

  /**
   * Documentation for this instance's capabilities, for the system prompt.
   */
  getCapabilityDocs() {
    return generatePlaywrightDocs([...this.capabilities.values()]);
  }

  /**
   * Documentation for the default capability set (no instance needed).
   */
  static getCapabilityDocs() {
    return generatePlaywrightDocs();
  }

  /**
   * Build the tool definition: a custom tool taking a `method` (restricted to
   * this instance's capability names) and an `args` array of strings.
   */
  toParams() {
    const methodProperty = {
      type: "string",
      description: "The playwright function to call.",
      enum: [...this.capabilities.keys()]
    };
    const argsProperty = {
      type: "array",
      description: "The required arguments",
      items: {
        type: "string",
        description: "The argument to pass to the function"
      }
    };
    return {
      name: this.name,
      type: "custom",
      input_schema: {
        type: "object",
        properties: { method: methodProperty, args: argsProperty },
        required: ["method", "args"]
      }
    };
  }

  /**
   * Validate and dispatch a tool call.
   *
   * @param params Object with `method` (capability name) and `args` (array).
   * @returns Whatever the capability's handler resolves to.
   * @throws ToolError for an unknown method, non-array args, schema
   *   validation failure, or any error raised by the handler.
   */
  async call(params) {
    const { method, args } = params;
    const target = this.capabilities.get(method);
    if (!target) {
      const supported = [...this.capabilities.keys()].join(", ");
      throw new ToolError(
        `Unsupported method: ${method}. Supported methods: ${supported}`
      );
    }
    if (!Array.isArray(args)) {
      throw new ToolError("args must be an array");
    }

    // Validation and execution are wrapped separately so the error message
    // says which stage failed.
    try {
      target.schema.parse(args);
    } catch (validationError) {
      throw new ToolError(`Invalid arguments for ${method}: ${validationError}`);
    }
    try {
      return await target.handler(this.page, args);
    } catch (executionError) {
      throw new ToolError(`Failed to execute ${method}: ${executionError}`);
    }
  }
};
// loop.ts
// Base system prompt for the agent. samplingLoop appends the optional
// systemPromptSuffix and the playwright capability docs to this text.
//
// The two interpolations (process.arch and the formatted current date) are
// evaluated once, when this statement runs, not per request. A long-lived
// process therefore keeps reporting the date it started on.
// NOTE(review): DateTime is presumably Luxon's DateTime (import not visible
// in this part of the file) - confirm.
var SYSTEM_PROMPT = `<SYSTEM_CAPABILITY>
* You are utilising an Ubuntu virtual machine using ${process.arch} architecture with internet access.
* When you connect to the display, CHROMIUM IS ALREADY OPEN. The url bar is not visible but it is there.
* If you need to navigate to a new page, you can use the playwright 'goto' method for faster navigation.
* When viewing a page it can be helpful to zoom out so that you can see everything on the page.
* Either that, or make sure you scroll down to see everything before deciding something isn't available.
* When using your computer function calls, they take a while to run and send back to you.
* For efficient page navigation, use LARGE scroll amounts (80-90) to quickly move through content.
* Only use small scroll amounts (5-15) when scrolling within specific UI elements like dropdowns or small lists.
* Page-level scrolling with scroll_amount 80-90 shows mostly new content while keeping some overlap for context.
* IMPORTANT: Always use positive scroll amounts. Use scroll_direction ('up', 'down', 'left', 'right') to control direction, not negative values.
* The current date is ${DateTime.now().toFormat("EEEE, MMMM d, yyyy")}
</SYSTEM_CAPABILITY>
<IMPORTANT>
* When using Chromium, if a startup wizard appears, IGNORE IT. Do not even click "skip this step".
* Instead, click on the search bar on the center of the screen where it says "Search or enter address", and enter the appropriate search term or URL there.
* For faster navigation, prefer using the playwright 'goto' method over manually typing URLs.
</IMPORTANT>`;
async function samplingLoop({
model,
systemPromptSuffix,
messages,
apiKey,
onlyNMostRecentImages,
maxTokens = 4096,
toolVersion,
thinkingBudget,
tokenEfficientToolsBeta = false,
playwrightPage,
signalBus,
executionConfig,
playwrightCapabilities = [],
tools = [],
logger = new NoOpLogger(),
retryConfig
}) {
const selectedVersion = toolVersion || DEFAULT_TOOL_VERSION;
const toolGroup = TOOL_GROUPS_BY_VERSION[selectedVersion];
const computerTools = toolGroup.tools.map(
(Tool) => new Tool(playwrightPage, executionConfig)
);
const playwrightTool = new PlaywrightTool(
playwrightPage,
playwrightCapabilities
);
const toolCollection = new ToolCollection(
...computerTools,
playwrightTool,
...tools
);
toolCollection.setPage(playwrightPage);
const capabilityDocs = playwrightCapabilities.length > 0 ? playwrightTool.getCapabilityDocs() : PlaywrightTool.getCapabilityDocs();
const system = {
type: "text",
text: `${SYSTEM_PROMPT}${systemPromptSuffix ? " " + systemPromptSuffix : ""}
${capabilityDocs}`
};
let stepIndex = 0;
while (true) {
if (signalBus) {
signalBus.setStep(stepIndex);
if (signalBus.isCancelling()) {
console.log("Agent execution was cancelled");
break;
}
if (signalBus.getState() === "paused") {
await signalBus.waitUntilResumed();
if (signalBus.isCancelling()) {
console.log("Agent execution was cancelled during pause");
break;
}
}
}
const betas = toolGroup.beta_flag ? [toolGroup.beta_flag] : [];
if (tokenEfficientToolsBeta) {
betas.push("token-efficient-tools-2025-02-19");
}
let imageTruncationThreshold = onlyNMostRecentImages || 20;
const clientOptions = { apiKey, maxRetries: 4 };
if (retryConfig == null ? void 0 : retryConfig.preferIPv4) {
const ipv4Lookup = (hostname, options, callback) => {
if (typeof options === "function") {
return lookup(hostname, { family: 4 }, options);
}
if (callback) {
return lookup(hostname, { ...options, family: 4 }, callback);
}
};
clientOptions.httpAgent = new HttpAgent({ lookup: ipv4Lookup });
clientOptions.httpsAgent = new HttpsAgent({ lookup: ipv4Lookup });
}
const client = new Anthropic(clientOptions);
const enablePromptCaching = true;
if (enablePromptCaching) {
betas.push(PROMPT_CACHING_BETA_FLAG);
injectPromptCaching(messages);
onlyNMostRecentImages = 0;
system.cache_control = { type: "ephemeral" };
}
truncateMessageHistory(messages, 15);
cleanMessageHistory(messages);
if (onlyNMostRecentImages) {
maybeFilterToNMostRecentImages(
messages,
onlyNMostRecentImages,
imageTruncationThreshold
);
}
const extraBody = {};
if (thinkingBudget) {
extraBody.thinking = { type: "enabled", budget_tokens: thinkingBudget };
}
const toolParams = toolCollection.toParams();
const response = await withRetry(
() => client.beta.messages.create({
max_tokens: maxTokens,
messages,
model,
system: [system],
tools: toolParams,
betas,
...extraBody
}),
retryConfig,
logger
);
const responseParams = responseToParams(response);
const loggableContent = responseParams.map((block) => {
if (block.type === "tool_use") {
console.log(`
=== TOOL USE: ${block.name} ===`);
console.log("Full input:", JSON.stringify(block.input, null, 2));
return {
type: "tool_use",
name: block.name,
input: block.input
};
}
return block;
});
console.log("=== LLM RESPONSE ===");
console.log("Stop reason:", response.stop_reason);
console.log(loggableContent);
console.log("===");
logger.llmResponse(response.stop_reason ?? "unknown", stepIndex, loggableContent);
messages.push({
role: "assistant",
content: responseParams
});
if (response.stop_reason === "end_turn") {
if (signalBus) {
if (signalBus.isCancelling()) {