UNPKG

@hyperbrowser/agent

Version:

Hyperbrowser's Web Agent

1,015 lines (1,014 loc) • 41.7 kB

JavaScript

"use strict";
// --- TypeScript-emitted interop helpers -------------------------------------
// Kept as emitted: `__importStar` is used by the lazy `require` of the debug
// writer in `writeDebugData`.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function () { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function (o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function (o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function (o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.HyperAgent = void 0;
const uuid_1 = require("uuid");
const providers_1 = require("../llm/providers");
const types_1 = require("../types");
const actions_1 = require("./actions");
const browser_providers_1 = require("../browser-providers");
const error_1 = require("./error");
const find_element_1 = require("./shared/find-element");
const types_2 = require("../context-providers/a11y-dom/types");
const client_1 = require("./mcp/client");
const agent_1 = require("./tools/agent");
const utils_1 = require("../utils");
const waitForSettledDOM_1 = require("../utils/waitForSettledDOM");
const perf_hooks_1 = require("perf_hooks");
const cdp_1 = require("../cdp");
const dom_cache_1 = require("../context-providers/a11y-dom/dom-cache");
const options_1 = require("../debug/options");
const runtime_context_1 = require("./shared/runtime-context");
const perform_action_1 = require("./actions/shared/perform-action");

/** Roles that `collectInteractiveElements` keeps when building debug output. */
const INTERACTIVE_ROLES = new Set([
    "button",
    "link",
    "textbox",
    "searchbox",
    "combobox",
    "checkbox",
    "tab",
    "menuitem",
]);

/**
 * Validate `action` and append it to `actions`.
 *
 * Synchronous on purpose: the constructor cannot await, and it must be able to
 * surface an invalid custom action as a normal thrown error rather than an
 * unhandled promise rejection.
 *
 * @param actions Array of already-registered action definitions (mutated).
 * @param action The action definition to add.
 * @throws HyperagentError (400) if the action type is the reserved "complete".
 * @throws Error if an action with the same type is already registered.
 */
function appendAction(actions, action) {
    if (action.type === "complete") {
        throw new error_1.HyperagentError("Could not add an action with the name 'complete'. Complete is a reserved action.", 400);
    }
    if (actions.some((registered) => registered.type === action.type)) {
        throw new Error(`Could not register action of type ${action.type}. Action with the same name is already registered`);
    }
    actions.push(action);
}

/**
 * Track which page a running task should act on, following tabs/popups that
 * are opened by the page the task is currently on.
 *
 * @param agent The owning agent; its `context` and `debug` fields are read.
 * @param startPage The page the task starts on.
 * @returns `getActivePage()` for the page to use now, and `stop()` to detach
 *          the context listener.
 */
function followOpenedTabs(agent, startPage) {
    // Capture the context once so `stop()` detaches from the same object even
    // if the agent's context is replaced or cleared in the meantime.
    const context = agent.context;
    let activePage = startPage;
    const onPage = async (newPage) => {
        try {
            const opener = await newPage.opener();
            if (opener === activePage) {
                if (agent.debug) {
                    console.log(`[HyperAgent] Task following new tab: ${newPage.url()}`);
                }
                activePage = newPage;
            }
        }
        catch {
            // Best effort: a failure to resolve the opener just means we don't follow.
        }
    };
    context?.on("page", onPage);
    return {
        getActivePage: () => activePage,
        stop: () => context?.off("page", onPage),
    };
}

/**
 * Build the context object handed to `runAgentTask`.
 *
 * @param agent The owning agent.
 * @param outputSchema Optional output schema for the "complete" action.
 * @param getActivePage Returns the page the task should currently act on.
 */
function buildTaskContext(agent, outputSchema, getActivePage) {
    return {
        llm: agent.llm,
        actions: agent.getActions(outputSchema),
        tokenLimit: agent.tokenLimit,
        debug: agent.debug,
        mcpClient: agent.mcpClient,
        variables: agent._variables,
        cdpActions: agent.cdpActionsEnabled,
        activePage: async () => getActivePage(),
    };
}

/**
 * Browser agent that owns a browser/context pair, an LLM client, a registry of
 * actions, and the state of the tasks it has started.
 */
class HyperAgent {
    /**
     * The current page wrapped with the HyperPage helpers, or `null` if no page
     * has been selected yet. Note that every read re-runs `setupHyperPage`.
     */
    get currentPage() {
        if (this._currentPage) {
            return this.setupHyperPage(this._currentPage);
        }
        return null;
    }
    set currentPage(page) {
        this._currentPage = page;
    }
    /**
     * @param params Agent configuration. Fields read here: `llm`,
     *   `browserProvider`, `debugOptions`, `hyperbrowserConfig`, `localConfig`,
     *   `customActions`, `debug`, `cdpActions`.
     * @throws HyperagentError (400) if no `llm` is given and `OPENAI_API_KEY`
     *   is not set.
     * @throws If one of `customActions` is reserved or duplicated.
     */
    constructor(params = {}) {
        this.tasks = {};
        this.tokenLimit = 128000;
        this.debug = false;
        this.actions = [...actions_1.DEFAULT_ACTIONS];
        this.browser = null;
        this.context = null;
        this._currentPage = null;
        this._variables = {};
        if (!params.llm) {
            if (!process.env.OPENAI_API_KEY) {
                throw new error_1.HyperagentError("No LLM provider provided", 400);
            }
            this.llm = (0, providers_1.createLLMClient)({
                provider: "openai",
                model: "gpt-4o",
                temperature: 0,
            });
        }
        else if (typeof params.llm === "object" && "provider" in params.llm) {
            // It's an LLMConfig
            this.llm = (0, providers_1.createLLMClient)(params.llm);
        }
        else {
            // It's already a HyperAgentLLM instance
            this.llm = params.llm;
        }
        this.browserProviderType = (params.browserProvider ?? "Local");
        // Resolve the debug flag BEFORE handing it to setDebugOptions; previously
        // the field still held its initial `false` at this point.
        this.debug = params.debug ?? false;
        (0, options_1.setDebugOptions)(params.debugOptions, this.debug);
        // TODO(Phase4): This legacy provider branch will be replaced by connector configs.
        this.browserProvider = (this.browserProviderType === "Hyperbrowser"
            ? new browser_providers_1.HyperbrowserProvider({
                ...(params.hyperbrowserConfig ?? {}),
                debug: params.debug,
            })
            : new browser_providers_1.LocalBrowserProvider(params.localConfig));
        if (params.customActions) {
            // Registered synchronously so an invalid action throws from the
            // constructor instead of producing an unhandled rejection.
            for (const action of params.customActions) {
                appendAction(this.actions, action);
            }
        }
        this.cdpActionsEnabled = params.cdpActions ?? true;
        this.errorEmitter = new utils_1.ErrorEmitter();
    }
    /**
     * This is just exposed as a utility function. You don't need to call it explicitly.
     * Starts the browser through the configured provider on first use and
     * prepares the browser context; later calls return the existing browser.
     * @returns A reference to the current rebrowser-playwright browser instance.
     */
    async initBrowser() {
        if (this.browser) {
            return this.browser;
        }
        this.browser = await this.browserProvider.start();
        if (this.browserProviderType === "Hyperbrowser" &&
            this.browser.contexts().length > 0) {
            this.context = this.browser.contexts()[0];
        }
        else {
            this.context = await this.browser.newContext({
                viewport: null,
            });
        }
        // Inject script to track event listeners.
        // NOTE: this callback is evaluated in the page, so it must stay
        // self-contained (no references to variables from this module).
        await this.context.addInitScript(() => {
            // TODO: Check this list of events
            const interactiveEvents = new Set([
                "click",
                "mousedown",
                "mouseup",
                "keydown",
                "keyup",
                "keypress",
                "submit",
                "change",
                "input",
                "focus",
                "blur",
            ]); // Add more events as needed
            const originalAddEventListener = Element.prototype.addEventListener;
            Element.prototype.addEventListener = function (type, listener, options) {
                // Guard against non-string types so the patch never throws where
                // the native implementation would not.
                if (typeof type === "string" &&
                    interactiveEvents.has(type.toLowerCase())) {
                    this.setAttribute("data-has-interactive-listener", "true");
                }
                return originalAddEventListener.call(this, type, listener, options);
            };
        });
        // Listen for new pages (tabs/popups)
        this.context.on("page", () => {
            if (this.debug) {
                console.log("New tab/popup detected");
            }
            // Note: We used to auto-switch this._currentPage here, but that breaks
            // scoped page interactions. If a user is awaiting pageA.ai(), and a new
            // tab opens, we don't want pageA to suddenly become pageB.
            // The user or the specific task logic should handle tab switching if desired.
        });
        return this.browser;
    }
    /**
     * Use this function instead of accessing this.actions directly.
     * Appends the "complete" action: a schema-specific one when `outputSchema`
     * is given, otherwise the default definition.
     * @param outputSchema Optional schema for the final output.
     * @returns A new array of action definitions.
     */
    getActions(outputSchema) {
        const completeAction = outputSchema
            ? (0, actions_1.generateCompleteActionWithOutputDefinition)(outputSchema)
            : actions_1.CompleteActionDefinition;
        return [...this.actions, completeAction];
    }
    /**
     * Get all variables
     * @returns Record of variables keyed by variable key (the live object, not a copy)
     */
    getVariables() {
        return this._variables;
    }
    /**
     * Set a variable
     * @param variable Variable object; it is stored under its own `key` property
     */
    addVariable(variable) {
        this._variables[variable.key] = variable;
    }
    /**
     * Get a variable
     * @param key Key of the variable
     * @returns The stored variable object, or undefined if absent
     */
    getVariable(key) {
        return this._variables[key];
    }
    /**
     * Delete a variable
     * @param key Key of the variable
     */
    deleteVariable(key) {
        delete this._variables[key];
    }
    /**
     * Get all pages in the context
     * @returns Array of HyperPage objects
     * @throws HyperagentError if no context is available after initialization
     */
    async getPages() {
        if (!this.browser) {
            await this.initBrowser();
        }
        if (!this.context) {
            throw new error_1.HyperagentError("No context found");
        }
        return this.context.pages().map((page) => this.setupHyperPage(page));
    }
    /**
     * Create a new page in the context
     * @returns HyperPage object
     * @throws HyperagentError if no context is available after initialization
     */
    async newPage() {
        if (!this.browser) {
            await this.initBrowser();
        }
        if (!this.context) {
            throw new error_1.HyperagentError("No context found");
        }
        const page = await this.context.newPage();
        return this.setupHyperPage(page);
    }
    /**
     * Close the agent and all associated resources: CDP clients, unfinished
     * tasks (marked CANCELLED), the MCP client and the browser.
     */
    async closeAgent() {
        await (0, cdp_1.disposeAllCDPClients)().catch((error) => {
            console.warn("[HyperAgent] Failed to dispose CDP clients:", error);
        });
        for (const task of Object.values(this.tasks)) {
            if (!types_1.endTaskStatuses.has(task.status)) {
                task.status = types_1.TaskStatus.CANCELLED;
            }
        }
        try {
            if (this.mcpClient) {
                await this.mcpClient.disconnect();
                this.mcpClient = undefined;
            }
        }
        finally {
            // Always release the browser, even when the MCP disconnect failed.
            if (this.browser) {
                await this.browserProvider.close();
                this.browser = null;
                this.context = null;
                // Drop the reference to a page that belongs to the closed browser.
                this._currentPage = null;
            }
        }
    }
    /**
     * Get the current page or create a new one if none exists
     * @returns The current page
     * @throws HyperagentError if no context is available after initialization
     */
    async getCurrentPage() {
        if (!this.browser) {
            await this.initBrowser();
        }
        if (!this.context) {
            throw new error_1.HyperagentError("No context found");
        }
        // Poll context for new pages to catch any that opened since the last check
        // This handles race conditions where the 'page' event might not have fired yet
        // or where we missed it during a heavy operation.
        const pages = this.context.pages();
        const lastPage = pages[pages.length - 1];
        // If the last page is different and not closed, switch to it
        // We prefer the newest page as it's likely the result of the user's last action
        if (lastPage && !lastPage.isClosed() && lastPage !== this._currentPage) {
            if (this.debug) {
                console.log(`[HyperAgent] Polling detected new page, switching focus: ${lastPage.url()}`);
            }
            this._currentPage = lastPage;
        }
        if (!this._currentPage || this._currentPage.isClosed()) {
            this._currentPage = await this.context.newPage();
        }
        // Wrap exactly once; going through the `currentPage` getter repeatedly
        // would re-run setupHyperPage on every read.
        return this.setupHyperPage(this._currentPage);
    }
    /**
     * Get task control object for a specific task
     * @param taskId ID of the task
     * @returns Task control object (`getStatus`, `pause`, `resume`, `cancel`, `emitter`)
     * @throws HyperagentError if the task is unknown
     */
    getTaskControl(taskId) {
        const taskState = this.tasks[taskId];
        if (!taskState) {
            throw new error_1.HyperagentError(`Task ${taskId} not found`);
        }
        return {
            getStatus: () => taskState.status,
            pause: () => {
                if (taskState.status === types_1.TaskStatus.RUNNING) {
                    taskState.status = types_1.TaskStatus.PAUSED;
                }
                return taskState.status;
            },
            resume: () => {
                if (taskState.status === types_1.TaskStatus.PAUSED) {
                    taskState.status = types_1.TaskStatus.RUNNING;
                }
                return taskState.status;
            },
            cancel: () => {
                // Never overwrite a terminal status (e.g. FAILED) with CANCELLED;
                // mirrors the check used by closeAgent().
                const isFinished = taskState.status === types_1.TaskStatus.COMPLETED ||
                    types_1.endTaskStatuses.has(taskState.status);
                if (!isFinished) {
                    taskState.status = types_1.TaskStatus.CANCELLED;
                }
                return taskState.status;
            },
            emitter: this.errorEmitter,
        };
    }
    /**
     * Execute a task asynchronously and return a Task control object
     * @param task The task to execute
     * @param params Optional parameters for the task
     * @param initPage Optional page to use for the task
     * @returns A promise that resolves to a Task control object for managing the running task
     */
    async executeTaskAsync(task, params, initPage) {
        const taskId = (0, uuid_1.v4)();
        const startPage = initPage || (await this.getCurrentPage());
        // Follow new tabs opened by the current active page
        const tabs = followOpenedTabs(this, startPage);
        const taskState = {
            id: taskId,
            task: task,
            status: types_1.TaskStatus.PENDING,
            startingPage: startPage,
            steps: [],
        };
        this.tasks[taskId] = taskState;
        const mergedParams = params ?? {};
        // Intentionally not awaited: the task runs in the background and is
        // observed through the returned control object / error emitter.
        (0, agent_1.runAgentTask)(buildTaskContext(this, mergedParams.outputSchema, tabs.getActivePage), taskState, mergedParams)
            .then(() => tabs.stop())
            .catch((error) => {
            tabs.stop();
            // Retrieve the correct state to update
            const failedTaskState = this.tasks[taskId];
            if (failedTaskState) {
                failedTaskState.status = types_1.TaskStatus.FAILED;
                failedTaskState.error =
                    error instanceof Error ? error.message : String(error);
                // Emit the error on the central emitter (the error object only;
                // no task id is attached here).
                this.errorEmitter.emit("error", error);
            }
            else {
                // Fallback if task state somehow doesn't exist
                console.error(`Task state ${taskId} not found during error handling.`);
            }
        });
        return this.getTaskControl(taskId);
    }
    /**
     * Execute a task and wait for completion
     * @param task The task to execute
     * @param params Optional parameters for the task
     * @param initPage Optional page to use for the task
     * @returns A promise that resolves to the task output
     * @throws Re-throws whatever `runAgentTask` throws after marking the task FAILED
     */
    async executeTask(task, params, initPage) {
        const taskId = (0, uuid_1.v4)();
        const startPage = initPage || (await this.getCurrentPage());
        // Follow new tabs opened by the current active page
        const tabs = followOpenedTabs(this, startPage);
        const taskState = {
            id: taskId,
            task: task,
            status: types_1.TaskStatus.PENDING,
            startingPage: startPage,
            steps: [],
        };
        this.tasks[taskId] = taskState;
        try {
            const mergedParams = params ?? {};
            return await (0, agent_1.runAgentTask)(buildTaskContext(this, mergedParams.outputSchema, tabs.getActivePage), taskState, mergedParams);
        }
        catch (error) {
            taskState.status = types_1.TaskStatus.FAILED;
            throw error;
        }
        finally {
            // Detach the tab-following listener on every exit path.
            tabs.stop();
        }
    }
    /**
     * Find element with retry logic
     * Delegates to `findElementWithInstruction`, which receives the retry settings.
     *
     * @param instruction Natural language instruction for the action
     * @param page The page to search on
     * @param maxRetries Maximum number of retry attempts
     * @param retryDelayMs Delay between retries in milliseconds
     * @param startTime Timestamp recorded in debug output
     * @returns Object containing the found element, DOM state, element map and LLM response
     * @throws HyperagentError (404) if no element is found
     */
    async findElementWithRetry(instruction, page, maxRetries, retryDelayMs, startTime) {
        // Delegate to shared utility
        const result = await (0, find_element_1.findElementWithInstruction)(instruction, page, this.llm, {
            maxRetries,
            retryDelayMs,
            debug: this.debug,
        });
        if (result.success && result.element) {
            return {
                element: result.element,
                domState: result.domState,
                elementMap: result.elementMap,
                llmResponse: result.llmResponse,
            };
        }
        // Element not found after all retries - handle error case
        if (this.debug) {
            console.error(`[aiAction] No elements found for instruction: "${instruction}" after ${maxRetries} attempts`);
            console.error(`[aiAction] Current URL: ${page.url()}`);
            // domState is treated as optional here, as writeDebugData already does.
            console.error(`[aiAction] Total elements in final a11y tree: ${result.domState?.elements?.size ?? 0}`);
            // Write debug data to files before throwing error
            await this.writeDebugData({
                instruction,
                page,
                startTime,
                domState: result.domState,
                elementMap: result.elementMap,
                llmResponse: result.llmResponse,
                error: new error_1.HyperagentError(`No elements found for instruction: "${instruction}" after ${maxRetries} retry attempts.`, 404),
                success: false,
            });
        }
        throw new error_1.HyperagentError(`No elements found for instruction: "${instruction}" after ${maxRetries} retry attempts. The instruction may be too vague, the element may not exist, or the page may not have fully loaded.`, 404);
    }
    /**
     * Write aiAction debug artifacts. Does nothing unless debug mode is on and
     * both `domState` and `elementMap` are present. Failures while writing are
     * logged, not thrown.
     *
     * @param params Fields read: `instruction`, `page`, `startTime`, `domState`,
     *   `elementMap`, `element`, `llmResponse`, `error`, `success`.
     */
    async writeDebugData(params) {
        if (!this.debug || !params.domState || !params.elementMap) {
            return;
        }
        // Loaded lazily so the debug writer is only required when actually used.
        const { writeAiActionDebug } = await Promise.resolve().then(() => __importStar(require("../utils/debugWriter")));
        try {
            const screenshot = await params.page
                .screenshot({ type: "png" })
                .catch(() => null);
            // Fields shared by the success and the error record.
            const commonFields = {
                instruction: params.instruction,
                url: params.page.url(),
                timestamp: params.startTime,
                domElementCount: params.domState.elements.size,
                domTree: params.domState.domState,
                screenshot: screenshot || undefined,
                llmResponse: params.llmResponse,
                frameDebugInfo: params.domState.frameDebugInfo,
            };
            if (params.success && params.element) {
                // Success case - write found element data
                await writeAiActionDebug({
                    ...commonFields,
                    foundElement: {
                        elementId: params.element.elementId,
                        method: params.element.method,
                        arguments: params.element.arguments,
                        xpath: params.element.xpath,
                    },
                    success: true,
                });
            }
            else {
                // Error case - write available elements
                const availableElements = this.collectInteractiveElements(params.elementMap, HyperAgent.AIACTION_CONFIG.MAX_DEBUG_ELEMENTS_TO_STORE);
                const isError = params.error instanceof Error;
                await writeAiActionDebug({
                    ...commonFields,
                    availableElements,
                    error: {
                        message: isError ? params.error.message : String(params.error),
                        stack: isError ? params.error.stack : undefined,
                    },
                    success: false,
                });
            }
        }
        catch (debugError) {
            console.error(`[aiAction] Failed to write debug data:`, debugError);
        }
    }
    /**
     * Collect interactive elements from element map for debugging
     * Keeps elements whose role is one of the interactive roles (button, link,
     * textbox, ...) and that have a non-empty name, description or value.
     * Elements from non-"0" frames are listed first, then frame "0" fills the
     * remaining slots.
     *
     * @param elementMap Map of element IDs to element data
     * @param limit Maximum number of elements to collect
     * @returns Array of interactive elements with id, role, and label
     */
    collectInteractiveElements(elementMap, limit = 20) {
        // Group elements by frame
        const elementsByFrame = new Map();
        for (const [id, elem] of elementMap) {
            const role = elem.role;
            if (!role || !INTERACTIVE_ROLES.has(role)) {
                continue;
            }
            const label = elem.name || elem.description || elem.value || "";
            if (!label) {
                continue;
            }
            // Extract frame index from ID (format: "frameIndex-backendNodeId")
            const frameIndex = id.split("-")[0];
            if (!elementsByFrame.has(frameIndex)) {
                elementsByFrame.set(frameIndex, []);
            }
            elementsByFrame.get(frameIndex).push({ id, role, label });
        }
        // Collect elements: prioritize iframe content, then main frame
        const collected = [];
        for (const [frameIndex, elements] of elementsByFrame) {
            if (frameIndex !== "0") {
                collected.push(...elements);
            }
        }
        const remainingSlots = limit - collected.length;
        if (remainingSlots > 0) {
            const mainFrameElements = elementsByFrame.get("0") || [];
            collected.push(...mainFrameElements.slice(0, remainingSlots));
        }
        return collected.slice(0, limit);
    }
    /**
     * Execute a single granular action using a11y mode
     * Internal method used by page.aiAction()
     *
     * Architecture: Simple examine->act flow
     * - 1 LLM call (examineDom finds element and suggests method)
     * - Direct execution (no agent loop)
     *
     * @param instruction Natural language instruction for a single action
     * @param pageOrGetter The page to act on, or a function returning it. When a
     *   function is given it is re-evaluated to detect a page switch mid-action.
     * @param _params Currently unused
     * @returns A promise that resolves to the task output
     * @throws HyperagentError 409 if the page returned by the getter changed
     *   during execution; other failures are re-thrown as HyperagentError.
     */
    async executeSingleAction(instruction, pageOrGetter, _params) {
        const actionStart = perf_hooks_1.performance.now();
        const startTime = new Date().toISOString();
        if (this.debug) {
            console.log(`[aiAction] Instruction: ${instruction}`);
        }
        const getPage = () => typeof pageOrGetter === "function" ? pageOrGetter() : pageOrGetter;
        const initialPage = getPage();
        const assertSamePage = () => {
            if (getPage() !== initialPage) {
                throw new error_1.HyperagentError("Page context switched during execution", 409);
            }
        };
        let domState = null;
        let elementMap = null;
        try {
            // Find element with retry logic
            const findStart = perf_hooks_1.performance.now();
            const { element, domState: foundDomState, elementMap: foundElementMap, llmResponse, } = await this.findElementWithRetry(instruction, initialPage, HyperAgent.AIACTION_CONFIG.MAX_RETRIES, HyperAgent.AIACTION_CONFIG.RETRY_DELAY_MS, startTime);
            // Check if page context switched during findElement (e.g. new tab opened by previous action)
            assertSamePage();
            domState = foundDomState;
            elementMap = foundElementMap;
            logPerf(this.debug, "[Perf][executeSingleAction] findElementWithRetry", findStart);
            if (this.debug) {
                console.log(`[aiAction] Found element: ${element.elementId}`);
                console.log(`[aiAction] Method: ${element.method}`);
                console.log(`[aiAction] Arguments:`, element.arguments);
            }
            if (!element.method) {
                throw new error_1.HyperagentError("Element method is missing from LLM response", 500);
            }
            const method = element.method;
            const args = element.arguments || [];
            if (!(0, types_2.isEncodedId)(element.elementId)) {
                throw new error_1.HyperagentError(`Element ID "${element.elementId}" is not in encoded format (frameIndex-backendNodeId).`, 400);
            }
            // Use shared runtime context
            const { cdpClient, frameContextManager } = await (0, runtime_context_1.initializeRuntimeContext)(initialPage, this.debug);
            // Check context switch again before action
            assertSamePage();
            // Create a context object compatible with performAction
            // We need to mock the ActionContext shape since performAction expects it
            // but we don't have a full AgentCtx/TaskState here
            const actionContext = {
                domState,
                page: initialPage,
                tokenLimit: this.tokenLimit,
                llm: this.llm,
                debug: this.debug,
                // Only provide CDP if enabled
                cdpActions: this.cdpActionsEnabled,
                cdp: this.cdpActionsEnabled
                    ? {
                        client: cdpClient,
                        frameContextManager,
                        resolveElement: cdp_1.resolveElement,
                        dispatchCDPAction: cdp_1.dispatchCDPAction,
                        preferScriptBoundingBox: this.debug,
                        debug: this.debug,
                    }
                    : undefined,
                // These are required by ActionContext but not used by performAction
                debugDir: undefined,
                mcpClient: this.mcpClient,
                variables: Object.values(this._variables),
                invalidateDomCache: () => (0, dom_cache_1.markDomSnapshotDirty)(initialPage),
            };
            // Use shared performAction to execute
            const performStart = perf_hooks_1.performance.now();
            const actionOutput = await (0, perform_action_1.performAction)(actionContext, {
                elementId: element.elementId,
                method,
                arguments: args,
                instruction,
                confidence: 1, // Implicit confidence for single action
            });
            let actionXPath;
            if (actionOutput.debug &&
                typeof actionOutput.debug === "object" &&
                "requestedAction" in actionOutput.debug) {
                actionXPath = actionOutput.debug.elementMetadata?.xpath;
            }
            if (!actionOutput.success) {
                throw new Error(actionOutput.message);
            }
            // Timed from just before performAction so the figure excludes the
            // element search and runtime-context setup.
            logPerf(this.debug, "[Perf][executeSingleAction] action execution", performStart);
            // Wait for DOM to settle after action
            const waitStart = perf_hooks_1.performance.now();
            await (0, waitForSettledDOM_1.waitForSettledDOM)(initialPage);
            (0, dom_cache_1.markDomSnapshotDirty)(initialPage);
            logPerf(this.debug, "[Perf][executeSingleAction] waitForSettledDOM", waitStart);
            // Write debug data on success
            await this.writeDebugData({
                instruction,
                page: initialPage,
                startTime,
                domState,
                elementMap,
                element: {
                    elementId: element.elementId,
                    method,
                    arguments: args,
                    xpath: actionXPath,
                },
                llmResponse,
                success: true,
            });
            logPerf(this.debug, "[Perf][executeSingleAction] total", actionStart);
            return {
                status: types_1.TaskStatus.COMPLETED,
                steps: [],
                output: `Successfully executed: ${instruction}`,
            };
        }
        catch (error) {
            // If page switched during execution, prioritize that over the error
            // This catches cases where findElement failed because the old page closed/navigated
            assertSamePage();
            // Write debug data on error
            await this.writeDebugData({
                instruction,
                page: initialPage,
                startTime,
                domState,
                elementMap,
                error,
                success: false,
            });
            // Re-throw HyperagentErrors as-is
            if (error instanceof error_1.HyperagentError) {
                throw error;
            }
            // Wrap other errors
            const errorMsg = error instanceof Error ? error.message : String(error);
            throw new error_1.HyperagentError(`Failed to execute action: ${errorMsg}`, 500);
        }
    }
    /**
     * Register a new action with the agent
     * @param action The action to register
     * @returns A promise that rejects if the action type is reserved
     *   ("complete") or already registered
     */
    async registerAction(action) {
        appendAction(this.actions, action);
    }
    /**
     * Initialize the MCP client with the given configuration
     * Each server is connected independently; a failure for one server is
     * logged and the remaining servers are still attempted.
     * @param config The MCP configuration
     */
    async initializeMCPClient(config) {
        if (!config || config.servers.length === 0) {
            return;
        }
        this.mcpClient = new client_1.MCPClient(this.debug);
        try {
            for (const serverConfig of config.servers) {
                try {
                    const { serverId, actions } = await this.mcpClient.connectToServer(serverConfig);
                    for (const action of actions) {
                        // Awaited so a registration failure lands in the catch
                        // below instead of becoming an unhandled rejection.
                        await this.registerAction(action);
                    }
                    if (this.debug) {
                        console.log(`MCP server ${serverId} initialized successfully`);
                    }
                }
                catch (error) {
                    console.error(`Failed to initialize MCP server ${serverConfig.id || "unknown"}:`, error);
                }
            }
            const serverIds = this.mcpClient.getServerIds();
            if (this.debug) {
                console.log(`Successfully connected to ${serverIds.length} MCP servers`);
            }
        }
        catch (error) {
            console.error("Failed to initialize MCP client:", error);
            this.mcpClient = undefined;
        }
    }
    /**
     * Connect to an MCP server at runtime
     * @param serverConfig Configuration for the MCP server
     * @returns Server ID if connection and action registration succeeded, otherwise null
     */
    async connectToMCPServer(serverConfig) {
        if (!this.mcpClient) {
            this.mcpClient = new client_1.MCPClient(this.debug);
        }
        try {
            const { serverId, actions } = await this.mcpClient.connectToServer(serverConfig);
            // Register the actions from this server
            for (const action of actions) {
                // Awaited so a registration failure is handled by the catch below.
                await this.registerAction(action);
            }
            if (this.debug) {
                console.log(`Connected to MCP server with ID: ${serverId}`);
            }
            return serverId;
        }
        catch (error) {
            console.error(`Failed to connect to MCP server:`, error);
            return null;
        }
    }
    /**
     * Disconnect from a specific MCP server
     * @param serverId ID of the server to disconnect from
     * @returns Boolean indicating if the disconnection was successful
     */
    disconnectFromMCPServer(serverId) {
        if (!this.mcpClient) {
            return false;
        }
        try {
            this.mcpClient.disconnectServer(serverId);
            return true;
        }
        catch (error) {
            console.error(`Failed to disconnect from MCP server ${serverId}:`, error);
            return false;
        }
    }
    /**
     * Check if a specific MCP server is connected
     * @param serverId ID of the server to check
     * @returns Boolean indicating if the server is connected
     */
    isMCPServerConnected(serverId) {
        if (!this.mcpClient) {
            return false;
        }
        return this.mcpClient.getServerIds().includes(serverId);
    }
    /**
     * Get all connected MCP server IDs
     * @returns Array of server IDs (empty when no MCP client is initialized)
     */
    getMCPServerIds() {
        if (!this.mcpClient) {
            return [];
        }
        return this.mcpClient.getServerIds();
    }
    /**
     * Get information about all connected MCP servers
     * @returns Array of server information objects or null if no MCP client is initialized
     */
    getMCPServerInfo() {
        if (!this.mcpClient) {
            return null;
        }
        return this.mcpClient.getServerInfo();
    }
    /**
     * Pretty print an action
     * @param action The action to print
     * @returns Formatted string representation of the action, or "" when the
     *   action type is unknown or has no `pprintAction`
     */
    pprintAction(action) {
        const definition = this.actions.find((candidate) => candidate.type === action.type);
        if (definition && definition.pprintAction) {
            return definition.pprintAction(action.params);
        }
        return "";
    }
    /**
     * @returns The browser provider's session, or null when it has none
     */
    getSession() {
        return this.browserProvider.getSession() || null;
    }
    /**
     * Attach the HyperPage helpers (`ai`, `aiAction`, `aiAsync`, `extract`) to
     * `page` and start tracking tabs opened from it.
     *
     * The page object itself is mutated and returned. Calling this again for
     * the same page tears down the previous tracking (context and close
     * listeners) and starts a fresh scope rooted at `page`.
     *
     * @param page The page to wrap
     * @returns The same page object, augmented with the helpers
     */
    setupHyperPage(page) {
        const hyperPage = page;
        // Clean up existing listeners if this page was already setup
        if (hyperPage._scopeListenerCleanup) {
            hyperPage._scopeListenerCleanup();
        }
        // History Stack: [Root, Tab1, Tab2, ...]
        const pageStack = [page];
        const getActivePage = () => pageStack[pageStack.length - 1];
        // Every "close" listener we attach, so cleanup can detach all of them.
        const closeListeners = new Map();
        let disposed = false;
        // Handle tab closing (Pop)
        const trackClose = (trackedPage) => {
            const onClose = () => {
                const idx = pageStack.indexOf(trackedPage);
                if (idx !== -1) {
                    if (this.debug) {
                        console.log(`[HyperPage] Tab closed, removing from stack`);
                    }
                    pageStack.splice(idx, 1);
                }
            };
            closeListeners.set(trackedPage, onClose);
            trackedPage.on("close", onClose);
        };
        // Listen for close on the root page
        trackClose(page);
        // Handle new tabs (Push)
        const onPage = async (newPage) => {
            try {
                // Check if the new page is opened by our current active scope page
                const opener = await newPage.opener();
                // `disposed` covers an opener lookup that resolves after cleanup.
                if (disposed || opener !== getActivePage()) {
                    return;
                }
                if (this.debug) {
                    console.log(`[HyperPage] Auto-switching to new tab (Push): ${newPage.url()}`);
                }
                // Update the scope to follow the new tab
                pageStack.push(newPage);
                // Listen for close on the new page
                trackClose(newPage);
            }
            catch {
                // Best effort: ignore pages whose opener cannot be resolved.
            }
        };
        // Attach a persistent listener to track page flow for the lifetime of this wrapper
        const context = page.context();
        context.on("page", onPage);
        hyperPage._scopeListenerCleanup = () => {
            disposed = true;
            context.off("page", onPage);
            for (const [trackedPage, onClose] of closeListeners) {
                trackedPage.off("close", onClose);
            }
            closeListeners.clear();
        };
        hyperPage.ai = (task, params) => this.executeTask(task, params, getActivePage());
        hyperPage.aiAction = async (instruction, params) => {
            const maxRetries = 3;
            for (let attempt = 0; attempt < maxRetries; attempt++) {
                try {
                    return await this.executeSingleAction(instruction, getActivePage, params);
                }
                catch (err) {
                    const pageSwitched = err?.statusCode === 409 ||
                        (err?.message && err.message.includes("Page context switched"));
                    if (!pageSwitched) {
                        throw err;
                    }
                    if (this.debug) {
                        console.log("[HyperPage] Action aborted due to tab switch, retrying on new page...");
                    }
                    // Wait briefly for stability
                    await new Promise((resolve) => setTimeout(resolve, 500));
                }
            }
            throw new error_1.HyperagentError("Failed to execute action after max retries due to page switching", 500);
        };
        // aiAsync tasks run in background, so we just use the current scope start point.
        // The task itself has internal auto-following logic (from executeTaskAsync implementation).
        hyperPage.aiAsync = (task, params) => this.executeTaskAsync(task, params, getActivePage());
        hyperPage.extract = async (task, outputSchema, params) => {
            if (!task && !outputSchema) {
                throw new error_1.HyperagentError("No task description or output schema specified", 400);
            }
            const taskParams = {
                ...params,
                // Applied after the spread so an explicit `maxSteps: undefined`
                // still falls back to the default of 2.
                maxSteps: params?.maxSteps ?? 2,
                outputSchema,
            };
            const prompt = task
                ? `You have to perform an extraction on the current page. You have to perform the extraction according to the task: ${task}. Make sure your final response only contains the extracted content`
                : "You have to perform a data extraction on the current page. Make sure your final response only contains the extracted content";
            const res = await this.executeTask(prompt, taskParams, getActivePage());
            const outputText = res.output;
            if (typeof outputText !== "string" || outputText === "") {
                throw new Error(`Extract failed: Agent did not complete with output. Task status: ${res.status}. Check debug output for details.`);
            }
            // With a schema the output is parsed as JSON; without one the raw
            // text is returned.
            return outputSchema ? JSON.parse(outputText) : outputText;
        };
        return hyperPage;
    }
}
exports.HyperAgent = HyperAgent;
// aiAction configuration constants
HyperAgent.AIACTION_CONFIG = {
    MAX_RETRIES: 10,
    RETRY_DELAY_MS: 1000,
    CLICK_TIMEOUT: 3500,
    MAX_DEBUG_ELEMENTS_TO_DISPLAY: 20,
    MAX_DEBUG_ELEMENTS_TO_STORE: 50,
    MAX_LABEL_LENGTH: 60,
};
/**
 * Log how long a step took, in whole milliseconds, when debug mode is on.
 * @param debug Whether to log at all
 * @param label Prefix for the log line
 * @param start `performance.now()` value captured when the step began
 */
function logPerf(debug, label, start) {
    if (!debug) {
        return;
    }
    const duration = perf_hooks_1.performance.now() - start;
    console.log(`${label} took ${Math.round(duration)}ms`);
}