@hyperbrowser/agent
Version:
Hyperbrowser's Web Agent
185 lines (184 loc) • 6.67 kB
TypeScript
import { Browser, BrowserContext, Page } from "playwright-core";
import { BrowserProviders, HyperAgentConfig, MCPConfig, MCPServerConfig } from "../types/config";
import { ActionType, Task, TaskOutput, TaskParams } from "../types";
import { HyperPage, HyperVariable } from "../types/agent/types";
/**
* Type declaration for the HyperAgent class.
*
* The class is generic over the browser provider `T` (constrained to
* `BrowserProviders`, defaulting to "Local"); `T` parameterizes the
* `HyperAgentConfig<T>` accepted by the constructor. The public surface declared
* here covers browser/page lifecycle, variables, task execution, and MCP server
* management.
*
* Private members are listed without types, as is usual in declaration output;
* consult the implementation for their shapes.
*/
export declare class HyperAgent<T extends BrowserProviders = "Local"> {
// NOTE(review): presumably static settings for page.aiAction(); contents are not visible here.
private static readonly AIACTION_CONFIG;
private llm;
private tasks;
private tokenLimit;
private debug;
private mcpClient;
private browserProvider;
private browserProviderType;
// Do not read this directly; the getActions doc below asks callers to go through getActions.
private actions;
private cdpActionsEnabled;
/**
* The Playwright browser handle, or null.
* NOTE(review): presumably null until initBrowser() has run — confirm.
*/
browser: Browser | null;
/**
* The Playwright browser context, or null.
* NOTE(review): presumably null until the browser has been initialized — confirm.
*/
context: BrowserContext | null;
private _currentPage;
private _variables;
private errorEmitter;
/**
* The current page as a HyperPage, or null when there is none.
*/
get currentPage(): HyperPage | null;
/**
* Set the current page from a plain Playwright Page.
* NOTE(review): the getter returns HyperPage while the setter accepts Page, so
* the setter presumably wraps the page (see setupHyperPage) — confirm.
* @param page The Playwright page to make current
*/
set currentPage(page: Page);
/**
* Create a new agent.
* @param params Optional agent configuration, typed by the browser provider `T`
*/
constructor(params?: HyperAgentConfig<T>);
/**
* This is just exposed as a utility function. You don't need to call it explicitly.
* NOTE(review): earlier documentation described the result as a
* rebrowser-playwright instance, but the declared type is `Browser` from
* "playwright-core" — confirm which package backs it at runtime.
* @returns A promise that resolves to the current browser instance.
*/
initBrowser(): Promise<Browser>;
/**
* Use this function instead of accessing this.actions directly.
* This function configures if there is a need for an output schema as a part of the complete action.
* The signature is not visible in this declaration because the member is private.
* @param outputSchema Output schema to apply to the complete action, if any (type not visible here)
* @returns The actions to use (type not visible here)
*/
private getActions;
/**
* Get all variables
* @returns Record of variables, keyed by string
*/
getVariables(): Record<string, HyperVariable>;
/**
* Add a variable
* NOTE(review): the key under which the variable is stored is presumably taken
* from the variable itself — confirm against the HyperVariable type.
* @param variable The variable to add
*/
addVariable(variable: HyperVariable): void;
/**
* Get a variable
* @param key Key of the variable
* @returns The variable; the return type allows undefined (presumably when the key is absent)
*/
getVariable(key: string): HyperVariable | undefined;
/**
* Delete a variable
* @param key Key of the variable
*/
deleteVariable(key: string): void;
/**
* Get all pages in the context
* @returns A promise that resolves to an array of HyperPage objects
*/
getPages(): Promise<HyperPage[]>;
/**
* Create a new page in the context
* @returns A promise that resolves to the new HyperPage object
*/
newPage(): Promise<HyperPage>;
/**
* Close the agent and all associated resources
*/
closeAgent(): Promise<void>;
/**
* Get the current page or create a new one if none exists
* @returns A promise that resolves to the current page
*/
getCurrentPage(): Promise<Page>;
/**
* Get task control object for a specific task
* @param taskId ID of the task
* @returns Task control object
*/
private getTaskControl;
/**
* Execute a task asynchronously and return a Task control object
* @param task The task to execute
* @param params Optional parameters for the task
* @param initPage Optional page to use for the task
* @returns A promise that resolves to a Task control object for managing the running task
*/
executeTaskAsync(task: string, params?: TaskParams, initPage?: Page): Promise<Task>;
/**
* Execute a task and wait for completion
* @param task The task to execute
* @param params Optional parameters for the task
* @param initPage Optional page to use for the task
* @returns A promise that resolves to the task output
*/
executeTask(task: string, params?: TaskParams, initPage?: Page): Promise<TaskOutput>;
/**
* Find element with retry logic
* Retries element finding with DOM refetch until element is found or max retries reached
*
* @param instruction Natural language instruction for the action
* @param page The page to search on
* @param maxRetries Maximum number of retry attempts
* @param retryDelayMs Delay between retries in milliseconds
* @returns Object containing the found element, DOM state, and element map
* @throws Error if element is not found after all retries
*/
private findElementWithRetry;
// NOTE(review): presumably persists debugging artifacts; behavior is not visible in this declaration.
private writeDebugData;
/**
* Collect interactive elements from element map for debugging
* Extracts elements with interactive roles (button, link, textbox, etc.)
*
* @param elementMap Map of element IDs to element data
* @param limit Maximum number of elements to collect
* @returns Array of interactive elements with id, role, and label
*/
private collectInteractiveElements;
/**
* Execute a single granular action using a11y mode
* Internal method used by page.aiAction()
*
* Architecture: Simple examine->act flow
* - 1 LLM call (examineDom finds element and suggests method)
* - Direct execution (no agent loop)
*
* @param instruction Natural language instruction for a single action
* @param page The page to execute the action on
* @returns A promise that resolves to the task output
*/
private executeSingleAction;
/**
* Register a new action with the agent
* @param action The action to register
*/
private registerAction;
/**
* Initialize the MCP client with the given configuration
* @param config The MCP configuration
*/
initializeMCPClient(config: MCPConfig): Promise<void>;
/**
* Connect to an MCP server at runtime
* @param serverConfig Configuration for the MCP server
* @returns A promise that resolves to the server ID if connection was successful;
* the return type also allows null (presumably when the connection fails)
*/
connectToMCPServer(serverConfig: MCPServerConfig): Promise<string | null>;
/**
* Disconnect from a specific MCP server
* @param serverId ID of the server to disconnect from
* @returns Boolean indicating if the disconnection was successful
*/
disconnectFromMCPServer(serverId: string): boolean;
/**
* Check if a specific MCP server is connected
* @param serverId ID of the server to check
* @returns Boolean indicating if the server is connected
*/
isMCPServerConnected(serverId: string): boolean;
/**
* Get all connected MCP server IDs
* @returns Array of server IDs
*/
getMCPServerIds(): string[];
/**
* Get information about all connected MCP servers
* @returns Array of server information objects (id, tool count, tool names) or null if no MCP client is initialized
*/
getMCPServerInfo(): Array<{
id: string;
toolCount: number;
toolNames: string[];
}> | null;
/**
* Pretty print an action
* @param action The action to print
* @returns Formatted string representation of the action
*/
pprintAction(action: ActionType): string;
/**
* Get the session backing this agent.
* NOTE(review): which variant is returned presumably depends on the configured
* browser provider — confirm against the implementation.
* @returns A Playwright Browser, a Hyperbrowser SDK SessionDetail, or null
*/
getSession(): Browser | import("@hyperbrowser/sdk/types").SessionDetail | null;
// NOTE(review): presumably converts a Playwright Page into a HyperPage; not visible in this declaration.
private setupHyperPage;
}