UNPKG

@hyperbrowser/agent

Version:

Hyperbrowsers Web Agent

105 lines (104 loc) 4.5 kB
"use strict";
/**
 * Shared utility for finding elements via natural language instructions
 * Extracted from findElementWithRetry for reusability
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.findElementWithInstruction = findElementWithInstruction;
const examine_dom_1 = require("../examine-dom");
const dom_capture_1 = require("./dom-capture");
const waitForSettledDOM_1 = require("../../utils/waitForSettledDOM");
/**
 * Find an element via natural language instruction with retry logic.
 *
 * On every attempt this function:
 * 1. Awaits waitForSettledDOM(page)
 * 2. Captures a FRESH DOM state via captureDOMState
 * 3. Calls examineDom with the instruction and the captured state
 * 4. Returns immediately if examineDom reported at least one element,
 *    otherwise sleeps `retryDelayMs` and tries again (unless it was the last attempt)
 *
 * Used by:
 * - findElementWithRetry (aiAction)
 * - actElement action (executeTask agent)
 *
 * @param instruction Natural language instruction (e.g., "click the Login button")
 * @param page Page object; only `page.url()` is used directly here, the page is
 *   otherwise handed to waitForSettledDOM and captureDOMState
 * @param llm LLM instance forwarded to examineDom
 * @param options Configuration options
 * @param options.maxRetries Total number of attempts (default 1). A value <= 0
 *   means no attempt is made and a failure result is returned immediately.
 * @param options.retryDelayMs Delay between attempts in milliseconds (default 1000)
 * @param options.debug When true, progress is logged with console.log (default false)
 * @returns On success: `{ success: true, element, domState, elementMap, llmResponse }`
 *   where `element` is the first element reported by examineDom.
 *   On failure: `{ success: false, domState, elementMap, llmResponse }` holding the
 *   data of the last attempt (`null` / `undefined` if no attempt ran).
 *   "Element not found" is reported through `success: false`, NOT by throwing.
 * @throws Whatever waitForSettledDOM, captureDOMState or examineDom throw; those
 *   errors are not caught here and abort the retry loop.
 */
async function findElementWithInstruction(instruction, page, llm, options = {}) {
    const { maxRetries = 1, retryDelayMs = 1000, debug = false } = options;
    // Data of the most recent attempt, reported back when every attempt fails
    let lastDomState = null;
    let lastElementMap = null;
    let lastLlmResponse;
    // Retry loop with DOM refresh (matches aiAction's findElementWithRetry pattern)
    for (let attempt = 0; attempt < maxRetries; attempt++) {
        if (debug) {
            if (attempt === 0) {
                console.log(`[findElement] Starting attempt ${attempt + 1}`);
            }
            else {
                console.log(`[findElement] Retry ${attempt + 1}/${maxRetries}`);
            }
        }
        await (0, waitForSettledDOM_1.waitForSettledDOM)(page);
        // Fetch a FRESH a11y tree using the shared capture utility.
        // Per the original authors' note, captureDOMState retries bad snapshots
        // internally for this *single* capture; the outer loop here retries the
        // *finding* logic (e.g. if the LLM can't find the element).
        const domState = await (0, dom_capture_1.captureDOMState)(page, {
            debug,
            // Keep the inner capture retries low since we have an outer loop here,
            // but still allow it to recover from transient CDP errors
            maxRetries: 2,
        });
        if (debug) {
            console.log(`[findElement] Fetched a11y tree: ${domState.elements.size} elements`);
        }
        // Convert elements map to string-only keys for examineDom
        const elementMap = new Map(Array.from(domState.elements).map(([k, v]) => [String(k), v]));
        if (debug) {
            console.log(`[findElement] Calling examineDom to find element for: "${instruction}"`);
        }
        const examineResult = await (0, examine_dom_1.examineDom)(instruction, {
            tree: domState.domState,
            xpathMap: domState.xpathMap || {},
            elements: elementMap,
            url: page.url(),
        }, llm);
        // Store last attempt's data for error case
        lastDomState = domState;
        lastElementMap = elementMap;
        lastLlmResponse = examineResult?.llmResponse;
        // A missing result or a result without `elements` counts as "not found"
        // so that the retry loop keeps going instead of crashing with a TypeError
        const foundCount = examineResult?.elements?.length ?? 0;
        if (foundCount > 0) {
            if (debug && attempt > 0) {
                console.log(`[findElement] Element found on attempt ${attempt + 1}`);
            }
            return {
                success: true,
                element: examineResult.elements[0],
                domState,
                elementMap,
                llmResponse: examineResult.llmResponse,
            };
        }
        // Retry if not last attempt
        if (attempt < maxRetries - 1) {
            if (debug) {
                console.log(`[findElement] Element not found, retrying in ${retryDelayMs}ms (attempt ${attempt + 1}/${maxRetries})...`);
            }
            await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
            // The next iteration waits for the DOM to settle again before re-capturing
        }
    }
    // Max retries reached - return failure with last attempt's data
    return {
        success: false,
        domState: lastDomState,
        elementMap: lastElementMap,
        llmResponse: lastLlmResponse,
    };
}