UNPKG

@hyperbrowser/agent

Version:

Hyperbrowsers Web Agent

294 lines (293 loc) 13.7 kB
"use strict"; /** * Build backend ID maps for DOM traversal and xpath generation */ Object.defineProperty(exports, "__esModule", { value: true }); exports.buildBackendIdMaps = buildBackendIdMaps; const utils_1 = require("./utils"); async function annotateIframeBoundingBoxes(session, frameMap, debug) { if (!frameMap.size) { return; } for (const [frameIndex, frameInfo] of frameMap.entries()) { if (!frameInfo.iframeBackendNodeId) continue; try { const response = await session.send("DOM.getBoxModel", { backendNodeId: frameInfo.iframeBackendNodeId, }); const content = response?.model?.content; if (!content || content.length < 8) continue; const xs = [content[0], content[2], content[4], content[6]]; const ys = [content[1], content[3], content[5], content[7]]; const left = Math.min(...xs); const right = Math.max(...xs); const top = Math.min(...ys); const bottom = Math.max(...ys); const rect = { x: left, y: top, left, top, right, bottom, width: right - left, height: bottom - top, }; frameInfo.absoluteBoundingBox = rect; } catch { // error just means it's out of viewport // if (debug) { // console.warn( // `[DOM] Failed to compute bounding box for frame ${frameIndex}:`, // error // ); // } } } } /** * Join XPath segments */ function joinStep(base, step) { return base.endsWith("//") ? `${base}${step}` : `${base}/${step}`; } /** * Extract accessible name from DOM node attributes * Prioritizes: aria-label > title > placeholder * Returns undefined if no accessible name found */ function extractAccessibleName(attributes) { if (!attributes || attributes.length === 0) return undefined; let ariaLabel; let title; let placeholder; // CDP attributes are flat array: ["name1", "value1", "name2", "value2"] for (let i = 0; i < attributes.length; i += 2) { const attrName = attributes[i]; const attrValue = attributes[i + 1]; if (attrName === "aria-label" && attrValue) { ariaLabel = attrValue; } else if (attrName === "title" && attrValue) { title = attrValue; } else if (attrName === "placeholder" && attrValue) { placeholder = attrValue; } } // Return in priority order return ariaLabel || title || placeholder; } /** * Build maps from backend node IDs to tag names and XPaths * This is essential for enhancing accessibility nodes with DOM information */ async function buildBackendIdMaps(session, frameIndex = 0, debug = false, pierce = true // Default true for main frame, false for OOPIF to avoid capturing transient nested frames ) { try { // Step 1: Get full DOM tree from CDP // pierce=true: traverses into same-origin iframes (main frame needs this) // pierce=false: stops at iframe boundaries (OOPIF processing - nested OOPIFs have their own sessions) const { root } = (await session.send("DOM.getDocument", { depth: -1, pierce, })); // Step 2: Initialize maps const tagNameMap = {}; const xpathMap = {}; const accessibleNameMap = {}; // Maps encodedId -> accessible name const backendNodeMap = {}; const frameMap = new Map(); // Maps frameIndex -> iframe metadata // Debug: Count DOM nodes by frame (only if debug enabled) const domNodeCounts = debug ? new Map() : null; const inputElementsByFrame = debug ? new Map() : null; // DEBUG: Track encodedId uniqueness (only if debug enabled) const encodedIdCounts = debug ? new Map() : null; const stack = [ { node: root, path: "", currentFrameIndex: frameIndex }, ]; const seen = new Set(); let nextFrameIndex = frameIndex + 1; // Counter for iframe indices // Track sibling positions for frames with same parent+URL // Key: "parentFrameIndex:url", Value: position counter const siblingPositions = new Map(); while (stack.length) { const { node, path, currentFrameIndex } = stack.pop(); // Skip nodes without backend ID if (!node.backendNodeId) continue; // Create encoded ID with current frame index const encodedId = (0, utils_1.createEncodedId)(currentFrameIndex, node.backendNodeId); // DEBUG: Track encodedId creation (only if debug enabled) if (debug && encodedIdCounts) { encodedIdCounts.set(encodedId, (encodedIdCounts.get(encodedId) || 0) + 1); if (encodedIdCounts.get(encodedId) > 1) { console.warn(`[buildBackendIdMaps] ⚠️ Duplicate encodedId: "${encodedId}" (frameIndex=${currentFrameIndex}, backendNodeId=${node.backendNodeId}, tagName=${String(node.nodeName).toLowerCase()}), count=${encodedIdCounts.get(encodedId)}`); } } // Skip if already seen if (seen.has(encodedId)) continue; seen.add(encodedId); // Store tag name and xpath const tagName = String(node.nodeName).toLowerCase(); tagNameMap[encodedId] = tagName; xpathMap[encodedId] = path; backendNodeMap[encodedId] = node.backendNodeId; // Extract and store accessible name if present const accessibleName = extractAccessibleName(node.attributes); if (accessibleName) { accessibleNameMap[encodedId] = accessibleName; } // Debug: Count nodes by frame (only if debug enabled) if (debug && domNodeCounts) { domNodeCounts.set(currentFrameIndex, (domNodeCounts.get(currentFrameIndex) || 0) + 1); } // Debug: Count input/textarea elements (only if debug enabled) if (debug && inputElementsByFrame && (tagName === "input" || tagName === "textarea")) { inputElementsByFrame.set(currentFrameIndex, (inputElementsByFrame.get(currentFrameIndex) || 0) + 1); } // Handle iframe content documents (same-origin iframes only) // OOPIF (cross-origin) iframes won't have contentDocument due to security restrictions if (node.nodeName && node.nodeName.toLowerCase() === "iframe" && node.contentDocument) { // Assign a new frame index to this same-origin iframe's content // This frameIndex is based on DOM traversal order (DFS) and is authoritative const iframeFrameIndex = nextFrameIndex++; // Extract iframe attributes for frame resolution // CDP DOM.getDocument returns attributes as flat array: ["name", "value", "name2", "value2"] const attributes = node.attributes || []; let iframeSrc; let iframeName; for (let i = 0; i < attributes.length; i += 2) { const attrName = attributes[i]; const attrValue = attributes[i + 1]; if (attrName === "src") { iframeSrc = attrValue; } else if (attrName === "name") { iframeName = attrValue; } } // Try to get CDP frameId (typically undefined for same-origin iframes) // Same-origin iframes usually don't have a frameId in the DOM.getDocument response // because they're pierced inline. We'll match by backendNodeId later in syncFrameContextManager. const cdpFrameId = node.contentDocument.frameId; if (debug && !cdpFrameId) { console.log(`[DOM] Same-origin iframe without frameId (expected) - will match by backendNodeId=${node.backendNodeId}`); } // Track sibling position for this iframe const siblingKey = `${currentFrameIndex}:${iframeSrc || "no-src"}`; const siblingPosition = siblingPositions.get(siblingKey) || 0; siblingPositions.set(siblingKey, siblingPosition + 1); // Store iframe metadata for later frame resolution // Note: cdpFrameId is typically undefined for same-origin iframes in DOM.getDocument response // We rely on iframeBackendNodeId for matching in syncFrameContextManager const iframeInfo = { frameIndex: iframeFrameIndex, src: iframeSrc, name: iframeName, xpath: path, // XPath to the iframe element itself frameId: cdpFrameId, // Usually undefined for same-origin cdpFrameId, // Usually undefined for same-origin (kept for debugging) parentFrameIndex: currentFrameIndex, // Parent frame siblingPosition, // Position among siblings with same parent+URL iframeBackendNodeId: node.backendNodeId, // backendNodeId of <iframe> element (PRIMARY matching key) contentDocumentBackendNodeId: node.contentDocument.backendNodeId, // backendNodeId of content document root }; frameMap.set(iframeFrameIndex, iframeInfo); if (debug) { console.log(`[DOM] Iframe detected: frameIndex=${iframeFrameIndex}, parent=${currentFrameIndex}, iframeBackendNodeId=${node.backendNodeId}, contentDocBackendNodeId=${node.contentDocument.backendNodeId}, cdpFrameId="${cdpFrameId}", src="${iframeSrc}", siblingPos=${siblingPosition}`); } // Reset path for iframe content (XPath is relative to iframe document) stack.push({ node: node.contentDocument, path: "", currentFrameIndex: iframeFrameIndex, }); } // Handle shadow roots if (node.shadowRoots?.length) { for (const shadowRoot of node.shadowRoots) { stack.push({ node: shadowRoot, path: `${path}//`, currentFrameIndex, }); } } // Process children const children = node.children ?? []; if (children.length) { // Build XPath segments for each child (left-to-right) const segments = []; const counter = {}; for (const child of children) { const tag = String(child.nodeName).toLowerCase(); const key = `${child.nodeType}:${tag}`; const idx = (counter[key] = (counter[key] ?? 0) + 1); if (child.nodeType === 3) { // Text node segments.push(`text()[${idx}]`); } else if (child.nodeType === 8) { // Comment node segments.push(`comment()[${idx}]`); } else { // Element node // Handle namespaced elements (e.g., "svg:path") segments.push(tag.includes(":") ? `*[name()='${tag}'][${idx}]` : `${tag}[${idx}]`); } } // Push children in reverse order so traversal remains left-to-right for (let i = children.length - 1; i >= 0; i--) { stack.push({ node: children[i], path: joinStep(path, segments[i]), currentFrameIndex, }); } } } // Debug: Log DOM tree statistics (only if debug enabled) if (debug && domNodeCounts && inputElementsByFrame) { console.log("[DOM.getDocument] DOM tree statistics:"); for (const [frameIdx, count] of Array.from(domNodeCounts.entries()).sort((a, b) => a[0] - b[0])) { const inputs = inputElementsByFrame.get(frameIdx) || 0; const frameInfo = frameMap.get(frameIdx); const frameName = frameInfo ? frameInfo.src || frameInfo.name || `frame-${frameIdx}` : `frame-${frameIdx}`; console.log(` Frame ${frameIdx} (${frameName}): ${count} DOM nodes, ${inputs} input/textarea elements`); } } await annotateIframeBoundingBoxes(session, frameMap, debug); return { tagNameMap, xpathMap, accessibleNameMap, backendNodeMap, frameMap, }; } catch (error) { console.error("Error building backend ID maps:", error); return { tagNameMap: {}, xpathMap: {}, accessibleNameMap: {}, backendNodeMap: {}, frameMap: new Map(), }; } }