UNPKG

donobu

Version:

Create browser automations with an LLM agent and replay them as Playwright scripts.

790 lines 33.3 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.installSmartSelectorGenerator = installSmartSelectorGenerator; function installSmartSelectorGenerator() { // --------------------------------------------------------------------------- // Setup // --------------------------------------------------------------------------- const dnb = window.__donobu; if (!dnb) { throw new Error('[Donobu] __donobu namespace missing; smart-selector-generator cannot initialize.'); } else if (dnb.generateSmartSelectors && dnb.generateSmartSelectorLayers) { return; } const escapeCss = typeof CSS !== 'undefined' && typeof CSS.escape === 'function' ? CSS.escape.bind(CSS) : (value) => value.replace(/[^\w-]/g, (c) => '\\' + c); // --------------------------------------------------------------------------- // Tiny utils // --------------------------------------------------------------------------- /** * Properly quotes an attribute value for use in CSS selectors. * Handles escaping of backslashes and single quotes. * Example: quoteCssAttr("user's name") → "'user\\'s name'" */ const quoteCssAttr = (v) => `'${String(v) .replace(/\\/g, '\\\\') // escape backslashes .replace(/'/g, "\\'") // escape single quotes .replace(/\r?\n/g, '\\A ') // escape newlines as CSS \A (newline) }'`; /** * Counts how many elements match a CSS selector within a given scope. * Handles invalid selectors gracefully by returning 0 and logging warnings. * * @param {string} sel - CSS selector to test * @param {Document|ShadowRoot|Element} scope - Root element to search within * @returns {number} Number of matching elements */ const countMatchesCSS = (sel, scope) => { if (!sel || !scope || typeof scope.querySelectorAll !== 'function') { return 0; } try { return scope.querySelectorAll(sel).length; } catch (e) { const message = e instanceof Error ? e.message : String(e); console.warn('[Donobu] Invalid CSS selector:', sel, message); return 0; } }; /** * Counts how many elements match an XPath expression within a given scope. * XPath is more powerful than CSS for text-based matching. * * @param {string} xp - XPath expression to evaluate * @param {Document|ShadowRoot|Element} scope - Root element to search within * @returns {number} Number of matching elements */ const countMatchesXPath = (xp, scope) => { if (!xp || !scope) { return 0; } try { const doc = getDocumentForScope(scope); if (!doc) { return 0; } return doc.evaluate(xp, scope, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null).snapshotLength; } catch (e) { const message = e instanceof Error ? e.message : String(e); console.warn('[Donobu] Invalid XPath expression:', xp, message); return 0; } }; /** * Gets the appropriate Document object for XPath evaluation. * Shadow roots need their host's document, regular elements use their ownerDocument. */ const getDocumentForScope = (scope) => { if (scope instanceof Document) { return scope; } else if (scope instanceof ShadowRoot) { return scope.host?.ownerDocument || document; } else { return scope?.ownerDocument || document; } }; /* --------------------------------------------------------------- */ /* Collect all (open) shadow hosts between element and document */ /* --------------------------------------------------------------- */ /** * Collects all shadow hosts in the path from an element to the document root. * This is essential for generating selectors that work across shadow boundaries. * * Modern web apps heavily use Shadow DOM (web components, React portals, etc.) * and selectors must account for these boundaries. * * @param {Element} el - Target element * @returns {Array<{host: Element, open: boolean}>} Chain of shadow hosts, nearest-to-document first */ function gatherShadowChain(el) { const chain = []; // [{ host, open }, …] nearest-to-document first let node = el; const visited = new WeakSet(); // Prevent infinite loops while (node && node !== document && !visited.has(node)) { visited.add(node); const root = node.getRootNode?.(); if (root instanceof ShadowRoot) { chain.unshift({ host: root.host, open: root.mode === 'open' }); node = root.host; } else { node = node.parentNode; } } return chain; } /** * Detects machine-generated identifiers that are likely to change. * These heuristics help avoid creating brittle selectors based on: * - Webpack hash IDs * - UUID-style identifiers * - Long hexadecimal strings * * @param {string|SVGAnimatedString} raw - ID or class name to test * @returns {boolean} True if the value looks machine-generated */ const isHashLike = (raw) => { if (raw === null || raw === undefined) { return false; } const str = typeof raw === 'string' ? raw : typeof raw.baseVal === 'string' // SVGAnimatedString ? raw.baseVal : String(raw); /* Heuristics */ return ( // 1. Pure 6+ hex digits /^[a-f0-9]{6,}$/i.test(str) || // 2. UUID v4 style /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(str) || // 3. ≥2 long hex-ish segments joined with - or _ str.split(/[-_]/).filter((s) => /^[a-f0-9]{6,}$/i.test(s)).length >= 2); }; // Semantic HTML5 elements that are likely to be unique and stable. const LANDMARK_TAGS = [ 'html', 'body', 'header', 'nav', 'main', 'footer', 'form', ]; /** * Safely escapes text for use in XPath expressions. * XPath has complex quoting rules, especially when text contains both single and double quotes. * This function handles all edge cases including control characters and Unicode. * * @param {string} txt - Text to escape for XPath * @returns {string} Properly escaped XPath string literal */ const safeXpath = (txt) => { // Remove control characters but preserve valid Unicode including surrogate pairs const cleaned = String(txt) .replace(/[\u0000-\u001F\u007F-\u009F]/g, '') .replace(/\\/g, '\\\\'); // If no quotes at all, use single quotes (simplest case) if (!cleaned.includes("'") && !cleaned.includes('"')) { return `'${cleaned}'`; } // If only double quotes, use single quotes if (!cleaned.includes("'")) { return `'${cleaned}'`; } // If only single quotes, use double quotes if (!cleaned.includes('"')) { return `"${cleaned}"`; } // Both types of quotes present - need to use concat() const parts = cleaned.split("'"); const concatParts = []; for (let i = 0; i < parts.length; i++) { if (i > 0) { // Add the single quote that was removed by split concatParts.push('"\'"'); } if (parts[i]) { // If this part contains double quotes, escape them const part = parts[i].includes('"') ? `'${parts[i]}'` // Safe to use single quotes : `"${parts[i]}"`; // Use double quotes for variety/readability concatParts.push(part); } } return `concat(${concatParts.join(', ')})`; }; /** * Safely extracts className from both regular DOM and SVG elements. * SVG elements have className as an SVGAnimatedString object, not a string. * * @param {Element} el - Element to get class value from * @returns {string} Class value as a string */ const getClassValue = (el) => { const className = el.className; if (!className) { return ''; } if (typeof className === 'string') { // Regular DOM element return className; } // SVG element with SVGAnimatedString if (typeof className === 'object' && 'baseVal' in className) { return className.baseVal; } // Fallback for unknown className types return String(className); }; // --------------------------------------------------------------------------- // Core // --------------------------------------------------------------------------- /** * The main class that generates smart selectors for a given element. * * Strategy overview: * 1. Try semantic anchors first (ID, ARIA, data attributes, text content). * 2. Fall back to positional selectors when semantic ones aren't unique. * 3. Rank results by uniqueness, then by semantic value, then by length. * * Weight system (higher = higher priority): * - 100: Unique, human-readable ID * - 95: data-testid, data-test attributes * - 90: aria-label * - 88: Label associations * - 85: Unique text content * - 80: name attribute * - 70: title attribute * - 60: role attribute * - 50: href attribute * - 40: other data-* attributes * - 35: stable class names * - 20: positional with stable ancestors * - 1: full DOM path (last resort) */ class SelectorBuilder { constructor(el) { this.el = el; this.tag = el.tagName.toLowerCase(); const rootNode = el.getRootNode?.(); this.root = rootNode instanceof ShadowRoot || rootNode instanceof Document ? rootNode : document; this.list = new Map(); // selector → {cnt, weight} } /** * Main entry point - generates and ranks all possible selectors. * @returns {Array<string>} Ordered array of selectors, best first. */ build() { /* 1. Semantic anchors */ this.idAnchor(); // #my-button this.ariaAnchor(); // [aria-label="Submit form"] this.attrAnchors(); // [data-testid="login-btn"] this.placeholderAnchor(); // [placeholder="Enter email"] this.textAnchor(); // .//button[text()="Click me"] this.labelAnchor(); // .//label[text()="Email"]/input this.classAnchor(); // button.primary-btn /* 2. Positional fall-backs - used when semantic anchors aren't unique */ this.stableAncestorAnchors(); // #header > nav > button:nth-of-type(2) this.fullDomPath(); // html > body > div:nth-of-type(3) > button /* 3. Rank by uniqueness (lower count = better), then weight, then length */ const results = [...this.list.entries()] .filter(([, m]) => m.cnt > 0) // valid only .sort((a, b) => { const da = a[1], db = b[1]; if (da.cnt !== db.cnt) { return da.cnt - db.cnt; } if (da.weight !== db.weight) { return db.weight - da.weight; } // Higher weight = higher priority return a[0].length - b[0].length; }) .map(([sel]) => sel); // Clean up to prevent memory leaks this.list.clear(); return results; } /* ---------------------------------------------------------------------- */ /* Utils */ /* ---------------------------------------------------------------------- */ /** * Adds a selector to the candidate list if it actually matches the target element. * Supports both CSS selectors and XPath expressions. * * @param {string} sel - Selector to test * @param {number} weight - Priority weight (higher = more preferred) */ push(sel, weight) { if (!sel || typeof sel !== 'string') { return; } let cnt = 0; let matchesOurElement = false; const isXPath = sel.startsWith('/') || sel.startsWith('.//') || sel.startsWith('('); try { if (isXPath) { const doc = getDocumentForScope(this.root); if (!doc) { return; } const snap = doc.evaluate(sel, this.root, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); cnt = snap.snapshotLength; if (cnt > 0) { for (let i = 0; i < cnt; i++) { if (snap.snapshotItem(i) === this.el) { matchesOurElement = true; break; } } } } else { // CSS Selector const results = this.root.querySelectorAll(sel); cnt = results.length; if (cnt > 0) { matchesOurElement = Array.from(results).includes(this.el); } } } catch (e) { // Log for debugging but don't throw const message = e instanceof Error ? e.message : String(e); console.warn('[Donobu] Invalid selector:', sel, message); return; } if (matchesOurElement) { this.list.set(sel, { cnt, weight }); } } /** * Adds a base selector and tries to make it unique by adding parent context. * This is key to the "smart" behavior - we start with semantic selectors * and add just enough context to make them unique. * * Example: button.submit → #form > button.submit → #header > #form > button.submit * * @param {string} baseSel - Base CSS selector * @param {number} weight - Priority weight * @param {number} maxDepth - Maximum ancestor levels to try */ scopedUntilUnique(baseSel, weight, maxDepth = 5) { this.push(baseSel, weight - 10); // keep raw anchor (lower priority) if (countMatchesCSS(baseSel, this.root) === 1) { this.push(baseSel, weight); // High priority if unique return; } let cur = this.el; let depth = 0; let sel = baseSel; const visited = new WeakSet(); // Prevent infinite loops while (cur.parentElement && depth < maxDepth && !visited.has(cur)) { visited.add(cur); const nextElem = cur.parentElement; const tag = nextElem.tagName.toLowerCase(); const siblings = nextElem.parentElement ? Array.from(nextElem.parentElement.children) : []; const siblingsSame = siblings.filter((c) => c.tagName.toLowerCase() === tag); const parentSeg = siblingsSame.length > 1 ? `${tag}:nth-of-type(${siblingsSame.indexOf(nextElem) + 1})` : tag; sel = `${parentSeg} > ${sel}`; if (countMatchesCSS(sel, this.root) === 1) { this.push(sel, weight); // unique variant return; } cur = nextElem; depth += 1; } } /* ---------------------------------------------------------------------- */ /* Anchors – High weight = high priority */ /* ---------------------------------------------------------------------- */ /** * Generates ID-based selectors, but penalizes machine-generated IDs. * Machine-generated IDs (hashes, UUIDs) are likely to change between builds. */ idAnchor() { const id = this.el.id; if (!id) { return; } const dynamic = isHashLike(id) || id.length > 24; this.push(`#${escapeCss(id)}`, dynamic ? 20 : 100); } /** * Generates ARIA label selectors - these are excellent for accessibility * and tend to be stable since they're user-facing. */ ariaAnchor() { const aria = this.el.getAttribute('aria-label'); if (aria) { this.scopedUntilUnique(`[aria-label=${quoteCssAttr(aria)}]`, 90); } } /** * Generates selectors for various attributes, prioritizing test-specific ones. * data-testid and data-test are specifically added for testing and are very stable. */ attrAnchors() { const ATTRS = { 'data-testid': 95, 'data-test': 95, name: 80, title: 70, role: 60, href: 50, }; for (const { name, value } of Array.from(this.el.attributes)) { if (!Object.keys(ATTRS).includes(name) && !name.startsWith('data-')) { continue; } else if (name.startsWith('data-donobu-')) { continue; } const sel = `[${name}=${quoteCssAttr(value)}]`; this.scopedUntilUnique(sel, ATTRS[name] ?? 40); } } /** * Generates selectors based on placeholder text. * Placeholders are user-visible and typically stable. */ placeholderAnchor() { const ph = this.el.getAttribute('placeholder'); if (ph) { this.scopedUntilUnique(`[placeholder=${quoteCssAttr(ph)}]`, 75); } } /** * Generates XPath selectors based on text content. * Text-based selectors are very semantic but can be brittle if text changes. * Uses XPath because CSS can't match on text content directly. */ textAnchor() { const raw = this.el.textContent ?? ''; const text = raw.trim(); if (!text || ['body', 'html'].includes(this.tag) || text.length > 100) { return; } const normalizedText = text.replace(/\s+/g, ' ').trim(); const baseXP = `.//${this.tag}[normalize-space(.)=${safeXpath(normalizedText)}]`; const cnt = countMatchesXPath(baseXP, this.root); if (cnt === 0) { return; } this.push(baseXP, cnt === 1 ? 85 : 30); // Semantic anchor is valuable even if not unique if (cnt > 1) { const doc = getDocumentForScope(this.root); if (!doc) { return; } try { const snap = doc.evaluate(baseXP, this.root, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); let idx = -1; for (let i = 0; i < cnt; i += 1) { if (snap.snapshotItem(i) === this.el) { idx = i + 1; break; } } if (idx > 0) { const uniqueXP = `(${baseXP})[${idx}]`; this.push(uniqueXP, 85); // same weight as a unique semantic anchor } } catch (e) { const message = e instanceof Error ? e.message : String(e); console.warn('[Donobu] XPath evaluation failed:', baseXP, message); } } } /** * Generates selectors for form inputs based on their associated labels. * This creates very semantic selectors that mirror how users think about forms. * Handles both for/id associations and wrapping label elements. */ labelAnchor() { if (!['input', 'textarea', 'select'].includes(this.tag)) { return; } // --- Helper for <label for> and wrapping <label> --- // This helper assumes a close structural relationship (sibling/descendant) const addForLabel = (lab) => { const txt = (lab.textContent || '').trim(); if (!txt) { return; } const xp = safeXpath(txt); // Assumes input is a descendant this.push(`.//label[normalize-space()=${xp}]//${this.tag}`, 88); // Assumes input is a following sibling this.push(`.//label[normalize-space()=${xp}]/following-sibling::${this.tag}`, 88); }; // --- Pattern 1: <label for="..."> --- if (this.el.id) { const lab = this.root.querySelector(`label[for=${quoteCssAttr(this.el.id)}]`); if (lab) { addForLabel(lab); } } // --- Pattern 2: Wrapping <label> --- const wrapLab = this.el.closest('label'); if (wrapLab) { addForLabel(wrapLab); } // --- Pattern 3: aria-labelledby --- const labelledby = this.el.getAttribute('aria-labelledby'); if (labelledby) { const labelIds = labelledby.split(/\s+/); for (const labelId of labelIds) { if (!labelId) { continue; } const lab = this.root.querySelector(`#${escapeCss(labelId)}`); if (lab) { const txt = (lab.textContent || '').trim(); if (txt) { const textXp = safeXpath(txt); const labelTag = lab.tagName.toLowerCase(); // This is the key: a robust XPath that finds the input by matching // its aria-labelledby attribute to the ID of a label found by its text. // It works regardless of where the label and input are in the DOM. // The `contains(concat(' ',...` part safely checks for a word in a space-separated list. const robustXp = `//${this.tag}[contains(concat(' ', normalize-space(@aria-labelledby), ' '), concat(' ', //${labelTag}[normalize-space()=${textXp}]/@id, ' '))]`; this.push(robustXp, 90); // Give it a high weight, similar to aria-label } } } } } /** * Generates class-based selectors, but only for human-readable class names. * Avoids CSS-in-JS generated classes and other machine-generated names. */ classAnchor() { const classValue = getClassValue(this.el); if (!classValue) { return; } const clsToken = classValue.split(/\s+/).find((c) => c && !isHashLike(c)); if (!clsToken) { return; } this.scopedUntilUnique(`${this.tag}.${escapeCss(clsToken)}`, 35); } /* -------------------------- Positional --------------------------------- */ /** * Generates selectors by finding stable ancestors and creating relative paths. * This creates selectors like: #header > nav > button:nth-of-type(2) * * The key insight is that while the target element might not have good identifiers, * its ancestors might, and we can create a stable path from those ancestors. */ stableAncestorAnchors() { let anc = this.el.parentElement; let depth = 0; const visited = new WeakSet(); // Prevent infinite loops while (anc && depth < 7 && !visited.has(anc)) { visited.add(anc); const sel = this.bestStableSelector(anc); if (sel) { const rel = this.relPath(anc, this.el); if (rel) { this.push(`${sel} > ${rel}`, 20); } } anc = anc.parentElement; depth += 1; } } /** * Generates a full DOM path selector as absolute last resort. * These are very brittle but will always work for the current page state. * Example: html > body > div:nth-of-type(3) > section > button:nth-of-type(1) */ fullDomPath() { let cur = this.el; const segs = []; const visited = new WeakSet(); // Prevent infinite loops while (cur?.parentElement && !visited.has(cur)) { visited.add(cur); let part = cur.tagName.toLowerCase(); const p = cur.parentElement; if (!p) { break; } const like = Array.from(p.children).filter((c) => c.tagName.toLowerCase() === part); if (like.length > 1) { let i = like.indexOf(cur) + 1; part += `:nth-of-type(${i})`; } segs.unshift(part); cur = p; } if (segs.length === 0) { const rootDoc = getDocumentForScope(this.root); if (this.el === rootDoc?.documentElement) { this.push('html', 1); return; } if (this.el === rootDoc?.body) { this.push('body', 1); return; } } else { this.push(segs.join(' > '), 1); } } /* ---------------------------------------------------------------------- */ /* Helpers */ /* ---------------------------------------------------------------------- */ /** * Finds the best stable selector for a given element. * "Stable" means likely to survive page updates and not be machine-generated. * * Priority order: * 1. Human-readable unique ID. * 2. Unique data attributes (especially test-related). * 3. Unique tag + stable class combination. * 4. Unique landmark tags. * * @param {Element} el - Element to find selector for * @returns {string|null} Best stable selector, or null if none found */ bestStableSelector(el) { const scopeNode = el.getRootNode?.(); const scope = scopeNode instanceof ShadowRoot || scopeNode instanceof Document ? scopeNode : document; if (el.id && !isHashLike(el.id) && el.id.length <= 24) { if (countMatchesCSS(`#${escapeCss(el.id)}`, scope) === 1) { return `#${escapeCss(el.id)}`; } } const ATTRS = [ 'data-testid', 'data-test', 'name', 'title', 'role', 'aria-label', ]; for (const a of ATTRS) { const v = el.getAttribute(a); if (v && countMatchesCSS(`[${a}=${quoteCssAttr(v)}]`, scope) === 1) { return `[${a}=${quoteCssAttr(v)}]`; } } const tag = el.tagName.toLowerCase(); const classValue = getClassValue(el); if (classValue) { const cls = classValue.split(/\s+/).find((c) => c && !isHashLike(c)); if (cls && countMatchesCSS(`${tag}.${escapeCss(cls)}`, scope) === 1) { return `${tag}.${escapeCss(cls)}`; } } if (LANDMARK_TAGS.includes(tag) && countMatchesCSS(tag, scope) === 1) { return tag; } return null; } /** * Creates a relative path between an ancestor and target element. * Used to build selectors like: ancestor > child:nth-of-type(2) > target * * @param {Element} anc - Ancestor element * @param {Element} tgt - Target element * @returns {string} Relative path selector */ relPath(anc, tgt) { const bits = []; let cur = tgt; const visited = new WeakSet(); // Prevent infinite loops while (cur && cur !== anc && !visited.has(cur)) { visited.add(cur); let seg = cur.tagName.toLowerCase(); const p = cur.parentElement; if (p) { const like = Array.from(p.children).filter((c) => c.tagName.toLowerCase() === seg); if (like.length > 1) { seg += `:nth-of-type(${like.indexOf(cur) + 1})`; } } bits.unshift(seg); cur = p; } return bits.join(' > '); } } /* --------------------------------------------------------------- */ /* Public: returns Array<Array<string>> */ /* --------------------------------------------------------------- */ /** * Generates selectors that work across shadow DOM boundaries. * * Modern web applications often use shadow DOM for encapsulation (web components, * React portals, design systems). A single element might be nested within multiple * shadow roots, each requiring separate selectors. * * This function returns an array of selector arrays - one for each shadow boundary * that needs to be crossed to reach the target element. * * Example output for an element deep in shadow DOM: * [ * ['#app'], // Selector for outermost shadow host * ['my-component'], // Selector for middle shadow host * ['button.primary', '#btn1'] // Selectors for target element * ] * * @param {Element} el - Target element (possibly inside shadow DOM) * @returns {Array<Array<string>>} Array of selector layers */ function generateSmartSelectorLayers(el) { const chain = gatherShadowChain(el); const layers = []; for (const { host, open } of chain) { if (!open) { layers.push(['✖︎ closed shadow-host']); continue; } layers.push(dnb.generateSmartSelectors(host)); } layers.push(dnb.generateSmartSelectors(el)); return layers; } // --------------------------------------------------------------------------- // Public API // --------------------------------------------------------------------------- if (!dnb.generateSmartSelectors) { Object.defineProperty(dnb, 'generateSmartSelectors', { value: (el) => { try { if (!el || typeof el.tagName !== 'string') { return []; } return new SelectorBuilder(el).build(); } catch (err) { console.warn('[Donobu] selector generation failed:', err); return []; } }, writable: false, enumerable: false, configurable: false, }); } if (!dnb.generateSmartSelectorLayers) { Object.defineProperty(dnb, 'generateSmartSelectorLayers', { value: (el) => { try { if (!el || typeof el.tagName !== 'string') { return []; } return generateSmartSelectorLayers(el); } catch (err) { console.warn('[Donobu] selector layer generation failed:', err); return []; } }, writable: false, enumerable: false, configurable: false, }); } } //# sourceMappingURL=smart-selector-generator.js.map