donobu
Version:
Create browser automations with an LLM agent and replay them as Playwright scripts.
269 lines (223 loc) • 8.16 kB
JavaScript
(() => {
// Prevent sneaky websites from undefining the CSS escape function.
// We run first and we can save the CSS escape function.
// If this script is evaluated again after the page has removed or replaced
// `CSS` / `CSS.escape`, probing must not throw: in that case keep whatever
// reference was saved by an earlier run instead of aborting the whole script.
window.donobuCssEscape =
  typeof CSS !== 'undefined' && CSS !== null && typeof CSS.escape === 'function'
    ? CSS.escape
    : window.donobuCssEscape;
/**
 * Pairs a selector string with the number of nodes it currently matches in
 * the document. Selectors beginning with `//` are evaluated as XPath; all
 * others are evaluated as CSS selectors.
 */
class SelectorToCount {
  /**
   * @param {string} selector - CSS selector, or XPath expression starting with `//`.
   * @throws Whatever `document.evaluate` / `document.querySelectorAll` throw
   *   for a malformed selector.
   */
  constructor(selector) {
    this.selector = selector;
    const isXPath = selector.startsWith('//');
    this.count = isXPath
      ? document.evaluate(
          selector,
          document,
          null,
          XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
          null,
        ).snapshotLength
      : document.querySelectorAll(selector).length;
  }
}
// Turns an arbitrary string into an XPath string literal.
// XPath literals have no escape character, so the quoting style is chosen
// based on which quote characters the value contains.
function xpathLiteral(value) {
  const containsSingleQuote = value.includes("'");
  if (!containsSingleQuote) {
    // No single quotes: a single-quoted literal is safe.
    return `'${value}'`;
  }
  const containsDoubleQuote = value.includes('"');
  if (!containsDoubleQuote) {
    // Only single quotes present: a double-quoted literal is safe.
    return `"${value}"`;
  }
  // Both quote kinds present: quote every run between single quotes on its
  // own and glue the runs back together with a literal "'" via concat(...).
  const quotedPieces = value.split("'").map((piece) => `'${piece}'`);
  return `concat(${quotedPieces.join(', "\'", ')})`;
}
/**
 * Produces candidate CSS and XPath selectors for one DOM element using
 * several independent strategies (id, classes, attributes, text, position,
 * tag, placeholder, aria-label, associated label, tag + classes).
 */
class SelectorGenerator {
  /**
   * @param {Element} element - The element to generate selectors for.
   */
  constructor(element) {
    this.element = element;
  }

  /**
   * Runs every strategy, de-duplicates the candidates, counts how many nodes
   * each one matches, drops those that match nothing or fail to evaluate, and
   * orders the rest by ascending match count.
   *
   * @returns {string[]} Selectors ordered from fewest to most matches.
   */
  generate() {
    // The Set keeps insertion order, so candidates with equal match counts
    // stay in the order the strategies are listed here.
    const selectors = new Set([
      ...this.getIdSelector(),
      ...this.getClassSelector(),
      ...this.getAttributeSelectors(),
      ...this.getTextBasedSelectors(),
      ...this.getNthChildSelectors(),
      this.getTagSelector(),
      ...this.getPlaceholderTextSelector(),
      ...this.getAriaLabelSelector(),
      ...this.getLabelBasedSelectors(),
      // Combine selectors for robustness
      ...this.getCombinedSelector(),
    ]);
    return Array.from(selectors)
      .map((selector) => {
        try {
          return new SelectorToCount(selector);
        } catch (e) {
          // A malformed candidate must not discard the remaining ones.
          console.warn(`Failed to create selector: ${selector}`, e);
          return null;
        }
      })
      .filter((entry) => entry !== null && entry.count !== 0)
      .sort((a, b) => a.count - b.count)
      .map((entry) => entry.selector);
  }

  /**
   * @returns {string[]} `#id` when the element has a non-empty id, else `[]`.
   */
  getIdSelector() {
    const id = this.element.id;
    return id ? [`#${this.escapeCss(id)}`] : [];
  }

  /**
   * Builds one compound class selector such as `.foo.bar`.
   *
   * Each class name is escaped on its own: escaping the already-joined string
   * would also escape the `.` separators and yield a selector for a single
   * class literally named "foo.bar", which never matches the element.
   *
   * @returns {string[]} A single-element array, or `[]` when the element has
   *   no class names.
   */
  getClassSelector() {
    // `className` is a plain string on most elements; when it is an object
    // instead, its `baseVal` carries the string value.
    const rawClassName =
      typeof this.element.className === 'string'
        ? this.element.className
        : this.element.className.baseVal;
    if (!rawClassName) {
      return [];
    }
    const classNames = rawClassName
      .trim()
      .split(/\s+/)
      .filter((name) => name !== '');
    if (classNames.length === 0) {
      // Whitespace-only class attribute: `.` alone is not a valid selector.
      return [];
    }
    return [classNames.map((name) => `.${this.escapeCss(name)}`).join('')];
  }

  /**
   * Builds `[name='value']` and `tag[name='value']` selectors for `href` and
   * for every `data-*` attribute except Donobu's own.
   *
   * @returns {string[]} Two selectors per qualifying attribute.
   */
  getAttributeSelectors() {
    const selectors = [];
    const tagName = this.element.tagName.toLowerCase();
    for (const attribute of Array.from(this.element.attributes)) {
      const rawName = attribute.name;
      // Filter out Donobu attributes since they're non-deterministic.
      const isUsableDataAttribute =
        rawName.startsWith('data-') && !rawName.startsWith('data-donobu-');
      if (!isUsableDataAttribute && rawName !== 'href') {
        continue;
      }
      const name = this.escapeCss(rawName);
      // CSS.escape already backslash-escapes single quotes. Escaping them a
      // second time would produce `\\'`, which closes the quoted string early
      // and makes the whole selector invalid.
      const value = this.escapeCss(attribute.value);
      selectors.push(`[${name}='${value}']`);
      selectors.push(`${tagName}[${name}='${value}']`);
    }
    return selectors;
  }

  /**
   * Builds an XPath selector matching the element's tag and its
   * whitespace-normalized text content.
   *
   * @returns {string[]} One XPath selector, or `[]` for `body`/`html` and for
   *   elements without text.
   */
  getTextBasedSelectors() {
    const selectors = [];
    const tagName = this.element.tagName.toLowerCase();
    // Skip text-based selectors for large containers
    if (tagName === 'body' || tagName === 'html') {
      return selectors;
    }
    const textContent = this.element.textContent.trim();
    if (textContent) {
      // Use xpathLiteral() to escape any quotes in textContent
      const safeText = xpathLiteral(textContent);
      selectors.push(`//${tagName}[normalize-space(.)=${safeText}]`);
    }
    return selectors;
  }

  /**
   * Builds positional selectors relative to the parent element:
   * `parent > :nth-child(n)` and `parent > tag:nth-of-type(n)`.
   *
   * @returns {string[]} Two selectors, or `[]` when there is no parent element.
   */
  getNthChildSelectors() {
    const selectors = [];
    const tagName = this.element.tagName.toLowerCase();
    const parent = this.element.parentElement;
    if (parent) {
      const parentTagName = parent.tagName.toLowerCase();
      const siblings = Array.from(parent.children);
      // CSS positions are 1-based.
      const index = siblings.indexOf(this.element) + 1;
      selectors.push(`${parentTagName} > :nth-child(${index})`);
      const typeIndex =
        siblings
          .filter((child) => child.tagName.toLowerCase() === tagName)
          .indexOf(this.element) + 1;
      selectors.push(`${parentTagName} > ${tagName}:nth-of-type(${typeIndex})`);
    }
    return selectors;
  }

  /**
   * @returns {string[]} `[placeholder='…']` when the attribute is non-empty,
   *   else `[]`.
   */
  getPlaceholderTextSelector() {
    const placeholder = this.element.getAttribute('placeholder');
    return placeholder
      ? [`[placeholder='${this.escapeCss(placeholder)}']`]
      : [];
  }

  /**
   * @returns {string[]} `[aria-label='…']` when the attribute is non-empty,
   *   else `[]`.
   */
  getAriaLabelSelector() {
    const ariaLabel = this.element.getAttribute('aria-label');
    return ariaLabel ? [`[aria-label='${this.escapeCss(ariaLabel)}']`] : [];
  }

  /**
   * For `input`, `textarea` and `select` elements, builds XPath selectors that
   * locate the element through its label: first a `label[for=id]`, otherwise
   * a wrapping `<label>`.
   *
   * @returns {string[]} XPath selectors, or `[]` when no labelled form control
   *   applies.
   */
  getLabelBasedSelectors() {
    const tagName = this.element.tagName.toLowerCase();
    if (['input', 'textarea', 'select'].includes(tagName)) {
      // Check for 'for' attribute
      const id = this.element.id;
      if (id) {
        // The id is page-controlled text: escape it so a quote or backslash
        // cannot make querySelector throw, which would abort generation of
        // every selector for this element.
        const label = document.querySelector(
          `label[for='${this.escapeCss(id)}']`,
        );
        if (label) {
          const labelText = label.textContent.trim();
          if (labelText) {
            const safeLabelText = xpathLiteral(labelText);
            return [
              `//label[text()=${safeLabelText}]/following-sibling::${tagName}`,
              `//label[text()=${safeLabelText}]/${tagName}`,
            ];
          }
        }
      }
      // Check for wrapping label
      const wrappingLabel = this.element.closest('label');
      if (wrappingLabel) {
        // Remove the element's own value from the label's text if it appears
        // (first occurrence only), then trim again
        const labelText = wrappingLabel.textContent
          .trim()
          .replace(this.element.value || '', '')
          .trim();
        if (labelText) {
          const safeLabelText = xpathLiteral(labelText);
          return [`//label[contains(text(), ${safeLabelText})]/${tagName}`];
        }
      }
    }
    return [];
  }

  /**
   * @returns {string[]} The tag name joined with the class selector
   *   (e.g. `div.foo.bar`), or `[]` when the element has no classes.
   */
  getCombinedSelector() {
    const classSelector = this.getClassSelector()[0];
    return classSelector ? [`${this.getTagSelector()}${classSelector}`] : [];
  }

  /**
   * @returns {string} The element's tag name in lower case.
   */
  getTagSelector() {
    return this.element.tagName.toLowerCase();
  }

  /**
   * Escapes a string through the CSS escape function saved on `window`.
   *
   * @param {string} str - Raw text to escape.
   * @returns {string} The escaped text.
   */
  escapeCss(str) {
    return window.donobuCssEscape(str);
  }
}
// Public entry point: returns the ranked selectors for `element`, or an empty
// array (after logging a warning) if generation throws for any reason.
window.donobuGenerateSmartSelectors = (element) => {
  let rankedSelectors = [];
  try {
    const generator = new SelectorGenerator(element);
    rankedSelectors = generator.generate();
  } catch (e) {
    console.warn('Exception while generating selector', e);
  }
  return rankedSelectors;
};
})();