donobu
Version:
Create browser automations with an LLM agent and replay them as Playwright scripts.
658 lines • 31.6 kB
JavaScript
;
// Mark this CommonJS module as an ES-module interop target.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare every export as undefined; the real values are assigned further
// down, where each schema / class is defined.
exports.ReplayableInteraction = exports.AnnotationBasedSchema = exports.SelectorBasedSchema = void 0;
const v4_1 = require("zod/v4");
const PageClosedException_1 = require("../exceptions/PageClosedException");
const ElementSelector_1 = require("../models/ElementSelector");
const ToolSchema_1 = require("../models/ToolSchema");
const Logger_1 = require("../utils/Logger");
const PlaywrightUtils_1 = require("../utils/PlaywrightUtils");
const TargetUtils_1 = require("../utils/TargetUtils");
const Tool_1 = require("./Tool");
/**
* Schema containing the minimal, deterministic selector information
* necessary to re-locate an element on the page.
*
* Used when a human or an autonomous run has already produced an exact
* `{ frame, element }` selector pair and we simply want to replay that
* interaction without any involvement from an LLM.
*/
exports.SelectorBasedSchema = v4_1.z.object({
// WARNING! If you change this field name, you must also update
// `ReplayableInteraction.prepareForRerun`, which reads and writes the
// `selector` key of the tool-call parameters by name.
selector: ElementSelector_1.ElementSelectorSchema,
});
/**
* Schema used when the LLM chooses an element by its numbered Donobu
* annotation.
*
* The LLM supplies:
* * `annotation` - the chosen annotation number rendered in the
* Donobu overlay.
* * `whyThisAnnotation` - a natural-language explanation that is surfaced
* to users (and logged) to aid debugging and transparency.
*
* During replay these two properties are stripped out and replaced by a
* deterministic selector derived from the element actually found on-screen.
*/
exports.AnnotationBasedSchema = v4_1.z.object({
// Inherit every field of the base LLM-argument schema.
...ToolSchema_1.BaseGptArgsSchema.shape,
whyThisAnnotation: v4_1.z
.string()
.describe('The reason why this particular numbered annotation was chosen.'),
// NOTE(review): the description below says "to click" although this schema
// is the base for every replayable interaction, not only clicks — confirm
// whether subclasses override it before rewording.
annotation: v4_1.z
.string()
.describe('The numbered annotation of the element to click.'),
});
/**
* Abstract base class for all "selector-driven" browser interactions that
* can be *replayed deterministically*.
*
* A concrete subclass represents a single kind of user action (e.g.
* clicking, typing, etc). The class handles the tedious parts:
*
* 1. **Element discovery.**
* From a set of candidate CSS selectors (and an optional frame selector)
* it produces one or more `Locator` objects ordered by how specific they
* are. The first uniquely matching locator is preferred.
*
* 2. **Resilience.**
* When an element is missing, off-screen, or quickly detaches, the class
* automatically falls back to the next best locator until all options are
* exhausted.
*
* 3. **Replay support.**
* Every successful call stores selector metadata inside the returned
* {@link ToolCallResult}. The {@link prepareForRerun} method can
* later turn that metadata into a fresh {@link ProposedToolCall} so the
* exact same element can be located in a future run without LLM input.
*
* ### Type Parameters
* | Name | Meaning |
* |----------------|----------------------------------------------------------------------------|
* | `CoreSchema` | Zod schema describing *core* parameters consumed by the concrete tool |
* | `NonGptSchema` | Schema for deterministic invocations (extends {@link SelectorBasedSchema}) |
* | `GptSchema` | Schema for LLM-driven invocations (extends {@link AnnotationBasedSchema}) |
*
* Subclasses **only** need to implement {@link invoke}, which receives a
* *resolved* locator and may perform the actual browser operation (e.g.
* click, type, etc).
*/
class ReplayableInteraction extends Tool_1.Tool {
/**
* Creates a replayable interaction and registers it with the {@link Tool}
* base class.
*
* @param name - Forwarded unchanged to the base constructor.
* @param description - Forwarded unchanged to the base constructor.
* @param coreSchema - Schema kept on the instance; `callCore` uses it to
* parse the core parameters out of the raw tool-call parameters.
* @param inputSchema - Forwarded unchanged to the base constructor.
* @param inputSchemaForGpt - Forwarded unchanged to the base constructor.
* @param requiresGpt - Forwarded to the base constructor; defaults to `false`.
*/
constructor(name, description, coreSchema, inputSchema, inputSchemaForGpt, requiresGpt = false) {
// NOTE(review): the trailing `undefined` and `['web']` are positional
// arguments of Tool's constructor, which is not visible in this file —
// presumably `['web']` limits the tool to web targets; confirm against Tool.
super(name, description, inputSchema, inputSchemaForGpt, requiresGpt, undefined, ['web']);
this.coreSchema = coreSchema;
}
async call(context, parameters) {
const page = (0, TargetUtils_1.webPage)(context);
const locators = await ReplayableInteraction.getLocatorsOrderedByMatchCount(page, parameters.selector, ReplayableInteraction.MAX_LOCATOR_MATCH_COUNT, parameters.selector.element.length);
if (!locators.length) {
return {
isSuccessful: false,
forLlm: 'FAILED! Unable to resolve HTML element to operate with.',
metadata: {
selector: parameters.selector,
},
};
}
return this.callCore(context, parameters, locators, parameters.selector);
}
async callFromGpt(context, parameters) {
const page = (0, TargetUtils_1.webPage)(context);
const elementSelector = `[${context.targetInspector.interactableElementAttribute}="${parameters.annotation}"]`;
let locatorData = null;
for (const frame of page.frames()) {
if (frame.isDetached()) {
continue;
}
const locator = frame.locator(elementSelector);
if ((await locator.count()) > 0) {
const selectorCandidates = (await PlaywrightUtils_1.PlaywrightUtils.generateSelectors(locator)).slice(0, ReplayableInteraction.MAX_SELECTOR_FAILOVERS);
const frameSelector = frame.parentFrame() === null
? null
: (await PlaywrightUtils_1.PlaywrightUtils.generateSelectors(await frame.frameElement()))[0];
locatorData = {
locators: [locator],
selectorForReplay: {
element: selectorCandidates,
frame: frameSelector,
},
};
break;
}
}
if (!locatorData?.locators.length) {
return {
isSuccessful: false,
forLlm: `FAILED! Unable to resolve HTML element for annotation '${parameters.annotation}'.`,
metadata: {
annotation: parameters.annotation,
attemptedSelector: elementSelector,
},
};
}
return this.callCore(context, parameters, locatorData.locators.map((locator) => {
return {
selector: elementSelector,
locator: locator,
};
}), locatorData.selectorForReplay);
}
/**
* Shared execution path behind `call` and `callFromGpt`.
*
* Walks the candidate locators in the given order and, for each one, every
* element it matches, until `invoke` succeeds on one of them. Each failure
* is logged and recorded so the final failure message can explain what was
* tried.
*
* @param context - Tool-call context (page, inspectors).
* @param parameters - Raw tool parameters; parsed with `this.coreSchema`
* before being handed to `invoke`.
* @param selectorLocators - Ordered list of `{ selector, locator }` pairs to try.
* @param selectorForReplay - Selector data (`element`, `frame`) that is
* spread into the metadata of a successful result.
* @returns A result object with `isSuccessful`, `forLlm` and `metadata`.
* Page-closed errors (as judged by `PlaywrightUtils.isPageClosedError`) are
* rethrown rather than reported; errors from `coreSchema.parse` propagate
* because the parse happens outside the try blocks.
*/
async callCore(context, parameters, selectorLocators, selectorForReplay) {
// Extract core parameters using the stored schema
const coreParameters = this.coreSchema.parse(parameters);
// Short timeout used when resolving element handles.
const timeoutOpt = {
timeout: 1000, // Millis
};
const page = (0, TargetUtils_1.webPage)(context);
// One summary per candidate selector; reported back if everything fails.
const selectorAttempts = [];
for (const selectorLocator of selectorLocators) {
const attemptSummary = {
selector: selectorLocator.selector,
matchCount: null,
errors: [],
};
selectorAttempts.push(attemptSummary);
try {
const count = await selectorLocator.locator.count();
attemptSummary.matchCount = count;
if (count === 0) {
attemptSummary.errors.push('Locator resolved to 0 elements before interaction.');
}
// Try each matched element in turn; the first successful one returns.
for (let i = 0; i < count; ++i) {
try {
const originalLocator = selectorLocator.locator.nth(i);
const targetHandle = await originalLocator
.first()
.elementHandle(timeoutOpt);
if (!targetHandle) {
throw new Error(`Failed to resolve element handle for selector '${selectorLocator.selector}'`);
}
// May return the element's label, or the element itself when no
// visible label is found.
const labelLocator = await ReplayableInteraction.getLocatorOrItsLabel(originalLocator);
let labelHandle;
try {
labelHandle = await labelLocator
.first()
.elementHandle(timeoutOpt);
}
catch {
// Best effort: fall back to the target element when the label handle
// cannot be resolved.
labelHandle = null;
}
const chosenHandle = labelHandle ?? targetHandle;
// Check if element is still attached before attempting to do anything meaningful.
const isAttached = await chosenHandle.evaluate((el) => el.isConnected);
if (!isAttached) {
throw new Error(`Element '${i}' for selector '${selectorLocator.selector}' does not exist in the DOM`);
}
// The HTML snippet is captured before `invoke` runs and is optional:
// a failure here is only logged.
let htmlSnippet = '';
try {
htmlSnippet +=
await (0, TargetUtils_1.webInspector)(context).pageInspector.getHtmlSnippet(chosenHandle);
}
catch (error) {
Logger_1.appLogger.warn('Failed to get HTML snippet', error);
}
// Delegate the actual browser operation to the concrete subclass.
const forLlm = await this.invoke(context, coreParameters, {
target: targetHandle,
label: labelHandle ?? undefined,
});
await PlaywrightUtils_1.PlaywrightUtils.waitForPageStability(page);
// Be careful to not report back the internal, ephemeral, Donobu selector
// used for autonomous runs. If we are in autonomous mode, just report
// back the top-resolved selector.
const reportedSelector = !selectorLocator.selector.includes(context.targetInspector.interactableElementAttribute)
? selectorLocator.selector
: selectorForReplay.element[0];
return {
isSuccessful: true,
forLlm: forLlm + htmlSnippet,
metadata: {
...selectorForReplay,
usedSelector: reportedSelector,
},
};
}
catch (elementError) {
// A closed page cannot be recovered from by trying another element.
if (PlaywrightUtils_1.PlaywrightUtils.isPageClosedError(elementError)) {
throw elementError;
}
Logger_1.appLogger.error(`Failed to interact with element '${i}' for selector '${selectorLocator.selector}' due to exception, will fail over to remaining elements (if any)`, elementError);
attemptSummary.errors.push(`element ${i}: ${ReplayableInteraction.describeError(elementError)}`);
}
}
}
catch (locatorError) {
// Reached when counting the locator itself fails (or a page-closed error
// was rethrown from the element loop above).
if (PlaywrightUtils_1.PlaywrightUtils.isPageClosedError(locatorError)) {
throw locatorError;
}
Logger_1.appLogger.error(`Failed to interact with selector '${selectorLocator.selector}' due to exception, will fail over to remaining selectors (if any)`, locatorError);
attemptSummary.errors.push(`selector error: ${ReplayableInteraction.describeError(locatorError)}`);
}
}
// Every selector and every matched element failed.
// NOTE(review): here the replay selector is nested under `selectorForReplay`,
// whereas the success path spreads it into the metadata root.
return {
isSuccessful: false,
forLlm: `FAILED! Unable to apply operation. ${ReplayableInteraction.summarizeAttemptsForLlm(selectorAttempts)}`,
metadata: {
selectorForReplay: selectorForReplay,
selectorAttempts: selectorAttempts,
},
};
}
/**
 * Renders an arbitrary thrown value as a short, human-readable string.
 *
 * - `Error` instances become `"<name>: <message>"`.
 * - Strings are returned unchanged.
 * - Anything else is JSON-serialized, falling back to `String(error)` when
 *   serialization throws (e.g. cycles, BigInt) or yields no string at all.
 *
 * @param error - The thrown value; may be of any type.
 * @returns Always a string.
 */
static describeError(error) {
    if (error instanceof Error) {
        return `${error.name}: ${error.message}`;
    }
    if (typeof error === 'string') {
        return error;
    }
    try {
        // JSON.stringify returns undefined (not a string) for undefined,
        // functions and symbols, so fall back to String() in that case too.
        return JSON.stringify(error) ?? String(error);
    }
    catch {
        return String(error);
    }
}
static summarizeAttemptsForLlm(attempts) {
if (!attempts.length) {
return 'No selector attempts were recorded.';
}
const maxAttemptsToReport = 3;
const summarized = attempts.slice(0, maxAttemptsToReport).map((attempt) => {
const matchCount = attempt.matchCount === null ? 'unknown' : attempt.matchCount;
const errors = attempt.errors.length > 0
? ` errors: ${attempt.errors
.slice(0, 2)
.join('; ')}${attempt.errors.length > 2 ? '...' : ''}`
: '';
return `'${attempt.selector}' (matches=${matchCount}${errors})`;
});
const trimmed = attempts.length > maxAttemptsToReport
? ' (trimmed to first attempts)'
: '';
return `Tried ${attempts.length} selector(s): ${summarized.join(' | ')}${trimmed}.`;
}
/**
* Retrieves a list of {@link Locator} objects based on the provided selector
* candidates, ordered by their match count in ascending order. If the match
* count for a locator exceeds
* {@link ReplayableInteraction.MAX_LOCATOR_MATCH_COUNT}, then they are ignored, as
* it is considered too broad of a locator to be useful.
*
* This method iterates through a list of CSS selector candidates and creates
* a {@link Locator} for each candidate. It counts the number of elements that
* match each selector within a given page. If a frame selector is provided,
* it looks for the elements within the specified frame; otherwise, it
* searches within the entire page. Only locators with a positive match count
* are added to the result list.
*
* @param page - The the web page to search within.
* @return A list of {@link Locator} objects that have been found, ordered by
* their match count in ascending order.
*/
static async getLocatorsOrderedByMatchCount(page, selector, maxLocatorMatchCount = ReplayableInteraction.MAX_LOCATOR_MATCH_COUNT, maxSelectorFailovers = ReplayableInteraction.MAX_SELECTOR_FAILOVERS) {
if (selector.element.length === 0) {
return [];
}
try {
// Store the first selector promise separately
let firstSelectorResolver = null;
// Create a promise we can resolve early if the first selector is unique
const firstSelectorUniqueMatch = new Promise((resolve) => {
firstSelectorResolver = resolve;
});
// Process all selectors in parallel
const allSelectorsPromise = new Promise((resolve, reject) => {
const locatorResults = [];
// Create and execute all promises in parallel
const promises = selector.element
.map((selectorCandidate) => {
return PlaywrightUtils_1.PlaywrightUtils.normalizeSelector(selectorCandidate);
})
.map(async (selectorCandidate, index) => {
try {
const elementLocator = selector.frame
? page.frameLocator(selector.frame).locator(selectorCandidate)
: page.locator(selectorCandidate);
await elementLocator
.waitFor({
state: 'attached',
timeout: 2000,
})
.catch(() => {
// Continue if timeout
});
const count = await elementLocator.count();
// Special case: first selector with exactly one match
if (index === 0 && count === 1) {
firstSelectorResolver([
{ selector: selectorCandidate, locator: elementLocator },
]);
}
if (count > 0 && maxLocatorMatchCount >= count) {
locatorResults.push({
selector: selectorCandidate,
locator: elementLocator,
count,
});
}
}
catch (e) {
Logger_1.appLogger.warn(`Invalid selector: ${selectorCandidate}`, e);
}
});
// Wait for all selectors to be processed
Promise.all(promises)
.then(() => {
// Sort and return the results
resolve(locatorResults
.sort((a, b) => {
if (a.count !== b.count) {
return a.count - b.count;
}
return (ReplayableInteraction.selectorTiebreakerPriority(a.selector) -
ReplayableInteraction.selectorTiebreakerPriority(b.selector));
})
.map((item) => {
return { selector: item.selector, locator: item.locator };
}));
})
.catch(reject);
});
// Race between the fast path and full processing
const prelimiaryList = await Promise.race([
firstSelectorUniqueMatch,
allSelectorsPromise,
]);
return prelimiaryList.slice(0, maxSelectorFailovers);
}
catch (error) {
if (PlaywrightUtils_1.PlaywrightUtils.isPageClosedError(error)) {
throw new PageClosedException_1.PageClosedException();
}
else {
throw error;
}
}
}
/**
* Returns the given locator or its label, if it has one and is associated with a labelable element.
* Labelable elements include: button, input (except hidden), meter, output, progress, select, and textarea.
*/
static async getLocatorOrItsLabel(element) {
const timeoutMilliseconds = 5000;
try {
// First check if the element itself is visible
try {
await element.waitFor({
state: 'visible',
timeout: timeoutMilliseconds,
});
}
catch (error) {
// If the element itself isn't visible, just return it without looking for labels
if (PlaywrightUtils_1.PlaywrightUtils.isPageClosedError(error)) {
throw new PageClosedException_1.PageClosedException();
}
return element;
}
// Check if the element is a labelable element
// See https://developer.mozilla.org/en-US/docs/Web/HTML/Reference/Elements/label
const isLabelable = await element.evaluate((elem) => {
const tagName = elem.tagName.toLowerCase();
if (tagName === 'button' ||
tagName === 'meter' ||
tagName === 'output' ||
tagName === 'progress' ||
tagName === 'select' ||
tagName === 'textarea') {
return true;
}
if (tagName === 'input') {
// Input is labelable except when type="hidden"
const type = elem.getAttribute('type')?.toLowerCase() || 'text';
return type !== 'hidden';
}
return false;
});
// If the element is not labelable, return the original locator
if (!isLabelable) {
return element;
}
// Try to get the ID attribute
const id = await element
.getAttribute('id', {
timeout: timeoutMilliseconds,
})
.catch(() => null);
if (id) {
try {
// Get the underlying ElementHandle first
const handle = await element.elementHandle({
timeout: timeoutMilliseconds,
});
if (handle) {
// From the handle, retrieve the frame that owns this element
const frame = await handle.ownerFrame();
if (frame) {
// Look for label with matching 'for' attribute
const labelByFor = frame.locator(`label[for="${id}"]`);
// Check if the label exists and is visible
if ((await labelByFor.count()) > 0) {
try {
await labelByFor.waitFor({
state: 'visible',
timeout: timeoutMilliseconds,
});
return labelByFor;
}
catch (visibilityError) {
// If label exists but isn't visible, continue to other checks
if (PlaywrightUtils_1.PlaywrightUtils.isPageClosedError(visibilityError)) {
throw new PageClosedException_1.PageClosedException();
}
}
}
}
}
}
catch (error) {
// Handle errors in the ElementHandle/frame retrieval, but continue to other checks
if (PlaywrightUtils_1.PlaywrightUtils.isPageClosedError(error)) {
throw new PageClosedException_1.PageClosedException();
}
}
}
// Fallback: look for a directly wrapping label (immediate ancestor)
// Using 'xpath=parent::label' to only get direct parent if it's a label
const directParentLabel = element.locator('xpath=parent::label');
if ((await directParentLabel.count()) > 0) {
try {
await directParentLabel.waitFor({
state: 'visible',
timeout: timeoutMilliseconds,
});
return directParentLabel;
}
catch (visibilityError) {
if (PlaywrightUtils_1.PlaywrightUtils.isPageClosedError(visibilityError)) {
throw new PageClosedException_1.PageClosedException();
}
}
}
// If direct parent isn't a label, check any ancestor label
const wrappingLabel = element.locator('xpath=ancestor::label[1]'); // Get closest ancestor label
if ((await wrappingLabel.count()) > 0) {
try {
await wrappingLabel.waitFor({
state: 'visible',
timeout: timeoutMilliseconds,
});
return wrappingLabel;
}
catch (visibilityError) {
if (PlaywrightUtils_1.PlaywrightUtils.isPageClosedError(visibilityError)) {
throw new PageClosedException_1.PageClosedException();
}
}
}
// Neither "for" nor wrapping label found or visible
return element;
}
catch (error) {
if (PlaywrightUtils_1.PlaywrightUtils.isPageClosedError(error)) {
throw new PageClosedException_1.PageClosedException();
}
throw error;
}
}
/**
* Calculates a priority value for a CSS selector to be used in tiebreaker situations
* when multiple selectors have the same match count.
*
* The function assigns lower numerical values to selectors that are generally more
* reliable and specific for targeting elements in a user interface.
*/
static selectorTiebreakerPriority(sel) {
// Convert selector to lowercase for case-insensitive matching of attribute names
const lowerSel = sel.toLowerCase();
// Check for aria-label selectors (highest priority)
if (/\[aria-label\s*=/.test(lowerSel) || /@aria-label/.test(lowerSel)) {
return 0;
}
// Check for data-testid based selectors
if (/\[(data-testid|data-test-id)\s*(=|~=|\^=|\$=|\*=)/.test(lowerSel) ||
/@testid/.test(lowerSel)) {
return 1;
}
// Check for ID-based selectors
if (/^#/.test(sel)) {
return 2;
}
// Check for placeholder selectors
if (/\[placeholder\s*=/.test(lowerSel) || /@placeholder/.test(lowerSel)) {
return 3;
}
// Check for text-based XPath selectors
if (PlaywrightUtils_1.PlaywrightUtils.isXpathSelector(sel)) {
return 4;
}
// Any other selector types
return 5 + sel.length;
}
/**
* Helper function to check if a selector is ID-based
*/
static isIdBasedSelector(selector) {
const trimmed = selector.trim();
const isHashId = /^#/.test(trimmed);
const isAttrId = /\[id\s*=\s*['"]?[^\]]+['"]?\]/i.test(trimmed) ||
/@id\s*=\s*['"]?[^\]]+['"]?/i.test(trimmed);
return isHashId || isAttrId;
}
/**
* Helper function to check if a selector is aria-based
*/
static isAriaBasedSelector(selector) {
const lowerSel = selector.toLowerCase();
return /\[aria-label\s*=/.test(lowerSel) || /@aria-label/.test(lowerSel);
}
/**
* Transform a historical {@link ToolCall} into a replay-ready
* {@link ProposedToolCall}.
*
* The original execution stored selector metadata inside
* `toolCall.outcome.metadata`. This helper:
*
* 1. **Validates** that the metadata is present and still contains a
* deterministic selector.
* 2. **Builds** a new `parameters` object which:
* * Moves the recorded selector into the expected `selector` slot.
* * Removes any `annotation` / `whyThisAnnotation` properties (they are
* meaningless for deterministic replays).
* 3. **Returns** a ready-to-run {@link ProposedToolCall} that points to the
* same tool (`toolName`) with the adjusted parameters.
*
* If the historical call did **not** record selector metadata, replaying
* would be impossible and the method throws an `Error` so the caller can
* handle this situation explicitly (e.g. fall back to a fresh LLM
* invocation or surface an error to the user).
*
* @param toolCall - The historical {@link ToolCall}.
* @param options - Behaviour switches:
* * `areElementIdsVolatile` - If `true`, ID-only selectors
* (e.g. `#submit-btn`) are *dropped* because the element's `id`
* attribute is considered volatile.
* When every candidate is ID-based the list is left unchanged
* (replay is better than nothing).
* * `useSelectorFailover` - If `false`, only the **first** selector
* (the most specific one) is kept, disabling automatic fail-over to
* broader selectors.
*
* @returns A {@link ProposedToolCall} that can be executed in a fresh run.
* @throws Error if the original call lacks selector metadata.
*/
prepareForRerun(toolCall, options) {
// If the tool call is from a GPT, we need the result metadata to continue.
if (!('selector' in toolCall.parameters)) {
if (!toolCall.outcome.metadata) {
throw new Error(`Failed to prepare tool call for rerun: result metadata is missing. Original call: ${JSON.stringify(toolCall)}`);
}
else if (!toolCall.outcome.metadata.element) {
throw new Error(`Failed to prepare tool call for rerun: selector data is absent in metadata. Original call: ${JSON.stringify(toolCall)}`);
}
}
// Parse and clone the stored selector ------------------------------------
const originalSelector = 'selector' in toolCall.parameters
? ElementSelector_1.ElementSelectorSchema.parse(toolCall.parameters.selector)
: ElementSelector_1.ElementSelectorSchema.parse(toolCall.outcome.metadata);
let selectorCandidates = [...originalSelector.element];
// 1) Drop ID-based selectors if requested --------------------------------
if (options.areElementIdsVolatile) {
const nonId = selectorCandidates.filter((sel) => {
return !ReplayableInteraction.isIdBasedSelector(sel);
});
// Use the filtered list only if something remains; otherwise keep the
// original list to avoid total selector loss.
if (nonId.length > 0) {
selectorCandidates = nonId;
}
}
else {
// 2) Promote ID-based selectors to the top if areElementIdsVolatile is not true
// Exception: Do not promote ID selectors above aria-based selectors
const firstSelectorIsAria = selectorCandidates.length > 0 &&
ReplayableInteraction.isAriaBasedSelector(selectorCandidates[0]);
if (!firstSelectorIsAria) {
const idBasedSelectors = selectorCandidates.filter((sel) => ReplayableInteraction.isIdBasedSelector(sel));
const nonIdBasedSelectors = selectorCandidates.filter((sel) => !ReplayableInteraction.isIdBasedSelector(sel));
// Reorder: ID-based selectors first, then non-ID-based selectors
if (idBasedSelectors.length > 0) {
selectorCandidates = [...idBasedSelectors, ...nonIdBasedSelectors];
}
}
// If first selector is aria-based, leave the order unchanged
}
// 3) Disable fail-over if requested --------------------------------------
if (options.disableSelectorFailover && selectorCandidates.length > 1) {
selectorCandidates = [selectorCandidates[0]];
}
const modifiedSelector = {
element: selectorCandidates,
...(originalSelector.frame ? { frame: originalSelector.frame } : {}),
};
// Build the new parameter object ----------------------------------------
const revisedArgs = {
...toolCall.parameters,
selector: modifiedSelector,
annotation: undefined,
whyThisAnnotation: undefined,
};
return {
name: toolCall.toolName,
parameters: revisedArgs,
};
}
}
exports.ReplayableInteraction = ReplayableInteraction;
// Class-level limits:
//  - MAX_SELECTOR_FAILOVERS: default cap on how many selectors are kept as fail-overs.
//  - MAX_LOCATOR_MATCH_COUNT: default cap on matches before a locator is deemed too broad.
Object.assign(ReplayableInteraction, {
    MAX_SELECTOR_FAILOVERS: 3,
    MAX_LOCATOR_MATCH_COUNT: 3,
});
//# sourceMappingURL=ReplayableInteraction.js.map