donobu
Version:
Create browser automations with an LLM agent and replay them as Playwright scripts.
186 lines • 9.33 kB
TypeScript
import type { ElementHandle, Locator, Page } from 'playwright';
import { z } from 'zod/v4';
import type { CodeGenerationOptions } from '../models/CodeGenerationOptions';
import { ElementSelectorSchema } from '../models/ElementSelector';
import type { ProposedToolCall } from '../models/ProposedToolCall';
import type { ToolCall } from '../models/ToolCall';
import type { ToolCallContext } from '../models/ToolCallContext';
import type { ToolCallResult } from '../models/ToolCallResult';
import { Tool } from './Tool';
/**
* Schema containing the minimal, deterministic selector information
* necessary to locate an element on the page.
*
* Used when a human or an autonomous run has already produced an exact
* `{ frame, element }` selector pair and we simply want to replay that
* interaction without any involvement from an LLM.
*
* Shape of the `selector` object:
* * `element` - an array of selector strings for the target element.
* * `frame` - an optional, nullable selector string for the frame that
* contains the element.
*/
export declare const SelectorBasedSchema: z.ZodObject<{
selector: z.ZodObject<{
element: z.ZodArray<z.ZodString>;
frame: z.ZodOptional<z.ZodNullable<z.ZodString>>;
}, z.core.$strip>;
}, z.core.$strip>;
/**
* Schema used when the LLM chooses an element by its numbered Donobu
* annotation.
*
* The LLM supplies:
* * `annotation` - the chosen annotation number rendered in the
* Donobu overlay (typed as a string in this schema).
* * `whyThisAnnotation` - a natural-language explanation that is surfaced
* to users (and logged) to aid debugging and transparency.
*
* During replay these two properties are stripped out and replaced by a
* deterministic selector derived from the element actually found on-screen.
*
* NOTE(review): the schema also declares a required string field
* `rationale` whose purpose is not described here - confirm its meaning
* (and whether it is kept or stripped on replay) against the implementation.
*/
export declare const AnnotationBasedSchema: z.ZodObject<{
whyThisAnnotation: z.ZodString;
annotation: z.ZodString;
rationale: z.ZodString;
}, z.core.$strip>;
/**
* Thin wrapper type that pairs a selector query with the {@link Locator}
* object created for it, so the two can be passed around together.
*/
type SelectorLocator = {
/** The selector query string this entry was built from. */
selector: string;
/** The Playwright locator corresponding to {@link SelectorLocator.selector}. */
locator: Locator;
};
/**
* Abstract base class for all "selector-driven" browser interactions that
* can be *replayed deterministically*.
*
* A concrete subclass represents a single kind of user action (e.g.
* clicking, typing, etc). The class handles the tedious parts:
*
* 1. **Element discovery.**
* From a set of candidate CSS selectors (and an optional frame selector)
* it produces one or more `Locator` objects ordered by how specific they
* are. The first uniquely matching locator is preferred.
*
* 2. **Resilience.**
* When an element is missing, off-screen, or quickly detaches, the class
* automatically falls back to the next best locator until all options are
* exhausted.
*
* 3. **Replay support.**
* Every successful call stores selector metadata inside the returned
* {@link ToolCallResult}. The {@link ReplayableInteraction.prepareForRerun}
* method can later turn that metadata into a fresh {@link ProposedToolCall}
* so the exact same element can be located in a future run without LLM
* input.
*
* ### Type Parameters
* | Name | Meaning |
* |----------------|----------------------------------------------------------------------------|
* | `CoreSchema` | Zod schema describing *core* parameters consumed by the concrete tool |
* | `NonGptSchema` | Schema for deterministic invocations (extends {@link SelectorBasedSchema}) |
* | `GptSchema` | Schema for LLM-driven invocations (extends {@link AnnotationBasedSchema}) |
*
* Subclasses **only** need to implement {@link ReplayableInteraction.invoke},
* which receives the resolved element handles and performs the actual
* browser operation (e.g. click, type, etc).
*/
export declare abstract class ReplayableInteraction<CoreSchema extends z.ZodObject, NonGptSchema extends typeof SelectorBasedSchema, GptSchema extends typeof AnnotationBasedSchema> extends Tool<NonGptSchema, GptSchema> {
/** Zod schema describing the *core* parameters handed to {@link ReplayableInteraction.invoke}. */
readonly coreSchema: CoreSchema;
/**
* Upper bound on selector fail-overs.
* NOTE(review): presumably the default for the `maxSelectorFailovers`
* argument of {@link ReplayableInteraction.getLocatorsOrderedByMatchCount} - confirm.
*/
static readonly MAX_SELECTOR_FAILOVERS = 3;
/**
* Match-count threshold above which a locator is considered too broad to
* be useful (see {@link ReplayableInteraction.getLocatorsOrderedByMatchCount}).
*/
static readonly MAX_LOCATOR_MATCH_COUNT = 3;
/**
* @param name - The tool name, forwarded to the {@link Tool} base class.
* @param description - The tool description, forwarded to the {@link Tool} base class.
* @param coreSchema - Schema for the core parameters consumed by the concrete tool.
* @param inputSchema - Schema for deterministic (selector-based) invocations.
* @param inputSchemaForGpt - Schema for LLM-driven (annotation-based) invocations.
* @param requiresGpt - Optional flag; its exact semantics are defined by
* the {@link Tool} base class, which is not visible from this file.
*/
constructor(name: string, description: string, coreSchema: CoreSchema, inputSchema: NonGptSchema, inputSchemaForGpt: GptSchema, requiresGpt?: boolean);
/**
* Deterministic entry point: runs the interaction using parameters that
* conform to `NonGptSchema`, i.e. that carry an explicit `selector`.
*
* @param context - The active {@link ToolCallContext}.
* @param parameters - Parameters validated against `NonGptSchema`.
* @returns The {@link ToolCallResult} of the interaction.
*/
call(context: ToolCallContext, parameters: z.infer<NonGptSchema>): Promise<ToolCallResult>;
/**
* LLM-driven entry point: runs the interaction using parameters that
* conform to `GptSchema`, i.e. that identify the element by annotation.
*
* @param context - The active {@link ToolCallContext}.
* @param parameters - Parameters validated against `GptSchema`.
* @returns The {@link ToolCallResult} of the interaction.
*/
callFromGpt(context: ToolCallContext, parameters: z.infer<GptSchema>): Promise<ToolCallResult>;
/**
* **Concrete subclasses implement the actual user action here.**
*
* @param context - The active {@link ToolCallContext}.
* @param parameters - The validated *core* parameters for this tool.
* @param handles - The element handles to operate with:
* * `target` - the element matching the selector
* * `label` - the associated `<label>` (or label-resolved element) if one exists
* @returns A string that will be prepended to an HTML snippet and surfaced
* to the LLM (e.g. "CLICKED OK <button>").
*/
protected abstract invoke(context: ToolCallContext, parameters: z.infer<CoreSchema>, handles: {
target: ElementHandle<HTMLElement | SVGElement>;
label?: ElementHandle<HTMLElement | SVGElement>;
}): Promise<string>;
/**
* Private implementation detail; its signature is elided in this
* declaration file.
* NOTE(review): presumably the shared execution path behind `call` and
* `callFromGpt` - confirm against the implementation.
*/
private callCore;
/**
* Private static helper; its signature is elided in this declaration file.
*/
private static describeError;
/**
* Private static helper; its signature is elided in this declaration file.
*/
private static summarizeAttemptsForLlm;
/**
* Retrieves a list of {@link Locator} objects based on the provided selector
* candidates, ordered by their match count in ascending order. If the match
* count for a locator exceeds
* {@link ReplayableInteraction.MAX_LOCATOR_MATCH_COUNT}, it is ignored, as
* it is considered too broad of a locator to be useful.
*
* This method iterates through a list of CSS selector candidates and creates
* a {@link Locator} for each candidate. It counts the number of elements that
* match each selector within a given page. If a frame selector is provided,
* it looks for the elements within the specified frame; otherwise, it
* searches within the entire page. Only locators with a positive match count
* are added to the result list.
*
* @param page - The web page to search within.
* @param selector - The selector candidates (and optional frame selector),
* shaped as described by {@link ElementSelectorSchema}.
* @param maxLocatorMatchCount - Optional override for the match-count
* threshold. NOTE(review): presumably defaults to
* {@link ReplayableInteraction.MAX_LOCATOR_MATCH_COUNT} - confirm.
* @param maxSelectorFailovers - Optional limit related to selector
* fail-over. NOTE(review): presumably defaults to
* {@link ReplayableInteraction.MAX_SELECTOR_FAILOVERS} - confirm.
* @returns A list of selector/{@link Locator} pairs that have been found,
* ordered by their match count in ascending order.
*/
static getLocatorsOrderedByMatchCount(page: Page, selector: z.infer<typeof ElementSelectorSchema>, maxLocatorMatchCount?: number, maxSelectorFailovers?: number): Promise<SelectorLocator[]>;
/**
* Returns the given locator or its label, if it has one and is associated with a labelable element.
* Labelable elements include: button, input (except hidden), meter, output, progress, select, and textarea.
*
* @param element - The locator to inspect.
* @returns Either `element` itself or a locator for its label.
*/
static getLocatorOrItsLabel(element: Locator): Promise<Locator>;
/**
* Calculates a priority value for a CSS selector to be used in tiebreaker situations
* when multiple selectors have the same match count.
*
* The function assigns lower numerical values to selectors that are generally more
* reliable and specific for targeting elements in a user interface.
*
* @param sel - The CSS selector to rank.
* @returns The priority value; lower means more preferred.
*/
static selectorTiebreakerPriority(sel: string): number;
/**
* Helper function to check if a selector is ID-based.
*/
private static isIdBasedSelector;
/**
* Helper function to check if a selector is aria-based.
*/
private static isAriaBasedSelector;
/**
* Transform a historical {@link ToolCall} into a replay-ready
* {@link ProposedToolCall}.
*
* The original execution stored selector metadata inside
* `toolCall.outcome.metadata`. This helper:
*
* 1. **Validates** that the metadata is present and still contains a
* deterministic selector.
* 2. **Builds** a new `parameters` object which:
* * Moves the recorded selector into the expected `selector` slot.
* * Removes any `annotation` / `whyThisAnnotation` properties (they are
* meaningless for deterministic replays).
* 3. **Returns** a ready-to-run {@link ProposedToolCall} that points to the
* same tool (`toolName`) with the adjusted parameters.
*
* If the historical call did **not** record selector metadata, replaying
* would be impossible and the method throws an `Error` so the caller can
* handle this situation explicitly (e.g. fall back to a fresh LLM
* invocation or surface an error to the user).
*
* @param toolCall - The historical {@link ToolCall}.
* @param options - Behaviour switches:
* * `areElementIdsVolatile` - If `true`, ID-only selectors
* (e.g. `#submit-btn`) are *dropped* because the element's `id`
* attribute is considered volatile.
* When every candidate is ID-based the list is left unchanged
* (replay is better than nothing).
* * `useSelectorFailover` - If `false`, only the **first** selector
* (the most specific one) is kept, disabling automatic fail-over to
* broader selectors.
*
* @returns A {@link ProposedToolCall} that can be executed in a fresh run.
* @throws Error if the original call lacks selector metadata.
*/
prepareForRerun(toolCall: ToolCall, options: CodeGenerationOptions): ProposedToolCall;
}
export {};
//# sourceMappingURL=ReplayableInteraction.d.ts.map