UNPKG

donobu

Version:

Create browser automations with an LLM agent and replay them as Playwright scripts.

186 lines 9.33 kB
import type { ElementHandle, Locator, Page } from 'playwright';
import { z } from 'zod/v4';
import type { CodeGenerationOptions } from '../models/CodeGenerationOptions';
import { ElementSelectorSchema } from '../models/ElementSelector';
import type { ProposedToolCall } from '../models/ProposedToolCall';
import type { ToolCall } from '../models/ToolCall';
import type { ToolCallContext } from '../models/ToolCallContext';
import type { ToolCallResult } from '../models/ToolCallResult';
import { Tool } from './Tool';
/**
 * Schema containing the minimal, deterministic selector information
 * necessary to locate an element on the page.
 *
 * Used when a human or an autonomous run has already produced an exact
 * `{ frame, element }` selector pair and we simply want to replay that
 * interaction without any involvement from an LLM.
 *
 * Shape:
 *  * `selector.element` - an array of selector strings (the candidates).
 *  * `selector.frame`   - an optional, nullable frame selector string.
 */
export declare const SelectorBasedSchema: z.ZodObject<{
    selector: z.ZodObject<{
        element: z.ZodArray<z.ZodString>;
        frame: z.ZodOptional<z.ZodNullable<z.ZodString>>;
    }, z.core.$strip>;
}, z.core.$strip>;
/**
 * Schema used when the LLM chooses an element by its numbered Donobu
 * annotation.
 *
 * The LLM supplies:
 *  * `annotation` - the chosen annotation number rendered in the
 *    Donobu overlay (declared as a string).
 *  * `whyThisAnnotation` - a natural-language explanation that is surfaced
 *    to users (and logged) to aid debugging and transparency.
 *  * `rationale` - also declared as a required string.
 *    NOTE(review): its purpose is not described anywhere in this file;
 *    confirm against the implementation.
 *
 * During replay, `annotation` and `whyThisAnnotation` are stripped out and
 * replaced by a deterministic selector derived from the element actually
 * found on-screen.
 * NOTE(review): whether `rationale` is also stripped is not stated here.
 */
export declare const AnnotationBasedSchema: z.ZodObject<{
    whyThisAnnotation: z.ZodString;
    annotation: z.ZodString;
    rationale: z.ZodString;
}, z.core.$strip>;
/**
 * Thin wrapper type that lets us easily map a selector query to a
 * {@link Locator} object.
 */
type SelectorLocator = {
    selector: string;
    locator: Locator;
};
/**
 * Abstract base class for all "selector-driven" browser interactions that
 * can be *replayed deterministically*.
 *
 * A concrete subclass represents a single kind of user action (e.g.
 * clicking, typing, etc). The class handles the tedious parts:
 *
 * 1. **Element discovery.**
 *    From a set of candidate CSS selectors (and an optional frame selector)
 *    it produces one or more `Locator` objects ordered by how specific they
 *    are. The first uniquely matching locator is preferred.
 *
 * 2. **Resilience.**
 *    When an element is missing, off-screen, or quickly detaches, the class
 *    automatically falls back to the next best locator until all options are
 *    exhausted.
 *
 * 3. **Replay support.**
 *    Every successful call stores selector metadata inside the returned
 *    {@link ToolCallResult}. The {@link prepareForRerun} method can later
 *    turn that metadata into a fresh {@link ProposedToolCall} so the exact
 *    same element can be located in a future run without LLM input.
 *    NOTE(review): earlier documentation called this "the static
 *    `remapForRerun` helper"; no member of that name is declared on this
 *    class, so the link now points at `prepareForRerun`.
 *
 * ### Type Parameters
 * | Name           | Meaning                                                                    |
 * |----------------|----------------------------------------------------------------------------|
 * | `CoreSchema`   | Zod schema describing *core* parameters consumed by the concrete tool      |
 * | `NonGptSchema` | Schema for deterministic invocations (extends {@link SelectorBasedSchema}) |
 * | `GptSchema`    | Schema for LLM-driven invocations (extends {@link AnnotationBasedSchema})  |
 *
 * Subclasses **only** need to implement {@link invoke}, which receives a
 * *resolved* element handle and may perform the actual browser operation
 * (e.g. click, type, etc).
 */
export declare abstract class ReplayableInteraction<CoreSchema extends z.ZodObject, NonGptSchema extends typeof SelectorBasedSchema, GptSchema extends typeof AnnotationBasedSchema> extends Tool<NonGptSchema, GptSchema> {
    /** Zod schema for the *core* parameters that are handed to {@link invoke}. */
    readonly coreSchema: CoreSchema;
    /**
     * Presumably the default cap on how many fallback selectors are tried
     * before giving up - TODO confirm against the implementation.
     */
    static readonly MAX_SELECTOR_FAILOVERS = 3;
    /**
     * Locators that match more elements than this are considered too broad
     * to be useful (see {@link getLocatorsOrderedByMatchCount}).
     */
    static readonly MAX_LOCATOR_MATCH_COUNT = 3;
    /**
     * @param name - Tool name.
     * @param description - Tool description.
     * @param coreSchema - Schema for the core parameters; exposed as {@link coreSchema}.
     * @param inputSchema - Schema for deterministic (selector-based) invocations.
     * @param inputSchemaForGpt - Schema for LLM-driven (annotation-based) invocations.
     * @param requiresGpt - Optional flag; NOTE(review): semantics and default
     *   are presumably defined by the `Tool` base class - confirm there.
     */
    constructor(name: string, description: string, coreSchema: CoreSchema, inputSchema: NonGptSchema, inputSchemaForGpt: GptSchema, requiresGpt?: boolean);
    /**
     * Entry point for deterministic invocations: `parameters` conform to
     * `NonGptSchema`, i.e. they carry an explicit selector.
     */
    call(context: ToolCallContext, parameters: z.infer<NonGptSchema>): Promise<ToolCallResult>;
    /**
     * Entry point for LLM-driven invocations: `parameters` conform to
     * `GptSchema`, i.e. the element is identified by its annotation.
     */
    callFromGpt(context: ToolCallContext, parameters: z.infer<GptSchema>): Promise<ToolCallResult>;
    /**
     * **Concrete subclasses implement the actual user action here.**
     *
     * @param context - The active {@link ToolCallContext}.
     * @param parameters - The validated *core* parameters for this tool.
     * @param handles - The element handles to operate with:
     *   * `target` - the element matching the selector
     *   * `label` - the associated `<label>` (or label-resolved element) if one exists
     * @returns A string that will be prepended to an HTML snippet and surfaced
     *   to the LLM (e.g. "CLICKED OK <button>").
     */
    protected abstract invoke(context: ToolCallContext, parameters: z.infer<CoreSchema>, handles: {
        target: ElementHandle<HTMLElement | SVGElement>;
        label?: ElementHandle<HTMLElement | SVGElement>;
    }): Promise<string>;
    private callCore;
    private static describeError;
    private static summarizeAttemptsForLlm;
    /**
     * Retrieves a list of {@link Locator} objects based on the provided selector
     * candidates, ordered by their match count in ascending order. Locators
     * whose match count exceeds
     * {@link ReplayableInteraction.MAX_LOCATOR_MATCH_COUNT} are ignored, as
     * such a locator is considered too broad to be useful.
     *
     * This method iterates through a list of CSS selector candidates and creates
     * a {@link Locator} for each candidate. It counts the number of elements that
     * match each selector within a given page. If a frame selector is provided,
     * it looks for the elements within the specified frame; otherwise, it
     * searches within the entire page. Only locators with a positive match count
     * are added to the result list.
     *
     * @param page - The web page to search within.
     * @param selector - The selector candidates (and optional frame selector),
     *   shaped per `ElementSelectorSchema`.
     * @param maxLocatorMatchCount - Optional upper bound on a locator's match
     *   count; presumably defaults to
     *   {@link ReplayableInteraction.MAX_LOCATOR_MATCH_COUNT} - TODO confirm.
     * @param maxSelectorFailovers - Optional limit on fallback selectors;
     *   presumably defaults to
     *   {@link ReplayableInteraction.MAX_SELECTOR_FAILOVERS} - TODO confirm.
     * @returns A list of selector/locator pairs that have been found, ordered
     *   by their match count in ascending order.
     */
    static getLocatorsOrderedByMatchCount(page: Page, selector: z.infer<typeof ElementSelectorSchema>, maxLocatorMatchCount?: number, maxSelectorFailovers?: number): Promise<SelectorLocator[]>;
    /**
     * Returns the given locator or its label, if it has one and is associated with a labelable element.
     * Labelable elements include: button, input (except hidden), meter, output, progress, select, and textarea.
     *
     * @param element - The locator to inspect.
     * @returns The label's locator when one applies, otherwise `element` itself.
     */
    static getLocatorOrItsLabel(element: Locator): Promise<Locator>;
    /**
     * Calculates a priority value for a CSS selector to be used in tiebreaker situations
     * when multiple selectors have the same match count.
     *
     * The function assigns lower numerical values to selectors that are generally more
     * reliable and specific for targeting elements in a user interface.
     *
     * @param sel - The selector string to rank.
     * @returns The priority; lower values are preferred.
     */
    static selectorTiebreakerPriority(sel: string): number;
    /**
     * Helper function to check if a selector is ID-based.
     */
    private static isIdBasedSelector;
    /**
     * Helper function to check if a selector is aria-based.
     */
    private static isAriaBasedSelector;
    /**
     * Transform a historical {@link ToolCall} into a replay-ready
     * {@link ProposedToolCall}.
     *
     * The original execution stored selector metadata inside
     * `toolCall.outcome.metadata`. This helper:
     *
     * 1. **Validates** that the metadata is present and still contains a
     *    deterministic selector.
     * 2. **Builds** a new `parameters` object which:
     *    * Moves the recorded selector into the expected `selector` slot.
     *    * Removes any `annotation` / `whyThisAnnotation` properties (they are
     *      meaningless for deterministic replays).
     * 3. **Returns** a ready-to-run {@link ProposedToolCall} that points to the
     *    same tool (`toolName`) with the adjusted parameters.
     *
     * If the historical call did **not** record selector metadata, replaying
     * would be impossible and the method throws an `Error` so the caller can
     * handle this situation explicitly (e.g. fall back to a fresh LLM
     * invocation or surface an error to the user).
     *
     * @param toolCall - The historical {@link ToolCall}.
     * @param options - Behaviour switches:
     *   * `areElementIdsVolatile` - If `true`, ID-only selectors
     *     (e.g. `#submit-btn`) are *dropped* because the element's `id`
     *     attribute is considered volatile.
     *     When every candidate is ID-based the list is left unchanged
     *     (replay is better than nothing).
     *   * `useSelectorFailover` - If `false`, only the **first** selector
     *     (the most specific one) is kept, disabling automatic fail-over to
     *     broader selectors.
     *
     * @returns A {@link ProposedToolCall} that can be executed in a fresh run.
     * @throws Error if the original call lacks selector metadata.
     */
    prepareForRerun(toolCall: ToolCall, options: CodeGenerationOptions): ProposedToolCall;
}
export {};
//# sourceMappingURL=ReplayableInteraction.d.ts.map