UNPKG

donobu

Version:

Create browser automations with an LLM agent and replay them as Playwright scripts.

180 lines 8.01 kB
import type { InteractionTrackingHost } from '../bindings/PageInteractionTracker';
import type { FlowMetadata } from '../models/FlowMetadata';
import type { FocusedTarget } from '../models/FocusedTarget';
import type { InteractableElement } from '../models/InteractableElement';
import type { FlowsPersistence } from '../persistence/flows/FlowsPersistence';

/**
 * Platform-neutral contract for inspecting the screen/page that an
 * LLM-driven flow is operating on.
 *
 * An inspector is tied to its target when it is constructed, so none of the
 * methods below take a target argument — they always act on whatever the
 * bound target currently is.
 *
 * Consumers normally depend on the exported {@link TargetInspector} type and
 * narrow on the `type` discriminant only when platform-specific access is
 * genuinely required (e.g. `webInspector()` in tools).
 *
 * Supporting a new platform means writing a class that implements this
 * interface with its own unique `type` literal and adding it to
 * {@link TargetInspector}.
 */
export interface TargetInspectorBase {
    // ---------------------------------------------------------------------
    // Identity and connection state
    // ---------------------------------------------------------------------

    /** Tag used to narrow {@link TargetInspector} to a concrete platform. */
    readonly type: string;

    /** Mutable reference to the target this inspector was bound to. */
    readonly target: FocusedTarget;

    /** `true` while the underlying target connection is alive (`current` is non-null). */
    readonly connected: boolean;

    /** Name of the attribute that marks interactable elements (e.g. 'data-donobu-interactable'). */
    readonly interactableElementAttribute: string;

    // ---------------------------------------------------------------------
    // Interactable-element attribution and annotation
    // ---------------------------------------------------------------------

    /** Find every interactable element on the current screen/page and mark it. */
    attributeInteractableElements(): Promise<void>;

    /** Return the interactable elements that were attributed earlier. */
    getAttributedInteractableElements(): Promise<InteractableElement[]>;

    /** Draw numbered visual annotations on the current screen/page. */
    annotateInteractableElements(): Promise<void>;

    /** Strip the visual annotations; attributed elements are left untouched. */
    removeAnnotations(): Promise<void>;

    // ---------------------------------------------------------------------
    // Screenshots
    // ---------------------------------------------------------------------

    /** Screenshot of the current target without annotations. */
    takeCleanScreenshot(): Promise<Buffer>;

    /** Screenshot with annotations; call after {@link annotateInteractableElements}. */
    takeAnnotatedScreenshot(): Promise<Buffer>;

    /**
     * Take a brand-new screenshot that is not tied to the attribution
     * lifecycle.
     *
     * The difference from {@link takeCleanScreenshot} is that this never
     * hands back a buffer cached during attribution — a new capture is
     * always performed.
     */
    captureScreenshot(): Promise<Buffer>;

    // ---------------------------------------------------------------------
    // Liveness checks and recovery
    // ---------------------------------------------------------------------

    /**
     * Raise the platform's "not connected" exception when the target is not
     * connected.
     *
     * @throws PageClosedException (web) or DeviceClosedException (mobile)
     */
    checkConnectedOrThrow(): void;

    /**
     * Detect a target that is still connected but no longer alive (e.g. a
     * web page on which `close()` was called) and throw in that case.
     *
     * On mobile this is typically a no-op: a device session is either
     * present or null, and has no in-between "closed" state.
     */
    checkTargetAliveOrThrow(): void;

    /**
     * Report whether `error` means the target was disconnected or closed.
     * Web looks for Playwright closed-page errors; mobile looks for
     * {@link DeviceClosedException}.
     */
    isTargetClosedError(error: unknown): boolean;

    /**
     * Try to recover once the target has been closed.
     *
     * Web: searches the browser context for another open page and reassigns.
     * Mobile: recovery is not possible.
     *
     * @returns `{ recovered: true }` when a new target was assigned, otherwise
     *   `{ recovered: false, reason }` to signal that the flow should fail.
     */
    handleTargetClosed(): Promise<
        | {
              recovered: true;
          }
        | {
              recovered: false;
              reason: string;
          }
    >;

    // ---------------------------------------------------------------------
    // Interaction cursor
    // ---------------------------------------------------------------------

    /** Make the interaction cursor visible. No-op on platforms without a live cursor. */
    showInteractionCursor(): Promise<void>;

    /** Make the interaction cursor invisible. No-op on platforms without a live cursor. */
    hideInteractionCursor(): Promise<void>;

    // ---------------------------------------------------------------------
    // Prompt / context helpers
    // ---------------------------------------------------------------------

    /**
     * URL or location identifier to record in tool call data.
     *
     * Web yields the active page URL; mobile yields `'mobile://app'`.
     * A fallback string is returned when the target is disconnected.
     */
    getCurrentLocation(): string;

    /**
     * Text fragments, specific to the platform, that are spliced into LLM
     * prompts.
     *
     * They are bundled into one object so that inspector implementations do
     * not each need a pile of one-line accessor methods.
     */
    getPlatformPromptInfo(): PlatformPromptInfo;

    /**
     * Description of the surrounding context for the LLM user message (e.g.
     * the list of browser tabs on web, or a generic description on mobile).
     */
    getContextDescription(): string;

    // ---------------------------------------------------------------------
    // Lifecycle
    // ---------------------------------------------------------------------

    /**
     * One-time, platform-specific setup performed during flow startup.
     *
     * Web: installs browser context bindings, reloads existing pages, and
     * makes sure a page is assigned.
     * Mobile: confirms the device session is alive.
     *
     * NOTE(review): earlier wording of this comment referred to casting an
     * opaque `automationContext` parameter; the signature declared here only
     * accepts `callbacks` — confirm against the implementations.
     *
     * @param callbacks - Flow-level callbacks that the inspector may register
     *   on the underlying automation context.
     */
    initialize(callbacks: TargetInitCallbacks): Promise<void>;

    /**
     * Save platform-specific session state (e.g. browser storage state).
     * Platforms without session persistence treat this as a no-op.
     */
    persistSessionState(persistence: FlowsPersistence, flowId: string): Promise<void>;
}

/**
 * Bundle of platform-specific wording used when building LLM prompts.
 *
 * Every inspector implementation returns a static instance of this.
 */
export interface PlatformPromptInfo {
    /** Opening paragraph(s) of the LLM system message. */
    readonly systemPreamble: string;

    /** Goes after "two images of" — e.g. "a webpage" or "a mobile app screen". */
    readonly screenshotSubject: string;

    /** E.g. "web page (i.e. the current viewport)" or "mobile app screen". */
    readonly currentViewDescription: string;

    /** E.g. "viewport of the web page" or "mobile app screen". */
    readonly annotatedViewDescription: string;

    /** Fills "interact with the {x}" — e.g. "website" or "mobile app". */
    readonly interactionTarget: string;

    /** Short noun for the target — e.g. "webpage" or "app". */
    readonly targetNoun: string;
}

/**
 * What the flow engine hands to {@link TargetInspectorBase.initialize}.
 *
 * Passing callbacks this way lets an inspector hook up platform-specific
 * event handlers without importing the flow engine class itself, which
 * avoids circular imports.
 */
export interface TargetInitCallbacks {
    /** Flow metadata (mutable reference — read for `runMode` etc.). */
    readonly metadata: FlowMetadata;

    /**
     * Invoked for platform dialog events (e.g. browser alert/confirm/prompt).
     *
     * The inspector attaches it to the matching platform event. The argument
     * is the platform's own dialog object (e.g. Playwright `Dialog`); it is
     * typed `unknown` so this interface stays platform-agnostic.
     *
     * Web targets only — mobile targets ignore it.
     */
    readonly dialogHandler?: (dialog: unknown) => Promise<void>;

    /**
     * Flow-engine state required to register interaction tracking.
     *
     * The inspector forwards it to {@link PageInteractionTracker.register}
     * together with its own automation context.
     *
     * Web targets only — mobile targets ignore it.
     */
    readonly interactionTrackingHost?: InteractionTrackingHost;
}

/**
 * The type callers use for "any target inspector".
 *
 * Narrow via the `type` field (e.g. `'web'`, `'mobile'`). Inspectors
 * supplied by plugins satisfy {@link TargetInspectorBase} and register with
 * their own `type` literal.
 *
 * NOTE(review): described as a union of implementations, but as declared
 * here it is a plain alias of {@link TargetInspectorBase}.
 */
export type TargetInspector = TargetInspectorBase;
//# sourceMappingURL=TargetInspector.d.ts.map