UNPKG

donobu

Version:

Create browser automations with an LLM agent and replay them as Playwright scripts.

534 lines 22.9 kB
import type { LanguageModel } from 'ai';
import type { Locator, Page } from 'playwright-core';
import type { z } from 'zod/v4';
import type { GptClient } from '../../clients/GptClient';
import type { DonobuStack } from '../../managers/DonobuStack';
import type { InteractionVisualizer } from '../../managers/InteractionVisualizer';
import type { AuditOptions, AuditReport } from '../../models/AuditReport';
import type { BrowserStorageState } from '../../models/BrowserStorageState';
import type { ControlPanelFactory } from '../../models/ControlPanel';
import type { FlowMetadata } from '../../models/FlowMetadata';
import type { ToolCallResult } from '../../models/ToolCallResult';
import type { FlowsPersistence } from '../../persistence/flows/FlowsPersistence';
import type { TestsPersistence } from '../../persistence/tests/TestsPersistence';
import type { CookieAnalyses } from '../../tools/CreateBrowserCookieReportTool';
import type { AccessibilityResults } from '../../tools/RunAccessibilityTestTool';
import type { PlaywrightAssertionStep } from '../ai/cache/assertCache';
import type { PageAiCache } from '../ai/cache/cache';
import type { LocateOptions } from '../ai/locate/locateTypes';
import type {
  PageAi,
  PageAiNoSchemaOptions,
  PageAiOptions,
  PageAiSchemaOptions,
} from '../ai/PageAi';
import type { SmartSelector } from './SmartSelector';
import type { TbdSession } from './tbd/tbdTypes';

export type {
  AccessibilityResults,
  CookieAnalyses,
  PageAiNoSchemaOptions,
  PageAiOptions,
  PageAiSchemaOptions,
};

/**
 * Value accepted by every per-call `gptClient` option in this file: either a
 * Donobu {@link GptClient} or any non-string member of the `ai` package's
 * `LanguageModel` type (plain string values are excluded by the `Exclude`).
 */
type GptClientOverride = GptClient | Exclude<LanguageModel, string>;

/**
 * Options for the schema-returning overload of `page.ai` / `page.ai.act`.
 * Identical to {@link PageAiSchemaOptions} except that `gptClient` is
 * re-declared to accept a {@link GptClientOverride}.
 */
type PageAiActWithSchemaOptions<Schema extends z.ZodObject> = Omit<
  PageAiSchemaOptions<Schema>,
  'gptClient'
> & {
  gptClient?: GptClientOverride;
};

/**
 * Options for the schema-less overload of `page.ai` / `page.ai.act`.
 * Identical to {@link PageAiNoSchemaOptions} except that `gptClient` is
 * re-declared to accept a {@link GptClientOverride}.
 */
type PageAiActWithoutSchemaOptions = Omit<PageAiNoSchemaOptions, 'gptClient'> & {
  gptClient?: GptClientOverride;
};

/**
 * Options accepted by {@link PageAiCallable.assert}.
 */
export type AssertOptions = {
  /** How many times to retry the assertion on failure. Defaults to 0. */
  retries?: number;
  /** Seconds to wait between retries. Defaults to 3. */
  retryDelaySeconds?: number;
  /** Client/model override for this assertion only. */
  gptClient?: GptClientOverride;
  /**
   * Enables cache lookup/storage for deterministic replays using standard
   * Playwright assertions. On the first run the AI evaluates the assertion
   * and generates equivalent Playwright code which is cached. Subsequent
   * runs execute the cached code directly, skipping the AI call entirely.
   * Defaults to `true`.
   *
   * Cached steps preserve `{{$.env.*}}` placeholders for any value that came
   * from an env var, so changing an env value between runs replays the same
   * cached steps with the new value rather than re-invoking the AI.
   */
  cache?: boolean;
  /**
   * Explicit environment variable names (in addition to the heuristically
   * derived ones) that the assertion may read via `{{$.env.*}}` interpolations.
   */
  envVars?: string[];
  /**
   * Explicitly supply environment variable values that amend (or override)
   * the environment observed by this `page.ai.assert` call. Keys are merged
   * with any names derived from {@link AssertOptions.envVars} and from
   * `{{$.env.*}}` interpolations in the assertion text.
   *
   * - A `string` value sets or overrides the variable for this invocation.
   * - An `undefined` value *removes* the variable, even if it would
   *   otherwise be resolved from persistence.
   *
   * Only the **names** (keys) influence cache lookup; changing a value
   * replays the cached steps with the new value via `{{$.env.*}}` placeholder
   * substitution rather than busting the cache.
   */
  envVals?: Record<string, string | undefined>;
};

/**
 * Call signatures shared by `page.ai(...)` and `page.ai.act(...)`: the first
 * overload resolves to the value inferred from the supplied Zod schema, the
 * second resolves to `void`.
 */
type PageAiAct = {
  <Schema extends z.ZodObject>(
    instruction: string,
    options?: PageAiActWithSchemaOptions<Schema>,
  ): Promise<z.infer<Schema>>;
  (instruction: string, options?: PageAiActWithoutSchemaOptions): Promise<void>;
};

/**
 * Callable wrapper that powers the `page.ai` helper inside Playwright tests.
 *
 * This value can be invoked directly (`await page.ai('...')`) or through its
 * methods (e.g., `await page.ai.act(...)`). Each call spins up a Donobu
 * autonomous flow with the current page context, allowing AI to interact with
 * the browser, call tools, and return structured data. Control per-invocation
 * behaviour with options such as `allowedTools`, `maxToolCalls`, `envVars`, and
 * `cache` to constrain how the agent explores.
 *
 * @example Direct call with optional schema
 * ```ts
 * import { z } from 'zod/v4';
 *
 * // Ask AI to navigate the UI and return the detected user card.
 * const userCard = await page.ai('Open settings and read the user card', {
 *   schema: z.object({
 *     fullName: z.string(),
 *     email: z.email(),
 *   }),
 * });
 *
 * expect(userCard.fullName).toMatch(/Doe/);
 * ```
 * @example Opting into a different GPT client for a single call
 * ```ts
 * await page.ai('Generate a shareable invite link', {
 *   gptClient: experimentalCreativeClient,
 *   allowedTools: ['click', 'inputText'],
 *   maxToolCalls: 8,
 * });
 * ```
 */
export type PageAiCallable = PageAiAct & {
  /**
   * Launches an autonomous Donobu flow that uses AI to satisfy a natural-language instruction.
   * The flow may call Donobu tools and mirrors the metadata recorded for the surrounding test run.
   *
   * Provide a Zod schema when you want the flow to produce structured output; the AI's response is
   * validated against the schema and is returned as the result. Tune tool
   * allow-lists, max tool calls, environment variable access, and caching flags
   * through the options parameter to keep the run deterministic when needed.
   * When `allowedTools` is omitted, the agent receives the default Donobu tool
   * pack:
   * - analyzePageText
   * - assert
   * - assertPage
   * - changeWebBrowserTab
   * - chooseSelectOption
   * - click
   * - goForwardOrBack
   * - goToWebpage
   * - handleBrowserDialog
   * - hoverOverElement
   * - inputFaker
   * - inputRandomizedEmailAddress
   * - inputText
   * - makeComment
   * - markObjectiveComplete
   * - markObjectiveNotCompletable
   * - pressKey
   * - rememberPageText
   * - setRunMode
   * - scrollPage
   * - wait
   *
   * @template Schema The Zod object schema describing the expected shape of the AI result.
   * @param instruction High-level description of what the autonomous flow should accomplish.
   * @param options Optional per-call settings such as `schema`, `allowedTools`,
   * `maxToolCalls`, `envVars`, `cache`, and `gptClient`.
   * @returns The parsed schema result when a schema is provided; otherwise, void.
   * @throws PageAiException if the autonomous flow fails or reaches a terminal non-success state.
   *
   * @example Collecting structured data while the AI drives the UI
   * ```ts
   * const discountSchema = z.object({ code: z.string(), expiresOn: z.string() });
   *
   * const result = await page.ai.act(
   *   'Find any available promo code on the checkout page',
   *   { schema: discountSchema, allowedTools: ['click', 'inputText'] },
   * );
   *
   * console.log(result.code);
   * ```
   * @example Lightweight call without schema (fire-and-forget)
   * ```ts
   * await page.ai.act('Dismiss any popups and proceed to the dashboard');
   * ```
   */
  act: PageAiAct;
  /**
   * Assert that a given condition about the current page holds true (or throws).
   * The assertion is evaluated by AI using the context of the current page's...
   * - Textual context
   * - Viewport screenshot
   * - URL
   * - Title
   *
   * Retries trigger fresh screenshots/context captures, giving dynamic UIs time
   * to settle before ultimately failing the test if the condition never holds.
   *
   * @example Resilient assertion with retries
   * ```ts
   * await page.ai.assert('The publish button is visible and enabled', {
   *   retries: 3,
   *   retryDelaySeconds: 2,
   * });
   * ```
   */
  assert(assertion: string, options?: AssertOptions): Promise<void>;
  /**
   * Return an object conforming to the given Zod schema. The object will be
   * generated considering the following:
   * - The given instruction (if any).
   * - A viewport screenshot of the current page.
   * - The raw textual content of the current page.
   *
   * @example Extracting a hero section summary
   * ```ts
   * const schema = z.object({
   *   heading: z.string(),
   *   subheading: z.string().optional(),
   *   primaryCta: z.string(),
   * });
   *
   * const hero = await page.ai.extract(schema, {
   *   instruction: 'Only read the hero at the top of the page',
   * });
   *
   * expect(hero.primaryCta).toMatch(/Start trial/i);
   * ```
   */
  extract<Schema extends z.ZodObject>(
    schema: Schema,
    options?: {
      instruction?: string;
      gptClient?: GptClientOverride;
    },
  ): Promise<z.infer<Schema>>;
  /**
   * Run an analysis on the current webpage's full raw text. Note that since this runs an analysis
   * on the raw text, rather than the HTML of the page, the raw text may be a bit jumbled,
   * have its styling lost, careful positioning lost, etc.
   *
   * Returns a natural-language report string you can log, store, or feed into
   * further AI steps.
   *
   * If there is context relevant to running the analysis that would not be found in the webpage
   * text itself, perhaps context given in another message, prompt, overall object, etc, then
   * provide it in the "additionalContext" parameter.
   *
   * @example Detecting mentions of deprecated APIs
   * ```ts
   * const report = await page.ai.analyzePageText(
   *   'List any deprecated API names or security warnings you see',
   *   { additionalContext: 'This is a developer changelog page.' },
   * );
   *
   * console.log(report);
   * ```
   */
  analyzePageText(
    analysisToRun: string,
    options?: {
      additionalContext?: string;
      gptClient?: GptClientOverride;
    },
  ): Promise<string>;
  /**
   * Perform an analysis of the cookies for the current web browsing session.
   * Returns the cookies alongside AI-written notes on their likely purpose and
   * security attributes, which is useful for privacy reviews or regression audits.
   *
   * @example Auditing cookies after sign-in
   * ```ts
   * const cookies = await page.ai.createCookieReport();
   * expect(cookies.find((c) => c.name === 'session')?.secure).toBe(true);
   * ```
   */
  createCookieReport(options?: { gptClient?: GptClientOverride }): Promise<CookieAnalyses>;
  /**
   * Resolve a natural-language description to a Playwright {@link Locator}.
   *
   * The method captures a viewport screenshot and a pruned DOM snapshot of the
   * current page (including iframes), sends both to the AI model, and receives
   * back a structured locator chain that is mechanically constructed — no
   * `eval` or string parsing involved.
   *
   * If the initial locator matches a small number of elements (≤ 5), a
   * disambiguation step shows HTML snippets of the candidates to the AI so it
   * can pick the correct one (appended as `.nth(n)`). If the locator matches
   * too many elements (> 5) or zero, one automatic retry is attempted before
   * throwing.
   *
   * @param description Human-readable description of the target element,
   * e.g. `"The submit button"`, `"Third row in the pricing table"`.
   * @returns A Playwright {@link Locator} pointing at the resolved element.
   * @throws {LocateException} When the element cannot be resolved after retries.
   *
   * @example Clicking a described element
   * ```ts
   * const submit = await page.ai.locate('The submit button');
   * await submit.click();
   * ```
   *
   * @example Extracting text from a located element
   * ```ts
   * const heading = await page.ai.locate('The main page heading');
   * const text = await heading.textContent();
   * expect(text).toContain('Welcome');
   * ```
   *
   * @example Targeting an element inside an iframe
   * ```ts
   * const payButton = await page.ai.locate('The Pay Now button inside the Stripe checkout iframe');
   * await payButton.click();
   * ```
   */
  locate(description: string, options?: LocateOptions): Promise<Locator>;
};

/**
 * Extends the Page object with additional test-related methods.
 *
 * @example Using the extended helpers inside a Playwright test
 * ```ts
 * const page = (await context.newPage()) as DonobuExtendedPage;
 *
 * await page.ai('Log in and open the notifications panel');
 * const bell = page.find('[data-testid=notification-bell]', {
 *   failover: ['text=Notifications'],
 * });
 * await bell.click();
 * ```
 */
export interface DonobuExtendedPage extends Page {
  /**
   * AI helper combining the callable `page.ai(...)` entrypoint with specialised
   * utilities like `assert`, `extract`, and `analyzePageText`.
   *
   * @example Driving the UI while collecting structured data
   * ```ts
   * const profile = await page.ai('Open profile and summarize it', {
   *   schema: z.object({ name: z.string(), headline: z.string() }),
   * });
   *
   * expect(profile.headline).toContain('Staff');
   * ```
   */
  ai: PageAiCallable;
  /**
   * Builds a smart selector object that will automatically failover when performing
   * actions. Failovers let you specify backup selectors Playwright will try if
   * the primary locator becomes stale or cannot be found.
   *
   * @example Clicking a button with selector fallbacks
   * ```ts
   * const submit = page.find('[data-testid=submit]', {
   *   failover: ['text=Submit', 'button:has-text("Continue")'],
   * });
   *
   * await submit.click();
   * ```
   */
  find(
    selector: string,
    options?: {
      failover?: string[];
      frame?: string;
    },
  ): SmartSelector;
  /**
   * Run an arbitrary Donobu tool by name.
   * Useful for directly invoking custom tools outside of the autonomous AI
   * planner while still recording metadata and using the configured GPT client.
   *
   * @param toolName Name of the tool to run.
   * @param toolParams Parameters for the tool. Typed as `unknown` because this
   * declaration does not constrain their shape; any value may be passed.
   * @param options Optional client/model override for this call only.
   *
   * @example Triggering a custom tool
   * ```ts
   * const result = await page.run('syncSsoSession', { tenant: 'acme' });
   * expect(result.success).toBe(true);
   * ```
   */
  run(
    toolName: string,
    toolParams?: unknown,
    options?: {
      gptClient?: GptClientOverride;
    },
  ): Promise<ToolCallResult>;
  /**
   * Checks all of the pages in the current browser context and returns the one
   * matching the given URL. Use the exact URL or a distinctive substring; the
   * method searches open tabs in the current context and returns the match.
   *
   * @example Switching to a newly opened dashboard tab
   * ```ts
   * const dashboard = await page.changeTab('https://app.example.com/dashboard');
   * await dashboard.ai.assert('User avatar is visible');
   * ```
   */
  changeTab(url: string): Promise<DonobuExtendedPage>;
  /**
   * Run a web accessibility (i.e. axe-core) test on the current webpage.
   * Returns an `AccessibilityResults` summary you can use to gate builds or
   * surface violations in test reports.
   *
   * @example Asserting an accessibility scan passes
   * ```ts
   * const accessibility = await page.runAccessibilityTest();
   * expect(accessibility.violations).toHaveLength(0);
   * ```
   */
  runAccessibilityTest(): Promise<AccessibilityResults>;
  /**
   * Run a comprehensive audit of the current webpage.
   *
   * By default, an audit will:
   * - Ensure the page fully loads, including waiting for spinners, et al.
   * - Run an accessibility check, filtering for critical issues.
   * - Ensure that all DOM element 'id' properties are correctly unique.
   * - Ensure that all DOM element 'data-testid' properties (and common variants) are correctly unique.
   * - Flag any JavaScript console errors or uncaught exceptions.
   * - Flag any failed network requests (4xx/5xx responses, request failures).
   *
   * Pass a `url` to navigate before running checks. Without a URL the audit
   * reloads the current page so that console and network errors from the
   * initial page load are captured consistently in both modes.
   *
   * **Warning:** Because the audit navigates or reloads the page, any
   * in-progress page state (filled forms, open modals, scroll position, etc.)
   * will be lost. Run the audit *before* interacting with the page, or pass a
   * `url` explicitly.
   *
   * @example Full-lifecycle audit of a URL
   * ```ts
   * const report = await page.audit({ url: 'https://example.com' });
   * console.log(report.passed); // true if all checks passed
   * ```
   *
   * @example Auditing the current page (reloads it)
   * ```ts
   * await page.goto('https://example.com');
   * const report = await page.audit();
   * ```
   *
   * @example Customising individual checks
   * ```ts
   * const report = await page.audit({
   *   url: 'https://example.com',
   *   pageLoad: { retries: 5, retryDelaySeconds: 5 },
   *   accessibility: { impact: 'serious' },
   *   consoleErrors: { ignore: [/analytics/, /hotjar/] },
   * });
   * ```
   *
   * @example Disabling a check
   * ```ts
   * const report = await page.audit({ accessibility: false });
   * ```
   */
  audit(options?: AuditOptions): Promise<AuditReport>;
  /**
   * Opens an interactive mini-session that pauses test execution and lets the
   * user explore the page manually and/or give AI instructions. In headed mode
   * the user interacts directly with the visible browser; in headless mode a
   * live CDP screencast is mirrored into the control panel window.
   *
   * During the session the user can:
   * - Interact with the page directly (clicks, typing, etc.)
   * - Type an instruction and press Enter to have the AI take over
   * - Pause the AI mid-execution to revise instructions or take over manually
   * - Close the control panel window to end the session
   *
   * All interactions — both manual and AI-driven — are recorded. When the test
   * completes, the `await page.tbd()` line is replaced in the source file with
   * the equivalent Playwright code (manual actions become `page.find(...).click()`
   * etc., AI instructions become `page.ai(...)`).
   *
   * @example Placeholder step during test authoring
   * ```ts
   * test('checkout flow', async ({ page }) => {
   *   await page.goto('https://shop.example.com');
   *
   *   // Opens a mini-session — explore the page, give AI instructions,
   *   // then close the window when done. The line below will be replaced
   *   // with the recorded actions after the test finishes.
   *   await page.tbd();
   * });
   * ```
   */
  tbd(): Promise<void>;
  /**
   * Donobu state carried on the page object.
   *
   * NOTE(review): the underscore-prefixed name suggests this is internal
   * plumbing rather than public API — confirm before depending on it from
   * test code.
   */
  _dnb: {
    donobuFlowMetadata: FlowMetadata;
    donobuStack: DonobuStack;
    interactionVisualizer: InteractionVisualizer;
    pageAi: PageAi | undefined;
    pageAiCache: PageAiCache | undefined;
    persistence: FlowsPersistence;
    testsPersistence: TestsPersistence;
    initialBrowserState: BrowserStorageState;
    gptClient: GptClient | undefined;
    controlPanelFactory?: ControlPanelFactory;
    runtimeDirectives?: {
      clearPageAiCache?: boolean;
    };
    /** Env var overrides set by {@link PageAi} for the current invocation. */
    envVals?: Record<string, string | undefined>;
    /** Sessions recorded by {@link tbd} for post-test code generation. */
    tbdSessions: TbdSession[];
    /**
     * Wrapping records for every `page.ai`, `page.ai.assert`, and
     * `page.ai.locate` invocation in this test. The HTML reporter renders
     * each as a parent node containing whichever Donobu tool calls and
     * native Playwright steps fell inside its time window, with a
     * `[cached]` badge driven by the per-record `cacheHit` flag.
     *
     * Recording happens for ALL calls (cache hit or miss) so the wrapper
     * is visible regardless. Nested AI calls (e.g. a cached `page.ai`
     * whose runSource calls `page.ai.assert(...)`) become nested wrappers
     * — each carries its own cache state.
     */
    aiInvocations: AiInvocationRecord[];
  };
}

/**
 * Record of a single `page.ai`, `page.ai.assert`, or `page.ai.locate`
 * invocation, as collected in `DonobuExtendedPage._dnb.aiInvocations`.
 */
export interface AiInvocationRecord {
  /** Which entry point produced this record. */
  kind: 'act' | 'assert' | 'locate';
  description: string;
  /** Start of the invocation's time window. NOTE(review): unit not shown here — presumably epoch milliseconds; confirm. */
  startedAt: number;
  /** End of the invocation's time window (same unit as `startedAt`). */
  endedAt: number;
  cacheHit: boolean;
  /**
   * For live (non-replay) invocations: `true` once this run successfully
   * wrote an entry into the relevant page-AI cache, `false` if a write was
   * attempted (or would have been) but didn't land. Combined with
   * `cacheHit`, this gives the reporter a tri-state cache outcome — hit
   * (replayed), stored (live + recorded), or miss (live + nothing cached).
   * Always `false` when `cacheHit` is `true`.
   */
  cacheStored: boolean;
  passed: boolean;
  error?: {
    message?: string;
  };
  /**
   * For live `page.ai.assert` runs: metadata about the post-pass structured
   * step verification. After the AI judges the assertion passed against a
   * screenshot, AssertTool re-executes the AI-emitted Playwright `expect()`
   * calls against the page to decide whether those structured steps are
   * cache-worthy. When `failed: true`, the AI's visual verdict still stands
   * — the tool returns success — but one of the structured `expect()` calls
   * underneath threw. The reporter uses this to surface the divergence as a
   * labelled signal rather than render the inner expect failure as a regular
   * assertion failure.
   *
   * Undefined when verification didn't run (no structured steps emitted, AI
   * verdict was failed, cached replay path, or AssertTool invoked outside
   * the page.ai.assert wrapper).
   */
  verification?: {
    startedAt: number;
    endedAt: number;
    failed: boolean;
    errorMessage?: string;
  };
  /**
   * For cached `page.ai.assert` invocations: the structured Playwright
   * assertion steps that were replayed. The reporter formats these back
   * into source-code lines so the report shows exactly what was checked
   * (e.g. `expect(page.getByRole('heading', { name: '…' })).toBeVisible()`).
   * Undefined for live assert runs, `act`, and `locate` records.
   */
  assertSteps?: PlaywrightAssertionStep[];
}
//# sourceMappingURL=DonobuExtendedPage.d.ts.map