UNPKG

donobu

Version:

Create browser automations with an LLM agent and replay them as Playwright scripts.

206 lines 9.89 kB
import type { z } from 'zod/v4';
import type { GptClient } from '../clients/GptClient';
import type { AiQuery } from '../models/AiQuery';
import type { ControlPanel } from '../models/ControlPanel';
import type { FlowMetadata } from '../models/FlowMetadata';
import type { GptMessage, StructuredOutputMessage, TextItem } from '../models/GptMessage';
import type { SystemMessage } from '../models/GptMessage';
import type { InteractableElement } from '../models/InteractableElement';
import type { ProposedToolCall } from '../models/ProposedToolCall';
import type { ToolCall } from '../models/ToolCall';
import type { FlowsPersistence } from '../persistence/flows/FlowsPersistence';
import type { DonobuFlowsManager } from './DonobuFlowsManager';
import type { InteractionVisualizer } from './InteractionVisualizer';
import type { TargetInspectorBase } from './TargetInspector';
import type { TargetInspector } from './TargetInspector';
import type { ToolManager } from './ToolManager';
/**
 * Return an object conforming to the given schema (`zodSchema`). The object
 * will be generated considering the given target and tool call history.
 *
 * @param instruction - Text instruction for the extraction.
 * @param zodSchema - Zod schema whose output type `T` is the payload type of
 *   the returned {@link StructuredOutputMessage}.
 * @param screenshot - Image bytes, or `null` when none is supplied.
 *   NOTE(review): presumably a screenshot of the target named in the summary
 *   above — confirm against the implementation.
 * @param toolCallHistory - Tool calls to take into consideration.
 * @param gptClient - Client used for the query.
 * @param options - Optional settings. `timeout` units are not stated in this
 *   declaration (presumably milliseconds — TODO confirm).
 * @returns A promise resolving to a structured-output message typed by `T`.
 */
export declare function extractFromPage<T>(instruction: string, zodSchema: z.ZodSchema<T>, screenshot: Buffer | null, toolCallHistory: ToolCall[], gptClient: GptClient, options?: { timeout?: number; }): Promise<StructuredOutputMessage<T>>;
/**
 * This is the main business-logic class of Donobu. This class goes through a
 * flow via its `run` method.
 */
export declare class DonobuFlow {
    readonly flowsManager: DonobuFlowsManager;
    readonly envData: Record<string, string>;
    readonly persistence: FlowsPersistence;
    /** May be `null`, i.e. a flow can exist without a GPT client. */
    readonly gptClient: GptClient | null;
    readonly toolManager: ToolManager;
    readonly interactionVisualizer: InteractionVisualizer;
    readonly proposedToolCalls: ProposedToolCall[];
    readonly invokedToolCalls: ToolCall[];
    readonly gptMessages: GptMessage[];
    readonly targetInspector: TargetInspector;
    readonly metadata: FlowMetadata;
    readonly controlPanel: ControlPanel;
    private static readonly MAIN_MESSAGE_ELEMENT_LIST_MARKER;
    /**
     * Marker text for a user interruption.
     * NOTE(review): presumably embedded in the GPT message history when the
     * user interrupts a paused flow — confirm against the implementation.
     */
    static readonly USER_INTERRUPT_MARKER = "[User interruption while flow was paused, this MUST be acknowledged]";
    /** The only non-`readonly` instance field; `null` is an allowed value. */
    inProgressToolCall: ToolCall | null;
    readonly aiQueries: AiQuery[];
    /**
     * Each parameter shares its name and type with a public field declared
     * above (presumably stored as-is — the constructor body is not visible in
     * this declaration file).
     */
    constructor(flowsManager: DonobuFlowsManager, envData: Record<string, string>, persistence: FlowsPersistence, gptClient: GptClient | null, toolManager: ToolManager, interactionVisualizer: InteractionVisualizer, proposedToolCalls: ProposedToolCall[], invokedToolCalls: ToolCall[], gptMessages: GptMessage[], targetInspector: TargetInspector, metadata: FlowMetadata, controlPanel: ControlPanel);
    /**
     * Drives the entire Donobu flow state-machine until it reaches a
     * terminal state.
     *
     * The method loops indefinitely, delegating to a handler that matches the
     * current {@link metadata.state}. Each handler may mutate state, enqueue
     * tool calls, and/or persist data. After the handler returns,
     * {@link transitionState} decides the next state (taking into account user
     * signals, control-panel input, or tool-call outcomes).
     *
     * High-level sequence per iteration:
     *  1. Refresh the control-panel UI.
     *  2. Invoke the appropriate `on*` handler for the active state.
     *  3. If the flow is *not* yet in a terminal state, compute and persist the
     *     next state via `transitionState`; otherwise execute {@link onComplete}
     *     and exit the loop.
     *
     * Robustness features:
     *  - **Error handling:** Specific exceptions such as
     *    {@link PageClosedException} (page vanished) and
     *    {@link GptPlatformInternalErrorException} (persistent LLM failure)
     *    are converted into a `FAILED` state; any other unhandled error bubbles
     *    up after setting the flow result accordingly.
     *
     * Side-effects (per successful iteration):
     *  - Persists tool calls, screenshots, token counts, and mutated
     *    {@link FlowMetadata} via the injected {@link FlowsPersistence}.
     *  - May write Playwright storage state for later sessions.
     *  - Optionally POSTs flow-completion callbacks (`callbackUrl`).
     *
     * @returns A promise resolving to the object stored in
     *          {@link metadata.result}, or `null` when the flow ended without an
     *          explicit result.
     */
    run(): Promise<FlowMetadata['result']>;
    /**
     * Delegates to the inspector to attempt recovery after the target is
     * closed. If recovery fails, the flow is marked as failed.
     */
    private onTargetClosed;
    /**
     * This method is called if there are persistent GPT platform failures (there
     * are internal retries). This method will mark the flow as a failure.
     */
    private onPersistentGptFailure;
    /**
     * This method is called when the AI platform reports that the account's
     * usage quota or credits have been exhausted (HTTP 402).
     */
    private onInsufficientQuota;
    /**
     * This method is called when a user interrupts the flow.
     * It handles the user action and sets the appropriate flow state.
     *
     * Note that this *bypasses* the normal state transition logic!
     */
    private onUserInterruption;
    /**
     * This method is called if there is an unhandled unexpected exception. This
     * method will mark the flow as a failure.
     */
    private onUnexpectedException;
    /**
     * This method is called when a flow is complete (i.e. when
     * {@link DonobuFlow.run} should return).
     *
     * Browser session state and the terminal-state metadata write are
     * committed by whichever code path produced the terminal state
     * (transitionState for tool-driven completion; onTargetClosed /
     * onPersistentGptFailure / onInsufficientQuota / onUnexpectedException
     * for failure paths) — by the time we reach onComplete those have
     * already happened. This method just runs the post-completion side
     * effects.
     */
    private onComplete;
    /**
     * Persists the current browser session state if the flow's config has
     * `persistState` enabled. Must be called BEFORE the in-memory `state`
     * is mutated to a terminal value at every site that produces a
     * terminal state — otherwise FlowCatalog.getFlowById can read the
     * live FlowMetadata object (LOCAL deployments) and a frontend that
     * observes the terminal state will race the (potentially network-
     * bound) upload here, getting a 404 from a subsequent browser-state
     * fetch.
     *
     * The browser context typically survives all-pages-closed (the read
     * goes against the context, not a specific page), so this is safe to
     * call from failure handlers like onTargetClosed. If the read does
     * fail, persistSessionState catches and logs internally — it doesn't
     * propagate.
     */
    private persistTerminalSessionStateIfNeeded;
    /**
     * Attempt to POST a JSON body containing the given flow ID to the given
     * `callbackUrl` if the URL is non-null. Note that there is no retrying
     * if the POST fails for any reason; this is a best-effort 1-shot try.
     */
    private static invokeFlowFinishedCallback;
    /**
     * NOTE(review): undocumented in the source and the signature is elided in
     * this declaration file; presumably a dialog event handler — confirm
     * against the implementation.
     */
    private onDialog;
    /**
     * Transitions the flow to its next state. After this method completes, the
     * `this.metadata.state` will have been updated and the
     * `this.metadata.nextState` will have been cleared.
     */
    private transitionState;
    /**
     * Returns an object formatted according to the following priority:
     *
     *  1. If the `result` for this flow is non-null, then it is returned as-is.
     *  2. If the given next state of the flow is `SUCCESS`, the
     *     `resultJsonSchema` is non-null, and the `gptClient` is non-null,
     *     then an attempt is made to take the context of the entire run and
     *     conform it to this schema. If there is an error when attempting to
     *     map the data of the current flow to the `resultJsonSchema`, then an
     *     object with error details is returned.
     *  3. Otherwise, the `metadata` of the last tool call is returned.
     */
    private createResultJson;
    /**
     * All this method does is set the next state to {@link State.INITIALIZING}.
     */
    private onUnstarted;
    /**
     * This method sets up the page initialization scripts and bindings, and
     * initializes the GPT message history.
     */
    private onInitializing;
    // NOTE(review): the members from here through `queryGptForProposedToolCalls`
    // are undocumented in the source and their signatures are elided in this
    // declaration file. The `on*` ones are presumably the per-state handlers
    // that `run` dispatches to — confirm against the implementation.
    private onRunningAction;
    private onQueryingLlmForNextAction;
    private onWaitingForUserForNextAction;
    private onPaused;
    private onResuming;
    private onFailed;
    private onSuccess;
    private updateGptMessagesWithUserProposedToolCall;
    private queryGptForProposedToolCalls;
    /**
     * Calls {@link gptClient} with the given messages and will retry on failure
     * up to an internally specified maximum number of attempts.
     */
    private queryGptWithRetry;
    /** Target-agnostic sleep (replaces Playwright's waitForTimeout). */
    private static sleep;
    /**
     * @internal - Exposed for testing purposes only
     */
    static createSystemMessageForOverallObjective(envVars: string[] | null, overallObjective: string | null, inspector: TargetInspectorBase): SystemMessage;
    /**
     * @internal - Exposed for testing purposes only
     */
    static createMainUserMessage(inspector: TargetInspectorBase, interactableElements: InteractableElement[]): TextItem;
    /**
     * Returns a size-optimized GPT message history by stripping images and text
     * from old messages.
     *
     * @internal - Exposed for testing purposes only
     */
    static createOptimizedHistoryForGptCall(currentHistory: GptMessage[]): GptMessage[];
}
//# sourceMappingURL=DonobuFlow.d.ts.map