UNPKG

@ui-tars/sdk

Version:

A powerful cross-platform (ANY device/platform) toolkit for building GUI automation agents for UI-TARS

97 lines 3.31 kB
import {
  Message,
  GUIAgentData,
  PredictionParsed,
  UITarsModelVersion,
  ScreenshotResult,
  GUIAgentError,
  StatusEnum,
} from '@ui-tars/shared/types';
import { BaseOperator, BaseModel } from './base';
import { UITarsModel } from './Model';
import { Factors } from './constants';

/**
 * Argument object accepted by {@link Operator.execute}.
 */
export interface ExecuteParams {
  /** Raw prediction text. */
  prediction: string;
  /** The same prediction in its parsed form. */
  parsedPrediction: PredictionParsed;
  /** Width of the device, in physical resolution. */
  screenWidth: number;
  /** Height of the device, in physical resolution. */
  screenHeight: number;
  /** Device pixel ratio (DPR) of the device. */
  scaleFactor: number;
  /** Scaling factors for model coordinates: [widthFactor, heightFactor]. */
  factors: Factors;
}

/**
 * Value resolved by {@link Operator.execute}: always carries a `status`,
 * intersected with `object | void`.
 */
export type ExecuteOutput = {
  status: StatusEnum;
} & (object | void);

/**
 * Value resolved by {@link Operator.screenshot}. Adds no members of its own
 * on top of `ScreenshotResult`.
 */
export interface ScreenshotOutput extends ScreenshotResult {
}

/**
 * Argument object accepted by {@link Model.invoke}.
 */
export interface InvokeParams {
  conversations: Message[];
  images: string[];
  /** Screenshot dimensions, expressed as a logical size. */
  screenContext: {
    /** Width of the screenshot. */
    width: number;
    /** Height of the screenshot. */
    height: number;
  };
  /** Ratio such that physicalSize = screenshotSize * scaleFactor. */
  scaleFactor?: number;
  /** Version of UI-TARS in use. */
  uiTarsVersion?: UITarsModelVersion;
  headers?: Record<string, string>;
  /* ---- Response API only ---- */
  /** Id of the previous response. */
  previousResponseId?: string;
}

/**
 * Value resolved by {@link Model.invoke}.
 */
export interface InvokeOutput {
  prediction: string;
  parsedPredictions: PredictionParsed[];
  costTime?: number;
  costTokens?: number;
  /* ---- Response API only ---- */
  /** Id of this response. */
  responseId?: string;
}

/**
 * Abstract operator contract: concrete subclasses must supply `screenshot`
 * and `execute`.
 */
export declare abstract class Operator extends BaseOperator {
  /** Static manual: a list of action spaces plus optional examples. */
  static MANUAL: {
    ACTION_SPACES: string[];
    EXAMPLES?: string[];
  };
  abstract screenshot(): Promise<ScreenshotOutput>;
  abstract execute(params: ExecuteParams): Promise<ExecuteOutput>;
}

/**
 * Abstract model contract: concrete subclasses must supply `invoke`, mapping
 * {@link InvokeParams} to {@link InvokeOutput}.
 */
export declare abstract class Model extends BaseModel<InvokeParams, InvokeOutput> {
  abstract invoke(params: InvokeParams): Promise<InvokeOutput>;
}

/** Logging sink: the `log`, `error`, `warn` and `info` members of `Console`. */
export type Logger = Pick<Console, 'log' | 'error' | 'warn' | 'info'>;

/**
 * Retry settings for a single kind of operation.
 */
export interface RetryConfig {
  maxRetries: number;
  /**
   * Optional callback receiving the error and an attempt number.
   * NOTE(review): presumably called once per retry attempt; confirm against
   * the implementation.
   */
  onRetry?: (error: Error, attempt: number) => void;
}

/**
 * Configuration object for the GUI agent, generic over the operator type.
 */
export interface GUIAgentConfig<TOperator> {
  operator: TOperator;
  /** Either a `UITarsModel` instance or the first argument of its constructor. */
  model: InstanceType<typeof UITarsModel> | ConstructorParameters<typeof UITarsModel>[0];
  systemPrompt?: string;
  signal?: AbortSignal;
  onData?: (params: {
    data: GUIAgentData;
  }) => void;
  onError?: (params: {
    data: GUIAgentData;
    error: GUIAgentError;
  }) => void;
  logger?: Logger;
  retry?: {
    model?: RetryConfig;
    /**
     * TODO: should the SDK provide retry config for screenshot/execute at all,
     * or should it be provided together with the operator?
     */
    screenshot?: RetryConfig;
    execute?: RetryConfig;
  };
  /** Maximum number of turns for the agent to execute, @default 25 */
  maxLoopCount?: number;
  /** Time interval between two loop iterations, in milliseconds, @default 0 */
  loopIntervalInMs?: number;
  uiTarsVersion?: UITarsModelVersion;
}

/**
 * {@link GUIAgentConfig} with `logger` made non-nullable, `model` narrowed to
 * a `UITarsModel` instance, and `factors` added.
 */
export interface AgentContext<T = Operator> extends GUIAgentConfig<T> {
  logger: NonNullable<GUIAgentConfig<T>['logger']>;
  /** [widthFactor, heightFactor] */
  factors: [number, number];
  model: InstanceType<typeof UITarsModel>;
}
//# sourceMappingURL=types.d.ts.map