@ui-tars/sdk
Version:
A powerful cross-platform(ANY device/platform) toolkit for building GUI automation agents for UI-TARS
1 lines • 3.69 kB
Source Map (JSON)
{"version":3,"file":"types.mjs","sources":["webpack://@ui-tars/sdk/./src/types.ts"],"sourcesContent":["/*\n * Copyright (c) 2025 Bytedance, Inc. and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\nimport {\n Message,\n GUIAgentData,\n PredictionParsed,\n UITarsModelVersion,\n ScreenshotResult,\n GUIAgentError,\n StatusEnum,\n} from '@ui-tars/shared/types';\n\nimport { BaseOperator, BaseModel } from './base';\nimport { UITarsModel } from './Model';\nimport { Factors } from './constants';\n\nexport interface ExecuteParams {\n prediction: string;\n parsedPrediction: PredictionParsed;\n /** Device Physical Resolution */\n screenWidth: number;\n /** Device Physical Resolution */\n screenHeight: number;\n /** Device DPR */\n scaleFactor: number;\n /** model coordinates scaling factor [widthFactor, heightFactor] */\n factors: Factors;\n}\n\nexport type ExecuteOutput = { status: StatusEnum } & (object | void);\n\nexport interface ScreenshotOutput extends ScreenshotResult {}\n\nexport interface InvokeParams {\n conversations: Message[];\n images: string[];\n /** logical size */\n screenContext: {\n /** screenshot width */\n width: number;\n /** screenshot height */\n height: number;\n };\n /** physicalSize = screenshotSize * scaleFactor */\n scaleFactor?: number;\n /** the ui-tars's version */\n uiTarsVersion?: UITarsModelVersion;\n headers?: Record<string, string>;\n /** == Response API only == */\n /** previous response id */\n previousResponseId?: string;\n}\n\nexport interface InvokeOutput {\n prediction: string;\n parsedPredictions: PredictionParsed[];\n costTime?: number;\n costTokens?: number;\n /** == Response API only == */\n /** response id */\n responseId?: string;\n // TODO: status: StatusEnum, status should be provided by model\n}\nexport abstract class Operator extends BaseOperator {\n static MANUAL: {\n ACTION_SPACES: string[];\n EXAMPLES?: string[];\n };\n abstract screenshot(): Promise<ScreenshotOutput>;\n abstract execute(params: ExecuteParams): Promise<ExecuteOutput>;\n}\n\nexport abstract class Model extends BaseModel<InvokeParams, InvokeOutput> {\n abstract invoke(params: InvokeParams): Promise<InvokeOutput>;\n}\n\nexport type Logger = Pick<Console, 'log' | 'error' | 'warn' | 'info'>;\n\nexport interface RetryConfig {\n maxRetries: number;\n onRetry?: (error: Error, attempt: number) => void;\n}\n\nexport interface GUIAgentConfig<TOperator> {\n operator: TOperator;\n model:\n | InstanceType<typeof UITarsModel>\n | ConstructorParameters<typeof UITarsModel>[0];\n\n // ===== Optional =====\n systemPrompt?: string;\n signal?: AbortSignal;\n onData?: (params: { data: GUIAgentData }) => void;\n onError?: (params: { data: GUIAgentData; error: GUIAgentError }) => void;\n logger?: Logger;\n retry?: {\n model?: RetryConfig;\n /** TODO: whether need to provider retry config in SDK?, should be provided with operator? */\n screenshot?: RetryConfig;\n execute?: RetryConfig;\n };\n /** Maximum number of turns for Agent to execute, @default 25 */\n maxLoopCount?: number;\n /** Time interval between two loop iterations (in milliseconds), @default 0 */\n loopIntervalInMs?: number;\n uiTarsVersion?: UITarsModelVersion;\n}\n\nexport interface AgentContext<T = Operator> extends GUIAgentConfig<T> {\n logger: NonNullable<GUIAgentConfig<T>['logger']>;\n /** [widthFactor, heightFactor] */\n factors: [number, number];\n model: InstanceType<typeof UITarsModel>;\n}\n"],"names":["Operator","BaseOperator","Model","BaseModel"],"mappings":";;;;;AAGC;;;;;;;;;;AA8DM,MAAeA,iBAAiBC;AAOvC;AANE,iBADoBD,UACb,UAAP;AAQK,MAAeE,cAAcC;AAEpC"}