UNPKG

@ui-tars/sdk

Version:

A powerful cross-platform (ANY device/platform) toolkit for building GUI automation agents for UI-TARS

280 lines (279 loc) 15.2 kB
/**
 * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 * SPDX-License-Identifier: Apache-2.0
 */
import * as __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_types_07351026__ from "@ui-tars/shared/types";
import * as __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__ from "@ui-tars/shared/constants";
import * as __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_utils_f3fe5586__ from "@ui-tars/shared/utils";
import * as __WEBPACK_EXTERNAL_MODULE_async_retry_b2da4330__ from "async-retry";
import * as __WEBPACK_EXTERNAL_MODULE_jimp__ from "jimp";
import * as __WEBPACK_EXTERNAL_MODULE__context_useContext_mjs_c75ccb70__ from "./context/useContext.mjs";
import * as __WEBPACK_EXTERNAL_MODULE__Model_mjs_4bbd6f0d__ from "./Model.mjs";
import * as __WEBPACK_EXTERNAL_MODULE__base_index_mjs_888496ed__ from "./base/index.mjs";
import * as __WEBPACK_EXTERNAL_MODULE__utils_mjs_25ece7d1__ from "./utils.mjs";
import * as __WEBPACK_EXTERNAL_MODULE__constants_mjs_225410ff__ from "./constants.mjs";

/**
 * Compiler helper used by the constructor to declare instance fields.
 *
 * If `key` already exists on `obj` (own or inherited) it is redefined as an
 * enumerable, configurable, writable data property; otherwise it is created
 * by plain assignment.
 *
 * @param {object} obj - Target object.
 * @param {string} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`, for chaining.
 */
function _define_property(obj, key, value) {
  if (key in obj) {
    Object.defineProperty(obj, key, {
      value: value,
      enumerable: true,
      configurable: true,
      writable: true
    });
  } else {
    obj[key] = value;
  }
  return obj;
}

/**
 * Agent that repeatedly takes a screenshot through `operator`, sends the
 * conversation so far to `model`, and executes the parsed predictions through
 * `operator` until a terminal status is reached.
 */
class GUIAgent extends __WEBPACK_EXTERNAL_MODULE__base_index_mjs_888496ed__.BaseGUIAgent {
  /**
   * Runs the screenshot -> model -> execute loop for one instruction.
   *
   * Progress is reported through `config.onData`. Status-only updates carry an
   * empty `conversations` array; each new conversation entry is reported on
   * its own (`conversations.slice(-1)`).
   *
   * The loop stops when:
   *  - `data.status` is no longer RUNNING, or `config.signal` is aborted;
   *  - `loopCnt` reaches `maxLoopCount`, or the number of invalid screenshots
   *    reaches `MAX_SNAPSHOT_ERR_CNT` (status becomes MAX_LOOP with `errMsg`);
   *  - an error escapes the loop body.
   *
   * @param {string} instruction - The instruction stored as the first
   *   conversation entry and in `data.instruction`.
   * @returns {Promise<void>} Resolves when the loop ends or the run is aborted.
   * @throws Re-throws any non-abort error after logging it and calling
   *   `config.onError`.
   */
  async run(instruction) {
    // Short local aliases for the bundled namespace imports; read at call time.
    const { StatusEnum, ShareVersion } = __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_types_07351026__;
    const { IMAGE_PLACEHOLDER } = __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__;
    const { sleep } = __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_utils_f3fe5586__;
    const { INTERNAL_ACTION_SPACES_ENUM, MAX_SNAPSHOT_ERR_CNT } = __WEBPACK_EXTERNAL_MODULE__constants_mjs_225410ff__;
    const { replaceBase64Prefix, toVlmModelFormat, processVlmParams, getSummary } = __WEBPACK_EXTERNAL_MODULE__utils_mjs_25ece7d1__;
    const { setContext } = __WEBPACK_EXTERNAL_MODULE__context_useContext_mjs_c75ccb70__;
    const { Jimp } = __WEBPACK_EXTERNAL_MODULE_jimp__;
    const asyncRetry = __WEBPACK_EXTERNAL_MODULE_async_retry_b2da4330__["default"];

    const { operator, model, logger } = this;
    const {
      signal,
      onData,
      onError,
      retry = {},
      maxLoopCount = __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__.MAX_LOOP_COUNT
    } = this.config;

    const currentTime = Date.now();
    const data = {
      version: ShareVersion.V1,
      systemPrompt: this.systemPrompt,
      instruction,
      modelName: this.model.modelName,
      status: StatusEnum.INIT,
      logTime: currentTime,
      conversations: [
        {
          from: 'human',
          value: instruction,
          timing: { start: currentTime, end: currentTime, cost: 0 }
        }
      ]
    };

    // NOTE: this intentionally merges into (mutates) this.config, as the
    // original code does, before publishing it as the current context.
    setContext(Object.assign(this.config, {
      logger: this.logger,
      systemPrompt: this.systemPrompt,
      factors: this.model.factors,
      model: this.model
    }));

    let loopCnt = 0;
    let snapshotErrCnt = 0;

    data.status = StatusEnum.RUNNING;
    await onData?.({ data: { ...data, conversations: [] } });

    try {
      while (true) {
        // Routed through the configured logger (was a stray console.log).
        logger.info('[run_data_status]', data.status);

        // Stop on any non-RUNNING status or on an external abort.
        if (data.status !== StatusEnum.RUNNING || signal?.aborted) {
          if (signal?.aborted) {
            data.status = StatusEnum.END;
          }
          await onData?.({ data: { ...data, conversations: [] } });
          break;
        }

        // Stop when either the loop budget or the screenshot-failure budget is spent.
        if (loopCnt >= maxLoopCount || snapshotErrCnt >= MAX_SNAPSHOT_ERR_CNT) {
          Object.assign(data, {
            status: StatusEnum.MAX_LOOP,
            errMsg: loopCnt >= maxLoopCount
              ? 'Exceeds the maximum number of loops'
              : 'Too many screenshot failures'
          });
          await onData?.({ data: { ...data, conversations: [] } });
          break;
        }

        loopCnt += 1;
        const start = Date.now();

        const snapshot = await asyncRetry(() => operator.screenshot(), {
          retries: retry?.screenshot?.maxRetries ?? 0,
          onRetry: retry?.screenshot?.onRetry
        });

        // Decode the image only to read its dimensions and mime type. A missing
        // screenshot payload is treated like an undecodable one (counted as a
        // screenshot failure below) instead of throwing on `snapshot.base64`.
        let imageInfo = { width: null, height: null, mime: '' };
        if (snapshot?.base64) {
          imageInfo = await Jimp.fromBuffer(
            Buffer.from(replaceBase64Prefix(snapshot.base64), 'base64')
          ).catch((e) => {
            logger.error('[GUIAgent] screenshot error', e);
            return { width: null, height: null, mime: '' };
          });
        } else {
          logger.error('[GUIAgent] screenshot error', new Error('Screenshot returned no image data'));
        }
        const { width, height, mime } = imageInfo;

        const isValidImage = !!(snapshot?.base64 && width && height);
        if (!isValidImage) {
          // A bad screenshot does not consume a loop iteration; it is tracked
          // by its own counter, and the loop waits 1000 ms before retrying.
          loopCnt -= 1;
          snapshotErrCnt += 1;
          await sleep(1000);
          continue;
        }

        let end = Date.now();
        data.conversations.push({
          from: 'human',
          value: IMAGE_PLACEHOLDER,
          screenshotBase64: snapshot.base64,
          screenshotContext: {
            size: { width, height },
            mime,
            scaleFactor: snapshot.scaleFactor
          },
          timing: { start, end, cost: end - start }
        });
        await onData?.({ data: { ...data, conversations: data.conversations.slice(-1) } });

        const modelFormat = toVlmModelFormat({
          conversations: data.conversations,
          systemPrompt: data.systemPrompt
        });
        const vlmParams = {
          ...processVlmParams(modelFormat.conversations, modelFormat.images),
          screenContext: { width, height },
          mime,
          scaleFactor: snapshot.scaleFactor
        };

        const { prediction, parsedPredictions } = await asyncRetry(async (bail) => {
          try {
            return await model.invoke(vlmParams);
          } catch (error) {
            const isAbort = error instanceof Error
              && (error.name === 'APIUserAbortError' || error.message?.includes('aborted'));
            if (isAbort) {
              // Abort errors are handed to bail() rather than thrown, so they
              // are not retried (see async-retry docs); the placeholder result
              // below is presumably unused once bail() has been called.
              bail(error);
              return { prediction: '', parsedPredictions: [] };
            }
            throw error;
          }
        }, {
          retries: retry?.model?.maxRetries ?? 0,
          onRetry: retry?.model?.onRetry
        });

        logger.info('[GUIAgent Response]:', prediction);
        logger.info('GUIAgent Parsed Predictions:', JSON.stringify(parsedPredictions));

        if (!prediction) {
          logger.error('[GUIAgent Response Empty]:', prediction);
          continue;
        }

        const predictionSummary = getSummary(prediction);
        end = Date.now();
        data.conversations.push({
          from: 'gpt',
          value: predictionSummary,
          timing: { start, end, cost: end - start },
          screenshotContext: {
            size: { width, height },
            scaleFactor: snapshot.scaleFactor
          },
          predictionParsed: parsedPredictions
        });
        await onData?.({ data: { ...data, conversations: data.conversations.slice(-1) } });

        for (const parsedPrediction of parsedPredictions) {
          const actionType = parsedPrediction.action_type;
          logger.info('GUIAgent Action:', actionType);

          // Internal terminal actions end the run without calling the operator.
          const endsRun = [
            INTERNAL_ACTION_SPACES_ENUM.CALL_USER,
            INTERNAL_ACTION_SPACES_ENUM.ERROR_ENV,
            INTERNAL_ACTION_SPACES_ENUM.FINISHED
          ].includes(actionType);
          if (endsRun) {
            data.status = StatusEnum.END;
            break;
          }
          if (actionType === INTERNAL_ACTION_SPACES_ENUM.MAX_LOOP) {
            data.status = StatusEnum.MAX_LOOP;
            break;
          }

          if (!signal?.aborted) {
            logger.info('GUIAgent Action Inputs:', parsedPrediction.action_inputs, parsedPrediction.action_type);
            // Execution failures are logged and swallowed (best effort); the
            // loop then carries on with the next prediction / iteration.
            const executeOutput = await asyncRetry(() => operator.execute({
              prediction,
              parsedPrediction,
              screenWidth: width,
              screenHeight: height,
              scaleFactor: snapshot.scaleFactor,
              factors: this.model.factors
            }), {
              retries: retry?.execute?.maxRetries ?? 0,
              onRetry: retry?.execute?.onRetry
            }).catch((e) => {
              logger.error('GUIAgent execute error', e);
            });
            // The operator may override the run status through its result.
            if (executeOutput?.status) {
              data.status = executeOutput.status;
            }
          }
        }
      }
    } catch (error) {
      const isAbort = error instanceof Error
        && ('AbortError' === error.name || error.message?.includes('aborted'));
      if (isAbort) {
        logger.info('Request was aborted');
        return;
      }
      logger.error('[GUIAgent] run error', error);
      onError?.({
        data,
        error: {
          code: -1,
          error: 'GUIAgent Service Error',
          stack: `${error}`
        }
      });
      throw error;
    } finally {
      // Whatever happened, the run finishes in END; notify only if that is a change.
      const prevStatus = data.status;
      data.status = StatusEnum.END;
      if (data.status !== prevStatus) {
        await onData?.({ data: { ...data, conversations: [] } });
      }
      logger.info('[GUIAgent] finally: status', data.status);
    }
  }

  /**
   * Builds the system prompt from the operator class's static
   * `MANUAL.ACTION_SPACES` list.
   *
   * @returns {string} `SYSTEM_PROMPT` when the list is missing or empty;
   *   otherwise `SYSTEM_PROMPT_TEMPLATE` with `{{action_spaces_holder}}`
   *   replaced by the entries joined with newlines.
   */
  buildSystemPrompt() {
    const actionSpaces = this.operator.constructor?.MANUAL?.ACTION_SPACES;
    if (actionSpaces == null || actionSpaces.length === 0) {
      return __WEBPACK_EXTERNAL_MODULE__constants_mjs_225410ff__.SYSTEM_PROMPT;
    }
    return __WEBPACK_EXTERNAL_MODULE__constants_mjs_225410ff__.SYSTEM_PROMPT_TEMPLATE.replace(
      '{{action_spaces_holder}}',
      actionSpaces.join('\n')
    );
  }

  /**
   * @param {object} config - Agent configuration, also passed to the base class.
   *   Reads `operator`, `model` (a `UITarsModel` instance, or a value passed to
   *   the `UITarsModel` constructor), optional `logger` (defaults to `console`)
   *   and optional `systemPrompt` (defaults to `buildSystemPrompt()`).
   */
  constructor(config) {
    super(config);
    _define_property(this, "operator", void 0);
    _define_property(this, "model", void 0);
    _define_property(this, "logger", void 0);
    _define_property(this, "systemPrompt", void 0);

    const { UITarsModel } = __WEBPACK_EXTERNAL_MODULE__Model_mjs_4bbd6f0d__;
    this.operator = config.operator;
    this.model = config.model instanceof UITarsModel ? config.model : new UITarsModel(config.model);
    this.logger = config.logger || console;
    this.systemPrompt = config.systemPrompt || this.buildSystemPrompt();
  }
}

export { GUIAgent };
//# sourceMappingURL=GUIAgent.mjs.map