@ui-tars/sdk
Version:
A powerful cross-platform (ANY device/platform) toolkit for building GUI automation agents for UI-TARS
280 lines (279 loc) • 15.2 kB
JavaScript
/**
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
* SPDX-License-Identifier: Apache-2.0
*/
import * as __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_types_07351026__ from "@ui-tars/shared/types";
import * as __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__ from "@ui-tars/shared/constants";
import * as __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_utils_f3fe5586__ from "@ui-tars/shared/utils";
import * as __WEBPACK_EXTERNAL_MODULE_async_retry_b2da4330__ from "async-retry";
import * as __WEBPACK_EXTERNAL_MODULE_jimp__ from "jimp";
import * as __WEBPACK_EXTERNAL_MODULE__context_useContext_mjs_c75ccb70__ from "./context/useContext.mjs";
import * as __WEBPACK_EXTERNAL_MODULE__Model_mjs_4bbd6f0d__ from "./Model.mjs";
import * as __WEBPACK_EXTERNAL_MODULE__base_index_mjs_888496ed__ from "./base/index.mjs";
import * as __WEBPACK_EXTERNAL_MODULE__utils_mjs_25ece7d1__ from "./utils.mjs";
import * as __WEBPACK_EXTERNAL_MODULE__constants_mjs_225410ff__ from "./constants.mjs";
/**
 * Compiler helper that sets `obj[key]` to `value` and returns `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited, per the `in`
 * operator) the property is (re)defined on `obj` itself as an enumerable,
 * configurable, writable data property; otherwise a plain assignment is used.
 *
 * @param {object} obj - Target object; mutated in place.
 * @param {PropertyKey} key - Property name to set.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj` that was passed in.
 */
function _define_property(obj, key, value) {
    // Fast path: the key is not visible on the object at all, so a plain
    // assignment is enough.
    if (!(key in obj)) {
        obj[key] = value;
        return obj;
    }
    // The key already exists somewhere on the object or its prototype chain:
    // define an own data property explicitly instead of assigning.
    const descriptor = {
        configurable: true,
        enumerable: true,
        writable: true,
        value
    };
    Object.defineProperty(obj, key, descriptor);
    return obj;
}
/**
 * GUI automation agent.
 *
 * Each call to `run()` repeatedly takes a screenshot through the operator,
 * sends the accumulated conversation to the model, and asks the operator to
 * execute every parsed prediction until the status leaves RUNNING, the abort
 * signal fires, or a loop / screenshot-failure limit is reached.
 *
 * NOTE(review): `this.config` is read in `run()` but never assigned in this
 * class; presumably the `BaseGUIAgent` constructor stores it - confirm.
 */
class GUIAgent extends __WEBPACK_EXTERNAL_MODULE__base_index_mjs_888496ed__.BaseGUIAgent {
    /**
     * Drives the screenshot -> model -> execute loop for one instruction.
     *
     * Progress is reported through `config.onData`; every call receives a copy
     * of the run data whose `conversations` holds only the newest entry (or is
     * empty for pure status updates).
     *
     * @param {string} instruction - The user instruction; stored as the first
     *   conversation entry.
     * @returns {Promise<void>} Resolves when the loop ends or when an
     *   abort-like error is caught.
     * @throws Re-throws any non-abort error after logging it and calling
     *   `config.onError`.
     */
    async run(instruction) {
        const { operator, model, logger } = this;
        const { StatusEnum, ShareVersion } = __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_types_07351026__;
        const { IMAGE_PLACEHOLDER, MAX_LOOP_COUNT } = __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__;
        const { INTERNAL_ACTION_SPACES_ENUM, MAX_SNAPSHOT_ERR_CNT } = __WEBPACK_EXTERNAL_MODULE__constants_mjs_225410ff__;
        const { replaceBase64Prefix, toVlmModelFormat, processVlmParams, getSummary } = __WEBPACK_EXTERNAL_MODULE__utils_mjs_25ece7d1__;
        const { Jimp } = __WEBPACK_EXTERNAL_MODULE_jimp__;
        const { sleep } = __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_utils_f3fe5586__;
        const { setContext } = __WEBPACK_EXTERNAL_MODULE__context_useContext_mjs_c75ccb70__;
        const asyncRetry = __WEBPACK_EXTERNAL_MODULE_async_retry_b2da4330__["default"];

        const { signal, onData, onError, retry = {}, maxLoopCount = MAX_LOOP_COUNT } = this.config;
        const currentTime = Date.now();
        const data = {
            version: ShareVersion.V1,
            systemPrompt: this.systemPrompt,
            instruction,
            modelName: this.model.modelName,
            status: StatusEnum.INIT,
            logTime: currentTime,
            conversations: [
                {
                    from: 'human',
                    value: instruction,
                    timing: {
                        start: currentTime,
                        end: currentTime,
                        cost: 0
                    }
                }
            ]
        };

        // Publish the run context. Note that this mutates `this.config` in place.
        setContext(Object.assign(this.config, {
            logger: this.logger,
            systemPrompt: this.systemPrompt,
            factors: this.model.factors,
            model: this.model
        }));

        // Emits a status-only update (no conversation payload).
        const emitStatus = () => onData?.({
            data: {
                ...data,
                conversations: []
            }
        });
        // Emits the most recently appended conversation entry.
        const emitLatestConversation = () => onData?.({
            data: {
                ...data,
                conversations: data.conversations.slice(-1)
            }
        });
        // Builds the async-retry options for one stage ('screenshot' | 'model' | 'execute').
        const retryOptions = (stage) => ({
            retries: retry?.[stage]?.maxRetries ?? 0,
            onRetry: retry?.[stage]?.onRetry
        });
        // Decodes the screenshot to read its dimensions and mime type.
        // Property access on a missing snapshot and `Buffer.from` on a missing
        // payload throw synchronously, which a `.catch()` chained on the Jimp
        // promise would never see; the try/catch makes every decode failure
        // fall back to a null size so the caller can count it as a bad
        // screenshot instead of aborting the whole run.
        const readImageInfo = async (shot) => {
            try {
                return await Jimp.fromBuffer(Buffer.from(replaceBase64Prefix(shot.base64), 'base64'));
            } catch (e) {
                logger.error('[GUIAgent] screenshot error', e);
                return {
                    width: null,
                    height: null,
                    mime: ''
                };
            }
        };

        let loopCnt = 0;
        let snapshotErrCnt = 0;
        data.status = StatusEnum.RUNNING;
        await emitStatus();
        try {
            while (true) {
                logger.info('[run_data_status]', data.status);

                // Stop when the status left RUNNING or the caller aborted.
                if (data.status !== StatusEnum.RUNNING || signal?.aborted) {
                    if (signal?.aborted) {
                        data.status = StatusEnum.END;
                    }
                    await emitStatus();
                    break;
                }

                // Stop when either budget (loops or bad screenshots) is exhausted.
                if (loopCnt >= maxLoopCount || snapshotErrCnt >= MAX_SNAPSHOT_ERR_CNT) {
                    Object.assign(data, {
                        status: StatusEnum.MAX_LOOP,
                        errMsg: loopCnt >= maxLoopCount ? 'Exceeds the maximum number of loops' : 'Too many screenshot failures'
                    });
                    await emitStatus();
                    break;
                }

                loopCnt += 1;
                const start = Date.now();
                const snapshot = await asyncRetry(() => operator.screenshot(), retryOptions('screenshot'));
                const { width, height, mime } = await readImageInfo(snapshot);
                const isValidImage = Boolean(snapshot?.base64 && width && height);
                if (!isValidImage) {
                    // A bad screenshot does not consume a loop iteration; it is
                    // tracked by its own counter and retried after a pause.
                    loopCnt -= 1;
                    snapshotErrCnt += 1;
                    await sleep(1000);
                    continue;
                }

                let end = Date.now();
                data.conversations.push({
                    from: 'human',
                    value: IMAGE_PLACEHOLDER,
                    screenshotBase64: snapshot.base64,
                    screenshotContext: {
                        size: {
                            width,
                            height
                        },
                        mime,
                        scaleFactor: snapshot.scaleFactor
                    },
                    timing: {
                        start,
                        end,
                        cost: end - start
                    }
                });
                await emitLatestConversation();

                const modelFormat = toVlmModelFormat({
                    conversations: data.conversations,
                    systemPrompt: data.systemPrompt
                });
                const vlmParams = {
                    ...processVlmParams(modelFormat.conversations, modelFormat.images),
                    screenContext: {
                        width,
                        height
                    },
                    mime,
                    scaleFactor: snapshot.scaleFactor
                };
                const { prediction, parsedPredictions } = await asyncRetry(async (bail) => {
                    try {
                        return await model.invoke(vlmParams);
                    } catch (error) {
                        const isAbort = error instanceof Error
                            && (error.name === 'APIUserAbortError' || error.message?.includes('aborted'));
                        if (isAbort) {
                            // Abort errors must not be retried: bail() rejects
                            // the retry promise with this error immediately.
                            bail(error);
                            return {
                                prediction: '',
                                parsedPredictions: []
                            };
                        }
                        throw error;
                    }
                }, retryOptions('model'));
                logger.info('[GUIAgent Response]:', prediction);
                logger.info('GUIAgent Parsed Predictions:', JSON.stringify(parsedPredictions));
                if (!prediction) {
                    logger.error('[GUIAgent Response Empty]:', prediction);
                    continue;
                }

                const predictionSummary = getSummary(prediction);
                end = Date.now();
                data.conversations.push({
                    from: 'gpt',
                    value: predictionSummary,
                    timing: {
                        start,
                        end,
                        cost: end - start
                    },
                    screenshotContext: {
                        size: {
                            width,
                            height
                        },
                        scaleFactor: snapshot.scaleFactor
                    },
                    predictionParsed: parsedPredictions
                });
                await emitLatestConversation();

                for (const parsedPrediction of parsedPredictions) {
                    const actionType = parsedPrediction.action_type;
                    logger.info('GUIAgent Action:', actionType);

                    // Terminal actions only update the status; the next pass of
                    // the outer loop sees the non-RUNNING status and exits.
                    const isTerminalAction = [
                        INTERNAL_ACTION_SPACES_ENUM.CALL_USER,
                        INTERNAL_ACTION_SPACES_ENUM.ERROR_ENV,
                        INTERNAL_ACTION_SPACES_ENUM.FINISHED
                    ].includes(actionType);
                    if (isTerminalAction) {
                        data.status = StatusEnum.END;
                        break;
                    }
                    if (actionType === INTERNAL_ACTION_SPACES_ENUM.MAX_LOOP) {
                        data.status = StatusEnum.MAX_LOOP;
                        break;
                    }

                    if (!signal?.aborted) {
                        logger.info('GUIAgent Action Inputs:', parsedPrediction.action_inputs, parsedPrediction.action_type);
                        // Execution failures are logged and swallowed on purpose
                        // so one failed action does not end the run.
                        const executeOutput = await asyncRetry(() => operator.execute({
                            prediction,
                            parsedPrediction,
                            screenWidth: width,
                            screenHeight: height,
                            scaleFactor: snapshot.scaleFactor,
                            factors: this.model.factors
                        }), retryOptions('execute')).catch((e) => {
                            logger.error('GUIAgent execute error', e);
                        });
                        // The operator may override the run status through its result.
                        if (executeOutput?.status) {
                            data.status = executeOutput.status;
                        }
                    }
                }
            }
        } catch (error) {
            const isAbort = error instanceof Error
                && (error.name === 'AbortError' || error.message?.includes('aborted'));
            if (isAbort) {
                logger.info('Request was aborted');
                return;
            }
            logger.error('[GUIAgent] run error', error);
            onError?.({
                data,
                error: {
                    code: -1,
                    error: 'GUIAgent Service Error',
                    stack: `${error}`
                }
            });
            throw error;
        } finally {
            // Whatever happened, the run always finishes in END; listeners are
            // notified only when that is an actual status change.
            const prevStatus = data.status;
            data.status = StatusEnum.END;
            if (data.status !== prevStatus) {
                await emitStatus();
            }
            logger.info('[GUIAgent] finally: status', data.status);
        }
    }

    /**
     * Builds the system prompt from the operator class's static
     * `MANUAL.ACTION_SPACES` list.
     *
     * @returns {string} `SYSTEM_PROMPT` when the list is missing or empty;
     *   otherwise `SYSTEM_PROMPT_TEMPLATE` with `{{action_spaces_holder}}`
     *   replaced by the entries joined with newlines.
     */
    buildSystemPrompt() {
        const { SYSTEM_PROMPT, SYSTEM_PROMPT_TEMPLATE } = __WEBPACK_EXTERNAL_MODULE__constants_mjs_225410ff__;
        const actionSpaces = this.operator.constructor?.MANUAL?.ACTION_SPACES;
        if (actionSpaces == null || actionSpaces.length === 0) {
            return SYSTEM_PROMPT;
        }
        return SYSTEM_PROMPT_TEMPLATE.replace('{{action_spaces_holder}}', actionSpaces.join('\n'));
    }

    /**
     * @param {object} config - Agent configuration. Fields read here:
     *   `operator`, `model` (a `UITarsModel` instance, or a value passed to the
     *   `UITarsModel` constructor), optional `logger` (defaults to `console`)
     *   and optional `systemPrompt` (defaults to `buildSystemPrompt()`).
     */
    constructor(config) {
        super(config);
        _define_property(this, "operator", void 0);
        _define_property(this, "model", void 0);
        _define_property(this, "logger", void 0);
        _define_property(this, "systemPrompt", void 0);
        const { UITarsModel } = __WEBPACK_EXTERNAL_MODULE__Model_mjs_4bbd6f0d__;
        this.operator = config.operator;
        this.model = config.model instanceof UITarsModel ? config.model : new UITarsModel(config.model);
        this.logger = config.logger || console;
        // Must come after `this.operator` is set: buildSystemPrompt() reads it.
        this.systemPrompt = config.systemPrompt || this.buildSystemPrompt();
    }
}
export { GUIAgent };
//# sourceMappingURL=GUIAgent.mjs.map