@ui-tars/operator-nut-js
Version:
Operator Nut JS SDK for UI-TARS
284 lines (283 loc) • 14.1 kB
JavaScript
/**
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
* SPDX-License-Identifier: Apache-2.0
*/
;
// Minimal module-interop runtime emitted by the bundler. Every helper is
// attached to this single object so the rest of the file can reach them.
var __webpack_require__ = {};

// o(obj, prop): own-property test that goes through Object.prototype so it
// does not depend on `obj` having its own `hasOwnProperty`.
__webpack_require__.o = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop);

// d(exports, definition): for each own key of `definition` that `exports`
// does not already own, install an enumerable getter backed by
// `definition[key]`.
__webpack_require__.d = (exports1, definition) => {
  for (var key in definition) {
    if (!__webpack_require__.o(definition, key)) continue;
    if (__webpack_require__.o(exports1, key)) continue;
    Object.defineProperty(exports1, key, {
      enumerable: true,
      get: definition[key]
    });
  }
};

// r(exports): tag an exports object as an ES module (`Symbol.toStringTag`
// set to 'Module' when symbols are available, plus the `__esModule` flag).
__webpack_require__.r = (exports1) => {
  const supportsToStringTag = 'undefined' != typeof Symbol && Symbol.toStringTag;
  if (supportsToStringTag) {
    Object.defineProperty(exports1, Symbol.toStringTag, {
      value: 'Module'
    });
  }
  Object.defineProperty(exports1, '__esModule', {
    value: true
  });
};

// n(module): build a getter for a module's default export. Modules flagged
// `__esModule` yield `module.default`; anything else yields the module
// itself. The same getter is also exposed as an `a` accessor on the
// returned function.
__webpack_require__.n = (module) => {
  const flaggedAsEsModule = module && module.__esModule;
  const getter = flaggedAsEsModule ? () => module['default'] : () => module;
  __webpack_require__.d(getter, {
    a: getter
  });
  return getter;
};
// Export table for this bundle: flagged as an ES module, with
// `NutJSOperator` exposed through a lazy getter so the class declaration
// further down is only read when the export is actually accessed.
const __webpack_exports__ = {};
__webpack_require__.r(__webpack_exports__);
__webpack_require__.d(__webpack_exports__, {
  NutJSOperator: () => NutJSOperator
});
const core_namespaceObject = require("@ui-tars/sdk/core");
const external_jimp_namespaceObject = require("jimp");
const nut_js_namespaceObject = require("@computer-use/nut-js");
const external_big_js_namespaceObject = require("big.js");
var external_big_js_default = /*#__PURE__*/ __webpack_require__.n(external_big_js_namespaceObject);
/**
 * Sets `obj[key]` to `value` and returns `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited), the value is
 * installed as an own, enumerable, configurable, writable data property via
 * `Object.defineProperty`; otherwise a plain assignment is used.
 *
 * @param {object} obj - Object receiving the property.
 * @param {PropertyKey} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`.
 */
function _define_property(obj, key, value) {
  if (!(key in obj)) {
    obj[key] = value;
    return obj;
  }
  const descriptor = {
    value: value,
    enumerable: true,
    configurable: true,
    writable: true
  };
  Object.defineProperty(obj, key, descriptor);
  return obj;
}
/**
 * Moves the mouse pointer along a straight path to the given coordinates.
 *
 * The call is a no-op when either coordinate is missing (`null` or
 * `undefined`) or `NaN`, so callers can pass through whatever the box parser
 * produced without building a `Point` from unusable values.
 *
 * @param {number|null|undefined} startX - Target x coordinate.
 * @param {number|null|undefined} startY - Target y coordinate.
 * @returns {Promise<void>}
 */
const moveStraightTo = async (startX, startY) => {
  const isUnusable = (value) => value == null || Number.isNaN(value);
  if (isUnusable(startX) || isUnusable(startY)) return;
  const target = new nut_js_namespaceObject.Point(startX, startY);
  await nut_js_namespaceObject.mouse.move((0, nut_js_namespaceObject.straightTo)(target));
};
/**
 * Operator that carries out predicted GUI actions (mouse, keyboard, scroll)
 * on the local machine through the nut-js bindings, and captures screenshots
 * for the caller.
 */
class NutJSOperator extends core_namespaceObject.Operator {
  /**
   * Grabs the screen, scales the image down by the reported pixel density
   * and encodes it as PNG.
   *
   * @returns {Promise<{base64: string, scaleFactor: number}>} Base64 PNG data
   *   and the horizontal pixel density (`scaleX`) used as the scale factor.
   */
  async screenshot() {
    const { logger } = (0, core_namespaceObject.useContext)();
    const grabImage = await nut_js_namespaceObject.screen.grab();
    const screenWithScale = await grabImage.toRGB();
    const { scaleX, scaleY } = screenWithScale.pixelDensity;
    const scaleFactor = scaleX;
    logger.info('[NutjsOperator]', 'scaleX', scaleX, 'scaleY', scaleY);
    const screenWithScaleImage = await external_jimp_namespaceObject.Jimp.fromBitmap({
      width: screenWithScale.width,
      height: screenWithScale.height,
      data: Buffer.from(screenWithScale.data)
    });
    // Divide by the pixel density so the image matches the coordinate space
    // reported without scaling.
    const width = screenWithScale.width / scaleX;
    const height = screenWithScale.height / scaleY;
    const physicalScreenImage = await screenWithScaleImage.resize({
      w: width,
      h: height
    }).getBuffer('image/png');
    const output = {
      base64: physicalScreenImage.toString('base64'),
      scaleFactor
    };
    logger?.info(`[NutjsOperator] screenshot: ${width}x${height}, scaleFactor: ${scaleFactor}`);
    return output;
  }

  /**
   * Executes one parsed prediction.
   *
   * @param {object} params
   * @param {object} params.parsedPrediction - Carries `action_type` and
   *   `action_inputs` (`start_box`, `end_box`, `content`, `key`/`hotkey`,
   *   `direction`, depending on the action).
   * @param {number} params.screenWidth - Passed to the box parser.
   * @param {number} params.screenHeight - Passed to the box parser.
   * @param {number} params.scaleFactor - Only logged here.
   * @returns {Promise<{status: *}|undefined>} `{ status: StatusEnum.END }` for
   *   terminal actions (`error_env`, `call_user`, `finished`, `user_stop`);
   *   otherwise `undefined`.
   */
  async execute(params) {
    const { Button, Key, Point, clipboard, keyboard, mouse, sleep, straightTo } = nut_js_namespaceObject;
    const { logger } = (0, core_namespaceObject.useContext)();
    const { parsedPrediction, screenWidth, screenHeight, scaleFactor } = params;
    const { action_type, action_inputs } = parsedPrediction;
    const startBoxStr = action_inputs?.start_box || '';
    logger.info('[NutjsOperator] execute', scaleFactor);
    const { x: startX, y: startY } = (0, core_namespaceObject.parseBoxToScreenCoords)({
      boxStr: startBoxStr,
      screenWidth,
      screenHeight
    });
    logger.info(`[NutjsOperator Position]: (${startX}, ${startY})`);
    mouse.config.mouseSpeed = 3600;

    // A coordinate is usable when it is present and not NaN. Zero is a valid
    // screen coordinate, so truthiness must not be used here.
    const isUsableCoord = (value) => value != null && !Number.isNaN(value);

    // Translates a key string such as "ctrl+shift+a" or "page down" into a
    // list of nut-js keys. Unknown tokens are skipped.
    const getHotkeys = (keyStr) => {
      if (!keyStr) {
        logger.error('[NutjsOperator] hotkey error: ', `${keyStr} is not a valid key`);
        return [];
      }
      const isMac = 'darwin' === process.platform;
      const platformCommandKey = isMac ? Key.LeftCmd : Key.LeftWin;
      // On macOS "ctrl" is mapped to the command key as well.
      const platformCtrlKey = isMac ? Key.LeftCmd : Key.LeftControl;
      const keyMap = {
        return: Key.Enter,
        ctrl: platformCtrlKey,
        shift: Key.LeftShift,
        alt: Key.LeftAlt,
        pagedown: Key.PageDown,
        pageup: Key.PageUp,
        meta: platformCommandKey,
        win: platformCommandKey,
        command: platformCommandKey,
        cmd: platformCommandKey,
        ',': Key.Comma,
        arrowup: Key.Up,
        arrowdown: Key.Down,
        arrowleft: Key.Left,
        arrowright: Key.Right
      };
      const lowercaseKeyMap = Object.fromEntries(Object.entries(Key).map(([k, v]) => [
        k.toLowerCase(),
        v
      ]));
      const keys = keyStr
        .toLowerCase()
        // The tokenizer splits on whitespace, so "page down" would otherwise
        // become "page" + "down" and resolve to the arrow key. Join the two
        // words first.
        .replace(/\bpage\s+(up|down)\b/g, 'page$1')
        .split(/[\s+]/)
        .map((k) => keyMap[k] ?? lowercaseKeyMap[k])
        // Drop only unresolved tokens; a key whose enum value is 0 must be kept.
        .filter((k) => k != null);
      logger.info('[NutjsOperator] hotkey: ', keys);
      return keys;
    };

    // Shared by hotkey/press/release: the key string may arrive as `key` or `hotkey`.
    const resolveRequestedKeys = () => getHotkeys(action_inputs?.key || action_inputs?.hotkey);

    switch (action_type) {
      case 'wait':
        logger.info('[NutjsOperator] wait', action_inputs);
        await sleep(5000);
        break;
      case 'mouse_move':
      case 'hover':
        logger.info('[NutjsOperator] mouse_move');
        await moveStraightTo(startX, startY);
        break;
      case 'click':
      case 'left_click':
      case 'left_single':
        logger.info('[NutjsOperator] left_click');
        await moveStraightTo(startX, startY);
        await sleep(100);
        await mouse.click(Button.LEFT);
        break;
      case 'left_double':
      case 'double_click':
        logger.info(`[NutjsOperator] ${action_type}(${startX}, ${startY})`);
        await moveStraightTo(startX, startY);
        await sleep(100);
        await mouse.doubleClick(Button.LEFT);
        break;
      case 'right_click':
      case 'right_single':
        logger.info('[NutjsOperator] right_click');
        await moveStraightTo(startX, startY);
        await sleep(100);
        await mouse.click(Button.RIGHT);
        break;
      case 'middle_click':
        logger.info('[NutjsOperator] middle_click');
        await moveStraightTo(startX, startY);
        await mouse.click(Button.MIDDLE);
        break;
      case 'left_click_drag':
      case 'drag':
      case 'select':
        logger.info('[NutjsOperator] drag', action_inputs);
        if (action_inputs?.end_box) {
          const { x: endX, y: endY } = (0, core_namespaceObject.parseBoxToScreenCoords)({
            boxStr: action_inputs.end_box,
            screenWidth,
            screenHeight
          });
          if ([startX, startY, endX, endY].every(isUsableCoord)) {
            // `straightTo` builds its path from the current pointer position,
            // so the pointer has to be placed on the start point before the
            // drag begins, and the drag target is the end point itself.
            await moveStraightTo(startX, startY);
            await sleep(100);
            await mouse.drag(straightTo(new Point(endX, endY)));
          }
        }
        break;
      case 'type':
        {
          const rawContent = action_inputs?.content;
          const content = rawContent?.trim();
          logger.info('[NutjsOperator] type', content);
          if (content) {
            // A trailing newline (real or the two-character "\n") means
            // "submit". It is detected on the raw text because trim() has
            // already removed a real trailing newline from `content`.
            const shouldSubmit = /(?:\n|\\n)\s*$/.test(rawContent);
            const stripContent = content.replace(/\\n$/, '').replace(/\n$/, '');
            keyboard.config.autoDelayMs = 0;
            try {
              if ('win32' === process.platform) {
                // On Windows the text is pasted through the clipboard; the
                // previous clipboard content is restored even if pasting fails.
                const originalClipboard = await clipboard.getContent();
                try {
                  await clipboard.setContent(stripContent);
                  await keyboard.pressKey(Key.LeftControl, Key.V);
                  await sleep(50);
                  await keyboard.releaseKey(Key.LeftControl, Key.V);
                  await sleep(50);
                } finally {
                  await clipboard.setContent(originalClipboard);
                }
              } else {
                await keyboard.type(stripContent);
              }
              if (shouldSubmit) {
                await keyboard.pressKey(Key.Enter);
                await keyboard.releaseKey(Key.Enter);
              }
            } finally {
              // Always put the keyboard delay back, including on failure.
              keyboard.config.autoDelayMs = 500;
            }
          }
          break;
        }
      case 'hotkey':
        {
          const keys = resolveRequestedKeys();
          if (keys.length > 0) {
            await keyboard.pressKey(...keys);
            await keyboard.releaseKey(...keys);
          }
          break;
        }
      case 'press':
        {
          const keys = resolveRequestedKeys();
          if (keys.length > 0) await keyboard.pressKey(...keys);
          break;
        }
      case 'release':
        {
          const keys = resolveRequestedKeys();
          if (keys.length > 0) await keyboard.releaseKey(...keys);
          break;
        }
      case 'scroll':
        {
          const direction = action_inputs?.direction;
          // moveStraightTo ignores missing coordinates, so no extra guard is needed.
          await moveStraightTo(startX, startY);
          switch (direction?.toLowerCase()) {
            case 'up':
              await mouse.scrollUp(500);
              break;
            case 'down':
              await mouse.scrollDown(500);
              break;
            case 'left':
              await mouse.scrollLeft(500);
              break;
            case 'right':
              await mouse.scrollRight(500);
              break;
            default:
              logger.warn(`[NutjsOperator] Unsupported scroll direction: ${direction}`);
          }
          break;
        }
      case 'error_env':
      case 'call_user':
      case 'finished':
      case 'user_stop':
        return {
          status: core_namespaceObject.StatusEnum.END
        };
      default:
        logger.warn(`Unsupported action: ${action_type}`);
    }
  }
}
// Static `MANUAL` descriptor: the list of action signatures this operator
// advertises. The list is built in a block scope so it adds no module-level name.
{
  const advertisedActions = [
    "click(start_box='[x1, y1, x2, y2]')",
    "left_double(start_box='[x1, y1, x2, y2]')",
    "right_single(start_box='[x1, y1, x2, y2]')",
    "drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')",
    "hotkey(key='')",
    "type(content='') #If you want to submit your input, use \"\\n\" at the end of `content`.",
    "scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')",
    "wait() #Sleep for 5s and take a screenshot to check for any changes.",
    "finished()",
    "call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help."
  ];
  _define_property(NutJSOperator, "MANUAL", {
    ACTION_SPACES: advertisedActions
  });
}
// CommonJS surface: publish the named export explicitly, copy over any other
// enumerable entries of the export table, then flag the module as an ES module.
exports.NutJSOperator = __webpack_exports__.NutJSOperator;
for (const exportName in __webpack_exports__) {
  if (exportName === "NutJSOperator") continue;
  exports[exportName] = __webpack_exports__[exportName];
}
Object.defineProperty(exports, '__esModule', {
  value: true
});
//# sourceMappingURL=index.js.map