@ui-tars/operator-nut-js
Version:
Operator Nut JS SDK for UI-TARS
256 lines (255 loc) • 10.2 kB
JavaScript
;
// Short local aliases for the Object built-ins used by the module-interop
// helpers defined below (__export, __copyProps, __toESM, __toCommonJS).
var {
  create: __create,
  defineProperty: __defProp,
  getOwnPropertyDescriptor: __getOwnPropDesc,
  getOwnPropertyNames: __getOwnPropNames,
  getPrototypeOf: __getProtoOf
} = Object;
var { hasOwnProperty: __hasOwnProp } = Object.prototype;
/**
 * Defines one enumerable getter on `target` for every enumerable key of `all`.
 * Each value in `all` is used directly as the getter function, so the exported
 * value is resolved at access time rather than at registration time.
 *
 * @param {object} target - Object that receives the accessor properties.
 * @param {Object<string, Function>} all - Map of export name to getter.
 */
var __export = (target, all) => {
  for (const exportName in all) {
    const getter = all[exportName];
    __defProp(target, exportName, { get: getter, enumerable: true });
  }
};
/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 *
 * Keys that `to` already owns, and the key equal to `except`, are skipped.
 * Each mirrored property keeps the enumerability of the source property.
 * Nothing is copied unless `from` is a non-null object or a function.
 *
 * @param {object} to - Destination object; also the return value.
 * @param {*} from - Source whose own property names are mirrored.
 * @param {string} [except] - Single key to leave out.
 * @param {PropertyDescriptor} [desc] - Scratch slot for the current source descriptor.
 * @returns {object} `to`
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = typeof from === "function" || (typeof from === "object" && from !== null);
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      // Read through to the source on every access so later changes stay visible.
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};
/**
 * Wraps a `require()` result so it can be consumed like an ES module namespace.
 *
 * The wrapper inherits from the prototype of `mod` (or is a plain object when
 * `mod` is null/undefined) and mirrors the own properties of `mod`.
 *
 * @param {*} mod - Value returned by `require()`.
 * @param {*} [isNodeMode] - Truthy when the importer is in node compatibility mode.
 * @param {object} [target] - Scratch slot for the wrapper being built.
 * @returns {object} The namespace-like wrapper.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode, or this is not an ESM file
  // that was converted to CommonJS by a Babel-compatible transform (that is,
  // "__esModule" has not been set), expose the CommonJS "module.exports"
  // itself as "default" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};
/**
 * Builds a CommonJS exports object from `mod`: a fresh object carrying a
 * non-enumerable `__esModule: true` marker plus live getters for every own
 * property of `mod`.
 *
 * @param {object} mod - Object holding the module's exports.
 * @returns {object} The object to assign to `module.exports`.
 */
var __toCommonJS = (mod) => {
  const cjsExports = __defProp({}, "__esModule", { value: true });
  return __copyProps(cjsExports, mod);
};
// Holder for this module's public exports; filled with getters just below.
var index_exports = {};
// Register `NutJSOperator` through a getter: the class is declared further
// down, so the name must only be resolved when the export is actually read.
__export(index_exports, {
NutJSOperator: () => NutJSOperator
});
// Publish the exports as a CommonJS module; __toCommonJS adds the
// `__esModule` marker and mirrors the getters registered above.
module.exports = __toCommonJS(index_exports);
var import_core = require("@ui-tars/sdk/core");
var import_jimp = require("jimp");
var import_nut_js = require("@computer-use/nut-js");
var import_big = __toESM(require("big.js"));
/**
 * Moves the mouse pointer in a straight line to the given screen position.
 *
 * The call is a no-op when either coordinate is not a finite number. That
 * covers `null` (the value the callers in this file guard against) as well as
 * `undefined` and `NaN`, which must never reach the mouse driver.
 *
 * @param {number|null} startX - Target x coordinate.
 * @param {number|null} startY - Target y coordinate.
 * @returns {Promise<void>} Resolves once the move has finished or been skipped.
 */
const moveStraightTo = async (startX, startY) => {
  if (!Number.isFinite(startX) || !Number.isFinite(startY)) {
    return;
  }
  const { mouse, straightTo, Point } = import_nut_js;
  await mouse.move(straightTo(new Point(startX, startY)));
};
/**
 * Operator that captures the screen and performs mouse / keyboard actions on
 * the local machine through nut-js.
 */
class NutJSOperator extends import_core.Operator {
  /** Action-space description lines exposed alongside this operator. */
  static MANUAL = {
    ACTION_SPACES: [
      `click(start_box='[x1, y1, x2, y2]')`,
      `left_double(start_box='[x1, y1, x2, y2]')`,
      `right_single(start_box='[x1, y1, x2, y2]')`,
      `drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')`,
      `hotkey(key='')`,
      `type(content='') #If you want to submit your input, use "\\n" at the end of \`content\`.`,
      `scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')`,
      `wait() #Sleep for 5s and take a screenshot to check for any changes.`,
      `finished()`,
      `call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.`
    ]
  };

  /** Duration of the `wait` action; matches the 5s advertised in MANUAL. */
  static #WAIT_MS = 5e3;
  /** Pause between moving the pointer and pressing a button. */
  static #SETTLE_MS = 100;
  /** Amount passed to the nut-js scroll calls. */
  static #SCROLL_AMOUNT = 5 * 100;
  /** Keyboard auto-delay applied after a `type` action completes. */
  static #KEYBOARD_DELAY_MS = 500;

  /**
   * Grabs the screen and returns it scaled down by the reported pixel density.
   *
   * @returns {Promise<{base64: string, width: number, height: number, scaleFactor: number}>}
   *   PNG data as base64, the scaled dimensions, and the horizontal scale factor.
   */
  async screenshot() {
    const { useContext } = import_core;
    const { screen } = import_nut_js;
    const { logger } = useContext();
    const grabbed = await screen.grab();
    const physical = await grabbed.toRGB();
    const { scaleX, scaleY } = physical.pixelDensity;
    const scaleFactor = scaleX;
    logger.info("[NutjsOperator]", "scaleX", scaleX, "scaleY", scaleY);
    const physicalImage = await import_jimp.Jimp.fromBitmap({
      width: physical.width,
      height: physical.height,
      data: Buffer.from(physical.data)
    });
    // Divide by the pixel density so the reported size is the grabbed size
    // with the scale factor removed.
    const width = physical.width / scaleX;
    const height = physical.height / scaleY;
    const pngBuffer = await physicalImage.resize({ w: width, h: height }).getBuffer("image/png", { quality: 75 });
    const output = {
      base64: pngBuffer.toString("base64"),
      width,
      height,
      scaleFactor
    };
    logger.info(
      `[NutjsOperator] screenshot: ${output.width}x${output.height}, scaleFactor: ${scaleFactor}`
    );
    return output;
  }

  /**
   * Performs one parsed action.
   *
   * @param {object} params
   * @param {{action_type: string, action_inputs?: object}} params.parsedPrediction
   *   Action name plus its inputs (`start_box`, `end_box`, `content`, `key` /
   *   `hotkey`, `direction`, depending on the action).
   * @param {number} params.screenWidth - Forwarded to `parseBoxToScreenCoords`.
   * @param {number} params.screenHeight - Forwarded to `parseBoxToScreenCoords`.
   * @param {number} params.scaleFactor - Only logged here.
   * @param {*} params.factors - Forwarded to `parseBoxToScreenCoords`.
   * @returns {Promise<{status: *}|undefined>} `{ status: StatusEnum.END }` for
   *   `finished`, `call_user` and `error_env`; otherwise nothing.
   */
  async execute(params) {
    const { useContext, parseBoxToScreenCoords, StatusEnum } = import_core;
    const { mouse, Button, Point, sleep, straightTo } = import_nut_js;
    const { logger } = useContext();
    const {
      parsedPrediction,
      screenWidth,
      screenHeight,
      scaleFactor,
      factors
    } = params;
    const { action_type, action_inputs } = parsedPrediction;
    const startBoxStr = action_inputs?.start_box || "";
    logger.info("[NutjsOperator] execute", scaleFactor);
    const { x: startX, y: startY } = parseBoxToScreenCoords({
      boxStr: startBoxStr,
      screenWidth,
      screenHeight,
      factors
    });
    logger.info(`[NutjsOperator Position]: (${startX}, ${startY})`);
    mouse.config.mouseSpeed = 3600;
    switch (action_type) {
      case "wait":
        logger.info("[NutjsOperator] wait", action_inputs);
        await sleep(NutJSOperator.#WAIT_MS);
        break;
      case "mouse_move":
      case "hover":
        logger.info("[NutjsOperator] mouse_move");
        await moveStraightTo(startX, startY);
        break;
      case "click":
      case "left_click":
      case "left_single":
        logger.info("[NutjsOperator] left_click");
        await moveStraightTo(startX, startY);
        await sleep(NutJSOperator.#SETTLE_MS);
        await mouse.click(Button.LEFT);
        break;
      case "left_double":
      case "double_click":
        logger.info(`[NutjsOperator] ${action_type}(${startX}, ${startY})`);
        await moveStraightTo(startX, startY);
        await sleep(NutJSOperator.#SETTLE_MS);
        await mouse.doubleClick(Button.LEFT);
        break;
      case "right_click":
      case "right_single":
        logger.info("[NutjsOperator] right_click");
        await moveStraightTo(startX, startY);
        await sleep(NutJSOperator.#SETTLE_MS);
        await mouse.click(Button.RIGHT);
        break;
      case "middle_click":
        logger.info("[NutjsOperator] middle_click");
        await moveStraightTo(startX, startY);
        await mouse.click(Button.MIDDLE);
        break;
      case "left_click_drag":
      case "drag":
      case "select": {
        logger.info("[NutjsOperator] drag", action_inputs);
        if (action_inputs?.end_box) {
          // Parse the end box with the same factors as the start box so both
          // points are mapped to the screen in the same way.
          const { x: endX, y: endY } = parseBoxToScreenCoords({
            boxStr: action_inputs.end_box,
            screenWidth,
            screenHeight,
            factors
          });
          // Compare against "not a number" rather than truthiness: 0 is a
          // valid coordinate on the left/top screen edge.
          const hasAllPoints = [startX, startY, endX, endY].every(Number.isFinite);
          if (hasAllPoints) {
            // mouse.drag holds the left button while following the path from
            // the pointer's current position, so go to the start point first
            // and then drag all the way to the end point.
            await moveStraightTo(startX, startY);
            await sleep(NutJSOperator.#SETTLE_MS);
            await mouse.drag(straightTo(new Point(endX, endY)));
          }
        }
        break;
      }
      case "type":
        await NutJSOperator.#typeText(action_inputs?.content, logger);
        break;
      case "hotkey": {
        const keyStr = action_inputs?.key || action_inputs?.hotkey;
        if (keyStr) {
          await NutJSOperator.#pressHotkey(keyStr, logger);
        }
        break;
      }
      case "scroll": {
        const direction = action_inputs?.direction;
        // moveStraightTo skips the move when no start position was parsed.
        await moveStraightTo(startX, startY);
        switch (direction?.toLowerCase()) {
          case "up":
            await mouse.scrollUp(NutJSOperator.#SCROLL_AMOUNT);
            break;
          case "down":
            await mouse.scrollDown(NutJSOperator.#SCROLL_AMOUNT);
            break;
          case "left":
            await mouse.scrollLeft(NutJSOperator.#SCROLL_AMOUNT);
            break;
          case "right":
            await mouse.scrollRight(NutJSOperator.#SCROLL_AMOUNT);
            break;
          default:
            logger.warn(
              `[NutjsOperator] Unsupported scroll direction: ${direction}`
            );
        }
        break;
      }
      case "error_env":
      case "call_user":
      case "finished":
        return { status: StatusEnum.END };
      default:
        logger.warn(`Unsupported action: ${action_type}`);
    }
  }

  /**
   * Types `rawContent` and presses Enter when it ends with a newline marker
   * (a real line feed or the two characters `\n`), ignoring trailing whitespace.
   *
   * @param {string|undefined} rawContent - The `content` input of a `type` action.
   * @param {{info: Function}} logger - Logger used for the trace line.
   * @returns {Promise<void>}
   */
  static async #typeText(rawContent, logger) {
    const { keyboard, clipboard, Key, sleep } = import_nut_js;
    const content = rawContent?.trim();
    logger.info("[NutjsOperator] type", content);
    // Decide on the untrimmed text: trim() would strip a real trailing line
    // feed and the submit request would be lost.
    const shouldSubmit = typeof rawContent === "string" && /(?:\\n|\n)\s*$/.test(rawContent);
    if (!content && !shouldSubmit) {
      return;
    }
    const text = (content ?? "").replace(/\\n$/, "").replace(/\n$/, "");
    keyboard.config.autoDelayMs = 0;
    try {
      if (text) {
        if (process.platform === "win32") {
          // On Windows the text is pasted through the clipboard; the previous
          // clipboard content is put back even when pasting fails.
          const savedClipboard = await clipboard.getContent();
          try {
            await clipboard.setContent(text);
            await keyboard.pressKey(Key.LeftControl, Key.V);
            await sleep(50);
            await keyboard.releaseKey(Key.LeftControl, Key.V);
            await sleep(50);
          } finally {
            await clipboard.setContent(savedClipboard);
          }
        } else {
          await keyboard.type(text);
        }
      }
      if (shouldSubmit) {
        await keyboard.pressKey(Key.Enter);
        await keyboard.releaseKey(Key.Enter);
      }
    } finally {
      // Always re-apply the delay, otherwise an error while typing would leave
      // the keyboard at zero delay for later actions.
      keyboard.config.autoDelayMs = NutJSOperator.#KEYBOARD_DELAY_MS;
    }
  }

  /**
   * Presses and releases a key combination such as `"ctrl c"` or `"ctrl+shift+t"`.
   * Key names are separated by whitespace and/or `+`. The combination is
   * skipped entirely when any name cannot be resolved, so a partial shortcut
   * is never sent.
   *
   * @param {string} keyStr - The combination to press.
   * @param {{info: Function, warn: Function}} logger - Logger for trace and warnings.
   * @returns {Promise<void>}
   */
  static async #pressHotkey(keyStr, logger) {
    const { keyboard, Key } = import_nut_js;
    const platformCommandKey = process.platform === "darwin" ? Key.LeftCmd : Key.LeftWin;
    const keyMap = {
      return: Key.Enter,
      enter: Key.Enter,
      ctrl: Key.LeftControl,
      control: Key.LeftControl,
      shift: Key.LeftShift,
      alt: Key.LeftAlt,
      space: Key.Space,
      esc: Key.Escape,
      "page down": Key.PageDown,
      pagedown: Key.PageDown,
      "page up": Key.PageUp,
      pageup: Key.PageUp,
      meta: platformCommandKey,
      win: platformCommandKey,
      command: platformCommandKey,
      cmd: platformCommandKey
    };
    // Case-insensitive index of the Key enum names. Only numeric values are
    // kept, which drops the number-to-name reverse entries of the enum.
    const keysByLowerName = new Map(
      Object.entries(Key).filter(([, value]) => typeof value === "number").map(([name, value]) => [name.toLowerCase(), value])
    );
    const resolveKey = (token) => {
      const lower = token.toLowerCase();
      const candidates = [
        keyMap[lower],
        Key[token.toUpperCase()],
        keysByLowerName.get(lower),
        /^\d$/.test(lower) ? keysByLowerName.get(`num${lower}`) : void 0
      ];
      // Select by type instead of truthiness: an enum member may be 0.
      return candidates.find((candidate) => typeof candidate === "number");
    };
    // Split on runs of separators so "ctrl + c" does not yield empty names.
    const tokens = keyStr.split(/[\s+]+/).filter(Boolean);
    const keys = tokens.map(resolveKey);
    logger.info("[NutjsOperator] hotkey: ", keys);
    const unknownTokens = tokens.filter((_, index) => keys[index] === void 0);
    if (unknownTokens.length > 0) {
      logger.warn(`[NutjsOperator] Unsupported hotkey key(s): ${unknownTokens.join(", ")}`);
      return;
    }
    if (keys.length === 0) {
      return;
    }
    await keyboard.pressKey(...keys);
    await keyboard.releaseKey(...keys);
  }
}
// Annotate the CommonJS export names for ESM import in node:
// the `0 &&` guard short-circuits, so this assignment never executes; the
// statement is present only so the export names appear literally in the file.
0 && (module.exports = {
NutJSOperator
});