// @ui-tars/operator-nut-js
// Version: (not specified)
// Operator Nut JS SDK for UI-TARS
// 239 lines (238 loc) • 7.74 kB
// JavaScript
import {
Operator,
useContext,
parseBoxToScreenCoords,
StatusEnum
} from "@ui-tars/sdk/core";
import { Jimp } from "jimp";
import {
screen,
Button,
Key,
Point,
Region,
centerOf,
keyboard,
mouse,
sleep,
straightTo,
clipboard
} from "@computer-use/nut-js";
import Big from "big.js";
/**
 * Move the mouse cursor in a straight line to the given screen position.
 *
 * Does nothing when either coordinate is missing, so callers can pass the
 * result of a failed box parse without checking it first.
 *
 * @param {number | null | undefined} startX - Target x coordinate.
 * @param {number | null | undefined} startY - Target y coordinate.
 * @returns {Promise<void>} Resolves once the move has completed (or was skipped).
 */
const moveStraightTo = async (startX, startY) => {
  // `== null` deliberately matches both null and undefined; 0 is a valid coordinate.
  if (startX == null || startY == null) {
    return;
  }
  await mouse.move(straightTo(new Point(startX, startY)));
};
/** Milliseconds slept by the `wait` action; matches the 5s advertised in MANUAL. */
const WAIT_ACTION_MS = 5e3;

/** Scroll amount handed to nut-js for a single `scroll` action. */
const SCROLL_AMOUNT = 5 * 100;

/**
 * Split the raw `content` of a `type` action into the text to enter and a
 * flag telling whether Enter should be pressed afterwards.
 *
 * Submission is decided on the untrimmed content: trimming first would strip
 * a real trailing newline and the Enter request would be lost.
 *
 * @param {unknown} rawContent - Value of `action_inputs.content`.
 * @returns {{ text: string, submit: boolean }} Text without the trailing
 *   newline marker, and whether the content ended with `"\n"` or a literal `\n`.
 */
const parseTypeContent = (rawContent) => {
  const raw = typeof rawContent === "string" ? rawContent : "";
  // A literal backslash-n or a real newline, optionally followed by whitespace.
  const submit = /(?:\\n|\n)\s*$/.test(raw);
  const text = raw.trim().replace(/\\n$/, "").replace(/\n$/, "");
  return { text, submit };
};

/**
 * Translate a hotkey string such as `"ctrl+shift+p"` or `"ctrl c"` into
 * nut-js `Key` values.
 *
 * Tokens are matched case-insensitively, first against a small alias table
 * and then against the member names of `Key`.
 *
 * @param {string} keyStr - Keys separated by `+` and/or whitespace.
 * @returns {{ keys: Array, unknown: string[] }} Resolved keys in input order
 *   and the tokens that could not be resolved.
 */
const resolveHotkeyKeys = (keyStr) => {
  const platformCommandKey = process.platform === "darwin" ? Key.LeftCmd : Key.LeftWin;
  // A Map (not a plain object) so tokens like "constructor" cannot hit the prototype.
  const aliases = new Map([
    ["return", Key.Enter],
    ["enter", Key.Enter],
    ["ctrl", Key.LeftControl],
    ["control", Key.LeftControl],
    ["shift", Key.LeftShift],
    ["alt", Key.LeftAlt],
    ["space", Key.Space],
    ["esc", Key.Escape],
    ["del", Key.Delete],
    ["page down", Key.PageDown],
    ["pagedown", Key.PageDown],
    ["page up", Key.PageUp],
    ["pageup", Key.PageUp],
    ["meta", platformCommandKey],
    ["win", platformCommandKey],
    ["command", platformCommandKey],
    ["cmd", platformCommandKey]
  ]);
  // Index the enum by lower-cased member name. Purely numeric property names are
  // skipped: if `Key` is a numeric TypeScript enum those are reverse-mapping
  // entries whose values are names, not keys.
  const enumByName = new Map(
    Object.keys(Key)
      .filter((name) => Number.isNaN(Number(name)))
      .map((name) => [name.toLowerCase(), Key[name]])
  );

  const keys = [];
  const unknown = [];
  // Split on runs of separators so "ctrl + c" does not produce empty tokens.
  for (const token of keyStr.split(/[\s+]+/).filter(Boolean)) {
    const lower = token.toLowerCase();
    // `??` rather than `||`: an enum member whose value is 0 is still a valid key.
    const key =
      aliases.get(lower) ??
      enumByName.get(lower) ??
      (/^\d$/.test(lower) ? enumByName.get(`num${lower}`) : undefined);
    if (key == null) {
      unknown.push(token);
    } else {
      keys.push(key);
    }
  }
  return { keys, unknown };
};

/**
 * Operator that drives the local desktop through nut-js: it captures
 * screenshots and executes the mouse / keyboard actions predicted by the model.
 */
class NutJSOperator extends Operator {
  /** Action signatures advertised to the model. */
  static MANUAL = {
    ACTION_SPACES: [
      `click(start_box='[x1, y1, x2, y2]')`,
      `left_double(start_box='[x1, y1, x2, y2]')`,
      `right_single(start_box='[x1, y1, x2, y2]')`,
      `drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')`,
      `hotkey(key='')`,
      `type(content='') #If you want to submit your input, use "\\n" at the end of \`content\`.`,
      `scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')`,
      `wait() #Sleep for 5s and take a screenshot to check for any changes.`,
      `finished()`,
      `call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.`
    ]
  };

  /**
   * Grab the screen and return it as a base64 PNG whose dimensions are the
   * grabbed dimensions divided by the reported pixel density.
   *
   * @returns {Promise<{ base64: string, width: number, height: number, scaleFactor: number }>}
   *   `scaleFactor` is the horizontal pixel density (`scaleX`).
   */
  async screenshot() {
    const { logger } = useContext();
    const grabImage = await screen.grab();
    const screenWithScale = await grabImage.toRGB();
    const scaleFactor = screenWithScale.pixelDensity.scaleX;
    logger.info(
      "[NutjsOperator]",
      "scaleX",
      screenWithScale.pixelDensity.scaleX,
      "scaleY",
      screenWithScale.pixelDensity.scaleY
    );
    const screenWithScaleImage = await Jimp.fromBitmap({
      width: screenWithScale.width,
      height: screenWithScale.height,
      data: Buffer.from(screenWithScale.data)
    });
    // Scale the grab down by the pixel density on each axis.
    const width = screenWithScale.width / screenWithScale.pixelDensity.scaleX;
    const height = screenWithScale.height / screenWithScale.pixelDensity.scaleY;
    const realScreenImage = await screenWithScaleImage.resize({
      w: width,
      h: height
    }).getBuffer("image/png", { quality: 75 });
    const output = {
      base64: realScreenImage.toString("base64"),
      width,
      height,
      scaleFactor
    };
    logger.info(
      `[NutjsOperator] screenshot: ${output.width}x${output.height}, scaleFactor: ${scaleFactor}`
    );
    return output;
  }

  /**
   * Execute one parsed model prediction against the desktop.
   *
   * @param {object} params
   * @param {object} params.parsedPrediction - Carries `action_type` and `action_inputs`.
   * @param {number} params.screenWidth - Passed to `parseBoxToScreenCoords`.
   * @param {number} params.screenHeight - Passed to `parseBoxToScreenCoords`.
   * @param {number} params.scaleFactor - Only logged here.
   * @param {*} params.factors - Passed through to `parseBoxToScreenCoords`.
   * @returns {Promise<{ status: * } | undefined>} `{ status: StatusEnum.END }` for
   *   `finished`, `call_user` and `error_env`; `undefined` for every other action.
   */
  async execute(params) {
    const { logger } = useContext();
    const {
      parsedPrediction,
      screenWidth,
      screenHeight,
      scaleFactor,
      factors
    } = params;
    const { action_type, action_inputs } = parsedPrediction;
    const startBoxStr = action_inputs?.start_box || "";
    logger.info("[NutjsOperator] execute", scaleFactor);
    const { x: startX, y: startY } = parseBoxToScreenCoords({
      boxStr: startBoxStr,
      screenWidth,
      screenHeight,
      factors
    });
    logger.info(`[NutjsOperator Position]: (${startX}, ${startY})`);
    mouse.config.mouseSpeed = 3600;

    switch (action_type) {
      case "wait":
        logger.info("[NutjsOperator] wait", action_inputs);
        await sleep(WAIT_ACTION_MS);
        break;

      case "mouse_move":
      case "hover":
        logger.info("[NutjsOperator] mouse_move");
        await moveStraightTo(startX, startY);
        break;

      case "click":
      case "left_click":
      case "left_single":
        logger.info("[NutjsOperator] left_click");
        await moveStraightTo(startX, startY);
        await sleep(100);
        await mouse.click(Button.LEFT);
        break;

      case "left_double":
      case "double_click":
        logger.info(`[NutjsOperator] ${action_type}(${startX}, ${startY})`);
        await moveStraightTo(startX, startY);
        await sleep(100);
        await mouse.doubleClick(Button.LEFT);
        break;

      case "right_click":
      case "right_single":
        logger.info("[NutjsOperator] right_click");
        await moveStraightTo(startX, startY);
        await sleep(100);
        await mouse.click(Button.RIGHT);
        break;

      case "middle_click":
        logger.info("[NutjsOperator] middle_click");
        await moveStraightTo(startX, startY);
        await mouse.click(Button.MIDDLE);
        break;

      case "left_click_drag":
      case "drag":
      case "select": {
        logger.info("[NutjsOperator] drag", action_inputs);
        if (!action_inputs?.end_box) {
          break;
        }
        // The end box must be converted with the same factors as the start box.
        const { x: endX, y: endY } = parseBoxToScreenCoords({
          boxStr: action_inputs.end_box,
          screenWidth,
          screenHeight,
          factors
        });
        // Explicit null checks: a coordinate of 0 is a valid screen position.
        if ([startX, startY, endX, endY].some((coord) => coord == null)) {
          logger.warn("[NutjsOperator] drag skipped: missing start or end coordinates");
          break;
        }
        // Position the cursor on the start point first; `straightTo` plans its
        // path from the current cursor position, and `mouse.drag` holds the
        // left button along that path.
        await moveStraightTo(startX, startY);
        await mouse.drag(straightTo(new Point(endX, endY)));
        break;
      }

      case "type": {
        const { text, submit } = parseTypeContent(action_inputs?.content);
        logger.info("[NutjsOperator] type", text);
        if (!text && !submit) {
          break;
        }
        keyboard.config.autoDelayMs = 0;
        try {
          if (text) {
            if (process.platform === "win32") {
              // On Windows the text is pasted through the clipboard instead of
              // being typed; the previous clipboard content is restored afterwards.
              const originalClipboard = await clipboard.getContent();
              try {
                await clipboard.setContent(text);
                await keyboard.pressKey(Key.LeftControl, Key.V);
                await sleep(50);
                await keyboard.releaseKey(Key.LeftControl, Key.V);
                await sleep(50);
              } finally {
                await clipboard.setContent(originalClipboard);
              }
            } else {
              await keyboard.type(text);
            }
          }
          if (submit) {
            await keyboard.pressKey(Key.Enter);
            await keyboard.releaseKey(Key.Enter);
          }
        } finally {
          // Always put the key delay back, even when typing failed.
          keyboard.config.autoDelayMs = 500;
        }
        break;
      }

      case "hotkey": {
        const keyStr = action_inputs?.key || action_inputs?.hotkey;
        if (!keyStr) {
          break;
        }
        const { keys, unknown } = resolveHotkeyKeys(keyStr);
        if (unknown.length > 0 || keys.length === 0) {
          // Pressing only part of a combination would trigger a different shortcut.
          logger.warn(
            `[NutjsOperator] hotkey skipped, unrecognized key(s) in "${keyStr}": ${unknown.join(", ")}`
          );
          break;
        }
        logger.info("[NutjsOperator] hotkey: ", keys);
        await keyboard.pressKey(...keys);
        await keyboard.releaseKey(...keys);
        break;
      }

      case "scroll": {
        const direction = action_inputs?.direction;
        // No-op when the start box could not be resolved.
        await moveStraightTo(startX, startY);
        switch (direction?.toLowerCase()) {
          case "up":
            await mouse.scrollUp(SCROLL_AMOUNT);
            break;
          case "down":
            await mouse.scrollDown(SCROLL_AMOUNT);
            break;
          case "left":
            await mouse.scrollLeft(SCROLL_AMOUNT);
            break;
          case "right":
            await mouse.scrollRight(SCROLL_AMOUNT);
            break;
          default:
            logger.warn(
              `[NutjsOperator] Unsupported scroll direction: ${direction}`
            );
        }
        break;
      }

      case "error_env":
      case "call_user":
      case "finished":
        return { status: StatusEnum.END };

      default:
        logger.warn(`Unsupported action: ${action_type}`);
    }
  }
}
// Expose the operator class as a named export.
export {
NutJSOperator
};