UNPKG

@ui-tars/operator-nut-js

Version:
256 lines (255 loc) 10.2 kB
"use strict";
// ---------------------------------------------------------------------------
// Bundler-generated CommonJS <-> ESM interop helpers (logic kept as emitted).
// ---------------------------------------------------------------------------
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines every entry of `all` as an enumerable getter on `target`.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies own properties of `from` onto `to` as getters, skipping `except`
// and anything `to` already owns.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
var index_exports = {};
__export(index_exports, {
  NutJSOperator: () => NutJSOperator
});
module.exports = __toCommonJS(index_exports);
var import_core = require("@ui-tars/sdk/core");
var import_jimp = require("jimp");
var import_nut_js = require("@computer-use/nut-js");
// Retained so the module's dependency surface is unchanged; no longer used below.
var import_big = __toESM(require("big.js"));

/** Pause for the `wait` action; matches the "Sleep for 5s" contract in MANUAL. */
const WAIT_ACTION_MS = 5e3;
/** Settle time between moving the pointer and clicking. */
const PRE_CLICK_DELAY_MS = 100;
/** Amount passed to the nut-js scroll functions for a single scroll action. */
const SCROLL_AMOUNT = 5 * 100;
/** Keyboard auto-delay applied after a `type` action finishes. */
const POST_TYPE_KEY_DELAY_MS = 500;

/**
 * Moves the pointer in a straight line to the given screen position.
 * Does nothing when either coordinate is missing (null or undefined).
 *
 * @param {number|null|undefined} startX - Target x coordinate.
 * @param {number|null|undefined} startY - Target y coordinate.
 * @returns {Promise<void>}
 */
const moveStraightTo = async (startX, startY) => {
  if (startX == null || startY == null) {
    return;
  }
  await import_nut_js.mouse.move((0, import_nut_js.straightTo)(new import_nut_js.Point(startX, startY)));
};

/**
 * Builds the table of human-friendly key names accepted by the `hotkey` action.
 * Built lazily so the nut-js `Key` object is only read when a hotkey is executed.
 *
 * @returns {Map<string, unknown>} Lower-case alias -> nut-js key value.
 */
const buildHotkeyAliases = () => {
  const { Key } = import_nut_js;
  const platformCommandKey = process.platform === "darwin" ? Key.LeftCmd : Key.LeftWin;
  return new Map([
    ["return", Key.Enter],
    ["enter", Key.Enter],
    ["ctrl", Key.LeftControl],
    ["control", Key.LeftControl],
    ["shift", Key.LeftShift],
    ["alt", Key.LeftAlt],
    ["option", Key.LeftAlt],
    ["space", Key.Space],
    // NOTE: the two spaced aliases cannot match because the key string is
    // tokenised on whitespace; they are kept from the original table.
    ["page down", Key.PageDown],
    ["pagedown", Key.PageDown],
    ["page up", Key.PageUp],
    ["pageup", Key.PageUp],
    ["esc", Key.Escape],
    ["del", Key.Delete],
    ["arrowup", Key.Up],
    ["arrowdown", Key.Down],
    ["arrowleft", Key.Left],
    ["arrowright", Key.Right],
    ["meta", platformCommandKey],
    ["win", platformCommandKey],
    ["command", platformCommandKey],
    ["cmd", platformCommandKey]
  ]);
};

/**
 * Translates a hotkey string such as "ctrl+c", "ctrl c" or "Ctrl + Shift + Tab"
 * into nut-js key values.
 *
 * Tokens are separated by any run of whitespace and/or "+". Each token is
 * resolved, case-insensitively, against the alias table first, then (for a
 * single digit) against `Key.Num<digit>`, then against the names of the
 * nut-js `Key` object. Purely numeric property names of `Key` are ignored so
 * that enum reverse mappings are never mistaken for key names.
 *
 * @param {string} keyStr - Raw hotkey description from the model.
 * @returns {{ keys: unknown[], unknown: string[] }} Resolved keys in input
 *   order, plus the tokens that could not be resolved.
 */
const resolveHotkeyKeys = (keyStr) => {
  const { Key } = import_nut_js;
  const aliases = buildHotkeyAliases();
  const keyNames = Object.keys(Key).filter((name) => !/^\d+$/.test(name));
  const keys = [];
  const unknown = [];
  for (const token of String(keyStr).split(/[\s+]+/)) {
    if (token === "") {
      continue;
    }
    const lowered = token.toLowerCase();
    let key;
    if (aliases.has(lowered)) {
      key = aliases.get(lowered);
    } else if (/^\d$/.test(lowered)) {
      key = Key[`Num${lowered}`];
    } else {
      const match = keyNames.find((name) => name.toLowerCase() === lowered);
      key = match === undefined ? undefined : Key[match];
    }
    // Compare against undefined (not truthiness): a key value of 0 is valid.
    if (key === undefined) {
      unknown.push(token);
    } else {
      keys.push(key);
    }
  }
  return { keys, unknown };
};

/**
 * Splits the `content` of a `type` action into the text to type and a flag
 * telling whether Enter should be pressed afterwards.
 *
 * Enter is requested when the content ends with a real newline or with the
 * two-character sequence backslash + "n". The newline check runs on the
 * untrimmed input, because trimming would remove a real trailing newline.
 *
 * @param {unknown} rawContent - The `content` action input.
 * @returns {{ text: string, submit: boolean }}
 */
const parseTypeContent = (rawContent) => {
  const raw = typeof rawContent === "string" ? rawContent : "";
  const trimmed = raw.trim();
  const submit = trimmed.endsWith("\\n") || /\n\s*$/.test(raw);
  const text = trimmed.replace(/\\n$/, "").replace(/\n$/, "");
  return { text, submit };
};

/**
 * Enters `text` into the focused control. On Windows the text is pasted via
 * the clipboard (Ctrl+V) and the previous clipboard content is restored even
 * if pasting fails; on other platforms it is typed with the nut-js keyboard.
 *
 * @param {string} text - Non-empty text to enter.
 * @returns {Promise<void>}
 */
const enterText = async (text) => {
  const { clipboard, keyboard, Key } = import_nut_js;
  if (process.platform !== "win32") {
    await keyboard.type(text);
    return;
  }
  const originalClipboard = await clipboard.getContent();
  try {
    await clipboard.setContent(text);
    await keyboard.pressKey(Key.LeftControl, Key.V);
    await (0, import_nut_js.sleep)(50);
    await keyboard.releaseKey(Key.LeftControl, Key.V);
    await (0, import_nut_js.sleep)(50);
  } finally {
    await clipboard.setContent(originalClipboard);
  }
};

/**
 * Operator that drives the local desktop through nut-js: it captures
 * screenshots and executes the mouse/keyboard actions predicted by the model.
 */
class NutJSOperator extends import_core.Operator {
  /** Action vocabulary advertised to the model. */
  static MANUAL = {
    ACTION_SPACES: [
      `click(start_box='[x1, y1, x2, y2]')`,
      `left_double(start_box='[x1, y1, x2, y2]')`,
      `right_single(start_box='[x1, y1, x2, y2]')`,
      `drag(start_box='[x1, y1, x2, y2]', end_box='[x3, y3, x4, y4]')`,
      `hotkey(key='')`,
      `type(content='') #If you want to submit your input, use "\\n" at the end of \`content\`.`,
      `scroll(start_box='[x1, y1, x2, y2]', direction='down or up or right or left')`,
      `wait() #Sleep for 5s and take a screenshot to check for any changes.`,
      `finished()`,
      `call_user() # Submit the task and call the user when the task is unsolvable, or when you need the user's help.`
    ]
  };

  /**
   * Grabs the screen, scales the image down by the reported pixel density and
   * returns it as a base64-encoded PNG.
   *
   * @returns {Promise<{base64: string, width: number, height: number, scaleFactor: number}>}
   *   `width`/`height` are the grabbed dimensions divided by the pixel density;
   *   `scaleFactor` is the horizontal pixel density.
   */
  async screenshot() {
    const { logger } = (0, import_core.useContext)();
    const grabImage = await import_nut_js.screen.grab();
    const screenWithScale = await grabImage.toRGB();
    const scaleFactor = screenWithScale.pixelDensity.scaleX;
    logger.info(
      "[NutjsOperator]",
      "scaleX",
      screenWithScale.pixelDensity.scaleX,
      "scaleY",
      screenWithScale.pixelDensity.scaleY
    );
    const screenWithScaleImage = await import_jimp.Jimp.fromBitmap({
      width: screenWithScale.width,
      height: screenWithScale.height,
      data: Buffer.from(screenWithScale.data)
    });
    const width = screenWithScale.width / screenWithScale.pixelDensity.scaleX;
    const height = screenWithScale.height / screenWithScale.pixelDensity.scaleY;
    const realScreenImage = await screenWithScaleImage.resize({
      w: width,
      h: height
    }).getBuffer("image/png", { quality: 75 });
    const output = {
      base64: realScreenImage.toString("base64"),
      width,
      height,
      scaleFactor
    };
    logger?.info(
      `[NutjsOperator] screenshot: ${output.width}x${output.height}, scaleFactor: ${scaleFactor}`
    );
    return output;
  }

  /**
   * Executes one parsed model prediction against the desktop.
   *
   * @param {object} params
   * @param {object} params.parsedPrediction - Holds `action_type` and `action_inputs`.
   * @param {number} params.screenWidth - Passed to `parseBoxToScreenCoords`.
   * @param {number} params.screenHeight - Passed to `parseBoxToScreenCoords`.
   * @param {number} params.scaleFactor - Only logged here.
   * @param {*} params.factors - Passed to `parseBoxToScreenCoords` for both boxes.
   * @returns {Promise<{status: *}|undefined>} `{ status: StatusEnum.END }` for
   *   `error_env`, `call_user` and `finished`; otherwise `undefined`.
   */
  async execute(params) {
    const { logger } = (0, import_core.useContext)();
    const { parsedPrediction, screenWidth, screenHeight, scaleFactor, factors } = params;
    const { action_type, action_inputs } = parsedPrediction;
    const startBoxStr = action_inputs?.start_box || "";
    logger.info("[NutjsOperator] execute", scaleFactor);
    const { x: startX, y: startY } = (0, import_core.parseBoxToScreenCoords)({
      boxStr: startBoxStr,
      screenWidth,
      screenHeight,
      factors
    });
    logger.info(`[NutjsOperator Position]: (${startX}, ${startY})`);
    import_nut_js.mouse.config.mouseSpeed = 3600;
    switch (action_type) {
      case "wait":
        logger.info("[NutjsOperator] wait", action_inputs);
        await (0, import_nut_js.sleep)(WAIT_ACTION_MS);
        break;
      case "mouse_move":
      case "hover":
        logger.info("[NutjsOperator] mouse_move");
        await moveStraightTo(startX, startY);
        break;
      case "click":
      case "left_click":
      case "left_single":
        logger.info("[NutjsOperator] left_click");
        await moveStraightTo(startX, startY);
        await (0, import_nut_js.sleep)(PRE_CLICK_DELAY_MS);
        await import_nut_js.mouse.click(import_nut_js.Button.LEFT);
        break;
      case "left_double":
      case "double_click":
        logger.info(`[NutjsOperator] ${action_type}(${startX}, ${startY})`);
        await moveStraightTo(startX, startY);
        await (0, import_nut_js.sleep)(PRE_CLICK_DELAY_MS);
        await import_nut_js.mouse.doubleClick(import_nut_js.Button.LEFT);
        break;
      case "right_click":
      case "right_single":
        logger.info("[NutjsOperator] right_click");
        await moveStraightTo(startX, startY);
        await (0, import_nut_js.sleep)(PRE_CLICK_DELAY_MS);
        await import_nut_js.mouse.click(import_nut_js.Button.RIGHT);
        break;
      case "middle_click":
        logger.info("[NutjsOperator] middle_click");
        await moveStraightTo(startX, startY);
        await import_nut_js.mouse.click(import_nut_js.Button.MIDDLE);
        break;
      case "left_click_drag":
      case "drag":
      case "select": {
        logger.info("[NutjsOperator] drag", action_inputs);
        if (action_inputs?.end_box) {
          // Use the same `factors` as for start_box so both ends share one scale.
          const { x: endX, y: endY } = (0, import_core.parseBoxToScreenCoords)({
            boxStr: action_inputs.end_box,
            screenWidth,
            screenHeight,
            factors
          });
          // Null checks rather than truthiness: 0 is a valid coordinate.
          if (startX != null && startY != null && endX != null && endY != null) {
            // Go to the start point first; the drag path is computed from the
            // pointer position at the time `straightTo` is called.
            await moveStraightTo(startX, startY);
            await import_nut_js.mouse.drag(
              (0, import_nut_js.straightTo)(new import_nut_js.Point(endX, endY))
            );
          }
        }
        break;
      }
      case "type": {
        const { text, submit } = parseTypeContent(action_inputs?.content);
        logger.info("[NutjsOperator] type", text);
        if (!text && !submit) {
          break;
        }
        import_nut_js.keyboard.config.autoDelayMs = 0;
        try {
          if (text) {
            await enterText(text);
          }
          if (submit) {
            await import_nut_js.keyboard.pressKey(import_nut_js.Key.Enter);
            await import_nut_js.keyboard.releaseKey(import_nut_js.Key.Enter);
          }
        } finally {
          // Always leave the keyboard delay in its post-type state, even on failure.
          import_nut_js.keyboard.config.autoDelayMs = POST_TYPE_KEY_DELAY_MS;
        }
        break;
      }
      case "hotkey": {
        const keyStr = action_inputs?.key || action_inputs?.hotkey;
        if (keyStr) {
          const { keys, unknown } = resolveHotkeyKeys(keyStr);
          if (unknown.length > 0) {
            logger.warn(`[NutjsOperator] Unsupported hotkey key(s): ${unknown.join(", ")}`);
          }
          logger.info("[NutjsOperator] hotkey: ", keys);
          if (keys.length > 0) {
            await import_nut_js.keyboard.pressKey(...keys);
            await import_nut_js.keyboard.releaseKey(...keys);
          }
        }
        break;
      }
      case "scroll": {
        const direction = action_inputs?.direction;
        if (startX !== null && startY !== null) {
          await moveStraightTo(startX, startY);
        }
        switch (direction?.toLowerCase()) {
          case "up":
            await import_nut_js.mouse.scrollUp(SCROLL_AMOUNT);
            break;
          case "down":
            await import_nut_js.mouse.scrollDown(SCROLL_AMOUNT);
            break;
          case "left":
            await import_nut_js.mouse.scrollLeft(SCROLL_AMOUNT);
            break;
          case "right":
            await import_nut_js.mouse.scrollRight(SCROLL_AMOUNT);
            break;
          default:
            logger.warn(
              `[NutjsOperator] Unsupported scroll direction: ${direction}`
            );
        }
        break;
      }
      case "error_env":
      case "call_user":
      case "finished":
        return { status: import_core.StatusEnum.END };
      default:
        logger.warn(`Unsupported action: ${action_type}`);
    }
  }
}
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  NutJSOperator
});