@gui-agent/operator-adb
Version:
Android operator based on ADB for GUI Agent
296 lines (295 loc) • 11.5 kB
JavaScript
/**
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
* SPDX-License-Identifier: Apache-2.0
*/
import node_fs from "node:fs";
import node_path from "node:path";
import node_os from "node:os";
import { exec } from "node:child_process";
import { fileURLToPath } from "node:url";
import { promisify } from "node:util";
import { Operator } from "@gui-agent/shared/base";
import { ConsoleLogger, LogLevel } from "@agent-infra/logger";
import { ADB } from "appium-adb";
/**
 * Sets `key` on `obj` to `value` and returns `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited), the value is
 * installed with `Object.defineProperty` as an enumerable, configurable,
 * writable own data property; otherwise a plain assignment is used.
 *
 * @param {object} obj - Target object.
 * @param {PropertyKey} key - Property name to set.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj` that was passed in.
 */
function _define_property(obj, key, value) {
  if (!(key in obj)) {
    obj[key] = value;
    return obj;
  }
  const descriptor = {
    value,
    writable: true,
    configurable: true,
    enumerable: true,
  };
  Object.defineProperty(obj, key, descriptor);
  return obj;
}
// Where a fallback screenshot is written on the device, and where it is
// pulled to on the host before being read back.
const screenshotPathOnAndroid = '/data/local/tmp/ui_tars_screenshot.png';
const screenshotPathOnLocal = node_path.join(
  node_os.homedir(),
  'Downloads',
  'ui_tars_screenshot.png',
);
// Command prefix that starts `com.ysbing.yadb.Main` through `app_process`;
// a yadb sub-command is appended to it before it is sent to the device shell.
const yadbCommand = 'app_process -Djava.class.path=/data/local/tmp/yadb /data/local/tmp com.ysbing.yadb.Main';
// Logger used when the AdbOperator constructor is not given one.
const defaultLogger = new ConsoleLogger(undefined, LogLevel.DEBUG);
/**
 * Operator that drives an Android device over ADB (through `appium-adb`).
 *
 * Screenshots and Han-script text input fall back to the bundled `yadb`
 * helper, which is pushed to the device on first use.
 *
 * NOTE(review): `getScreenContext()` is called below but not defined in this
 * class; it is presumably inherited from `Operator` — confirm.
 */
class AdbOperator extends Operator {
  /**
   * @param {object} [logger=defaultLogger] - Logger exposing `spawn(prefix)`;
   *   the spawned child logger is stored on `this.logger`.
   */
  constructor(logger = defaultLogger) {
    super();
    _define_property(this, "logger", void 0);
    _define_property(this, "_deviceId", null);
    _define_property(this, "_adb", null);
    _define_property(this, "_hasPushedYadb", false);
    _define_property(this, "_screenContext", null);
    this.logger = logger.spawn('[AdbOperator]');
  }

  /**
   * Picks a connected device, creates the ADB client for it and caches the
   * screen context.
   *
   * @returns {Promise<void>}
   * @throws {Error} When no device is available or the screen size cannot be read.
   */
  async initialize() {
    this._deviceId = await this.getConnectedDevices();
    this._adb = await ADB.createADB({
      udid: this._deviceId,
      adbExecTimeout: 60000
    });
    this._screenContext = await this.calculateScreenContext(this._adb);
  }

  /**
   * Not implemented for this operator.
   *
   * @throws {Error} Always.
   */
  supportedActions() {
    throw new Error('Method not implemented.');
  }

  /**
   * Returns the screen context computed by `initialize()`.
   *
   * @returns {{screenWidth: number, screenHeight: number, scaleX: number, scaleY: number}}
   * @throws {Error} When `initialize()` has not populated the context yet.
   */
  screenContext() {
    if (!this._screenContext) throw new Error('The Operator not initialized');
    return this._screenContext;
  }

  /**
   * Captures the device screen.
   *
   * @returns {Promise<{status: string, base64: string}>}
   * @throws {Error} When the ADB client has not been created yet.
   */
  async screenshot() {
    if (!this._adb) throw new Error('The Operator not initialized');
    return await this.screenshotWithFallback();
  }

  /**
   * Runs every action in `params.actions` sequentially, in order.
   *
   * @param {{actions: Array<{type: string, inputs: object}>}} params
   * @returns {Promise<{status: string}>} `{ status: 'success' }` once all actions ran.
   * @throws {Error} Propagates the first failure from an action.
   */
  async execute(params) {
    const { actions } = params;
    for (const action of actions) {
      this.logger.info('execute action', action);
      await this.singleActionExecutor(action);
    }
    return {
      status: 'success'
    };
  }

  /**
   * Dispatches one action to the matching handler and waits for it to finish.
   *
   * @param {{type: string, inputs: object}} action
   * @returns {Promise<void>}
   * @throws {Error} On missing inputs, on `open_app`, or on an unknown action type.
   */
  async singleActionExecutor(action) {
    const { type: actionType, inputs: actionInputs } = action;
    switch (actionType) {
      case 'click': {
        const { point } = actionInputs;
        if (!point) throw new Error('point is required when click');
        const { realX, realY } = await this.calculateRealCoords(point);
        await this.handleClick(realX, realY);
        break;
      }
      case 'long_press': {
        const { point } = actionInputs;
        if (!point) throw new Error('point is required when long_press');
        const { realX, realY } = await this.calculateRealCoords(point);
        // A swipe that starts and ends on the same point, held for the given
        // duration, stands in for a long press.
        await this.handleSwipe({
          x: realX,
          y: realY
        }, {
          x: realX,
          y: realY
        }, 1500);
        break;
      }
      case 'swipe':
      case 'drag': {
        const { start: startPoint, end: endPoint } = actionInputs;
        if (!startPoint) throw new Error('start point is required when swipe/drag');
        if (!endPoint) throw new Error('end point is required when swipe/drag');
        const { realX: startX, realY: startY } = await this.calculateRealCoords(startPoint);
        const { realX: endX, realY: endY } = await this.calculateRealCoords(endPoint);
        await this.handleSwipe({
          x: startX,
          y: startY
        }, {
          x: endX,
          y: endY
        }, 300);
        break;
      }
      case 'scroll': {
        const { direction, point } = actionInputs;
        if (!direction) throw new Error("Direction required when scroll");
        await this.handleScroll(direction, point);
        break;
      }
      case 'type': {
        const { content } = actionInputs;
        await this.handleType(content);
        break;
      }
      case 'hotkey': {
        const { key } = actionInputs;
        await this.handleHotkey(key);
        break;
      }
      case 'open_app':
        throw new Error('The device does NOT support open app directly');
      case 'home':
      case 'press_home':
        await this.handleHotkey('home');
        break;
      case 'back':
      case 'press_back':
        await this.handleHotkey('back');
        break;
      default:
        this.logger.warn(`[AdbOperator] Unsupported action: ${actionType}`);
        throw new Error(`Unsupported action: ${actionType}`);
    }
  }

  /**
   * Converts a coordinate descriptor to device coordinates.
   *
   * `coords.normalized` (when present) is multiplied by the screen size and
   * scale factors; otherwise `coords.raw` is returned unchanged.
   *
   * @param {{normalized?: {x: number, y: number}, raw?: {x: number, y: number}}} coords
   * @returns {Promise<{realX: number, realY: number}>}
   * @throws {Error} When neither `normalized` nor `raw` is provided.
   */
  async calculateRealCoords(coords) {
    if (!coords.normalized) {
      if (!coords.raw) throw new Error('Invalide coordinates');
      return {
        realX: coords.raw.x,
        realY: coords.raw.y
      };
    }
    const screenContext = await this.getScreenContext();
    return {
      realX: coords.normalized.x * screenContext.screenWidth * screenContext.scaleX,
      realY: coords.normalized.y * screenContext.screenHeight * screenContext.scaleY
    };
  }

  /**
   * Runs `adb devices` and returns the id of the first entry whose status is
   * `device`. A warning is logged when more than one such device is listed.
   *
   * @returns {Promise<string>} Id of the selected device.
   * @throws {Error} When the command fails or no device is in `device` state.
   */
  async getConnectedDevices() {
    const execPromise = promisify(exec);
    try {
      const { stdout } = await execPromise('adb devices');
      // The first output line is skipped; remaining lines are "<id>\t<status>".
      const devices = stdout.split('\n').slice(1).map((line) => {
        const [id, status] = line.split('\t');
        return {
          id,
          status
        };
      }).filter(({ id, status }) => id && status && 'device' === status.trim()).map(({ id }) => id);
      if (0 === devices.length) throw new Error('No available Android devices found');
      if (devices.length > 1) this.logger.warn(`Multiple devices detected: ${devices.join(',')}. Using the first: ${devices[0]}`);
      return devices[0];
    } catch (error) {
      this.logger.error('Failed to get devices:', error);
      throw error;
    }
  }

  /**
   * Reads the screen size from the device and builds the screen context.
   *
   * The density-adjusted size is only logged; the returned context carries
   * the parsed pixel size with both scale factors set to 1.
   *
   * @param {object} adb - Client providing `getScreenSize()` and `getScreenDensity()`.
   * @returns {Promise<{screenWidth: number, screenHeight: number, scaleX: number, scaleY: number}>}
   * @throws {Error} When the screen size is missing or not of the form `<w>x<h>`.
   */
  async calculateScreenContext(adb) {
    const screenSize = await adb.getScreenSize();
    this.logger.debug('getScreenSize', screenSize);
    if (!screenSize) throw new Error('Unable to get screenSize');
    const match = screenSize.match(/(\d+)x(\d+)/);
    if (!match || match.length < 3) throw new Error(`Unable to parse screenSize: ${screenSize}`);
    const width = Number.parseInt(match[1], 10);
    const height = Number.parseInt(match[2], 10);
    const densityNum = await adb.getScreenDensity();
    this.logger.debug('getScreenDensity', densityNum);
    const deviceRatio = Number(densityNum) / 160;
    this.logger.debug('deviceRatio', deviceRatio);
    const adjustedSize = this.reverseAdjustCoordinates(deviceRatio, width, height);
    this.logger.debug('adjustedWidth', adjustedSize);
    return {
      screenWidth: width,
      screenHeight: height,
      scaleX: 1,
      scaleY: 1
    };
  }

  /**
   * Divides a coordinate pair by `ratio` and rounds each component.
   *
   * @param {number} ratio
   * @param {number} x
   * @param {number} y
   * @returns {{x: number, y: number}}
   */
  reverseAdjustCoordinates(ratio, x, y) {
    return {
      x: Math.round(x / ratio),
      y: Math.round(y / ratio)
    };
  }

  /**
   * Takes a screenshot, trying in order: `adb.takeScreenshot`, `screencap`
   * on the device, then yadb. The last two write a file on the device that is
   * pulled to the host and read back.
   *
   * @returns {Promise<{status: string, base64: string}>}
   * @throws {Error} When every capture strategy (or the pull/read) fails.
   */
  async screenshotWithFallback() {
    let screenshotBuffer;
    try {
      screenshotBuffer = await this._adb.takeScreenshot(null);
    } catch (error) {
      this.logger.warn('screenshotWithFallback', error.message);
      try {
        const result = await this._adb.shell(`screencap -p ${screenshotPathOnAndroid}`);
        this.logger.debug('screenshotWithFallback result of screencap:', result);
      } catch (screencapError) {
        // Record why screencap failed before moving on to the last resort.
        this.logger.warn('screenshotWithFallback screencap failed, falling back to yadb:', screencapError);
        await this.executeWithYadb(`-screenshot ${screenshotPathOnAndroid}`);
      }
      // The target directory is not guaranteed to exist on every host.
      await node_fs.promises.mkdir(node_path.dirname(screenshotPathOnLocal), {
        recursive: true
      });
      await this._adb.pull(screenshotPathOnAndroid, screenshotPathOnLocal);
      screenshotBuffer = await node_fs.promises.readFile(screenshotPathOnLocal);
    }
    const base64 = screenshotBuffer.toString('base64');
    return {
      status: 'success',
      base64
    };
  }

  /**
   * Taps the screen at the given device coordinates.
   *
   * @param {number} x
   * @param {number} y
   * @returns {Promise<void>}
   */
  async handleClick(x, y) {
    await this._adb.shell(`input tap ${x} ${y}`);
  }

  /**
   * Types `text` on the device. Text containing Han characters is sent
   * through yadb; everything else goes through `adb.inputText`.
   *
   * @param {string} text
   * @returns {Promise<void>}
   * @throws {Error} When `text` is empty.
   */
  async handleType(text) {
    if (!text) throw new Error('The content of type is empty');
    const isChinese = /[\p{Script=Han}\p{sc=Hani}]/u.test(text);
    if (!isChinese) {
      await this._adb.inputText(text);
      return;
    }
    // The text is embedded in a double-quoted shell argument, so the
    // characters that stay special inside double quotes must be escaped.
    const escapedText = text.replace(/(["\\$`])/g, '\\$1');
    await this.executeWithYadb(`-keyboard "${escapedText}"`);
  }

  /**
   * Sends the key event mapped to `keyStr` (case-insensitive).
   *
   * @param {string} keyStr - One of the names in the key map below.
   * @returns {Promise<void>}
   * @throws {Error} When `keyStr` is empty or not in the key map.
   */
  async handleHotkey(keyStr) {
    if (!keyStr) throw new Error('The hotkey is empty');
    // Numeric codes handed to `adb.keyevent`.
    const keyMap = {
      home: 3,
      back: 4,
      menu: 82,
      power: 26,
      volume_up: 24,
      volumeup: 24,
      volume_down: 25,
      volumedown: 25,
      mute: 164,
      enter: 66,
      delete: 112,
      lock: 26
    };
    const normalizedKey = keyStr.toLowerCase();
    // Own-property check so inherited names such as "constructor" are rejected.
    const keyCode = Object.prototype.hasOwnProperty.call(keyMap, normalizedKey) ? keyMap[normalizedKey] : undefined;
    if (keyCode === undefined) throw new Error(`Unsupported key: ${keyStr}`);
    await this._adb.keyevent(keyCode);
  }

  /**
   * Swipes between two device points.
   *
   * @param {{x: number, y: number}} from
   * @param {{x: number, y: number}} to
   * @param {number} duration - Passed as the last argument of `input swipe`.
   * @returns {Promise<void>}
   */
  async handleSwipe(from, to, duration) {
    await this._adb.shell(`input swipe ${from.x} ${from.y} ${to.x} ${to.y} ${duration}`);
  }

  /**
   * Performs a 200-unit swipe in `direction`, starting from `point` when
   * given and from the screen centre otherwise.
   *
   * @param {string} direction - `up`, `down`, `left` or `right` (case-insensitive).
   * @param {object} [point] - Coordinate descriptor accepted by `calculateRealCoords`.
   * @returns {Promise<void>}
   * @throws {Error} On an unsupported direction.
   */
  async handleScroll(direction, point) {
    const screenContext = await this.getScreenContext();
    let startX = screenContext.screenWidth / 2;
    let startY = screenContext.screenHeight / 2;
    if (point) {
      const { realX, realY } = await this.calculateRealCoords(point);
      startX = realX;
      startY = realY;
    }
    let endX = startX;
    let endY = startY;
    switch (direction.toLowerCase()) {
      case 'up':
        endY -= 200;
        break;
      case 'down':
        endY += 200;
        break;
      case 'left':
        endX -= 200;
        break;
      case 'right':
        endX += 200;
        break;
      default:
        throw new Error(`Unsupported scroll direction: ${direction}`);
    }
    await this.handleSwipe({
      x: startX,
      y: startY
    }, {
      x: endX,
      y: endY
    }, 300);
  }

  /**
   * Runs a yadb sub-command on the device, pushing the bundled yadb binary
   * to `/data/local/tmp` the first time it is needed.
   *
   * @param {string} subCommand - Arguments appended to the yadb command prefix.
   * @returns {Promise<void>}
   */
  async executeWithYadb(subCommand) {
    if (!this._hasPushedYadb) {
      // `__dirname` does not exist in ES modules; resolve the bundled binary
      // relative to this module's URL instead.
      const yadbBin = fileURLToPath(new URL('../bin/yadb', import.meta.url));
      await this._adb.push(yadbBin, '/data/local/tmp');
      this._hasPushedYadb = true;
    }
    await this._adb.shell(`${yadbCommand} ${subCommand}`);
  }
}
export { AdbOperator };
//# sourceMappingURL=AdbOperator.mjs.map