askui
Version:
Reliable, automated end-to-end-testing that depends on what is shown on your screen instead of the technology you are running on
958 lines (957 loc) • 37.5 kB
JavaScript
// Helper matching the `__awaiter` shim the TypeScript compiler emits for async functions.
// An already-defined `this.__awaiter` is reused; otherwise the function below is used.
// It starts `generator` with `thisArg` / `_arguments` and returns a promise (constructed with
// `P`, falling back to the global `Promise`) that resolves with the generator's final value
// and rejects with any error thrown while the generator is being stepped.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wraps a yielded value in a `P` promise unless it already is an instance of `P`.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resumes the generator with the fulfilled value of the promise it yielded.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throws the rejection reason back into the generator at its paused `yield`.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Resolves the outer promise once the generator is done; otherwise waits on the yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator and run it up to its first `yield` (or to completion).
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
import { PC_KEY_VALUES, MODIFIER_KEY_VALUES, ANDROID_KEY_VALUES, } from '../../../../execution/dsl';
import { BaseAgentTool, ToolError } from './base';
import { ControlCommand, ControlCommandCode, InputEvent, Action, } from '../../../ui-control-commands';
import { Base64Image } from '../../../../utils/base_64_image/base-64-image';
import { AgentError } from './agent-errors';
/**
 * Turns agent-level actions into control commands for the OS controller client.
 *
 * Screenshots handed out by `takeScreenshot` are resized to `targetResolution`
 * (1280x800 unless changed) through `Base64Image.resizeWithSameAspectRatio`.
 * `paddingInfo` stores the scale factor and the left/top offsets computed for that
 * resize, so coordinates can be converted between the target ("api") space and
 * real screen pixels.
 */
export class OsAgentHandler {
    /**
     * @param AgentOsClient client used for `getScreenshot`, `getStartingArguments` and `requestControl`
     * @param screenDimensions `{ width, height }` of the real screen
     * @param runtime `'android'` or `'desktop'`; the Android-only methods check it
     */
    constructor(AgentOsClient, screenDimensions, runtime) {
        this.AgentOsClient = AgentOsClient;
        this.screenDimensions = screenDimensions;
        this.runtime = runtime;
        this.targetResolution = { width: 1280, height: 800 };
        this.paddingInfo = null;
        this.updatePaddingInfo();
    }
    /**
     * Recomputes `paddingInfo` from the current screen dimensions and target resolution.
     *
     * When the target is proportionally wider than the screen the image is fitted by
     * height and the remaining width is split into left padding; otherwise it is fitted
     * by width and the remaining height is split into top padding.
     */
    updatePaddingInfo() {
        const { width: targetWidth, height: targetHeight } = this.targetResolution;
        const { width: screenWidth, height: screenHeight } = this.screenDimensions;
        const targetAspectRatio = targetWidth / targetHeight;
        const screenAspectRatio = screenWidth / screenHeight;
        let scaleFactor;
        let scaledWidth;
        let scaledHeight;
        let padLeft = 0;
        let padTop = 0;
        if (targetAspectRatio > screenAspectRatio) {
            scaleFactor = targetHeight / screenHeight;
            scaledWidth = Math.floor(screenWidth * scaleFactor);
            scaledHeight = targetHeight;
            padLeft = Math.floor((targetWidth - scaledWidth) / 2);
        }
        else {
            scaleFactor = targetWidth / screenWidth;
            scaledWidth = targetWidth;
            scaledHeight = Math.floor(screenHeight * scaleFactor);
            padTop = Math.floor((targetHeight - scaledHeight) / 2);
        }
        this.paddingInfo = {
            scaleFactor,
            scaledWidth,
            scaledHeight,
            padLeft,
            padTop,
        };
    }
    // TODO: Add image support to act, and check for function overloads in TypeScript.
    /**
     * Builds a handler by taking one screenshot to learn the screen size and reading the
     * client's starting arguments to decide between the `'android'` and `'desktop'` runtime.
     *
     * @param AgentOsClient client to wrap
     * @returns promise resolving to the new `OsAgentHandler`
     */
    static createInstance(AgentOsClient) {
        return __awaiter(this, void 0, void 0, function* () {
            const base64ImageString = yield AgentOsClient.getScreenshot();
            const imageInfo = yield (yield Base64Image.fromString(base64ImageString)).getInfo();
            const startingArguments = yield AgentOsClient.getStartingArguments();
            const runtime = startingArguments['runtime'] === 'android' ? 'android' : 'desktop';
            return new OsAgentHandler(AgentOsClient, {
                width: imageInfo.width,
                height: imageInfo.height,
            }, runtime);
        });
    }
    /** @returns the current `{ width, height }` target resolution */
    getTargetResolution() {
        return this.targetResolution;
    }
    /** @returns the last known `{ width, height }` of the real screen */
    getScreenDimensions() {
        return this.screenDimensions;
    }
    /**
     * Replaces the target resolution and refreshes `paddingInfo`.
     *
     * @param width new target width
     * @param height new target height
     */
    setTargetResolution(width, height) {
        this.targetResolution = { width, height };
        this.updatePaddingInfo();
    }
    /**
     * Takes a screenshot, refreshes the stored screen dimensions and padding from it,
     * and returns the image resized to the target resolution.
     *
     * @returns promise resolving to `toString(false)` of the resized image
     */
    takeScreenshot() {
        return __awaiter(this, void 0, void 0, function* () {
            const base64ImageString = yield this.AgentOsClient.getScreenshot();
            const base64Image = yield Base64Image.fromString(base64ImageString);
            const imageInfo = yield base64Image.getInfo();
            this.screenDimensions = {
                width: imageInfo.width,
                height: imageInfo.height,
            };
            this.updatePaddingInfo();
            const resizedImage = yield base64Image.resizeWithSameAspectRatio(this.targetResolution.width, this.targetResolution.height);
            return resizedImage.toString(false);
        });
    }
    /**
     * Converts a point between target ("api") space and real screen pixels.
     *
     * @param source `'api'` converts target-space coordinates to screen pixels; any other
     *   value converts screen pixels to target-space coordinates
     * @param x horizontal coordinate in the source space
     * @param y vertical coordinate in the source space
     * @returns `[x, y]` in the destination space, rounded to integers
     * @throws ToolError if the padding is not initialised or, for `'api'`, if the point is
     *   not numeric, lies outside the target resolution, or falls into the padding area
     */
    scaleCoordinates(source, x, y) {
        if (!this.paddingInfo) {
            throw new ToolError('Padding information not initialized');
        }
        const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop, } = this.paddingInfo;
        if (source === 'api') {
            // NaN compares false against every bound, so missing or non-numeric coordinates
            // would otherwise slip through the range checks and come out as [NaN, NaN].
            if (!Number.isFinite(Number(x)) || !Number.isFinite(Number(y))) {
                throw new ToolError(`Coordinates ${x}, ${y} are not valid numbers`);
            }
            if (x > this.targetResolution.width || y > this.targetResolution.height || x < 0 || y < 0) {
                throw new ToolError(`Coordinates ${x}, ${y} are outside screen bounds `
                    + `(${this.targetResolution.width}x${this.targetResolution.height})`);
            }
            const adjustedX = x - padLeft;
            const adjustedY = y - padTop;
            if (adjustedX < 0 || adjustedX > scaledWidth || adjustedY < 0 || adjustedY > scaledHeight) {
                throw new ToolError(`Coordinates ${x}, ${y} are outside the scaled image area `
                    + `(${scaledWidth}x${scaledHeight} with padding ${padLeft},${padTop})`);
            }
            return [
                Math.round(adjustedX / scaleFactor),
                Math.round(adjustedY / scaleFactor),
            ];
        }
        const apiX = Math.round(x * scaleFactor) + padLeft;
        const apiY = Math.round(y * scaleFactor) + padTop;
        return [apiX, apiY];
    }
    /**
     * Sends a control command to the client after mapping mouse-move targets from
     * target space to screen pixels (the actions are updated in place).
     *
     * Scroll actions are forwarded untouched: their `position` carries relative deltas,
     * which may legitimately be negative or smaller than the padding, so running them
     * through the absolute-position mapping would reject valid scrolls.
     *
     * @param controlCommand command whose `actions` are sent
     */
    requestControl(controlCommand) {
        return __awaiter(this, void 0, void 0, function* () {
            for (const action of controlCommand.actions) {
                if (action.inputEvent === InputEvent.MOUSE_MOVE) {
                    [action.position.x, action.position.y] = this.scaleCoordinates('api', action.position.x, action.position.y);
                }
            }
            yield this.AgentOsClient.requestControl(controlCommand);
        });
    }
    /**
     * Wraps one action in an `OK` control command and sends it through `requestControl`.
     *
     * @param inputEvent `InputEvent` member describing the action
     * @param position `{ x, y }` attached to the action
     * @param text text payload of the action
     * @param parameters extra parameters of the action
     */
    sendAction(inputEvent, position, text, parameters) {
        return __awaiter(this, void 0, void 0, function* () {
            const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(inputEvent, position, text, parameters)]);
            yield this.requestControl(controlCommand);
        });
    }
    /**
     * Rejects calls to Android-only methods on other runtimes.
     *
     * @throws ToolError when `runtime` is not `'android'`
     */
    assertAndroidRuntime() {
        if (this.runtime !== 'android') {
            throw new ToolError('This tool is only available on Android devices');
        }
    }
    /** Moves the mouse to the given target-space coordinates. */
    mouseMove(x, y) {
        return this.sendAction(InputEvent.MOUSE_MOVE, { x, y }, '', {});
    }
    /**
     * Clicks a mouse button.
     *
     * @param button `'left'`, `'right'` or `'middle'`; any other value results in a single left click
     * @param doubleClick whether to double click
     */
    mouseClick(button, doubleClick) {
        let action = InputEvent.MOUSE_CLICK_LEFT;
        if (doubleClick) {
            if (button === 'left') {
                action = InputEvent.MOUSE_CLICK_DOUBLE_LEFT;
            }
            else if (button === 'right') {
                action = InputEvent.MOUSE_CLICK_DOUBLE_RIGHT;
            }
            else if (button === 'middle') {
                action = InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE;
            }
        }
        else if (button === 'right') {
            action = InputEvent.MOUSE_CLICK_RIGHT;
        }
        else if (button === 'middle') {
            action = InputEvent.MOUSE_CLICK_MIDDLE;
        }
        return this.sendAction(action, { x: 0, y: 0 }, '', {});
    }
    /** Scrolls by the given horizontal (`dx`) and vertical (`dy`) deltas. */
    mouseScroll(dx, dy) {
        return this.sendAction(InputEvent.MOUSE_SCROLL, { x: dx, y: dy }, '', {});
    }
    /** Sends a mouse-down event. */
    mouseHoldLeftButtonDown() {
        return this.sendAction(InputEvent.MOUSE_DOWN, { x: 0, y: 0 }, '', {});
    }
    /** Sends a mouse-up event. */
    mouseReleaseLeftButton() {
        return this.sendAction(InputEvent.MOUSE_UP, { x: 0, y: 0 }, '', {});
    }
    /**
     * Presses and releases a key, optionally combined with modifiers.
     * The key sequence is sent as `modifier+modifier+key`.
     */
    desktopKeyPressAndRelease(key, modifiers = []) {
        return __awaiter(this, void 0, void 0, function* () {
            const keyString = modifiers.length > 0 ? `${modifiers.join('+')}+${key}` : key;
            yield this.sendAction(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, keyString, {});
        });
    }
    /** Presses a key (and modifiers) without releasing it. */
    desktopKeyHoldDown(key, modifiers = []) {
        return this.sendAction(InputEvent.KEY_PRESS, { x: 0, y: 0 }, '', { key, modifiers });
    }
    /** Releases a key (and modifiers). */
    desktopKeyRelease(key, modifiers = []) {
        return this.sendAction(InputEvent.KEY_RELEASE, { x: 0, y: 0 }, '', { key, modifiers });
    }
    /** Types the given text. */
    typeText(text) {
        return this.sendAction(InputEvent.TYPE, { x: 0, y: 0 }, text, {});
    }
    /** Presses a single Android key. */
    androidKeyPress(key) {
        return this.sendAction(InputEvent.PRESS_ANDROID_SINGLE_KEY, { x: 0, y: 0 }, key, {});
    }
    /** Presses several Android keys; they are sent joined by single spaces. */
    androidKeySequencePress(keys) {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.sendAction(InputEvent.PRESS_ANDROID_KEY_SEQUENCE, { x: 0, y: 0 }, keys.join(' '), {});
        });
    }
    /** Sends a shell command for execution; no command output is returned. */
    executeShellCommand(command) {
        return this.sendAction(InputEvent.EXECUTE_COMMAND, { x: 0, y: 0 }, command, {});
    }
    /**
     * Swipes between two target-space points by issuing `input swipe` (Android only).
     *
     * @throws ToolError on non-Android runtimes or for out-of-range coordinates
     */
    AndroidSwipeTool(startX, startY, endX, endY) {
        return __awaiter(this, void 0, void 0, function* () {
            this.assertAndroidRuntime();
            [startX, startY] = this.scaleCoordinates('api', startX, startY);
            [endX, endY] = this.scaleCoordinates('api', endX, endY);
            yield this.executeShellCommand(`input swipe ${startX} ${startY} ${endX} ${endY}`);
        });
    }
    /**
     * Drags between two target-space points by issuing `input draganddrop` (Android only).
     *
     * @throws ToolError on non-Android runtimes or for out-of-range coordinates
     */
    AndroidDragAndDropTool(startX, startY, endX, endY) {
        return __awaiter(this, void 0, void 0, function* () {
            this.assertAndroidRuntime();
            [startX, startY] = this.scaleCoordinates('api', startX, startY);
            [endX, endY] = this.scaleCoordinates('api', endX, endY);
            yield this.executeShellCommand(`input draganddrop ${startX} ${startY} ${endX} ${endY}`);
        });
    }
    /**
     * Taps a target-space point by issuing `input tap` (Android only).
     *
     * @throws ToolError on non-Android runtimes or for out-of-range coordinates
     */
    AndroidTapTool(x, y) {
        return __awaiter(this, void 0, void 0, function* () {
            this.assertAndroidRuntime();
            [x, y] = this.scaleCoordinates('api', x, y);
            yield this.executeShellCommand(`input tap ${x} ${y}`);
        });
    }
    /**
     * Executes a shell command on an Android runtime, dropping a leading `adb shell `
     * prefix if the caller included one.
     *
     * @throws ToolError on non-Android runtimes
     */
    executeAndroidShellCommand(command) {
        return __awaiter(this, void 0, void 0, function* () {
            this.assertAndroidRuntime();
            command = command.replace(/^adb shell /, '');
            yield this.executeShellCommand(command);
        });
    }
}
/**
 * Agent tool that captures the screen through the OS agent handler.
 */
export class ScreenShotTool extends BaseAgentTool {
    /** @param osAgentHandler handler used to take the screenshot */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Takes a screenshot and reports the handler's target resolution with it.
     *
     * @returns promise of `{ base64Images, output }`
     */
    execute() {
        return __awaiter(this, void 0, void 0, function* () {
            const handler = this.osAgentHandler;
            const image = yield handler.takeScreenshot();
            const { width, height } = handler.getTargetResolution();
            const output = `Screenshot was taken, with resolution width ${width} and height ${height}`;
            return { base64Images: [image], output };
        });
    }
    /** @returns the tool definition (name, description, empty input schema) */
    toParams() {
        const input_schema = { type: 'object', properties: {}, required: [] };
        return {
            name: 'screenshot_tool',
            description: 'Takes a screenshot of the current screen and returns it as a base64 image.',
            input_schema,
        };
    }
}
/**
 * Agent tool that moves the mouse pointer to absolute coordinates.
 */
export class MouseMoveTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the mouse move */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Moves the mouse to `command.x` / `command.y`.
     *
     * @param command object carrying the `x` and `y` coordinates
     * @returns promise of `{ output }` describing the move
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.osAgentHandler.mouseMove(command.x, command.y);
            return {
                output: `Moved mouse to (${command.x}, ${command.y})`,
            };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        return {
            name: 'mouse_move_tool',
            description: 'Moves the mouse to the specified absolute coordinates. The top left corner of the screen is (0,0)',
            input_schema: {
                type: 'object',
                properties: {
                    x: {
                        type: 'number',
                        description: 'The x (pixels from the left edge) coordinate to move the mouse to',
                    },
                    y: {
                        type: 'number',
                        description: 'The y (pixels from the top edge) coordinate to move the mouse to',
                    },
                },
                // A move without both coordinates is meaningless, so mark them as required
                // like the other coordinate-based tools in this file do.
                required: ['x', 'y'],
            },
        };
    }
}
/**
 * Agent tool that clicks (or double clicks) a mouse button.
 */
export class MouseClickTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the click */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Clicks `command.button`, as a double click when `command.doubleClick` is truthy.
     *
     * @param command object carrying `button` and `doubleClick`
     * @returns promise of `{ output }` describing the click
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.osAgentHandler.mouseClick(command.button, command.doubleClick);
            let output;
            if (command.doubleClick) {
                output = `Double clicked ${command.button} button`;
            }
            else {
                output = `Clicked ${command.button} button`;
            }
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const button = {
            type: 'string',
            enum: ['left', 'right', 'middle'],
            description: 'The button to click',
        };
        const doubleClick = {
            type: 'boolean',
            description: 'Whether to double click the button',
        };
        return {
            name: 'mouse_click_tool',
            description: 'Clicks the specified button on the mouse',
            input_schema: {
                type: 'object',
                properties: { button, doubleClick },
                required: ['button', 'doubleClick'],
            },
        };
    }
}
/**
 * Agent tool that scrolls the mouse wheel by a horizontal and vertical amount.
 */
export class MouseScrollTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the scroll */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Scrolls by `command.dx` / `command.dy`.
     *
     * @param command object carrying the `dx` and `dy` amounts
     * @returns promise of `{ output }` describing the scroll
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const handler = this.osAgentHandler;
            yield handler.mouseScroll(command.dx, command.dy);
            const output = `Scrolled by (${command.dx}, ${command.dy})`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const properties = {
            dx: {
                type: 'number',
                description: 'The amount to scroll horizontally (positive is right, negative is left)',
            },
            dy: {
                type: 'number',
                description: 'The amount to scroll vertically (positive is down, negative is up)',
            },
        };
        return {
            name: 'mouse_scroll_tool',
            description: 'Scrolls the mouse by the specified amount',
            input_schema: { type: 'object', properties, required: ['dx', 'dy'] },
        };
    }
}
/**
 * Agent tool that drags with the left mouse button from a start to an end position.
 */
export class MouseDragAndDropTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the mouse actions */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Moves to the start position, holds the left button, moves to the end position
     * and releases the button.
     *
     * @param command object carrying `startX`, `startY`, `endX` and `endY`
     * @returns promise of `{ output }` describing the drag
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const handler = this.osAgentHandler;
            yield handler.mouseMove(command.startX, command.startY);
            yield handler.mouseHoldLeftButtonDown();
            try {
                yield handler.mouseMove(command.endX, command.endY);
            }
            finally {
                // Always let go of the button: if the second move fails (for example because
                // the end position is rejected) the button must not stay pressed.
                yield handler.mouseReleaseLeftButton();
            }
            return {
                output: `Dragged from (${command.startX}, ${command.startY}) to (${command.endX}, ${command.endY})`,
            };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        return {
            name: 'mouse_drag_and_drop_tool',
            description: 'Drags the mouse from the specified start coordinates to the specified end coordinates. The top left corner of the screen is (0,0)',
            input_schema: {
                type: 'object',
                properties: {
                    startX: {
                        type: 'number',
                        description: 'The x (pixels from the left edge) coordinate of the start position',
                    },
                    startY: {
                        type: 'number',
                        description: 'The y (pixels from the top edge) coordinate of the start position',
                    },
                    endX: {
                        type: 'number',
                        description: 'The x (pixels from the left edge) coordinate of the end position',
                    },
                    endY: {
                        type: 'number',
                        description: 'The y (pixels from the top edge) coordinate of the end position',
                    },
                },
                required: ['startX', 'startY', 'endX', 'endY'],
            },
        };
    }
}
/**
 * Agent tool that presses the left mouse button and keeps it held.
 */
export class MouseHoldLeftButtonDownTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the mouse action */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /** @returns promise of `{ output }` confirming the button is held */
    execute() {
        return __awaiter(this, void 0, void 0, function* () {
            const handler = this.osAgentHandler;
            yield handler.mouseHoldLeftButtonDown();
            const output = 'Holding down left mouse button';
            return { output };
        });
    }
    /** @returns the tool definition (name, description, empty input schema) */
    toParams() {
        const input_schema = { type: 'object', properties: {}, required: [] };
        return {
            name: 'mouse_hold_left_button_down_tool',
            description: 'Hold down the left mouse button at the current position.',
            input_schema,
        };
    }
}
/**
 * Agent tool that releases the left mouse button.
 */
export class MouseReleaseLeftButtonTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the mouse action */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /** @returns promise of `{ output }` confirming the release */
    execute() {
        return __awaiter(this, void 0, void 0, function* () {
            const handler = this.osAgentHandler;
            yield handler.mouseReleaseLeftButton();
            const output = 'Released left mouse button';
            return { output };
        });
    }
    /** @returns the tool definition (name, description, empty input schema) */
    toParams() {
        const input_schema = { type: 'object', properties: {}, required: [] };
        return {
            name: 'mouse_release_left_button_tool',
            description: 'Release the left mouse button at the current position.',
            input_schema,
        };
    }
}
/**
 * Agent tool that presses and releases a desktop key, optionally with modifiers.
 */
export class DesktopPressAndReleaseKeysTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the key press */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Presses `command.key` together with `command.modifiers` (none when omitted).
     *
     * @param command object carrying `key` and optionally `modifiers`
     * @returns promise of `{ output }` describing the key press
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            let modifiers = command.modifiers;
            if (!modifiers) {
                modifiers = [];
            }
            yield this.osAgentHandler.desktopKeyPressAndRelease(command.key, modifiers);
            const output = `Pressed key ${command.key} with modifiers ${modifiers.join(' ')}`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const key = {
            type: 'string',
            enum: PC_KEY_VALUES,
            description: 'The key to press',
        };
        const modifiers = {
            type: 'array',
            items: {
                type: 'string',
                enum: MODIFIER_KEY_VALUES,
            },
            description: 'The modifiers to press',
        };
        return {
            name: 'desktop_key_press_sequence_tool',
            description: 'Presses a key with optional modifiers',
            input_schema: {
                type: 'object',
                properties: { key, modifiers },
                required: ['key'],
            },
        };
    }
}
/**
 * Agent tool that presses a desktop key (and optional modifiers) without releasing it.
 */
export class DesktopKeyHoldDownTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the key action */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Holds down `command.key` together with `command.modifiers` (none when omitted).
     *
     * @param command object carrying `key` and optionally `modifiers`
     * @returns promise of `{ output }` describing the held keys
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            let modifiers = command.modifiers;
            if (!modifiers) {
                modifiers = [];
            }
            yield this.osAgentHandler.desktopKeyHoldDown(command.key, modifiers);
            const output = `Holding down key ${command.key} with modifiers ${modifiers.join(' ')}`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const key = {
            type: 'string',
            enum: [...PC_KEY_VALUES, ...MODIFIER_KEY_VALUES],
            description: 'The key to hold down',
        };
        const modifiers = {
            type: 'array',
            items: {
                type: 'string',
                enum: MODIFIER_KEY_VALUES,
            },
            description: 'The modifiers to hold down',
        };
        return {
            name: 'desktop_key_hold_down_tool',
            description: 'Hold down a key and optional modifiers. Keys will be still pressed after the tool is finished.',
            input_schema: {
                type: 'object',
                properties: { key, modifiers },
                required: ['key'],
            },
        };
    }
}
/**
 * Agent tool that releases a desktop key (and optional modifiers).
 */
export class DesktopKeyReleaseTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the key action */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Releases `command.key` together with `command.modifiers` (none when omitted).
     *
     * @param command object carrying `key` and optionally `modifiers`
     * @returns promise of `{ output }` describing the released keys
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            let modifiers = command.modifiers;
            if (!modifiers) {
                modifiers = [];
            }
            yield this.osAgentHandler.desktopKeyRelease(command.key, modifiers);
            const output = `Released key ${command.key} with modifiers ${modifiers.join(' ')}`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const key = {
            type: 'string',
            enum: [...PC_KEY_VALUES, ...MODIFIER_KEY_VALUES],
            description: 'The key to release',
        };
        const modifiers = {
            type: 'array',
            items: {
                type: 'string',
                enum: MODIFIER_KEY_VALUES,
            },
            description: 'The modifiers to release',
        };
        return {
            name: 'desktop_key_release_tool',
            description: 'Releases a key and optional modifiers. This can be used after keys were held down with the desktop_key_hold_down_tool',
            input_schema: {
                type: 'object',
                properties: { key, modifiers },
                required: ['key'],
            },
        };
    }
}
/**
 * Agent tool that types text through the OS agent handler.
 */
export class TypeTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the typing */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Types `command.text`.
     *
     * @param command object carrying the `text` to type
     * @returns promise of `{ output }` echoing the typed text
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const handler = this.osAgentHandler;
            yield handler.typeText(command.text);
            const output = `Typed text: ${command.text}`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const text = {
            type: 'string',
            description: 'The text to type',
        };
        return {
            name: 'type_tool',
            description: 'Types the specified text',
            input_schema: {
                type: 'object',
                properties: { text },
                required: ['text'],
            },
        };
    }
}
/**
 * Agent tool that presses one Android key by running an `input keyevent` shell command.
 */
export class AndroidSingleKeyPressTool extends BaseAgentTool {
    /** @param osAgentHandler handler that executes the shell command */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Sends `input keyevent` with the upper-cased `command.key`.
     *
     * @param command object carrying the `key` to press
     * @returns promise of `{ output }` naming the pressed key
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const keyEvent = command.key.toUpperCase();
            yield this.osAgentHandler.executeShellCommand(`input keyevent ${keyEvent}`);
            const output = `Pressed Android key ${command.key}`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const key = {
            type: 'string',
            enum: ANDROID_KEY_VALUES,
            description: 'The Android key to press',
        };
        return {
            name: 'android_single_key_press_tool',
            description: 'Presses a single Android key',
            input_schema: {
                type: 'object',
                properties: { key },
                required: ['key'],
            },
        };
    }
}
/**
 * Agent tool that presses several Android keys with one `input keyevent` shell command.
 */
export class AndroidSequenceKeyPressTool extends BaseAgentTool {
    /** @param osAgentHandler handler that executes the shell command */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Sends `input keyevent` followed by the upper-cased keys separated by spaces.
     *
     * @param command object carrying the `keys` array
     * @returns promise of `{ output }` listing the pressed keys
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const keyEvents = command.keys.map((key) => key.toUpperCase());
            yield this.osAgentHandler.executeShellCommand(`input keyevent ${keyEvents.join(' ')}`);
            const output = `Pressed Android keys: ${command.keys.join(', ')}`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const keys = {
            type: 'array',
            items: {
                type: 'string',
                enum: ANDROID_KEY_VALUES,
            },
            description: 'The sequence of Android keys to press',
        };
        return {
            name: 'android_sequence_key_press_tool',
            description: 'Presses a sequence of Android keys',
            input_schema: {
                type: 'object',
                properties: { keys },
                required: ['keys'],
            },
        };
    }
}
/**
 * Agent tool that lets the agent abort by raising an `AgentError`.
 */
export class AgentErrorTool extends BaseAgentTool {
    constructor() {
        super();
    }
    /**
     * Always fails with an `AgentError` built from `command.error`.
     *
     * @param command object carrying the `error` message
     * @returns promise that rejects with the `AgentError`
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const failure = new AgentError(command.error);
            throw failure;
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const error = {
            type: 'string',
            description: 'The error message to raise',
        };
        return {
            name: 'agent_error_tool',
            description: 'Intentionally raises an error to signal that the agent cannot proceed with the current task. Use this when the agent encounters an unsolvable problem, gets stuck in a loop, or needs to communicate a critical failure that prevents further automation.',
            input_schema: {
                type: 'object',
                properties: { error },
                required: ['error'],
            },
        };
    }
}
/**
 * Agent tool that runs a shell command through the OS agent handler.
 */
export class ExecuteShellCommandTool extends BaseAgentTool {
    /** @param osAgentHandler handler that executes the shell command */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Executes `command.command`; the command's own output is not captured.
     *
     * @param command object carrying the `command` string
     * @returns promise of `{ output }` echoing the executed command
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const handler = this.osAgentHandler;
            yield handler.executeShellCommand(command.command);
            const output = `Executed shell command: ${command.command}`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const properties = {
            command: {
                type: 'string',
                description: 'The shell command to execute',
            },
        };
        return {
            name: 'execute_shell_command_tool',
            description: 'Executes a shell command. It does not return the output of the command.',
            input_schema: { type: 'object', properties, required: ['command'] },
        };
    }
}
/**
 * Agent tool that pauses for a number of milliseconds.
 */
export class WaitTool extends BaseAgentTool {
    constructor() {
        super();
    }
    /**
     * Waits on a timer of `command.milliseconds` before reporting back.
     *
     * @param command object carrying the `milliseconds` to wait
     * @returns promise of `{ output }` stating how long was waited
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const timer = new Promise((done) => setTimeout(done, command.milliseconds));
            yield timer;
            const output = `Waited for ${command.milliseconds} milliseconds`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const properties = {
            milliseconds: {
                type: 'number',
                description: 'The number of milliseconds to wait',
            },
        };
        return {
            name: 'wait_tool',
            description: 'Waits for a specified number of milliseconds',
            input_schema: { type: 'object', properties, required: ['milliseconds'] },
        };
    }
}
/**
 * Agent tool that writes text to the console.
 */
export class PrintTool extends BaseAgentTool {
    constructor() {
        super();
    }
    /**
     * Logs `command.text` with `console.log`.
     *
     * @param command object carrying the `text` to print
     * @returns promise of `{ output }` echoing the printed text
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const message = command.text;
            console.log(message);
            const output = `Printed text: ${command.text}`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const properties = {
            text: {
                type: 'string',
                description: 'The text to output to the console.',
            },
        };
        return {
            name: 'print_tool',
            description: 'Outputs text to the console for debugging, status updates, or user communication. Useful for providing feedback about automation progress, errors, or important information during test execution.',
            input_schema: { type: 'object', properties, required: ['text'] },
        };
    }
}
/**
 * Agent tool that swipes between two points on an Android screen.
 */
export class AndroidSwipeTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the swipe */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Delegates to the handler's `AndroidSwipeTool` method with the four coordinates.
     *
     * @param command object carrying `startX`, `startY`, `endX` and `endY`
     * @returns promise of `{ output }` describing the swipe
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const handler = this.osAgentHandler;
            yield handler.AndroidSwipeTool(command.startX, command.startY, command.endX, command.endY);
            const output = `Swiped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const coordinate = (description) => ({ type: 'number', description });
        const properties = {
            startX: coordinate('The x (pixels from the left edge) coordinate of the start position'),
            startY: coordinate('The y (pixels from the top edge) coordinate of the start position'),
            endX: coordinate('The x (pixels from the left edge) coordinate of the end position'),
            endY: coordinate('The y (pixels from the top edge) coordinate of the end position'),
        };
        return {
            name: 'android_swipe_tool',
            description: 'Swipes from a starting point to an ending point on the screen',
            input_schema: {
                type: 'object',
                properties,
                required: ['startX', 'startY', 'endX', 'endY'],
            },
        };
    }
}
/**
 * Agent tool that drags and drops between two points on an Android screen.
 */
export class AndroidDragAndDropTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the drag and drop */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Delegates to the handler's `AndroidDragAndDropTool` method with the four coordinates.
     *
     * @param command object carrying `startX`, `startY`, `endX` and `endY`
     * @returns promise of `{ output }` describing the drag and drop
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const handler = this.osAgentHandler;
            yield handler.AndroidDragAndDropTool(command.startX, command.startY, command.endX, command.endY);
            const output = `Dragged and dropped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const coordinate = (description) => ({ type: 'number', description });
        const properties = {
            startX: coordinate('The x (pixels from the left edge) coordinate of the start position'),
            startY: coordinate('The y (pixels from the top edge) coordinate of the start position'),
            endX: coordinate('The x (pixels from the left edge) coordinate of the end position'),
            endY: coordinate('The y (pixels from the top edge) coordinate of the end position'),
        };
        return {
            name: 'android_drag_and_drop_tool',
            description: 'Drags and drops from a starting point to an ending point on the screen',
            input_schema: {
                type: 'object',
                properties,
                required: ['startX', 'startY', 'endX', 'endY'],
            },
        };
    }
}
/**
 * Agent tool that taps a point on an Android screen.
 */
export class AndroidTapTool extends BaseAgentTool {
    /** @param osAgentHandler handler that performs the tap */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Delegates to the handler's `AndroidTapTool` method with `command.x` / `command.y`.
     *
     * @param command object carrying the `x` and `y` coordinates
     * @returns promise of `{ output }` describing the tap
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const handler = this.osAgentHandler;
            yield handler.AndroidTapTool(command.x, command.y);
            const output = `Tapped the screen at ${command.x}, ${command.y}`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const properties = {
            x: {
                type: 'number',
                description: 'The x (pixels from the left edge) coordinate of the tap position',
            },
            y: {
                type: 'number',
                description: 'The y (pixels from the top edge) coordinate of the tap position',
            },
        };
        return {
            name: 'android_tap_tool',
            description: 'Taps the screen at the specified coordinates',
            input_schema: { type: 'object', properties, required: ['x', 'y'] },
        };
    }
}
/**
 * Agent tool that runs a shell command on the Android device.
 */
export class AndroidShellCommandTool extends BaseAgentTool {
    /** @param osAgentHandler handler that executes the Android shell command */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Passes `command.command` to the handler's `executeAndroidShellCommand`.
     *
     * @param command object carrying the `command` string
     * @returns promise of `{ output }` echoing the command as it was given
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const handler = this.osAgentHandler;
            yield handler.executeAndroidShellCommand(command.command);
            const output = `Executed shell command: ${command.command}`;
            return { output };
        });
    }
    /** @returns the tool definition (name, description, input schema) */
    toParams() {
        const properties = {
            command: {
                type: 'string',
                description: 'The shell command to execute without the "adb shell" prefix',
            },
        };
        return {
            name: 'android_shell_command_tool',
            description: 'Executes a shell command on the Android device. It does not return the output of the command.',
            input_schema: { type: 'object', properties, required: ['command'] },
        };
    }
}