@gacua/backend
Version:
GACUA Backend
106 lines • 4.58 kB
JavaScript
/**
* @license
* Copyright 2025 MuleRun
* SPDX-License-Identifier: Apache-2.0
*/
import { BaseGroundableTool, } from './groundable-tool.js';
import { highlightBox } from '../screen.js';
import { logger } from '../../../logger.js';
// Logger for the click tool, created from the shared logger via `child()` with
// the `module: 'computer-click-tool'` field.
const clickToolLogger = logger.child({ module: 'computer-click-tool' });
/**
 * Groundable tool that clicks a UI element identified by a natural-language
 * description.
 *
 * The caller supplies the index of a cropped screenshot plus a description of
 * the target; `ground()` resolves that to a screen coordinate and returns both
 * a human-readable preview and the concrete `.computer` click action.
 */
export class ComputerClick extends BaseGroundableTool {
  /** Name and JSON schema of the arguments accepted by `computer_click`. */
  functionDeclaration = {
    name: 'computer_click',
    parametersJsonSchema: {
      properties: {
        image_id: {
          description:
            'The index of the image in the cropped screenshots that contains the element to click on.',
          type: 'number',
          minimum: 0,
        },
        element_description: {
          description:
            'A precise and unambiguous description of the target UI element to click on. Include its text or icon, and if multiple similar elements exist, add positional details.',
          type: 'string',
        },
        num_clicks: {
          description:
            'Number of times to click the element. Use 2 for double-click to open files or applications in many contexts.',
          type: 'number',
          default: 1,
        },
        button_type: {
          description: 'Which mouse button to press can be "left", "middle", or "right"',
          type: 'string',
          enum: ['left', 'middle', 'right'],
          default: 'left',
        },
        hold_keys: {
          description: 'List of keys to hold while clicking',
          type: 'array',
          items: {
            type: 'string',
          },
          default: [],
        },
      },
      required: ['image_id', 'element_description'],
      type: 'object',
    },
  };

  /**
   * Resolves a `computer_click` call to a concrete screen coordinate.
   *
   * @param {object} args - Call arguments shaped like `functionDeclaration`'s
   *   schema (`image_id`, `element_description`, and the optional
   *   `num_clicks`, `button_type`, `hold_keys`).
   * @param {*} screenshot - Screenshot handed to `highlightBox` when the
   *   preview image is rendered.
   * @param {Array} croppedScreenshotParts - Forwarded to
   *   `detectAndTransform` together with `args.image_id`.
   * @param {Function} detectElement - Invoked with an image part and the
   *   prompt `Click on: <element_description>`.
   * @returns {Promise<string | {getDescription: Function, value: Function}>}
   *   The string produced by `detectAndTransform` when it yields a string
   *   (logged here as a grounding failure); otherwise an object whose
   *   `getDescription(saveImage)` builds the preview parts and whose `value()`
   *   returns the `.computer` click action.
   */
  async ground(args, screenshot, croppedScreenshotParts, detectElement) {
    clickToolLogger.debug(
      {
        imageId: args.image_id,
        elementDescription: args.element_description,
        numClicks: args.num_clicks,
        buttonType: args.button_type,
        holdKeys: args.hold_keys,
      },
      'Grounding click action',
    );

    const result = await this.detectAndTransform(
      args.image_id,
      croppedScreenshotParts,
      async (imagePart) => detectElement(imagePart, `Click on: ${args.element_description}`),
    );
    if (typeof result === 'string') {
      clickToolLogger.warn({ error: result }, 'Failed to ground click action');
      return result;
    }

    const { indexAndBox, screenCoordinate: screenCoords } = result;
    clickToolLogger.debug(
      {
        screenCoords,
        boundingBox: indexAndBox,
      },
      'Click action grounded successfully',
    );

    // Resolve the optional arguments exactly once so that the preview text and
    // the dispatched action can never disagree. Schema `default`s are only
    // annotations, so they have to be applied here. `||` (not `??`) is
    // deliberate: 0 clicks and an empty button name are not meaningful values.
    const numClicks = args.num_clicks || 1;
    const buttonType = args.button_type || 'left';
    // Tolerate a single key given as a bare string; anything else that is not
    // an array is treated as "no keys" instead of crashing on `.join`.
    let holdKeys = [];
    if (Array.isArray(args.hold_keys)) {
      holdKeys = args.hold_keys;
    } else if (typeof args.hold_keys === 'string' && args.hold_keys !== '') {
      holdKeys = [args.hold_keys];
    }

    /**
     * Builds the preview: a sentence describing the click followed by the
     * screenshot annotated with the detected box.
     *
     * @param {Function} saveImage - Called with the annotated image buffer and
     *   the label `'screenshot_annotated'`; its awaited result is used as the
     *   image file name.
     * @returns {Promise<Array<object>>} `[{ text }, { imageFileName }]`.
     */
    async function getDescription(saveImage) {
      const singleClick = buttonType === 'left' ? 'Click' : `${buttonType} click`;
      let action;
      if (numClicks === 2) {
        action = buttonType === 'left' ? 'Double click' : `Double ${buttonType} click`;
      } else if (numClicks > 1) {
        action = `${singleClick} ${numClicks} times`;
      } else {
        action = singleClick;
      }

      const holding = holdKeys.length > 0 ? ` while holding ${holdKeys.join(' + ')}` : '';
      const description = `${action} on here${holding}:`;

      const annotatedImage = await highlightBox(screenshot, indexAndBox);
      const annotatedImageFileName = await saveImage(annotatedImage.buffer, 'screenshot_annotated');
      return [{ text: description }, { imageFileName: annotatedImageFileName }];
    }

    /**
     * Returns the `.computer` click action for the grounded coordinate, using
     * the same resolved click count, button and held keys as the preview.
     */
    function value() {
      return {
        name: '.computer',
        args: {
          action: 'click',
          coordinate: [screenCoords.x, screenCoords.y],
          num_clicks: numClicks,
          button_type: buttonType,
          hold_keys: holdKeys,
        },
      };
    }

    return { getDescription, value };
  }
}
//# sourceMappingURL=click.js.map