@gacua/backend
GACUA Backend (JavaScript)
/**
 * @license
 * Copyright 2025 MuleRun
 * SPDX-License-Identifier: Apache-2.0
 */
import { BaseGroundableTool } from './groundable-tool.js';
import { highlightBox } from '../screen.js';
export class ComputerType extends BaseGroundableTool {
    functionDeclaration = {
        name: 'computer_type',
        parametersJsonSchema: {
            properties: {
                image_id: {
                    description: 'The index of the image in the cropped screenshots that contains the element to enter text in. This is optional if the input box is already focused.',
                    type: 'number',
                    minimum: 0,
                },
                element_description: {
                    description: 'A precise and unambiguous description of the target UI element to enter text in. Include its text or icon, and if multiple similar elements exist, add positional details.',
                    type: 'string',
                },
                text: {
                    description: 'The text to type',
                    type: 'string',
                },
                overwrite: {
                    description: 'Set to true if the text should overwrite the existing text; all text in the element is cleared before typing. Otherwise set to false.',
                    type: 'boolean',
                    default: false,
                },
                enter: {
                    description: 'Set to true if the Enter key should be pressed after typing the text; otherwise set to false.',
                    type: 'boolean',
                    default: false,
                },
            },
            required: ['text'],
            type: 'object',
        },
    };
    validate(args) {
        return this.validateImageElementPair(args);
    }
    async ground(args, screenshot, croppedScreenshotParts, detectElement) {
        let indexAndBox;
        let screenCoordinate;
        // Only ground to a specific element when both an image index and a
        // description were provided; otherwise type into the focused field.
        if (args.image_id !== undefined && args.element_description !== undefined) {
            const result = await this.detectAndTransform(args.image_id, croppedScreenshotParts, async (imagePart) => detectElement(imagePart, 'Click on here to type: ' + args.element_description));
            if (typeof result === 'string') {
                // Detection failed; return the error message as-is.
                return result;
            }
            indexAndBox = result.indexAndBox;
            screenCoordinate = result.screenCoordinate;
        }
        async function getDescription(saveImage) {
            let description = `Type "${args.text}"`;
            if (args.overwrite) {
                description = `Clear existing text and type "${args.text}"`;
            }
            if (args.enter) {
                description += ' and press Enter';
            }
            if (indexAndBox && screenCoordinate) {
                // Highlight the detected element on the screenshot so the user
                // can see exactly where the text will be typed.
                description += ' here:';
                const annotatedImage = await highlightBox(screenshot, indexAndBox);
                const annotatedImageFileName = await saveImage(annotatedImage.buffer, 'screenshot_annotated');
                return [
                    { text: description },
                    { imageFileName: annotatedImageFileName },
                ];
            }
            else {
                description += ' in the currently focused input field';
                return [{ text: description }];
            }
        }
        function value() {
            // The concrete computer-use call that performs the typing.
            return {
                name: '.computer',
                args: {
                    action: 'type',
                    coordinate: screenCoordinate
                        ? [screenCoordinate.x, screenCoordinate.y]
                        : undefined,
                    text: args.text,
                    overwrite: args.overwrite,
                    enter: args.enter,
                },
            };
        }
        return { getDescription, value };
    }
}
//# sourceMappingURL=type.js.map
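
A minimal usage sketch of how a caller might drive ComputerType. The shapes of screenshot, croppedScreenshotParts, detectElement, and saveImage below are placeholder assumptions based only on how ground() uses them above, not the actual @gacua/backend APIs.

import { ComputerType } from './type.js';

// Placeholder inputs, for illustration only.
const screenshot = null;                                        // full screenshot (assumed shape)
const croppedScreenshotParts = [];                              // cropped screenshot parts (assumed shape)
const detectElement = async (imagePart, prompt) => 'not found'; // stand-in element detector
const saveImage = async (buffer, name) => `${name}.png`;        // stand-in image writer

const tool = new ComputerType();
// No image_id / element_description: type into the currently focused field.
const args = { text: 'hello world', enter: true };

const grounded = await tool.ground(args, screenshot, croppedScreenshotParts, detectElement);
if (typeof grounded === 'string') {
    // Grounding failed; the string is an error message.
    console.error(grounded);
}
else {
    // Description parts for the user, possibly including an annotated screenshot.
    const parts = await grounded.getDescription(saveImage);
    // The function call payload that performs the typing.
    const call = grounded.value();
    // call.args => { action: 'type', coordinate: undefined, text: 'hello world', overwrite: undefined, enter: true }
}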