UNPKG

@gacua/backend

Version:

GACUA Backend

87 lines 4.12 kB
/**
 * @license
 * Copyright 2025 MuleRun
 * SPDX-License-Identifier: Apache-2.0
 */
import { BaseGroundableTool, } from './groundable-tool.js';
import { highlightConnectedBoxes } from '../screen.js';

/**
 * Tool that turns a "drag from A to B" request, expressed as textual
 * descriptions of two UI elements, into a `.computer` `drag_and_drop` call
 * carrying concrete screen coordinates.
 */
export class ComputerDragAndDrop extends BaseGroundableTool {
  /** Function declaration (name + JSON schema of the arguments) for this tool. */
  functionDeclaration = {
    name: 'computer_drag_and_drop',
    parametersJsonSchema: {
      properties: {
        starting_image_id: {
          description: 'The index of the image in the cropped screenshots that contains the element to start the drag action.',
          type: 'number',
          minimum: 0,
        },
        starting_description: {
          description: 'A precise and unambiguous description of the target UI element to drag from. Include its text or icon, and if multiple similar elements exist, add positional details.',
          type: 'string',
        },
        ending_image_id: {
          description: 'The index of the image in the cropped screenshots that contains the element to end the drag action.',
          type: 'number',
          minimum: 0,
        },
        ending_description: {
          // Must stay on a single line: a raw line break inside a quoted
          // string literal is a syntax error.
          description: 'A precise and unambiguous description of the target UI element to drag to. Include its text or icon, and if multiple similar elements exist, add positional details.',
          type: 'string',
        },
        hold_keys: {
          description: 'List of keys to hold while dragging',
          type: 'array',
          items: {
            type: 'string',
          },
          default: [],
        },
      },
      required: [
        'starting_image_id',
        'starting_description',
        'ending_image_id',
        'ending_description',
      ],
      type: 'object',
    },
  };

  /**
   * Locates the drag source and the drop target and prepares the grounded
   * action.
   *
   * @param {object} args - Tool arguments shaped like
   *   `functionDeclaration.parametersJsonSchema`.
   * @param screenshot - Screenshot handed to `highlightConnectedBoxes` when
   *   the annotated preview is rendered.
   * @param croppedScreenshotParts - Forwarded to `detectAndTransform` together
   *   with the image ids from `args`.
   * @param {Function} detectElement - Called as
   *   `detectElement(imagePart, prompt)` to locate an element.
   * @returns {Promise<string | {getDescription: Function, value: Function}>}
   *   The string produced by `detectAndTransform` when either lookup yields
   *   one (presumably a failure message - confirm against
   *   `BaseGroundableTool`), otherwise an object with:
   *   - `getDescription(saveImage)`: async; resolves to
   *     `[{ text }, { imageFileName }]` describing the drag, where the image
   *     is the annotated screenshot stored through `saveImage`;
   *   - `value()`: returns the `.computer` `drag_and_drop` call.
   */
  async ground(args, screenshot, croppedScreenshotParts, detectElement) {
    // The schema advertises `default: []` for hold_keys, but nothing applies
    // it automatically, so do it here to keep the emitted call well-formed
    // when the caller omits the field.
    const holdKeys = args.hold_keys ?? [];

    const startingResult = await this.detectAndTransform(
      args.starting_image_id,
      croppedScreenshotParts,
      async (imagePart) => detectElement(imagePart, `Drag from: ${args.starting_description}`),
    );
    if (typeof startingResult === 'string') {
      // Returning here deliberately skips the second detection call.
      return startingResult;
    }
    const {
      indexAndBox: startingIndexAndBox,
      screenCoordinate: startingScreenCoordinate,
    } = startingResult;

    const endingResult = await this.detectAndTransform(
      args.ending_image_id,
      croppedScreenshotParts,
      async (imagePart) => detectElement(imagePart, `Drag to: ${args.ending_description}`),
    );
    if (typeof endingResult === 'string') {
      return endingResult;
    }
    const {
      indexAndBox: endingIndexAndBox,
      screenCoordinate: endingScreenCoordinate,
    } = endingResult;

    /**
     * Builds the description of the pending drag: a text part plus the file
     * name of the annotated screenshot saved via `saveImage`.
     */
    async function getDescription(saveImage) {
      let description = 'Drag and drop from here to there';
      if (holdKeys.length > 0) {
        description += ` while holding ${holdKeys.join(' + ')}`;
      }
      description += ':';
      const annotatedImage = await highlightConnectedBoxes(
        screenshot,
        startingIndexAndBox,
        endingIndexAndBox,
      );
      const annotatedImageFileName = await saveImage(
        annotatedImage.buffer,
        'screenshot_annotated',
      );
      return [{ text: description }, { imageFileName: annotatedImageFileName }];
    }

    /** Returns the grounded `.computer` call with start and end coordinates. */
    function value() {
      return {
        name: '.computer',
        args: {
          action: 'drag_and_drop',
          coordinate: [startingScreenCoordinate.x, startingScreenCoordinate.y],
          target_coordinate: [
            endingScreenCoordinate.x,
            endingScreenCoordinate.y,
          ],
          hold_keys: holdKeys,
        },
      };
    }

    return { getDescription, value };
  }
}
//# sourceMappingURL=drag-and-drop.js.map