// @gacua/backend
// Version:
// GACUA Backend
// 219 lines • 10 kB
// JavaScript
/**
* @license
* Copyright 2025 MuleRun
* SPDX-License-Identifier: Apache-2.0
*/
import sharp from 'sharp';
/**
 * Geometry helper for screenshots of one fixed resolution.
 *
 * A screenshot is covered by overlapping square crops whose side equals the
 * shorter screen dimension. The class produces those crops, converts
 * coordinates expressed in thousandths of a crop's side back into full-screen
 * pixels, and draws highlight overlays on the full screenshot.
 */
class Screen {
  /** `{ width, height }` of the screenshots this instance accepts. */
  resolution;
  /** `'vertical'` when width > height, otherwise `'horizontal'`. */
  cropDirection;
  /** Side length of every square crop: `min(width, height)`. */
  cropSquareSideLength;
  /** Top-left `{ x, y }` of each crop; the array index is the crop index. */
  squareStartingPoints;

  /**
   * @param {{width: number, height: number}} resolution - Screen size in pixels.
   * @throws {Error} If either dimension is not a positive integer.
   */
  constructor(resolution) {
    this.resolution = resolution;
    this.setCropConfiguration();
  }

  /**
   * Derives `cropDirection`, `cropSquareSideLength` and
   * `squareStartingPoints` from `this.resolution`.
   *
   * Crops start at offset 0 and advance along the longer axis by half a crop
   * side; a final crop flush with the far edge is appended when the stepped
   * crops do not already reach it.
   *
   * @throws {Error} If either dimension is not a positive integer. A zero
   *   side would give a zero step and the stepping loop would never end.
   */
  setCropConfiguration() {
    const { width, height } = this.resolution;
    const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;
    if (!isPositiveInteger(width) || !isPositiveInteger(height)) {
      throw new Error(`Invalid screen resolution: ${width}x${height}`);
    }

    const slidesAlongX = width > height;
    this.cropDirection = slidesAlongX ? 'vertical' : 'horizontal';
    this.cropSquareSideLength = Math.min(width, height);

    const side = this.cropSquareSideLength;
    const longSide = slidesAlongX ? width : height;
    const cropStep = Math.round(side * 0.5);

    // Offsets of each crop along the longer axis.
    const offsets = [0];
    for (let offset = cropStep; offset + side < longSide; offset += cropStep) {
      offsets.push(offset);
    }
    const finalOffset = longSide - side;
    if (finalOffset > offsets[offsets.length - 1]) {
      offsets.push(finalOffset);
    }

    this.squareStartingPoints = offsets.map((offset) =>
      slidesAlongX ? { x: offset, y: 0 } : { x: 0, y: offset },
    );
  }

  /**
   * Cuts the screenshot into the configured square crops, each resized to
   * 768x768.
   *
   * NOTE(review): no output format is set on the sharp pipeline, so the
   * 'image/png' label presumably relies on the input already being PNG.
   *
   * @param {{buffer: Buffer, resolution: {width: number, height: number}}} image
   * @returns {Promise<Array<{buffer: Buffer, resolution: {width: number, height: number}, mimeType: string}>>}
   *   One entry per element of `squareStartingPoints`, in the same order.
   * @throws {Error} If the image resolution differs from the screen resolution.
   */
  async crop(image) {
    this.checkImageResolution(image);
    const outputSide = 768;
    const side = this.cropSquareSideLength;
    const pipelines = this.squareStartingPoints.map(({ x, y }) =>
      sharp(image.buffer)
        .extract({ left: x, top: y, width: side, height: side })
        .resize(outputSide, outputSide, { fit: 'fill' }),
    );
    return await Promise.all(
      pipelines.map(async (pipeline) => ({
        buffer: await pipeline.toBuffer(),
        resolution: { width: outputSide, height: outputSide },
        mimeType: 'image/png',
      })),
    );
  }

  /**
   * Maps a point (or the center of a box) given in thousandths of a crop's
   * side into full-screen pixel coordinates.
   *
   * @param {object} args
   * @param {number} args.index - Index into `squareStartingPoints`.
   * @param {{x: number, y: number}|{ymin: number, xmin: number, ymax: number, xmax: number}} args.boxOrCoordinate
   *   A point, or a box (detected by the presence of `ymin`) whose center is used.
   * @returns {{x: number, y: number}} Pixel position on the full screen.
   */
  toScreenCoordinate({ index, boxOrCoordinate, }) {
    const center = 'ymin' in boxOrCoordinate
      ? Screen.getBoxCenter(boxOrCoordinate)
      : boxOrCoordinate;
    const side = this.cropSquareSideLength;
    const origin = this.squareStartingPoints[index];
    return {
      x: origin.x + Math.round((center.x / 1000) * side),
      y: origin.y + Math.round((center.y / 1000) * side),
    };
  }

  /**
   * Converts a box given in thousandths of a crop's side into a full-screen
   * pixel rectangle by mapping its two corners.
   *
   * @param {object} args
   * @param {number} args.index - Index into `squareStartingPoints`.
   * @param {{ymin: number, xmin: number, ymax: number, xmax: number}} args.box
   * @returns {{left: number, top: number, rectWidth: number, rectHeight: number}}
   */
  toScreenRectangle({ index, box }) {
    const { ymin, xmin, ymax, xmax } = box;
    const { x: left, y: top } = this.toScreenCoordinate({
      index,
      boxOrCoordinate: { x: xmin, y: ymin },
    });
    const { x: right, y: bottom } = this.toScreenCoordinate({
      index,
      boxOrCoordinate: { x: xmax, y: ymax },
    });
    return { left, top, rectWidth: right - left, rectHeight: bottom - top };
  }

  /**
   * Returns a copy of the image with everything outside the box dimmed
   * (black at 0.5 opacity) and the box outlined.
   *
   * @param {{buffer: Buffer, resolution: {width: number, height: number}}} image
   * @param {{index: number, box: object}} indexAndBox - Passed to `toScreenRectangle`.
   * @param {string} [color='gray'] - Outline stroke color.
   * @param {number} [width=1] - Outline stroke width.
   * @returns {Promise<object>} The image fields with `buffer` replaced.
   * @throws {Error} If the image resolution differs from the screen resolution.
   */
  async highlightBox(image, indexAndBox, color = 'gray', width = 1) {
    this.checkImageResolution(image);
    const { left, top, rectWidth, rectHeight } = this.toScreenRectangle(indexAndBox);
    const svgMask = `
<svg width="${image.resolution.width}" height="${image.resolution.height}">
<defs>
<mask id="vignetteMask">
<rect x="0" y="0" width="${image.resolution.width}" height="${image.resolution.height}" fill="white" />
<rect x="${left}" y="${top}" width="${rectWidth}" height="${rectHeight}" fill="black" />
</mask>
</defs>
<rect x="0" y="0" width="${image.resolution.width}" height="${image.resolution.height}" fill="black" mask="url(#vignetteMask)" opacity="0.5" />
<rect x="${left}" y="${top}" width="${rectWidth}" height="${rectHeight}" stroke="${color}" stroke-width="${width}" fill="none" />
</svg>`;
    return Screen.overlaySvg(image, svgMask);
  }

  /**
   * Returns a copy of the image with two boxes outlined, everything outside
   * them dimmed, and an arrow drawn from the center of the first box to the
   * center of the second.
   *
   * @param {{buffer: Buffer, resolution: {width: number, height: number}}} image
   * @param {{index: number, box: object}} startingIndexAndBox - Arrow origin box.
   * @param {{index: number, box: object}} endingIndexAndBox - Arrow target box.
   * @param {string} [borderColor='gray']
   * @param {number} [borderWidth=1]
   * @param {string} [arrowColor='gray']
   * @param {number} [arrowWidth=1]
   * @returns {Promise<object>} The image fields with `buffer` replaced.
   * @throws {Error} If the image resolution differs from the screen resolution.
   */
  async highlightConnectedBoxes(image, startingIndexAndBox, endingIndexAndBox, borderColor = 'gray', borderWidth = 1, arrowColor = 'gray', arrowWidth = 1) {
    this.checkImageResolution(image);
    const { left: box1Left, top: box1Top, rectWidth: box1Width, rectHeight: box1Height, } = this.toScreenRectangle(startingIndexAndBox);
    const { left: box2Left, top: box2Top, rectWidth: box2Width, rectHeight: box2Height, } = this.toScreenRectangle(endingIndexAndBox);
    const arrowStartX = Math.round(box1Left + box1Width / 2);
    const arrowStartY = Math.round(box1Top + box1Height / 2);
    const arrowEndX = Math.round(box2Left + box2Width / 2);
    const arrowEndY = Math.round(box2Top + box2Height / 2);
    const svgMask = `
<svg width="${image.resolution.width}" height="${image.resolution.height}">
<defs>
<mask id="vignetteMask">
<rect x="0" y="0" width="${image.resolution.width}" height="${image.resolution.height}" fill="white" />
<rect x="${box1Left}" y="${box1Top}" width="${box1Width}" height="${box1Height}" fill="black" />
<rect x="${box2Left}" y="${box2Top}" width="${box2Width}" height="${box2Height}" fill="black" />
</mask>
<marker id="arrowhead" markerWidth="10" markerHeight="7" refX="0" refY="3.5" orient="auto">
<polygon points="0 0, 10 3.5, 0 7" fill="${arrowColor}" />
</marker>
</defs>
<rect x="0" y="0" width="${image.resolution.width}" height="${image.resolution.height}" fill="black" mask="url(#vignetteMask)" opacity="0.5" />
<rect x="${box1Left}" y="${box1Top}" width="${box1Width}" height="${box1Height}" stroke="${borderColor}" stroke-width="${borderWidth}" fill="none" />
<rect x="${box2Left}" y="${box2Top}" width="${box2Width}" height="${box2Height}" stroke="${borderColor}" stroke-width="${borderWidth}" fill="none" />
<line x1="${arrowStartX}" y1="${arrowStartY}" x2="${arrowEndX}" y2="${arrowEndY}" stroke="${arrowColor}" stroke-width="${arrowWidth}" marker-end="url(#arrowhead)" />
</svg>`;
    return Screen.overlaySvg(image, svgMask);
  }

  /**
   * Verifies that the image has the same width and height as this screen.
   *
   * The comparison is by value, so an image whose resolution is an equal but
   * distinct object is accepted.
   *
   * @param {{resolution: {width: number, height: number}}} image
   * @throws {Error} If the resolution is missing or differs in width or height.
   */
  checkImageResolution(image) {
    const actual = image.resolution;
    const expected = this.resolution;
    const matches = actual != null &&
      actual.width === expected.width &&
      actual.height === expected.height;
    if (!matches) {
      throw new Error(`Image resolution does not match screen resolution: ${Screen.formatResolution(actual)} != ${Screen.formatResolution(expected)}`);
    }
  }

  /**
   * Center of a box, rounded down to whole units on each axis.
   *
   * @param {{ymin: number, xmin: number, ymax: number, xmax: number}} box
   * @returns {{x: number, y: number}}
   */
  static getBoxCenter(box) {
    const { ymin, xmin, ymax, xmax } = box;
    return {
      x: Math.floor((xmin + xmax) / 2),
      y: Math.floor((ymin + ymax) / 2),
    };
  }

  /** Renders a resolution as `WIDTHxHEIGHT` for error messages. */
  static formatResolution(resolution) {
    return resolution == null
      ? String(resolution)
      : `${resolution.width}x${resolution.height}`;
  }

  /** Composites an SVG document over the image at (0, 0) and returns the image with the new buffer. */
  static async overlaySvg(image, svgMask) {
    const buffer = await sharp(image.buffer)
      .composite([{ input: Buffer.from(svgMask), top: 0, left: 0 }])
      .toBuffer();
    return { ...image, buffer };
  }
}
let screen; // Replaced with a fresh Screen every time a screenshot is taken.
/**
 * Validates a screenshot tool result, decodes its PNG payload and rebuilds the
 * module-level `screen` for the decoded image's resolution.
 *
 * Only the last element of the result is inspected; it must be an object with
 * an `inlineData` of the form `{ mimeType: 'image/png', data: <base64 string> }`.
 *
 * @param {Array<unknown>} screenshotToolResult - Parts returned by the screenshot tool.
 * @returns {Promise<{buffer: Buffer, resolution: {width: number, height: number}, mimeType: string}>}
 *   The decoded image; `resolution` is the same object held by the new Screen.
 * @throws {Error} If the result is malformed, is not PNG, or has no usable
 *   width/height in its metadata.
 */
export async function takeScreenshot(screenshotToolResult) {
  const formatError = 'Invalid screenshot response format';
  if (!Array.isArray(screenshotToolResult)) {
    throw new Error(formatError);
  }
  const inlineDataPart = screenshotToolResult.at(-1);
  // `typeof null` is 'object', and `'inlineData' in null` would raise a
  // TypeError, so null has to be rejected explicitly before the `in` check.
  if (inlineDataPart === null ||
    typeof inlineDataPart !== 'object' ||
    !('inlineData' in inlineDataPart)) {
    throw new Error(formatError);
  }
  const { inlineData } = inlineDataPart;
  if (!inlineData ||
    typeof inlineData.mimeType !== 'string' ||
    typeof inlineData.data !== 'string') {
    throw new Error(formatError);
  }
  if (inlineData.mimeType !== 'image/png') {
    throw new Error(`Invalid screenshot response format: expected image/png, got ${inlineData.mimeType}`);
  }
  const imageBuffer = Buffer.from(inlineData.data, 'base64');
  const { width, height } = await sharp(imageBuffer).metadata();
  if (!width || !height) {
    throw new Error(`Invalid screenshot: width: ${width}, height: ${height}`);
  }
  const resolution = { width, height };
  screen = new Screen(resolution);
  return {
    buffer: imageBuffer,
    resolution,
    mimeType: inlineData.mimeType,
  };
}
/**
 * Crops a screenshot with the current module-level Screen and labels each crop.
 *
 * @param {object} screenshot - Image passed through to the Screen's `crop`.
 * @returns {Promise<Array<{image: object, description: string, nameSuffix: string}>>}
 *   One entry per crop, in crop order. `description` and `nameSuffix` embed the
 *   crop direction and the crop's index.
 */
export async function cropScreenshot(screenshot) {
  // Pin the Screen in effect at call time: the module-level binding can be
  // replaced by another screenshot while the crop is awaited, and the labels
  // must describe the Screen that actually produced these crops.
  const activeScreen = screen;
  const croppedImages = await activeScreen.crop(screenshot);
  const { cropDirection } = activeScreen;
  const directionInitial = cropDirection.slice(0, 1);
  return croppedImages.map((image, index) => ({
    image,
    description: `${cropDirection}ly cropped screenshot ${index}`,
    nameSuffix: `screenshot_${directionInitial}c${index}`,
  }));
}
/**
 * Wraps an image as an `inlineData` part with a base64-encoded payload.
 *
 * @param {{buffer: Buffer, mimeType: string}} image
 * @returns {{inlineData: {data: string, mimeType: string}}}
 */
export function imageToPart(image) {
  const { buffer, mimeType } = image;
  const data = buffer.toString('base64');
  return { inlineData: { data, mimeType } };
}
/**
 * Forwards to `toScreenCoordinate` on the current module-level Screen.
 *
 * @param {object} args - Passed through unchanged.
 * @returns {*} Whatever the Screen's `toScreenCoordinate` returns.
 */
export function toScreenCoords(args) {
  const screenCoordinate = screen.toScreenCoordinate(args);
  return screenCoordinate;
}
/**
 * Forwards to `highlightBox` on the current module-level Screen.
 *
 * @param {object} image - Image to annotate.
 * @param {object} indexAndBox - Crop index and box to highlight.
 * @param {string} [color='gray'] - Outline color.
 * @param {number} [width=1] - Outline width.
 * @returns {Promise<*>} Resolves to whatever the Screen's `highlightBox` yields.
 */
export async function highlightBox(image, indexAndBox, color = 'gray', width = 1) {
  const pendingHighlight = screen.highlightBox(image, indexAndBox, color, width);
  return pendingHighlight;
}
/**
 * Forwards to `highlightConnectedBoxes` on the current module-level Screen.
 *
 * @param {object} image - Image to annotate.
 * @param {object} startingIndexAndBox - Crop index and box the arrow starts from.
 * @param {object} endingIndexAndBox - Crop index and box the arrow points to.
 * @param {string} [borderColor='gray']
 * @param {number} [borderWidth=1]
 * @param {string} [arrowColor='gray']
 * @param {number} [arrowWidth=1]
 * @returns {Promise<*>} Resolves to whatever the Screen's `highlightConnectedBoxes` yields.
 */
export async function highlightConnectedBoxes(image, startingIndexAndBox, endingIndexAndBox, borderColor = 'gray', borderWidth = 1, arrowColor = 'gray', arrowWidth = 1) {
  const forwardedArguments = [
    image,
    startingIndexAndBox,
    endingIndexAndBox,
    borderColor,
    borderWidth,
    arrowColor,
    arrowWidth,
  ];
  return screen.highlightConnectedBoxes(...forwardedArguments);
}
//# sourceMappingURL=screen.js.map