UNPKG

@gacua/backend

Version:

GACUA Backend

219 lines 10 kB
/**
 * @license
 * Copyright 2025 MuleRun
 * SPDX-License-Identifier: Apache-2.0
 */
import sharp from 'sharp';

/** Side length, in pixels, that every square crop is resized to. */
const CROP_OUTPUT_SIDE = 768;

/** Divisor applied to incoming box/point coordinates before scaling them to the crop side. */
const NORMALIZED_COORDINATE_RANGE = 1000;

/**
 * Renders a resolution for error messages.
 *
 * @param {{width: number, height: number} | null | undefined} resolution
 * @returns {string} `"<width>x<height>"`, or the stringified value when it is nullish.
 */
function formatResolution(resolution) {
  return resolution
    ? `${resolution.width}x${resolution.height}`
    : String(resolution);
}

/**
 * Draws an SVG overlay on top of an image.
 *
 * @param {{buffer: Buffer}} image - Image whose `buffer` is used as the base layer.
 * @param {string} svg - SVG markup composited at the top-left corner.
 * @returns {Promise<object>} A copy of `image` with `buffer` replaced by the composited result.
 */
async function compositeSvgOverlay(image, svg) {
  return {
    ...image,
    buffer: await sharp(image.buffer)
      .composite([{ input: Buffer.from(svg), top: 0, left: 0 }])
      .toBuffer(),
  };
}

/**
 * Describes a screen of a fixed resolution and how it is split into
 * overlapping square crops along its longer side.
 */
class Screen {
  resolution;
  cropDirection;
  cropSquareSideLength;
  squareStartingPoints;

  /**
   * @param {{width: number, height: number}} resolution - Screen size in pixels.
   */
  constructor(resolution) {
    this.resolution = resolution;
    this.setCropConfiguration();
  }

  /**
   * Computes `cropDirection`, `cropSquareSideLength` and `squareStartingPoints`
   * from `resolution`.
   *
   * Squares have the side of the shorter screen dimension and start every half
   * side along the longer dimension; a final square is aligned with the far
   * edge so the whole screen is covered.
   */
  setCropConfiguration() {
    const { width, height } = this.resolution;
    const isWide = width > height;
    this.cropDirection = isWide ? 'vertical' : 'horizontal';
    this.cropSquareSideLength = Math.min(width, height);

    const side = this.cropSquareSideLength;
    const longSide = isWide ? width : height;
    const cropStep = Math.round(side * 0.5);

    // Offsets of each square along the longer dimension.
    const offsets = [0];
    for (let offset = cropStep; offset + side < longSide; offset += cropStep) {
      offsets.push(offset);
    }
    // Flush the last square against the far edge unless one already sits there.
    const finalOffset = longSide - side;
    if (finalOffset > offsets.at(-1)) {
      offsets.push(finalOffset);
    }

    this.squareStartingPoints = offsets.map((offset) =>
      isWide ? { x: offset, y: 0 } : { x: 0, y: offset },
    );
  }

  /**
   * Cuts the image into the configured squares and resizes each one to
   * 768x768 (stretching with `fit: 'fill'`).
   *
   * @param {{buffer: Buffer, resolution: {width: number, height: number}}} image
   * @returns {Promise<Array<{buffer: Buffer, resolution: {width: number, height: number}, mimeType: string}>>}
   *   One entry per starting point, in `squareStartingPoints` order.
   * @throws {Error} If the image resolution differs from the screen resolution.
   */
  async crop(image) {
    this.checkImageResolution(image);
    const side = this.cropSquareSideLength;
    return Promise.all(
      this.squareStartingPoints.map(async ({ x, y }) => ({
        buffer: await sharp(image.buffer)
          .extract({ left: x, top: y, width: side, height: side })
          .resize(CROP_OUTPUT_SIDE, CROP_OUTPUT_SIDE, { fit: 'fill' })
          // Encode explicitly so the bytes always match the declared mimeType.
          .png()
          .toBuffer(),
        resolution: { width: CROP_OUTPUT_SIDE, height: CROP_OUTPUT_SIDE },
        mimeType: 'image/png',
      })),
    );
  }

  /**
   * Maps a point (or the center of a box) expressed relative to one crop
   * onto full-screen pixel coordinates.
   *
   * @param {object} args
   * @param {number} args.index - Index into `squareStartingPoints`.
   * @param {{x: number, y: number} | {ymin: number, xmin: number, ymax: number, xmax: number}} args.boxOrCoordinate
   *   Values are divided by 1000 and scaled by the crop side length.
   * @returns {{x: number, y: number}} Screen-space pixel coordinate.
   * @throws {RangeError} If `index` does not refer to an existing crop.
   */
  toScreenCoordinate({ index, boxOrCoordinate }) {
    const startPoint = this.squareStartingPoints[index];
    if (!startPoint) {
      throw new RangeError(
        `Invalid crop index ${index}: expected an integer between 0 and ${this.squareStartingPoints.length - 1}`,
      );
    }
    const center =
      'ymin' in boxOrCoordinate
        ? Screen.getBoxCenter(boxOrCoordinate)
        : boxOrCoordinate;
    const scale = this.cropSquareSideLength / NORMALIZED_COORDINATE_RANGE;
    return {
      x: startPoint.x + Math.round(center.x * scale),
      y: startPoint.y + Math.round(center.y * scale),
    };
  }

  /**
   * Maps a crop-relative box onto a full-screen pixel rectangle.
   *
   * @param {object} args
   * @param {number} args.index - Index into `squareStartingPoints`.
   * @param {{ymin: number, xmin: number, ymax: number, xmax: number}} args.box
   * @returns {{left: number, top: number, rectWidth: number, rectHeight: number}}
   */
  toScreenRectangle({ index, box }) {
    const { ymin, xmin, ymax, xmax } = box;
    const { x: left, y: top } = this.toScreenCoordinate({
      index,
      boxOrCoordinate: { x: xmin, y: ymin },
    });
    const { x: right, y: bottom } = this.toScreenCoordinate({
      index,
      boxOrCoordinate: { x: xmax, y: ymax },
    });
    return { left, top, rectWidth: right - left, rectHeight: bottom - top };
  }

  /**
   * Dims everything outside the given box and outlines the box.
   *
   * @param {{buffer: Buffer, resolution: {width: number, height: number}}} image
   * @param {{index: number, box: object}} indexAndBox - Crop index and crop-relative box.
   * @param {string} [color='gray'] - Outline stroke color.
   * @param {number} [width=1] - Outline stroke width.
   * @returns {Promise<object>} Copy of `image` with the overlay drawn into `buffer`.
   * @throws {Error} If the image resolution differs from the screen resolution.
   */
  async highlightBox(image, indexAndBox, color = 'gray', width = 1) {
    this.checkImageResolution(image);
    const { left, top, rectWidth, rectHeight } =
      this.toScreenRectangle(indexAndBox);
    const { width: imageWidth, height: imageHeight } = image.resolution;
    const svgMask = `
      <svg width="${imageWidth}" height="${imageHeight}">
        <defs>
          <mask id="vignetteMask">
            <rect x="0" y="0" width="${imageWidth}" height="${imageHeight}" fill="white" />
            <rect x="${left}" y="${top}" width="${rectWidth}" height="${rectHeight}" fill="black" />
          </mask>
        </defs>
        <rect x="0" y="0" width="${imageWidth}" height="${imageHeight}" fill="black" mask="url(#vignetteMask)" opacity="0.5" />
        <rect x="${left}" y="${top}" width="${rectWidth}" height="${rectHeight}" stroke="${color}" stroke-width="${width}" fill="none" />
      </svg>`;
    return compositeSvgOverlay(image, svgMask);
  }

  /**
   * Dims everything outside two boxes, outlines both, and draws an arrow from
   * the center of the first box to the center of the second.
   *
   * @param {{buffer: Buffer, resolution: {width: number, height: number}}} image
   * @param {{index: number, box: object}} startingIndexAndBox - Box the arrow starts from.
   * @param {{index: number, box: object}} endingIndexAndBox - Box the arrow points to.
   * @param {string} [borderColor='gray'] - Outline stroke color for both boxes.
   * @param {number} [borderWidth=1] - Outline stroke width for both boxes.
   * @param {string} [arrowColor='gray'] - Arrow line and arrowhead color.
   * @param {number} [arrowWidth=1] - Arrow line stroke width.
   * @returns {Promise<object>} Copy of `image` with the overlay drawn into `buffer`.
   * @throws {Error} If the image resolution differs from the screen resolution.
   */
  async highlightConnectedBoxes(
    image,
    startingIndexAndBox,
    endingIndexAndBox,
    borderColor = 'gray',
    borderWidth = 1,
    arrowColor = 'gray',
    arrowWidth = 1,
  ) {
    this.checkImageResolution(image);
    const {
      left: box1Left,
      top: box1Top,
      rectWidth: box1Width,
      rectHeight: box1Height,
    } = this.toScreenRectangle(startingIndexAndBox);
    const {
      left: box2Left,
      top: box2Top,
      rectWidth: box2Width,
      rectHeight: box2Height,
    } = this.toScreenRectangle(endingIndexAndBox);

    const arrowStartX = Math.round(box1Left + box1Width / 2);
    const arrowStartY = Math.round(box1Top + box1Height / 2);
    const arrowEndX = Math.round(box2Left + box2Width / 2);
    const arrowEndY = Math.round(box2Top + box2Height / 2);

    const { width: imageWidth, height: imageHeight } = image.resolution;
    const svgMask = `
      <svg width="${imageWidth}" height="${imageHeight}">
        <defs>
          <mask id="vignetteMask">
            <rect x="0" y="0" width="${imageWidth}" height="${imageHeight}" fill="white" />
            <rect x="${box1Left}" y="${box1Top}" width="${box1Width}" height="${box1Height}" fill="black" />
            <rect x="${box2Left}" y="${box2Top}" width="${box2Width}" height="${box2Height}" fill="black" />
          </mask>
          <marker id="arrowhead" markerWidth="10" markerHeight="7" refX="0" refY="3.5" orient="auto">
            <polygon points="0 0, 10 3.5, 0 7" fill="${arrowColor}" />
          </marker>
        </defs>
        <rect x="0" y="0" width="${imageWidth}" height="${imageHeight}" fill="black" mask="url(#vignetteMask)" opacity="0.5" />
        <rect x="${box1Left}" y="${box1Top}" width="${box1Width}" height="${box1Height}" stroke="${borderColor}" stroke-width="${borderWidth}" fill="none" />
        <rect x="${box2Left}" y="${box2Top}" width="${box2Width}" height="${box2Height}" stroke="${borderColor}" stroke-width="${borderWidth}" fill="none" />
        <line x1="${arrowStartX}" y1="${arrowStartY}" x2="${arrowEndX}" y2="${arrowEndY}" stroke="${arrowColor}" stroke-width="${arrowWidth}" marker-end="url(#arrowhead)" />
      </svg>`;
    return compositeSvgOverlay(image, svgMask);
  }

  /**
   * Ensures the image has the same width and height as this screen.
   *
   * Dimensions are compared by value, so an image carrying a copied
   * resolution object of the same size is accepted.
   *
   * @param {{resolution: {width: number, height: number}}} image
   * @throws {Error} If the width or height differs from the screen resolution.
   */
  checkImageResolution(image) {
    const actual = image.resolution;
    const expected = this.resolution;
    if (actual === expected) {
      return;
    }
    if (
      actual?.width !== expected.width ||
      actual?.height !== expected.height
    ) {
      throw new Error(
        `Image resolution does not match screen resolution: ${formatResolution(actual)} != ${formatResolution(expected)}`,
      );
    }
  }

  /**
   * @param {{ymin: number, xmin: number, ymax: number, xmax: number}} box
   * @returns {{x: number, y: number}} Midpoint of the box, rounded down on each axis.
   */
  static getBoxCenter(box) {
    const { ymin, xmin, ymax, xmax } = box;
    return {
      x: Math.floor((xmin + xmax) / 2),
      y: Math.floor((ymin + ymax) / 2),
    };
  }
}

let screen; // This refreshes every time a screenshot is taken.

/**
 * Returns the screen created by the most recent `takeScreenshot` call.
 *
 * @returns {Screen}
 * @throws {Error} If no screenshot has been taken yet.
 */
function requireScreen() {
  if (!screen) {
    throw new Error(
      'Screen is not initialized: takeScreenshot() must be called first',
    );
  }
  return screen;
}

/**
 * Decodes a screenshot tool result and re-creates the module-level screen
 * from the decoded image's dimensions.
 *
 * @param {Array<object>} screenshotToolResult - Array whose last element has
 *   the shape `{ inlineData: { mimeType: 'image/png', data: <base64 string> } }`.
 * @returns {Promise<{buffer: Buffer, resolution: {width: number, height: number}, mimeType: string}>}
 * @throws {Error} If the result is malformed, is not a PNG, or has no readable dimensions.
 */
export async function takeScreenshot(screenshotToolResult) {
  if (!Array.isArray(screenshotToolResult)) {
    throw new Error('Invalid screenshot response format');
  }
  const inlineDataPart = screenshotToolResult.at(-1);
  // `typeof null === 'object'`, and `in` throws a TypeError on null, so null
  // has to be rejected explicitly before the `in` check.
  if (
    inlineDataPart === null ||
    typeof inlineDataPart !== 'object' ||
    !('inlineData' in inlineDataPart)
  ) {
    throw new Error('Invalid screenshot response format');
  }
  const { inlineData } = inlineDataPart;
  if (
    !inlineData ||
    typeof inlineData.mimeType !== 'string' ||
    typeof inlineData.data !== 'string'
  ) {
    throw new Error('Invalid screenshot response format');
  }
  if (inlineData.mimeType !== 'image/png') {
    throw new Error(
      `Invalid screenshot response format: expected image/png, got ${inlineData.mimeType}`,
    );
  }

  const imageBuffer = Buffer.from(inlineData.data, 'base64');
  const { width, height } = await sharp(imageBuffer).metadata();
  if (!width || !height) {
    throw new Error(`Invalid screenshot: width: ${width}, height: ${height}`);
  }

  const resolution = { width, height };
  screen = new Screen(resolution);
  return {
    buffer: imageBuffer,
    resolution,
    mimeType: inlineData.mimeType,
  };
}

/**
 * Splits a screenshot into the current screen's square crops.
 *
 * @param {{buffer: Buffer, resolution: {width: number, height: number}}} screenshot
 * @returns {Promise<Array<{image: object, description: string, nameSuffix: string}>>}
 * @throws {Error} If no screenshot has been taken yet, or the resolution does not match.
 */
export async function cropScreenshot(screenshot) {
  // Capture the screen once so a screenshot taken while cropping is in
  // flight cannot change the labels of this result.
  const activeScreen = requireScreen();
  const { cropDirection } = activeScreen;
  const croppedImages = await activeScreen.crop(screenshot);
  return croppedImages.map((croppedImage, index) => ({
    image: croppedImage,
    description: `${cropDirection}ly cropped screenshot ${index}`,
    nameSuffix: `screenshot_${cropDirection.slice(0, 1)}c${index}`,
  }));
}

/**
 * Wraps an image as an inline-data part.
 *
 * @param {{buffer: Buffer, mimeType: string}} image
 * @returns {{inlineData: {data: string, mimeType: string}}} `data` is the base64-encoded buffer.
 */
export function imageToPart(image) {
  return {
    inlineData: {
      data: image.buffer.toString('base64'),
      mimeType: image.mimeType,
    },
  };
}

/**
 * Maps a crop-relative point or box center onto the current screen.
 *
 * @param {{index: number, boxOrCoordinate: object}} args
 * @returns {{x: number, y: number}}
 * @throws {Error} If no screenshot has been taken yet.
 */
export function toScreenCoords(args) {
  return requireScreen().toScreenCoordinate(args);
}

/**
 * Highlights one box on an image of the current screen.
 *
 * @param {object} image
 * @param {{index: number, box: object}} indexAndBox
 * @param {string} [color='gray']
 * @param {number} [width=1]
 * @returns {Promise<object>}
 * @throws {Error} If no screenshot has been taken yet.
 */
export async function highlightBox(
  image,
  indexAndBox,
  color = 'gray',
  width = 1,
) {
  return requireScreen().highlightBox(image, indexAndBox, color, width);
}

/**
 * Highlights two boxes on an image of the current screen and connects them
 * with an arrow.
 *
 * @param {object} image
 * @param {{index: number, box: object}} startingIndexAndBox
 * @param {{index: number, box: object}} endingIndexAndBox
 * @param {string} [borderColor='gray']
 * @param {number} [borderWidth=1]
 * @param {string} [arrowColor='gray']
 * @param {number} [arrowWidth=1]
 * @returns {Promise<object>}
 * @throws {Error} If no screenshot has been taken yet.
 */
export async function highlightConnectedBoxes(
  image,
  startingIndexAndBox,
  endingIndexAndBox,
  borderColor = 'gray',
  borderWidth = 1,
  arrowColor = 'gray',
  arrowWidth = 1,
) {
  return requireScreen().highlightConnectedBoxes(
    image,
    startingIndexAndBox,
    endingIndexAndBox,
    borderColor,
    borderWidth,
    arrowColor,
    arrowWidth,
  );
}
//# sourceMappingURL=screen.js.map