UNPKG

@ui-tars/sdk

Version:

A powerful cross-platform (ANY device/platform) toolkit for building GUI automation agents for UI-TARS

111 lines (110 loc) 4.71 kB
/**
 * Copyright (c) 2025 Bytedance, Inc. and its affiliates.
 * SPDX-License-Identifier: Apache-2.0
 */
import * as __WEBPACK_EXTERNAL_MODULE_jimp__ from "jimp";
import * as __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__ from "@ui-tars/shared/constants";
import * as __WEBPACK_EXTERNAL_MODULE__constants_mjs_225410ff__ from "./constants.mjs";

/**
 * Parses a bracketed, comma-separated box string and returns the screen
 * coordinates of the box centre.
 *
 * The string may carry two numbers (a point) or four (x1, y1, x2, y2); when
 * the second pair is absent it defaults to the first pair, so the "centre"
 * is the point itself.
 *
 * @param {object} params
 * @param {string} params.boxStr - e.g. "[x1,y1,x2,y2]" or "[x,y]".
 * @param {number} params.screenWidth - multiplier applied to the x centre.
 * @param {number} params.screenHeight - multiplier applied to the y centre.
 * @param {number[]} [params.factors] - [widthFactor, heightFactor]; defaults
 *   to DEFAULT_FACTORS from ./constants.mjs.
 * @returns {{x: (number|null), y: (number|null)}} `{ x: null, y: null }` when
 *   `boxStr` is falsy; otherwise the computed centre (NaN components if a
 *   number could not be parsed).
 */
const parseBoxToScreenCoords = ({
    boxStr,
    screenWidth,
    screenHeight,
    factors = __WEBPACK_EXTERNAL_MODULE__constants_mjs_225410ff__.DEFAULT_FACTORS
}) => {
    if (!boxStr) {
        return { x: null, y: null };
    }
    // Strip EVERY square bracket. A string-pattern replace only removes the
    // first occurrence, which left stray '[' characters (and therefore NaN)
    // for inputs such as "[0.1,0.2],[0.3,0.4]" or "[[0.1,0.2]]".
    const coords = boxStr
        .replace(/[\[\]]/g, '')
        .split(',')
        .map((num) => parseFloat(num.trim()));
    const [x1, y1, x2 = x1, y2 = y1] = coords;
    const [widthFactor, heightFactor] = factors;
    // Round in "factor space" and then divide back, exactly as the original
    // expression did, so results stay numerically identical.
    return {
        x: Math.round((x1 + x2) / 2 * screenWidth * widthFactor) / widthFactor,
        y: Math.round((y1 + y2) / 2 * screenHeight * heightFactor) / heightFactor
    };
};

/**
 * Caps the number of images at MAX_IMAGE_LENGTH by dropping the oldest ones,
 * and removes the same number of image-placeholder conversation entries
 * (earliest first) so the two lists keep matching.
 *
 * Neither input array is mutated.
 *
 * @param {Array<{value: *}>} conversations
 * @param {Array} images
 * @returns {{images: Array, conversations: Array}}
 */
const processVlmParams = (conversations, images) => {
    const maxImages = __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__.MAX_IMAGE_LENGTH;
    if (!(images.length > maxImages)) {
        return { images, conversations };
    }
    const excessCount = images.length - maxImages;
    let imageCountToRemove = excessCount;
    const keptConversations = conversations.filter((convo) => {
        const isPlaceholder = convo.value === __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__.IMAGE_PLACEHOLDER;
        if (imageCountToRemove > 0 && isPlaceholder) {
            imageCountToRemove--;
            return false;
        }
        return true;
    });
    return {
        images: images.slice(excessCount),
        conversations: keptConversations
    };
};

/**
 * Builds the `{ conversations, images }` payload for the VLM.
 *
 * The system prompt is prepended to the first conversation entry when that
 * entry comes from 'human'. Images are collected from placeholder entries
 * that carry a truthy `screenshotBase64`.
 *
 * @param {object} params
 * @param {Array<{from: string, value: string, screenshotBase64?: string}>} params.conversations
 * @param {string} params.systemPrompt
 * @returns {{conversations: Array<{from: string, value: string}>, images: string[]}}
 */
const toVlmModelFormat = ({ conversations, systemPrompt }) => ({
    conversations: conversations.map((conv, idx) => {
        if (0 === idx && 'human' === conv.from) {
            return {
                from: conv.from,
                value: `${systemPrompt}${conv.value}`
            };
        }
        return {
            from: conv.from,
            value: conv.value
        };
    }),
    images: conversations
        .filter((conv) => conv.value === __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__.IMAGE_PLACEHOLDER && !!conv.screenshotBase64)
        .map((conv) => conv.screenshotBase64)
});

/**
 * Removes every "Reflection:" section from a prediction string. A section
 * runs up to (but not including) the next "Action_Summary:" or "Action:"
 * marker, or to the end of the string. The result is trimmed.
 *
 * @param {string} prediction
 * @returns {string}
 */
const getSummary = (prediction) =>
    prediction.replace(/Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g, '').trim();

/**
 * Converts VLM-style conversations into OpenAI chat messages.
 *
 * Each image placeholder consumes the next entry of `images` and becomes a
 * 'user' message with a PNG data-URL `image_url` part; placeholders beyond
 * the available images are skipped. Every other entry becomes a text message
 * whose role is 'user' for 'human' and 'assistant' otherwise.
 *
 * @param {object} params
 * @param {Array<{from: string, value: string}>} params.conversations
 * @param {string[]} params.images - base64 payloads (no data-URL prefix).
 * @returns {Array<{role: string, content: (string|Array)}>}
 */
const convertToOpenAIMessages = ({ conversations, images }) => {
    const messages = [];
    let imageIndex = 0;
    for (const conv of conversations) {
        const isPlaceholder = conv.value === __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__.IMAGE_PLACEHOLDER;
        if (!isPlaceholder) {
            messages.push({
                role: 'human' === conv.from ? 'user' : 'assistant',
                content: conv.value
            });
            continue;
        }
        // Placeholder without a matching image: emit nothing.
        if (imageIndex >= images.length) {
            continue;
        }
        messages.push({
            role: 'user',
            content: [
                {
                    type: 'image_url',
                    image_url: {
                        url: `data:image/png;base64,${images[imageIndex]}`
                    }
                }
            ]
        });
        imageIndex++;
    }
    return messages;
};

/**
 * Strips a leading `data:image/<subtype>;base64,` prefix, if present.
 *
 * @param {string} base64
 * @returns {string} the input without the prefix (unchanged if none).
 */
function replaceBase64Prefix(base64) {
    return base64.replace(/^data:image\/\w+;base64,/, '');
}

/**
 * Decodes a base64 image and, if it has more than `maxPixels` pixels, scales
 * it down (preserving aspect ratio) so that it fits; the image is re-encoded
 * as PNG either way.
 *
 * @param {string} image_base64 - base64 image data; a leading
 *   `data:image/...;base64,` prefix is tolerated and stripped.
 * @param {number} maxPixels - maximum allowed width * height.
 * @returns {Promise<string>} base64 PNG data without a data-URL prefix.
 * @throws rethrows any decoding/resizing error after logging it.
 */
async function preprocessResizeImage(image_base64, maxPixels) {
    try {
        // A data-URL prefix consists of characters that are mostly valid
        // base64, so decoding it verbatim would prepend garbage bytes and
        // corrupt the image. Only strings are touched; any other input is
        // handed to Buffer.from unchanged, as before.
        const payload = typeof image_base64 === 'string'
            ? replaceBase64Prefix(image_base64)
            : image_base64;
        const imageBuffer = Buffer.from(payload, 'base64');
        const image = await __WEBPACK_EXTERNAL_MODULE_jimp__.Jimp.read(imageBuffer);
        const { width, height } = image.bitmap;
        const currentPixels = width * height;
        if (currentPixels > maxPixels) {
            // Scale both sides by sqrt(ratio) so the area shrinks by `ratio`.
            const resizeFactor = Math.sqrt(maxPixels / currentPixels);
            const newWidth = Math.floor(width * resizeFactor);
            const newHeight = Math.floor(height * resizeFactor);
            // NOTE(review): `quality` is kept as in the original call; it
            // probably has no effect on PNG output -- confirm against Jimp.
            const resized = await image.resize({
                w: newWidth,
                h: newHeight
            }).getBuffer('image/png', {
                quality: 60
            });
            return resized.toString('base64');
        }
        const base64 = await image.getBase64('image/png', {
            quality: 60
        });
        return replaceBase64Prefix(base64);
    } catch (error) {
        console.error('preprocessResizeImage error:', error);
        throw error;
    }
}

export { convertToOpenAIMessages, getSummary, parseBoxToScreenCoords, preprocessResizeImage, processVlmParams, replaceBase64Prefix, toVlmModelFormat };
//# sourceMappingURL=utils.mjs.map