@ui-tars/sdk
Version:
A powerful cross-platform (ANY device/platform) toolkit for building GUI automation agents for UI-TARS
111 lines (110 loc) • 4.71 kB
JavaScript
/**
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
* SPDX-License-Identifier: Apache-2.0
*/
import * as __WEBPACK_EXTERNAL_MODULE_jimp__ from "jimp";
import * as __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__ from "@ui-tars/shared/constants";
import * as __WEBPACK_EXTERNAL_MODULE__constants_mjs_225410ff__ from "./constants.mjs";
/**
 * Convert a box string into a single screen point: the centre of the box.
 *
 * The string is a comma-separated list of numbers, optionally wrapped in
 * square brackets: `"[x1, y1]"` (a point) or `"[x1, y1, x2, y2]"` (a box).
 * When x2 / y2 are absent or not parseable they fall back to x1 / y1, so a
 * two-number string resolves to that point itself.
 *
 * @param {object} params
 * @param {string} params.boxStr - Box description as described above.
 * @param {number} params.screenWidth - Multiplier applied to the x centre.
 * @param {number} params.screenHeight - Multiplier applied to the y centre.
 * @param {number[]} [params.factors] - `[widthFactor, heightFactor]`; each
 *   centre is multiplied by its factor before rounding and divided by it
 *   afterwards. Defaults to `DEFAULT_FACTORS` from `./constants.mjs`.
 * @returns {{x: (number|null), y: (number|null)}} The centre point, or
 *   `{ x: null, y: null }` when `boxStr` is empty or its first two numbers
 *   cannot be parsed.
 */
const parseBoxToScreenCoords = ({ boxStr, screenWidth, screenHeight, factors = __WEBPACK_EXTERNAL_MODULE__constants_mjs_225410ff__.DEFAULT_FACTORS }) => {
  if (!boxStr) return { x: null, y: null };

  // A regex with the `g` flag strips every bracket; a string pattern would
  // only remove the first one and leave e.g. "[[0.1" unparseable.
  const coords = boxStr
    .replace(/[\[\]]/g, '')
    .split(',')
    .map((num) => Number.parseFloat(num.trim()));

  const [x1, y1] = coords;
  // Report "no point" instead of handing NaN coordinates to the caller.
  if (!Number.isFinite(x1) || !Number.isFinite(y1)) return { x: null, y: null };

  // Missing or unparseable second corner (e.g. a trailing comma) collapses
  // the box onto its first corner.
  const x2 = Number.isFinite(coords[2]) ? coords[2] : x1;
  const y2 = Number.isFinite(coords[3]) ? coords[3] : y1;

  const [widthFactor, heightFactor] = factors;
  return {
    x: Math.round((x1 + x2) / 2 * screenWidth * widthFactor) / widthFactor,
    y: Math.round((y1 + y2) / 2 * screenHeight * heightFactor) / heightFactor,
  };
};
/**
 * Cap the image history at `MAX_IMAGE_LENGTH` entries.
 *
 * When there are more images than the limit, the oldest surplus images are
 * dropped from the front of `images`, and the same number of conversation
 * entries whose `value` is `IMAGE_PLACEHOLDER` are removed, earliest first.
 * Neither input array is mutated; when nothing exceeds the limit both are
 * returned as-is.
 *
 * @param {Array<{value: *}>} conversations - Conversation entries.
 * @param {Array} images - Images, oldest first.
 * @returns {{images: Array, conversations: Array}} The (possibly trimmed) pair.
 */
const processVlmParams = (conversations, images) => {
  const shared = __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__;
  const limit = shared.MAX_IMAGE_LENGTH;

  // Nothing to trim: hand back the caller's arrays untouched.
  if (!(images.length > limit)) {
    return { images, conversations };
  }

  const surplus = images.length - limit;
  const placeholder = shared.IMAGE_PLACEHOLDER;

  // Drop one placeholder entry per discarded image, starting from the oldest.
  let placeholdersLeftToDrop = surplus;
  const keptConversations = conversations.filter((turn) => {
    const shouldDrop = placeholdersLeftToDrop > 0 && turn.value === placeholder;
    if (shouldDrop) {
      placeholdersLeftToDrop -= 1;
    }
    return !shouldDrop;
  });

  return {
    images: images.slice(surplus),
    conversations: keptConversations,
  };
};
/**
 * Build the `{ conversations, images }` payload from a conversation list.
 *
 * - Every entry is copied down to just `from` and `value`.
 * - If the very first entry comes from `'human'`, `systemPrompt` is prepended
 *   to its `value`.
 * - `images` collects `screenshotBase64` from each entry whose `value` is
 *   `IMAGE_PLACEHOLDER` and whose `screenshotBase64` is truthy, in order.
 *
 * @param {object} params
 * @param {Array<{from: string, value: *, screenshotBase64?: string}>} params.conversations
 * @param {string} params.systemPrompt - Text prefixed to a leading human entry.
 * @returns {{conversations: Array<{from: string, value: *}>, images: string[]}}
 */
const toVlmModelFormat = ({ conversations, systemPrompt }) => {
  const formattedConversations = conversations.map(({ from, value }, position) => {
    const isLeadingHumanTurn = position === 0 && from === 'human';
    return {
      from,
      value: isLeadingHumanTurn ? `${systemPrompt}${value}` : value,
    };
  });

  const placeholder = __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__.IMAGE_PLACEHOLDER;
  // Wrapping the screenshot in a one-element array keeps it intact under flatMap.
  const screenshots = conversations.flatMap((turn) =>
    turn.value === placeholder && !!turn.screenshotBase64 ? [turn.screenshotBase64] : [],
  );

  return {
    conversations: formattedConversations,
    images: screenshots,
  };
};
/**
 * Remove every "Reflection:" section from a prediction string and trim the result.
 *
 * A section starts at the literal `Reflection:` and runs up to (but not
 * including) the next `Action_Summary:` or `Action:` marker, or to the end of
 * the string when neither follows.
 *
 * @param {string} prediction - Raw prediction text.
 * @returns {string} The text without reflection sections, trimmed.
 */
const getSummary = (prediction) => {
  // Lazy match so the section stops at the nearest following marker.
  const reflectionSection = /Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g;
  const withoutReflection = prediction.replace(reflectionSection, '');
  return withoutReflection.trim();
};
/**
 * Turn a conversation list plus its images into `{ role, content }` messages.
 *
 * - An entry whose `value` is `IMAGE_PLACEHOLDER` consumes the next unused
 *   image and becomes a `user` message holding one `image_url` part with a
 *   `data:image/png;base64,` URL. Placeholders left over once the images run
 *   out produce no message.
 * - Any other entry becomes a text message: role `user` when `from` is
 *   `'human'`, otherwise `assistant`.
 *
 * @param {object} params
 * @param {Array<{from: string, value: *}>} params.conversations
 * @param {string[]} params.images - Base64 payloads, consumed in order.
 * @returns {Array<{role: string, content: *}>} Messages in conversation order.
 */
const convertToOpenAIMessages = ({ conversations, images }) => {
  let nextImage = 0;

  return conversations.flatMap(({ from, value }) => {
    const isImageSlot = value === __WEBPACK_EXTERNAL_MODULE__ui_tars_shared_constants_7e5a2ecd__.IMAGE_PLACEHOLDER;

    if (!isImageSlot) {
      const role = from === 'human' ? 'user' : 'assistant';
      return [{ role, content: value }];
    }

    if (nextImage < images.length) {
      const url = `data:image/png;base64,${images[nextImage]}`;
      nextImage += 1;
      return [
        {
          role: 'user',
          content: [{ type: 'image_url', image_url: { url } }],
        },
      ];
    }

    // Placeholder with no image left to attach: emit nothing.
    return [];
  });
};
/**
 * Strip a leading `data:image/<subtype>;base64,` prefix from a string.
 *
 * The subtype may contain word characters plus `.`, `+` and `-`, so prefixes
 * such as `image/svg+xml`, `image/x-icon` and `image/vnd.microsoft.icon` are
 * removed as well as simple ones like `image/png`. Strings without such a
 * prefix are returned unchanged.
 *
 * @param {string} base64 - A base64 payload, optionally in data-URL form.
 * @returns {string} The input with the image data-URL prefix removed.
 */
function replaceBase64Prefix(base64) {
  return base64.replace(/^data:image\/[\w.+-]+;base64,/, '');
}
/**
 * Re-encode a base64 image as PNG, shrinking it first when it has more than
 * `maxPixels` pixels.
 *
 * When `width * height` exceeds `maxPixels`, both dimensions are multiplied by
 * `sqrt(maxPixels / (width * height))` and floored, which keeps the aspect
 * ratio while bringing the pixel count to at most `maxPixels`. Otherwise the
 * image is re-encoded at its current size.
 *
 * @param {string} image_base64 - Image bytes encoded as base64.
 * @param {number} maxPixels - Pixel budget (width × height).
 * @returns {Promise<string>} Base64 of the PNG output, without a data-URL prefix.
 * @throws Re-throws any decoding/encoding error after logging it.
 */
async function preprocessResizeImage(image_base64, maxPixels) {
  try {
    const sourceBytes = Buffer.from(image_base64, 'base64');
    const picture = await __WEBPACK_EXTERNAL_MODULE_jimp__.Jimp.read(sourceBytes);
    const { width, height } = picture.bitmap;
    const pixelCount = width * height;
    const overBudget = pixelCount > maxPixels;

    if (!overBudget) {
      // Within budget: re-encode only; getBase64 yields a data URL, so strip its prefix.
      const dataUrl = await picture.getBase64('image/png', {
        quality: 60
      });
      return replaceBase64Prefix(dataUrl);
    }

    // Scale both sides by the same factor so the area shrinks to the budget.
    const scale = Math.sqrt(maxPixels / pixelCount);
    const targetSize = {
      w: Math.floor(width * scale),
      h: Math.floor(height * scale)
    };
    const pngBytes = await picture.resize(targetSize).getBuffer('image/png', {
      quality: 60
    });
    return pngBytes.toString('base64');
  } catch (error) {
    console.error('preprocessResizeImage error:', error);
    throw error;
  }
}
export { convertToOpenAIMessages, getSummary, parseBoxToScreenCoords, preprocessResizeImage, processVlmParams, replaceBase64Prefix, toVlmModelFormat };
//# sourceMappingURL=utils.mjs.map