UNPKG

@ui-tars/sdk

Version:

A powerful cross-platform(ANY device/platform) toolkit for building GUI automation agents for UI-TARS

1 lines 8.72 kB
{"version":3,"file":"utils.mjs","sources":["webpack://@ui-tars/sdk/./src/utils.ts"],"sourcesContent":["/*\n * Copyright (c) 2025 Bytedance, Inc. and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\nimport { Jimp } from 'jimp';\nimport { ChatCompletionMessageParam } from 'openai/resources/chat/completions';\n\nimport { IMAGE_PLACEHOLDER, MAX_IMAGE_LENGTH } from '@ui-tars/shared/constants';\nimport { Conversation, Message } from '@ui-tars/shared/types';\nimport { DEFAULT_FACTORS, type Factors } from './constants';\n\n/**\n * Parse box string to screen coordinates\n *\n * e.g. '[0.131,0.25,0.131,0.25]' 2560x1440 -> { x: 335.36, y: 360 }\n *\n * @param boxStr box string\n * @param screenWidth screen width\n * @param screenHeight screen height\n * @param factors scaling factor, the training space of the target model.\n * @returns screen coordinates\n */\nexport const parseBoxToScreenCoords = ({\n boxStr,\n screenWidth,\n screenHeight,\n factors = DEFAULT_FACTORS,\n}: {\n boxStr: string;\n screenWidth: number;\n screenHeight: number;\n factors?: Factors;\n}) => {\n if (!boxStr) {\n return { x: null, y: null };\n }\n const coords = boxStr\n .replace('[', '')\n .replace(']', '')\n .split(',')\n .map((num) => parseFloat(num.trim()));\n\n const [x1, y1, x2 = x1, y2 = y1] = coords;\n const [widthFactor, heightFactor] = factors;\n\n return {\n x: Math.round(((x1 + x2) / 2) * screenWidth * widthFactor) / widthFactor,\n y: Math.round(((y1 + y2) / 2) * screenHeight * heightFactor) / heightFactor,\n };\n};\n\nexport const processVlmParams = (\n conversations: Message[],\n images: string[],\n) => {\n // Check if the images array exceeds the limit\n // TODO: configurable max image length\n if (images.length > MAX_IMAGE_LENGTH) {\n // Calculate the number of items to remove\n const excessCount = images.length - MAX_IMAGE_LENGTH;\n\n // Remove excess images from the start\n images = images.slice(excessCount);\n\n // Remove corresponding conversations where \"value\" is \"<image>\"\n let imageCountToRemove = excessCount;\n conversations = conversations.filter((convo) => {\n if (imageCountToRemove > 0 && convo.value === IMAGE_PLACEHOLDER) {\n imageCountToRemove--;\n return false;\n }\n return true;\n });\n }\n\n // Return the processed result\n return { images, conversations };\n};\n\nexport const toVlmModelFormat = ({\n conversations,\n systemPrompt,\n}: {\n conversations: Conversation[];\n systemPrompt: string;\n}): {\n conversations: Message[];\n images: string[];\n} => {\n return {\n conversations: conversations.map((conv, idx) => {\n if (idx === 0 && conv.from === 'human') {\n return {\n from: conv.from,\n value: `${systemPrompt}${conv.value}`,\n };\n }\n return {\n from: conv.from,\n value: conv.value,\n };\n }),\n images: conversations\n .filter(\n (conv) => conv.value === IMAGE_PLACEHOLDER && !!conv.screenshotBase64,\n )\n .map((conv) => conv.screenshotBase64!),\n };\n};\n\nexport const getSummary = (prediction: string) =>\n prediction\n .replace(/Reflection:[\\s\\S]*?(?=Action_Summary:|Action:|$)/g, '')\n .trim();\n\n/**\n * convert conversations to OpenAI ChatCompletionMessageParam\n * @param conversations conversations\n * @param images images\n * @returns OpenAI ChatCompletionMessageParam\n */\nexport const convertToOpenAIMessages = ({\n conversations,\n images,\n}: {\n conversations: Message[];\n images: string[];\n}): Array<ChatCompletionMessageParam> => {\n const messages: Array<ChatCompletionMessageParam> = [];\n let imageIndex = 0;\n\n conversations.forEach((conv) => {\n if (conv.value === IMAGE_PLACEHOLDER) {\n // handle image message\n if (imageIndex < images.length) {\n messages.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: { url: `data:image/png;base64,${images[imageIndex]}` },\n },\n ],\n });\n imageIndex++;\n }\n } else {\n // handle text message\n messages.push({\n role: conv.from === 'human' ? 'user' : 'assistant',\n content: conv.value,\n });\n }\n });\n\n return messages;\n};\n\nexport function replaceBase64Prefix(base64: string) {\n return base64.replace(/^data:image\\/\\w+;base64,/, '');\n}\n\nexport async function preprocessResizeImage(\n image_base64: string,\n maxPixels: number,\n): Promise<string> {\n try {\n const imageBuffer = Buffer.from(image_base64, 'base64');\n\n const image = await Jimp.read(imageBuffer);\n const { width, height } = image.bitmap;\n\n const currentPixels = width * height;\n if (currentPixels > maxPixels) {\n const resizeFactor = Math.sqrt(maxPixels / currentPixels);\n const newWidth = Math.floor(width * resizeFactor);\n const newHeight = Math.floor(height * resizeFactor);\n\n const resized = await image\n .resize({\n w: newWidth,\n h: newHeight,\n })\n .getBuffer('image/png', { quality: 60 });\n\n return resized.toString('base64');\n }\n\n const base64 = await image.getBase64('image/png', { quality: 60 });\n\n return replaceBase64Prefix(base64);\n } catch (error) {\n console.error('preprocessResizeImage error:', error);\n throw error;\n }\n}\n"],"names":["parseBoxToScreenCoords","boxStr","screenWidth","screenHeight","factors","DEFAULT_FACTORS","coords","num","parseFloat","x1","y1","x2","y2","widthFactor","heightFactor","Math","processVlmParams","conversations","images","MAX_IMAGE_LENGTH","excessCount","imageCountToRemove","convo","IMAGE_PLACEHOLDER","toVlmModelFormat","systemPrompt","conv","idx","getSummary","prediction","convertToOpenAIMessages","messages","imageIndex","replaceBase64Prefix","base64","preprocessResizeImage","image_base64","maxPixels","imageBuffer","Buffer","image","Jimp","width","height","currentPixels","resizeFactor","newWidth","newHeight","resized","error","console"],"mappings":";;;;;;;AAsBO,MAAMA,yBAAyB,CAAC,EACrCC,MAAM,EACNC,WAAW,EACXC,YAAY,EACZC,UAAUC,oDAAAA,eAAe,EAM1B;IACC,IAAI,CAACJ,QACH,OAAO;QAAE,GAAG;QAAM,GAAG;IAAK;IAE5B,MAAMK,SAASL,OACZ,OAAO,CAAC,KAAK,IACb,OAAO,CAAC,KAAK,IACb,KAAK,CAAC,KACN,GAAG,CAAC,CAACM,MAAQC,WAAWD,IAAI,IAAI;IAEnC,MAAM,CAACE,IAAIC,IAAIC,KAAKF,EAAE,EAAEG,KAAKF,EAAE,CAAC,GAAGJ;IACnC,MAAM,CAACO,aAAaC,aAAa,GAAGV;IAEpC,OAAO;QACL,GAAGW,KAAK,KAAK,CAAGN,AAAAA,CAAAA,KAAKE,EAAC,IAAK,IAAKT,cAAcW,eAAeA;QAC7D,GAAGE,KAAK,KAAK,CAAGL,AAAAA,CAAAA,KAAKE,EAAC,IAAK,IAAKT,eAAeW,gBAAgBA;IACjE;AACF;AAEO,MAAME,mBAAmB,CAC9BC,eACAC;IAIA,IAAIA,OAAO,MAAM,GAAGC,+DAAAA,gBAAgBA,EAAE;QAEpC,MAAMC,cAAcF,OAAO,MAAM,GAAGC,+DAAAA,gBAAgBA;QAGpDD,SAASA,OAAO,KAAK,CAACE;QAGtB,IAAIC,qBAAqBD;QACzBH,gBAAgBA,cAAc,MAAM,CAAC,CAACK;YACpC,IAAID,qBAAqB,KAAKC,MAAM,KAAK,KAAKC,+DAAAA,iBAAiBA,EAAE;gBAC/DF;gBACA,OAAO;YACT;YACA,OAAO;QACT;IACF;IAGA,OAAO;QAAEH;QAAQD;IAAc;AACjC;AAEO,MAAMO,mBAAmB,CAAC,EAC/BP,aAAa,EACbQ,YAAY,EAIb,GAIQ;QACL,eAAeR,cAAc,GAAG,CAAC,CAACS,MAAMC;YACtC,IAAIA,AAAQ,MAARA,OAAaD,AAAc,YAAdA,KAAK,IAAI,EACxB,OAAO;gBACL,MAAMA,KAAK,IAAI;gBACf,OAAO,GAAGD,eAAeC,KAAK,KAAK,EAAE;YACvC;YAEF,OAAO;gBACL,MAAMA,KAAK,IAAI;gBACf,OAAOA,KAAK,KAAK;YACnB;QACF;QACA,QAAQT,cACL,MAAM,CACL,CAACS,OAASA,KAAK,KAAK,KAAKH,+DAAAA,iBAAiBA,IAAI,CAAC,CAACG,KAAK,gBAAgB,EAEtE,GAAG,CAAC,CAACA,OAASA,KAAK,gBAAgB;IACxC;AAGK,MAAME,aAAa,CAACC,aACzBA,WACG,OAAO,CAAC,qDAAqD,IAC7D,IAAI;AAQF,MAAMC,0BAA0B,CAAC,EACtCb,aAAa,EACbC,MAAM,EAIP;IACC,MAAMa,WAA8C,EAAE;IACtD,IAAIC,aAAa;IAEjBf,cAAc,OAAO,CAAC,CAACS;QACrB,IAAIA,KAAK,KAAK,KAAKH,+DAAAA,iBAAiBA,EAElC;YAAA,IAAIS,aAAad,OAAO,MAAM,EAAE;gBAC9Ba,SAAS,IAAI,CAAC;oBACZ,MAAM;oBACN,SAAS;wBACP;4BACE,MAAM;4BACN,WAAW;gCAAE,KAAK,CAAC,sBAAsB,EAAEb,MAAM,CAACc,WAAW,EAAE;4BAAC;wBAClE;qBACD;gBACH;gBACAA;YACF;QAAA,OAGAD,SAAS,IAAI,CAAC;YACZ,MAAML,AAAc,YAAdA,KAAK,IAAI,GAAe,SAAS;YACvC,SAASA,KAAK,KAAK;QACrB;IAEJ;IAEA,OAAOK;AACT;AAEO,SAASE,oBAAoBC,MAAc;IAChD,OAAOA,OAAO,OAAO,CAAC,4BAA4B;AACpD;AAEO,eAAeC,sBACpBC,YAAoB,EACpBC,SAAiB;IAEjB,IAAI;QACF,MAAMC,cAAcC,OAAO,IAAI,CAACH,cAAc;QAE9C,MAAMI,QAAQ,MAAMC,iCAAAA,IAAAA,CAAAA,IAAS,CAACH;QAC9B,MAAM,EAAEI,KAAK,EAAEC,MAAM,EAAE,GAAGH,MAAM,MAAM;QAEtC,MAAMI,gBAAgBF,QAAQC;QAC9B,IAAIC,gBAAgBP,WAAW;YAC7B,MAAMQ,eAAe9B,KAAK,IAAI,CAACsB,YAAYO;YAC3C,MAAME,WAAW/B,KAAK,KAAK,CAAC2B,QAAQG;YACpC,MAAME,YAAYhC,KAAK,KAAK,CAAC4B,SAASE;YAEtC,MAAMG,UAAU,MAAMR,MACnB,MAAM,CAAC;gBACN,GAAGM;gBACH,GAAGC;YACL,GACC,SAAS,CAAC,aAAa;gBAAE,SAAS;YAAG;YAExC,OAAOC,QAAQ,QAAQ,CAAC;QAC1B;QAEA,MAAMd,SAAS,MAAMM,MAAM,SAAS,CAAC,aAAa;YAAE,SAAS;QAAG;QAEhE,OAAOP,oBAAoBC;IAC7B,EAAE,OAAOe,OAAO;QACdC,QAAQ,KAAK,CAAC,gCAAgCD;QAC9C,MAAMA;IACR;AACF"}