@ui-tars/sdk
Version:
A powerful cross-platform(ANY device/platform) toolkit for building GUI automation agents for UI-TARS
1 lines • 13.4 kB
Source Map (JSON)
{"version":3,"file":"utils.mjs","sources":["webpack://@ui-tars/sdk/./src/utils.ts"],"sourcesContent":["/*\n * Copyright (c) 2025 Bytedance, Inc. and its affiliates.\n * SPDX-License-Identifier: Apache-2.0\n */\nimport { Jimp } from 'jimp';\nimport { ChatCompletionMessageParam } from 'openai/resources/chat/completions';\n\nimport { IMAGE_PLACEHOLDER, MAX_IMAGE_LENGTH } from '@ui-tars/shared/constants';\nimport { Conversation, Message } from '@ui-tars/shared/types';\nimport { DEFAULT_FACTORS, type Factors } from './constants';\nimport {\n ResponseInput,\n ResponseInputImage,\n ResponseInputItem,\n ResponseInputText,\n} from 'openai/resources/responses/responses.js';\n\n/**\n * Parse box string to screen coordinates\n *\n * e.g. '[0.131,0.25,0.131,0.25]' 2560x1440 -> { x: 335.36, y: 360 }\n *\n * @param boxStr box string\n * @param screenWidth screen width\n * @param screenHeight screen height\n * @param factors scaling factor, the training space of the target model.\n * @returns screen coordinates\n */\nexport const parseBoxToScreenCoords = ({\n boxStr,\n screenWidth,\n screenHeight,\n factors = DEFAULT_FACTORS,\n}: {\n boxStr: string;\n screenWidth: number;\n screenHeight: number;\n factors?: Factors;\n}) => {\n if (!boxStr) {\n return { x: null, y: null };\n }\n const coords = boxStr\n .replace('[', '')\n .replace(']', '')\n .split(',')\n .map((num) => parseFloat(num.trim()));\n\n const [x1, y1, x2 = x1, y2 = y1] = coords;\n const [widthFactor, heightFactor] = factors;\n\n return {\n x: Math.round(((x1 + x2) / 2) * screenWidth * widthFactor) / widthFactor,\n y: Math.round(((y1 + y2) / 2) * screenHeight * heightFactor) / heightFactor,\n };\n};\n\nexport const processVlmParams = (\n conversations: Message[],\n images: string[],\n maxImageLength: number = MAX_IMAGE_LENGTH,\n): {\n images: string[];\n conversations: Message[];\n} => {\n // Check if the images array exceeds the limit\n // TODO: configurable max image length\n if (images.length > maxImageLength) {\n // Calculate the number of items to remove\n const excessCount = images.length - maxImageLength;\n\n // Remove excess images from the start\n images = images.slice(excessCount);\n\n // Remove corresponding conversations where \"value\" is \"<image>\"\n let imageCountToRemove = excessCount;\n conversations = conversations.filter((convo) => {\n if (imageCountToRemove > 0 && convo.value === IMAGE_PLACEHOLDER) {\n imageCountToRemove--;\n return false;\n }\n return true;\n });\n }\n\n // Return the processed result\n return { images, conversations };\n};\n\nexport const toVlmModelFormat = ({\n historyMessages,\n conversations,\n systemPrompt,\n}: {\n historyMessages: Message[];\n conversations: Conversation[];\n systemPrompt: string;\n}): {\n conversations: Message[];\n images: string[];\n} => {\n const USER_INSTRUCTION_MARKER = '## User Instruction';\n const history = formatHistoryMessages(historyMessages);\n return {\n conversations: conversations.map((conv, idx) => {\n if (idx === 0 && conv.from === 'human') {\n let newValue = '';\n if (systemPrompt.includes(USER_INSTRUCTION_MARKER)) {\n const insertIndex = systemPrompt.lastIndexOf(USER_INSTRUCTION_MARKER);\n const slicedPrefix = systemPrompt.slice(0, insertIndex);\n const slicedSuffix = systemPrompt.slice(insertIndex);\n newValue =\n slicedPrefix +\n (slicedPrefix.endsWith('\\n') ? '' : '\\n') +\n history +\n '\\n' +\n slicedSuffix +\n (slicedSuffix.endsWith('\\n') ? '' : '\\n') +\n conv.value;\n } else {\n newValue = `${systemPrompt}\\n${history}\\n${USER_INSTRUCTION_MARKER}\\n${conv.value}`;\n }\n return {\n from: conv.from,\n value: newValue,\n };\n }\n return {\n from: conv.from,\n value: conv.value,\n };\n }),\n images: conversations\n .filter(\n (conv) => conv.value === IMAGE_PLACEHOLDER && !!conv.screenshotBase64,\n )\n .map((conv) => conv.screenshotBase64!),\n };\n};\n\nexport const getSummary = (prediction: string) =>\n prediction\n .replace(/Reflection:[\\s\\S]*?(?=Action_Summary:|Action:|$)/g, '')\n .trim();\n\n/**\n * convert conversations to OpenAI ChatCompletionMessageParam\n * @param conversations conversations\n * @param images images\n * @returns OpenAI ChatCompletionMessageParam\n */\nexport const convertToOpenAIMessages = ({\n conversations,\n images,\n}: {\n conversations: Message[];\n images: string[];\n}): Array<ChatCompletionMessageParam> => {\n const messages: Array<ChatCompletionMessageParam> = [];\n let imageIndex = 0;\n\n conversations.forEach((conv) => {\n if (conv.value === IMAGE_PLACEHOLDER) {\n // handle image message\n if (imageIndex < images.length) {\n messages.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: { url: `data:image/png;base64,${images[imageIndex]}` },\n },\n ],\n });\n imageIndex++;\n }\n } else {\n // handle text message\n messages.push({\n role: conv.from === 'human' ? 'user' : 'assistant',\n content: conv.value,\n });\n }\n });\n\n return messages;\n};\n\nexport function replaceBase64Prefix(base64: string) {\n return base64.replace(/^data:image\\/\\w+;base64,/, '');\n}\n\nexport async function preprocessResizeImage(\n image_base64: string,\n maxPixels: number,\n): Promise<string> {\n try {\n const imageBuffer = Buffer.from(image_base64, 'base64');\n\n const image = await Jimp.read(imageBuffer);\n const { width, height } = image.bitmap;\n\n const currentPixels = width * height;\n if (currentPixels > maxPixels) {\n const resizeFactor = Math.sqrt(maxPixels / currentPixels);\n const newWidth = Math.floor(width * resizeFactor);\n const newHeight = Math.floor(height * resizeFactor);\n\n const resized = await image\n .resize({\n w: newWidth,\n h: newHeight,\n })\n .getBuffer('image/png', { quality: 60 });\n\n return resized.toString('base64');\n }\n\n const base64 = await image.getBase64('image/png', { quality: 60 });\n\n return replaceBase64Prefix(base64);\n } catch (error) {\n console.error('preprocessResizeImage error:', error);\n throw error;\n }\n}\n\nfunction formatHistoryMessages(messages: Message[]): string {\n const lastMessages = messages.slice(-30);\n\n const lines = lastMessages.map((msg) => {\n const role = msg.from === 'human' ? 'human' : 'assistant';\n return `${role}: ${msg.value}`;\n });\n\n // human: xxx, assistant: xxx.\n // const formattedLines = lines.map((line) => {\n // if (line.startsWith('human:')) {\n // return line + ',';\n // } else {\n // return line + '.';\n // }\n // });\n\n return '## History Messages\\n' + lines.join('\\n') + '\\n';\n}\n\n/**\n * convert ChatCompletionMessageParam to Response API input\n * @param messages messages\n * @returns Response API input\n */\nexport const convertToResponseApiInput = (\n messages: ChatCompletionMessageParam[],\n): ResponseInput => {\n return messages.map((message) => {\n if (Array.isArray(message?.content) && message?.content.length > 0) {\n const content = message.content.map((item) => {\n if (item.type === 'image_url' && item.image_url?.url) {\n return {\n type: 'input_image',\n image_url: item.image_url.url,\n } as ResponseInputImage;\n }\n return item;\n });\n return {\n role: message.role,\n content,\n } as ResponseInputItem.Message;\n }\n\n return message as unknown as ResponseInputItem.Message;\n });\n};\n\n/**\n * check if the message is an image message\n * @param c message\n * @returns true if the message is an image message\n */\nexport const isMessageImage = (\n c: ChatCompletionMessageParam | ResponseInputItem,\n) =>\n 'role' in c &&\n c.role === 'user' &&\n Array.isArray(c.content) &&\n c.content.some(\n (item) =>\n (item.type === 'image_url' && item.image_url?.url) ||\n (item.type === 'input_image' && item.image_url),\n );\n"],"names":["parseBoxToScreenCoords","boxStr","screenWidth","screenHeight","factors","DEFAULT_FACTORS","coords","num","parseFloat","x1","y1","x2","y2","widthFactor","heightFactor","Math","processVlmParams","conversations","images","maxImageLength","MAX_IMAGE_LENGTH","excessCount","imageCountToRemove","convo","IMAGE_PLACEHOLDER","toVlmModelFormat","historyMessages","systemPrompt","USER_INSTRUCTION_MARKER","history","formatHistoryMessages","conv","idx","newValue","insertIndex","slicedPrefix","slicedSuffix","getSummary","prediction","convertToOpenAIMessages","messages","imageIndex","replaceBase64Prefix","base64","preprocessResizeImage","image_base64","maxPixels","imageBuffer","Buffer","image","Jimp","width","height","currentPixels","resizeFactor","newWidth","newHeight","resized","error","console","lastMessages","lines","msg","role","convertToResponseApiInput","message","Array","content","item","_item_image_url","isMessageImage","c"],"mappings":";;;;;;;AA4BO,MAAMA,yBAAyB,CAAC,EACrCC,MAAM,EACNC,WAAW,EACXC,YAAY,EACZC,UAAUC,eAAe,EAM1B;IACC,IAAI,CAACJ,QACH,OAAO;QAAE,GAAG;QAAM,GAAG;IAAK;IAE5B,MAAMK,SAASL,OACZ,OAAO,CAAC,KAAK,IACb,OAAO,CAAC,KAAK,IACb,KAAK,CAAC,KACN,GAAG,CAAC,CAACM,MAAQC,WAAWD,IAAI,IAAI;IAEnC,MAAM,CAACE,IAAIC,IAAIC,KAAKF,EAAE,EAAEG,KAAKF,EAAE,CAAC,GAAGJ;IACnC,MAAM,CAACO,aAAaC,aAAa,GAAGV;IAEpC,OAAO;QACL,GAAGW,KAAK,KAAK,CAAGN,AAAAA,CAAAA,KAAKE,EAAC,IAAK,IAAKT,cAAcW,eAAeA;QAC7D,GAAGE,KAAK,KAAK,CAAGL,AAAAA,CAAAA,KAAKE,EAAC,IAAK,IAAKT,eAAeW,gBAAgBA;IACjE;AACF;AAEO,MAAME,mBAAmB,CAC9BC,eACAC,QACAC,iBAAyBC,gBAAgB;IAOzC,IAAIF,OAAO,MAAM,GAAGC,gBAAgB;QAElC,MAAME,cAAcH,OAAO,MAAM,GAAGC;QAGpCD,SAASA,OAAO,KAAK,CAACG;QAGtB,IAAIC,qBAAqBD;QACzBJ,gBAAgBA,cAAc,MAAM,CAAC,CAACM;YACpC,IAAID,qBAAqB,KAAKC,MAAM,KAAK,KAAKC,mBAAmB;gBAC/DF;gBACA,OAAO;YACT;YACA,OAAO;QACT;IACF;IAGA,OAAO;QAAEJ;QAAQD;IAAc;AACjC;AAEO,MAAMQ,mBAAmB,CAAC,EAC/BC,eAAe,EACfT,aAAa,EACbU,YAAY,EAKb;IAIC,MAAMC,0BAA0B;IAChC,MAAMC,UAAUC,sBAAsBJ;IACtC,OAAO;QACL,eAAeT,cAAc,GAAG,CAAC,CAACc,MAAMC;YACtC,IAAIA,AAAQ,MAARA,OAAaD,AAAc,YAAdA,KAAK,IAAI,EAAc;gBACtC,IAAIE,WAAW;gBACf,IAAIN,aAAa,QAAQ,CAACC,0BAA0B;oBAClD,MAAMM,cAAcP,aAAa,WAAW,CAACC;oBAC7C,MAAMO,eAAeR,aAAa,KAAK,CAAC,GAAGO;oBAC3C,MAAME,eAAeT,aAAa,KAAK,CAACO;oBACxCD,WACEE,eACCA,CAAAA,aAAa,QAAQ,CAAC,QAAQ,KAAK,IAAG,IACvCN,UACA,OACAO,eACCA,CAAAA,aAAa,QAAQ,CAAC,QAAQ,KAAK,IAAG,IACvCL,KAAK,KAAK;gBACd,OACEE,WAAW,GAAGN,aAAa,EAAE,EAAEE,QAAQ,EAAE,EAAED,wBAAwB,EAAE,EAAEG,KAAK,KAAK,EAAE;gBAErF,OAAO;oBACL,MAAMA,KAAK,IAAI;oBACf,OAAOE;gBACT;YACF;YACA,OAAO;gBACL,MAAMF,KAAK,IAAI;gBACf,OAAOA,KAAK,KAAK;YACnB;QACF;QACA,QAAQd,cACL,MAAM,CACL,CAACc,OAASA,KAAK,KAAK,KAAKP,qBAAqB,CAAC,CAACO,KAAK,gBAAgB,EAEtE,GAAG,CAAC,CAACA,OAASA,KAAK,gBAAgB;IACxC;AACF;AAEO,MAAMM,aAAa,CAACC,aACzBA,WACG,OAAO,CAAC,qDAAqD,IAC7D,IAAI;AAQF,MAAMC,0BAA0B,CAAC,EACtCtB,aAAa,EACbC,MAAM,EAIP;IACC,MAAMsB,WAA8C,EAAE;IACtD,IAAIC,aAAa;IAEjBxB,cAAc,OAAO,CAAC,CAACc;QACrB,IAAIA,KAAK,KAAK,KAAKP,mBAEjB;YAAA,IAAIiB,aAAavB,OAAO,MAAM,EAAE;gBAC9BsB,SAAS,IAAI,CAAC;oBACZ,MAAM;oBACN,SAAS;wBACP;4BACE,MAAM;4BACN,WAAW;gCAAE,KAAK,CAAC,sBAAsB,EAAEtB,MAAM,CAACuB,WAAW,EAAE;4BAAC;wBAClE;qBACD;gBACH;gBACAA;YACF;QAAA,OAGAD,SAAS,IAAI,CAAC;YACZ,MAAMT,AAAc,YAAdA,KAAK,IAAI,GAAe,SAAS;YACvC,SAASA,KAAK,KAAK;QACrB;IAEJ;IAEA,OAAOS;AACT;AAEO,SAASE,oBAAoBC,MAAc;IAChD,OAAOA,OAAO,OAAO,CAAC,4BAA4B;AACpD;AAEO,eAAeC,sBACpBC,YAAoB,EACpBC,SAAiB;IAEjB,IAAI;QACF,MAAMC,cAAcC,OAAO,IAAI,CAACH,cAAc;QAE9C,MAAMI,QAAQ,MAAMC,KAAK,IAAI,CAACH;QAC9B,MAAM,EAAEI,KAAK,EAAEC,MAAM,EAAE,GAAGH,MAAM,MAAM;QAEtC,MAAMI,gBAAgBF,QAAQC;QAC9B,IAAIC,gBAAgBP,WAAW;YAC7B,MAAMQ,eAAevC,KAAK,IAAI,CAAC+B,YAAYO;YAC3C,MAAME,WAAWxC,KAAK,KAAK,CAACoC,QAAQG;YACpC,MAAME,YAAYzC,KAAK,KAAK,CAACqC,SAASE;YAEtC,MAAMG,UAAU,MAAMR,MACnB,MAAM,CAAC;gBACN,GAAGM;gBACH,GAAGC;YACL,GACC,SAAS,CAAC,aAAa;gBAAE,SAAS;YAAG;YAExC,OAAOC,QAAQ,QAAQ,CAAC;QAC1B;QAEA,MAAMd,SAAS,MAAMM,MAAM,SAAS,CAAC,aAAa;YAAE,SAAS;QAAG;QAEhE,OAAOP,oBAAoBC;IAC7B,EAAE,OAAOe,OAAO;QACdC,QAAQ,KAAK,CAAC,gCAAgCD;QAC9C,MAAMA;IACR;AACF;AAEA,SAAS5B,sBAAsBU,QAAmB;IAChD,MAAMoB,eAAepB,SAAS,KAAK,CAAC;IAEpC,MAAMqB,QAAQD,aAAa,GAAG,CAAC,CAACE;QAC9B,MAAMC,OAAOD,AAAa,YAAbA,IAAI,IAAI,GAAe,UAAU;QAC9C,OAAO,GAAGC,KAAK,EAAE,EAAED,IAAI,KAAK,EAAE;IAChC;IAWA,OAAO,0BAA0BD,MAAM,IAAI,CAAC,QAAQ;AACtD;AAOO,MAAMG,4BAA4B,CACvCxB,WAEOA,SAAS,GAAG,CAAC,CAACyB;QACnB,IAAIC,MAAM,OAAO,CAACD,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,KAAKA,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,CAAC,MAAM,AAAD,IAAI,GAAG;YAClE,MAAME,UAAUF,QAAQ,OAAO,CAAC,GAAG,CAAC,CAACG;oBACFC;gBAAjC,IAAID,AAAc,gBAAdA,KAAK,IAAI,IAAK,SAAeC,CAAAA,kBAAAA,KAAK,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,GAAG,AAAD,GACjD,OAAO;oBACL,MAAM;oBACN,WAAWD,KAAK,SAAS,CAAC,GAAG;gBAC/B;gBAEF,OAAOA;YACT;YACA,OAAO;gBACL,MAAMH,QAAQ,IAAI;gBAClBE;YACF;QACF;QAEA,OAAOF;IACT;AAQK,MAAMK,iBAAiB,CAC5BC,IAEA,UAAUA,KACVA,AAAW,WAAXA,EAAE,IAAI,IACNL,MAAM,OAAO,CAACK,EAAE,OAAO,KACvBA,EAAE,OAAO,CAAC,IAAI,CACZ,CAACH;YAC+BC;eAA7BD,AAAc,gBAAdA,KAAK,IAAI,IAAK,SAAeC,CAAAA,kBAAAA,KAAK,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,GAAG,AAAD,KAC/CD,AAAc,kBAAdA,KAAK,IAAI,IAAsBA,KAAK,SAAS"}