UNPKG

@ui-tars/action-parser

Version:
145 lines (144 loc) 5 kB
"use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); var actionParser_exports = {}; __export(actionParser_exports, { actionParser: () => actionParser, parseActionVlm: () => parseActionVlm }); module.exports = __toCommonJS(actionParser_exports); function actionParser(params) { const { prediction, factor } = params; const parsed = parseActionVlm(prediction, factor); return { parsed }; } function parseActionVlm(text, factor = 1e3, mode = "bc") { let reflection = null; let thought = null; let actionStr = ""; text = text.trim(); if (mode === "bc") { if (text.startsWith("Thought:")) { const thoughtMatch = text.match(/Thought: ([\s\S]+?)(?=\s*Action:|$)/); if (thoughtMatch) { thought = thoughtMatch[1].trim(); } } else if (text.startsWith("Reflection:")) { const reflectionMatch = text.match( /Reflection: ([\s\S]+?)Action_Summary: ([\s\S]+?)(?=\s*Action:|$)/ ); if (reflectionMatch) { thought = reflectionMatch[2].trim(); reflection = reflectionMatch[1].trim(); } } else if (text.startsWith("Action_Summary:")) { const summaryMatch = text.match(/Action_Summary: (.+?)(?=\s*Action:|$)/); if (summaryMatch) { thought = summaryMatch[1].trim(); } } if (!text.includes("Action:")) { actionStr = text; } else { const actionParts = text.split("Action:"); actionStr = actionParts[actionParts.length - 1]; } } else if (mode === "o1") { const thoughtMatch = text.match(/<Thought>\s*(.*?)\s*<\/Thought>/); const actionSummaryMatch = text.match( /\nAction_Summary:\s*(.*?)\s*Action:/ ); const actionMatch = text.match(/\nAction:\s*(.*?)\s*<\/Output>/); const thoughtContent = thoughtMatch ? thoughtMatch[1] : null; const actionSummaryContent = actionSummaryMatch ? actionSummaryMatch[1] : null; const actionContent = actionMatch ? actionMatch[1] : null; thought = `${thoughtContent} <Action_Summary> ${actionSummaryContent}`; actionStr = actionContent || ""; } const allActions = actionStr.split("\n\n"); const actions = []; for (const rawStr of allActions) { const actionInstance = parseAction(rawStr.replace(/\n/g, String.raw`\n`).trimStart()); let actionType = ""; let actionInputs = {}; if (actionInstance) { actionType = actionInstance.function; const params = actionInstance.args; actionInputs = {}; for (const [paramName, param] of Object.entries(params)) { if (!param) continue; const trimmedParam = param.trim(); actionInputs[paramName.trim()] = trimmedParam; if (paramName.includes("start_box") || paramName.includes("end_box")) { const oriBox = trimmedParam; const numbers = oriBox.replace(/[()[\]]/g, "").split(","); const floatNumbers = numbers.map( (num) => Number.parseFloat(num) / factor ); if (floatNumbers.length === 2) { floatNumbers.push(floatNumbers[0], floatNumbers[1]); } actionInputs[paramName.trim()] = JSON.stringify(floatNumbers); } } } actions.push({ reflection, thought: thought || "", action_type: actionType, action_inputs: actionInputs }); } return actions; } function parseAction(actionStr) { try { const functionPattern = /^(\w+)\((.*)\)$/; const match = actionStr.trim().match(functionPattern); if (!match) { throw new Error("Not a function call"); } const [_, functionName, argsStr] = match; const kwargs = {}; if (argsStr.trim()) { const argPairs = argsStr.match(/([^,']|'[^']*')+/g) || []; for (const pair of argPairs) { const [key, ...valueParts] = pair.split("="); if (!key) continue; const value = valueParts.join("=").trim().replace(/^['"]|['"]$/g, ""); kwargs[key.trim()] = value; } } return { function: functionName, args: kwargs }; } catch (e) { console.error(`Failed to parse action '${actionStr}': ${e}`); return null; } } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { actionParser, parseActionVlm });