// @ui-tars/action-parser
// Version:
// Action parser SDK for UI-TARS
// 145 lines (144 loc) • 5 kB
// JavaScript
// Module-interop helpers (this looks like bundler-generated boilerplate).
// Short aliases for the reflection primitives used by the helpers below.
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Define every key of `all` on `target` as an enumerable getter.
 *
 * @param {object} target - Object that receives the getters.
 * @param {Object<string, Function>} all - Map of export name to getter function.
 * @returns {void}
 */
function __export(target, all) {
  for (const exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
}

/**
 * Mirror the own properties of `from` onto `to` as getters that read through
 * to `from`, so later changes on `from` stay visible on `to`.
 *
 * @param {object} to - Destination object; keys it already owns are left alone.
 * @param {*} from - Source; anything that is not an object or function is ignored.
 * @param {string} [except] - A single key that must not be copied.
 * @param {PropertyDescriptor} [desc] - Scratch slot for the current descriptor.
 * @returns {object} The `to` object.
 */
function __copyProps(to, from, except, desc) {
  const isObjectLike = Boolean(from) && typeof from === "object";
  if (!isObjectLike && typeof from !== "function") {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    // Keep the source's enumerability; default to enumerable when no descriptor is found.
    const enumerable = !desc || desc.enumerable;
    __defProp(to, key, { get: () => from[key], enumerable });
  }
  return to;
}

/**
 * Build a CommonJS-facing view of `mod`: a fresh object carrying a
 * non-enumerable `__esModule: true` marker plus read-through getters for
 * every own property of `mod`.
 *
 * @param {object} mod - The exports container to wrap.
 * @returns {object} The wrapped exports object.
 */
function __toCommonJS(mod) {
  const marked = __defProp({}, "__esModule", { value: true });
  return __copyProps(marked, mod);
}
// Container that collects this module's public exports.
var actionParser_exports = {};
// Register each public function as an enumerable getter on the container.
// The arrow functions defer the lookup, and `actionParser` / `parseActionVlm`
// are function declarations further down the file, so they are hoisted and
// already defined when a getter is invoked.
__export(actionParser_exports, {
actionParser: () => actionParser,
parseActionVlm: () => parseActionVlm
});
// Publish the container as the CommonJS export object; `__toCommonJS` adds a
// non-enumerable `__esModule: true` marker to the object it returns.
module.exports = __toCommonJS(actionParser_exports);
/**
 * Public entry point: parse a model prediction into structured actions.
 *
 * Only `prediction` and `factor` are forwarded to `parseActionVlm`, so the
 * parsing mode is always that function's default.
 *
 * @param {{ prediction: string, factor?: number }} params - Raw prediction
 *   text and an optional divisor for box values (the parser's default applies
 *   when `factor` is undefined).
 * @returns {{ parsed: Array<object> }} Wrapper holding the parsed action list.
 */
function actionParser(params) {
  const parsed = parseActionVlm(params.prediction, params.factor);
  return { parsed };
}
/**
 * Parse raw model output into a list of structured actions.
 *
 * Two text layouts are recognised, selected by `mode`:
 *  - "bc": an optional leading `Thought:`, `Reflection:` + `Action_Summary:`,
 *    or `Action_Summary:` section, followed by `Action:` and the action text.
 *    When no `Action:` marker is present the whole text is treated as the
 *    action text.
 *  - "o1": `<Thought>...</Thought>`, then `Action_Summary:` and `Action:` on
 *    their own lines, with the action text terminated by `</Output>`.
 * Any other `mode` leaves the action text empty.
 *
 * The action text is split on blank lines; each piece is handed to
 * `parseAction`. Arguments whose name contains `start_box` or `end_box` are
 * parsed as comma-separated numbers, divided by `factor`, expanded from two
 * numbers to four by repeating the pair, and stored as a JSON string.
 *
 * @param {string} text - Raw model output.
 * @param {number} [factor=1e3] - Divisor applied to every box number.
 * @param {string} [mode="bc"] - Layout of `text`: "bc" or "o1".
 * @returns {Array<{reflection: (string|null), thought: string,
 *   action_type: string, action_inputs: Object<string, string>}>}
 *   One entry per blank-line-separated action. Pieces that fail to parse
 *   yield an entry with an empty `action_type` and no inputs.
 */
function parseActionVlm(text, factor = 1e3, mode = "bc") {
  const source = text.trim();
  let reflection = null;
  let thought = null;
  let actionStr = "";

  if (mode === "bc") {
    if (source.startsWith("Thought:")) {
      const thoughtMatch = source.match(/Thought: ([\s\S]+?)(?=\s*Action:|$)/);
      if (thoughtMatch) {
        thought = thoughtMatch[1].trim();
      }
    } else if (source.startsWith("Reflection:")) {
      const reflectionMatch = source.match(
        /Reflection: ([\s\S]+?)Action_Summary: ([\s\S]+?)(?=\s*Action:|$)/
      );
      if (reflectionMatch) {
        thought = reflectionMatch[2].trim();
        reflection = reflectionMatch[1].trim();
      }
    } else if (source.startsWith("Action_Summary:")) {
      // `[\s\S]` (not `.`) so a summary spanning several lines is captured,
      // matching the Thought/Reflection branches above.
      const summaryMatch = source.match(
        /Action_Summary: ([\s\S]+?)(?=\s*Action:|$)/
      );
      if (summaryMatch) {
        thought = summaryMatch[1].trim();
      }
    }
    if (!source.includes("Action:")) {
      actionStr = source;
    } else {
      // Everything after the last "Action:" marker is the action text.
      const actionParts = source.split("Action:");
      actionStr = actionParts[actionParts.length - 1];
    }
  } else if (mode === "o1") {
    // `[\s\S]` lets each section span several lines; the action text in
    // particular may hold multiple blank-line-separated actions (split below).
    const thoughtMatch = source.match(/<Thought>\s*([\s\S]*?)\s*<\/Thought>/);
    const actionSummaryMatch = source.match(
      /\nAction_Summary:\s*([\s\S]*?)\s*Action:/
    );
    const actionMatch = source.match(/\nAction:\s*([\s\S]*?)\s*<\/Output>/);
    const thoughtContent = thoughtMatch ? thoughtMatch[1] : null;
    const actionSummaryContent = actionSummaryMatch ? actionSummaryMatch[1] : null;
    const actionContent = actionMatch ? actionMatch[1] : null;
    // NOTE(review): a missing section is interpolated as the literal text
    // "null"; kept as-is because callers may rely on it — confirm intent.
    thought = `${thoughtContent}\n<Action_Summary>\n${actionSummaryContent}`;
    actionStr = actionContent || "";
  }

  const actions = [];
  for (const rawStr of actionStr.split("\n\n")) {
    // Remaining newlines are turned into the two-character sequence `\n` so
    // the single-line pattern inside parseAction can match the whole call.
    const actionInstance = parseAction(
      rawStr.replace(/\n/g, String.raw`\n`).trimStart()
    );
    let actionType = "";
    let actionInputs = {};
    if (actionInstance) {
      actionType = actionInstance.function;
      actionInputs = {};
      for (const [paramName, param] of Object.entries(actionInstance.args)) {
        if (!param) continue;
        const name = paramName.trim();
        const trimmedParam = param.trim();
        actionInputs[name] = trimmedParam;
        if (paramName.includes("start_box") || paramName.includes("end_box")) {
          // Strip parentheses/brackets, then scale each number by `factor`.
          const floatNumbers = trimmedParam
            .replace(/[()[\]]/g, "")
            .split(",")
            .map((num) => Number.parseFloat(num) / factor);
          if (floatNumbers.length === 2) {
            // Two numbers become four by repeating the pair.
            floatNumbers.push(floatNumbers[0], floatNumbers[1]);
          }
          actionInputs[name] = JSON.stringify(floatNumbers);
        }
      }
    }
    actions.push({
      reflection,
      thought: thought || "",
      action_type: actionType,
      action_inputs: actionInputs
    });
  }
  return actions;
}
/**
 * Parse one call-style action string, e.g. `click(start_box='(1,2)')`, into
 * its function name and keyword arguments.
 *
 * Arguments are separated by commas that sit outside quoted strings. Both
 * single- and double-quoted values are kept whole, so a value may contain
 * commas (and a double-quoted value may contain apostrophes). One leading and
 * one trailing quote character are stripped from each value; text after the
 * first `=` belongs to the value even if it contains further `=` signs.
 *
 * @param {string} actionStr - Text of a single action.
 * @returns {{function: string, args: Object<string, string>} | null}
 *   The parsed call, or `null` when `actionStr` is not of the form
 *   `name(...)` or any error occurs (the failure is logged via console.error).
 */
function parseAction(actionStr) {
  try {
    const functionPattern = /^(\w+)\((.*)\)$/;
    const match = actionStr.trim().match(functionPattern);
    if (!match) {
      throw new Error("Not a function call");
    }
    const [, functionName, argsStr] = match;
    const kwargs = {};
    if (argsStr.trim()) {
      // Tokenise on commas outside quotes. Quoted alternatives are tried
      // first; the last alternative lets an unmatched `"` behave as an
      // ordinary character, as it did before double quotes were recognised.
      const argPairs = argsStr.match(/('[^']*'|"[^"]*"|[^,'])+/g) || [];
      for (const pair of argPairs) {
        const [key, ...valueParts] = pair.split("=");
        if (!key) continue;
        const value = valueParts.join("=").trim().replace(/^['"]|['"]$/g, "");
        kwargs[key.trim()] = value;
      }
    }
    return {
      function: functionName,
      args: kwargs
    };
  } catch (e) {
    console.error(`Failed to parse action '${actionStr}': ${e}`);
    return null;
  }
}
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so this assignment never runs and the real
// `module.exports` set earlier in the file is untouched. The statement only
// spells out the export names in plain source text. Presumably Node's static
// CommonJS export detection reads this shape to offer named ESM imports, so
// keep its exact form — confirm before editing.
0 && (module.exports = {
actionParser,
parseActionVlm
});
;