@visactor/vmind
Version:
<div align="center"> <a href="https://github.com/VisActor#gh-light-mode-only" target="_blank"> <img alt="VisActor Logo" width="200" src="https://github.com/VisActor/.github/blob/main/profile/logo_500_200_light.svg"/> </a> <a href="https://githu
123 lines (117 loc) • 6.46 kB
JavaScript
;
// CommonJS interop boilerplate: set the `__esModule` flag on `exports` and
// pre-declare both named exports as `undefined`; the real values are assigned
// further down, after the class and the register helper are defined.
Object.defineProperty(exports, "__esModule", {
value: !0
}), exports.registerDataExtractionAtom = exports.DataExtractionAtom = void 0;
const atom_1 = require("../../types/atom"), base_1 = require("../base"), vutils_1 = require("@visactor/vutils"), prompt_1 = require("./prompt/prompt"), text_1 = require("../../utils/text"), field_1 = require("../../utils/field"), utils_1 = require("../dataClean/utils"), types_1 = require("../../types"), factory_1 = require("../../core/factory");
/**
 * Escapes every RegExp metacharacter in `value` so that it is matched
 * literally when embedded in a dynamically built pattern.
 *
 * @param {*} value - Text to embed in a pattern (stringified first).
 * @returns {string} The escaped text.
 */
function escapeRegExp(value) {
  return String(value).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Atom that builds the LLM messages used to extract a data table and field
 * information from free text, and turns the LLM's JSON answer back into
 * context updates.
 */
class DataExtractionAtom extends base_1.BaseAtom {
  /**
   * @param context - Initial context, forwarded to the base atom.
   * @param option - Atom options, forwarded to the base atom.
   */
  constructor(context, option) {
    super(context, option);
    this.name = atom_1.AtomName.DATA_EXTRACT;
    this.isLLMAtom = true;

    // Relative-year words ("this year", "last year", "the year before last")
    // are rewritten to absolute years based on the year at construction time.
    const currentYear = new Date().getFullYear();
    this.replaceData = [
      { template: "今年", replace: `${currentYear}年` },
      { template: "去年", replace: `${currentYear - 1}年` },
      { template: "前年", replace: `${currentYear - 2}年` }
    ];
  }

  /**
   * Merges the given context over the defaults (empty `dataTable` and
   * `fieldInfo`).
   *
   * @param context - Caller-supplied context.
   * @returns The merged context object.
   */
  buildDefaultContext(context) {
    const { merge } = vutils_1;
    const defaults = { dataTable: [], fieldInfo: [] };
    return merge({}, defaults, context);
  }

  /**
   * Extends the base atom's default options with this atom's own defaults.
   *
   * @returns Options with `reGenerateFieldInfo: true` and `isMultiple: false`
   *   layered over the base defaults.
   */
  buildDefaultOptions() {
    return Object.assign({}, super.buildDefaultOptions(), {
      reGenerateFieldInfo: true,
      isMultiple: false
    });
  }

  /**
   * Tells whether an incoming context differs from the current one in a way
   * that matters to this atom.
   *
   * @param context - Candidate context.
   * @returns {boolean} True when `text` or `fieldInfo` (compared with `!==`)
   *   differs from the current context.
   */
  shouldRunByContextUpdate(context) {
    const current = this.context;
    return context.text !== current.text || context.fieldInfo !== current.fieldInfo;
  }

  /**
   * Applies every `replaceData` substitution to `text` and records, per
   * template, whether the ORIGINAL text contained it in
   * `this.isTextReplaceStatus`.
   *
   * @param {string} text - Raw user text.
   * @returns {string} The substituted text prefixed with `"text: "`.
   */
  revisedText(text) {
    this.isTextReplaceStatus = [];
    let newText = text;
    for (const { template, replace } of this.replaceData) {
      newText = newText.replaceAll(template, replace);
      this.isTextReplaceStatus.push(text.includes(template));
    }
    return `text: ${newText}`;
  }

  /**
   * Builds the chat messages sent to the LLM.
   *
   * Without field info in the context, the base extraction prompt is used and
   * the user text goes through `revisedText`. With field info, the field-info
   * prompt is used and the picked field descriptions plus the raw text are
   * sent as the user message.
   *
   * @param query - Forwarded to `getHistoryLLMMessages` (not defined in this
   *   class; presumably inherited from the base atom).
   * @returns {Array<{role: string, content: string}>} Messages, with the
   *   history messages appended last.
   */
  getLLMMessages(query) {
    const { getBasePrompt, getUserQuery, getFieldInfoPrompt } = prompt_1;
    const { getLanguageOfText } = text_1;
    const { pick } = vutils_1;

    const { fieldInfo, text } = this.context;
    const { showThoughts, reGenerateFieldInfo, llm, isMultiple } = this.options;
    const addtionContent = this.getHistoryLLMMessages(query);

    // An explicitly configured language wins; otherwise detect it from the text.
    const configuredLanguage = this.options.language;
    const language = configuredLanguage != null ? configuredLanguage : getLanguageOfText(text);

    if (!fieldInfo || !fieldInfo.length) {
      const model = llm.options.model;
      return [
        {
          role: "system",
          content: getBasePrompt(model, language, isMultiple, showThoughts)
        },
        {
          role: "user",
          content: this.revisedText(text)
        },
        ...getUserQuery(model, language, isMultiple),
        ...addtionContent
      ];
    }

    // Only forward the field properties the prompt needs.
    const fieldInfoContent = fieldInfo.map(info =>
      pick(info, [ "fieldName", "dataExample", "type", "description" ])
    );
    const textLabel = language === "english" ? "Extracted text is bellow:" : "提取文本如下:";
    const userContent = `User's fieldInfo is bellow:\n\`\`\` TypeScript\n${JSON.stringify(fieldInfoContent)}\n\`\`\`\n${textLabel}${text}\n`;

    return [
      {
        role: "system",
        content: getFieldInfoPrompt(language, showThoughts, reGenerateFieldInfo)
      },
      {
        role: "user",
        content: userContent
      },
      ...addtionContent
    ];
  }

  /**
   * Normalises LLM-provided field info against the fields derived from the
   * data table.
   *
   * Type precedence per field: ratio (when `isRatio` is truthy), then the type
   * of the same-named field derived from `dataTable`, then string for
   * `"dimension"` fields and numerical otherwise.
   *
   * @param dataTable - Extracted rows, passed to `getFieldInfoFromDataset`.
   * @param {Array} fieldInfo - Field descriptions returned by the LLM.
   * @returns {Array} One `{fieldName, unit, ratioGranularity, type, role}`
   *   object per input field.
   */
  revisedFieldInfo(dataTable, fieldInfo) {
    const { getFieldInfoFromDataset, getRoleByFieldType } = field_1;
    const { DataType } = types_1;

    // Prototype-less dictionary: built in one pass, and field names such as
    // "constructor" or "__proto__" cannot collide with inherited members.
    const fieldMapping = Object.create(null);
    for (const datasetField of getFieldInfoFromDataset(dataTable)) {
      fieldMapping[datasetField.fieldName] = datasetField;
    }

    return fieldInfo.map(info => {
      const { fieldName, type, isRatio, unit } = info;
      const mapInfo = fieldMapping[fieldName];

      let finalType;
      if (isRatio) {
        finalType = DataType.RATIO;
      } else if (mapInfo != null && mapInfo.type != null) {
        finalType = mapInfo.type;
      } else {
        finalType = type === "dimension" ? DataType.STRING : DataType.NUMERICAL;
      }

      return {
        fieldName,
        unit,
        ratioGranularity: isRatio ? unit : null,
        type: finalType,
        role: getRoleByFieldType(finalType)
      };
    });
  }

  /**
   * Returns the first span of `text` that starts with `textRange[0]` and ends
   * with the nearest following `textRange[1]` (both markers included).
   *
   * The markers are matched literally: they are escaped before being placed
   * in the pattern, so markers containing characters such as `(`, `?` or `$`
   * neither throw nor match unintended text.
   *
   * @param {string} text - Full text to search.
   * @param {Array} textRange - `[start, end]` marker pair; may be nullish.
   * @returns {string} The matched span, or `text` unchanged when a marker or
   *   the text is missing/empty or nothing matches.
   */
  parseSubText(text, textRange) {
    const [ start, end ] = textRange != null ? textRange : [];
    if (!start || !end || !text) {
      return text;
    }
    // `s` lets `.` span line breaks; the lazy quantifier stops at the first end marker.
    const pattern = new RegExp(`${escapeRegExp(start)}(.*?)${escapeRegExp(end)}`, "s");
    const match = pattern.exec(text);
    return match !== null ? `${start}${match[1]}${end}` : text;
  }

  /**
   * Post-processes a multi-dataset LLM answer: attaches the matching sub-text
   * and normalised field info to every entry, then drops entries for which
   * `hasMeasureField` is falsy.
   *
   * @param {Array} dataset - Entries carrying `textRange`, `dataTable` and
   *   `fieldInfo`.
   * @returns {Array} The enriched, filtered entries.
   */
  parseMultipleResult(dataset) {
    const { formatFieldInfo, hasMeasureField } = field_1;
    const enriched = dataset.map(result =>
      Object.assign({}, result, {
        text: this.parseSubText(this.context.text, result.textRange),
        fieldInfo: formatFieldInfo(this.revisedFieldInfo(result.dataTable, result.fieldInfo))
      })
    );
    return enriched.filter(result => hasMeasureField(result.fieldInfo));
  }

  /**
   * Converts the parsed LLM JSON into the next context.
   *
   * @param resJson - Parsed LLM answer (`dataTable`, `fieldInfo`,
   *   `isDataExtraction`, `dataset`, `thoughts`).
   * @returns The current context unchanged when the answer is flagged as not
   *   a data extraction task (or `dataset` is missing in multiple mode);
   *   otherwise a copy of the context extended with the parsed result.
   */
  parseLLMContent(resJson) {
    const { formatFieldInfo } = field_1;
    const { isMultiple, reGenerateFieldInfo } = this.options;
    const { dataTable, fieldInfo, isDataExtraction, dataset, thoughts } = resJson;

    if (isDataExtraction === false || (isMultiple && !dataset)) {
      console.error("It's not a data extraction task");
      return this.context;
    }

    if (isMultiple) {
      return Object.assign({}, this.context, {
        thoughts,
        datasets: this.parseMultipleResult(dataset)
      });
    }

    const llmFieldInfo = this.revisedFieldInfo(dataTable, fieldInfo);

    // Precedence: regenerated LLM field info (when enabled), then the field
    // info already in the context, then an empty list.
    let resolvedFieldInfo = reGenerateFieldInfo ? llmFieldInfo : null;
    if (resolvedFieldInfo == null) {
      resolvedFieldInfo = this.context == null ? undefined : this.context.fieldInfo;
    }
    if (resolvedFieldInfo == null) {
      resolvedFieldInfo = [];
    }

    return Object.assign({}, this.context, {
      thoughts,
      fieldInfo: formatFieldInfo(resolvedFieldInfo),
      dataTable
    });
  }

  /**
   * Non-LLM execution path: delegates to `getCtxBymeasureAutoTransfer` with
   * the current context and its text.
   *
   * @returns Whatever `getCtxBymeasureAutoTransfer` returns (presumably the
   *   updated context — confirm against the dataClean utils).
   */
  _runWithOutLLM() {
    const { getCtxBymeasureAutoTransfer } = utils_1;
    return getCtxBymeasureAutoTransfer(this.context, this.context.text);
  }
}
// Publish the atom class.
exports.DataExtractionAtom = DataExtractionAtom;

/**
 * Registers DataExtractionAtom with the atom Factory under
 * `AtomName.DATA_EXTRACT`.
 */
const registerDataExtractionAtom = () => {
  const atomName = atom_1.AtomName.DATA_EXTRACT;
  factory_1.Factory.registerAtom(atomName, DataExtractionAtom);
};

// Publish the registration helper.
exports.registerDataExtractionAtom = registerDataExtractionAtom;
//# sourceMappingURL=index.js.map