UNPKG

@visactor/vmind

Version:

<div align="center"> <a href="https://github.com/VisActor#gh-light-mode-only" target="_blank"> <img alt="VisActor Logo" width="200" src="https://github.com/VisActor/.github/blob/main/profile/logo_500_200_light.svg"/> </a> <a href="https://githu

123 lines (117 loc) 6.46 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: !0 }), exports.registerDataExtractionAtom = exports.DataExtractionAtom = void 0; const atom_1 = require("../../types/atom"), base_1 = require("../base"), vutils_1 = require("@visactor/vutils"), prompt_1 = require("./prompt/prompt"), text_1 = require("../../utils/text"), field_1 = require("../../utils/field"), utils_1 = require("../dataClean/utils"), types_1 = require("../../types"), factory_1 = require("../../core/factory"); class DataExtractionAtom extends base_1.BaseAtom { constructor(context, option) { super(context, option), this.name = atom_1.AtomName.DATA_EXTRACT, this.isLLMAtom = !0; const currentYear = (new Date).getFullYear(); this.replaceData = [ { template: "今年", replace: `${currentYear}年` }, { template: "去年", replace: currentYear - 1 + "年" }, { template: "前年", replace: currentYear - 2 + "年" } ]; } buildDefaultContext(context) { return (0, vutils_1.merge)({}, { dataTable: [], fieldInfo: [] }, context); } buildDefaultOptions() { return Object.assign(Object.assign({}, super.buildDefaultOptions()), { reGenerateFieldInfo: !0, isMultiple: !1 }); } shouldRunByContextUpdate(context) { return context.text !== this.context.text || context.fieldInfo !== this.context.fieldInfo; } revisedText(text) { let newText = text; return this.isTextReplaceStatus = [], this.replaceData.forEach((v => { newText = newText.replaceAll(v.template, v.replace), this.isTextReplaceStatus.push(text.includes(v.template)); })), `text: ${newText}`; } getLLMMessages(query) { var _a, _b; const {fieldInfo: fieldInfo, text: text} = this.context, {showThoughts: showThoughts, reGenerateFieldInfo: reGenerateFieldInfo, llm: llm, isMultiple: isMultiple} = this.options, addtionContent = this.getHistoryLLMMessages(query), language = null !== (_b = null === (_a = this.options) || void 0 === _a ? void 0 : _a.language) && void 0 !== _b ? _b : (0, text_1.getLanguageOfText)(text); if (!fieldInfo || !(null == fieldInfo ? void 0 : fieldInfo.length)) return [ { role: "system", content: (0, prompt_1.getBasePrompt)(llm.options.model, language, isMultiple, showThoughts) }, { role: "user", content: this.revisedText(text) }, ...(0, prompt_1.getUserQuery)(llm.options.model, language, isMultiple), ...addtionContent ]; const fieldInfoContent = fieldInfo.map((info => (0, vutils_1.pick)(info, [ "fieldName", "dataExample", "type", "description" ]))), userContent = `User's fieldInfo is bellow:\n\`\`\` TypeScript\n${JSON.stringify(fieldInfoContent)}\n\`\`\`\n${"english" === language ? "Extracted text is bellow:" : "提取文本如下:"}${text}\n`; return [ { role: "system", content: (0, prompt_1.getFieldInfoPrompt)(language, showThoughts, reGenerateFieldInfo) }, { role: "user", content: userContent }, ...addtionContent ]; } revisedFieldInfo(dataTable, fieldInfo) { const fieldMapping = (0, field_1.getFieldInfoFromDataset)(dataTable).reduce(((prev, curV) => Object.assign(Object.assign({}, prev), { [curV.fieldName]: curV })), {}); return fieldInfo.map((info => { var _a; const {fieldName: fieldName, type: type, isRatio: isRatio, unit: unit} = info, mapInfo = null == fieldMapping ? void 0 : fieldMapping[fieldName]; let finalType = "dimension" === type ? types_1.DataType.STRING : types_1.DataType.NUMERICAL; return finalType = isRatio ? types_1.DataType.RATIO : null !== (_a = null == mapInfo ? void 0 : mapInfo.type) && void 0 !== _a ? _a : finalType, { fieldName: fieldName, unit: unit, ratioGranularity: isRatio ? unit : null, type: finalType, role: (0, field_1.getRoleByFieldType)(finalType) }; })); } parseSubText(text, textRange) { const [start, end] = null != textRange ? textRange : []; if (!start || !end || !text) return text; let match; return null !== (match = new RegExp(start + "(.*?)" + end, "gs").exec(text)) ? `${start}${match[1]}${end}` : text; } parseMultipleResult(dataset) { return dataset.map((result => Object.assign(Object.assign({}, result), { text: this.parseSubText(this.context.text, result.textRange), fieldInfo: (0, field_1.formatFieldInfo)(this.revisedFieldInfo(result.dataTable, result.fieldInfo)) }))).filter((result => (0, field_1.hasMeasureField)(result.fieldInfo))); } parseLLMContent(resJson) { var _a, _b, _c, _d; const {isMultiple: isMultiple} = this.options, {dataTable: dataTable, fieldInfo: fieldInfo, isDataExtraction: isDataExtraction, dataset: dataset, thoughts: thoughts} = resJson; if (!1 === isDataExtraction || isMultiple && !dataset) return console.error("It's not a data extraction task"), this.context; if (isMultiple) return Object.assign(Object.assign({}, this.context), { thoughts: thoughts, datasets: this.parseMultipleResult(dataset) }); const llmFieldInfo = this.revisedFieldInfo(dataTable, fieldInfo); return Object.assign(Object.assign({}, this.context), { thoughts: thoughts, fieldInfo: (0, field_1.formatFieldInfo)(null !== (_d = null !== (_b = (null === (_a = this.options) || void 0 === _a ? void 0 : _a.reGenerateFieldInfo) ? llmFieldInfo : null) && void 0 !== _b ? _b : null === (_c = this.context) || void 0 === _c ? void 0 : _c.fieldInfo) && void 0 !== _d ? _d : []), dataTable: dataTable }); } _runWithOutLLM() { return (0, utils_1.getCtxBymeasureAutoTransfer)(this.context, this.context.text); } } exports.DataExtractionAtom = DataExtractionAtom; const registerDataExtractionAtom = () => { factory_1.Factory.registerAtom(atom_1.AtomName.DATA_EXTRACT, DataExtractionAtom); }; exports.registerDataExtractionAtom = registerDataExtractionAtom; //# sourceMappingURL=index.js.map