UNPKG

@visactor/vmind

Version:

<div align="center"> <a href="https://github.com/VisActor#gh-light-mode-only" target="_blank"> <img alt="VisActor Logo" width="200" src="https://github.com/VisActor/.github/blob/main/profile/logo_500_200_light.svg"/> </a> <a href="https://githu

128 lines (118 loc) 6.17 kB
import { AtomName } from "../../types/atom";
import { BaseAtom } from "../base";
import { merge, pick } from "@visactor/vutils";
import { getBasePrompt, getFieldInfoPrompt, getUserQuery } from "./prompt/prompt";
import { getLanguageOfText } from "../../utils/text";
import { formatFieldInfo, getFieldInfoFromDataset, getRoleByFieldType, hasMeasureField } from "../../utils/field";
import { getCtxBymeasureAutoTransfer } from "../dataClean/utils";
import { DataType } from "../../types";
import { Factory } from "../../core/factory";

/**
 * Atom that extracts a data table and its field descriptions from free text.
 *
 * It builds the LLM prompt messages from `context.text` / `context.fieldInfo`
 * and turns the LLM's JSON answer back into an updated context.
 */
export class DataExtractionAtom extends BaseAtom {
  /**
   * @param {object} context - Initial atom context (forwarded to BaseAtom).
   * @param {object} option - Atom options (forwarded to BaseAtom).
   */
  constructor(context, option) {
    super(context, option);
    this.name = AtomName.DATA_EXTRACT;
    this.isLLMAtom = true;

    // Relative-year words ("this year", "last year", "the year before last")
    // are rewritten to absolute years before the text is sent to the LLM.
    // The year is captured once, when the atom is constructed.
    const currentYear = new Date().getFullYear();
    this.replaceData = [
      { template: "今年", replace: `${currentYear}年` },
      { template: "去年", replace: `${currentYear - 1}年` },
      { template: "前年", replace: `${currentYear - 2}年` }
    ];
  }

  /**
   * Merges the given context over the defaults (empty `dataTable` and `fieldInfo`).
   *
   * @param {object} context - Caller-supplied context.
   * @returns {object} A new merged context object.
   */
  buildDefaultContext(context) {
    const defaults = { dataTable: [], fieldInfo: [] };
    return merge({}, defaults, context);
  }

  /**
   * Extends the base default options with this atom's own defaults.
   *
   * @returns {object} Base defaults plus `reGenerateFieldInfo: true` and `isMultiple: false`.
   */
  buildDefaultOptions() {
    return Object.assign({}, super.buildDefaultOptions(), {
      reGenerateFieldInfo: true,
      isMultiple: false
    });
  }

  /**
   * Reports whether an incoming context differs from the current one in a way
   * that matters to this atom (the `text` or the `fieldInfo` reference changed).
   *
   * @param {object} context - Candidate context.
   * @returns {boolean} True when `text` or `fieldInfo` is not identical.
   */
  shouldRunByContextUpdate(context) {
    return context.text !== this.context.text || context.fieldInfo !== this.context.fieldInfo;
  }

  /**
   * Replaces every relative-year word from `replaceData` in `text` and records,
   * per entry, whether the ORIGINAL text contained that word.
   *
   * Side effect: resets and fills `this.isTextReplaceStatus`.
   *
   * @param {string} text - Raw user text.
   * @returns {string} The rewritten text prefixed with `"text: "`.
   */
  revisedText(text) {
    let newText = text;
    this.isTextReplaceStatus = [];
    for (const entry of this.replaceData) {
      newText = newText.replaceAll(entry.template, entry.replace);
      this.isTextReplaceStatus.push(text.includes(entry.template));
    }
    return `text: ${newText}`;
  }

  /**
   * Builds the chat messages sent to the LLM.
   *
   * Without field info the base extraction prompt is used and the text goes
   * through `revisedText`; with field info the field-info prompt is used and
   * the raw text is appended to a serialized field description.
   *
   * @param {*} query - Forwarded to `getHistoryLLMMessages` (not defined in this
   *   file; presumably inherited from BaseAtom).
   * @returns {Array<object>} Messages shaped `{ role, content }`, followed by
   *   whatever `getUserQuery` / `getHistoryLLMMessages` return.
   */
  getLLMMessages(query) {
    const { fieldInfo, text } = this.context;
    const { showThoughts, reGenerateFieldInfo, llm, isMultiple } = this.options;
    const addtionContent = this.getHistoryLLMMessages(query);

    // An explicitly configured language wins; otherwise detect it from the text.
    const configuredLanguage = this.options == null ? undefined : this.options.language;
    const language = configuredLanguage != null ? configuredLanguage : getLanguageOfText(text);

    if (!fieldInfo || !fieldInfo.length) {
      return [
        { role: "system", content: getBasePrompt(llm.options.model, language, isMultiple, showThoughts) },
        { role: "user", content: this.revisedText(text) },
        ...getUserQuery(llm.options.model, language, isMultiple),
        ...addtionContent
      ];
    }

    // Only these four properties of each field are shown to the LLM.
    const fieldInfoContent = fieldInfo.map(info => pick(info, ["fieldName", "dataExample", "type", "description"]));
    const textLabel = "english" === language ? "Extracted text is bellow:" : "提取文本如下:";
    const userContent = `User's fieldInfo is bellow:\n\`\`\` TypeScript\n${JSON.stringify(fieldInfoContent)}\n\`\`\`\n${textLabel}${text}\n`;

    return [
      { role: "system", content: getFieldInfoPrompt(language, showThoughts, reGenerateFieldInfo) },
      { role: "user", content: userContent },
      ...addtionContent
    ];
  }

  /**
   * Reconciles the LLM-reported field info with the types derived from the
   * extracted data table.
   *
   * Type precedence per field: ratio flag, then the type derived from the
   * dataset, then the LLM's dimension/measure guess.
   *
   * @param {Array<object>} dataTable - Extracted rows, passed to `getFieldInfoFromDataset`.
   * @param {Array<object>} fieldInfo - Fields reported by the LLM
   *   (`fieldName`, `type`, `isRatio`, `unit` are read).
   * @returns {Array<object>} One `{ fieldName, unit, ratioGranularity, type, role }` per input field.
   */
  revisedFieldInfo(dataTable, fieldInfo) {
    // A Map keyed by the stringified name keeps the lookup linear and prevents
    // field names such as "constructor" or "__proto__" from resolving to
    // inherited Object members.
    const fieldMapping = new Map();
    for (const datasetField of getFieldInfoFromDataset(dataTable)) {
      fieldMapping.set(String(datasetField.fieldName), datasetField);
    }

    return fieldInfo.map(info => {
      const { fieldName, type, isRatio, unit } = info;
      const mapInfo = fieldMapping.get(String(fieldName));
      const datasetType = mapInfo == null ? undefined : mapInfo.type;

      let finalType;
      if (isRatio) {
        finalType = DataType.RATIO;
      } else if (datasetType != null) {
        finalType = datasetType;
      } else {
        finalType = "dimension" === type ? DataType.STRING : DataType.NUMERICAL;
      }

      return {
        fieldName,
        unit,
        ratioGranularity: isRatio ? unit : null,
        type: finalType,
        role: getRoleByFieldType(finalType)
      };
    });
  }

  /**
   * Returns the part of `text` that begins with the `start` marker and ends
   * with the first `end` marker found after it.
   *
   * The markers are matched literally. They typically come from LLM output, so
   * they may contain characters that are special in a regular expression;
   * a plain substring search cannot throw or mis-match on those.
   *
   * @param {string} text - Full source text.
   * @param {Array<string>} [textRange] - `[start, end]` marker pair.
   * @returns {string} The delimited excerpt (markers included), or `text`
   *   unchanged when a marker is missing/empty or the pair is not found.
   */
  parseSubText(text, textRange) {
    const [start, end] = textRange != null ? textRange : [];
    if (!start || !end || !text) {
      return text;
    }

    const startMarker = String(start);
    const endMarker = String(end);

    const startIndex = text.indexOf(startMarker);
    if (startIndex === -1) {
      return text;
    }
    // The end marker must begin at or after the end of the start marker.
    const endIndex = text.indexOf(endMarker, startIndex + startMarker.length);
    if (endIndex === -1) {
      return text;
    }
    return text.slice(startIndex, endIndex + endMarker.length);
  }

  /**
   * Post-processes a multi-dataset LLM answer: attaches the matching text
   * excerpt and normalized field info to each dataset, then drops datasets
   * for which `hasMeasureField` is false.
   *
   * @param {Array<object>} dataset - Datasets from the LLM
   *   (`textRange`, `dataTable`, `fieldInfo` are read).
   * @returns {Array<object>} Enriched datasets that passed the filter.
   */
  parseMultipleResult(dataset) {
    const enriched = dataset.map(result =>
      Object.assign({}, result, {
        text: this.parseSubText(this.context.text, result.textRange),
        fieldInfo: formatFieldInfo(this.revisedFieldInfo(result.dataTable, result.fieldInfo))
      })
    );
    return enriched.filter(result => hasMeasureField(result.fieldInfo));
  }

  /**
   * Converts the parsed LLM JSON into the next context.
   *
   * @param {object} resJson - Parsed LLM answer (`dataTable`, `fieldInfo`,
   *   `isDataExtraction`, `dataset`, `thoughts` are read).
   * @returns {object} The current context unchanged when the answer is not a
   *   data extraction result; otherwise a copy of the context extended with
   *   `thoughts` and either `datasets` (multiple mode) or `fieldInfo` + `dataTable`.
   */
  parseLLMContent(resJson) {
    const { isMultiple } = this.options;
    const { dataTable, fieldInfo, isDataExtraction, dataset, thoughts } = resJson;

    if (false === isDataExtraction || (isMultiple && !dataset)) {
      console.error("It's not a data extraction task");
      return this.context;
    }

    if (isMultiple) {
      return Object.assign({}, this.context, {
        thoughts,
        datasets: this.parseMultipleResult(dataset)
      });
    }

    const llmFieldInfo = this.revisedFieldInfo(dataTable, fieldInfo);

    // Precedence: regenerated field info (when enabled), then the field info
    // already in the context, then an empty list.
    let nextFieldInfo = this.options != null && this.options.reGenerateFieldInfo ? llmFieldInfo : null;
    if (nextFieldInfo == null) {
      nextFieldInfo = this.context == null ? undefined : this.context.fieldInfo;
    }
    if (nextFieldInfo == null) {
      nextFieldInfo = [];
    }

    return Object.assign({}, this.context, {
      thoughts,
      fieldInfo: formatFieldInfo(nextFieldInfo),
      dataTable
    });
  }

  /**
   * Path used when no LLM is involved: delegates to
   * `getCtxBymeasureAutoTransfer` with the current context and its text.
   *
   * @returns {object} Whatever `getCtxBymeasureAutoTransfer` returns.
   */
  _runWithOutLLM() {
    return getCtxBymeasureAutoTransfer(this.context, this.context.text);
  }
}

/**
 * Registers `DataExtractionAtom` with the atom factory under `AtomName.DATA_EXTRACT`.
 */
export const registerDataExtractionAtom = () => {
  Factory.registerAtom(AtomName.DATA_EXTRACT, DataExtractionAtom);
};
//# sourceMappingURL=index.js.map