tmaiplugin
Version:
TrainingMaster AIGC Component
114 lines (112 loc) • 5.87 kB
text/typescript
import { FaqItem } from "./declare";
import { fixedJsonString,splitLongText } from "./util/stringutil";
import PluginBase from './aipluginbase'
// System-role prompts (sent verbatim to the model, in Chinese):
//   [0] frames the model as a cross-industry training expert that extracts key
//       points from documents to form training questions and answers.
//   [1] frames the model as a programming expert that structures content into
//       JSON-format data.
const FAQ_ROLE_DEFINE = [
'你是一位精通各行业的培训专家,擅长根据文档内容提取重点要点,形成培训问题及答案',
'你是一位程序设计专家,特别擅长数据分析,内容组织并进行结构化设计,形成Json格式的数据'
]
// User prompt templates; placeholders are substituted by the caller before sending.
//   [0] question-extraction prompt. Placeholders: {{COUNT}} (number of questions
//       to produce) and {{CONTENT}} (the source text). It instructs the model to
//       answer with just "NO" when no question can be extracted.
//   [1] JSON-formatting prompt. Placeholder: {{CONTENT}} (the free-text answer to
//       convert into a [{"question","answer","keywords":[...]}] JSON array).
const PROMPT_LINK:any = [
`根据以下内容提炼{{COUNT}}道问题、答案和答案的关键词。要求如下:
1、问题偏口语化,问题需要与所给的资料相关,绝不能问超出所给资料的范围;所提问题需要准确、完整、清晰,绝不能有歧义;意思相近的问题不要重复给出;
2、对应的答案必须要在资料中能找到,绝对不能给出在资料里没有的答案,答案的关键词必须完整在所给的答案中出现,绝不能给出不在答案里的关键词;
3、生成问题的时候,请出具有代表性的问题,对于一些无关紧要的问题可以忽略。特别注意资料中关于数字、参数、特点等关键信息的提取,在给出的问题中尽可能覆盖;
4、输出结果包括:问题、答案、答案关键词;
5、如果无法从内容中提炼任何问题,仅需返回"NO"。
内容如下:"""
{{CONTENT}}
"""`,
// Second template: asks for the extracted Q&A to be re-emitted as a JSON array.
`请将内容按照[{"question":"提问内容","answer":"答案内容","keywords":["关键词1","关键词2"]}]的标准Json数组结构输出。
内容如下:"""
{{CONTENT}}
"""`
]
/**
 * FAQ extractor plugin.
 *
 * Splits a long text into sections, asks the chat model to derive
 * question/answer/keyword triples from each section, then asks the model a
 * second time to reformat its free-text reply as a JSON array.
 *
 * NOTE(review): `gptInstance` and `emit` are expected to be provided by
 * `PluginBase`; their exact contracts are not visible in this file.
 */
export class FaqPlugin extends PluginBase {
    /**
     * Generates FAQ items from the supplied text.
     *
     * @param params Options object with the fields:
     *   - `content`: the source text to extract questions from;
     *   - `count`: the total number of FAQ items wanted;
     *   - `sectionlength`: section size handed to `splitLongText` (defaults to 1024);
     *   - `axios`: request options forwarded to every `chatRequest` call.
     * @returns `null` when no `gptInstance` is available; otherwise
     *   `{ successed: true, message }` where `message` holds at most `count` items.
     *
     * Emits `parseout` with each section's items as soon as they are parsed, and
     * `parseover` once with every collected item when processing finishes.
     */
    async execute(params: any): Promise<any> {
        const { content, count, sectionlength, axios } = params;
        if (!this.gptInstance) return null;
        const sections = splitLongText(content, sectionlength || 1024);
        let faqs: FaqItem[] = [];
        let gotted = 0;
        while (sections.length > 0 && gotted < count) {
            // Spread the questions still needed evenly over the sections still pending.
            const remaining = count - gotted;
            const itemCount = Math.min(Math.ceil(remaining / sections.length), remaining);
            // Take (and drop) the next section; each section is sent exactly once.
            const section = sections.shift();
            // Replacer functions are used so that `$&`, `$$`, `$1`... sequences in the
            // source text are inserted literally instead of being interpreted as
            // special replacement patterns.
            const userPrompt = String(PROMPT_LINK[0])
                .replace('{{COUNT}}', () => String(itemCount))
                .replace('{{CONTENT}}', () => String(section));
            const subarray = [
                { role: 'system', content: FAQ_ROLE_DEFINE[0] },
                { role: 'user', content: userPrompt },
            ];
            console.log('Faq Question Pick Prompt:', subarray)
            let result: any = await this.gptInstance.chatRequest(subarray, { replyCounts: 1 }, axios);
            // Retry once on failures other than a content-filter rejection; give up
            // on this section if the retry fails as well.
            if (!result?.successed && result?.error !== 'content_filter') {
                console.log('network error,retry onemore time')
                result = await this.gptInstance.chatRequest(subarray, { replyCounts: 1 }, axios);
            }
            if (result?.successed && result.message) {
                const msgs = await this.pickUpFaqContent(result.message, axios);
                if (msgs.length) {
                    // Notify listeners about the items extracted from this section.
                    this.emit('parseout', { type: 'qa', items: msgs })
                    gotted += msgs.length;
                    faqs = faqs.concat(msgs);
                }
            }
        }
        // Signal that all sections have been processed.
        this.emit('parseover', { type: 'qa', items: faqs })
        return { successed: true, message: faqs.slice(0, count) };
    }

    /**
     * Converts the model's free-text FAQ reply into structured FAQ items.
     *
     * The first reply's content is sent back to the model with the
     * JSON-formatting prompt, the answer is parsed with `fixedJsonString`, and
     * each item is cleaned up: a leading "问题N:" label is stripped from the
     * question and only keywords that literally occur in the answer are kept.
     *
     * @param messages Reply list from `chatRequest`; only `messages[0].message.content` is read.
     * @param axiosOption Request options for the JSON-formatting call (defaults to `{}`).
     * @returns The parsed items, or an empty array when there is nothing usable
     *   (no content, a "NO" reply, a failed request, or unparsable JSON).
     */
    protected async pickUpFaqContent(messages: Array<any>, axiosOption: any = {}): Promise<Array<FaqItem>> {
        const rawContent = messages?.[0]?.message?.content;
        if (!this.gptInstance || !rawContent) return [];
        const answerString = String(rawContent).trim().replace(/[\t\n\v\r\f]/g, '');
        // The extraction prompt asks for a bare "NO" when nothing can be extracted.
        // Match it only as a standalone leading token (optionally quoted) so that
        // genuine answers merely containing "NO" (e.g. "NOx", "NOTE") are kept.
        if (/^["'\u201C\u201D]*NO(?![A-Za-z])/.test(answerString)) return [];
        const orgJsonPrompt = [
            { role: 'system', content: FAQ_ROLE_DEFINE[1] },
            // Replacer function: insert the answer literally even if it contains `$` patterns.
            { role: 'user', content: String(PROMPT_LINK[1]).replace('{{CONTENT}}', () => answerString) }
        ];
        console.log('orgJsonPrompt', orgJsonPrompt)
        const fixedJsonResult: any = await this.gptInstance.chatRequest(orgJsonPrompt, { replyCounts: 1 }, axiosOption);
        if (!fixedJsonResult?.successed) return [];
        const jsonContent = fixedJsonResult.message?.[0]?.message?.content;
        if (typeof jsonContent !== 'string') return [];
        const jsonText = jsonContent.trim().replace(/[\t\n\v\r\f]/g, '');
        try {
            const parsed: unknown = fixedJsonString(jsonText);
            if (!Array.isArray(parsed) || !parsed.length) return [];
            const items: FaqItem[] = [];
            for (const item of parsed) {
                // Skip malformed entries instead of discarding the whole batch.
                if (!item || typeof item.question !== 'string') continue;
                const answer = String(item.answer ?? '');
                // Keywords normally arrive as an array; tolerate a delimited string
                // (ASCII comma, full-width comma or ideographic comma) as well.
                const candidates: string[] = Array.isArray(item.keywords)
                    ? item.keywords.map((k: unknown) => String(k))
                    : String(item.keywords ?? '').split(/[,\uFF0C\u3001]/);
                // Strip a leading "问题N:" label (ASCII or full-width colon).
                item.question = item.question.replace(/^问题\s*\d*\s*[:\uFF1A]*/, '');
                // Keep only keywords that really appear in the answer text.
                item.keywords = candidates
                    .map((k) => k.trim())
                    .filter((k) => k && answer.includes(k));
                items.push(item);
            }
            return items;
        } catch (err) {
            console.log('JSON error', err)
            return [];
        }
    }
}