tmaiplugin
Version:
TrainingMaster AIGC Component
78 lines (75 loc) • 3.37 kB
text/typescript
/** Default maximum number of characters taken from the input per section. */
const SECTION_LENGTH:number = 1024;
/** A trailing section shorter than this is appended to the section before it. */
const MIN_LAST_SECTION_LENGTH = 100;
/** Characters after which a section is allowed to end (full stop, closing/straight quote, question mark). */
const SECTION_BREAK_PATTERN = /[。”"?]/;

/**
 * Splits a long text into sections of at most `len` input characters each.
 *
 * Each section preferably ends right after the last sentence-ending character
 * (see `SECTION_BREAK_PATTERN`) found inside its window; when the window holds
 * no such character the text is cut hard at `len`. Text that already fits in
 * one window is never split. All whitespace is removed from every section, and
 * sections that end up empty are dropped.
 *
 * To avoid a meaningless short tail, a final section shorter than
 * `MIN_LAST_SECTION_LENGTH` is appended to the previous one, so the last
 * returned section may be longer than `len`.
 *
 * @param content - Text to split.
 * @param len - Maximum section length in characters. Values that are not
 *   finite or are below 1 fall back to `SECTION_LENGTH`; fractions are floored.
 * @returns The sections in their original order (empty for blank input).
 */
export function splitLongText(content: string, len: number = SECTION_LENGTH): string[] {
  // A zero/negative length would never advance the cursor, and a fractional
  // one would make slicing and cursor arithmetic disagree.
  const maxLength = Number.isFinite(len) && len >= 1 ? Math.floor(len) : SECTION_LENGTH;
  const total = content.length;
  const sections: string[] = [];

  let start = 0;
  while (start < total) {
    // Exclusive end of the current section; the remainder is taken whole
    // when it already fits into a single window.
    let end = total;
    if (total - start > maxLength) {
      end = start + maxLength;
      // Walk backwards inside the window to end on a sentence boundary
      // instead of cutting at an arbitrary character.
      for (let i = end - 1; i >= start; i--) {
        if (SECTION_BREAK_PATTERN.test(content.charAt(i))) {
          end = i + 1;
          break;
        }
      }
    }

    const section = content.slice(start, end).replace(/\s+/g, '');
    if (section) sections.push(section);
    start = end;
  }

  // Fold a too-short final section into its predecessor.
  if (sections.length >= 2) {
    const tail = sections[sections.length - 1] ?? '';
    if (tail.length < MIN_LAST_SECTION_LENGTH) {
      sections.pop();
      const previous = sections.pop() ?? '';
      sections.push(previous + tail);
    }
  }
  return sections;
}
/**
 * Parses one `[...]` chunk, preferring strict JSON.
 *
 * Falls back to `eval` only when `JSON.parse` rejects the chunk, so that
 * non-strict literals (single quotes, unquoted keys, trailing commas) are
 * still accepted. Throws when neither approach can read the chunk.
 *
 * SECURITY NOTE(review): `eval` executes arbitrary code contained in the
 * chunk. If this text can come from an untrusted source, replace the fallback
 * with a tolerant, non-executing parser.
 */
function parseArrayChunk(chunk: string): any {
  try {
    return JSON.parse(chunk);
  } catch {
    // Not strict JSON; try the lenient fallback below.
  }
  return eval(chunk);
}

/**
 * Extracts an array from a string that is supposed to contain JSON and
 * repairs a few easily recognisable mistakes. This is deliberately simple: it
 * only looks at the outermost bracket/brace positions.
 *
 * Steps:
 * 1. If the text holds a bare object (`{...}` that starts before any `[` and
 *    ends after the last `]`), it is wrapped in `[` `]`; otherwise the span
 *    from the first `[` to the last `]` is used. Without a usable span the
 *    result is `[]`.
 * 2. If that span is valid JSON it is returned as parsed, untouched, so string
 *    values containing `}{` or `][` are not damaged by the repairs.
 * 3. Otherwise a comma is inserted between adjacent objects (`}{`), arrays
 *    glued together without a comma (`][`) are parsed separately and
 *    concatenated; whitespace between the two symbols is tolerated. Chunks
 *    that still cannot be parsed are logged and skipped.
 *
 * @param jsonstr - Raw text expected to contain a JSON array or object.
 * @returns The parsed items; `[]` when nothing could be recovered or the
 *   input is not a string.
 */
export function fixedJsonString(jsonstr: string): any[] {
  console.log('input json string:', jsonstr)
  // Guard against callers passing null/undefined at runtime.
  if (typeof jsonstr !== 'string') return [];

  const firstBracket = jsonstr.indexOf('[');
  const lastBracket = jsonstr.lastIndexOf(']');
  const firstBrace = jsonstr.indexOf('{');
  const lastBrace = jsonstr.lastIndexOf('}');

  // With no '[' at all, any '{' position counts as "before the bracket".
  const bracketLimit = firstBracket >= 0 ? firstBracket : Infinity;
  const isBareObject =
    firstBrace >= 0 &&
    firstBrace < bracketLimit &&
    lastBrace > firstBrace &&
    lastBrace > lastBracket;

  let candidate: string;
  if (isBareObject) {
    // The text is an object rather than an array: wrap it for the caller.
    candidate = '[' + jsonstr.slice(firstBrace, lastBrace + 1) + ']';
  } else if (firstBracket < 0 || lastBracket <= firstBracket) {
    return [];
  } else {
    candidate = jsonstr.slice(firstBracket, lastBracket + 1);
  }

  // Fast path: already valid JSON, so no repair heuristics are applied.
  try {
    const parsed: unknown = JSON.parse(candidate);
    if (Array.isArray(parsed)) return parsed;
  } catch {
    // Fall through to the repair heuristics.
  }

  // Repair objects that follow each other without a separating comma.
  const repaired = candidate.replace(/\}\s*\{/g, '},{');

  // Several arrays may be glued together without a comma; parse each one
  // on its own and merge the items.
  let items: any[] = [];
  for (let chunk of repaired.split(/\]\s*\[/)) {
    if (!chunk.startsWith('[')) chunk = '[' + chunk;
    if (!chunk.endsWith(']')) chunk = chunk + ']';
    try {
      items = items.concat(parseArrayChunk(chunk));
    } catch {
      console.log('json error', chunk)
    }
  }
  return items;
}