UNPKG

tmaiplugin

Version:

TrainingMaster AIGC Component

86 lines (85 loc) 3.56 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.fixedJsonString = exports.splitLongText = void 0; const SECTION_LENGTH = 1024; /** * 将一段很长的文本,按1024长度来划分到多个段落中 * @param {*} content */ function splitLongText(content, len = SECTION_LENGTH) { var _a; let start = 0, message = [], length = content.length; while (start < length) { let realLength = len; ////以句号或引号进行分段,不要随意截取 for (let i = start + len; i >= start; i--) { if (/[。”"??]/.test(content[i] + '')) { realLength = i - start + 1; break; } } const subtext = content.substr(start, realLength).replace(/\s+/g, "").replace(/\t|\n|\v|\r|\f/g, ' '); if (subtext) message.push(subtext); //message.push({ role: 'user', content: subtext }) start += realLength || len; } /** * 防止最后一个段落过短,没有意义 */ let totalLen = message.length; if (totalLen >= 2 && (((_a = message[totalLen - 1]) === null || _a === void 0 ? void 0 : _a.length) || 0) < 100) { message[totalLen - 2] += message[totalLen - 1]; message.splice(totalLen - 1, 1); } return message; } exports.splitLongText = splitLongText; /** * 修复JSON的字符串 * 验证JSON字符串是否是真正可转换为JSON的合法格式 * 这里只能做一个最简单的处理,就是用两端的符号 * @param jsonstr */ function fixedJsonString(jsonstr) { console.log('input json string:', jsonstr); ///检查返回的是不是一个数组对象(我们需要的是数组对象) let firstBracketSymbol = jsonstr.indexOf("["); ////必须过滤出来数组 let lastBracketSymbol = jsonstr.lastIndexOf("]"); ///第一个花括号出现的位置,如果花括号出现的位置早于 [ ,则默认返回的对象不是一个数组,仅仅是一个对象, ///则需要我们用中括号包住 let firstBraceSymbol = jsonstr.indexOf("{"); let lastBraceSymbol = jsonstr.lastIndexOf("}"); ///返回的不是一个数组结构的,只是一个{},我们帮他完成数组拼接 if (firstBraceSymbol >= 0 && firstBraceSymbol < (firstBracketSymbol >= 0 ? firstBracketSymbol : 1000) && lastBraceSymbol > firstBraceSymbol && lastBraceSymbol >= 0 && lastBraceSymbol > lastBracketSymbol) { jsonstr = '[' + jsonstr.substr(firstBraceSymbol, lastBraceSymbol - firstBraceSymbol + 1) + ']'; firstBracketSymbol = 0; lastBracketSymbol = jsonstr.length - 1; } else if (firstBracketSymbol < 0 || lastBracketSymbol < 0 || lastBracketSymbol <= firstBracketSymbol) { return []; } jsonstr = jsonstr.substr(firstBracketSymbol, lastBracketSymbol - firstBracketSymbol + 1); ///尽量处理一些能够一眼识别出来的JSON错误 jsonstr = jsonstr.replace(/}{/g, '},{'); let mutilitems = jsonstr.split(']['); ///确实存在多个数组拼接在一起,中间没有逗号隔开的了 let retObject = []; for (let str of mutilitems) { if (!str.startsWith('[')) str = '[' + str; if (!str.endsWith(']')) str = str + ']'; try { let jsonObj = eval(str); retObject = retObject.concat(jsonObj); } catch (err) { console.log('json error', str); } } return retObject; } exports.fixedJsonString = fixedJsonString;