/*
 * @paroicms/site-generator-plugin
 * Version:
 * ParoiCMS Site Generator Plugin
 * 155 lines (154 loc) • 6.36 kB
 * JavaScript
 */
import { parse } from "yaml";
/**
 * Parses an LLM response in which each expected tag appears at most once and
 * returns a single object holding the formatted content of every tag found.
 *
 * @param {string} llmResponse - Raw text returned by the LLM.
 * @param {Array<{tagName: string, key: string, format: string, optional?: boolean}>} outputTags
 *   Expected tags: `tagName` is searched for in the response, `key` is the
 *   property name used in the returned object.
 * @returns {Object} Object keyed by each tag's `key`. Optional tags that are
 *   absent from the response are left out of the object.
 * @throws {Error} When a non-optional tag is missing, when a tag appears more
 *   than once, or when tag extraction / content formatting fails.
 */
export function parseLlmResponseAsProperties(llmResponse, outputTags) {
  const rawTags = parseLlmRawTags(
    llmResponse,
    outputTags.map((tag) => tag.tagName),
  );
  const rawTagByName = new Map(rawTags.map((tag) => [tag.tagName, tag]));

  // Only required tags count as missing; optional ones may legitimately be absent.
  const missingTags = outputTags.filter((tag) => !tag.optional && !rawTagByName.has(tag.tagName));
  if (missingTags.length > 0) {
    throw new Error(`Missing tags: ${missingTags.map((tag) => tag.tagName).join(", ")}`);
  }

  // The map collapses repeated names, so a size difference means at least one duplicate.
  if (rawTags.length !== rawTagByName.size) {
    const seen = new Set();
    const duplicated = new Set();
    for (const { tagName } of rawTags) {
      if (seen.has(tagName)) {
        duplicated.add(tagName);
      }
      seen.add(tagName);
    }
    throw new Error(`Duplicate tags: ${[...duplicated].join(", ")}`);
  }

  const resultObj = {};
  for (const outputTag of outputTags) {
    const raw = rawTagByName.get(outputTag.tagName);
    if (!raw) {
      // Required tags were validated above, so only an optional tag can be absent here.
      continue;
    }
    resultObj[outputTag.key] = formatRawContent(raw.content, outputTag);
  }
  return resultObj;
}
/**
 * Parses an LLM response made of a repeated group of tags and returns one
 * object per group.
 *
 * A new item starts whenever a tag is met whose `key` is already set on the
 * item being built. Tags with blank content are not stored on the item.
 *
 * @param {string} llmResponse - Raw text returned by the LLM.
 * @param {Array<{tagName: string, key: string, format: string, optional?: boolean}>} outputTags
 *   Tags that make up one item: `tagName` is searched for in the response,
 *   `key` is the property name used on each item.
 * @param {{tolerateErrors?: {errorMessages: string[]}}} [options]
 *   When `tolerateErrors` is provided, recoverable problems are appended to
 *   its `errorMessages` array instead of being thrown.
 * @returns {Object[]} Items in response order; an empty array when no tag is found.
 * @throws {Error} When a raw tag has no matching entry in `outputTags`, or on an
 *   empty / missing required tag while `tolerateErrors` is not provided.
 */
export function parseLlmResponseAsList(llmResponse, outputTags, options = {}) {
  const { tolerateErrors } = options;
  const rawTags = parseLlmRawTags(
    llmResponse,
    outputTags.map((tag) => tag.tagName),
    options,
  );
  if (rawTags.length === 0) {
    return [];
  }

  const outputTagMap = new Map(outputTags.map((tag) => [tag.tagName, tag]));
  const result = [];
  let current = {};

  // Validates the item being built, keeps it when valid, then starts a fresh one.
  const flushCurrent = () => {
    const item = ensureProperties(current, outputTags, options);
    if (item) {
      result.push(item);
    }
    current = {};
  };

  for (const rawTag of rawTags) {
    const outputTag = outputTagMap.get(rawTag.tagName);
    if (!outputTag) {
      // Raw tags are extracted from the names in `outputTags`, so this is a real bug.
      throw new Error(`Unexpected output tag "${rawTag.tagName}"`);
    }

    // `current` is keyed by `key`, not by `tagName`: test the key to detect a repeated tag.
    if (outputTag.key in current) {
      flushCurrent();
    }

    if (rawTag.content.trim() === "") {
      if (!outputTag.optional) {
        const message = `Empty tag <${outputTag.tagName}>`;
        if (!tolerateErrors) {
          throw new Error(message);
        }
        tolerateErrors.errorMessages.push(message);
        // Discard what was collected so far for the item being built.
        current = {};
      }
      continue;
    }

    current[outputTag.key] = formatRawContent(rawTag.content, outputTag);
  }

  if (Object.keys(current).length > 0) {
    flushCurrent();
  }
  return result;
}
/**
 * Checks that `obj` carries a property for every non-optional output tag.
 *
 * @param {Object} obj - Item to validate; properties are looked up by each tag's `key`.
 * @param {Array<{tagName: string, key: string, optional?: boolean}>} outputTags - Expected tags.
 * @param {{tolerateErrors?: {errorMessages: string[]}}} options
 *   When `tolerateErrors` is provided, the problem is appended to its
 *   `errorMessages` array instead of being thrown.
 * @returns {Object|undefined} `obj` itself when complete; `undefined` when a
 *   required property is missing and errors are tolerated.
 * @throws {Error} On the first missing required property when errors are not tolerated.
 */
function ensureProperties(obj, outputTags, options) {
  const errorSink = options.tolerateErrors;
  // Only the first required tag lacking a property is reported.
  const firstMissing = outputTags.find((candidate) => !(candidate.key in obj) && !candidate.optional);
  if (firstMissing === undefined) {
    return obj;
  }
  const message = `Missing tag <${firstMissing.tagName}>`;
  if (!errorSink) {
    throw new Error(message);
  }
  errorSink.errorMessages.push(message);
  return undefined;
}
/**
 * Converts the raw text found inside a tag according to the tag's `format`.
 *
 * @param {string} rawContent - Text extracted from between the opening and closing tag.
 * @param {{format: string, optional?: boolean}} tag - Output tag definition.
 * @returns {*} `undefined` for falsy content of an optional tag; the parsed value
 *   for "yaml" and "json"; the text unchanged for "markdown" and "text".
 * @throws {Error} When `format` is not one of the four supported values. Errors
 *   raised by the YAML or JSON parser are not caught here.
 */
function formatRawContent(rawContent, tag) {
  const isBlank = !rawContent;
  if (isBlank && tag.optional) {
    return undefined;
  }

  const kind = tag.format;
  if (kind === "yaml") {
    return parse(rawContent);
  }
  if (kind === "json") {
    return JSON.parse(rawContent);
  }
  if (kind === "markdown" || kind === "text") {
    // Textual formats are passed through untouched.
    return rawContent;
  }
  throw new Error(`Unknown format "${kind}"`);
}
/**
 * Extracts the flat sequence of `<tag>…</tag>` pairs for the given tag names
 * from an LLM response.
 *
 * Only the listed names are recognised; any other markup is treated as plain
 * text. Nesting is not supported: an opening tag must be directly followed (in
 * tag order) by a closing tag. A closing tag with no opening tag before it is
 * ignored.
 *
 * @param {string} llmResponse - Raw text returned by the LLM.
 * @param {string[]} tagNames - Tag names to look for; they are matched literally.
 * @param {{tolerateErrors?: {errorMessages: string[]}}} [options]
 *   When `tolerateErrors` is provided, problems are appended to its
 *   `errorMessages` array instead of being thrown. In that mode an opening tag
 *   followed by another opening tag is skipped, and a mismatched closing tag
 *   still closes the current opening tag.
 * @returns {Array<{tagName: string, content: string}>} Tags in response order,
 *   with their trimmed content.
 * @throws {Error} On an unclosed tag, a missing closing tag or mismatched tags
 *   while `tolerateErrors` is not provided.
 */
export function parseLlmRawTags(llmResponse, tagNames, options = {}) {
  const { tolerateErrors } = options;

  // With no names the alternation would be empty and the pattern would match "<>" and "</>".
  if (tagNames.length === 0) {
    return [];
  }

  // Throws, or records the message when the caller asked to tolerate errors.
  const reportError = (message) => {
    if (!tolerateErrors) {
      throw new Error(message);
    }
    tolerateErrors.errorMessages.push(message);
  };

  // Escape regex metacharacters so that names such as "a.b" are matched literally.
  const tagNamesPattern = tagNames
    .map((name) => String(name).replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");
  const pattern = new RegExp(`<(/?)(${tagNamesPattern})>`, "g");

  // Find all tags (opening and closing) and store them with their positions.
  const matches = [];
  for (let match = pattern.exec(llmResponse); match !== null; match = pattern.exec(llmResponse)) {
    matches.push({
      isOpening: match[1] === "",
      tagName: match[2],
      position: match.index,
      length: match[0].length,
    });
  }

  // Pair each opening tag with the tag that immediately follows it.
  const tagList = [];
  for (let i = 0; i < matches.length; ++i) {
    const current = matches[i];
    if (!current.isOpening) {
      continue;
    }

    const next = matches[i + 1];
    if (next === undefined) {
      reportError(`Unclosed tag <${current.tagName}>`);
      continue;
    }
    if (next.isOpening) {
      // When tolerating errors, this opening tag is skipped entirely.
      reportError(`Missing closing tag for <${current.tagName}>`);
      continue;
    }
    if (next.tagName !== current.tagName) {
      // When tolerating errors, the mismatched closing tag is still used as the end of the content.
      reportError(`Mismatched tags: opening <${current.tagName}>, closing </${next.tagName}>`);
    }

    const contentStart = current.position + current.length;
    tagList.push({
      tagName: current.tagName,
      content: llmResponse.substring(contentStart, next.position).trim(),
    });
    // Skip the closing tag that was just consumed.
    ++i;
  }
  return tagList;
}