@paroicms/site-generator-plugin
Version:
ParoiCMS Site Generator Plugin
235 lines (234 loc) • 8.92 kB
JavaScript
import { isDef } from "@paroicms/public-anywhere-lib";
import { loadStep } from "../../db/db-read.queries.js";
import { insertStep, saveCompletedSchemaStep, updateStep, } from "../../db/db-write.queries.js";
import { reorderObjectKeys } from "../helpers/js-utils.js";
import { invokeClaude } from "../lib/calling-llm-anthropic.js";
import { createPromptTemplate, getPredefinedFields, getSiteSchemaTsDefs, } from "../lib/create-prompt.js";
import { debugLlmOutput } from "../lib/debug-utils.js";
import { parseMarkdownBulletedList } from "../lib/markdown-bulleted-list-parser.js";
import { parseLlmResponseAsProperties } from "../lib/parse-llm-response.js";
import { safeCallStep } from "../lib/session-utils.js";
import { createL10n } from "../site-schema-generator/create-l10n.js";
import { createSiteSchemaFromAnalysis } from "../site-schema-generator/create-site-schema.js";
import { invokeUpdateSiteSchema } from "./invoke-update-site-schema.js";
/**
 * Prompt template for the first analysis pass, created by `createPromptTemplate`
 * for the file name "initial-1-analysis.md" with `withSiteSchemaTsDefs` enabled.
 * It is exported, and in this file it is called with the LLM input object in
 * `invokeAnalysisStep1`.
 *
 * The template is awaited at module top level, so evaluating this module waits
 * for `createPromptTemplate` to settle.
 */
export const analyzePrompt = await createPromptTemplate({
filename: "initial-1-analysis.md",
withSiteSchemaTsDefs: true,
});
/**
 * Prompt template for the second analysis pass (field assignment), created for
 * the file name "initial-2-fields.md". Used by `invokeAnalysisStep2`.
 */
const fieldsPrompt = await createPromptTemplate({
filename: "initial-2-fields.md",
withSiteSchemaTsDefs: true,
});
/**
 * Inserts a new pending "initialSchema" step, launches the initial analysis for
 * it, and returns the step as loaded right after the launch.
 *
 * The analysis is handed to `safeCallStep` and is NOT awaited here, so the
 * returned step is whatever `loadStep` reads at that moment.
 * NOTE(review): presumably `safeCallStep` runs the callback in the background
 * and handles its failures — confirm in session-utils.
 *
 * @param ctx - Context forwarded to the database queries and the analysis.
 * @param input - Analysis input; forwarded unchanged to `invokeInitialAnalysis`.
 * @returns The result of `loadStep` for the newly inserted step number.
 */
export async function startInitialAnalysis(ctx, input) {
  const newStep = {
    kind: "initialSchema",
    status: "pending",
    currentActivity: "initial1",
  };
  const handle = await insertStep(ctx, newStep);

  const runAnalysis = () => invokeInitialAnalysis(ctx, handle, input);
  safeCallStep(ctx, handle, runAnalysis);

  const step = await loadStep(ctx, handle.stepNumber);
  return step;
}
/**
 * Runs the two-pass initial analysis for a step and stores the outcome.
 *
 * Flow:
 * 1. First pass (`invokeAnalysisStep1`) produces the analysis, from which the
 *    site schema is created.
 * 2. The step is updated to activity "initial2" with the explanation (or null).
 * 3. Second pass (`invokeAnalysisStep2`) mutates the site schema by assigning fields.
 * 4. Node type keys are reordered, the l10n is created, and the completed values
 *    are assembled.
 * 5. If the second pass reported no unused information, the completed step is
 *    saved. Otherwise the unused information is passed as the prompt to
 *    `invokeUpdateSiteSchema`, with the completed values as its starting schema.
 *
 * @param ctx - Context; `ctx.logger.debug` is used to log unused information.
 * @param stepHandle - Handle of the step being processed.
 * @param input - Analysis input, forwarded to the first pass.
 */
export async function invokeInitialAnalysis(ctx, stepHandle, input) {
  const firstPass = await invokeAnalysisStep1(ctx, input, stepHandle);
  const { analysis } = firstPass;
  const siteSchema = createSiteSchemaFromAnalysis(analysis);

  await updateStep(ctx, stepHandle, {
    currentActivity: "initial2",
    explanation: firstPass.explanation ?? null,
  });

  // The second pass always receives a string prompt, possibly empty.
  const secondPassPrompt = createUnusedInformationPrompt(firstPass.unusedInformation, analysis) ?? "";
  const secondPass = await invokeAnalysisStep2(ctx, { prompt: secondPassPrompt }, siteSchema, stepHandle);

  reorderSiteSchemaNodeTypes(siteSchema);
  const l10n = createL10n(analysis, siteSchema);

  // The site title is keyed by the site language.
  const { language, title } = analysis.siteProperties;
  const siteTitle = { [language]: title };

  const report1 = firstPass.llmReport;
  const report2 = secondPass.llmReport;
  const completedValues = {
    status: "completed",
    siteSchema,
    l10n,
    localizedValues: { siteTitle },
    // NOTE(review): unlike the output count, the input count has no `?? 0` fallback.
    inputTokenCount: report1.inputTokenCount + report2.inputTokenCount,
    outputTokenCount: (report1.outputTokenCount ?? 0) + (report2.outputTokenCount ?? 0),
    promptTitle: undefined, // TODO: implement prompt title
  };

  const remainingInformation = secondPass.unusedInformation;
  if (remainingInformation) {
    ctx.logger.debug("Unused information:", remainingInformation);
    await invokeUpdateSiteSchema(
      ctx,
      stepHandle,
      { prompt: remainingInformation, fromStepSchema: completedValues },
      { asRemainingOf: completedValues },
    );
    return;
  }

  await saveCompletedSchemaStep(ctx, stepHandle, completedValues);
}
/**
 * First analysis pass: obtains the LLM output for task "analysis-1" and parses
 * it into the analysis object.
 *
 * `debugLlmOutput` is consulted first. When its `stored` value is truthy it is
 * used as the LLM output; otherwise Claude is invoked and the response is passed
 * through `debug.getMessageContent`.
 * NOTE(review): presumably `stored` is a previously recorded output used for
 * debugging — confirm in debug-utils.
 *
 * @param ctx - Context; `ctx.anthropicModelName` is passed to `debugLlmOutput`.
 * @param input - Object whose `prompt` is used as the LLM message.
 * @param stepHandle - Handle of the current step, passed to `debugLlmOutput`.
 * @returns `{ analysis, unusedInformation, explanation, llmReport }` where
 *   `analysis` holds `dictionary`, `siteProperties` and the parsed `tree`.
 */
async function invokeAnalysisStep1(ctx, input, stepHandle) {
  const taskName = "analysis-1";
  const llmInput = {
    message: input.prompt,
    siteSchemaTsDefs: getSiteSchemaTsDefs(),
  };

  const debug = await debugLlmOutput(ctx, taskName, ctx.anthropicModelName, stepHandle, {
    message: llmInput.message,
  });

  let llmOutput = debug.stored;
  if (!llmOutput) {
    // Nothing stored: render the template and call the model.
    const renderedPrompt = analyzePrompt(llmInput);
    const response = await invokeClaude(ctx, {
      llmTaskName: taskName,
      prompt: renderedPrompt,
      maxTokens: 4_000,
      temperature: 0.1,
      systemInstruction: "beSmart",
    });
    llmOutput = await debug.getMessageContent(response.messageContent, response.report);
  }

  // Tagged sections expected in the response; only the unused information is optional.
  const expectedSections = [
    { tagName: "website_properties_json", key: "siteProperties", format: "json" },
    { tagName: "hierarchical_md", key: "tree", format: "markdown" },
    { tagName: "dictionary_json", key: "dictionary", format: "json" },
    { tagName: "explanation_md", key: "explanation", format: "markdown" },
    {
      tagName: "unused_information_md",
      key: "unusedInformation",
      format: "markdown",
      optional: true,
    },
  ];
  const parsed = parseLlmResponseAsProperties(llmOutput.output, expectedSections);

  const analysis = {
    dictionary: parsed.dictionary,
    siteProperties: parsed.siteProperties,
    tree: parseMarkdownBulletedList(parsed.tree),
  };

  return {
    analysis,
    unusedInformation: parsed.unusedInformation,
    explanation: parsed.explanation,
    llmReport: llmOutput.llmReport,
  };
}
/**
 * Second analysis pass: obtains the LLM output for task "analysis-2" and assigns
 * the returned fields to the node types of the given site schema.
 *
 * As in the first pass, a truthy `debug.stored` value is used instead of calling
 * the model.
 *
 * @param ctx - Context; `ctx.anthropicModelName` is passed to `debugLlmOutput`.
 * @param input - Object whose `prompt` is used as the LLM message.
 * @param siteSchema - Will be mutated: fields are assigned to its `nodeTypes`
 *   when that property is set.
 * @param stepHandle - Handle of the current step, passed to `debugLlmOutput`.
 * @returns `{ unusedInformation, llmReport }`.
 */
async function invokeAnalysisStep2(
  ctx,
  input,
  /** Will be mutated. */
  siteSchema,
  stepHandle,
) {
  const taskName = "analysis-2";
  const llmInput = {
    siteSchemaTsDefs: getSiteSchemaTsDefs(),
    predefinedFields: JSON.stringify(getPredefinedFields(), undefined, 2),
    siteSchemaJson: JSON.stringify(siteSchema, undefined, 2),
    message: input.prompt,
  };

  const debug = await debugLlmOutput(ctx, taskName, ctx.anthropicModelName, stepHandle, {
    message: llmInput.message,
    siteSchemaJson: llmInput.siteSchemaJson,
  });

  let llmOutput = debug.stored;
  if (!llmOutput) {
    const renderedPrompt = fieldsPrompt(llmInput);
    const response = await invokeClaude(ctx, {
      llmTaskName: taskName,
      prompt: renderedPrompt,
      maxTokens: 700,
      temperature: 0.1,
      systemInstruction: "beFast",
    });
    llmOutput = await debug.getMessageContent(response.messageContent, response.report);
  }

  // Tagged sections expected in the response; the unused information is optional.
  const expectedSections = [
    { tagName: "json_result", key: "assignedFields", format: "json" },
    {
      tagName: "unused_information_md",
      key: "unusedInformation",
      format: "markdown",
      optional: true,
    },
  ];
  const parsed = parseLlmResponseAsProperties(llmOutput.output, expectedSections);

  const { nodeTypes } = siteSchema;
  if (nodeTypes) {
    assignFieldsToNodeTypes(ctx, parsed.assignedFields, nodeTypes);
  }

  return {
    unusedInformation: parsed.unusedInformation,
    llmReport: llmOutput.llmReport,
  };
}
/**
 * Copies the assigned field names onto the matching node types, in place.
 *
 * The lookup key is "_site" for the node type whose kind is "site", and the
 * `typeName` for every other node type. A node type without an entry in
 * `assignedFields` is reported with a warning and left untouched. Entries of
 * `assignedFields` that match no node type are reported in one final warning.
 *
 * @param ctx - Context; only `ctx.logger.warn` is used.
 * @param assignedFields - Object mapping a type name to an array of fields.
 * @param nodeTypes - Node types to mutate.
 */
function assignFieldsToNodeTypes(ctx, assignedFields, nodeTypes) {
  const temporalRoute = ":yyyy/:mm/:dd/:relativeId-:slug";
  const unmatchedTypeNames = new Set(Object.keys(assignedFields));

  for (const nodeType of nodeTypes) {
    const lookupName = nodeType.kind === "site" ? "_site" : nodeType.typeName;
    const assigned = assignedFields[lookupName];
    if (!assigned) {
      ctx.logger.warn(`Missing assigned fields for node type "${lookupName}"`);
      continue;
    }

    // Temporal documents (regular documents on the dated route) get an authors field.
    // NOTE(review): this replaces any fields already set on the node type.
    const isTemporalDocument =
      nodeType.kind === "document" &&
      nodeType.documentKind === "regular" &&
      nodeType.route === temporalRoute;
    if (isTemporalDocument) {
      const authorsField = {
        localized: false,
        storedAs: "labeling",
        name: "authors",
        taxonomy: "authors",
        multiple: true,
      };
      nodeType.fields = [authorsField];
    }

    if (assigned.length > 0) {
      const currentFields = nodeType.fields ?? [];
      nodeType.fields = [...currentFields, ...assigned];
    }

    unmatchedTypeNames.delete(lookupName);
  }

  if (unmatchedTypeNames.size > 0) {
    ctx.logger.warn(`Field names were produced for unknown node types: ${[...unmatchedTypeNames].join(", ")}`);
  }
}
/**
 * Replaces `siteSchema.nodeTypes` with an array in which every node type has
 * been passed through `reorderObjectKeys` with a fixed key list.
 * Does nothing when the schema has no `nodeTypes`.
 *
 * @param siteSchema - Site schema; its `nodeTypes` property is reassigned.
 */
function reorderSiteSchemaNodeTypes(siteSchema) {
  const { nodeTypes } = siteSchema;
  if (!nodeTypes) {
    return;
  }

  const keyOrder = [
    "typeName",
    "kind",
    "documentKind",
    "route",
    "redirectTo",
    "jsonLdType",
    "withFeaturedImage",
    "fields",
    "lists",
    "routingChildren",
    "children",
    "orderChildrenBy",
  ];

  // Each call receives its own copy of the key list.
  siteSchema.nodeTypes = nodeTypes.map((nodeType) => reorderObjectKeys(nodeType, [...keyOrder]));
}
/**
 * Builds the prompt for the second analysis pass from the per-type prompts of
 * the analysis dictionary and the unused information of the first pass.
 *
 * Every dictionary entry with a truthy `prompt` becomes one markdown bullet
 * ("- typeName: prompt") under a "To do:" heading. The unused information, when
 * truthy, is appended after a blank line.
 *
 * @param unusedInformation - Unused information from the first pass, if any.
 * @param analysis - Analysis whose `dictionary` maps type names to entries.
 * @returns The combined prompt, or `unusedInformation` unchanged when no
 *   dictionary entry has a prompt (which may be undefined).
 */
function createUnusedInformationPrompt(unusedInformation, analysis) {
  const prompts = Object.entries(analysis.dictionary)
    .map(([typeName, entry]) => (entry.prompt ? `${typeName}: ${entry.prompt}` : undefined))
    .filter(isDef);
  if (prompts.length === 0) {
    return unusedInformation;
  }

  // One bullet per line. The previous separator ("- \n") glued the dash to the
  // end of the preceding item and left the following items without a bullet.
  const bulletList = prompts.map((prompt) => `- ${prompt}`).join("\n");
  const nodeTypePrompts = `To do:\n\n${bulletList}`;
  return unusedInformation ? `${nodeTypePrompts}\n\n${unusedInformation}` : nodeTypePrompts;
}