UNPKG

@paroicms/site-generator-plugin

Version:

ParoiCMS Site Generator Plugin

235 lines (234 loc) 8.92 kB
import { isDef } from "@paroicms/public-anywhere-lib";
import { loadStep } from "../../db/db-read.queries.js";
import {
  insertStep,
  saveCompletedSchemaStep,
  updateStep,
} from "../../db/db-write.queries.js";
import { reorderObjectKeys } from "../helpers/js-utils.js";
import { invokeClaude } from "../lib/calling-llm-anthropic.js";
import {
  createPromptTemplate,
  getPredefinedFields,
  getSiteSchemaTsDefs,
} from "../lib/create-prompt.js";
import { debugLlmOutput } from "../lib/debug-utils.js";
import { parseMarkdownBulletedList } from "../lib/markdown-bulleted-list-parser.js";
import { parseLlmResponseAsProperties } from "../lib/parse-llm-response.js";
import { safeCallStep } from "../lib/session-utils.js";
import { createL10n } from "../site-schema-generator/create-l10n.js";
import { createSiteSchemaFromAnalysis } from "../site-schema-generator/create-site-schema.js";
import { invokeUpdateSiteSchema } from "./invoke-update-site-schema.js";

/** Prompt template for the first analysis pass, built from "initial-1-analysis.md". */
export const analyzePrompt = await createPromptTemplate({
  filename: "initial-1-analysis.md",
  withSiteSchemaTsDefs: true,
});

/** Prompt template for the second (field assignment) pass, built from "initial-2-fields.md". */
const fieldsPrompt = await createPromptTemplate({
  filename: "initial-2-fields.md",
  withSiteSchemaTsDefs: true,
});

/**
 * Registers a new pending "initialSchema" step, starts the initial analysis for it and
 * returns the step as it is stored right after the start.
 *
 * @param ctx - Context object forwarded to the database and LLM helpers.
 * @param input - Analysis input; its `prompt` property is sent to the LLM.
 * @returns The step loaded by `loadStep` for the inserted step number.
 */
export async function startInitialAnalysis(ctx, input) {
  const stepHandle = await insertStep(ctx, {
    kind: "initialSchema",
    status: "pending",
    currentActivity: "initial1",
  });
  // Deliberately not awaited here: the step is reloaded and returned immediately.
  safeCallStep(ctx, stepHandle, () => invokeInitialAnalysis(ctx, stepHandle, input));
  return await loadStep(ctx, stepHandle.stepNumber);
}

/**
 * Runs the two LLM passes of the initial analysis and stores the outcome.
 *
 * Pass 1 produces the analysis from which the site schema is created. Pass 2 assigns fields
 * to the node types of that schema. When pass 2 reports no unused information, the step is
 * saved as completed; otherwise the leftover information is handed to `invokeUpdateSiteSchema`.
 *
 * @param ctx - Context object; `ctx.logger.debug` is used for the leftover information.
 * @param stepHandle - Handle of the step being processed.
 * @param input - Analysis input; its `prompt` property is sent to the LLM.
 */
export async function invokeInitialAnalysis(ctx, stepHandle, input) {
  const {
    analysis,
    unusedInformation,
    explanation,
    llmReport: llmReport1,
  } = await invokeAnalysisStep1(ctx, input, stepHandle);

  const siteSchema = createSiteSchemaFromAnalysis(analysis);

  await updateStep(ctx, stepHandle, {
    currentActivity: "initial2",
    explanation: explanation ?? null,
  });

  // `siteSchema` is mutated by the second pass (fields are added to its node types).
  const { unusedInformation: unusedInformation2, llmReport: llmReport2 } =
    await invokeAnalysisStep2(
      ctx,
      { prompt: createUnusedInformationPrompt(unusedInformation, analysis) ?? "" },
      siteSchema,
      stepHandle,
    );

  reorderSiteSchemaNodeTypes(siteSchema);

  const l10n = createL10n(analysis, siteSchema);
  const siteTitle = {
    [analysis.siteProperties.language]: analysis.siteProperties.title,
  };
  const completedValues = {
    status: "completed",
    siteSchema,
    l10n,
    localizedValues: { siteTitle },
    inputTokenCount: llmReport1.inputTokenCount + llmReport2.inputTokenCount,
    outputTokenCount: (llmReport1.outputTokenCount ?? 0) + (llmReport2.outputTokenCount ?? 0),
    promptTitle: undefined, // TODO: implement prompt title
  };

  if (!unusedInformation2) {
    await saveCompletedSchemaStep(ctx, stepHandle, completedValues);
    return;
  }

  ctx.logger.debug("Unused information:", unusedInformation2);
  await invokeUpdateSiteSchema(
    ctx,
    stepHandle,
    {
      prompt: unusedInformation2,
      fromStepSchema: completedValues,
    },
    {
      asRemainingOf: completedValues,
    },
  );
}

/**
 * First LLM pass: sends the user prompt to the model (unless a stored debug output exists)
 * and parses the tagged sections of the response.
 *
 * @param ctx - Context object; `ctx.anthropicModelName` is passed to the debug helper.
 * @param input - Analysis input; its `prompt` property becomes the LLM message.
 * @param stepHandle - Handle of the step being processed.
 * @returns `analysis` (dictionary, site properties and parsed tree), the optional
 *   `unusedInformation`, the `explanation` and the `llmReport` of the call.
 */
async function invokeAnalysisStep1(ctx, input, stepHandle) {
  const llmTaskName = "analysis-1";
  const llmInput = {
    message: input.prompt,
    siteSchemaTsDefs: getSiteSchemaTsDefs(),
  };

  const debug = await debugLlmOutput(ctx, llmTaskName, ctx.anthropicModelName, stepHandle, {
    message: llmInput.message,
  });

  // Reuse the stored output when the debug helper provides one; otherwise call the model.
  let llmOutput = debug.stored;
  if (!llmOutput) {
    // Create formatted messages using the template
    const prompt = analyzePrompt(llmInput);
    // Call the model
    const { messageContent, report } = await invokeClaude(ctx, {
      llmTaskName,
      prompt,
      maxTokens: 4_000,
      temperature: 0.1,
      systemInstruction: "beSmart",
    });
    llmOutput = await debug.getMessageContent(messageContent, report);
  }

  const rawAnalysis = parseLlmResponseAsProperties(llmOutput.output, [
    {
      tagName: "website_properties_json",
      key: "siteProperties",
      format: "json",
    },
    {
      tagName: "hierarchical_md",
      key: "tree",
      format: "markdown",
    },
    {
      tagName: "dictionary_json",
      key: "dictionary",
      format: "json",
    },
    {
      tagName: "explanation_md",
      key: "explanation",
      format: "markdown",
    },
    {
      tagName: "unused_information_md",
      key: "unusedInformation",
      format: "markdown",
      optional: true,
    },
  ]);

  const tree = parseMarkdownBulletedList(rawAnalysis.tree);
  const analysis = {
    dictionary: rawAnalysis.dictionary,
    siteProperties: rawAnalysis.siteProperties,
    tree,
  };

  return {
    analysis,
    unusedInformation: rawAnalysis.unusedInformation,
    explanation: rawAnalysis.explanation,
    llmReport: llmOutput.llmReport,
  };
}

/**
 * Second LLM pass: asks the model which fields belong to each node type and writes the
 * answer into `siteSchema.nodeTypes`.
 *
 * @param ctx - Context object; `ctx.anthropicModelName` is passed to the debug helper.
 * @param input - Object whose `prompt` property becomes the LLM message.
 * @param siteSchema - Site schema to complete. Will be mutated.
 * @param stepHandle - Handle of the step being processed.
 * @returns The optional `unusedInformation` from the response and the `llmReport` of the call.
 */
async function invokeAnalysisStep2(
  ctx,
  input,
  /** Will be mutated. */
  siteSchema,
  stepHandle,
) {
  const llmTaskName = "analysis-2";
  const llmInput = {
    siteSchemaTsDefs: getSiteSchemaTsDefs(),
    predefinedFields: JSON.stringify(getPredefinedFields(), undefined, 2),
    siteSchemaJson: JSON.stringify(siteSchema, undefined, 2),
    message: input.prompt,
  };

  const debug = await debugLlmOutput(ctx, llmTaskName, ctx.anthropicModelName, stepHandle, {
    message: llmInput.message,
    siteSchemaJson: llmInput.siteSchemaJson,
  });

  // Reuse the stored output when the debug helper provides one; otherwise call the model.
  let llmOutput = debug.stored;
  if (!llmOutput) {
    const prompt = fieldsPrompt(llmInput);
    const { messageContent, report } = await invokeClaude(ctx, {
      llmTaskName,
      prompt,
      maxTokens: 700,
      temperature: 0.1,
      systemInstruction: "beFast",
    });
    llmOutput = await debug.getMessageContent(messageContent, report);
  }

  const { assignedFields, unusedInformation } = parseLlmResponseAsProperties(llmOutput.output, [
    {
      tagName: "json_result",
      key: "assignedFields",
      format: "json",
    },
    {
      tagName: "unused_information_md",
      key: "unusedInformation",
      format: "markdown",
      optional: true,
    },
  ]);

  if (siteSchema.nodeTypes) {
    assignFieldsToNodeTypes(ctx, assignedFields, siteSchema.nodeTypes);
  }

  return { unusedInformation, llmReport: llmOutput.llmReport };
}

/**
 * Copies the field names produced by the LLM onto the matching node types (in place).
 *
 * The site node type is looked up under the key "_site"; every other node type under its
 * `typeName`. Node types without an entry, and entries without a node type, are logged as
 * warnings.
 *
 * @param ctx - Context object; `ctx.logger.warn` is used for mismatches.
 * @param assignedFields - Object mapping a type name to its list of fields.
 * @param nodeTypes - Node types to update. Will be mutated.
 */
function assignFieldsToNodeTypes(ctx, assignedFields, nodeTypes) {
  const remainingTypeNames = new Set(Object.keys(assignedFields));

  for (const nodeType of nodeTypes) {
    const typeName = nodeType.kind === "site" ? "_site" : nodeType.typeName;
    const fieldNames = assignedFields[typeName];
    if (!fieldNames) {
      ctx.logger.warn(`Missing assigned fields for node type "${typeName}"`);
      continue;
    }

    // inject authors field in temporal documents
    // NOTE(review): this replaces any `fields` already present on the node type — confirm
    // that node types never carry fields before this point.
    if (
      nodeType.kind === "document" &&
      nodeType.documentKind === "regular" &&
      nodeType.route === ":yyyy/:mm/:dd/:relativeId-:slug"
    ) {
      nodeType.fields = [
        {
          localized: false,
          storedAs: "labeling",
          name: "authors",
          taxonomy: "authors",
          multiple: true,
        },
      ];
    }

    if (fieldNames.length > 0) {
      nodeType.fields = [...(nodeType.fields ?? []), ...fieldNames];
    }

    remainingTypeNames.delete(typeName);
  }

  if (remainingTypeNames.size > 0) {
    ctx.logger.warn(
      `Field names were produced for unknown node types: ${[...remainingTypeNames].join(", ")}`,
    );
  }
}

/**
 * Rewrites every node type of the schema through `reorderObjectKeys` with a fixed key order.
 * Does nothing when the schema has no `nodeTypes`.
 *
 * @param siteSchema - Site schema whose `nodeTypes` array is replaced. Will be mutated.
 */
function reorderSiteSchemaNodeTypes(siteSchema) {
  if (!siteSchema.nodeTypes) return;
  siteSchema.nodeTypes = siteSchema.nodeTypes.map((nodeType) =>
    reorderObjectKeys(nodeType, [
      "typeName",
      "kind",
      "documentKind",
      "route",
      "redirectTo",
      "jsonLdType",
      "withFeaturedImage",
      "fields",
      "lists",
      "routingChildren",
      "children",
      "orderChildrenBy",
    ]),
  );
}

/**
 * Builds the message for the second LLM pass from the per-type prompts found in the analysis
 * dictionary and the information left unused by the first pass.
 *
 * @param unusedInformation - Optional markdown text left over from the first pass.
 * @param analysis - Analysis whose `dictionary` entries may carry a `prompt`.
 * @returns A "To do:" bulleted list (one bullet per dictionary prompt) followed by the unused
 *   information when both exist, only one of them when the other is absent, or
 *   `unusedInformation` unchanged (possibly undefined) when there are no prompts.
 */
function createUnusedInformationPrompt(unusedInformation, analysis) {
  const prompts = Object.entries(analysis.dictionary)
    .map(([typeName, entry]) => {
      return entry.prompt ? `${typeName}: ${entry.prompt}` : undefined;
    })
    .filter(isDef);

  if (prompts.length > 0) {
    // The separator must end the current bullet and open the next one ("\n- "); the template
    // already supplies the leading "- " of the first bullet.
    const nodeTypePrompts = `To do:\n\n- ${prompts.join("\n- ")}`;
    return unusedInformation ? `${nodeTypePrompts}\n\n${unusedInformation}` : nodeTypePrompts;
  }

  return unusedInformation;
}