UNPKG

@entro314labs/starlight-document-converter

Version:

A comprehensive document converter for Astro Starlight that transforms various document formats into Starlight-compatible Markdown with proper frontmatter

385 lines (384 loc) 13.4 kB
// src/plugins/built-in/frontmatter-repair.ts
import matter from "gray-matter";

/**
 * Metadata-enhancer plugin. Reads the source file named by
 * `context.inputPath`, runs it through `FrontmatterRepair`, and merges the
 * repaired frontmatter over the incoming metadata.
 */
var frontmatterEnhancer = {
  metadata: {
    name: "frontmatter-enhancer",
    version: "1.0.0",
    description: "Enhances and repairs frontmatter metadata",
    author: "Starlight Document Converter"
  },
  priority: 100, // High priority to run early
  /**
   * @param {object} metadata Metadata collected so far.
   * @param {{ inputPath?: string }} context Conversion context.
   * @returns {Promise<object>} `metadata`, overlaid with repaired frontmatter
   *   fields when a repair was applied; otherwise `metadata` unchanged.
   */
  enhance: async (metadata, context) => {
    // Without a source file there is nothing to read or repair. Returning
    // early keeps the incoming metadata untouched instead of overlaying
    // placeholder values generated from empty content.
    if (!context.inputPath) {
      return metadata;
    }

    const { readFile } = await import("fs/promises");
    const content = await readFile(context.inputPath, "utf-8");

    const repairer = new FrontmatterRepair();
    const repairResult = repairer.repairFrontmatter(content, context.inputPath);
    if (!(repairResult.success && repairResult.fixed)) {
      return metadata;
    }

    try {
      const parsed = matter(repairResult.repairedContent);
      // Repaired frontmatter wins over previously collected values.
      return { ...metadata, ...parsed.data };
    } catch {
      // Best effort: an unparsable repair result leaves the metadata as it was.
      return metadata;
    }
  }
};

/**
 * Validator plugin. Maps the result of `FrontmatterRepair.validateContent`
 * onto a score / level / issues / suggestions report.
 */
var frontmatterValidator = {
  metadata: {
    name: "frontmatter-validator",
    version: "1.0.0",
    description: "Validates frontmatter quality and structure",
    author: "Starlight Document Converter"
  },
  /**
   * @param {string} content Full document text, including frontmatter.
   * @param {object} metadata Unused by this validator.
   * @param {{ inputPath?: string }} context Conversion context.
   * @returns {{ score: number, level: string, issues: object[], suggestions: string[] }}
   */
  validate: (content, metadata, context) => {
    const repairer = new FrontmatterRepair();
    const validation = repairer.validateContent(content, context.inputPath);
    const overall = validation.score?.overall;

    return {
      // NOTE(review): the numeric score is the title sub-score only, not an
      // aggregate of all sub-scores — confirm this is intended.
      score: validation.score?.titleScore || 0,
      level: overall === "good" ? "high" : overall === "fair" ? "medium" : "low",
      issues: validation.issues.map((issue) => ({
        type: issue.type,
        message: issue.message,
        severity: issue.type === "error" ? 8 : issue.type === "warning" ? 5 : 2
      })),
      suggestions: validation.score?.suggestions || []
    };
  }
};

/**
 * Validates and repairs YAML frontmatter of Markdown documents.
 */
var FrontmatterRepair = class {
  requiredFields = ["title"];
  recommendedFields = ["description"];
  maxTitleLength = 60;
  maxDescriptionLength = 160;
  // Opening frontmatter fence on the first line. Accepts CRLF as well as LF so
  // that files with Windows line endings are not treated as lacking
  // frontmatter (which would make repair prepend a second block).
  frontmatterOpenPattern = /^---\r?\n/;

  /**
   * Validate frontmatter and content structure
   *
   * @param {string} content Full document text.
   * @param {string} [filePath] Currently unused.
   * @returns {{ valid: boolean, issues: object[], metadata?: object, score: object }}
   *   `valid` is false when at least one issue of type "error" was found.
   */
  validateContent(content, filePath) {
    const issues = [];
    let metadata;
    try {
      if (!this.frontmatterOpenPattern.test(content)) {
        issues.push({
          type: "error",
          message: "Missing frontmatter",
          suggestion: "Add YAML frontmatter with title and description"
        });
        return {
          valid: false,
          issues,
          score: this.calculateQualityScore(content, void 0, issues)
        };
      }

      const parsed = matter(content);
      metadata = parsed.data;
      const contentBody = parsed.content;

      for (const field of this.requiredFields) {
        if (!metadata[field]) {
          issues.push({
            type: "error",
            field,
            message: `Missing required frontmatter field: ${field}`,
            suggestion: `Add ${field} field to frontmatter`
          });
        }
      }

      for (const field of this.recommendedFields) {
        if (!metadata[field]) {
          issues.push({
            type: "warning",
            field,
            message: `Missing recommended frontmatter field: ${field}`,
            suggestion: `Add ${field} field for better SEO and navigation`
          });
        }
      }

      if (metadata.title && metadata.title.length > this.maxTitleLength) {
        issues.push({
          type: "warning",
          field: "title",
          message: `Title is too long (${metadata.title.length} chars, max ${this.maxTitleLength})`,
          suggestion: "Shorten title for better readability"
        });
      }

      if (metadata.description && metadata.description.length > this.maxDescriptionLength) {
        issues.push({
          type: "warning",
          field: "description",
          message: `Description is too long (${metadata.description.length} chars, max ${this.maxDescriptionLength})`,
          suggestion: "Shorten description for better SEO"
        });
      }

      this.validateContentStructure(contentBody, issues);
    } catch (error) {
      issues.push({
        type: "error",
        message: `Failed to parse frontmatter: ${error instanceof Error ? error.message : "Unknown error"}`,
        suggestion: "Fix YAML syntax errors in frontmatter"
      });
    }

    return {
      valid: issues.filter((i) => i.type === "error").length === 0,
      issues,
      metadata,
      score: this.calculateQualityScore(content, metadata, issues)
    };
  }

  /**
   * Repair frontmatter issues
   *
   * Adds a frontmatter block when none exists; otherwise fills in a missing
   * title/description/category and normalizes title and description text.
   * Never throws: failures are reported through `success: false`.
   *
   * @param {string} content Full document text.
   * @param {string} [filePath] Source path, used as a fallback for the title
   *   and to infer a category. May be omitted.
   * @returns {{ success: boolean, fixed: boolean, issues: string[], originalContent: string, repairedContent: string }}
   */
  repairFrontmatter(content, filePath) {
    const originalContent = content;
    const issues = [];
    let needsFix = false;
    try {
      if (!this.frontmatterOpenPattern.test(content)) {
        const generatedMeta = this.generateFrontmatterFromContent(content, filePath);
        const frontmatterStr = this.buildFrontmatterString(generatedMeta);
        // The closing fence must end its own line; separate it from the body
        // with a blank line.
        const repairedContent = `${frontmatterStr}\n\n${content}`;
        issues.push("Added missing frontmatter");
        return {
          success: true,
          fixed: true,
          issues,
          originalContent,
          repairedContent
        };
      }

      const parsed = matter(content);
      const metadata = { ...parsed.data };
      const contentBody = parsed.content;

      if (!metadata.title) {
        metadata.title = this.extractTitleFromContent(contentBody, filePath);
        issues.push("Generated missing title");
        needsFix = true;
      }

      if (!metadata.description) {
        metadata.description = this.generateDescriptionFromContent(
          contentBody,
          metadata.title || ""
        );
        issues.push("Generated missing description");
        needsFix = true;
      }

      if (metadata.title) {
        const cleanTitle = this.cleanString(metadata.title);
        if (cleanTitle !== metadata.title) {
          metadata.title = cleanTitle;
          issues.push("Cleaned title formatting");
          needsFix = true;
        }
      }

      if (metadata.description) {
        const cleanDesc = this.cleanString(metadata.description);
        if (cleanDesc !== metadata.description || metadata.description.length > this.maxDescriptionLength) {
          metadata.description = cleanDesc.length > this.maxDescriptionLength
            ? `${cleanDesc.substring(0, this.maxDescriptionLength - 3)}...`
            : cleanDesc;
          issues.push("Cleaned and truncated description");
          needsFix = true;
        }
      }

      if (!metadata.category) {
        metadata.category = this.inferCategoryFromPath(filePath);
        if (metadata.category) {
          issues.push(`Inferred category: ${metadata.category}`);
          needsFix = true;
        }
      }

      if (needsFix) {
        const frontmatterStr = this.buildFrontmatterString(metadata);
        // Drop leading blank lines of the body so the output always has
        // exactly one blank line after the closing fence.
        const body = contentBody.replace(/^(?:\r?\n)+/, "");
        const repairedContent = `${frontmatterStr}\n\n${body}`;
        return {
          success: true,
          fixed: true,
          issues,
          originalContent,
          repairedContent
        };
      }

      return {
        success: true,
        fixed: false,
        issues: ["No repairs needed"],
        originalContent,
        repairedContent: originalContent
      };
    } catch (error) {
      return {
        success: false,
        fixed: false,
        issues: [`Failed to repair: ${error instanceof Error ? error.message : "Unknown error"}`],
        originalContent,
        repairedContent: originalContent
      };
    }
  }

  /**
   * Append structural warnings (empty body, no headings, skipped heading
   * levels) to `issues`.
   *
   * @param {string} content Document body without frontmatter.
   * @param {object[]} issues Issue list, mutated in place.
   */
  validateContentStructure(content, issues) {
    if (content.trim().length === 0) {
      issues.push({
        type: "warning",
        message: "Document appears to be empty",
        suggestion: "Add content to the document"
      });
      return;
    }

    const headings = content.match(/^#{1,6}\s+.+$/gm) || [];
    if (headings.length === 0) {
      issues.push({
        type: "warning",
        message: "No headings found",
        suggestion: "Add headings to improve document structure"
      });
    } else if (!this.hasProperHeadingStructure(headings)) {
      issues.push({
        type: "warning",
        message: "Inconsistent heading structure",
        suggestion: "Use sequential heading levels (h1 -> h2 -> h3)"
      });
    }
  }

  /**
   * @param {string[]} headings Heading lines in document order.
   * @returns {boolean} False when any heading is more than one level deeper
   *   than the heading before it (the first heading is compared to level 0).
   */
  hasProperHeadingStructure(headings) {
    let previousLevel = 0;
    for (const heading of headings) {
      const level = heading.match(/^(#+)/)?.[1].length || 0;
      if (level > previousLevel + 1) {
        return false;
      }
      previousLevel = level;
    }
    return true;
  }

  /**
   * Score title, description, content length and heading structure, and
   * derive an overall rating.
   *
   * The overall rating is based on the average of the four sub-scores minus
   * 20 points per issue of type "error": >= 80 is "good", >= 60 is "fair",
   * anything lower is "poor".
   *
   * @param {string} content Full document text.
   * @param {object} [metadata] Parsed frontmatter, if any.
   * @param {object[]} [issues] Issues found so far.
   * @returns {{ overall: string, titleScore: number, descriptionScore: number, contentScore: number, structureScore: number, suggestions: string[] }}
   */
  calculateQualityScore(content, metadata, issues = []) {
    let titleScore = 0;
    let descriptionScore = 0;
    let contentScore = 0;
    let structureScore = 0;

    if (metadata?.title) {
      titleScore = metadata.title.length > 10 && metadata.title.length <= this.maxTitleLength ? 100 : 60;
    }

    if (metadata?.description) {
      const length = metadata.description.length;
      descriptionScore = length >= 50 && length <= this.maxDescriptionLength ? 100 : 70;
    }

    const wordCount = content.split(/\s+/).length;
    contentScore = wordCount > 100 ? 100 : Math.max(40, wordCount);

    const headings = content.match(/^#{1,6}\s+.+$/gm) || [];
    structureScore = headings.length > 0 ? 100 : 50;

    const avgScore = (titleScore + descriptionScore + contentScore + structureScore) / 4;
    const errorPenalty = issues.filter((i) => i.type === "error").length * 20;
    const finalScore = Math.max(0, avgScore - errorPenalty);
    const overall = finalScore >= 80 ? "good" : finalScore >= 60 ? "fair" : "poor";

    const suggestions = [];
    if (titleScore < 80) suggestions.push("Improve title quality");
    if (descriptionScore < 80) suggestions.push("Add better description");
    if (contentScore < 80) suggestions.push("Add more content");
    if (structureScore < 80) suggestions.push("Improve document structure with headings");

    return {
      overall,
      titleScore,
      descriptionScore,
      contentScore,
      structureScore,
      suggestions
    };
  }

  /**
   * Build title, description and category for a document without
   * frontmatter.
   *
   * @param {string} content Full document text.
   * @param {string} [filePath] Source path, if known.
   * @returns {{ title: string, description: string, category: (string|undefined) }}
   */
  generateFrontmatterFromContent(content, filePath) {
    const title = this.extractTitleFromContent(content, filePath);
    const description = this.generateDescriptionFromContent(content, title);
    const category = this.inferCategoryFromPath(filePath);
    return { title, description, category };
  }

  /**
   * Pick a title from, in order: the first level-1 heading, the first
   * `title:` line, the file name, or "Untitled".
   *
   * @param {string} content Document text.
   * @param {string} [filePath] Source path; when omitted the file-name
   *   fallback is skipped.
   * @returns {string}
   */
  extractTitleFromContent(content, filePath) {
    const headingMatch = content.match(/^#\s+(.+)$/m);
    if (headingMatch) {
      return this.cleanString(headingMatch[1]);
    }

    const titleMatch = content.match(/title:\s*(.+)/i);
    if (titleMatch) {
      return this.cleanString(titleMatch[1]);
    }

    // `filePath` is optional: fall back to "Untitled" rather than throwing.
    const filename = filePath?.split("/").pop()?.replace(/\.[^.]+$/, "") || "Untitled";
    return this.cleanFilename(filename);
  }

  /**
   * Derive a description from the first prose paragraph, or from the title
   * when no suitable paragraph exists.
   *
   * @param {string} content Document text (a leading frontmatter block is ignored).
   * @param {string} title Document title, used for the fallback sentences.
   * @returns {string}
   */
  generateDescriptionFromContent(content, title) {
    const cleanContent = content.replace(/^---[\s\S]*?---\n/, "");
    const paragraphs = cleanContent
      .split("\n\n")
      .map((p) => p.replace(/\n/g, " ").trim())
      .filter(
        (p) => p.length > 20 && !p.startsWith("#") && !p.startsWith("```") && !p.startsWith("{")
      );

    if (paragraphs.length > 0) {
      let description = paragraphs[0]
        // Unwrap links/images to their text BEFORE stripping brackets;
        // otherwise the pattern can no longer match and "(url)" leaks through.
        .replace(/!?\[([^\]]+)\]\([^)]+\)/g, "$1")
        .replace(/[#*_`[\]]/g, "")
        .substring(0, this.maxDescriptionLength - 3);

      // When the text was cut at the limit, back up to a word boundary.
      if (description.length === this.maxDescriptionLength - 3) {
        const lastSpace = description.lastIndexOf(" ");
        if (lastSpace > 100) {
          description = description.substring(0, lastSpace);
        }
      }

      const cleaned = this.cleanString(description);
      // Avoid doubling the terminal punctuation ("sentence..").
      return /[.!?]$/.test(cleaned) ? cleaned : `${cleaned}.`;
    }

    if (title.toLowerCase().includes("readme")) {
      return `Documentation and setup guide for ${title.replace(/readme/i, "").trim()}.`;
    }
    if (title.toLowerCase().includes("guide")) {
      return `Comprehensive guide covering ${title.toLowerCase().replace("guide", "").trim()}.`;
    }
    if (title.toLowerCase().includes("api")) {
      return `API documentation and reference for ${title.replace(/api/i, "").trim()}.`;
    }
    return `Documentation for ${title}.`;
  }

  /**
   * Use the name of the file's parent directory as the category.
   *
   * @param {string} [filePath] Source path using "/" separators.
   * @returns {string|undefined} Undefined when no path is given, the path has
   *   no directory part, or the parent segment is "." or "..".
   */
  inferCategoryFromPath(filePath) {
    if (!filePath) {
      return void 0;
    }

    const pathParts = filePath.split("/");
    if (pathParts.length <= 1) {
      return void 0;
    }

    const category = pathParts.at(-2);
    // "." and ".." are relative-path markers, not directory names.
    if (!category || category === "." || category === "..") {
      return void 0;
    }
    return this.cleanFilename(category);
  }

  /**
   * Remove braces, brackets, quotes and backslashes, collapse whitespace and
   * trim.
   *
   * @param {*} str Value to clean; non-strings (e.g. a numeric YAML title)
   *   are converted to text first, null/undefined become "".
   * @returns {string}
   */
  cleanString(str) {
    const text = str == null ? "" : String(str);
    return text.replace(/[{}[\]"'\\]/g, "").replace(/\s+/g, " ").trim();
  }

  /**
   * Turn a file or directory name into a display string: dashes and
   * underscores become spaces, camelCase is split, words are capitalized.
   *
   * @param {string} filename
   * @returns {string}
   */
  cleanFilename(filename) {
    return filename
      .replace(/[-_]/g, " ")
      .replace(/([a-z])([A-Z])/g, "$1 $2")
      .replace(/\b\w/g, (l) => l.toUpperCase())
      .replace(/\s+/g, " ")
      .trim();
  }

  /**
   * Serialize metadata as a YAML frontmatter block delimited by `---` lines
   * (no trailing newline).
   *
   * NOTE(review): only title, description, category, tags and lastUpdated are
   * written. Any other keys present in `metadata` are not serialized, so they
   * are absent from repaired content — confirm this is intended.
   *
   * @param {object} metadata
   * @returns {string}
   */
  buildFrontmatterString(metadata) {
    const lines = ["---"];

    if (metadata.title) {
      lines.push(`title: "${this.escapeYamlString(metadata.title)}"`);
    }
    if (metadata.description) {
      lines.push(`description: "${this.escapeYamlString(metadata.description)}"`);
    }
    if (metadata.category) {
      lines.push(`category: "${this.escapeYamlString(metadata.category)}"`);
    }

    if (metadata.tags) {
      // A scalar `tags: foo` must be treated as one tag, not iterated
      // character by character.
      const tags = Array.isArray(metadata.tags) ? metadata.tags : [metadata.tags];
      if (tags.length > 0) {
        lines.push("tags:");
        for (const tag of tags) {
          lines.push(`  - "${this.escapeYamlString(tag)}"`);
        }
      }
    }

    if (metadata.lastUpdated) {
      const value = metadata.lastUpdated;
      // A parsed YAML timestamp arrives as a Date; write it back in ISO form
      // so it stays a timestamp instead of becoming a locale-style string.
      const lastUpdated =
        value instanceof Date && !Number.isNaN(value.getTime()) ? value.toISOString() : value;
      lines.push(`lastUpdated: ${lastUpdated}`);
    }

    lines.push("---");
    return lines.join("\n");
  }

  /**
   * Escape a value for use inside a double-quoted YAML scalar.
   *
   * @param {*} str Value to escape; converted to text first.
   * @returns {string}
   */
  escapeYamlString(str) {
    return String(str)
      // Backslashes first, so the escapes added below are not doubled.
      .replace(/\\/g, "\\\\")
      .replace(/"/g, '\\"')
      .replace(/\r/g, "\\r")
      .replace(/\n/g, "\\n");
  }
};

export {
  frontmatterEnhancer,
  frontmatterValidator,
  FrontmatterRepair
};
//# sourceMappingURL=chunk-QAOV4ICX.js.map