UNPKG

@entro314labs/starlight-document-converter

Version:

A comprehensive document converter for Astro Starlight that transforms various document formats into Starlight-compatible Markdown with proper frontmatter

351 lines (350 loc) 12.4 kB
// src/plugins/built-in/content-analyzer.ts
import matter from "gray-matter";

/**
 * Derives document metadata (title, description, category, tags) and simple
 * content statistics from a Markdown string and the path it was read from.
 *
 * All matching of categories, tags and content-type indicators is plain,
 * case-insensitive substring containment.
 */
var ContentAnalyzer = class {
  categoryPatterns;
  tagPatterns;

  /**
   * @param {Record<string, string>} [categoryPatterns] - Maps a path substring
   *   to a category name; falls back to the built-in defaults when falsy.
   * @param {Record<string, string[]>} [tagPatterns] - Maps a tag to the
   *   substrings that trigger it; falls back to the built-in defaults when falsy.
   */
  constructor(categoryPatterns, tagPatterns) {
    this.categoryPatterns = categoryPatterns || this.getDefaultCategoryPatterns();
    this.tagPatterns = tagPatterns || this.getDefaultTagPatterns();
  }

  /**
   * Analyze content and generate comprehensive metadata.
   *
   * Values already present in the document's front matter win over generated
   * ones.
   *
   * @param {string} content - Raw Markdown, optionally starting with front matter.
   * @param {string} filePath - Path of the document; used for title, category,
   *   tag, topic and content-type heuristics.
   * @returns {{ metadata: object, analysis: object }}
   */
  analyzeContent(content, filePath) {
    // Normalize CRLF / lone CR so that front-matter detection and the
    // "\n\n" paragraph splitting below also work for Windows-authored files.
    const normalized = content.replace(/\r\n?/g, "\n");

    let parsedContent = normalized;
    let existingMetadata = {};
    if (normalized.startsWith("---\n")) {
      try {
        const parsed = matter(normalized);
        existingMetadata = parsed.data;
        parsedContent = parsed.content;
      } catch {
        // Deliberate best effort: unparsable front matter is treated as body text.
        parsedContent = normalized;
      }
    }

    const analysis = {
      wordCount: this.calculateWordCount(parsedContent),
      readingTime: this.estimateReadingTime(parsedContent),
      complexity: this.assessComplexity(parsedContent),
      headingStructure: this.extractHeadingStructure(parsedContent),
      topics: this.extractTopics(parsedContent, filePath),
      suggestedTags: this.suggestTags(parsedContent, filePath),
      contentType: this.detectContentType(parsedContent, filePath)
    };

    // Resolve the title once so the fallback description is built from the
    // real title instead of an empty string when front matter has none.
    const title = existingMetadata.title || this.generateTitle(parsedContent, filePath);

    const metadata = {
      title,
      description:
        existingMetadata.description ||
        this.generateDescription(parsedContent, String(title)),
      category: existingMetadata.category || this.inferCategory(filePath, analysis.topics),
      tags: existingMetadata.tags || analysis.suggestedTags,
      lastUpdated: existingMetadata.lastUpdated
    };

    return { metadata, analysis };
  }

  /**
   * Generate intelligent title from content.
   *
   * Tries, in order: the first ATX `# ` heading, a Setext H1, a fully bold
   * line, the first non-empty line (each of the last three only when shorter
   * than 100 characters), and finally the humanized file name.
   *
   * @param {string} content
   * @param {string} filePath
   * @returns {string}
   */
  generateTitle(content, filePath) {
    const firstHeading = content.match(/^#\s+(.+)$/m);
    if (firstHeading) {
      return this.cleanTitle(firstHeading[1]);
    }

    const titlePatterns = [
      /^(.+)\n=+$/m, // Setext-style H1
      /^\*\*(.+)\*\*$/m, // Bold text at start
      /^(.+)(?:\n-{3,})?$/m // First line
    ];
    for (const pattern of titlePatterns) {
      const match = content.match(pattern);
      if (match && match[1].length < 100) {
        return this.cleanTitle(match[1]);
      }
    }

    // Accept both "/" and "\" so Windows paths yield the file name only.
    const fileName = filePath.split(/[\\/]/).pop()?.replace(/\.[^.]+$/, "") || "Untitled";
    return this.humanizeFilename(fileName);
  }

  /**
   * Generate intelligent description from content.
   *
   * Uses the first prose paragraph (longer than 30 characters, not a heading,
   * list, table row or TODO/FIXME note), strips inline emphasis, and limits
   * the result to 155 characters including the trailing ellipsis.
   *
   * @param {string} content
   * @param {string} title - Used only for the fallback description.
   * @returns {string}
   */
  generateDescription(content, title) {
    const cleanContent = content
      .replace(/```[\s\S]*?```/g, "")
      .replace(/`[^`]+`/g, "")
      .replace(/!\[.*?\]\(.*?\)/g, "")
      .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1");

    const paragraphs = cleanContent
      .split("\n\n")
      .map((p) => p.replace(/\n/g, " ").trim())
      .filter(
        (p) =>
          p.length > 30 &&
          !p.startsWith("#") &&
          !p.startsWith("*") &&
          !p.startsWith("|") &&
          !p.match(/^\d+\./) &&
          !p.includes("TODO") &&
          !p.includes("FIXME")
      );

    if (paragraphs.length > 0) {
      let description = paragraphs[0];
      description = description
        .replace(/\*\*(.*?)\*\*/g, "$1")
        .replace(/\*(.*?)\*/g, "$1")
        .replace(/_([^_]+)_/g, "$1")
        .replace(/~([^~]+)~/g, "$1");

      if (description.length > 155) {
        const truncated = description.substring(0, 152);
        const lastSpace = truncated.lastIndexOf(" ");
        // Cut at a word boundary only if that keeps most of the text.
        description = lastSpace > 100 ? truncated.substring(0, lastSpace) : truncated;
        description += "...";
      }
      if (!/[.!?]$/.test(description)) {
        description += ".";
      }
      return description;
    }

    return this.generateFallbackDescription(title, content);
  }

  /**
   * Infer category from file path and content.
   *
   * Path segments are checked against `categoryPatterns` first; if none
   * matches, the extracted topics are checked against a fixed keyword table.
   *
   * @param {string} filePath
   * @param {string[]} topics
   * @returns {string} Category name, `"Documentation"` when nothing matches.
   */
  inferCategory(filePath, topics) {
    // Accept both "/" and "\" so each directory of a Windows path is tested.
    const pathParts = filePath.toLowerCase().split(/[\\/]/);
    for (const part of pathParts) {
      for (const [pattern, category] of Object.entries(this.categoryPatterns)) {
        // Substring match: short patterns also hit inside longer segment names.
        if (part.includes(pattern)) {
          return category;
        }
      }
    }

    const topicCategories = {
      "AI & ML": ["ai", "machine learning", "llm", "neural", "model"],
      Development: ["code", "programming", "function", "class", "method"],
      Guides: ["tutorial", "guide", "how to", "step", "setup"],
      Reference: ["api", "reference", "documentation", "spec"],
      Design: ["ui", "ux", "design", "style", "component"]
    };
    for (const [category, keywords] of Object.entries(topicCategories)) {
      if (keywords.some((keyword) => topics.some((topic) => topic.includes(keyword)))) {
        return category;
      }
    }

    return "Documentation";
  }

  /**
   * Suggest relevant tags based on content analysis.
   *
   * @param {string} content
   * @param {string} filePath
   * @returns {string[]} At most 8 tags, pattern-based tags first.
   */
  suggestTags(content, filePath) {
    const contentLower = content.toLowerCase();
    const pathLower = filePath.toLowerCase();
    const suggestedTags = /* @__PURE__ */ new Set();

    // Substring match: short patterns such as "ai" or "ts" also hit inside
    // longer words. Kept as-is because caller-supplied patterns may rely on it.
    for (const [tag, patterns] of Object.entries(this.tagPatterns)) {
      if (patterns.some((pattern) => contentLower.includes(pattern) || pathLower.includes(pattern))) {
        suggestedTags.add(tag);
      }
    }

    const techKeywords = [
      "react",
      "vue",
      "angular",
      "svelte",
      "astro",
      "typescript",
      "javascript",
      "python",
      "rust",
      "go",
      "node",
      "npm",
      "pnpm",
      "webpack",
      "vite",
      "api",
      "rest",
      "graphql",
      "database",
      "sql",
      "docker",
      "kubernetes",
      "aws",
      "azure",
      "gcp"
    ];
    for (const keyword of techKeywords) {
      if (contentLower.includes(keyword)) {
        suggestedTags.add(keyword);
      }
    }

    return Array.from(suggestedTags).slice(0, 8);
  }

  /**
   * Detect content type based on structure and keywords.
   *
   * The path decides first; otherwise the type whose indicator list has at
   * least two hits in the content wins, checked in the order tutorial,
   * reference, guide.
   *
   * @param {string} content
   * @param {string} filePath
   * @returns {"tutorial"|"guide"|"reference"|"blog"|"documentation"}
   */
  detectContentType(content, filePath) {
    const contentLower = content.toLowerCase();
    const pathLower = filePath.toLowerCase();

    if (pathLower.includes("tutorial") || pathLower.includes("guide")) {
      return contentLower.includes("step") ? "tutorial" : "guide";
    }
    if (pathLower.includes("api") || pathLower.includes("reference")) {
      return "reference";
    }
    if (pathLower.includes("blog") || pathLower.includes("post")) {
      return "blog";
    }

    const tutorialIndicators = ["step 1", "first, ", "next, ", "finally, ", "prerequisites"];
    const referenceIndicators = ["parameters", "returns", "example", "usage", "api"];
    const guideIndicators = ["overview", "introduction", "getting started", "how to"];

    // Number of indicators that occur at least once in the content.
    const countHits = (indicators) =>
      indicators.reduce(
        (score, indicator) => score + (contentLower.includes(indicator) ? 1 : 0),
        0
      );

    if (countHits(tutorialIndicators) >= 2) return "tutorial";
    if (countHits(referenceIndicators) >= 2) return "reference";
    if (countHits(guideIndicators) >= 2) return "guide";
    return "documentation";
  }

  /**
   * Extract topics and keywords from content.
   *
   * Collects heading texts, bold terms of 3-29 characters and path segments
   * (all lower-cased), drops entries of two characters or fewer, and keeps
   * the first 10.
   *
   * @param {string} content
   * @param {string} filePath
   * @returns {string[]}
   */
  extractTopics(content, filePath) {
    const topics = /* @__PURE__ */ new Set();

    const headings = content.match(/^#+\s+(.+)$/gm) || [];
    for (const heading of headings) {
      const title = heading.replace(/^#+\s+/, "").toLowerCase();
      topics.add(title);
    }

    const emphasizedTerms = content.match(/\*\*(.*?)\*\*/g) || [];
    for (const term of emphasizedTerms) {
      const cleaned = term.replace(/\*\*/g, "").trim();
      if (cleaned.length > 2 && cleaned.length < 30) {
        topics.add(cleaned.toLowerCase());
      }
    }

    // Accept both "/" and "\" so Windows paths contribute one topic per segment.
    const pathParts = filePath
      .split(/[\\/]/)
      .map((part) => part.replace(/[-_]/g, " ").replace(/\.[^.]+$/, ""));
    pathParts.forEach((part) => topics.add(part.toLowerCase()));

    return Array.from(topics)
      .filter((topic) => topic.length > 2)
      .slice(0, 10);
  }

  /**
   * Calculate reading time estimate.
   *
   * @param {string} content
   * @returns {number} Whole minutes at 200 words per minute, never less than 1.
   */
  estimateReadingTime(content) {
    const wordsPerMinute = 200;
    const wordCount = this.calculateWordCount(content);
    return Math.max(1, Math.ceil(wordCount / wordsPerMinute));
  }

  /**
   * Calculate word count.
   *
   * Code (fenced and inline), images, links and Markdown marker characters
   * are removed before counting.
   *
   * @param {string} content
   * @returns {number} Number of whitespace-separated words; 0 when nothing is left.
   */
  calculateWordCount(content) {
    const textContent = content
      .replace(/```[\s\S]*?```/g, "")
      .replace(/`[^`]+`/g, "")
      .replace(/!\[.*?\]\(.*?\)/g, "")
      .replace(/\[.*?\]\(.*?\)/g, "")
      .replace(/[#*_~`]/g, "")
      .trim();
    // "".split(/\s+/) yields [""], which would count an empty document as one word.
    if (textContent === "") {
      return 0;
    }
    return textContent.split(/\s+/).length;
  }

  /**
   * Assess content complexity.
   *
   * One point each for: more than 1000 words, more than 5 heading lines,
   * more than 3 fenced code blocks, more than 10 links.
   *
   * @param {string} content
   * @returns {"simple"|"moderate"|"complex"} complex at 3+ points, moderate at 1+.
   */
  assessComplexity(content) {
    const wordCount = this.calculateWordCount(content);
    const headingCount = (content.match(/^#+/gm) || []).length;
    // Every block contributes an opening and a closing fence.
    const codeBlockCount = (content.match(/```/g) || []).length / 2;
    const linkCount = (content.match(/\[.*?\]\(.*?\)/g) || []).length;

    const complexityScore =
      (wordCount > 1e3 ? 1 : 0) +
      (headingCount > 5 ? 1 : 0) +
      (codeBlockCount > 3 ? 1 : 0) +
      (linkCount > 10 ? 1 : 0);

    if (complexityScore >= 3) return "complex";
    if (complexityScore >= 1) return "moderate";
    return "simple";
  }

  /**
   * Extract heading structure.
   *
   * @param {string} content
   * @returns {{ level: number, title: string, anchor: string }[]} One entry per
   *   ATX heading, in document order.
   */
  extractHeadingStructure(content) {
    const headings = content.match(/^(#+)\s+(.+)$/gm) || [];
    return headings
      .map((heading) => {
        const match = heading.match(/^(#+)\s+(.+)$/);
        if (!match) return null;
        const level = match[1].length;
        const title = match[2].trim();
        const anchor = this.generateAnchor(title);
        return { level, title, anchor };
      })
      .filter((entry) => entry !== null);
  }

  /**
   * Generate URL-friendly anchor.
   *
   * @param {string} title
   * @returns {string} Lower-case text with only a-z, 0-9 and single hyphens.
   */
  generateAnchor(title) {
    return title
      .toLowerCase()
      .replace(/[^a-z0-9\s-]/g, "")
      .replace(/\s+/g, "-")
      .replace(/-+/g, "-")
      .replace(/^-|-$/g, "");
  }

  /**
   * Clean and format title.
   *
   * @param {string} title
   * @returns {string} Title without Markdown markers, at most 60 characters.
   */
  cleanTitle(title) {
    return title
      // Unwrap whole links/images first so "[Guide](./x.md)" becomes "Guide"
      // rather than "Guide(./x.md)".
      .replace(/!?\[([^\]]*)\]\([^)]*\)/g, "$1")
      .replace(/[#*_`]/g, "")
      .replace(/\[([^\]]+)\]/g, "$1")
      .trim()
      .substring(0, 60);
  }

  /**
   * Humanize filename for title generation.
   *
   * @param {string} filename - File name without extension.
   * @returns {string} Words separated by spaces, each starting upper-case.
   */
  humanizeFilename(filename) {
    return filename
      .replace(/[-_]/g, " ")
      .replace(/([a-z])([A-Z])/g, "$1 $2")
      .replace(/\b\w/g, (l) => l.toUpperCase())
      .trim();
  }

  /**
   * Generate fallback description based on content type.
   *
   * @param {string} title
   * @param {string} content
   * @returns {string}
   */
  generateFallbackDescription(title, content) {
    const titleLower = title.toLowerCase();
    if (titleLower.includes("api") || content.includes("endpoint")) {
      return `API documentation and reference for ${title.replace(/api/i, "").trim()}.`;
    }
    if (titleLower.includes("guide")) {
      return `Comprehensive guide covering ${title.toLowerCase().replace("guide", "").trim()}.`;
    }
    if (titleLower.includes("tutorial")) {
      return `Step-by-step tutorial for ${title.toLowerCase().replace("tutorial", "").trim()}.`;
    }
    if (titleLower.includes("reference")) {
      return `Reference documentation for ${title.replace(/reference/i, "").trim()}.`;
    }
    return `Documentation and information about ${title.toLowerCase()}.`;
  }

  /**
   * Default category patterns.
   *
   * @returns {Record<string, string>} Path substring to category name.
   */
  getDefaultCategoryPatterns() {
    return {
      ai: "AI & ML",
      ml: "AI & ML",
      claude: "AI & ML",
      guide: "Guides",
      tutorial: "Guides",
      howto: "Guides",
      reference: "Reference",
      api: "Reference",
      docs: "Reference",
      design: "Design System",
      ui: "Design System",
      component: "Design System",
      project: "Projects",
      blog: "Blog",
      post: "Blog",
      news: "Blog"
    };
  }

  /**
   * Default tag patterns.
   *
   * @returns {Record<string, string[]>} Tag to the substrings that trigger it.
   */
  getDefaultTagPatterns() {
    return {
      javascript: ["javascript", "js", "node.js", "npm", "pnpm"],
      typescript: ["typescript", "ts"],
      react: ["react", "jsx", "react.js"],
      vue: ["vue", "vue.js", "nuxt"],
      astro: ["astro", "starlight"],
      css: ["css", "scss", "sass", "tailwind"],
      api: ["api", "rest", "graphql", "endpoint"],
      database: ["database", "sql", "mongodb", "postgres", "supabase"],
      ai: ["ai", "machine learning", "llm", "claude", "gpt"],
      guide: ["guide", "tutorial", "how-to", "documentation"],
      reference: ["reference", "docs"],
      business: ["business", "plan", "strategy"],
      security: ["security", "auth", "authentication"],
      performance: ["performance", "optimization", "cache"],
      testing: ["test", "testing", "jest", "vitest"]
    };
  }
};

export { ContentAnalyzer };
//# sourceMappingURL=chunk-3YZ5TT75.js.map