// @entro314labs/starlight-document-converter
// Version: (not specified)
// A comprehensive document converter for Astro Starlight that transforms various document formats into Starlight-compatible Markdown with proper frontmatter.
// 908 lines (905 loc) • 31.6 kB
// JavaScript
// src/converter.ts
import { mkdir, readdir, readFile, writeFile } from "fs/promises";
import { basename, dirname, extname, join } from "path";
import chalk from "chalk";
import matter from "gray-matter";
import mammoth from "mammoth";
import TurndownService from "turndown";
/**
 * Converts source documents (Word, RTF, HTML, plain text, Markdown and other
 * text-based files) into Markdown with generated YAML frontmatter, and keeps
 * running statistics about what was processed, skipped or failed.
 */
var DocumentConverter = class {
  options;
  stats;
  turndownService;

  /**
   * Builds the effective option set (caller values override the defaults
   * below), zeroes the statistics and configures the HTML-to-Markdown service.
   *
   * NOTE(review): tagPatterns, repairMode, validateContent, generateToc,
   * processImages, fixLinks and generateSidebar are stored here but are not
   * read by any method of this class; confirm whether other modules use them.
   *
   * @param options Partial converter settings.
   */
  constructor(options = {}) {
    this.options = {
      outputDir: options.outputDir || "src/content/docs",
      preserveStructure: options.preserveStructure ?? true,
      generateTitles: options.generateTitles ?? true,
      generateDescriptions: options.generateDescriptions ?? true,
      addTimestamps: options.addTimestamps ?? false,
      defaultCategory: options.defaultCategory || "documentation",
      verbose: options.verbose ?? false,
      dryRun: options.dryRun ?? false,
      categoryPatterns: options.categoryPatterns || this.getDefaultCategoryPatterns(),
      tagPatterns: options.tagPatterns || this.getDefaultTagPatterns(),
      ignorePatterns: options.ignorePatterns || this.getDefaultIgnorePatterns(),
      repairMode: options.repairMode ?? false,
      validateContent: options.validateContent ?? false,
      generateToc: options.generateToc ?? false,
      processImages: options.processImages ?? false,
      fixLinks: options.fixLinks ?? false,
      generateSidebar: options.generateSidebar ?? false,
      maxDescriptionLength: options.maxDescriptionLength ?? 160
    };
    this.stats = {
      processed: 0,
      skipped: 0,
      errors: 0,
      formats: /* @__PURE__ */ new Map()
    };
    this.turndownService = new TurndownService({
      headingStyle: "atx",
      codeBlockStyle: "fenced",
      bulletListMarker: "-"
    });
  }

  /** Default mapping from a path keyword to the category it implies. */
  getDefaultCategoryPatterns() {
    return {
      claude: "Claude Code",
      guide: "Guides",
      tutorial: "Guides",
      reference: "Reference",
      api: "Reference",
      ai: "AI & ML",
      ml: "AI & ML",
      design: "Design System",
      project: "Projects",
      blog: "Blog",
      docs: "Documentation"
    };
  }

  /** Default value of the `tagPatterns` option (tag name -> trigger terms). */
  getDefaultTagPatterns() {
    return {
      javascript: ["javascript", "js", "node.js", "npm", "pnpm"],
      typescript: ["typescript", "ts"],
      react: ["react", "jsx", "react.js"],
      vue: ["vue", "vue.js", "nuxt"],
      astro: ["astro", "starlight"],
      css: ["css", "scss", "sass", "tailwind"],
      api: ["api", "rest", "graphql", "endpoint"],
      database: ["database", "sql", "mongodb", "postgres", "supabase"],
      ai: ["ai", "machine learning", "llm", "claude", "gpt"],
      guide: ["guide", "tutorial", "how-to", "documentation"],
      reference: ["reference", "docs"],
      business: ["business", "plan", "strategy"],
      security: ["security", "auth", "authentication"],
      performance: ["performance", "optimization", "cache"],
      testing: ["test", "testing", "jest", "vitest"]
    };
  }

  /** Default glob patterns for entries that directory conversion skips. */
  getDefaultIgnorePatterns() {
    return ["node_modules/**", ".git/**", "dist/**", ".astro/**", "**/*.log", "**/.*"];
  }

  /**
   * Writes a timestamped, colourised message to the console. Messages are
   * printed only in verbose mode, except errors which are always printed.
   *
   * @param message Text to print.
   * @param level "info" (default), "warn" or "error".
   */
  log(message, level = "info") {
    if (!(this.options.verbose || level === "error")) return;
    const timestamp = (/* @__PURE__ */ new Date()).toISOString().slice(11, 19);
    let coloredMessage;
    if (level === "error") {
      coloredMessage = chalk.red(message);
    } else if (level === "warn") {
      coloredMessage = chalk.yellow(message);
    } else {
      coloredMessage = chalk.blue(message);
    }
    console.log(`[${chalk.gray(timestamp)}] ${coloredMessage}`);
  }

  /**
   * Removes a leading YAML frontmatter block. The fences must sit on their own
   * lines, so a document that merely starts with a `---` rule and later
   * contains an inline `---` is not truncated.
   *
   * @param content Raw document text.
   * @returns The text without the leading frontmatter block.
   */
  stripFrontmatter(content) {
    return content.replace(/^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/, "");
  }

  /**
   * Derives a title, trying in order: an HTML <title>, a short first line that
   * looks like a heading, the first `# ` heading, any heading, and finally a
   * title-cased version of the file name.
   *
   * @param content Document text.
   * @param filename File name used for the last-resort fallback.
   * @returns The title, or undefined when title generation is disabled.
   */
  extractTitle(content, filename) {
    if (!this.options.generateTitles) return;
    const titleMatch = content.match(/<title[^>]*>([^<]+)<\/title>/i);
    if (titleMatch) return titleMatch[1].trim();
    // Work on the body only: otherwise a frontmatter fence ("---") or a YAML
    // comment ("# ...") would be picked up as the title.
    const body = this.stripFrontmatter(content);
    const firstLine = body
      .split("\n")
      .map((line) => line.trim())
      .find((line) => line.length > 0);
    if (
      firstLine &&
      !firstLine.startsWith("#") &&
      !firstLine.startsWith("```") &&
      firstLine.length < 100 &&
      !firstLine.endsWith(".") &&
      // A title needs at least one letter or digit (rejects "---", "***", ...).
      /[\p{L}\p{N}]/u.test(firstLine)
    ) {
      return firstLine;
    }
    const h1Match = body.match(/^#\s+(.+)$/m);
    if (h1Match) return h1Match[1].trim();
    const headingMatch = body.match(/^#{1,6}\s+(.+)$/m);
    if (headingMatch) return headingMatch[1].trim();
    return filename
      .replace(/\.[^/.]+$/, "")
      .replace(/[-_]/g, " ")
      .replace(/\b\w/g, (l) => l.toUpperCase());
  }

  /**
   * Picks the first prose paragraph that is usable as a description.
   *
   * @param content Document text.
   * @returns The description, or undefined when none qualifies or description
   *   generation is disabled.
   */
  extractDescription(content) {
    if (!this.options.generateDescriptions) return;
    const paragraphs = this.extractParagraphs(content);
    const startIndex = this.getDescriptionStartIndex(paragraphs);
    for (let i = startIndex; i < paragraphs.length; i++) {
      const result = this.processDescriptionParagraph(paragraphs[i]);
      if (result) return result;
    }
    return;
  }

  /** Splits the body (frontmatter removed) into non-empty, trimmed paragraphs. */
  extractParagraphs(content) {
    return this.stripFrontmatter(content)
      .trim()
      .split(/\n\s*\n/)
      .map((p) => p.trim())
      .filter((p) => p.length > 0);
  }

  /**
   * Returns 1 when the first paragraph looks like a title line (short, single
   * line, no trailing period) and should be skipped, otherwise 0.
   */
  getDescriptionStartIndex(paragraphs) {
    if (paragraphs.length <= 1) return 0;
    const first = paragraphs[0];
    const looksLikeTitle = first.length < 100 && !first.endsWith(".") && !first.includes("\n");
    return looksLikeTitle ? 1 : 0;
  }

  /**
   * Tells whether a paragraph is markup rather than prose: heading, code fence,
   * list, table, quote or a caption such as "Table:" / "Figure:".
   */
  isStructuralElement(paragraph) {
    return (
      paragraph.startsWith("#") ||
      paragraph.startsWith("```") ||
      paragraph.startsWith("-") ||
      paragraph.startsWith("*") ||
      paragraph.startsWith("|") ||
      paragraph.startsWith(">") ||
      /^\d+\./.test(paragraph) ||
      // Grouped so the colon applies to every label; ungrouped, any prose
      // starting with "Code", "Table", "Image" or "Figure" was rejected.
      /^(Table|Figure|Image|Code|Example):/i.test(paragraph)
    );
  }

  /** Strips inline Markdown markers and collapses whitespace. */
  cleanParagraph(paragraph) {
    return paragraph
      .replace(/[#*_`[\]]/g, "")
      .replace(/\s+/g, " ")
      .replace(/^\s*[-*]\s*/, "")
      .trim();
  }

  /** Effective maximum description length (falls back to 160 if unusable). */
  getDescriptionLimit() {
    const configured = this.options.maxDescriptionLength;
    return Number.isFinite(configured) && configured > 0 ? configured : 160;
  }

  /**
   * Turns one paragraph into a description candidate.
   *
   * @param paragraph Raw paragraph text.
   * @returns A cleaned sentence no longer than `maxDescriptionLength`, or
   *   undefined when the paragraph is structural, too short or a caption.
   */
  processDescriptionParagraph(paragraph) {
    if (this.isStructuralElement(paragraph)) return;
    const cleanParagraph = this.cleanParagraph(paragraph);
    if (cleanParagraph.length < 20) return;
    const sentence = cleanParagraph.endsWith(".") ? cleanParagraph : `${cleanParagraph}.`;
    // Honour the configured limit instead of a hard-coded threshold.
    if (sentence.length > this.getDescriptionLimit()) {
      return this.truncateDescription(cleanParagraph);
    }
    if (!sentence.match(/^(Table|Figure|Image|Code|Example):/i)) {
      return sentence;
    }
    return;
  }

  /**
   * Shortens text so that the result, including the trailing "...", fits in
   * `maxDescriptionLength`. Cuts at the last space when that keeps at least
   * roughly two thirds of the budget.
   */
  truncateDescription(text) {
    const budget = Math.max(this.getDescriptionLimit() - 3, 1);
    const truncated = text.substring(0, budget);
    const lastSpace = truncated.lastIndexOf(" ");
    const result = lastSpace > Math.floor((budget * 2) / 3) ? truncated.substring(0, lastSpace) : truncated;
    return `${result}...`;
  }

  /**
   * Collects up to eight tags from technology terms, category, content type,
   * file name and complexity signals.
   *
   * @param content Document text.
   * @param filename File name.
   * @param category Category assigned to the document.
   * @returns Tag names in insertion order.
   */
  extractTags(content, filename, category) {
    const tags = /* @__PURE__ */ new Set();
    const text = content.toLowerCase();
    this.addTechTags(tags, text);
    this.addCategoryTags(tags, category);
    this.addContentTypeTags(tags, text);
    this.addFilenameTags(tags, filename);
    this.addComplexityTags(tags, content, text);
    // Two-character tags are kept so the curated "ai" and "go" tags survive.
    return Array.from(tags).filter((tag) => tag.length >= 2).slice(0, 8);
  }

  /** Technology tag -> terms whose presence in the text implies the tag. */
  getTechPatterns() {
    return {
      react: ["react", "jsx", "usestate", "useeffect", "component"],
      vue: ["vue", "vuejs", "vue.js", "nuxt"],
      angular: ["angular", "ng-", "@component"],
      svelte: ["svelte", "sveltekit"],
      nodejs: ["node.js", "nodejs", "npm", "express", "fastify"],
      python: ["python", "django", "flask", "fastapi", "pip"],
      typescript: ["typescript", "ts", ".ts"],
      javascript: ["javascript", "js", ".js"],
      java: ["java", "spring", "maven", "gradle"],
      rust: ["rust", "cargo", "rustc"],
      go: ["golang", "go mod", "go get"],
      postgresql: ["postgres", "postgresql", "psql"],
      mysql: ["mysql", "mariadb"],
      mongodb: ["mongo", "mongodb", "nosql"],
      supabase: ["supabase", "supabase.js"],
      aws: ["aws", "amazon web services", "s3", "ec2", "lambda"],
      docker: ["docker", "container", "dockerfile"],
      kubernetes: ["kubernetes", "k8s", "kubectl"],
      terraform: ["terraform", "infrastructure as code"],
      ai: ["artificial intelligence", "machine learning", "llm", "gpt", "claude"],
      openai: ["openai", "gpt-3", "gpt-4", "chatgpt"],
      api: ["api", "endpoint", "rest", "graphql"],
      guide: ["tutorial", "guide", "walkthrough", "how-to"],
      reference: ["reference", "documentation", "docs"]
    };
  }

  /**
   * Whole-term search in lower-cased text. A term that starts or ends with a
   * letter or digit must not be glued to another letter or digit on that side
   * (an optional plural "s" is tolerated), so "ts" no longer matches "its" and
   * "java" no longer matches "javascript".
   */
  containsTerm(text, term) {
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const lead = /^[a-z0-9]/.test(term) ? "(?<![a-z0-9])" : "";
    const trail = /[a-z0-9]$/.test(term) ? "s?(?![a-z0-9])" : "";
    return new RegExp(lead + escaped + trail).test(text);
  }

  /** Adds every technology tag for which at least one term occurs in `text`. */
  addTechTags(tags, text) {
    for (const [tag, terms] of Object.entries(this.getTechPatterns())) {
      if (terms.some((term) => this.containsTerm(text, term))) {
        tags.add(tag);
      }
    }
  }

  /** Adds the slugified category as a tag unless it is "documentation". */
  addCategoryTags(tags, category) {
    if (category && category !== "documentation") {
      tags.add(category.toLowerCase().replace(/\s+/g, "-"));
    }
  }

  /** Adds content-type tags (setup, deployment, ...) based on keyword hits. */
  addContentTypeTags(tags, text) {
    const contentPatterns = [
      { pattern: /install|setup|configuration/, tag: "setup" },
      { pattern: /deploy|production|release/, tag: "deployment" },
      { pattern: /security|auth|permission/, tag: "security" },
      { pattern: /performance|optimization|speed/, tag: "performance" },
      { pattern: /test|testing|unit test/, tag: "testing" },
      { pattern: /debug|troubleshoot|error/, tag: "debugging" },
      { pattern: /business|strategy|plan/, tag: "business" },
      { pattern: /market|revenue|funding/, tag: "business-strategy" }
    ];
    for (const { pattern, tag } of contentPatterns) {
      if (pattern.test(text)) {
        tags.add(tag);
      }
    }
  }

  /** Adds tags implied by well-known file names (readme, changelog, ...). */
  addFilenameTags(tags, filename) {
    const filenameLower = filename.toLowerCase();
    const filenamePatterns = [
      { includes: "readme", tag: "overview" },
      { includes: "changelog", tag: "changelog" },
      { includes: "contributing", tag: "contributing" },
      { includes: "license", tag: "legal" }
    ];
    for (const { includes, tag } of filenamePatterns) {
      if (filenameLower.includes(includes)) {
        tags.add(tag);
      }
    }
  }

  /** Adds size and audience tags (code-heavy, comprehensive, beginner, advanced). */
  addComplexityTags(tags, content, text) {
    const codeBlocks = (content.match(/```/g) || []).length / 2;
    if (codeBlocks > 3) tags.add("code-heavy");
    if (content.length > 5e3) tags.add("comprehensive");
    if (text.includes("beginner") || text.includes("getting started")) tags.add("beginner");
    if (text.includes("advanced") || text.includes("expert")) tags.add("advanced");
  }

  /**
   * Tells whether one path segment names the given category keyword. Plain
   * alphanumeric keywords are compared against the segment's words (split on
   * separators and camelCase humps, optional plural "s"), so "index.html" no
   * longer matches "ml". Keywords containing other characters keep substring
   * matching.
   */
  pathPartMatches(part, pattern) {
    const needle = pattern.toLowerCase();
    if (!needle) return false;
    if (/[^a-z0-9]/.test(needle)) {
      return part.toLowerCase().includes(needle);
    }
    const words = part
      .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
      .toLowerCase()
      .split(/[^a-z0-9]+/);
    return words.some((word) => word === needle || word === `${needle}s`);
  }

  /**
   * Chooses a category: first from keywords in the file path, then from
   * content heuristics, finally the configured default.
   *
   * @param content Document text.
   * @param _filename Unused.
   * @param filePath Source path; both "/" and "\" separators are accepted.
   * @returns The category name.
   */
  generateCategory(content, _filename, filePath) {
    const pathParts = filePath.split(/[\\/]/).filter((p) => p && p !== ".");
    for (const [pattern, categoryName] of Object.entries(this.options.categoryPatterns)) {
      if (pathParts.some((part) => this.pathPartMatches(part, pattern))) {
        return categoryName;
      }
    }
    const text = content.toLowerCase();
    const has = (needle) => text.includes(needle);
    // An HTTP verb directly followed by a path or URL, e.g. "GET /users".
    // The previous ungrouped alternation matched "get" inside any word.
    const mentionsHttpCall = /\b(?:get|post|put|delete)\s+(?:https?:\/\/|\/)/.test(text);
    if (has("endpoint") || has("api") || mentionsHttpCall || has("parameter") || has("response")) {
      return "Reference";
    }
    if (has("step") || has("tutorial") || has("getting started") || has("walkthrough") || /\d+\.\s/.test(text) || has("how to")) {
      return "Guides";
    }
    if (has("business plan") || has("strategy") || has("market") || has("revenue") || has("funding") || has("investor")) {
      return "Business";
    }
    if (has("architecture") || has("design pattern") || has("implementation") || has("technical") || has("database") || has("infrastructure")) {
      return "Architecture";
    }
    if (has("config") || has("setup") || has("installation") || has("environment")) {
      return "Configuration";
    }
    return this.options.defaultCategory;
  }

  /**
   * Escapes a value for use inside a YAML double-quoted scalar. Backslashes
   * are escaped first so the escapes added for quotes are not doubled, and
   * line breaks are flattened to spaces.
   */
  escapeYamlString(value) {
    return String(value)
      .replace(/\\/g, "\\\\")
      .replace(/"/g, '\\"')
      .replace(/\r?\n|\r/g, " ");
  }

  /**
   * Serialises metadata to YAML lines (without the `---` fences).
   *
   * @param metadata Object with optional title, description, category, tags
   *   and lastUpdated.
   * @returns The YAML text.
   */
  generateFrontmatterYaml(metadata) {
    const yamlLines = [];
    if (metadata.title) {
      yamlLines.push(`title: "${this.escapeYamlString(metadata.title)}"`);
    }
    if (metadata.description) {
      const flat = String(metadata.description).replace(/\r?\n|\r/g, " ").trim();
      if (flat.length > 80) {
        // Literal block scalars take the text verbatim, so nothing is escaped
        // here; escaping would leave visible backslashes in the description.
        yamlLines.push("description: |");
        yamlLines.push(` ${flat}`);
      } else {
        yamlLines.push(`description: "${this.escapeYamlString(flat)}"`);
      }
    }
    if (metadata.category && metadata.category !== "documentation") {
      yamlLines.push(`category: "${this.escapeYamlString(metadata.category)}"`);
    }
    if (metadata.tags && metadata.tags.length > 0) {
      yamlLines.push("tags:");
      for (const tag of metadata.tags) {
        yamlLines.push(` - ${tag}`);
      }
    }
    if (metadata.lastUpdated) {
      yamlLines.push(`lastUpdated: ${metadata.lastUpdated}`);
    }
    return yamlLines.join("\n");
  }

  /**
   * Runs all validators over the converted output.
   *
   * @returns `{ isValid, warnings, suggestions, quality }` where quality is
   *   "high", "medium" or "low".
   */
  validateConvertedContent(content, metadata) {
    const warnings = [];
    const suggestions = [];
    this.validateTitle(metadata.title, warnings);
    this.validateDescription(metadata.description, warnings, suggestions);
    this.validateContentStructure(content, suggestions);
    this.validateCodeContent(content, metadata, suggestions);
    const quality = this.calculateQuality(warnings, metadata);
    return {
      isValid: warnings.length === 0,
      warnings,
      suggestions,
      quality
    };
  }

  /** Warns when the title is missing, shorter than 5 or longer than 100 characters. */
  validateTitle(title, warnings) {
    if (!title || title.length < 5) {
      warnings.push("Title is too short or missing");
    }
    if (title && title.length > 100) {
      warnings.push("Title is unusually long");
    }
  }

  /** Warns about a missing or very short description; suggests trimming a long one. */
  validateDescription(description, warnings, suggestions) {
    if (!description) {
      warnings.push("Description is missing");
      suggestions.push("Consider adding a brief description of the document content");
    } else if (description.length < 20) {
      warnings.push("Description is very short");
    } else if (description.length > 200) {
      suggestions.push("Description is long, consider summarizing key points");
    }
  }

  /** Suggests structural changes when there are no headings or more than 20. */
  validateContentStructure(content, suggestions) {
    const headingCount = (content.match(/^#{1,6}\s/gm) || []).length;
    if (headingCount === 0) {
      suggestions.push("Consider adding headings to improve document structure");
    } else if (headingCount > 20) {
      suggestions.push("Document has many headings, consider reorganizing content");
    }
  }

  /** Suggests technical tags when the document has code fences but no "code-heavy" tag. */
  validateCodeContent(content, metadata, suggestions) {
    const codeBlocks = (content.match(/```/g) || []).length / 2;
    if (codeBlocks > 0 && !metadata.tags?.includes("code-heavy")) {
      suggestions.push("Document contains code - consider adding relevant technical tags");
    }
  }

  /**
   * Scores the metadata starting from 100 and subtracting penalties.
   *
   * @returns "high" (>= 80), "medium" (>= 60) or "low".
   */
  calculateQuality(warnings, metadata) {
    let qualityScore = 100 - warnings.length * 15;
    if (!metadata.description) qualityScore -= 25;
    if (!metadata.category || metadata.category === "documentation") qualityScore -= 10;
    if (!metadata.tags || metadata.tags.length === 0) qualityScore -= 10;
    if (qualityScore >= 80) return "high";
    return qualityScore >= 60 ? "medium" : "low";
  }

  /**
   * Builds the metadata object for a document. Empty values are omitted, and
   * the category is omitted when it is "documentation".
   */
  generateFrontmatter(content, filename, filePath) {
    const frontmatter = {};
    const title = this.extractTitle(content, filename);
    if (title) frontmatter.title = title;
    const description = this.extractDescription(content);
    if (description) frontmatter.description = description;
    const category = this.generateCategory(content, filename, filePath);
    if (category !== "documentation") {
      frontmatter.category = category;
    }
    const tags = this.extractTags(content, filename, category);
    if (tags.length > 0) frontmatter.tags = tags;
    if (this.options.addTimestamps) {
      frontmatter.lastUpdated = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
    }
    return frontmatter;
  }

  /** True for extensions with a dedicated converter (case-insensitive). */
  isSupportedFormat(ext) {
    const supportedFormats = [".docx", ".doc", ".txt", ".html", ".htm", ".md", ".mdx", ".rtf"];
    return supportedFormats.includes(ext.toLowerCase());
  }

  /** True for extensions that are read as UTF-8 text (case-insensitive). */
  isTextBasedFile(ext) {
    const textExtensions = [
      ".txt", ".md", ".mdx", ".html", ".htm", ".rtf", ".csv", ".tsv",
      ".xml", ".json", ".yaml", ".yml", ".ini", ".cfg", ".conf", ".log",
      ".sh", ".bash", ".zsh", ".fish", ".js", ".ts", ".jsx", ".tsx",
      ".py", ".rb", ".php", ".java", ".c", ".cpp", ".h", ".hpp",
      ".css", ".scss", ".sass", ".less", ".sql", ".go", ".rs", ".kt",
      ".swift", ".dart", ".r", ".m", ".gradle", ".cmake", ".dockerfile"
    ];
    return textExtensions.includes(ext.toLowerCase());
  }

  /**
   * Decides whether a file is skipped before any conversion is attempted.
   *
   * @param filename Base name of the file.
   * @param ext Extension including the dot.
   * @returns `{ skip: false }` or `{ skip: true, reason }`.
   */
  shouldSkipFile(filename, ext) {
    if (filename.startsWith(".") && ![".md", ".html", ".htm", ".txt"].includes(ext)) {
      return { skip: true, reason: "hidden file" };
    }
    const binaryExtensions = [
      ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".svg", ".webp", ".ico",
      ".xls", ".xlsx", ".ppt", ".pptx",
      ".zip", ".rar", ".7z", ".tar", ".gz",
      ".mp3", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".wav", ".ogg",
      ".woff", ".woff2", ".ttf", ".otf", ".eot",
      ".jar", ".war", ".ear", ".exe", ".dll", ".so", ".dylib", ".bin",
      ".iso", ".dmg", ".pkg", ".deb", ".rpm"
    ];
    if (binaryExtensions.includes(ext.toLowerCase())) {
      return { skip: true, reason: "binary file" };
    }
    if (!(this.isSupportedFormat(ext) || this.isTextBasedFile(ext))) {
      return { skip: true, reason: "unsupported format" };
    }
    return { skip: false };
  }

  /**
   * Converts plain text to Markdown: indented or code-looking lines are
   * wrapped in fences, "Label:" lines become headings, bullets are normalised.
   * Blank lines are preserved so paragraph breaks survive the conversion.
   */
  convertPlainText(content) {
    const lines = content.split("\n");
    const markdown = [];
    let inCodeBlock = false;
    for (let i = 0; i < lines.length; i++) {
      const result = this.processPlainTextLine(lines, i, inCodeBlock);
      inCodeBlock = result.inCodeBlock;
      // Compare against undefined: an empty string is a legitimate blank line
      // and must be emitted, not dropped by a truthiness test.
      if (result.output !== undefined) {
        markdown.push(result.output);
      }
      if (result.additionalOutput !== undefined) {
        markdown.push(result.additionalOutput);
      }
    }
    if (inCodeBlock) {
      markdown.push("```");
    }
    return markdown.join("\n");
  }

  /**
   * Converts one line and reports the fence state after it.
   *
   * @returns `{ output, additionalOutput?, inCodeBlock }`; a fence marker that
   *   opens or closes a block is carried in `output`, the line in
   *   `additionalOutput`.
   */
  processPlainTextLine(lines, index, inCodeBlock) {
    const line = lines[index];
    const trimmed = line.trim();
    // Blank lines inside a code block belong to the block.
    if (!trimmed && inCodeBlock) {
      return { output: line, inCodeBlock };
    }
    if (this.isCodeLine(line, trimmed)) {
      const codeLine = line.replace(/^ {4}/, "");
      if (!inCodeBlock) {
        return { output: "```", additionalOutput: codeLine, inCodeBlock: true };
      }
      return { output: codeLine, inCodeBlock: true };
    }
    if (inCodeBlock && !this.isIndentedLine(line)) {
      return {
        output: "```",
        additionalOutput: this.convertNonCodeLine(line, trimmed, lines, index),
        inCodeBlock: false
      };
    }
    return {
      output: this.convertNonCodeLine(line, trimmed, lines, index),
      inCodeBlock
    };
  }

  /**
   * True for indented lines and for lines starting with a code keyword, an
   * opening tag or a lone "{". Keywords must end at a word boundary so prose
   * such as "important" or "various" is not fenced as code.
   */
  isCodeLine(line, trimmed) {
    return (
      this.isIndentedLine(line) ||
      /^(?:(?:function|const|let|var|class|import|export)\b|<\w+|\{\s*$)/.test(trimmed)
    );
  }

  /** True when the line starts with four spaces or a tab. */
  isIndentedLine(line) {
    return /^ {4}/.test(line) || /^\t/.test(line);
  }

  /**
   * Converts a prose line: "Label:" followed by a non-blank line becomes a
   * level-2 heading, bullet markers (-, *, •) become "-", anything else is
   * returned unchanged.
   */
  convertNonCodeLine(line, trimmed, lines, index) {
    const nextLineHasText = index + 1 < lines.length && Boolean(lines[index + 1].trim());
    if (trimmed.endsWith(":") && nextLineHasText) {
      return `## ${trimmed.slice(0, -1)}`;
    }
    if (/^[-*•]\s/.test(trimmed)) {
      return line.replace(/^(\s*)[-*•]\s/, "$1- ");
    }
    return line;
  }

  /**
   * Converts HTML to Markdown with Turndown; if that throws, falls back to a
   * regex conversion that keeps headings and paragraphs and drops other tags.
   */
  convertHTML(content) {
    try {
      return this.turndownService.turndown(content).trim();
    } catch (error) {
      this.log(`Turndown conversion failed, using fallback: ${error}`, "warn");
      return content
        .replace(
          /<h([1-6])[^>]*>(.*?)<\/h[1-6]>/gi,
          (_, level, text) => `${"#".repeat(Number.parseInt(level, 10))} ${text.replace(/<[^>]*>/g, "")}`
        )
        .replace(/<p[^>]*>(.*?)<\/p>/gi, "$1\n")
        .replace(/<[^>]*>/g, "")
        .trim();
    }
  }

  /**
   * Converts a Word document to Markdown via mammoth (HTML) and convertHTML.
   * Conversion messages reported by mammoth are logged as warnings.
   *
   * NOTE(review): ".doc" files are routed here as well; confirm that mammoth
   * can read the legacy format.
   *
   * @throws Error wrapping any failure of the conversion.
   */
  async convertWordDocument(filePath) {
    try {
      const result = await mammoth.convertToHtml({ path: filePath });
      if (result.messages.length > 0) {
        this.log(`Word conversion warnings for ${filePath}:`, "warn");
        result.messages.forEach((msg) => this.log(` ${msg.message}`, "warn"));
      }
      return this.convertHTML(result.value);
    } catch (error) {
      throw new Error(`Failed to convert Word document: ${error}`);
    }
  }

  /** Strips RTF control words, braces and escapes, then converts as plain text. */
  convertRTF(content) {
    const text = content
      .replace(/\\[a-zA-Z]+\d*\s?/g, "")
      .replace(/[{}]/g, "")
      .replace(/\\\\/g, "\\")
      .replace(/\\'/g, "'")
      .trim();
    return this.convertPlainText(text);
  }

  /**
   * Reads and converts one file according to its extension.
   *
   * @param inputPath Source file.
   * @param ext Lower-cased extension including the dot.
   * @param outputPath Destination, only echoed back in failure results.
   * @returns `{ content, needsConversion }` on success, or a result object
   *   with `success: false, skipped: true` when the file cannot be handled.
   */
  async processFileByType(inputPath, ext, outputPath) {
    switch (ext) {
      case ".docx":
      case ".doc":
        return {
          content: await this.convertWordDocument(inputPath),
          needsConversion: true
        };
      case ".rtf": {
        const rtfContent = await readFile(inputPath, "utf-8");
        return { content: this.convertRTF(rtfContent), needsConversion: true };
      }
      case ".txt": {
        const textContent = await readFile(inputPath, "utf-8");
        return { content: this.convertPlainText(textContent), needsConversion: true };
      }
      case ".html":
      case ".htm": {
        const htmlContent = await readFile(inputPath, "utf-8");
        return { content: this.convertHTML(htmlContent), needsConversion: true };
      }
      case ".md":
      case ".mdx": {
        const mdContent = await readFile(inputPath, "utf-8");
        const parsed = matter(mdContent);
        // Markdown only needs frontmatter generated when it has none yet.
        return {
          content: mdContent,
          needsConversion: Object.keys(parsed.data).length === 0
        };
      }
      default:
        if (this.isTextBasedFile(ext)) {
          try {
            const textContent = await readFile(inputPath, "utf-8");
            this.log(`Processing ${ext} file as plain text`, "info");
            return { content: this.convertPlainText(textContent), needsConversion: true };
          } catch (error) {
            return {
              success: false,
              inputPath,
              outputPath,
              skipped: true,
              errorMessage: `Failed to read text file: ${error}`
            };
          }
        }
        return {
          success: false,
          inputPath,
          outputPath,
          skipped: true,
          errorMessage: `Skipped unsupported file format: ${ext}`
        };
    }
  }

  /**
   * Converts a single file and writes the Markdown result.
   *
   * In dry-run mode nothing is written and no directory is created. Failures
   * are reported in the result (both `errorMessage` and, for backward
   * compatibility, `error`) instead of being thrown.
   *
   * @param inputPath Source file.
   * @param outputPath Optional destination; defaults to the source base name
   *   with an ".md" extension inside `options.outputDir`.
   * @returns A result object with `success`, `inputPath`, `outputPath` and
   *   either `metadata` or an error description.
   */
  async convertFile(inputPath, outputPath) {
    try {
      const filename = basename(inputPath);
      const ext = extname(inputPath).toLowerCase();
      const resolvedOutputPath = outputPath || join(this.options.outputDir, filename.replace(/\.[^/.]+$/, ".md"));
      const skipCheck = this.shouldSkipFile(filename, ext);
      if (skipCheck.skip) {
        this.log(`Skipping ${filename}: ${skipCheck.reason}`, "info");
        this.stats.skipped++;
        return {
          success: false,
          inputPath,
          outputPath: resolvedOutputPath,
          skipped: true,
          errorMessage: `Skipped: ${skipCheck.reason}`
        };
      }
      this.stats.formats.set(ext, (this.stats.formats.get(ext) || 0) + 1);
      const processingResult = await this.processFileByType(inputPath, ext, resolvedOutputPath);
      if ("success" in processingResult) {
        // The processor declined the file; count it like every other skip.
        this.stats.skipped++;
        return processingResult;
      }
      const processedContent = processingResult.content;
      let finalContent = processedContent;
      let metadata = {};
      if (processingResult.needsConversion || !processedContent.startsWith("---")) {
        metadata = this.generateFrontmatter(processedContent, filename, inputPath);
        const frontmatterYaml = this.generateFrontmatterYaml(metadata);
        const contentWithoutFrontmatter = this.stripFrontmatter(processedContent).trim();
        finalContent = `---\n${frontmatterYaml}\n---\n${contentWithoutFrontmatter}`;
      }
      const validation = this.validateConvertedContent(finalContent, metadata);
      if (!this.options.dryRun) {
        // Directory creation is a filesystem change too, so it is skipped on dry runs.
        await mkdir(dirname(resolvedOutputPath), { recursive: true });
        await writeFile(resolvedOutputPath, finalContent, "utf-8");
      }
      const qualityEmoji = validation.quality === "high" ? "\u{1F7E2}" : validation.quality === "medium" ? "\u{1F7E1}" : "\u{1F534}";
      this.log(`\u2705 ${qualityEmoji} Converted: ${inputPath} \u2192 ${resolvedOutputPath}`);
      if (this.options.verbose && validation.warnings.length > 0) {
        validation.warnings.forEach((warning) => this.log(` \u26A0\uFE0F ${warning}`, "warn"));
      }
      this.stats.processed++;
      return {
        success: true,
        inputPath,
        outputPath: resolvedOutputPath,
        metadata
      };
    } catch (error) {
      const errorMessage = `Error processing ${inputPath}: ${error}`;
      this.log(errorMessage, "error");
      this.stats.errors++;
      return {
        success: false,
        inputPath,
        outputPath: outputPath || "",
        error: errorMessage,
        errorMessage
      };
    }
  }

  /**
   * Compiles one ignore glob into a regular expression that is tested against
   * a "/"-separated path. Supported syntax: "**" (any depth), "*" (within one
   * segment) and "?" (one character). A pattern matches at any segment
   * boundary, and "dir/**" also matches the directory entry itself.
   */
  ignorePatternToRegExp(pattern) {
    let glob = pattern.replace(/\\/g, "/").replace(/^\.\//, "").replace(/\/+$/, "");
    let suffix = "";
    if (glob.endsWith("/**")) {
      glob = glob.slice(0, -3);
      suffix = "(?:/.*)?";
    }
    let source = "";
    let i = 0;
    while (i < glob.length) {
      const ch = glob[i];
      if (ch === "*" && glob[i + 1] === "*") {
        if (glob[i + 2] === "/") {
          source += "(?:.*/)?";
          i += 3;
        } else {
          source += ".*";
          i += 2;
        }
      } else if (ch === "*") {
        source += "[^/]*";
        i += 1;
      } else if (ch === "?") {
        source += "[^/]";
        i += 1;
      } else {
        source += /[.+^${}()|[\]\\]/.test(ch) ? `\\${ch}` : ch;
        i += 1;
      }
    }
    return new RegExp(`(?:^|/)${source}${suffix}$`);
  }

  /**
   * Tells whether a path matches any configured ignore pattern. Patterns are
   * treated as globs; plain substring matching wrongly ignored names such as
   * "distribution.md" (for "dist/**") and never matched "**\/*.log".
   * With the default "**\/.*" pattern every dot-prefixed entry is ignored.
   */
  isIgnoredPath(inputPath) {
    const normalized = inputPath.replace(/\\/g, "/");
    return this.options.ignorePatterns.some((pattern) => this.ignorePatternToRegExp(pattern).test(normalized));
  }

  /**
   * Recursively converts every non-ignored file below a directory.
   *
   * @param inputDir Directory to read.
   * @param outputDir Optional destination root; defaults to `options.outputDir`.
   *   Sub-directories are mirrored only when `preserveStructure` is set.
   * @returns The results of all attempted file conversions. A directory that
   *   cannot be read is logged and contributes no results.
   */
  async convertDirectory(inputDir, outputDir) {
    const results = [];
    const resolvedOutputDir = outputDir || this.options.outputDir;
    try {
      const entries = await readdir(inputDir, { withFileTypes: true });
      for (const entry of entries) {
        const inputPath = join(inputDir, entry.name);
        if (this.isIgnoredPath(inputPath)) {
          continue;
        }
        if (entry.isDirectory()) {
          const nestedOutputDir = this.options.preserveStructure ? join(resolvedOutputDir, entry.name) : resolvedOutputDir;
          const nestedResults = await this.convertDirectory(inputPath, nestedOutputDir);
          results.push(...nestedResults);
        } else {
          const outputPath = join(resolvedOutputDir, entry.name.replace(/\.[^/.]+$/, ".md"));
          results.push(await this.convertFile(inputPath, outputPath));
        }
      }
    } catch (error) {
      this.log(`Error processing directory ${inputDir}: ${error}`, "error");
    }
    return results;
  }

  /**
   * Returns a snapshot of the statistics. The per-format map is copied so
   * callers cannot alter the converter's own counters.
   */
  getStats() {
    return { ...this.stats, formats: new Map(this.stats.formats) };
  }

  /** Prints the statistics, the per-format counts and a dry-run notice to the console. */
  printStats() {
    console.log(`\n${chalk.bold("\u{1F4CA} Conversion Statistics:")}`);
    console.log(` ${chalk.green("\u2705 Processed:")} ${this.stats.processed} files`);
    console.log(` ${chalk.yellow("\u23ED\uFE0F Skipped:")} ${this.stats.skipped} files`);
    console.log(` ${chalk.red("\u274C Errors:")} ${this.stats.errors} files`);
    if (this.stats.formats.size > 0) {
      console.log(`\n${chalk.bold("\u{1F4C1} File formats processed:")}`);
      for (const [ext, count] of this.stats.formats.entries()) {
        console.log(` ${chalk.cyan(ext || "(no extension)")}: ${count} files`);
      }
    }
    if (this.options.dryRun) {
      console.log(`\n${chalk.yellow("\u{1F9EA} Dry run completed - no files were actually modified.")}`);
    }
  }
};
// src/utils/starlight-detector.ts
import { existsSync, readFileSync } from "fs";
import { join as join2, resolve } from "path";
// Patterns used to pull values out of the raw text of an Astro config file.
// Each one captures, in group 1, the text between the first quote character
// (', " or `) after the key and the next quote character of any of those kinds.
// Captures the value that follows `title:`.
var TITLE_REGEX = /title:\s*['"`]([^'"`]+)['"`]/;
// Captures the value that follows `description:`.
var DESCRIPTION_REGEX = /description:\s*['"`]([^'"`]+)['"`]/;
// Captures the `dir:` value found inside a `content: { ... }` object literal.
var CONTENT_DIR_REGEX = /content:\s*{[^}]*dir:\s*['"`]([^'"`]+)['"`]/;
/**
 * Works out the Starlight settings of a project.
 *
 * The first existing astro config file that can be parsed wins and is merged
 * over the built-in defaults; when none is usable, the content directories
 * are detected from the folder layout instead. Any unexpected failure is
 * reported with console.warn and the defaults are returned.
 *
 * @param projectRoot Directory that contains the project.
 * @returns Object with contentDir, collectionsDir, docsDir, title and description.
 */
function detectStarlightConfig(projectRoot) {
  const defaults = {
    contentDir: "src/content",
    collectionsDir: "src/content",
    docsDir: "src/content/docs",
    title: "Documentation",
    description: "Documentation site powered by Starlight"
  };
  try {
    const candidateFiles = ["astro.config.mjs", "astro.config.ts", "astro.config.js"];
    for (const candidate of candidateFiles) {
      const absolutePath = resolve(projectRoot, candidate);
      if (!existsSync(absolutePath)) {
        continue;
      }
      const parsed = parseAstroConfig(absolutePath);
      if (parsed) {
        return mergeWithDefaults(defaults, parsed);
      }
    }
    // No usable config file: fall back to what the directory layout reveals.
    return { ...defaults, ...detectContentDirectories(projectRoot) };
  } catch (error) {
    console.warn("Could not detect Starlight config, using defaults:", error);
    return defaults;
  }
}
/**
 * Extracts title, description and a custom content directory from the text
 * of an Astro config file using the module-level regular expressions. The
 * file is read as text and is not executed.
 *
 * @param configPath Path of the config file to read.
 * @returns An object holding only the values that were found (possibly
 *   empty), or null when reading or parsing fails (a warning is printed).
 */
function parseAstroConfig(configPath) {
  try {
    const source = readFileSync(configPath, "utf-8");
    const found = {};

    const title = TITLE_REGEX.exec(source);
    if (title !== null) {
      found.title = title[1];
    }

    const description = DESCRIPTION_REGEX.exec(source);
    if (description !== null) {
      found.description = description[1];
    }

    const contentDir = CONTENT_DIR_REGEX.exec(source);
    if (contentDir !== null) {
      // A custom content directory also relocates the collections and docs folders.
      const customDir = contentDir[1];
      found.contentDir = customDir;
      found.collectionsDir = customDir;
      found.docsDir = join2(customDir, "docs");
    }

    return found;
  } catch (error) {
    console.warn(`Failed to parse ${configPath}:`, error);
    return null;
  }
}
/**
 * Finds the content directory of a project by probing well-known locations
 * in priority order and stopping at the first one that exists.
 *
 * All returned paths are relative to `projectRoot`. `docsDir` is the "docs"
 * sub-folder of the content directory when that folder exists, otherwise the
 * content directory itself.
 *
 * @param projectRoot Directory that contains the project.
 * @returns `{ contentDir, collectionsDir, docsDir }`, or an empty object when
 *   none of the candidate directories exists.
 */
function detectContentDirectories(projectRoot) {
  const config = {};
  const contentPatterns = ["src/content", "content", "src/pages", "docs"];
  for (const pattern of contentPatterns) {
    if (!existsSync(resolve(projectRoot, pattern))) {
      continue;
    }
    config.contentDir = pattern;
    config.collectionsDir = pattern;
    // Build the docs path from the relative pattern, not from the resolved
    // absolute directory, so docsDir stays project-relative like the others.
    const docsDir = join2(pattern, "docs");
    config.docsDir = existsSync(resolve(projectRoot, docsDir)) ? docsDir : pattern;
    break;
  }
  return config;
}
/**
 * Combines detected settings with the defaults into a new object; a detected
 * value replaces the default of the same key. Neither argument is modified.
 *
 * @param defaults Baseline settings.
 * @param detected Settings that take precedence.
 * @returns The merged settings.
 */
function mergeWithDefaults(defaults, detected) {
  return Object.assign({}, defaults, detected);
}
/**
 * Lists the conventional import folders that exist under the project root,
 * in a fixed priority order.
 *
 * @param projectRoot Directory that contains the project.
 * @param _starlightConfig Unused.
 * @returns The names of the existing folders, or ["docs-import"] when none
 *   of them exists.
 */
function getRecommendedInputDirs(projectRoot, _starlightConfig) {
  const candidateNames = [
    "docs-import",
    "content-import",
    "documents",
    "imports",
    "_import",
    "draft"
  ];
  const existing = candidateNames.filter((name) => existsSync(resolve(projectRoot, name)));
  return existing.length > 0 ? existing : ["docs-import"];
}
/**
 * Tells whether a directory looks like an Astro Starlight project.
 *
 * The project qualifies when package.json lists "@astrojs/starlight" among
 * its dependencies, devDependencies or peerDependencies, or when an astro
 * config file contains the word "starlight". An unreadable or malformed
 * package.json no longer ends the check early: the config files are still
 * inspected.
 *
 * @param projectRoot Directory that contains the project.
 * @returns true when Starlight is detected, false otherwise (including when
 *   the files cannot be read).
 */
function isStarlightProject(projectRoot) {
  try {
    const packageJsonPath = resolve(projectRoot, "package.json");
    if (existsSync(packageJsonPath)) {
      try {
        const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf-8"));
        const deps = {
          ...packageJson.dependencies,
          ...packageJson.devDependencies,
          ...packageJson.peerDependencies
        };
        if (deps["@astrojs/starlight"]) {
          return true;
        }
      } catch {
        // Deliberately ignored: a broken package.json must not hide a valid
        // Starlight setup that is visible in the astro config below.
      }
    }
    const configPaths = ["astro.config.mjs", "astro.config.ts", "astro.config.js"];
    for (const configPath of configPaths) {
      const fullPath = resolve(projectRoot, configPath);
      // "starlight" also covers "@astrojs/starlight", so one check suffices.
      if (existsSync(fullPath) && readFileSync(fullPath, "utf-8").includes("starlight")) {
        return true;
      }
    }
    return false;
  } catch {
    return false;
  }
}
export {
DocumentConverter,
detectStarlightConfig,
getRecommendedInputDirs,
isStarlightProject
};
//# sourceMappingURL=chunk-HMTAMXXQ.js.map