UNPKG

@entro314labs/starlight-document-converter

Version:

A comprehensive document converter for Astro Starlight that transforms various document formats into Starlight-compatible Markdown with proper frontmatter

764 lines (746 loc) 22 kB
export { default as mammoth } from 'mammoth';
export { default as TurndownService } from 'turndown';

/**
 * Options accepted by the `DocumentConverter` constructor and by
 * `StarlightIntegrationConfig.converter`. Every field is optional.
 */
interface ConversionOptions$1 {
  /** Directory that converted files are written to. */
  outputDir?: string;
  /** Keep the directory structure of the input. */
  preserveStructure?: boolean;
  /** Derive titles from document content. */
  generateTitles?: boolean;
  /** Derive descriptions from document content. */
  generateDescriptions?: boolean;
  /** Add `lastUpdated` timestamps. */
  addTimestamps?: boolean;
  /** Category applied to documents by default. */
  defaultCategory?: string;
  /** Turn on verbose logging. */
  verbose?: boolean;
  /** Dry-run mode: no files are written. */
  dryRun?: boolean;
  /** User-supplied category patterns. */
  categoryPatterns?: Record<string, string>;
  /** User-supplied tag patterns. */
  tagPatterns?: Record<string, string[]>;
  /** Files that should be ignored. */
  ignorePatterns?: string[];
  /** Repair frontmatter that already exists. */
  repairMode?: boolean;
  /** Validate the structure of the content. */
  validateContent?: boolean;
  /** Generate a table of contents. */
  generateToc?: boolean;
  /** Process images and copy them. */
  processImages?: boolean;
  /** Fix internal links. */
  fixLinks?: boolean;
  /** Generate sidebar configuration automatically. */
  generateSidebar?: boolean;
  /** Upper bound on description length. */
  maxDescriptionLength?: number;
}

/** Counters returned by `DocumentConverter.getStats()`. */
interface ConversionStats {
  processed: number;
  skipped: number;
  errors: number;
  formats: Map<string, number>;
}

/**
 * Frontmatter-style metadata for a document. The index signature allows
 * additional keys beyond the named ones.
 */
interface DocumentMetadata$1 {
  title?: string;
  description?: string;
  category?: string;
  tags?: string[];
  lastUpdated?: string;
  author?: string;
  draft?: boolean;
  readingTime?: number;
  wordCount?: number;
  contentType?: string;
  complexity?: string;
  [key: string]: unknown;
}

/**
 * Per-file result type: `convertFile` resolves to one of these and
 * `convertDirectory` resolves to an array of them.
 */
interface ConversionResult {
  success: boolean;
  inputPath: string;
  outputPath: string;
  skipped?: boolean;
  error?: string;
  errorMessage?: string;
  metadata?: DocumentMetadata$1;
}

/** Configuration object taken by `starlightDocumentConverter`. */
interface StarlightIntegrationConfig {
  /** Enable the document converter integration. */
  enabled?: boolean;
  /** Options forwarded for conversion. */
  converter?: ConversionOptions$1;
  /** Watch for file changes and convert automatically. */
  watch?: boolean;
  /** Input directories to monitor. */
  inputDirs?: string[];
}

/** File extensions (including the leading dot) named as supported formats. */
type SupportedFormat =
  | '.docx'
  | '.doc'
  | '.txt'
  | '.html'
  | '.htm'
  | '.md'
  | '.mdx'
  | '.rtf';

/**
 * Path-based processor shape. The export list at the bottom of this module
 * publishes this interface under the public name `FileProcessor`.
 */
interface FileProcessor$1 {
  extensions: SupportedFormat[];
  process: (filePath: string, options: ConversionOptions$1) => Promise<string>;
}

/** Paths, raw content and options bundled together for one conversion. */
interface ConversionContext {
  inputPath: string;
  outputPath: string;
  filename: string;
  extension: SupportedFormat;
  content: string;
  options: ConversionOptions$1;
}

/** Return type of `FrontmatterRepair.validateContent`. */
interface ValidationResult {
  valid: boolean;
  issues: ValidationIssue[];
  metadata?: DocumentMetadata$1;
  score?: QualityScore;
}

/** One entry of `ValidationResult.issues`. */
interface ValidationIssue {
  type: 'error' | 'warning';
  field?: string;
  message: string;
  suggestion?: string;
}

/** Score breakdown optionally attached to a `ValidationResult`. */
interface QualityScore {
  overall: 'good' | 'fair' | 'poor';
  titleScore: number;
  descriptionScore: number;
  contentScore: number;
  structureScore: number;
  suggestions: string[];
}

/** Return type of `FrontmatterRepair.repairFrontmatter`. */
interface RepairResult {
  success: boolean;
  fixed: boolean;
  issues: string[];
  originalContent: string;
  repairedContent: string;
}

/** Table-of-contents node; `children` holds nested entries. */
interface TocEntry {
  level: number;
  title: string;
  anchor: string;
  children?: TocEntry[];
}

/** Per-link record returned by `LinkImageProcessor.processContent`. */
interface LinkInfo {
  original: string;
  resolved: string;
  isInternal: boolean;
  exists: boolean;
  needsRepair: boolean;
}

/** Per-image record returned by `LinkImageProcessor.processContent`. */
interface ImageInfo {
  original: string;
  resolved: string;
  copied: boolean;
  outputPath?: string;
  alt?: string;
}

/**
 * Converter class. Its public surface is `convertFile`, `convertDirectory`,
 * `getStats` and `printStats`; every other member is private and appears
 * here without a type.
 */
declare class DocumentConverter {
  // Private state.
  private options;
  private stats;
  private turndownService;

  constructor(options?: ConversionOptions$1);

  // Private helpers.
  private getDefaultCategoryPatterns;
  private getDefaultTagPatterns;
  private getDefaultIgnorePatterns;
  private log;
  private extractTitle;
  private extractDescription;
  private extractParagraphs;
  private getDescriptionStartIndex;
  private isStructuralElement;
  private cleanParagraph;
  private processDescriptionParagraph;
  private truncateDescription;
  private extractTags;
  private getTechPatterns;
  private addTechTags;
  private addCategoryTags;
  private addContentTypeTags;
  private addFilenameTags;
  private addComplexityTags;
  private generateCategory;
  private generateFrontmatterYaml;
  private validateConvertedContent;
  private validateTitle;
  private validateDescription;
  private validateContentStructure;
  private validateCodeContent;
  private calculateQuality;
  private generateFrontmatter;
  private isSupportedFormat;
  private isTextBasedFile;
  private shouldSkipFile;
  private convertPlainText;
  private processPlainTextLine;
  private isCodeLine;
  private isIndentedLine;
  private convertNonCodeLine;
  private convertHTML;
  private convertWordDocument;
  private convertRTF;
  private processFileByType;

  // Public API.
  convertFile(inputPath: string, outputPath?: string): Promise<ConversionResult>;
  convertDirectory(inputDir: string, outputDir?: string): Promise<ConversionResult[]>;
  getStats(): ConversionStats;
  printStats(): void;
}

/** Argument passed to each hook of an `AstroIntegration`. */
interface AstroHookContext {
  config: AstroConfig;
  logger: Logger;
}

/** Locally declared integration shape: a name plus a map of optional hooks. */
interface AstroIntegration {
  name: string;
  hooks: {
    [key: string]: ((context: AstroHookContext) => void | Promise<void>) | undefined;
  };
}

/** Locally declared config shape; only `root.pathname` is described. */
interface AstroConfig {
  root: {
    pathname: string;
  };
}

/** Logger with `info`, `warn` and `error` methods taking a message string. */
interface Logger {
  info(message: string): void;
  warn(message: string): void;
  error(message: string): void;
}

/**
 * Takes an optional `StarlightIntegrationConfig` and returns an
 * `AstroIntegration`. Exported both by name and as the module default.
 */
declare function starlightDocumentConverter(
  userConfig?: StarlightIntegrationConfig,
): AstroIntegration;

declare class ContentAnalyzer {
  private categoryPatterns;
  private tagPatterns;

  constructor(
    categoryPatterns?: Record<string, string>,
    tagPatterns?: Record<string, string[]>,
  );

  /**
   * Analyzes content and produces comprehensive metadata together with an
   * analysis summary.
   */
  analyzeContent(
    content: string,
    filePath: string,
  ): {
    metadata: DocumentMetadata$1;
    analysis: {
      wordCount: number;
      readingTime: number;
      complexity: 'simple' | 'moderate' | 'complex';
      headingStructure: TocEntry[];
      topics: string[];
      suggestedTags: string[];
      contentType: 'guide' | 'reference' | 'tutorial' | 'blog' | 'documentation';
    };
  };

  /** Produces an intelligent title from content. */
  private generateTitle;
  /** Produces an intelligent description from content. */
  private generateDescription;
  /** Infers a category from the file path and content. */
  private inferCategory;
  /** Suggests relevant tags based on content analysis. */
  private suggestTags;
  /** Detects the content type from structure and keywords. */
  private detectContentType;
  /** Extracts topics and keywords from content. */
  private extractTopics;
  /** Calculates a reading-time estimate. */
  private estimateReadingTime;
  /** Calculates the word count. */
  private calculateWordCount;
  /** Assesses content complexity. */
  private assessComplexity;
  /** Extracts the heading structure. */
  private extractHeadingStructure;
  /** Generates a URL-friendly anchor. */
  private generateAnchor;
  /** Cleans and formats a title. */
  private cleanTitle;
  /** Humanizes a filename for title generation. */
  private humanizeFilename;
  /** Generates a fallback description based on content type. */
  private generateFallbackDescription;
  /** Default category patterns. */
  private getDefaultCategoryPatterns;
  /** Default tag patterns. */
  private getDefaultTagPatterns;
}

/**
 * Plugin-style file processor that receives content plus a
 * `ProcessingContext`.
 *
 * NOTE(review): the export list publishes `FileProcessor$1` under the name
 * `FileProcessor`, so this interface is not exported under any name even
 * though `jsonProcessor`, `markdownProcessor` and `PluginRegistry` are typed
 * with it — confirm that is intended.
 */
interface FileProcessor {
  /** File extensions this processor supports. */
  extensions: string[];
  /** The processing function. */
  process: (content: string, context: ProcessingContext) => Promise<string> | string;
  /** Metadata describing the plugin. */
  metadata: {
    name: string;
    version: string;
    description: string;
    author?: string;
  };
  /** Optional validation function. */
  validate?: (content: string, context: ProcessingContext) => boolean | Promise<boolean>;
  /** Optional preprocessing function. */
  preprocess?: (content: string, context: ProcessingContext) => Promise<string> | string;
  /** Optional postprocessing function. */
  postprocess?: (content: string, context: ProcessingContext) => Promise<string> | string;
}

interface MetadataEnhancer {
  /** The enhancement function. */
  enhance: (
    metadata: DocumentMetadata,
    context: ProcessingContext,
  ) => Promise<DocumentMetadata> | DocumentMetadata;
  /** Metadata describing the plugin. */
  metadata: {
    name: string;
    version: string;
    description: string;
    author?: string;
  };
  /** Execution-order priority (higher = earlier). */
  priority?: number;
}

interface QualityValidator {
  /** The validation function. */
  validate: (
    content: string,
    metadata: DocumentMetadata,
    context: ProcessingContext,
  ) => QualityReport;
  /** Metadata describing the plugin. */
  metadata: {
    name: string;
    version: string;
    description: string;
    author?: string;
  };
}

interface ProcessingContext {
  /** Path of the input file. */
  inputPath: string;
  /** Path of the output file. */
  outputPath: string;
  /** The original filename. */
  filename: string;
  /** The file extension. */
  extension: string;
  /** Conversion options. */
  options: ConversionOptions;
  /** Additional context data. */
  data?: Record<string, unknown>;
}

interface QualityReport {
  /** Overall quality score (0-100). */
  score: number;
  /** Quality level. */
  level: 'high' | 'medium' | 'low';
  /** Issues that were found. */
  issues: QualityIssue[];
  /** Suggestions for improvement. */
  suggestions: string[];
}

interface QualityIssue {
  /** Kind of issue. */
  type: 'error' | 'warning' | 'info';
  /** Message describing the issue. */
  message: string;
  /** Optional line number. */
  line?: number;
  /** Optional column number. */
  column?: number;
  /** Severity (1-10). */
  severity: number;
}

/**
 * Metadata shape referenced by the plugin interfaces above.
 * NOTE(review): field-for-field identical to `DocumentMetadata$1`; the two
 * declarations have to be kept in sync by hand.
 */
interface DocumentMetadata {
  title?: string;
  description?: string;
  category?: string;
  tags?: string[];
  lastUpdated?: string;
  author?: string;
  draft?: boolean;
  readingTime?: number;
  wordCount?: number;
  contentType?: string;
  complexity?: string;
  [key: string]: unknown;
}

/**
 * Options shape referenced by `ProcessingContext.options`.
 * NOTE(review): field-for-field identical to `ConversionOptions$1`; the two
 * declarations have to be kept in sync by hand.
 */
interface ConversionOptions {
  outputDir?: string;
  preserveStructure?: boolean;
  generateTitles?: boolean;
  generateDescriptions?: boolean;
  addTimestamps?: boolean;
  defaultCategory?: string;
  verbose?: boolean;
  dryRun?: boolean;
  categoryPatterns?: Record<string, string>;
  tagPatterns?: Record<string, string[]>;
  ignorePatterns?: string[];
  repairMode?: boolean;
  validateContent?: boolean;
  generateToc?: boolean;
  processImages?: boolean;
  fixLinks?: boolean;
  generateSidebar?: boolean;
  maxDescriptionLength?: number;
}

interface PluginRegistry {
  /** Registers a file processor plugin. */
  registerProcessor: (processor: FileProcessor) => void;
  /** Registers a metadata enhancer plugin. */
  registerEnhancer: (enhancer: MetadataEnhancer) => void;
  /** Registers a quality validator plugin. */
  registerValidator: (validator: QualityValidator) => void;
  /** Returns every registered processor. */
  getProcessors: () => FileProcessor[];
  /** Returns the processors for a specific extension. */
  getProcessorsForExtension: (extension: string) => FileProcessor[];
  /** Returns every registered enhancer. */
  getEnhancers: () => MetadataEnhancer[];
  /** Returns every registered validator. */
  getValidators: () => QualityValidator[];
  /** Clears all plugins. */
  clear: () => void;
}

declare class FrontmatterRepair {
  private readonly requiredFields;
  private readonly recommendedFields;
  private readonly maxTitleLength;
  private readonly maxDescriptionLength;

  /** Validates frontmatter and content structure. */
  validateContent(content: string, filePath: string): ValidationResult;

  /** Repairs frontmatter issues. */
  repairFrontmatter(content: string, filePath: string): RepairResult;

  private validateContentStructure;
  private hasProperHeadingStructure;
  private calculateQualityScore;
  private generateFrontmatterFromContent;
  private extractTitleFromContent;
  private generateDescriptionFromContent;
  private inferCategoryFromPath;
  private cleanString;
  private cleanFilename;
  private buildFrontmatterString;
  private escapeYamlString;
}

/** JSON file processor that converts JSON files to formatted markdown. */
declare const jsonProcessor: FileProcessor;

declare class LinkImageProcessor {
  private baseDir;
  private outputDir;
  private assetsDir;
  private logger?;

  constructor(
    baseDir: string,
    outputDir: string,
    assetsDir?: string,
    logger?: {
      warn: (msg: string) => void;
      error: (msg: string, error?: any) => void;
    },
  );

  /** Processes every link and image found in markdown content. */
  processContent(
    content: string,
    sourceFilePath: string,
    targetFilePath: string,
  ): Promise<{
    content: string;
    links: LinkInfo[];
    images: ImageInfo[];
  }>;

  /** Processes and fixes internal links. */
  private processLinks;
  /** Processes and copies images. */
  private processImages;
  /** Processes a single internal link. */
  private processInternalLink;
  /** Processes and copies a single image. */
  private processImage;
  /** Gets the relative path for an image in markdown. */
  private getRelativeImagePath;

  /** Builds an image report. */
  generateImageReport(images: ImageInfo[]): {
    total: number;
    copied: number;
    external: number;
    missing: number;
    missingImages: string[];
  };

  /** Builds suggestions for missing images. */
  generateImageSuggestions(missingImages: string[]): string[];

  /** Builds a link report. */
  generateLinkReport(links: LinkInfo[]): {
    total: number;
    internal: number;
    external: number;
    broken: number;
    repaired: number;
  };

  /** Extracts all images from content for batch processing. */
  static extractImages(content: string): {
    alt: string;
    src: string;
  }[];

  /** Extracts all links from content for batch processing. */
  static extractLinks(content: string): {
    text: string;
    url: string;
  }[];
}

/** Built-in markdown metadata enhancer that improves title and description extraction. */
declare const markdownEnhancer: MetadataEnhancer;

/** Advanced markdown processor that handles link/image processing and TOC generation. */
declare const markdownProcessor: FileProcessor;

/** Comprehensive quality validator for generated content. */
declare const contentQualityValidator: QualityValidator;

declare class TocGenerator {
  private maxDepth;
  private minEntries;

  constructor(maxDepth?: number, minEntries?: number);

  /** Builds a table of contents from markdown content. */
  generateToc(content: string): TocEntry[];

  /** Builds a table of contents using custom anchor generation. */
  generateTocWithCustomAnchors(
    content: string,
    anchorGenerator?: (title: string) => string,
  ): TocEntry[];

  /** Inserts a table of contents into content. */
  insertTocIntoContent(
    content: string,
    tocPosition?: 'top' | 'after-title' | 'custom',
    customMarker?: string,
  ): string;

  /** Extracts headings from content. */
  private extractHeadings;
  /** Builds the hierarchical TOC tree. */
  private buildTocTree;
  /** Cleans heading text (removes markdown formatting). */
  private cleanHeadingText;
  /** Generates a URL-friendly anchor from a title. */
  private generateAnchor;

  /** Renders the TOC as markdown. */
  renderTocAsMarkdown(toc: TocEntry[]): string;

  /** Renders the TOC as HTML. */
  renderTocAsHtml(toc: TocEntry[]): string;

  /** Renders the TOC as JSON for the Starlight sidebar. */
  renderTocForStarlightSidebar(toc: TocEntry[]): any[];

  /** Renders one level of the TOC in markdown format. */
  private renderTocLevel;
  /** Renders one level of the TOC in HTML format. */
  private renderTocLevelHtml;
  /** Converts a TOC entry to Starlight sidebar format. */
  private tocEntryToStarlightFormat;
  /** Inserts the TOC at the top of content. */
  private insertAtTop;
  /** Inserts the TOC after the first heading. */
  private insertAfterTitle;

  /** Checks whether content already has a table of contents. */
  hasExistingToc(content: string): boolean;

  /** Removes an existing table of contents from content. */
  removeExistingToc(content: string): string;

  /** Builds a navigation structure for Starlight. */
  generateStarlightNavigation(tocEntries: TocEntry[], baseUrl?: string): any[];

  /** Extracts headings for automated sidebar generation. */
  extractHeadingsForSidebar(
    content: string,
    filePath: string,
  ): {
    title: string;
    headings: {
      title: string;
      anchor: string;
      level: number;
    }[];
  };

  /** Generates a title from a file path. */
  private generateTitleFromPath;
}

/** Integration helper that bridges the main converter and the plugin system. */
declare class PluginIntegrationHelper {
  /** Applies all registered metadata enhancers to the given metadata. */
  static enhanceMetadata(
    metadata: DocumentMetadata$1,
    context: ProcessingContext,
  ): Promise<DocumentMetadata$1>;

  /** Processes content using registered file processors. */
  static processContent(content: string, context: ProcessingContext): Promise<string>;

  /** Validates content using all registered validators. */
  static validateContent(
    content: string,
    metadata: DocumentMetadata$1,
    context: ProcessingContext,
  ): {
    score: number;
    level: 'high' | 'medium' | 'low';
    issues: QualityIssue[];
    suggestions: string[];
    validator: string;
  }[];

  /** Creates a processing context from conversion parameters. */
  static createProcessingContext(
    inputPath: string,
    outputPath: string,
    filename: string,
    extension: string,
    options: ConversionOptions$1,
    additionalData?: Record<string, unknown>,
  ): ProcessingContext;

  /** Initializes all built-in plugins. */
  static initializePlugins(): Promise<{
    processors: number;
    enhancers: number;
    validators: number;
    supportedExtensions: string[];
  }>;

  /** Returns plugin statistics. */
  static getPluginStats(): {
    processors: number;
    enhancers: number;
    validators: number;
    supportedExtensions: string[];
  };

  /** Checks whether plugins are properly registered. */
  static validatePluginSetup(): {
    valid: boolean;
    issues: string[];
  };
}

/**
 * Central plugin registry for managing file processors, metadata enhancers
 * and quality validators.
 */
declare class DefaultPluginRegistry implements PluginRegistry {
  private processors;
  private enhancers;
  private validators;

  /** Registers a file processor plugin. */
  registerProcessor(processor: FileProcessor): void;

  /** Registers a metadata enhancer plugin. */
  registerEnhancer(enhancer: MetadataEnhancer): void;

  /** Registers a quality validator plugin. */
  registerValidator(validator: QualityValidator): void;

  /** Returns every registered processor. */
  getProcessors(): FileProcessor[];

  /** Returns the processors that can handle a specific file extension. */
  getProcessorsForExtension(extension: string): FileProcessor[];

  /** Returns every registered enhancer (sorted by priority). */
  getEnhancers(): MetadataEnhancer[];

  /** Returns every registered validator. */
  getValidators(): QualityValidator[];

  /** Clears all registered plugins. */
  clear(): void;

  /** Returns plugin statistics. */
  getStats(): {
    processors: number;
    enhancers: number;
    validators: number;
    supportedExtensions: string[];
  };

  private validateProcessor;
  private validateEnhancer;
  private validateValidator;
}

/** Module-level `DefaultPluginRegistry` instance. */
declare const pluginRegistry: DefaultPluginRegistry;

/** Resolves to the same counts-and-extensions shape that `getStats()` returns. */
declare function registerBuiltInPlugins(): Promise<{
  processors: number;
  enhancers: number;
  validators: number;
  supportedExtensions: string[];
}>;

export {
  ContentAnalyzer,
  type ConversionContext,
  type ConversionOptions$1 as ConversionOptions,
  type ConversionResult,
  type ConversionStats,
  DefaultPluginRegistry,
  DocumentConverter,
  type DocumentMetadata$1 as DocumentMetadata,
  type FileProcessor$1 as FileProcessor,
  FrontmatterRepair,
  LinkImageProcessor,
  type MetadataEnhancer,
  PluginIntegrationHelper,
  type PluginRegistry,
  type ProcessingContext,
  type QualityIssue,
  type QualityReport,
  type QualityValidator,
  type StarlightIntegrationConfig,
  type SupportedFormat,
  TocGenerator,
  contentQualityValidator,
  starlightDocumentConverter as default,
  jsonProcessor,
  markdownEnhancer,
  markdownProcessor,
  pluginRegistry,
  registerBuiltInPlugins,
  starlightDocumentConverter,
};