/*
 * @entro314labs/starlight-document-converter
 * Version:
 * A comprehensive document converter for Astro Starlight that transforms various document formats into Starlight-compatible Markdown with proper frontmatter.
 * 764 lines (746 loc) • 22 kB
 * TypeScript
 */
export { default as mammoth } from 'mammoth';
export { default as TurndownService } from 'turndown';
/**
* Options accepted by the DocumentConverter constructor and by the `converter`
* field of StarlightIntegrationConfig. Every field is optional.
* This interface is exported publicly under the name `ConversionOptions`.
* Default values are not visible in this declaration.
*/
interface ConversionOptions$1 {
/** Output directory for converted files */
outputDir?: string;
/** Preserve directory structure */
preserveStructure?: boolean;
/** Auto-generate titles from content */
generateTitles?: boolean;
/** Auto-generate descriptions from content */
generateDescriptions?: boolean;
/** Add lastUpdated timestamps */
addTimestamps?: boolean;
/** Default category for documents */
defaultCategory?: string;
/** Enable verbose logging */
verbose?: boolean;
/** Dry run mode (no file writes) */
dryRun?: boolean;
/** Custom category patterns */
categoryPatterns?: Record<string, string>;
/** Custom tag patterns */
tagPatterns?: Record<string, string[]>;
/** Files to ignore */
ignorePatterns?: string[];
/** Repair existing frontmatter */
repairMode?: boolean;
/** Validate content structure */
validateContent?: boolean;
/** Generate table of contents */
generateToc?: boolean;
/** Process images and copy them */
processImages?: boolean;
/** Fix internal links */
fixLinks?: boolean;
/** Auto-generate sidebar configuration */
generateSidebar?: boolean;
/** Maximum description length */
maxDescriptionLength?: number;
}
/**
* Conversion counters; this is the return type of DocumentConverter.getStats().
*/
interface ConversionStats {
/** Count of processed files (exact counting rule is not visible in this declaration) */
processed: number;
/** Count of skipped files */
skipped: number;
/** Count of errors */
errors: number;
/** Per-key counters; presumably keyed by file format/extension — TODO confirm */
formats: Map<string, number>;
}
/**
* Metadata describing a document. All named fields are optional, and the index
* signature allows arbitrary additional keys whose values are `unknown`.
* Exported publicly under the name `DocumentMetadata`.
*/
interface DocumentMetadata$1 {
title?: string;
description?: string;
category?: string;
tags?: string[];
/** Stored as a string; the exact date format is not visible here */
lastUpdated?: string;
author?: string;
draft?: boolean;
/** Numeric reading-time estimate; unit not stated here (presumably minutes — confirm) */
readingTime?: number;
wordCount?: number;
/** Free-form string here; ContentAnalyzer.analyzeContent uses a narrower literal union for its own `contentType` */
contentType?: string;
/** Free-form string here; ContentAnalyzer.analyzeContent uses 'simple' | 'moderate' | 'complex' for its own `complexity` */
complexity?: string;
/** Any other key is permitted; values must be narrowed before use */
[key: string]: unknown;
}
/**
* Outcome of converting one file. DocumentConverter.convertFile resolves to a
* single ConversionResult and convertDirectory resolves to an array of them.
*/
interface ConversionResult {
/** Whether the conversion succeeded */
success: boolean;
/** Path of the input file */
inputPath: string;
/** Path of the output file */
outputPath: string;
/** Set when the file was skipped rather than converted */
skipped?: boolean;
/** NOTE(review): both `error` and `errorMessage` are optional strings; how they differ is not visible in this declaration */
error?: string;
errorMessage?: string;
/** Metadata associated with the converted document, when available */
metadata?: DocumentMetadata$1;
}
/**
* User configuration accepted by starlightDocumentConverter(). Every field is optional.
*/
interface StarlightIntegrationConfig {
/** Enable the document converter integration */
enabled?: boolean;
/** Conversion options */
converter?: ConversionOptions$1;
/** Watch for file changes and auto-convert */
watch?: boolean;
/** Input directories to monitor */
inputDirs?: string[];
}
/** Union of the file extensions (each with its leading dot) that this package types as supported input formats. */
type SupportedFormat = '.docx' | '.doc' | '.txt' | '.html' | '.htm' | '.md' | '.mdx' | '.rtf';
/**
* Path-based processor contract: `process` receives a file path plus conversion
* options and resolves to a string.
* This is the interface exported publicly as `FileProcessor`. It is distinct from
* the plugin-style `FileProcessor` interface declared later in this file, whose
* `process` receives (content, context) instead.
*/
interface FileProcessor$1 {
/** Extensions handled by this processor, restricted to SupportedFormat */
extensions: SupportedFormat[];
/** Processes the file at `filePath` and resolves to the resulting string */
process: (filePath: string, options: ConversionOptions$1) => Promise<string>;
}
/**
* Bundle of values describing a single conversion: paths, file name, extension,
* content and the options in effect. All fields are required.
*/
interface ConversionContext {
/** Input file path */
inputPath: string;
/** Output file path */
outputPath: string;
/** File name (whether it includes the extension is not visible here) */
filename: string;
/** File extension, restricted to SupportedFormat */
extension: SupportedFormat;
/** Document content as a string */
content: string;
/** Conversion options */
options: ConversionOptions$1;
}
/**
* Result of a validation pass; this is the return type of FrontmatterRepair.validateContent().
*/
interface ValidationResult {
/** Whether the content is considered valid */
valid: boolean;
/** Issues found during validation */
issues: ValidationIssue[];
/** Metadata, when available */
metadata?: DocumentMetadata$1;
/** Quality score, when computed */
score?: QualityScore;
}
/**
* A single validation finding, as listed in ValidationResult.issues.
*/
interface ValidationIssue {
/** Either 'error' or 'warning' */
type: 'error' | 'warning';
/** Name of the field the issue relates to, when applicable */
field?: string;
/** Description of the issue */
message: string;
/** Optional suggested remedy */
suggestion?: string;
}
/**
* Quality assessment attached to a ValidationResult: an overall rating plus
* numeric sub-scores. The numeric range of the sub-scores is not visible in this declaration.
*/
interface QualityScore {
/** Overall rating */
overall: 'good' | 'fair' | 'poor';
titleScore: number;
descriptionScore: number;
contentScore: number;
structureScore: number;
/** Improvement suggestions */
suggestions: string[];
}
/**
* Result of a frontmatter repair; this is the return type of FrontmatterRepair.repairFrontmatter().
*/
interface RepairResult {
/** Whether the repair operation succeeded */
success: boolean;
/** Presumably true when the content was actually changed — TODO confirm against the implementation */
fixed: boolean;
/** Issue descriptions as plain strings */
issues: string[];
/** Content before repair */
originalContent: string;
/** Content after repair */
repairedContent: string;
}
/**
* One table-of-contents node. Entries nest recursively through `children`.
* Used by TocGenerator and by ContentAnalyzer's `headingStructure`.
*/
interface TocEntry {
/** Heading level as a number */
level: number;
/** Heading title text */
title: string;
/** Anchor string for the heading */
anchor: string;
/** Nested entries, when present */
children?: TocEntry[];
}
/**
* Information about one link; LinkImageProcessor.processContent returns these
* in its `links` array and generateLinkReport consumes them.
*/
interface LinkInfo {
/** Link target as originally written */
original: string;
/** Link target after resolution */
resolved: string;
/** Whether the link is internal (as opposed to external) */
isInternal: boolean;
/** Whether the link target exists */
exists: boolean;
/** Whether the link needs repair */
needsRepair: boolean;
}
/**
* Information about one image; LinkImageProcessor.processContent returns these
* in its `images` array and generateImageReport consumes them.
*/
interface ImageInfo {
/** Image source as originally written */
original: string;
/** Image source after resolution */
resolved: string;
/** Whether the image was copied */
copied: boolean;
/** Destination path, when available */
outputPath?: string;
/** Alt text, when present */
alt?: string;
}
/**
* Converter class of this package. The public surface is the constructor,
* convertFile, convertDirectory, getStats and printStats; every other member is
* private and is declared without a type in this declaration file.
*/
declare class DocumentConverter {
private options;
private stats;
private turndownService;
/**
* @param options - optional conversion options; defaults are not visible in this declaration
*/
constructor(options?: ConversionOptions$1);
private getDefaultCategoryPatterns;
private getDefaultTagPatterns;
private getDefaultIgnorePatterns;
private log;
private extractTitle;
private extractDescription;
private extractParagraphs;
private getDescriptionStartIndex;
private isStructuralElement;
private cleanParagraph;
private processDescriptionParagraph;
private truncateDescription;
private extractTags;
private getTechPatterns;
private addTechTags;
private addCategoryTags;
private addContentTypeTags;
private addFilenameTags;
private addComplexityTags;
private generateCategory;
private generateFrontmatterYaml;
private validateConvertedContent;
private validateTitle;
private validateDescription;
private validateContentStructure;
private validateCodeContent;
private calculateQuality;
private generateFrontmatter;
private isSupportedFormat;
private isTextBasedFile;
private shouldSkipFile;
private convertPlainText;
private processPlainTextLine;
private isCodeLine;
private isIndentedLine;
private convertNonCodeLine;
private convertHTML;
private convertWordDocument;
private convertRTF;
private processFileByType;
/**
* Convert a single file.
* @param inputPath - path of the file to convert
* @param outputPath - optional output path; how it is derived when omitted is not visible here
* @returns a promise resolving to the ConversionResult for this file
*/
convertFile(inputPath: string, outputPath?: string): Promise<ConversionResult>;
/**
* Convert the files of a directory.
* @param inputDir - directory to convert
* @param outputDir - optional output directory; how it is derived when omitted is not visible here
* @returns a promise resolving to an array of ConversionResult
*/
convertDirectory(inputDir: string, outputDir?: string): Promise<ConversionResult[]>;
/** Returns the conversion counters as a ConversionStats object. */
getStats(): ConversionStats;
/** Prints statistics; returns nothing. The output destination is not visible in this declaration. */
printStats(): void;
}
/**
* Argument passed to each hook in AstroIntegration.hooks. Declared locally in
* this file with only the two members listed here.
*/
interface AstroHookContext {
/** Configuration object (see the local AstroConfig interface) */
config: AstroConfig;
/** Logger exposing info/warn/error */
logger: Logger;
}
/**
* Shape of the integration object returned by starlightDocumentConverter().
* Declared locally in this file.
*/
interface AstroIntegration {
/** Integration name */
name: string;
/** Hook table: any string key maps to a hook function (sync or async) or to undefined */
hooks: {
[key: string]: ((context: AstroHookContext) => void | Promise<void>) | undefined;
};
}
/**
* Local, minimal configuration shape: only `root.pathname` is declared here.
*/
interface AstroConfig {
/** Root location; only its `pathname` string is typed */
root: {
pathname: string;
};
}
/**
* Logger contract used by AstroHookContext: three methods, each taking a single message string.
*/
interface Logger {
info(message: string): void;
warn(message: string): void;
error(message: string): void;
}
/**
* Builds the integration object for this package. Exported both by name and as the module's default export.
* @param userConfig - optional integration configuration
* @returns an AstroIntegration (a name plus a table of hooks)
*/
declare function starlightDocumentConverter(userConfig?: StarlightIntegrationConfig): AstroIntegration;
/**
* Content analysis class. Its only public members are the constructor and
* analyzeContent(); the remaining members are private and untyped in this declaration file.
*/
declare class ContentAnalyzer {
private categoryPatterns;
private tagPatterns;
/**
* @param categoryPatterns - optional string-to-string pattern map
* @param tagPatterns - optional map from string to string array
*/
constructor(categoryPatterns?: Record<string, string>, tagPatterns?: Record<string, string[]>);
/**
* Analyze content and generate comprehensive metadata
* @param content - document content
* @param filePath - path of the document
* @returns an object with `metadata` and an `analysis` record (word count, reading time,
* complexity, heading structure, topics, suggested tags and content type)
*/
analyzeContent(content: string, filePath: string): {
metadata: DocumentMetadata$1;
analysis: {
wordCount: number;
readingTime: number;
complexity: 'simple' | 'moderate' | 'complex';
headingStructure: TocEntry[];
topics: string[];
suggestedTags: string[];
contentType: 'guide' | 'reference' | 'tutorial' | 'blog' | 'documentation';
};
};
/**
* Generate intelligent title from content
*/
private generateTitle;
/**
* Generate intelligent description from content
*/
private generateDescription;
/**
* Infer category from file path and content
*/
private inferCategory;
/**
* Suggest relevant tags based on content analysis
*/
private suggestTags;
/**
* Detect content type based on structure and keywords
*/
private detectContentType;
/**
* Extract topics and keywords from content
*/
private extractTopics;
/**
* Calculate reading time estimate
*/
private estimateReadingTime;
/**
* Calculate word count
*/
private calculateWordCount;
/**
* Assess content complexity
*/
private assessComplexity;
/**
* Extract heading structure
*/
private extractHeadingStructure;
/**
* Generate URL-friendly anchor
*/
private generateAnchor;
/**
* Clean and format title
*/
private cleanTitle;
/**
* Humanize filename for title generation
*/
private humanizeFilename;
/**
* Generate fallback description based on content type
*/
private generateFallbackDescription;
/**
* Default category patterns
*/
private getDefaultCategoryPatterns;
/**
* Default tag patterns
*/
private getDefaultTagPatterns;
}
/**
* Plugin-style processor contract: `process` receives the content string and a
* ProcessingContext. This is the type taken by PluginRegistry.registerProcessor
* and used for jsonProcessor / markdownProcessor.
* NOTE(review): the export list at the end of this file maps the public name
* `FileProcessor` to FileProcessor$1, not to this interface, so this shape is
* not exported under its own name.
*/
interface FileProcessor {
/** Supported file extensions */
extensions: string[];
/** Processing function */
process: (content: string, context: ProcessingContext) => Promise<string> | string;
/** Plugin metadata */
metadata: {
name: string;
version: string;
description: string;
author?: string;
};
/** Optional validation function */
validate?: (content: string, context: ProcessingContext) => boolean | Promise<boolean>;
/** Optional preprocessing function */
preprocess?: (content: string, context: ProcessingContext) => Promise<string> | string;
/** Optional postprocessing function */
postprocess?: (content: string, context: ProcessingContext) => Promise<string> | string;
}
/**
* Plugin contract for metadata enhancers, registered via PluginRegistry.registerEnhancer.
* `enhance` may return its result synchronously or as a promise.
*/
interface MetadataEnhancer {
/** Enhancement function */
enhance: (metadata: DocumentMetadata, context: ProcessingContext) => Promise<DocumentMetadata> | DocumentMetadata;
/** Plugin metadata */
metadata: {
name: string;
version: string;
description: string;
author?: string;
};
/** Priority for execution order (higher = earlier) */
priority?: number;
}
/**
* Plugin contract for quality validators, registered via PluginRegistry.registerValidator.
* Unlike the processor and enhancer callbacks, `validate` is typed as synchronous.
*/
interface QualityValidator {
/** Validation function */
validate: (content: string, metadata: DocumentMetadata, context: ProcessingContext) => QualityReport;
/** Plugin metadata */
metadata: {
name: string;
version: string;
description: string;
author?: string;
};
}
/**
* Context object handed to plugin callbacks (processors, enhancers, validators).
* PluginIntegrationHelper.createProcessingContext returns this type.
*/
interface ProcessingContext {
/** Input file path */
inputPath: string;
/** Output file path */
outputPath: string;
/** Original filename */
filename: string;
/** File extension */
extension: string;
/** Conversion options */
options: ConversionOptions;
/** Additional context data */
data?: Record<string, unknown>;
}
/**
* Report returned by a QualityValidator's `validate` function.
*/
interface QualityReport {
/** Overall quality score (0-100) */
score: number;
/** Quality level */
level: 'high' | 'medium' | 'low';
/** List of issues found */
issues: QualityIssue[];
/** Suggestions for improvement */
suggestions: string[];
}
/**
* A single finding inside a QualityReport. Compared with ValidationIssue, it
* additionally allows the 'info' type and carries a numeric severity.
*/
interface QualityIssue {
/** Issue type */
type: 'error' | 'warning' | 'info';
/** Issue message */
message: string;
/** Optional line number */
line?: number;
/** Optional column number */
column?: number;
/** Severity (1-10) */
severity: number;
}
/**
* Metadata shape referenced by MetadataEnhancer and QualityValidator.
* NOTE(review): field-for-field identical to DocumentMetadata$1 declared earlier
* in this file; the public `DocumentMetadata` export refers to DocumentMetadata$1,
* not to this copy. Keep the two in sync.
*/
interface DocumentMetadata {
title?: string;
description?: string;
category?: string;
tags?: string[];
lastUpdated?: string;
author?: string;
draft?: boolean;
readingTime?: number;
wordCount?: number;
contentType?: string;
complexity?: string;
/** Any other key is permitted; values must be narrowed before use */
[key: string]: unknown;
}
/**
* Options shape referenced by ProcessingContext.options.
* NOTE(review): declares the same fields with the same types as ConversionOptions$1
* earlier in this file (which carries the per-field documentation); the public
* `ConversionOptions` export refers to ConversionOptions$1, not to this copy.
* Keep the two in sync.
*/
interface ConversionOptions {
outputDir?: string;
preserveStructure?: boolean;
generateTitles?: boolean;
generateDescriptions?: boolean;
addTimestamps?: boolean;
defaultCategory?: string;
verbose?: boolean;
dryRun?: boolean;
categoryPatterns?: Record<string, string>;
tagPatterns?: Record<string, string[]>;
ignorePatterns?: string[];
repairMode?: boolean;
validateContent?: boolean;
generateToc?: boolean;
processImages?: boolean;
fixLinks?: boolean;
generateSidebar?: boolean;
maxDescriptionLength?: number;
}
/**
* Registry contract for the three plugin kinds (file processors, metadata
* enhancers, quality validators). Implemented by DefaultPluginRegistry.
* All members are declared as function-typed properties.
*/
interface PluginRegistry {
/** Register a file processor plugin */
registerProcessor: (processor: FileProcessor) => void;
/** Register a metadata enhancer plugin */
registerEnhancer: (enhancer: MetadataEnhancer) => void;
/** Register a quality validator plugin */
registerValidator: (validator: QualityValidator) => void;
/** Get all registered processors */
getProcessors: () => FileProcessor[];
/** Get processors for specific extension */
getProcessorsForExtension: (extension: string) => FileProcessor[];
/** Get all registered enhancers */
getEnhancers: () => MetadataEnhancer[];
/** Get all registered validators */
getValidators: () => QualityValidator[];
/** Clear all plugins */
clear: () => void;
}
/**
* Frontmatter validation and repair class. Public surface: validateContent()
* and repairFrontmatter(). No constructor is declared, so it takes no arguments.
* The readonly private fields and private helpers are untyped in this declaration file.
*/
declare class FrontmatterRepair {
private readonly requiredFields;
private readonly recommendedFields;
private readonly maxTitleLength;
private readonly maxDescriptionLength;
/**
* Validate frontmatter and content structure
* @param content - document content
* @param filePath - path of the document
* @returns a ValidationResult (validity flag, issues, optional metadata and score)
*/
validateContent(content: string, filePath: string): ValidationResult;
/**
* Repair frontmatter issues
* @param content - document content
* @param filePath - path of the document
* @returns a RepairResult carrying both the original and the repaired content
*/
repairFrontmatter(content: string, filePath: string): RepairResult;
private validateContentStructure;
private hasProperHeadingStructure;
private calculateQualityScore;
private generateFrontmatterFromContent;
private extractTitleFromContent;
private generateDescriptionFromContent;
private inferCategoryFromPath;
private cleanString;
private cleanFilename;
private buildFrontmatterString;
private escapeYamlString;
}
/**
* JSON file processor that converts JSON files to formatted markdown.
* Typed as the plugin-style FileProcessor (content + ProcessingContext), so it
* can be passed to PluginRegistry.registerProcessor.
*/
declare const jsonProcessor: FileProcessor;
/**
* Link and image handling for markdown content. Public surface: the
* constructor, processContent(), the three report/suggestion generators, and
* the static extractImages()/extractLinks() helpers.
*/
declare class LinkImageProcessor {
private baseDir;
private outputDir;
private assetsDir;
private logger?;
/**
* @param baseDir - base directory (presumably where the source documents live — TODO confirm)
* @param outputDir - output directory
* @param assetsDir - optional assets directory; its default is not visible in this declaration
* @param logger - optional sink with `warn` and `error` callbacks; the second
* argument of `error` is typed `any`, so it is passed through unchecked
*/
constructor(baseDir: string, outputDir: string, assetsDir?: string, logger?: {
warn: (msg: string) => void;
error: (msg: string, error?: any) => void;
});
/**
* Process all links and images in markdown content
* @param content - markdown content
* @param sourceFilePath - path of the source file
* @param targetFilePath - path of the target file
* @returns a promise resolving to the resulting content plus the LinkInfo and ImageInfo lists
*/
processContent(content: string, sourceFilePath: string, targetFilePath: string): Promise<{
content: string;
links: LinkInfo[];
images: ImageInfo[];
}>;
/**
* Process and fix internal links
*/
private processLinks;
/**
* Process and copy images
*/
private processImages;
/**
* Process a single internal link
*/
private processInternalLink;
/**
* Process and copy a single image
*/
private processImage;
/**
* Get relative path for image in markdown
*/
private getRelativeImagePath;
/**
* Generate an image report
* @param images - image records, e.g. the `images` array from processContent
* @returns counts (total, copied, external, missing) and the list of missing images
*/
generateImageReport(images: ImageInfo[]): {
total: number;
copied: number;
external: number;
missing: number;
missingImages: string[];
};
/**
* Generate suggestions for missing images
* @param missingImages - missing image identifiers as strings
* @returns suggestion strings
*/
generateImageSuggestions(missingImages: string[]): string[];
/**
* Generate a link report
* @param links - link records, e.g. the `links` array from processContent
* @returns counts (total, internal, external, broken, repaired)
*/
generateLinkReport(links: LinkInfo[]): {
total: number;
internal: number;
external: number;
broken: number;
repaired: number;
};
/**
* Extract all images from content for batch processing
* @returns one `{ alt, src }` record per image found
*/
static extractImages(content: string): Array<{
alt: string;
src: string;
}>;
/**
* Extract all links from content for batch processing
* @returns one `{ text, url }` record per link found
*/
static extractLinks(content: string): Array<{
text: string;
url: string;
}>;
}
/**
* Built-in markdown metadata enhancer that improves title and description extraction.
* Typed as MetadataEnhancer, so it can be passed to PluginRegistry.registerEnhancer.
*/
declare const markdownEnhancer: MetadataEnhancer;
/**
* Advanced markdown processor that handles link/image processing and TOC generation.
* Typed as the plugin-style FileProcessor (content + ProcessingContext), so it
* can be passed to PluginRegistry.registerProcessor.
*/
declare const markdownProcessor: FileProcessor;
/**
* Comprehensive quality validator for generated content.
* Typed as QualityValidator, so it can be passed to PluginRegistry.registerValidator.
*/
declare const contentQualityValidator: QualityValidator;
/**
* Table-of-contents generation for markdown content: builds TocEntry trees,
* renders them (markdown, HTML, Starlight sidebar data), and inserts, detects or
* removes a TOC in content. Private members are untyped in this declaration file.
*/
declare class TocGenerator {
private maxDepth;
private minEntries;
/**
* @param maxDepth - optional; presumably the deepest heading level included — TODO confirm
* @param minEntries - optional; presumably the minimum number of headings before a TOC is produced — TODO confirm
*/
constructor(maxDepth?: number, minEntries?: number);
/**
* Generate table of contents from markdown content
*/
generateToc(content: string): TocEntry[];
/**
* Generate table of contents with custom anchor generation
* @param anchorGenerator - optional function mapping a heading title to its anchor string
*/
generateTocWithCustomAnchors(content: string, anchorGenerator?: (title: string) => string): TocEntry[];
/**
* Insert table of contents into content
* @param tocPosition - optional; one of 'top', 'after-title' or 'custom'
* @param customMarker - optional marker string (presumably used with 'custom' — TODO confirm)
* @returns the resulting content string
*/
insertTocIntoContent(content: string, tocPosition?: 'top' | 'after-title' | 'custom', customMarker?: string): string;
/**
* Extract headings from content
*/
private extractHeadings;
/**
* Build hierarchical TOC tree
*/
private buildTocTree;
/**
* Clean heading text (remove markdown formatting)
*/
private cleanHeadingText;
/**
* Generate URL-friendly anchor from title
*/
private generateAnchor;
/**
* Render TOC as markdown
*/
renderTocAsMarkdown(toc: TocEntry[]): string;
/**
* Render TOC as HTML
*/
renderTocAsHtml(toc: TocEntry[]): string;
/**
* Render TOC as JSON for Starlight sidebar
* NOTE(review): the return type is `any[]`, so the element shape is unchecked by the compiler.
*/
renderTocForStarlightSidebar(toc: TocEntry[]): any[];
/**
* Render a level of TOC in markdown format
*/
private renderTocLevel;
/**
* Render a level of TOC in HTML format
*/
private renderTocLevelHtml;
/**
* Convert TOC entry to Starlight sidebar format
*/
private tocEntryToStarlightFormat;
/**
* Insert TOC at the top of content
*/
private insertAtTop;
/**
* Insert TOC after the first heading
*/
private insertAfterTitle;
/**
* Check if content already has a table of contents
*/
hasExistingToc(content: string): boolean;
/**
* Remove existing table of contents from content
*/
removeExistingToc(content: string): string;
/**
* Generate navigation structure for Starlight
* @param baseUrl - optional base URL; its default is not visible in this declaration
* NOTE(review): the return type is `any[]`, so the element shape is unchecked by the compiler.
*/
generateStarlightNavigation(tocEntries: TocEntry[], baseUrl?: string): any[];
/**
* Extract headings for automated sidebar generation
* @returns a title plus a flat list of headings (title, anchor, level)
*/
extractHeadingsForSidebar(content: string, filePath: string): {
title: string;
headings: Array<{
title: string;
anchor: string;
level: number;
}>;
};
/**
* Generate title from file path
*/
private generateTitleFromPath;
}
/**
* Integration helper to bridge the main converter with the plugin system.
* Every member is static; the class is used as a namespace and is not instantiated here.
*/
declare class PluginIntegrationHelper {
/**
* Apply all registered metadata enhancers to the given metadata
* @returns a promise resolving to the enhanced metadata
*/
static enhanceMetadata(metadata: DocumentMetadata$1, context: ProcessingContext): Promise<DocumentMetadata$1>;
/**
* Process content using registered file processors
* @returns a promise resolving to the processed content
*/
static processContent(content: string, context: ProcessingContext): Promise<string>;
/**
* Validate content using all registered validators
* @returns an array of report objects, each with the QualityReport fields plus a
* `validator` string (presumably the validator's name — TODO confirm)
*/
static validateContent(content: string, metadata: DocumentMetadata$1, context: ProcessingContext): {
score: number;
level: "high" | "medium" | "low";
issues: QualityIssue[];
suggestions: string[];
validator: string;
}[];
/**
* Create a processing context from conversion parameters
* Note: `options` is typed ConversionOptions$1 while ProcessingContext.options is
* typed ConversionOptions; the two interfaces declare the same fields.
*/
static createProcessingContext(inputPath: string, outputPath: string, filename: string, extension: string, options: ConversionOptions$1, additionalData?: Record<string, unknown>): ProcessingContext;
/**
* Initialize all built-in plugins
* @returns a promise resolving to plugin counts and the list of supported extensions
*/
static initializePlugins(): Promise<{
processors: number;
enhancers: number;
validators: number;
supportedExtensions: string[];
}>;
/**
* Get plugin statistics
*/
static getPluginStats(): {
processors: number;
enhancers: number;
validators: number;
supportedExtensions: string[];
};
/**
* Check if plugins are properly registered
* @returns a validity flag and a list of issue strings
*/
static validatePluginSetup(): {
valid: boolean;
issues: string[];
};
}
/**
* Central plugin registry for managing file processors, metadata enhancers, and quality validators.
* Implements PluginRegistry and additionally exposes getStats(). The private
* members are untyped in this declaration file.
*/
declare class DefaultPluginRegistry implements PluginRegistry {
private processors;
private enhancers;
private validators;
/**
* Register a file processor plugin
*/
registerProcessor(processor: FileProcessor): void;
/**
* Register a metadata enhancer plugin
*/
registerEnhancer(enhancer: MetadataEnhancer): void;
/**
* Register a quality validator plugin
*/
registerValidator(validator: QualityValidator): void;
/**
* Get all registered processors
*/
getProcessors(): FileProcessor[];
/**
* Get processors that can handle a specific file extension
*/
getProcessorsForExtension(extension: string): FileProcessor[];
/**
* Get all registered enhancers (sorted by priority)
*/
getEnhancers(): MetadataEnhancer[];
/**
* Get all registered validators
*/
getValidators(): QualityValidator[];
/**
* Clear all registered plugins
*/
clear(): void;
/**
* Get plugin statistics
* @returns counts of registered processors, enhancers and validators, plus the supported extensions
*/
getStats(): {
processors: number;
enhancers: number;
validators: number;
supportedExtensions: string[];
};
private validateProcessor;
private validateEnhancer;
private validateValidator;
}
/**
* Module-level DefaultPluginRegistry instance exported by this package
* (presumably the shared registry the built-in plugins are registered on — TODO confirm).
*/
declare const pluginRegistry: DefaultPluginRegistry;
/**
* Registers the package's built-in plugins.
* @returns a promise resolving to plugin counts and supported extensions — the
* same shape as DefaultPluginRegistry.getStats()
*/
declare function registerBuiltInPlugins(): Promise<{
processors: number;
enhancers: number;
validators: number;
supportedExtensions: string[];
}>;
// Public API. The `$1`-suffixed declarations are re-exported under their plain names
// (ConversionOptions, DocumentMetadata, FileProcessor), so the public `FileProcessor`
// is the path-based FileProcessor$1; the plugin-style FileProcessor interface used by
// PluginRegistry is not exported. starlightDocumentConverter is exported both by name
// and as the default export.
export { ContentAnalyzer, type ConversionContext, type ConversionOptions$1 as ConversionOptions, type ConversionResult, type ConversionStats, DefaultPluginRegistry, DocumentConverter, type DocumentMetadata$1 as DocumentMetadata, type FileProcessor$1 as FileProcessor, FrontmatterRepair, LinkImageProcessor, type MetadataEnhancer, PluginIntegrationHelper, type PluginRegistry, type ProcessingContext, type QualityIssue, type QualityReport, type QualityValidator, type StarlightIntegrationConfig, type SupportedFormat, TocGenerator, contentQualityValidator, starlightDocumentConverter as default, jsonProcessor, markdownEnhancer, markdownProcessor, pluginRegistry, registerBuiltInPlugins, starlightDocumentConverter };