UNPKG

document-outline-extractor

Version:

Extract structured outlines from documents with optional AI enhancement

252 lines (214 loc) 7.25 kB
#!/usr/bin/env node
import * as fs from 'fs';
import * as path from 'path';
import { OutlineExtractor } from '../extractor';
import { OpenAIConfig, ExtractOptions, OutlineFormat } from '../types';

/** Options collected from the command line; every field is optional. */
interface CLIOptions {
  input?: string;
  output?: string;
  format?: OutlineFormat;
  maxDepth?: number;
  openaiKey?: string;
  openaiUrl?: string;
  model?: string;
  quality?: boolean;
  help?: boolean;
  version?: boolean;
  config?: string;
}

/**
 * Command-line front end for {@link OutlineExtractor}.
 *
 * Arguments are parsed eagerly in the constructor; {@link CLI.run} then
 * performs the requested action (help, version, quality report or outline
 * extraction).
 */
class CLI {
  private args: string[];
  private options: CLIOptions = {};

  /**
   * @param args Command-line arguments without the node binary and script
   *   path (i.e. `process.argv.slice(2)`).
   */
  constructor(args: string[]) {
    this.args = args;
    this.parseArguments();
  }

  /**
   * Walks `this.args` and fills `this.options`.
   *
   * Unrecognised arguments are ignored. A value-taking option that has no
   * following argument, or a non-numeric `--max-depth`, prints an error and
   * exits with status 1 instead of being silently dropped.
   */
  private parseArguments(): void {
    let index = 0;

    // Consumes and returns the argument following the current flag.
    const nextValue = (flag: string): string => {
      const value = this.args[index + 1];
      if (value === undefined) {
        console.error(`Error: Option ${flag} requires a value`);
        process.exit(1);
      }
      index += 1;
      return value;
    };

    while (index < this.args.length) {
      const arg = this.args[index];

      switch (arg) {
        case '-i':
        case '--input':
          this.options.input = nextValue(arg);
          break;
        case '-o':
        case '--output':
          this.options.output = nextValue(arg);
          break;
        case '-f':
        case '--format':
          // The value is not validated here; it is passed to the extractor as given.
          this.options.format = nextValue(arg) as OutlineFormat;
          break;
        case '-d':
        case '--max-depth': {
          const raw = nextValue(arg);
          const depth = parseInt(raw, 10);
          if (Number.isNaN(depth)) {
            console.error(`Error: Invalid value for ${arg}: ${raw}`);
            process.exit(1);
          }
          this.options.maxDepth = depth;
          break;
        }
        case '--openai-key':
          this.options.openaiKey = nextValue(arg);
          break;
        case '--openai-url':
          this.options.openaiUrl = nextValue(arg);
          break;
        case '--model':
          this.options.model = nextValue(arg);
          break;
        case '-q':
        case '--quality':
          this.options.quality = true;
          break;
        case '-c':
        case '--config':
          this.options.config = nextValue(arg);
          break;
        case '-h':
        case '--help':
          this.options.help = true;
          break;
        case '-v':
        case '--version':
          this.options.version = true;
          break;
      }

      index += 1;
    }
  }

  /** Prints the usage text to stdout. */
  private printHelp(): void {
    console.log(`
Document Outline Extractor CLI

Usage: outline-extractor [options]

Options:
  -i, --input <file>      Input markdown file path (required unless piping)
  -o, --output <file>     Output file path (default: stdout)
  -f, --format <format>   Output format: tree, markdown, json, flat (default: tree)
  -d, --max-depth <n>     Maximum heading depth to include (default: all)
  -q, --quality           Show quality metrics instead of outline
  -c, --config <file>     Configuration file path (JSON)

OpenAI Options:
  --openai-key <key>      OpenAI API key
  --openai-url <url>      OpenAI base URL (default: https://api.openai.com/v1)
  --model <name>          Model name (default: gpt-4o-mini)

Other:
  -h, --help              Show this help message
  -v, --version           Show version

Examples:
  # Extract outline from file
  outline-extractor -i document.md

  # Extract with specific format
  outline-extractor -i document.md -f json -o outline.json

  # Use OpenAI for better extraction
  outline-extractor -i document.md --openai-key sk-... --model gpt-4o

  # Check document quality
  outline-extractor -i document.md -q

  # Pipe content
  cat document.md | outline-extractor -f markdown

  # Use configuration file
  outline-extractor -i document.md -c config.json
`);
  }

  /**
   * Prints the package version read from `package.json` two directories
   * above this file.
   *
   * @throws If the file cannot be read or parsed.
   */
  private printVersion(): void {
    const packagePath = path.join(__dirname, '../../package.json');
    const pkg = JSON.parse(fs.readFileSync(packagePath, 'utf-8'));
    console.log(`document-outline-extractor v${pkg.version}`);
  }

  /**
   * Reads the document to process.
   *
   * @returns The contents of the `--input` file when given, otherwise
   *   everything received on stdin until it ends.
   */
  private async readInput(): Promise<string> {
    if (this.options.input) {
      return fs.readFileSync(this.options.input, 'utf-8');
    }

    // Read from stdin
    return new Promise<string>((resolve, reject) => {
      let data = '';
      process.stdin.setEncoding('utf-8');
      process.stdin.on('data', (chunk) => {
        data += chunk;
      });
      process.stdin.on('end', () => resolve(data));
      // Without a listener a stream error would surface as an uncaught
      // exception; rejecting routes it through run()'s error handling.
      process.stdin.on('error', reject);
    });
  }

  /**
   * Writes the result to the `--output` file (and prints a confirmation
   * line), or to stdout when no output file was requested.
   */
  private writeOutput(content: string): void {
    if (this.options.output) {
      fs.writeFileSync(this.options.output, content, 'utf-8');
      console.log(`✓ Outline written to ${this.options.output}`);
    } else {
      console.log(content);
    }
  }

  /**
   * Loads the JSON configuration file named by `--config`.
   *
   * Best-effort: when the file is missing, unreadable, not valid JSON or not
   * a JSON object, the problem is logged to stderr and an empty configuration
   * is used instead.
   *
   * @returns The parsed configuration object, or `{}`. The shape is not
   *   validated beyond "is an object", hence the loose return type.
   */
  private loadConfig(): any {
    if (!this.options.config) return {};

    try {
      const parsed: unknown = JSON.parse(fs.readFileSync(this.options.config, 'utf-8'));
      // `null`, arrays and primitives are valid JSON but would break the
      // property lookups done on the configuration later on.
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        throw new Error('configuration must be a JSON object');
      }
      return parsed;
    } catch (error) {
      console.error(`Error loading config file: ${error}`);
      return {};
    }
  }

  /**
   * Builds the OpenAI settings from command-line flags and the config file.
   *
   * Precedence per field is: command-line flag, then config file, then the
   * built-in default. Additional keys present in `config.openai` are passed
   * through unchanged.
   *
   * @returns `undefined` when neither an API key flag nor a `config.openai`
   *   section is present.
   */
  private buildOpenAIConfig(config: any): OpenAIConfig | undefined {
    if (!(this.options.openaiKey || config.openai)) {
      return undefined;
    }

    return {
      // Spread first so the explicit fields below win; spreading last would
      // let the config file override flags given on the command line.
      ...config.openai,
      apiKey: this.options.openaiKey || config.openai?.apiKey,
      baseUrl: this.options.openaiUrl || config.openai?.baseUrl || 'https://api.openai.com/v1',
      model: this.options.model || config.openai?.model || 'gpt-4o-mini',
    };
  }

  /**
   * Executes the action selected by the parsed options.
   *
   * `--help` and `--version` short-circuit everything else. Otherwise the
   * input is read and either a quality report is printed (`--quality`) or
   * the outline is extracted and written out. Failures are reported on
   * stderr and terminate the process with status 1.
   */
  async run(): Promise<void> {
    if (this.options.help) {
      this.printHelp();
      return;
    }

    if (this.options.version) {
      this.printVersion();
      return;
    }

    try {
      // Load configuration
      const config = this.loadConfig();

      // Build OpenAI config if provided
      const openaiConfig = this.buildOpenAIConfig(config);

      // Initialize extractor
      const extractor = new OutlineExtractor({
        openai: openaiConfig,
        ...config.extractor,
      });

      // Read input
      const content = await this.readInput();

      if (!content.trim()) {
        console.error('Error: No input content provided');
        process.exit(1);
      }

      // Show quality metrics if requested
      if (this.options.quality) {
        const metrics = extractor.evaluateQuality(content);
        console.log('\nDocument Outline Quality Metrics:');
        console.log('─'.repeat(40));
        console.log(`Overall Score: ${(metrics.score * 100).toFixed(1)}%`);
        console.log(`Richness: ${(metrics.richness * 100).toFixed(1)}%`);
        console.log(`Balance: ${(metrics.balance * 100).toFixed(1)}%`);
        console.log(`Coherence: ${(metrics.coherence * 100).toFixed(1)}%`);
        console.log(`Coverage: ${(metrics.coverage * 100).toFixed(1)}%`);
        console.log(`Heading Count: ${metrics.headingCount}`);
        console.log(`Max Depth: ${metrics.depth}`);
        console.log('─'.repeat(40));

        if (metrics.score >= 0.8) {
          console.log('✓ Document has good outline structure');
        } else if (metrics.score >= 0.5) {
          console.log('⚠ Document outline could be improved');
        } else {
          console.log('✗ Document lacks proper outline structure');
        }
        return;
      }

      // Extract outline
      const extractOptions: ExtractOptions = {
        format: this.options.format || config.format,
        // `||` on purpose: a depth of 0 is treated as "not set" and falls
        // back to the config file value.
        maxDepth: this.options.maxDepth || config.maxDepth,
      };

      // Progress goes to stderr so it never mixes with outline output on stdout.
      console.error('Extracting outline...');
      const outline = await extractor.extract(content, extractOptions);

      // Write output
      this.writeOutput(outline);
    } catch (error) {
      // Use the bare message for Error instances to avoid "Error: Error: ...".
      const message = error instanceof Error ? error.message : String(error);
      console.error(`Error: ${message}`);
      process.exit(1);
    }
  }
}

// Run CLI
const cli = new CLI(process.argv.slice(2));
cli.run().catch((error) => {
  console.error(error);
  // Failures outside run()'s own try/catch (e.g. --version being unable to
  // read package.json) must still produce a non-zero exit status.
  process.exitCode = 1;
});