document-outline-extractor
Version:
Extract structured outlines from documents with optional AI enhancement
252 lines (214 loc) • 7.25 kB
text/typescript
import * as fs from 'fs';
import * as path from 'path';
import { OutlineExtractor } from '../extractor';
import { OpenAIConfig, ExtractOptions, OutlineFormat } from '../types';
/**
 * Options collected from the command line.
 *
 * Every member is optional: a member is only populated when the matching
 * flag appears in argv.
 */
interface CLIOptions {
  // --- Input / output -----------------------------------------------------
  /** `-i, --input`: path of the file to read. */
  input?: string;
  /** `-o, --output`: path of the file to write. */
  output?: string;
  /** `-c, --config`: path of a JSON configuration file. */
  config?: string;

  // --- Outline shaping ----------------------------------------------------
  /** `-f, --format`: requested output format. */
  format?: OutlineFormat;
  /** `-d, --max-depth`: maximum heading depth to include. */
  maxDepth?: number;

  // --- OpenAI settings ----------------------------------------------------
  /** `--openai-key`: API key. */
  openaiKey?: string;
  /** `--openai-url`: API base URL. */
  openaiUrl?: string;
  /** `--model`: model name. */
  model?: string;

  // --- Mode switches ------------------------------------------------------
  /** `-q, --quality`: print quality metrics instead of the outline. */
  quality?: boolean;
  /** `-h, --help`: print usage and stop. */
  help?: boolean;
  /** `-v, --version`: print the package version and stop. */
  version?: boolean;
}
/**
 * Command-line front end for the outline extractor.
 *
 * The constructor parses argv immediately; `run()` then performs the
 * requested action (help, version, quality report, or outline extraction).
 * Problems found while parsing are collected rather than thrown, so
 * constructing a `CLI` never fails; `run()` reports them and exits with
 * status 1.
 */
class CLI {
  private readonly args: string[];
  private options: CLIOptions = {};
  /** Human-readable argv problems, reported by `run()` before any work is done. */
  private parseErrors: string[] = [];

  /**
   * @param args Command-line tokens without the node binary and script path
   *             (i.e. `process.argv.slice(2)`).
   */
  constructor(args: string[]) {
    this.args = args;
    this.parseArguments();
  }

  /**
   * Returns the value token at `index` for the option `flag`.
   * When argv ends before the value, records a parse error and returns
   * `undefined` instead of silently storing a missing value.
   */
  private takeValue(index: number, flag: string): string | undefined {
    const value = this.args[index];
    if (value === undefined) {
      this.parseErrors.push(`Option ${flag} requires a value`);
    }
    return value;
  }

  /**
   * Walks argv once and fills `this.options`.
   * Tokens that match no known flag are ignored.
   */
  private parseArguments(): void {
    for (let i = 0; i < this.args.length; i++) {
      const arg = this.args[i];
      switch (arg) {
        case '-i':
        case '--input':
          this.options.input = this.takeValue(++i, arg);
          break;
        case '-o':
        case '--output':
          this.options.output = this.takeValue(++i, arg);
          break;
        case '-f':
        case '--format':
          // The value is not validated here; it is passed through to the extractor.
          this.options.format = this.takeValue(++i, arg) as OutlineFormat | undefined;
          break;
        case '-d':
        case '--max-depth': {
          const raw = this.takeValue(++i, arg);
          if (raw === undefined) break;
          // Digits only: rejects "abc", "-1" and "3abc" (which parseInt
          // would otherwise accept as NaN, -1 and 3 respectively).
          if (/^[0-9]+$/.test(raw)) {
            this.options.maxDepth = Number.parseInt(raw, 10);
          } else {
            this.parseErrors.push(
              `Option ${arg} expects a non-negative integer, got "${raw}"`
            );
          }
          break;
        }
        case '--openai-key':
          this.options.openaiKey = this.takeValue(++i, arg);
          break;
        case '--openai-url':
          this.options.openaiUrl = this.takeValue(++i, arg);
          break;
        case '--model':
          this.options.model = this.takeValue(++i, arg);
          break;
        case '-q':
        case '--quality':
          this.options.quality = true;
          break;
        case '-c':
        case '--config':
          this.options.config = this.takeValue(++i, arg);
          break;
        case '-h':
        case '--help':
          this.options.help = true;
          break;
        case '-v':
        case '--version':
          this.options.version = true;
          break;
      }
    }
  }

  /** Prints the usage text to stdout. */
  private printHelp(): void {
    console.log(`
Document Outline Extractor CLI
Usage: outline-extractor [options]
Options:
-i, --input <file> Input markdown file path (required unless piping)
-o, --output <file> Output file path (default: stdout)
-f, --format <format> Output format: tree, markdown, json, flat (default: tree)
-d, --max-depth <n> Maximum heading depth to include (default: all)
-q, --quality Show quality metrics instead of outline
-c, --config <file> Configuration file path (JSON)
OpenAI Options:
--openai-key <key> OpenAI API key
--openai-url <url> OpenAI base URL (default: https://api.openai.com/v1)
--model <name> Model name (default: gpt-4o-mini)
Other:
-h, --help Show this help message
-v, --version Show version
Examples:
# Extract outline from file
outline-extractor -i document.md
# Extract with specific format
outline-extractor -i document.md -f json -o outline.json
# Use OpenAI for better extraction
outline-extractor -i document.md --openai-key sk-... --model gpt-4o
# Check document quality
outline-extractor -i document.md -q
# Pipe content
cat document.md | outline-extractor -f markdown
# Use configuration file
outline-extractor -i document.md -c config.json
`);
  }

  /**
   * Prints the version read from the `package.json` two directories above
   * this file. Throws if that file cannot be read or parsed.
   */
  private printVersion(): void {
    const packagePath = path.join(__dirname, '../../package.json');
    const pkg = JSON.parse(fs.readFileSync(packagePath, 'utf-8'));
    console.log(`document-outline-extractor v${pkg.version}`);
  }

  /**
   * Returns the document text: the `--input` file when given, otherwise
   * everything read from stdin until it ends.
   *
   * @throws If the input file cannot be read; rejects if stdin emits an error.
   */
  private async readInput(): Promise<string> {
    if (this.options.input) {
      return fs.readFileSync(this.options.input, 'utf-8');
    }
    // Read from stdin
    return new Promise<string>((resolve, reject) => {
      let data = '';
      process.stdin.setEncoding('utf-8');
      process.stdin.on('data', (chunk) => {
        data += chunk;
      });
      process.stdin.on('end', () => resolve(data));
      // Without this listener a stream failure would leave the promise pending forever.
      process.stdin.on('error', reject);
    });
  }

  /**
   * Writes `content` to the `--output` file (and confirms on stdout), or
   * prints it to stdout when no output file was requested.
   */
  private writeOutput(content: string): void {
    if (this.options.output) {
      fs.writeFileSync(this.options.output, content, 'utf-8');
      console.log(`✓ Outline written to ${this.options.output}`);
    } else {
      console.log(content);
    }
  }

  /**
   * Loads the JSON file named by `--config`.
   *
   * Best-effort: when the file is missing, unreadable, not valid JSON, or
   * its top-level value is not an object, the problem is logged to stderr
   * and an empty object is returned so the run continues with defaults.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- config shape is not declared in this file
  private loadConfig(): any {
    if (!this.options.config) return {};
    try {
      const parsed: unknown = JSON.parse(
        fs.readFileSync(this.options.config, 'utf-8')
      );
      // `null`, arrays and primitives are valid JSON but would break the
      // property lookups done on the config later on.
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        console.error(
          'Error loading config file: top-level value must be a JSON object'
        );
        return {};
      }
      return parsed;
    } catch (error) {
      console.error(`Error loading config file: ${error}`);
      return {};
    }
  }

  /**
   * Merges OpenAI settings from the command line and the config file.
   *
   * Returns `undefined` when neither `--openai-key` nor a config `openai`
   * section is present. Otherwise the config section is the base and the
   * command-line flags override it, so an explicit flag always wins.
   *
   * @param config Object returned by `loadConfig()`.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- see loadConfig
  private buildOpenAIConfig(config: any): OpenAIConfig | undefined {
    const fromFile = config.openai;
    if (!this.options.openaiKey && !fromFile) {
      return undefined;
    }
    return {
      // Spread first: extra keys from the file are kept, but must not
      // clobber the resolved values below.
      ...fromFile,
      apiKey: this.options.openaiKey || fromFile?.apiKey,
      baseUrl: this.options.openaiUrl || fromFile?.baseUrl || 'https://api.openai.com/v1',
      model: this.options.model || fromFile?.model || 'gpt-4o-mini'
    };
  }

  /**
   * Executes the action selected on the command line.
   *
   * Order of precedence: help, version, argv errors, then either the
   * quality report (`--quality`) or outline extraction. Failures are
   * reported on stderr and terminate the process with status 1.
   */
  async run(): Promise<void> {
    if (this.options.help) {
      this.printHelp();
      return;
    }
    if (this.options.version) {
      this.printVersion();
      return;
    }
    if (this.parseErrors.length > 0) {
      for (const message of this.parseErrors) {
        console.error(`Error: ${message}`);
      }
      process.exit(1);
    }
    try {
      // Load configuration
      const config = this.loadConfig();
      // Build OpenAI config if provided
      const openaiConfig = this.buildOpenAIConfig(config);
      // Initialize extractor
      const extractor = new OutlineExtractor({
        openai: openaiConfig,
        ...config.extractor
      });
      // Read input
      const content = await this.readInput();
      if (!content.trim()) {
        console.error('Error: No input content provided');
        process.exit(1);
      }
      // Show quality metrics if requested
      if (this.options.quality) {
        const metrics = extractor.evaluateQuality(content);
        console.log('\nDocument Outline Quality Metrics:');
        console.log('─'.repeat(40));
        console.log(`Overall Score: ${(metrics.score * 100).toFixed(1)}%`);
        console.log(`Richness: ${(metrics.richness * 100).toFixed(1)}%`);
        console.log(`Balance: ${(metrics.balance * 100).toFixed(1)}%`);
        console.log(`Coherence: ${(metrics.coherence * 100).toFixed(1)}%`);
        console.log(`Coverage: ${(metrics.coverage * 100).toFixed(1)}%`);
        console.log(`Heading Count: ${metrics.headingCount}`);
        console.log(`Max Depth: ${metrics.depth}`);
        console.log('─'.repeat(40));
        if (metrics.score >= 0.8) {
          console.log('✓ Document has good outline structure');
        } else if (metrics.score >= 0.5) {
          console.log('⚠ Document outline could be improved');
        } else {
          console.log('✗ Document lacks proper outline structure');
        }
        return;
      }
      // Extract outline
      const extractOptions: ExtractOptions = {
        format: this.options.format || config.format,
        // `||` on purpose: a depth of 0 falls through to the config value.
        maxDepth: this.options.maxDepth || config.maxDepth
      };
      // Progress goes to stderr so it never mixes with outline output on stdout.
      console.error('Extracting outline...');
      const outline = await extractor.extract(content, extractOptions);
      // Write output
      this.writeOutput(outline);
    } catch (error) {
      // Interpolating an Error object would print "Error: Error: …".
      const message = error instanceof Error ? error.message : String(error);
      console.error(`Error: ${message}`);
      process.exit(1);
    }
  }
}
// Run CLI
// run() reports its own failures and exits; anything that still rejects here
// is unexpected, so log it and make sure the process exits with a failure
// status instead of 0.
const cli = new CLI(process.argv.slice(2));
cli.run().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});