UNPKG

@lenne.tech/cli

Version:

lenne.Tech CLI: lt

172 lines (171 loc) 8.34 kB
"use strict";
// TypeScript-emitted helper: runs a generator function as a Promise-returning
// coroutine (the down-levelled form of async/await). Kept verbatim.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = require("fs");
const path_1 = require("path");
const marker_1 = require("../../lib/marker");

/**
 * Ends the command. Terminates the process with `code` unless the command was
 * launched from the gluegun menu (`options.fromGluegunMenu`), in which case
 * the command name is returned to the caller instead.
 *
 * @param {object} toolbox - The gluegun toolbox passed to `run`.
 * @param {number} code - Process exit code to use when exiting.
 * @returns {string} Always `'ocr'` (only reached when the process is not exited).
 */
function finish(toolbox, code) {
    if (!toolbox.parameters.options.fromGluegunMenu) {
        process.exit(code);
    }
    return 'ocr';
}

/**
 * Turns any thrown value into a printable message. Falls back to `String(err)`
 * when the value has no `message` (e.g. a thrown string), so the user never
 * sees a bare "undefined".
 *
 * @param {*} err - The caught value.
 * @returns {string} Human-readable description.
 */
function describeError(err) {
    if (err !== null && err !== undefined && err.message !== undefined) {
        return String(err.message);
    }
    return String(err);
}

/**
 * Returns the first argument that is neither `null` nor `undefined`
 * (or `undefined` when there is none).
 */
function firstDefined(...candidates) {
    return candidates.find((value) => value !== null && value !== undefined);
}

/**
 * Runs `installMarker` behind a spinner, forwarding progress messages to the
 * spinner text and reporting the outcome.
 *
 * @param {object} toolbox - The gluegun toolbox (uses `print.spin` / `print.error`).
 * @param {string} label - Initial spinner text.
 * @returns {Promise<boolean>} `true` when installation finished without throwing.
 */
const installWithSpinner = (toolbox, label) => __awaiter(void 0, void 0, void 0, function* () {
    const { error, spin } = toolbox.print;
    const spinner = spin(label);
    try {
        yield (0, marker_1.installMarker)({
            onProgress: (msg) => {
                spinner.text = msg;
            },
        });
        spinner.succeed('marker-pdf installed');
        return true;
    }
    catch (err) {
        spinner.fail('Installation failed');
        error(describeError(err));
        return false;
    }
});

/**
 * OCR command: convert PDFs (or a directory of PDFs) to clean Markdown
 * using marker-pdf with Apple Silicon MPS acceleration when available.
 *
 * Marker is kept in `~/.lt/marker/.venv/`; it is auto-installed on the
 * first run (~3 GB model download). Subsequent runs reuse the cache.
 *
 * Modes (checked in this order):
 *   --status   print installation status and stop
 *   --install  install marker-pdf and stop
 *   <path>     convert the given file or directory
 *
 * Examples:
 *   lt tools ocr ./report.pdf
 *   lt tools ocr ./pdfs --output-dir ./md --workers 4
 *   lt tools ocr --install
 *   lt tools ocr --status
 */
const NewCommand = {
    alias: ['ocr', 'pdf2md'],
    description: 'OCR PDFs to Markdown via marker-pdf (MPS-accelerated on Apple Silicon)',
    hidden: false,
    name: 'ocr',
    // GluegunCommand types `run` against the base `Toolbox`, but the lt CLI
    // augments it with `helper`, `git`, etc. The parameter is therefore taken
    // as the raw toolbox and re-bound locally (the type cast from the
    // TypeScript source has no runtime footprint).
    run: (rawToolbox) => __awaiter(void 0, void 0, void 0, function* () {
        const toolbox = rawToolbox;
        const { parameters, print: { error, info, warning }, } = toolbox;
        const { options } = parameters;

        // Status mode
        if (options.status) {
            const status = yield (0, marker_1.getMarkerStatus)();
            const device = (0, marker_1.resolveDevice)('auto');
            info('marker-pdf status:');
            info(` installed: ${status.installed ? 'yes' : 'no'}`);
            info(` python3: ${status.pythonAvailable ? 'yes' : 'no'}`);
            info(` uv: ${status.uvAvailable ? 'yes' : 'no'}`);
            info(` venv: ${status.venvPath}`);
            info(` device: ${device} (auto-detected)`);
            return finish(toolbox, 0);
        }

        // Install-only mode
        if (options.install) {
            const installed = yield installWithSpinner(toolbox, 'Installing marker-pdf …');
            return finish(toolbox, installed ? 0 : 1);
        }

        // Normal run: need an input path
        const inputArg = parameters.first;
        if (!inputArg) {
            error('Missing input path. Usage:');
            info(' lt tools ocr <file.pdf|directory> Convert PDFs to Markdown');
            info(' lt tools ocr --install Install marker-pdf locally');
            info(' lt tools ocr --status Show installation status');
            info('');
            info('Options:');
            info(' --output-dir <dir> Output directory (default: <input>-MD/)');
            info(' --workers <n> Parallel workers for batch mode (default: 3)');
            info(' --device <auto|mps|cuda|cpu> Override TORCH_DEVICE (default: auto)');
            info(' --skip-existing Skip already-converted files (batch mode)');
            info(' --keep-images Extract embedded images (default: off)');
            info(' --format <markdown|json|html|chunks> Output format (default: markdown)');
            return finish(toolbox, 1);
        }
        // String(): the argument parser may hand back a non-string for purely
        // numeric arguments, which path.resolve would reject.
        const inputPath = (0, path_1.resolve)(process.cwd(), String(inputArg));
        if (!(0, fs_1.existsSync)(inputPath)) {
            error(`Input not found: ${inputPath}`);
            return finish(toolbox, 1);
        }

        // Resolve and validate options BEFORE the (potentially multi-GB)
        // auto-install so a bad flag fails fast.
        const isDir = (0, fs_1.statSync)(inputPath).isDirectory();
        const defaultOutput = isDir ? `${inputPath}-MD` : `${inputPath}.md-out`;
        const outputDir = (0, path_1.resolve)(process.cwd(), String(firstDefined(options['output-dir'], options.outputDir, defaultOutput)));
        const workers = Number(firstDefined(options.workers, 3));
        if (!Number.isInteger(workers) || workers < 1) {
            error(`Invalid --workers value: ${String(options.workers)} (expected a positive integer)`);
            return finish(toolbox, 1);
        }
        // Only an explicit `false` turns skipping off, so the default is true.
        const skipExisting = options['skip-existing'] !== false;
        const keepImages = !!options['keep-images'];
        const format = String(firstDefined(options.format, 'markdown'));
        const device = firstDefined(options.device, 'auto');

        // Auto-install if needed
        const status = yield (0, marker_1.getMarkerStatus)();
        if (!status.installed) {
            warning('marker-pdf not yet installed — running first-time setup …');
            const installed = yield installWithSpinner(toolbox, 'Installing marker-pdf (one-time, ~3 GB model download) …');
            if (!installed) {
                return finish(toolbox, 1);
            }
            // Re-check so a silently incomplete install is reported here
            // rather than as an opaque conversion failure below.
            const recheck = yield (0, marker_1.getMarkerStatus)();
            if (!recheck.installed) {
                error('marker-pdf installation could not be verified');
                return finish(toolbox, 1);
            }
        }

        info(`OCR ${isDir ? 'batch' : 'single'}: ${inputPath} → ${outputDir}`);
        info(` device: ${(0, marker_1.resolveDevice)(device)}`);
        if (isDir) {
            info(` workers: ${workers}, skip-existing: ${skipExisting}`);
        }
        info('');

        const runSpinner = toolbox.print.spin('Converting (may take a while on first run while models load)…');
        let lastLine = '';
        const result = yield (0, marker_1.runMarker)(inputPath, {
            device,
            disableImages: !keepImages,
            onLine: (line) => {
                // Forward marker output to spinner text (last line) so the user sees progress
                if (line.trim()) {
                    lastLine = line.replace(/\s+/g, ' ').trim().slice(-160);
                    runSpinner.text = lastLine;
                }
            },
            outputDir,
            outputFormat: format,
            skipExisting,
            workers,
        });

        if (result.exitCode === 0) {
            runSpinner.succeed(`Done — output in ${outputDir}`);
            return finish(toolbox, 0);
        }

        runSpinner.fail(`marker exited with code ${result.exitCode}: ${lastLine}`);
        // A null/undefined/non-integer exit code (e.g. the child was killed by
        // a signal) would make process.exit() report success; force non-zero.
        const failureCode = Number.isInteger(result.exitCode) ? result.exitCode : 1;
        return finish(toolbox, failureCode);
    }),
};
exports.default = NewCommand;