@lenne.tech/cli
Version:
lenne.Tech CLI: lt
172 lines (171 loc) • 8.34 kB
JavaScript
;
/**
 * Generator-based async driver (has the shape of the TypeScript compiler's
 * down-level `__awaiter` emit). Reuses an `__awaiter` already present on
 * `this` when there is one; otherwise defines it here.
 *
 * @param {*} thisArg - `this` value the generator function is invoked with.
 * @param {ArrayLike<*>|undefined} _arguments - Arguments for the generator function (defaults to none).
 * @param {Function|undefined} P - Promise constructor to use; falls back to the global `Promise`.
 * @param {Function} generator - Generator function; every yielded value is awaited.
 * @returns {Promise<*>} Resolves with the generator's return value, or rejects
 *   with whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Pass instances of P through untouched; wrap anything else in a new P.
    const toPromise = (candidate) => {
        if (candidate instanceof P) {
            return candidate;
        }
        return new P((settle) => { settle(candidate); });
    };
    if (!P) {
        P = Promise;
    }
    return new P((resolve, reject) => {
        // Finish when the generator is done, otherwise wait for the yielded value.
        const advance = (outcome) => {
            if (outcome.done) {
                resolve(outcome.value);
                return;
            }
            toPromise(outcome.value).then(resumeWithValue, resumeWithError);
        };
        // Feed a fulfilled value back into the generator.
        const resumeWithValue = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Re-throw a rejection inside the generator so its own try/catch can see it.
        const resumeWithError = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        // From here on `generator` holds the iterator, not the generator function.
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = require("fs");
const path_1 = require("path");
const marker_1 = require("../../lib/marker");
/**
* OCR command: convert PDFs (or a directory of PDFs) to clean Markdown
* using marker-pdf with Apple Silicon MPS acceleration when available.
*
* Marker is kept in `~/.lt/marker/.venv/`; it is auto-installed on the
* first run (~3 GB model download). Subsequent runs reuse the cache.
*
* Examples:
* lt tools ocr ./report.pdf
* lt tools ocr ./pdfs --output-dir ./md --workers 4
* lt tools ocr --install
* lt tools ocr --status
*/
const NewCommand = {
    alias: ['ocr', 'pdf2md'],
    description: 'OCR PDFs to Markdown via marker-pdf (MPS-accelerated on Apple Silicon)',
    hidden: false,
    name: 'ocr',
    // GluegunCommand types `run` against the base `Toolbox`, but the lt CLI
    // augments it with `helper`, `git`, etc. Cast inside so the implementation
    // remains typed against the project-specific `ExtendedGluegunToolbox` while
    // satisfying the upstream `(toolbox: Toolbox) => void` signature.
    //
    // Flow: --status → --install → validate input and options → auto-install
    // if needed → convert. Every handled path ends in `exitOrReturn`, which
    // exits the process unless `fromGluegunMenu` is set; in that case the
    // promise resolves to 'ocr'.
    run: (rawToolbox) => __awaiter(void 0, void 0, void 0, function* () {
        const toolbox = rawToolbox;
        const { parameters, print } = toolbox;
        const { error, info, spin, warning } = print;
        const { options } = parameters;
        // Status mode
        if (options.status) {
            const markerStatus = yield (0, marker_1.getMarkerStatus)();
            const autoDevice = (0, marker_1.resolveDevice)('auto');
            info('marker-pdf status:');
            info(` installed: ${markerStatus.installed ? 'yes' : 'no'}`);
            info(` python3: ${markerStatus.pythonAvailable ? 'yes' : 'no'}`);
            info(` uv: ${markerStatus.uvAvailable ? 'yes' : 'no'}`);
            info(` venv: ${markerStatus.venvPath}`);
            info(` device: ${autoDevice} (auto-detected)`);
            return exitOrReturn(toolbox, 0);
        }
        // Install-only mode
        if (options.install) {
            const installed = yield installMarkerWithSpinner(spin, error, 'Installing marker-pdf …');
            return exitOrReturn(toolbox, installed ? 0 : 1);
        }
        // Normal run: need an input path
        const inputArg = parameters.first;
        if (inputArg === undefined || inputArg === null || inputArg === '') {
            error('Missing input path. Usage:');
            info(' lt tools ocr <file.pdf|directory> Convert PDFs to Markdown');
            info(' lt tools ocr --install Install marker-pdf locally');
            info(' lt tools ocr --status Show installation status');
            info('');
            info('Options:');
            // The two defaults below mirror `defaultOutput` further down.
            info(' --output-dir <dir> Output directory (default: <dir>-MD/ for a directory, <file>.md-out/ for a single PDF)');
            info(' --workers <n> Parallel workers for batch mode (default: 3)');
            info(' --device <auto|mps|cuda|cpu> Override TORCH_DEVICE (default: auto)');
            // Skipping is ON by default (see `skipExisting`), so the usable flag is the negation.
            info(' --no-skip-existing Re-convert files that were already converted (batch mode; default: skip them)');
            info(' --keep-images Extract embedded images (default: off)');
            info(' --format <markdown|json|html|chunks> Output format (default: markdown)');
            return exitOrReturn(toolbox, 1);
        }
        // The argument parser may hand over an all-digit positional as a number
        // (TODO confirm against the gluegun version in use); path.resolve only
        // accepts strings, so coerce first.
        const inputPath = (0, path_1.resolve)(process.cwd(), String(inputArg));
        if (!(0, fs_1.existsSync)(inputPath)) {
            error(`Input not found: ${inputPath}`);
            return exitOrReturn(toolbox, 1);
        }
        // Resolve and validate options BEFORE any installation so that a typo
        // fails fast instead of after a multi-gigabyte download.
        const isDir = (0, fs_1.statSync)(inputPath).isDirectory();
        const defaultOutput = isDir ? `${inputPath}-MD` : `${inputPath}.md-out`;
        const outputDir = (0, path_1.resolve)(process.cwd(), String(firstDefined(options['output-dir'], options.outputDir, defaultOutput)));
        const workers = Number(firstDefined(options.workers, 3));
        if (!Number.isInteger(workers) || workers < 1) {
            error(`Invalid --workers value: ${String(options.workers)} (expected a positive integer)`);
            return exitOrReturn(toolbox, 1);
        }
        const skipExisting = options['skip-existing'] !== false; // default: true
        const keepImages = !!options['keep-images'];
        const format = String(firstDefined(options.format, 'markdown'));
        const device = firstDefined(options.device, 'auto');
        // Auto-install if needed
        const status = yield (0, marker_1.getMarkerStatus)();
        if (!status.installed) {
            warning('marker-pdf not yet installed — running first-time setup …');
            const installed = yield installMarkerWithSpinner(spin, error, 'Installing marker-pdf (one-time, ~3 GB model download) …');
            if (!installed) {
                return exitOrReturn(toolbox, 1);
            }
            // Re-query so a silently incomplete install is at least visible.
            const refreshed = yield (0, marker_1.getMarkerStatus)();
            if (!refreshed.installed) {
                warning('marker-pdf still reports as not installed after setup — attempting conversion anyway …');
            }
        }
        info(`OCR ${isDir ? 'batch' : 'single'} → ${outputDir}`);
        info(` device: ${(0, marker_1.resolveDevice)(device)}`);
        if (isDir) {
            info(` workers: ${workers}, skip-existing: ${skipExisting}`);
        }
        info('');
        const runSpinner = spin('Converting (may take a while on first run while models load)…');
        let lastLine = '';
        let result;
        try {
            result = yield (0, marker_1.runMarker)(inputPath, {
                device,
                disableImages: !keepImages,
                onLine: (line) => {
                    // Forward marker output to spinner text (last line) so the user sees progress
                    if (line.trim()) {
                        lastLine = line.replace(/\s+/g, ' ').trim().slice(-160);
                        runSpinner.text = lastLine;
                    }
                },
                outputDir,
                outputFormat: format,
                skipExisting,
                workers,
            });
        }
        catch (err) {
            // Without this the spinner would keep spinning and the rejection
            // would escape unhandled.
            runSpinner.fail(`marker failed to run: ${describeError(err)}`);
            return exitOrReturn(toolbox, 1);
        }
        if (result.exitCode === 0) {
            runSpinner.succeed(`Done — output in ${outputDir}`);
            return exitOrReturn(toolbox, 0);
        }
        runSpinner.fail(`marker exited with code ${result.exitCode}: ${lastLine}`);
        // A null/undefined exit code (presumably a signal-terminated child —
        // verify against runMarker) would make process.exit() report success.
        const failureCode = Number.isInteger(result.exitCode) ? result.exitCode : 1;
        return exitOrReturn(toolbox, failureCode);
    }),
};
/**
 * Ends a command invocation: exits the process with `exitCode` unless the
 * command was started from the interactive menu (`fromGluegunMenu`), in which
 * case control returns to the caller.
 *
 * @param {object} toolbox - Toolbox whose `parameters.options` is inspected.
 * @param {number} exitCode - Process exit code for the non-menu case.
 * @returns {string} Always 'ocr' (only observable in menu mode).
 */
function exitOrReturn(toolbox, exitCode) {
    if (!toolbox.parameters.options.fromGluegunMenu) {
        process.exit(exitCode);
    }
    return 'ocr';
}
/**
 * Returns the first argument that is neither `null` nor `undefined`
 * (ES2015-compatible stand-in for a `a ?? b ?? c` chain).
 *
 * @param {...*} candidates - Values in priority order.
 * @returns {*} First non-nullish candidate, or `undefined` if there is none.
 */
function firstDefined(...candidates) {
    return candidates.find((candidate) => candidate !== null && candidate !== undefined);
}
/**
 * Produces printable text for anything that can be thrown or rejected.
 *
 * @param {*} err - Caught value; not necessarily an `Error`.
 * @returns {string} `err.message` when present, otherwise `String(err)`.
 */
function describeError(err) {
    if (err !== null && err !== undefined && err.message !== undefined) {
        return String(err.message);
    }
    return String(err);
}
/**
 * Runs `installMarker` behind a spinner, mirroring progress messages into the
 * spinner text and reporting the outcome.
 *
 * @param {Function} spin - `print.spin`; called with the initial spinner text.
 * @param {Function} error - `print.error`; receives the failure reason.
 * @param {string} spinnerText - Initial spinner label.
 * @returns {Promise<boolean>} `true` if installation completed, `false` if it failed
 *   (the failure has already been reported).
 */
function installMarkerWithSpinner(spin, error, spinnerText) {
    return __awaiter(this, void 0, void 0, function* () {
        const installSpinner = spin(spinnerText);
        try {
            yield (0, marker_1.installMarker)({
                onProgress: (msg) => {
                    installSpinner.text = msg;
                },
            });
        }
        catch (err) {
            installSpinner.fail('Installation failed');
            error(describeError(err));
            return false;
        }
        installSpinner.succeed('marker-pdf installed');
        return true;
    });
}
exports.default = NewCommand;