UNPKG

@thecodingwhale/cv-processor

Version:

CV Processor to extract structured data from PDF resumes using TypeScript

151 lines (150 loc) 7.2 kB
#!/usr/bin/env node
"use strict";
/**
 * CV Processor CLI - Extract structured data from CV/resume PDFs
 *
 * Usage:
 *   npx cv-processor-ts process input.pdf [options]
 *
 * Output:
 *   Writes the extracted CV data as JSON. Unless --output is given, the file
 *   name is the input's base name with a .json extension (input.json), or
 *   url-<timestamp>.json when the input is an http(s) URL.
 *
 * The module also re-exports AICVProcessor and ConversionType and exposes
 * processCv() for programmatic use.
 */
// --- TypeScript-emitted CommonJS/ESM interop helpers (generated code) ---
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ConversionType = exports.AICVProcessor = void 0;
// processCv is a hoisted function declaration (defined at the bottom of the file).
exports.processCv = processCv;
const commander_1 = require("commander");
const dotenv = __importStar(require("dotenv"));
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const AICVProcessor_1 = require("./AICVProcessor");
Object.defineProperty(exports, "AICVProcessor", { enumerable: true, get: function () { return AICVProcessor_1.AICVProcessor; } });
const AIProviderFactory_1 = require("./ai/AIProviderFactory");
const createCsv_1 = __importDefault(require("./cli/createCsv"));
const mergeReports_1 = __importDefault(require("./cli/mergeReports"));
const AIProvider_1 = require("./types/AIProvider");
Object.defineProperty(exports, "ConversionType", { enumerable: true, get: function () { return AIProvider_1.ConversionType; } });
const aiConfig_1 = require("./utils/aiConfig");
// Load environment variables
dotenv.config();
// Configure CLI
const program = new commander_1.Command();
program
    .name('cv-processor-ts')
    .description('Extract structured data from CV/resume PDF')
    .version('1.0.0');
program
    .command('process')
    .description('Process a CV/resume PDF file or URL')
    .argument('<input>', 'Path to the CV/resume PDF file or URL to process')
    .option('-o, --output <file>', 'Output JSON file (defaults to input filename with .json extension)')
    .option('-v, --verbose', 'Verbose output')
    .option('--use-ai [provider]', 'Use AI for processing (gemini, openai, azure, grok, aws)')
    .option('--ai-model <model>', 'AI model to use (default depends on provider)')
    .option('--accuracy-calculator [type]', 'Type of accuracy calculator to use (traditional, null-based)', 'traditional')
    .option('--conversion-type <type>', 'Type of conversion to use (pdftoimages, pdftotexts, urltotexts)', 'pdftoimages')
    .option('--instructions-path <path>', 'Path to the instructions file (defaults to instructions.txt in project root)')
    // commander invokes option parsers as (value, previousValue). Passing
    // parseInt directly would let previousValue act as the radix, so wrap it
    // and always parse as base 10.
    .option('--expected-total-fields <number>', 'Expected total number of fields for emptiness percentage calculation', (value) => Number.parseInt(value, 10))
    .action(async (input, options) => {
    try {
        // Validate input - check if it's a URL or file path
        const isUrl = input.startsWith('http://') || input.startsWith('https://');
        if (!isUrl && !fs.existsSync(input)) {
            console.error(`Error: Input file not found: ${input}`);
            process.exit(1);
        }
        // Determine output file: explicit --output wins; URLs get a
        // timestamped name; files reuse their base name with .json.
        const outputFile = options.output ||
            (isUrl
                ? `url-${Date.now()}.json`
                : `${path.basename(input, path.extname(input))}.json`);
        // Process CV
        const startTime = new Date();
        console.log(`Starting CV processing at ${startTime.toISOString()}`);
        // Use AI processing.
        // NOTE(review): --use-ai takes an optional value, so options.useAi is
        // undefined when the flag is omitted and `true` when it is given
        // without a provider name. It is passed through unchanged - confirm
        // getAIConfig/createProvider handle those cases.
        const providerType = options.useAi;
        console.log(`Using AI processing with provider: ${providerType}`);
        // Get AI configuration
        const aiConfig = (0, aiConfig_1.getAIConfig)(providerType, options.aiModel);
        // Create AI provider and processor
        const aiProvider = AIProviderFactory_1.AIProviderFactory.createProvider(providerType, aiConfig);
        const processor = new AICVProcessor_1.AICVProcessor(aiProvider, {
            verbose: options.verbose,
            instructionsPath: options.instructionsPath ||
                path.join(process.cwd(), 'instructions.txt'),
            expectedTotalFields: options.expectedTotalFields,
        });
        // Process the CV with the specified conversion type; any value other
        // than 'pdftotexts' or 'urltotexts' falls back to PdfToImages.
        const conversionType = options.conversionType === 'pdftotexts'
            ? AIProvider_1.ConversionType.PdfToTexts
            : options.conversionType === 'urltotexts'
                ? AIProvider_1.ConversionType.UrlToTexts
                : AIProvider_1.ConversionType.PdfToImages;
        console.log(`Using conversion type: ${conversionType}`);
        const cvData = await processor.processCv(input, conversionType);
        processor.saveToJson(cvData, outputFile);
        const processingTime = (new Date().getTime() - startTime.getTime()) / 1000;
        console.log(`CV processing completed in ${processingTime.toFixed(2)} seconds`);
    }
    catch (error) {
        console.error(`Error processing CV: ${error}`);
        process.exit(1);
    }
});
// Register the merge-reports command using the function from cli/mergeReports.ts
(0, mergeReports_1.default)(program);
// Register the create-csv command using the function from cli/createCsv.ts
(0, createCsv_1.default)(program);
// NOTE(review): no default command is registered here, so the subcommand
// name ('process', ...) must be given explicitly on the command line.
// The 'process' action is async, so parse with parseAsync() and handle a
// rejected promise explicitly instead of leaving it floating.
program.parseAsync(process.argv).catch((error) => {
    console.error(`Error: ${error}`);
    process.exit(1);
});
// If no arguments or if only the program name is provided, show help
if (process.argv.length <= 2) {
    program.help();
}
/**
 * Process a CV PDF and extract structured information using AI
 *
 * Thin convenience wrapper: builds an AICVProcessor from the given provider
 * and options, then delegates to its processCv method.
 *
 * @param pdfPath Path to the PDF file
 * @param aiProvider AI provider to use for processing
 * @param options Processing options forwarded to the AICVProcessor constructor (default: {})
 * @param conversionType Type of conversion to use (default: PdfToTexts)
 * @returns Promise resolving to structured CV data
 */
async function processCv(pdfPath, aiProvider, options = {}, conversionType = AIProvider_1.ConversionType.PdfToTexts) {
    const processor = new AICVProcessor_1.AICVProcessor(aiProvider, options);
    return processor.processCv(pdfPath, conversionType);
}