@thecodingwhale/cv-processor
Version:
CV Processor to extract structured data from PDF resumes using TypeScript
151 lines (150 loc) • 7.2 kB
JavaScript
;
/**
* CV Processor CLI - Extract structured data from CV/resume PDFs
*
* Usage:
* npx cv-processor-ts process input.pdf [options]
*
* Output:
* Creates a JSON file with the same name (input.json) containing the extracted CV data
*/
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ConversionType = exports.AICVProcessor = void 0;
exports.processCv = processCv;
const commander_1 = require("commander");
const dotenv = __importStar(require("dotenv"));
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const AICVProcessor_1 = require("./AICVProcessor");
Object.defineProperty(exports, "AICVProcessor", { enumerable: true, get: function () { return AICVProcessor_1.AICVProcessor; } });
const AIProviderFactory_1 = require("./ai/AIProviderFactory");
const createCsv_1 = __importDefault(require("./cli/createCsv"));
const mergeReports_1 = __importDefault(require("./cli/mergeReports"));
const AIProvider_1 = require("./types/AIProvider");
Object.defineProperty(exports, "ConversionType", { enumerable: true, get: function () { return AIProvider_1.ConversionType; } });
const aiConfig_1 = require("./utils/aiConfig");
// Load environment variables
dotenv.config();
// Configure CLI
const program = new commander_1.Command();
program
.name('cv-processor-ts')
.description('Extract structured data from CV/resume PDF')
.version('1.0.0');
program
.command('process')
.description('Process a CV/resume PDF file or URL')
.argument('<input>', 'Path to the CV/resume PDF file or URL to process')
.option('-o, --output <file>', 'Output JSON file (defaults to input filename with .json extension)')
.option('-v, --verbose', 'Verbose output')
.option('--use-ai [provider]', 'Use AI for processing (gemini, openai, azure, grok, aws)')
.option('--ai-model <model>', 'AI model to use (default depends on provider)')
.option('--accuracy-calculator [type]', 'Type of accuracy calculator to use (traditional, null-based)', 'traditional')
.option('--conversion-type <type>', 'Type of conversion to use (pdftoimages, pdftotexts, urltotexts)', 'pdftoimages')
.option('--instructions-path <path>', 'Path to the instructions file (defaults to instructions.txt in project root)')
.option('--expected-total-fields <number>', 'Expected total number of fields for emptiness percentage calculation', parseInt)
.action(async (input, options) => {
try {
// Validate input - check if it's a URL or file path
const isUrl = input.startsWith('http://') || input.startsWith('https://');
if (!isUrl && !fs.existsSync(input)) {
console.error(`Error: Input file not found: ${input}`);
process.exit(1);
}
// Determine output file
const outputFile = options.output ||
(isUrl
? `url-${Date.now()}.json`
: `${path.basename(input, path.extname(input))}.json`);
// Process CV
const startTime = new Date();
console.log(`Starting CV processing at ${startTime.toISOString()}`);
// Use AI processing
const providerType = options.useAi;
console.log(`Using AI processing with provider: ${providerType}`);
// Get AI configuration
const aiConfig = (0, aiConfig_1.getAIConfig)(providerType, options.aiModel);
// Create AI provider and processor
const aiProvider = AIProviderFactory_1.AIProviderFactory.createProvider(providerType, aiConfig);
const processor = new AICVProcessor_1.AICVProcessor(aiProvider, {
verbose: options.verbose,
instructionsPath: options.instructionsPath ||
path.join(process.cwd(), 'instructions.txt'),
expectedTotalFields: options.expectedTotalFields,
});
// Process the CV with the specified conversion type
const conversionType = options.conversionType === 'pdftotexts'
? AIProvider_1.ConversionType.PdfToTexts
: options.conversionType === 'urltotexts'
? AIProvider_1.ConversionType.UrlToTexts
: AIProvider_1.ConversionType.PdfToImages;
console.log(`Using conversion type: ${conversionType}`);
const cvData = await processor.processCv(input, conversionType);
processor.saveToJson(cvData, outputFile);
const processingTime = (new Date().getTime() - startTime.getTime()) / 1000;
console.log(`CV processing completed in ${processingTime.toFixed(2)} seconds`);
}
catch (error) {
console.error(`Error processing CV: ${error}`);
process.exit(1);
}
});
// Register the merge-reports command using the function from cli/mergeReports.ts
(0, mergeReports_1.default)(program);
// Register the create-csv command using the function from cli/createCsv.ts
(0, createCsv_1.default)(program);
// For backward compatibility, make 'process' the default command
program.parse(process.argv);
// If no arguments or if only the program name is provided, show help
if (process.argv.length <= 2) {
program.help();
}
/**
* Process a CV PDF and extract structured information using AI
* @param pdfPath Path to the PDF file
* @param aiProvider AI provider to use for processing
* @param options Processing options
* @param conversionType Type of conversion to use (default: PdfToTexts)
* @returns Promise resolving to structured CV data
*/
async function processCv(pdfPath, aiProvider, options = {}, conversionType = AIProvider_1.ConversionType.PdfToTexts) {
const processor = new AICVProcessor_1.AICVProcessor(aiProvider, options);
return processor.processCv(pdfPath, conversionType);
}