UNPKG

@thecodingwhale/cv-processor

Version:

CV Processor to extract structured data from PDF resumes using TypeScript

172 lines (171 loc) 7.38 kB
"use strict";
// Module-interop helpers. These look like the standard TypeScript-emitted
// CommonJS shims (namespace import support); they are kept token-for-token.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
      desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.CVProcessor = void 0;
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const EducationExtractor_1 = require("./extractors/EducationExtractor");
const ExperienceExtractor_1 = require("./extractors/ExperienceExtractor");
const PersonalInfoExtractor_1 = require("./extractors/PersonalInfoExtractor");
const SectionExtractor_1 = require("./extractors/SectionExtractor");
const SkillsExtractor_1 = require("./extractors/SkillsExtractor");
const TextExtractor_1 = require("./extractors/TextExtractor");
const AccuracyCalculator_1 = require("./utils/AccuracyCalculator");
/**
 * Main CV Processor class to extract structured data from PDF resumes.
 *
 * Wires together the text, section, personal-info, education, experience and
 * skills extractors plus an AccuracyCalculator, and exposes helpers to run the
 * pipeline and persist the result as JSON.
 */
class CVProcessor {
    /**
     * Initialize the CV processor.
     *
     * @param {object} [options] - Forwarded as-is to the AccuracyCalculator constructor.
     * @param {boolean} [options.verbose] - Enables extra console logging; defaults to false.
     * @param {number} [options.minAccuracyThreshold] - Threshold stored on this
     *   instance and reported in warning messages; defaults to 70.
     */
    constructor(options = {}) {
        this.textExtractor = new TextExtractor_1.TextExtractor();
        this.sectionExtractor = new SectionExtractor_1.SectionExtractor();
        this.personalInfoExtractor = new PersonalInfoExtractor_1.PersonalInfoExtractor();
        this.educationExtractor = new EducationExtractor_1.EducationExtractor();
        this.experienceExtractor = new ExperienceExtractor_1.ExperienceExtractor();
        this.skillsExtractor = new SkillsExtractor_1.SkillsExtractor();
        this.accuracyCalculator = new AccuracyCalculator_1.AccuracyCalculator(options);
        this.verbose = options.verbose || false;
        // Use ?? rather than || so that an explicit threshold of 0 (which
        // setMinAccuracyThreshold accepts as valid) is not replaced by 70.
        this.minAccuracyThreshold = options.minAccuracyThreshold ?? 70;
        if (this.verbose) {
            console.log('CV Processor initialized');
        }
    }
    /**
     * Process a CV PDF and extract structured information.
     *
     * @param {string} pdfPath - Path to the PDF; passed to the text extractor
     *   and its basename is recorded in the result metadata.
     * @returns {Promise<object>} Object with `personalInfo`, `education`,
     *   `experience`, `skills`, `metadata` and `accuracy` properties.
     */
    async processCv(pdfPath) {
        console.log(`Processing CV: ${pdfPath}`);
        // Extract text from PDF
        const text = await this.textExtractor.extractTextFromPDF(pdfPath);
        // Segment into sections
        const sections = this.sectionExtractor.segmentCVIntoSections(text);
        // Extract information from each section; absent sections are passed
        // as '' (header) or null (education / experience / skills).
        const personalInfo = this.personalInfoExtractor.extractPersonalInfo(sections.header || '');
        if (sections.summary) {
            personalInfo.summary = this.personalInfoExtractor.extractSummary(sections.summary);
        }
        const education = this.educationExtractor.extractEducation(sections.education || null);
        const experience = this.experienceExtractor.extractWorkExperience(sections.experience || null);
        const skills = this.skillsExtractor.extractSkills(sections.skills || null);
        // Build complete CV data
        const cvData = {
            personalInfo,
            education,
            experience,
            skills,
            metadata: {
                processedDate: new Date().toISOString(),
                sourceFile: path.basename(pdfPath),
                provider: 'traditional',
                model: 'rule-based',
            },
        };
        // Calculate accuracy score
        const accuracy = this.accuracyCalculator.calculateAccuracy(cvData);
        cvData.accuracy = accuracy;
        if (this.verbose) {
            console.log(`CV Accuracy Score: ${accuracy.score}`);
            console.log(`Completeness: ${accuracy.completeness}`);
            console.log(`Confidence: ${accuracy.confidence}`);
            if (accuracy.missingFields.length > 0) {
                console.log('Missing Fields:', accuracy.missingFields);
            }
            if (!this.accuracyCalculator.meetsThreshold(accuracy)) {
                console.warn(`Warning: CV data does not meet minimum accuracy threshold of ${this.minAccuracyThreshold}%`);
            }
        }
        return cvData;
    }
    /**
     * Save CV data to a JSON file.
     *
     * The file is NOT written to `outputPath` verbatim: provider, model and a
     * timestamp are inserted between the base name and the extension.
     *
     * @param {object} cvData - Data to serialize; `metadata.provider`,
     *   `metadata.model` and `accuracy` are read when present.
     * @param {string} outputPath - Desired output path used as a template
     *   (directory, base name and extension are reused).
     * @throws Rethrows any error raised while building the path or writing the file.
     */
    saveToJson(cvData, outputPath) {
        try {
            // Generate a filename that includes provider, model, and timestamp.
            // ':' and '.' in the ISO timestamp are swapped for '-'.
            const timestamp = new Date()
                .toISOString()
                .replace(/:/g, '-')
                .replace(/\./g, '-');
            const providerName = cvData.metadata?.provider || 'unknown';
            const modelName = cvData.metadata?.model || 'unknown';
            // Extract base path and extension
            const outputDir = path.dirname(outputPath);
            const outputBaseName = path.basename(outputPath, path.extname(outputPath));
            const outputExt = path.extname(outputPath);
            // Create filename with provider, model, and timestamp
            const newOutputPath = path.join(outputDir, `${outputBaseName}_${providerName}_${modelName}_${timestamp}${outputExt}`);
            fs.writeFileSync(newOutputPath, JSON.stringify(cvData, null, 2));
            console.log(`Results saved to ${newOutputPath}`);
            // Log accuracy information if available
            if (cvData.accuracy) {
                console.log(`CV Accuracy: ${cvData.accuracy.score}%`);
                if (!this.accuracyCalculator.meetsThreshold(cvData.accuracy)) {
                    console.warn(`Warning: This CV scored below the minimum accuracy threshold (${this.minAccuracyThreshold}%)`);
                }
            }
        }
        catch (error) {
            console.error(`Error saving JSON file: ${error}`);
            throw error;
        }
    }
    /**
     * Describe the extraction backend used by this processor.
     *
     * @returns {{provider: string, model: string}} Always
     *   `{ provider: 'traditional', model: 'rule-based' }`.
     */
    getModelInfo() {
        return {
            provider: 'traditional',
            model: 'rule-based',
        };
    }
    /**
     * Check if the CV meets the minimum accuracy threshold.
     *
     * @param {object} cvData - CV data, normally produced by processCv.
     * @returns {boolean} false when `cvData.accuracy` is missing; otherwise
     *   whatever the AccuracyCalculator's `meetsThreshold` returns.
     */
    meetsAccuracyThreshold(cvData) {
        if (!cvData.accuracy) {
            return false;
        }
        return this.accuracyCalculator.meetsThreshold(cvData.accuracy);
    }
    /**
     * Set minimum accuracy threshold.
     *
     * NOTE(review): this only updates the field on this object, which is used
     * in warning messages. Whether the AccuracyCalculator (constructed from the
     * original options) observes the new value is not visible here - confirm.
     *
     * @param {number} threshold - Value in the inclusive range 0..100.
     * @throws {Error} When the threshold is outside 0..100 or is not comparable
     *   as a number (e.g. NaN or undefined).
     */
    setMinAccuracyThreshold(threshold) {
        // A positive range check also rejects NaN/undefined, for which both
        // `threshold < 0` and `threshold > 100` would be false.
        const isInRange = threshold >= 0 && threshold <= 100;
        if (!isInRange) {
            throw new Error('Accuracy threshold must be between 0 and 100');
        }
        this.minAccuracyThreshold = threshold;
    }
}
exports.CVProcessor = CVProcessor;