UNPKG

@thecodingwhale/cv-processor

Version:

CV Processor to extract structured data from PDF resumes using TypeScript

123 lines (122 loc) 4.61 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.ExperienceExtractor = void 0; const nlp_1 = require("../utils/nlp"); const patterns_1 = require("../utils/patterns"); /** * Class for extracting work experience information from CV text */ class ExperienceExtractor { /** * Extract work experience information from the experience section text */ extractWorkExperience(experienceText) { if (!experienceText) { return []; } // Split by double newlines to separate individual entries const experiences = experienceText .split(/\n\s*\n/) .filter((exp) => exp.trim()); const entries = []; for (const expText of experiences) { const entry = { company: null, position: null, startDate: null, endDate: null, location: null, description: [], }; // Extract company (usually an organization entity) const organizations = nlp_1.NLPUtils.extractOrganizations(expText); if (organizations.length > 0) { entry.company = organizations[0]; } // Extract job title this.extractJobTitle(expText, entry); // Extract dates this.extractDates(expText, entry); // Extract location const locations = nlp_1.NLPUtils.extractLocations(expText); if (locations.length > 0) { entry.location = locations[0]; } // Extract description (bullet points or paragraph after title) this.extractDescription(expText, entry); entries.push(entry); } return entries; } /** * Extract job title from experience text */ extractJobTitle(expText, entry) { for (const pattern of patterns_1.Patterns.titlePatterns) { const matches = expText.match(pattern); if (matches && matches.length > 1) { entry.position = matches[1].trim(); break; } } } /** * Extract dates from experience text */ extractDates(expText, entry) { const dateMatches = expText.match(patterns_1.Patterns.date); if (dateMatches && dateMatches.length > 0) { // Check for patterns that indicate current position const isPresentOrCurrent = /present|current|now/i.test(expText); // Try to find multiple date matches const allMatches = [...expText.matchAll(new RegExp(patterns_1.Patterns.date, 'gi'))]; if (allMatches.length >= 2) { // Assume first is start date, second is end date entry.startDate = allMatches[0][0]; entry.endDate = allMatches[1][0]; } else if (allMatches.length === 1) { if (isPresentOrCurrent) { entry.startDate = allMatches[0][0]; entry.endDate = 'Present'; } else { // Just one date, assume it's the end date entry.endDate = allMatches[0][0]; } } } } /** * Extract job description from experience text */ extractDescription(expText, entry) { const lines = expText.split('\n'); let descriptionStarted = false; for (const line of lines) { const trimmedLine = line.trim(); if (!trimmedLine) { continue; } // Skip the line with company name, title, and dates if ((entry.company && trimmedLine.includes(entry.company)) || (entry.position && trimmedLine.includes(entry.position))) { continue; } // Look for bullet points or narrative descriptions if (trimmedLine.startsWith('•') || trimmedLine.startsWith('-') || trimmedLine.startsWith('*') || descriptionStarted) { descriptionStarted = true; // Clean bullet points const cleanLine = trimmedLine.replace(patterns_1.Patterns.bulletPoint, '').trim(); if (cleanLine && cleanLine.length > 10) { // Minimum meaningful description entry.description.push(cleanLine); } } } } } exports.ExperienceExtractor = ExperienceExtractor;