UNPKG

@thecodingwhale/cv-processor

Version:

CV Processor to extract structured data from PDF resumes using TypeScript

68 lines (67 loc) 2.72 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.PersonalInfoExtractor = void 0; const nlp_1 = require("../utils/nlp"); const patterns_1 = require("../utils/patterns"); /** * Class for extracting personal information from CV text */ class PersonalInfoExtractor { /** * Extract personal information from the text */ extractPersonalInfo(text) { // Use the first ~1000 chars for personal info (usually at the top) const topText = text.substring(0, 1000); // Extract name (usually one of the first person entities) const personEntities = nlp_1.NLPUtils.extractNames(topText); const name = personEntities.length > 0 ? personEntities[0] : null; // Extract email const emailMatches = text.match(patterns_1.Patterns.email); const email = emailMatches && emailMatches.length > 0 ? emailMatches[0] : null; // Extract phone const phoneMatches = text.match(patterns_1.Patterns.phone); const phone = phoneMatches && phoneMatches.length > 0 ? phoneMatches[0] : null; // Extract LinkedIn profile const linkedinMatches = text.match(patterns_1.Patterns.linkedin); let linkedin = linkedinMatches && linkedinMatches.length > 0 ? linkedinMatches[0] : null; if (linkedin && !linkedin.startsWith('http')) { linkedin = `https://${linkedin}`; } // Extract GitHub profile const githubMatches = text.match(patterns_1.Patterns.github); let github = githubMatches && githubMatches.length > 0 ? githubMatches[0] : null; if (github && !github.startsWith('http')) { github = `https://${github}`; } // Extract location (usually a GPE entity near the top) const locations = nlp_1.NLPUtils.extractLocations(topText); const location = locations.length > 0 ? locations[0] : null; return { name, email, phone, location, linkedin, github, }; } /** * Extract and clean the summary/profile section */ extractSummary(summaryText) { if (!summaryText) { return null; } // Clean up the summary text const cleanSummary = summaryText.trim(); // Limit to 500 characters if too long if (cleanSummary.length > 500) { // Try to find a good break point (end of sentence) const breakPoint = nlp_1.NLPUtils.findSentenceBreak(cleanSummary, 500); return cleanSummary.substring(0, breakPoint); } return cleanSummary; } } exports.PersonalInfoExtractor = PersonalInfoExtractor;