@thecodingwhale/cv-processor
Version:
CV Processor to extract structured data from PDF resumes using TypeScript
68 lines (67 loc) • 2.72 kB
JavaScript
;
// Flag the exports object with `__esModule: true` — the marker carried by
// transpiled ES modules (presumably read by CommonJS/ESM interop helpers).
Object.defineProperty(exports, "__esModule", { value: true });
// Reserve the export slot as `undefined` up front; it is assigned the real
// class once the class definition below has been evaluated.
exports.PersonalInfoExtractor = void 0;
const nlp_1 = require("../utils/nlp");
const patterns_1 = require("../utils/patterns");
/** Number of leading characters scanned for the name and location. */
const TOP_SECTION_LENGTH = 1000;
/** Summaries longer than this many characters are truncated. */
const MAX_SUMMARY_LENGTH = 500;
/** Matches an explicit http:// or https:// scheme, in any letter case. */
const HTTP_SCHEME = /^https?:\/\//i;

/**
 * Return the first match of `pattern` in `text`, or null when nothing matches.
 *
 * Index 0 of a `String.prototype.match` result is the full text of the first
 * match for both global and non-global patterns, so either kind is accepted.
 *
 * @param {string} text - Text to search.
 * @param {RegExp} pattern - Pattern to look for.
 * @returns {string|null} The first matched substring, or null.
 */
function firstMatch(text, pattern) {
    const matches = text.match(pattern);
    return matches && matches.length > 0 ? matches[0] : null;
}

/**
 * Prefix `url` with `https://` unless it already starts with an http(s) scheme.
 *
 * The scheme test is case-insensitive so that a match such as
 * `HTTPS://example.com` is not turned into `https://HTTPS://example.com`.
 *
 * @param {string|null} url - Matched profile URL; falsy values pass through.
 * @returns {string|null} The URL with a scheme, or the falsy input unchanged.
 */
function withHttpsScheme(url) {
    if (!url) {
        return url;
    }
    return HTTP_SCHEME.test(url) ? url : `https://${url}`;
}

/**
 * Class for extracting personal information from CV text
 */
class PersonalInfoExtractor {
    /**
     * Extract personal information from the text.
     *
     * The name and location come from the NLP helpers applied to the first
     * TOP_SECTION_LENGTH characters only; email, phone, LinkedIn and GitHub
     * are the first regex match anywhere in the full text.
     *
     * @param {string} text - Raw CV text.
     * @returns {{name: (string|null), email: (string|null), phone: (string|null),
     *   location: (string|null), linkedin: (string|null), github: (string|null)}}
     *   Every field is null when it could not be found. Non-string input
     *   (including null/undefined) yields an object with all fields null
     *   instead of throwing.
     */
    extractPersonalInfo(text) {
        if (typeof text !== 'string') {
            return {
                name: null,
                email: null,
                phone: null,
                location: null,
                linkedin: null,
                github: null,
            };
        }
        // Personal details usually sit at the top of a CV, so restrict the
        // entity lookups to the opening section.
        const topText = text.substring(0, TOP_SECTION_LENGTH);
        const personEntities = nlp_1.NLPUtils.extractNames(topText);
        const name = personEntities.length > 0 ? personEntities[0] : null;
        const email = firstMatch(text, patterns_1.Patterns.email);
        const phone = firstMatch(text, patterns_1.Patterns.phone);
        // Profile links are often written without a scheme; normalise them.
        const linkedin = withHttpsScheme(firstMatch(text, patterns_1.Patterns.linkedin));
        const github = withHttpsScheme(firstMatch(text, patterns_1.Patterns.github));
        const locations = nlp_1.NLPUtils.extractLocations(topText);
        const location = locations.length > 0 ? locations[0] : null;
        return {
            name,
            email,
            phone,
            location,
            linkedin,
            github,
        };
    }
    /**
     * Extract and clean the summary/profile section.
     *
     * @param {string|null|undefined} summaryText - Raw summary section text.
     * @returns {string|null} The trimmed summary, cut at the position reported
     *   by `NLPUtils.findSentenceBreak` when it exceeds MAX_SUMMARY_LENGTH
     *   characters; null when the input is missing, not a string, or blank.
     */
    extractSummary(summaryText) {
        if (typeof summaryText !== 'string') {
            return null;
        }
        const cleanSummary = summaryText.trim();
        // Whitespace-only input carries no summary; treat it like empty input.
        if (cleanSummary.length === 0) {
            return null;
        }
        if (cleanSummary.length <= MAX_SUMMARY_LENGTH) {
            return cleanSummary;
        }
        // Try to find a good break point (end of sentence).
        const breakPoint = nlp_1.NLPUtils.findSentenceBreak(cleanSummary, MAX_SUMMARY_LENGTH);
        // NOTE(review): findSentenceBreak is defined elsewhere; it is assumed to
        // return a character index. Fall back to a hard cut if it does not
        // return a usable positive index, rather than emitting an empty or
        // untruncated summary.
        const cutoff = Number.isInteger(breakPoint) && breakPoint > 0
            ? breakPoint
            : MAX_SUMMARY_LENGTH;
        return cleanSummary.substring(0, cutoff).trim();
    }
}
// Publish the class as this module's named CommonJS export.
exports.PersonalInfoExtractor = PersonalInfoExtractor;