/*
 * Package: @thecodingwhale/cv-processor
 * Version: (not captured)
 * Description: CV Processor to extract structured data from PDF resumes using TypeScript
 * File size: 107 lines (106 loc), 3.9 kB
 * Language: JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.SkillsExtractor = void 0;
const patterns_1 = require("../utils/patterns");
/**
 * Category keys in the order they are tried. The first category that matches
 * a skill wins; a skill that matches none of them is filed under `other`.
 */
const CATEGORY_PRIORITY = ['programmingLanguages', 'frameworks', 'tools', 'softSkills'];
/** Lines at least this long are treated as prose rather than a skill list. */
const MAX_LINE_LENGTH = 100;
/** Candidates shorter than this are discarded as noise. */
const MIN_SKILL_LENGTH = 2;
/**
 * Report whether `ch` counts as part of a word: a digit, an underscore, or a
 * letter that has distinct upper/lower case forms. `undefined` (reading past
 * either end of a string) and caseless characters are not word characters.
 */
function isWordChar(ch) {
    return (ch !== undefined &&
        (/[0-9_]/.test(ch) || ch.toLowerCase() !== ch.toUpperCase()));
}
/**
 * Report whether `needle` occurs in `haystack` as a whole term, i.e. at least
 * one occurrence does not cut through a run of word characters on either edge.
 *
 * "go" is a whole term of "go lang" but not of "django"; "c++" is a whole term
 * of "c++ programming"; "node" is a whole term of "node.js".
 */
function containsWholeTerm(haystack, needle) {
    if (!needle || needle.length > haystack.length) {
        return false;
    }
    let start = haystack.indexOf(needle);
    while (start !== -1) {
        const end = start + needle.length;
        // An edge is "cut" only when word characters sit on both sides of it.
        const cutsLeft = isWordChar(haystack[start - 1]) && isWordChar(haystack[start]);
        const cutsRight = isWordChar(haystack[end - 1]) && isWordChar(haystack[end]);
        if (!cutsLeft && !cutsRight) {
            return true;
        }
        start = haystack.indexOf(needle, start + 1);
    }
    return false;
}
/**
 * Class for extracting and categorizing skills from CV text
 */
class SkillsExtractor {
    /**
     * Extract skills from the skills section and group them by category.
     *
     * The text is broken into lines (the bullet glyphs "•", "‣" and ">" also
     * act as line breaks), each line is split on "," and "|", and every
     * resulting item is assigned to the first category in
     * `patterns_1.SkillCategories` that it matches, or to `other`.
     *
     * @param {string} skillsText Raw text of the skills section.
     * @returns {Object<string, string[]>} Skills keyed by category
     *   (`programmingLanguages`, `frameworks`, `tools`, `softSkills`, `other`),
     *   in their original casing and order of appearance. Categories with no
     *   skills are omitted; an empty object is returned for empty input.
     */
    extractSkills(skillsText) {
        if (!skillsText) {
            return {};
        }
        // Turn inline bullet glyphs into line breaks so each bullet becomes a line.
        const normalizedText = skillsText
            .replace(/•/g, '\n')
            .replace(/‣/g, '\n')
            .replace(/>/g, '\n');
        const candidates = [];
        for (const rawLine of normalizedText.split('\n')) {
            // Drop a leading "-" or "*" list marker left over on the line.
            const line = rawLine.trim().replace(/^[•\-*]\s*/, '');
            // NOTE: the length cap applies to the whole line before it is split,
            // so a very long comma-separated line is dropped in its entirety.
            if (!line || line.length >= MAX_LINE_LENGTH) {
                continue;
            }
            // Items on one line are separated by commas or pipes. Slashes are
            // deliberately not separators so terms like "CI/CD" stay intact.
            for (const piece of line.split(/,|\|/)) {
                const skill = piece.trim();
                if (skill.length >= MIN_SKILL_LENGTH) {
                    candidates.push(skill);
                }
            }
        }
        // Keys are created in a fixed order so the returned object always lists
        // categories in the same sequence.
        const categorizedSkills = {
            programmingLanguages: [],
            frameworks: [],
            tools: [],
            softSkills: [],
            other: [],
        };
        for (const skill of candidates) {
            // Category entries are compared against the lowercased skill;
            // presumably they are stored lowercase — confirm in ../utils/patterns.
            const skillLower = skill.toLowerCase();
            const category = CATEGORY_PRIORITY.find((key) => this.matchesCategory(skillLower, patterns_1.SkillCategories[key])) || 'other';
            categorizedSkills[category].push(skill);
        }
        // Remove empty categories
        for (const key of Object.keys(categorizedSkills)) {
            if (categorizedSkills[key].length === 0) {
                delete categorizedSkills[key];
            }
        }
        return categorizedSkills;
    }
    /**
     * Check if a skill matches a category.
     *
     * A skill matches when it is a member of the category, or when the skill
     * and some category entry contain one another as a whole term. Plain
     * substring containment is not enough: it would, for example, match
     * "django" against an entry "go", or "communication" against "c".
     *
     * @param {string} skill Lowercased skill text.
     * @param {Set<string>} categorySkills Known skill names for one category.
     * @returns {boolean} True when the skill belongs to the category.
     */
    matchesCategory(skill, categorySkills) {
        // Direct match
        if (categorySkills.has(skill)) {
            return true;
        }
        // Partial match on whole terms, in either direction
        for (const categorySkill of categorySkills) {
            if (containsWholeTerm(skill, categorySkill) ||
                containsWholeTerm(categorySkill, skill)) {
                return true;
            }
        }
        return false;
    }
}
exports.SkillsExtractor = SkillsExtractor;