@thecodingwhale/cv-processor
Version:
CV Processor to extract structured data from PDF resumes using TypeScript
354 lines (353 loc) • 12.7 kB
JavaScript
;
// Mark the CommonJS `exports` object with `__esModule: true`.
// NOTE(review): presumably emitted by the TypeScript compiler for ES-module interop — confirm against the build config.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export up front as `undefined`; the class is assigned to it after its definition.
exports.AccuracyCalculator = void 0;
/**
 * Utility class to calculate accuracy scores for CV data extraction.
 *
 * Each CV section (personal info, education, experience, skills) is scored in
 * the range 0..1 from the presence of its fields, the section scores are
 * combined with configurable weights, and two additional heuristics
 * (completeness and confidence) are reported alongside the overall score.
 */
class AccuracyCalculator {
    /**
     * @param {object} [options]
     * @param {object} [options.accuracyWeights] Per-section weights with keys
     *   `personalInfo`, `education`, `experience` and `skills`. Missing keys
     *   fall back to 0.25 / 0.25 / 0.3 / 0.2 respectively. Weights are used
     *   as given; they are not normalised.
     * @param {number} [options.minAccuracyThreshold] Minimum overall score
     *   (a percentage) accepted by `meetsThreshold`. Defaults to 70.
     */
    constructor(options = {}) {
        const customWeights = options.accuracyWeights;
        // `??` instead of `||`: an explicit 0 is a legitimate weight (it
        // excludes a section from the overall score) and must not be replaced
        // by the default. Only null/undefined fall back.
        this.weights = {
            personalInfo: customWeights?.personalInfo ?? 0.25,
            education: customWeights?.education ?? 0.25,
            experience: customWeights?.experience ?? 0.3,
            skills: customWeights?.skills ?? 0.2,
        };
        // Same reasoning: a threshold of 0 ("accept everything") is valid.
        this.minAccuracyThreshold = options.minAccuracyThreshold ?? 70;
    }

    /**
     * Calculate accuracy metrics for the extracted CV data.
     *
     * Side effect: falsy `personalInfo`, `education`, `experience`, `skills`
     * and `metadata` properties on `cvData` are replaced in place with empty
     * defaults before scoring.
     *
     * @param {object} cvData Extracted CV data.
     * @returns {{score: number, completeness: number, confidence: number,
     *   fieldScores: object, missingFields: string[]}} Integer percentages
     *   plus the list of missing field paths collected while scoring.
     */
    calculateAccuracy(cvData) {
        // Ensure all required objects exist so the scorers below can rely on them.
        if (!cvData.personalInfo) {
            cvData.personalInfo = {
                name: null,
                email: null,
                phone: null,
                location: null,
                linkedin: null,
                github: null,
            };
        }
        if (!cvData.education) {
            cvData.education = [];
        }
        if (!cvData.experience) {
            cvData.experience = [];
        }
        if (!cvData.skills) {
            cvData.skills = {};
        }
        if (!cvData.metadata) {
            cvData.metadata = {
                processedDate: new Date().toISOString(),
                sourceFile: 'unknown',
            };
        }

        // The section scorers append to this list as they go; the order of the
        // calls therefore determines the order of the reported paths.
        const missingFields = [];
        const personalInfoScore = this.calculatePersonalInfoScore(cvData.personalInfo, missingFields);
        const educationScore = this.calculateEducationScore(cvData.education, missingFields);
        const experienceScore = this.calculateExperienceScore(cvData.experience, missingFields);
        const skillsScore = this.calculateSkillsScore(cvData.skills, missingFields);

        const fieldScores = {
            personalInfo: Math.round(personalInfoScore * 100),
            education: Math.round(educationScore * 100),
            experience: Math.round(experienceScore * 100),
            skills: Math.round(skillsScore * 100),
        };

        // Weighted sum of the raw (unrounded) section scores.
        const overallScore = personalInfoScore * this.weights.personalInfo +
            educationScore * this.weights.education +
            experienceScore * this.weights.experience +
            skillsScore * this.weights.skills;

        const completeness = this.calculateCompleteness(cvData, missingFields);
        const confidence = this.calculateConfidence(cvData);

        // All three headline numbers are reported as whole percentages.
        return {
            score: Math.round(overallScore * 100),
            completeness: Math.round(completeness * 100),
            confidence: Math.round(confidence * 100),
            fieldScores,
            missingFields,
        };
    }

    /**
     * Check if a result meets the configured minimum accuracy threshold.
     *
     * @param {{score: number}} accuracy Result of `calculateAccuracy`.
     * @returns {boolean} True when `accuracy.score` is at least the threshold.
     */
    meetsThreshold(accuracy) {
        return accuracy.score >= this.minAccuracyThreshold;
    }

    /**
     * Score the personal information section.
     *
     * Required fields (name, email, phone) share 70% of the score; optional
     * fields (location, linkedin, github, summary) share the remaining 30%.
     * Only missing required fields are reported.
     *
     * @param {object} personalInfo Personal info record.
     * @param {string[]} missingFields Collector; missing paths are appended.
     * @returns {number} Score in the range 0..1 (up to floating-point error).
     */
    calculatePersonalInfoScore(personalInfo, missingFields) {
        if (!personalInfo) {
            missingFields.push('personalInfo');
            return 0;
        }
        const requiredFields = ['name', 'email', 'phone'];
        const optionalFields = ['location', 'linkedin', 'github', 'summary'];
        const requiredShare = 0.7 / requiredFields.length;
        const optionalShare = 0.3 / optionalFields.length;

        let score = 0;
        for (const field of requiredFields) {
            if (personalInfo[field]) {
                score += requiredShare;
            }
            else {
                missingFields.push(`personalInfo.${field}`);
            }
        }
        for (const field of optionalFields) {
            if (personalInfo[field]) {
                score += optionalShare;
            }
        }
        return score;
    }

    /**
     * Score the education section as the average of its entries.
     *
     * Per entry, required fields (institution, degree, fieldOfStudy) share
     * 75% and optional fields (startDate, endDate, gpa, location) share 25%.
     *
     * @param {object[]} education Education entries.
     * @param {string[]} missingFields Collector; missing paths are appended.
     * @returns {number} Average entry score, capped at 1. Returns 0 and
     *   records `education` when the list is missing or empty.
     */
    calculateEducationScore(education, missingFields) {
        if (!education || education.length === 0) {
            missingFields.push('education');
            return 0;
        }
        const requiredFields = ['institution', 'degree', 'fieldOfStudy'];
        const optionalFields = ['startDate', 'endDate', 'gpa', 'location'];
        const requiredShare = 0.75 / requiredFields.length;
        const optionalShare = 0.25 / optionalFields.length;

        let totalScore = 0;
        education.forEach((edu, index) => {
            let entryScore = 0;
            for (const field of requiredFields) {
                if (edu[field]) {
                    entryScore += requiredShare;
                }
                else {
                    missingFields.push(`education[${index}].${field}`);
                }
            }
            for (const field of optionalFields) {
                if (edu[field]) {
                    entryScore += optionalShare;
                }
            }
            totalScore += entryScore;
        });
        return Math.min(1, totalScore / Math.max(1, education.length));
    }

    /**
     * Score the experience section as the average of its entries.
     *
     * Per entry: company and position share 60%, startDate and endDate share
     * 30%, location contributes 10%. A non-empty `description` multiplies the
     * entry score by up to 1.2; a missing or empty one is reported as missing.
     *
     * @param {object[]} experience Experience entries.
     * @param {string[]} missingFields Collector; missing paths are appended.
     * @returns {number} Average entry score, capped at 1. Returns 0 and
     *   records `experience` when the list is missing or empty.
     */
    calculateExperienceScore(experience, missingFields) {
        if (!experience || experience.length === 0) {
            missingFields.push('experience');
            return 0;
        }
        const requiredFields = ['company', 'position'];
        const dateFields = ['startDate', 'endDate'];
        const otherFields = ['location'];
        const requiredShare = 0.6 / requiredFields.length;
        const dateShare = 0.3 / dateFields.length;
        const otherShare = 0.1 / otherFields.length;

        let totalScore = 0;
        experience.forEach((exp, index) => {
            let entryScore = 0;
            for (const field of requiredFields) {
                if (exp[field]) {
                    entryScore += requiredShare;
                }
                else {
                    missingFields.push(`experience[${index}].${field}`);
                }
            }
            for (const field of dateFields) {
                if (exp[field]) {
                    entryScore += dateShare;
                }
            }
            for (const field of otherFields) {
                if (exp[field]) {
                    entryScore += otherShare;
                }
            }
            if (exp.description && exp.description.length > 0) {
                // Bonus grows by 2% per unit of `description.length` and
                // saturates at +20%.
                // NOTE(review): `length` counts characters for a string and
                // items for an array — confirm which shape the extractor emits.
                entryScore *= 1 + Math.min(0.2, exp.description.length * 0.02);
            }
            else {
                missingFields.push(`experience[${index}].description`);
            }
            totalScore += entryScore;
        });
        // The cap is applied to the average, so a bonus on one entry can
        // offset a weaker sibling entry.
        return Math.min(1, totalScore / Math.max(1, experience.length));
    }

    /**
     * Score the skills section.
     *
     * The base score is the fraction of the five known skill lists that are
     * non-empty; each populated list adds a bonus of 0.1 (0.05 when it has
     * fewer than five items).
     *
     * @param {object} skills Skills record keyed by section name.
     * @param {string[]} missingFields Collector; `skills` is appended when no
     *   section has content.
     * @returns {number} Score in the range 0..1.
     */
    calculateSkillsScore(skills, missingFields) {
        if (!skills) {
            missingFields.push('skills');
            return 0;
        }
        const skillSections = [
            'programmingLanguages',
            'frameworks',
            'tools',
            'softSkills',
            'other',
        ];
        let bonus = 0;
        let populatedSections = 0;
        for (const section of skillSections) {
            const entries = skills[section];
            if (entries && entries.length > 0) {
                populatedSections++;
                // Longer lists earn the larger bonus.
                bonus += entries.length >= 5 ? 0.2 : 0.1;
            }
        }
        if (populatedSections === 0) {
            missingFields.push('skills');
            return 0;
        }
        const baseScore = populatedSections / skillSections.length;
        // Bonuses count at half weight; the total is capped at 1.
        return Math.min(1, baseScore + bonus * 0.5);
    }

    /**
     * Estimate overall completeness of the CV data.
     *
     * @param {object} cvData Extracted CV data.
     * @param {string[]} missingFields Missing paths collected while scoring.
     * @returns {number} Fraction of the estimated field count that is not
     *   listed as missing.
     */
    calculateCompleteness(cvData, missingFields) {
        const totalFields = this.countTotalFields(cvData);
        const populatedFields = totalFields - missingFields.length;
        return populatedFields / totalFields;
    }

    /**
     * Estimate the total number of fields in the CV data.
     *
     * This is a simplified calculation: fixed counts for personal info (7)
     * and skills (5), plus 7 per education entry and 6 per experience entry.
     *
     * @param {object} cvData Extracted CV data.
     * @returns {number} Estimated field count (always at least 12).
     */
    countTotalFields(cvData) {
        const personalInfoFields = 7;
        const eduFieldsPerEntry = 7;
        const expFieldsPerEntry = 6;
        const skillSections = 5;
        return personalInfoFields +
            (cvData.education || []).length * eduFieldsPerEntry +
            (cvData.experience || []).length * expFieldsPerEntry +
            skillSections;
    }

    /**
     * Heuristic confidence in the extracted data.
     *
     * Starts at 0.8, is scaled by 0.9 when `metadata.provider` contains the
     * substring "ai" (case-insensitive), and by 1.1 for each of the date
     * consistency and data length checks that passes.
     *
     * @param {object} cvData Extracted CV data.
     * @returns {number} Confidence, capped at 1.
     */
    calculateConfidence(cvData) {
        let confidence = 0.8;
        const provider = cvData.metadata?.provider;
        // Guard the type: a non-string provider must not throw on toLowerCase().
        if (typeof provider === 'string' && provider.toLowerCase().includes('ai')) {
            confidence *= 0.9;
        }
        if (this.hasConsistentDates(cvData)) {
            confidence *= 1.1;
        }
        if (this.hasReasonableDataLengths(cvData)) {
            confidence *= 1.1;
        }
        return Math.min(1, confidence);
    }

    /**
     * Check that no education or experience entry starts after it ends.
     *
     * Entries with a missing date, or with a date that `Date` cannot parse,
     * are skipped rather than treated as inconsistent.
     *
     * @param {object} cvData Extracted CV data.
     * @returns {boolean} False as soon as one entry has start > end.
     */
    hasConsistentDates(cvData) {
        const datedEntries = [
            ...(cvData.education || []),
            ...(cvData.experience || []),
        ];
        for (const entry of datedEntries) {
            if (!entry.startDate || !entry.endDate) {
                continue;
            }
            const start = new Date(entry.startDate);
            const end = new Date(entry.endDate);
            const bothValid = !Number.isNaN(start.getTime()) && !Number.isNaN(end.getTime());
            if (bothValid && start > end) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check that selected fields are not implausibly long.
     *
     * Limits: 100 for personal name and email, 200 for an education entry's
     * institution and degree (compared against `.length`).
     *
     * @param {object} cvData Extracted CV data.
     * @returns {boolean} False when any checked field exceeds its limit.
     */
    hasReasonableDataLengths(cvData) {
        const personalInfo = cvData.personalInfo;
        if (personalInfo?.name && personalInfo.name.length > 100) {
            return false;
        }
        if (personalInfo?.email && personalInfo.email.length > 100) {
            return false;
        }
        for (const edu of cvData.education || []) {
            if (edu.institution && edu.institution.length > 200) {
                return false;
            }
            if (edu.degree && edu.degree.length > 200) {
                return false;
            }
        }
        return true;
    }
}
// Publish the class as the module's named export `AccuracyCalculator`.
exports.AccuracyCalculator = AccuracyCalculator;