UNPKG

@thecodingwhale/cv-processor

Version:

CV Processor to extract structured data from PDF resumes using TypeScript

132 lines (128 loc) 5.66 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.AISkillsExtractor = void 0; const AIPatternExtractor_1 = require("../utils/AIPatternExtractor"); /** * Class for extracting and categorizing skills from CV text using AI */ class AISkillsExtractor { constructor(aiProvider) { this.tokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0, estimatedCost: 0, }; this.aiProvider = aiProvider; this.patternExtractor = new AIPatternExtractor_1.AIPatternExtractor(aiProvider); } /** * Extract skills from CV text using AI * @param skillsText The skills section text from the CV * @param industryContext Optional industry context for better extraction * @returns Structured Skills object */ async extractSkills(skillsText, industryContext = 'film and television') { if (!skillsText) { return {}; } try { // Define a schema for skills extraction that matches the Skills interface // but adds industry-specific categories const skillsSchema = { type: 'object', properties: { // Standard skill categories programmingLanguages: { type: 'array', items: { type: 'string' } }, frameworks: { type: 'array', items: { type: 'string' } }, tools: { type: 'array', items: { type: 'string' } }, softSkills: { type: 'array', items: { type: 'string' } }, // Entertainment industry skill categories actingStyles: { type: 'array', items: { type: 'string' } }, dialects: { type: 'array', items: { type: 'string' } }, languages: { type: 'array', items: { type: 'string' } }, performanceSkills: { type: 'array', items: { type: 'string' } }, movementSkills: { type: 'array', items: { type: 'string' } }, musicalAbilities: { type: 'array', items: { type: 'string' } }, danceStyles: { type: 'array', items: { type: 'string' } }, combatSkills: { type: 'array', items: { type: 'string' } }, specializedSkills: { type: 'array', items: { type: 'string' } }, // Catch-all category other: { type: 'array', items: { type: 'string' } }, }, }; // Create a prompt that's specific to the entertainment industry const instructions = ` You are a CV parser specializing in the ${industryContext} industry. Extract and categorize skills from the following skills section text. Focus on: 1. Acting styles and techniques (method, classical, improv, etc.) 2. Dialects and accents the person can perform 3. Languages they speak 4. Performance skills (stage combat, singing, etc.) 5. Movement skills (dance, physical theater, etc.) 6. Musical abilities (instruments, vocal range, etc.) 7. Dance styles they're proficient in 8. Combat/stunts capabilities 9. Any other specialized skills relevant to ${industryContext} Only include skills actually mentioned in the text, not every possible skill. Categorize each skill appropriately. If a skill doesn't fit in a specific category, put it in "other". Return only valid categories that have at least one skill. `; // Get skills using AI extraction const extractedSkills = await this.aiProvider.extractStructuredData(skillsText, skillsSchema, instructions); // Track token usage if (extractedSkills.tokenUsage) { this.addTokenUsage(extractedSkills.tokenUsage); } // Remove empty categories Object.keys(extractedSkills).forEach((key) => { if (key === 'tokenUsage') return; const typedKey = key; if (!extractedSkills[typedKey] || (Array.isArray(extractedSkills[typedKey]) && extractedSkills[typedKey].length === 0)) { delete extractedSkills[typedKey]; } }); // Return skills with token usage return { ...extractedSkills, tokenUsage: this.tokenUsage, }; } catch (error) { console.error('Error extracting skills with AI:', error); // Fallback to empty skills object with token usage info return { tokenUsage: this.tokenUsage }; } } /** * Add token usage from a response to the running total */ addTokenUsage(usage) { this.tokenUsage.promptTokens += usage.promptTokens || 0; this.tokenUsage.completionTokens += usage.completionTokens || 0; this.tokenUsage.totalTokens += usage.totalTokens || 0; this.tokenUsage.estimatedCost = (this.tokenUsage.estimatedCost || 0) + (usage.estimatedCost || 0); } /** * Get token usage statistics */ getTokenUsage() { return { ...this.tokenUsage }; } /** * Reset token usage statistics */ resetTokenUsage() { this.tokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0, estimatedCost: 0, }; } } exports.AISkillsExtractor = AISkillsExtractor;