UNPKG

@thecodingwhale/cv-processor

Version:

CV Processor to extract structured data from PDF resumes using TypeScript

48 lines (47 loc) 1.68 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.SectionExtractor = void 0; const patterns_1 = require("../utils/patterns"); /** * Class for extracting sections from CV text */ class SectionExtractor { /** * Split CV text into sections based on common section headers */ segmentCVIntoSections(text) { // Split text into lines for processing const lines = text.split('\n'); let currentSection = 'header'; const sections = { [currentSection]: [] }; for (const line of lines) { const trimmedLine = line.trim(); if (!trimmedLine) { continue; } // Check if this line is a section header let sectionFound = false; for (const [sectionName, pattern] of Object.entries(patterns_1.Patterns.sections)) { if (pattern.test(trimmedLine) && trimmedLine.length < 50) { // Section headers are usually short currentSection = sectionName; if (!sections[currentSection]) { sections[currentSection] = []; } sectionFound = true; break; } } if (!sectionFound) { sections[currentSection].push(trimmedLine); } } // Combine lines in each section const result = {}; for (const [section, lines] of Object.entries(sections)) { result[section] = lines.join('\n'); } return result; } } exports.SectionExtractor = SectionExtractor;