@thecodingwhale/cv-processor
Version:
CV Processor to extract structured data from PDF resumes using TypeScript
48 lines (47 loc) • 1.68 kB
JavaScript
;
// Flag this CommonJS module with `__esModule` (the marker interop helpers
// look for on modules that were authored with ES module syntax).
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the export slot; the real value is assigned at the bottom of
// the file, after the class has been defined.
exports.SectionExtractor = void 0;
// Supplies `Patterns.sections`: an object whose keys are section names and
// whose values are matched against each line via `.test(...)`.
const patterns_1 = require("../utils/patterns");
/**
 * Lines whose trimmed length is at or above this limit are never treated as
 * section headers, even if they match a section pattern.
 */
const MAX_SECTION_HEADER_LENGTH = 50;
/**
 * Class for extracting sections from CV text
 */
class SectionExtractor {
    /**
     * Split CV text into sections based on common section headers.
     *
     * The text is processed line by line. Blank lines are dropped and every
     * remaining line is trimmed. A short line that matches one of the patterns
     * in `Patterns.sections` switches the current section and is itself not
     * included in the output; any other line is appended to the current
     * section. Lines seen before the first header are collected under
     * `header`. A header that appears more than once keeps appending to the
     * same section.
     *
     * @param {string} text - Raw CV text, with lines separated by `\n`.
     * @returns {Object<string, string>} Section name mapped to that section's
     *   lines joined with `\n`. The `header` key is always present.
     * @throws {TypeError} If `text` is not a string.
     */
    segmentCVIntoSections(text) {
        if (typeof text !== 'string') {
            throw new TypeError('segmentCVIntoSections expects text to be a string');
        }
        const sectionPatterns = Object.entries(patterns_1.Patterns.sections);
        // A Map keeps section names from colliding with Object.prototype
        // members (e.g. a section called "constructor").
        const linesBySection = new Map([['header', []]]);
        let currentSection = 'header';
        for (const rawLine of text.split('\n')) {
            const line = rawLine.trim();
            if (!line) {
                continue;
            }
            // Section headers are usually short, so skip the pattern scan
            // entirely for long lines.
            const matchedEntry = line.length < MAX_SECTION_HEADER_LENGTH
                ? sectionPatterns.find(([, pattern]) => {
                    // Global/sticky regexes remember where the previous match
                    // ended; rewind so every line is tested from its start.
                    if (pattern.global || pattern.sticky) {
                        pattern.lastIndex = 0;
                    }
                    return pattern.test(line);
                })
                : undefined;
            if (matchedEntry === undefined) {
                linesBySection.get(currentSection).push(line);
                continue;
            }
            // The header line only switches sections; it is not stored.
            [currentSection] = matchedEntry;
            if (!linesBySection.has(currentSection)) {
                linesBySection.set(currentSection, []);
            }
        }
        // Combine lines in each section
        const result = {};
        for (const [section, sectionLines] of linesBySection) {
            result[section] = sectionLines.join('\n');
        }
        return result;
    }
}
// Publish the class on the CommonJS exports object.
exports.SectionExtractor = SectionExtractor;