UNPKG

@thecodingwhale/cv-processor

Version:

CV Processor to extract structured data from PDF resumes using TypeScript

57 lines (56 loc) 1.76 kB
"use strict";
// Interop helper: wraps a non-ES-module export object as `{ default: mod }`
// so that `.default` can be read uniformly below.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.NLPUtils = void 0;
const compromise_1 = __importDefault(require("compromise"));

/**
 * Static helpers for pulling entities out of free text with compromise,
 * plus a small heuristic for picking a truncation point in a summary.
 */
class NLPUtils {
    /**
     * Collect the person-name matches found in `text`.
     *
     * @param text - Input passed straight to compromise.
     * @returns Whatever `doc.people().out('array')` yields for the input.
     */
    static extractNames(text) {
        const nlp = compromise_1.default;
        const people = nlp(text).people();
        return people.out('array');
    }

    /**
     * Collect the organization matches found in `text`.
     *
     * @param text - Input passed straight to compromise.
     * @returns Whatever `doc.organizations().out('array')` yields for the input.
     */
    static extractOrganizations(text) {
        const nlp = compromise_1.default;
        const organizations = nlp(text).organizations();
        return organizations.out('array');
    }

    /**
     * Collect the place/location matches found in `text`.
     *
     * @param text - Input passed straight to compromise.
     * @returns Whatever `doc.places().out('array')` yields for the input.
     */
    static extractLocations(text) {
        const nlp = compromise_1.default;
        const places = nlp(text).places();
        return places.out('array');
    }

    /**
     * Collect the terms in `text` that match the `#Date` tag.
     *
     * @param text - Input passed straight to compromise.
     * @returns Whatever `doc.match('#Date').out('array')` yields for the input.
     */
    static extractDates(text) {
        const nlp = compromise_1.default;
        // match('#Date') is used rather than dates(), which isn't available
        // in the type definitions.
        const dateMatches = nlp(text).match('#Date');
        return dateMatches.out('array');
    }

    /**
     * Pick an index at which `text` can be cut to at most `maxLength` characters.
     *
     * @param text - The string to be truncated.
     * @param maxLength - Upper bound on the returned index.
     * @returns `text.length` when the text already fits; otherwise the index
     *   just past the last '.' within the first `maxLength` characters,
     *   provided that period lies beyond 60% of `maxLength`; otherwise
     *   `maxLength` itself.
     */
    static findSentenceBreak(text, maxLength) {
        const alreadyFits = text.length <= maxLength;
        if (alreadyFits) {
            return text.length;
        }
        const head = text.substring(0, maxLength);
        const periodIndex = head.lastIndexOf('.');
        // A period that falls too early would discard most of the allowed
        // length, so it only counts when strictly past the 60% mark.
        const earliestUsefulIndex = maxLength * 0.6;
        return periodIndex > earliestUsefulIndex ? periodIndex + 1 : maxLength;
    }
}
exports.NLPUtils = NLPUtils;