@thecodingwhale/cv-processor
Version:
CV Processor to extract structured data from PDF resumes using TypeScript
57 lines (56 loc) • 1.76 kB
JavaScript
;
// Interop helper: hand back modules flagged `__esModule` untouched, and wrap
// anything else so the value is reachable through a `default` property.
// An already-installed `this.__importDefault` takes precedence when present.
var __importDefault = (this && this.__importDefault) || function (moduleExports) {
    if (moduleExports && moduleExports.__esModule) {
        return moduleExports;
    }
    return { "default": moduleExports };
};
// Flag this module's exports object with `__esModule: true`, the marker that
// interop helpers such as `__importDefault` test for.
Object.defineProperty(exports, "__esModule", { value: true });
// Initialise the NLPUtils export slot to undefined before the class is defined.
exports.NLPUtils = void 0;
// Load compromise through the interop helper; the library entry point is then
// used as `compromise_1.default`.
const compromise_1 = __importDefault(require("compromise"));
/**
 * NLP utility functions using compromise.js
 *
 * Every extractor parses the given text with compromise, selects one kind of
 * match and returns the selection via `.out('array')`.
 */
class NLPUtils {
    /**
     * Extract person names from text
     *
     * @param {string} text - Text to analyse.
     * @returns {Array} Result of compromise's `people()` selection as an array.
     */
    static extractNames(text) {
        const doc = (0, compromise_1.default)(text);
        return doc.people().out('array');
    }
    /**
     * Extract organization names from text
     *
     * @param {string} text - Text to analyse.
     * @returns {Array} Result of compromise's `organizations()` selection as an array.
     */
    static extractOrganizations(text) {
        const doc = (0, compromise_1.default)(text);
        return doc.organizations().out('array');
    }
    /**
     * Extract places/locations from text
     *
     * @param {string} text - Text to analyse.
     * @returns {Array} Result of compromise's `places()` selection as an array.
     */
    static extractLocations(text) {
        const doc = (0, compromise_1.default)(text);
        return doc.places().out('array');
    }
    /**
     * Extract dates from text
     *
     * @param {string} text - Text to analyse.
     * @returns {Array} Terms matching compromise's `#Date` tag, as an array.
     */
    static extractDates(text) {
        const doc = (0, compromise_1.default)(text);
        // Use match('#Date') instead of dates() which isn't available in the type definitions
        return doc.match('#Date').out('array');
    }
    /**
     * Find potential sentence breaks for summary truncation
     *
     * Returns the index at which `text` should be cut so that the result is at
     * most `maxLength` UTF-16 code units long, preferring to end on a complete
     * sentence. A sentence end is a `.`, `!` or `?` (optionally followed by
     * closing quotes/brackets) that is directly followed by whitespace, so
     * periods inside tokens such as "Node.js", "3.5" or "a@b.com" are not
     * mistaken for sentence ends. A sentence end is only used when its
     * terminator lies beyond 60% of the limit; otherwise the hard limit is
     * returned.
     *
     * @param {string} text - Text that may need truncating.
     * @param {number} maxLength - Maximum length of the truncated text. It is
     *   floored and clamped to `[0, text.length]`; NaN is treated as 0.
     * @returns {number} An integer index in `[0, text.length]`, suitable for
     *   `text.substring(0, index)`.
     */
    static findSentenceBreak(text, maxLength) {
        const terminators = '.!?';
        const closers = '"\')]\u201D\u2019';
        const whitespace = /\s/;
        const length = text.length;
        // Normalise the limit so the returned index is always a valid integer
        // position, even for negative, fractional or non-numeric limits.
        let limit = Math.floor(Number(maxLength));
        if (Number.isNaN(limit)) {
            limit = 0;
        }
        limit = Math.min(Math.max(limit, 0), length);
        if (length <= limit) {
            return length;
        }
        // Only truncate at a sentence end if it keeps a significant portion.
        const minIndex = limit * 0.6;
        // Walk backwards so the latest acceptable sentence end wins.
        for (let i = limit - 1; i > minIndex; i -= 1) {
            if (!terminators.includes(text[i])) {
                continue;
            }
            // Keep closing quotes/brackets attached to the sentence they end.
            let end = i + 1;
            while (end < limit && closers.includes(text[end])) {
                end += 1;
            }
            // text is longer than limit here, so text[end] always exists.
            if (whitespace.test(text[end])) {
                return end;
            }
        }
        return limit;
    }
}
// Publish the class as this module's `NLPUtils` export.
exports.NLPUtils = NLPUtils;