UNPKG

name-fixer

Version:

A fully typed library for fixing the capitalization of people's names. Based on tamtamchik's PHP library.

github.com/daniseijo/name-fixer

daniseijo/name-fixer

656 lines (654 loc) • 13.3 kB

JavaScript

"use strict";
// CommonJS/ESM interop helpers used to build the export object below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Defines every entry of `all` on `target` as an enumerable getter.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copies the own properties of `from` onto `to` as getters, skipping `except`
// and any key `to` already owns.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/index.ts
var src_exports = {};
__export(src_exports, {
  default: () => src_default,
  excludePostNominals: () => excludePostNominals,
  nameFixer: () => nameFixer,
  setOptions: () => setOptions
});
module.exports = __toCommonJS(src_exports);

/**
 * Escapes every regular-expression metacharacter in `text` so the result can
 * be interpolated into a `RegExp` source and match `text` literally.
 *
 * @param {string} text
 * @returns {string}
 */
var escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

/**
 * Holds the replacement tables, the current options and the name-fixing
 * pipeline. A shared default instance backs the module-level helpers below.
 */
var Environment = class {
  // Irish exceptions: [pattern, replacement] pairs applied after the generic
  // Mac/Mc capitalization to undo (or force) specific spellings.
  EXCEPTIONS = [
    [/\bMacEdo/, "Macedo"],
    [/\bMacEvicius/, "Macevicius"],
    [/\bMacHado/, "Machado"],
    [/\bMacHar/, "Machar"],
    [/\bMacHin/, "Machin"],
    [/\bMacHlin/, "Machlin"],
    [/\bMacIas/, "Macias"],
    [/\bMacIulis/, "Maciulis"],
    [/\bMacKie/, "Mackie"],
    [/\bMacKle/, "Mackle"],
    [/\bMacKlin/, "Macklin"],
    [/\bMacKmin/, "Mackmin"],
    [/\bMacQuarie/, "Macquarie"],
    [/\bMacOmber/, "Macomber"],
    [/\bMacIn/, "Macin"],
    [/\bMacKintosh/, "Mackintosh"],
    [/\bMacKen/, "Macken"],
    [/\bMacHen/, "Machen"],
    [/\bMacisaac/, "MacIsaac"],
    [/\bMacHiel/, "Machiel"],
    [/\bMacIol/, "Maciol"],
    [/\bMacKell/, "Mackell"],
    [/\bMacKlem/, "Macklem"],
    [/\bMacKrell/, "Mackrell"],
    [/\bMacLin/, "Maclin"],
    [/\bMacKey/, "Mackey"],
    [/\bMacKley/, "Mackley"],
    [/\bMacHell/, "Machell"],
    [/\bMacHon/, "Machon"]
  ];
  // General replacements.
  // NOTE: none of these patterns carries the `g` flag, so `String#replace`
  // rewrites only the first occurrence of each pattern in a name.
  REPLACEMENTS = [
    [/\b(Al)(\s+\w)/, "al$2"], // al Arabic or forename Al.
    [/\b(Ap)\b/, "ap"], // ap Welsh.
    [/\b(Bin|Binti|Binte)\b/, "bin"], // bin, binti, binte Arabic.
    [/\bDell([ae])\b/, "dell$1"], // della and delle Italian.
    [/\bD([aeiou])\b/, "d$1"], // da, de, di Italian; du French; do Brasil.
    [/\bD([ao]s)\b/, "d$1"], // das, dos Brasileiros.
    [/\bDe([lrn])\b/, "de$1"], // del Italian; der/den Dutch/Flemish.
    [/\bL([eo])\b/, "l$1"], // lo Italian; le French.
    [/\b(El)\b/, "el"], // el Greek or El Spanish.
    [/\b(La)\b/, "la"], // la French or La Spanish.
    [/\b(Te)([rn])\b/, "te$2"], // ten, ter Dutch/Flemish.
    [/\b(Van)(\s+\w)/, "van$2"], // van German or forename Van.
    [/\b(Von)\b/, "von"] // von Dutch/Flemish.
  ];
  // Extra replacements appended only when the `hebrew` option is on.
  HEBREW = [
    [/\b(Ben)(\s+\w)/, "ben$2"], // ben Hebrew or forename Ben.
    [/\b(Bat)(\s+\w)/, "bat$2"] // bat Hebrew or forename Bat.
  ];
  // Spanish conjunctions.
  CONJUNCTIONS = ["Y", "E", "I"];
  // Roman letters regexp (only X, L, V and I are recognised).
  ROMAN_REGEX = /\b((?:[Xx]{1,3}|[Xx][Ll]|[Ll][Xx]{0,3})?(?:[Ii]{1,3}|[Ii][VvXx]|[Vv][Ii]{0,3})?)\b/g;
  // Post nominal values, stored in their canonical casing.
  // prettier-ignore
  POST_NOMINALS = [
    "ACILEx", "ACSM", "ADC", "AEPC", "AFC", "AFM", "AICSM", "AKC", "AM", "ARBRIBA", "ARCS", "ARRC", "ARSM", "AUH", "AUS",
    "BA", "BArch", "BCh", "BChir", "BCL", "BDS", "BEd", "BEM", "BEng", "BM", "BS", "BSc", "BSW", "BVM&S", "BVScBVetMed",
    "CB", "CBE", "CEng", "CertHE", "CGC", "CGM", "CH", "CIE", "CMarEngCMarSci", "CMarTech", "CMG", "CMILT", "CML", "CPhT", "CPLCTP", "CPM", "CQSW", "CSciTeach", "CSI", "CTL", "CVO",
    "DBE", "DBEnv", "DC", "DCB", "DCM", "DCMG", "DConstMgt", "DCVO", "DD", "DEM", "DFC", "DFM", "DIC", "Dip", "DipHE", "DipLP", "DipSW", "DL", "DLitt", "DLP", "DPhil", "DProf", "DPT", "DREst", "DSC", "DSM", "DSO", "DSocSci",
    "ED", "EdD", "EJLog", "EMLog", "EN", "EngD", "EngTech", "ERD", "ESLog",
    "FADO", "FAWM", "FBDOFCOptom", "FCEM", "FCILEx", "FCILT", "FCSP.", "FdAFdSc", "FdEng", "FFHOM", "FFPM", "FRCAFFPMRCA", "FRCGP", "FRCOG", "FRCP", "FRCPsych", "FRCS", "FRCVS", "FSCR.",
    "GBE", "GC", "GCB", "GCIE", "GCILEx", "GCMG", "GCSI", "GCVO", "GM",
    "HNC", "HNCert", "HND", "HNDip",
    "ICTTech", "IDSM", "IEng", "IMarEng", "IOMCPM", "ISO",
    "J", "JP", "JrLog",
    "KBE", "KC", "KCB", "KCIE", "KCMG", "KCSI", "KCVO", "KG", "KP", "KT",
    "LFHOM", "LG", "LJ", "LLB", "LLD", "LLM", "Log", "LPE", /* 'LT', - excluded, see initial names */ "LVO",
    "MA", "MAcc", "MAnth", "MArch", "MarEngTech", "MB", "MBA", "MBChB", "MBE", "MBEIOM", "MBiochem", "MC", "MCEM", "MCGI", "MCh.", "MChem", "MChiro", "MClinRes", "MComp", "MCOptom", "MCSM", "MCSP", "MD", "MEarthSc", "MEng", "MEnt", "MEP", "MFHOM", "MFin", "MFPM", "MGeol", "MILT", "MJur", "MLA", "MLitt", "MM", "MMath", "MMathStat", "MMORSE", "MMus", "MOst", "MP", "MPAMEd", "MPharm", "MPhil", "MPhys", "MRCGP", "MRCOG", "MRCP", "MRCPath", "MRCPCHFRCPCH", "MRCPsych", "MRCS", "MRCVS", "MRes", /* 'MS', - excluded, see initial names */ "MSc", "MScChiro", "MSci", "MSCR", "MSM", "MSocSc", "MSP", "MSt", "MSW", "MSYP", "MVO",
    "NPQH",
    "OBE", "OBI", "OM", "OND",
    "PgC", "PGCAP", "PGCE", "PgCert", "PGCHE", "PgCLTHE", "PgD", "PGDE", "PgDip", "PhD", "PLog", "PLS",
    "QAM", "QC", "QFSM", "QGM", "QHC", "QHDS", "QHNS", "QHP", "QHS", "QPM", "QS", "QTSCSci",
    "RD", "RFHN", "RGN", "RHV", "RIAI", "RIAS", "RM", "RMN", "RN", "RN1RNA", "RN2", "RN3", "RN4", "RN5", "RN6", "RN7", "RN8", "RN9", "RNC", "RNLD", "RNMH", "ROH", "RRC", "RSAW", "RSci", "RSciTech", "RSCN", "RSN", "RVM", "RVN",
    "SCHM", "SCJ", "SCLD", "SEN", "SGM", "SL", "SPANSPMH", "SPCC", "SPCN", "SPDN", "SPHP", "SPLD", "SrLog", "SRN", "SROT",
    "TD",
    "UD",
    "V100", "V200", "V300", "VC", "VD", "VetMB", "VN", "VRD"
  ];
  // Title words that are lower-cased when they stand alone.
  LOWER_CASE_WORDS = ["The", "Of", "And"];
  // Excluded post-nominals
  postNominalsExcluded = [];
  // Most two-letter words with no vowels should be kept in all caps as initials
  INITIAL_NAME_REGEX = /\b(Aj|[bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ]{2})\s/;
  INITIAL_NAME_EXCEPTIONS = [
    "Mr",
    "Ms", // Replaces Member of the Senedd post nominal.
    "Dr",
    "St",
    "Jr",
    "Sr",
    "Lt" // Replaces Lady of the Order of the Thistle post nominal.
  ];
  // Current options; every feature is enabled by default.
  options = {
    lazy: true,
    irish: true,
    spanish: true,
    roman: true,
    hebrew: true,
    postNominal: true
  };
  // Snapshot of `options`, taken by backupOptions() and re-applied by
  // restoreOptions() around each nameFixer() call.
  bckOptions = { ...this.options };

  /**
   * @param options Optional overrides merged over the defaults.
   */
  constructor(options) {
    this.setOptions(options);
  }

  /**
   * Global options setter. Merges `options` over the current options.
   *
   * @param options
   */
  setOptions(options) {
    this.options = { ...this.options, ...options };
  }

  /** Stores a shallow copy of the current options in `bckOptions`. */
  backupOptions() {
    this.bckOptions = { ...this.options };
  }

  /** Replaces the current options with a shallow copy of `bckOptions`. */
  restoreOptions() {
    this.options = { ...this.bckOptions };
  }

  /**
   * Global post-nominals exclusions setter. Appends `values` to the list of
   * post-nominals that fixPostNominal() skips.
   *
   * @param values A single post-nominal or an array of them.
   * @returns void
   */
  excludePostNominals(values) {
    this.postNominalsExcluded = this.postNominalsExcluded.concat(values);
  }

  /**
   * Main function for NameFixer.
   *
   * `options` apply to this call only. They are merged over the instance
   * options for the duration of the call and always rolled back afterwards,
   * including when the lazy mixed-case short-circuit returns early or a step
   * throws.
   *
   * @param name
   * @param options
   *
   * @returns string
   */
  nameFixer(name, options = {}) {
    if (name === "") return name;
    this.backupOptions();
    try {
      this.setOptions(options);
      // Lazy mode: a name that already looks deliberately mixed-case is
      // returned untouched.
      if (this.options.lazy && this.skipMixed(name)) return name;
      name = this.capitalizeFirstLetters(name);
      name = this.lowercaseFinalS(name);
      name = this.updateIrish(name);
      for (const [pattern, replacement] of this.getReplacements()) {
        name = name.replace(pattern, replacement);
      }
      name = this.correctInitialNames(name);
      name = this.correctLowerCaseWords(name);
      return this.processOptions(name);
    } finally {
      // Runs on every exit path so per-call options never leak into later calls.
      this.restoreOptions();
    }
  }

  /**
   * Applies the optional roman / spanish / postNominal passes, in that order,
   * according to the current options.
   *
   * @param name
   *
   * @returns string
   */
  processOptions(name) {
    if (this.options.roman) {
      name = this.updateRoman(name);
    }
    if (this.options.spanish) {
      name = this.updateSpanish(name);
    }
    if (this.options.postNominal) {
      name = this.fixPostNominal(name);
    }
    return name;
  }

  /**
   * Capitalize first letters: lower-cases the whole string, then upper-cases
   * each character that starts the string or follows a delimiter.
   *
   * @param name
   *
   * @returns string
   */
  capitalizeFirstLetters(name) {
    name = name.toLowerCase();
    return name.replace(/([\s,.:;"'(-]|^)([^\s,.:;"'(-])/g, (...matches) => matches[1] + matches[2].toUpperCase());
  }

  /**
   * Lower-cases a single character that sits between an apostrophe and a
   * delimiter or the end of the string (e.g. the "s" of a possessive).
   *
   * @param name
   *
   * @returns string
   */
  lowercaseFinalS(name) {
    return name.replace(
      /'([^\s,.:;"'(-])([\s,.:;"'(-]|$)/g,
      (...matches) => "'" + matches[1].toLowerCase() + matches[2]
    );
  }

  /**
   * Define required replacements: REPLACEMENTS, plus HEBREW when the `hebrew`
   * option is enabled.
   *
   * @return array
   */
  getReplacements() {
    let replacements = this.REPLACEMENTS;
    if (this.options.hebrew) {
      replacements = replacements.concat(this.HEBREW);
    }
    return replacements;
  }

  /**
   * Update for Irish names. Does nothing when the `irish` option is off.
   *
   * @param name
   *
   * @returns string
   */
  updateIrish(name) {
    if (!this.options.irish) return name;
    // "Mac" + at least two letters + a final character outside [aciozj], or any "Mc".
    if (/.*?\bMac[A-Za-z]{2,}[^aciozj]\b/.test(name) || /.*?\bMc/.test(name)) {
      name = this.updateMac(name);
    }
    return name.replace(/\bMacmurdo/, "MacMurdo").replace(/\bMacisaac/, "MacIsaac");
  }

  /**
   * Updates irish Mac & Mc: upper-cases the letter after the first Mac/Mc
   * prefix, then applies the EXCEPTIONS table.
   *
   * @param name
   *
   * @returns string
   */
  updateMac(name) {
    name = name.replace(
      /\b(Ma?c)([A-Za-z]+)/,
      (...matches) => matches[1] + matches[2].charAt(0).toUpperCase() + matches[2].substring(1)
    );
    for (const [pattern, replacement] of this.EXCEPTIONS) {
      name = name.replace(pattern, replacement);
    }
    return name;
  }

  /**
   * Fix roman numeral names: upper-cases every whole word matched by ROMAN_REGEX.
   *
   * @param name
   *
   * @returns string
   */
  updateRoman(name) {
    return name.replace(this.ROMAN_REGEX, (...matches) => {
      return matches[0].toUpperCase();
    });
  }

  /**
   * Fix Spanish rules: lower-cases stand-alone conjunctions (Y, E, I).
   *
   * NOTE: inside the character classes `'-(` is a range covering only `'` and
   * `(`, so a hyphen does not count as a delimiter here.
   *
   * @param name
   *
   * @returns string
   */
  updateSpanish(name) {
    for (const conjunction of this.CONJUNCTIONS) {
      name = name.replace(
        new RegExp(`([\\s,.:;"'-(]|^)${conjunction}([\\s,:;"'-(]|$)`, "g"),
        (...matches) => matches[1] + conjunction.toLowerCase() + matches[2]
      );
    }
    return name;
  }

  /**
   * Correct capitalization of initial names like JJ and TJ. Only the first
   * match is considered, because INITIAL_NAME_REGEX has no `g` flag; matches
   * listed in INITIAL_NAME_EXCEPTIONS are left as they are.
   *
   * @param name
   *
   * @return string
   */
  correctInitialNames(name) {
    return name.replace(this.INITIAL_NAME_REGEX, (...matches) => {
      const match = matches[0];
      if (this.INITIAL_NAME_EXCEPTIONS.includes(matches[1])) {
        return match;
      }
      return match.toUpperCase();
    });
  }

  /**
   * Correct lower-case words of titles (The, Of, And) when they stand alone
   * between delimiters.
   *
   * @param name
   *
   * @return string
   */
  correctLowerCaseWords(name) {
    for (const lowerCase of this.LOWER_CASE_WORDS) {
      name = name.replace(
        new RegExp(`([\\s,.:;"'-(]|^)${lowerCase}([\\s,.:;"'-(]|$)`, "g"),
        (...matches) => matches[1] + lowerCase.toLowerCase() + matches[2]
      );
    }
    return name;
  }

  /**
   * Fix post-nominal letter cases: each non-excluded post-nominal that appears
   * in its capitalizeFirstLetters() form between delimiters is rewritten to
   * its canonical casing.
   *
   * @param name
   * @returns string
   */
  fixPostNominal(name) {
    const postNominals = this.POST_NOMINALS.filter((x) => !this.postNominalsExcluded.includes(x));
    for (const postNominal of postNominals) {
      // Escape so that entries such as "FCSP." or "MCh." match their literal
      // dot instead of any character.
      const needle = escapeRegExp(this.capitalizeFirstLetters(postNominal));
      name = name.replace(
        new RegExp(`([\\s,.:;"'-(]|^)${needle}([\\s,.:;"'-(]|$)`, "g"),
        (...matches) => matches[1] + postNominal + matches[2]
      );
    }
    return name;
  }

  /**
   * Skip if string is mixed case: true when the first character is not
   * lower-case and the name is neither entirely lower-case nor entirely
   * upper-case.
   *
   * @param name Non-empty string.
   *
   * @returns bool
   */
  skipMixed(name) {
    const firstLetterLower = name[0] === name[0].toLowerCase();
    const allLowerOrUpper = name.toLowerCase() === name || name.toUpperCase() === name;
    return !(firstLetterLower || allLowerOrUpper);
  }
};

// Shared instance behind the module-level convenience functions.
var defaultEnvironment = new Environment();
var setOptions = (options) => defaultEnvironment.setOptions(options);
var excludePostNominals = (values) => defaultEnvironment.excludePostNominals(values);
var nameFixer = (name, options) => defaultEnvironment.nameFixer(name, options);
var src_default = Environment;
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  excludePostNominals,
  nameFixer,
  setOptions
});