UNPKG

rosaenlg-filter

Version:

Filtering feature of RosaeNLG

92 lines 3.7 kB
"use strict";
/**
 * @license
 * Copyright 2019 Ludan Stoecklé
 * SPDX-License-Identifier: Apache-2.0
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.LanguageFilterEnglish = void 0;
const LanguageFilter_1 = require("./LanguageFilter");
const english_a_an_1 = require("english-a-an");
const aan_json_1 = __importDefault(require("english-a-an-list/dist/aan.json"));
const better_title_case_1 = __importDefault(require("better-title-case"));

/**
 * English flavour of the generic LanguageFilter.
 *
 * Adds the English-only passes visible in this class: wrapping raw numbers in
 * <protect> tags, choosing between "a" and "an", contracting possessives that
 * follow a word ending in "s", title-casing, and removing whitespace in front
 * of apostrophes.
 */
class LanguageFilterEnglish extends LanguageFilter_1.LanguageFilter {
    /**
     * @param languageCommon forwarded untouched to the LanguageFilter constructor.
     */
    constructor(languageCommon) {
        super(languageCommon);
        // NOTE(review): presumably read by the base class to enable its default
        // space/punctuation cleaning — confirm in LanguageFilter.
        this.cleanSpacesPunctuationDoDefault = true;
    }

    /**
     * Wraps numbers (optionally with comma-grouped thousands and a decimal part)
     * in <protect>…</protect>.
     *
     * The number must be preceded by a non-digit character followed by whatever
     * `constants.stdBeforeWithParenthesis` matches; both are emitted back unchanged.
     *
     * @param {string} input text to scan.
     * @returns {string} text with every matched number wrapped.
     */
    protectRawNumbers(input) {
        const numberPattern = new RegExp(`([^\\d])${this.constants.stdBeforeWithParenthesis}((\\d{1,3}(?:\\,\\d{3})*|(?:\\d+))(?:\\.\\d+)?)`, 'g');
        return input.replace(numberPattern, (_whole, lead, spacing, digits) => `${lead}${spacing}<protect>${digits}</protect>`);
    }

    /**
     * Pass applied before protection: a/an selection (with beforeProtect = true)
     * followed by the pre-protect possessive contraction.
     *
     * @param {string} input
     * @returns {string}
     */
    beforeProtect(input) {
        const withArticles = this.aAnGeneric(input, true);
        return this.enPossessivesBeforeProtect(withArticles);
    }

    /**
     * Pass applied just before unprotecting: a/an selection (with
     * beforeProtect = false) followed by the possessive contraction.
     *
     * @param {string} input
     * @returns {string}
     */
    justBeforeUnprotect(input) {
        const withArticles = this.aAnGeneric(input, false);
        return this.enPossessives(withArticles);
    }

    /**
     * Contracts "'s" into "'" when it follows an "s", e.g.
     * the <b>earrings</b> 's size => The <b>earrings</b>' size
     *
     * Whitespace and ☞ / ☜ markers between the "s" and the apostrophe are kept.
     * The "'s" must be followed by a character outside
     * `constants.tousCaracteresMinMajRe`; that character is preserved.
     *
     * @param {string} input
     * @returns {string}
     */
    enPossessives(input) {
        const pluralPossessive = new RegExp("s([☞☜\\s]*)'s([^" + this.constants.tousCaracteresMinMajRe + '])', 'g');
        return input.replace(pluralPossessive, (_whole, gap, trailing) => `s${gap}'${trailing}`);
    }

    /**
     * Variant of the possessive contraction for text still containing § and ¤
     * markers: an "s", optional whitespace, "§", optional whitespace/¤ and "'s"
     * are rewritten to "s§' " followed by the character that came after.
     *
     * @param {string} input
     * @returns {string}
     */
    enPossessivesBeforeProtect(input) {
        const markedPossessive = new RegExp("(s\\s*§[\\s¤]*'s)([^" + this.constants.tousCaracteresMinMajRe + '])', 'g');
        return input.replace(markedPossessive, (_whole, _possessive, trailing) => `s§' ${trailing}`);
    }

    /**
     * Replaces a standalone "a"/"A" with the article returned by `getAAn` for the
     * word that follows, keeping the original capitalization.
     *
     * @param {string} input text to process.
     * @param {boolean} beforeProtect forwarded to `constants.getInBetween` to pick
     *   the pattern allowed between the article and the word.
     * @returns {string}
     */
    aAnGeneric(input, beforeProtect) {
        const articlePattern = new RegExp(`([^${this.constants.tousCaracteresMinMajRe}])([aA])${this.constants.stdBetweenWithParenthesis}(${this.constants.getInBetween(beforeProtect)})([${this.constants.tousCaracteresMinMajRe}]*)`, 'g');
        return input.replace(articlePattern, (whole, lead, article, gap, preWord, word) => {
            // no following word, e.g. an orphan "a" at the very end of a text:
            // leave the match untouched
            if (word == null || word === '') {
                return whole;
            }
            const suggested = (0, english_a_an_1.getAAn)(this.dictManager.getAdjsWordsData(), aan_json_1.default, word);
            const fixedArticle = this.redoCapitalization(article, suggested);
            return `${lead}${fixedArticle}${gap}${preWord}${word}`;
        });
    }

    /**
     * Re-applies the capitalization of the original article to its replacement.
     *
     * @param {string} initial the article as found in the text.
     * @param {string} replacement the article to use instead.
     * @returns {string} `replacement` with its first character upper-cased when
     *   `initial` is exactly 'A'; otherwise `replacement` unchanged.
     */
    redoCapitalization(initial, replacement) {
        if (initial !== 'A') {
            return replacement;
        }
        // A or An...
        return replacement.substring(0, 1).toUpperCase() + replacement.substring(1);
    }

    /**
     * Title-cases the input by delegating to the `better-title-case` package.
     *
     * @param {string} input
     * @returns {string}
     */
    titlecase(input) {
        return (0, better_title_case_1.default)(input);
    }

    /**
     * Removes any whitespace placed directly before an apostrophe,
     * e.g. "the phone 's" becomes "the phone's".
     *
     * @param {string} input
     * @returns {string}
     */
    cleanSpacesPunctuationCorrect(input) {
        return input.replace(/\s*'/g, "'");
    }
}
exports.LanguageFilterEnglish = LanguageFilterEnglish;
//# sourceMappingURL=LanguageFilterEnglish.js.map