// rosaenlg-filter
// Version:
// Filtering feature of RosaeNLG
// 92 lines • 3.7 kB
// JavaScript
;
/**
* @license
* Copyright 2019 Ludan Stoecklé
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Default-import interop helper.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise defines it locally.
 *
 * `var` is kept deliberately: it stays legal if the same helper is declared
 * again in the same scope (e.g. after concatenation with other modules).
 *
 * @param {*} mod - the value returned by `require(...)`.
 * @returns {*} `mod` itself when it is truthy and carries a truthy
 *   `__esModule` property; otherwise a new object `{ "default": mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.LanguageFilterEnglish = void 0;
const LanguageFilter_1 = require("./LanguageFilter");
const english_a_an_1 = require("english-a-an");
const aan_json_1 = __importDefault(require("english-a-an-list/dist/aan.json"));
const better_title_case_1 = __importDefault(require("better-title-case"));
/**
 * English-specific text filter built on top of `LanguageFilter`.
 *
 * The methods defined here cover three concerns: a/an agreement, possessive
 * clean-up ("s's" -> "s'"), and wrapping raw numbers in `<protect>` tags.
 * All regular expressions are assembled from fragments exposed by
 * `this.constants`, which is not set in this class.
 */
class LanguageFilterEnglish extends LanguageFilter_1.LanguageFilter {
    /**
     * @param languageCommon - forwarded unchanged to the `LanguageFilter` constructor.
     */
    constructor(languageCommon) {
        super(languageCommon);
        // NOTE(review): presumably tells the base filter to also run its default
        // space/punctuation clean-up - confirm against LanguageFilter.
        this.cleanSpacesPunctuationDoDefault = true;
    }

    /**
     * Wraps numbers in `<protect>...</protect>`.
     *
     * A number is: 1-3 digits followed by comma-separated groups of 3 digits,
     * or a plain run of digits; either form may carry a `.digits` decimal part.
     * It is only matched when preceded by a non-digit character and then by
     * whatever `constants.stdBeforeWithParenthesis` matches.
     *
     * @param {string} input - text to scan.
     * @returns {string} the text with every matched number wrapped.
     */
    protectRawNumbers(input) {
        const numberPattern = new RegExp(`([^\\d])${this.constants.stdBeforeWithParenthesis}((\\d{1,3}(?:\\,\\d{3})*|(?:\\d+))(?:\\.\\d+)?)`, 'g');
        // Assumes stdBeforeWithParenthesis contributes exactly one capturing group,
        // so that the third callback argument is the number - TODO confirm.
        return input.replace(
            numberPattern,
            (_match, before1, before2, content) => `${before1}${before2}<protect>${content}</protect>`,
        );
    }

    /**
     * Runs a/an agreement (in "before protect" mode) and then the
     * before-protect possessive clean-up.
     *
     * @param {string} input - text to process.
     * @returns {string} the processed text.
     */
    beforeProtect(input) {
        const withArticles = this.aAnGeneric(input, true);
        return this.enPossessivesBeforeProtect(withArticles);
    }

    /**
     * Runs a/an agreement (not in "before protect" mode) and then the
     * possessive clean-up.
     *
     * @param {string} input - text to process.
     * @returns {string} the processed text.
     */
    justBeforeUnprotect(input) {
        const withArticles = this.aAnGeneric(input, false);
        return this.enPossessives(withArticles);
    }

    /**
     * Drops the second "s" of an "s ... 's" possessive when the following
     * character is outside `constants.tousCaracteresMinMajRe`.
     *
     * Any run of whitespace / "☞" / "☜" between the first "s" and the
     * apostrophe is kept as is.
     * Intended overall result, from the original source:
     *   the <b>earrings</b> 's size => The <b>earrings</b>' size
     *
     * @param {string} input - text to process.
     * @returns {string} the text with such possessives shortened.
     */
    enPossessives(input) {
        const possessivePattern = new RegExp(`s([☞☜\\s]*)'s([^${this.constants.tousCaracteresMinMajRe}])`, 'g');
        return input.replace(possessivePattern, (_match, between, after) => `s${between}'${after}`);
    }

    /**
     * Possessive clean-up variant used before protection.
     *
     * Matches "s", optional whitespace, "§", any run of whitespace / "¤", then
     * "'s", followed by one character outside `constants.tousCaracteresMinMajRe`.
     * The whole match is rewritten to "s§' " plus that following character.
     *
     * @param {string} input - text to process.
     * @returns {string} the text with such possessives rewritten.
     */
    enPossessivesBeforeProtect(input) {
        const possessivePattern = new RegExp(`(s\\s*§[\\s¤]*'s)([^${this.constants.tousCaracteresMinMajRe}])`, 'g');
        // The first group (the possessive itself) is replaced wholesale, so only
        // the trailing character is reused.
        return input.replace(possessivePattern, (_corresp, _first, second) => `s§' ${second}`);
    }

    /**
     * Recomputes the indefinite article for every "a"/"A" that is followed by
     * a word, using `getAAn` from `english-a-an`.
     *
     * NOTE(review): `constants.tousCaracteresMinMajRe` is presumably a
     * character-class body listing all letters, so that the "a" must stand on
     * its own and the captured word is a run of letters - confirm.
     *
     * @param {string} input - text to process.
     * @param {boolean} beforeProtect - passed to `constants.getInBetween` to
     *   pick the fragment allowed between the article and the word.
     * @returns {string} the text with articles recomputed.
     */
    aAnGeneric(input, beforeProtect) {
        const letters = this.constants.tousCaracteresMinMajRe;
        const articlePattern = new RegExp(`([^${letters}])([aA])${this.constants.stdBetweenWithParenthesis}(${this.constants.getInBetween(beforeProtect)})([${this.constants.tousCaracteresMinMajRe}]*)`, 'g');
        return input.replace(articlePattern, (match, before, aA, between, beforeWord, word) => {
            // No word to agree with (e.g. an orphan "a" at the very end of a
            // text): leave the match untouched.
            if (word == null || word === '') {
                return match;
            }
            // Local reference so the function is invoked without a receiver.
            const getAAn = english_a_an_1.getAAn;
            const article = getAAn(this.dictManager.getAdjsWordsData(), aan_json_1.default, word);
            const newAa = this.redoCapitalization(aA, article);
            return `${before}${newAa}${between}${beforeWord}${word}`;
        });
    }

    /**
     * Gives `replacement` the capitalization of the article it replaces.
     *
     * @param {string} initial - the original article ("a" or "A").
     * @param {string} replacement - the recomputed article.
     * @returns {string} `replacement` with its first character upper-cased
     *   when `initial` is exactly "A"; otherwise `replacement` unchanged.
     */
    redoCapitalization(initial, replacement) {
        if (initial !== 'A') {
            return replacement;
        }
        // "a" -> "A", "an" -> "An"
        const head = replacement.substring(0, 1);
        const tail = replacement.substring(1);
        return `${head.toUpperCase()}${tail}`;
    }

    /**
     * Title-cases `input` by delegating to the `better-title-case` default export.
     *
     * @param {string} input - text to title-case.
     * @returns the value returned by `better-title-case`.
     */
    titlecase(input) {
        // Local reference so the function is invoked without a receiver.
        const toTitleCase = better_title_case_1.default;
        return toTitleCase(input);
    }

    /**
     * Removes any whitespace that directly precedes an apostrophe,
     * e.g. "the phone 's" -> "the phone's".
     *
     * @param {string} input - text to clean.
     * @returns {string} the cleaned text.
     */
    cleanSpacesPunctuationCorrect(input) {
        return input.replace(/\s*'/g, "'");
    }
}
exports.LanguageFilterEnglish = LanguageFilterEnglish;
//# sourceMappingURL=LanguageFilterEnglish.js.map