rosaenlg-filter
Version:
Filtering feature of RosaeNLG
51 lines • 2.06 kB
JavaScript
;
/**
* @license
* Copyright 2019 Ludan Stoecklé
* SPDX-License-Identifier: Apache-2.0
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.LanguageFilterSpanish = void 0;
const LanguageFilter_1 = require("./LanguageFilter");
class LanguageFilterSpanish extends LanguageFilter_1.LanguageFilter {
constructor(languageCommon) {
super(languageCommon);
this.cleanSpacesPunctuationDoDefault = true;
}
protectRawNumbers(input) {
let res = input;
const regexNumber = new RegExp(`([^\\d])${this.constants.stdBeforeWithParenthesis}((\\d{1,3}(?:\\.\\d{3})*|(?:\\d+))(?:\\,\\d+)?)`, 'g');
res = res.replace(regexNumber, (_match, before1, before2, content) => {
return before1 + before2 + '<protect>' + content + '</protect>';
});
return res;
}
contractions(input) {
let res = input;
// de + el => del
res = this.contract2elts('de', 'el', 'del', res);
// a + el => al
res = this.contract2elts('a', 'el', 'al', res);
return res;
}
cleanSpacesPunctuation(input) {
let res = input;
const regexSpanishPunct = new RegExp(`([¡¿])(${this.constants.spaceOrNonBlockingClass}*)`, 'g');
res = res.replace(regexSpanishPunct, (_match, punct, after) => {
return `${punct}${after.replace(/\s/g, '')}`;
});
return res;
}
addCapsSpecific(input) {
let res = input;
const triggerCapsNoSpace = '[¡¿]';
const regexCapsAfterDot = new RegExp(`(${triggerCapsNoSpace})(${this.constants.spaceOrNonBlockingClass}*)([${this.constants.tousCaracteresMinMajRe}])`, 'g');
res = res.replace(regexCapsAfterDot, (_match, punct, before, firstWord) => {
// same as above but without added space
return `${punct}${before.replace(/\s/g, '')}${firstWord.toUpperCase()}`;
});
return res;
}
}
exports.LanguageFilterSpanish = LanguageFilterSpanish;
//# sourceMappingURL=LanguageFilterSpanish.js.map