UNPKG

rosaenlg-filter

Version:

Filtering feature of RosaeNLG

131 lines 5.8 kB
"use strict";
/**
 * @license
 * Copyright 2019 Ludan Stoecklé
 * SPDX-License-Identifier: Apache-2.0
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.LanguageFilterItalian = void 0;
const LanguageFilter_1 = require("./LanguageFilter");

/**
 * Italian text filter: protects raw numbers, rewrites articles so that they
 * agree with the word that follows, and merges preposition + article pairs.
 *
 * NOTE(review): `this.constants` and `this.contract2elts` are not defined in
 * this file; presumably both are provided by the LanguageFilter base class —
 * confirm against ./LanguageFilter.
 */
class LanguageFilterItalian extends LanguageFilter_1.LanguageFilter {
  /**
   * @param languageCommon Language helper object. This class calls its
   *   `isConsonneImpure(word)`, `isIFollowedByVowel(word)` and
   *   `startsWithVowel(word)` methods.
   */
  constructor(languageCommon) {
    super(languageCommon);
    this.languageCommon = languageCommon;
    // NOTE(review): not read anywhere in this file; presumably consumed by the
    // base class — confirm.
    this.cleanSpacesPunctuationDoDefault = true;
  }

  // same as Spanish
  /**
   * Wraps numbers found in `input` in `<protect>…</protect>` tags.
   *
   * The pattern requires one non-digit character, then whatever
   * `constants.stdBeforeWithParenthesis` matches, then the number itself:
   * either 1-3 digits followed by any count of `.ddd` groups, or a plain run
   * of digits, optionally followed by `,` and more digits.
   *
   * Assumes `stdBeforeWithParenthesis` contributes exactly one capturing
   * group (so that the number is the third group) — TODO confirm.
   *
   * @param {string} input Text to scan.
   * @returns {string} Text with every matched number wrapped.
   */
  protectRawNumbers(input) {
    const regexNumber = new RegExp(
      `([^\\d])${this.constants.stdBeforeWithParenthesis}((\\d{1,3}(?:\\.\\d{3})*|(?:\\d+))(?:\\,\\d+)?)`,
      'g'
    );
    return input.replace(
      regexNumber,
      (_match, before1, before2, content) => `${before1}${before2}<protect>${content}</protect>`
    );
  }

  /**
   * Builds the global regex used to locate a determiner and the word after it.
   *
   * Layout: `stdBeforeWithParenthesis`, then `part` as a capturing group, then
   * `stdBetweenWithParenthesis`, then a (possibly empty) run of characters
   * from `tousCaracteresMinMajRe` as a capturing group.
   *
   * The callers in this class read four captures (before, determiner,
   * between, word); that assumes each of the two `std…WithParenthesis`
   * constants holds exactly one capturing group — TODO confirm.
   *
   * @param {string} part Regex source (alternation allowed) for the determiner.
   * @returns {RegExp} A fresh regex with the `g` flag.
   */
  getRegex(part) {
    const { stdBeforeWithParenthesis, stdBetweenWithParenthesis, tousCaracteresMinMajRe } = this.constants;
    return new RegExp(
      `${stdBeforeWithParenthesis}(${part})${stdBetweenWithParenthesis}([${tousCaracteresMinMajRe}]*)`,
      'g'
    );
  }

  /**
   * Reassembles a matched fragment with a new determiner.
   *
   * @param {string} before Text captured ahead of the determiner.
   * @param {string} determiner Lower-case determiner to emit.
   * @param {string} capRef Originally matched determiner; only the case of its
   *   first character is used.
   * @param {string} between Text captured between determiner and word.
   * @param {string} word Word that follows the determiner.
   * @returns {string} `before` + adjusted determiner/separator + `word`.
   */
  getElt(before, determiner, capRef, between, word) {
    return `${before}${this.getDetElt(determiner, capRef, between)}${word}`;
  }

  /**
   * Produces the determiner plus the separator that follows it.
   *
   * - The determiner's first letter is upper-cased when the first character of
   *   `capRef` changes under `toLowerCase()` (i.e. it was upper case).
   * - When the determiner ends with an apostrophe, every space character is
   *   removed from `between`; otherwise each run of whitespace in `between`
   *   is collapsed to a single space.
   *
   * @param {string} determiner Lower-case determiner to emit.
   * @param {string} capRef Originally matched determiner (capitalisation source).
   * @param {string} between Text captured between determiner and word.
   * @returns {string} Determiner followed by the normalised separator.
   */
  getDetElt(determiner, capRef, between) {
    const firstChar = capRef.substring(0, 1);
    // Both operands are strings, so strict comparison is exact here.
    const isUc = firstChar.toLowerCase() !== firstChar;
    const newDet = isUc ? determiner.substring(0, 1).toUpperCase() + determiner.substring(1) : determiner;
    const newBetween = determiner.endsWith("'") ? between.replace(/ /g, '') : between.replace(/\s+/g, ' ');
    return `${newDet}${newBetween}`;
  }

  /**
   * Rewrites each article so that its form fits the following word.
   *
   * Five passes run in a fixed order, each over the output of the previous
   * one. The decisions rely on `languageCommon.isConsonneImpure`,
   * `isIFollowedByVowel` and `startsWithVowel`, whose exact rules live outside
   * this file.
   *
   * @param {string} input Text to process.
   * @returns {string} Text with articles adjusted.
   */
  articlesContractions(input) {
    const lc = this.languageCommon;
    let res = input;

    // definite masc sing: il / lo -> lo, l' or il
    res = res.replace(this.getRegex('[Ii]l|[Ll]o'), (_match, before, matchedDet, between, word) => {
      if (lc.isConsonneImpure(word) || lc.isIFollowedByVowel(word)) {
        return this.getElt(before, 'lo', matchedDet, between, word);
      }
      if (lc.startsWithVowel(word)) {
        return this.getElt(before, "l'", matchedDet, between, word);
      }
      return this.getElt(before, 'il', matchedDet, between, word);
    });

    // definite masc plural: i / gli -> gli or i ('dei' as the next word forces 'gli')
    res = res.replace(this.getRegex('[Ii]|[Gg]li'), (_match, before, matchedDet, between, word) => {
      if (lc.isConsonneImpure(word) || lc.startsWithVowel(word) || word.toLowerCase() === 'dei') {
        return this.getElt(before, 'gli', matchedDet, between, word);
      }
      return this.getElt(before, 'i', matchedDet, between, word);
    });

    // definite fem sing: la -> l' or la
    res = res.replace(this.getRegex('[Ll]a'), (_match, before, matchedDet, between, word) => {
      if (lc.startsWithVowel(word) && !lc.isIFollowedByVowel(word)) {
        return this.getElt(before, "l'", matchedDet, between, word);
      }
      return this.getElt(before, 'la', matchedDet, between, word);
    });

    // definite fem plural
    // nothing to do

    // indefinite masc: un / uno -> uno or un
    res = res.replace(this.getRegex('[Uu]n|[Uu]no'), (_match, before, matchedDet, between, word) => {
      if (lc.isConsonneImpure(word) || lc.isIFollowedByVowel(word)) {
        return this.getElt(before, 'uno', matchedDet, between, word);
      }
      return this.getElt(before, 'un', matchedDet, between, word);
    });

    // indefinite fem: una -> un' or una
    res = res.replace(this.getRegex('[Uu]na'), (_match, before, matchedDet, between, word) => {
      if (lc.startsWithVowel(word) && !lc.isIFollowedByVowel(word)) {
        return this.getElt(before, "un'", matchedDet, between, word);
      }
      return this.getElt(before, 'una', matchedDet, between, word);
    });

    return res;
  }

  /**
   * Merges each preposition (a, di, da, in, su) with each definite article
   * (il, lo, l', i, gli, la, le) into its contracted form, by calling
   * `this.contract2elts(prep, article, contracted, text)` once per pair.
   *
   * Prepositions are visited in the key order of the table below and, for each
   * one, articles in the order of `seconds`; every call receives the output of
   * the previous call.
   *
   * @param {string} input Text to process.
   * @returns {string} Text after all 35 `contract2elts` calls.
   */
  twoWordsContractions(input) {
    // https://www.italien-facile.com/exercices/exercice-italien-2/exercice-italien-78139.php
    const seconds = ['il', 'lo', "l'", 'i', 'gli', 'la', 'le'];
    // Each list is index-aligned with `seconds`.
    const contrList = {
      a: ['al', 'allo', "all'", 'ai', 'agli', 'alla', 'alle'],
      di: ['del', 'dello', "dell'", 'dei', 'degli', 'della', 'delle'],
      da: ['dal', 'dallo', "dall'", 'dai', 'dagli', 'dalla', 'dalle'],
      in: ['nel', 'nello', "nell'", 'nei', 'negli', 'nella', 'nelle'],
      su: ['sul', 'sullo', "sull'", 'sui', 'sugli', 'sulla', 'sulle'],
    };

    let res = input;
    for (const prep of Object.keys(contrList)) {
      const vals = contrList[prep];
      for (let j = 0; j < seconds.length; j++) {
        res = this.contract2elts(prep, seconds[j], vals[j], res);
      }
    }
    return res;
  }

  /**
   * Runs the article pass first, then the preposition + article pass on its
   * output.
   *
   * @param {string} input Text to process.
   * @returns {string} Text with both passes applied.
   */
  contractions(input) {
    return this.twoWordsContractions(this.articlesContractions(input));
  }
}
exports.LanguageFilterItalian = LanguageFilterItalian;
//# sourceMappingURL=LanguageFilterItalian.js.map