UNPKG

rosaenlg-filter

Version:

Filtering feature of RosaeNLG

104 lines 4.53 kB
"use strict"; /** * @license * Copyright 2019 Ludan Stoecklé * SPDX-License-Identifier: Apache-2.0 */ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.filter = exports.EATSPACE = exports.inlineHtmlElts = exports.blockLevelHtmlElts = void 0; const punctuation = __importStar(require("./punctuation")); const clean = __importStar(require("./clean")); const titlecase_1 = require("./titlecase"); const protect = __importStar(require("./protect")); const protectTag_1 = require("./protectTag"); const html = __importStar(require("./html")); const languageFilterHelper_1 = require("./languageFilterHelper"); exports.blockLevelHtmlElts = html.blockLevelElts; exports.inlineHtmlElts = html.inlineElts; exports.EATSPACE = punctuation.EATSPACE; function egg(input) { let res = input; const x = '\x41\x64\x64\x76\x65\x6E\x74\x61'; const regex = new RegExp(x, 'g'); res = res.replace(regex, x + ' 👍'); return res; } function filter(input, languageCommon, filterParams) { const languageFilter = (0, languageFilterHelper_1.languageFilterFromLanguageCommon)(languageCommon); let res = input; // PROTECT HTML SEQ res = html.protectHtmlEscapeSeq(res); // PROTECT HTML TAGS const replacedHtml = html.replaceHtml(res); res = replacedHtml.replaced; // ADD START to avoid the problem of the ^ in regexp res = 'START. ' + res; // must be done after protecting html tags res = languageFilter.protectRawNumbers(res); // transform <protect>...</protect> into §...§ // must be done before 'beforeProtect', as 'beforeProtect' relies on § knowledge res = (0, protectTag_1.processProtectHtmlTags)(res); res = languageFilter.beforeProtect(res); // PROTECT § BLOCKS const protectedMappings = protect.protectBlocks(res); res = protectedMappings.protectedString; res = clean.joinLines(res); // do it early so that all the rest does not have to care for ¤ res = clean.specialSpacesToNormalSpaces(res); res = punctuation.duplicatePunctuation(res, languageFilter); res = languageFilter.contractions(res); res = clean.cleanStruct(res, languageFilter.constants); res = punctuation.parenthesis(res, languageFilter); // must be before cleanSpacesPunctuation as it can introduce double spaces res = punctuation.quotes(res); res = punctuation.cleanSpacesPunctuation(res, languageFilter); // must be before contractions otherwise difficult to find words res = punctuation.addCaps(res, languageFilter); res = egg(res); res = (0, titlecase_1.titlecase)(res, languageFilter); // must be done at the very end, as there is a recapitalization process res = languageFilter.justBeforeUnprotect(res); // UNPROTECT § BLOCKS res = protect.unprotect(res, protectedMappings.mappings); // REMOVE START - has to be before UNPROTECT HTML TAGS const regexRemoveStart = /^START([☞\s\.]+)/; res = res.replace(regexRemoveStart, (_match, before) => { return `${before.replace(/[\s\.]*/g, '')}`; }); // UNPROTECT HTML TAGS res = html.replacePlaceholders(res, replacedHtml.elts); if (filterParams.renderDebug) { res = html.changeRenderDebug(res); } res = clean.cleanStructAfterUnprotect(res); // UNPROTECT HTML SEQ res = html.unProtectHtmlEscapeSeq(res); // REMOVE spaces at the beginning and at the end res = res.trim(); return res; } exports.filter = filter; //# sourceMappingURL=index.js.map