rosaenlg-filter
Version:
Filtering feature of RosaeNLG
104 lines • 4.53 kB
JavaScript
;
/**
* @license
* Copyright 2019 Ludan Stoecklé
* SPDX-License-Identifier: Apache-2.0
*/
// Module-interop helper (appears to be the standard helper emitted by the TypeScript compiler — confirm
// before hand-editing). Exposes property `k` of `m` on object `o` under the name `k2` (defaults to `k`).
// If `this.__createBinding` already exists it is reused instead of the definitions below.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Replace the descriptor with an enumerable getter that reads m[k] on every access when:
// there is no own descriptor; or it is an accessor and `m` is not flagged __esModule;
// or it is a data property that is writable or configurable.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Variant selected when Object.create is falsy: copies the current value once.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Module-interop helper: stores `v` as the "default" property of `o`.
// If `this.__setModuleDefault` already exists it is reused instead of the definitions below.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
// Defined as an enumerable property holding `v`.
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
// Variant selected when Object.create is falsy: plain assignment.
o["default"] = v;
});
// Module-interop helper wrapped around several require() calls in this file.
// Returns `mod` untouched when it is truthy and flagged __esModule; otherwise returns a new object
// that exposes the own properties of `mod` and carries `mod` itself as its "default" property.
// If `this.__importStar` already exists it is reused instead of the function below.
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
// Bind every own property found by for...in, except "default", onto the result (skipped for null/undefined).
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
// "default" always points at the original module value, even when `mod` is null or undefined.
__setModuleDefault(result, mod);
return result;
};
// Flag the exports object with __esModule so interop helpers treat it as an ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Initialise the four public export names to undefined (void 0) in a single chained assignment.
exports.filter = exports.EATSPACE = exports.inlineHtmlElts = exports.blockLevelHtmlElts = void 0;
const punctuation = __importStar(require("./punctuation"));
const clean = __importStar(require("./clean"));
const titlecase_1 = require("./titlecase");
const protect = __importStar(require("./protect"));
const protectTag_1 = require("./protectTag");
const html = __importStar(require("./html"));
const languageFilterHelper_1 = require("./languageFilterHelper");
// Re-export values owned by sibling modules under this module's public names:
// blockLevelElts and inlineElts come from ./html, EATSPACE comes from ./punctuation.
exports.blockLevelHtmlElts = html.blockLevelElts;
exports.inlineHtmlElts = html.inlineElts;
exports.EATSPACE = punctuation.EATSPACE;
/**
 * Easter egg: decorates every occurrence of a fixed marker word with a
 * thumbs-up emoji.
 *
 * @param {string} input - text to scan
 * @returns {string} the text with ' 👍' inserted right after each occurrence of the marker
 */
function egg(input) {
  // The marker word is spelled with hex escapes in the source.
  const marker = '\x41\x64\x64\x76\x65\x6E\x74\x61';
  const everyMarker = new RegExp(marker, 'g');
  return input.replace(everyMarker, `${marker} 👍`);
}
/**
 * Runs the complete text filtering pipeline on a string.
 *
 * The steps are strictly ordered: content is first protected (HTML escape
 * sequences, HTML tags, raw numbers, § blocks), then cleaned and punctuated,
 * then unprotected in reverse, and finally trimmed.
 *
 * @param {string} input - text to filter
 * @param {*} languageCommon - passed to languageFilterFromLanguageCommon to obtain the language filter
 * @param {{renderDebug: *}} filterParams - options; a truthy renderDebug applies html.changeRenderDebug
 * @returns {string} the filtered text, without leading or trailing whitespace
 */
function filter(input, languageCommon, filterParams) {
  const { languageFilterFromLanguageCommon } = languageFilterHelper_1;
  const langFilter = languageFilterFromLanguageCommon(languageCommon);
  let text = input;

  // Shield HTML escape sequences first, then swap HTML tags for placeholders.
  text = html.protectHtmlEscapeSeq(text);
  const htmlSwap = html.replaceHtml(text);
  text = htmlSwap.replaced;

  // Prefix a sentinel sentence so that later regexps do not have to deal with ^.
  text = 'START. ' + text;

  // Raw numbers can only be protected once the HTML tags are out of the way.
  text = langFilter.protectRawNumbers(text);

  // <protect>...</protect> is turned into §...§ before beforeProtect runs,
  // because beforeProtect relies on the § markers.
  const { processProtectHtmlTags } = protectTag_1;
  text = processProtectHtmlTags(text);
  text = langFilter.beforeProtect(text);

  // Take the § blocks out of the text; the mappings are needed to restore them.
  const blockSwap = protect.protectBlocks(text);
  text = blockSwap.protectedString;

  text = clean.joinLines(text);
  // Done early so that none of the following steps has to care about ¤.
  text = clean.specialSpacesToNormalSpaces(text);
  text = punctuation.duplicatePunctuation(text, langFilter);
  text = langFilter.contractions(text);
  text = clean.cleanStruct(text, langFilter.constants);
  text = punctuation.parenthesis(text, langFilter);
  // quotes can introduce double spaces, so it runs before cleanSpacesPunctuation.
  text = punctuation.quotes(text);
  text = punctuation.cleanSpacesPunctuation(text, langFilter);
  // NOTE(review): the earlier comment here said capitalisation must happen before
  // contractions, yet contractions already ran above — confirm the intended order.
  text = punctuation.addCaps(text, langFilter);
  text = egg(text);
  const { titlecase } = titlecase_1;
  text = titlecase(text, langFilter);
  // Kept as the last step before unprotecting; the earlier comment cites a
  // recapitalization process as the reason.
  text = langFilter.justBeforeUnprotect(text);

  // Put the § blocks back.
  text = protect.unprotect(text, blockSwap.mappings);

  // Drop the START sentinel (must happen before HTML tags are restored): of the
  // run of ☞, whitespace and dots that follows it, only the ☞ characters survive.
  text = text.replace(/^START([☞\s\.]+)/, (_whole, trailing) => trailing.replace(/[\s\.]*/g, ''));

  // Restore the HTML tags.
  text = html.replacePlaceholders(text, htmlSwap.elts);
  if (filterParams.renderDebug) {
    text = html.changeRenderDebug(text);
  }
  text = clean.cleanStructAfterUnprotect(text);

  // Restore the HTML escape sequences, then strip surrounding whitespace.
  text = html.unProtectHtmlEscapeSeq(text);
  return text.trim();
}
exports.filter = filter;
//# sourceMappingURL=index.js.map