rosaenlg-filter
Version:
Filtering feature of RosaeNLG
189 lines • 4.59 kB
JavaScript
;
/**
* @license
* Copyright 2019 Ludan Stoecklé
* SPDX-License-Identifier: Apache-2.0
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.changeRenderDebug = exports.unProtectHtmlEscapeSeq = exports.protectHtmlEscapeSeq = exports.replacePlaceholders = exports.replaceHtml = exports.inlineElts = exports.blockLevelElts = void 0;
const clean_1 = require("./clean");
/**
 * Tag names that replaceHtml classifies as block-level: a tag whose name is in
 * this list is swapped for the ☛ (opening) / ☚ (closing) placeholder, any other
 * tag gets the inline placeholders.
 */
exports.blockLevelElts = [
'address',
'article',
'aside',
'blockquote',
'canvas',
'dd',
'div',
'dl',
'dt',
'fieldset',
'figcaption',
'figure',
'footer',
'form',
'h1',
'h2',
'h3',
'h4',
'h5',
'h6',
'header',
'head',
'body',
'hr',
'li',
'main',
'nav',
'noscript',
'ol',
'p',
'pre',
'section',
'table',
'tfoot',
'ul',
'video',
// pseudo tags: list tags carrying a `_block` suffix, which forces the block-level
// classification; cleanReplacedTag removes the suffix when the tags are restored
'li_block',
'ul_block',
'ol_block',
];
/**
 * Tag names considered inline.
 * Nothing in the code visible in this file reads this list: replaceHtml gives
 * the inline placeholders (☞ opening / ☜ closing) to every tag that is NOT in
 * blockLevelElts, whether or not it is listed here.
 */
exports.inlineElts = [
'a',
'abbr',
'acronym',
'b',
'bdo',
'big',
'br',
'button',
'cite',
'code',
'dfn',
'em',
'i',
'img',
'input',
'kbd',
'label',
'map',
'object',
'output',
'q',
'samp',
'script',
'select',
'small',
'span',
'strong',
'sub',
'sup',
'textarea',
'time',
'tt',
'var',
// pseudo tags: list tags carrying an `_inline` suffix; cleanReplacedTag removes
// the suffix when the tags are restored
'li_inline',
'ul_inline',
'ol_inline',
];
/**
 * Swaps every HTML tag found in `input` for a one-character placeholder and
 * records the original tags, in order, so that replacePlaceholders can put
 * them back later.
 *
 * Placeholders: ☛ / ☚ for an opening / closing tag whose name is listed in
 * `exports.blockLevelElts`, ☞ / ☜ for an opening / closing tag of any other
 * name. The `protect` pseudo tag is not turned into a placeholder and is not
 * recorded; it is only normalised to a bare `<protect>` / `</protect>`.
 *
 * @param {string} input - text that may contain HTML tags
 * @returns {{replaced: string, elts: string[]}} the text with placeholders, and
 *   the replaced tags (exact matched text) in order of appearance
 */
function replaceHtml(input) {
    const elts = [];
    // `_` supports the li_* / ul_* / ol_* pseudo tags; digits support h1..h6.
    // `[^>]*` instead of `.*?` so that a tag whose attributes are spread over
    // several lines is captured as well (`.` does not match line terminators).
    const regexHtml = /<(\/?)([a-zA-Z1-9_-]+)[^>]*>/g;
    const replaced = input.replace(regexHtml, (match, begin, tag) => {
        const isClosing = begin === '/';
        if (tag === 'protect') {
            // pseudo tag used by RosaeNLG to protect from filtering:
            // kept in the text and deliberately not pushed in the matches
            return isClosing ? '</protect>' : '<protect>';
        }
        elts.push(match);
        // HTML tag names are case-insensitive: <DIV> is as block-level as <div>
        if (exports.blockLevelElts.includes(tag.toLowerCase())) {
            return isClosing ? '☚' : '☛';
        }
        // inlineElts or any other tag
        return isClosing ? '☜' : '☞';
    });
    return { replaced, elts };
}
exports.replaceHtml = replaceHtml;
function cleanReplacedTag(tag) {
// <td¤ class="texteGenere" id="14"¤> issues
return (0, clean_1.specialSpacesToNormalSpaces)(tag.replace('_block', '').replace('_inline', ''));
/*
.replace('li_block', 'li')
.replace('li_inline', 'li')
.replace('ul_block', 'ul')
.replace('ul_inline', 'ul')
.replace('ol_block', 'ol')
.replace('ol_inline', 'ol');
*/
}
/**
 * Puts the stored HTML tags back in place of the placeholders ☛ ☚ ☞ ☜.
 *
 * Tags are taken from the front of `elts`, one per placeholder, in order of
 * appearance, and each goes through cleanReplacedTag before insertion.
 * `elts` is consumed in the process: the caller's array is emptied.
 *
 * @param {string} input - text containing placeholders
 * @param {string[]} elts - the tags to restore, in order
 * @returns {string} the text with the tags restored
 * @throws {Error} named 'InternalError' when the number of placeholders and the
 *   number of stored tags differ (too few or too many tags)
 */
function replacePlaceholders(input, elts) {
    const restoreNextTag = () => {
        const nextTag = elts.shift();
        if (nextTag !== undefined) {
            return cleanReplacedTag(nextTag);
        }
        // more placeholders than stored tags
        const missing = new Error();
        missing.name = 'InternalError';
        missing.message = `There are not enough html tags`;
        throw missing;
    };
    const restored = input.replace(/[☛☚☞☜]/g, restoreNextTag);
    // every stored tag must have found its placeholder
    if (elts.length > 0) {
        const leftover = new Error();
        leftover.name = 'InternalError';
        leftover.message = `There are left html tags: ${elts}`;
        throw leftover;
    }
    return restored;
}
exports.replacePlaceholders = replacePlaceholders;
/**
 * Placeholder token -> HTML escape sequence it stands for.
 *
 * The values have to be the escape sequences themselves (`&amp;`, `&lt;`,
 * `&gt;`), not the bare characters: substituting a bare `<` or `>` would break
 * the very tags that replaceHtml looks for.
 * NOTE(review): restored from bare `&`, `<`, `>`, which look like an
 * entity-decoded copy of this file; confirm against the TypeScript source.
 */
const protectMap = {
    AMPROTECT: '&amp;',
    LTPROTECT: '&lt;',
    GTPROTECT: '&gt;',
};
/**
 * Hides every `&amp;`, `&lt;` and `&gt;` of `input` behind its placeholder
 * token (see protectMap). Inverse of unProtectHtmlEscapeSeq.
 *
 * @param {string} input - text that may contain HTML escape sequences
 * @returns {string} the text with every such sequence replaced by its token
 */
function protectHtmlEscapeSeq(input) {
    let res = input;
    for (const [token, escapeSeq] of Object.entries(protectMap)) {
        // split/join substitutes EVERY occurrence; String.prototype.replace with
        // a string pattern stops after the first one
        res = res.split(escapeSeq).join(token);
    }
    return res;
}
exports.protectHtmlEscapeSeq = protectHtmlEscapeSeq;
/**
 * Turns every placeholder token of `input` back into the HTML escape sequence
 * it stands for (see protectMap). Inverse of protectHtmlEscapeSeq.
 *
 * @param {string} input - text containing placeholder tokens
 * @returns {string} the text with every token replaced by its escape sequence
 */
function unProtectHtmlEscapeSeq(input) {
    let res = input;
    for (const [token, escapeSeq] of Object.entries(protectMap)) {
        // every occurrence, to mirror protectHtmlEscapeSeq
        res = res.split(token).join(escapeSeq);
    }
    return res;
}
exports.unProtectHtmlEscapeSeq = unProtectHtmlEscapeSeq;
/**
 * Rewrites each empty debug span so that its id becomes its content:
 * `<span class="rosaenlg-debug" id="X"></span>` turns into
 * `<span class="rosaenlg-debug">X</span>`.
 *
 * @param {string} input - text that may contain empty debug spans
 * @returns {string} the text with every such span rewritten
 */
function changeRenderDebug(input) {
    const emptyDebugSpan = /<span class="rosaenlg-debug" id="(.*?)"><\/span>/g;
    const moveIdIntoSpan = (_wholeSpan, debugId) => `<span class="rosaenlg-debug">${debugId}</span>`;
    return input.replace(emptyDebugSpan, moveIdIntoSpan);
}
exports.changeRenderDebug = changeRenderDebug;
//# sourceMappingURL=html.js.map