@adobe/htlengine
Version:
Javascript Based HTL (Sightly) parser
292 lines (267 loc) • 9.85 kB
JavaScript
/*
* Copyright 2018 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
;
const esapiEncoder = require('node-esapi').encoder();
const XRegExp = require('xregexp');
const createDOMPurify = require('dompurify');
const { JSDOM } = require('jsdom');
const DOMPurify = createDOMPurify(new JSDOM('').window);
const RESERVED_WORDS = {
break: true,
case: true,
catch: true,
continue: true,
debugger: true,
default: true,
delete: true,
do: true,
else: true,
finally: true,
for: true,
function: true,
if: true,
in: true,
instanceof: true,
typeof: true,
new: true,
return: true,
void: true,
switch: true,
while: true,
this: true,
with: true,
throw: true,
try: true,
class: true,
enum: true,
extends: true,
super: true,
const: true,
export: true,
import: true,
implements: true,
let: true,
private: true,
public: true,
interface: true,
package: true,
protected: true,
static: true,
};
/* Building the URL validation RegEx */
const ALPHA = '(?:\\p{L}\\p{M}*)';
const HEX_DIGIT = '[A-Fa-f0-9]';
const PCT_ENCODED = `%${HEX_DIGIT}${HEX_DIGIT}`;
const UNRESERVED_CHARACTERS = `${ALPHA}|[\\p{N}\\-._~]`;
const SUB_DELIMS = '[!$&\'()*+,;=]';
const REG_NAME = `(?:(?:${UNRESERVED_CHARACTERS})*|(?:${PCT_ENCODED})*|(?:${SUB_DELIMS})*)`;
const PCHAR = `${UNRESERVED_CHARACTERS}|${PCT_ENCODED}|${SUB_DELIMS}|:|@`;
const DEC_OCTET = '(?:\\p{N}|[\\x31-\\x39]\\p{N}|1\\p{N}{2}|2[\\x30-\\x34]\\p{N}|25[\\x30-\\x35])';
const H16 = `${HEX_DIGIT}{1,4}`;
const IPV4_ADDRESS = `${DEC_OCTET}\\.${DEC_OCTET}\\.${DEC_OCTET}\\.${DEC_OCTET}`;
const LS32 = `(?:${H16}:${H16})|${IPV4_ADDRESS}`;
const IPV6_ADDRESS = `(?:(?:(?:${H16}:){6}(?:${LS32}))|`
+ `(?:::(?:${H16}:){5}(?:${LS32}))|`
+ `(?:(?:${H16}){0,1}::(?:${H16}:){4}(?:${LS32}))|`
+ `(?:(?:(?:${H16}:){0,1}${H16})?::(?:${H16}:){3}(?:${LS32}))|`
+ `(?:(?:(?:${H16}:){0,2}${H16})?::(?:${H16}:){2}(?:${LS32}))|`
+ `(?:(?:(?:${H16}:){0,3}${H16})?::(?:${H16}:){1}(?:${LS32}))|`
+ `(?:(?:(?:${H16}:){0,4}${H16})?::(?:${LS32}))|`
+ `(?:(?:(?:${H16}:){0,5}${H16})?::(?:${H16}))|`
+ `(?:(?:(?:${H16}:){0,6}${H16})?::))`;
const IP_LITERAL = `\\[${IPV6_ADDRESS}]`;
const PORT = '[0-9]+';
const HOST = `(?:${IP_LITERAL}|${IPV4_ADDRESS}|${REG_NAME})`;
const USER_INFO = `(?:(?:${UNRESERVED_CHARACTERS})|(?:${PCT_ENCODED})|(?:${SUB_DELIMS}))*`;
const AUTHORITY = `(?:${USER_INFO}@)?${HOST}(?::${PORT})?`;
const SCHEME_PATTERN = '(?!\\s*javascript)\\p{L}[\\p{L}\\p{N}+.\\-]*';
const FRAGMENT = `(?:${PCHAR}|\\/|\\?)*`;
const QUERY = `(?:${PCHAR}|\\/|\\?)*`;
const SEGMENT_NZ = `(?:${PCHAR})+`;
const SEGMENT_NZ_NC = `(?:${UNRESERVED_CHARACTERS}|${PCT_ENCODED}|${SUB_DELIMS}|@)+`;
const PATH_ABEMPTY = `(?:\\/|(\\/${SEGMENT_NZ}\\/?)*)`;
const PATH_ABSOLUTE = `\\/(?:${SEGMENT_NZ}${PATH_ABEMPTY})?`;
const PATH_NOSCHEME = `${SEGMENT_NZ_NC}(?:\\/|(\\/${SEGMENT_NZ})*)`;
const PATH_ROOTLESS = `${SEGMENT_NZ}(?:\\/|(\\/${SEGMENT_NZ})*)`;
const PATH_EMPTY = '(?:^$)';
const RELATIVE_PART = `(?:(?:\\/\\/${AUTHORITY}${PATH_ABEMPTY})|(?:${PATH_ABSOLUTE})|(?:${PATH_ROOTLESS}))`;
const HIER_PART = `(?:(?:\\/\\/${AUTHORITY}${PATH_ABEMPTY})|(?:${PATH_ABSOLUTE})|(?:${PATH_NOSCHEME})|${PATH_EMPTY})`;
const RELATIVE_REF = `^(?!\\s*javascript(?::|:))${RELATIVE_PART}?(?:\\?${QUERY})?(?:#${FRAGMENT})?$`;
const URI = `^${SCHEME_PATTERN}:${HIER_PART}(?:\\?${QUERY})?(?:#${FRAGMENT})?$`;
function escapeJSString(input) {
return esapiEncoder.encodeForJavaScript(input);
}
const IDENTIFIER_PATTERN = /^[a-zA-Z$_][a-zA-Z0-9_$]*$/;
const NUMBER_PATTERN = /^-?\d+$/; // todo: expand for decimal point numbers
function isStringLiteral(input) {
if (input.length < 2) {
return false;
}
const first = input[0];
return (first === '"' || first === "'") && input[input.length - 1] === first;
}
function escapeJSToken(input) {
const trimmedInput = input.trim();
if (isStringLiteral(trimmedInput)) {
const quoteChar = trimmedInput[0];
const strContent = trimmedInput.substring(1, trimmedInput.length - 1);
const escaped = (strContent.length > 0) ? escapeJSString(strContent) : '';
return quoteChar + escaped + quoteChar;
}
if (NUMBER_PATTERN.test(trimmedInput)) {
return trimmedInput;
}
if (IDENTIFIER_PATTERN.test(trimmedInput) && !RESERVED_WORDS[trimmedInput]) {
return trimmedInput;
}
return undefined;
}
function sanitizeURL(url) {
try {
if (XRegExp(RELATIVE_REF).test(url) || XRegExp(URI).test(url)) {
return url;
}
} catch (e) {
// ignore
}
return '';
}
/* eslint-disable no-underscore-dangle */
const _NON_ASCII = '\\x00\\x08\\x0B\\x0C\\x0E-\\x1F';
/** http://www.w3.org/TR/css-syntax-3/#number-token-diagram */
const _NUMBER = '[+-]?[\\d]*[\\.]?[\\d]*(?:[e][+-]?\\d+)?';
/** http://www.w3.org/TR/css-syntax-3/#hex-digit-diagram */
const _HEX_DIGITS = '#[0-9a-f]*';
/** http://www.w3.org/TR/css-syntax-3/#ident-token-diagram */
const _IDENTIFIER = `-?[a-z_${_NON_ASCII}][\\w_\\-${_NON_ASCII}]*`;
/** http://www.w3.org/TR/css-syntax-3/#string-token-diagram */
const _STRING = "\"(?:[^\"^\\\\^\\n]|(?:\\\\\"))*\"|'(?:[^'^\\\\^\\n]|(?:\\\\'))*'";
/** http://www.w3.org/TR/css-syntax-3/#dimension-token-diagram */
const _DIMENSION = _NUMBER + _IDENTIFIER;
/** http://www.w3.org/TR/css-syntax-3/#percentage-token-diagram */
const _PERCENT = `${_NUMBER}%`;
/** http://www.w3.org/TR/css-syntax-3/#function-token-diagram */
const _FUNCTION = `${_IDENTIFIER}\\((?:(?:${_NUMBER})|(?:${_IDENTIFIER})|(?:[\\s]*)|(?:,))*\\)`;
/** http://www.w3.org/TR/css-syntax-3/#url-unquoted-diagram */
const _URL_UNQUOTED = `[^"^'^\\(^\\)^[${_NON_ASCII}]]*`;
/** http://www.w3.org/TR/css-syntax-3/#url-token-diagram */
const _URL = `url\\((?:(?:${_URL_UNQUOTED})|(?:${_STRING}))\\)`;
/** composite regular expression for style token validation */
const _CSS_TOKEN = `(?:${_NUMBER})`
+ `|(?:${_DIMENSION})`
+ `|(?:${_PERCENT})`
+ `|(?:${_HEX_DIGITS})`
+ `|(?:${_IDENTIFIER})`
+ `|(?:${_STRING})`
+ `|(?:${_FUNCTION})`
+ `|(?:${_URL})`;
/* eslint-enable no-underscore-dangle */
const CSS_TOKEN = new RegExp(_CSS_TOKEN, 'i');
// const URL = new RegExp(_URL);
// const URL_UNQUOTED = new RegExp(_URL_UNQUOTED);
// const FUNCTION = new RegExp(_FUNCTION);
// const PERCENT = new RegExp(_PERCENT);
// const DIMENSION = new RegExp(_DIMENSION);
// const STRING = new RegExp(_STRING);
// const IDENTIFIER = new RegExp(_IDENTIFIER);
// const HEX_DIGITS = new RegExp(_HEX_DIGITS);
// const NUMBER = new RegExp(_NUMBER);
// const NON_ASCII = new RegExp(_NON_ASCII);
/**
* Provides filtering against cross-site scripting attacks
*/
module.exports = {
/**
* Filter potentially user-contributed HTML by removing malicious tags and
* attributes
* @param {String} input - the original input
* @returns {String}
*/
filterHTML(input) {
return DOMPurify.sanitize(input, { USE_PROFILES: { html: true } });
},
/**
* Encodes an input string for HTML element content.
* @param {String} input - the original input
* @returns {String}
*/
encodeForHTML(input) {
return esapiEncoder.encodeForHTML(input);
},
/**
* Encodes a source string for writing to an HTML attribute value.
* @param {String} input
* @returns {String}
*/
encodeForHTMLAttribute(input) {
return esapiEncoder.encodeForHTMLAttribute(input);
},
/**
* Encodes a source string for writing to JavaScript string content.
* @param {String} input - the original input
* @returns {String}
*/
encodeForJSString(input) {
return escapeJSString(input);
},
/**
* Validate a Javascript token. The value must be either a single identifier, a literal number,
* or a literal string.
* @param {String} input - the original input
* @param {String} defaultValue - a default value to use if the source doesn't meet validity
* constraints.
* @returns {String}
*/
getValidJSToken(input, defaultValue) {
if (typeof input !== 'string') {
return defaultValue;
}
const encoded = escapeJSToken(input);
if (encoded === undefined) {
return defaultValue;
}
return encoded;
},
/**
* Sanitizes a URL for writing as an HTML href or src attribute value.
* @param {String} url
* @returns {String} a sanitized URL (possibly empty)
*/
getValidHref(url) {
if (typeof url !== 'string') {
return '';
}
return sanitizeURL(url.trim());
},
/**
* Validate a style/CSS token. Valid CSS tokens are specified at http://www.w3.org/TR/css3-syntax/
* @param {String} input the source token
* @param {String} defaultValue a default value to use if the source is {@code null},
* an empty string, or doesn't meet validity constraints.
* @return {String} a string containing sanitized style token
*/
getValidStyleToken(input, defaultValue) {
if (typeof input === 'string' && input.length > 0 && CSS_TOKEN.test(input)) {
return input;
}
return defaultValue;
},
encodeForCSSString(input) {
return esapiEncoder.encodeForCSS(input);
},
getValidMultiLineComment(comment, defaultComment) {
if (comment != null && comment.indexOf('*/') < 0) {
return comment;
}
return defaultComment;
},
};