ses
Version:
Hardened JavaScript for Fearless Cooperation
268 lines (248 loc) • 10.4 kB
JavaScript
// @ts-check
import {
FERAL_REG_EXP,
SyntaxError,
stringReplace,
stringSearch,
stringSlice,
stringSplit,
freeze,
} from './commons.js';
import { getSourceURL } from './get-source-url.js';
/**
 * Locate the first occurrence of `pattern` in `src` and report where it is
 * as an approximate, 1-based line number.
 *
 * @param {string} src source text to scan
 * @param {RegExp} pattern pattern to look for
 * @returns {number} the line number of the first match, or -1 when the
 * pattern does not occur in `src`
 */
function getLineNumber(src, pattern) {
  const matchStart = stringSearch(src, pattern);
  if (matchStart >= 0) {
    // Every line that begins before the match contributes one entry here.
    const linesBeforeMatch = stringSplit(stringSlice(src, 0, matchStart), '\n');
    // The importPattern incidentally captures an initial \n in
    // an attempt to reject a . prefix. When the match starts on that
    // newline, the text of interest sits on the following line.
    const startsOnNewline = src[matchStart] === '\n';
    return startsOnNewline
      ? linesBeforeMatch.length + 1
      : linesBeforeMatch.length;
  }
  return -1;
}
// /////////////////////////////////////////////////////////////////////////////
// Global pattern matching either of the two html-like comment tokens.
// The '<' and '>' characters are spliced in through template placeholders so
// that neither token is spelled out verbatim anywhere in this file.
const htmlCommentPattern = new FERAL_REG_EXP(`(?:${'<'}!--|--${'>'})`, 'g');
/**
* Conservatively reject the source text if it may contain text that some
* JavaScript parsers may treat as an html-like comment. To reject without
* parsing, `rejectHtmlComments` will also reject some other text as well.
*
* https://www.ecma-international.org/ecma-262/9.0/index.html#sec-html-like-comments
* explains that JavaScript parsers may or may not recognize html
* comment tokens "<" immediately followed by "!--" and "--"
* immediately followed by ">" in non-module source text, and treat
* them as a kind of line comment. Since otherwise both of these can
* appear in normal JavaScript source code as a sequence of operators,
* we have the terrifying possibility of the same source code parsing
* one way on one correct JavaScript implementation, and another way
* on another.
*
* This shim takes the conservative strategy of just rejecting source
* text that contains these strings anywhere. Note that this very
* source file is written strangely to avoid mentioning these
* character strings explicitly.
*
* We do not write the regexp in a straightforward way, so that an
* apparent html comment does not appear in this file. Thus, we avoid
* rejection by the overly eager rejectDangerousSources.
*
* @param {string} src
* @returns {string}
*/
export const rejectHtmlComments = src => {
  const offendingLine = getLineNumber(src, htmlCommentPattern);
  if (offendingLine >= 0) {
    // Only look up the source name once we know we have to report a problem.
    const sourceName = getSourceURL(src);
    // See https://github.com/endojs/endo/blob/master/packages/ses/error-codes/SES_HTML_COMMENT_REJECTED.md
    throw SyntaxError(
      `Possible HTML comment rejected at ${sourceName}:${offendingLine}. (SES_HTML_COMMENT_REJECTED)`,
    );
  }
  // Nothing suspicious was found: hand the text back untouched.
  return src;
};
/**
* An optional transform to place ahead of `rejectHtmlComments` to evade *that*
* rejection. However, it may change the meaning of the program.
*
* This evasion replaces each alleged html comment with the space-separated
* JavaScript operator sequence that it may mean, assuming that it appears
* outside of a comment or literal string, in source code where the JS
* parser makes no special case for html comments (like module source code).
* In that case, this evasion preserves the meaning of the program, though it
* does change the source column numbers on each affected line.
*
* If the html comment appeared in a literal (a string literal, regexp literal,
* or a template literal), then this evasion will change the meaning of the
* program by changing the text of that literal.
*
* If the html comment appeared in a JavaScript comment, then this evasion does
* not change the meaning of the program because it only changes the contents of
* those comments.
*
* @param {string} src
* @returns {string}
*/
export const evadeHtmlCommentTest = src =>
  stringReplace(src, htmlCommentPattern, token => {
    // The pattern only ever matches two tokens; the first character is
    // enough to tell the opening one from the closing one.
    if (token[0] === '<') {
      return '< ! --';
    }
    return '-- >';
  });
// /////////////////////////////////////////////////////////////////////////////
// Global pattern for a suspicious `import` identifier.
// Group 1 is what immediately precedes the identifier: the start of the
// text, any single character other than '.', or a literal '...'. A word
// boundary is also required right before the identifier.
// Group 2 is what follows it: optional whitespace and then either an open
// paren or the first two characters of a line or block comment.
const importPattern = new FERAL_REG_EXP(
'(^|[^.]|\\.\\.\\.)\\bimport(\\s*(?:\\(|/[/*]))',
'g',
);
/**
* Conservatively reject the source text if it may contain a dynamic
* import expression. To reject without parsing, `rejectImportExpressions` will
* also reject some other text as well.
*
* The proposed dynamic import expression is the only syntax currently
* proposed, that can appear in non-module JavaScript code, that
* enables direct access to the outside world that cannot be
* suppressed or intercepted without parsing and rewriting. Instead,
* this shim conservatively rejects any source text that seems to
* contain such an expression. To do this safely without parsing, we
* must also reject some valid programs, i.e., those containing
* apparent import expressions in literal strings or comments.
*
* The current conservative rule looks for the identifier "import"
* followed by either an open paren or something that looks like the
* beginning of a comment. We assume that we do not need to worry
* about html comment syntax because that was already rejected by
* rejectHtmlComments.
*
* This \s *must* match all kinds of syntax-defined whitespace. If e.g.
* U+2028 (LINE SEPARATOR) or U+2029 (PARAGRAPH SEPARATOR) is treated as
* whitespace by the parser, but not matched by /\s/, then this would admit
* an attack like: import\u2028('power.js') . We're trying to distinguish
* something like that from something like importnotreally('power.js') which
* is perfectly safe.
*
* @param {string} src
* @returns {string}
*/
export const rejectImportExpressions = src => {
  const offendingLine = getLineNumber(src, importPattern);
  if (offendingLine >= 0) {
    // Only look up the source name once we know we have to report a problem.
    const sourceName = getSourceURL(src);
    // See https://github.com/endojs/endo/blob/master/packages/ses/error-codes/SES_IMPORT_REJECTED.md
    throw SyntaxError(
      `Possible import expression rejected at ${sourceName}:${offendingLine}. (SES_IMPORT_REJECTED)`,
    );
  }
  // Nothing suspicious was found: hand the text back untouched.
  return src;
};
/**
* An optional transform to place ahead of `rejectImportExpressions` to evade
* *that* rejection. However, it may change the meaning of the program.
*
* This evasion replaces each suspicious `import` identifier with `__import__`.
* If the alleged import expression appears in a JavaScript comment, this
* evasion will not change the meaning of the program. If it appears in a
* literal (string literal, regexp literal, or a template literal), then this
* evasion will change the contents of that literal. If it appears as code
* where it would be parsed as an expression, then it might or might not change
* the meaning of the program, depending on the binding, if any, of the lexical
* variable `__import__`.
*
* @param {string} src
* @returns {string}
*/
export const evadeImportExpressionTest = src =>
  stringReplace(
    src,
    importPattern,
    // Keep both captured neighbours and rename only the identifier between
    // them.
    (_whole, prefix, suffix) => `${prefix}__import__${suffix}`,
  );
// /////////////////////////////////////////////////////////////////////////////
// Global pattern for text that looks like a direct eval call.
// Group 1 is what immediately precedes the `eval` identifier: the start of
// the text or any single character other than '.'. A word boundary is also
// required right before the identifier.
// Group 2 is what follows it: optional whitespace and then an open paren.
const someDirectEvalPattern = new FERAL_REG_EXP(
'(^|[^.])\\beval(\\s*\\()',
'g',
);
/**
* Heuristically reject some text that seems to contain a direct eval
* expression, with both false positives and false negatives. To reject without
* parsing, `rejectSomeDirectEvalExpressions` may also reject some other
* text as well. It may also accept source text that contains a direct eval
* written oddly, such as `(eval)(src)`. This false negative is not a security
* vulnerability. Rather it is a compat hazard because it will execute as
* an indirect eval under the SES-shim but as a direct eval on platforms that
* support SES directly (like XS).
*
* The shim cannot correctly emulate a direct eval as explained at
* https://github.com/Agoric/realms-shim/issues/12
* If we did not reject direct eval syntax, we would
* accidentally evaluate these with an emulation of indirect eval. To
* prevent future compatibility problems, in shifting from use of the
* shim to genuine platform support for the proposal, we should
* instead statically reject code that seems to contain a direct eval
* expression.
*
* As with the dynamic import expression, to avoid a full parse, we do
* this approximately with a regexp, that will also reject strings
* that appear safely in comments or strings. Unlike dynamic import,
* if we miss some, this only creates future compat problems, not
* security problems. Thus, we are only trying to catch innocent
* occurrences, not malicious ones. In particular, `(eval)(...)` is
* direct eval syntax that would not be caught by the following regexp.
*
* Exported for unit tests.
*
* @param {string} src
* @returns {string}
*/
export const rejectSomeDirectEvalExpressions = src => {
  const offendingLine = getLineNumber(src, someDirectEvalPattern);
  if (offendingLine >= 0) {
    // Only look up the source name once we know we have to report a problem.
    const sourceName = getSourceURL(src);
    // See https://github.com/endojs/endo/blob/master/packages/ses/error-codes/SES_EVAL_REJECTED.md
    throw SyntaxError(
      `Possible direct eval expression rejected at ${sourceName}:${offendingLine}. (SES_EVAL_REJECTED)`,
    );
  }
  // Nothing suspicious was found: hand the text back untouched.
  return src;
};
// /////////////////////////////////////////////////////////////////////////////
/**
 * The bundle of transforms that must unconditionally run last so that the
 * source can be evaluated safely without parsing it.
 *
 * The html comment check runs first and the import expression check runs on
 * its result. Either check throws a SyntaxError when it finds a suspicious
 * match.
 *
 * @param {string} source
 * @returns {string}
 */
export const mandatoryTransforms = source =>
  rejectImportExpressions(rejectHtmlComments(source));
/**
* Starting with `source`, apply each transform to the result of the
* previous one, returning the result of the last transformation.
*
* @param {string} source
* @param {((str: string) => string)[]} transforms
* @returns {string}
*/
export const applyTransforms = (source, transforms) => {
for (let i = 0, l = transforms.length; i < l; i += 1) {
const transform = transforms[i];
source = transform(source);
}
return source;
};
// Gather every transform, individually frozen, into one record and export
// that record as a frozen object.
const frozenTransformTable = {
  rejectHtmlComments: freeze(rejectHtmlComments),
  evadeHtmlCommentTest: freeze(evadeHtmlCommentTest),
  rejectImportExpressions: freeze(rejectImportExpressions),
  evadeImportExpressionTest: freeze(evadeImportExpressionTest),
  rejectSomeDirectEvalExpressions: freeze(rejectSomeDirectEvalExpressions),
  mandatoryTransforms: freeze(mandatoryTransforms),
  applyTransforms: freeze(applyTransforms),
};
export const transforms = freeze(frozenTransformTable);