UNPKG

es2-code-prettify

Version:
430 lines (404 loc) 17.9 kB
/** Given triples of [style, pattern, context] returns a lexing function, * The lexing function interprets the patterns to find token boundaries and * returns a decoration list of the form * [index_0, style_0, index_1, style_1, ..., index_n, style_n] * where index_n is an index into the sourceCode, and style_n is a style * constant like PR_PLAIN. index_n-1 <= index_n, and style_n-1 applies to * all characters in sourceCode[index_n-1:index_n]. * * 与えられた [style, pattern, context] のトリプルは、レキシング関数を返します。 * レキシング関数は、パターンを解釈してトークンの境界を見つけて * [index_0, style_0, index_1, style_1, ..., index_n, style_n] * 形式の装飾リストを返します。 * ここで index_n は sourceCode へのインデックスであり、style_n は PR_PLAIN のようなスタイル定数です。 * index_n-1 <= index_n で、style_n-1 は sourceCode[index_n-1:index_n] のすべての文字に適用されます。 * * The stylePatterns is a list whose elements have the form * [style : string, pattern : RegExp, DEPRECATED, shortcut : string]. * * stylePatterns は、次のような形式の要素を持つリストです。 * [style : string, pattern : RegExp, DEPRECATED, shortcut : string] * * Style is a style constant like PR_PLAIN, or can be a string of the * form 'lang-FOO', where FOO is a language extension describing the * language of the portion of the token in $1 after pattern executes. * E.g., if style is 'lang-lisp', and group 1 contains the text * '(hello (world))', then that portion of the token will be passed to the * registered lisp handler for formatting. * * スタイルは PR_PLAIN のようなスタイル定数、または 'lang-FOO' 形式の文字列で、 * FOO は pattern が実行された後の $1 内のトークンの部分の言語を記述する言語拡張です。 * 例えば、style が 'lang-lisp' で、group 1 にテキスト '(hello (world))' が含まれている場合、 * トークンのその部分はフォーマットのために登録された lisp ハンドラに渡されます。 * * The text before and after group 1 will be restyled using this decorator * so decorators should take care that this doesn't result in infinite * recursion. For example, the HTML lexer rule for SCRIPT elements looks * something like ['lang-js', /<[s]cript>(.+?)<\/script>/]. This may match * '<script>foo()<\/script>', which would cause the current decorator to * be called with '<script>' which would not match the same rule since * group 1 must not be empty, so it would be instead styled as PR_TAG by * the generic tag rule. The handler registered for the 'js' extension would * then be called with 'foo()', and finally, the current decorator would * be called with '<\/script>' which would not match the original rule and * so the generic tag rule would identify it as a tag. * * グループ1の前後のテキストは、このデコレーターを使って再スタイリングされますので、 * デコレーターは無限再帰にならないように注意する必要があります。 * 例えば、SCRIPT 要素の HTML レキサー・ルールは、['lang-js', /<[s]cript>(.+?)<\/script>/] のようになります。 * これは、'<script>foo()<\/script>' にマッチする可能性がありますが、この場合、現在のデコレータは '<script>' で呼び出されます。 * この場合、グループ1は空であってはならないので、同じルールにはマッチせず、代わりにジェネリック・タグ・ルールによって * PR_TAG としてスタイリングされることになります。 その後、拡張子 'js' で登録されたハンドラが 'foo()' で呼び出され、 * 最後に現在のデコレータが '<\/script>' で呼び出されます。これは元のルールとは一致しないので、一般的なタグルールではタグとして認識されます。 * * Pattern must only match prefixes, and if it matches a prefix, then that * match is considered a token with the same style. * * パターンは接頭辞にのみマッチしなければならず、接頭辞にマッチした場合、そのマッチは同じスタイルのトークンとみなされます。 * * Context is applied to the last non-whitespace, non-comment token * recognized. * * コンテキストは、最後に認識された非ホワイトスペース、非コメントのトークンに適用されます。 * * Shortcut is an optional string of characters, any of which, if the first * character, gurantee that this pattern and only this pattern matches. * * ショートカットはオプションの文字列で、そのうちのどれかが最初の文字であれば、 * このパターンだけにマッチすることを保証します。 * * @param {Array.<StylePattern>} shortcutStylePatterns patterns that always start with * a known character. Must have a shortcut string. * * 常に既知の文字で始まるパターン。 ショートカット文字列があること。 * * @param {Array.<StylePattern>} fallthroughStylePatterns patterns that will be tried in * order if the shortcut ones fail. May have shortcuts. * * ショートカットのものが失敗した場合に、順に試していくパターン。 ショートカットがある場合もあります。 * * @return {SimpleLexer} a function that takes an undecorated job and * attaches a list of decorations. */ createSimpleLexer = function( shortcutStylePatterns, fallthroughStylePatterns ){ var shortcuts = {}; var allPatterns = shortcutStylePatterns.concat( fallthroughStylePatterns ); var allRegexs = []; var regexKeys = {}; for( var i = 0, n = allPatterns.length; i < n; ++i ){ var patternParts = allPatterns[ i ]; var shortcutChars = patternParts[ 3 ]; if( shortcutChars ){ for( var c = shortcutChars.length; --c >= 0; ){ shortcuts[ shortcutChars.charAt( c ) ] = patternParts; }; }; var regex = patternParts[ 1 ]; var k = '' + regex; if( !regexKeys[ k ] ){ allRegexs.push( regex ); regexKeys[ k ] = true; }; }; allRegexs.push( reAllChars ); return [ shortcuts, combinePrefixPatterns( allRegexs ), fallthroughStylePatterns ]; }; var reAllChars = RegExpProxy( '[\0-\uffff]' ); /** * Lexes job.sourceCode and attaches an output array job.decorations of * style classes preceded by the position at which they start in * job.sourceCode in order. */ /** @type {StylePattern} */ var stylePatternTripleQuotedStrings = [ PR_STRING, RegExpProxy( "^(?:\\'\\'\\'(?:[^\\'\\\\]|\\\\[\\s\\S]|\\'{1,2}(?=[^\\']))*(?:\\'\\'\\'|$)|\\\"\\\"\\\"(?:[^\\\"\\\\]|\\\\[\\s\\S]|\\\"{1,2}(?=[^\\\"]))*(?:\\\"\\\"\\\"|$)|\\'(?:[^\\\\\\']|\\\\[\\s\\S])*(?:\\'|$)|\\\"(?:[^\\\\\\\"]|\\\\[\\s\\S])*(?:\\\"|$))" ), null, '\'"' ]; /** @type {StylePattern} */ var stylePatternMultiLineStrings = [ PR_STRING, RegExpProxy( "^(?:\\'(?:[^\\\\\\']|\\\\[\\s\\S])*(?:\\'|$)|\\\"(?:[^\\\\\\\"]|\\\\[\\s\\S])*(?:\\\"|$)|\\`(?:[^\\\\\\`]|\\\\[\\s\\S])*(?:\\`|$))" ), null, '\'"`' ]; /** @type {StylePattern} */ var stylePatternSingleLineStrings = [ PR_STRING, RegExpProxy( "^(?:\\'(?:[^\\\\\\'\\r\\n]|\\\\.)*(?:\\'|$)|\\\"(?:[^\\\\\\\"\\r\\n]|\\\\.)*(?:\\\"|$))" ), null, '"\'' ]; /** @type {StylePattern} */ var stylePatternVerbatimStrings = [ PR_STRING, RegExpProxy( "^@\\\"(?:[^\\\"]|\\\"\\\")*(?:\\\"|$)" ), null // TODO delete? ]; /** @type {StylePattern} */ var stylePatternMultiLineCStyleComments = [ PR_COMMENT, RegExpProxy( "^#(?:##(?:[^#]|#(?!##))*(?:###|$)|.*)" ), null, '#' ]; /** @type {StylePattern} */ var stylePatternSingleLineCStyleComments = [ PR_COMMENT, RegExpProxy( "^#(?:(?:define|e(?:l|nd)if|else|error|ifn?def|include|line|pragma|undef|warning)\\b|[^\\r\\n]*)" ), null, '#' ]; /** @type {StylePattern} */ var stylePatternCStyleHeaderFile = [ PR_STRING, RegExpProxy( "^<(?:(?:(?:\\.\\.\\/)*|\\/?)(?:[\\w-]+(?:\\/[\\w-]+)+)?[\\w-]+\\.h(?:h|pp|\\+\\+)?|[a-z]\\w*)>" ), null ]; /** @type {StylePattern} */ var stylePatternNotCStyleComments = [ PR_COMMENT, RegExpProxy( "^#[^\\r\\n]*" ), null, '#' ]; /** @type {StylePattern} */ var stylePatternCStyleComments1 = [ PR_COMMENT, RegExpProxy( "^\\/\\/[^\\r\\n]*" ), null ]; /** @type {StylePattern} */ var stylePatternCStyleComments2 = [ PR_COMMENT, RegExpProxy( "^\\/\\*[\\s\\S]*?(?:\\*\\/|$)" ), null ]; /** @type {StylePattern} */ var stylePatternRegexLiteralsMultiLine = [ 'lang-regex', RegExpProxy( '^' + REGEXP_PRECEDER_PATTERN + '(' + // A regular expression literal starts with a slash that is // not followed by * or / so that it is not confused with // comments. '/(?=[^/*' + '' + '])' // and then contains any number of raw characters, + '(?:[^/\\x5B\\x5C' + '' + ']' // escape sequences (\x5C), + '|\\x5C' + '.' // or non-nesting character sets (\x5B\x5D); + '|\\x5B(?:[^\\x5C\\x5D' + '' + ']' + '|\\x5C' + '.' + ')*(?:\\x5D|$))+' // finally closed by a /. + '/' + ')' ) ]; /** @type {StylePattern} */ var stylePatternRegexLiteralsSingleLine = [ 'lang-regex', RegExpProxy( '^' + REGEXP_PRECEDER_PATTERN + '(' + // A regular expression literal starts with a slash that is // not followed by * or / so that it is not confused with // comments. '/(?=[^/*' + '\n\r' + '])' // and then contains any number of raw characters, + '(?:[^/\\x5B\\x5C' + '\n\r' + ']' // escape sequences (\x5C), + '|\\x5C' + '[\\S\\s]' // or non-nesting character sets (\x5B\x5D); + '|\\x5B(?:[^\\x5C\\x5D' + '\n\r' + ']' + '|\\x5C' + '[\\S\\s]' + ')*(?:\\x5D|$))+' // finally closed by a /. + '/' + ')' ) ]; /** @type {StylePattern} */ var stylePatternWhiteSpace = [ PR_PLAIN, RegExpProxy( '^\\s+' ), null, ' \r\n\t\xA0' ]; /** @type {StylePattern} */ var stylePatternFallthrough1 = // TODO(mikesamuel): recognize non-latin letters and numerals in idents [ PR_LITERAL, RegExpProxy( '^@[a-z_$][a-z_$@0-9]*', 'i' ), null ]; /** @type {StylePattern} */ var stylePatternFallthrough2 = [ PR_TYPE, RegExpProxy( "^(?:[@_]?[A-Z]+[a-z][A-Za-z_$@0-9]*|\\w+_t\\b)" ), null ]; /** @type {StylePattern} */ var stylePatternFallthrough3 = [ PR_PLAIN, RegExpProxy( '^[a-z_$][a-z_$@0-9]*', 'i' ), null ]; /** @type {StylePattern} */ var stylePatternFallthrough4 = [ PR_LITERAL, RegExpProxy( '^(?:' // A hex number + '0x[a-f0-9]+' // or an octal or decimal number, + '|(?:\\d(?:_\\d+)*\\d*(?:\\.\\d*)?|\\.\\d\\+)' // possibly in scientific notation + '(?:e[+\\-]?\\d+)?' + ')' // with an optional modifier like UL for unsigned long + '[a-z]*', 'i'), null, '0123456789' ]; // Don't treat escaped quotes in bash as starting strings. // See issue 144. /** @type {StylePattern} */ var stylePatternFallthrough5 = [ PR_PLAIN, RegExpProxy( "^\\\\[\\s\\S]?" ), null ]; var punctuation = // The Bash man page says // A word is a sequence of characters considered as a single // unit by GRUB. Words are separated by metacharacters, // which are the following plus space, tab, and newline: { } // | & $ ; < > // ... // A word beginning with # causes that word and all remaining // characters on that line to be ignored. // which means that only a '#' after /(?:^|[{}|&$;<>\s])/ starts a // comment but empirically // $ echo {#} // {#} // $ echo \$# // $# // $ echo }# // }# // so /(?:^|[|&;<>\s])/ is more appropriate. // http://gcc.gnu.org/onlinedocs/gcc-2.95.3/cpp_1.html#SEC3 // suggests that this definition is compatible with a // default mode that tries to use a single token definition // to recognize both bash/python style comments and C // preprocessor directives. // This definition of punctuation does not include # in the list of // follow-on exclusions, so # will not be broken before if preceeded // by a punctuation character. We could try to exclude # after // [|&;<>] but that doesn't seem to cause many major problems. // If that does turn out to be a problem, we should change the below // when hc is truthy to include # in the run of punctuation characters // only when not followint [|&;<>]. '^.[^\\s\\w.$@\'"`/\\\\]*'; var rePunctuation = RegExpProxy( punctuation ); var rePunctuationMulti = RegExpProxy( punctuation + '(?!\s*\/)' ); /** returns a function that produces a list of decorations from source text. * * This code treats ", ', and ` as string delimiters, and \ as a string * escape. It does not recognize perl's qq() style strings. * It has no special handling for double delimiter escapes as in basic, or * the tripled delimiters used in python, but should work on those regardless * although in those cases a single string literal may be broken up into * multiple adjacent string literals. * * It recognizes C, C++, and shell style comments. * * @param {Object} options a set of optional parameters. * @return {SimpleLexer} a function that examines the source code * in the input job and builds a decoration list which it attaches to * the job. */ function createSimpleLexerFromOptionalParameters( options ){ var shortcutStylePatterns = [], fallthroughStylePatterns = []; if( options[ 'tripleQuotedStrings' ] ){ // '''multi-line-string''', 'single-line-string', and double-quoted shortcutStylePatterns.push( stylePatternTripleQuotedStrings ); } else if( options[ 'multiLineStrings' ] ){ // 'multi-line-string', "multi-line-string" shortcutStylePatterns.push( stylePatternMultiLineStrings ); } else { // 'single-line-string', "single-line-string" shortcutStylePatterns.push( stylePatternSingleLineStrings ); }; if( options[ 'verbatimStrings' ] ){ // verbatim-string-literal production from the C# grammar. See issue 93. fallthroughStylePatterns.push( stylePatternVerbatimStrings ); }; var hc = options[ 'hashComments' ]; if( hc ){ if( options[ 'cStyleComments' ] ){ if( hc > 1 ){ // multiline hash comments shortcutStylePatterns.push( stylePatternMultiLineCStyleComments ); } else { // Stop C preprocessor declarations at an unclosed open comment shortcutStylePatterns.push( stylePatternSingleLineCStyleComments ); }; // #include <stdio.h> fallthroughStylePatterns.push( stylePatternCStyleHeaderFile ); } else { shortcutStylePatterns.push( stylePatternNotCStyleComments ); }; }; if( options[ 'cStyleComments' ] ){ fallthroughStylePatterns.push( stylePatternCStyleComments1 ); fallthroughStylePatterns.push( stylePatternCStyleComments2 ); }; var regexLiterals = options[ 'regexLiterals' ]; if( regexLiterals ){ fallthroughStylePatterns.push( regexLiterals > 1 ? stylePatternRegexLiteralsMultiLine : stylePatternRegexLiteralsSingleLine ); }; var types = options[ 'types' ]; if( types ){ fallthroughStylePatterns.push( [ PR_TYPE, types ] ); }; var keywords = '' + options[ 'keywords' ]; if( keywords.charAt( 0 ) === ' ' ){ keywords = keywords.substr( 1 ); }; if( keywords.charAt( keywords.length - 1 ) === ' ' ){ keywords = keywords.substr( 0, keywords.length - 1 ); }; if( keywords ){ fallthroughStylePatterns.push( [ PR_KEYWORD, RegExpProxy( '^(?:' + keywords.split( ',' ).join( '|' ).split( ' ' ).join( '|' ) + ')\\b' ), null ] ); }; shortcutStylePatterns.push( stylePatternWhiteSpace ); fallthroughStylePatterns.push( stylePatternFallthrough1, stylePatternFallthrough2, stylePatternFallthrough3, stylePatternFallthrough4, stylePatternFallthrough5, [ PR_PUNCTUATION, regexLiterals ? rePunctuationMulti : rePunctuation, null ] ); return createSimpleLexer( shortcutStylePatterns, fallthroughStylePatterns ); };