UNPKG

ace-code-editor

Version:

Ajax.org Code Editor is a full featured source code highlighting editor that powers the Cloud9 IDE

1,618 lines (1,503 loc) 304 kB
define(function(require, exports, module) { module.exports = (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);throw new Error("Cannot find module '"+o+"'")}var f=n[o]={exports:{}};t[o][0].call(f.exports,function(e){var n=t[o][1][e];return s(n?n:e)},f,f.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({ 1:[function(_dereq_,module,exports){ function isScopeMarker(node) { if (node.namespaceURI === "http://www.w3.org/1999/xhtml") { return node.localName === "applet" || node.localName === "caption" || node.localName === "marquee" || node.localName === "object" || node.localName === "table" || node.localName === "td" || node.localName === "th"; } if (node.namespaceURI === "http://www.w3.org/1998/Math/MathML") { return node.localName === "mi" || node.localName === "mo" || node.localName === "mn" || node.localName === "ms" || node.localName === "mtext" || node.localName === "annotation-xml"; } if (node.namespaceURI === "http://www.w3.org/2000/svg") { return node.localName === "foreignObject" || node.localName === "desc" || node.localName === "title"; } } function isListItemScopeMarker(node) { return isScopeMarker(node) || (node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'ol') || (node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'ul'); } function isTableScopeMarker(node) { return (node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'table') || (node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'html'); } function isTableBodyScopeMarker(node) { return (node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'tbody') || (node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'tfoot') || (node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'thead') || (node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'html'); } function isTableRowScopeMarker(node) { return (node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'tr') || (node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'html'); } function isButtonScopeMarker(node) { return isScopeMarker(node) || (node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'button'); } function isSelectScopeMarker(node) { return !(node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'optgroup') && !(node.namespaceURI === "http://www.w3.org/1999/xhtml" && node.localName === 'option'); } /** * Represents a stack of open elements * @constructor */ function ElementStack() { this.elements = []; this.rootNode = null; this.headElement = null; this.bodyElement = null; } /** * * @param {String} localName * @param {Function} isMarker * @return {Boolean} * @private */ ElementStack.prototype._inScope = function(localName, isMarker) { for (var i = this.elements.length - 1; i >= 0; i--) { var node = this.elements[i]; if (node.localName === localName) return true; if (isMarker(node)) return false; } }; /** * Pushes the item on the stack top * @param {StackItem} item */ ElementStack.prototype.push = function(item) { this.elements.push(item); }; /** * Pushes the item on the stack top * @param {StackItem} item HTML element stack item */ ElementStack.prototype.pushHtmlElement = function(item) { this.rootNode = item.node; this.push(item); }; /** * Pushes the item on the stack top * @param {StackItem} item HEAD element stack item */ ElementStack.prototype.pushHeadElement = function(item) { this.headElement = item.node; this.push(item); }; /** * Pushes the item on the stack top * @param {StackItem} item BODY element stack item */ ElementStack.prototype.pushBodyElement = function(item) { this.bodyElement = item.node; this.push(item); }; /** * Pops the topmost item * @return {StackItem} */ ElementStack.prototype.pop = function() { return this.elements.pop(); }; /** * Removes the item from the element stack * @param {StackItem} item The item to remove */ ElementStack.prototype.remove = function(item) { this.elements.splice(this.elements.indexOf(item), 1); }; /** * Pops until an element with a given localName is popped * @param {String} localName */ ElementStack.prototype.popUntilPopped = function(localName) { var element; do { element = this.pop(); } while (element.localName != localName); }; ElementStack.prototype.popUntilTableScopeMarker = function() { while (!isTableScopeMarker(this.top)) this.pop(); }; ElementStack.prototype.popUntilTableBodyScopeMarker = function() { while (!isTableBodyScopeMarker(this.top)) this.pop(); }; ElementStack.prototype.popUntilTableRowScopeMarker = function() { while (!isTableRowScopeMarker(this.top)) this.pop(); }; /** * * @param {Number} index * @return {StackItem} */ ElementStack.prototype.item = function(index) { return this.elements[index]; }; /** * * @param {StackItem} element * @return {Boolean} */ ElementStack.prototype.contains = function(element) { return this.elements.indexOf(element) !== -1; }; /** * * @param {String} localName * @return {Boolean} */ ElementStack.prototype.inScope = function(localName) { return this._inScope(localName, isScopeMarker); }; /** * * @param {String} localName * @return {Boolean} */ ElementStack.prototype.inListItemScope = function(localName) { return this._inScope(localName, isListItemScopeMarker); }; /** * * @param {String} localName * @return {Boolean} */ ElementStack.prototype.inTableScope = function(localName) { return this._inScope(localName, isTableScopeMarker); }; /** * * @param {String} localName * @return {Boolean} */ ElementStack.prototype.inButtonScope = function(localName) { return this._inScope(localName, isButtonScopeMarker); }; /** * * @param {String} localName * @return {Boolean} */ ElementStack.prototype.inSelectScope = function(localName) { return this._inScope(localName, isSelectScopeMarker); }; /** * * @return {Boolean} */ ElementStack.prototype.hasNumberedHeaderElementInScope = function() { for (var i = this.elements.length - 1; i >= 0; i--) { var node = this.elements[i]; if (node.isNumberedHeader()) return true; if (isScopeMarker(node)) return false; } }; /** * * @param {Object} element * @return {StackItem} */ ElementStack.prototype.furthestBlockForFormattingElement = function(element) { var furthestBlock = null; for (var i = this.elements.length - 1; i >= 0; i--) { var node = this.elements[i]; if (node.node === element) break; if (node.isSpecial()) furthestBlock = node; } return furthestBlock; }; /** * * @param {String} localName * @return {Number} */ ElementStack.prototype.findIndex = function(localName) { for (var i = this.elements.length - 1; i >= 0; i--) { if (this.elements[i].localName == localName) return i; } return -1; }; ElementStack.prototype.remove_openElements_until = function(callback) { var finished = false; var element; while (!finished) { element = this.elements.pop(); finished = callback(element); } return element; }; Object.defineProperty(ElementStack.prototype, 'top', { get: function() { return this.elements[this.elements.length - 1]; } }); Object.defineProperty(ElementStack.prototype, 'length', { get: function() { return this.elements.length; } }); exports.ElementStack = ElementStack; }, {}], 2:[function(_dereq_,module,exports){ var entities = _dereq_('html5-entities'); var InputStream = _dereq_('./InputStream').InputStream; var namedEntityPrefixes = {}; Object.keys(entities).forEach(function (entityKey) { for (var i = 0; i < entityKey.length; i++) { namedEntityPrefixes[entityKey.substring(0, i + 1)] = true; } }); function isAlphaNumeric(c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } function isHexDigit(c) { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } function isDecimalDigit(c) { return (c >= '0' && c <= '9'); } var EntityParser = {}; EntityParser.consumeEntity = function(buffer, tokenizer, additionalAllowedCharacter) { var decodedCharacter = ''; var consumedCharacters = ''; var ch = buffer.char(); if (ch === InputStream.EOF) return false; consumedCharacters += ch; if (ch == '\t' || ch == '\n' || ch == '\v' || ch == ' ' || ch == '<' || ch == '&') { buffer.unget(consumedCharacters); return false; } if (additionalAllowedCharacter === ch) { buffer.unget(consumedCharacters); return false; } if (ch == '#') { ch = buffer.shift(1); if (ch === InputStream.EOF) { tokenizer._parseError("expected-numeric-entity-but-got-eof"); buffer.unget(consumedCharacters); return false; } consumedCharacters += ch; var radix = 10; var isDigit = isDecimalDigit; if (ch == 'x' || ch == 'X') { radix = 16; isDigit = isHexDigit; ch = buffer.shift(1); if (ch === InputStream.EOF) { tokenizer._parseError("expected-numeric-entity-but-got-eof"); buffer.unget(consumedCharacters); return false; } consumedCharacters += ch; } if (isDigit(ch)) { var code = ''; while (ch !== InputStream.EOF && isDigit(ch)) { code += ch; ch = buffer.char(); } code = parseInt(code, radix); var replacement = this.replaceEntityNumbers(code); if (replacement) { tokenizer._parseError("invalid-numeric-entity-replaced"); code = replacement; } if (code > 0xFFFF && code <= 0x10FFFF) { // we substract 0x10000 from cp to get a 20-bits number // in the range 0..0xFFFF code -= 0x10000; // we add 0xD800 to the number formed by the first 10 bits // to give the first byte var first = ((0xffc00 & code) >> 10) + 0xD800; // we add 0xDC00 to the number formed by the low 10 bits // to give the second byte var second = (0x3ff & code) + 0xDC00; decodedCharacter = String.fromCharCode(first, second); } else decodedCharacter = String.fromCharCode(code); if (ch !== ';') { tokenizer._parseError("numeric-entity-without-semicolon"); buffer.unget(ch); } return decodedCharacter; } buffer.unget(consumedCharacters); tokenizer._parseError("expected-numeric-entity"); return false; } if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { var mostRecentMatch = ''; while (namedEntityPrefixes[consumedCharacters]) { if (entities[consumedCharacters]) { mostRecentMatch = consumedCharacters; } if (ch == ';') break; ch = buffer.char(); if (ch === InputStream.EOF) break; consumedCharacters += ch; } if (!mostRecentMatch) { tokenizer._parseError("expected-named-entity"); buffer.unget(consumedCharacters); return false; } decodedCharacter = entities[mostRecentMatch]; if (ch === ';' || !additionalAllowedCharacter || !(isAlphaNumeric(ch) || ch === '=')) { if (consumedCharacters.length > mostRecentMatch.length) { buffer.unget(consumedCharacters.substring(mostRecentMatch.length)); } if (ch !== ';') { tokenizer._parseError("named-entity-without-semicolon"); } return decodedCharacter; } buffer.unget(consumedCharacters); return false; } }; EntityParser.replaceEntityNumbers = function(c) { switch(c) { case 0x00: return 0xFFFD; // REPLACEMENT CHARACTER case 0x13: return 0x0010; // Carriage return case 0x80: return 0x20AC; // EURO SIGN case 0x81: return 0x0081; // <control> case 0x82: return 0x201A; // SINGLE LOW-9 QUOTATION MARK case 0x83: return 0x0192; // LATIN SMALL LETTER F WITH HOOK case 0x84: return 0x201E; // DOUBLE LOW-9 QUOTATION MARK case 0x85: return 0x2026; // HORIZONTAL ELLIPSIS case 0x86: return 0x2020; // DAGGER case 0x87: return 0x2021; // DOUBLE DAGGER case 0x88: return 0x02C6; // MODIFIER LETTER CIRCUMFLEX ACCENT case 0x89: return 0x2030; // PER MILLE SIGN case 0x8A: return 0x0160; // LATIN CAPITAL LETTER S WITH CARON case 0x8B: return 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK case 0x8C: return 0x0152; // LATIN CAPITAL LIGATURE OE case 0x8D: return 0x008D; // <control> case 0x8E: return 0x017D; // LATIN CAPITAL LETTER Z WITH CARON case 0x8F: return 0x008F; // <control> case 0x90: return 0x0090; // <control> case 0x91: return 0x2018; // LEFT SINGLE QUOTATION MARK case 0x92: return 0x2019; // RIGHT SINGLE QUOTATION MARK case 0x93: return 0x201C; // LEFT DOUBLE QUOTATION MARK case 0x94: return 0x201D; // RIGHT DOUBLE QUOTATION MARK case 0x95: return 0x2022; // BULLET case 0x96: return 0x2013; // EN DASH case 0x97: return 0x2014; // EM DASH case 0x98: return 0x02DC; // SMALL TILDE case 0x99: return 0x2122; // TRADE MARK SIGN case 0x9A: return 0x0161; // LATIN SMALL LETTER S WITH CARON case 0x9B: return 0x203A; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK case 0x9C: return 0x0153; // LATIN SMALL LIGATURE OE case 0x9D: return 0x009D; // <control> case 0x9E: return 0x017E; // LATIN SMALL LETTER Z WITH CARON case 0x9F: return 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS default: if ((c >= 0xD800 && c <= 0xDFFF) || c > 0x10FFFF) { return 0xFFFD; } else if ((c >= 0x0001 && c <= 0x0008) || (c >= 0x000E && c <= 0x001F) || (c >= 0x007F && c <= 0x009F) || (c >= 0xFDD0 && c <= 0xFDEF) || c == 0x000B || c == 0xFFFE || c == 0x1FFFE || c == 0x2FFFFE || c == 0x2FFFF || c == 0x3FFFE || c == 0x3FFFF || c == 0x4FFFE || c == 0x4FFFF || c == 0x5FFFE || c == 0x5FFFF || c == 0x6FFFE || c == 0x6FFFF || c == 0x7FFFE || c == 0x7FFFF || c == 0x8FFFE || c == 0x8FFFF || c == 0x9FFFE || c == 0x9FFFF || c == 0xAFFFE || c == 0xAFFFF || c == 0xBFFFE || c == 0xBFFFF || c == 0xCFFFE || c == 0xCFFFF || c == 0xDFFFE || c == 0xDFFFF || c == 0xEFFFE || c == 0xEFFFF || c == 0xFFFFE || c == 0xFFFFF || c == 0x10FFFE || c == 0x10FFFF) { return c; } } }; exports.EntityParser = EntityParser; }, {"./InputStream":3,"html5-entities":12}], 3:[function(_dereq_,module,exports){ // FIXME convert CR to LF http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#input-stream function InputStream() { this.data = ''; this.start = 0; this.committed = 0; this.eof = false; this.lastLocation = {line: 0, column: 0}; } InputStream.EOF = -1; InputStream.DRAIN = -2; InputStream.prototype = { slice: function() { if(this.start >= this.data.length) { if(!this.eof) throw InputStream.DRAIN; return InputStream.EOF; } return this.data.slice(this.start, this.data.length); }, char: function() { if(!this.eof && this.start >= this.data.length - 1) throw InputStream.DRAIN; if(this.start >= this.data.length) { return InputStream.EOF; } var ch = this.data[this.start++]; if (ch === '\r') ch = '\n'; return ch; }, advance: function(amount) { this.start += amount; if(this.start >= this.data.length) { if(!this.eof) throw InputStream.DRAIN; return InputStream.EOF; } else { if(this.committed > this.data.length / 2) { // Sliiiide this.lastLocation = this.location(); this.data = this.data.slice(this.committed); this.start = this.start - this.committed; this.committed = 0; } } }, matchWhile: function(re) { if(this.eof && this.start >= this.data.length ) return ''; var r = new RegExp("^"+re+"+"); var m = r.exec(this.slice()); if(m) { if(!this.eof && m[0].length == this.data.length - this.start) throw InputStream.DRAIN; this.advance(m[0].length); return m[0]; } else { return ''; } }, matchUntil: function(re) { var m, s; s = this.slice(); if(s === InputStream.EOF) { return ''; } else if(m = new RegExp(re + (this.eof ? "|$" : "")).exec(s)) { var t = this.data.slice(this.start, this.start + m.index); this.advance(m.index); return t.replace(/\r/g, '\n').replace(/\n{2,}/g, '\n'); } else { throw InputStream.DRAIN; } }, append: function(data) { this.data += data; }, shift: function(n) { if(!this.eof && this.start + n >= this.data.length) throw InputStream.DRAIN; if(this.eof && this.start >= this.data.length) return InputStream.EOF; var d = this.data.slice(this.start, this.start + n).toString(); this.advance(Math.min(n, this.data.length - this.start)); return d; }, peek: function(n) { if(!this.eof && this.start + n >= this.data.length) throw InputStream.DRAIN; if(this.eof && this.start >= this.data.length) return InputStream.EOF; return this.data.slice(this.start, Math.min(this.start + n, this.data.length)).toString(); }, length: function() { return this.data.length - this.start - 1; }, unget: function(d) { if(d === InputStream.EOF) return; this.start -= (d.length); }, undo: function() { this.start = this.committed; }, commit: function() { this.committed = this.start; }, location: function() { var lastLine = this.lastLocation.line; var lastColumn = this.lastLocation.column; var read = this.data.slice(0, this.committed); var newlines = read.match(/\n/g); var line = newlines ? lastLine + newlines.length : lastLine; var column = newlines ? read.length - read.lastIndexOf('\n') - 1 : lastColumn + read.length; return {line: line, column: column}; } }; exports.InputStream = InputStream; }, {}], 4:[function(_dereq_,module,exports){ var SpecialElements = { "http://www.w3.org/1999/xhtml": [ 'address', 'applet', 'area', 'article', 'aside', 'base', 'basefont', 'bgsound', 'blockquote', 'body', 'br', 'button', 'caption', 'center', 'col', 'colgroup', 'dd', 'details', 'dir', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'head', 'header', 'hgroup', 'hr', 'html', 'iframe', 'img', 'input', 'isindex', 'li', 'link', 'listing', 'main', 'marquee', 'menu', 'menuitem', 'meta', 'nav', 'noembed', 'noframes', 'noscript', 'object', 'ol', 'p', 'param', 'plaintext', 'pre', 'script', 'section', 'select', 'source', 'style', 'summary', 'table', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'title', 'tr', 'track', 'ul', 'wbr', 'xmp' ], "http://www.w3.org/1998/Math/MathML": [ 'mi', 'mo', 'mn', 'ms', 'mtext', 'annotation-xml' ], "http://www.w3.org/2000/svg": [ 'foreignObject', 'desc', 'title' ] }; function StackItem(namespaceURI, localName, attributes, node) { this.localName = localName; this.namespaceURI = namespaceURI; this.attributes = attributes; this.node = node; } // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special StackItem.prototype.isSpecial = function() { return this.namespaceURI in SpecialElements && SpecialElements[this.namespaceURI].indexOf(this.localName) > -1; }; StackItem.prototype.isFosterParenting = function() { if (this.namespaceURI === "http://www.w3.org/1999/xhtml") { return this.localName === 'table' || this.localName === 'tbody' || this.localName === 'tfoot' || this.localName === 'thead' || this.localName === 'tr'; } return false; }; StackItem.prototype.isNumberedHeader = function() { if (this.namespaceURI === "http://www.w3.org/1999/xhtml") { return this.localName === 'h1' || this.localName === 'h2' || this.localName === 'h3' || this.localName === 'h4' || this.localName === 'h5' || this.localName === 'h6'; } return false; }; StackItem.prototype.isForeign = function() { return this.namespaceURI != "http://www.w3.org/1999/xhtml"; }; function getAttribute(item, name) { for (var i = 0; i < item.attributes.length; i++) { if (item.attributes[i].nodeName == name) return item.attributes[i].nodeValue; } return null; } StackItem.prototype.isHtmlIntegrationPoint = function() { if (this.namespaceURI === "http://www.w3.org/1998/Math/MathML") { if (this.localName !== "annotation-xml") return false; var encoding = getAttribute(this, 'encoding'); if (!encoding) return false; encoding = encoding.toLowerCase(); return encoding === "text/html" || encoding === "application/xhtml+xml"; } if (this.namespaceURI === "http://www.w3.org/2000/svg") { return this.localName === "foreignObject" || this.localName === "desc" || this.localName === "title"; } return false; }; StackItem.prototype.isMathMLTextIntegrationPoint = function() { if (this.namespaceURI === "http://www.w3.org/1998/Math/MathML") { return this.localName === "mi" || this.localName === "mo" || this.localName === "mn" || this.localName === "ms" || this.localName === "mtext"; } return false; }; exports.StackItem = StackItem; }, {}], 5:[function(_dereq_,module,exports){ var InputStream = _dereq_('./InputStream').InputStream; var EntityParser = _dereq_('./EntityParser').EntityParser; function isWhitespace(c){ return c === " " || c === "\n" || c === "\t" || c === "\r" || c === "\f"; } function isAlpha(c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); } /** * * @param {Object} tokenHandler * @constructor */ function Tokenizer(tokenHandler) { this._tokenHandler = tokenHandler; this._state = Tokenizer.DATA; this._inputStream = new InputStream(); this._currentToken = null; this._temporaryBuffer = ''; this._additionalAllowedCharacter = ''; } Tokenizer.prototype._parseError = function(code, args) { this._tokenHandler.parseError(code, args); }; Tokenizer.prototype._emitToken = function(token) { if (token.type === 'StartTag') { for (var i = 1; i < token.data.length; i++) { if (!token.data[i].nodeName) token.data.splice(i--, 1); } } else if (token.type === 'EndTag') { if (token.selfClosing) { this._parseError('self-closing-flag-on-end-tag'); } if (token.data.length !== 0) { this._parseError('attributes-in-end-tag'); } } this._tokenHandler.processToken(token); if (token.type === 'StartTag' && token.selfClosing && !this._tokenHandler.isSelfClosingFlagAcknowledged()) { this._parseError('non-void-element-with-trailing-solidus', {name: token.name}); } }; Tokenizer.prototype._emitCurrentToken = function() { this._state = Tokenizer.DATA; this._emitToken(this._currentToken); }; Tokenizer.prototype._currentAttribute = function() { return this._currentToken.data[this._currentToken.data.length - 1]; }; Tokenizer.prototype.setState = function(state) { this._state = state; }; Tokenizer.prototype.tokenize = function(source) { // FIXME proper tokenizer states Tokenizer.DATA = data_state; Tokenizer.RCDATA = rcdata_state; Tokenizer.RAWTEXT = rawtext_state; Tokenizer.SCRIPT_DATA = script_data_state; Tokenizer.PLAINTEXT = plaintext_state; this._state = Tokenizer.DATA; this._inputStream.append(source); this._tokenHandler.startTokenization(this); this._inputStream.eof = true; var tokenizer = this; while (this._state.call(this, this._inputStream)); function data_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._emitToken({type: 'EOF', data: null}); return false; } else if (data === '&') { tokenizer.setState(character_reference_in_data_state); } else if (data === '<') { tokenizer.setState(tag_open_state); } else if (data === '\u0000') { tokenizer._emitToken({type: 'Characters', data: data}); buffer.commit(); } else { var chars = buffer.matchUntil("&|<|\u0000"); tokenizer._emitToken({type: 'Characters', data: data + chars}); buffer.commit(); } return true; } function character_reference_in_data_state(buffer) { var character = EntityParser.consumeEntity(buffer, tokenizer); tokenizer.setState(data_state); tokenizer._emitToken({type: 'Characters', data: character || '&'}); return true; } function rcdata_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._emitToken({type: 'EOF', data: null}); return false; } else if (data === '&') { tokenizer.setState(character_reference_in_rcdata_state); } else if (data === '<') { tokenizer.setState(rcdata_less_than_sign_state); } else if (data === "\u0000") { tokenizer._parseError("invalid-codepoint"); tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); buffer.commit(); } else { var chars = buffer.matchUntil("&|<|\u0000"); tokenizer._emitToken({type: 'Characters', data: data + chars}); buffer.commit(); } return true; } function character_reference_in_rcdata_state(buffer) { var character = EntityParser.consumeEntity(buffer, tokenizer); tokenizer.setState(rcdata_state); tokenizer._emitToken({type: 'Characters', data: character || '&'}); return true; } function rawtext_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._emitToken({type: 'EOF', data: null}); return false; } else if (data === '<') { tokenizer.setState(rawtext_less_than_sign_state); } else if (data === "\u0000") { tokenizer._parseError("invalid-codepoint"); tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); buffer.commit(); } else { var chars = buffer.matchUntil("<|\u0000"); tokenizer._emitToken({type: 'Characters', data: data + chars}); } return true; } function plaintext_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._emitToken({type: 'EOF', data: null}); return false; } else if (data === "\u0000") { tokenizer._parseError("invalid-codepoint"); tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); buffer.commit(); } else { var chars = buffer.matchUntil("\u0000"); tokenizer._emitToken({type: 'Characters', data: data + chars}); } return true; } function script_data_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._emitToken({type: 'EOF', data: null}); return false; } else if (data === '<') { tokenizer.setState(script_data_less_than_sign_state); } else if (data === '\u0000') { tokenizer._parseError("invalid-codepoint"); tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); buffer.commit(); } else { var chars = buffer.matchUntil("<|\u0000"); tokenizer._emitToken({type: 'Characters', data: data + chars}); } return true; } function rcdata_less_than_sign_state(buffer) { var data = buffer.char(); if (data === "/") { this._temporaryBuffer = ''; tokenizer.setState(rcdata_end_tag_open_state); } else { tokenizer._emitToken({type: 'Characters', data: '<'}); buffer.unget(data); tokenizer.setState(rcdata_state); } return true; } function rcdata_end_tag_open_state(buffer) { var data = buffer.char(); if (isAlpha(data)) { this._temporaryBuffer += data; tokenizer.setState(rcdata_end_tag_name_state); } else { tokenizer._emitToken({type: 'Characters', data: '</'}); buffer.unget(data); tokenizer.setState(rcdata_state); } return true; } function rcdata_end_tag_name_state(buffer) { var appropriate = tokenizer._currentToken && (tokenizer._currentToken.name === this._temporaryBuffer.toLowerCase()); var data = buffer.char(); if (isWhitespace(data) && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: this._temporaryBuffer, data: [], selfClosing: false}; tokenizer.setState(before_attribute_name_state); } else if (data === '/' && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: this._temporaryBuffer, data: [], selfClosing: false}; tokenizer.setState(self_closing_tag_state); } else if (data === '>' && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: this._temporaryBuffer, data: [], selfClosing: false}; tokenizer._emitCurrentToken(); tokenizer.setState(data_state); } else if (isAlpha(data)) { this._temporaryBuffer += data; buffer.commit(); } else { tokenizer._emitToken({type: 'Characters', data: '</' + this._temporaryBuffer}); buffer.unget(data); tokenizer.setState(rcdata_state); } return true; } function rawtext_less_than_sign_state(buffer) { var data = buffer.char(); if (data === "/") { this._temporaryBuffer = ''; tokenizer.setState(rawtext_end_tag_open_state); } else { tokenizer._emitToken({type: 'Characters', data: '<'}); buffer.unget(data); tokenizer.setState(rawtext_state); } return true; } function rawtext_end_tag_open_state(buffer) { var data = buffer.char(); if (isAlpha(data)) { this._temporaryBuffer += data; tokenizer.setState(rawtext_end_tag_name_state); } else { tokenizer._emitToken({type: 'Characters', data: '</'}); buffer.unget(data); tokenizer.setState(rawtext_state); } return true; } function rawtext_end_tag_name_state(buffer) { var appropriate = tokenizer._currentToken && (tokenizer._currentToken.name === this._temporaryBuffer.toLowerCase()); var data = buffer.char(); if (isWhitespace(data) && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: this._temporaryBuffer, data: [], selfClosing: false}; tokenizer.setState(before_attribute_name_state); } else if (data === '/' && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: this._temporaryBuffer, data: [], selfClosing: false}; tokenizer.setState(self_closing_tag_state); } else if (data === '>' && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: this._temporaryBuffer, data: [], selfClosing: false}; tokenizer._emitCurrentToken(); tokenizer.setState(data_state); } else if (isAlpha(data)) { this._temporaryBuffer += data; buffer.commit(); } else { tokenizer._emitToken({type: 'Characters', data: '</' + this._temporaryBuffer}); buffer.unget(data); tokenizer.setState(rawtext_state); } return true; } function script_data_less_than_sign_state(buffer) { var data = buffer.char(); if (data === "/") { this._temporaryBuffer = ''; tokenizer.setState(script_data_end_tag_open_state); } else if (data === '!') { tokenizer._emitToken({type: 'Characters', data: '<!'}); tokenizer.setState(script_data_escape_start_state); } else { tokenizer._emitToken({type: 'Characters', data: '<'}); buffer.unget(data); tokenizer.setState(script_data_state); } return true; } function script_data_end_tag_open_state(buffer) { var data = buffer.char(); if (isAlpha(data)) { this._temporaryBuffer += data; tokenizer.setState(script_data_end_tag_name_state); } else { tokenizer._emitToken({type: 'Characters', data: '</'}); buffer.unget(data); tokenizer.setState(script_data_state); } return true; } function script_data_end_tag_name_state(buffer) { var appropriate = tokenizer._currentToken && (tokenizer._currentToken.name === this._temporaryBuffer.toLowerCase()); var data = buffer.char(); if (isWhitespace(data) && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false}; tokenizer.setState(before_attribute_name_state); } else if (data === '/' && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false}; tokenizer.setState(self_closing_tag_state); } else if (data === '>' && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false}; tokenizer._emitCurrentToken(); } else if (isAlpha(data)) { this._temporaryBuffer += data; buffer.commit(); } else { tokenizer._emitToken({type: 'Characters', data: '</' + this._temporaryBuffer}); buffer.unget(data); tokenizer.setState(script_data_state); } return true; } function script_data_escape_start_state(buffer) { var data = buffer.char(); if (data === '-') { tokenizer._emitToken({type: 'Characters', data: '-'}); tokenizer.setState(script_data_escape_start_dash_state); } else { buffer.unget(data); tokenizer.setState(script_data_state); } return true; } function script_data_escape_start_dash_state(buffer) { var data = buffer.char(); if (data === '-') { tokenizer._emitToken({type: 'Characters', data: '-'}); tokenizer.setState(script_data_escaped_dash_dash_state); } else { buffer.unget(data); tokenizer.setState(script_data_state); } return true; } function script_data_escaped_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { buffer.unget(data); tokenizer.setState(data_state); } else if (data === '-') { tokenizer._emitToken({type: 'Characters', data: '-'}); tokenizer.setState(script_data_escaped_dash_state); } else if (data === '<') { tokenizer.setState(script_data_escaped_less_then_sign_state); } else if (data === '\u0000') { tokenizer._parseError("invalid-codepoint"); tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); buffer.commit(); } else { var chars = buffer.matchUntil('<|-|\u0000'); tokenizer._emitToken({type: 'Characters', data: data + chars}); } return true; } function script_data_escaped_dash_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { buffer.unget(data); tokenizer.setState(data_state); } else if (data === '-') { tokenizer._emitToken({type: 'Characters', data: '-'}); tokenizer.setState(script_data_escaped_dash_dash_state); } else if (data === '<') { tokenizer.setState(script_data_escaped_less_then_sign_state); } else if (data === '\u0000') { tokenizer._parseError("invalid-codepoint"); tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); tokenizer.setState(script_data_escaped_state); } else { tokenizer._emitToken({type: 'Characters', data: data}); tokenizer.setState(script_data_escaped_state); } return true; } function script_data_escaped_dash_dash_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._parseError('eof-in-script'); buffer.unget(data); tokenizer.setState(data_state); } else if (data === '<') { tokenizer.setState(script_data_escaped_less_then_sign_state); } else if (data === '>') { tokenizer._emitToken({type: 'Characters', data: '>'}); tokenizer.setState(script_data_state); } else if (data === '\u0000') { tokenizer._parseError("invalid-codepoint"); tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); tokenizer.setState(script_data_escaped_state); } else { tokenizer._emitToken({type: 'Characters', data: data}); tokenizer.setState(script_data_escaped_state); } return true; } function script_data_escaped_less_then_sign_state(buffer) { var data = buffer.char(); if (data === '/') { this._temporaryBuffer = ''; tokenizer.setState(script_data_escaped_end_tag_open_state); } else if (isAlpha(data)) { tokenizer._emitToken({type: 'Characters', data: '<' + data}); this._temporaryBuffer = data; tokenizer.setState(script_data_double_escape_start_state); } else { tokenizer._emitToken({type: 'Characters', data: '<'}); buffer.unget(data); tokenizer.setState(script_data_escaped_state); } return true; } function script_data_escaped_end_tag_open_state(buffer) { var data = buffer.char(); if (isAlpha(data)) { this._temporaryBuffer = data; tokenizer.setState(script_data_escaped_end_tag_name_state); } else { tokenizer._emitToken({type: 'Characters', data: '</'}); buffer.unget(data); tokenizer.setState(script_data_escaped_state); } return true; } function script_data_escaped_end_tag_name_state(buffer) { var appropriate = tokenizer._currentToken && (tokenizer._currentToken.name === this._temporaryBuffer.toLowerCase()); var data = buffer.char(); if (isWhitespace(data) && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false}; tokenizer.setState(before_attribute_name_state); } else if (data === '/' && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false}; tokenizer.setState(self_closing_tag_state); } else if (data === '>' && appropriate) { tokenizer._currentToken = {type: 'EndTag', name: 'script', data: [], selfClosing: false}; tokenizer.setState(data_state); tokenizer._emitCurrentToken(); } else if (isAlpha(data)) { this._temporaryBuffer += data; buffer.commit(); } else { tokenizer._emitToken({type: 'Characters', data: '</' + this._temporaryBuffer}); buffer.unget(data); tokenizer.setState(script_data_escaped_state); } return true; } function script_data_double_escape_start_state(buffer) { var data = buffer.char(); if (isWhitespace(data) || data === '/' || data === '>') { tokenizer._emitToken({type: 'Characters', data: data}); if (this._temporaryBuffer.toLowerCase() === 'script') tokenizer.setState(script_data_double_escaped_state); else tokenizer.setState(script_data_escaped_state); } else if (isAlpha(data)) { tokenizer._emitToken({type: 'Characters', data: data}); this._temporaryBuffer += data; buffer.commit(); } else { buffer.unget(data); tokenizer.setState(script_data_escaped_state); } return true; } function script_data_double_escaped_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._parseError('eof-in-script'); buffer.unget(data); tokenizer.setState(data_state); } else if (data === '-') { tokenizer._emitToken({type: 'Characters', data: '-'}); tokenizer.setState(script_data_double_escaped_dash_state); } else if (data === '<') { tokenizer._emitToken({type: 'Characters', data: '<'}); tokenizer.setState(script_data_double_escaped_less_than_sign_state); } else if (data === '\u0000') { tokenizer._parseError('invalid-codepoint'); tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); buffer.commit(); } else { tokenizer._emitToken({type: 'Characters', data: data}); buffer.commit(); } return true; } function script_data_double_escaped_dash_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._parseError('eof-in-script'); buffer.unget(data); tokenizer.setState(data_state); } else if (data === '-') { tokenizer._emitToken({type: 'Characters', data: '-'}); tokenizer.setState(script_data_double_escaped_dash_dash_state); } else if (data === '<') { tokenizer._emitToken({type: 'Characters', data: '<'}); tokenizer.setState(script_data_double_escaped_less_than_sign_state); } else if (data === '\u0000') { tokenizer._parseError('invalid-codepoint'); tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); tokenizer.setState(script_data_double_escaped_state); } else { tokenizer._emitToken({type: 'Characters', data: data}); tokenizer.setState(script_data_double_escaped_state); } return true; } function script_data_double_escaped_dash_dash_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._parseError('eof-in-script'); buffer.unget(data); tokenizer.setState(data_state); } else if (data === '-') { tokenizer._emitToken({type: 'Characters', data: '-'}); buffer.commit(); } else if (data === '<') { tokenizer._emitToken({type: 'Characters', data: '<'}); tokenizer.setState(script_data_double_escaped_less_than_sign_state); } else if (data === '>') { tokenizer._emitToken({type: 'Characters', data: '>'}); tokenizer.setState(script_data_state); } else if (data === '\u0000') { tokenizer._parseError('invalid-codepoint'); tokenizer._emitToken({type: 'Characters', data: '\uFFFD'}); tokenizer.setState(script_data_double_escaped_state); } else { tokenizer._emitToken({type: 'Characters', data: data}); tokenizer.setState(script_data_double_escaped_state); } return true; } function script_data_double_escaped_less_than_sign_state(buffer) { var data = buffer.char(); if (data === '/') { tokenizer._emitToken({type: 'Characters', data: '/'}); this._temporaryBuffer = ''; tokenizer.setState(script_data_double_escape_end_state); } else { buffer.unget(data); tokenizer.setState(script_data_double_escaped_state); } return true; } function script_data_double_escape_end_state(buffer) { var data = buffer.char(); if (isWhitespace(data) || data === '/' || data === '>') { tokenizer._emitToken({type: 'Characters', data: data}); if (this._temporaryBuffer.toLowerCase() === 'script') tokenizer.setState(script_data_escaped_state); else tokenizer.setState(script_data_double_escaped_state); } else if (isAlpha(data)) { tokenizer._emitToken({type: 'Characters', data: data}); this._temporaryBuffer += data; buffer.commit(); } else { buffer.unget(data); tokenizer.setState(script_data_double_escaped_state); } return true; } function tag_open_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._parseError("bare-less-than-sign-at-eof"); tokenizer._emitToken({type: 'Characters', data: '<'}); buffer.unget(data); tokenizer.setState(data_state); } else if (isAlpha(data)) { tokenizer._currentToken = {type: 'StartTag', name: data.toLowerCase(), data: []}; tokenizer.setState(tag_name_state); } else if (data === '!') { tokenizer.setState(markup_declaration_open_state); } else if (data === '/') { tokenizer.setState(close_tag_open_state); } else if (data === '>') { // XXX In theory it could be something besides a tag name. But // do we really care? tokenizer._parseError("expected-tag-name-but-got-right-bracket"); tokenizer._emitToken({type: 'Characters', data: "<>"}); tokenizer.setState(data_state); } else if (data === '?') { // XXX In theory it could be something besides a tag name. But // do we really care? tokenizer._parseError("expected-tag-name-but-got-question-mark"); buffer.unget(data); tokenizer.setState(bogus_comment_state); } else { // XXX tokenizer._parseError("expected-tag-name"); tokenizer._emitToken({type: 'Characters', data: "<"}); buffer.unget(data); tokenizer.setState(data_state); } return true; } function close_tag_open_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._parseError("expected-closing-tag-but-got-eof"); tokenizer._emitToken({type: 'Characters', data: '</'}); buffer.unget(data); tokenizer.setState(data_state); } else if (isAlpha(data)) { tokenizer._currentToken = {type: 'EndTag', name: data.toLowerCase(), data: []}; tokenizer.setState(tag_name_state); } else if (data === '>') { tokenizer._parseError("expected-closing-tag-but-got-right-bracket"); tokenizer.setState(data_state); } else { tokenizer._parseError("expected-closing-tag-but-got-char", {data: data}); // param 1 is datavars: buffer.unget(data); tokenizer.setState(bogus_comment_state); } return true; } function tag_name_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._parseError('eof-in-tag-name'); buffer.unget(data); tokenizer.setState(data_state); } else if (isWhitespace(data)) { tokenizer.setState(before_attribute_name_state); } else if (isAlpha(data)) { tokenizer._currentToken.name += data.toLowerCase(); } else if (data === '>') { tokenizer._emitCurrentToken(); } else if (data === '/') { tokenizer.setState(self_closing_tag_state); } else if (data === '\u0000') { tokenizer._parseError("invalid-codepoint"); tokenizer._currentToken.name += "\uFFFD"; } else { tokenizer._currentToken.name += data; } buffer.commit(); return true; } function before_attribute_name_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._parseError("expected-attribute-name-but-got-eof"); buffer.unget(data); tokenizer.setState(data_state); } else if (isWhitespace(data)) { return true; } else if (isAlpha(data)) { tokenizer._currentToken.data.push({nodeName: data.toLowerCase(), nodeValue: ""}); tokenizer.setState(attribute_name_state); } else if (data === '>') { tokenizer._emitCurrentToken(); } else if (data === '/') { tokenizer.setState(self_closing_tag_state); } else if (data === "'" || data === '"' || data === '=' || data === '<') { tokenizer._parseError("invalid-character-in-attribute-name"); tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""}); tokenizer.setState(attribute_name_state); } else if (data === '\u0000') { tokenizer._parseError("invalid-codepoint"); tokenizer._currentToken.data.push({nodeName: "\uFFFD", nodeValue: ""}); } else { tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""}); tokenizer.setState(attribute_name_state); } return true; } function attribute_name_state(buffer) { var data = buffer.char(); var leavingThisState = true; var shouldEmit = false; if (data === InputStream.EOF) { tokenizer._parseError("eof-in-attribute-name"); buffer.unget(data); tokenizer.setState(data_state); shouldEmit = true; } else if (data === '=') { tokenizer.setState(before_attribute_value_state); } else if (isAlpha(data)) { tokenizer._currentAttribute().nodeName += data.toLowerCase(); leavingThisState = false; } else if (data === '>') { // XXX If we emit here the attributes are converted to a dict // without being checked and when the code below runs we error // because data is a dict not a list shouldEmit = true; } else if (isWhitespace(data)) { tokenizer.setState(after_attribute_name_state); } else if (data === '/') { tokenizer.setState(self_closing_tag_state); } else if (data === "'" || data === '"') { tokenizer._parseError("invalid-character-in-attribute-name"); tokenizer._currentAttribute().nodeName += data; leavingThisState = false; } else if (data === '\u0000') { tokenizer._parseError("invalid-codepoint"); tokenizer._currentAttribute().nodeName += "\uFFFD"; } else { tokenizer._currentAttribute().nodeName += data; leavingThisState = false; } if (leavingThisState) { // Attributes are not dropped at this stage. That happens when the // start tag token is emitted so values can still be safely appended // to attributes, but we do want to report the parse error in time. var attributes = tokenizer._currentToken.data; var currentAttribute = attributes[attributes.length - 1]; for (var i = attributes.length - 2; i >= 0; i--) { if (currentAttribute.nodeName === attributes[i].nodeName) { tokenizer._parseError("duplicate-attribute", {name: currentAttribute.nodeName}); currentAttribute.nodeName = null; break; } } if (shouldEmit) tokenizer._emitCurrentToken(); } else { buffer.commit(); } return true; } function after_attribute_name_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._parseError("expected-end-of-tag-but-got-eof"); buffer.unget(data); tokenizer.setState(data_state); } else if (isWhitespace(data)) { return true; } else if (data === '=') { tokenizer.setState(before_attribute_value_state); } else if (data === '>') { tokenizer._emitCurrentToken(); } else if (isAlpha(data)) { tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""}); tokenizer.setState(attribute_name_state); } else if (data === '/') { tokenizer.setState(self_closing_tag_state); } else if (data === "'" || data === '"' || data === '<') { tokenizer._parseError("invalid-character-after-attribute-name"); tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""}); tokenizer.setState(attribute_name_state); } else if (data === '\u0000') { tokenizer._parseError("invalid-codepoint"); tokenizer._currentToken.data.push({nodeName: "\uFFFD", nodeValue: ""}); } else { tokenizer._currentToken.data.push({nodeName: data, nodeValue: ""}); tokenizer.setState(attribute_name_state); } return true; } function before_attribute_value_state(buffer) { var data = buffer.char(); if (data === InputStream.EOF) { tokenizer._parseError("expected-attribute-value-but-got-eof"); buffer.unget(data); tokenizer.setState(data_state); } else if (isWhitespace(data)) { return true; } else if (data === '"') { tokenizer.setState(attribute_value_double_quoted_state); } else if (data === '&') { tokenizer.setState(attribute_value_unquoted_state); buffer.unget(data); } else if (data === "'") { tokenizer.setState(attribute_value_single_quoted_state); } else if (data === '>') { tokenizer._parseError("expected-attribute-value-but-got-right-bracket"); tokenizer._emitCurrentToken(); } else if (data === '=' || data === '<' || data === '`') { tokenizer._parseError("unexpected-character-in-unquoted-attribute-value"); tokenizer._currentAttribute().nodeValue += data; tokenizer.setState(attribute_value_unquoted_state); } else if (data === '\u0000') { tokenizer._parseError("invalid-codepoint"); tokenizer._currentAttribute().nodeValue += "\uF