ares-ide
Version:
A browser-based code editor and UI designer for Enyo 2 projects
223 lines (221 loc) • 7.31 kB
JavaScript
//* @protected
enyo.kind({
name: "analyzer.AbstractLexer",
kind: null,
constructor: function(inText) {
if (inText) {
this.start(inText);
this.finish();
return this.r;
}
},
p0: 0,
p: 0,
start: function(inS) {
this.s = inS;
this.l = this.s.length;
this.r = [ ];
this.d = '';
this.p0 = 0;
this.p = 0;
this.n = 0;
this.analyze();
},
//
// analyze() is abstract
//
search: function(inRegEx) {
// make sure inRegEx has global flag
var r = inRegEx.global ? inRegEx : new RegExp(inRegEx.source, "g");
// install our search position
r.lastIndex = this.p;
// accumulate characters until we match some delimiter
this.m = r.exec(this.s);
// m.index is the 0-based index of the match
this.p = this.m ? this.m.index : -1;
// p0 marks the start of unconsumed characters
// p marks the start of the new delimeter
// <token><delimeter>
// p0<----->p
// d is the first character of <delimeter>, return d, or null if no matches
return (r.lastIndex != this.p0) && (this.d = this.s.charAt(this.p));
},
// examine the character inCount ahead of the current position
lookahead: function(inCount) {
return this.s.charAt(this.p + inCount);
},
// extract the token between positions p0 and p1
getToken: function() {
return this.s.slice(this.p0, this.p);
},
// move the position (p) by inCount characters (i.e. add inCount characters to token)
tokenize: function(inCount) {
this.p += inCount || 0;
},
// push a token with kind: inKind
// inD (optional) specifies a number of characters to add to the token before pushing
// inAllowEmpty: unless true, 0 length tokens are a no-op
pushToken: function(inKind, inCount, inAllowEmpty) {
// move the position (p) by inCount characters (i.e. add inCount characters to token)
this.tokenize(inCount);
// copy the token between p0 and p
var token = this.getToken();
// if the token is empty string, immediately return an empty object
if (!token && !inAllowEmpty) {
return {};
}
// counting newlines?
var nLines = (token.match(/\n/g) || []).length;
// make a token object with lots of meta-data
var mToken = { kind: inKind, token: token, start: this.p0, end: this.p, line: this.n, height: nLines };
// push the token descriptor onto the result stack
this.r.push(mToken);
// accumulate line count
this.n += nLines;
// bump the starting position pointer
this.p0 = this.p;
// return the token descriptor
return mToken;
},
// inD (optional) specifies a number of characters to add to the token before tossing
tossToken: function(inCount) {
// move the position (p) by inCount characters (i.e. add inCount characters to token)
this.tokenize(inCount);
// bump the starting position pointer
this.p0 = this.p;
},
finish: function() {
// FIXME: what did this do?
//this.t += this.s;
// FIXME: if there is left over text, push it as 'gah' type
this.pushToken("gah");
}
});
enyo.kind({
name: "analyzer.Lexer",
kind: analyzer.AbstractLexer,
symbols: "(){}[];,:<>+-=*/&",
operators: [ "++", "--", "+=", "-=", "==", "!=", "<=", ">=", "===", "&&", "||", '"', "'"],
keywords: [ "function", "new", "return", "if", "else", "while", "do", "break", "continue", "switch", "case", "var" ],
constructor: function(inText) {
this.buildPattern();
return this.inherited(arguments);
},
buildPattern: function() {
// match an inline regex
var rregex = "\\/[^\/*[](?:[^\\/\\\\\\r\n]|\\\\.)+\\/\\w*";
//
// matches double-quoted string that may contain escaped double-quotes
var rstring1 = '"(?:\\\\"|[^"])*?"';
// matches single-quoted string that may contain escaped single-quotes
var rstring2 = "'(?:\\\\'|[^'])*?'";
// matches either type of string
var rstring = rstring1 + "|" + rstring2;
//
// matches any of the keywords (\b only matches on word boundaries)
var rkeys = '\\b(?:' + this.keywords.join('|') + ')\\b';
//
// match symbols and operators (code here escapes the symbol characters for use in regex)
var rsymbols = '[\\' + this.symbols.split('').join('\\') + ']';
var rops = [];
for (var i=0, o; (o=this.operators[i]); i++) {
rops.push('\\' + o.split('').join('\\'));
}
rops = rops.join('|');
//rsymbols += '|' + rops;
// match rops first (greedy, "<=" instead of "<", "=")
rsymbols = rops + "|" + rsymbols;
//console.log(rsymbols);
//
// these are all the patterns to match
//var matches = [rstring1, rstring2, rkeys, '\\/\\/', '\\/\\*', /*rregex,*/ rsymbols, "'\"", '\\s'];
// these are the matching methods corresponding to the patterns above
//this.matchers = ["doString", "doString", "doKeyword", "doLineComment", "doCComment", /*"doRegExp",*/ "doSymbol", "doLiteral", "doWhitespace"];
//
//
// these are the patterns to match
// match escape sequences \" and \/ first to help defray confusion
var matches = ["\\\\\"|\\\\/", rregex, rstring, rkeys, '\\/\\/', '\\/\\*', rsymbols, "\\s"];
// these are the matching methods corresponding to the patterns above
this.matchers = ["doSymbol", "doRegex", "doString", "doKeyword", "doLineComment", "doCComment", "doSymbol", "doWhitespace"];
//
//
// construct the master regex as a union of the patterns above
this.pattern = '(' + matches.join(')|(') + ')';
//console.log(this.pattern);
},
analyze: function() {
var regex = new RegExp(this.pattern, "gi");
while (this.search(regex)) {
// any characters between where we were and the latest delimeter we call an identifier
this.pushToken("identifier");
// process the input stream based on the matched delimeter
this.process(this.matchers);
// any characters between where we were and the latest delimeter we call an identifier
this.pushToken("identifier");
}
},
process: function(inMatchers) {
for (var i=0, f; (f=inMatchers[i]); i++) {
if (this.m[i+1] && this[f]) {
this[f].apply(this);
return;
}
}
this.doSymbol();
},
doWhitespace: function() {
// we saw at least one ws character, so consume it
this.tokenize(1);
// consume any additional whitespace (i.e. all characters up to the first non-ws [\S])
this.search(/\S/g);
// push all such characters as a ws token
this.pushToken('ws');
// remove the actual token (don't capture whitespace)
this.r.pop();
},
doEscape: function() {
this.tokenize(2);
},
doLiteral: function() {
this.tossToken(1);
var delim = this.d;
var rx = new RegExp("\\" + delim + "|\\\\", "g");
while (this.search(rx)) {
switch (this.d) {
case '\\':
this.doEscape();
break;
default:
this.pushToken('literal', 0, true).delimiter = delim;
this.tossToken(1);
return;
}
}
},
doSymbol: function() {
this.pushToken((this.d==';' || this.d==',') ? "terminal" : "symbol", this.m[0].length);
},
doKeyword: function() {
this.pushToken("keyword", this.m[0].length);
},
doLineComment: function() {
this.tokenize(2);
if (this.search(/[\r\n]/g)) {
this.tokenize(0);
}
this.pushToken("comment");
},
doCComment: function() {
this.tokenize(2); // consume '/*'
this.search(/\*\//); // search for next '*/'
this.tokenize(2); // consume '*/'
this.pushToken("comment");
},
doString: function() {
this.pushToken("string", this.m[0].length);
},
doRegex: function() {
this.pushToken("regex", this.m[0].length);
}
});