convo
Version:
Easily create conversations (for more natural bots)
375 lines (316 loc) • 8.16 kB
JavaScript
'use strict';
/**
* Module Dependencies
*/
var props = require('component-props')
var rtemplate = /([^\\])?{([^\}]+)}/g
var error = require('jade-error')
/**
* Export the Lexer
*/
module.exports = function lex(str, filename, options) {
var lexer = new Lexer(str, filename, options);
return JSON.parse(JSON.stringify(lexer.getTokens()));
}
/**
 * Initialize `Lexer` with the given `str`.
 *
 * A leading UTF-8 BOM is stripped and all `\r\n` / `\r` line endings are
 * normalized to `\n` before lexing starts.
 *
 * @param {String} str source text to tokenize
 * @param {String} filename name reported in error objects
 * @param {Object} [options]
 * @param {Number} [options.startingLine] initial line number (defaults to 1)
 * @param {Number} [options.startingColumn] initial column number (defaults to 1)
 * @api private
 */
function Lexer(str, filename, options) {
  options = options || {};
  // strip any UTF-8 BOM off of the start of `str`, if it exists.
  str = str.replace(/^\uFEFF/, '');
  this.input = str.replace(/\r\n|\r/g, '\n');
  // untouched copy of the normalized source; `input` shrinks as it is consumed
  this.originalInput = this.input;
  // keep the filename: `error()` reads `this.filename` when building errors
  this.filename = filename;
  this.lineno = options.startingLine || 1;
  this.colno = options.startingColumn || 1;
  // indentation widths currently open, innermost first
  this.indentStack = [0];
  // indentation regexp, chosen on the first indented line (tabs or spaces)
  this.indentRe = null;
  this.ended = false;
  this.tokens = [];
}
Lexer.prototype = {
  /**
   * Run the lexer until the end of the source has been reached.
   *
   * @return {Array} the accumulated tokens (the last one is `eos`)
   * @api public
   */
  getTokens() {
    while (!this.ended) {
      this.advance();
    }
    return this.tokens;
  },

  /**
   * Try every lexing rule in priority order and apply the first one that
   * matches the start of the remaining input. `fail()` throws when no
   * rule matches.
   *
   * @return {String|Boolean} truthy marker returned by the matching rule
   * @api private
   */
  advance() {
    return this.blank()
      || this.comment()
      || this.uparrow()
      || this.arrow()
      || this.regexp()
      || this.alias()
      || this.indent()
      || this.literal_string()
      || this.variable()
      || this.eos()
      || this.fail();
  },

  /**
   * Construct a token with the given `type` and `val`, stamped with the
   * lexer's current line and column.
   *
   * @param {String} type
   * @param {String|Number} [val] omitted from the token when `undefined`
   * @return {Object}
   * @api private
   */
  tok(type, val) {
    const res = {type: type, line: this.lineno, col: this.colno};
    if (val !== undefined) res.val = val;
    return res;
  },

  /**
   * Increment `this.lineno` and reset `this.colno`.
   *
   * @param {Number} increment
   * @api private
   */
  incrementLine(increment) {
    this.lineno += increment;
    if (increment) this.colno = 1;
  },

  /**
   * Increment `this.colno`.
   *
   * @param {Number} increment
   * @api private
   */
  incrementColumn(increment) {
    this.colno += increment;
  },

  /**
   * Advance `this.lineno` / `this.colno` past `text`, which may contain
   * newlines (the input only contains `\n` line breaks, see the constructor).
   *
   * @param {String} text text that has been (or is about to be) consumed
   * @api private
   */
  incrementPosition(text) {
    const lastBreak = text.lastIndexOf('\n');
    if (lastBreak === -1) {
      this.incrementColumn(text.length);
      return;
    }
    this.incrementLine(text.split('\n').length - 1);
    // the column restarts after the last line break inside `text`
    this.incrementColumn(text.length - lastBreak - 1);
  },

  /**
   * Consume the given `len` of input.
   *
   * @param {Number} len
   * @api private
   */
  consume(len) {
    this.input = this.input.slice(len);
  },

  /**
   * Blank line.
   *
   * Emits no token. The trailing newline of the match is left in the input
   * so the following rule still sees the line break.
   *
   * @return {String|undefined} `'blank'` when matched
   */
  blank() {
    const captures = /^\n[ \t]*\n/.exec(this.input);
    if (!captures) return;
    this.consume(captures[0].length - 1);
    this.incrementLine(1);
    return 'blank';
  },

  /**
   * Comment (`// ...` up to, but not including, the end of the line).
   *
   * Emits no token. Leading whitespace, including line breaks, is part of
   * the match, so the position is advanced by what was actually consumed
   * rather than by a fixed single line.
   *
   * @return {String|undefined} `'comment'` when matched
   */
  comment() {
    const captures = /^\s*\/\/[^\n]*/.exec(this.input);
    if (!captures) return;
    this.consume(captures[0].length);
    this.incrementPosition(captures[0]);
    return 'comment';
  },

  /**
   * String literal (single or double quoted, backslash escapes allowed).
   *
   * `{expr}` placeholders inside the literal are rewritten to `${expr}`,
   * with each property name reported by `props()` replaced by a lookup on
   * `_` (`name` becomes `_["name"]`).
   *
   * @return {String|undefined} `'literal_string'` when matched
   */
  literal_string() {
    // the closing quote belongs inside each alternative
    const captures = /^('([^'\\]*\\.)*[^']*'|"([^"\\]*\\.)*[^"]*")/.exec(this.input);
    if (!captures) return;

    const raw = captures[0];
    const string = raw.slice(1, -1).replace(rtemplate, function (m, m1, m2) {
      const prefix = m1 || '';
      const expr = props(m2).reduce(function (str, value) {
        return str.replace(value, '_["' + value + '"]');
      }, m2);
      return prefix + '${' + expr + '}';
    });

    // build the token before advancing so `col` marks the opening quote
    const tok = this.tok('string', string);
    this.consume(raw.length);
    this.incrementPosition(raw);
    this.tokens.push(tok);
    return 'literal_string';
  },

  /**
   * Variable (identifier).
   *
   * @return {String|undefined} `'variable'` when matched
   */
  variable() {
    const captures = /^[A-Za-z_$][A-Za-z0-9_$]*/.exec(this.input);
    if (!captures) return;
    const tok = this.tok('variable', captures[0]);
    this.consume(captures[0].length);
    this.incrementColumn(captures[0].length);
    this.tokens.push(tok);
    return 'variable';
  },

  /**
   * Regular Expression literal, including trailing flags.
   *
   * Throws `UNTERMINATED_REGEXP` when the closing `/` is missing or a line
   * break is reached first.
   *
   * @return {String|undefined} `'regexp'` when matched
   */
  regexp() {
    const input = this.input;
    if (input[0] !== '/') return;

    let escaped = false;
    let inClass = false;
    let end = -1;
    // scanning logic taken from acorn's parser
    for (let i = 1; i < input.length; i++) {
      const ch = input[i];
      if (ch === '\n') break;
      if (escaped) {
        escaped = false;
        continue;
      }
      if (ch === '[') inClass = true;
      else if (ch === ']' && inClass) inClass = false;
      else if (ch === '/' && !inClass) {
        end = i;
        break;
      }
      escaped = ch === '\\';
    }
    if (end === -1) {
      this.error('UNTERMINATED_REGEXP', 'Unterminated regular expression');
    }

    const body = input.slice(0, end + 1);
    this.consume(body.length);
    // flags
    const flags = /^[gmsiyu]*/.exec(this.input)[0];
    this.consume(flags.length);
    // add the token
    const tok = this.tok('regexp', body + flags);
    this.incrementColumn(tok.val.length);
    this.tokens.push(tok);
    return 'regexp';
  },

  /**
   * Arrow (`=>`), with optional surrounding whitespace.
   *
   * @return {String|undefined} `'arrow'` when matched
   */
  arrow() {
    const captures = /^(\s*)(=>)\s*/.exec(this.input);
    if (!captures) return;
    const leading = captures[1];
    this.consume(captures[0].length);
    // skip the leading whitespace first so `col` marks the `=>` itself
    this.incrementPosition(leading);
    const tok = this.tok('arrow', captures[2]);
    this.incrementPosition(captures[0].slice(leading.length));
    this.tokens.push(tok);
    return 'arrow';
  },

  /**
   * Up-arrow (`=^`), with optional leading whitespace.
   *
   * @return {String|undefined} `'uparrow'` when matched
   */
  uparrow() {
    const captures = /^(\s*)(=\^)/.exec(this.input);
    if (!captures) return;
    this.consume(captures[0].length);
    // skip the leading whitespace first so `col` marks the `=^` itself
    this.incrementPosition(captures[1]);
    const tok = this.tok('uparrow', captures[2]);
    this.incrementColumn(captures[2].length);
    this.tokens.push(tok);
    return 'uparrow';
  },

  /**
   * Alias (`name:`); the token value is the name without the colon.
   *
   * @return {String|undefined} `'alias'` when matched
   */
  alias() {
    const captures = /^([A-Za-z_$][A-Za-z0-9_$]*)\s*\:\s*/.exec(this.input);
    if (!captures) return;
    const tok = this.tok('alias', captures[1]);
    this.consume(captures[0].length);
    this.incrementPosition(captures[0]);
    this.tokens.push(tok);
    return 'alias';
  },

  /**
   * Indent | Outdent | Newline.
   *
   * Consumes a line break plus its indentation and compares the width with
   * the top of `indentStack` to decide which token(s) to emit.
   *
   * @return {String|undefined} `'indent'`, `'outdent'` or `'newline'`
   */
  indent() {
    const captures = this.scanIndentation();
    if (!captures) return;

    const indents = captures[1].length;
    this.incrementLine(1);
    this.consume(indents + 1);

    const next = this.input[0];
    // leftover whitespace means the line mixes tabs and spaces
    if (next === ' ' || next === '\t') {
      this.error('INVALID_INDENTATION', 'Invalid indentation, you can use tabs or spaces but not both');
    }

    // blank line
    if (next === '\n') {
      this.tokens.push(this.tok('newline'));
      return 'newline';
    }

    // outdent
    if (indents < this.indentStack[0]) {
      while (this.indentStack[0] > indents) {
        if (this.indentStack[1] < indents) {
          this.error('INCONSISTENT_INDENTATION', 'Inconsistent indentation. Expecting either ' + this.indentStack[1] + ' or ' + this.indentStack[0] + ' spaces/tabs.');
        }
        this.colno = this.indentStack[1] + 1;
        this.tokens.push(this.tok('outdent'));
        this.indentStack.shift();
      }
      return 'outdent';
    }

    // indent
    if (indents && indents !== this.indentStack[0]) {
      this.tokens.push(this.tok('indent', indents));
      this.colno = 1 + indents;
      this.indentStack.unshift(indents);
      return 'indent';
    }

    // newline
    this.tokens.push(this.tok('newline'));
    this.colno = 1 + (this.indentStack[0] || 0);
    return 'newline';
  },

  /**
   * Scan the indentation at the start of the remaining input.
   *
   * Until an indented line has been seen, tabs are tried first and spaces
   * second; the regexp that first captures a non-empty indentation is
   * stored in `indentRe` and used from then on.
   *
   * @return {Array|null} regexp match whose group 1 is the indentation
   * @api private
   */
  scanIndentation() {
    // established regexp
    if (this.indentRe) {
      return this.indentRe.exec(this.input);
    }

    // determine regexp: tabs
    let re = /^\n(\t*) */;
    let captures = re.exec(this.input);

    // spaces
    if (captures && !captures[1].length) {
      re = /^\n( *)/;
      captures = re.exec(this.input);
    }

    // established
    if (captures && captures[1].length) this.indentRe = re;
    return captures;
  },

  /**
   * end-of-source.
   *
   * Emits one `outdent` per indentation level still open, then `eos`, and
   * marks the lexer as ended.
   *
   * @return {Boolean|undefined} `true` when the input is exhausted
   */
  eos() {
    if (this.input.length) return;
    for (let i = 0; this.indentStack[i]; i++) {
      this.tokens.push(this.tok('outdent'));
    }
    this.tokens.push(this.tok('eos'));
    this.ended = true;
    return true;
  },

  /**
   * Failure case: no rule matched, report the first characters of the
   * remaining input.
   */
  fail() {
    this.error('UNEXPECTED_TEXT', 'unexpected text "' + this.input.slice(0, 5) + '"');
  },

  /**
   * Error
   *
   * Builds an error through the imported `error` factory with the current
   * line, column, filename and original source, replaces the first 'Jade'
   * in its message with 'Convo', and throws it.
   *
   * @param {String} code
   * @param {String} message
   * @api private
   */
  error(code, message) {
    const err = error(code, message, {line: this.lineno, column: this.colno, filename: this.filename, src: this.originalInput});
    err.message = err.message.replace('Jade', 'Convo');
    throw err;
  }
};