UNPKG

convo

Version:

Easily create conversations (for more natural bots)

375 lines (316 loc) 8.16 kB
'use strict'; /** * Module Dependencies */ var props = require('component-props') var rtemplate = /([^\\])?{([^\}]+)}/g var error = require('jade-error') /** * Export the Lexer */ module.exports = function lex(str, filename, options) { var lexer = new Lexer(str, filename, options); return JSON.parse(JSON.stringify(lexer.getTokens())); } /** * Initialize `Lexer` with the given `str`. * * @param {String} str * @param {String} filename * @api private */ function Lexer(str, filename, options) { options = options || {}; // strip any UTF-8 BOM off of the start of `str`, if it exists. str = str.replace(/^\uFEFF/, ''); this.input = str.replace(/\r\n|\r/g, '\n'); this.originalInput = this.input; this.lineno = options.startingLine || 1; this.colno = options.startingColumn || 1; this.indentStack = [0]; this.indentRe = null; this.ended = false; this.tokens = []; }; Lexer.prototype = { getTokens() { while (!this.ended) { this.advance() } return this.tokens }, advance() { var tok = this.blank() || this.comment() || this.uparrow() || this.arrow() || this.regexp() || this.alias() || this.indent() || this.literal_string() || this.variable() || this.eos() || this.fail() return tok }, /** * Construct a token with the given `type` and `val`. * * @param {String} type * @param {String} val * @return {Object} * @api private */ tok(type, val){ var res = {type: type, line: this.lineno, col: this.colno}; if (val !== undefined) res.val = val; return res; }, /** * Increment `this.lineno` and reset `this.colno`. * * @param {Number} increment * @api private */ incrementLine(increment) { this.lineno += increment; if (increment) this.colno = 1; }, /** * Increment `this.colno`. * * @param {Number} increment * @api private */ incrementColumn(increment) { this.colno += increment }, /** * Consume the given `len` of input. * * @param {Number} len * @api private */ consume(len) { this.input = this.input.substr(len); }, /** * Blank line. */ blank() { var captures; if (captures = /^\n[ \t]*\n/.exec(this.input)) { this.consume(captures[0].length - 1); this.incrementLine(1); return 'blank'; } }, /** * Comment */ comment() { var captures; if (captures = /^\s*\/\/[^\n]*/.exec(this.input)) { this.consume(captures[0].length); this.incrementLine(1); return 'comment'; } }, /** * String literal */ literal_string() { var captures; if (captures = /^('([^'\\]*\\.)*[^']*'|"([^"\\]*\\.)*[^"]*)"/.exec(this.input)) { this.consume(captures[0].length); this.incrementColumn(captures[0].length) var string = captures[0].slice(1, -1) string = string.replace(rtemplate, function(m, m1, m2) { m1 = m1 || '' m2 = props(m2).reduce(function (str, value, key) { str = str.replace(value, '_["' + value + '"]') return str }, m2) return m1 + '${' + m2 + '}' }) var tok = this.tok('string', string) this.tokens.push(tok) return 'literal_string' } }, /** * Variable */ variable() { var captures; if (captures = /^[A-Za-z_$][A-Za-z0-9_$]*/.exec(this.input)) { this.consume(captures[0].length); this.incrementColumn(captures[0].length) var tok = this.tok('variable', captures[0]) this.tokens.push(tok) return 'variable' } }, /** * Regular Expression */ regexp() { var captures; if (captures = /^\//.exec(this.input)) { var escaped = false var inClass = false // take from acorn's parser for(var i = 1, len = this.input.length; i < len; i++) { var ch = this.input[i] if (!escaped) { if (ch === "[") inClass = true else if (ch === "]" && inClass) inClass = false else if (ch === "/" && !inClass) break escaped = ch === "\\" } else escaped = false } var value = this.input.slice(0, i + 1) this.consume(value.length) // flags if (captures = /^[gmsiyu]*/.exec(this.input)) { value += captures[0] this.consume(captures[0].length) } // add the token var tok = this.tok('regexp', value) this.incrementColumn(tok.val.length) this.tokens.push(tok) return 'regexp' } }, /** * Arrow */ arrow() { var captures; if (captures = /^\s*=>\s*/.exec(this.input)) { this.consume(captures[0].length) this.incrementColumn(captures[0].length) var tok = this.tok('arrow', captures[0].trim()) this.tokens.push(tok) return 'arrow' } }, /** * Arrow */ uparrow() { var captures; if (captures = /^\s*=\^/.exec(this.input)) { this.consume(captures[0].length) this.incrementColumn(captures[0].length) var tok = this.tok('uparrow', captures[0].trim()) this.tokens.push(tok) return 'uparrow' } }, /** * Alias */ alias() { var captures; if (captures = /^([A-Za-z_$][A-Za-z0-9_$]*)\s*\:\s*/.exec(this.input)) { this.consume(captures[0].length) this.incrementColumn(captures[0].length) var tok = this.tok('alias', captures[1]) this.tokens.push(tok) return 'alias' } }, /** * Indent | Outdent | Newline. */ indent() { var captures = this.scanIndentation(); if (captures) { var indents = captures[1].length; this.incrementLine(1); this.consume(indents + 1); if (' ' == this.input[0] || '\t' == this.input[0]) { this.error('INVALID_INDENTATION', 'Invalid indentation, you can use tabs or spaces but not both'); } // blank line if ('\n' == this.input[0]) { this.tokens.push(this.tok('newline')) return 'newline' } // outdent if (indents < this.indentStack[0]) { while (this.indentStack[0] > indents) { if (this.indentStack[1] < indents) { this.error('INCONSISTENT_INDENTATION', 'Inconsistent indentation. Expecting either ' + this.indentStack[1] + ' or ' + this.indentStack[0] + ' spaces/tabs.'); } this.colno = this.indentStack[1] + 1; this.tokens.push(this.tok('outdent')); this.indentStack.shift(); } return 'outdent' // indent } else if (indents && indents != this.indentStack[0]) { this.tokens.push(this.tok('indent', indents)); this.colno = 1 + indents; this.indentStack.unshift(indents); return 'indent' // newline } else { this.tokens.push(this.tok('newline')); this.colno = 1 + (this.indentStack[0] || 0); return 'newline' } } }, /** * Scan the indentation */ scanIndentation() { var captures, re; // established regexp if (this.indentRe) { captures = this.indentRe.exec(this.input); // determine regexp } else { // tabs re = /^\n(\t*) */; captures = re.exec(this.input); // spaces if (captures && !captures[1].length) { re = /^\n( *)/; captures = re.exec(this.input); } // established if (captures && captures[1].length) this.indentRe = re; } return captures; }, /** * end-of-source. */ eos() { if (this.input.length) return; for (var i = 0; this.indentStack[i]; i++) { this.tokens.push(this.tok('outdent')); } this.tokens.push(this.tok('eos')); this.ended = true; return true; }, /** * Failure case */ fail() { this.error('UNEXPECTED_TEXT', 'unexpected text "' + this.input.substr(0, 5) + '"'); }, /** * Error */ error(code, message) { var err = error(code, message, {line: this.lineno, column: this.colno, filename: this.filename, src: this.originalInput}); err.message = err.message.replace('Jade', 'Convo') throw err; } }