omniset
Version:
Utility functions used accross multiple JS libraries.
477 lines (452 loc) • 13 kB
JavaScript
/**
* @imports
*/
import _isString from '../js/isString.js';
import _isUndefined from '../js/isUndefined.js';
import _isFunction from '../js/isFunction.js';
import _arrFrom from '../arr/from.js';
import _flatten from '../arr/flatten.js';
import _first from '../arr/first.js';
import _last from '../arr/last.js';
import _merge from '../obj/merge.js';
import _even from '../obj/even.js';
import _copyPlain from '../obj/copyPlain.js';
/**
* --------------------------
* TOKENIZER
* --------------------------
*/
export default class Lexer {
/**
* Factory method.
*
* Handles caching.
*
* @see constructor()
*/
static lex(str, delims, options = {}) {
if (!_isString(str = str + '')) {
throw new Error('Argument1 must be a string!');
}
var copyResult = result => {
return {
delims: result.delims.slice(),
options: _copyPlain(result.options),
nesting: result.nesting.slice(),
maxDepth: result.maxDepth,
comments: result.comments.slice(),
tokens: result.tokens.slice(),
matches: result.matches.slice(),
matchesi: _copyPlain(result.matchesi),
};
};
// ASK CACHE ---------------------------
if (Lexer.$cache[str] && options.cache !== false) {
for (var i = 0; i < Lexer.$cache[str].length; i ++) {
var cached = Lexer.$cache[str][i];
if (_even(cached.delims, delims)) {
return copyResult(cached);
}
}
}
// FRESH PARSE -------------------------------
var instance = new Lexer(str, options);
var result = instance.lex(delims);
// SAVE TO CACHE -------------------------------
if (options.cache !== false) {
Lexer.$cache[str] = Lexer.$cache[str] || [];
Lexer.$cache[str].push(result);
}
return copyResult(result);
}
/**
* Factory method for .split().
*
* Handles caching.
*
* @see constructor()
*/
static split(str, delims, options) {
return Lexer.lex(str, delims, options).tokens;
}
/**
* Factory method for .match().
*
* Handles caching.
*
* @see constructor()
*/
static match(str, delims, options) {
return Lexer.lex(str, delims, options).matches;
}
/**
* Creates a lexer instance on a string with the given options.
*
* @param string str
* @param object options:
* @param string blocks The strings that begin and end a nested structure
* @param number limit Max results to return
* @param string backreference A character like (\) that prefixes non-delim characters
*
* @return array
*/
constructor(str, options) {
if (!_isString(str)) {
throw new Error('Lexer requires the first argument to be a string.');
}
this.$str = str;
this.$options = options || {};
if (!this.$options.blocks) {
this.$options.blocks = Lexer.$blocks;
}
if (!this.$options.quotes) {
this.$options.quotes = Lexer.$quotes;
}
if (!this.$options.comments) {
this.$options.comments = Lexer.$comments;
}
}
/**
* Parses the instance string on the given delimeters.
*
* This method supports static calling,
* in which case a string is required as the first argument.
*
* @param string str
* @param object options
*
* @return object
*/
lex(delims, options) {
var runtime = {
delims: _arrFrom(delims),
options: _merge(true, {}, this.$options, options || {}),
nesting: [],
maxDepth: 0,
comments: [],
tokens: [],
matches: [],
matchesi: {},
};
// Iterate over each character, keep track of current row and column (of the returned array)
var cursor = 0;
while(typeof cursor === 'number') {
cursor = this._evalCharsAt(runtime, cursor);
}
if (runtime.nesting.length) {
throw new Error('Error parsing the string: ' + this.$str + '. Unterminated blocks: ' + _flatten(runtime.nesting).join(', ') + '');
}
// RETURN NEW ----------------------------------
return runtime;
}
/**
* Expr helper: evaluates and handles the character on the current cursor.
* Advances the cursor.
*
* @param object runtime
* @param int i
*
* @return void
*/
_evalCharsAt(runtime, i) {
if (i >= this.$str.length) {
return;
}
var charWidth = 1;
var commentTest = {}, quoteTest = {}, nestingTest = {};
// Quotes inside comments must be ignored
if (!runtime.openComment) {
quoteTest = this._testQuotes(runtime, i);
}
// Comments inside quotes must be ignored
if (!runtime.openQuote) {
commentTest = this._testComments(runtime, i);
}
// Save comments
if (runtime.openComment || commentTest.ending) {
// Save only outer comments
if (!runtime.nesting.length && !nestingTest.ending) {
var chars = commentTest.starting || commentTest.ending || this.$str[i];
charWidth = chars.length;
this._push(runtime, chars, 'comments', commentTest.starting);
} else {
this._push(runtime, this.$str[i]);
}
} else if (runtime.openQuote || quoteTest.ending) {
// Yes add quotes
this._push(runtime, this.$str[i]);
} else if (runtime.options.limit && runtime.matches.length === runtime.options.limit) {
this._push(runtime, this.$str[i]);
return i + 1;
} else {
// Nesting tags inside comments and quotes have been ignored
nestingTest = this._testNesting(runtime, i);
// ---------------------
// STOP ON THIS CHARACTER...?
// ---------------------
var nestingTest = this._testNesting(runtime, i);
// STOP CHAR(S)? at top level?
var stopChar = this._testChars(runtime.options.stopChars || [], runtime, i);
if (!runtime.nesting.length && stopChar !== false) {
runtime.options.stopChar = stopChar;
runtime.options.stopCharForward = this.$str.substr(i);
return;
}
// ---------------------
// Match and split now...
// ---------------------
if (!runtime.delims.length) {
// BLOCK-BASED SPLITTING...
if (runtime.nesting.length === 2 && nestingTest.starting) {
runtime.matches.push(null);
this._push(runtime, nestingTest.starting);
charWidth = nestingTest.starting.length;
} else if (!runtime.nesting.length && nestingTest.ending) {
this._push(runtime, nestingTest.ending);
charWidth = nestingTest.ending.length;
runtime.matches.push(null);
} else/*no-nesting flag*/ {
this._push(runtime, this.$str[i]);
}
} else {
// ---------------------
// DELIMS-BASED SPLITTING
// ---------------------
if (!runtime.nesting.length && !nestingTest.ending) {
// In case the chars at index 0 is a delim,
// the resulting split should first have an empty string, instead of undefined
this._push(runtime, '');
var matchedDelim = this._testChars(runtime.delims, runtime, i);
if (matchedDelim !== false) {
runtime.matches.push(matchedDelim);
runtime.matchesi[i] = matchedDelim;
charWidth = matchedDelim.length || 1;
if (!runtime.options.preserveDelims) {
// The current character is a delimiter...
// and should not get to appending to the split series down the line
var nextIndex = i + (matchedDelim.length || 1);
if (nextIndex === this.$str.length) {
// In case the chars at last index is a delim,
// the resulting split should last have an empty string
this._push(runtime, '');
}
return nextIndex;
}
}
this._push(runtime, matchedDelim || this.$str[i]);
} else {
var chars = nestingTest.starting || nestingTest.ending || this.$str[i];
charWidth = chars.length;
this._push(runtime, chars);
}
}
}
return i + charWidth;
}
/**
* Expr helper: tests for a quote start/end character on the current cursor.
*
* @param object runtime
* @param int i
*
* @return object
*/
_testQuotes(runtime, i) {
var result = {};
(runtime.options.quotes || []).forEach(quote => {
if (this.$str.substr(i, 1) === quote) {
if (!runtime.openQuote) {
runtime.openQuote = quote;
result.starting = quote;
} else if (quote === runtime.openQuote) {
runtime.openQuote = false;
result.ending = quote;
}
}
});
return result;
}
/**
* Expr helper: tests for a comment start/end character on the current cursor.
*
* @param object runtime
* @param int i
*
* @return object
*/
_testComments(runtime, i) {
var result = {};
(runtime.options.comments || []).forEach(block => {
if (!runtime.openComment) {
var starting = _first(block);
if (this.$str.substr(i).startsWith(starting)) {
runtime.openComment = block;
result.starting = starting;
}
} else if (_last(block) === _last(runtime.openComment)) {
var ending = _last(block);
if (this.$str.substr(i).startsWith(ending)) {
runtime.openComment = false;
result.ending = ending;
}
}
});
return result;
}
/**
* Expr helper: tests for a nesting start/end character on the current cursor.
*
* @param object runtime
* @param int i
*
* @return object
*/
_testNesting(runtime, i) {
var result = {};
(runtime.options.blocks || []).forEach(block => {
var starting = _first(block);
if (this.$str.substr(i).startsWith(starting)) {
runtime.nesting = runtime.nesting.concat([block]);
result.starting = starting;
} else if (runtime.nesting.length && _last(block) === _last(_last(runtime.nesting))) {
var ending = _last(block);
if (this.$str.substr(i).startsWith(ending)) {
runtime.nesting = runtime.nesting.slice(0, -1);
result.ending = ending;
}
}
});
runtime.maxDepth = Math.max(runtime.maxDepth, runtime.nesting.length);
return result;
}
/**
* Expr helper: tests for a delimiter or stop character on the current cursor.
*
* @param array testList
* @param object runtime
* @param int i
*
* @return mixed
*/
_testChars(testList, runtime, i) {
for (var k = 0; k < testList.length; k ++) {
var test = testList[k];
if (_isFunction(test)) {
var ret = test(this.$str.substr(0, i), this.$str.substr(i));
if (ret !== false) {
return ret;
}
}
if (runtime.options.useRegex) {
var m = this.$str.substr(i).match(new RegExp('^' + test, runtime.options.useRegex !== true ? runtime.options.useRegex : ''));
if (m) {
return m[0];
}
}
if ((!runtime.options.ci && this.$str.substr(i, test.length) === test)
|| (runtime.options.ci && this.$str.substr(i, test.length).toLowerCase() === test.toLowerCase())) {
return test;
}
}
return false;
}
/**
* Expr helper: pushes a character or set of characters into the current split series.
*
* @param object runtime
* @param string chars
* @param string target
* @param bool isNewSeries
*
* @return void
*/
_push(runtime, chars, target = 'tokens', isNewSeries = false) {
var splitSeries = runtime.matches.length;
if (_isUndefined(runtime.tokens[splitSeries])) {
runtime.tokens[splitSeries] = '';
}
if (target === 'comments') {
if (!runtime.tokens[splitSeries].comments) {
runtime.tokens[splitSeries] = new String(runtime.tokens[splitSeries]);
runtime.tokens[splitSeries].comments = [];
}
var splitSeries2 = runtime.tokens[splitSeries].comments.length - (!runtime.tokens[splitSeries].comments.length || isNewSeries ? 0 : 1);
runtime.tokens[splitSeries].comments[splitSeries2] = (runtime.tokens[splitSeries].comments[splitSeries2] || '') + chars;
} else {
var comments = runtime.tokens[splitSeries].comments;
runtime.tokens[splitSeries] = runtime.tokens[splitSeries] + chars;
//runtime.tokens[splitSeries].comments = comments;
}
}
/**
* Splits the instance string on the given delimeters and returns the tokens.
*
* @param string str
* @param object options
*
* @return array
*/
split(str, delims, options) {
return this.lex(delims, options).tokens;
}
/**
* Splits the instance string on the given delimeters and returns the matches.
*
* @param string str
* @param object options:
*
* @return array
*/
match(str, delims, options) {
return this.lex(delims, options).matches;
}
/**
* Parses the instance string on the given delimeters using regex.
*
* @param string str
* @param object options
*
* @return object
*/
regParse(delims, options) {
return this.lex(delims, _merge({useRegex: true}, options || {}));
}
/**
* Splits the instance string on the given delimeters using regex; returns the tokens.
*
* @param string str
* @param object options
*
* @return array
*/
regSplit(delims, options) {
return this.regParse(delims, options).tokens;
}
/**
* Matches the instance string on the given delimeters using regex; returns the matches.
*
* @param string str
* @param object options
*
* @return array
*/
regMatch(delims, options) {
return this.regParse(delims, options).matches;
}
};
/**
* @var array
*/
Lexer.$blocks = [['(', ')'], ['[', ']'], ['{', '}'],];
/**
* @var array
*/
Lexer.$quotes = ['"', "'", '`',];
/**
* @var array
*/
Lexer.$comments = [['/*', '*/'], ['//', "\n"],];
/**
* @var object
*/
Lexer.$cache = {};