stew-select
Version:
CSS selectors that allow regular expressions. Stew is a meatier soup.
328 lines (293 loc) • 12.3 kB
JavaScript
// Generated by CoffeeScript 1.6.3
(function() {
// All module-level names are declared up front (CoffeeScript output style).
// Note that `exports` is declared here as a *local* variable, so inside this
// wrapper it starts out undefined regardless of any outer `exports` binding.
var DOMUtil, HOMEDIR, LIB_DIR, PredicateFactory, Stew, exports, fs, path;
fs = require('fs');
path = require('path');
// HOMEDIR is the parent of the directory that contains this file.
HOMEDIR = path.join(__dirname, '..');
// Load sibling modules from `<HOMEDIR>/lib-cov` when that directory exists,
// otherwise from `<HOMEDIR>/lib`.
// NOTE(review): `lib-cov` is presumably a coverage-instrumented build of `lib` - confirm.
LIB_DIR = fs.existsSync(path.join(HOMEDIR, 'lib-cov')) ? path.join(HOMEDIR, 'lib-cov') : path.join(HOMEDIR, 'lib');
// Project-local collaborators: DOM parsing/walking helper and the predicate builder.
DOMUtil = require(path.join(LIB_DIR, 'dom-util')).DOMUtil;
PredicateFactory = require(path.join(LIB_DIR, 'predicate-factory')).PredicateFactory;
Stew = (function() {
var _ATTRIBUTES, _ATTR_NAME, _CLASSES, _DEQUOTED_ATTR_VALUE, _ID, _NAME, _NEVERQUOTED_ATTR_VALUE, _OPERATOR, _PSEUDO_CLASS, _SPLIT_ON_WS_REGEXP;
// Creates a selector engine.
//
// dom_util - optional object used to parse HTML and walk the DOM; when it is
//            null or undefined a fresh `DOMUtil` is created instead.
//
// A new `PredicateFactory` is always created and stored on `this.factory`.
function Stew(dom_util) {
  var walker;
  this.factory = new PredicateFactory();
  if (dom_util == null) {
    walker = new DOMUtil();
  } else {
    walker = dom_util;
  }
  this.dom_util = walker;
}
// Selects every node of `dom` that matches `selector`.
//
// dom      - either an already-parsed DOM object or a string handed to
//            `dom_util.parse_html`.
// selector - a selector string (parsed via `_parse_selectors`) or a
//            ready-made predicate, which is used as-is.
// callback - `callback(err, nodeset)`. Required when `dom` is a string,
//            optional otherwise.
//
// For a parsed DOM the nodeset is returned (and also passed to `callback`
// when that is a function). For a string the return value is whatever
// `dom_util.parse_html` returns, and the nodeset is delivered via `callback`.
// Throws an Error when `dom` is a string and no callback was supplied.
Stew.prototype.select = function(dom, selector, callback) {
  var self = this;
  var predicate = selector;
  var matches;

  if (typeof predicate === 'string') {
    predicate = this._parse_selectors(predicate);
  }

  // Already-parsed DOM: answer synchronously.
  if (typeof dom !== 'string') {
    matches = this._unguarded_select(dom, predicate);
    if (typeof callback === "function") {
      callback(null, matches);
    }
    return matches;
  }

  // String input has to be parsed first, which needs a callback to report to.
  if (callback == null) {
    throw new Error('When select is invoked on a string object, the `callback(err,nodeset)` parameter is required.');
  }

  return this.dom_util.parse_html(dom, function(err, parsed) {
    if (err != null) {
      return callback(err);
    }
    return callback(null, self._unguarded_select(parsed, predicate));
  });
};
// Walks `dom` with `dom_util.walk_dom` and returns an array of every visited
// node for which `predicate(node, parent, path, siblings, sib_index)` is truthy.
//
// The visitor always asks the walker to keep going and to descend into
// children, so the whole tree is examined. No argument checking is done here.
Stew.prototype._unguarded_select = function(dom, predicate) {
  var matched = [];
  var visitor = {
    visit: function(node, parent, path, siblings, sib_index) {
      var accepted = predicate(node, parent, path, siblings, sib_index);
      if (accepted) {
        matched.push(node);
      }
      return { 'continue': true, 'visit_children': true };
    }
  };
  this.dom_util.walk_dom(dom, visitor);
  return matched;
};
// Like `select`, but yields only the first matching node (or null when
// nothing matches, as reported by `_unguarded_select_first`).
//
// dom      - a parsed DOM object, or a string to be parsed with
//            `dom_util.parse_html`.
// selector - selector string (parsed via `_parse_selectors`) or a predicate.
// callback - `callback(err, node)`. Mandatory for string input, optional for
//            a parsed DOM.
//
// Returns the node for a parsed DOM; for a string, returns whatever
// `dom_util.parse_html` returns and reports the node through `callback`.
// Throws an Error when `dom` is a string and `callback` is missing.
Stew.prototype.select_first = function(dom, selector, callback) {
  var engine = this;
  var needs_parsing = (typeof dom === 'string');
  var first;

  if (typeof selector === 'string') {
    selector = this._parse_selectors(selector);
  }

  if (needs_parsing && callback == null) {
    throw new Error('When select_first is invoked on a string object, the `callback(err,node)` parameter is required.');
  }

  if (needs_parsing) {
    return this.dom_util.parse_html(dom, function(err, tree) {
      return (err != null)
        ? callback(err)
        : callback(null, engine._unguarded_select_first(tree, selector));
    });
  }

  first = this._unguarded_select_first(dom, selector);
  if (typeof callback === "function") {
    callback(null, first);
  }
  return first;
};
// Walks `dom` with `dom_util.walk_dom` and returns the first visited node for
// which `predicate(node, parent, path, siblings, sib_index)` is truthy, or
// null when no node matches.
//
// On a match the visitor tells the walker to stop (`continue: false`) and not
// to descend further; otherwise it asks the walker to carry on into children.
Stew.prototype._unguarded_select_first = function(dom, predicate) {
  var found = null;
  this.dom_util.walk_dom(dom, {
    visit: function(node, parent, path, siblings, sib_index) {
      var hit = Boolean(predicate(node, parent, path, siblings, sib_index));
      if (hit) {
        found = node;
      }
      // Keep walking exactly as long as nothing has matched yet.
      return { 'continue': !hit, 'visit_children': !hit };
    }
  });
  return found;
};
// Tokenizer pattern. A token is either a run made of ordinary characters,
// "double-quoted" sections and /slash-delimited/ sections, or a single
// combinator character (`,`, `+`, `~`, `>`). Whitespace outside quotes and
// slashes separates tokens.
// NOTE: `~` is not excluded from the "ordinary character" class, so it only
// becomes its own token when surrounded by whitespace (`a ~ b`, not `a~b`).
_SPLIT_ON_WS_REGEXP = /([^\"\/\s,\+>]|(\"[^\"]+\")|(\/[^\/]+\/)|(\[[^\]]*\]))+|[,\+~>]/g;

// Splits a selector string into an array of tokens (simple selectors and
// combinators) using `_SPLIT_ON_WS_REGEXP`.
//
// The pattern is global and shared, so it is driven with `exec` until it
// reports no further match, which also leaves its `lastIndex` back at 0.
Stew.prototype._split_on_ws_respecting_quotes = function(selector) {
  var tokens = [];
  var found;
  for (found = _SPLIT_ON_WS_REGEXP.exec(selector);
       found != null && found[0] != null;
       found = _SPLIT_ON_WS_REGEXP.exec(selector)) {
    tokens.push(found[0]);
  }
  return tokens;
};
// Turns a full selector expression into a single predicate.
//
// selectors - a selector string (tokenized with
//             `_split_on_ws_respecting_quotes`) or an array of tokens.
//
// Tokens `>`, `+`, `~` and `,` are combinators: each one marks that the next
// simple selector must be merged with the predicate most recently produced,
// using the factory's direct-descendant, adjacent-sibling, preceding-sibling
// or "or" predicate respectively. When several combinators are pending at
// once, one is consumed per simple selector in that priority order.
// Any other token is parsed with `_parse_selector`.
//
// Returns `factory.descendant_predicate(list)` over the resulting predicate
// list, or the empty list itself when no simple selector was found.
Stew.prototype._parse_selectors = function(selectors) {
  var count, i, pending, predicate, stack, token, tokens;
  stack = [];
  tokens = selectors;
  if (typeof tokens === 'string') {
    tokens = this._split_on_ws_respecting_quotes(tokens);
  }
  // One independent flag per combinator, mirroring the order they are checked in.
  pending = { child: false, adjacent: false, preceding: false, alternate: false };
  for (i = 0, count = tokens.length; i < count; i++) {
    token = tokens[i];
    switch (token) {
      case '>':
        pending.child = true;
        break;
      case '+':
        pending.adjacent = true;
        break;
      case '~':
        pending.preceding = true;
        break;
      case ',':
        pending.alternate = true;
        break;
      default:
        predicate = this._parse_selector(token);
        if (pending.child) {
          predicate = this.factory.direct_descendant_predicate(stack.pop(), predicate);
          pending.child = false;
        } else if (pending.adjacent) {
          predicate = this.factory.adjacent_sibling_predicate(stack.pop(), predicate);
          pending.adjacent = false;
        } else if (pending.preceding) {
          predicate = this.factory.preceding_sibling_predicate(stack.pop(), predicate);
          pending.preceding = false;
        } else if (pending.alternate) {
          predicate = this.factory.or_predicate([stack.pop(), predicate]);
          pending.alternate = false;
        }
        stack.push(predicate);
    }
  }
  if (stack.length === 0) {
    return stack;
  }
  return this.factory.descendant_predicate(stack);
};
// Matches one compound selector of the form `name#id.class[attr...]:pseudo`.
// The name, id, class and attribute-name/value parts may each be written as a
// `/regexp/` literal with optional `g`, `m`, `i` flags. Every part is optional,
// so `exec` always returns a match (possibly of the empty string).
Stew.prototype._CSS_SELECTOR_REGEXP = /((\/[^\/]*\/[gmi]*)|(\*|[\w-]+))?(\#((\/[^\/]*\/[gmi]*)|([\w-]+)))?((\.((\/[^\/]*\/[gmi]*)|([\w-]+)))*)((\[((\/[^\/]*\/[gmi]*)|([\w-]+))(((=)|(~=)|(\|=)|(\*=)|(\^=)|(\$=))(("(([^\\"]|(\\"))*)")|((\/[^\/]*\/[gmi]*)|([\w- :]+))))?\])*)(:([\w-]+))?/;
// Capture-group indices into a `_CSS_SELECTOR_REGEXP` match:
// tag name, `*`, or a /regexp/ literal.
_NAME = 1;
// the id clause, including its leading `#`.
_ID = 4;
// the whole run of `.class` clauses (e.g. `.a.b`).
_CLASSES = 8;
// the whole run of `[...]` attribute clauses.
_ATTRIBUTES = 13;
// the pseudo-class name, without the leading `:`.
_PSEUDO_CLASS = 35;
// Matches a single `[name]` or `[name<op>value]` clause. It is global so that
// repeated `exec` calls step through a run of clauses; that also means it
// carries `lastIndex` state between calls.
Stew.prototype._ATTRIBUTE_CLAUSE_REGEXP = /(\[((\/[^\/]*\/[gmi]*)|([\w-]+))(((=)|(~=)|(\|=)|(\*=)|(\^=)|(\$=))(("(([^\\"]|(\\"))*)")|((\/[^\/]*\/[gmi]*)|([\w- :]+))))?\])/g;
// Capture-group indices into an `_ATTRIBUTE_CLAUSE_REGEXP` match:
// attribute name or /regexp/ literal.
_ATTR_NAME = 2;
// the operator (`=`, `~=`, `|=`, `*=`, `^=` or `$=`); undefined for `[name]`.
_OPERATOR = 6;
// contents of a double-quoted value, without the surrounding quotes.
_DEQUOTED_ATTR_VALUE = 15;
// an unquoted value or /regexp/ literal.
_NEVERQUOTED_ATTR_VALUE = 18;
// Builds the flag string (`i`, `g`, `m`) carried by `regexp`.
function _regexp_flags(regexp) {
  var flags = '';
  if (regexp.ignoreCase) {
    flags += 'i';
  }
  if (regexp.global) {
    flags += 'g';
  }
  if (regexp.multiline) {
    flags += 'm';
  }
  return flags;
}

// Returns a RegExp that matches `value` anchored at the start (`at_start`
// truthy) or at the end of the tested string. A string value is escaped with
// `factory._escape_for_regexp`; a RegExp value keeps its flags and has its
// source wrapped in a non-capturing group so that the anchor applies to the
// whole pattern, including every branch of a top-level alternation.
function _anchored_attr_value(factory, value, at_start) {
  var flags, source;
  if (typeof value === 'string') {
    flags = '';
    source = factory._escape_for_regexp(value);
  } else {
    flags = _regexp_flags(value);
    source = '(?:' + value.source + ')';
  }
  return new RegExp(at_start ? '^' + source : source + '$', flags);
}

// Parses one compound selector token (e.g. `div#main.note[href^="http"]`)
// into a predicate.
//
// selector - a single token as produced by `_split_on_ws_respecting_quotes`.
//
// One clause is created per recognised part (tag, id, each class, each
// attribute test, `:first-child`); other pseudo-classes are ignored.
// Returns `factory.and_predicate(clauses)`, or an empty array when no part of
// the token was recognised.
Stew.prototype._parse_selector = function(selector) {
  var attr_match, attr_name, attr_value, class_tokens, clauses, delim, i, match, operator, raw_value;
  match = this._CSS_SELECTOR_REGEXP.exec(selector);
  clauses = [];

  if (match[_NAME] != null) {
    if (match[_NAME] === '*') {
      clauses.push(this.factory.any_tag_predicate());
    } else {
      clauses.push(this.factory.by_tag_predicate(this._to_string_or_regex(match[_NAME])));
    }
  }

  if (match[_ID] != null) {
    // The captured id still carries its leading `#`.
    clauses.push(this.factory.by_id_predicate(this._to_string_or_regex(match[_ID].substring(1))));
  }

  if (match[_CLASSES] != null && match[_CLASSES].length > 0) {
    // Tokenize with the same alternatives the selector pattern uses rather
    // than splitting on `.`, so a `/regexp/` class that itself contains a dot
    // (e.g. `./foo.bar/`) stays in one piece.
    class_tokens = match[_CLASSES].match(/\.(?:\/[^\/]*\/[gmi]*|[\w-]+)/g) || [];
    for (i = 0; i < class_tokens.length; i++) {
      clauses.push(this.factory.by_class_predicate(this._to_string_or_regex(class_tokens[i].substring(1))));
    }
  }

  if (match[_ATTRIBUTES] != null && match[_ATTRIBUTES].length > 0) {
    // The clause pattern is global and shared via the prototype; start from
    // the beginning even if an earlier parse aborted part-way through.
    this._ATTRIBUTE_CLAUSE_REGEXP.lastIndex = 0;
    while ((attr_match = this._ATTRIBUTE_CLAUSE_REGEXP.exec(match[_ATTRIBUTES])) != null) {
      if (attr_match[_ATTR_NAME] == null) {
        continue;
      }
      operator = attr_match[_OPERATOR];
      raw_value = attr_match[_DEQUOTED_ATTR_VALUE] != null ? attr_match[_DEQUOTED_ATTR_VALUE] : attr_match[_NEVERQUOTED_ATTR_VALUE];
      if (operator == null) {
        // `[name]`: attribute merely has to exist.
        clauses.push(this.factory.by_attr_exists_predicate(this._to_string_or_regex(attr_match[_ATTR_NAME])));
      } else if (raw_value != null) {
        attr_name = this._to_string_or_regex(attr_match[_ATTR_NAME]);
        attr_value = this._to_string_or_regex(raw_value);
        if (operator === '|=') {
          clauses.push(this.factory.by_attr_value_pipe_equals(attr_name, attr_value));
        } else if (operator === '^=') {
          clauses.push(this.factory.by_attr_value_predicate(attr_name, _anchored_attr_value(this.factory, attr_value, true)));
        } else if (operator === '$=') {
          clauses.push(this.factory.by_attr_value_predicate(attr_name, _anchored_attr_value(this.factory, attr_value, false)));
        } else if (operator === '*=') {
          // Substring match: a literal becomes an unanchored pattern, a
          // regexp is used as given.
          if (typeof attr_value === 'string') {
            attr_value = new RegExp(this.factory._escape_for_regexp(attr_value));
          }
          clauses.push(this.factory.by_attr_value_predicate(attr_name, attr_value));
        } else {
          // `=` compares the whole value; `~=` additionally passes a
          // whitespace delimiter to the factory.
          delim = operator === '~=' ? /\s+/ : null;
          clauses.push(this.factory.by_attr_value_predicate(attr_name, attr_value, delim));
        }
      }
    }
  }

  if (match[_PSEUDO_CLASS] != null) {
    if (match[_PSEUDO_CLASS] === 'first-child') {
      clauses.push(this.factory.first_child_predicate());
    }
  }

  if (clauses.length > 0) {
    clauses = this.factory.and_predicate(clauses);
  }
  return clauses;
};
// Interprets `str` as a regular-expression literal when it has the shape
// `/source/flags` (flags drawn from `g`, `m`, `i`) and returns the
// corresponding RegExp; any other string is returned unchanged.
Stew.prototype._to_string_or_regex = function(str) {
  var pieces = str.match(/^\/(.*)\/([gmi]*)$/);
  if (pieces == null || pieces[1] == null) {
    return str;
  }
  return new RegExp(pieces[1], pieces[2]);
};
return Stew;
})();
// `exports` was declared as a local variable of this wrapper function, so it
// is still undefined here and the fallback to `this` is what actually applies.
// The wrapper is invoked with `.call(this)`, i.e. with the enclosing scope's
// `this`; in a Node.js CommonJS module that is `module.exports`.
exports = exports != null ? exports : this;
// Public API: the selector engine and the DOM helper it uses by default.
exports.Stew = Stew;
exports.DOMUtil = DOMUtil;
}).call(this);