named-js-regexp
Version:
Extends JavaScript RegExp with named groups, backreferences and replacement.
219 lines (201 loc) • 7.07 kB
JavaScript
function (window) {
// Pattern a group name must satisfy: starts with '$', '_' or a letter, followed by
// any number of '$', '_', letters or digits (case-insensitive).
var validGroupName = /^[$_a-z][$_a-z0-9]*$/i;
// Parsed-pattern cache keyed by pattern source. It stays undefined until caching
// is switched on through createNamedRegex(true) and is reset by createNamedRegex(false).
var cache;
/**
 * Converts a pattern that uses named groups into a plain RegExp pattern.
 *
 * Named groups may be written as `(?<name>...)` or `(:<name>...)` and named
 * backreferences as `\k<name>`. Group names are stripped from the pattern and
 * recorded in a name -> index map; named backreferences become numeric ones.
 * Lookbehind assertions `(?<=...)` and `(?<!...)` are passed through untouched
 * and do not count as capturing groups.
 *
 * @param {string} text pattern source, possibly containing named groups
 * @returns {{mapper: Object, regexText: string}} `mapper` maps each group name
 *   to its capture index (a number) or, when the same name is used more than
 *   once, to an array of indices; `regexText` is the pattern without names
 * @throws {Error} when a group name or backreference is not terminated by '>',
 *   when a group name is invalid, or when a backreference targets an undefined
 *   or duplicated name
 */
function parseRegex(text) {
    var i, c, c1, c12, nameEnd, name, item, isLookbehind,
        // Own-property test so that names such as "constructor" or "toString"
        // never resolve to members inherited from Object.prototype.
        hasOwn = Object.prototype.hasOwnProperty,
        mapper = {},
        current = 0,
        regexText = "",
        inCharSet = false;
    for (i = 0; i < text.length; ++i) {
        c = text[i]; c1 = text[i + 1]; c12 = c1 + text[i + 2];
        regexText += c;
        if (c === "\\") {
            if (!inCharSet && c12 === "k<") {
                // Named backreference: \k<name> becomes \<index>.
                nameEnd = text.indexOf(">", i + 3);
                if (nameEnd < 0) {
                    throw new Error("'>' missing in named backreference.");
                }
                name = text.substring(i + 3, nameEnd);
                item = hasOwn.call(mapper, name) ? mapper[name] : undefined;
                if (!item) {
                    throw new Error("Named group '" + name + "' is not defined in backreference.");
                }
                if (typeof item !== "number") {
                    throw new Error("Named backreference referencing duplicate named group '" + name + "'.");
                }
                i = nameEnd;
                regexText += item;
                if (/[0-9]/.test(text.charAt(nameEnd + 1))) {
                    // Keep a following literal digit from being read as part of
                    // the backreference number (\1 followed by 0 is not \10).
                    regexText += "(?:)";
                }
            } else if (c1 !== undefined) {
                // Any other escape: copy the escaped character verbatim so it is
                // never interpreted as '(' or '[' by this parser.
                regexText += c1;
                ++i;
            }
            // A trailing lone backslash is copied as is; RegExp reports the error.
        } else if (inCharSet) {
            if (c === "]") {
                inCharSet = false;
            }
        } else if (c === "[") {
            inCharSet = true;
        } else if (c === "(" && c12 !== "?:" && c12 !== "?=" && c12 !== "?!") {
            // '(' that is neither a non-capturing group nor a lookahead.
            isLookbehind = c12 === "?<" && (text[i + 3] === "=" || text[i + 3] === "!");
            if (!isLookbehind) {
                current++;
                // '(?<' or '(:<' introduces a named capturing group.
                if (c12 === "?<" || c12 === ":<") {
                    nameEnd = text.indexOf(">", i + 3);
                    if (nameEnd < 0) {
                        throw new Error("'>' missing in named group.");
                    }
                    name = text.substring(i + 3, nameEnd);
                    if (!validGroupName.test(name)) {
                        throw new Error("Invalide group name '" + name + "'. Regexp group name should start with '_$a-zA-Z' and can contain only '_$a-zA-Z0-9'.");
                    }
                    if (!hasOwn.call(mapper, name)) {
                        // First use of the name: store the index directly.
                        mapper[name] = current;
                    } else if (typeof mapper[name] === "number") {
                        // Second use: switch to an array of indices.
                        mapper[name] = [mapper[name], current];
                    } else {
                        mapper[name].push(current);
                    }
                    // Skip '?<name>' so only '(' remains in the output.
                    i = nameEnd;
                }
            }
        }
    }
    return { mapper: mapper, regexText: regexText };
}
/**
 * Rewrites a replacement string so that named references `${name}` become
 * numeric references (`$1`, `$2`, ...) understood by String.prototype.replace.
 * `$$` and every other `$` sequence are copied unchanged.
 *
 * @param {string} text replacement text, possibly containing `${name}`
 * @param {Object} groupsIndices map of group name to capture index; a name
 *   mapped to anything other than a number is treated as a duplicated group
 * @returns {string} replacement text with only numeric group references
 * @throws {Error} when `${` is not closed by '}', when the name is unknown, or
 *   when the name refers to a duplicated group
 */
function parseReplacement(text, groupsIndices) {
    var i, c, c1, name, nameEnd, groupIndex,
        // Own-property test: names like "toString" must not resolve to members
        // inherited from Object.prototype.
        hasOwn = Object.prototype.hasOwnProperty,
        result = '';
    for (i = 0; i < text.length; ++i) {
        c = text[i]; c1 = text[i + 1];
        result += c;
        if (c !== '$') {
            continue;
        }
        if (c1 === '$') {
            // Escaped dollar: copy both characters and skip the second one.
            result += c1;
            ++i;
        } else if (c1 === '{') {
            nameEnd = text.indexOf("}", i + 2);
            if (nameEnd < 0) {
                throw new Error("'}' missing in replacement named group.");
            }
            name = text.substring(i + 2, nameEnd);
            groupIndex = hasOwn.call(groupsIndices, name) ? groupsIndices[name] : undefined;
            if (groupIndex === undefined) {
                throw new Error("Named group '" + name + "' is not defined in replacement text.");
            }
            if (typeof groupIndex !== "number") {
                throw new Error("Named replacement referencing duplicate named group '" + name + "'.");
            }
            if (groupIndex < 10 && /[0-9]/.test(text.charAt(nameEnd + 1))) {
                // Use the two-digit form ($01) so a following literal digit is
                // not read as part of the group number ($1 + "0" is not $10).
                result += "0";
            }
            result += groupIndex;
            i = nameEnd;
        }
    }
    return result;
}
/**
 * Creates a RegExp extended with named-group helpers, or switches the
 * parsed-pattern cache on/off.
 *
 * When called with a boolean, `true` enables the cache (keeping an existing
 * one) and `false` drops it; nothing is returned in that case.
 *
 * Otherwise the returned RegExp has these additions:
 *  - `groupsIndices`: map of group name to capture index (or array of indices)
 *  - `exec(text)`: like RegExp#exec, the match additionally has `group(name, all)`
 *    and `groups(all)`
 *  - `execGroups(text, all)`: the `groups(all)` object of the match, or null
 *  - `replace(text, replacement)`: like String#replace; a string replacement
 *    may use `${name}`, a function replacement is called with the extended
 *    match as `this`
 *
 * @param {(string|RegExp|boolean)} regexp string, regular literal or true/false for enable/disable cache
 * @param {string} flags flags used when `regexp` is a string; when `regexp` is
 *   a RegExp its own flags are used instead
 * @returns {(RegExp|undefined)} the extended RegExp, or undefined for a boolean argument
 */
function createNamedRegex(regexp, flags) {
    var hasOwn = Object.prototype.hasOwnProperty;
    if (typeof regexp === "boolean") {
        // options parameter
        if (regexp === false) {
            cache = undefined;
        } else if (!cache) {
            // No prototype: a pattern such as "constructor" must not hit an
            // inherited Object.prototype member instead of a cache entry.
            cache = Object.create(null);
        }
        return;
    }
    if (typeof regexp !== "string") {
        // parameter is regexp literal; keep every flag it carries (including
        // sticky/unicode/dotAll where the engine exposes `flags`).
        flags = typeof regexp.flags === "string"
            ? regexp.flags
            : (regexp.global ? "g" : "") +
              (regexp.multiline ? "m" : "") +
              (regexp.ignoreCase ? "i" : "");
        regexp = regexp.source;
    }
    var info = cache ? (cache[regexp] || (cache[regexp] = parseRegex(regexp))) : parseRegex(regexp);
    var regex = new RegExp(info.regexText, flags);
    regex.groupsIndices = info.mapper;

    // Adds group()/groups() to a match array and returns it.
    function extendMatched(matched) {
        var mapper = info.mapper;
        /**
         * Returns value for group name or undefined. If same group name was defined multiple time, it returns first not undefined value.
         * If all is set to true, then it returns array of all matched values.
         * Unknown names always yield undefined.
         */
        matched.group = function (name, all) {
            var indices = hasOwn.call(mapper, name) ? mapper[name] : undefined;
            if (indices === undefined) {
                return undefined;
            }
            if (typeof indices === "number") {
                // name group is defined only once
                return matched[indices];
            }
            if (all) {
                // name group is defined multiple time and because all is true, return array of all matched values
                return indices.map(function (v) { return matched[v]; });
            }
            // name group is defined multiple time and because all is false, return first not undefined matched value
            for (var i = 0; i < indices.length; ++i) {
                var value = matched[indices[i]];
                if (value !== undefined) {
                    return value;
                }
            }
            return undefined;
        };
        // Results of groups() are computed once per match and per `all` mode.
        var cachedGroups, cachedGroupsAll;
        matched.groups = function (all) {
            var cg = all ? cachedGroupsAll : cachedGroups;
            if (cg) {
                return cg;
            }
            cg = {};
            for (var name in mapper) {
                cg[name] = matched.group(name, all);
            }
            if (all) {
                cachedGroupsAll = cg;
            } else {
                cachedGroups = cg;
            }
            return cg;
        };
        return matched;
    }

    regex.exec = function (text) {
        var matched = RegExp.prototype.exec.call(this, text);
        if (matched) {
            extendMatched(matched);
        }
        return matched;
    };
    regex.execGroups = function (text, all) {
        var matched = this.exec(text);
        if (!matched) {
            return null;
        }
        return matched.groups(all);
    };
    regex.replace = function (text, replacement) {
        if (typeof replacement === "function") {
            return text.replace(regex, function () {
                // Callback arguments are (match, p1..pn, offset, string); drop
                // the last two to rebuild a match-like array.
                var matched = Array.prototype.slice.call(arguments, 0, arguments.length - 2);
                extendMatched(matched);
                return replacement.apply(matched, arguments);
            });
        }
        var replacementText = parseReplacement(replacement, this.groupsIndices);
        return text.replace(this, replacementText);
    };
    return regex;
}
// Publish the factory: CommonJS takes precedence, then an AMD loader, and
// finally a property on the object passed in as `window`.
if (typeof exports === "undefined") {
    if (typeof define !== "function" || !define.amd) {
        window.createNamedRegex = createNamedRegex;
    } else {
        define(function () {
            return createNamedRegex;
        });
    }
} else {
    module.exports = createNamedRegex;
}
})(typeof window === "undefined" ? this : window);
;
(