equivalency
Version:
Declaratively define rules for string equivalence.
301 lines (264 loc) • 9.52 kB
JavaScript
// We don't use Symbol explicitly, but not explicitly requiring it causes IE 11
// to fail complaining about Symbol not being defined. May be related to
// https://github.com/zloirock/core-js/issues/514
require('core-js/features/symbol');
require('core-js/features/map');
// Polyfill Array.from because some dep uses it. array-from produces smaller
// bundle than using core-js.
Array.from = require('array-from');
const dl = require('damerau-levenshtein');
const { Rule, identityRule } = require('./lib');
const { powerSet } = require('./lib/helpers');
/**
* Merge maps in `iterables` into `map`.
* @param {Map} map - Destination map.
* @param {Map} ...iterables - Source maps.
*/
function concatMaps(map, ...iterables) {
for (const iterable of iterables) {
for (const item of iterable) {
map.set(...item);
}
}
}
/**
* A class to represent equivalence between strings. Manages a collection of
* Rules.
*
*/
function Equivalency() {
// Holds one object per rule, consisting of the rule and whether or not it
// matters.
this._ruleList = [];
this._ruleListIsDirty = true;
}
Equivalency.prototype.doesntMatter = function(_rule) {
this._ruleList.push({ rule: Rule.from(_rule), matters: false });
this._ruleListIsDirty = true;
return this;
};
Equivalency.prototype.matters = function(_rule) {
this._ruleList.push({ rule: Rule.from(_rule), matters: true });
this._ruleListIsDirty = true;
return this;
};
Equivalency._collapseRules = function(rules) {
// identity is always the final rule.
if (rules.length === 0 || rules[rules.length - 1].rule !== identityRule) {
rules.push({ rule: identityRule, matters: true });
}
// Collapse rules into finalMap and a set of functions.
const collapsedMap = new Map();
let ruleFns = [];
rules.forEach(({ rule, matters }) => {
/* eslint-disable indent */
switch (rule.type) {
case 'ReplaceRule':
case 'RemoveRule':
case 'MapRule': {
if (!matters) {
concatMaps(collapsedMap, rule);
} else if (matters) {
for (const key of rule.keys()) {
collapsedMap.delete(key);
}
}
break;
}
case 'FunctionRule': {
if (!matters) {
if (ruleFns.indexOf(rule) === -1) ruleFns.push(rule);
} else if (matters) {
if (ruleFns.indexOf(rule) >= -1)
ruleFns = ruleFns.filter(r => r !== rule);
}
break;
}
default: {
let ruleType;
try {
ruleType = rule.type || rule.constructor.type;
} catch (ex) {
ruleType = 'unknown type';
}
throw new Error(`Unknown rule type '${ruleType}'`);
}
}
/* eslint-enable indent */
});
return [collapsedMap, ruleFns];
};
Equivalency._compareWithRules = function(s1, s2, map, ruleFns) {
let s1prime = s1,
s2prime = s2;
// apply finalMap
map.forEach((v, k) => {
let _s1prime;
do {
_s1prime = s1prime;
s1prime = _s1prime.replace(k, v);
} while (_s1prime != s1prime);
let _s2prime;
do {
_s2prime = s2prime;
s2prime = _s2prime.replace(k, v);
} while (_s2prime != s2prime);
});
// apply rule functions
ruleFns.forEach(functionRule => {
[s1prime, s2prime] = functionRule.apply(s1prime, s2prime);
});
return { isEquivalent: s1prime === s2prime, s1prime, s2prime };
};
/**
* Compares two strings for equivalence.
*
*
* @param {string} canonical First comparison string
* @param {string} comparate Second comparison string
* @param {Object} options Options hash
* @param {bool} options.calculateEditDistance If true, return the editDistance of transformed strings with the
* isEquivalent boolean. Default: false.
* @param {bool} options.giveReasons When true, include the reason(s) why the strings aren't equivalent.
* This is O(2^n) on the number of "matters" rules because the power set of
* "matters" rules needs to be checked, so it will throw when used with
* equivalencies that have more than 16 "matters" rules unless you explicitly
* opt-in by setting `giveReasonsUnlimitedRules` to true. Default: false.
* @param {bool} options.giveReasonsUnlimitedRules See `giveReasons`. Default: false.
*
* @return {Object} Returns an object with the following top-level
* properties:
* - canonicalPrime
* - comparatePrime
* - isEquivalent
* - editDistance (optional)
* - reasons[] (optional)
*/
Equivalency.prototype.compare = Equivalency.prototype.equivalent = function(
canonical,
comparate,
options = null
) {
// Ensure identity is the final and only the final rlue.
if (
this._ruleList.length === 0 ||
this._ruleList[this._ruleList.length - 1].rule !== identityRule
) {
this._ruleListIsDirty = true;
this._ruleList.push({ rule: identityRule, matters: true });
}
let finalMap = this.finalMap,
ruleFns = this.ruleFns;
if (!finalMap || !ruleFns || this._ruleListIsDirty) {
[finalMap, ruleFns] = Equivalency._collapseRules(this._ruleList);
this._ruleListIsDirty = false;
this.finalMap = finalMap;
this.ruleFns = ruleFns;
}
const {
isEquivalent,
s1prime: canonicalPrime,
s2prime: comparatePrime,
} = Equivalency._compareWithRules(canonical, comparate, finalMap, ruleFns);
let results = { isEquivalent: isEquivalent, canonicalPrime, comparatePrime };
if (options && options.calculateEditDistance) {
const editDistance = dl(canonicalPrime, comparatePrime);
results.editDistance = editDistance.steps;
}
if (options && options.giveReasons) {
if (isEquivalent) {
results.reasons = [];
} else {
const reasons = [];
const indexesOfRulesThatMatter = this._ruleList
.slice(0, this._ruleList.length - 1)
.map((r, idx) => idx)
.filter(idx => this._ruleList[idx].matters);
if (
indexesOfRulesThatMatter.length > 16 &&
!options.giveReasonsUnlimitedRules
)
throw new Error(
`To give reasons for >16 matters rules, set opts.giveReasonsUnlimitedRules to true.`
);
// If identity wasn't the cause of the inequivalence, then one or more
// of the matters rules are the cause, so find out which one(s). The size
// of the power set is 2^n. If we ever need to support large number of
// rules, optimization paths to explore include:
//
// - use generators to lazily generate the sets, to avoid OOM errors
// - make the method async and do work in batches, yielding periodically
// so the event loop isn't starved
//
const _powerSet = powerSet(indexesOfRulesThatMatter);
// Can't use filter here b/c we need the index into this._rules.
_powerSet.forEach(indexesOfRulesUnderTest => {
for (const idx of indexesOfRulesThatMatter) {
if (reasons.indexOf(this._ruleList[idx].rule) > -1) {
return;
}
}
const rulesSwitched = this._ruleList.slice();
// Switch and test.
indexesOfRulesUnderTest.forEach(
indexOfRuleUnderTest =>
(rulesSwitched[indexOfRuleUnderTest].matters = false)
);
const [finalMap, ruleFns] = Equivalency._collapseRules(rulesSwitched);
const { isEquivalent } = Equivalency._compareWithRules(
canonical,
comparate,
finalMap,
ruleFns
);
if (isEquivalent) {
// This set of rules affected the outcome.
indexesOfRulesUnderTest.forEach(idxOfRuleUnderTest => {
reasons.push(rulesSwitched[idxOfRuleUnderTest].rule);
});
}
// Restore.
indexesOfRulesUnderTest.forEach(
indexOfRuleUnderTest =>
(rulesSwitched[indexOfRuleUnderTest].matters = true)
);
});
// If none of the rules had an effect, then the difference is due to some
// feature external to the rules.
if (reasons.length === 0) {
reasons.push(identityRule);
}
results.reasons = reasons.map(rule => {
return {
name: rule.name,
};
});
}
}
return results;
};
Equivalency.prototype.rules = function() {
return this._ruleList;
};
Equivalency.prototype.clone = function() {
const clone = new Equivalency();
// TODO: slice makes a shallow copy. Rules should know how to copy themselves
// so that a clone doesn't contain references to the original rules.
clone._ruleList = this.rules().slice();
return clone;
};
// Attach to the main export for convenience.
const lib = require('./lib');
for (let prop in lib) {
if (Object.prototype.hasOwnProperty.call(lib, prop)) {
Equivalency[prop] = lib[prop];
}
}
for (let prop in lib) {
if (Object.prototype.hasOwnProperty.call(lib, prop)) {
Equivalency.prototype[prop] = lib[prop];
}
}
const instance = new Equivalency();
instance.Equivalency = Equivalency;
module.exports = instance;