// equivalency
// Version:
// Declaratively define rules for string equivalence.
// 1,260 lines (1,136 loc) • 42.3 kB
// JavaScript
const expect = require('expect');
const equivalency = require('./index');
const { Equivalency } = equivalency;
const { MapRule } = require('./lib');
// The value exported by ./index is itself expected to be an Equivalency.
describe('default instance', () => {
  it('should be an instance of Equivalency', () => {
    const defaultExport = equivalency;
    expect(defaultExport).toBeInstanceOf(Equivalency);
  });
});
// Checks that the per-language namespaces hanging off the Equivalency class
// carry the punctuation and symbol builtins as MapRule instances.
describe('Equivalency statics', () => {
  describe('language builtins', () => {
    ['en', 'es'].forEach(language => {
      it(`should have ${language} builtins`, () => {
        const requiredBuiltins = expect.objectContaining({
          COMMON_PUNCTUATION: expect.any(MapRule),
          COMMON_SYMBOLS: expect.any(MapRule),
        });
        expect(Equivalency[language]).toEqual(requiredBuiltins);
      });
    });
  });
});
// `compare` and `equivalent` must be the very same function object.
describe('prototype', () => {
  it('should have equivalent available as an alias of compare', () => {
    const { compare, equivalent } = Equivalency.prototype;
    expect(compare).toBe(equivalent);
  });
});
describe('instance', () => {
describe('isEquivalent', () => {
describe('equivalent (default rules)', () => {
// An Equivalency with no rules added must report 'a' vs 'b' as different.
it('should return false when inputs are not byte-equal', () => {
  const defaultRules = new Equivalency();
  const pairs = [['a', 'b']];
  for (const [canonical, comparate] of pairs) {
    const outcome = defaultRules.equivalent(canonical, comparate);
    // Strict shape check: the whole result object is pinned, not a subset.
    expect(outcome).toEqual({
      isEquivalent: false,
      canonicalPrime: 'a',
      comparatePrime: 'b',
    });
  }
});
// With giveReasons set and no rules configured, the reported reason is 'identity'.
it('should give reasons', () => {
  const defaultRules = new Equivalency();
  const expectedOutcome = expect.objectContaining({
    isEquivalent: false,
    reasons: [{ name: 'identity' }],
  });
  for (const [canonical, comparate] of [['a', 'b']]) {
    const outcome = defaultRules.equivalent(canonical, comparate, {
      giveReasons: true,
    });
    expect(outcome).toEqual(expectedOutcome);
  }
});
// Identical strings are equivalent under the default (empty) rule set.
it('should return true when inputs are byte-equal', () => {
  const defaultRules = new Equivalency();
  // Second pair: the same emoji written literally and as a code point escape.
  const pairs = [['a', 'a'], ['💩', '\u{1F4A9}']];
  for (const [canonical, comparate] of pairs) {
    const outcome = defaultRules.equivalent(canonical, comparate);
    expect(outcome).toEqual(expect.objectContaining({ isEquivalent: true }));
  }
});
// Marking ASCII punctuation as not mattering makes "it's" match 'its'.
it('should respect remove rules', () => {
  const punctuationBlind = new Equivalency().doesntMatter(
    Equivalency.en.ASCII_PUNCTUATION
  );
  const pairs = [["it's", 'its']];
  for (const [canonical, comparate] of pairs) {
    const outcome = punctuationBlind.equivalent(canonical, comparate);
    expect(outcome).toEqual(expect.objectContaining({ isEquivalent: true }));
  }
});
// doesntMatter(rule) followed by matters(rule): the later call wins, so a
// punctuation-only difference is reported with the rule's name as the reason.
it('adding then removing the same rule should be the same as default rules ', () => {
  const rule = Equivalency.en.COMMON_PUNCTUATION;
  const toggled = new Equivalency().doesntMatter(rule).matters(rule);
  const expectedOutcome = expect.objectContaining({
    isEquivalent: false,
    reasons: [{ name: 'common punctuation' }],
  });
  for (const [canonical, comparate] of [['what he did.', 'what he did?']]) {
    const outcome = toggled.equivalent(canonical, comparate, {
      giveReasons: true,
    });
    expect(outcome).toEqual(expectedOutcome);
  }
});
// Compares the same pair before and after flipping a rule on one instance,
// so a stale cached rule list would surface as a failure in the second pass.
it('should update rule list when it changes (bust cache)', () => {
  const subject = new Equivalency().matters(Equivalency.en.COMMON_PUNCTUATION);
  const pairs = [['what he did.', 'what he did?']];
  const expectAllToContain = expectedFields => {
    for (const [canonical, comparate] of pairs) {
      const outcome = subject.equivalent(canonical, comparate, {
        giveReasons: true,
      });
      expect(outcome).toEqual(expect.objectContaining(expectedFields));
    }
  };

  expectAllToContain({
    isEquivalent: false,
    reasons: [{ name: 'common punctuation' }],
  });

  subject.doesntMatter(Equivalency.en.COMMON_PUNCTUATION);
  // These fail if this._ruleListIsDirty is not checked.
  expectAllToContain({
    isEquivalent: true,
    reasons: [],
  });
});
// Boundary case: exactly 16 `matters` rules must still yield reasons without
// opting in via giveReasonsUnlimitedRules. (The title previously said 15,
// although 16 rules, 'a' through 'p', are registered.)
it('doesnt throw when asked to give reasons for 16 matters rules and not explicitly told to do so', () => {
  // 16 matters rules, about 75 ms on an 8th-gen i7.
  const instance = [...'abcdefghijklmnop'].reduce(
    (chain, letter) => chain.matters(letter),
    new Equivalency()
  );
  expect(instance.equivalent('a', 'ab', { giveReasons: true })).toEqual(
    expect.objectContaining({
      isEquivalent: false,
      reasons: [{ name: 'b' }],
    })
  );
});
// One rule past the limit: 17 `matters` rules ('a' through 'q') with
// giveReasons but without giveReasonsUnlimitedRules must throw.
it('throws when asked to give reasons for >16 matters rules and not explicitly told to do so', () => {
  const seventeenRules = [...'abcdefghijklmnopq'].reduce(
    (chain, letter) => chain.matters(letter),
    new Equivalency()
  );
  const attempt = () =>
    seventeenRules.equivalent('a', 'b', { giveReasons: true });
  expect(attempt).toThrow(
    'To give reasons for >16 matters rules, set opts.giveReasonsUnlimitedRules to true.'
  );
});
// Same 17-rule setup as the throwing case, but with the explicit opt-in flag.
it('gives reasons when asked to give reasons for >16 matters rules and explicitly told to do so', () => {
  // 17 matters rules, about 150 ms on an 8th-gen i7.
  const seventeenRules = [...'abcdefghijklmnopq'].reduce(
    (chain, letter) => chain.matters(letter),
    new Equivalency()
  );
  const outcome = seventeenRules.equivalent('a', 'ab', {
    giveReasons: true,
    giveReasonsUnlimitedRules: true,
  });
  expect(outcome).toEqual(
    expect.objectContaining({
      isEquivalent: false,
      reasons: [{ name: 'b' }],
    })
  );
});
// When ASCII punctuation matters, a missing apostrophe is attributed to that rule.
it('identifies remove rules when giving reasons', () => {
  const punctuationStrict = new Equivalency().matters(
    Equivalency.en.ASCII_PUNCTUATION
  );
  const expectedOutcome = expect.objectContaining({
    isEquivalent: false,
    reasons: [{ name: 'ascii punctuation' }],
  });
  for (const [canonical, comparate] of [["it's", 'its']]) {
    const outcome = punctuationStrict.equivalent(canonical, comparate, {
      giveReasons: true,
    });
    expect(outcome).toEqual(expectedOutcome);
  }
});
// A single combined punctuation-and-symbols rule is reported under one name,
// whichever kind of character caused the mismatch.
it('identifies combined rules that are reasons (punctuation and symbols)', () => {
  const subject = new Equivalency()
    .matters(Equivalency.en.COMMON_PUNCTUATION_AND_SYMBOLS)
    .doesntMatter(Equivalency.WHITESPACE_DIFFERENCES);
  const correctAnswer = 'you and me';
  // [candidate, name of the single expected reason]
  const scenarios = [
    ['you and me!', 'common punctuation and symbols'],
    ['you &and me', 'common punctuation and symbols'],
    // If these are applied together, passes, else doesn't.
    ['you &and me\\', 'common punctuation and symbols'],
    ['you and I', 'identity'],
  ];
  for (const [candidate, reasonName] of scenarios) {
    const outcome = subject.equivalent(correctAnswer, candidate, {
      giveReasons: true,
    });
    expect(outcome).toEqual(
      expect.objectContaining({
        isEquivalent: false,
        reasons: [{ name: reasonName }],
      })
    );
  }
});
// Punctuation and symbols registered as two separate rules: each mismatch is
// attributed to its own rule, and a plain text difference falls to 'identity'.
it('identifies multiple rules that are reasons (punctuation and symbols)', () => {
  const subject = new Equivalency()
    .matters(Equivalency.en.COMMON_PUNCTUATION)
    .matters(Equivalency.en.COMMON_SYMBOLS)
    .doesntMatter(Equivalency.WHITESPACE_DIFFERENCES);
  const correctAnswer = 'you and me';
  // [candidate, expected reasons in order]
  const scenarios = [
    ['you and me!', [{ name: 'common punctuation' }]],
    ['you &and me', [{ name: 'common symbols' }]],
    // If these are applied together, passes, else doesn't.
    [
      'you &and me!',
      [{ name: 'common punctuation' }, { name: 'common symbols' }],
    ],
    ['you and I', [{ name: 'identity' }]],
    ['you &and I!', [{ name: 'identity' }]],
  ];
  for (const [candidate, reasons] of scenarios) {
    const outcome = subject.equivalent(correctAnswer, candidate, {
      giveReasons: true,
    });
    expect(outcome).toEqual(
      expect.objectContaining({ isEquivalent: false, reasons })
    );
  }
});
// Punctuation and diacritics both matter; each candidate swaps the apostrophe,
// adds an accent, or does both.
it('identifies multiple rules that are reasons (punctuation and diacritics)', () => {
  const subject = new Equivalency()
    .matters(Equivalency.en.COMMON_PUNCTUATION)
    .matters(Equivalency.COMMON_DIACRITICS);
  const correctAnswer = `didn't become`;
  // [candidate, expected reasons in order]
  const scenarios = [
    ['didn´t become', [{ name: 'common punctuation' }]],
    [`dídn't become`, [{ name: 'common diacritics' }]],
    [
      'dídn´t become',
      [{ name: 'common punctuation' }, { name: 'common diacritics' }],
    ],
  ];
  for (const [candidate, reasons] of scenarios) {
    const outcome = subject.equivalent(correctAnswer, candidate, {
      giveReasons: true,
    });
    expect(outcome).toEqual(
      expect.objectContaining({ isEquivalent: false, reasons })
    );
  }
});
// Both rules are set to not matter and no reasons are requested, so this test
// only checks equivalence. The title was changed to say that; it previously
// claimed the test identified reasons.
it('is equivalent when punctuation and punctuation as whitespace both dont matter', () => {
  const instance = new Equivalency()
    .doesntMatter(Equivalency.en.COMMON_PUNCTUATION)
    .doesntMatter(Equivalency.en.PUNCTUATION_AS_WHITESPACE);
  const correctAnswer = `I'm angry`;
  expect(instance.equivalent(correctAnswer, 'I´m angry')).toEqual(
    expect.objectContaining({ isEquivalent: true })
  );
});
// Equivalent inputs with giveReasons on must produce an empty reasons array.
it('gives empty array of reasons when giveReasons: true and isEquivalent: true', () => {
  const subject = new Equivalency()
    .doesntMatter(Equivalency.UNICODE_NORMALIZATION)
    .doesntMatter(Equivalency.WHITESPACE_DIFFERENCES);
  const correctAnswer = 'aeiou';
  const outcome = subject.equivalent(correctAnswer, 'aeiou', {
    giveReasons: true,
  });
  expect(outcome).toEqual(
    expect.objectContaining({
      isEquivalent: true,
      reasons: [],
    })
  );
});
// Four diacritic rules matter at once; each candidate adds one or two marks to
// the plain answer and must be attributed to the matching rule(s).
// The title previously said "grave accent", but the rule under test is
// ACUTE_ACCENT and the inputs use 'á'.
it('identifies multiple rules that are reasons (acute accent, umlaut, ñ, other diacritic)', () => {
  const instance = new Equivalency()
    .doesntMatter(Equivalency.UNICODE_NORMALIZATION)
    .matters(Equivalency.ACUTE_ACCENT)
    .matters(Equivalency.UMLAUT)
    .matters(Equivalency.N_TILDE)
    .matters(
      Equivalency.COMBINING_DIACRITICS_BLOCK_EXCEPT_ACUTE_AND_UMLAUT_AND_NTILDE
    );
  const correctAnswer = 'aeioun';
  const otherDiacritics = {
    name: 'combining diacritics block except acute and umlaut and n tilde',
  };
  // [candidate, expected reasons in order]
  const scenarios = [
    ['áeioun', [{ name: 'acute accent' }]],
    ['aeioün', [{ name: 'umlaut' }]],
    ['aeiouñ', [{ name: 'ñ' }]],
    ['ãeioun', [otherDiacritics]],
    ['aeĭoun', [otherDiacritics]],
    ['áeioun̈', [{ name: 'acute accent' }, { name: 'umlaut' }]],
    ['áeĭoun', [{ name: 'acute accent' }, otherDiacritics]],
  ];
  for (const [candidate, reasons] of scenarios) {
    expect(
      instance.equivalent(correctAnswer, candidate, { giveReasons: true })
    ).toEqual(expect.objectContaining({ isEquivalent: false, reasons }));
  }
});
});
// A builtin rule marked as not mattering: punctuation-only differences are
// ignored, word differences are not.
describe('equivalent (builtin doesnt matter)', () => {
  // Fresh instance per test, as each test builds its own.
  const ignoringPunctuation = () =>
    new Equivalency().doesntMatter(Equivalency.en.COMMON_PUNCTUATION);

  it("should return true when inputs differ solely by characters that don't matter", () => {
    const subject = ignoringPunctuation();
    const pairs = [
      ['what, you did', 'what you did'],
      ['fire-fly light', 'firefly light'],
    ];
    for (const [canonical, comparate] of pairs) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: true })
      );
    }
  });

  it('should return false when inputs differ by characters that do matter', () => {
    const subject = ignoringPunctuation();
    for (const [canonical, comparate] of [['what he did', 'what you did']]) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: false })
      );
    }
  });
});
// Chaining a broad doesntMatter rule with a narrower matters('-') override:
// commas are ignored while hyphens remain significant.
// The suite title's typo ("doesnMatter + matter") was corrected to the real
// method names.
describe('equivalent (chain doesntMatter + matters)', () => {
  // Each test gets its own instance, exactly as before.
  const buildInstance = () =>
    new Equivalency()
      .doesntMatter(Equivalency.en.COMMON_PUNCTUATION)
      .matters('-');

  it("should return true when inputs differ solely by characters that don't matter", () => {
    const instance = buildInstance();
    const inputs = [['what, you did', 'what you did']];
    inputs.forEach(([s1, s2]) => {
      expect(instance.equivalent(s1, s2)).toEqual(
        expect.objectContaining({ isEquivalent: true })
      );
    });
  });

  it('should return false when inputs differ by characters that do matter', () => {
    const instance = buildInstance();
    const inputs = [['fire-fly light', 'firefly light']];
    inputs.forEach(([s1, s2]) => {
      expect(instance.equivalent(s1, s2)).toEqual(
        expect.objectContaining({ isEquivalent: false })
      );
    });
  });

  it('should return false when inputs differ by characters that do matter (give reasons)', () => {
    const instance = buildInstance();
    const inputs = [['fire-fly light', 'firefly light']];
    inputs.forEach(([s1, s2]) => {
      expect(instance.equivalent(s1, s2, { giveReasons: true })).toEqual(
        expect.objectContaining({
          isEquivalent: false,
          reasons: [{ name: '-' }],
        })
      );
    });
  });
});
// CAPITALIZATION not mattering: case-only differences pass, extra letters do not.
describe('equivalent (capitalization doesnt matter)', () => {
  const caseInsensitive = () =>
    new Equivalency().doesntMatter(Equivalency.CAPITALIZATION);

  it('should return true when inputs differ solely by capitalization', () => {
    const subject = caseInsensitive();
    for (const [canonical, comparate] of [['us', 'US']]) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: true })
      );
    }
  });

  it('should return false when inputs differ other than by capitalization', () => {
    const subject = caseInsensitive();
    for (const [canonical, comparate] of [['us', 'usa']]) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: false })
      );
    }
  });
});
// WHITESPACE_DIFFERENCES not mattering: U+2028/U+2029 in place of a space is
// accepted, but an additional capitalization difference is not.
describe('equivalent (whitespace doesnt matter)', () => {
  const whitespaceTolerant = () =>
    new Equivalency().doesntMatter(Equivalency.WHITESPACE_DIFFERENCES);

  it('should return true when inputs differ solely by shape of whitespaces', () => {
    const subject = whitespaceTolerant();
    const pairs = [['the us of a', 'the us of\u2028\u2029a']];
    for (const [canonical, comparate] of pairs) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: true })
      );
    }
  });

  it('should return false when inputs differ other than by shape of whitespaces', () => {
    const subject = whitespaceTolerant();
    const pairs = [['the us of a', 'The us of\u2028\u2029a']];
    for (const [canonical, comparate] of pairs) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: false })
      );
    }
  });
});
// Diacritic-insensitive comparison, alone and combined with other rules.
describe('equivalent (common diacritics)', () => {
  const diacriticBlind = () =>
    new Equivalency().doesntMatter(Equivalency.COMMON_DIACRITICS);

  it('should return true when inputs differ solely by common diacritics', () => {
    const subject = diacriticBlind();
    for (const [canonical, comparate] of [['â', 'a']]) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: true })
      );
    }
  });

  it('should return false when inputs differ other than by common diacritics', () => {
    const subject = diacriticBlind();
    for (const [canonical, comparate] of [['âb', 'âc']]) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: false })
      );
    }
  });

  it('should return true for more complex inputs', () => {
    // Every rule listed here is ignored during comparison.
    const ignoredRules = [
      Equivalency.UNICODE_NORMALIZATION,
      Equivalency.WHITESPACE_DIFFERENCES,
      Equivalency.CAPITALIZATION,
      Equivalency.en.COMMON_PUNCTUATION,
      Equivalency.en.COMMON_SYMBOLS,
      Equivalency.COMMON_DIACRITICS,
    ];
    const enEquivalency = ignoredRules.reduce(
      (chain, rule) => chain.doesntMatter(rule),
      new Equivalency()
    );
    const accented = 'àâäçèéêíïîñóöüÀÂÄÇÈÉÊÍÏÎÑÓÖÜ';
    const plain = 'aaaceeeiiinoouAAACEEEIIINOOU';
    const outcome = enEquivalency.equivalent(accented, plain);
    expect(outcome.isEquivalent).toBe(true);
  });

  it('should return true for tildes', () => {
    const enEquivalency = new Equivalency()
      .doesntMatter(Equivalency.CAPITALIZATION)
      .doesntMatter(Equivalency.TILDE);
    const outcome = enEquivalency.equivalent('ãÃõÕñÑ', 'aaoonn');
    expect(outcome.isEquivalent).toBe(true);
  });
});
// UNICODE_NORMALIZATION not mattering: composed vs decomposed forms match,
// while a letter-case difference under the same combining mark does not.
describe('equivalent (unicode normalization doesnt matter)', () => {
  const normalizationBlind = () =>
    new Equivalency().doesntMatter(Equivalency.UNICODE_NORMALIZATION);

  it('should return true when inputs differ solely by unicode normalization', () => {
    const subject = normalizationBlind();
    // composed and decomposed forms of é
    const pairs = [['\u00e9', '\u0065\u0301']];
    for (const [canonical, comparate] of pairs) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: true })
      );
    }
  });

  it('should return false when inputs differ other than by unicode normalization', () => {
    const subject = normalizationBlind();
    // lowercase n \u006e and uppercase N \u004e with combining tilde \u0303
    const pairs = [['\u006e\u0303', '\u004e\u0303']];
    for (const [canonical, comparate] of pairs) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: false })
      );
    }
  });
});
// Rules built by wordPrefix() on the default instance, marked as not mattering.
describe('equivalent (arbitrary word prefix)', () => {
  it('should be true when rule does not matter', () => {
    const excuseMePrefix = equivalency.wordPrefix('Excuse me,');
    const subject = new Equivalency().doesntMatter(excuseMePrefix);
    const question = 'could I borrow some Grey Poupon?';
    const apology = "I'm terribly sorry.";
    const pairs = [
      // prefix on the first input only
      [`Excuse me, ${question}`, question],
      // prefix on both inputs
      [`Excuse me, ${question}`, `Excuse me, ${question}`],
      // prefix on the second input only
      [apology, `Excuse me, ${apology}`],
      // NOTE(review): this pair repeats the previous one verbatim — confirm
      // whether a different variant was intended.
      [apology, `Excuse me, ${apology}`],
    ];
    for (const [canonical, comparate] of pairs) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: true })
      );
    }
  });

  it('should be true when rule does not matter and the word contains slashes', () => {
    const elLaPrefix = equivalency.wordPrefix('el/la');
    const subject = new Equivalency().doesntMatter(elLaPrefix);
    for (const [canonical, comparate] of [['el/la dentista', 'dentista']]) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: true })
      );
    }
  });
});
});
// editDistance appears on the result only when calculateEditDistance is true.
describe('editDistance', () => {
  // Runs the single 'a' vs 'b' comparison on a fresh default instance.
  const distanceWith = options =>
    [['a', 'b']].map(
      ([canonical, comparate]) =>
        new Equivalency().equivalent(canonical, comparate, options)
          .editDistance
    );

  it('should return an editDistance when calculateEditDistance is true', () => {
    for (const editDistance of distanceWith({ calculateEditDistance: true })) {
      expect(editDistance).toBeDefined();
    }
  });

  it('should not return an editDistance when calculateEditDistance is false', () => {
    for (const editDistance of distanceWith({ calculateEditDistance: false })) {
      expect(editDistance).toBeUndefined();
    }
  });

  it('should not return an editDistance when calculateEditDistance is not provided', () => {
    for (const editDistance of distanceWith({})) {
      expect(editDistance).toBeUndefined();
    }
  });
});
// Rules added to a clone must not leak back into the original or into siblings.
describe('clone', () => {
  it('should clone', () => {
    const original = new Equivalency().doesntMatter(
      Equivalency.en.COMMON_PUNCTUATION
    );
    const clone1 = original
      .clone()
      .matters(Equivalency.en.COMMON_PUNCTUATION);
    const clone2 = original.clone().matters('?');
    const subjects = [original, clone1, clone2];
    // [canonical, comparate, expected isEquivalent for original/clone1/clone2]
    const rows = [
      ['what he did.', 'what he did?', [true, false, false]],
      ['what he did', 'what he did?', [true, false, false]],
      ['what he did.', 'what he did', [true, false, true]],
    ];
    for (const [canonical, comparate, expectedFlags] of rows) {
      subjects.forEach((subject, position) => {
        expect(subject.equivalent(canonical, comparate)).toEqual(
          expect.objectContaining({
            isEquivalent: expectedFlags[position],
          })
        );
      });
    }
  });
});
});
describe('Real-world usage', () => {
describe('isEquivalent', () => {
// Spanish rule set: punctuation, symbols, case and whitespace are ignored;
// hyphens matter. Diacritics are not ignored by any rule listed here.
describe('es', () => {
  const esEquivalency = new Equivalency()
    .doesntMatter(Equivalency.UNICODE_NORMALIZATION)
    .doesntMatter(Equivalency.WHITESPACE_DIFFERENCES)
    .doesntMatter(Equivalency.CAPITALIZATION)
    .doesntMatter(Equivalency.es.COMMON_PUNCTUATION)
    .doesntMatter(Equivalency.es.COMMON_SYMBOLS)
    .matters('-');
  const theCorrectAnswer = '¿Cómo se dice...?';
  // Compares every candidate against the shared correct answer.
  const expectEach = (candidates, expected) => {
    for (const candidate of candidates) {
      const outcome = esEquivalency.equivalent(theCorrectAnswer, candidate);
      expect(outcome.isEquivalent).toBe(expected);
    }
  };

  it('should mark candidates equivalent that we want to count as equivalent', () => {
    expectEach(
      [
        '¿Cómo se dice...?',
        'Cómo se dice',
        'C..,.,.??.,.ómo s.......?!?!?!??e dice.....???????????¿¿¿¿¿¿',
      ],
      true
    );
  });

  it('should mark candidates inequivalent that we dont want to count as equivalent', () => {
    expectEach(['¿Como se dice...?', 'Cómosedice'], false);
  });
});
// English rule set used by the hyphen and candidate tests below.
// Changes from the previous version:
//  - the "many spaces" hyphen case now really contains runs of spaces (it was
//    byte-identical to the single-space case);
//  - locals that shadowed the module-level `equivalency` import were renamed.
describe('en', () => {
  const enEquivalency = new Equivalency()
    .doesntMatter(Equivalency.UNICODE_NORMALIZATION)
    .doesntMatter(Equivalency.WHITESPACE_DIFFERENCES)
    .doesntMatter(Equivalency.CAPITALIZATION)
    .doesntMatter(Equivalency.en.COMMON_PUNCTUATION)
    .matters('-')
    .doesntMatter(Equivalency.en.COMMON_SYMBOLS)
    .doesntMatter(Equivalency.COMMON_DIACRITICS)
    .doesntMatter(Equivalency.HYPHENS_OMITTED_OR_REPLACED_WITH_SPACES);

  it('should handle hyphens correctly (unidirectional)', () => {
    const target = 'over-the-moon cow';
    const correct = [
      'over the moon cow', // spaces for hyphens
      'overthemoon cow', // omitted hyphens
      'over--the-moon cow', // extra hyphens where there should be one hyphen
      'over   the   moon cow', // many spaces where there should be one hyphen
      'over - the - moon cow', // hyphens with extra spaces
    ];
    const incorrect = [
      'over-the-moon-cow', // hyphens instead of spaces
      'overthemooncow', // missing spaces
      'over-the moon cow', // mixture of hyphen/missing hyphen
    ];
    correct.forEach(test => {
      const { isEquivalent } = enEquivalency.equivalent(target, test);
      expect(isEquivalent).toBe(true);
    });
    incorrect.forEach(test => {
      const { isEquivalent } = enEquivalency.equivalent(target, test);
      expect(isEquivalent).toBe(false);
    });
  });

  it('should handle hyphens correctly (bidirectional)', () => {
    const bidirectional = new Equivalency().doesntMatter(
      Equivalency.HYPHENS_OMITTED_OR_REPLACED_WITH_SPACES_BOTH
    );
    const target = 'brother in law';
    // [candidate, expected isEquivalent]
    const scenarios = [
      ['brother-in-law', true],
      ['brother in-law', true],
      ['brotherin-law', false],
      ['brother-in-law-', false],
    ];
    scenarios.forEach(([candidate, expected]) => {
      expect(bidirectional.equivalent(target, candidate).isEquivalent).toBe(
        expected
      );
    });
  });

  it('should mark candidates equivalent that we want to count as equivalent', () => {
    const theCorrectAnswer = 'How are you today?';
    const candidates = [
      'how are you, today?',
      'HOW ARE YOU TODAY',
      'how aré you today',
      ' how aré you today ',
    ];
    candidates.forEach(candidate => {
      const { isEquivalent } = enEquivalency.equivalent(
        candidate,
        theCorrectAnswer
      );
      expect(isEquivalent).toBe(true);
    });
  });

  describe('INFINITIVE_VERBS', () => {
    // Rebuilt before every test so tests cannot affect one another.
    let infinitiveEquivalency = null;
    beforeEach(() => {
      infinitiveEquivalency = new Equivalency().doesntMatter(
        Equivalency.en.INFINITIVE_VERBS
      );
    });

    it('should mark infinitive verbs as equivalent', () => {
      const theCorrectAnswer = 'write';
      const candidates = [
        'to write',
        ' to write',
        ' TO write',
        'TO write',
        ' tO write',
      ];
      candidates.forEach(candidate => {
        const { isEquivalent } = infinitiveEquivalency.equivalent(
          candidate,
          theCorrectAnswer
        );
        expect(isEquivalent).toBe(true);
      });
    });

    it('should require a space after to in front of verbs', () => {
      const theCorrectAnswer = 'write';
      const candidate = 'towrite';
      const { isEquivalent } = infinitiveEquivalency.equivalent(
        candidate,
        theCorrectAnswer
      );
      expect(isEquivalent).toBe(false);
    });
  });
});
// French rule set: ligatures, punctuation/symbols, case and accents on capital
// vowels are ignored; hyphens matter.
describe('fr', () => {
  const frEquivalency = new Equivalency()
    .doesntMatter(Equivalency.UNICODE_NORMALIZATION)
    .doesntMatter(Equivalency.WHITESPACE_DIFFERENCES)
    .doesntMatter(Equivalency.fr.CAPITAL_VOWEL_ACCENTS)
    .doesntMatter(Equivalency.CAPITALIZATION)
    .doesntMatter(Equivalency.fr.COMMON_PUNCTUATION_AND_SYMBOLS)
    .doesntMatter(Equivalency.fr.LIGATURES)
    .matters('-');

  // Candidate list shared by the two normalization-form tests.
  const normalizationCandidates = [
    'œuf, élève, et cætera', // exact match
    '« œuf élève et cætera »', // « and » are common punctuation
    '€œuf élève et cætera', // € symbol
    'oeuf, élève, et caetera', // ligature replacement match
    'œuf, ÉLÈVE, et cætera', // correct capital accents
    'œuf, ELEVE, et cætera', // no accents
    'œuf, Elève, et cætera', // no capital accents
  ];

  // Compares each candidate against the answer and checks the verdict.
  const expectEach = (theCorrectAnswer, candidates, expected) => {
    for (const candidate of candidates) {
      const outcome = frEquivalency.equivalent(theCorrectAnswer, candidate);
      expect(outcome.isEquivalent).toBe(expected);
    }
  };

  it('should mark candidates equivalent that we want to count as equivalent', () => {
    expectEach(
      "œuf, élève, m'appelle, et cætera",
      [
        "œuf, élève, m'appelle, et cætera", // exact match
        'œuf, élève, m‘appelle, et cætera', // open smart quote
        'œuf, élève, m’appelle, et cætera', // close smart quote
        "« œuf élève m'appelle et cætera »", // « and » are common punctuation
        "€œuf élève m'appelle et cætera", // € symbol
        "oeuf, élève, m'appelle, et caetera", // ligature replacement match
        "œuf, ÉLÈVE, m'appelle, et cætera", // correct capital accents
        "œuf, ELEVE, m'appelle, et cætera", // no accents
        "œuf, Elève, m'appelle, et cætera", // no capital accents
      ],
      true
    );
  });

  it('should mark equivalent NFD => NFC', () => {
    expectEach(
      'œuf, élève, et cætera'.normalize('NFD'),
      normalizationCandidates.map(str => str.normalize('NFC')),
      true
    );
  });

  it('should mark equivalent NFC => NFD', () => {
    expectEach(
      'œuf, élève, et cætera'.normalize('NFC'),
      normalizationCandidates.map(str => str.normalize('NFD')),
      true
    );
  });

  it('should mark candidates inequivalent that we dont want to count as equivalent', () => {
    expectEach(
      'œuf, élève, et cætera',
      ['œuf, ÉLEVE, et cætera', 'œuf, ELÈVE, et cætera'],
      false
    );
  });
});
});
describe('editDistance (diacritics agnostic)', () => {
// Edit distance under a Spanish rule set that also ignores common diacritics.
describe('es', () => {
  const agnosticEsEquivalency = new Equivalency()
    .doesntMatter(Equivalency.UNICODE_NORMALIZATION)
    .doesntMatter(Equivalency.WHITESPACE_DIFFERENCES)
    .doesntMatter(Equivalency.CAPITALIZATION)
    .doesntMatter(Equivalency.es.COMMON_PUNCTUATION)
    .doesntMatter(Equivalency.es.COMMON_SYMBOLS)
    .doesntMatter(Equivalency.COMMON_DIACRITICS)
    .matters('-');
  // Returns the editDistance field for one pair.
  const distanceBetween = (canonical, comparate) =>
    agnosticEsEquivalency.equivalent(canonical, comparate, {
      calculateEditDistance: true,
    }).editDistance;

  it('should return correct editDistance when strings match', () => {
    const pairs = [
      ['estoy bien y tu', 'estoy bien, ¿y tú?'],
      ['como se dice', '¿Cómo se dice?'],
      ['Tengo tres coches.', 'Tengo tres coches.'],
      ['el combinado', 'el combínádo'],
    ];
    for (const [canonical, comparate] of pairs) {
      expect(distanceBetween(canonical, comparate)).toEqual(0);
    }
  });

  it('should return correct editDistance when strings dont match', () => {
    // [canonical, comparate, expected distance]
    const rows = [
      ['e lcombinado', 'el combinado', 1],
      ['e l combinado', 'el combinado', 1],
      ['e l c ombinado', 'el combinado', 2],
      ['el cobminado', 'el combinado', 1],
      ['el comdinabo', 'el combinado', 2],
      ['manzana', 'manzanas', 1],
      ['niña', 'nino', 1],
    ];
    for (const [canonical, comparate, expected] of rows) {
      expect(distanceBetween(canonical, comparate)).toEqual(expected);
    }
  });
});
// Edit distance under an English rule set that also ignores common diacritics.
// Expected distances now sit next to their inputs (matching the sibling 'es'
// suite) instead of in a parallel array, and the second test's title typo
// ("string dont match") is fixed.
describe('en', () => {
  const enEquivalency = new Equivalency()
    .doesntMatter(Equivalency.UNICODE_NORMALIZATION)
    .doesntMatter(Equivalency.WHITESPACE_DIFFERENCES)
    .doesntMatter(Equivalency.CAPITALIZATION)
    .doesntMatter(Equivalency.en.COMMON_PUNCTUATION)
    .doesntMatter(Equivalency.en.COMMON_SYMBOLS)
    .doesntMatter(Equivalency.COMMON_DIACRITICS)
    .matters('-');

  it('should return correct editDistance when strings match', () => {
    const inputs = [
      ['Im well and you', "I'm well, and you?"],
      ['wheres the money', "Where's the money?"],
      ['I am Iron Man', 'I am Iron Man'],
      ['say cheese', 'Say cheese!'],
    ];
    const options = { calculateEditDistance: true };
    inputs.forEach(([s1, s2]) => {
      const { editDistance } = enEquivalency.equivalent(s1, s2, options);
      expect(editDistance).toEqual(0);
    });
  });

  it('should return correct editDistance when strings dont match', () => {
    // [canonical, comparate, expected distance]
    const inputs = [
      ['more hardworking than', 'more hard-working than', 1],
      ['- more hard working than', 'more hard-working than', 3],
      ['mre hard-wrking than', 'more hard-working than', 2],
      ['one day, Simba', 'one daysimba', 1],
    ];
    const options = { calculateEditDistance: true };
    inputs.forEach(([s1, s2, expected]) => {
      const { editDistance } = enEquivalency.equivalent(s1, s2, options);
      expect(editDistance).toEqual(expected);
    });
  });
});
});
// The one-directional hyphen rule: spaces may stand in for the answer's
// hyphens, but hyphens may not stand in for the answer's spaces.
describe('HYPHENS_OMITTED_OR_REPLACED_WITH_SPACES', () => {
  const hyphenated = 'over-the-moon cow';
  // Re-created before each test; the last test mutates it via matters().
  let hyphenTolerant = null;
  beforeEach(() => {
    hyphenTolerant = new Equivalency().doesntMatter(
      Equivalency.HYPHENS_OMITTED_OR_REPLACED_WITH_SPACES
    );
  });

  it('should accept spaces for hyphens', () => {
    const outcome = hyphenTolerant.equivalent(hyphenated, 'over the moon cow');
    expect(outcome.isEquivalent).toBe(true);
  });

  it('should not accept hyphens for spaces', () => {
    const outcome = hyphenTolerant.equivalent(hyphenated, 'over-the-moon-cow');
    expect(outcome.isEquivalent).toBe(false);
  });

  it('can be a reason for non-equivalency when it matters', () => {
    const strict = hyphenTolerant.matters(
      Equivalency.HYPHENS_OMITTED_OR_REPLACED_WITH_SPACES
    );
    const outcome = strict.equivalent(hyphenated, 'over the moon cow', {
      giveReasons: true,
    });
    expect(outcome.isEquivalent).toBe(false);
    expect(outcome.reasons).toEqual([
      { name: 'hyphens omitted or replaced with spaces' },
    ]);
  });
});
// es.PUNCTUATION_AS_WHITESPACE combined with WHITESPACE_DIFFERENCES, both ignored.
describe('Punctuation As Whitespace Rule', () => {
  const buildSubject = () =>
    new Equivalency()
      .doesntMatter(Equivalency.es.PUNCTUATION_AS_WHITESPACE)
      .doesntMatter(Equivalency.WHITESPACE_DIFFERENCES);
  const canonical = 'No, tengo helado.';

  it('should return true when inputs differ by punctuation', () => {
    const subject = buildSubject();
    const comparates = [
      'No, tengo helado.', // exact match
      'No tengo helado', // no punctuation
      'No,tengo helado.', // punctuation without spacing
    ];
    for (const comparate of comparates) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: true })
      );
    }
  });

  it('should return false when inputs differ other than by punctuation', () => {
    const subject = buildSubject();
    // Words run together with no punctuation or space between them.
    for (const comparate of ['Notengo helado.']) {
      expect(subject.equivalent(canonical, comparate)).toEqual(
        expect.objectContaining({ isEquivalent: false })
      );
    }
  });
});
});