UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

527 lines (517 loc) 18.7 kB
/* * Copyright (c) AXA Shared Services Spain S.A. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ const { SimilarSearch, NerManager } = require('../../lib'); describe('NER Manager', () => { describe('Constructor', () => { test('Should create an instance', () => { const manager = new NerManager(); expect(manager).toBeDefined(); }); test('Should initialize properties', () => { const manager = new NerManager(); expect(manager.threshold).toEqual(0.8); expect(manager.namedEntities).toBeDefined(); expect(manager.similar).toBeInstanceOf(SimilarSearch); }); test('Should initialize threshold if provided', () => { const manager = new NerManager({ threshold: 0.6 }); expect(manager.threshold).toEqual(0.6); }); }); describe('Add named entity', () => { test('Should add a new named entity', () => { const manager = new NerManager(); const entity = manager.addNamedEntity('entity1'); expect(entity).toBeDefined(); expect(entity.name).toEqual('entity1'); expect(entity.locales).toEqual({}); }); test('Should return the same entity if already exists', () => { const manager = new NerManager(); const entity1 = manager.addNamedEntity('entity1'); const entity2 = manager.addNamedEntity('entity1'); expect(entity1).toBe(entity2); }); test('Should be able to add several entities', () => { const manager = new NerManager(); const entity1 = manager.addNamedEntity('entity1'); const entity2 = manager.addNamedEntity('entity2'); expect(entity1.name).toEqual('entity1'); expect(entity1.locales).toEqual({}); expect(entity2.name).toEqual('entity2'); expect(entity2.locales).toEqual({}); }); }); describe('Get named entity', () => { test('Should get existing entity', () => { const manager = new NerManager(); const entity1 = manager.addNamedEntity('entity1'); const result = manager.getNamedEntity('entity1'); expect(result).toBe(entity1); }); test('Should return undefined if entity does not exists', () => { const manager = new NerManager(); manager.addNamedEntity('entity1'); const result = manager.getNamedEntity('entity2'); expect(result).toBeUndefined(); }); test('Should create a new entity if forced and not exists', () => { const manager = new NerManager(); manager.addNamedEntity('entity1'); const result = manager.getNamedEntity('entity2', true); expect(result.name).toEqual('entity2'); expect(result.locales).toEqual({}); }); }); describe('Remove named entity', () => { test('Should remove an existing entity', () => { const manager = new NerManager(); manager.addNamedEntity('entity1'); manager.removeNamedEntity('entity1'); const result = manager.getNamedEntity('entiy1'); expect(result).toBeUndefined(); }); test('Should do nothing if entity does not exists', () => { const manager = new NerManager(); manager.addNamedEntity('entity1'); manager.removeNamedEntity('entity2'); const result = manager.getNamedEntity('entity1'); expect(result).toBeDefined(); }); }); describe('Add named entity text', () => { test('Should add text for a given language', () => { const manager = new NerManager(); manager.addNamedEntityText('entity1', 'option1', 'en', 'Something'); const entity = manager.getNamedEntity('entity1', false); expect(entity.locales.en).toEqual({ option1: ['Something'] }); }); test('Should add several text for a given language', () => { const manager = new NerManager(); manager.addNamedEntityText('entity1', 'option1', 'en', [ 'Something', 'Anything', ]); const entity = manager.getNamedEntity('entity1', false); expect(entity.locales.en).toEqual({ option1: ['Something', 'Anything'] }); }); test('Should add several text for several languages', () => { const manager = new NerManager(); manager.addNamedEntityText( 'entity1', 'option1', ['en', 'es'], ['Something', 'Anything'] ); const entity = manager.getNamedEntity('entity1', false); expect(entity.locales.en).toEqual({ option1: ['Something', 'Anything'] }); expect(entity.locales.es).toEqual({ option1: ['Something', 'Anything'] }); }); }); describe('Remove named entity text', () => { test('Should remove text for a given language', () => { const manager = new NerManager(); manager.addNamedEntityText( 'entity1', 'option1', ['en', 'es'], ['Something', 'Anything'] ); manager.removeNamedEntityText('entity1', 'option1', 'es', 'Something'); const entity = manager.getNamedEntity('entity1', false); expect(entity.locales.en).toEqual({ option1: ['Something', 'Anything'] }); expect(entity.locales.es).toEqual({ option1: ['Anything'] }); }); test('Should remove texts for a given language', () => { const manager = new NerManager(); manager.addNamedEntityText( 'entity1', 'option1', ['en', 'es'], ['Something', 'Anything'] ); manager.removeNamedEntityText('entity1', 'option1', 'es', [ 'Something', 'Anything', ]); const entity = manager.getNamedEntity('entity1', false); expect(entity.locales.en).toEqual({ option1: ['Something', 'Anything'] }); expect(entity.locales.es).toEqual({ option1: [] }); }); test('Should remove text for several languages', () => { const manager = new NerManager(); manager.addNamedEntityText( 'entity1', 'option1', ['en', 'es'], ['Something', 'Anything'] ); manager.removeNamedEntityText( 'entity1', 'option1', ['en', 'es'], 'Something' ); const entity = manager.getNamedEntity('entity1', false); expect(entity.locales.en).toEqual({ option1: ['Anything'] }); expect(entity.locales.es).toEqual({ option1: ['Anything'] }); }); test('Should do nothing if the entity does not exists', () => { const manager = new NerManager(); manager.addNamedEntityText( 'entity1', 'option1', ['en', 'es'], ['Something', 'Anything'] ); manager.removeNamedEntityText( 'entity2', 'option1', ['en', 'es'], 'Something' ); const entity = manager.getNamedEntity('entity1', false); expect(entity.locales.en).toEqual({ option1: ['Something', 'Anything'] }); expect(entity.locales.es).toEqual({ option1: ['Something', 'Anything'] }); }); }); describe('Get entities from utterance', () => { test('Should find template entities inside utterance', () => { const manager = new NerManager(); manager.addNamedEntity('entity1'); manager.addNamedEntity('entity2'); manager.addNamedEntity('entity3'); const entities = manager.getEntitiesFromUtterance( 'This is %entity1% from %entity3% yeah' ); expect(entities).toEqual(['entity1', 'entity3']); }); test('If some template is not entity should not be included', () => { const manager = new NerManager(); manager.addNamedEntity('entity1'); manager.addNamedEntity('entity2'); manager.addNamedEntity('entity3'); const entities = manager.getEntitiesFromUtterance( 'This is %entity1% with %entity4% from %entity3% yeah' ); expect(entities).toEqual(['entity1', 'entity3']); }); }); describe('Find entities', () => { test('Should find an entity inside an utterance', async () => { const manager = new NerManager(); manager.addNamedEntityText( 'hero', 'spiderman', ['en'], ['Spiderman', 'Spider-man'] ); manager.addNamedEntityText( 'hero', 'iron man', ['en'], ['iron man', 'iron-man'] ); manager.addNamedEntityText('hero', 'thor', ['en'], ['Thor']); const entities = await manager.findEntities( 'I saw spiderman in the city', 'en' ); expect(entities).toBeDefined(); expect(entities).toHaveLength(1); expect(entities[0].start).toEqual(6); expect(entities[0].end).toEqual(14); expect(entities[0].levenshtein).toEqual(0); expect(entities[0].accuracy).toEqual(1); expect(entities[0].option).toEqual('spiderman'); expect(entities[0].sourceText).toEqual('Spiderman'); expect(entities[0].entity).toEqual('hero'); expect(entities[0].utteranceText).toEqual('spiderman'); }); test('Should find an entity if levenshtein greater than threshold', async () => { const manager = new NerManager({ threshold: 0.8 }); manager.addNamedEntityText( 'hero', 'spiderman', ['en'], ['Spiderman', 'Spider-man'] ); manager.addNamedEntityText( 'hero', 'iron man', ['en'], ['iron man', 'iron-man'] ); manager.addNamedEntityText('hero', 'thor', ['en'], ['Thor']); const entities = await manager.findEntities( 'I saw spederman in the city', 'en' ); expect(entities).toBeDefined(); expect(entities).toHaveLength(1); expect(entities[0].start).toEqual(6); expect(entities[0].end).toEqual(14); expect(entities[0].levenshtein).toEqual(1); expect(entities[0].accuracy).toEqual(0.8888888888888888); expect(entities[0].option).toEqual('spiderman'); expect(entities[0].sourceText).toEqual('Spiderman'); expect(entities[0].entity).toEqual('hero'); expect(entities[0].utteranceText).toEqual('spederman'); }); test('Should not find an entity if levenshtein less than threshold', async () => { const manager = new NerManager({ threshold: 0.8 }); manager.addNamedEntityText( 'hero', 'spiderman', ['en'], ['Spiderman', 'Spider-man'] ); manager.addNamedEntityText( 'hero', 'iron man', ['en'], ['iron man', 'iron-man'] ); manager.addNamedEntityText('hero', 'thor', ['en'], ['Thor']); const entities = await manager.findEntities( 'I saw spererman in the city', 'en' ); expect(entities).toBeDefined(); expect(entities).toHaveLength(0); }); test('Should find several entities inside an utterance', async () => { const manager = new NerManager(); manager.addNamedEntityText( 'hero', 'spiderman', ['en'], ['Spiderman', 'Spider-man'] ); manager.addNamedEntityText( 'hero', 'iron man', ['en'], ['iron man', 'iron-man'] ); manager.addNamedEntityText('hero', 'thor', ['en'], ['Thor']); manager.addNamedEntityText( 'food', 'burguer', ['en'], ['Burguer', 'Hamburguer'] ); manager.addNamedEntityText('food', 'pizza', ['en'], ['pizza']); manager.addNamedEntityText( 'food', 'pasta', ['en'], ['Pasta', 'spaghetti'] ); const entities = await manager.findEntities( 'I saw spiderman eating spaghetti in the city', 'en' ); expect(entities).toBeDefined(); expect(entities).toHaveLength(2); expect(entities[0].option).toEqual('spiderman'); expect(entities[0].sourceText).toEqual('Spiderman'); expect(entities[0].entity).toEqual('hero'); expect(entities[0].utteranceText).toEqual('spiderman'); expect(entities[1].option).toEqual('pasta'); expect(entities[1].sourceText).toEqual('spaghetti'); expect(entities[1].entity).toEqual('food'); expect(entities[1].utteranceText).toEqual('spaghetti'); }); test('Should return an empty array if the utterance is empty', async () => { const manager = new NerManager(); manager.addNamedEntityText( 'hero', 'spiderman', ['en'], ['Spiderman', 'Spider-man'] ); manager.addNamedEntityText( 'hero', 'iron man', ['en'], ['iron man', 'iron-man'] ); manager.addNamedEntityText('hero', 'thor', ['en'], ['Thor']); manager.addNamedEntityText( 'food', 'burguer', ['en'], ['Burguer', 'Hamburguer'] ); manager.addNamedEntityText('food', 'pizza', ['en'], ['pizza']); manager.addNamedEntityText( 'food', 'pasta', ['en'], ['Pasta', 'spaghetti'] ); const entities = await manager.findEntities('', 'en'); expect(entities).toBeDefined(); expect(entities).toEqual([]); }); }); describe('Find entities by Regex', () => { test('Should find an entity by regex inside an utterance', async () => { const manager = new NerManager(); const entity = manager.addNamedEntity('mail', 'regex'); entity.addRegex('en', /\b(\w[-._\w]*\w@\w[-._\w]*\w\.\w{2,3})\b/gi); const entities = await manager.findEntities( 'My email is jseijas@gmail.com and yours is not', 'en' ); expect(entities).toBeDefined(); expect(entities).toHaveLength(1); expect(entities[0].start).toEqual(12); expect(entities[0].end).toEqual(29); expect(entities[0].accuracy).toEqual(1); expect(entities[0].sourceText).toEqual('jseijas@gmail.com'); expect(entities[0].entity).toEqual('mail'); expect(entities[0].utteranceText).toEqual('jseijas@gmail.com'); }); }); describe('Find entities by Trim', () => { test('Should find entities by trim', async () => { const manager = new NerManager(); const fromEntity = manager.addNamedEntity('fromLocation', 'trim'); fromEntity.addBetweenCondition('en', 'from', 'to'); fromEntity.addAfterLastCondition('en', 'from'); const toEntity = manager.addNamedEntity('toLocation', 'trim'); toEntity.addBetweenCondition('en', 'to', 'from'); toEntity.addAfterLastCondition('en', 'to'); const entities = await manager.findEntities( 'I want to travel from Barcelona to Madrid', 'en' ); expect(entities).toBeDefined(); expect(entities).toHaveLength(3); expect(entities[0]).toEqual({ accuracy: 1, end: 30, len: 9, entity: 'fromLocation', sourceText: 'Barcelona', start: 22, type: 'between', utteranceText: 'Barcelona', }); expect(entities[1]).toEqual({ accuracy: 1, end: 15, len: 6, entity: 'toLocation', sourceText: 'travel', start: 10, type: 'between', utteranceText: 'travel', }); expect(entities[2]).toEqual({ accuracy: 0.99, end: 40, len: 6, entity: 'toLocation', sourceText: 'Madrid', start: 35, type: 'afterLast', utteranceText: 'Madrid', }); }); test('A skip word list can be provided', async () => { const manager = new NerManager(); const fromEntity = manager.addNamedEntity('fromLocation', 'trim'); fromEntity.addBetweenCondition('en', 'from', 'to'); fromEntity.addAfterLastCondition('en', 'from'); const toEntity = manager.addNamedEntity('toLocation', 'trim'); toEntity.addBetweenCondition('en', 'to', 'from', { skip: ['travel'] }); toEntity.addAfterLastCondition('en', 'to'); const entities = await manager.findEntities( 'I want to travel from Barcelona to Madrid', 'en' ); expect(entities).toBeDefined(); expect(entities).toHaveLength(2); expect(entities[0]).toEqual({ accuracy: 1, end: 30, len: 9, entity: 'fromLocation', sourceText: 'Barcelona', start: 22, type: 'between', utteranceText: 'Barcelona', }); expect(entities[1]).toEqual({ accuracy: 0.99, end: 40, len: 6, entity: 'toLocation', sourceText: 'Madrid', start: 35, type: 'afterLast', utteranceText: 'Madrid', }); }); test('Trim entities can be splitted to fit with other entities', async () => { const manager = new NerManager(); const fromEntity = manager.addNamedEntity('fromLocation', 'trim'); fromEntity.addBetweenCondition('en', 'from', 'to'); fromEntity.addAfterLastCondition('en', 'from'); const toEntity = manager.addNamedEntity('toLocation', 'trim'); toEntity.addBetweenCondition('en', 'to', 'from', { skip: ['travel'] }); toEntity.addAfterLastCondition('en', 'to'); const entities = await manager.findEntities( 'I want to travel from Barcelona to Madrid tomorrow', 'en' ); expect(entities).toBeDefined(); expect(entities).toHaveLength(3); expect(entities[0].utteranceText).toEqual('tomorrow'); expect(entities[0].entity).toEqual('date'); expect(entities[1]).toEqual({ accuracy: 1, end: 30, len: 9, entity: 'fromLocation', sourceText: 'Barcelona', start: 22, type: 'between', utteranceText: 'Barcelona', }); expect(entities[2]).toEqual({ accuracy: 0.99, end: 40, len: 6, entity: 'toLocation', sourceText: 'Madrid', start: 35, type: 'afterLast', utteranceText: 'Madrid', }); }); }); });