node-nlp
Version:
Library for NLU (Natural Language Understanding) done in Node.js
734 lines (698 loc) • 25 kB
JavaScript
/*
* Copyright (c) AXA Shared Services Spain S.A.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
const fs = require('fs');
const Handlebars = require('handlebars');
const { Language } = require('../language');
const { NerManager } = require('../ner');
const { SentimentManager } = require('../sentiment');
const NlpUtil = require('./nlp-util');
const NlpClassifier = require('./nlp-classifier');
const NlgManager = require('../nlg/nlg-manager');
const NlpExcelReader = require('./nlp-excel-reader');
const { SlotManager } = require('../slot');
/**
 * Class for the NLP Manager.
 * The NLP manager manages several classifiers (one per language) to provide
 * multilanguage support, and is also responsible for NER (Named Entity
 * Recognition).
 *
 * Understanding NER:
 *
 * You can have several entities defined, each one with several options, and
 * each option with several texts per language. Example:
 *
 *   Entity | Option | English           | Spanish
 *   FOOD   | Burger | Burger, Hamburger | Hamburguesa
 *   FOOD   | Salad  | Salad             | Ensalada
 *   FOOD   | Pizza  | Pizza             | Pizza
 *
 */
class NlpManager {
  /**
   * Constructor of the class.
   * @param {Object} [settings] Settings for the NLP Manager. The object is
   * kept by reference and missing defaults are written onto it.
   * @param {String|String[]} [settings.languages] Locales to register.
   * @param {Object} [settings.ner] Settings handed to the NerManager.
   * @param {boolean} [settings.fullSearchWhenGuessed=false] When the language
   * had to be guessed, classify against every registered language.
   * @param {boolean} [settings.useNlg=true] Defaulted to true when undefined.
   * @param {boolean} [settings.useNeural=true] Forwarded to each classifier.
   * @param {boolean} [settings.useLRC=true] Forwarded to each classifier.
   * @param {Function} [settings.processTransformer] Applied to the result of
   * process() before it is returned. Defaults to the identity function.
   */
  constructor(settings) {
    this.settings = settings || {};
    // Resolve the defaults BEFORE any classifier is created: addLanguage()
    // copies useNeural/useLRC into each classifier, so classifiers registered
    // through settings.languages must see the same values as later ones.
    if (this.settings.fullSearchWhenGuessed === undefined) {
      this.settings.fullSearchWhenGuessed = false;
    }
    if (this.settings.useNlg === undefined) {
      this.settings.useNlg = true;
    }
    if (this.settings.useNeural === undefined) {
      this.settings.useNeural = true;
    }
    if (this.settings.useLRC === undefined) {
      this.settings.useLRC = true;
    }
    this.guesser = new Language();
    this.nerManager = new NerManager(this.settings.ner);
    this.sentiment = new SentimentManager();
    this.languages = [];
    this.languageNames = {};
    this.classifiers = {};
    this.slotManager = new SlotManager();
    this.intentDomains = {};
    this.media = '';
    this.processTransformer =
      typeof this.settings.processTransformer === 'function'
        ? this.settings.processTransformer
        : _ => _;
    this.nlgManager = new NlgManager();
    if (this.settings.languages) {
      this.addLanguage(this.settings.languages);
    }
  }

  /**
   * Adds a language or several languages to the NLP Manager.
   * Each locale is truncated; a classifier is created for it only if none
   * is registered yet.
   * @param {String|String[]} srcLocales Locale or locales to be added.
   */
  addLanguage(srcLocales) {
    const locales = Array.isArray(srcLocales) ? srcLocales : [srcLocales];
    locales.forEach(locale => {
      const truncated = NlpUtil.getTruncatedLocale(locale);
      if (!this.languages.includes(truncated)) {
        this.languages.push(truncated);
      }
      if (!this.classifiers[truncated]) {
        this.classifiers[truncated] = new NlpClassifier({
          language: truncated,
          classifier: this.settings.classifier,
          neuralClassifier: this.settings.neuralClassifier,
          useNeural: this.settings.useNeural,
          useLRC: this.settings.useLRC,
        });
      }
    });
  }

  /**
   * Given a text, try to guess the language, over the languages used for the NLP.
   * When exactly one language is registered it is returned without guessing.
   * @param {String} utterance Text to be guessed.
   * @returns {String} ISO2 locale of the language, or undefined if not found.
   */
  guessLanguage(utterance) {
    if (this.languages.length === 1) {
      return this.languages[0];
    }
    const guess = this.guesser.guess(utterance, this.languages, 1);
    return guess && guess.length > 0 ? guess[0].alpha2 : undefined;
  }

  /**
   * Add new texts for an option of an entity for the given languages.
   * @param {String} entityName Name of the entity.
   * @param {String} optionName Name of the option.
   * @param {String[]} languages Languages for adding the texts.
   * @param {String[]} texts Texts to be added.
   * @returns {*} Whatever the NER manager returns.
   */
  addNamedEntityText(entityName, optionName, languages, texts) {
    return this.nerManager.addNamedEntityText(
      entityName,
      optionName,
      languages,
      texts
    );
  }

  /**
   * Adds a new regex named entity.
   * @param {String} entityName Name of the entity.
   * @param {String[]} languages Languages the regular expression applies to.
   * @param {RegExp|String} regex Regular expression, or its string form.
   * @returns {Object} The regex named entity.
   */
  addRegexEntity(entityName, languages, regex) {
    const entity = this.nerManager.addNamedEntity(entityName, 'regex');
    if (typeof regex === 'string') {
      entity.addStrRegex(languages, regex);
    } else {
      entity.addRegex(languages, regex);
    }
    return entity;
  }

  /**
   * Adds a new trim named entity.
   * @param {String} entityName Name of the entity.
   * @returns {Object} New Trim Named Entity instance.
   */
  addTrimEntity(entityName) {
    return this.nerManager.addNamedEntity(entityName, 'trim');
  }

  /**
   * Remove texts from an option of an entity for the given languages.
   * @param {String} entityName Name of the entity.
   * @param {String} optionName Name of the option.
   * @param {String[]} languages Languages for removing the texts.
   * @param {String[]} texts Texts to be removed.
   * @returns {*} Whatever the NER manager returns.
   */
  removeNamedEntityText(entityName, optionName, languages, texts) {
    return this.nerManager.removeNamedEntityText(
      entityName,
      optionName,
      languages,
      texts
    );
  }

  /**
   * Assign an intent to a domain.
   * @param {String} intent Intent to be assigned.
   * @param {String} domain Domain to include the intent.
   */
  assignDomain(intent, domain) {
    this.intentDomains[intent] = domain;
  }

  /**
   * Returns the domain of a given intent.
   * @param {String} intent Intent name.
   * @returns {String} Domain of the intent, or undefined if not assigned.
   */
  getIntentDomain(intent) {
    return this.intentDomains[intent];
  }

  /**
   * Get an object with the intents of each domain.
   * @returns {Object} Map from domain name to the array of its intents.
   */
  getDomains() {
    const result = {};
    Object.keys(this.intentDomains).forEach(intent => {
      const domain = this.intentDomains[intent];
      if (!result[domain]) {
        result[domain] = [];
      }
      result[domain].push(intent);
    });
    return result;
  }

  /**
   * Adds a new utterance associated to an intent for the given locale.
   * The intent is assigned to the 'default' domain when it has none, the
   * entities referenced by the utterance are registered as slots of the
   * intent, and the entity-substituted variant of the utterance (if the NER
   * manager produces one) is added to the classifier as well.
   * @param {String} srcLocale Locale of the language. Guessed when empty.
   * @param {String} utterance Text of the utterance.
   * @param {String} intent Intent name.
   * @throws {Error} If no locale can be determined or no classifier exists
   * for it.
   */
  addDocument(srcLocale, utterance, intent) {
    let locale = NlpUtil.getTruncatedLocale(srcLocale);
    if (!locale) {
      locale = this.guessLanguage(utterance);
    }
    if (!locale) {
      throw new Error('Locale must be defined');
    }
    const classifier = this.classifiers[locale];
    if (!classifier) {
      throw new Error(`Classifier not found for locale ${locale}`);
    }
    classifier.add(utterance, intent);
    if (this.getIntentDomain(intent) === undefined) {
      this.assignDomain(intent, 'default');
    }
    const entities = this.nerManager.getEntitiesFromUtterance(utterance);
    this.slotManager.addBatch(intent, entities);
    const optionalUtterance = this.nerManager.generateNamedEntityUtterance(
      utterance,
      locale
    );
    if (optionalUtterance) {
      classifier.add(optionalUtterance, intent);
    }
  }

  /**
   * Removes an utterance associated to an intent for the given locale.
   * @param {String} srcLocale Locale of the language. Guessed when empty.
   * @param {String} utterance Text of the utterance.
   * @param {String} intent Intent name.
   * @throws {Error} If no locale can be determined or no classifier exists
   * for it.
   */
  removeDocument(srcLocale, utterance, intent) {
    let locale = NlpUtil.getTruncatedLocale(srcLocale);
    if (!locale) {
      locale = this.guessLanguage(utterance);
    }
    if (!locale) {
      throw new Error('Locale must be defined');
    }
    const classifier = this.classifiers[locale];
    if (!classifier) {
      throw new Error(`Classifier not found for locale ${locale}`);
    }
    classifier.remove(utterance, intent);
  }

  /**
   * Adds an answer for a locale and intent.
   * @param {String} locale Locale of the intent.
   * @param {String} intent Intent name.
   * @param {String} answer Text of the answer.
   * @param {String} condition Condition to be evaluated.
   * @param {String} media url to be added (link to follow, ...).
   */
  addAnswer(locale, intent, answer, condition, media) {
    this.nlgManager.addAnswer(locale, intent, answer, condition, media);
  }

  /**
   * Remove an answer from a locale and intent.
   * @param {String} locale Locale of the intent.
   * @param {String} intent Intent name.
   * @param {String} answer Text of the answer.
   * @param {String} condition Condition to be evaluated.
   * @param {String} media url of the answer (link to follow, ...).
   */
  removeAnswer(locale, intent, answer, condition, media) {
    this.nlgManager.removeAnswer(locale, intent, answer, condition, media);
  }

  /**
   * Train the classifiers for the provided locales. If no locale is
   * provided, then retrain all the classifiers. Locales without a
   * registered classifier are skipped.
   * @param {String|String[]} [locale] Locale or list of locales to retrain.
   * @returns {Promise<Array>} Resolves once every classifier has trained.
   */
  async train(locale) {
    let languages;
    if (locale) {
      languages = Array.isArray(locale) ? locale : [locale];
    } else {
      ({ languages } = this);
    }
    return Promise.all(
      languages.map(async language => {
        const truncated = NlpUtil.getTruncatedLocale(language);
        const classifier = this.classifiers[truncated];
        if (classifier) {
          await classifier.train();
        }
      })
    );
  }

  /**
   * Given an utterance and a locale, try to classify the utterance into one intent.
   * When the second argument is empty, the first argument is taken as the
   * utterance and the locale is guessed.
   * @param {String} srcLocale Locale of the text. If not provided,
   * the locale is guessed.
   * @param {String} srcUtterance Text to be classified
   * @returns {Object[]} Classifications from the locale's classifier, or
   * undefined when no classifier is registered for the locale.
   */
  classify(srcLocale, srcUtterance) {
    let utterance = srcUtterance;
    let locale = srcLocale;
    if (!utterance) {
      utterance = srcLocale;
      locale = this.guessLanguage(utterance);
    }
    const truncated = NlpUtil.getTruncatedLocale(locale);
    const classifier = this.classifiers[truncated];
    if (!classifier) {
      return undefined;
    }
    return classifier.getClassifications(utterance);
  }

  /**
   * Gets the sentiment of an utterance.
   * When the second argument is empty, the first argument is taken as the
   * utterance and the locale is guessed.
   * @param {String} srcLocale Locale of the text. If not provided, is guessed.
   * @param {String} srcUtterance Text to analyze the sentiment.
   * @returns {*} Result of the sentiment manager for the truncated locale
   * (process() awaits it).
   */
  getSentiment(srcLocale, srcUtterance) {
    let utterance = srcUtterance;
    let locale = srcLocale;
    if (!utterance) {
      utterance = srcLocale;
      locale = this.guessLanguage(utterance);
    }
    const truncated = NlpUtil.getTruncatedLocale(locale);
    return this.sentiment.process(truncated, utterance);
  }

  /**
   * Finds an answer for an intent through the NLG manager.
   * @param {String} locale Locale of the intent.
   * @param {String} intent Intent name.
   * @param {Object} context Context handed to the NLG manager.
   * @param {String} media Media handed to the NLG manager.
   * @returns {String} The response text, followed by ` - <media>` when the
   * answer carries media; undefined when there is no answer or no response.
   */
  getAnswer(locale, intent, context, media) {
    const answer = this.nlgManager.findAnswer(locale, intent, context, media);
    if (answer && answer.response) {
      if (answer.media) {
        return `${answer.response} - ${answer.media}`; // to improve
      }
      return answer.response;
    }
    return undefined;
  }

  /**
   * Indicates if all the classifications has exactly 0.5 score.
   * An empty array yields true.
   * @param {Object[]} classifications Array of classifications.
   * @returns {boolean} True if all classifications score is 0.5.
   */
  isEqualClassification(classifications) {
    for (let i = 0; i < classifications.length; i += 1) {
      if (classifications[i].value !== 0.5) {
        return false;
      }
    }
    return true;
  }

  /**
   * Process to extract entities from an utterance.
   * The locale is guessed when the utterance is passed as the only argument
   * or when the given locale is not one of the registered languages.
   * @param {string} srcLocale Locale of the utterance, optional.
   * @param {string} srcUtterance Text of the utterance.
   * @param {string[]} whitelist Optional whitelist of entity names.
   * @returns {Promise<Object[]>} Array of entities.
   */
  async extractEntities(srcLocale, srcUtterance, whitelist) {
    let utterance = srcUtterance;
    let locale = srcLocale;
    if (!utterance) {
      utterance = locale;
      locale = this.guessLanguage(utterance);
    }
    if (!this.languages.includes(NlpUtil.getTruncatedLocale(locale))) {
      locale = this.guessLanguage(utterance);
    }
    const truncated = NlpUtil.getTruncatedLocale(locale);
    return this.nerManager.findEntities(utterance, truncated, whitelist);
  }

  /**
   * Registers a display name for a locale. process() reports this name as
   * `result.language` when the resolved locale matches the key.
   * @param {String} locale Locale used as the lookup key.
   * @param {String} name Name of the language.
   */
  describeLanguage(locale, name) {
    this.languageNames[locale] = { locale, name };
  }

  /**
   * Process an utterance for full classify and analyze. If the locale is
   * not provided, then it will be guessed.
   * Classify the utterance and extract entities from it, returning an
   * object with all the information available.
   * Also calculates the sentiment of the utterance, if possible.
   *
   * The result carries: locale, localeIso2, language, utterance,
   * classification, intent, domain, score, entities, sentiment and, when an
   * answer is found, srcAnswer and answer. Entity values and `slotFill` are
   * written into the context object.
   * @param {String} srcLocale Language locale of the utterance.
   * @param {String} srcUtterance Text of the utterance.
   * @param {Object} srcContext Context for finding answers. Mutated in place.
   * @returns {Promise<Object>} Result after applying the process transformer.
   */
  async process(srcLocale, srcUtterance, srcContext) {
    let utterance = srcUtterance;
    let locale = srcLocale;
    let languageGuessed = false;
    if (!utterance) {
      utterance = locale;
      locale = this.guessLanguage(utterance);
      languageGuessed = true;
    }
    if (!this.languages.includes(NlpUtil.getTruncatedLocale(locale))) {
      locale = this.guessLanguage(utterance);
      languageGuessed = true;
      if (!locale) {
        [locale] = this.languages;
      }
    }
    const truncated = NlpUtil.getTruncatedLocale(locale);
    const result = {};
    result.locale = locale;
    result.localeIso2 = truncated;
    result.language = (
      this.languageNames[locale] ||
      this.guesser.languagesAlpha2[result.localeIso2] ||
      {}
    ).name;
    result.utterance = utterance;
    if (languageGuessed && this.settings.fullSearchWhenGuessed) {
      let bestScore;
      let bestClassification;
      // Classifies `text` with every language, keeping the best top score.
      const searchAllLanguages = text => {
        this.languages.forEach(language => {
          const classification = this.classify(language, text);
          if (
            classification &&
            classification.length > 0 &&
            (bestScore === undefined || classification[0].value > bestScore)
          ) {
            bestScore = classification[0].value;
            bestClassification = classification;
          }
        });
      };
      searchAllLanguages(utterance);
      const optionalUtterance = await this.nerManager.generateEntityUtterance(
        utterance,
        truncated
      );
      // An empty variant must never reach classify(): it would take the
      // locale code as the text. An unchanged variant cannot beat the score
      // already recorded, so the second pass is skipped in both cases.
      if (optionalUtterance && optionalUtterance !== utterance) {
        searchAllLanguages(optionalUtterance);
      }
      result.classification = bestClassification;
    } else {
      result.classification = this.classify(truncated, utterance);
      const optionalUtterance = await this.nerManager.generateEntityUtterance(
        utterance,
        truncated
      );
      // Same guard as above: skip empty or unchanged variants.
      if (optionalUtterance && optionalUtterance !== utterance) {
        const optionalClassification = this.classify(
          truncated,
          optionalUtterance
        );
        const hasBase =
          result.classification && result.classification.length > 0;
        if (
          optionalClassification &&
          optionalClassification.length > 0 &&
          (!hasBase ||
            optionalClassification[0].value > result.classification[0].value)
        ) {
          result.classification = optionalClassification;
        }
      }
    }
    if (
      !result.classification ||
      result.classification.length === 0 ||
      this.isEqualClassification(result.classification)
    ) {
      result.intent = 'None';
      result.domain = 'default';
      result.score = 1;
    } else {
      result.intent = result.classification[0].label;
      result.domain = this.getIntentDomain(result.intent);
      result.score = result.classification[0].value;
    }
    result.entities = await this.nerManager.findEntities(
      utterance,
      truncated,
      this.slotManager.getIntentEntityNames(result.intent)
    );
    const context = srcContext || {};
    result.entities.forEach(entity => {
      context[entity.entity] = entity.option || entity.utteranceText;
    });
    result.sentiment = await this.getSentiment(truncated, utterance);
    const answer = this.getAnswer(truncated, result.intent, context);
    if (answer) {
      result.srcAnswer = answer;
      result.answer = Handlebars.compile(answer)(context);
    }
    if (this.slotManager.process(result, context)) {
      // Slot processing reported a change: refresh the context from the
      // entities and render the answer template again.
      result.entities.forEach(entity => {
        context[entity.entity] = entity.option || entity.utteranceText;
      });
      if (result.srcAnswer) {
        result.answer = Handlebars.compile(result.srcAnswer)(context);
      }
    }
    context.slotFill = result.slotFill;
    return this.processTransformer(result);
  }

  /**
   * Clear the NLP Manager: drops languages, classifiers, intent domains,
   * slots, and replaces the NER and NLG managers with fresh instances.
   */
  clear() {
    this.nerManager = new NerManager(this.settings.ner);
    this.languages = [];
    this.classifiers = {};
    // Intents of the discarded model must not survive in getDomains().
    this.intentDomains = {};
    this.slotManager.clear();
    this.nlgManager = new NlgManager();
  }

  /**
   * Deflate the brain.js object: keeps only `layers[1]['0']` and turns its
   * keyed weights into an array, in key order. The input is modified in place.
   * @param {object} srcBrain Brain json object.
   * @returns {object} The same object, deflated.
   */
  deflate(srcBrain) {
    const brain = srcBrain;
    brain.layers = brain.layers[1]['0'];
    const weights = [];
    Object.keys(brain.layers.weights).forEach(key => {
      weights.push(brain.layers.weights[key]);
    });
    brain.layers.weights = weights;
    return brain;
  }

  /**
   * Inflate the brain.js object: rebuilds the two-layer structure from a
   * deflated object, keying the i-th weight by the i-th feature key. The
   * input is modified in place.
   * @param {object} features Features map.
   * @param {object} srcBrain Brain json object.
   * @returns {object} The same object, inflated.
   */
  inflate(features, srcBrain) {
    const brain = srcBrain;
    const weights = {};
    const brainFeatures = {};
    let i = 0;
    Object.keys(features).forEach(key => {
      weights[key] = brain.layers.weights[i];
      brainFeatures[key] = {};
      i += 1;
    });
    const layers = [];
    layers.push(brainFeatures);
    const data = {};
    data['0'] = {};
    data['0'].bias = brain.layers.bias;
    data['0'].weights = weights;
    layers.push(data);
    brain.layers = layers;
    return brain;
  }

  /**
   * Load NLP manager information from a string.
   * @param {String|Object} data JSON string or object to load NLP manager information from.
   */
  import(data) {
    const clone = typeof data === 'string' ? JSON.parse(data) : data;
    // Fall back to an empty object, as the constructor does, so that
    // addLanguage() can read the settings of a payload that has none.
    this.settings = clone.settings || {};
    this.languages = clone.languages;
    this.nerManager.load(clone.nerManager);
    this.slotManager.load(clone.slotManager);
    this.intentDomains = clone.intentDomains || {};
    this.nlgManager.responses = clone.responses;
    for (let i = 0, l = clone.classifiers.length; i < l; i += 1) {
      const classifierClone = clone.classifiers[i];
      this.addLanguage(classifierClone.language);
      const classifier = this.classifiers[classifierClone.language];
      classifier.docs = classifierClone.docs;
      classifier.features = classifierClone.features;
      // Payloads without the flag are treated as using the LRC.
      if (classifierClone.useLRC === undefined) {
        classifier.settings.useLRC = true;
      } else {
        classifier.settings.useLRC = !!classifierClone.useLRC;
      }
      classifier.settings.useNeural = !!classifierClone.useNeural;
      if (classifierClone.neuralClassifier) {
        const { neuralClassifier } = classifier.settings;
        neuralClassifier.settings = classifierClone.neuralClassifier.settings;
        Object.keys(classifierClone.neuralClassifier.classifierMap).forEach(
          label => {
            neuralClassifier.addTrainer(label);
            neuralClassifier.classifierMap[label].fromJSON(
              classifierClone.neuralClassifier.classifierMap[label]
            );
          }
        );
      }
      if (classifier.settings.useLRC) {
        const lrc = classifier.settings.classifier;
        const { logistic } = classifierClone;
        // The feature container may be an array or a keyed map; count the
        // entries either way so every dense row has the right length.
        const featureCount = Array.isArray(classifier.features)
          ? classifier.features.length
          : Object.keys(classifier.features || {}).length;
        lrc.observations = {};
        Object.entries(logistic.observations).forEach(([label, matrix]) => {
          lrc.observations[label] = [];
          matrix.forEach((row, rowIndex) => {
            // Create a array filled with as many zeros as features
            const features = Array(featureCount).fill(0);
            // Set the features for the positions stored with 1. The others remains as zero.
            row.forEach(featPosition => {
              features[featPosition] = 1;
            });
            lrc.observations[label][rowIndex] = features;
          });
        });
        lrc.labels = logistic.labels;
        lrc.classifications = logistic.classifications;
        lrc.observationCount = logistic.observationCount;
        lrc.theta = logistic.theta;
      }
    }
  }

  /**
   * Export NLP manager information as a string.
   * The live classifiers are left untouched, so exporting is repeatable.
   * @param {Boolean} minified If true, the returned JSON will have no spacing or indentation.
   * @returns {String} NLP manager information as a JSON string.
   */
  export(minified = false) {
    const clone = {};
    clone.settings = this.settings;
    clone.languages = this.languages;
    clone.intentDomains = this.intentDomains;
    clone.nerManager = this.nerManager.save();
    clone.slotManager = this.slotManager.save();
    clone.classifiers = [];
    clone.responses = this.nlgManager.responses;
    if (this.languages && this.languages.length > 0) {
      this.languages.forEach(language => {
        const classifier = this.classifiers[language];
        const classifierClone = {};
        classifierClone.language = classifier.settings.language;
        classifierClone.docs = classifier.docs;
        classifierClone.features = classifier.features;
        classifierClone.logistic = {};
        const { logistic } = classifierClone;
        const lrc = classifier.settings.classifier;
        classifierClone.useLRC = classifier.settings.useLRC;
        classifierClone.useNeural = classifier.settings.useNeural;
        if (classifier.settings.neuralClassifier) {
          const { neuralClassifier } = classifier.settings;
          classifierClone.neuralClassifier = {};
          classifierClone.neuralClassifier.settings = neuralClassifier.settings;
          classifierClone.neuralClassifier.classifierMap = {};
          Object.keys(neuralClassifier.classifierMap).forEach(key => {
            classifierClone.neuralClassifier.classifierMap[
              key
            ] = neuralClassifier.classifierMap[key].toJSON();
          });
        }
        if (lrc) {
          // Build the sparse matrix in a fresh object: writing into
          // lrc.observations would replace the classifier's dense rows.
          logistic.observations = {};
          Object.entries(lrc.observations).forEach(([label, matrix]) => {
            logistic.observations[label] = [];
            matrix.forEach((features, row) => {
              // Store array of positions for the features equals to 1.
              // Position 0 is a valid index, so compare against undefined
              // instead of relying on truthiness.
              logistic.observations[label][row] = features
                .map((feat, index) => (feat === 1 ? index : undefined))
                .filter(position => position !== undefined);
            });
          });
          logistic.labels = lrc.labels;
          logistic.classifications = lrc.classifications;
          logistic.observationCount = lrc.observationCount;
          logistic.theta = lrc.theta;
        }
        clone.classifiers.push(classifierClone);
      });
    }
    return minified ? JSON.stringify(clone) : JSON.stringify(clone, null, 2);
  }

  /**
   * Save the NLP manager information into a file.
   * @param {String} srcFileName Filename for saving the NLP manager.
   * Defaults to 'model.nlp'.
   */
  save(srcFileName) {
    const fileName = srcFileName || 'model.nlp';
    fs.writeFileSync(fileName, this.export(), 'utf8');
  }

  /**
   * Load the NLP manager information from a file.
   * @param {String} srcFileName Filename for loading the NLP manager.
   * Defaults to 'model.nlp'.
   */
  load(srcFileName) {
    const fileName = srcFileName || 'model.nlp';
    const data = fs.readFileSync(fileName, 'utf8');
    this.import(data);
  }

  /**
   * Load the NLP manager information from an excel file. The manager is
   * cleared first.
   * @param {String} srcFileName File name of the excel. Defaults to
   * 'model.xls'.
   */
  loadExcel(srcFileName) {
    this.clear();
    const fileName = srcFileName || 'model.xls';
    const reader = new NlpExcelReader(this);
    reader.load(fileName);
  }
}
// Expose the NlpManager class as the value of this CommonJS module.
module.exports = NlpManager;