UNPKG

@nlpjs/nlu

Version:

Natural Language Understanding

493 lines (469 loc) 15 kB
/*
 * Copyright (c) AXA Group Operations Spain S.A.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

const { Clonable, compareWildcars } = require('@nlpjs/core');
const { SpellCheck } = require('@nlpjs/similarity');

/**
 * Base class for Natural Language Understanding engines.
 *
 * It owns the text preparation steps (normalize, tokenize, stopword removal,
 * stemming), corpus bookkeeping (features, intents and the mappings between
 * them) and the post-processing of classifications. The actual learning
 * algorithm is supplied by subclasses through `innerTrain` (and, for the
 * default process pipeline, `innerProcess`).
 *
 * NOTE(review): `applySettings`, `getPipeline` and `runPipeline` are inherited
 * from `Clonable` and are not visible in this file. The way they are used here
 * suggests `applySettings(target, defaults)` only fills keys that are still
 * undefined on `target` and returns `target` - confirm against @nlpjs/core.
 */
class Nlu extends Clonable {
  /**
   * @param {object} [settings] Instance settings; may carry its own
   *   `container`, which takes precedence over the second argument.
   * @param {object} [container] Dependency container used to look up
   *   configuration, pipelines and plugins.
   */
  constructor(settings = {}, container) {
    super(
      {
        settings: {},
        container: settings.container || container,
      },
      container
    );
    this.applySettings(this.settings, settings);
    this.applySettings(this.settings, { locale: 'en' });
    if (!this.settings.tag) {
      this.settings.tag = `nlu-${this.settings.locale}`;
    }
    this.registerDefault();
    this.applySettings(
      this.settings,
      this.container.getConfiguration(this.settings.tag)
    );
    this.applySettings(this, {
      pipelinePrepare: this.getPipeline(`${this.settings.tag}-prepare`),
      pipelineTrain: this.getPipeline(`${this.settings.tag}-train`),
      pipelineProcess: this.getPipeline(`${this.settings.tag}-process`),
    });
    this.spellCheck = new SpellCheck(this.settings);
  }

  /**
   * Registers the default configuration and the default training pipeline
   * under the wildcard tags `nlu-??` / `nlu-??-train`.
   *
   * NOTE(review): the trailing `false` argument presumably means "do not
   * overwrite an existing registration" - confirm against the container API.
   */
  registerDefault() {
    this.container.registerConfiguration(
      'nlu-??',
      {
        keepStopwords: true,
        nonefeatureValue: 1,
        nonedeltaMultiplier: 1.2,
        spellCheck: false,
        spellCheckDistance: 1,
        filterZeros: true,
        log: true,
      },
      false
    );
    this.container.registerPipeline(
      'nlu-??-train',
      ['.prepareCorpus', '.addNoneFeature', '.innerTrain'],
      false
    );
  }

  /**
   * Built-in preparation: normalize -> tokenize -> remove stopwords -> stem ->
   * array-to-object, memoizing the resulting tokens per locale and text.
   *
   * @param {object} input Object with `text` (or `utterance`) and `settings`.
   * @returns {Promise<object>} The `tokens` produced by the last step.
   */
  async defaultPipelinePrepare(input) {
    let result;
    if (this.cache) {
      // Drop memoized results once the cache is more than one hour old
      // (3600000 ms = 1 h); the resolved plugins are kept.
      const now = new Date();
      const diff = Math.abs(now.getTime() - this.cache.created) / 3600000;
      if (diff > 1) {
        this.cache.results = {};
        this.cache.created = new Date().getTime();
      }
    }
    if (!this.cache) {
      // First call: resolve the plugins once and keep them next to the results.
      this.cache = {
        created: new Date().getTime(),
        results: {},
        normalize: this.container.get('normalize'),
        tokenize: this.container.get('tokenize'),
        removeStopwords: this.container.get('removeStopwords'),
        stem: this.container.get('stem'),
        arrToObj: this.container.get('arrToObj'),
      };
    } else if (this.cache.results[input.settings.locale]) {
      result =
        this.cache.results[input.settings.locale][
          input.text || input.utterance
        ];
      if (result) {
        return result;
      }
    }
    let output = input;
    output = this.cache.normalize.run(output);
    output = await this.cache.tokenize.run(output);
    output = this.cache.removeStopwords.run(output);
    output = await this.cache.stem.run(output);
    output = this.cache.arrToObj.run(output);
    result = output.tokens;
    if (!this.cache.results[input.settings.locale]) {
      this.cache.results[input.settings.locale] = {};
    }
    this.cache.results[input.settings.locale][input.text || input.utterance] =
      result;
    return result;
  }

  /**
   * Built-in process pipeline, used when no `<tag>-process` pipeline was
   * found: prepare, spell check, map to features, classify, filter and
   * normalize.
   *
   * @param {object} input Object with `text` and `settings`.
   * @returns {Promise<object>} The processed input.
   */
  async defaultPipelineProcess(input) {
    let output = await this.prepare(input);
    output = await this.doSpellCheck(output);
    output = await this.textToFeatures(output);
    output = await this.innerProcess(output);
    output = await this.filterNonActivated(output);
    output = await this.normalizeClassifications(output);
    return output;
  }

  /**
   * Converts text into tokens.
   *
   * - string: runs the prepare pipeline and returns its result.
   * - array: prepares every element in order and returns an array.
   * - object: prepares the field named by `settings.fieldNameSrc`, or
   *   `texts` / `utterances` / `text` / `utterance`, and returns a copy of the
   *   object with the result stored under `settings.fieldNameTgt`
   *   (default `tokens`). Properties already on the object win over the
   *   computed field because they are spread last.
   *
   * @param {string|object|Array} text Value to prepare.
   * @param {object} [srcSettings] Settings to use instead of `this.settings`.
   * @returns {Promise<*>} Prepared value, shaped like the input.
   * @throws {Error} When no text can be extracted from `text`.
   */
  async prepare(text, srcSettings) {
    const settings = srcSettings || this.settings;
    if (typeof text === 'string') {
      const input = {
        locale: this.settings.locale,
        text,
        settings,
      };
      if (this.pipelinePrepare) {
        return this.runPipeline(input, this.pipelinePrepare);
      }
      return this.defaultPipelinePrepare(input);
    }
    // `typeof null === 'object'`: exclude null explicitly so it reaches the
    // descriptive error below instead of failing on a property access.
    if (typeof text === 'object' && text !== null) {
      if (Array.isArray(text)) {
        const result = [];
        for (let i = 0; i < text.length; i += 1) {
          result.push(await this.prepare(text[i], settings));
        }
        return result;
      }
      let item = settings.fieldNameSrc
        ? text[settings.fieldNameSrc]
        : text.texts || text.utterances;
      if (!item && typeof item !== 'string') {
        if (typeof text.text === 'string') {
          item = text.text;
        } else if (typeof text.utterance === 'string') {
          item = text.utterance;
        }
      }
      // An empty string is falsy but is still a valid text to prepare.
      if (item || typeof item === 'string') {
        const result = await this.prepare(item, settings);
        const targetField = settings.fieldNameTgt || 'tokens';
        return { [targetField]: result, ...text };
      }
    }
    throw new Error(
      `Error at nlu.prepare: expected a text but received ${text}`
    );
  }

  /**
   * Optionally replaces `input.tokens` with spell-checked tokens.
   *
   * `spellCheck` and `spellCheckDistance` are read from `input.settings`
   * first and fall back to `srcSettings` / `this.settings`; the distance
   * defaults to 1.
   *
   * @param {object} input Object with `tokens` and `settings`.
   * @param {object} [srcSettings] Extra settings, completed from
   *   `this.settings`.
   * @returns {Promise<object>} The same `input` object.
   */
  async doSpellCheck(input, srcSettings) {
    const settings = this.applySettings(srcSettings || {}, this.settings);
    let shouldSpellCheck = input.settings.spellCheck;
    let spellCheckDistance = input.settings.spellCheckDistance;
    if (shouldSpellCheck === undefined) {
      shouldSpellCheck = settings.spellCheck;
    }
    if (spellCheckDistance === undefined) {
      spellCheckDistance =
        settings.spellCheckDistance === undefined
          ? 1
          : settings.spellCheckDistance;
    }
    if (shouldSpellCheck) {
      const tokens = this.spellCheck.check(input.tokens, spellCheckDistance);
      input.tokens = tokens;
    }
    return input;
  }

  /**
   * Prepares every utterance of the corpus and rebuilds the bookkeeping
   * structures: `features`, `intents`, `intentFeatures`, `featuresToIntent`,
   * `numFeatures` and `numIntents`.
   *
   * @param {object} srcInput Object with `corpus` (array of
   *   `{ utterance, intent }`) and `settings`.
   * @returns {Promise<object>} The same object with `corpus` replaced by an
   *   array of `{ input, output }` training items.
   */
  async prepareCorpus(srcInput) {
    this.features = {};
    this.intents = {};
    this.intentFeatures = {};
    // The intent list cached by convertToArray is derived from this.intents;
    // invalidate it so a retrain with different intents is not ignored.
    this.intentsArr = undefined;
    const input = srcInput;
    const { corpus } = input;
    const result = [];
    for (let i = 0; i < corpus.length; i += 1) {
      const { intent } = corpus[i];
      const item = {
        input: await this.prepare(corpus[i].utterance, input.settings),
        output: { [intent]: 1 },
      };
      const keys = Object.keys(item.input);
      if (!Object.prototype.hasOwnProperty.call(this.intentFeatures, intent)) {
        this.intentFeatures[intent] = {};
      }
      for (let j = 0; j < keys.length; j += 1) {
        this.features[keys[j]] = 1;
        this.intentFeatures[intent][keys[j]] = 1;
      }
      this.intents[intent] = 1;
      result.push(item);
    }
    // Invert intentFeatures into feature -> [intents].
    const keys = Object.keys(this.intentFeatures);
    this.featuresToIntent = {};
    for (let i = 0; i < keys.length; i += 1) {
      const intent = keys[i];
      const features = Object.keys(this.intentFeatures[intent]);
      for (let j = 0; j < features.length; j += 1) {
        const feature = features[j];
        if (
          !Object.prototype.hasOwnProperty.call(this.featuresToIntent, feature)
        ) {
          this.featuresToIntent[feature] = [];
        }
        this.featuresToIntent[feature].push(intent);
      }
    }
    this.spellCheck.setFeatures(this.features);
    this.numFeatures = Object.keys(this.features).length;
    this.numIntents = Object.keys(this.intents).length;
    input.corpus = result;
    return input;
  }

  /**
   * Appends a synthetic `nonefeature -> None` training item when
   * `input.settings.useNoneFeature` is truthy.
   *
   * @param {object} input Object with `corpus` and optional `settings`.
   * @returns {object} The same `input` object.
   */
  addNoneFeature(input) {
    const { corpus } = input;
    if (input.settings && input.settings.useNoneFeature) {
      corpus.push({ input: { nonefeature: 1 }, output: { None: 1 } });
    }
    return input;
  }

  /**
   * Converts `input.classifications` from an `{ intent: score }` map into an
   * array of `{ intent, score }` sorted by descending score. Zero scores are
   * dropped when `input.settings.filterZeros` is truthy; an empty result is
   * replaced by a single `None` entry with score 1.
   *
   * @param {object} srcInput Object with `classifications` and `settings`.
   * @returns {object} The same object.
   */
  convertToArray(srcInput) {
    const input = srcInput;
    const { classifications } = input;
    if (classifications) {
      if (!this.intentsArr) {
        // Build the intent list lazily and cache it on the instance.
        if (this.intents) {
          this.intentsArr = Object.keys(this.intents);
          if (!this.intents.None) {
            this.intentsArr.push('None');
          }
        } else {
          this.intentsArr = Object.keys(classifications);
        }
      }
      const keys = this.intentsArr;
      const result = [];
      for (let i = 0; i < keys.length; i += 1) {
        const intent = keys[i];
        const score = classifications[intent];
        if (score !== undefined && (score > 0 || !input.settings.filterZeros)) {
          result.push({ intent, score });
        }
      }
      if (!result.length) {
        result.push({ intent: 'None', score: 1 });
      }
      input.classifications = result.sort((a, b) => b.score - a.score);
    }
    return input;
  }

  /**
   * @param {object} tokensA Map keyed by token.
   * @param {string[]} tokensB List of tokens.
   * @returns {boolean} True when any token of `tokensB` has a truthy entry in
   *   `tokensA`.
   */
  someSimilar(tokensA, tokensB) {
    for (let i = 0; i < tokensB.length; i += 1) {
      if (tokensA[tokensB[i]]) {
        return true;
      }
    }
    return false;
  }

  /**
   * @param {string} intent Intent name.
   * @param {string[]} allowList Patterns passed to `compareWildcars`.
   * @returns {boolean} True when the intent matches any entry.
   */
  matchAllowList(intent, allowList) {
    for (let i = 0; i < allowList.length; i += 1) {
      if (compareWildcars(intent, allowList[i])) {
        return true;
      }
    }
    return false;
  }

  /**
   * Decides whether an intent may keep its score.
   *
   * With an array allow list the decision is the wildcard match alone. With
   * an object allow list the intent must have a truthy entry and then still
   * pass the feature check: at least one token must be a known feature of
   * the intent.
   *
   * @param {string} intent Intent name.
   * @param {object} tokens Map keyed by token.
   * @param {string[]|object} [allowList] Optional allow list.
   * @returns {boolean} Whether the intent is activated.
   */
  intentIsActivated(intent, tokens, allowList) {
    if (allowList) {
      if (Array.isArray(allowList)) {
        return this.matchAllowList(intent, allowList);
      }
      if (!allowList[intent]) {
        return false;
      }
    }
    const features = this.intentFeatures[intent];
    if (!features) {
      return false;
    }
    const keys = Object.keys(tokens);
    for (let i = 0; i < keys.length; i += 1) {
      if (features[keys[i]]) {
        return true;
      }
    }
    return false;
  }

  /**
   * Sets to 0 the score of every classification (other than `None`) whose
   * intent is not activated, re-sorting by descending score if anything
   * changed.
   *
   * @param {object} srcInput Object with `classifications` (array),
   *   `tokens` and `settings`.
   * @returns {object} The same object.
   */
  filterNonActivated(srcInput) {
    if (this.intentFeatures && srcInput.classifications) {
      const intents = srcInput.classifications.map((x) => x.intent);
      let someModified = false;
      for (let i = 0; i < intents.length; i += 1) {
        const intent = intents[i];
        if (intent !== 'None') {
          if (
            !this.intentIsActivated(
              intent,
              srcInput.tokens,
              srcInput.settings.allowList
            )
          ) {
            srcInput.classifications[i].score = 0;
            someModified = true;
          }
        }
      }
      if (someModified) {
        srcInput.classifications.sort((a, b) => b.score - a.score);
      }
    }
    return srcInput;
  }

  /**
   * Squares every score and rescales them to sum to 1 (when the total is
   * positive). Without `classifications`, falls back to `input.nluAnswer`.
   *
   * @param {object} srcInput Object with `classifications` or `nluAnswer`.
   * @returns {object} The same object.
   */
  normalizeClassifications(srcInput) {
    const input = srcInput;
    const { classifications } = input;
    if (classifications) {
      let total = 0;
      for (let i = 0; i < classifications.length; i += 1) {
        classifications[i].score **= 2;
        total += classifications[i].score;
      }
      if (total > 0) {
        for (let i = 0; i < classifications.length; i += 1) {
          classifications[i].score /= total;
        }
      }
    } else {
      input.classifications = input.nluAnswer;
    }
    return input;
  }

  /**
   * Replaces `input.tokens` with only the tokens known as features and, when
   * `useNoneFeature` is enabled, adds a `nonefeature` whose value grows with
   * the number of unknown tokens (geometric series driven by
   * `nonedeltaMultiplier`).
   *
   * @param {object} srcInput Object with `tokens` and `settings`.
   * @returns {object} The same object.
   */
  textToFeatures(srcInput) {
    const input = srcInput;
    const { tokens } = input;
    const keys = Object.keys(tokens);
    let unknownTokens = 0;
    const features = {};
    for (let i = 0; i < keys.length; i += 1) {
      const token = keys[i];
      if (token === 'nonefeature') {
        // NOTE(review): `this.nonefeatureValue` is never set in this file (the
        // setting lives at `this.settings.nonefeatureValue`), and this write
        // goes to the discarded `tokens` map, so it does not affect the
        // returned features. Kept as-is; confirm the intent.
        tokens[token] = this.nonefeatureValue;
      } else if (!this.features || !this.features[token]) {
        unknownTokens += 1;
      } else {
        features[token] = tokens[token];
      }
    }
    // Read the override from the same object that is tested; previously the
    // test looked at input.settings but the value came from this.settings,
    // which ignored per-call overrides and could yield undefined (-> NaN).
    let nonedelta =
      input.settings.nonedeltaValue === undefined
        ? this.numIntents / this.numFeatures
        : input.settings.nonedeltaValue;
    let nonevalue = 0;
    for (let i = 0; i < unknownTokens; i += 1) {
      nonevalue += nonedelta;
      nonedelta *= this.settings.nonedeltaMultiplier;
    }
    if (input.settings && input.settings.useNoneFeature && nonevalue) {
      features.nonefeature = nonevalue;
    }
    input.tokens = features;
    return input;
  }

  /**
   * Training step to be provided by subclasses.
   *
   * @throws {Error} Always, in this base class.
   */
  async innerTrain() {
    throw new Error('This method should be implemented by child classes');
  }

  /**
   * Runs the training pipeline over a corpus.
   *
   * @param {Array<object>} corpus Array of `{ utterance, intent }`.
   * @param {object} [settings] Per-call settings, completed from
   *   `this.settings`.
   * @returns {Promise<*>} Whatever the training pipeline returns.
   */
  async train(corpus, settings) {
    const input = {
      corpus,
      settings: this.applySettings(settings, this.settings),
    };
    return this.runPipeline(input, this.pipelineTrain);
  }

  /**
   * Builds a per-token explanation from the classifier's bias and weights.
   *
   * @param {object} input Object accepted by the normalize/tokenize/stem
   *   plugins.
   * @param {object} [explanation] Object with `bias` and `weights` (keyed by
   *   stem).
   * @returns {Promise<Array<object>|undefined>} Array of
   *   `{ token, stem, weight }` starting with the `##bias` entry, or
   *   `undefined` when no explanation was given.
   */
  async getExplanation(input, explanation) {
    if (!explanation) {
      return undefined;
    }
    const normalized = await this.container.get('normalize').run(input);
    const tokenized = await this.container.get('tokenize').run(normalized);
    // Capture the raw tokens before stemming.
    // NOTE(review): presumably the stem plugin replaces `tokens` - confirm.
    const { tokens } = tokenized;
    const stemmed = await this.container.get('stem').run(tokenized);
    const stems = stemmed.tokens;
    const result = [];
    result.push({
      token: '',
      stem: '##bias',
      weight: explanation.bias,
    });
    for (let i = 0; i < tokens.length; i += 1) {
      const stem = stems[i];
      result.push({
        token: tokens[i],
        stem,
        weight: explanation.weights[stem],
      });
    }
    return result;
  }

  /**
   * Classifies an utterance.
   *
   * @param {string} utterance Text to classify.
   * @param {object} [settings] Per-call settings, completed from
   *   `this.settings`.
   * @returns {Promise<object>} `{ classifications, entities, explanation }`
   *   when the pipeline produced an array of classifications; otherwise the
   *   raw pipeline output with `intents` renamed to `classifications`.
   */
  async process(utterance, settings) {
    const input = {
      text: utterance,
      settings: this.applySettings(settings || {}, this.settings),
    };
    let output;
    if (this.pipelineProcess) {
      output = await this.runPipeline(input, this.pipelineProcess);
    } else {
      output = await this.defaultPipelineProcess(input);
    }
    if (Array.isArray(output.classifications)) {
      const explanation = input.settings.returnExplanation
        ? await this.getExplanation(input, output.explanation)
        : undefined;
      return {
        classifications: output.classifications,
        entities: undefined,
        explanation,
      };
    }
    if (output.intents) {
      output.classifications = output.intents;
      delete output.intents;
    }
    return output;
  }

  /**
   * @returns {object} Serializable snapshot: a shallow copy of the settings
   *   (without `container`) plus the corpus bookkeeping structures.
   */
  toJSON() {
    const result = {
      settings: { ...this.settings },
      features: this.features,
      intents: this.intents,
      intentFeatures: this.intentFeatures,
      featuresToIntent: this.featuresToIntent,
    };
    delete result.settings.container;
    return result;
  }

  /**
   * Restores the state produced by `toJSON`. Missing collections default to
   * empty objects.
   *
   * @param {object} json Snapshot to load.
   */
  fromJSON(json) {
    this.applySettings(this.settings, json.settings);
    this.features = json.features || {};
    this.intents = json.intents || {};
    // Derive from the defaulted value: Object.keys(undefined) would throw
    // when the snapshot has no `intents`.
    this.intentsArr = Object.keys(this.intents);
    this.featuresToIntent = json.featuresToIntent || {};
    this.intentFeatures = json.intentFeatures || {};
    this.spellCheck.setFeatures(this.features);
    this.numFeatures = Object.keys(this.features).length;
    this.numIntents = Object.keys(this.intents).length;
  }
}

module.exports = Nlu;