UNPKG

@nlpjs/nlu

Version:

Natural Language Understanding

438 lines (411 loc) 12.4 kB
/* * Copyright (c) AXA Group Operations Spain S.A. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ const { Clonable, compareWildcars } = require('@nlpjs/core'); const defaultDomainName = 'master_domain'; class DomainManager extends Clonable { constructor(settings = {}, container) { super( { settings: {}, container: settings.container || container, }, container ); this.applySettings(this.settings, settings); this.applySettings(this.settings, { locale: 'en' }); if (!this.settings.tag) { this.settings.tag = `domain-manager-${this.settings.locale}`; } this.registerDefault(); this.applySettings( this.settings, this.container.getConfiguration(this.settings.tag) ); this.domains = {}; this.addDomain(defaultDomainName); this.stemDict = {}; this.intentDict = {}; this.sentences = []; this.applySettings(this, { pipelineTrain: this.getPipeline(`${this.settings.tag}-train`), pipelineProcess: this.getPipeline(`${this.settings.tag}-process`), }); } registerDefault() { this.container.registerConfiguration( 'domain-manager-??', { nluByDomain: { default: { className: 'NeuralNlu', settings: {}, }, }, trainByDomain: false, useStemDict: true, }, false ); this.container.registerPipeline( 'domain-manager-??-train', [ '.trainStemmer', '.generateCorpus', '.fillStemDict', '.innerTrain', 'output.status', ], false ); } getDomainInstance(domainName) { if (!this.settings.nluByDomain) { this.settings.nluByDomain = {}; } const domainSettings = this.settings.nluByDomain[domainName] || this.settings.nluByDomain.default || { className: 'NeuralNlu', settings: {}, }; return this.container.get( domainSettings.className || 'NeuralNlu', this.applySettings( { locale: this.settings.locale }, domainSettings.settings || {} ) ); } addDomain(name) { if (!this.domains[name]) { this.domains[name] = this.getDomainInstance(name); } return this.domains[name]; } removeDomain(name) { delete this.domains[name]; } async generateStemKey(srcTokens) { let tokens; if (typeof srcTokens !== 'string') { tokens = srcTokens; } else { const input = await this.prepare({ utterance: srcTokens }); tokens = await input.stems; } if (!Array.isArray(tokens)) { tokens = Object.keys(tokens); } return tokens.slice().sort().join(); } add(domain, utterance, intent) { if (!intent) { this.sentences.push({ domain: defaultDomainName, utterance: domain, intent: utterance, }); } else { this.sentences.push({ domain, utterance, intent }); } } getSentences() { return this.sentences; } remove(srcDomain, srcUtterance, srcIntent) { const domain = srcIntent ? srcDomain : defaultDomainName; const utterance = srcIntent ? srcUtterance : srcDomain; const intent = srcIntent || srcUtterance; for (let i = 0; i < this.sentences.length; i += 1) { const sentence = this.sentences[i]; if ( sentence.domain === domain && sentence.utterance === utterance && sentence.intent === intent ) { this.sentences.splice(i, 1); return true; } } return false; } async trainStemmer(srcInput) { const input = srcInput; if (!this.cache) { this.cache = { stem: this.container.get('stem'), }; } for (let i = 0; i < this.sentences.length; i += 1) { const current = this.sentences[i]; const subInput = { ...current, ...input }; await this.cache.stem.addForTraining(subInput); } await this.cache.stem.train(input); return input; } innerGenerateCorpus(domainName) { this.intentDict = {}; const result = {}; result[defaultDomainName] = []; for (let i = 0; i < this.sentences.length; i += 1) { const sentence = this.sentences[i]; this.intentDict[sentence.intent] = sentence.domain; const domain = domainName || sentence.domain; if (!result[domain]) { result[domain] = []; } const domainObj = result[domain]; domainObj.push({ utterance: sentence.utterance, intent: sentence.intent, }); if (!domainName) { result[defaultDomainName].push({ utterance: sentence.utterance, intent: sentence.domain, }); } } return result; } async generateCorpus(srcInput) { const input = srcInput; input.corpus = this.innerGenerateCorpus( this.settings.trainByDomain ? undefined : defaultDomainName ); return input; } async prepare(srcInput) { const input = srcInput; const isString = typeof input === 'string'; const utterance = isString ? input : input.utterance; const nlu = this.addDomain(defaultDomainName); const tokens = nlu.prepare(utterance); if (isString) { return tokens; } input.stems = tokens; return input; } async fillStemDict(srcInput) { this.stemDict = {}; for (let i = 0; i < this.sentences.length; i += 1) { const { utterance, intent, domain } = this.sentences[i]; const key = await this.generateStemKey(utterance); if (!key || key === '') { this.container .get('logger') .warn(`This utterance: "${utterance}" contains only stop words`); } this.stemDict[key] = { intent, domain, }; } return srcInput; } async innerTrain(srcInput) { const input = srcInput; const { corpus } = input; const keys = Object.keys(corpus); const status = {}; for (let i = 0; i < keys.length; i += 1) { const nlu = this.addDomain(keys[i]); const options = { useNoneFeature: this.settings.useNoneFeature }; if (srcInput.settings && srcInput.settings.log !== undefined) { options.log = srcInput.settings.log; } const result = await nlu.train(corpus[keys[i]], options); status[keys[i]] = result.status; } input.status = status; return input; } async train(settings) { const input = { domainManager: this, settings: settings || this.settings, }; return this.runPipeline(input, this.pipelineTrain); } matchAllowList(intent, allowList) { for (let i = 0; i < allowList.length; i += 1) { if (compareWildcars(intent, allowList[i])) { return true; } } return false; } async classifyByStemDict(utterance, domainName, allowList) { const key = await this.generateStemKey(utterance); const resolved = this.stemDict[key]; if (resolved && (!domainName || resolved.domain === domainName)) { if (allowList && !this.matchAllowList(resolved.intent, allowList)) { return undefined; } const classifications = []; classifications.push({ intent: resolved.intent, score: 1, }); const intents = Object.keys(this.intentDict); for (let i = 0; i < intents.length; i += 1) { if (intents[i] !== resolved.intent) { classifications.push({ intent: intents[i], score: 0 }); } } return { domain: resolved.domain, classifications }; } return undefined; } async innerClassify(srcInput, domainName) { const input = srcInput; const settings = this.applySettings({ ...input.settings }, this.settings); if (settings.useStemDict) { const result = await this.classifyByStemDict( input.utterance, domainName, srcInput.settings ? srcInput.settings.allowList : undefined ); if (result) { input.classification = result; input.explanation = [ { token: '', stem: '##exact', weight: 1, }, ]; return input; } } if (domainName) { const nlu = this.domains[domainName]; if (!nlu) { input.classification = { domain: 'default', classifications: [{ intent: 'None', score: 1 }], }; return input; } const nluAnswer = await nlu.process( input.utterance, input.settings || this.settings ); let classifications; if (Array.isArray(nluAnswer)) { classifications = nluAnswer; } else { classifications = nluAnswer.classifications; input.nluAnswer = nluAnswer; } let finalDomain; if (domainName === defaultDomainName) { if (classifications && classifications.length) { finalDomain = this.intentDict[classifications[0].intent]; } else { finalDomain = defaultDomainName; } } else { finalDomain = domainName; } input.classification = { domain: finalDomain, classifications, }; return input; } let domain = defaultDomainName; if ( (input.settings.trainByDomain === undefined && this.settings.trainByDomain) || input.settings.trainByDomain ) { const nlu = this.domains[defaultDomainName]; let classifications = await nlu.process(input.utterance); if (classifications.classifications) { classifications = classifications.classifications; } if (Object.keys(this.domains).length === 1) { input.classification = { domain: 'default', classifications, }; return input; } domain = classifications[0].intent; if (domain === 'None') { input.classification = { domain: 'default', classifications: [{ intent: 'None', score: 1 }], }; return input; } } return this.innerClassify(input, domain); } async defaultPipelineProcess(input) { const output = await this.innerClassify(input); return output.classification; } async process(utterance, settings) { const input = typeof utterance === 'string' ? { utterance, settings: settings || this.settings, } : utterance; if (this.pipelineProcess) { return this.runPipeline(input, this.pipelineProcess); } return this.defaultPipelineProcess(input); } toJSON() { const result = { settings: this.settings, stemDict: this.stemDict, intentDict: this.intentDict, sentences: this.sentences, domains: {}, }; delete result.settings.container; const keys = Object.keys(this.domains); for (let i = 0; i < keys.length; i += 1) { result.domains[keys[i]] = this.domains[keys[i]].toJSON(); } return result; } fromJSON(json) { this.applySettings(this.settings, json.settings); this.stemDict = json.stemDict; this.intentDict = json.intentDict; this.sentences = json.sentences; const keys = Object.keys(json.domains); for (let i = 0; i < keys.length; i += 1) { const domain = this.addDomain(keys[i]); domain.fromJSON(json.domains[keys[i]]); } } } module.exports = DomainManager;