UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

423 lines (381 loc) 14.3 kB
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>JSDoc: Source: ner/trim-named-entity.js</title>

    <script src="scripts/prettify/prettify.js"> </script>
    <script src="scripts/prettify/lang-css.js"> </script>
    <!--[if lt IE 9]>
      <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
    <![endif]-->
    <link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css">
    <link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css">
</head>

<body>

<div id="main">

    <h1 class="page-title">Source: ner/trim-named-entity.js</h1>

    <section>
        <article>
            <pre class="prettyprint source linenums"><code>/*
 * Copyright (c) AXA Shared Services Spain S.A.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

const NamedEntity = require('./named-entity');
const RegexNamedEntity = require('./regex-named-entity');

/**
 * Wraps a value in an array unless it already is one.
 * @param {*} value Single item or array of items.
 * @returns {Array} The given array, or a one-element array holding value.
 */
function toArray(value) {
  return Array.isArray(value) ? value : [value];
}

/**
 * Backslash-escapes the regular expression metacharacters of a text, so a
 * trigger word such as "c++" or "U.S." is matched literally instead of
 * being interpreted as a pattern.
 * @param {string} text Text that will be embedded in a pattern.
 * @returns {string} Escaped text.
 */
function escapeRegExp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&amp;');
}

/**
 * Builds the result object for the utterance slice [startPos, endPos).
 * @param {string} entity Name of the entity reported in the result.
 * @param {string} type Condition type reported in the result.
 * @param {string} utterance Full utterance.
 * @param {number} startPos Index of the first character of the slice.
 * @param {number} endPos Index just past the last character of the slice.
 * @returns {Object} Result whose end property is inclusive (endPos - 1).
 */
function buildSlice(entity, type, utterance, startPos, endPos) {
  const text = utterance.substring(startPos, endPos);
  return {
    type,
    start: startPos,
    end: endPos - 1,
    len: text.length,
    accuracy: 0.99,
    sourceText: text,
    utteranceText: text,
    entity,
  };
}

/**
 * Class for the Trim Named Entity.
 */
class TrimNamedEntity extends NamedEntity {
  /**
   * Constructor of the class.
   * @param {Object} settings Settings for the instance.
   */
  constructor(settings) {
    super(settings);
    this.type = 'trim';
  }

  /**
   * Registers a "between" condition: the entity is the text found between
   * any of the left words and any of the right words.
   * @param {string|string[]} srcLanguages Language or languages.
   * @param {string|string[]} srcLeftWords Word(s) opening the entity.
   * @param {string|string[]} srcRightWords Word(s) closing the entity.
   * @param {Object} [srcOptions] Options. noSpaces === true matches the
   *   words without requiring a space on each side; caseSensitive === true
   *   disables the default case-insensitive matching.
   */
  addBetweenCondition(srcLanguages, srcLeftWords, srcRightWords, srcOptions) {
    const options = srcOptions || {};
    const languages = toArray(srcLanguages);
    const leftWords = toArray(srcLeftWords);
    const rightWords = toArray(srcRightWords);
    // Words are escaped so that they are always matched literally.
    const toPattern = word => {
      const literal = escapeRegExp(word);
      return options.noSpaces === true ? literal : ` ${literal} `;
    };
    const conditions = [];
    leftWords.forEach(leftWord => {
      rightWords.forEach(rightWord => {
        conditions.push(
          `(?&lt;=${toPattern(leftWord)})(.*)(?=${toPattern(rightWord)})`
        );
      });
    });
    let regex = `/${conditions.join('|')}/g`;
    if (options.caseSensitive !== true) {
      regex += 'i';
    }
    languages.forEach(language => {
      const locale = this.getLocale(language);
      if (!locale.conditions) {
        locale.conditions = [];
      }
      locale.conditions.push({
        type: 'between',
        leftWords,
        rightWords,
        regex: RegexNamedEntity.str2regex(regex),
        options,
      });
    });
  }

  /**
   * Registers a position condition (text before or after trigger words).
   * @param {string} position Condition type: 'before', 'beforeFirst',
   *   'beforeLast', 'after', 'afterFirst' or 'afterLast'.
   * @param {string|string[]} srcLanguages Language or languages.
   * @param {string|string[]} srcWords Trigger word or words.
   * @param {Object} [srcOptions] Options stored with the condition.
   */
  addPositionCondition(position, srcLanguages, srcWords, srcOptions) {
    const options = srcOptions || {};
    const words = toArray(srcWords);
    toArray(srcLanguages).forEach(language => {
      const locale = this.getLocale(language);
      if (!locale.conditions) {
        locale.conditions = [];
      }
      locale.conditions.push({ type: position, words, options });
    });
  }

  /**
   * Registers an 'after' condition.
   * @param {string|string[]} srcLanguages Language or languages.
   * @param {string|string[]} srcWords Trigger word or words.
   * @param {Object} [srcOptions] Options stored with the condition.
   */
  addAfterCondition(srcLanguages, srcWords, srcOptions) {
    this.addPositionCondition('after', srcLanguages, srcWords, srcOptions);
  }

  /**
   * Registers an 'afterFirst' condition.
   * @param {string|string[]} srcLanguages Language or languages.
   * @param {string|string[]} srcWords Trigger word or words.
   * @param {Object} [srcOptions] Options stored with the condition.
   */
  addAfterFirstCondition(srcLanguages, srcWords, srcOptions) {
    this.addPositionCondition('afterFirst', srcLanguages, srcWords, srcOptions);
  }

  /**
   * Registers an 'afterLast' condition.
   * @param {string|string[]} srcLanguages Language or languages.
   * @param {string|string[]} srcWords Trigger word or words.
   * @param {Object} [srcOptions] Options stored with the condition.
   */
  addAfterLastCondition(srcLanguages, srcWords, srcOptions) {
    this.addPositionCondition('afterLast', srcLanguages, srcWords, srcOptions);
  }

  /**
   * Registers a 'before' condition.
   * @param {string|string[]} srcLanguages Language or languages.
   * @param {string|string[]} srcWords Trigger word or words.
   * @param {Object} [srcOptions] Options stored with the condition.
   */
  addBeforeCondition(srcLanguages, srcWords, srcOptions) {
    this.addPositionCondition('before', srcLanguages, srcWords, srcOptions);
  }

  /**
   * Registers a 'beforeFirst' condition.
   * @param {string|string[]} srcLanguages Language or languages.
   * @param {string|string[]} srcWords Trigger word or words.
   * @param {Object} [srcOptions] Options stored with the condition.
   */
  addBeforeFirstCondition(srcLanguages, srcWords, srcOptions) {
    this.addPositionCondition(
      'beforeFirst',
      srcLanguages,
      srcWords,
      srcOptions
    );
  }

  /**
   * Registers a 'beforeLast' condition.
   * @param {string|string[]} srcLanguages Language or languages.
   * @param {string|string[]} srcWords Trigger word or words.
   * @param {Object} [srcOptions] Options stored with the condition.
   */
  addBeforeLastCondition(srcLanguages, srcWords, srcOptions) {
    this.addPositionCondition('beforeLast', srcLanguages, srcWords, srcOptions);
  }

  /**
   * Tells whether an extracted text is listed in the skip option of the
   * condition. The whole text is compared, ignoring case unless the
   * caseSensitive option of the condition is truthy.
   * @param {string} word Extracted text.
   * @param {Object} condition Condition that produced the text.
   * @returns {boolean} True when the text must be discarded.
   */
  mustSkip(word, condition) {
    const { options } = condition;
    if (!options || !options.skip || options.skip.length === 0) {
      return false;
    }
    const skipList = Array.from(options.skip);
    if (options.caseSensitive) {
      return skipList.some(skipWord => skipWord === word);
    }
    const lowered = word.toLowerCase();
    return skipList.some(skipWord => skipWord.toLowerCase() === lowered);
  }

  /**
   * Applies a 'between' condition to an utterance.
   * @param {string} utterance Text to search.
   * @param {Object} condition Condition built by addBetweenCondition.
   * @returns {Object[]} Results that are not discarded by mustSkip; start
   *   and end are inclusive indexes into the utterance.
   */
  matchBetween(utterance, condition) {
    const { regex } = condition;
    // One space is added on each side so that a trigger word at the very
    // start or end of the utterance still has the spaces the pattern asks
    // for; the offsets below undo that extra leading space.
    const padded = ` ${utterance} `;
    const result = [];
    // The regex is global and shared between calls: always scan from 0.
    regex.lastIndex = 0;
    let match = regex.exec(padded);
    while (match) {
      if (match[0].length === 0) {
        // exec does not advance lastIndex on an empty match, so without
        // this step the scan would never end. Empty texts are not reported.
        regex.lastIndex += 1;
      } else {
        result.push({
          type: 'between',
          start: match.index - 1,
          end: regex.lastIndex - 2,
          len: match[0].length,
          accuracy: 1,
          sourceText: match[0],
          utteranceText: match[0],
          entity: this.name,
        });
      }
      match = regex.exec(padded);
    }
    return result.filter(item => !this.mustSkip(item.utteranceText, condition));
  }

  /**
   * Finds every occurrence of a word inside an utterance. The word is
   * matched literally.
   * @param {string} utterance Text to search.
   * @param {string} word Word to find.
   * @param {boolean} [caseSensitive=false] True to match case exactly.
   * @param {boolean} [noSpaces=false] True to match the bare word; otherwise
   *   the word must have a space before it, after it, or both, and those
   *   spaces are part of the reported range.
   * @returns {Object[]} Items with start (inclusive) and end (exclusive).
   */
  findWord(utterance, word, caseSensitive = false, noSpaces = false) {
    const literal = escapeRegExp(word);
    const regex = new RegExp(
      noSpaces ? literal : ` ${literal} | ${literal}|${literal} `,
      caseSensitive ? 'g' : 'ig'
    );
    const result = [];
    let match = regex.exec(utterance);
    while (match) {
      if (match[0].length === 0) {
        // An empty word matches everywhere without advancing: step over it.
        regex.lastIndex += 1;
      } else {
        result.push({ start: match.index, end: regex.lastIndex });
      }
      match = regex.exec(utterance);
    }
    return result;
  }

  /**
   * For every word position, returns the text between the previous
   * position (or the start of the utterance) and that position.
   * @param {string} utterance Full utterance.
   * @param {Object[]} wordPositions Positions returned by findWord.
   * @returns {Object[]} One 'before' result per position.
   */
  getBeforeResults(utterance, wordPositions) {
    const result = [];
    let startPos = 0;
    wordPositions.forEach(position => {
      result.push(
        buildSlice(this.name, 'before', utterance, startPos, position.start)
      );
      startPos = position.end;
    });
    return result;
  }

  /**
   * Returns the text from the start of the utterance to the first position.
   * @param {string} utterance Full utterance.
   * @param {Object[]} wordPositions Positions returned by findWord.
   * @returns {Object[]} One 'beforeFirst' result, or [] without positions.
   */
  getBeforeFirstResults(utterance, wordPositions) {
    if (wordPositions.length === 0) {
      return [];
    }
    const endPos = wordPositions[0].start;
    return [buildSlice(this.name, 'beforeFirst', utterance, 0, endPos)];
  }

  /**
   * Returns the text from the start of the utterance to the last position.
   * @param {string} utterance Full utterance.
   * @param {Object[]} wordPositions Positions returned by findWord.
   * @returns {Object[]} One 'beforeLast' result, or [] without positions.
   */
  getBeforeLastResults(utterance, wordPositions) {
    if (wordPositions.length === 0) {
      return [];
    }
    const endPos = wordPositions[wordPositions.length - 1].start;
    return [buildSlice(this.name, 'beforeLast', utterance, 0, endPos)];
  }

  /**
   * For every word position, returns the text between that position and
   * the next one (or the end of the utterance).
   * @param {string} utterance Full utterance.
   * @param {Object[]} wordPositions Positions returned by findWord.
   * @returns {Object[]} One 'after' result per position, in utterance order.
   */
  getAfterResults(utterance, wordPositions) {
    const result = [];
    let endPos = utterance.length;
    // Walk backwards so each slice ends where the following word starts.
    for (let i = wordPositions.length - 1; i >= 0; i -= 1) {
      result.unshift(
        buildSlice(this.name, 'after', utterance, wordPositions[i].end, endPos)
      );
      endPos = wordPositions[i].start;
    }
    return result;
  }

  /**
   * Returns the text from the first position to the end of the utterance.
   * @param {string} utterance Full utterance.
   * @param {Object[]} wordPositions Positions returned by findWord.
   * @returns {Object[]} One 'afterFirst' result, or [] without positions.
   */
  getAfterFirstResults(utterance, wordPositions) {
    if (wordPositions.length === 0) {
      return [];
    }
    const startPos = wordPositions[0].end;
    return [
      buildSlice(this.name, 'afterFirst', utterance, startPos, utterance.length),
    ];
  }

  /**
   * Returns the text from the last position to the end of the utterance.
   * @param {string} utterance Full utterance.
   * @param {Object[]} wordPositions Positions returned by findWord.
   * @returns {Object[]} One 'afterLast' result, or [] without positions.
   */
  getAfterLastResults(utterance, wordPositions) {
    if (wordPositions.length === 0) {
      return [];
    }
    const startPos = wordPositions[wordPositions.length - 1].end;
    return [
      buildSlice(this.name, 'afterLast', utterance, startPos, utterance.length),
    ];
  }

  /**
   * Dispatches to the result builder of a position condition type.
   * @param {string} utterance Full utterance.
   * @param {Object[]} wordPositions Positions returned by findWord.
   * @param {string} type Condition type.
   * @returns {Object[]} Results for the type, or [] for an unknown type.
   */
  getResults(utterance, wordPositions, type) {
    switch (type) {
      case 'before':
        return this.getBeforeResults(utterance, wordPositions);
      case 'beforeFirst':
        return this.getBeforeFirstResults(utterance, wordPositions);
      case 'beforeLast':
        return this.getBeforeLastResults(utterance, wordPositions);
      case 'after':
        return this.getAfterResults(utterance, wordPositions);
      case 'afterFirst':
        return this.getAfterFirstResults(utterance, wordPositions);
      case 'afterLast':
        return this.getAfterLastResults(utterance, wordPositions);
      default:
        return [];
    }
  }

  /**
   * Applies a position condition to an utterance.
   * @param {string} utterance Text to search.
   * @param {Object} condition Condition built by addPositionCondition.
   * @returns {Object[]} Results that are not discarded by mustSkip.
   */
  match(utterance, condition) {
    const options = condition.options || {};
    // The condition options are forwarded so that the word search follows
    // the same caseSensitive and noSpaces settings as the rest of the
    // condition.
    const caseSensitive = Boolean(options.caseSensitive);
    const noSpaces = Boolean(options.noSpaces);
    const result = [];
    condition.words.forEach(rawWord => {
      // The leading space keeps the trigger from matching the tail of a
      // longer word.
      const word = noSpaces ? rawWord : ` ${rawWord}`;
      const wordPositions = this.findWord(
        utterance,
        word,
        caseSensitive,
        noSpaces
      );
      if (wordPositions.length > 0) {
        result.push(
          ...this.getResults(utterance, wordPositions, condition.type)
        );
      }
    });
    return result.filter(item => !this.mustSkip(item.utteranceText, condition));
  }

  /**
   * Extracts the entity from an utterance by applying every condition
   * registered for the language.
   * @param {string} utterance Text to search.
   * @param {string} language Language of the utterance.
   * @returns {Object[]} Results of all the conditions, in condition order;
   *   [] when the language has no conditions.
   */
  extract(utterance, language) {
    const result = [];
    const locale = this.getLocaleRules(language);
    if (!locale || !locale.conditions) {
      return result;
    }
    locale.conditions.forEach(condition => {
      const matches =
        condition.type === 'between'
          ? this.matchBetween(utterance, condition)
          : this.match(utterance, condition);
      result.push(...matches);
    });
    return result;
  }
}

module.exports = TrimNamedEntity;
</code></pre>
        </article>
    </section>

</div>

<nav>
    <h2><a href="index.html">Home</a></h2>
    <h3>Classes</h3>
    <ul>
        <li><a href="BinaryNeuralNetworkClassifier.html">BinaryNeuralNetworkClassifier</a></li>
        <li><a href="Classifier.html">Classifier</a></li>
        <li><a href="ConversationContext.html">ConversationContext</a></li>
        <li><a href="DutchStemmer.html">DutchStemmer</a></li>
        <li><a href="EnglishStemmer.html">EnglishStemmer</a></li>
        <li><a href="EnumNamedEntity.html">EnumNamedEntity</a></li>
        <li><a href="Evaluator.html">Evaluator</a></li>
        <li><a href="HungarianStemmer.html">HungarianStemmer</a></li>
        <li><a href="ItalianStemmer.html">ItalianStemmer</a></li>
        <li><a href="Language.html">Language</a></li>
        <li><a href="LogisticRegressionClassifier.html">LogisticRegressionClassifier</a></li>
        <li><a href="Matrix.html">Matrix</a></li>
        <li><a href="MemoryConversationContext.html">MemoryConversationContext</a></li>
        <li><a href="NamedEntity.html">NamedEntity</a></li>
        <li><a href="NerManager.html">NerManager</a></li>
        <li><a href="NlgManager.html">NlgManager</a></li>
        <li><a href="NlpClassifier.html">NlpClassifier</a></li>
        <li><a href="NlpManager.html">NlpManager</a></li>
        <li><a href="NorwegianStemmer.html">NorwegianStemmer</a></li>
        <li><a href="PortugueseStemmer.html">PortugueseStemmer</a></li>
        <li><a href="Recognizer.html">Recognizer</a></li>
        <li><a href="RegexNamedEntity.html">RegexNamedEntity</a></li>
        <li><a href="RomanianStemmer.html">RomanianStemmer</a></li>
        <li><a href="RussianStemmer.html">RussianStemmer</a></li>
        <li><a href="SentimentAnalyzer.html">SentimentAnalyzer</a></li>
        <li><a href="SentimentManager.html">SentimentManager</a></li>
        <li><a href="SimilarSearch.html">SimilarSearch</a></li>
        <li><a href="SlotManager.html">SlotManager</a></li>
        <li><a href="StemmerJa.html">StemmerJa</a></li>
        <li><a href="SwedishStemmer.html">SwedishStemmer</a></li>
        <li><a href="Tokenizer.html">Tokenizer</a></li>
        <li><a href="TrimNamedEntity.html">TrimNamedEntity</a></li>
        <li><a href="TurkishStemmer.html">TurkishStemmer</a></li>
        <li><a href="Vector.html">Vector</a></li>
        <li><a href="XTable.html">XTable</a></li>
    </ul>
    <h3>Global</h3>
    <ul>
        <li><a href="global.html#endsinArr">endsinArr</a></li>
        <li><a href="global.html#prelude">prelude</a></li>
        <li><a href="global.html#regions">regions</a></li>
        <li><a href="global.html#stem">stem</a></li>
        <li><a href="global.html#stopwords">stopwords</a></li>
    </ul>
</nav>

<br class="clear">

<footer>
    Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.5.5</a> on Sat Oct 13 2018 19:14:51 GMT+0200 (CEST)
</footer>

<script> prettyPrint(); </script>
<script src="scripts/linenumber.js"> </script>
</body>
</html>