UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

383 lines (352 loc) 14.8 kB
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"> <title>JSDoc: Source: nlp/nlp-util.js</title> <script src="scripts/prettify/prettify.js"> </script> <script src="scripts/prettify/lang-css.js"> </script> <!--[if lt IE 9]> <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script> <![endif]--> <link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css"> <link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css"> </head> <body> <div id="main"> <h1 class="page-title">Source: nlp/nlp-util.js</h1> <section> <article> <pre class="prettyprint source linenums"><code>/* * Copyright (c) AXA Shared Services Spain S.A. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ const ArabicStemmer = require('./stemmers/arabic-stemmer'); const ArmenianStemmer = require('./stemmers/armenian-stemmer'); const BasqueStemmer = require('./stemmers/basque-stemmer'); const CatalanStemmer = require('./stemmers/catalan-stemmer'); const CzechStemmer = require('./stemmers/czech-stemmer'); const ChineseStemmer = require('./stemmers/chinese-stemmer'); const ChineseTokenizer = require('./tokenizers/chinese-tokenizer'); const DanishStemmer = require('./stemmers/danish-stemmer'); const DutchStemmer = require('./stemmers/dutch-stemmer'); const EnglishStemmer = require('./stemmers/english-stemmer'); const FinnishStemmer = require('./stemmers/finnish-stemmer'); const FrenchStemmer = require('./stemmers/french-stemmer'); const GermanStemmer = require('./stemmers/german-stemmer'); const HungarianStemmer = require('./stemmers/hungarian-stemmer'); const IrishStemmer = require('./stemmers/irish-stemmer'); const ItalianStemmer = require('./stemmers/italian-stemmer'); const NorwegianStemmer = require('./stemmers/norwegian-stemmer'); const PortugueseStemmer = require('./stemmers/portuguese-stemmer'); const PunctTokenizer = require('./tokenizers/punct-tokenizer'); const RomanianStemmer = require('./stemmers/romanian-stemmer'); const RussianStemmer = require('./stemmers/russian-stemmer'); const SloveneStemmer = require('./stemmers/slovene-stemmer'); const SpanishStemmer = require('./stemmers/spanish-stemmer'); const SwedishStemmer = require('./stemmers/swedish-stemmer'); const TamilStemmer = require('./stemmers/tamil-stemmer'); const TurkishStemmer = require('./stemmers/turkish-stemmer'); const PorterStemmer = require('./stemmers/natural/porter-stemmer'); const PorterStemmerEs = require('./stemmers/natural/porter-stemmer-es'); const PorterStemmerFa = require('./stemmers/natural/porter-stemmer-fa'); const PorterStemmerFr = require('./stemmers/natural/porter-stemmer-fr'); const PorterStemmerRu = require('./stemmers/natural/porter-stemmer-ru'); const PorterStemmerIt = require('./stemmers/natural/porter-stemmer-it'); const PorterStemmerNo = require('./stemmers/natural/porter-stemmer-no'); const PorterStemmerPt = require('./stemmers/natural/porter-stemmer-pt'); const PorterStemmerSv = require('./stemmers/natural/porter-stemmer-sv'); const PorterStemmerNl = require('./stemmers/natural/porter-stemmer-nl'); const StemmerJa = require('./stemmers/natural/stemmer-ja'); const StemmerId = require('./stemmers/natural/indonesian/stemmer_id'); const { AggressiveTokenizer, AggressiveTokenizerFa, AggressiveTokenizerFr, AggressiveTokenizerRu, AggressiveTokenizerEs, AggressiveTokenizerId, AggressiveTokenizerIt, AggressiveTokenizerNl, AggressiveTokenizerNo, AggressiveTokenizerPt, AggressiveTokenizerPl, AggressiveTokenizerSv, TokenizerJa, } = require('./tokenizers'); class NlpUtil { /** * Given a locale, get the 2 character one. * @param {String} locale Locale of the language. * @returns {String} Locale in 2 character length. */ static getTruncatedLocale(locale) { return locale ? locale.substr(0, 2).toLowerCase() : undefined; } static getStemmer(locale) { switch (locale) { case 'en': // English if (NlpUtil.useAlternative[locale]) { return new EnglishStemmer(NlpUtil.getTokenizer(locale)); } return PorterStemmer; case 'fa': // Farsi return PorterStemmerFa; case 'fr': // French if (NlpUtil.useAlternative[locale]) { return new FrenchStemmer(NlpUtil.getTokenizer(locale)); } return PorterStemmerFr; // French case 'ru': // Russian if (NlpUtil.useAlternative[locale]) { return new RussianStemmer(NlpUtil.getTokenizer(locale)); } return PorterStemmerRu; case 'es': // Spanish if (NlpUtil.useAlternative[locale]) { return new SpanishStemmer(NlpUtil.getTokenizer(locale)); } return PorterStemmerEs; case 'it': // Italian if (NlpUtil.useAlternative[locale]) { return new ItalianStemmer(NlpUtil.getTokenizer(locale)); } return PorterStemmerIt; case 'no': // Norwegian if (NlpUtil.useAlternative[locale]) { return new NorwegianStemmer(NlpUtil.getTokenizer(locale)); } return PorterStemmerNo; case 'pt': // Portuguese if (NlpUtil.useAlternative[locale]) { return new PortugueseStemmer(NlpUtil.getTokenizer(locale)); } return PorterStemmerPt; case 'sv': // Swedish if (NlpUtil.useAlternative[locale]) { return new SwedishStemmer(NlpUtil.getTokenizer(locale)); } return PorterStemmerSv; case 'nl': // Dutch if (NlpUtil.useAlternative[locale]) { return new DutchStemmer(NlpUtil.getTokenizer(locale)); } return PorterStemmerNl; case 'id': return StemmerId; // Indonesian case 'ja': return new StemmerJa(); // Japanese case 'ar': return new ArabicStemmer(NlpUtil.getTokenizer(locale)); // Arabic case 'hy': return new ArmenianStemmer(NlpUtil.getTokenizer(locale)); // Armenian case 'eu': return new BasqueStemmer(NlpUtil.getTokenizer(locale)); // Basque case 'ca': return new CatalanStemmer(NlpUtil.getTokenizer(locale)); // Catalan case 'cs': return new CzechStemmer(NlpUtil.getTokenizer(locale)); // Czech case 'da': return new DanishStemmer(NlpUtil.getTokenizer(locale)); // Danish case 'fi': return new FinnishStemmer(NlpUtil.getTokenizer(locale)); // Finnish case 'de': return new GermanStemmer(NlpUtil.getTokenizer(locale)); // German case 'hu': return new HungarianStemmer(NlpUtil.getTokenizer(locale)); // Hungarian case 'ga': return new IrishStemmer(NlpUtil.getTokenizer(locale)); // Irish case 'ro': return new RomanianStemmer(NlpUtil.getTokenizer(locale)); // Romanian case 'sl': return new SloveneStemmer(NlpUtil.getTokenizer(locale)); // Slovene case 'ta': return new TamilStemmer(NlpUtil.getTokenizer(locale)); // Tamil case 'tr': return new TurkishStemmer(NlpUtil.getTokenizer(locale)); // Turkish case 'zh': return new ChineseStemmer(); // Chinese default: return PorterStemmer; } } static getTokenizer(locale) { switch (locale) { case 'en': return new AggressiveTokenizer(); // English case 'fa': return new AggressiveTokenizerFa(); // Farsi case 'fr': return new AggressiveTokenizerFr(); // French case 'ru': return new AggressiveTokenizerRu(); // Russian case 'es': return new AggressiveTokenizerEs(); // Spanish case 'it': return new AggressiveTokenizerIt(); // Italian case 'nl': return new AggressiveTokenizerNl(); // Dutch case 'no': return new AggressiveTokenizerNo(); // Norwegian case 'pt': return new AggressiveTokenizerPt(); // Portuguese case 'pl': return new AggressiveTokenizerPl(); // Polish case 'sv': return new AggressiveTokenizerSv(); // Swedish case 'id': return new AggressiveTokenizerId(); // Indonesian case 'ja': return new TokenizerJa(); // Japanese case 'ar': return new PunctTokenizer(); // Arabic case 'hy': return new PunctTokenizer(); // Armenian case 'eu': return new PunctTokenizer(); // Basque case 'ca': return new PunctTokenizer(); // Catalan case 'cs': return new PunctTokenizer(); // Czech case 'da': return new PunctTokenizer(); // Danish case 'fi': return new PunctTokenizer(); // Finnish case 'de': return new PunctTokenizer(); // German case 'hu': return new PunctTokenizer(); // Hungarian case 'ga': return new PunctTokenizer(); // Irish case 'ro': return new PunctTokenizer(); // Romanian case 'sl': return new PunctTokenizer(); // Slovene case 'ta': return new PunctTokenizer(); // Tamil case 'tr': return new PunctTokenizer(); // Turkish case 'zh': return new ChineseTokenizer(); // Chinese default: return new PunctTokenizer(); } } static getCulture(locale) { switch (locale) { case 'en': return 'en-us'; // English case 'fa': return 'fa-ir'; // Farsi case 'fr': return 'fr-fr'; // French case 'ru': return 'ru-ru'; // Russian case 'es': return 'es-es'; // Spanish case 'it': return 'it-it'; // Italian case 'nl': return 'nl-nl'; // Dutch case 'no': return 'no-no'; // Norwegian case 'pt': return 'pt-br'; // Portuguese case 'pl': return 'pl-pl'; // Polish case 'sv': return 'sv-se'; // Swedish case 'id': return 'id-id'; // Indonesian case 'ja': return 'ja-jp'; // Japanese case 'ar': return 'ar-ae'; // Arabic case 'hy': return 'hy-am'; // Armenian case 'eu': return 'eu-es'; // Basque case 'ca': return 'ca-es'; // Catalan case 'cs': return 'cs-cz'; // Czech case 'da': return 'da-dk'; // Danish case 'fi': return 'fi-fi'; // Finnish case 'de': return 'de-de'; // German case 'hu': return 'hu-hu'; // Hungarian case 'ga': return 'ga-ie'; // Irish case 'ro': return 'ro-ro'; // Romanian case 'sl': return 'sl-sl'; // Slovene case 'ta': return 'ta-in'; // Tamil case 'tr': return 'tr-tr'; // Turkish case 'zh': return 'zh-cn'; // Chinese default: return 'en-us'; } } } NlpUtil.useAlternative = { en: false, fa: false, fr: false, ru: false, es: false, it: false, nl: false, no: false, pt: false, pl: false, sv: false, id: false, ja: false, ca: false, da: false, fi: false, de: false, hu: false, ro: false, tr: false, }; module.exports = NlpUtil; </code></pre> </article> </section> </div> <nav> <h2><a href="index.html">Home</a></h2><h3>Classes</h3><ul><li><a href="BinaryNeuralNetworkClassifier.html">BinaryNeuralNetworkClassifier</a></li><li><a href="Classifier.html">Classifier</a></li><li><a href="ConversationContext.html">ConversationContext</a></li><li><a href="DutchStemmer.html">DutchStemmer</a></li><li><a href="EnglishStemmer.html">EnglishStemmer</a></li><li><a href="EnumNamedEntity.html">EnumNamedEntity</a></li><li><a href="Evaluator.html">Evaluator</a></li><li><a href="HungarianStemmer.html">HungarianStemmer</a></li><li><a href="ItalianStemmer.html">ItalianStemmer</a></li><li><a href="Language.html">Language</a></li><li><a href="LogisticRegressionClassifier.html">LogisticRegressionClassifier</a></li><li><a href="Matrix.html">Matrix</a></li><li><a href="MemoryConversationContext.html">MemoryConversationContext</a></li><li><a href="NamedEntity.html">NamedEntity</a></li><li><a href="NerManager.html">NerManager</a></li><li><a href="NlgManager.html">NlgManager</a></li><li><a href="NlpClassifier.html">NlpClassifier</a></li><li><a href="NlpManager.html">NlpManager</a></li><li><a href="NorwegianStemmer.html">NorwegianStemmer</a></li><li><a href="PortugueseStemmer.html">PortugueseStemmer</a></li><li><a href="Recognizer.html">Recognizer</a></li><li><a href="RegexNamedEntity.html">RegexNamedEntity</a></li><li><a href="RomanianStemmer.html">RomanianStemmer</a></li><li><a href="RussianStemmer.html">RussianStemmer</a></li><li><a href="SentimentAnalyzer.html">SentimentAnalyzer</a></li><li><a href="SentimentManager.html">SentimentManager</a></li><li><a href="SimilarSearch.html">SimilarSearch</a></li><li><a href="SlotManager.html">SlotManager</a></li><li><a href="StemmerJa.html">StemmerJa</a></li><li><a href="SwedishStemmer.html">SwedishStemmer</a></li><li><a href="Tokenizer.html">Tokenizer</a></li><li><a href="TrimNamedEntity.html">TrimNamedEntity</a></li><li><a href="TurkishStemmer.html">TurkishStemmer</a></li><li><a href="Vector.html">Vector</a></li><li><a href="XTable.html">XTable</a></li></ul><h3>Global</h3><ul><li><a href="global.html#endsinArr">endsinArr</a></li><li><a href="global.html#prelude">prelude</a></li><li><a href="global.html#regions">regions</a></li><li><a href="global.html#stem">stem</a></li><li><a href="global.html#stopwords">stopwords</a></li></ul> </nav> <br class="clear"> <footer> Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.5.5</a> on Sat Oct 13 2018 19:14:51 GMT+0200 (CEST) </footer> <script> prettyPrint(); </script> <script src="scripts/linenumber.js"> </script> </body> </html>