node-nlp
Version:
Library for NLU (Natural Language Understanding) done in Node.js
383 lines (352 loc) • 14.8 kB
HTML
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>JSDoc: Source: nlp/nlp-util.js</title>
<script src="scripts/prettify/prettify.js"> </script>
<script src="scripts/prettify/lang-css.js"> </script>
<!--[if lt IE 9]>
<script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
<link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css">
<link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css">
</head>
<body>
<div id="main">
<h1 class="page-title">Source: nlp/nlp-util.js</h1>
<section>
<article>
<pre class="prettyprint source linenums"><code>/*
* Copyright (c) AXA Shared Services Spain S.A.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
const ArabicStemmer = require('./stemmers/arabic-stemmer');
const ArmenianStemmer = require('./stemmers/armenian-stemmer');
const BasqueStemmer = require('./stemmers/basque-stemmer');
const CatalanStemmer = require('./stemmers/catalan-stemmer');
const CzechStemmer = require('./stemmers/czech-stemmer');
const ChineseStemmer = require('./stemmers/chinese-stemmer');
const ChineseTokenizer = require('./tokenizers/chinese-tokenizer');
const DanishStemmer = require('./stemmers/danish-stemmer');
const DutchStemmer = require('./stemmers/dutch-stemmer');
const EnglishStemmer = require('./stemmers/english-stemmer');
const FinnishStemmer = require('./stemmers/finnish-stemmer');
const FrenchStemmer = require('./stemmers/french-stemmer');
const GermanStemmer = require('./stemmers/german-stemmer');
const HungarianStemmer = require('./stemmers/hungarian-stemmer');
const IrishStemmer = require('./stemmers/irish-stemmer');
const ItalianStemmer = require('./stemmers/italian-stemmer');
const NorwegianStemmer = require('./stemmers/norwegian-stemmer');
const PortugueseStemmer = require('./stemmers/portuguese-stemmer');
const PunctTokenizer = require('./tokenizers/punct-tokenizer');
const RomanianStemmer = require('./stemmers/romanian-stemmer');
const RussianStemmer = require('./stemmers/russian-stemmer');
const SloveneStemmer = require('./stemmers/slovene-stemmer');
const SpanishStemmer = require('./stemmers/spanish-stemmer');
const SwedishStemmer = require('./stemmers/swedish-stemmer');
const TamilStemmer = require('./stemmers/tamil-stemmer');
const TurkishStemmer = require('./stemmers/turkish-stemmer');
const PorterStemmer = require('./stemmers/natural/porter-stemmer');
const PorterStemmerEs = require('./stemmers/natural/porter-stemmer-es');
const PorterStemmerFa = require('./stemmers/natural/porter-stemmer-fa');
const PorterStemmerFr = require('./stemmers/natural/porter-stemmer-fr');
const PorterStemmerRu = require('./stemmers/natural/porter-stemmer-ru');
const PorterStemmerIt = require('./stemmers/natural/porter-stemmer-it');
const PorterStemmerNo = require('./stemmers/natural/porter-stemmer-no');
const PorterStemmerPt = require('./stemmers/natural/porter-stemmer-pt');
const PorterStemmerSv = require('./stemmers/natural/porter-stemmer-sv');
const PorterStemmerNl = require('./stemmers/natural/porter-stemmer-nl');
const StemmerJa = require('./stemmers/natural/stemmer-ja');
const StemmerId = require('./stemmers/natural/indonesian/stemmer_id');
const {
AggressiveTokenizer,
AggressiveTokenizerFa,
AggressiveTokenizerFr,
AggressiveTokenizerRu,
AggressiveTokenizerEs,
AggressiveTokenizerId,
AggressiveTokenizerIt,
AggressiveTokenizerNl,
AggressiveTokenizerNo,
AggressiveTokenizerPt,
AggressiveTokenizerPl,
AggressiveTokenizerSv,
TokenizerJa,
} = require('./tokenizers');
/**
 * Turns a stemmer constructor lookup into a factory that instantiates the
 * stemmer around the tokenizer of the requested locale.
 * The constructor is looked up lazily so it is resolved at call time.
 * @param {Function} pickStemmer Function returning the stemmer constructor.
 * @returns {Function} Factory receiving a locale and returning a new stemmer.
 */
function stemmerWithTokenizer(pickStemmer) {
  return (locale) => {
    const Stemmer = pickStemmer();
    return new Stemmer(NlpUtil.getTokenizer(locale));
  };
}

/**
 * Builds a factory for the locales that have a Porter stemmer by default and
 * an alternative stemmer enabled through NlpUtil.useAlternative[locale].
 * @param {Function} pickPorter Function returning the imported Porter stemmer.
 * @param {Function} pickAlternative Function returning the alternative
 * stemmer constructor.
 * @returns {Function} Factory receiving a locale and returning the stemmer.
 */
function porterUnlessAlternative(pickPorter, pickAlternative) {
  const buildAlternative = stemmerWithTokenizer(pickAlternative);
  return (locale) =>
    NlpUtil.useAlternative[locale] ? buildAlternative(locale) : pickPorter();
}

/**
 * Stemmer factories indexed by 2 character locale. Locales that are not
 * listed fall back to the English Porter stemmer in NlpUtil.getStemmer.
 */
const STEMMER_FACTORIES = new Map([
  // Porter stemmer unless the alternative one is enabled for the locale.
  ['en', porterUnlessAlternative(() => PorterStemmer, () => EnglishStemmer)],
  ['fr', porterUnlessAlternative(() => PorterStemmerFr, () => FrenchStemmer)],
  ['ru', porterUnlessAlternative(() => PorterStemmerRu, () => RussianStemmer)],
  ['es', porterUnlessAlternative(() => PorterStemmerEs, () => SpanishStemmer)],
  ['it', porterUnlessAlternative(() => PorterStemmerIt, () => ItalianStemmer)],
  [
    'no',
    porterUnlessAlternative(() => PorterStemmerNo, () => NorwegianStemmer),
  ],
  [
    'pt',
    porterUnlessAlternative(() => PorterStemmerPt, () => PortugueseStemmer),
  ],
  ['sv', porterUnlessAlternative(() => PorterStemmerSv, () => SwedishStemmer)],
  ['nl', porterUnlessAlternative(() => PorterStemmerNl, () => DutchStemmer)],
  // The imported stemmer is returned as is.
  ['fa', () => PorterStemmerFa], // Farsi
  ['id', () => StemmerId], // Indonesian
  // A new instance per call, created without a tokenizer.
  ['ja', () => new StemmerJa()], // Japanese
  ['zh', () => new ChineseStemmer()], // Chinese
  // A new instance per call, created with the tokenizer of the locale.
  ['ar', stemmerWithTokenizer(() => ArabicStemmer)],
  ['hy', stemmerWithTokenizer(() => ArmenianStemmer)],
  ['eu', stemmerWithTokenizer(() => BasqueStemmer)],
  ['ca', stemmerWithTokenizer(() => CatalanStemmer)],
  ['cs', stemmerWithTokenizer(() => CzechStemmer)],
  ['da', stemmerWithTokenizer(() => DanishStemmer)],
  ['fi', stemmerWithTokenizer(() => FinnishStemmer)],
  ['de', stemmerWithTokenizer(() => GermanStemmer)],
  ['hu', stemmerWithTokenizer(() => HungarianStemmer)],
  ['ga', stemmerWithTokenizer(() => IrishStemmer)],
  ['ro', stemmerWithTokenizer(() => RomanianStemmer)],
  ['sl', stemmerWithTokenizer(() => SloveneStemmer)],
  ['ta', stemmerWithTokenizer(() => TamilStemmer)],
  ['tr', stemmerWithTokenizer(() => TurkishStemmer)],
]);

/**
 * Tokenizer constructors indexed by 2 character locale. Every other locale
 * (Arabic, Armenian, Basque, Catalan, Czech, Danish, Finnish, German,
 * Hungarian, Irish, Romanian, Slovene, Tamil, Turkish and unknown ones) uses
 * PunctTokenizer, which is the fallback in NlpUtil.getTokenizer.
 */
const TOKENIZER_CLASSES = new Map([
  ['en', () => AggressiveTokenizer], // English
  ['fa', () => AggressiveTokenizerFa], // Farsi
  ['fr', () => AggressiveTokenizerFr], // French
  ['ru', () => AggressiveTokenizerRu], // Russian
  ['es', () => AggressiveTokenizerEs], // Spanish
  ['it', () => AggressiveTokenizerIt], // Italian
  ['nl', () => AggressiveTokenizerNl], // Dutch
  ['no', () => AggressiveTokenizerNo], // Norwegian
  ['pt', () => AggressiveTokenizerPt], // Portuguese
  ['pl', () => AggressiveTokenizerPl], // Polish
  ['sv', () => AggressiveTokenizerSv], // Swedish
  ['id', () => AggressiveTokenizerId], // Indonesian
  ['ja', () => TokenizerJa], // Japanese
  ['zh', () => ChineseTokenizer], // Chinese
]);

/**
 * Culture codes (language-region, lower case) indexed by 2 character locale.
 */
const CULTURES = new Map([
  ['en', 'en-us'], // English
  ['fa', 'fa-ir'], // Farsi
  ['fr', 'fr-fr'], // French
  ['ru', 'ru-ru'], // Russian
  ['es', 'es-es'], // Spanish
  ['it', 'it-it'], // Italian
  ['nl', 'nl-nl'], // Dutch
  ['no', 'no-no'], // Norwegian
  ['pt', 'pt-br'], // Portuguese
  ['pl', 'pl-pl'], // Polish
  ['sv', 'sv-se'], // Swedish
  ['id', 'id-id'], // Indonesian
  ['ja', 'ja-jp'], // Japanese
  ['ar', 'ar-ae'], // Arabic
  ['hy', 'hy-am'], // Armenian
  ['eu', 'eu-es'], // Basque
  ['ca', 'ca-es'], // Catalan
  ['cs', 'cs-cz'], // Czech
  ['da', 'da-dk'], // Danish
  ['fi', 'fi-fi'], // Finnish
  ['de', 'de-de'], // German
  ['hu', 'hu-hu'], // Hungarian
  ['ga', 'ga-ie'], // Irish
  ['ro', 'ro-ro'], // Romanian
  // Slovenia is region SI; the former value 'sl-sl' pointed at region SL
  // (Sierra Leone).
  ['sl', 'sl-si'], // Slovene
  ['ta', 'ta-in'], // Tamil
  ['tr', 'tr-tr'], // Turkish
  ['zh', 'zh-cn'], // Chinese
]);

/**
 * Static helpers that map a 2 character locale to its stemmer, tokenizer
 * and culture code.
 */
class NlpUtil {
  /**
   * Given a locale, get the 2 character one.
   * @param {String} locale Locale of the language.
   * @returns {String} Locale in 2 character length and lower case, or
   * undefined when the locale is empty or not provided.
   */
  static getTruncatedLocale(locale) {
    if (!locale) {
      return undefined;
    }
    return locale.slice(0, 2).toLowerCase();
  }

  /**
   * Given a 2 character locale, get its stemmer.
   * For en, fr, ru, es, it, no, pt, sv and nl the imported Porter stemmer is
   * returned unless NlpUtil.useAlternative[locale] is truthy, in which case a
   * new alternative stemmer is created with the tokenizer of the locale.
   * @param {String} locale Locale in 2 character length and lower case.
   * @returns {Object} Stemmer for the locale; the English Porter stemmer
   * when the locale has no specific stemmer.
   */
  static getStemmer(locale) {
    const buildStemmer = STEMMER_FACTORIES.get(locale);
    return buildStemmer ? buildStemmer(locale) : PorterStemmer;
  }

  /**
   * Given a 2 character locale, create a tokenizer for it.
   * @param {String} locale Locale in 2 character length and lower case.
   * @returns {Object} New tokenizer instance; a PunctTokenizer when the
   * locale has no specific tokenizer.
   */
  static getTokenizer(locale) {
    const pickTokenizer = TOKENIZER_CLASSES.get(locale);
    const Tokenizer = pickTokenizer ? pickTokenizer() : PunctTokenizer;
    return new Tokenizer();
  }

  /**
   * Given a 2 character locale, get its culture code.
   * @param {String} locale Locale in 2 character length and lower case.
   * @returns {String} Culture code in lower case; 'en-us' when the locale
   * is not known.
   */
  static getCulture(locale) {
    return CULTURES.has(locale) ? CULTURES.get(locale) : 'en-us';
  }
}
/**
 * Flags indexed by 2 character locale, all disabled by default.
 * NlpUtil.getStemmer reads the flag of en, fr, ru, es, it, no, pt, sv and nl:
 * when it is truthy the alternative stemmer of that language is returned
 * instead of the Porter one. The remaining flags are not read in this file.
 */
NlpUtil.useAlternative = [
  'en',
  'fa',
  'fr',
  'ru',
  'es',
  'it',
  'nl',
  'no',
  'pt',
  'pl',
  'sv',
  'id',
  'ja',
  'ca',
  'da',
  'fi',
  'de',
  'hu',
  'ro',
  'tr',
].reduce((flags, locale) => {
  flags[locale] = false;
  return flags;
}, {});

module.exports = NlpUtil;
</code></pre>
</article>
</section>
</div>
<nav>
<h2><a href="index.html">Home</a></h2><h3>Classes</h3><ul><li><a href="BinaryNeuralNetworkClassifier.html">BinaryNeuralNetworkClassifier</a></li><li><a href="Classifier.html">Classifier</a></li><li><a href="ConversationContext.html">ConversationContext</a></li><li><a href="DutchStemmer.html">DutchStemmer</a></li><li><a href="EnglishStemmer.html">EnglishStemmer</a></li><li><a href="EnumNamedEntity.html">EnumNamedEntity</a></li><li><a href="Evaluator.html">Evaluator</a></li><li><a href="HungarianStemmer.html">HungarianStemmer</a></li><li><a href="ItalianStemmer.html">ItalianStemmer</a></li><li><a href="Language.html">Language</a></li><li><a href="LogisticRegressionClassifier.html">LogisticRegressionClassifier</a></li><li><a href="Matrix.html">Matrix</a></li><li><a href="MemoryConversationContext.html">MemoryConversationContext</a></li><li><a href="NamedEntity.html">NamedEntity</a></li><li><a href="NerManager.html">NerManager</a></li><li><a href="NlgManager.html">NlgManager</a></li><li><a href="NlpClassifier.html">NlpClassifier</a></li><li><a href="NlpManager.html">NlpManager</a></li><li><a href="NorwegianStemmer.html">NorwegianStemmer</a></li><li><a href="PortugueseStemmer.html">PortugueseStemmer</a></li><li><a href="Recognizer.html">Recognizer</a></li><li><a href="RegexNamedEntity.html">RegexNamedEntity</a></li><li><a href="RomanianStemmer.html">RomanianStemmer</a></li><li><a href="RussianStemmer.html">RussianStemmer</a></li><li><a href="SentimentAnalyzer.html">SentimentAnalyzer</a></li><li><a href="SentimentManager.html">SentimentManager</a></li><li><a href="SimilarSearch.html">SimilarSearch</a></li><li><a href="SlotManager.html">SlotManager</a></li><li><a href="StemmerJa.html">StemmerJa</a></li><li><a href="SwedishStemmer.html">SwedishStemmer</a></li><li><a href="Tokenizer.html">Tokenizer</a></li><li><a href="TrimNamedEntity.html">TrimNamedEntity</a></li><li><a href="TurkishStemmer.html">TurkishStemmer</a></li><li><a href="Vector.html">Vector</a></li><li><a 
href="XTable.html">XTable</a></li></ul><h3>Global</h3><ul><li><a href="global.html#endsinArr">endsinArr</a></li><li><a href="global.html#prelude">prelude</a></li><li><a href="global.html#regions">regions</a></li><li><a href="global.html#stem">stem</a></li><li><a href="global.html#stopwords">stopwords</a></li></ul>
</nav>
<br class="clear">
<footer>
Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.5.5</a> on Sat Oct 13 2018 19:14:51 GMT+0200 (CEST)
</footer>
<script> prettyPrint(); </script>
<script src="scripts/linenumber.js"> </script>
</body>
</html>