UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

174 lines (148 loc) 7.49 kB
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"> <title>JSDoc: Source: language/language.js</title> <script src="scripts/prettify/prettify.js"> </script> <script src="scripts/prettify/lang-css.js"> </script> <!--[if lt IE 9]> <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script> <![endif]--> <link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css"> <link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css"> </head> <body> <div id="main"> <h1 class="page-title">Source: language/language.js</h1> <section> <article> <pre class="prettyprint source linenums"><code>/* * Copyright (c) AXA Shared Services Spain S.A. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ const franc = require('franc'); const languageData = require('./languages.json'); /** * Class for managing languages. The main purpose is to guess the language of * an utterance, but it also manages a list of languages indexed by both ISOs. 
*/ class Language { /** * Constructor of the class. */ constructor() { this.languagesAlpha3 = {}; this.languagesAlpha2 = {}; this.buildData(); } /** * Given the data of the languages, build the indexes. */ buildData() { for (let i = 0; i &lt; languageData.length; i += 1) { const language = languageData[i]; this.languagesAlpha3[language.alpha3] = language; this.languagesAlpha2[language.alpha2] = language; } } /** * Given a whitelist, iterates each language to transform * from iso2 to iso3 each code. * @param {String[]} whitelist Array of language codes in iso3 or iso2 * @returns {String[]} Whitelist forced to iso3. */ transformWhitelist(whitelist) { const result = []; for (let i = 0; i &lt; whitelist.length; i += 1) { if (whitelist[i].length === 3) { result.push(whitelist[i]); } else { const language = this.languagesAlpha2[whitelist[i]]; if (language) { result.push(language.alpha3); } } } return result; } /** * Given an utterance, a whitelist of iso codes and the limit of results, * build an array of languages scored. * The whitelist and the limit are optional. * @param {String} utterance Utterance whose language we want to guess. * @param {String[]} whitelist Whitelist of accepted languages. * @param {Number} limit Limit of results. * @returns {Object[]} Array of guesses. 
*/ guess(utterance, whitelist, limit) { const options = {}; if (utterance.length &lt; 10) { options.minLength = utterance.length; } if (whitelist &amp;&amp; whitelist.length &amp;&amp; whitelist.length > 0) { options.whitelist = this.transformWhitelist(whitelist); } const scores = franc.all(utterance, options); const result = []; for (let i = 0; i &lt; scores.length; i += 1) { const language = this.languagesAlpha3[scores[i][0]]; if (language) { result.push({ alpha3: language.alpha3, alpha2: language.alpha2, language: language.name, score: scores[i][1], }); if (limit &amp;&amp; result.length >= limit) { break; } } } return result; } /** * Given an utterance and a whitelist of iso codes, * return the language with the best score. * The whitelist is optional. * @param {String} utterance Utterance whose language we want to guess. * @param {String[]} whitelist Whitelist of accepted languages. * @return {Object} Best guess. */ guessBest(utterance, whitelist) { return this.guess(utterance, whitelist, 1)[0]; } } module.exports = Language; </code></pre> </article> </section> </div> <nav> <h2><a href="index.html">Home</a></h2><h3>Classes</h3><ul><li><a href="BinaryNeuralNetworkClassifier.html">BinaryNeuralNetworkClassifier</a></li><li><a href="Classifier.html">Classifier</a></li><li><a href="ConversationContext.html">ConversationContext</a></li><li><a href="DutchStemmer.html">DutchStemmer</a></li><li><a href="EnglishStemmer.html">EnglishStemmer</a></li><li><a href="EnumNamedEntity.html">EnumNamedEntity</a></li><li><a href="Evaluator.html">Evaluator</a></li><li><a href="HungarianStemmer.html">HungarianStemmer</a></li><li><a href="ItalianStemmer.html">ItalianStemmer</a></li><li><a href="Language.html">Language</a></li><li><a href="LogisticRegressionClassifier.html">LogisticRegressionClassifier</a></li><li><a href="Matrix.html">Matrix</a></li><li><a href="MemoryConversationContext.html">MemoryConversationContext</a></li><li><a 
href="NamedEntity.html">NamedEntity</a></li><li><a href="NerManager.html">NerManager</a></li><li><a href="NlgManager.html">NlgManager</a></li><li><a href="NlpClassifier.html">NlpClassifier</a></li><li><a href="NlpManager.html">NlpManager</a></li><li><a href="NorwegianStemmer.html">NorwegianStemmer</a></li><li><a href="PortugueseStemmer.html">PortugueseStemmer</a></li><li><a href="Recognizer.html">Recognizer</a></li><li><a href="RegexNamedEntity.html">RegexNamedEntity</a></li><li><a href="RomanianStemmer.html">RomanianStemmer</a></li><li><a href="RussianStemmer.html">RussianStemmer</a></li><li><a href="SentimentAnalyzer.html">SentimentAnalyzer</a></li><li><a href="SentimentManager.html">SentimentManager</a></li><li><a href="SimilarSearch.html">SimilarSearch</a></li><li><a href="SlotManager.html">SlotManager</a></li><li><a href="StemmerJa.html">StemmerJa</a></li><li><a href="SwedishStemmer.html">SwedishStemmer</a></li><li><a href="Tokenizer.html">Tokenizer</a></li><li><a href="TrimNamedEntity.html">TrimNamedEntity</a></li><li><a href="TurkishStemmer.html">TurkishStemmer</a></li><li><a href="Vector.html">Vector</a></li><li><a href="XTable.html">XTable</a></li></ul><h3>Global</h3><ul><li><a href="global.html#endsinArr">endsinArr</a></li><li><a href="global.html#prelude">prelude</a></li><li><a href="global.html#regions">regions</a></li><li><a href="global.html#stem">stem</a></li><li><a href="global.html#stopwords">stopwords</a></li></ul> </nav> <br class="clear"> <footer> Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.5.5</a> on Sat Oct 13 2018 19:14:51 GMT+0200 (CEST) </footer> <script> prettyPrint(); </script> <script src="scripts/linenumber.js"> </script> </body> </html>