node-nlp
Version:
Library for NLU (Natural Language Understanding) done in Node.js
174 lines (148 loc) • 7.49 kB
HTML
<html lang="en">
<head>
<meta charset="utf-8">
<title>JSDoc: Source: language/language.js</title>
<script src="scripts/prettify/prettify.js"> </script>
<script src="scripts/prettify/lang-css.js"> </script>
<!--[if lt IE 9]>
<script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
<link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css">
<link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css">
</head>
<body>
<div id="main">
<h1 class="page-title">Source: language/language.js</h1>
<section>
<article>
<pre class="prettyprint source linenums"><code>/*
* Copyright (c) AXA Shared Services Spain S.A.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
const franc = require('franc');
const languageData = require('./languages.json');
/**
* Class for managing languages. Its main purpose is to guess the language of
* an utterance, but it also manages a list of languages indexed by both ISO
* codes (alpha-2 and alpha-3).
*/
class Language {
  /**
   * Constructor of the class.
   * Creates the two lookup tables (by alpha-3 code and by alpha-2 code) and
   * fills them from the language data loaded from `./languages.json`.
   */
  constructor() {
    this.languagesAlpha3 = {};
    this.languagesAlpha2 = {};
    this.buildData();
  }

  /**
   * Given the data of the languages, build the indexes.
   * Every entry is registered twice: under its `alpha3` code in
   * `languagesAlpha3` and under its `alpha2` code in `languagesAlpha2`.
   * When two entries share a code, the later one wins.
   */
  buildData() {
    for (const language of languageData) {
      this.languagesAlpha3[language.alpha3] = language;
      this.languagesAlpha2[language.alpha2] = language;
    }
  }

  /**
   * Given a whitelist, iterates each language code and transforms it to its
   * alpha-3 form.
   * Codes that are exactly 3 characters long are kept untouched (they are not
   * validated against the known languages). Any other code is looked up as an
   * alpha-2 code; codes that are not known are dropped from the result.
   * @param {String[]} whitelist Array of language codes in iso3 or iso2.
   * @returns {String[]} Whitelist forced to iso3.
   */
  transformWhitelist(whitelist) {
    const result = [];
    for (let i = 0; i < whitelist.length; i += 1) {
      const code = whitelist[i];
      if (code.length === 3) {
        result.push(code);
      } else if (
        // Only accept codes registered by buildData. A plain bracket lookup
        // would also match inherited keys such as "constructor" or
        // "__proto__" and push `undefined` into the whitelist.
        Object.prototype.hasOwnProperty.call(this.languagesAlpha2, code)
      ) {
        const language = this.languagesAlpha2[code];
        if (language) {
          result.push(language.alpha3);
        }
      }
    }
    return result;
  }

  /**
   * Given an utterance, a whitelist of iso codes and the limit of results,
   * build an array of scored languages, in the order returned by franc.
   * The whitelist and the limit are optional.
   * Scores whose code is not present in the alpha-3 index are skipped and do
   * not count towards the limit.
   * @param {String} utterance Utterance whose language we want to guess.
   * @param {String[]} [whitelist] Whitelist of accepted languages, as iso2 or
   *   iso3 codes. Ignored when empty or not provided.
   * @param {Number} [limit] Maximum number of results. A falsy value (0,
   *   undefined) means no limit.
   * @returns {Object[]} Array of guesses, each one with the shape
   *   `{ alpha3, alpha2, language, score }`.
   */
  guess(utterance, whitelist, limit) {
    const options = {};
    // For utterances shorter than 10 characters, lower franc's minLength to
    // the utterance length - presumably so that short inputs are still
    // scored instead of being rejected as too short (confirm in franc docs).
    if (utterance.length < 10) {
      options.minLength = utterance.length;
    }
    if (whitelist && whitelist.length > 0) {
      options.whitelist = this.transformWhitelist(whitelist);
    }
    const scores = franc.all(utterance, options);
    const result = [];
    for (const [code, score] of scores) {
      const language = Object.prototype.hasOwnProperty.call(
        this.languagesAlpha3,
        code
      )
        ? this.languagesAlpha3[code]
        : undefined;
      if (language) {
        result.push({
          alpha3: language.alpha3,
          alpha2: language.alpha2,
          language: language.name,
          score,
        });
        if (limit && result.length >= limit) {
          break;
        }
      }
    }
    return result;
  }

  /**
   * Given an utterance and a whitelist of iso codes, return the language
   * with the best score.
   * The whitelist is optional.
   * @param {String} utterance Utterance whose language we want to guess.
   * @param {String[]} [whitelist] Whitelist of accepted languages.
   * @returns {Object|undefined} Best guess, or undefined when no known
   *   language was scored.
   */
  guessBest(utterance, whitelist) {
    return this.guess(utterance, whitelist, 1)[0];
  }
}
module.exports = Language;
</code></pre>
</article>
</section>
</div>
<nav>
<h2><a href="index.html">Home</a></h2><h3>Classes</h3><ul><li><a href="BinaryNeuralNetworkClassifier.html">BinaryNeuralNetworkClassifier</a></li><li><a href="Classifier.html">Classifier</a></li><li><a href="ConversationContext.html">ConversationContext</a></li><li><a href="DutchStemmer.html">DutchStemmer</a></li><li><a href="EnglishStemmer.html">EnglishStemmer</a></li><li><a href="EnumNamedEntity.html">EnumNamedEntity</a></li><li><a href="Evaluator.html">Evaluator</a></li><li><a href="HungarianStemmer.html">HungarianStemmer</a></li><li><a href="ItalianStemmer.html">ItalianStemmer</a></li><li><a href="Language.html">Language</a></li><li><a href="LogisticRegressionClassifier.html">LogisticRegressionClassifier</a></li><li><a href="Matrix.html">Matrix</a></li><li><a href="MemoryConversationContext.html">MemoryConversationContext</a></li><li><a href="NamedEntity.html">NamedEntity</a></li><li><a href="NerManager.html">NerManager</a></li><li><a href="NlgManager.html">NlgManager</a></li><li><a href="NlpClassifier.html">NlpClassifier</a></li><li><a href="NlpManager.html">NlpManager</a></li><li><a href="NorwegianStemmer.html">NorwegianStemmer</a></li><li><a href="PortugueseStemmer.html">PortugueseStemmer</a></li><li><a href="Recognizer.html">Recognizer</a></li><li><a href="RegexNamedEntity.html">RegexNamedEntity</a></li><li><a href="RomanianStemmer.html">RomanianStemmer</a></li><li><a href="RussianStemmer.html">RussianStemmer</a></li><li><a href="SentimentAnalyzer.html">SentimentAnalyzer</a></li><li><a href="SentimentManager.html">SentimentManager</a></li><li><a href="SimilarSearch.html">SimilarSearch</a></li><li><a href="SlotManager.html">SlotManager</a></li><li><a href="StemmerJa.html">StemmerJa</a></li><li><a href="SwedishStemmer.html">SwedishStemmer</a></li><li><a href="Tokenizer.html">Tokenizer</a></li><li><a href="TrimNamedEntity.html">TrimNamedEntity</a></li><li><a href="TurkishStemmer.html">TurkishStemmer</a></li><li><a href="Vector.html">Vector</a></li><li><a 
href="XTable.html">XTable</a></li></ul><h3>Global</h3><ul><li><a href="global.html#endsinArr">endsinArr</a></li><li><a href="global.html#prelude">prelude</a></li><li><a href="global.html#regions">regions</a></li><li><a href="global.html#stem">stem</a></li><li><a href="global.html#stopwords">stopwords</a></li></ul>
</nav>
<br class="clear">
<footer>
Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.5.5</a> on Sat Oct 13 2018 19:14:51 GMT+0200 (CEST)
</footer>
<script> prettyPrint(); </script>
<script src="scripts/linenumber.js"> </script>
</body>
</html>