node-nlp
Version:
Library for NLU (Natural Language Understanding) done in Node.js
36 lines (29 loc) • 1.09 kB
JavaScript
var stopwords = require('../../stopwords/stopwords_it.json');
var Tokenizer = require('../../tokenizers/aggressive-tokenizer-it');
module.exports = function() {
var stemmer = this;
stemmer.stem = function(token) {
return token;
};
stemmer.tokenizeAndStem = function(text, keepStops) {
var stemmedTokens = [];
new Tokenizer().tokenize(text).forEach(function(token) {
if (keepStops || stopwords.words.indexOf(token) == -1) {
var resultToken = token.toLowerCase();
if (resultToken.match(/[a-zàèìòù0-9]/gi)) {
resultToken = stemmer.stem(resultToken);
}
stemmedTokens.push(resultToken);
}
});
return stemmedTokens;
};
stemmer.attach = function() {
String.prototype.stem = function() {
return stemmer.stem(this);
};
String.prototype.tokenizeAndStem = function(keepStops) {
return stemmer.tokenizeAndStem(this, keepStops);
};
};
}