UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

36 lines (29 loc) 1.09 kB
var stopwords = require('../../stopwords/stopwords_it.json'); var Tokenizer = require('../../tokenizers/aggressive-tokenizer-it'); module.exports = function() { var stemmer = this; stemmer.stem = function(token) { return token; }; stemmer.tokenizeAndStem = function(text, keepStops) { var stemmedTokens = []; new Tokenizer().tokenize(text).forEach(function(token) { if (keepStops || stopwords.words.indexOf(token) == -1) { var resultToken = token.toLowerCase(); if (resultToken.match(/[a-zàèìòù0-9]/gi)) { resultToken = stemmer.stem(resultToken); } stemmedTokens.push(resultToken); } }); return stemmedTokens; }; stemmer.attach = function() { String.prototype.stem = function() { return stemmer.stem(this); }; String.prototype.tokenizeAndStem = function(keepStops) { return stemmer.tokenizeAndStem(this, keepStops); }; }; }