@owsas/sentiment-multilang
Version:
Multilanguage AFINN-based sentiment analysis for Node.js
102 lines (101 loc) • 3.7 kB
JavaScript
;
/*
* Multilanguage AFINN-based sentiment analysis for Node.js
*/
exports.__esModule = true;
// Dependencies
var latinize = require("latinize");
var stemmer = require("stemmer");
var lexicon_1 = require("./lib/lexicon");
/**
 * Tokenizes a string into an array of lower-cased word tokens.
 *
 * Steps, in order: lower-case the text, expand the contractions "n't" and
 * "'s" to " not" and " is", turn the remaining apostrophes into separators,
 * strip common punctuation, collapse every run of whitespace (spaces, tabs,
 * line breaks) into a single space, trim both ends and split on spaces.
 *
 * @param input The text to tokenize; must be a string.
 * @returns The list of tokens. Input without any word content yields ['']
 *          (one empty token), never an empty array.
 */
function tokenize(input) {
    return input
        .toLowerCase()
        // .replace(/^\s+|^0-9+|[^a-z-úñäâàáéèëêïîíìöôùüûœç\- ]+/g, '')
        .replace(/’/g, '\'') // typographic apostrophe is handled like the ASCII one, so "don’t" is expanded too
        .replace(/n't/g, ' not') // n't changed to not
        .replace(/'s\b/g, ' is') // 's changed to is; \b leaves an opening quote before an s-word ('super') alone
        .replace(/'/g, ' ') // remaining apostrophes changed to space
        .replace(/[.,\/#!?¿¡$%\^&\*;:{}=_`\"~()]/g, '') // remove punctuation, including question marks and Spanish opening marks
        .replace(/\s+/g, ' ') // any whitespace run (spaces, tabs, line breaks) becomes one space
        .trim() // avoid empty tokens at the start/end, which would distort token counts
        .split(' ');
}
exports.tokenize = tokenize;
;
/**
 * Returns table[key] only when `key` is an own property of `table`.
 * Tokens come from user text, so a plain `table[key]` lookup would resolve
 * words such as "constructor" to members inherited from Object.prototype.
 * @param table Object to look into (may be undefined/null)
 * @param key Property name to read
 */
function ownValue(table, key) {
    if (!table || !Object.prototype.hasOwnProperty.call(table, key))
        return undefined;
    return table[key];
}
/**
 * Performs sentiment analysis on the provided input 'phrase'.
 *
 * Every token produced by tokenize() is looked up in the lexicon of `lang`
 * (as is, then stemmed, then latinized) and, failing that, in the emoji
 * table. A matched token's value is added to the score; the sign is flipped
 * when the token right before it is listed as a negation for `lang`.
 *
 * The result object has the fields:
 *   score       - sum of the (possibly negated) values of all matched tokens
 *   comparative - score divided by the number of tokens
 *   vote        - 'positive', 'negative' or 'neutral', from the sign of score
 *   tokens      - all tokens of the phrase
 *   words       - matched tokens (collected from the last token to the first)
 *   positive    - matched tokens with a value above zero
 *   negative    - matched tokens with a value below zero
 *   language    - the language used, or 'unknown'
 *
 * @param phrase Text to analyse; undefined/null is treated as '', any other
 *               non-string value is converted with String()
 * @param lang Language key; anything not registered in the lexicon "langs"
 *             table becomes 'unknown', in which case no token is scored
 * @param callback Optional node-style callback, invoked as
 *                 callback(null, result); nothing is returned in that case
 * @returns The result object when no callback is given
 */
function sentiment(phrase, lang, callback) {
    var lexicon = lexicon_1["default"];
    // Parse arguments
    if (phrase === undefined || phrase === null)
        phrase = '';
    else if (typeof phrase !== 'string')
        phrase = String(phrase); // tokenize() needs a string
    // Own-property check so that names like "constructor" are not accepted as languages
    if (!ownValue(lexicon["langs"], lang))
        lang = 'unknown';
    if (typeof callback === 'undefined')
        callback = null;
    // Storage objects
    var tokens = tokenize(phrase), words = [], positive = [], negative = [];
    var score = 0;
    // Iterate over tokens if language is known
    if (lang !== 'unknown') {
        // Per-language tables do not change during the loop
        var wordScores = lexicon[lang];
        var negations = lexicon["negations"][lang];
        var isTruncated = lexicon["truncated"][lang];
        var emojiScores = lexicon['emoji'];
        // Walk from the last token to the first (this fixes the order of words/positive/negative)
        for (var i = tokens.length - 1; i >= 0; i--) {
            // A negation directly before the token flips its value; the first token has no predecessor
            var negation = (i > 0 && ownValue(negations, tokens[i - 1])) ? -1 : 1;
            // For "truncated" languages a trailing vowel is removed before the lookup
            var stringToken = isTruncated ? tokens[i].replace(/[aeiouúäâàáéèëêïîíìöôùüû]$/, "") : String(tokens[i]);
            var punctuation = 0;
            // Extract the value using the input word, its stemmed or its latinized (no accents) version
            var tokenValue = wordScores
                ? (ownValue(wordScores, stringToken) || ownValue(wordScores, stemmer(stringToken)) || ownValue(wordScores, latinize(stringToken)))
                : undefined;
            if (tokenValue === undefined) {
                // Search on the emojis data
                var emojiValue = ownValue(emojiScores, stringToken);
                if (!emojiValue) {
                    continue; // not a known word nor emoji: go on with the next token
                }
                // It's an emoji
                punctuation = Number(emojiValue);
            }
            else {
                // It's a word
                punctuation = tokenValue;
            }
            words.push(stringToken);
            if (punctuation > 0)
                positive.push(stringToken);
            if (punctuation < 0)
                negative.push(stringToken);
            score += punctuation * negation;
        }
    }
    // Build the result
    var result = {
        score: score,
        // Guard against a division by zero should the token list ever be empty
        comparative: tokens.length > 0 ? score / tokens.length : 0,
        vote: 'neutral',
        tokens: tokens,
        words: words,
        positive: positive,
        negative: negative,
        language: lang
    };
    // Classify text as positive, negative or neutral.
    if (result.score > 0) {
        result.vote = 'positive';
    }
    else if (result.score < 0) {
        result.vote = 'negative';
    }
    // Handle optional async interface
    if (!callback) {
        return result;
    }
    else {
        callback(null, result);
    }
}
exports.sentiment = sentiment;
;