UNPKG

@owsas/sentiment-multilang

Version:

Multilanguage AFINN-based sentiment analysis for Node.js

github.com/owsas/sentiment-multilang

owsas/sentiment-multilang

102 lines (101 loc) • 3.7 kB

JavaScript

"use strict"; /* * Multilanguage AFINN-based sentiment analysis for Node.js */ exports.__esModule = true; // Dependencies var latinize = require("latinize"); var stemmer = require("stemmer"); var lexicon_1 = require("./lib/lexicon"); /** * Tokenizes a string into an array of strings * @param input */ function tokenize(input) { return input .toLowerCase() // .replace(/^\s+|^0-9+|[^a-z-úñäâàáéèëêïîíìöôùüûœç\- ]+/g, '') .replace(/\r?\n|\r/g, ' ') // line breaks changed to space https://stackoverflow.com/a/10805292 .replace(/n\'t/g, ' not') // n't changed to not .replace(/'s/g, ' is') // 's changed to is .replace(/['’]/g, ' ') // apos changed to space .replace(/[.,\/#!$%\^&\*;:{}=_`\"~()]/g, '') // remove punctuation .replace(/\s{2,}/g, ' ') // remove extra spaces https://stackoverflow.com/a/4328722 .split(' '); } exports.tokenize = tokenize; ; /** * Performs sentiment analysis on the provided input 'phrase' * @param phrase * @param lang * @param callback */ function sentiment(phrase, lang, callback) { // Parse arguments if (typeof phrase === 'undefined') phrase = ''; if ((typeof (lang) === 'undefined') || !lexicon_1["default"]["langs"][lang]) lang = 'unknown'; if (typeof callback === 'undefined') callback = null; // Storage objects var tokens = tokenize(phrase), words = [], positive = [], negative = []; var score = 0; // Iterate over tokens if language is knowed var len = tokens.length; if (lang !== 'unknown') { while (len--) { // var prevobj = (len > 0) ? String(tokens[len-1]): ""; var negation = (lexicon_1["default"]["negations"][lang] && lexicon_1["default"]["negations"][lang][tokens[len - 1]]) ? -1 : 1; var stringToken = lexicon_1["default"]["truncated"][lang] ? tokens[len].replace(/[aeiouúäâàáéèëêïîíìöôùüû]$/, "") : String(tokens[len]); var punctuation = 0; // Extract the value using the input word, it's stemmed or it's latinized (no accents) version var tokenValue = lexicon_1["default"][lang] && (lexicon_1["default"][lang][stringToken] || lexicon_1["default"][lang][stemmer(stringToken)] || lexicon_1["default"][lang][latinize(stringToken)]); if (tokenValue === undefined) { // Search on the emojis data if (!lexicon_1["default"]['emoji'][stringToken]) { continue; // continue the while loop with the next iteration } // It's an emoji punctuation = Number(lexicon_1["default"]['emoji'][stringToken]); } else { // It's a word punctuation = tokenValue; } words.push(stringToken); if (punctuation > 0) positive.push(stringToken); if (punctuation < 0) negative.push(stringToken); score += punctuation * negation; } } // Handle optional async interface var result = { score: score, comparative: score / tokens.length, vote: 'neutral', tokens: tokens, words: words, positive: positive, negative: negative, language: lang }; // Classify text as positive, negative or neutral. if (result.score > 0) { result.vote = 'positive'; } else if (result.score < 0) { result.vote = 'negative'; } if (!callback) { return result; } else { callback(null, result); } } exports.sentiment = sentiment; ;