# unfluff
# Version:
# A web page content extractor
# 37 lines (30 loc) • 1.05 kB
# text/coffeescript
# Test suite for the stopwords module.
#
# The tests below call `stopwords(text, language)` and expect an object with:
#   wordCount     - number of words found in `text`
#   stopwordCount - number of those words that are stop words
#   stopWords     - the stop words themselves, in order of appearance
#
# NOTE(review): `suite`/`test` look like Mocha's TDD interface, and `ok`, `eq`
# and `arrayEq` are assumed to be assertion helpers made global by the test
# setup - none of them are defined in this file; confirm against the runner.
suite 'Stop words', ->
  stopwords = require '../src/stopwords'

  # Sanity check: the module loaded and exported something truthy.
  test 'exists', ->
    ok stopwords

  test 'counts stopwords', ->
    data = stopwords('this is silly', 'en')
    eq data.wordCount, 3
    eq data.stopwordCount, 2
    arrayEq data.stopWords, [ 'this', 'is' ]

  # Punctuation attached to words must not change the counts or the
  # reported stop words.
  test 'strips punctuation', ->
    data = stopwords('this! is?? silly....', 'en')
    eq data.wordCount, 3
    eq data.stopwordCount, 2
    arrayEq data.stopWords, [ 'this', 'is' ]

  # No language argument at all: results must match the explicit 'en' case.
  test 'defaults to english', ->
    data = stopwords('this is fun')
    eq data.wordCount, 3
    eq data.stopwordCount, 2
    arrayEq data.stopWords, [ 'this', 'is' ]

  test 'handles spanish', ->
    data = stopwords('este es rico', 'es')
    eq data.wordCount, 3
    eq data.stopwordCount, 2
    arrayEq data.stopWords, [ 'este', 'es' ]

  # An unknown language code must not throw; English results are expected.
  test 'Safely handles a bad language by falling back to english', ->
    data = stopwords('this is fun', 'fake-language-to-test-fallbacks')
    eq data.wordCount, 3
    eq data.stopwordCount, 2
    arrayEq data.stopWords, [ 'this', 'is' ]