nlpsum
Version:
Powerful text summarization algorithms from research papers and dedicated research.
83 lines (72 loc) • 2.71 kB
text/coffeescript
tagger= require("./tagger")
recognizer= require("./helpers/recognizer")
chunker= require("./helpers/chunker")
blacklist= require("./helpers/blacklist")
spotter=(->
set_options = (style="") ->
  # Return a freshly built options object for the given style name.
  #   "verbose"   - every sticky/inclusive flag on, nothing killed
  #   "selective" - every sticky/inclusive flag off, numbers and quotes killed
  #   anything else (including the default "") - an empty object
  # Note that only the "verbose" preset defines case_sensitive.
  switch style
    when "verbose"
      return {
        gerund: true
        stick_adjectives: true
        stick_prepositions: true
        stick_the: true
        want_quotations: true
        subnouns: true
        match_whole: true
        case_sensitive: false
        kill_numbers: false
        kill_quotes: false
      }
    when "selective"
      return {
        gerund: false
        stick_adjectives: false
        stick_prepositions: false
        stick_the: false
        subnouns: false
        want_quotations: false
        match_whole: false
        kill_numbers: true
        kill_quotes: true
      }
    else
      return {}
cleanup = (nouns, options) ->
  # Normalise the `word` of every entry in `nouns`, in place.
  #
  # nouns   - array of objects carrying a string `word` property
  # options - optional; when options.case_sensitive is truthy the original
  #           casing is kept, otherwise each word is lower-cased
  #
  # Returns the same `nouns` array that was passed in (null/undefined is
  # returned unchanged).
  return nouns unless nouns?
  for noun in nouns
    word = noun.word
    # drop quotes, commas, parentheses and exclamation marks anywhere in the word
    word = word.replace(/("|,|\)|\(|!)/g, "")
    # drop a trailing possessive
    word = word.replace(/'s$/, "")
    # trim trailing sentence punctuation and leading brackets/slashes
    word = word.replace(/[\.\?,!:;\/\)]*$/, "")
    word = word.replace(/^[\(\/]*/g, "")
    # remaining inner separators become spaces ...
    word = word.replace(/[\(\/\)\\;:,]/g, " ")
    # ... which can leave runs of spaces behind, so collapse them to one.
    # (The character class avoids a regex literal that starts with a space.)
    word = word.replace(/[ ]{2,}/g, " ")
    # finally strip any other non-word characters from both ends
    word = word.replace(/\W*$/, "")
    word = word.replace(/^\W*/, "")
    # word = singularize(word) unless word.match(/^the ./)
    word = word.toLowerCase() unless options?.case_sensitive
    noun.word = word
  nouns
# Score every result and order the list best-first.
rank = (results) ->
  # Each entry gets score = count * 10 plus an adjustment for the rule that
  # produced it: "capital" +10, "lexicon" +7, "group_prep" -4, otherwise 0.
  # The array is sorted in place by descending score and returned.
  rule_bonus = (rule) ->
    switch rule
      when "capital" then 10
      when "lexicon" then 7
      when "group_prep" then -4
      else 0
  for key of results
    entry = results[key]
    entry.score = entry.count * 10 + rule_bonus(entry.rule)
  results = results.sort (left, right) -> right.score - left.score
  results
###########
spotter = (str="", style="selective") ->
  # Run the noun-spotting pipeline over `str` and return the ranked results.
  #
  # str   - text to analyse (defaults to "")
  # style - option preset name handed to set_options (defaults to
  #         "selective"); previously this argument was accepted but a
  #         hard-coded "selective" was always used instead
  #
  # Pipeline: tagger -> chunker -> recognizer -> cleanup -> rank, with the
  # same options object passed to every stage that takes one.
  options = set_options(style)
  tags = tagger(str, options)
  chunks = chunker(tags, options)
  nouns = recognizer(chunks, options)
  nouns = cleanup(nouns, options)
  nouns = rank(nouns)
  # nouns= nouns.filter((noun)-> pass(noun.word) )
  return nouns
########
# Publish spotter to whichever module loader is present.
if typeof define isnt "undefined" and define.amd
  # AMD / RequireJS: register an anonymous module with no dependencies
  define [], -> spotter
else if typeof module isnt "undefined" and module.exports
  # Node.js / CommonJS
  module.exports = spotter
# last expression: the value produced by the enclosing wrapper function
spotter
)()
# console.log spotter("sally walked to the store")