nlpsum
Version:
Powerful text summarization algorithms from research papers and dedicated research.
129 lines (125 loc) • 2.85 kB
JavaScript
// Generated by CoffeeScript 1.6.3
var chunker, data;

/**
 * chunker(data, options) groups a list of part-of-speech tagged tokens into
 * phrase chunks.
 *
 * Each token is an object of the form `{ word: string, pos: { tag, parent, ... } }`.
 * The function works in three passes:
 *
 *   1. Pre-processing: after every token tagged "NNO" an empty-word token with
 *      `pos = parts_of_speech["IN"]` is inserted, and a gerund ("VBG") that is
 *      directly followed by an "IN" token absorbs that token's word when the
 *      "IN" token is either the last token or is followed by a token whose
 *      `pos.parent` is "glue". The absorbed token stays in the list with its
 *      `word` set to null.
 *   2. Optional gerund re-tagging: when `options.gerund` is truthy, every
 *      "VBG" token gets `pos = parts_of_speech["NG"]`.
 *   3. Chunking: consecutive tokens sharing the same `pos.parent` are merged
 *      into the first token of the run by appending their words, unless the
 *      accumulated word is empty/null or already contains a comma or a
 *      double quote.
 *
 * NOTE: `parts_of_speech` is not defined in this file; it has to be available
 * in the enclosing/global scope whenever an "NNO" token or `options.gerund`
 * is used.
 *
 * NOTE: the token objects passed in are modified in place (their `word` and
 * `pos` fields) and the returned chunks are those same objects. The input
 * array itself is never modified.
 *
 * @param {Array<Object>} data - tagged tokens, in sentence order.
 * @param {Object} [options] - optional settings; only `gerund` is read.
 * @returns {Array<Object>} the chunked tokens; an empty array for empty input.
 */
chunker = (function() {
  var PHRASE_BREAK, absorbPreposition;

  // A chunk whose text already contains one of these is never extended.
  PHRASE_BREAK = /(,|")/;

  // Folds the "IN" token that follows the gerund at `index` into the gerund.
  absorbPreposition = function(tokens, index) {
    var next, afterNext;
    next = tokens[index + 1];
    afterNext = tokens[index + 2];
    if (!next || next.pos.tag !== "IN") {
      return;
    }
    if (afterNext == null || afterNext.pos.parent === "glue") {
      tokens[index].word += " " + next.word;
      next.word = null;
    }
  };

  chunker = function(data, options) {
    var tokens, chunked, last, token, i;
    options = options || {};
    if (!data || data.length === 0) {
      return [];
    }

    // Work on a shallow copy so insertions never touch the caller's array.
    tokens = data.slice();

    // Pass 1. The bound is re-read on every iteration on purpose: tokens that
    // are shifted right by an insertion must still be visited.
    for (i = 0; i < tokens.length; i++) {
      token = tokens[i];
      if (token.pos.tag === "NNO") {
        tokens.splice(i + 1, 0, {
          word: "",
          pos: parts_of_speech["IN"]
        });
      }
      if (token.pos.tag === "VBG") {
        absorbPreposition(tokens, i);
      }
    }

    // Pass 2.
    if (options.gerund) {
      for (i = 0; i < tokens.length; i++) {
        if (tokens[i].pos.tag === "VBG") {
          tokens[i].pos = parts_of_speech["NG"];
        }
      }
    }

    // Pass 3. Every token is consumed exactly once: it is either appended to
    // the current chunk or it starts a new one.
    chunked = [tokens[0]];
    for (i = 1; i < tokens.length; i++) {
      token = tokens[i];
      last = chunked[chunked.length - 1];
      if (last.pos.parent === token.pos.parent && last.word && !PHRASE_BREAK.test(last.word)) {
        last.word += " " + token.word;
      } else {
        chunked.push(token);
      }
    }
    return chunked;
  };

  // Expose the function to AMD or CommonJS loaders when one is present.
  if (typeof define !== "undefined" && define.amd) {
    define([], function() {
      return chunker;
    });
  } else if (typeof module !== "undefined" && module.exports) {
    module.exports = chunker;
  }
  return chunker;
})();
// Demo fixture: five pre-tagged tokens ("sally walked to the store").
// Built inside an IIFE so the helper below does not leak into the outer scope.
data = (function() {
  var fixture, addToken;

  fixture = [];

  // Appends one token record; every record gets its own empty `clues` array.
  addToken = function(word, pos, rule) {
    fixture.push({
      word: word,
      pos: pos,
      clues: [],
      rule: rule
    });
  };

  addToken("sally", {
    description: "verb, base form",
    example: "eat",
    parent: "verb",
    tag: "VB"
  }, "lexicon");

  addToken("walked", {
    description: "verb, past tense",
    example: "ate",
    parent: "verb",
    tense: "past",
    tag: "VBD"
  }, "regex");

  addToken("to", {
    description: "to",
    example: "to",
    parent: "glue",
    tag: "TO"
  }, "lexicon");

  addToken("the", {
    description: "Determiner",
    example: "the,some",
    parent: "title",
    tag: "DT"
  }, "lexicon");

  addToken("store", {
    description: "Noun, sing. or mass",
    example: "dog, rain",
    parent: "noun",
    tag: "NN"
  }, "unknown");

  return fixture;
})();

// Chunk the fixture with an empty options object and print the result.
console.log(chunker(data, {}));