penn-treebank-sample
Version:
A non-commercial, fair-use subset of the Penn Treebank, in JSON
35 lines (34 loc) • 737 B
JavaScript
//a (opinionated, reductive) mapping of penn-tagset to the compromise tagset
module.exports = {
CC: 'Conjunction',
CD: 'Cardinal',
DT: 'Determiner',
FW: 'Expression',
IN: 'Preposition',
JJ: 'Adjective',
JJR: 'Comparative',
JJS: 'Superlative',
MD: 'Verb',
NN: 'Noun',
NNS: 'Noun',
NNP: 'Noun',
NNPS: 'Noun',
POS: 'Possessive',
PRP: 'Pronoun',
PRP$: 'Pronoun',
RB: 'Adverb',
RBR: 'Comparative',
RBS: 'Superlative',
TO: 'Conjunction',
UH: 'Expression',
VB: 'Verb',
VBD: 'Verb',
VBG: 'Verb',
VBN: 'Verb', // past participle
VBP: 'Verb', // non-3rd person singular present
VBZ: 'Verb', // 3rd person singular present
WDT: 'Determiner',
WP: 'Pronoun',
WP$: 'Noun',
WRB: 'Adverb'
}