// novel-segment
// Version:
// Chinese word segmentation module for Simplified and Traditional Chinese, built from web-novel samples
// (簡繁中文分词模块 以網路小說為樣本)
// 65 lines • 2.36 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.type = exports.init = exports.AdjectiveOptimizer = void 0;
const mod_1 = require("../mod");
const COLORS_1 = require("../mod/COLORS");
/**
 * Re-tags words that were recognized as nouns but actually act as adjectives,
 * or as attributive modifiers of a following noun.
 *
 * Only color words (entries of `COLORS_1.COLOR_ALL`) are handled here.
 */
class AdjectiveOptimizer extends mod_1.SubSModuleOptimizer {
    constructor(...args) {
        super(...args);
        this.name = 'AdjectiveOptimizer';
    }

    /**
     * Walks adjacent word pairs and adds the adjective tag to color words.
     *
     * The word objects are mutated in place: `p` gains `POSTAG.D_A`, and the
     * previous value of `p` is saved in `op` the first time a word is changed.
     *
     * @param {Array<{w: string, p: number, op?: number}>} words - segmented tokens
     * @returns {Array} the same `words` array that was passed in
     */
    doOptimize(words) {
        const POSTAG = this._POSTAG;
        for (let index = 0; index < words.length; index += 1) {
            const word = words[index];
            const nextword = words[index + 1];
            // Both rules need a following word and a color word, so check once.
            if (!nextword || !this._isColorWord(word.w)) {
                continue;
            }
            // <color> + <word tagged D_U>: the color is an adjective
            // (the original note says the dictionary lists colors as nouns).
            if (nextword.p & POSTAG.D_U) {
                this._tagAsAdjective(word, 0);
            }
            // Two consecutive nouns where the first is a color: the color is
            // an adjective as well. D_N is OR-ed back in, as the original did.
            if (word.p & POSTAG.D_N && this.isNominal(nextword.p)) {
                this._tagAsAdjective(word, POSTAG.D_N);
            }
        }
        return words;
    }

    /**
     * Tells whether `pos` is exactly one of the nominal tags.
     *
     * The comparison is strict equality, not a bit test, so a value combining
     * several tag bits does not count as nominal.
     *
     * @param {number} pos - part-of-speech value of a word
     * @returns {boolean}
     */
    isNominal(pos) {
        const POSTAG = this._POSTAG;
        switch (pos) {
            case POSTAG.D_N:
            case POSTAG.A_NT:
            case POSTAG.A_NX:
            case POSTAG.A_NZ:
            case POSTAG.A_NR:
            case POSTAG.A_NS:
            case POSTAG.URL:
                return true;
            default:
                return false;
        }
    }

    /**
     * Checks whether `text` is a key of the color table with a truthy value.
     *
     * The own-property guard keeps tokens such as "constructor" or "toString"
     * from matching members inherited from Object.prototype.
     *
     * @param {string} text - surface form of a word
     * @returns {boolean}
     */
    _isColorWord(text) {
        const table = COLORS_1.COLOR_ALL;
        return Object.prototype.hasOwnProperty.call(table, text) && Boolean(table[text]);
    }

    /**
     * Saves the current tag in `op` (first change only), adds the adjective
     * tag plus `extraFlags`, then reports the change through `debugToken`.
     *
     * @param {{p: number, op?: number}} word - token to modify in place
     * @param {number} extraFlags - additional tag bits to OR into `word.p`
     */
    _tagAsAdjective(word, extraFlags) {
        const POSTAG = this._POSTAG;
        word.op = word.op || word.p;
        word.p |= POSTAG.D_A;
        word.p |= extraFlags;
        this.debugToken(word, {
            [this.name]: true,
        });
    }
}
// CommonJS exports: the class under its own name and as `default`, plus the
// class-level `init` and `type` members. Those two are not declared in this
// file's class body; presumably they come from SubSModuleOptimizer (confirm).
exports.AdjectiveOptimizer = AdjectiveOptimizer;
// Bound to the class so `init` keeps `this` when called as a bare function.
exports.init = AdjectiveOptimizer.init.bind(AdjectiveOptimizer);
exports.type = AdjectiveOptimizer.type;
exports.default = AdjectiveOptimizer;
//# sourceMappingURL=AdjectiveOptimizer.js.map