UNPKG

novel-segment

Version:

Chinese word segmentation 簡繁中文分词模块 以網路小說為樣本

51 lines 1.45 kB
'use strict'; Object.defineProperty(exports, "__esModule", { value: true }); exports.type = exports.init = exports.ZhuyinTokenizer = void 0; const mod_1 = require("../mod"); /** * 注音 */ class ZhuyinTokenizer extends mod_1.SubSModuleTokenizer { constructor() { super(...arguments); this.name = 'ZhuyinTokenizer'; } _cache(...argv) { super._cache(...argv); } split(words) { return this._splitUnset(words, this.splitZhuyin); } splitZhuyin(text, cur) { let ret = []; let self = this; let _r = /[\u31A0-\u31BA\u3105-\u312E]/u; if (!_r.test(text)) { return null; } text .split(/([\u31A0-\u31BA\u3105-\u312E]+)/u) .forEach(function (w, i) { if (w !== '') { if (_r.test(w)) { ret.push(self.debugToken({ w, }, { [self.name]: true, }, true)); } else { ret.push({ w, }); } } }); return ret.length ? ret : null; } } exports.ZhuyinTokenizer = ZhuyinTokenizer; exports.init = ZhuyinTokenizer.init.bind(ZhuyinTokenizer); exports.type = ZhuyinTokenizer.type; exports.default = ZhuyinTokenizer; //# sourceMappingURL=ZhuyinTokenizer.js.map