novel-segment
Version:
Chinese word segmentation 簡繁中文分词模块 以網路小說為樣本
95 lines (94 loc) • 2.71 kB
TypeScript
/**
* Created by user on 2018/4/16/016.
*/
import { EnumWrapper } from "ts-enum-util";
import { Optimizer, SubSModuleOptimizer, ISubOptimizer, ISubOptimizerCreate } from './Optimizer';
import { Tokenizer, SubSModuleTokenizer, ISubTokenizer, ISubTokenizerCreate } from './Tokenizer';
import { SubSModule, ISubSModule, ISubSModuleCreate, ISubSModuleMethod } from './mod';
// Re-export the optimizer, tokenizer and base sub-module symbols imported above,
// so that they are also available from this module.
export { Optimizer, SubSModuleOptimizer, ISubOptimizer, ISubOptimizerCreate };
export { Tokenizer, SubSModuleTokenizer, ISubTokenizer, ISubTokenizerCreate };
export { SubSModule, ISubSModule, ISubSModuleCreate, ISubSModuleMethod };
/**
* Recognition modules.
* Recognition of word types that are forcibly split out.
*
* Each member's string value is identical to its name. Members named `*Tokenizer`
* are declared first, followed by members named `*Optimizer`. Several members
* document ordering constraints relative to other members, so the declaration
* order should not be changed casually.
*/
export declare enum ENUM_SUBMODS {
/**
* URL recognition.
*/
URLTokenizer = "URLTokenizer",
/**
* Wildcards; must come before punctuation recognition.
*/
WildcardTokenizer = "WildcardTokenizer",
/**
* Punctuation recognition.
*/
PunctuationTokenizer = "PunctuationTokenizer",
/**
* Foreign characters and number recognition; must come after punctuation recognition.
*/
ForeignTokenizer = "ForeignTokenizer",
/**
* Dictionary-based recognition.
*/
DictTokenizer = "DictTokenizer",
/**
* Personal name recognition; recommended to come after dictionary recognition.
*/
ChsNameTokenizer = "ChsNameTokenizer",
/**
* NOTE(review): undocumented in the original; presumably a simple Japanese
* tokenizer, judging by the name only — confirm against the implementation.
*/
JpSimpleTokenizer = "JpSimpleTokenizer",
/**
* Zhuyin (phonetic notation).
*/
ZhuyinTokenizer = "ZhuyinTokenizer",
// NOTE(review): the original had a stray doc comment reading "radical" (部首) at this
// position with no member attached to it — presumably a member that was removed or
// is not declared here; confirm.
/**
* Email address recognition.
*/
EmailOptimizer = "EmailOptimizer",
/**
* Personal name recognition optimization.
*/
ChsNameOptimizer = "ChsNameOptimizer",
/**
* Dictionary recognition optimization.
*/
DictOptimizer = "DictOptimizer",
/**
* Date and time recognition optimization.
*/
DatetimeOptimizer = "DatetimeOptimizer",
/**
* Merge words that combine foreign-language and Chinese characters,
* for example `T恤` (T-shirt).
*/
ForeignOptimizer = "ForeignOptimizer",
/**
* Automatically handle `里|裏|后`.
*/
ZhtSynonymOptimizer = "ZhtSynonymOptimizer",
/**
* NOTE(review): undocumented in the original; presumably an adjective-related
* optimization, judging by the name only — confirm against the implementation.
*/
AdjectiveOptimizer = "AdjectiveOptimizer"
}
/**
* Modules that are not included in the default module list and must be
* specified manually.
*/
export declare enum ENUM_SUBMODS_OTHER {
/**
* Single-character segmentation module.
*/
SingleTokenizer = "SingleTokenizer"
}
/**
* Any sub-module name: a member of either `ENUM_SUBMODS` or `ENUM_SUBMODS_OTHER`.
*/
export declare type ENUM_SUBMODS_NAME = ENUM_SUBMODS | ENUM_SUBMODS_OTHER;
/**
* A list of `ENUM_SUBMODS` members.
* NOTE(review): the name suggests these are the members that are "not default"
* (i.e. left out of the default list) — the contents are not visible in this
* declaration; confirm against the implementation.
*/
export declare const LIST_SUBMODS_NOT_DEF: ENUM_SUBMODS[];
/**
* `ts-enum-util` wrapper around the `ENUM_SUBMODS` enum.
*/
export declare const SUBMODS_LIST: EnumWrapper<string, typeof ENUM_SUBMODS>;
/**
* `ts-enum-util` wrapper around the `ENUM_SUBMODS_OTHER` enum.
*/
export declare const SUBMODS_OTHER_LIST: EnumWrapper<string, typeof ENUM_SUBMODS_OTHER>;
/**
* Get the list of sub-modules while preserving the ENUM declaration order.
*
* @param {boolean} all Optional flag. NOTE(review): presumably requests every
* `ENUM_SUBMODS` member rather than only the default subset — the implementation
* is not visible here; confirm.
* @returns {ENUM_SUBMODS[]} The sub-module names, in ENUM order.
*/
export declare function getDefault(all?: boolean): ENUM_SUBMODS[];
// `getDefault` is also this module's default export.
export default getDefault;