hanzi-tools
Version:
Converts from Chinese characters to pinyin, between simplified and traditional, and does word segmentation.
18 lines (16 loc) • 526 B
JavaScript
let nodejieba = require("nodejieba"),
{ isCharacterText } = require("./util");
/**
 * Segments `text` with `nodejieba.tag` and returns the resulting tokens,
 * breaking certain multi-unit tokens apart into individually tagged characters.
 *
 * A token is broken apart when its `word` is longer than one UTF-16 unit and
 * either its tag is "x", or its tag is "n" and the word contains "吗".
 * Every character of such a word is passed to `nodejieba.tag` on its own and
 * the first token of that result is used. All other tokens are copied through
 * as fresh `{ word, tag }` objects.
 *
 * NOTE(review): presumably "x" marks words the tagger could not classify and
 * the "吗" rule corrects a mis-segmentation — confirm against nodejieba's tag set.
 *
 * @param {string} text - Text to segment and tag.
 * @returns {Array<{word: string, tag: string}>} Tokens in their original order.
 */
function tag(text) {
    const result = [];
    for (const token of nodejieba.tag(text)) {
        const { word, tag: pos } = token;
        // Same test (and evaluation order) as "long enough AND needs splitting", negated.
        const keepWhole = !(word.length > 1 && (pos === "x" || (pos === "n" && word.includes("吗"))));
        if (keepWhole) {
            result.push({ word, tag: pos });
            continue;
        }
        // Iterating the string walks it by code point, so surrogate pairs stay intact.
        for (const piece of word) {
            const [first] = nodejieba.tag(piece);
            result.push(first);
        }
    }
    return result;
}
// Expose the `tag` function as this module's sole export.
module.exports = tag;