cjk-readings
Version:
Web service that generates readings for Chinese characters.
83 lines (66 loc) • 2.07 kB
text/typescript
/**
* Parts of this file are based on:
* - https://github.com/hotoo/pinyin/blob/master/lib/index.js
* - https://github.com/hotoo/pinyin/blob/master/lib/pinyin.js
* Copyright 闲耘 <hotoo.cn@gmail.com>
* Used under MIT License
*/
import jieba from 'nodejieba'
import PINYIN_DICT from 'pinyin/data/dict-zi'
import PHRASES_DICT from 'pinyin/data/phrases-dict'
import { TextPart } from './types'
/**
 * Splits text into segments (words/phrases) using jieba's `cutSmall`.
 *
 * @param text - Raw input text to segment.
 * @returns The segments in the order jieba produced them.
 */
function segmentIntoPhrases(text: string): string[] {
  // NOTE(review): presumably the second argument of `cutSmall` is the maximum
  // word length of a segment — confirm against the nodejieba documentation.
  const wordLengthLimit = 4
  const segments: string[] = jieba.cutSmall(text, wordLengthLimit)
  return segments
}
/**
 * Looks up the readings of a single character in `PINYIN_DICT`.
 *
 * Only the first Unicode code point of `char` is considered; anything after
 * it is ignored. Working on code points (rather than UTF-16 code units) keeps
 * characters outside the Basic Multilingual Plane intact instead of cutting
 * them down to a lone surrogate.
 *
 * @param char - The character to convert. May be empty.
 * @returns A `[character, readings]` tuple. `readings` is empty when the
 *   dictionary has no entry for the character; an empty input yields `['', []]`.
 */
function convertCharacter(char: string): TextPart {
  const codePoint = char.codePointAt(0)
  if (codePoint === undefined) {
    // Empty input: there is no character to look up. Returning here also
    // avoids recursing forever on '' (whose first character is again '').
    return [char, []]
  }

  const firstChar = String.fromCodePoint(codePoint)
  const entry = PINYIN_DICT[codePoint]
  if (!entry) {
    return [firstChar, []]
  }
  // Dictionary entries are comma-separated lists of readings.
  return [firstChar, entry.split(',')]
}
/**
 * Converts a multi-character segment into one text part per character.
 *
 * If the whole segment is a key of `PHRASES_DICT`, the phrase-specific
 * readings are used, pairing the i-th character with the i-th reading list.
 * Otherwise each character is converted on its own via `convertCharacter`.
 *
 * @param phrase - The segment to convert.
 * @returns One `[character, readings]` tuple per converted character.
 */
function convertPhrase(phrase: string): TextPart[] {
  // Split by code point so that characters outside the BMP stay aligned with
  // their reading list instead of being indexed as two separate code units.
  const chars = Array.from(phrase)

  // Call through Object.prototype so the check does not rely on the
  // dictionary object inheriting (or not shadowing) `hasOwnProperty`.
  if (Object.prototype.hasOwnProperty.call(PHRASES_DICT, phrase)) {
    const phraseReadings: string[][] = PHRASES_DICT[phrase]
    return phraseReadings.map((readings, i): TextPart => [chars[i], readings])
  }

  return chars.map((char): TextPart => convertCharacter(char))
}
/**
 * Converts text into an ordered list of `[text, readings]` parts.
 *
 * The text is segmented with `segmentIntoPhrases`. A segment whose first
 * character has an entry in `PINYIN_DICT` is converted character by character
 * (or via the phrase dictionary for multi-character segments). Consecutive
 * segments without such an entry are merged into a single part with an empty
 * readings list.
 *
 * @param text - The text to convert.
 * @returns The parts in source order.
 */
function convertText(text: string): TextPart[] {
  const parts: TextPart[] = []
  // Buffer for consecutive segments that have no reading.
  let readingless = ''

  // Emits the buffered reading-less run (if any) as one part and resets it.
  const flushReadingless = (): void => {
    if (readingless.length > 0) {
      parts.push([readingless, []])
      readingless = ''
    }
  }

  for (const segment of segmentIntoPhrases(text)) {
    if (!PINYIN_DICT[segment.charCodeAt(0)]) {
      readingless += segment
      continue
    }

    // A segment with readings ends the current reading-less run.
    flushReadingless()

    if (segment.length === 1) {
      // Must be push, not concat: a TextPart is itself an array, so concat
      // would spread the tuple and add its two elements as separate entries.
      parts.push(convertCharacter(segment))
    } else {
      parts.push(...convertPhrase(segment))
    }
  }

  // Finish off with any trailing reading-less characters.
  flushReadingless()
  return parts
}
/**
 * Public entry point: produces the reading parts for the given text.
 *
 * @param text - The text to generate readings for.
 * @returns Whatever `convertText` returns for `text`.
 */
export function generatePinyin(text: string): TextPart[] {
  const parts: TextPart[] = convertText(text)
  return parts
}