UNPKG

cjk-readings

Version:

Web service that generates readings for Chinese characters.

83 lines (66 loc) 2.07 kB
/**
 * Parts of this file are based on:
 * - https://github.com/hotoo/pinyin/blob/master/lib/index.js
 * - https://github.com/hotoo/pinyin/blob/master/lib/pinyin.js
 * Copyright 闲耘 <hotoo.cn@gmail.com>
 * Used under MIT License
 */
import jieba from 'nodejieba'
import PINYIN_DICT from 'pinyin/data/dict-zi'
import PHRASES_DICT from 'pinyin/data/phrases-dict'

import { TextPart } from './types'

/**
 * Splits `text` into segments using jieba's `cutSmall`.
 *
 * NOTE(review): the second argument (4) is presumably the maximum segment
 * length accepted by `cutSmall` — confirm against the nodejieba docs.
 */
function segmentIntoPhrases(text: string): string[] {
  return jieba.cutSmall(text, 4)
}

/**
 * Converts the first character (code point) of `char` into a
 * `[character, readings]` pair.
 *
 * The readings are looked up in `PINYIN_DICT` by code point and split on
 * commas; a character without a dictionary entry gets an empty readings list.
 * An empty string yields `['', []]`.
 */
function convertCharacter(char: string): TextPart {
  // Work on whole code points so that a surrogate pair is never cut in half,
  // and so that an empty string cannot trigger unbounded recursion.
  const codePoint = char.codePointAt(0)
  if (codePoint === undefined) {
    return [char, []]
  }

  const firstChar = String.fromCodePoint(codePoint)
  const readings = PINYIN_DICT[codePoint]
  if (!readings) {
    return [firstChar, []]
  }
  return [firstChar, readings.split(',')]
}

/**
 * Converts a multi-character segment into one `[character, readings]` pair
 * per character.
 *
 * When the whole segment is a key of `PHRASES_DICT`, the phrase-specific
 * readings are paired with the characters by position; otherwise every
 * character is converted on its own via `convertCharacter`.
 */
function convertPhrase(phrase: string): TextPart[] {
  // Iterate by code point so positions line up with the per-character
  // readings even if the phrase contains surrogate pairs.
  const chars = Array.from(phrase)

  // Call through Object.prototype so the check does not depend on the
  // dictionary object exposing its own `hasOwnProperty`.
  if (Object.prototype.hasOwnProperty.call(PHRASES_DICT, phrase)) {
    return PHRASES_DICT[phrase].map(
      (readings, i): TextPart => [chars[i], readings],
    )
  }

  return chars.map((c) => convertCharacter(c))
}

/**
 * Segments `text` and converts every segment into `[text, readings]` pairs.
 *
 * Consecutive segments whose first character has no `PINYIN_DICT` entry are
 * merged into a single pair with an empty readings list.
 */
function convertText(text: string): TextPart[] {
  const parts: TextPart[] = []
  let readingless = ''

  // Emits the accumulated run of reading-less text (if any) as one part.
  const flushReadingless = (): void => {
    if (readingless.length > 0) {
      parts.push([readingless, []])
      readingless = ''
    }
  }

  for (const segment of segmentIntoPhrases(text)) {
    const firstCodePoint = segment.codePointAt(0)
    if (firstCodePoint === undefined || !PINYIN_DICT[firstCodePoint]) {
      readingless += segment
      continue
    }

    // A segment with readings ends the current run of reading-less text.
    flushReadingless()

    if (segment.length === 1) {
      // push, not concat: a TextPart is itself an array, so concat would
      // flatten it into two separate elements.
      parts.push(convertCharacter(segment))
    } else {
      parts.push(...convertPhrase(segment))
    }
  }

  // Finish off with any trailing reading-less characters.
  flushReadingless()

  return parts
}

/**
 * Generates pinyin readings for `text`.
 *
 * @param text - The text to annotate.
 * @returns The text split into `[text, readings]` pairs, in input order;
 *   stretches without known readings carry an empty readings list.
 */
export function generatePinyin(text: string): TextPart[] {
  return convertText(text)
}