UNPKG

auto-cms-server

Version:

Automatically turn any webpage into an editable CMS without coding.

277 lines (276 loc) 9.91 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.langDictParser = exports.Langs = exports.LangFileSuffix = exports.to_en = exports.to_hk = exports.en_to_zh = exports.en_to_ko = exports.en_to_ja = exports.en_to_ar = void 0;
exports.extractWrappedText = extractWrappedText;
exports.loadLangFile = loadLangFile;
exports.translateHTML = translateHTML;
exports.setupEasyNMT = setupEasyNMT;
exports.detectLang = detectLang;
const node_easynmt_1 = require("node-easynmt");
const debug_1 = __importDefault(require("debug"));
const env_1 = require("./env");
const fs_1 = require("fs");
const html_1 = require("./html");
const cast_ts_1 = require("cast.ts");
const task_queue_1 = require("@beenotung/tslib/task/task-queue");
const open_google_translator_1 = require("open-google-translator");
const memorize_1 = require("@beenotung/tslib/memorize");
const emoji_regex_1 = __importDefault(require("emoji-regex"));
// Shared emoji matcher used by detectLang.
let emojiRegex = (0, emoji_regex_1.default)();
// Debug logger for this module; only enabled when NODE_ENV is 'development'.
let log = (0, debug_1.default)('auto-cms:i18n');
log.enabled = env_1.env.NODE_ENV == 'development';
// Every Google Translate request in this module is submitted through this queue.
let googleTranslateQueue = new task_queue_1.TaskQueue();
/**
 * Build a memorized English -> `lang` translator.
 *
 * The returned async function returns blank input unchanged. Otherwise it
 * asks Google Translate first and, if that throws or yields an empty
 * translation, falls back to EasyNMT.
 *
 * @param {string} lang target language code passed to both translators
 * @returns {(en: string) => Promise<string>} memorized translator
 */
function gen_from_en(lang) {
    const tag = `en_to_${lang}:`;
    // Primary strategy: Google Translate; an empty translation counts as a failure.
    const viaGoogle = async (source) => {
        const rows = await googleTranslateQueue.runTask(() => (0, open_google_translator_1.TranslateLanguageData)({
            listOfWordsToTranslate: [source],
            fromLanguage: 'en',
            toLanguage: lang,
        }));
        const translated = rows[0].translation;
        if (!translated) {
            throw 'empty translate result';
        }
        return translated;
    };
    // Fallback strategy: EasyNMT.
    const viaEasyNMT = (source) => (0, node_easynmt_1.patchedTranslate)({
        text: source,
        target_lang: lang,
        source_lang: 'en',
        cached: false,
    });
    return (0, memorize_1.memorize)(async (en) => {
        if (en.trim().length === 0) {
            return en;
        }
        log(tag, { en });
        let result;
        try {
            result = await viaGoogle(en);
        }
        catch (error) {
            // If Google Translate fails, use EasyNMT as fallback
            result = await viaEasyNMT(en);
        }
        log(tag, { [lang]: result });
        return result;
    });
}
exports.en_to_ar = gen_from_en('ar');
exports.en_to_ja = gen_from_en('ja'); exports.en_to_ko = gen_from_en('ko'); exports.en_to_zh = (0, memorize_1.memorize)(async (en) => { if (!en.trim()) return en; log('en_to_zh:', { en }); let zh; try { let data = await googleTranslateQueue.runTask(() => (0, open_google_translator_1.TranslateLanguageData)({ listOfWordsToTranslate: [en], fromLanguage: 'en', toLanguage: 'zh-cn', })); zh = data[0].translation; if (!zh) throw 'empty translate result'; } catch (error) { zh = await translateIntoSimplified(en); } log('en_to_zh:', { zh }); return zh; }); exports.to_hk = (0, memorize_1.memorize)(async (en, zh) => { async function translate() { if (!en.trim()) { log('to_hk:', { zh }); return translateIntoTraditional(zh); } try { log('to_hk:', { en }); let data = await googleTranslateQueue.runTask(() => (0, open_google_translator_1.TranslateLanguageData)({ listOfWordsToTranslate: [en], fromLanguage: 'en', toLanguage: 'zh-tw', })); return data[0].translation; } catch (error) { if (!zh.trim()) { throw error; } log('to_hk:', { zh }); return translateIntoTraditional(zh); } } let hk = await translate(); log('to_hk:', { hk }); return hk; }); exports.to_en = (0, memorize_1.memorize)(async (hk, zh) => { async function translate() { if (hk.trim()) { log('to_en:', { hk }); let data = await googleTranslateQueue.runTask(() => (0, open_google_translator_1.TranslateLanguageData)({ listOfWordsToTranslate: [hk], fromLanguage: 'zh-tw', toLanguage: 'en', })); return data[0].translation; } if (zh.trim()) { log('to_en:', { zh }); let data = await googleTranslateQueue.runTask(() => (0, open_google_translator_1.TranslateLanguageData)({ listOfWordsToTranslate: [zh], fromLanguage: 'zh-cn', toLanguage: 'en', })); return data[0].translation; } return hk || zh; } let en = await translate(); log('to_en:', { en }); return en; }); // FIXME: investigate error when translating: New Generative Tool For 3D Scenes launch soon! // FIXME: handle repeated output, e.g. 
// 'YOLOv:' -> 'YOLOV: (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV:) (YOLOV:) (YOLOV): (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOL:) (YOLOV:) (YOLOV:) (YOL:) (YOL:) (YOLOV:) (YOL:) (YOLOV:)'
/**
 * Translate English text to Chinese ('zh') with EasyNMT, logging the input
 * and the output.
 *
 * @param {string} en English source text
 * @returns {Promise<string>} translated text returned by EasyNMT
 */
async function translateIntoSimplified(en) {
    log('translate:', { en });
    const request = {
        text: en,
        target_lang: 'zh',
        source_lang: 'en',
        cached: false,
    };
    const out_text = await (0, node_easynmt_1.patchedTranslate)(request);
    log('translate:', { out_text });
    return out_text;
}
// Converter names accepted in the `converter` field of a zhconvert response.
const zhConverterNames = [
    'Simplified', // convert to Simplified Chinese
    'Traditional', // convert to Traditional Chinese
    'China', // mainland China localization
    'Hongkong', // Hong Kong localization
    'Taiwan', // Taiwan localization
    'Pinyin', // convert to Pinyin
    'Bopomofo', // convert to Bopomofo (Zhuyin)
    'Mars', // convert to "Martian" script
    'WikiSimplified', // Wikipedia-style Simplified Chinese
    'WikiTraditional', // Wikipedia-style Traditional Chinese
];
// Expected shape of a successful (code 0) response from the zhconvert API.
let zhConvertResultParser = (0, cast_ts_1.object)({
    code: (0, cast_ts_1.literal)(0),
    data: (0, cast_ts_1.object)({
        converter: (0, cast_ts_1.enums)(zhConverterNames),
        text: (0, cast_ts_1.string)(),
        textFormat: (0, cast_ts_1.literal)('PlainText'),
        usedModules: (0, cast_ts_1.array)((0, cast_ts_1.string)()),
    }),
    revisions: (0, cast_ts_1.object)({
        build: (0, cast_ts_1.string)(),
        msg: (0, cast_ts_1.string)(),
    }),
});
// zh_cn -> zh_hk
let zhTaskQueue = new task_queue_1.TaskQueue();
/**
 * Convert Simplified Chinese to Traditional Chinese by POSTing to
 * https://api.zhconvert.org/convert with the 'Traditional' converter.
 *
 * Blank input is returned unchanged without calling the service. The response
 * is validated with `zhConvertResultParser` before its text is returned.
 *
 * @param {string} zh_cn Simplified Chinese text
 * @returns {Promise<string>} converted text
 */
async function translateIntoTraditional(zh_cn) {
    if (zh_cn.trim() === '') {
        return zh_cn;
    }
    // use task queue to avoid overload the external service with concurrent requests
    return zhTaskQueue.runTask(async () => {
        log('translate zh:', { zh_cn });
        const payload = JSON.stringify({ text: zh_cn, converter: 'Traditional' });
        const res = await fetch('https://api.zhconvert.org/convert', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: payload,
        });
        const json = await res.json();
        const zh_hk = zhConvertResultParser.parse(json).data.text;
        log('translate zh:', { zh_hk });
        return zh_hk;
    });
}
/**
 * Collect every `{{...}}` placeholder found in the given HTML.
 *
 * @param {string} html markup to scan
 * @returns {string[]} matched placeholders including their braces, in order
 *   of appearance; an empty array when there are none
 */
function extractWrappedText(html) {
    const placeholderPattern = /{{(.*?)}}/gms;
    const found = html.match(placeholderPattern);
    return found === null ? [] : found;
}
exports.LangFileSuffix = '.json';
exports.Langs = [ { code: 'en', name: 'English' }, { code: 'zh_cn', name: 'Simplified Chinese' }, { code: 'zh_hk', name: 'Traditional Chinese' }, { code: 'ja', name: 'Japanese' }, { code: 'ko', name: 'Korean' }, { code: 'ar', name: 'Arabic' }, ]; exports.langDictParser = (0, cast_ts_1.dict)({ key: (0, cast_ts_1.string)(), value: (0, cast_ts_1.dict)({ key: (0, cast_ts_1.string)({ sampleValues: ['en', 'zh_cn', 'zh_hk', 'ja', 'ko', 'ar'], }), value: (0, cast_ts_1.string)(), }), }); function loadLangFile(file) { try { let text = (0, fs_1.readFileSync)(file).toString(); let json = JSON.parse(text); return exports.langDictParser.parse(json); } catch (error) { // file not found return null; } } function translateHTML(options) { let { html, file, lang } = options; let dict = loadLangFile(file); if (!dict) return html; let matches = extractWrappedText(html); for (let key of matches) { let word = dict[key]; if (!word) continue; let text = word[lang]; if (!text) continue; html = html.replaceAll(key, (0, html_1.encodeHTML)(text).replaceAll('&nbsp;', ' ')); } return html; } async function setupEasyNMT() { try { await (0, node_easynmt_1.autoStartServer)({ debug: env_1.env.NODE_ENV == 'development', }); } catch (error) { // i18n module is optional, fine to continue without halt console.error('Warning: Failed to start EasyNMT docker container for multi language translation.', String(error)); } } function detectLang(text) { for (let char of text) { let code = char.charCodeAt(0); // Arabic character range if (code >= 0x0600 && code <= 0x06ff && !emojiRegex.test(char)) { return 'ar'; } // Japanese Hiragana range if (code >= 0x3040 && code <= 0x309f && !emojiRegex.test(char)) { return 'ja'; } // Japanese Katakana range if (code >= 0x30a0 && code <= 0x30ff && !emojiRegex.test(char)) { return 'ja'; } // Korean Hangul range if (code >= 0xac00 && code <= 0xd7af && !emojiRegex.test(char)) { return 'ko'; } // Chinese character range (includes Kanji used in Japanese) if (code >= 20000 && 
!emojiRegex.test(char)) { return 'zh'; } } return 'en'; }