auto-cms-server
Version:
Auto turn any webpage into editable CMS without coding.
277 lines (276 loc) • 9.91 kB
JavaScript
;
// Interop helper emitted by the TypeScript compiler: returns modules flagged
// with `__esModule` untouched and wraps anything else as `{ default: mod }`,
// so `<helper result>.default` works for both kinds of module.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag this CommonJS module with `__esModule` (the marker that interop helpers
// such as __importDefault check for).
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the value exports as undefined; each one is assigned its real
// value further down in this file.
exports.langDictParser = exports.Langs = exports.LangFileSuffix = exports.to_en = exports.to_hk = exports.en_to_zh = exports.en_to_ko = exports.en_to_ja = exports.en_to_ar = void 0;
// Function declarations are hoisted, so they can be exported before their
// definitions appear below.
exports.extractWrappedText = extractWrappedText;
exports.loadLangFile = loadLangFile;
exports.translateHTML = translateHTML;
exports.setupEasyNMT = setupEasyNMT;
exports.detectLang = detectLang;
const node_easynmt_1 = require("node-easynmt");
const debug_1 = __importDefault(require("debug"));
const env_1 = require("./env");
const fs_1 = require("fs");
const html_1 = require("./html");
const cast_ts_1 = require("cast.ts");
const task_queue_1 = require("@beenotung/tslib/task/task-queue");
const open_google_translator_1 = require("open-google-translator");
const memorize_1 = require("@beenotung/tslib/memorize");
const emoji_regex_1 = __importDefault(require("emoji-regex"));
// Shared emoji matcher; detectLang uses it to skip emoji characters.
// NOTE(review): if emoji-regex returns a global (`g`) regex, `.test()` is
// stateful through `lastIndex` — confirm against the package documentation.
let emojiRegex = (0, emoji_regex_1.default)();
// Debug logger for this module, under the 'auto-cms:i18n' namespace.
let log = (0, debug_1.default)('auto-cms:i18n');
// Only emit log output when NODE_ENV is 'development'.
log.enabled = env_1.env.NODE_ENV == 'development';
// Every Google Translate request in this file is submitted through this queue.
// NOTE(review): presumably this limits concurrent requests — confirm TaskQueue semantics.
let googleTranslateQueue = new task_queue_1.TaskQueue();
/**
 * Build a translator from English into `lang`.
 *
 * The returned function is wrapped with `memorize` and, for a given English
 * text:
 *  - returns whitespace-only input unchanged;
 *  - asks Google Translate through `googleTranslateQueue`;
 *  - falls back to EasyNMT (`patchedTranslate`) when Google Translate throws
 *    or yields an empty translation.
 *
 * @param {string} lang target language code, used both as the Google
 *   Translate `toLanguage` and as the EasyNMT `target_lang`
 * @returns {(en: string) => Promise<string>} the translator function
 */
function gen_from_en(lang) {
    const label = `en_to_${lang}:`;
    return (0, memorize_1.memorize)(async (en) => {
        // nothing to translate
        if (!en.trim())
            return en;
        log(label, { en });
        let result;
        try {
            const data = await googleTranslateQueue.runTask(() => (0, open_google_translator_1.TranslateLanguageData)({
                listOfWordsToTranslate: [en],
                fromLanguage: 'en',
                toLanguage: lang,
            }));
            result = data[0].translation;
            // an empty answer is treated as a failure so the fallback below runs
            if (!result)
                throw new Error('empty translate result');
        }
        catch (error) {
            // Record why Google Translate was skipped instead of hiding it,
            // then use EasyNMT as fallback
            log(label, 'google translate failed, fallback to EasyNMT:', String(error));
            result = await (0, node_easynmt_1.patchedTranslate)({
                text: en,
                target_lang: lang,
                source_lang: 'en',
                cached: false,
            });
        }
        log(label, { [lang]: result });
        return result;
    });
}
// English -> Arabic / Japanese / Korean translators, each built by gen_from_en
// with the corresponding target language code.
exports.en_to_ar = gen_from_en('ar');
exports.en_to_ja = gen_from_en('ja');
exports.en_to_ko = gen_from_en('ko');
exports.en_to_zh = (0, memorize_1.memorize)(async (en) => {
if (!en.trim())
return en;
log('en_to_zh:', { en });
let zh;
try {
let data = await googleTranslateQueue.runTask(() => (0, open_google_translator_1.TranslateLanguageData)({
listOfWordsToTranslate: [en],
fromLanguage: 'en',
toLanguage: 'zh-cn',
}));
zh = data[0].translation;
if (!zh)
throw 'empty translate result';
}
catch (error) {
zh = await translateIntoSimplified(en);
}
log('en_to_zh:', { zh });
return zh;
});
exports.to_hk = (0, memorize_1.memorize)(async (en, zh) => {
async function translate() {
if (!en.trim()) {
log('to_hk:', { zh });
return translateIntoTraditional(zh);
}
try {
log('to_hk:', { en });
let data = await googleTranslateQueue.runTask(() => (0, open_google_translator_1.TranslateLanguageData)({
listOfWordsToTranslate: [en],
fromLanguage: 'en',
toLanguage: 'zh-tw',
}));
return data[0].translation;
}
catch (error) {
if (!zh.trim()) {
throw error;
}
log('to_hk:', { zh });
return translateIntoTraditional(zh);
}
}
let hk = await translate();
log('to_hk:', { hk });
return hk;
});
exports.to_en = (0, memorize_1.memorize)(async (hk, zh) => {
async function translate() {
if (hk.trim()) {
log('to_en:', { hk });
let data = await googleTranslateQueue.runTask(() => (0, open_google_translator_1.TranslateLanguageData)({
listOfWordsToTranslate: [hk],
fromLanguage: 'zh-tw',
toLanguage: 'en',
}));
return data[0].translation;
}
if (zh.trim()) {
log('to_en:', { zh });
let data = await googleTranslateQueue.runTask(() => (0, open_google_translator_1.TranslateLanguageData)({
listOfWordsToTranslate: [zh],
fromLanguage: 'zh-cn',
toLanguage: 'en',
}));
return data[0].translation;
}
return hk || zh;
}
let en = await translate();
log('to_en:', { en });
return en;
});
// FIXME: investigate error when translating: New Generative Tool For 3D Scenes launch soon!
// FIXME: handle repeated output, e.g. 'YOLOv:' -> 'YOLOV: (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV): (YOLOV:) (YOLOV:) (YOLOV): (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOLOV:) (YOL:) (YOLOV:) (YOLOV:) (YOL:) (YOL:) (YOLOV:) (YOL:) (YOLOV:)'
/**
 * Translate English text into Chinese ('zh') with EasyNMT.
 *
 * @param {string} en English source text
 * @returns {Promise<string>} whatever `patchedTranslate` resolves to
 */
async function translateIntoSimplified(en) {
    log('translate:', { en });
    const request = {
        text: en,
        target_lang: 'zh',
        source_lang: 'en',
        cached: false,
    };
    const translated = await (0, node_easynmt_1.patchedTranslate)(request);
    log('translate:', { out_text: translated });
    return translated;
}
// Validator for the JSON body returned by the zhconvert API: parsing only
// succeeds for a response whose `code` is 0 and whose `data` / `revisions`
// have the shape described below.
let zhConvertResultParser = (() => {
    const { object, literal, enums, string, array } = cast_ts_1;
    // converter names accepted in the `data.converter` field
    const converterNames = [
        'Simplified', // to Simplified Chinese
        'Traditional', // to Traditional Chinese
        'China', // Mainland China localization
        'Hongkong', // Hong Kong localization
        'Taiwan', // Taiwan localization
        'Pinyin', // to Pinyin
        'Bopomofo', // to Bopomofo (Zhuyin)
        'Mars', // to "Martian" script
        'WikiSimplified', // Wikipedia-style Simplified Chinese
        'WikiTraditional', // Wikipedia-style Traditional Chinese
    ];
    return object({
        code: literal(0),
        data: object({
            converter: enums(converterNames),
            text: string(),
            textFormat: literal('PlainText'),
            usedModules: array(string()),
        }),
        revisions: object({ build: string(), msg: string() }),
    });
})();
// zh_cn -> zh_hk
// Queue through which translateIntoTraditional submits its conversion requests.
let zhTaskQueue = new task_queue_1.TaskQueue();
/**
 * Convert Simplified Chinese text into Traditional Chinese through the
 * zhconvert web API.
 *
 * Whitespace-only input is returned unchanged without any request.
 *
 * @param {string} zh_cn Simplified Chinese text
 * @returns {Promise<string>} the `data.text` field of the validated response
 */
async function translateIntoTraditional(zh_cn) {
    if (zh_cn.trim().length === 0) {
        return zh_cn;
    }
    const convert = async () => {
        log('translate zh:', { zh_cn });
        const response = await fetch('https://api.zhconvert.org/convert', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({ text: zh_cn, converter: 'Traditional' }),
        });
        const json = await response.json();
        const zh_hk = zhConvertResultParser.parse(json).data.text;
        log('translate zh:', { zh_hk });
        return zh_hk;
    };
    // use task queue to avoid overload the external service with concurrent requests
    return zhTaskQueue.runTask(convert);
}
/**
 * Collect every `{{...}}` placeholder found in `html`.
 *
 * @param {string} html text to scan
 * @returns {string[]} the matched placeholders including their braces, in
 *   order of appearance (duplicates kept); an empty array when none is found
 */
function extractWrappedText(html) {
    // non-greedy so adjacent placeholders are matched separately;
    // the `s` flag lets a placeholder span multiple lines
    const placeholderPattern = /{{(.*?)}}/gms;
    return html.match(placeholderPattern) ?? [];
}
exports.LangFileSuffix = '.json';
exports.Langs = [
{ code: 'en', name: 'English' },
{ code: 'zh_cn', name: 'Simplified Chinese' },
{ code: 'zh_hk', name: 'Traditional Chinese' },
{ code: 'ja', name: 'Japanese' },
{ code: 'ko', name: 'Korean' },
{ code: 'ar', name: 'Arabic' },
];
exports.langDictParser = (0, cast_ts_1.dict)({
key: (0, cast_ts_1.string)(),
value: (0, cast_ts_1.dict)({
key: (0, cast_ts_1.string)({
sampleValues: ['en', 'zh_cn', 'zh_hk', 'ja', 'ko', 'ar'],
}),
value: (0, cast_ts_1.string)(),
}),
});
/**
 * Read and validate a language dictionary file.
 *
 * This is best-effort: the function never throws. A missing file (ENOENT) is
 * an expected situation and is silent; any other failure (unreadable file,
 * malformed JSON, content rejected by `langDictParser`) is reported on stderr
 * so a broken dictionary does not go unnoticed.
 *
 * @param {string} file path of the dictionary file
 * @returns the parsed dictionary, or null when it cannot be loaded
 */
function loadLangFile(file) {
    try {
        const text = (0, fs_1.readFileSync)(file).toString();
        const json = JSON.parse(text);
        return exports.langDictParser.parse(json);
    }
    catch (error) {
        const isFileNotFound = error?.code === 'ENOENT';
        if (!isFileNotFound) {
            console.error('Warning: Failed to load lang file:', file, String(error));
        }
        return null;
    }
}
/**
 * Replace every `{{...}}` placeholder in `options.html` with its translation
 * for `options.lang`, looked up in the dictionary file `options.file`.
 *
 * The html is returned unchanged when the dictionary cannot be loaded.
 * Placeholders without a dictionary entry, or without a non-empty text for
 * the requested language, are left as they are. Translated text is passed
 * through `encodeHTML` before insertion.
 *
 * @param {{ html: string, file: string, lang: string }} options
 * @returns {string} the html with known placeholders substituted
 */
function translateHTML(options) {
    const { html, file, lang } = options;
    const dict = loadLangFile(file);
    if (!dict)
        return html;
    let translated = html;
    for (const key of extractWrappedText(html)) {
        const text = dict[key]?.[lang];
        if (!text)
            continue;
        // NOTE(review): as written, the inner replaceAll swaps a space for a
        // space; one side may originally have been a non-breaking space or
        // `&nbsp;` — confirm against the upstream source.
        const replacement = (0, html_1.encodeHTML)(text).replaceAll(' ', ' ');
        // Use a replacer function so `$$`, `$&`, `$1`, ... inside the
        // translated text are inserted literally instead of being
        // interpreted as replacement patterns.
        translated = translated.replaceAll(key, () => replacement);
    }
    return translated;
}
/**
 * Start the EasyNMT server used for translation, with its debug option turned
 * on when NODE_ENV is 'development'.
 *
 * A startup failure is reported on stderr and otherwise ignored, so the
 * returned promise never rejects because of it.
 *
 * @returns {Promise<void>}
 */
async function setupEasyNMT() {
    try {
        const serverOptions = { debug: env_1.env.NODE_ENV == 'development' };
        await (0, node_easynmt_1.autoStartServer)(serverOptions);
    }
    catch (startupError) {
        // i18n module is optional, fine to continue without halt
        const reason = String(startupError);
        console.error('Warning: Failed to start EasyNMT docker container for multi language translation.', reason);
    }
}
/**
 * Guess the language of `text` from the first character that falls into a
 * known script range and is not an emoji.
 *
 * Ranges are checked per code point, in this order:
 *  - U+0600–U+06FF (Arabic)                      -> 'ar'
 *  - U+3040–U+30FF (Hiragana and Katakana)       -> 'ja'
 *  - U+AC00–U+D7AF (Korean Hangul)               -> 'ko'
 *  - anything else at or above U+4E00, the start of the CJK Unified
 *    Ideographs block (includes Kanji used in Japanese) -> 'zh'
 *
 * @param {string} text text to inspect
 * @returns {'ar' | 'ja' | 'ko' | 'zh' | 'en'} 'en' when no character matches
 */
function detectLang(text) {
    // The emoji regex is shared at module level; with the `g` flag `.test()`
    // resumes from `lastIndex`, so reset it to make every check independent.
    const isEmoji = (char) => {
        emojiRegex.lastIndex = 0;
        return emojiRegex.test(char);
    };
    // `for...of` walks code points, so read the full code point rather than
    // only the first UTF-16 unit.
    for (const char of text) {
        const code = char.codePointAt(0);
        let lang = null;
        if (code >= 0x0600 && code <= 0x06ff) {
            // Arabic character range
            lang = 'ar';
        }
        else if (code >= 0x3040 && code <= 0x30ff) {
            // Japanese Hiragana (0x3040-0x309f) and Katakana (0x30a0-0x30ff) ranges
            lang = 'ja';
        }
        else if (code >= 0xac00 && code <= 0xd7af) {
            // Korean Hangul range
            lang = 'ko';
        }
        else if (code >= 0x4e00) {
            // Chinese character range (includes Kanji used in Japanese);
            // 0x4e00 is the first CJK Unified Ideograph
            lang = 'zh';
        }
        if (lang !== null && !isEmoji(char)) {
            return lang;
        }
    }
    return 'en';
}