UNPKG

@awesome-fe/translate

Version:
165 lines 6.71 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.splitSubtitles = exports.splitTimelineBySentence = exports.MAX_VISUAL_LENGTH = exports.mergeTimelineBySentence = exports.SubtitleTranslator = void 0; const abstract_translator_1 = require("./abstract-translator"); const subtitle_1 = require("../dom/subtitle/subtitle"); const common_1 = require("../dom/common"); class SubtitleTranslator extends abstract_translator_1.AbstractTranslator { parse(text) { return subtitle_1.subtitle.parse(text); } serialize(doc) { return subtitle_1.subtitle.stringify(doc); } translateDoc(doc, options) { // 建立文本到时间轴的映射 const wholeSentences = mergeTimelineBySentence(doc.items); // 翻译文本 this.translateWholeSentences(wholeSentences).then((wholeSentences) => { // 将翻译结果映射回时间轴,并根据标点进行适当的拆分 doc.items = wholeSentences.map((wholeSentence) => { return { cue: '', startTime: wholeSentence.startTime, endTime: wholeSentence.endTime, text: mergeWholeSentence(wholeSentence), }; }); doc.meta.language = 'zh-Hans'; }); return doc; } translateWholeSentences(wholeSentences) { return Promise.all(wholeSentences.map((wholeSentence) => { return this.translateSentence(wholeSentence.original, wholeSentence.translation, 'plain').then((result) => { wholeSentence.translation = result; }); })).then(() => wholeSentences); } } exports.SubtitleTranslator = SubtitleTranslator; function mergeWholeSentence(wholeSentence) { if (wholeSentence.original.trim() === wholeSentence.translation.trim()) { return wholeSentence.original; } else { return `${wholeSentence.original}\n${wholeSentence.translation}`; } } function splitTranslation(text) { const lines = text.split('\n'); const firstChineseLineIndex = lines.findIndex((line) => (0, common_1.containsChinese)(line)); if (firstChineseLineIndex === -1) { return { original: text.replace(/\n/g, ' '), translation: '' }; } else { const original = lines.slice(0, firstChineseLineIndex).join(' '); const translation = lines.slice(firstChineseLineIndex).join(' '); return { original, translation }; } } function mergeTimelineBySentence(items) { const wholeSentences = []; // 根据句号等标点合并文本,同时合并时间轴 let text = ''; let startTime = items[0].startTime ?? 0; let originalItems = []; for (let i = 0; i < items.length; i++) { const item = items[i]; originalItems.push(item); text += item.text + ' '; if (/[.!?]$/.test(item.text) || i === items.length - 1) { const { original, translation } = splitTranslation(text.trim()); wholeSentences.push({ original, translation, startTime, endTime: item.endTime, items: originalItems }); startTime = item.endTime; originalItems = []; text = ''; } } return wholeSentences; } exports.mergeTimelineBySentence = mergeTimelineBySentence; // 求字符串的视觉长度,中文算两个字符,英文算一个字符 function visualLengthOf(text) { return text.split('').map(it => (0, common_1.containsChinese)(it) ? 2 : 1).reduce((a, b) => a + b, 0); } function splitChineseSentence(translation, maxVisualLength) { const fragments = translation.split(/(?<=[,。!?;])/); const result = []; let text = ''; // 给定字符处是否可以断开 function canBreakAt(char) { return (0, common_1.containsChinese)(char); } function splitLongSentence(text) { const result = []; let prevPos = 0; let i = 0; while (i <= text.length) { const currentText = text.slice(prevPos, i); if (visualLengthOf(currentText) > maxVisualLength && canBreakAt(text[i]) || i === text.length) { result.push(currentText); prevPos = i; } ++i; } return result; } for (let i = 0; i < fragments.length; i++) { text += fragments[i]; const thisFragmentExceeded = visualLengthOf(text) > maxVisualLength; const nextFragmentExceeded = visualLengthOf(text + (fragments[i + 1] ?? '')) > maxVisualLength; const isLastFragment = i === fragments.length - 1; if (thisFragmentExceeded) { const subFragments = splitLongSentence(text); result.push(subFragments.join('\n')); text = ''; } else if (nextFragmentExceeded || isLastFragment) { result.push(text.trim()); text = ''; } } return result; } function splitTimeline(wholeSentence, maxVisualLength) { const result = []; const fragments = splitChineseSentence(wholeSentence.translation, maxVisualLength); const unitDuration = (wholeSentence.endTime - wholeSentence.startTime) / visualLengthOf(wholeSentence.translation); let startTime = wholeSentence.startTime; for (let text of fragments) { // 按视觉长度估算本段文本的实际长度 const currentDuration = Math.round(visualLengthOf(text) * unitDuration); result.push({ startTime: startTime, endTime: startTime + currentDuration, text, cue: '', }); startTime += currentDuration; } return result; } exports.MAX_VISUAL_LENGTH = 36; function splitTimelineBySentence(wholeSentences, maxVisualLength = exports.MAX_VISUAL_LENGTH) { return wholeSentences.flatMap((wholeSentence) => splitTimeline(wholeSentence, maxVisualLength)); } exports.splitTimelineBySentence = splitTimelineBySentence; function splitSubtitles(content, maxVisualLength = exports.MAX_VISUAL_LENGTH) { const dom = subtitle_1.subtitle.parse(content); const wholeSentences = dom.items.map(it => { const [original, translation] = it.text.split(/\r?\n/); return ({ startTime: it.startTime, endTime: it.endTime, cue: it.cue, original, translation, items: [], }); }); const items = wholeSentences.flatMap((wholeSentence) => splitTimeline(wholeSentence, maxVisualLength)); return subtitle_1.subtitle.stringify({ meta: dom.meta, items }); } exports.splitSubtitles = splitSubtitles; //# sourceMappingURL=subtitle-translator.js.map