// @awesome-fe/translate
// Version: (not specified)
// Translation utils
// 165 lines • 6.71 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.splitSubtitles = exports.splitTimelineBySentence = exports.MAX_VISUAL_LENGTH = exports.mergeTimelineBySentence = exports.SubtitleTranslator = void 0;
const abstract_translator_1 = require("./abstract-translator");
const subtitle_1 = require("../dom/subtitle/subtitle");
const common_1 = require("../dom/common");
/**
 * Translator for subtitle documents.
 *
 * Parsing and serialization are delegated to `subtitle.parse` /
 * `subtitle.stringify`; translation works on whole sentences that are merged
 * from the individual timeline items.
 */
class SubtitleTranslator extends abstract_translator_1.AbstractTranslator {
    /**
     * Parses raw subtitle text into a subtitle document.
     * @param text Raw subtitle file content.
     * @returns The value produced by `subtitle.parse`.
     */
    parse(text) {
        const doc = subtitle_1.subtitle.parse(text);
        return doc;
    }
    /**
     * Serializes a subtitle document back to text.
     * @param doc Subtitle document.
     * @returns The value produced by `subtitle.stringify`.
     */
    serialize(doc) {
        const text = subtitle_1.subtitle.stringify(doc);
        return text;
    }
    /**
     * Starts translating `doc` and returns the same `doc` object immediately.
     *
     * NOTE: the translation promise is neither awaited nor returned here.
     * `doc.items` and `doc.meta.language` are only rewritten once that promise
     * fulfils, i.e. after this method has already returned.
     * NOTE(review): presumably the base class sequences completion before
     * `serialize` is called — confirm against AbstractTranslator.
     *
     * @param doc Subtitle document; mutated in place once translation finishes.
     * @param options Not used by this method.
     * @returns The `doc` argument.
     */
    translateDoc(doc, options) {
        // Map the text onto the timeline: one entry per whole sentence.
        const sentences = mergeTimelineBySentence(doc.items);
        // Writes the translated sentences back as one bilingual item per sentence.
        const applyTranslations = (translated) => {
            doc.items = translated.map((sentence) => ({
                cue: '',
                startTime: sentence.startTime,
                endTime: sentence.endTime,
                text: mergeWholeSentence(sentence),
            }));
            doc.meta.language = 'zh-Hans';
        };
        // Translate the text; the result is applied asynchronously.
        this.translateWholeSentences(sentences).then(applyTranslations);
        return doc;
    }
    /**
     * Requests a translation for every sentence via `this.translateSentence`
     * and stores each result in the sentence's `translation` property.
     * @param wholeSentences Sentences to translate; mutated in place.
     * @returns Promise fulfilled with the same `wholeSentences` array after all
     *          translations have been stored.
     */
    translateWholeSentences(wholeSentences) {
        const pending = wholeSentences.map((sentence) => {
            const request = this.translateSentence(sentence.original, sentence.translation, 'plain');
            return request.then((result) => {
                sentence.translation = result;
            });
        });
        return Promise.all(pending).then(() => wholeSentences);
    }
}
exports.SubtitleTranslator = SubtitleTranslator;
/**
 * Builds the display text for a translated sentence.
 *
 * When original and translation are equal after trimming, only the original
 * (untrimmed) is returned; otherwise both are returned, original first,
 * separated by a newline.
 *
 * @param wholeSentence Object with string `original` and `translation`.
 * @returns The text for the subtitle item.
 */
function mergeWholeSentence(wholeSentence) {
    const { original, translation } = wholeSentence;
    const isSameText = original.trim() === translation.trim();
    return isSameText ? original : `${original}\n${translation}`;
}
/**
 * Splits multi-line text into its original part and its translated part.
 *
 * The first line for which `containsChinese` is true starts the translation;
 * every line before it belongs to the original. Lines within each part are
 * joined with a single space. When no line contains Chinese, the whole text
 * (newlines replaced by spaces) is the original and the translation is ''.
 *
 * @param text Text whose lines are separated by '\n'.
 * @returns `{ original, translation }`.
 */
function splitTranslation(text) {
    const rows = text.split('\n');
    const chineseStart = rows.findIndex((row) => (0, common_1.containsChinese)(row));
    if (chineseStart !== -1) {
        return {
            original: rows.slice(0, chineseStart).join(' '),
            translation: rows.slice(chineseStart).join(' '),
        };
    }
    return { original: text.replace(/\n/g, ' '), translation: '' };
}
/**
 * Merges consecutive timeline items into whole sentences.
 *
 * Item texts are concatenated (separated by a space) until an item whose text
 * ends with '.', '!' or '?' is reached, or the last item is reached. The
 * accumulated text is then split into original/translation by
 * `splitTranslation` and emitted as one sentence.
 *
 * Timing: the first sentence starts at `items[0].startTime` (0 when that is
 * null/undefined); every following sentence starts at the `endTime` of the
 * item that closed the previous sentence. A sentence ends at the `endTime` of
 * its closing item.
 *
 * @param items Timeline items with `text`, `startTime` and `endTime`.
 * @returns Array of `{ original, translation, startTime, endTime, items }`,
 *          where `items` holds the source items of that sentence. An empty
 *          input yields an empty array.
 */
function mergeTimelineBySentence(items) {
    const wholeSentences = [];
    // Nothing to merge; also avoids dereferencing items[0] on an empty array.
    if (items.length === 0) {
        return wholeSentences;
    }
    // Merge text at sentence-ending punctuation and merge the timeline along with it.
    let text = '';
    let startTime = items[0].startTime ?? 0;
    let originalItems = [];
    for (let i = 0; i < items.length; i++) {
        const item = items[i];
        originalItems.push(item);
        text += item.text + ' ';
        const endsSentence = /[.!?]$/.test(item.text);
        const isLastItem = i === items.length - 1;
        if (endsSentence || isLastItem) {
            const { original, translation } = splitTranslation(text.trim());
            wholeSentences.push({ original, translation, startTime, endTime: item.endTime, items: originalItems });
            // The next sentence starts where this one ended.
            startTime = item.endTime;
            originalItems = [];
            text = '';
        }
    }
    return wholeSentences;
}
exports.mergeTimelineBySentence = mergeTimelineBySentence;
/**
 * Computes the visual length of a string: every UTF-16 code unit for which
 * `containsChinese` is true counts as 2, every other code unit counts as 1.
 *
 * @param text String to measure.
 * @returns Sum of the per-unit widths (0 for the empty string).
 */
function visualLengthOf(text) {
    let total = 0;
    for (const unit of text.split('')) {
        total += (0, common_1.containsChinese)(unit) ? 2 : 1;
    }
    return total;
}
/**
 * Splits a translated sentence into display fragments no wider than
 * `maxVisualLength` where possible.
 *
 * The sentence is first cut after each clause-ending punctuation mark
 * (ASCII `, ! ? ;`, the ideographic full stop `。`, and the full-width forms
 * of comma, exclamation mark, question mark and semicolon). Consecutive
 * pieces are then packed greedily into a fragment while the fragment stays
 * within `maxVisualLength`. A single piece that is wider than the limit is
 * wrapped into several lines joined by '\n' inside one fragment.
 *
 * @param translation Sentence to split.
 * @param maxVisualLength Maximum visual length (as measured by
 *        `visualLengthOf`) of a fragment or wrapped line.
 * @returns Array of fragments; always contains at least one element
 *          (`['']` for an empty sentence).
 */
function splitChineseSentence(translation, maxVisualLength) {
    // \uFF0C \uFF01 \uFF1F \uFF1B are the full-width comma, exclamation mark,
    // question mark and semicolon used in Chinese text.
    const fragments = translation.split(/(?<=[,。!?;\uFF0C\uFF01\uFF1F\uFF1B])/);
    const result = [];
    // Whether a line break may be inserted right before the given character.
    function canBreakAt(char) {
        return (0, common_1.containsChinese)(char);
    }
    // Wraps one over-long piece into lines. A break is inserted before a
    // character when adding it would push the line past the limit and the
    // character allows a break; a run without break opportunities may
    // therefore still exceed the limit.
    function splitLongSentence(sentence) {
        const lines = [];
        let line = '';
        let lineLength = 0;
        // Iterating the string yields whole code points, so a surrogate pair
        // is never separated.
        for (const char of sentence) {
            const charLength = visualLengthOf(char);
            const wouldOverflow = lineLength + charLength > maxVisualLength;
            if (line !== '' && wouldOverflow && canBreakAt(char)) {
                lines.push(line);
                line = '';
                lineLength = 0;
            }
            line += char;
            lineLength += charLength;
        }
        lines.push(line);
        return lines;
    }
    let text = '';
    for (let i = 0; i < fragments.length; i++) {
        text += fragments[i];
        const thisFragmentExceeded = visualLengthOf(text) > maxVisualLength;
        const nextFragmentExceeded = visualLengthOf(text + (fragments[i + 1] ?? '')) > maxVisualLength;
        const isLastFragment = i === fragments.length - 1;
        if (thisFragmentExceeded) {
            // A single piece is too wide on its own: wrap it.
            result.push(splitLongSentence(text).join('\n'));
            text = '';
        }
        else if (nextFragmentExceeded || isLastFragment) {
            // Appending the next piece would overflow (or there is none): flush.
            result.push(text.trim());
            text = '';
        }
    }
    return result;
}
/**
 * Splits one whole sentence into timeline items, one per fragment returned by
 * `splitChineseSentence(wholeSentence.translation, maxVisualLength)`.
 *
 * The sentence's time span is distributed over the fragments in proportion to
 * their visual length. Boundaries are computed from the cumulative length so
 * rounding errors do not accumulate, and the last item always ends exactly at
 * `wholeSentence.endTime`. When all fragments have zero visual length (e.g.
 * an empty translation) the last item covers the whole span instead of
 * dividing by zero.
 *
 * @param wholeSentence Object with `startTime`, `endTime` and `translation`.
 * @param maxVisualLength Maximum visual length passed to `splitChineseSentence`.
 * @returns Array of `{ startTime, endTime, text, cue }` with `cue` set to ''.
 */
function splitTimeline(wholeSentence, maxVisualLength) {
    const sentenceStart = wholeSentence.startTime;
    const sentenceEnd = wholeSentence.endTime;
    const fragments = splitChineseSentence(wholeSentence.translation, maxVisualLength);
    // Measure the fragments themselves: they may differ from the translation
    // (inserted line breaks, trimmed whitespace), so using the translation's
    // length as denominator would let the timeline over- or undershoot.
    const lengths = fragments.map((fragment) => visualLengthOf(fragment));
    const totalLength = lengths.reduce((sum, length) => sum + length, 0);
    const totalDuration = sentenceEnd - sentenceStart;
    const result = [];
    let startTime = sentenceStart;
    let consumedLength = 0;
    fragments.forEach((text, index) => {
        consumedLength += lengths[index];
        const isLast = index === fragments.length - 1;
        // Estimate the share of the sentence's duration used up so far.
        const share = totalLength === 0 ? 0 : consumedLength / totalLength;
        const endTime = isLast ? sentenceEnd : sentenceStart + Math.round(totalDuration * share);
        result.push({
            startTime,
            endTime,
            text,
            cue: '',
        });
        startTime = endTime;
    });
    return result;
}
// Default `maxVisualLength` used by splitTimelineBySentence and splitSubtitles
// when the caller does not pass one. Measured in visual-length units as
// computed by visualLengthOf (Chinese characters count 2, others count 1).
exports.MAX_VISUAL_LENGTH = 36;
/**
 * Splits every whole sentence into timeline items and returns them as one
 * flat list, in sentence order.
 *
 * @param wholeSentences Sentences to split; each is passed to `splitTimeline`.
 * @param maxVisualLength Maximum visual length per fragment; defaults to
 *        `exports.MAX_VISUAL_LENGTH`.
 * @returns Flat array of the items produced by `splitTimeline`.
 */
function splitTimelineBySentence(wholeSentences, maxVisualLength = exports.MAX_VISUAL_LENGTH) {
    const itemsPerSentence = wholeSentences.map((sentence) => splitTimeline(sentence, maxVisualLength));
    return itemsPerSentence.flat();
}
exports.splitTimelineBySentence = splitTimelineBySentence;
/**
 * Re-splits the translated text of an already translated subtitle file.
 *
 * The content is parsed with `subtitle.parse`. For each item the first text
 * line is taken as the original and the second as the translation (further
 * lines are ignored). An item with a single line has no separate translation
 * line — `mergeWholeSentence` writes such items when original and translation
 * are identical — so that line is used as the translation too instead of
 * passing `undefined` on. Each item is then split by `splitTimeline` and the
 * result is serialized with `subtitle.stringify` using the original `meta`.
 *
 * @param content Subtitle file content.
 * @param maxVisualLength Maximum visual length per fragment; defaults to
 *        `exports.MAX_VISUAL_LENGTH`.
 * @returns The value produced by `subtitle.stringify`.
 */
function splitSubtitles(content, maxVisualLength = exports.MAX_VISUAL_LENGTH) {
    const dom = subtitle_1.subtitle.parse(content);
    const wholeSentences = dom.items.map((it) => {
        const [original, secondLine] = it.text.split(/\r?\n/);
        // Single-line item: the line serves as both original and translation.
        const translation = secondLine ?? original;
        return {
            startTime: it.startTime,
            endTime: it.endTime,
            cue: it.cue,
            original,
            translation,
            items: [],
        };
    });
    const items = wholeSentences.flatMap((wholeSentence) => splitTimeline(wholeSentence, maxVisualLength));
    return subtitle_1.subtitle.stringify({ meta: dom.meta, items });
}
exports.splitSubtitles = splitSubtitles;
//# sourceMappingURL=subtitle-translator.js.map