@awesome-fe/translate
Version:
Translation utils
178 lines • 7.77 kB
JavaScript
;
// Compiler-emitted helper implementing ES5-style class inheritance.
// Reuses `this.__extends` when one is already defined on `this`; otherwise builds a fresh helper.
var __extends = (this && this.__extends) || (function () {
// Links the derived constructor `d` to the base `b` so static members are inherited.
// On first call it picks a strategy and overwrites itself with it, so the choice is made only once.
var extendStatics = function (d, b) {
// Strategy 1: native Object.setPrototypeOf when available.
extendStatics = Object.setPrototypeOf ||
// Strategy 2: assign __proto__ directly, used only if an object literal's __proto__ actually changes its prototype.
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
// Strategy 3: copy the base's own enumerable properties onto the derived constructor.
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
return extendStatics(d, b);
};
// The actual helper: makes `d` extend `b`.
return function (d, b) {
// Only functions or null are accepted as a base; anything else raises a TypeError.
if (typeof b !== "function" && b !== null)
throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
extendStatics(d, b);
// Temporary constructor used to create d.prototype without invoking b's constructor.
function __() { this.constructor = d; }
// For a null base the prototype is a bare object; otherwise it is a new object inheriting from b.prototype.
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.splitSubtitles = exports.splitTimelineBySentence = exports.mergeTimelineBySentence = exports.SubtitleTranslator = void 0;
var abstract_translator_1 = require("./abstract-translator");
var subtitle_1 = require("../dom/subtitle/subtitle");
var common_1 = require("../dom/common");
// Translator for subtitle documents: parses subtitle text, translates it sentence by sentence,
// and writes bilingual cues (original line + translated line) back into the document.
var SubtitleTranslator = /** @class */ (function (Base) {
    __extends(SubtitleTranslator, Base);
    function SubtitleTranslator() {
        // Delegate construction to the base class; fall back to `this` when it returns nothing usable.
        var instance = Base !== null && Base.apply(this, arguments);
        return instance || this;
    }
    // Parses raw subtitle text into a document via the subtitle module.
    SubtitleTranslator.prototype.parse = function (text) {
        return subtitle_1.subtitle.parse(text);
    };
    // Serializes a subtitle document back to text via the subtitle module.
    SubtitleTranslator.prototype.serialize = function (doc) {
        return subtitle_1.subtitle.stringify(doc);
    };
    // Merges the cues of `doc` into whole sentences, requests their translation, and returns `doc`.
    // The `options` parameter is accepted but not read here.
    // NOTE(review): `doc` is returned synchronously; `doc.items` and `doc.meta.language` are only
    // replaced once the translation promise resolves, and a rejection of that promise is not
    // handled in this method — confirm the caller waits for pending translations.
    SubtitleTranslator.prototype.translateDoc = function (doc, options) {
        // Build the mapping from sentence text to its time range.
        var sentences = mergeTimelineBySentence(doc.items);
        // Turns one translated sentence into a bilingual cue covering the sentence's time range.
        function toBilingualItem(sentence) {
            return {
                cue: '',
                startTime: sentence.startTime,
                endTime: sentence.endTime,
                text: mergeWholeSentence(sentence),
            };
        }
        // Translate, then map the results back onto the timeline.
        this.translateWholeSentences(sentences).then(function (translated) {
            doc.items = translated.map(toBilingualItem);
            doc.meta.language = 'zh-Hans';
        });
        return doc;
    };
    // Translates every sentence in parallel, storing each result in its `translation` field.
    // Resolves with the same `wholeSentences` array once all translations have settled successfully.
    SubtitleTranslator.prototype.translateWholeSentences = function (wholeSentences) {
        var self = this;
        var pending = wholeSentences.map(function (sentence) {
            // Line breaks and non-breaking spaces become plain spaces before translation.
            var singleLine = sentence.original.replace(/(\r?\n|\xa0)/g, ' ');
            return self.translateSentence(singleLine, 'plain').then(function (translated) {
                sentence.translation = translated;
            });
        });
        return Promise.all(pending).then(function () { return wholeSentences; });
    };
    return SubtitleTranslator;
}(abstract_translator_1.AbstractTranslator));
exports.SubtitleTranslator = SubtitleTranslator;
// Builds the cue text for a translated sentence.
// Returns the original alone when the translation is identical (ignoring surrounding whitespace);
// otherwise returns the original flattened to one line, a newline, then the translation.
function mergeWholeSentence(wholeSentence) {
    var source = wholeSentence.original;
    var target = wholeSentence.translation;
    var untranslated = source.trim() === target.trim();
    if (untranslated) {
        return source;
    }
    var sourceOnOneLine = source.replace(/\r?\n/g, ' ');
    return sourceOnOneLine + "\n" + target;
}
// Merges consecutive subtitle cues into whole sentences and merges their time ranges accordingly.
//
// A sentence is closed when a cue's text ends with '.' or '!', or when the last cue is reached.
// Each returned entry has:
//   original    - the joined cue texts (space separated, trimmed)
//   translation - an empty string, to be filled in later
//   startTime   - start of the first cue for the first sentence, otherwise the previous sentence's end
//   endTime     - end time of the cue that closed the sentence
//   items       - the original cues that make up the sentence
//
// Returns an empty array for an empty cue list.
function mergeTimelineBySentence(items) {
    var _a;
    var wholeSentences = [];
    // Nothing to merge; also avoids reading `startTime` of a non-existent first cue.
    if (items.length === 0) {
        return wholeSentences;
    }
    var text = '';
    // A missing start time on the first cue is treated as 0.
    var startTime = (_a = items[0].startTime) !== null && _a !== void 0 ? _a : 0;
    var originalItems = [];
    for (var i = 0; i < items.length; i++) {
        var item = items[i];
        originalItems.push(item);
        text += item.text + ' ';
        if (/[.!]$/.test(item.text) || i === items.length - 1) {
            wholeSentences.push({ original: text.trim(), translation: '', startTime: startTime, endTime: item.endTime, items: originalItems });
            // Start accumulating the next sentence right where this one ended.
            text = '';
            startTime = item.endTime;
            originalItems = [];
        }
    }
    return wholeSentences;
}
exports.mergeTimelineBySentence = mergeTimelineBySentence;
// Computes the visual length of a string: a character for which containsChinese is true
// counts as two units, any other character counts as one.
function visualLengthOf(text) {
    var units = text.split('');
    var width = 0;
    for (var k = 0; k < units.length; k++) {
        width += (0, common_1.containsChinese)(units[k]) ? 2 : 1;
    }
    return width;
}
// Splits a translated (Chinese) sentence into display fragments no wider than roughly
// `maxVisualLength` visual units (see visualLengthOf).
//
// The sentence is first cut after clause-ending punctuation. Both the ASCII forms (, ! ? ;)
// and the full-width forms used in Chinese text (U+FF0C, U+FF01, U+FF1F, U+FF1B), plus the
// ideographic full stop, are recognised. Adjacent clauses are then packed together greedily
// while they still fit. A single clause that is too wide on its own is hard-wrapped and its
// pieces are joined with '\n' inside one fragment.
//
// Returns the array of fragments, in order.
function splitChineseSentence(translation, maxVisualLength) {
    var _a;
    // The lookbehind keeps each punctuation mark attached to the clause it terminates.
    var fragments = translation.split(/(?<=[,。!?;\uFF0C\uFF01\uFF1F\uFF1B])/);
    var result = [];
    var text = '';
    // Whether the text may be broken in front of the given character.
    function canBreakAt(char) {
        return (0, common_1.containsChinese)(char);
    }
    // Hard-wraps one over-long clause: a piece is closed once it exceeds the limit and the
    // next character is a legal break position, or when the end of the text is reached.
    function splitLongSentence(text) {
        var result = [];
        var prevPos = 0;
        var i = 0;
        while (i <= text.length) {
            var currentText = text.slice(prevPos, i);
            if (visualLengthOf(currentText) > maxVisualLength && canBreakAt(text[i]) || i === text.length) {
                result.push(currentText);
                prevPos = i;
            }
            ++i;
        }
        return result;
    }
    for (var i = 0; i < fragments.length; i++) {
        text += fragments[i];
        var thisFragmentExceeded = visualLengthOf(text) > maxVisualLength;
        var nextFragmentExceeded = visualLengthOf(text + ((_a = fragments[i + 1]) !== null && _a !== void 0 ? _a : '')) > maxVisualLength;
        var isLastFragment = i === fragments.length - 1;
        if (thisFragmentExceeded) {
            // The accumulated text is already too wide: wrap it inside a single fragment.
            var subFragments = splitLongSentence(text);
            result.push(subFragments.join('\n'));
            text = '';
        }
        else if (nextFragmentExceeded || isLastFragment) {
            // Appending the next clause would overflow (or there is none): emit what we have.
            result.push(text.trim());
            text = '';
        }
    }
    return result;
}
// Splits one translated sentence into several cues and distributes the sentence's time range
// over them in proportion to each fragment's visual length.
//
// wholeSentence   - object with `startTime`, `endTime` and `translation`
// maxVisualLength - width limit passed on to splitChineseSentence
//
// Returns an array of cues ({ startTime, endTime, text, cue: '' }) in display order.
// A translation with zero visual length yields a single cue covering the whole range.
function splitTimeline(wholeSentence, maxVisualLength) {
    var totalLength = visualLengthOf(wholeSentence.translation);
    if (totalLength === 0) {
        // Dividing the duration by zero would produce NaN timestamps; keep the range intact instead.
        return [{
                startTime: wholeSentence.startTime,
                endTime: wholeSentence.endTime,
                text: wholeSentence.translation,
                cue: '',
            }];
    }
    var result = [];
    var fragments = splitChineseSentence(wholeSentence.translation, maxVisualLength);
    // Time allotted to one visual unit of the translation.
    var unitDuration = (wholeSentence.endTime - wholeSentence.startTime) / totalLength;
    var startTime = wholeSentence.startTime;
    for (var _i = 0, fragments_1 = fragments; _i < fragments_1.length; _i++) {
        var text = fragments_1[_i];
        // Estimate this fragment's duration from its visual length.
        var currentDuration = Math.round(visualLengthOf(text) * unitDuration);
        result.push({
            startTime: startTime,
            endTime: startTime + currentDuration,
            text: text,
            cue: '',
        });
        // The next fragment starts where this one ends.
        startTime += currentDuration;
    }
    return result;
}
// Default width limit (in visual units) for a single subtitle fragment.
var MAX_VISUAL_LENGTH = 36;
// Splits every whole sentence into cues via splitTimeline and returns them as one flat list.
// `maxVisualLength` defaults to MAX_VISUAL_LENGTH when omitted.
function splitTimelineBySentence(wholeSentences, maxVisualLength) {
    var limit = maxVisualLength === undefined ? MAX_VISUAL_LENGTH : maxVisualLength;
    var cuesPerSentence = wholeSentences.map(function (sentence) {
        return splitTimeline(sentence, limit);
    });
    // Flatten one level: each entry is the cue list of one sentence.
    return Array.prototype.concat.apply([], cuesPerSentence);
}
exports.splitTimelineBySentence = splitTimelineBySentence;
// Re-splits an already translated bilingual subtitle file into shorter cues.
//
// content         - subtitle text; each cue is expected to hold the original on its first line
//                   and the translation on its second line
// maxVisualLength - width limit per fragment, defaults to MAX_VISUAL_LENGTH
//
// Returns the serialized subtitle text whose cues are the split translations.
function splitSubtitles(content, maxVisualLength) {
    if (maxVisualLength === void 0) { maxVisualLength = MAX_VISUAL_LENGTH; }
    var dom = subtitle_1.subtitle.parse(content);
    var wholeSentences = dom.items.map(function (it) {
        var lines = it.text.split(/\r?\n/);
        var original = lines[0];
        // A single-line cue carries no separate translation (mergeWholeSentence emits only the
        // original when both are identical), so the original doubles as the translation
        // instead of passing `undefined` on to the splitter.
        var translation = lines.length > 1 ? lines[1] : original;
        return ({
            startTime: it.startTime,
            endTime: it.endTime,
            cue: it.cue,
            original: original,
            translation: translation,
            items: [],
        });
    });
    var items = wholeSentences.flatMap(function (wholeSentence) { return splitTimeline(wholeSentence, maxVisualLength); });
    return subtitle_1.subtitle.stringify({ meta: dom.meta, items: items });
}
exports.splitSubtitles = splitSubtitles;
//# sourceMappingURL=subtitle-translator.js.map