// @awesome-fe/translate
// Version:
// Translation utils
// 163 lines • 6.56 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.HtmlTranslator = void 0;
const abstract_translator_1 = require("./abstract-translator");
const dom_models_1 = require("../dom/parse5/dom-models");
const translation_pair_1 = require("./translation-pair");
const common_1 = require("../dom/common");
/**
 * Strips the single outer element from an HTML string and returns what was
 * between its opening and closing tags.
 *
 * The input is unwrapped only when the whole string is one
 * `<tag ...>...</tag>` pair whose closing tag name matches the opening one;
 * any other input is returned unchanged.
 *
 * The `s` (dotAll) flag lets the captured content span line breaks, so
 * multi-line elements are unwrapped as well.
 *
 * @param {string} html - Outer HTML of an element.
 * @returns {string} The inner HTML, or `html` itself when it does not match.
 */
function extractInnerHtml(html) {
    // Anchored with ^ and $, so at most one match exists and no `g` flag is needed.
    return html.replace(/^<([\w-]+)\b[^>]*>(.*?)<\/\1>$/s, '$2');
}
/**
 * Translator that works on parsed HTML documents and fragments.
 *
 * Elements matched by `selectors` are sent to `translateSentence` (inherited
 * from AbstractTranslator); each result is written into a sibling element
 * marked with the `translation-result` attribute, while the source element is
 * marked with `translation-origin`.
 */
class HtmlTranslator extends abstract_translator_1.AbstractTranslator {
    /** Selectors whose matches are candidates for translation. */
    selectors = dom_models_1.defaultSelectors;

    /**
     * Parses HTML text into a DOM model.
     *
     * When `options.htmlFragment` is null/undefined it is filled in on the
     * passed object: `true` unless the text starts with `<!DOCTYPE html>` or
     * `<html` (case-insensitive).
     *
     * @param {string} text - HTML source.
     * @param {object} [options] - Parse options; `htmlFragment` is read and written.
     * @returns A DomDocumentFragment when `htmlFragment` is truthy, otherwise a DomDocument.
     */
    parse(text, options = {}) {
        options.htmlFragment = options.htmlFragment ?? !text.match(/^(<!DOCTYPE html>|<html\b)/si);
        const model = options.htmlFragment
            ? dom_models_1.DomDocumentFragment
            : dom_models_1.DomDocument;
        return model.parse(text);
    }

    /**
     * Serializes a DOM model back to HTML.
     *
     * @param doc - Object exposing `toHtml()`.
     * @returns The value returned by `doc.toHtml()`.
     */
    serialize(doc) {
        return doc.toHtml();
    }

    /**
     * Requests translations for the document title (full documents only) and
     * for every element matched by `selectors` that passes `shouldTranslate`.
     *
     * Results are applied inside the promise callbacks; this method returns
     * `doc` right away without awaiting them.
     *
     * @param doc - Parsed document or fragment.
     * @returns The same `doc` instance.
     */
    translateDoc(doc) {
        if (doc instanceof dom_models_1.DomDocument) {
            this.formatHtml(doc);
            const title = doc.head.querySelector(it => it.tagName === 'title');
            if (title) {
                const [original, translation] = (0, translation_pair_1.buildTranslationPair)(title.getAttribute('original-title'), title.textContent);
                this.translateSentence(original, translation, 'plain').then(result => {
                    if (!result || result === original) {
                        return;
                    }
                    // Keep the source title in an attribute and show the translated one.
                    title.setAttribute('original-title', original);
                    title.textContent = result;
                });
            }
        }
        this.addWrapperForTextInSpecialBlocks(doc);
        const candidates = this.selectors
            .flatMap(selector => Array.from(doc.querySelectorAll(selector)))
            .filter(candidate => this.shouldTranslate(candidate));
        for (const element of candidates) {
            const [original, translation] = this.buildTranslationPair(element);
            this.translateSentence(original, translation, 'html').then(result => {
                if (!result || result === original) {
                    return;
                }
                this.applyTranslation(element, result);
            });
        }
        return doc;
    }

    /**
     * Decides whether a node should be sent for translation.
     *
     * A node qualifies only if it is a DomElement, is not itself a translation
     * result, is not marked `translate="no"`, and its text does not contain
     * Chinese according to `containsChinese`.
     *
     * @param node - Node to test.
     * @returns {boolean} `true` when the node should be translated.
     */
    shouldTranslate(node) {
        return node instanceof dom_models_1.DomElement
            && !node.hasAttribute('translation-result')
            && node.getAttribute('translate') !== 'no'
            && !(0, common_1.containsChinese)(node.textContent);
    }

    /**
     * Inserts newline text nodes before the `html` node, before its first
     * child, and after its last child.
     *
     * @param doc - Document whose `childNodes` contain a node named `html`.
     */
    formatHtml(doc) {
        const root = doc.childNodes.find(it => it.nodeName === 'html');
        const newline = () => new dom_models_1.DomText('\n');
        doc.insertBefore(newline(), root);
        root.insertBefore(newline(), root.firstChild);
        root.insertAfter(newline(), root.lastChild);
    }

    /**
     * Builds the `[original, translation]` pair for an element.
     *
     * The translation half is the next element sibling's outer HTML when the
     * element carries `translation-origin`; otherwise it is the empty string.
     *
     * @param element - Source element.
     * @returns {[string, string]} Outer HTML of the element and of its existing translation.
     */
    buildTranslationPair(element) {
        const follower = element.nextElementSibling;
        const hasOriginMark = element.hasAttribute('translation-origin');
        const source = element.outerHTML;
        const existing = hasOriginMark ? (follower?.outerHTML ?? '') : '';
        return [source, existing];
    }

    /**
     * Writes a translation next to its source element.
     *
     * If the next element sibling is already a translation result, its inner
     * HTML is replaced. Otherwise a new element with the same tag name is
     * inserted after the source, and both elements are marked.
     *
     * @param original - Source element.
     * @param {string} translation - Translated outer HTML; its outer tag is stripped first.
     */
    applyTranslation(original, translation) {
        const innerHtml = extractInnerHtml(translation);
        const follower = original.nextElementSibling;
        if (follower?.hasAttribute('translation-result')) {
            follower.innerHTML = innerHtml;
            return;
        }
        const resultNode = new dom_models_1.DomElement(original.tagName);
        resultNode.setAttribute('translation-result', 'on');
        original.setAttribute('translation-origin', 'off');
        const indent = original.previousSibling()?.textContent || '';
        const indentNode = new dom_models_1.DomText(indent);
        original.parentNode?.insertAfter(resultNode, original);
        // If the preceding text is only whitespace, copy it in front of the
        // result element so that it lines up with the source element.
        if (!indent.trim()) {
            original.parentNode?.insertAfter(indentNode, original);
        }
        resultNode.innerHTML = innerHtml;
    }

    /**
     * Replaces the children of every `li`, `td` and `th` under `body` with the
     * output of `wrapChildren`, then points each new child's `parentNode` back
     * at its block.
     *
     * @param body - Root node to search.
     */
    addWrapperForTextInSpecialBlocks(body) {
        const blocks = body.querySelectorAll((it) => it.isTagOf('li', 'td', 'th'));
        blocks.forEach(block => {
            block.childNodes = wrapChildren(block);
            block.childNodes.forEach(child => {
                child.parentNode = block;
            });
        });
    }
}
exports.HtmlTranslator = HtmlTranslator;
/**
 * Returns a node to use as spacing around a wrapper.
 *
 * If `node` is an inline node with blank text content, it is removed from
 * where it currently sits and reused. Otherwise a new text node holding
 * `defaultText` is created.
 *
 * @param node - Candidate node (may be undefined).
 * @param {string} defaultText - Text for the fallback text node.
 * @returns The reused node or a new DomText.
 */
function getSpacingNode(node, defaultText) {
    const reusable = isInlineNode(node) && isBlank(node);
    if (!reusable) {
        return new dom_models_1.DomText(defaultText);
    }
    node.remove();
    return node;
}
/**
 * Appends a wrapper that has collected inline nodes to `result`.
 *
 * - An empty wrapper contributes nothing.
 * - A wrapper whose text content is blank is discarded and its children are
 *   appended directly.
 * - Otherwise the wrapper is appended between a leading and a trailing
 *   spacing node obtained from `getSpacingNode`.
 *
 * @param wrapper - Element holding the collected inline nodes.
 * @param {Array} result - Output list, mutated in place.
 * @param {string} leadingSpaces - Text used for the fallback spacing nodes.
 */
function addToResult(wrapper, result, leadingSpaces) {
    if (!wrapper.childNodes.length) {
        return;
    }
    if (isBlank(wrapper)) {
        result.push(...wrapper.childNodes);
        return;
    }
    // The leading node must be resolved first: it may detach the wrapper's
    // first child before `lastChild` is read.
    const before = getSpacingNode(wrapper.firstChild, `${leadingSpaces} `);
    const after = getSpacingNode(wrapper.lastChild, `${leadingSpaces}`);
    result.push(before, wrapper, after);
}
/**
 * Builds a new child list for `node` in which runs of inline children are
 * grouped into `p` wrapper elements and non-inline children are kept as they
 * are, in their original order.
 *
 * @param node - Element whose `childNodes` are regrouped.
 * @returns {Array} The new list of child nodes.
 */
function wrapChildren(node) {
    let pending = new dom_models_1.DomElement('p');
    const output = [];
    const indent = node.previousSibling()?.textContent || '';
    node.childNodes.forEach(child => {
        // Inline nodes are collected into the current wrapper.
        if (isInlineNode(child)) {
            pending.appendChild(child);
            return;
        }
        // A block node ends the current run: emit the wrapper collected so
        // far, start a fresh one, then emit the block itself.
        addToResult(pending, output, indent);
        pending = new dom_models_1.DomElement('p');
        pending.parentNode = node;
        output.push(child);
    });
    addToResult(pending, output, indent);
    return output;
}
/** Tag names treated as inline content when grouping children. */
const INLINE_TAG_NAMES = [
    'a', 'em', 'strong', 'span', 'sub', 'sup', 'del', 'code', 'img', 'input',
    'br', 'kbd', 'label', 'u', 'i', 'b', 'big', 'small', 'ins', 'strike',
];

/**
 * Tells whether a node counts as inline: any DomText, or a DomElement whose
 * tag is one of `INLINE_TAG_NAMES`.
 *
 * @param value - Node to test (may be undefined).
 * @returns `true` for text nodes, the result of `isTagOf` for elements, `false` otherwise.
 */
function isInlineNode(value) {
    if (value instanceof dom_models_1.DomText) {
        return true;
    }
    if (value instanceof dom_models_1.DomElement) {
        return value.isTagOf(...INLINE_TAG_NAMES);
    }
    return false;
}
/**
 * Tells whether a node has no visible text.
 *
 * @param node - Object with an optional `textContent` string.
 * @returns {boolean} `true` when `textContent` is null/undefined or trims to the empty string.
 */
function isBlank(node) {
    const trimmed = node.textContent?.trim();
    return trimmed ? false : true;
}
//# sourceMappingURL=html-translator.js.map