UNPKG

weploy-translate

Version:

Translate your React.js or Next.js app with AI

320 lines (259 loc) 13.8 kB
const { getGlobalseoOptions, shouldTranslateInlineText, CONTEXT_LIMIT, MAX_WORDS_LENGTH_FOR_CONTEXT, OLD_EXCLUDE_CLASS, MERGE_PREFIX } = require("../configs"); const { isExcludedClassName, isExcludedId } = require("./isExcluded"); const isUrl = require("./isUrl"); function isIgnoredTagInContext(tagName) { return tagName && ["HTML", "HEAD", "SCRIPT", "STYLE", "SVG", "PATH", "CIRCLE", "TEXTAREA", "INPUT", "SELECT", "OPTION", "NOSCRIPT"].includes(tagName.toUpperCase()) } function collectAllTextContentInsideNode(window, node, shouldExclude = false) { const textNodes = []; if (isIgnoredTagInContext(node.tagName)) { return textNodes } node.childNodes.forEach((child) => { if (shouldExclude && child && child.className && typeof child.className == "string" && (child.className.includes(OLD_EXCLUDE_CLASS) || child.className.includes("globalseo-exclude"))) return; if ( child && child.className && typeof child.className == "string" && isExcludedClassName(window,child.className) ) { return; } if (child && child.id && typeof child.id == "string" && isExcludedId(window, child.id)) { return; }; // if script tag or style tag skip if (isIgnoredTagInContext(child.tagName)) return; if (isIgnoredTagInContext(child.parentNode.tagName)) return; if (child.nodeType === window.Node.TEXT_NODE && child.textContent.trim()) { // assign original text to parent element if (window.isWorker) { const parentElement = child.parentNode; parentElement.setAttribute("data-original-text", child.textContent); } else { // compare with original text const originalText = child.parentNode.getAttribute("data-original-text"); // if same then dont translate if (child.textContent === originalText) { return } } textNodes.push(child); } if (child.nodeType !== window.Node.TEXT_NODE) { textNodes.push(...collectAllTextContentInsideNode(window, child)); } }); return textNodes; } function assignFullTextToTextNodes(window, node, textNodes, topLevelTagName) { const fullTextArray = textNodes.map((textNode) => textNode.textContent); const fullTextTagNames = textNodes.map((textNodes) => textNodes.parentNode.tagName) node.childNodes.forEach((child) => { if (child.nodeType === window.Node.TEXT_NODE) { // if (child.textContent.trim()) { child.fullTextArray = fullTextArray; child.fullTextIndex = textNodes.findIndex((textNode) => textNode === child); child.fullTextTagNames = fullTextTagNames; child.topLevelTagName = topLevelTagName; child.cacheKey = `${MERGE_PREFIX}-${JSON.stringify(fullTextArray.filter(x => x && x.trim()))}`; // get rid of empty string // } } else { assignFullTextToTextNodes(window, child, textNodes, topLevelTagName); } }); } function extractTextNodes(window, node, textNodes) { if (!node) return; if (node.tagName && ["SCRIPT", "SVG", "PATH", "CIRCLE", "TEXTAREA", "INPUT", "SELECT", "OPTION", "STYLE", "NOSCRIPT"].includes(node.tagName.toUpperCase())) return; if (node.nodeType === window.Node.TEXT_NODE) { if (node.parentNode.tagName && ["SCRIPT", "SVG", "PATH", "CIRCLE", "TEXTAREA", "INPUT", "SELECT", "OPTION", "STYLE", "NOSCRIPT"].includes(node.parentNode.tagName.toUpperCase())) return; if(isUrl(node.textContent)) return; const options = getGlobalseoOptions(window); if (options.excludeContents.length && options.excludeContents.find((x) => { const regex = new RegExp(x); return regex.test(node.textContent); })) return; // Check if the text node is empty if (!node.textContent.trim()) return; // fullTextArray assignment // 1. must be a text node (example: "im") // 2. must be a child of an inline element (example: <span>im</span>) // 3. minimum has 1 parent sibling (not including itself) (<div><span>im</span><span>portant</span></div>) // 4. all parent siblings are inline elements OR not inline BUT all of its children are inline // 5. all parent siblings are NOT links or buttons (it means they were navigations) if ( shouldTranslateInlineText(window) && !node.cacheKey && // 2. must be a child of an inline element node.parentNode instanceof Element && window.getComputedStyle(node.parentNode).display === 'inline' && // not a link or button or list item node.parentNode.tagName !== 'A' && node.parentNode.tagName !== 'BUTTON' && node.parentNode.tagName !== 'LI' ) { const parentSiblings = []; (node.parentNode.parentNode.childNodes || []).forEach((child) => { if (child.nodeType === window.Node.TEXT_NODE) { if (child.textContent.trim()) { parentSiblings.push(child); } } else { parentSiblings.push(child); } }); const parentSiblingsValidity = parentSiblings.map((child) => { // if text node if (child.nodeType === window.Node.TEXT_NODE) { return true; } // // if not text node but contains a tag or button // if (child instanceof Element && (child.tagName === 'A' || child.tagName === 'BUTTON')) { // return false; // } // if not text node and not a tag or button BUT has children that are a tags or buttons if (child instanceof Element) { const hasTagsOrButtons = Array.from(child.childNodes).some((grandChild) => { return grandChild instanceof Element && (grandChild.tagName === 'A' || grandChild.tagName === 'BUTTON'); }); return !hasTagsOrButtons; } // if not text node but inline if (child instanceof Element && window.getComputedStyle(child).display === 'inline') { return true; } // if not text node and not inline but all of its children are inline if (child instanceof Element && window.getComputedStyle(child).display !== 'inline') { const inlineChildren = []; const childrenTags = []; (child.childNodes || []).forEach((grandChild) => { if (grandChild instanceof Element && window.getComputedStyle(grandChild).display === 'inline') { inlineChildren.push(grandChild); if (grandChild.tagName === 'A' || grandChild.tagName === 'BUTTON' || grandChild.tagName === 'LI') { childrenTags.push(grandChild); } } if (grandChild.nodeType === window.Node.TEXT_NODE) { inlineChildren.push(grandChild); } }); // if all children are a tag or button, return false if (childrenTags.length === child.childNodes.length) { return false; } return inlineChildren.length === child.childNodes.length; } return false; }); const shouldAssignFullText = parentSiblingsValidity.every((validity) => validity); // check if all parent siblings are links or buttons const isAllParentSiblingsAreLinksOrButtons = parentSiblings.every((child) => { return child instanceof Element && (child.tagName === 'A' || child.tagName === 'BUTTON' || child.tagName === "LI"); }); if (parentSiblings.length > 1 && shouldAssignFullText && !isAllParentSiblingsAreLinksOrButtons) { // console.log('node', node, parentSiblings); const textNodes = collectAllTextContentInsideNode(window, node.parentNode.parentNode); if (textNodes.length > 1) { const topLevelTagName = node.parentNode.parentNode.tagName; // inject top level tagname for extra context assignFullTextToTextNodes(window, node.parentNode.parentNode, textNodes, topLevelTagName); }; } } // inject parentTagName for extra context const parentTagName = node.parentNode.tagName; node.parentTagName = parentTagName; function shouldAssignContext(theNode) { return !theNode.context && !theNode.fullTextArray; } if (shouldAssignContext(node)) { // inject html around the text node as context const textNodesForContext = collectAllTextContentInsideNode(window, node.parentNode.parentNode, true); // <div> Hello <div><div>Text 1 <span>fdfd</span></div></div> <h1>Text</h1></div> textNodesForContext.forEach((singleTextNode) => { // start the loop from current node, then do 2 loops, one through previous and the other one through next siblings, the previous check should add the textContent to the left and the next check should add textContent to the right of the current node textContent, if the total character already reach 100 or all siblings already checked, then stop const currentNodeIndex = textNodesForContext.findIndex((textNode) => textNode === singleTextNode); const leftTextNodes = []; const rightTextNodes = []; let totalWordsLeftSide = 0; let totalWordsRightSide = 0; let leftIndex = currentNodeIndex - 1; let rightIndex = currentNodeIndex + 1; // the limit of the total words for each side const contextLimit = CONTEXT_LIMIT; // Initialize a variable to keep track of the loop iteration let i = 0; // Continue the loop until the total character count reaches the limit or both indices are out of their respective bounds for (; (totalWordsLeftSide < contextLimit && leftIndex >= 0) || ( totalWordsRightSide < contextLimit && rightIndex < textNodesForContext.length); i++) { // Check if it's time to check the left side or if the right side is finished let shouldCheckLeft = totalWordsRightSide >= contextLimit || (i % 2 === 0 && leftIndex >= 0) || rightIndex >= textNodesForContext.length ; // Check if it's time to check the right side or if the left side is finished let shouldCheckRight = totalWordsLeftSide >= contextLimit || (i % 2 === 1 && rightIndex < textNodesForContext.length) || leftIndex < 0; if (shouldCheckLeft && totalWordsLeftSide < contextLimit) { // Get the text node from the left side const leftTextNode = textNodesForContext[leftIndex]; // Add the text node to the left text nodes array leftTextNodes.unshift(leftTextNode); // Increase the total character count // console.log("totalWordsLeftSide", singleTextNode.textContent, totalWordsLeftSide, leftTextNode.textContent.trim().split(" ").length, leftTextNode.textContent) totalWordsLeftSide += leftTextNode.textContent.trim().split(" ").length; // Move the left index one step back leftIndex--; } else if (shouldCheckRight && totalWordsRightSide < contextLimit) { // Get the text node from the right side const rightTextNode = textNodesForContext[rightIndex]; // Add the text node to the right text nodes array rightTextNodes.push(rightTextNode); // Increase the total character count // console.log("totalWordsRightSide", singleTextNode.textContent, totalWordsRightSide, rightTextNode.textContent.trim().split(" ").length, rightTextNode.textContent) totalWordsRightSide += rightTextNode.textContent.trim().split(" ").length; // Move the right index one step forward rightIndex++; } } const textNodesForContextWithCurrentNode = [...leftTextNodes, singleTextNode, ...rightTextNodes]; const context = textNodesForContextWithCurrentNode.reduce((acc, textNode) => { // if has sibling, then inject span (for splitting purpose on backend) if (textNode.parentNode.childNodes.length > 1) { textNode.translationTagName = "SPAN" return `${acc} <span>${textNode.textContent}</span>`; } else { return `${acc} <${textNode.parentNode.tagName.toLowerCase()}>${textNode.textContent}</${textNode.parentNode.tagName.toLowerCase()}>`; } }, ''); // why not check this earlier? // because we need to run this function on the very top level of the node, if we block this earlier, then the lower level textNodes will not get the full context because this function only check for grandparent not further than that // <div> // "Very very very long text that doesnt need context " //    <span> //       "Short text" //    </span> // </div> const isMoreThanXWords = singleTextNode.textContent.trim().split(" ").length > MAX_WORDS_LENGTH_FOR_CONTEXT; if (!shouldAssignContext(singleTextNode) || isMoreThanXWords) { return; } const grandParentTagName = node.parentNode.parentNode.tagName.toLowerCase(); singleTextNode.context = `<${grandParentTagName}>${context}</${grandParentTagName}>`; }) } // if (!node.originalTextContent) { // node.originalTextContent = node.textContent; // } textNodes.push(node); } else { const globalseoOptions = getGlobalseoOptions(window); if ( node && node.className && typeof node.className == "string" && isExcludedClassName(window,node.className) ) { return; } if (node && node.id && typeof node.id == "string" && isExcludedId(window, node.id)) { return; }; for (let child of node.childNodes) { extractTextNodes(window, child, textNodes); } } } module.exports = extractTextNodes;