UNPKG

@akson/cortex-shopify-translations

Version:

Unified Shopify translations management client with product extraction, translation sync, and CLI tools

341 lines (286 loc) 9.88 kB
#!/usr/bin/env node

/**
 * AI Translation Engine using OpenAI GPT-4o-mini
 * Maintains brand consistency and glossary compliance
 */

import fs from "fs/promises";
import path from "path";
import { config } from "dotenv";

// Load environment variables
config({ path: path.join(process.cwd(), "translations", "config", ".env") });

/** Human-readable names for the language codes the prompt knows about. */
const LANGUAGE_NAMES = Object.freeze({
  de: "German",
  it: "Italian",
  en: "English",
});

/** Marker the user prompt uses for items that already have a fixed glossary phrase. */
const EXACT_MATCH_PREFIX = "EXACT MATCH - USE:";

/** Common French function words used as a cheap "left untranslated" signal. */
const FRENCH_MARKER_WORDS = new Set([
  "avec",
  "pour",
  "dans",
  "sur",
  "sous",
  "tout",
  "tous",
  "leur",
  "leurs",
  "cette",
  "ceux",
  "celui",
  "également",
  "ainsi",
  "donc",
  "mais",
]);

/** Placeholders that must survive translation: %{name}, {{name}}, [[name]]. */
const VARIABLE_PATTERN = /%{[^}]+}|{{[^}]+}}|\[\[[^\]]+\]\]/g;

/** Anything that looks like an HTML tag. */
const HTML_TAG_PATTERN = /<[^>]+>/g;

// GPT-4o-mini pricing, in USD per one million tokens.
const INPUT_PRICE_PER_MILLION = 0.15;
const OUTPUT_PRICE_PER_MILLION = 0.6;

/**
 * Compare two arrays as multisets (same values with the same multiplicities,
 * order ignored).
 */
function sameMultiset(left, right) {
  if (left.length !== right.length) return false;
  const sortedLeft = [...left].sort();
  const sortedRight = [...right].sort();
  return sortedLeft.every((value, position) => value === sortedRight[position]);
}

/** Remove one pair of matching quotes that wraps the whole text, if present. */
function stripWrappingQuotes(text) {
  const wrapped = text.match(/^(["'])([\s\S]*)\1$/);
  return wrapped ? wrapped[2].trim() : text;
}

/** Trim a raw model answer and drop wrapping quotes and an echoed exact-match marker. */
function cleanModelOutput(raw) {
  let text = stripWrappingQuotes(raw.trim());
  if (text.startsWith(EXACT_MATCH_PREFIX)) {
    text = stripWrappingQuotes(text.slice(EXACT_MATCH_PREFIX.length).trim());
  }
  return text;
}

/**
 * Group response lines into segments keyed by their leading "[n]" marker.
 * Lines without a marker belong to the preceding segment, so multi-line
 * translations stay intact. When a marker repeats, the first occurrence wins.
 */
function collectIndexedSegments(lines) {
  const segments = new Map();
  let current = null;
  for (const line of lines) {
    const marker = line.match(/^\s*\[(\d+)\]\s*(.*)/);
    if (marker) {
      current = [marker[2]];
      const index = Number(marker[1]);
      if (!segments.has(index)) segments.set(index, current);
    } else if (current) {
      current.push(line);
    }
  }
  return new Map(
    [...segments].map(([index, parts]) => [index, parts.join("\n").trim()])
  );
}

/** Return the glossary's fixed phrase for `text` in `targetLanguage`, or "" if none. */
function fixedPhraseFor(glossary, text, targetLanguage) {
  const phrase = glossary?.phrases?.[text]?.[targetLanguage];
  return typeof phrase === "string" ? phrase : "";
}

/**
 * Reduce an HTML tag to "name" or "/name" (lower-cased, attributes dropped).
 * Anything that merely looks like a tag (e.g. "< b and c >") maps to "" so
 * that only its presence is compared.
 */
function tagSignature(tag) {
  const parsed = tag.match(/^<(\/?)([a-zA-Z][\w:-]*)/);
  return parsed ? `${parsed[1]}${parsed[2].toLowerCase()}` : "";
}

/** Timeouts, rate limits and server errors may succeed on retry; other 4xx will not. */
function isRetryableStatus(status) {
  return status === 408 || status === 429 || status >= 500;
}

/**
 * Translates batches of texts through the OpenAI chat-completions endpoint
 * and scores each result with a set of heuristic checks.
 *
 * Configuration is read from `translations/config/` under the current
 * working directory:
 * - `glossary.json` is read as `{ terms, phrases, do_not_translate }`, where
 *   `terms[term][lang]` and `phrases[text][lang]` hold translations.
 * - `brand.json` is read as `{ brand_voice: { tone }, style_guidelines }`,
 *   where `style_guidelines` has a `general` list and per-language lists.
 */
class AITranslator {
  constructor() {
    this.apiKey = process.env.OPENAI_API_KEY;
    this.model = process.env.AI_MODEL || "gpt-4o-mini";
    this.apiUrl = "https://api.openai.com/v1/chat/completions";
    this.temperature = 0.3; // Lower temperature for consistency
    this.maxRetries = 3;

    // Load configuration
    this.glossary = null;
    this.brand = null;
    // A constructor cannot await, so keep the promise and let async methods
    // wait on it. loadConfig() handles its own errors and never rejects.
    this.configReady = this.loadConfig();
  }

  /**
   * Load `glossary.json` and `brand.json`. The files are read independently,
   * so a missing or invalid one does not stop the other from loading.
   * Failures are logged as warnings and the previous value is kept.
   *
   * @returns {Promise<void>}
   */
  async loadConfig() {
    const configPath = path.join(process.cwd(), "translations", "config");
    const readJson = async (fileName) =>
      JSON.parse(await fs.readFile(path.join(configPath, fileName), "utf-8"));

    const [glossary, brand] = await Promise.allSettled([
      readJson("glossary.json"),
      readJson("brand.json"),
    ]);

    if (glossary.status === "fulfilled") this.glossary = glossary.value;
    if (brand.status === "fulfilled") this.brand = brand.value;

    for (const outcome of [glossary, brand]) {
      if (outcome.status === "rejected") {
        console.error(
          "⚠️ Warning: Could not load config files:",
          outcome.reason?.message ?? outcome.reason
        );
      }
    }
  }

  /**
   * Build the system prompt for one target language from the brand and
   * glossary configuration. This method is synchronous and uses whatever
   * configuration is loaded at call time; await `configReady` first when
   * calling it directly.
   *
   * @param {string} targetLanguage - Language code such as "de", "it" or "en".
   * @returns {string} The system prompt.
   */
  buildSystemPrompt(targetLanguage) {
    // Unknown codes fall back to the code itself instead of "undefined".
    const langName = LANGUAGE_NAMES[targetLanguage] ?? targetLanguage;

    const brandGuidelines =
      this.brand?.style_guidelines?.[targetLanguage] || [];
    const generalGuidelines = this.brand?.style_guidelines?.general || [];
    const styleLines = [...generalGuidelines, ...brandGuidelines]
      .map((g) => `- ${g}`)
      .join("\n");
    const doNotTranslate = (this.glossary?.do_not_translate || []).join(", ");

    return `You are a professional translator for MyArmy.ch, a Swiss military merchandise company.

BRAND VOICE: ${this.brand?.brand_voice?.tone || "Professional yet approachable"}

TARGET LANGUAGE: ${langName} (${targetLanguage})

GLOSSARY (USE THESE EXACT TRANSLATIONS):
${JSON.stringify(this.glossary?.terms || {}, null, 2)}

FIXED PHRASES (USE EXACTLY AS SHOWN):
${JSON.stringify(this.glossary?.phrases || {}, null, 2)}

STYLE GUIDELINES:
${styleLines}

RULES:
1. Use the glossary terms EXACTLY as provided
2. For fixed phrases, use the exact translation provided
3. Preserve ALL variables like %{shop_name}, {{variable}}, [link], etc.
4. Preserve ALL HTML tags like <strong>, <p>, <br>, etc.
5. Keep URLs and email addresses unchanged
6. Do not translate items from the do_not_translate list: ${doNotTranslate}
7. Maintain the same tone and formality level as the original
8. For possessives (ton/ta/tes), use the appropriate translation from the glossary
9. Ensure no French words remain in the translation (unless in do_not_translate list)

Respond with ONLY the translation, no explanations or notes.`;
  }

  /**
   * Translate a batch of texts in a single API request, retrying transient
   * failures with exponential backoff.
   *
   * @param {Array<{text: string, key?: string, context?: string|null}>} texts
   * @param {string} targetLanguage - Target language code.
   * @returns {Promise<{translations: Array<object>, usage: object, cost: number}>}
   * @throws {Error} If the API key is missing, the request fails with a
   *   non-retryable status, or all attempts are exhausted.
   */
  async translateBatch(texts, targetLanguage) {
    if (!this.apiKey) {
      throw new Error("OPENAI_API_KEY not set in translations/config/.env");
    }

    // The glossary and brand files are loaded asynchronously from the
    // constructor; wait for them so the prompt is never built without them.
    await this.configReady;

    if (texts.length === 0) {
      // Nothing to translate: skip the API request entirely.
      return {
        translations: [],
        usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
        cost: 0,
      };
    }

    const systemPrompt = this.buildSystemPrompt(targetLanguage);

    // Build user prompt with all texts
    const userPrompt = texts
      .map((item, index) => {
        // Check if we have an exact phrase match first
        const fixedPhrase = fixedPhraseFor(
          this.glossary,
          item.text,
          targetLanguage
        );
        if (fixedPhrase) {
          return `[${index}] ${EXACT_MATCH_PREFIX} "${fixedPhrase}"`;
        }
        const contextLine = item.context ? `\nContext: ${item.context}` : "";
        return `[${index}] Translate: "${item.text}"${contextLine}`;
      })
      .join("\n\n");

    // Make API call with retry logic
    for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
      try {
        const response = await fetch(this.apiUrl, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${this.apiKey}`,
          },
          body: JSON.stringify({
            model: this.model,
            messages: [
              { role: "system", content: systemPrompt },
              { role: "user", content: userPrompt },
            ],
            temperature: this.temperature,
            max_tokens: 4000,
          }),
        });

        if (!response.ok) {
          const detail = await response.text();
          const failure = new Error(
            `API error: ${response.status} - ${detail}`
          );
          // E.g. a 401 for a bad key will not get better on retry.
          failure.retryable = isRetryableStatus(response.status);
          throw failure;
        }

        const data = await response.json();

        if (!data.choices?.[0]?.message?.content) {
          throw new Error("Invalid API response structure");
        }

        // Parse the response
        const translations = this.parseTranslations(
          data.choices[0].message.content,
          texts,
          targetLanguage
        );

        // Calculate costs
        const usage = data.usage;
        const cost = this.calculateCost(usage);

        return { translations, usage, cost };
      } catch (error) {
        console.error(
          `❌ Attempt ${attempt}/${this.maxRetries} failed:`,
          error.message
        );

        if (attempt === this.maxRetries || error.retryable === false) {
          throw error;
        }

        // Wait before retry (exponential backoff)
        await new Promise((resolve) =>
          setTimeout(resolve, 1000 * 2 ** attempt)
        );
      }
    }

    // Only reachable when maxRetries < 1, i.e. no attempt was made at all.
    throw new Error("No translation attempt was made (maxRetries < 1)");
  }

  /**
   * Map the model's answer back onto the requested items and validate each
   * translation.
   *
   * Each item is resolved in this order:
   * 1. the glossary's fixed phrase (used verbatim, never the model's echo);
   * 2. the response segment starting with "[index]" (may span several lines);
   * 3. the first line containing "[index]" anywhere;
   * 4. the response line at position `index`, or the original text when that
   *    line is blank.
   * When none of these apply the translation is an empty string.
   *
   * @param {string} response - Raw model output.
   * @param {Array<{text: string, key?: string}>} originalTexts
   * @param {string} targetLanguage
   * @returns {Array<{original: string, translated: string, key: *, validation: object, confidence: number}>}
   */
  parseTranslations(response, originalTexts, targetLanguage) {
    const lines = response.split(/\r?\n/);
    const segments = collectIndexedSegments(lines);

    return originalTexts.map((item, index) => {
      let translated = fixedPhraseFor(
        this.glossary,
        item.text,
        targetLanguage
      );

      if (!translated && segments.has(index)) {
        translated = cleanModelOutput(segments.get(index));
      }

      if (!translated) {
        // The marker may be decorated (e.g. "**[0]** text"); accept it anywhere.
        const inlinePattern = new RegExp(`\\[${index}\\]\\s*(.*)`);
        const hit = lines.find((line) => inlinePattern.test(line));
        if (hit) {
          translated = cleanModelOutput(hit.match(inlinePattern)[1]);
        }
      }

      // Fallback: if no indexed match found, try direct match
      if (!translated && index < lines.length) {
        translated = lines[index].trim() || item.text;
      }

      // Validate the translation
      const validation = this.validateTranslation(
        item.text,
        translated,
        targetLanguage
      );

      return {
        original: item.text,
        translated,
        key: item.key,
        validation,
        confidence: validation.score,
      };
    });
  }

  /**
   * Run all heuristic checks on one translation.
   *
   * @param {string} original
   * @param {string} translated
   * @param {string} targetLanguage
   * @returns {{valid: boolean, score: number, checks: Object<string, boolean>}}
   *   `score` is the fraction of checks passed; `valid` means score >= 0.7.
   */
  validateTranslation(original, translated, targetLanguage) {
    const candidate = translated ?? "";

    const checks = {
      notEmpty: candidate.length > 0,
      notSameAsOriginal: candidate !== original,
      noFrenchWords: !this.detectFrenchContamination(
        candidate,
        targetLanguage
      ),
      variablesPreserved: this.checkVariables(original, candidate),
      htmlPreserved: this.checkHTML(original, candidate),
      glossaryCompliant: this.checkGlossaryCompliance(
        original,
        candidate,
        targetLanguage
      ),
      reasonableLength:
        candidate.length > original.length * 0.3 &&
        candidate.length < original.length * 3,
    };

    const results = Object.values(checks);
    const score = results.filter(Boolean).length / results.length;

    return {
      valid: score >= 0.7,
      score,
      checks,
    };
  }

  /**
   * Heuristically detect leftover French by looking for common French
   * function words, ignoring single-word `do_not_translate` entries.
   *
   * @param {string} text
   * @param {string} targetLanguage
   * @returns {boolean} True when a French marker word is found.
   */
  detectFrenchContamination(text, targetLanguage) {
    if (targetLanguage === "fr") return false;

    // Don't check for common words that might be valid
    const exceptions = new Set(
      (this.glossary?.do_not_translate || []).map((entry) =>
        entry.toLowerCase()
      )
    );

    // Extract runs of letters so punctuation ("avec,") cannot hide a word;
    // NFC keeps accented letters such as "é" as single code points.
    const words = text.normalize("NFC").toLowerCase().match(/\p{L}+/gu) || [];

    return words.some(
      (word) => FRENCH_MARKER_WORDS.has(word) && !exceptions.has(word)
    );
  }

  /**
   * Check that the translation contains exactly the same placeholders, with
   * the same multiplicities, as the original.
   *
   * @param {string} original
   * @param {string} translated
   * @returns {boolean}
   */
  checkVariables(original, translated) {
    const originalVars = original.match(VARIABLE_PATTERN) || [];
    const translatedVars = translated.match(VARIABLE_PATTERN) || [];
    return sameMultiset(originalVars, translatedVars);
  }

  /**
   * Check that the translation keeps the same HTML tags (by name, ignoring
   * attributes and order) as the original.
   *
   * @param {string} original
   * @param {string} translated
   * @returns {boolean}
   */
  checkHTML(original, translated) {
    const originalTags = original.match(HTML_TAG_PATTERN) || [];
    const translatedTags = translated.match(HTML_TAG_PATTERN) || [];
    return sameMultiset(
      originalTags.map(tagSignature),
      translatedTags.map(tagSignature)
    );
  }

  /**
   * Check that every glossary term found in the original appears in the
   * translation as its prescribed target-language form. Matching is by
   * case-insensitive substring.
   *
   * @param {string} original
   * @param {string} translated
   * @param {string} targetLanguage
   * @returns {boolean}
   */
  checkGlossaryCompliance(original, translated, targetLanguage) {
    // Check if key terms from glossary are properly translated
    if (!this.glossary?.terms) return true;

    const source = original.toLowerCase();
    const target = translated.toLowerCase();

    return Object.entries(this.glossary.terms).every(([term, translations]) => {
      // Both sides are lower-cased so terms with capitals still match.
      if (!term || !source.includes(term.toLowerCase())) return true;
      const expected = translations?.[targetLanguage];
      return !expected || target.includes(expected.toLowerCase());
    });
  }

  /**
   * Estimate the request cost in USD from the API's usage block, using
   * GPT-4o-mini pricing regardless of the configured model.
   *
   * @param {{prompt_tokens?: number, completion_tokens?: number}|null|undefined} usage
   * @returns {number} Cost in USD; 0 when no usage is given.
   */
  calculateCost(usage) {
    if (!usage) return 0;

    const inputCost =
      ((usage.prompt_tokens ?? 0) / 1000000) * INPUT_PRICE_PER_MILLION;
    const outputCost =
      ((usage.completion_tokens ?? 0) / 1000000) * OUTPUT_PRICE_PER_MILLION;

    return inputCost + outputCost;
  }

  /**
   * Translate one text; a convenience wrapper around `translateBatch`.
   *
   * @param {string} text
   * @param {string} targetLanguage
   * @param {string|null} [context=null] - Optional hint passed to the model.
   * @returns {Promise<object>} The single translation entry.
   */
  async translateSingle(text, targetLanguage, context = null) {
    const result = await this.translateBatch(
      [{ text, context, key: "" }],
      targetLanguage
    );
    return result.translations[0];
  }
}

export default AITranslator;