@akson/cortex-shopify-translations
Version:
Unified Shopify translations management client with product extraction, translation sync, and CLI tools
341 lines (286 loc) • 9.88 kB
JavaScript
/**
* AI Translation Engine using OpenAI GPT-4o-mini
* Maintains brand consistency and glossary compliance
*/
import fs from "fs/promises";
import path from "path";
import { config } from "dotenv";
// Load environment variables from translations/config/.env, resolved against
// the current working directory. The translator class in this file reads
// OPENAI_API_KEY and AI_MODEL from process.env.
config({ path: path.join(process.cwd(), "translations", "config", ".env") });
/**
 * Translates storefront texts through the OpenAI chat-completions API and
 * scores every result against the glossary, the placeholders and the markup
 * of the source text.
 *
 * The glossary and brand configuration are read asynchronously from
 * `<cwd>/translations/config`. The constructor cannot await that work, so it
 * exposes the pending load as `configReady`; `translateBatch` awaits it, and
 * `AITranslator.create()` returns an instance whose config is already loaded.
 */
class AITranslator {
  /** English display names for the language codes the prompt knows by heart. */
  static LANGUAGE_NAMES = Object.freeze({
    de: "German",
    it: "Italian",
    en: "English",
  });

  /** Common French function words used by the contamination heuristic. */
  static FRENCH_WORDS = new Set([
    "avec",
    "pour",
    "dans",
    "sur",
    "sous",
    "tout",
    "tous",
    "leur",
    "leurs",
    "cette",
    "ceux",
    "celui",
    "également",
    "ainsi",
    "donc",
    "mais",
  ]);

  /** GPT-4o-mini list prices in USD per one million tokens. */
  static PRICE_PER_MILLION_TOKENS = Object.freeze({ input: 0.15, output: 0.6 });

  constructor() {
    this.apiKey = process.env.OPENAI_API_KEY;
    this.model = process.env.AI_MODEL || "gpt-4o-mini";
    this.apiUrl = "https://api.openai.com/v1/chat/completions";
    this.temperature = 0.3; // Lower temperature for consistency
    this.maxRetries = 3;
    // Load configuration
    this.glossary = null;
    this.brand = null;
    // Keep a handle on the pending load so callers can wait for it instead of
    // racing against a fire-and-forget promise.
    this.configReady = this.loadConfig();
  }

  /**
   * Builds a translator and waits until its configuration has been loaded.
   *
   * @returns {Promise<AITranslator>} A ready-to-use instance.
   */
  static async create() {
    const translator = new this();
    await translator.configReady;
    return translator;
  }

  /**
   * Reads `glossary.json` and `brand.json` from `<cwd>/translations/config`.
   *
   * Each file is loaded independently: a missing or malformed file is logged
   * as a warning and leaves the corresponding property untouched, without
   * preventing the other file from loading. Never rejects.
   *
   * @returns {Promise<void>}
   */
  async loadConfig() {
    const configDir = path.join(process.cwd(), "translations", "config");

    const readJson = async (fileName) => {
      try {
        const raw = await fs.readFile(path.join(configDir, fileName), "utf-8");
        return { ok: true, value: JSON.parse(raw) };
      } catch (error) {
        console.error(
          "⚠️ Warning: Could not load config files:",
          `${fileName}: ${error.message}`
        );
        return { ok: false };
      }
    };

    const [glossary, brand] = await Promise.all([
      readJson("glossary.json"),
      readJson("brand.json"),
    ]);
    if (glossary.ok) this.glossary = glossary.value;
    if (brand.ok) this.brand = brand.value;
  }

  /**
   * Resolves a human-readable English name for a language code.
   *
   * Falls back to `Intl.DisplayNames` for codes outside LANGUAGE_NAMES and to
   * the raw code when even that fails, so the prompt never says "undefined".
   *
   * @param {string} targetLanguage - Language code such as "de".
   * @returns {string} Display name, or the code itself as a last resort.
   */
  describeLanguage(targetLanguage) {
    if (Object.hasOwn(AITranslator.LANGUAGE_NAMES, String(targetLanguage))) {
      return AITranslator.LANGUAGE_NAMES[targetLanguage];
    }
    try {
      const names = new Intl.DisplayNames(["en"], { type: "language" });
      return names.of(String(targetLanguage)) || String(targetLanguage);
    } catch {
      // Intl rejects structurally invalid tags with a RangeError.
      return String(targetLanguage);
    }
  }

  /**
   * Builds the system prompt carrying brand voice, glossary, fixed phrases,
   * style guidelines and the translation rules.
   *
   * Reads `this.glossary` / `this.brand` as they are at call time; missing
   * configuration degrades to empty sections rather than throwing.
   *
   * @param {string} targetLanguage - Language code of the target language.
   * @returns {string} The complete system prompt.
   */
  buildSystemPrompt(targetLanguage) {
    const asList = (value) => (Array.isArray(value) ? value : []);
    const guidelines = [
      ...asList(this.brand?.style_guidelines?.general),
      ...asList(this.brand?.style_guidelines?.[targetLanguage]),
    ];
    const untranslatable = asList(this.glossary?.do_not_translate);

    return [
      "You are a professional translator for MyArmy.ch, a Swiss military merchandise company.",
      `BRAND VOICE: ${this.brand?.brand_voice?.tone || "Professional yet approachable"}`,
      `TARGET LANGUAGE: ${this.describeLanguage(targetLanguage)} (${targetLanguage})`,
      "GLOSSARY (USE THESE EXACT TRANSLATIONS):",
      JSON.stringify(this.glossary?.terms || {}, null, 2),
      "FIXED PHRASES (USE EXACTLY AS SHOWN):",
      JSON.stringify(this.glossary?.phrases || {}, null, 2),
      "STYLE GUIDELINES:",
      guidelines.map((g) => `- ${g}`).join("\n"),
      "RULES:",
      "1. Use the glossary terms EXACTLY as provided",
      "2. For fixed phrases, use the exact translation provided",
      "3. Preserve ALL variables like %{shop_name}, {{variable}}, [link], etc.",
      "4. Preserve ALL HTML tags like <strong>, <p>, <br>, etc.",
      "5. Keep URLs and email addresses unchanged",
      `6. Do not translate items from the do_not_translate list: ${untranslatable.join(", ")}`,
      "7. Maintain the same tone and formality level as the original",
      "8. For possessives (ton/ta/tes), use the appropriate translation from the glossary",
      "9. Ensure no French words remain in the translation (unless in do_not_translate list)",
      "Respond with ONLY the translation, no explanations or notes.",
    ].join("\n");
  }

  /**
   * Looks up the fixed glossary translation for a whole source text.
   *
   * @param {string} text - Source text to look up verbatim.
   * @param {string} targetLanguage - Language code of the target language.
   * @returns {string|null} The fixed translation, or null when none exists.
   */
  findExactPhrase(text, targetLanguage) {
    const phrases = this.glossary?.phrases;
    // Own-property check keeps texts such as "constructor" from resolving to
    // members inherited from Object.prototype.
    if (!phrases || typeof text !== "string" || !Object.hasOwn(phrases, text)) {
      return null;
    }
    const exact = phrases[text]?.[targetLanguage];
    return typeof exact === "string" && exact.length > 0 ? exact : null;
  }

  /**
   * Translates a batch of texts with a single chat-completion request.
   *
   * Transient failures (network errors, HTTP 408/429/5xx, malformed payloads)
   * are retried up to `maxRetries` times with exponential backoff. Other HTTP
   * errors, such as an invalid API key, fail immediately because repeating the
   * request cannot succeed.
   *
   * @param {Array<{text: string, context?: string|null, key?: string}>} texts
   *   Items to translate.
   * @param {string} targetLanguage - Language code of the target language.
   * @returns {Promise<{translations: Array<object>, usage: object, cost: number}>}
   *   Parsed translations, the token usage reported by the API and the
   *   estimated cost in USD.
   * @throws {Error} When the API key is missing or the request ultimately fails.
   * @throws {TypeError} When `texts` is not an array.
   */
  async translateBatch(texts, targetLanguage) {
    if (!this.apiKey) {
      throw new Error("OPENAI_API_KEY not set in translations/config/.env");
    }
    if (!Array.isArray(texts)) {
      throw new TypeError("texts must be an array of { text, context, key } items");
    }
    // Make sure glossary and brand are in place before the prompt is built.
    await this.configReady;

    if (texts.length === 0) {
      // Nothing to translate: skip the paid round trip.
      return {
        translations: [],
        usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
        cost: 0,
      };
    }

    const systemPrompt = this.buildSystemPrompt(targetLanguage);
    const userPrompt = texts
      .map((item, index) => {
        const exact = this.findExactPhrase(item.text, targetLanguage);
        if (exact !== null) {
          return `[${index}] EXACT MATCH - USE: "${exact}"`;
        }
        const contextLine = item.context ? `\nContext: ${item.context}` : "";
        return `[${index}] Translate: "${item.text}"${contextLine}`;
      })
      .join("\n\n");

    const requestBody = JSON.stringify({
      model: this.model,
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: userPrompt },
      ],
      temperature: this.temperature,
      max_tokens: 4000,
    });

    for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
      try {
        const response = await fetch(this.apiUrl, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${this.apiKey}`,
          },
          body: requestBody,
        });

        if (!response.ok) {
          const detail = await response.text();
          const httpError = new Error(`API error: ${response.status} - ${detail}`);
          httpError.status = response.status;
          httpError.retryable =
            response.status === 408 ||
            response.status === 429 ||
            response.status >= 500;
          throw httpError;
        }

        const data = await response.json();
        const content = data?.choices?.[0]?.message?.content;
        if (!content) {
          throw new Error("Invalid API response structure");
        }

        const usage = data.usage;
        return {
          translations: this.parseTranslations(content, texts, targetLanguage),
          usage,
          cost: this.calculateCost(usage),
        };
      } catch (error) {
        console.error(
          `❌ Attempt ${attempt}/${this.maxRetries} failed:`,
          error.message
        );
        if (attempt === this.maxRetries || error.retryable === false) {
          throw error;
        }
        // Wait before retry (exponential backoff: 2s, 4s, ...)
        await new Promise((resolve) => setTimeout(resolve, 1000 * 2 ** attempt));
      }
    }

    // Only reachable when maxRetries < 1, i.e. no attempt was ever made.
    throw new Error("Translation not attempted: maxRetries must be at least 1");
  }

  /**
   * Splits a model response into the text that follows each `[n]` marker.
   *
   * A marker only counts at the start of a line, so a literal "[1]" inside a
   * translation is not mistaken for the next item. Lines without a marker are
   * treated as continuation of the preceding item, which preserves
   * multi-line translations. The first occurrence of an index wins.
   *
   * @param {string} response - Raw model output.
   * @returns {Map<number, string>} Trimmed text per item index.
   */
  splitIndexedResponse(response) {
    const collected = new Map();
    let current = null;

    for (const line of String(response ?? "").split(/\r?\n/)) {
      const marker = /^\s*\[(\d+)\]\s*(.*)$/.exec(line);
      if (marker) {
        const index = Number(marker[1]);
        if (collected.has(index)) {
          current = null; // duplicate marker: ignore it and its continuation
        } else {
          current = [marker[2]];
          collected.set(index, current);
        }
      } else if (current) {
        current.push(line);
      }
    }

    const segments = new Map();
    for (const [index, parts] of collected) {
      segments.set(index, parts.join("\n").trim());
    }
    return segments;
  }

  /**
   * Removes wrapping quotes and an echoed "EXACT MATCH - USE:" prefix.
   *
   * Quotes are stripped only as a matching pair, so a translation that merely
   * ends in an apostrophe keeps it.
   *
   * @param {string} raw - Text taken from the model response.
   * @returns {string} The bare translation.
   */
  cleanTranslation(raw) {
    const unquote = (value) => {
      const first = value[0];
      const wrapped =
        value.length >= 2 &&
        (first === '"' || first === "'") &&
        value[value.length - 1] === first;
      return wrapped ? value.slice(1, -1) : value;
    };

    const echoPrefix = "EXACT MATCH - USE:";
    let text = unquote(String(raw ?? "").trim());
    if (text.startsWith(echoPrefix)) {
      text = unquote(text.slice(echoPrefix.length).trim());
    }
    return text;
  }

  /**
   * Maps the model response back onto the original items and validates each
   * translation.
   *
   * Resolution order per item: the fixed glossary phrase (used verbatim,
   * regardless of what the model returned), then the text after the item's
   * `[n]` marker. Only when the response contains no markers at all does it
   * fall back to position: the whole response for a single item, otherwise
   * the n-th non-blank line. An item that cannot be resolved gets an empty
   * translation, which validation reports as invalid.
   *
   * @param {string} response - Raw model output.
   * @param {Array<{text: string, key?: string}>} originalTexts - Source items.
   * @param {string} targetLanguage - Language code of the target language.
   * @returns {Array<{original: string, translated: string, key: *, validation: object, confidence: number}>}
   */
  parseTranslations(response, originalTexts, targetLanguage) {
    const segments = this.splitIndexedResponse(response);
    const hasMarkers = segments.size > 0;
    const plainLines = hasMarkers
      ? []
      : String(response ?? "")
          .split(/\r?\n/)
          .map((line) => line.trim())
          .filter((line) => line.length > 0);

    return originalTexts.map((item, index) => {
      let translated = this.findExactPhrase(item.text, targetLanguage);

      if (translated === null) {
        let raw;
        if (hasMarkers) {
          raw = segments.get(index) ?? "";
        } else if (originalTexts.length === 1) {
          raw = plainLines.join("\n");
        } else {
          raw = plainLines[index] ?? "";
        }
        translated = this.cleanTranslation(raw);
      }

      const validation = this.validateTranslation(
        item.text,
        translated,
        targetLanguage
      );
      return {
        original: item.text,
        translated,
        key: item.key,
        validation,
        confidence: validation.score,
      };
    });
  }

  /**
   * Runs all quality checks on one translation.
   *
   * The score is the fraction of passed checks. A translation is valid when
   * it is non-empty and at least 70% of the checks pass; emptiness is a hard
   * failure because an empty string would otherwise still pass most checks.
   *
   * @param {string} original - Source text.
   * @param {string} translated - Candidate translation.
   * @param {string} targetLanguage - Language code of the target language.
   * @returns {{valid: boolean, score: number, checks: Object<string, boolean>}}
   */
  validateTranslation(original, translated, targetLanguage) {
    const source = typeof original === "string" ? original : "";
    const target = typeof translated === "string" ? translated : "";

    const checks = {
      notEmpty: target.trim().length > 0,
      notSameAsOriginal: target !== source,
      noFrenchWords: !this.detectFrenchContamination(target, targetLanguage),
      variablesPreserved: this.checkVariables(source, target),
      htmlPreserved: this.checkHTML(source, target),
      glossaryCompliant: this.checkGlossaryCompliance(
        source,
        target,
        targetLanguage
      ),
      reasonableLength:
        target.length > source.length * 0.3 &&
        target.length < source.length * 3,
    };

    const results = Object.values(checks);
    const score = results.filter(Boolean).length / results.length;

    return {
      valid: checks.notEmpty && score >= 0.7,
      score,
      checks,
    };
  }

  /**
   * Heuristically detects leftover French words in a translation.
   *
   * Words are extracted as runs of Unicode letters, so punctuation next to a
   * word ("pour,") does not hide it. HTML tags, placeholders, URLs and e-mail
   * addresses are ignored because they must stay untranslated. This is a
   * word-list heuristic and can misfire on homographs in the target language
   * (for example English "pour").
   *
   * @param {string} text - Translated text to inspect.
   * @param {string} targetLanguage - Language code; "fr" disables the check.
   * @returns {boolean} True when a listed French word is present.
   */
  detectFrenchContamination(text, targetLanguage) {
    if (targetLanguage === "fr") return false;
    if (typeof text !== "string" || text.length === 0) return false;

    const exceptions = Array.isArray(this.glossary?.do_not_translate)
      ? this.glossary.do_not_translate
      : [];
    const allowed = new Set(
      exceptions.map((entry) => String(entry).toLowerCase().normalize("NFC"))
    );

    const nonProse =
      /<[^>]+>|%\{[^}]+\}|\{\{[^}]+\}\}|\[\[[^\]]+\]\]|https?:\/\/\S+|\S+@\S+/g;
    const words =
      text
        .replace(nonProse, " ")
        .toLowerCase()
        .normalize("NFC")
        .match(/\p{L}+/gu) ?? [];

    return words.some(
      (word) => AITranslator.FRENCH_WORDS.has(word) && !allowed.has(word)
    );
  }

  /**
   * Checks that the translation carries exactly the same placeholders
   * (`%{name}`, `{{name}}`, `[[name]]`) as the source, counting duplicates.
   * Their order may differ.
   *
   * @param {string} original - Source text.
   * @param {string} translated - Candidate translation.
   * @returns {boolean} True when both texts contain the same placeholders.
   */
  checkVariables(original, translated) {
    const placeholderPattern = /%\{[^}]+\}|\{\{[^}]+\}\}|\[\[[^\]]+\]\]/g;
    const collect = (text) =>
      (String(text ?? "").match(placeholderPattern) ?? []).sort();

    const expected = collect(original);
    const actual = collect(translated);
    return (
      expected.length === actual.length &&
      expected.every((token, position) => token === actual[position])
    );
  }

  /**
   * Checks that the translation contains the same HTML tags as the source.
   *
   * Tags are compared by name and open/close kind, ignoring order, letter
   * case, attributes and self-closing slashes, so `<br>` equals `<br/>` but
   * `<strong>` does not equal `<em>`.
   *
   * @param {string} original - Source text.
   * @param {string} translated - Candidate translation.
   * @returns {boolean} True when both texts use the same set of tags.
   */
  checkHTML(original, translated) {
    const collect = (text) =>
      (String(text ?? "").match(/<[^>]+>/g) ?? [])
        .map((tag) => {
          const parsed = /^<\s*(\/?)\s*([a-zA-Z][\w:-]*)/.exec(tag);
          // Anything that is not a recognisable tag is compared verbatim.
          return parsed ? `${parsed[1]}${parsed[2].toLowerCase()}` : tag;
        })
        .sort();

    const expected = collect(original);
    const actual = collect(translated);
    return (
      expected.length === actual.length &&
      expected.every((tag, position) => tag === actual[position])
    );
  }

  /**
   * Checks that every glossary term occurring in the source appears with its
   * prescribed translation in the result. Matching is case-insensitive on
   * both the term and its translation.
   *
   * @param {string} original - Source text.
   * @param {string} translated - Candidate translation.
   * @param {string} targetLanguage - Language code of the target language.
   * @returns {boolean} False as soon as one prescribed translation is missing.
   */
  checkGlossaryCompliance(original, translated, targetLanguage) {
    // Check if key terms from glossary are properly translated
    if (!this.glossary?.terms) return true;

    const source = String(original ?? "").toLowerCase();
    const target = String(translated ?? "").toLowerCase();

    for (const [term, translations] of Object.entries(this.glossary.terms)) {
      const needle = term.toLowerCase();
      // An empty term would match every text; skip it.
      if (needle.length === 0 || !source.includes(needle)) continue;

      const expected = translations?.[targetLanguage];
      if (
        typeof expected === "string" &&
        expected.length > 0 &&
        !target.includes(expected.toLowerCase())
      ) {
        return false;
      }
    }
    return true;
  }

  /**
   * Estimates the request cost from the token usage reported by the API.
   *
   * Always uses GPT-4o-mini list prices, whatever `this.model` is set to.
   * Missing or non-numeric token counts are treated as zero.
   *
   * @param {{prompt_tokens?: number, completion_tokens?: number}|null|undefined} usage
   * @returns {number} Estimated cost in USD.
   */
  calculateCost(usage) {
    if (!usage) return 0;
    const { input, output } = AITranslator.PRICE_PER_MILLION_TOKENS;
    const promptTokens = Number(usage.prompt_tokens) || 0;
    const completionTokens = Number(usage.completion_tokens) || 0;
    return (
      (promptTokens / 1000000) * input + (completionTokens / 1000000) * output
    );
  }

  /**
   * Translates a single text via `translateBatch`.
   *
   * @param {string} text - Source text.
   * @param {string} targetLanguage - Language code of the target language.
   * @param {string|null} [context=null] - Optional hint passed to the model.
   * @returns {Promise<object>} The single entry of the batch's translations.
   */
  async translateSingle(text, targetLanguage, context = null) {
    const result = await this.translateBatch(
      [{ text, context, key: "" }],
      targetLanguage
    );
    return result.translations[0];
  }
}
// Default export: the AITranslator class.
export default AITranslator;