@akson/cortex-shopify-translations
Version:
Unified Shopify translations management client with product extraction, translation sync, and CLI tools
253 lines (214 loc) • 7.65 kB
JavaScript
/**
 * Validate translations for quality issues.
 * Checks variable and HTML tag preservation, French contamination,
 * glossary compliance, unchanged text, and unusual length ratios.
 */
import fs from 'fs/promises';
import path from 'path';
// French function words treated as markers of untranslated French text
// when they show up in a non-French translation.
const FRENCH_MARKER_WORDS = new Set([
  'avec', 'pour', 'dans', 'sur', 'sous', 'tout', 'tous',
  'leur', 'leurs', 'cette', 'ceux', 'celui', 'également',
  'ainsi', 'donc', 'mais', 'est', 'sont', 'avoir', 'être',
]);

/**
 * Runs quality checks on translated strings against their French source.
 *
 * The glossary (loaded by `loadGlossary`) is optional for validation: when it
 * is absent, or lacks `do_not_translate` / `terms`, the checks that rely on it
 * simply have nothing to compare against.
 */
class TranslationValidator {
  constructor() {
    this.glossary = null;
    this.issues = [];
  }

  /**
   * Reads `translations/config/glossary.json` (relative to the current working
   * directory) and stores the parsed JSON in `this.glossary`.
   *
   * @returns {Promise<void>}
   * @throws if the file cannot be read or is not valid JSON.
   */
  async loadGlossary() {
    const glossaryPath = path.join(process.cwd(), 'translations', 'config', 'glossary.json');
    this.glossary = JSON.parse(await fs.readFile(glossaryPath, 'utf-8'));
  }

  /**
   * Validates one translation entry for one language.
   *
   * @param {object} translation - entry holding the French source under `fr`
   *   and the translated text under the language code (e.g. `de`).
   * @param {string} language - language code of the text to validate.
   * @returns {Array<{type: string, severity: string, message: string}>}
   *   the issues found; empty when the entry has no text for `language`.
   */
  validateTranslation(translation, language) {
    const issues = [];
    if (!translation[language]) return issues;

    const original = translation.fr;
    const translated = translation[language];

    // Every check below compares against the French source; without it there
    // is nothing to compare, so report that instead of throwing.
    if (typeof original !== 'string' || original.length === 0) {
      issues.push({
        type: 'source',
        severity: 'error',
        message: 'Missing French source text',
      });
      return issues;
    }

    // Check 1: Variables preserved (%{x}, {{x}}, [[x]])
    const varPattern = /%{[^}]+}|{{[^}]+}}|\[\[[^\]]+\]\]/g;
    const originalVars = original.match(varPattern) || [];
    const translatedVars = translated.match(varPattern) || [];
    if (originalVars.length !== translatedVars.length) {
      issues.push({
        type: 'variables',
        severity: 'error',
        message: `Variable count mismatch: ${originalVars.length} → ${translatedVars.length}`,
      });
    }
    const translatedVarSet = new Set(translatedVars);
    for (const variable of originalVars) {
      if (!translatedVarSet.has(variable)) {
        issues.push({
          type: 'variables',
          severity: 'error',
          message: `Missing variable: ${variable}`,
        });
      }
    }

    // Check 2: HTML tags preserved (compared by count only)
    const htmlPattern = /<[^>]+>/g;
    const originalTags = original.match(htmlPattern) || [];
    const translatedTags = translated.match(htmlPattern) || [];
    if (originalTags.length !== translatedTags.length) {
      issues.push({
        type: 'html',
        severity: 'warning',
        message: `HTML tag count mismatch: ${originalTags.length} → ${translatedTags.length}`,
      });
    }

    // Check 3: French contamination (not for French translations)
    if (language !== 'fr') {
      const rawExceptions = this.glossary?.do_not_translate;
      const exceptions = new Set(
        (Array.isArray(rawExceptions) ? rawExceptions : [])
          .filter((entry) => typeof entry === 'string')
          .map((entry) => entry.toLowerCase()),
      );
      const contamination = translated
        .toLowerCase()
        .split(/\s+/)
        // Trim punctuation around each token so "avec," or "(pour" still match.
        .map((token) => token.replace(/^[^\p{L}]+|[^\p{L}]+$/gu, ''))
        .filter((word) => FRENCH_MARKER_WORDS.has(word) && !exceptions.has(word));
      if (contamination.length > 0) {
        issues.push({
          type: 'contamination',
          severity: 'warning',
          message: `Possible French words: ${contamination.join(', ')}`,
        });
      }
    }

    // Check 4: Glossary compliance
    const terms = this.glossary?.terms;
    if (terms) {
      const originalLower = original.toLowerCase();
      const translatedLower = translated.toLowerCase();
      for (const [term, translations] of Object.entries(terms)) {
        if (!originalLower.includes(term.toLowerCase())) continue;
        const expectedTranslation = translations?.[language];
        if (
          typeof expectedTranslation === 'string' &&
          expectedTranslation !== '' &&
          !translatedLower.includes(expectedTranslation.toLowerCase())
        ) {
          issues.push({
            type: 'glossary',
            severity: 'warning',
            message: `Glossary term "${term}" should be translated as "${expectedTranslation}"`,
          });
        }
      }
    }

    // Check 5: Unchanged (empty translations were already skipped above)
    if (translated === original && language !== 'fr') {
      issues.push({
        type: 'unchanged',
        severity: 'error',
        message: 'Translation is identical to original',
      });
    }

    // Check 6: Reasonable length (original is non-empty here, so no division by zero)
    const lengthRatio = translated.length / original.length;
    if (lengthRatio < 0.3 || lengthRatio > 3) {
      issues.push({
        type: 'length',
        severity: 'warning',
        message: `Unusual length ratio: ${lengthRatio.toFixed(2)}`,
      });
    }

    return issues;
  }

  /**
   * Validates every entry of one content file.
   *
   * @param {string} fileName - file name inside `translations/content`
   *   (relative to the current working directory).
   * @param {?string} language - single language code to validate; when null,
   *   `de`, `it` and `en` are validated.
   * @returns {Promise<Array<{key: *, language: string, issues: Array}>>}
   *   one record per (entry, language) pair that has at least one issue.
   * @throws if the file cannot be read, is not valid JSON, or has no
   *   `translations` array.
   */
  async validateFile(fileName, language = null) {
    const filePath = path.join(process.cwd(), 'translations', 'content', fileName);
    const data = JSON.parse(await fs.readFile(filePath, 'utf-8'));
    if (!Array.isArray(data?.translations)) {
      throw new TypeError(`${fileName}: expected a "translations" array`);
    }

    const languages = language ? [language] : ['de', 'it', 'en'];
    const fileIssues = [];
    for (const translation of data.translations) {
      for (const lang of languages) {
        const issues = this.validateTranslation(translation, lang);
        if (issues.length > 0) {
          fileIssues.push({
            key: translation.key,
            language: lang,
            issues,
          });
        }
      }
    }
    return fileIssues;
  }
}
/**
 * Validates translation content files and prints a per-file report followed
 * by a summary.
 *
 * @param {?string} category - base name (without `.json`) of a single file in
 *   `translations/content`; when null, every `.json` file there is validated.
 * @param {?string} language - single language code to validate; when null the
 *   validator's default languages are used.
 * @param {boolean} showAll - also print files that have no issues.
 * @returns {Promise<{total: number, byType: object, bySeverity: object, failedFiles: string[]}>}
 *   the counts that were printed, plus the files that could not be validated.
 * @throws if the glossary cannot be loaded or the content directory cannot be listed.
 */
async function validateTranslations(category = null, language = null, showAll = false) {
  console.log('🔍 Validating translations...\n');
  const validator = new TranslationValidator();
  await validator.loadGlossary();
  const contentDir = path.join(process.cwd(), 'translations', 'content');

  // Get files to validate
  let files;
  if (category) {
    files = [`${category}.json`];
  } else {
    const allFiles = await fs.readdir(contentDir);
    files = allFiles.filter((f) => f.endsWith('.json'));
  }

  const summary = {
    total: 0,
    byType: {},
    bySeverity: { error: 0, warning: 0 },
    failedFiles: [],
  };

  for (const fileName of files) {
    try {
      const issues = await validator.validateFile(fileName, language);
      if (issues.length === 0 && !showAll) continue;

      console.log(`\n📁 ${fileName}`);
      console.log('─'.repeat(60));
      if (issues.length === 0) {
        console.log('✅ No issues found');
        continue;
      }

      // Group by key for better readability. A Map keeps first-seen order and
      // is safe for keys that collide with Object.prototype properties.
      const byKey = new Map();
      for (const item of issues) {
        if (!byKey.has(item.key)) byKey.set(item.key, []);
        byKey.get(item.key).push(item);
      }

      for (const [key, keyIssues] of byKey) {
        console.log(`\n❌ ${key}`);
        for (const item of keyIssues) {
          for (const issue of item.issues) {
            const icon = issue.severity === 'error' ? '🔴' : '⚠️';
            console.log(` ${icon} [${item.language.toUpperCase()}] ${issue.message}`);
            // Update summary; `?? 0` keeps counters numeric for unseen keys.
            summary.total++;
            summary.bySeverity[issue.severity] = (summary.bySeverity[issue.severity] ?? 0) + 1;
            summary.byType[issue.type] = (summary.byType[issue.type] ?? 0) + 1;
          }
        }
      }
    } catch (error) {
      // Remember the failure so the final verdict does not claim success
      // for a file that was never actually validated.
      summary.failedFiles.push(fileName);
      console.error(`✗ Error validating ${fileName}: ${error.message}`);
    }
  }

  // Show summary
  if (summary.total > 0) {
    console.log('\n' + '═'.repeat(60));
    console.log('📊 Validation Summary\n');
    console.log(`Total issues: ${summary.total}`);
    console.log(` 🔴 Errors: ${summary.bySeverity.error}`);
    console.log(` ⚠️ Warnings: ${summary.bySeverity.warning}`);
    console.log('\nBy type:');
    Object.entries(summary.byType).forEach(([type, count]) => {
      console.log(` • ${type}: ${count}`);
    });
    console.log('\n💡 To fix issues:');
    console.log(' 1. Review the specific translations');
    console.log(' 2. Update glossary if needed');
    console.log(' 3. Re-run failed translations with reset-failed.mjs');
  }

  if (summary.failedFiles.length > 0) {
    console.error(
      `\n✗ ${summary.failedFiles.length} file(s) could not be validated: ${summary.failedFiles.join(', ')}`,
    );
  } else if (summary.total === 0) {
    console.log('\n✅ All translations passed validation!');
  }

  return summary;
}
// CLI entry point.
// Recognised flags:
//   --category=<name>  validate only translations/content/<name>.json
//   --lang=<code>      validate only this language
//   --all              also list files that have no issues
const args = process.argv.slice(2);
let category = null;
let language = null;
let showAll = false;
for (const arg of args) {
  if (arg.startsWith('--category=')) {
    // Take everything after the first "=" so values containing "=" survive.
    category = arg.slice('--category='.length);
  } else if (arg.startsWith('--lang=')) {
    language = arg.slice('--lang='.length);
  } else if (arg === '--all') {
    showAll = true;
  }
}

// Report setup failures (e.g. glossary cannot be loaded) as a readable
// message and a non-zero exit code instead of an unhandled rejection.
validateTranslations(category, language, showAll).catch((error) => {
  console.error(`✗ Validation failed: ${error.message}`);
  process.exitCode = 1;
});