UNPKG

@alnliang/tlv

Version:

Lightweight AI-powered tool for verifying and correcting translations in JSON language files using any OpenAI-compatible API

150 lines 6.79 kB
/**
 * Static helpers that convert different kinds of translation resources into
 * RAG documents.
 *
 * Every method returns an array of plain objects shaped
 * `{ id, content, metadata }`, where `metadata` always carries
 * `sourceLanguage`, `targetLanguage`, `domain`, `context` and `confidence`
 * (and `sourceFile` for some producers).
 */
export class TranslationDataPreparer {
  /**
   * Convert a parallel translation corpus to RAG documents.
   *
   * @param {Array<object>} translations - Items read for `id`, `sourceText`,
   *   `targetText`, `sourceLanguage`, `targetLanguage`, `domain`, `context`
   *   and `sourceFile`.
   * @returns {Array<object>} One document per input item, confidence 1.0.
   *   A falsy `domain` falls back to `'general'`.
   */
  static prepareParallelCorpus(translations) {
    return translations.map((translation) => ({
      id: translation.id,
      content: `Source: ${translation.sourceText}\nTarget: ${translation.targetText}`,
      metadata: {
        sourceLanguage: translation.sourceLanguage,
        targetLanguage: translation.targetLanguage,
        domain: translation.domain || 'general',
        context: translation.context,
        sourceFile: translation.sourceFile,
        confidence: 1.0,
      },
    }));
  }

  /**
   * Convert translation memory entries to RAG documents.
   *
   * @param {Array<object>} entries - Each entry is read for `sourceLanguage`,
   *   `targetLanguage` and a `translations` array whose items are read for
   *   `source`, `target` and `context`.
   * @returns {Array<object>} One document per translation, with id
   *   `tm_<entryIndex>_<translationIndex>`, domain `'general'` and
   *   confidence 0.9. Entries without `translations` contribute nothing.
   */
  static prepareTranslationMemory(entries) {
    const documents = [];
    entries.forEach((entry, entryIndex) => {
      // An entry with no translations has nothing to emit; do not throw on it.
      (entry.translations || []).forEach((translation, translationIndex) => {
        documents.push({
          id: `tm_${entryIndex}_${translationIndex}`,
          content: `Source: ${translation.source}\nTarget: ${translation.target}`,
          metadata: {
            sourceLanguage: entry.sourceLanguage,
            targetLanguage: entry.targetLanguage,
            domain: 'general',
            context: translation.context,
            confidence: 0.9,
          },
        });
      });
    });
    return documents;
  }

  /**
   * Convert a technical glossary to RAG documents.
   *
   * @param {Array<object>} glossary - Each entry is read for `term`,
   *   `definition`, `domain` and a `translations` object mapping a target
   *   language code to the translated term.
   * @param {string} [sourceLanguage='en'] - Language recorded as the source
   *   language of every glossary term.
   * @returns {Array<object>} One document per (entry, target language) pair,
   *   with id `glossary_<entryIndex>_<targetLang>` and confidence 1.0.
   *   A falsy `domain` falls back to `'general'`; a falsy `definition` is
   *   rendered as `'N/A'`. Entries without `translations` contribute nothing.
   */
  static prepareTechnicalGlossary(glossary, sourceLanguage = 'en') {
    const documents = [];
    glossary.forEach((entry, entryIndex) => {
      // Same default as prepareParallelCorpus, so neither the metadata nor the
      // context string ever contains "undefined".
      const domain = entry.domain || 'general';
      for (const [targetLang, translation] of Object.entries(entry.translations || {})) {
        documents.push({
          id: `glossary_${entryIndex}_${targetLang}`,
          content: `Term: ${entry.term}\nTranslation: ${translation}\nDefinition: ${entry.definition || 'N/A'}`,
          metadata: {
            sourceLanguage,
            targetLanguage: targetLang,
            domain,
            context: `Technical term in ${domain}`,
            confidence: 1.0,
          },
        });
      }
    });
    return documents;
  }

  /**
   * Convert software localization data to RAG documents.
   *
   * @param {object} localizationData - Maps a language code to an object of
   *   `key -> text` pairs.
   * @param {string} [sourceLanguage='en'] - Language whose texts are the
   *   source side of every pair.
   * @param {string} [domain='ui'] - Domain recorded on every document.
   * @returns {Array<object>} One document per (target language, key) pair for
   *   which the target language has its own non-empty text, with id
   *   `loc_<targetLang>_<key>` and confidence 0.8.
   * @throws {Error} If `localizationData` has no entry for `sourceLanguage`.
   */
  static prepareSoftwareLocalization(localizationData, sourceLanguage = 'en', domain = 'ui') {
    const sourceData = localizationData[sourceLanguage];
    if (!sourceData) {
      throw new Error(`Source language ${sourceLanguage} not found in localization data`);
    }

    const documents = [];
    for (const [targetLang, translations] of Object.entries(localizationData)) {
      // Skip the source language itself and locales with no usable data.
      if (targetLang === sourceLanguage || !translations) continue;

      for (const [key, sourceText] of Object.entries(sourceData)) {
        // Only own properties count: a key such as "constructor" or
        // "toString" must not resolve to an inherited Object.prototype member.
        if (!Object.prototype.hasOwnProperty.call(translations, key)) continue;
        const targetText = translations[key];
        if (!targetText) continue;

        documents.push({
          id: `loc_${targetLang}_${key}`,
          content: `Source: ${sourceText}\nTarget: ${targetText}`,
          metadata: {
            sourceLanguage,
            targetLanguage: targetLang,
            domain,
            context: `UI element: ${key}`,
            sourceFile: `localization/${targetLang}.json`,
            confidence: 0.8,
          },
        });
      }
    }
    return documents;
  }

  /**
   * Convert API documentation translations to RAG documents.
   *
   * @param {Array<object>} apiDocs - Each doc is read for `endpoint`, a
   *   `description` object (language code -> text) and an optional
   *   `parameters` object (parameter name -> { language code -> text }).
   * @param {string} [sourceLanguage='en'] - Language whose texts are the
   *   source side of every pair.
   * @returns {Array<object>} Documents with id `api_desc_<docIndex>_<lang>`
   *   for endpoint descriptions and `api_param_<docIndex>_<param>_<lang>` for
   *   parameter descriptions, all with domain `'api'` and confidence 0.9.
   *   A doc with no source-language description is skipped entirely
   *   (including its parameters); empty target texts are skipped.
   */
  static prepareAPIDocumentation(apiDocs, sourceLanguage = 'en') {
    const documents = [];
    apiDocs.forEach((doc, docIndex) => {
      const descriptions = doc.description;
      // A missing description map is treated like a missing source description.
      const sourceDescription = descriptions ? descriptions[sourceLanguage] : undefined;
      if (!sourceDescription) return;

      for (const [targetLang, targetDescription] of Object.entries(descriptions)) {
        // Empty targets carry no translation knowledge; skip them, as
        // prepareSoftwareLocalization does.
        if (targetLang === sourceLanguage || !targetDescription) continue;
        documents.push({
          id: `api_desc_${docIndex}_${targetLang}`,
          content: `Source: ${sourceDescription}\nTarget: ${targetDescription}`,
          metadata: {
            sourceLanguage,
            targetLanguage: targetLang,
            domain: 'api',
            context: `API endpoint: ${doc.endpoint}`,
            sourceFile: 'api-documentation',
            confidence: 0.9,
          },
        });
      }

      if (!doc.parameters) return;
      for (const [paramName, paramDescriptions] of Object.entries(doc.parameters)) {
        const sourceParamDesc = paramDescriptions ? paramDescriptions[sourceLanguage] : undefined;
        if (!sourceParamDesc) continue;

        for (const [targetLang, targetParamDesc] of Object.entries(paramDescriptions)) {
          if (targetLang === sourceLanguage || !targetParamDesc) continue;
          documents.push({
            id: `api_param_${docIndex}_${paramName}_${targetLang}`,
            content: `Source: ${sourceParamDesc}\nTarget: ${targetParamDesc}`,
            metadata: {
              sourceLanguage,
              targetLanguage: targetLang,
              domain: 'api',
              context: `API parameter: ${doc.endpoint}/${paramName}`,
              sourceFile: 'api-documentation',
              confidence: 0.9,
            },
          });
        }
      }
    });
    return documents;
  }

  /**
   * Merge multiple RAG document arrays and deduplicate them.
   *
   * Two documents are duplicates when their source language, target language
   * and content are all equal. Among duplicates the one with the highest
   * `metadata.confidence` (missing or falsy counts as 0) is kept; on a tie
   * the earliest one wins. Output order follows first occurrence.
   *
   * @param {...Array<object>} documentArrays - Arrays of RAG documents.
   * @returns {Array<object>} The deduplicated documents.
   */
  static mergeAndDeduplicate(...documentArrays) {
    const uniqueDocuments = new Map();
    for (const doc of documentArrays.flat()) {
      // Language tags may contain "-" (e.g. "zh-Hant"), so a "-"-joined key
      // is ambiguous; a JSON-encoded tuple keeps the three parts distinct.
      const contentKey = JSON.stringify([
        doc.metadata.sourceLanguage,
        doc.metadata.targetLanguage,
        doc.content,
      ]);
      const existing = uniqueDocuments.get(contentKey);
      if (!existing || (existing.metadata.confidence || 0) < (doc.metadata.confidence || 0)) {
        uniqueDocuments.set(contentKey, doc);
      }
    }
    return Array.from(uniqueDocuments.values());
  }
}
//# sourceMappingURL=data-preparation.js.map