@alnliang/tlv
Version:
Lightweight AI-powered tool for verifying and correcting translations in JSON language files using any OpenAI-compatible API
150 lines • 6.79 kB
JavaScript
/**
 * Static helpers that turn several translation data shapes into RAG documents.
 *
 * Every document produced here has the shape `{ id, content, metadata }`, where
 * `metadata` carries at least `sourceLanguage`, `targetLanguage`, `domain`,
 * `context` and `confidence`.
 */
export class TranslationDataPreparer {
  /**
   * Convert a parallel translation corpus to RAG documents.
   *
   * @param {Array<object>} translations - Records read for `id`, `sourceText`,
   *   `targetText`, `sourceLanguage`, `targetLanguage`, `domain`, `context`
   *   and `sourceFile`.
   * @returns {Array<object>} One document per record with confidence 1.0.
   *   A missing or empty `domain` becomes 'general'.
   */
  static prepareParallelCorpus(translations) {
    return translations.map((translation) => ({
      id: translation.id,
      content: `Source: ${translation.sourceText}\nTarget: ${translation.targetText}`,
      metadata: {
        sourceLanguage: translation.sourceLanguage,
        targetLanguage: translation.targetLanguage,
        domain: translation.domain || 'general',
        context: translation.context,
        sourceFile: translation.sourceFile,
        confidence: 1.0,
      },
    }));
  }

  /**
   * Convert translation memory to RAG documents.
   *
   * @param {Array<object>} entries - Each entry supplies `sourceLanguage`,
   *   `targetLanguage` and a `translations` array of
   *   `{ source, target, context }` items.
   * @returns {Array<object>} One document per translation item, with id
   *   `tm_<entryIndex>_<translationIndex>`, domain 'general' and confidence 0.9.
   */
  static prepareTranslationMemory(entries) {
    return entries.flatMap((entry, entryIndex) =>
      entry.translations.map((translation, translationIndex) => ({
        id: `tm_${entryIndex}_${translationIndex}`,
        content: `Source: ${translation.source}\nTarget: ${translation.target}`,
        metadata: {
          sourceLanguage: entry.sourceLanguage,
          targetLanguage: entry.targetLanguage,
          domain: 'general',
          context: translation.context,
          confidence: 0.9,
        },
      })),
    );
  }

  /**
   * Convert a technical glossary to RAG documents.
   *
   * @param {Array<object>} glossary - Each entry supplies `term`, an optional
   *   `definition`, an optional `domain`, and a `translations` object keyed by
   *   target language.
   * @param {string} [sourceLanguage='en'] - Language recorded as the source
   *   language of every glossary term.
   * @returns {Array<object>} One document per (entry, target language) pair,
   *   with id `glossary_<entryIndex>_<targetLang>` and confidence 1.0.
   */
  static prepareTechnicalGlossary(glossary, sourceLanguage = 'en') {
    return glossary.flatMap((entry, entryIndex) => {
      // Fall back like prepareParallelCorpus so neither `domain` nor the
      // context string ends up as "undefined".
      const domain = entry.domain || 'general';
      return Object.entries(entry.translations).map(([targetLang, translation]) => ({
        id: `glossary_${entryIndex}_${targetLang}`,
        content: `Term: ${entry.term}\nTranslation: ${translation}\nDefinition: ${entry.definition || 'N/A'}`,
        metadata: {
          sourceLanguage,
          targetLanguage: targetLang,
          domain,
          context: `Technical term in ${domain}`,
          confidence: 1.0,
        },
      }));
    });
  }

  /**
   * Convert software localization files to RAG documents.
   *
   * Only the top-level keys of the source locale are paired; values are
   * interpolated into the content string as-is.
   *
   * @param {object} localizationData - Object keyed by language; each value is
   *   an object mapping message keys to text.
   * @param {string} [sourceLanguage='en'] - Key of the source locale.
   * @param {string} [domain='ui'] - Domain recorded on every document.
   * @returns {Array<object>} One document per (target language, key) pair for
   *   which the target locale has its own non-empty value, with id
   *   `loc_<targetLang>_<key>` and confidence 0.8.
   * @throws {Error} If `localizationData` has no entry for `sourceLanguage`.
   */
  static prepareSoftwareLocalization(localizationData, sourceLanguage = 'en', domain = 'ui') {
    const sourceData = localizationData[sourceLanguage];
    if (!sourceData) {
      throw new Error(`Source language ${sourceLanguage} not found in localization data`);
    }
    const hasOwn = Object.prototype.hasOwnProperty;
    const documents = [];
    for (const [targetLang, translations] of Object.entries(localizationData)) {
      if (targetLang === sourceLanguage) {
        continue;
      }
      for (const [key, sourceText] of Object.entries(sourceData)) {
        // Keys such as "constructor" or "toString" would otherwise resolve to
        // inherited Object.prototype members and be emitted as translations.
        if (!hasOwn.call(translations, key)) {
          continue;
        }
        const targetText = translations[key];
        if (!targetText) {
          continue;
        }
        documents.push({
          id: `loc_${targetLang}_${key}`,
          content: `Source: ${sourceText}\nTarget: ${targetText}`,
          metadata: {
            sourceLanguage,
            targetLanguage: targetLang,
            domain,
            context: `UI element: ${key}`,
            sourceFile: `localization/${targetLang}.json`,
            confidence: 0.8,
          },
        });
      }
    }
    return documents;
  }

  /**
   * Convert API documentation translations to RAG documents.
   *
   * A doc without a source-language description is skipped entirely,
   * including its parameters. Target languages whose text is missing or empty
   * are skipped rather than emitted as "Target: undefined".
   *
   * @param {Array<object>} apiDocs - Each doc supplies `endpoint`, a
   *   `description` object keyed by language, and optionally `parameters`: an
   *   object mapping parameter names to per-language description objects.
   * @param {string} [sourceLanguage='en'] - Language used as the source side.
   * @returns {Array<object>} Documents with ids `api_desc_<docIndex>_<lang>`
   *   and `api_param_<docIndex>_<paramName>_<lang>`, domain 'api' and
   *   confidence 0.9.
   */
  static prepareAPIDocumentation(apiDocs, sourceLanguage = 'en') {
    const documents = [];

    // Pairs the source-language text with every other language in
    // `descriptions`; returns false when there is no source text to pair.
    const addPairs = (descriptions, idPrefix, context) => {
      const sourceText = descriptions ? descriptions[sourceLanguage] : undefined;
      if (!sourceText) {
        return false;
      }
      for (const [targetLang, targetText] of Object.entries(descriptions)) {
        if (targetLang === sourceLanguage || !targetText) {
          continue;
        }
        documents.push({
          id: `${idPrefix}_${targetLang}`,
          content: `Source: ${sourceText}\nTarget: ${targetText}`,
          metadata: {
            sourceLanguage,
            targetLanguage: targetLang,
            domain: 'api',
            context,
            sourceFile: 'api-documentation',
            confidence: 0.9,
          },
        });
      }
      return true;
    };

    apiDocs.forEach((doc, docIndex) => {
      const hasSource = addPairs(
        doc.description,
        `api_desc_${docIndex}`,
        `API endpoint: ${doc.endpoint}`,
      );
      if (!hasSource || !doc.parameters) {
        return;
      }
      for (const [paramName, paramDescriptions] of Object.entries(doc.parameters)) {
        addPairs(
          paramDescriptions,
          `api_param_${docIndex}_${paramName}`,
          `API parameter: ${doc.endpoint}/${paramName}`,
        );
      }
    });
    return documents;
  }

  /**
   * Merge multiple RAG document arrays and deduplicate.
   *
   * Two documents are duplicates when their source language, target language
   * and content all match after string coercion. Among duplicates the one
   * with the highest `metadata.confidence` is kept (missing confidence counts
   * as 0); on a tie the earliest one wins. Output order follows the first
   * appearance of each distinct key.
   *
   * @param {...Array<object>} documentArrays - Arrays of documents to merge.
   * @returns {Array<object>} The deduplicated documents.
   */
  static mergeAndDeduplicate(...documentArrays) {
    const bestByKey = new Map();
    for (const doc of documentArrays.flat()) {
      const { sourceLanguage, targetLanguage } = doc.metadata;
      // A JSON tuple keeps the three parts unambiguous; joining with "-" made
      // ("zh", "Hant-TW") and ("zh-Hant", "TW") collide.
      const contentKey = JSON.stringify([
        String(sourceLanguage),
        String(targetLanguage),
        String(doc.content),
      ]);
      const current = bestByKey.get(contentKey);
      if (
        current === undefined ||
        (current.metadata.confidence || 0) < (doc.metadata.confidence || 0)
      ) {
        bestByKey.set(contentKey, doc);
      }
    }
    return Array.from(bestByKey.values());
  }
}
//# sourceMappingURL=data-preparation.js.map