UNPKG

@awesome-fe/translate

Version:
83 lines 3.42 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.VectorizerEngine = void 0; const translation_engine_1 = require("./translation-engine"); const sentence_formatter_1 = require("./sentence-formatter"); const fs_1 = require("fs"); const path_1 = require("path"); const get_new_filename_for_1 = require("./get-new-filename-for"); const get_distance_1 = require("../utils/get-distance"); const common_1 = require("../dom/common"); const create_embedding_1 = require("../utils/create-embedding"); class VectorizerEngine extends translation_engine_1.TranslationEngine { options; constructor(options = {}) { super(); this.options = options; } _entries = []; get entries() { return this._entries; } getDictFilename() { const dict = this.options.dict; if (dict) { const cwd = this.options.cwd; const currentFile = this.currentFile; return (0, get_new_filename_for_1.getNewFilenameFor)(currentFile, cwd, dict, '.vector.json'); } } async batchTranslate(sentences, format) { if ((0, fs_1.existsSync)(this.getDictFilename())) { return sentences; } // 向量化只针对有翻译结果的句子 const filteredSentences = sentences .filter(([original, translation]) => !!translation && (0, common_1.containsChinese)(translation) && !original.includes('<div className="breadcrumb-container">')) .map(([original, translation]) => [ sentence_formatter_1.SentenceFormatter.toMarkdown(original.trim(), format), sentence_formatter_1.SentenceFormatter.toMarkdown(translation.trim(), format), ]); if (!filteredSentences.length) { return sentences; } const originalVectors = await this.getVectors(filteredSentences.map(it => it[0])); const translatedVectors = await this.getVectors(filteredSentences.map(it => it[1])); for (let i = 0; i < filteredSentences.length; ++i) { const [english, chinese] = filteredSentences[i]; const englishVector = originalVectors[i]; const chineseVector = translatedVectors[i]; const distance = (0, get_distance_1.getDistance)(englishVector, chineseVector); if (distance > 0.2) { console.warn(`distance: ${distance}, english: ${english}, chinese: ${chinese}`); } const newEntry = { english, englishVector, chinese, chineseVector, distance, }; this._entries.push(newEntry); } // 原封不动返回,因为我们并不想改变翻译结果 return sentences; } async setup(currentFile) { await super.setup(currentFile); this._entries = []; } async tearDown() { const dict = this.getDictFilename(); if (dict && this.entries.length) { const dir = (0, path_1.dirname)(dict); (0, fs_1.mkdirSync)(dir, { recursive: true }); (0, fs_1.writeFileSync)(dict, JSON.stringify(this.entries), 'utf8'); } } async getVectors(texts) { return (0, create_embedding_1.createEmbedding)(texts); } } exports.VectorizerEngine = VectorizerEngine; //# sourceMappingURL=vectorizer-engine.js.map