UNPKG

anylang

Version:

A translator's kit that uses the free APIs of Google Translate, Yandex, Bing, ChatGPT, and other LLMs

312 lines (310 loc) 44.4 kB
// TypeScript-emitted helper: drives a generator function as if it were an async function.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import { Semaphore } from '../utils/Semaphore';
/**
 * Module for scheduling and optimizing translation of text streams
 *
 * - It can union many translate requests into one batch request
 * - You can group requests by context
 * - It's configurable. You can set the retry limit and the threshold for direct translation
 *
 * The `translator` object is expected to provide the methods used here:
 * `checkLimitExceeding(text)`, `getLengthLimit()`, `getRequestsTimeout()`,
 * `translate(text, from, to)` and `translateBatch(texts, from, to)`.
 */
export class Scheduler {
    /**
     * @param translator Translator used to perform the requests
     * @param config Optional overrides merged over the default config
     */
    constructor(translator, config) {
        this.config = {
            // How many times a failed task is put back to a batch before giving up
            translateRetryAttemptLimit: 2,
            // When retries are exhausted, try to translate the task with a single direct request
            isAllowDirectTranslateBadChunks: true,
            // Texts with length >= this value are translated directly (null disables it)
            directTranslateLength: null,
            // Debounce delay (ms) before a task container is moved to the translate queue
            translatePoolDelay: 300,
            // Container length that triggers moving it to the queue immediately (null disables it)
            chunkSizeForInstantTranslate: null,
            // Delay (ms) before the first worker iteration (null disables it)
            taskBatchHandleDelay: null,
        };
        // Contexts for which `abort` was called; failed tasks of these contexts are not retried
        this.abortedContexts = new Set();
        // Counter used to build unique contexts for chunks of a split text
        this.contextCounter = 0;
        // Containers that still accept tasks (not yet moved to the translate queue)
        this.taskContainersStorage = new Set();
        // Debounce timers, keyed by task container
        this.timersMap = new Map();
        /**
         * Tasks queue with items sorted by priority (ascending)
         * It must be handled from end to start
         */
        this.translateQueue = [];
        /**
         * Remove the last item of the queue (the one with the highest priority) and return it
         * in an iterator-like shape: `done` is true when the queue was empty, `value` is the item or null
         */
        this.getItemFromTranslateQueue = () => {
            var _a;
            return {
                done: this.translateQueue.length === 0,
                value: (_a = this.translateQueue.pop()) !== null && _a !== void 0 ? _a : null,
            };
        };
        // True while the worker loop is running
        this.workerState = false;
        this.translator = translator;
        this.config = Object.assign(Object.assign({}, this.config), config);
        this.semafor = new Semaphore({ timeout: translator.getRequestsTimeout() });
    }
    /**
     * Reject all pending tasks of the given context and remember the context as aborted
     *
     * Only exactly matching contexts are affected. Requests that are already sent are not cancelled,
     * but their failed tasks will not be retried.
     *
     * @param context Context name to abort
     * @returns Promise that resolves when pending tasks are rejected
     */
    // eslint-disable-next-line @typescript-eslint/require-await
    abort(context) {
        return __awaiter(this, void 0, void 0, function* () {
            const abortTasks = (tasks) => {
                tasks.forEach((task) => {
                    task.reject(new Error('Translation is aborted in scheduler'));
                });
            };
            // Clear tasks that wait in containers
            for (const task of this.taskContainersStorage) {
                if (context === task.context) {
                    // Stop the debounce timer too, otherwise it would later move
                    // the aborted container to the queue and send a useless request
                    this.clearContainerTimer(task);
                    this.taskContainersStorage.delete(task);
                    abortTasks(task.tasks);
                }
            }
            // Remove tasks from translation queue
            this.translateQueue = this.translateQueue.filter((task) => {
                // Abort and filter out matched tasks
                if (context === task.context) {
                    abortTasks(task.tasks);
                    return false;
                }
                return true;
            });
            // TODO: abort even sent requests
            // Abort in-flight translations
            this.abortedContexts.add(context);
        });
    }
    /**
     * Translate a text, choosing a strategy by its size and options
     *
     * - text that exceeds the translator limit is split and translated by chunks
     * - text is translated directly when `options.directTranslate` is set
     *   or when its length reaches `config.directTranslateLength`
     * - otherwise the text is scheduled to be translated in a batch
     *
     * @param text Text to translate
     * @param from Source language
     * @param to Target language
     * @param options Optional `context` (default ''), `priority` (default 0) and `directTranslate` (default false)
     * @returns Promise with translated text
     */
    translate(text, from, to, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const { context = '', priority = 0, directTranslate: directTranslateForThisRequest = false, } = options !== null && options !== void 0 ? options : {};
            if (this.translator.checkLimitExceeding(text) > 0) {
                // Split text by words and translate
                return this.splitAndTranslate(text, from, to, context, priority);
            }
            const isLongEnoughForDirectTranslate = this.config.directTranslateLength !== null &&
                text.length >= this.config.directTranslateLength;
            if (directTranslateForThisRequest || isLongEnoughForDirectTranslate) {
                return this.directTranslate(text, from, to);
            }
            return this.makeTask({ text: text, from, to, context, priority });
        });
    }
    /**
     * Translate a text with a single request, bypassing batching
     *
     * The request still waits for a semaphore slot, and the slot is released
     * even when the translator throws synchronously.
     */
    directTranslate(text, from, to) {
        return __awaiter(this, void 0, void 0, function* () {
            const free = yield this.semafor.take();
            try {
                return yield this.translator.translate(text, from, to);
            }
            finally {
                yield free();
            }
        });
    }
    /**
     * Split a text that exceeds the translator limit, translate the chunks and join the results
     *
     * Text is split by words (a word with its trailing whitespace). A single word that exceeds
     * the limit is sliced by chars, and such slices are returned as is, without translation.
     *
     * NOTE: whitespace at the very start of `text` is not matched by the pattern, so it is dropped.
     *
     * @returns Promise with joined text
     */
    splitAndTranslate(text, from, to, context, priority) {
        const splittedText = [];
        // Indexes of `splittedText` items that are slices of an oversized word
        const charsetIndexes = new Set();
        let wordsBuffer = '';
        for (const textMatch of text.matchAll(/([^\s]+)(\s*)/g)) {
            const newPart = textMatch[0];
            const newBuffer = wordsBuffer + newPart;
            // Add word to buffer if can
            if (this.translator.checkLimitExceeding(newBuffer) <= 0) {
                wordsBuffer = newBuffer;
                continue;
            }
            // Write and clear buffer if not empty
            if (wordsBuffer.length > 0) {
                splittedText.push(wordsBuffer);
                wordsBuffer = '';
            }
            // New part fits the limit alone, so it starts a new buffer
            if (this.translator.checkLimitExceeding(newPart) <= 0) {
                wordsBuffer = newPart;
                continue;
            }
            // Slice by chars
            let charsBuffer = newPart;
            while (charsBuffer.length > 0) {
                const extraChars = this.translator.checkLimitExceeding(charsBuffer);
                // Take the whole remainder when it fits, otherwise cut the extra chars.
                // At least one char is taken each time, so the loop always terminates
                const offset = extraChars > 0
                    ? Math.max(1, charsBuffer.length - extraChars)
                    : charsBuffer.length;
                splittedText.push(charsBuffer.slice(0, offset));
                charsetIndexes.add(splittedText.length - 1);
                charsBuffer = charsBuffer.slice(offset);
            }
        }
        // Write the rest of words, otherwise the tail of text would be lost
        if (wordsBuffer.length > 0) {
            splittedText.push(wordsBuffer);
        }
        const ctxPrefix = context.length > 0 ? context + ';' : '';
        return Promise.all(splittedText.map((text, index) => charsetIndexes.has(index)
            ? text
            : this.makeTask({
                text,
                from,
                to,
                // Unique context per chunk, so chunks are not merged into one container
                context: ctxPrefix + `text#${this.contextCounter++}`,
                priority,
            }))).then((translatedParts) => translatedParts.join(''));
    }
    /**
     * Schedule a text to be translated in a batch
     *
     * @returns Promise that is settled when the task is translated, failed or aborted
     */
    makeTask({ text, from, to, priority, context = '' }) {
        return new Promise((resolve, reject) => {
            this.addToTaskContainer({
                text,
                from,
                to,
                context,
                priority,
                resolve,
                reject,
            });
        });
    }
    /**
     * Put a task to a container with the same `from`, `to`, `context` and `priority`,
     * or create a new container when no suitable one exists
     *
     * The container is then moved to the translate queue immediately (when it reached
     * `config.chunkSizeForInstantTranslate`) or after a debounce delay.
     */
    addToTaskContainer(params) {
        const { text, from, to, attempt = 0, context = '', priority, resolve, reject, } = params;
        // create task
        const task = {
            text,
            from,
            to,
            attempt,
            resolve,
            reject,
        };
        let container = null;
        // try add to exists container
        for (const taskContainer of this.taskContainersStorage) {
            // Skip containers with not equal parameters.
            // Normalized values are compared, so an omitted context matches ''
            if (taskContainer.from !== from ||
                taskContainer.to !== to ||
                taskContainer.context !== context ||
                taskContainer.priority !== priority)
                continue;
            // Lightweight check to overflow
            // NOTE: Do strict check here if you need comply a limit contract
            if (this.translator.getLengthLimit() >= taskContainer.length + task.text.length) {
                taskContainer.tasks.push(task);
                taskContainer.length += task.text.length;
                container = taskContainer;
                // A task must be in one container only, otherwise it would be translated many times
                break;
            }
        }
        // make container
        if (container === null) {
            const newTaskContainer = {
                context,
                priority,
                from,
                to,
                tasks: [task],
                length: task.text.length,
            };
            this.taskContainersStorage.add(newTaskContainer);
            container = newTaskContainer;
        }
        if (this.config.chunkSizeForInstantTranslate !== null &&
            container.length >= this.config.chunkSizeForInstantTranslate) {
            this.addToTranslateQueue(container);
        }
        else {
            this.updateDelayForAddToTranslateQueue(container);
        }
    }
    /**
     * Cancel the debounce timer of a container and forget it. Do nothing when there is no timer
     */
    clearContainerTimer(taskContainer) {
        if (!this.timersMap.has(taskContainer))
            return;
        // Due to expectation run on one platform, timer objects will same always
        globalThis.clearTimeout(this.timersMap.get(taskContainer));
        this.timersMap.delete(taskContainer);
    }
    /**
     * (Re)start the debounce timer that moves a container to the translate queue
     * after `config.translatePoolDelay` ms
     */
    updateDelayForAddToTranslateQueue(taskContainer) {
        // Flush timer
        this.clearContainerTimer(taskContainer);
        this.timersMap.set(taskContainer, globalThis.setTimeout(() => {
            this.addToTranslateQueue(taskContainer);
        }, this.config.translatePoolDelay));
    }
    /**
     * Move a container to the translate queue and start the worker if it is not running
     *
     * After that the container does not accept new tasks.
     */
    addToTranslateQueue(taskContainer) {
        // Flush timer
        this.clearContainerTimer(taskContainer);
        this.taskContainersStorage.delete(taskContainer);
        // Resort queue by priority each time to keep consistency
        this.translateQueue = this.translateQueue
            .concat(taskContainer)
            .sort((a, b) => a.priority - b.priority);
        if (!this.workerState) {
            // Worker errors are not swallowed, they are surfaced as a rejected promise
            this.runWorker().catch((error) => {
                throw error;
            });
        }
    }
    /**
     * Worker loop: take containers from the queue one by one and translate them in a batch
     *
     * Every task of a container is resolved with its translation or passed to `taskErrorHandler`.
     * The loop ends when the queue is empty. `workerState` is reset even when the loop fails,
     * so the next queued container can start a new worker.
     */
    runWorker() {
        return __awaiter(this, void 0, void 0, function* () {
            this.workerState = true;
            try {
                let firstIteration = true;
                // Daemon loop
                // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
                while (true) {
                    // Delay first iteration to await fill the queue, to consider priority better
                    const workerHandleDelay = this.config.taskBatchHandleDelay;
                    if (workerHandleDelay && firstIteration) {
                        yield new Promise((res) => setTimeout(res, workerHandleDelay));
                    }
                    firstIteration = false;
                    const iterate = this.getItemFromTranslateQueue();
                    // Skip when queue empty
                    if (iterate.done || iterate.value === null)
                        break;
                    const taskContainer = iterate.value;
                    let free = null;
                    try {
                        free = yield this.semafor.take();
                        const textArray = taskContainer.tasks.map((i) => i.text);
                        const result = yield this.translator.translateBatch(textArray, taskContainer.from, taskContainer.to);
                        for (let index = 0; index < taskContainer.tasks.length; index++) {
                            const task = taskContainer.tasks[index];
                            const translatedText = result[index];
                            // Missing item (null or undefined) means the text is not translated
                            if (translatedText != null) {
                                task.resolve(translatedText);
                            }
                            else {
                                this.taskErrorHandler(task, new Error("Translator module can't translate this"), taskContainer.context, taskContainer.priority);
                            }
                        }
                    }
                    catch (reason) {
                        // Failed request (or failed slot acquiring) fails every task of the container
                        console.error(reason);
                        for (const task of taskContainer.tasks) {
                            this.taskErrorHandler(task, reason, taskContainer.context, taskContainer.priority);
                        }
                    }
                    finally {
                        // Release the slot only if it was acquired
                        if (typeof free === 'function') {
                            yield free();
                        }
                    }
                }
            }
            finally {
                this.workerState = false;
            }
        });
    }
    /**
     * Handle a failed task
     *
     * - task of an aborted context is rejected
     * - task is scheduled again while its attempts are below `config.translateRetryAttemptLimit`
     * - then the task is translated directly (if `config.isAllowDirectTranslateBadChunks`) or rejected
     *
     * @param task Failed task
     * @param error Reason of the failure
     * @param context Context of the container the task came from
     * @param priority Priority of the container the task came from
     */
    taskErrorHandler(task, error, context, priority) {
        if (this.abortedContexts.has(context)) {
            task.reject(error);
            return;
        }
        if (task.attempt < this.config.translateRetryAttemptLimit) {
            this.addToTaskContainer(Object.assign(Object.assign({}, task), { attempt: task.attempt + 1, context, priority }));
            return;
        }
        if (this.config.isAllowDirectTranslateBadChunks) {
            const { text, from, to, resolve, reject } = task;
            this.directTranslate(text, from, to).then(resolve, reject);
        }
        else {
            task.reject(error);
        }
    }
}
//# sourceMappingURL=data:application/json;charset=utf8;base64,