/*
 * anylang
 * Version:
 * A translator's kit that uses the free APIs of Google Translate, Yandex, Bing, ChatGPT, and other LLMs
 * 351 lines (349 loc) • 44.1 kB
 * JavaScript
 */
"use strict";
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.Scheduler = void 0;
var _Semaphore = require("../utils/Semaphore");
/**
 * Drives a generator function as if it were an `async` function.
 * (Looks like the standard compiler-emitted helper for down-levelled async code.)
 *
 * Every value yielded by the generator is awaited; the settled value is sent
 * back into the generator with `next`, and a rejection is thrown into it with
 * `throw`. The returned promise resolves with the generator's return value and
 * rejects with any error that escapes the generator.
 *
 * @param thisArg `this` value the generator function is invoked with
 * @param _arguments arguments passed to the generator function (may be omitted)
 * @param P promise constructor to use; the global `Promise` when falsy
 * @param generator generator function to run
 * @returns promise of the generator's final result
 */
const __awaiter = function (thisArg, _arguments, P, generator) {
  const PromiseCtor = P || Promise;

  // Wrap non-promise values so that everything yielded can be chained uniformly
  const adopt = value => {
    if (value instanceof PromiseCtor) {
      return value;
    }
    return new PromiseCtor(resolve => {
      resolve(value);
    });
  };

  return new PromiseCtor((resolve, reject) => {
    const iterator = generator.apply(thisArg, _arguments || []);

    // Either finish with the final value or wait for the yielded one
    const step = result => {
      if (result.done) {
        resolve(result.value);
      } else {
        adopt(result.value).then(onFulfilled, onRejected);
      }
    };

    // Resume the generator with the awaited value
    const onFulfilled = value => {
      try {
        step(iterator.next(value));
      } catch (error) {
        reject(error);
      }
    };

    // Resume the generator by throwing the rejection reason into it
    const onRejected = reason => {
      try {
        step(iterator["throw"](reason));
      } catch (error) {
        reject(error);
      }
    };

    step(iterator.next());
  });
};
/**
 * Scheduler that batches and optimizes streams of text translation requests.
 *
 * - It can merge many translation requests into one batch request
 * - Requests can be grouped by context
 * - It is configurable: you can set the retry limit, the thresholds for
 *   direct translation and the batching delays
 */
class Scheduler {
  /**
   * @param translator translator implementation; the scheduler calls its
   * `translate`, `translateBatch`, `checkLimitExceeding`, `getLengthLimit`
   * and `getRequestsTimeout` methods
   * @param config optional overrides for the default configuration
   */
  constructor(translator, config) {
    this.config = Object.assign({
      // How many times a failed task is put back to a batch before the fallback is used
      translateRetryAttemptLimit: 2,
      // When retries are exhausted: translate the task alone (true) or reject it (false)
      isAllowDirectTranslateBadChunks: true,
      // Texts with at least this length bypass batching; `null` disables the rule
      directTranslateLength: null,
      // Delay (ms) a batch waits for more tasks; restarted every time a task is added
      translatePoolDelay: 300,
      // Batch text length that sends the batch to the queue immediately; `null` disables the rule
      chunkSizeForInstantTranslate: null,
      // Delay (ms) before the worker handles its first queue item; falsy disables the delay
      taskBatchHandleDelay: null
    }, config);
    this.abortedContexts = new Set();
    this.contextCounter = 0;
    // Batches that still collect tasks and are not queued yet
    this.taskContainersStorage = new Set();
    // Batch -> timer that will move the batch to the queue
    this.timersMap = new Map();
    /**
     * Tasks queue with items sorted by priority
     * It must be handled from end to start
     */
    this.translateQueue = [];
    /**
     * Remove the last item of the queue (the one with the highest priority)
     * and return it in an iterator-like `{ done, value }` shape
     */
    this.getItemFromTranslateQueue = () => {
      const isEmpty = this.translateQueue.length === 0;
      const item = this.translateQueue.pop();
      return {
        done: isEmpty,
        value: item !== null && item !== undefined ? item : null
      };
    };
    this.workerState = false;
    this.translator = translator;
    this.semafor = new _Semaphore.Semaphore({
      timeout: translator.getRequestsTimeout()
    });
  }

  /**
   * Reject every task of the given context that has not been sent to the
   * translator yet, and mark the context as aborted so that failed in-flight
   * tasks of this context are rejected instead of retried.
   *
   * @param context context passed to `translate` in `options.context`
   * @returns promise resolved when all pending tasks have been rejected
   */
  abort(context) {
    return __awaiter(this, void 0, void 0, function* () {
      const rejectTasks = tasks => {
        for (const task of tasks) {
          task.reject(new Error('Translation is aborted in scheduler'));
        }
      };

      // Drop batches that are still collecting tasks
      for (const container of this.taskContainersStorage) {
        if (container.context !== context) continue;
        this.taskContainersStorage.delete(container);

        // Cancel the pending timer, otherwise it would queue the aborted
        // batch later and the text would still be sent to the translator
        if (this.timersMap.has(container)) {
          globalThis.clearTimeout(this.timersMap.get(container));
          this.timersMap.delete(container);
        }
        rejectTasks(container.tasks);
      }

      // Drop batches that are already queued
      this.translateQueue = this.translateQueue.filter(container => {
        if (container.context !== context) return true;
        rejectTasks(container.tasks);
        return false;
      });

      // TODO: abort even sent requests
      // Failed in-flight tasks of this context will be rejected in `taskErrorHandler`
      this.abortedContexts.add(context);
    });
  }

  /**
   * Translate a text, choosing between a direct request, a batched task and
   * splitting, depending on the text length and the options.
   *
   * @param text text to translate
   * @param from source language
   * @param to target language
   * @param options optional `{ context, priority, directTranslate }`;
   * defaults are `''`, `0` and `false`
   * @returns promise of the translated text
   */
  translate(text, from, to, options) {
    return __awaiter(this, void 0, void 0, function* () {
      const {
        context = '',
        priority = 0,
        directTranslate: isDirectRequested = false
      } = options !== null && options !== void 0 ? options : {};

      // Text does not fit the translator limit: split it and translate by parts
      if (this.translator.checkLimitExceeding(text) > 0) {
        return this.splitAndTranslate(text, from, to, context, priority);
      }

      const minDirectLength = this.config.directTranslateLength;
      const isLongEnoughForDirect = minDirectLength !== null && text.length >= minDirectLength;
      if (isDirectRequested || isLongEnoughForDirect) {
        return this.directTranslate(text, from, to);
      }
      return this.makeTask({
        text,
        from,
        to,
        context,
        priority
      });
    });
  }

  /**
   * Translate a single text with its own request, bypassing the batching.
   * The request still waits for a semaphore slot.
   *
   * @returns promise of the translated text
   */
  directTranslate(text, from, to) {
    return __awaiter(this, void 0, void 0, function* () {
      const free = yield this.semafor.take();
      try {
        return yield this.translator.translate(text, from, to);
      } finally {
        // Release the slot even when the translator throws synchronously
        free();
      }
    });
  }

  /**
   * Split a text that exceeds the translator limit into chunks that fit,
   * translate the chunks and join the results in the original order.
   *
   * Chunks are built from whole words (a word plus its trailing whitespace).
   * A single word that exceeds the limit by itself is sliced by characters
   * and its slices are put to the result as is, without translation.
   * Whitespace before the first word is not matched by the word pattern and
   * therefore is not part of the result.
   *
   * @returns promise of the joined text
   */
  splitAndTranslate(text, from, to, context, priority) {
    const chunks = [];
    // Indexes of chunks that must be returned untranslated
    const rawChunkIndexes = new Set();
    const pushRawChunk = chunk => {
      chunks.push(chunk);
      rawChunkIndexes.add(chunks.length - 1);
    };

    let wordsBuffer = '';
    for (const match of text.matchAll(/([^\s]+)(\s*)/g)) {
      const part = match[0];
      const extendedBuffer = wordsBuffer + part;

      // Add the word to the buffer while the buffer fits the limit
      if (this.translator.checkLimitExceeding(extendedBuffer) <= 0) {
        wordsBuffer = extendedBuffer;
        continue;
      }

      // Buffer is full: write it and start a new one
      if (wordsBuffer.length > 0) {
        chunks.push(wordsBuffer);
        wordsBuffer = '';
      }

      // The word fits the limit alone, so it starts the new buffer
      if (this.translator.checkLimitExceeding(part) <= 0) {
        wordsBuffer = part;
        continue;
      }

      // The word is too long by itself: slice it by chars
      let rest = part;
      while (rest.length > 0) {
        const extraChars = this.translator.checkLimitExceeding(rest);
        const offset = rest.length - extraChars;

        // The remainder fits the limit (or can't be sliced any further):
        // write it and stop, otherwise this loop would never end.
        // NOTE(review): the remainder is kept untranslated like the other
        // slices of the same word — confirm this is the intended behavior
        if (extraChars <= 0 || offset <= 0) {
          pushRawChunk(rest);
          break;
        }
        pushRawChunk(rest.slice(0, offset));
        rest = rest.slice(offset);
      }
    }

    // Write the words collected after the last flush, otherwise the end of the text is lost
    if (wordsBuffer.length > 0) {
      chunks.push(wordsBuffer);
    }

    const ctxPrefix = context.length > 0 ? context + ';' : '';
    const parts = chunks.map((chunk, index) => {
      if (rawChunkIndexes.has(index)) return chunk;
      // Every chunk gets its own context
      return this.makeTask({
        text: chunk,
        from,
        to,
        context: ctxPrefix + `text#${this.contextCounter++}`,
        priority
      });
    });
    return Promise.all(parts).then(translatedParts => translatedParts.join(''));
  }

  /**
   * Create a translation task and put it to a batch.
   *
   * @returns promise settled when the task is translated, rejected or aborted
   */
  makeTask({
    text,
    from,
    to,
    priority,
    context = ''
  }) {
    return new Promise((resolve, reject) => {
      this.addToTaskContainer({
        text,
        from,
        to,
        context,
        priority,
        resolve,
        reject
      });
    });
  }

  /**
   * Put a task to a batch with the same `from`, `to`, `context` and
   * `priority` that has enough room, or create a new batch for it. Then
   * queue the batch immediately or (re)start its delay timer.
   *
   * @param params `{ text, from, to, attempt, context, priority, resolve, reject }`;
   * `attempt` defaults to `0` and `context` to `''`
   */
  addToTaskContainer(params) {
    const {
      text,
      from,
      to,
      attempt = 0,
      context = '',
      priority,
      resolve,
      reject
    } = params;
    const task = {
      text,
      from,
      to,
      attempt,
      resolve,
      reject
    };

    let container = null;

    // Try to add the task to an existing batch
    for (const candidate of this.taskContainersStorage) {
      const isSameGroup = candidate.from === from && candidate.to === to && candidate.context === context && candidate.priority === priority;
      if (!isSameGroup) continue;

      // Lightweight check to overflow
      // NOTE: Do strict check here if you need comply a limit contract
      if (this.translator.getLengthLimit() < candidate.length + text.length) continue;

      candidate.tasks.push(task);
      candidate.length += text.length;
      container = candidate;
      // Stop at the first suitable batch, otherwise the task would be added
      // to every matching batch and translated several times
      break;
    }

    // No suitable batch, create a new one
    if (container === null) {
      container = {
        context,
        priority,
        from,
        to,
        tasks: [task],
        length: text.length
      };
      this.taskContainersStorage.add(container);
    }

    const instantSize = this.config.chunkSizeForInstantTranslate;
    if (instantSize !== null && container.length >= instantSize) {
      this.addToTranslateQueue(container);
    } else {
      this.updateDelayForAddToTranslateQueue(container);
    }
  }

  /**
   * (Re)start the timer that moves the batch to the translation queue after
   * `config.translatePoolDelay` milliseconds.
   */
  updateDelayForAddToTranslateQueue(taskContainer) {
    // Flush the previous timer
    if (this.timersMap.has(taskContainer)) {
      // Due to expectation run on one platform, timer objects will same always
      globalThis.clearTimeout(this.timersMap.get(taskContainer));
    }
    const timer = globalThis.setTimeout(() => {
      this.addToTranslateQueue(taskContainer);
    }, this.config.translatePoolDelay);
    this.timersMap.set(taskContainer, timer);
  }

  /**
   * Move the batch from the storage to the translation queue and start the
   * worker if it is not running.
   */
  addToTranslateQueue(taskContainer) {
    // Flush timer
    if (this.timersMap.has(taskContainer)) {
      // Due to expectation run on one platform, timer objects will same always
      globalThis.clearTimeout(this.timersMap.get(taskContainer));
      this.timersMap.delete(taskContainer);
    }
    this.taskContainersStorage.delete(taskContainer);

    // Resort queue by priority each time to keep consistency.
    // Ascending order, because the queue is handled from the end
    this.translateQueue = this.translateQueue.concat(taskContainer).sort((a, b) => a.priority - b.priority);

    if (!this.workerState) {
      // The worker runs in background and nobody awaits it, so report its
      // failure here instead of producing an unhandled rejection
      this.runWorker().catch(error => {
        console.error(error);
      });
    }
  }

  /**
   * Worker loop: takes batches from the queue one by one, translates every
   * batch with one `translateBatch` call and settles the tasks of the batch.
   * Stops when the queue is empty.
   *
   * @returns promise resolved when the queue is drained
   */
  runWorker() {
    return __awaiter(this, void 0, void 0, function* () {
      this.workerState = true;
      try {
        // Delay first iteration to await fill the queue, to consider priority better
        const startDelay = this.config.taskBatchHandleDelay;
        if (startDelay) {
          yield new Promise(res => setTimeout(res, startDelay));
        }

        // Daemon loop
        while (true) {
          const iterate = this.getItemFromTranslateQueue();
          // Stop when queue empty
          if (iterate.done || iterate.value === null) break;

          const taskContainer = iterate.value;
          const free = yield this.semafor.take();
          const textArray = taskContainer.tasks.map(task => task.text);
          yield this.translator.translateBatch(textArray, taskContainer.from, taskContainer.to).then(result => {
            taskContainer.tasks.forEach((task, index) => {
              const translatedText = result[index];
              // A missing item is a failure too, not only an explicit `null`
              if (translatedText !== null && translatedText !== undefined) {
                task.resolve(translatedText);
              } else {
                this.taskErrorHandler(task, new Error("Translator module can't translate this"), taskContainer.context, taskContainer.priority);
              }
            });
          }).catch(reason => {
            console.error(reason);
            for (const task of taskContainer.tasks) {
              this.taskErrorHandler(task, reason, taskContainer.context, taskContainer.priority);
            }
          }).finally(free);
        }
      } finally {
        // Always release the flag, otherwise a failure in the loop would
        // prevent the worker from ever being started again
        this.workerState = false;
      }
    });
  }

  /**
   * Decide what to do with a task that was not translated: reject it when
   * its context is aborted, retry it in a new batch while the attempt limit
   * is not reached, and after that either translate it directly or reject.
   *
   * @param task failed task
   * @param error reason of the failure, used to reject the task
   * @param context context of the batch the task belonged to
   * @param priority priority of the batch the task belonged to
   */
  taskErrorHandler(task, error, context, priority) {
    if (this.abortedContexts.has(context)) {
      task.reject(error);
      return;
    }

    // Retry in a batch while attempts remain
    if (task.attempt < this.config.translateRetryAttemptLimit) {
      this.addToTaskContainer(Object.assign(Object.assign({}, task), {
        attempt: task.attempt + 1,
        context,
        priority
      }));
      return;
    }

    if (!this.config.isAllowDirectTranslateBadChunks) {
      task.reject(error);
      return;
    }

    // Last chance: translate the text alone
    const {
      text,
      from,
      to,
      resolve,
      reject
    } = task;
    this.directTranslate(text, from, to).then(resolve, reject);
  }
}
exports.Scheduler = Scheduler;
//# sourceMappingURL=data:application/json;charset=utf8;base64,