i18n-ai-translate
Version:
AI-powered localization CLI, Node library, and GitHub Action. Translate i18next JSON, Gettext PO, Java .properties, and iOS .strings with ChatGPT, Claude, Gemini, or local Ollama models.
156 lines (130 loc) • 5.42 kB
text/typescript
import { delay, printInfo } from "./utils";
/** Width of the sliding token-accounting window: one minute, expressed in milliseconds. */
const ONE_MINUTE_MS = 60 * 1_000;
/**
 * Paces outbound API calls. Call slots are spaced at least
 * `delayBetweenCallsMs` apart and, when `tokensPerMinute` is set, the
 * estimated token spend is kept within that cap over a sliding
 * 60-second window.
 */
export default class RateLimiter {
    /**
     * Timestamp (ms since epoch) of the most recent call: the slot
     * reserved by acquire() or the moment apiCalled() ran. `null` until
     * either has happened.
     */
    lastAPICall: number | null;

    /** Minimum spacing, in milliseconds, between consecutive call slots. */
    delayBetweenCallsMs: number;

    /** When true, waits and over-cap dispatches are reported via printInfo. */
    verboseLogging: boolean;

    /** Optional tokens-per-minute cap enforced alongside the RPM slot. */
    tokensPerMinute?: number;

    /** Earliest timestamp (ms since epoch) the next acquire() may be scheduled at. */
    private nextSlot: number;

    /**
     * Sliding 60-second window of recent token consumption. Each entry
     * is [unix-millis-at-consume-time, token-count]. Entries older than
     * 60s are pruned whenever the token budget is checked.
     */
    private tokenWindow: Array<[number, number]>;

    /**
     * @param delayBetweenCallsMs - minimum spacing between call slots, in ms
     * @param verboseLogging - whether to log waits through printInfo
     * @param tokensPerMinute - optional TPM cap; omit to disable token accounting
     */
    constructor(
        delayBetweenCallsMs: number,
        verboseLogging: boolean,
        tokensPerMinute?: number,
    ) {
        this.lastAPICall = null;
        this.delayBetweenCallsMs = delayBetweenCallsMs;
        this.verboseLogging = verboseLogging;
        this.tokensPerMinute = tokensPerMinute;
        this.nextSlot = 0;
        this.tokenWindow = [];
    }

    /**
     * Reserve the next RPM slot and, when configured, also wait for
     * enough TPM headroom to fit the estimated token cost of the call
     * about to be made. Concurrent callers reserve distinct slots
     * synchronously; the TPM check runs afterward.
     * @param estimatedTokens - tokens this call is expected to consume; 0 skips the TPM check
     */
    async acquire(estimatedTokens = 0): Promise<void> {
        // RPM slot reservation. The read-and-bump must happen in one
        // synchronous turn — no await between — so concurrent callers
        // each reserve a distinct slot spaced by delayBetweenCallsMs.
        const now = Date.now();
        const slot = Math.max(now, this.nextSlot);
        this.nextSlot = slot + this.delayBetweenCallsMs;
        this.lastAPICall = slot;

        const timeToWait = slot - now;
        if (timeToWait > 0) {
            await this.sleepBeforeCall(timeToWait);
        }

        if (this.tokensPerMinute && estimatedTokens > 0) {
            await this.awaitTokenBudget(estimatedTokens);
        }
    }

    /**
     * Log (when verbose) and sleep for the given number of
     * milliseconds. Shared by acquire() and wait() so both report the
     * pause identically.
     */
    private async sleepBeforeCall(timeToWait: number): Promise<void> {
        if (this.verboseLogging) {
            printInfo(
                `\nRateLimiter | Waiting ${timeToWait}ms before next API call`,
            );
        }

        await delay(timeToWait);
    }

    /**
     * TPM enforcement. Prunes stale entries, and if adding this call
     * would exceed the cap, waits until the oldest entry is old enough
     * to fall out of the 60-second window, then re-checks.
     * @param estimatedTokens - tokens the pending call is expected to consume
     */
    private async awaitTokenBudget(estimatedTokens: number): Promise<void> {
        if (!this.tokensPerMinute) return;

        // Paying more than the minute-wide cap in a single call is
        // impossible to satisfy; still consume it, but log so the user
        // knows it'll likely 429. A call exactly AT the cap is
        // satisfiable once the window drains, so it takes the normal
        // path below (strictly-greater comparison).
        if (estimatedTokens > this.tokensPerMinute) {
            if (this.verboseLogging) {
                printInfo(
                    `\nRateLimiter | Single call estimated at ${estimatedTokens} tokens exceeds TPM cap of ${this.tokensPerMinute}; dispatching anyway`,
                );
            }

            this.tokenWindow.push([Date.now(), estimatedTokens]);
            return;
        }

        // Potentially loop: the wait might have to repeat if other
        // concurrent callers consumed the budget while we were asleep.
        // Each iteration prunes first, then either fires or sleeps
        // until the oldest relevant entry ages out.
        // eslint-disable-next-line no-constant-condition
        while (true) {
            // Re-read every pass: the cap is a public field and may have
            // been changed or cleared while this caller was sleeping.
            const cap = this.tokensPerMinute;
            const now = Date.now();
            this.tokenWindow = this.tokenWindow.filter(
                ([ts]) => now - ts < ONE_MINUTE_MS,
            );
            const inFlight = this.tokenWindow.reduce(
                (sum, [, t]) => sum + t,
                0,
            );

            // Entries are appended in time order, so index 0 is the
            // oldest. An empty window means there is nothing whose
            // expiry could free budget, so waiting would be pointless.
            const oldest = this.tokenWindow[0];
            if (
                !cap ||
                oldest === undefined ||
                inFlight + estimatedTokens <= cap
            ) {
                this.tokenWindow.push([now, estimatedTokens]);
                return;
            }

            // Sleep until the earliest in-window entry falls out, then
            // re-check. Clamp to 1ms so we never spin on a zero delay.
            const waitMs = Math.max(1, ONE_MINUTE_MS - (now - oldest[0]));
            if (this.verboseLogging) {
                printInfo(
                    `\nRateLimiter | TPM cap reached (${inFlight}/${cap}), waiting ${waitMs}ms for budget`,
                );
            }

            // eslint-disable-next-line no-await-in-loop
            await delay(waitMs);
        }
    }

    /**
     * Push the shared next slot at least `penaltyMs` into the future.
     * Never moves the slot earlier than it already is.
     * @param penaltyMs - back-off, in milliseconds from now
     */
    penalize(penaltyMs: number): void {
        // A 429 from any worker pushes the shared slot forward so every
        // caller that acquires afterwards backs off too.
        const target = Date.now() + penaltyMs;
        if (target > this.nextSlot) this.nextSlot = target;
    }

    /** Record that an API call was made right now. */
    apiCalled(): void {
        this.lastAPICall = Date.now();
    }

    /**
     * Sleep until `delayBetweenCallsMs` has elapsed since `lastAPICall`.
     * Returns without sleeping when no call has been recorded yet or
     * when enough time has already passed.
     */
    async wait(): Promise<void> {
        // Explicit null check: a numeric timestamp must never be
        // mistaken for "no call yet" through truthiness.
        if (this.lastAPICall === null) return;

        const timeToWait =
            this.delayBetweenCallsMs - (Date.now() - this.lastAPICall);
        if (timeToWait > 0) {
            await this.sleepBeforeCall(timeToWait);
        }
    }
}