repomix
Version:
A tool to pack repository contents into a single file for AI consumption
62 lines (61 loc) • 2.1 kB
JavaScript
import { GptEncoding } from 'gpt-tokenizer/GptEncoding';
import { resolveEncodingAsync } from 'gpt-tokenizer/resolveEncodingAsync';
import { logger } from '../../shared/logger.js';
import { TOKEN_ENCODINGS } from './tokenEncodings.js';
export { TOKEN_ENCODINGS };
// Options object passed as the second argument to every token-count call.
// NOTE(review): the empty `disallowedSpecial` set presumably makes the tokenizer
// treat special-token text as ordinary content instead of rejecting it — confirm
// against the gpt-tokenizer documentation.
const PLAIN_TEXT_OPTIONS = { disallowedSpecial: new Set() };
// Module-level cache mapping an encoding name to its bound `countTokens`
// function; populated by `loadEncoding` so each encoding is built only once.
const encodingModules = new Map();
/**
 * Returns the token-counting function for the given encoding, building and
 * caching it in `encodingModules` on first use.
 *
 * On a cache miss the encoding data is resolved asynchronously, an encoder is
 * created from it, and the encoder's `countTokens` method (bound to the
 * encoder) is stored under `encodingName`. The time spent building it is
 * reported through `logger.debug`. Cache hits return immediately and log
 * nothing.
 *
 * @param {string} encodingName - Name of the encoding to load.
 * @returns {Promise<Function>} The bound `countTokens` function for the encoding.
 */
const loadEncoding = async (encodingName) => {
  const memoized = encodingModules.get(encodingName);
  if (memoized) {
    return memoized;
  }

  const startedAt = process.hrtime.bigint();

  const ranks = await resolveEncodingAsync(encodingName);
  const api = GptEncoding.getEncodingApi(encodingName, () => ranks);
  // Bind so the function can be stored and called without its encoder receiver.
  const boundCounter = api.countTokens.bind(api);
  encodingModules.set(encodingName, boundCounter);

  // hrtime.bigint() yields nanoseconds; dividing by 1e6 converts to milliseconds.
  const elapsedMs = Number(process.hrtime.bigint() - startedAt) / 1e6;
  logger.debug(`TokenCounter initialization for ${encodingName} took ${elapsedMs.toFixed(2)}ms`);

  return boundCounter;
};
/**
 * Counts tokens in text for a single encoding.
 *
 * The counting function is obtained asynchronously, so `init()` must complete
 * before `countTokens()` is called. Counting failures are logged as warnings
 * and reported as a count of 0 instead of being thrown.
 */
export class TokenCounter {
  /** Counting function installed by `init()`; `null` until then. */
  countFn = null;
  /** Encoding name passed to `deps.loadEncoding` during `init()`. */
  encodingName;
  /** Injected collaborators; this class only uses `deps.loadEncoding`. */
  deps;

  /**
   * @param {string} encodingName - Name of the encoding to count with.
   * @param {{ loadEncoding: (encodingName: string) => Promise<Function> }} [deps]
   *   Loader override; defaults to the module's own `loadEncoding`.
   */
  constructor(encodingName, deps = { loadEncoding }) {
    this.encodingName = encodingName;
    this.deps = deps;
  }

  /**
   * Loads the counting function for this instance's encoding.
   *
   * @returns {Promise<void>}
   */
  async init() {
    const { deps, encodingName } = this;
    this.countFn = await deps.loadEncoding(encodingName);
  }

  /**
   * Counts the tokens in `content`.
   *
   * @param {string} content - Text to count.
   * @param {string} [filePath] - Included in the warning when counting fails.
   * @returns {number} The count from the counting function, or 0 if it threw.
   * @throws {Error} If `init()` has not installed a counting function yet.
   */
  countTokens(content, filePath) {
    if (!this.countFn) {
      throw new Error('TokenCounter not initialized. Call init() first.');
    }

    try {
      return this.countFn(content, PLAIN_TEXT_OPTIONS);
    } catch (error) {
      // Best-effort: a failure on one input is logged and counted as 0.
      const message = error instanceof Error ? error.message : String(error);
      const warning = filePath
        ? `Failed to count tokens. path: ${filePath}, error: ${message}`
        : `Failed to count tokens. error: ${message}`;
      logger.warn(warning);
      return 0;
    }
  }

  /** Intentionally a no-op: this class holds nothing that needs releasing. */
  free() {}
}