UNPKG

repomix

Version:

A tool to pack repository contents into a single file for AI consumption

62 lines (61 loc) 2.1 kB
import { GptEncoding } from 'gpt-tokenizer/GptEncoding';
import { resolveEncodingAsync } from 'gpt-tokenizer/resolveEncodingAsync';
import { logger } from '../../shared/logger.js';
import { TOKEN_ENCODINGS } from './tokenEncodings.js';

export { TOKEN_ENCODINGS };

// Options passed to every count call. The empty `disallowedSpecial` set
// presumably makes the tokenizer treat special-token look-alikes in file
// content as ordinary text instead of rejecting them — confirm against the
// gpt-tokenizer encode options.
const PLAIN_TEXT_OPTIONS = { disallowedSpecial: new Set() };

// Module-level cache: encoding name -> Promise resolving to the bound count
// function. Storing the promise (rather than the resolved function) lets
// concurrent callers share a single in-flight load.
const encodingModules = new Map();

/**
 * Resolve the BPE ranks for an encoding, build its encoder, and return the
 * encoder's `countTokens` bound to that encoder. Logs how long this took.
 *
 * @param {string} encodingName - Encoding identifier handed to gpt-tokenizer.
 * @returns {Promise<Function>} Bound `countTokens(content, options)` function.
 */
const initializeEncoding = async (encodingName) => {
  const startTime = process.hrtime.bigint();
  const bpeRanks = await resolveEncodingAsync(encodingName);
  const encoder = GptEncoding.getEncodingApi(encodingName, () => bpeRanks);
  const countFn = encoder.countTokens.bind(encoder);
  // hrtime.bigint() is in nanoseconds; divide by 1e6 for milliseconds.
  const initTime = Number(process.hrtime.bigint() - startTime) / 1e6;
  logger.debug(`TokenCounter initialization for ${encodingName} took ${initTime.toFixed(2)}ms`);
  return countFn;
};

/**
 * Return the (cached) count function for an encoding, initializing it at most
 * once even when several callers request the same encoding concurrently.
 *
 * @param {string} encodingName - Encoding identifier handed to gpt-tokenizer.
 * @returns {Promise<Function>} Resolves to the bound count function; rejects
 *   if the encoding cannot be resolved or the encoder cannot be built.
 */
const loadEncoding = (encodingName) => {
  const cached = encodingModules.get(encodingName);
  if (cached) {
    return cached;
  }
  const pending = initializeEncoding(encodingName).catch((error) => {
    // Do not keep a failed load in the cache, so a later call can retry.
    encodingModules.delete(encodingName);
    throw error;
  });
  encodingModules.set(encodingName, pending);
  return pending;
};

/**
 * Counts tokens in text using a named encoding. `init()` must complete before
 * `countTokens()` is called.
 */
export class TokenCounter {
  countFn = null;
  encodingName;
  deps;

  /**
   * @param {string} encodingName - Encoding to count tokens with.
   * @param {{ loadEncoding: Function }} [deps] - Injectable loader; defaults
   *   to the module's cached `loadEncoding`.
   */
  constructor(encodingName, deps = { loadEncoding }) {
    this.encodingName = encodingName;
    this.deps = deps;
  }

  /**
   * Load the count function for this counter's encoding.
   *
   * @returns {Promise<void>}
   */
  async init() {
    this.countFn = await this.deps.loadEncoding(this.encodingName);
  }

  /**
   * Count the tokens in `content`.
   *
   * Tokenizer failures are treated as best-effort: a warning is logged and 0
   * is returned instead of propagating the error.
   *
   * @param {string} content - Text to tokenize.
   * @param {string} [filePath] - Only used to enrich the warning message.
   * @returns {number} Token count, or 0 when counting fails.
   * @throws {Error} If `init()` has not been called yet.
   */
  countTokens(content, filePath) {
    if (!this.countFn) {
      throw new Error('TokenCounter not initialized. Call init() first.');
    }
    try {
      return this.countFn(content, PLAIN_TEXT_OPTIONS);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      if (filePath) {
        logger.warn(`Failed to count tokens. path: ${filePath}, error: ${message}`);
      } else {
        logger.warn(`Failed to count tokens. error: ${message}`);
      }
      return 0;
    }
  }

  /**
   * No-op in this implementation; the body is empty and nothing is released.
   */
  free() {}
}