UNPKG

@gaiaverse/semantic-turning-point-detector

Version:

Detects key semantic turning points in conversations using recursive semantic distance analysis. Ideal for conversation analysis, dialogue segmentation, insight detection, and AI-assisted reasoning tasks.

66 lines 2.52 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.countTokens = countTokens;
exports.generateEmbedding = generateEmbedding;
exports.createEmbeddingCache = createEmbeddingCache;
// src/tokenUtils.ts
const gpt_tokenizer_1 = __importDefault(require("gpt-tokenizer"));
const openai_1 = __importDefault(require("openai"));
const dotenv_1 = __importDefault(require("dotenv"));
const crypto_1 = __importDefault(require("crypto"));
const lru_cache_1 = require("lru-cache");
// Run dotenv's config step at module load, before OPENAI_API_KEY is read
// from process.env inside generateEmbedding.
dotenv_1.default.config();
/**
 * Count tokens in a given string using the default `gpt-tokenizer` encoder.
 *
 * @param text - The text to tokenize
 * @returns Number of tokens produced by encoding `text`
 */
function countTokens(text) {
    return gpt_tokenizer_1.default.encode(text).length;
}
/**
 * Generates an embedding for a given text using the OpenAI API.
 * This provides the vector representation for semantic distance calculation.
 *
 * @param text - The text to embed
 * @param model - Embedding model name, passed through to the API unchanged
 * @param cache - Optional cache exposing `get(key)` / `set(key, value)`;
 *                when omitted, every call goes to the API
 * @returns The embedding as a Float32Array
 * @throws Error when the API response carries no embedding vector; errors
 *         raised by the OpenAI client propagate unchanged
 */
async function generateEmbedding(text, model, cache) {
    // Hash model + text so the key has a fixed size and the raw text is not
    // retained as a cache key. Without a cache the key is left empty.
    const cacheKey = cache
        ? crypto_1.default
            .createHash('sha256')
            .update(`${model || 'default'}:${text}`)
            .digest('hex')
        : '';
    // Check cache if provided
    if (cache && cacheKey) {
        const cached = cache.get(cacheKey);
        if (cached)
            return cached;
    }
    const openai = new openai_1.default({
        apiKey: process.env.OPENAI_API_KEY
    });
    const response = await openai.embeddings.create({
        model: model,
        input: text,
        encoding_format: "float",
    });
    // Validate before converting: `new Float32Array(undefined)` yields a
    // zero-length array, so a response without data would otherwise be
    // returned (and cached) as a silently empty embedding.
    const values = response.data?.[0]?.embedding;
    if (values == null || values.length === 0) {
        throw new Error(`Embedding response for model "${model}" contained no embedding vector`);
    }
    const embedding = new Float32Array(values);
    // Store in cache if provided
    if (cache && cacheKey) {
        cache.set(cacheKey, embedding);
    }
    return embedding;
}
/**
 * Creates a new LRU cache for embeddings with an approximate RAM limit.
 *
 * The entry limit is derived from the vector payload size only
 * (3072 floats * 4 bytes); keys and per-entry overhead are not counted.
 *
 * @param ramLimitMB - Approximate memory budget in megabytes (default 100)
 * @param ttlSeconds - Time-to-live for each entry in seconds (default 600)
 * @returns A configured `LRUCache` instance
 */
function createEmbeddingCache(ramLimitMB = 100, ttlSeconds = 600) {
    const embeddingSize = 3072 * 4; // text-embedding-3-large: 3072 dimensions * 4 bytes per float
    // Keep at least one slot: lru-cache treats `max: 0` as "no entry limit",
    // so a tiny budget would otherwise produce an unbounded cache.
    const maxEntries = Math.max(1, Math.floor((ramLimitMB * 1024 * 1024) / embeddingSize));
    return new lru_cache_1.LRUCache({
        max: maxEntries,
        ttl: ttlSeconds * 1000, // Convert seconds to milliseconds
    });
}
//# sourceMappingURL=tokensUtil.js.map