@gaiaverse/semantic-turning-point-detector
Version:
Detects key semantic turning points in conversations using recursive semantic distance analysis. Ideal for conversation analysis, dialogue segmentation, insight detection, and AI-assisted reasoning tasks.
66 lines • 2.52 kB
JavaScript
;
// Interop helper for default imports of CommonJS modules. Reuses an existing
// `this.__importDefault` when one is available; otherwise a module flagged with
// `__esModule` is returned untouched and anything else is wrapped so that it
// can be reached through `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    const isFlaggedEsModule = Boolean(mod && mod.__esModule);
    if (isFlaggedEsModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag this CommonJS module with `__esModule`; this is the marker that interop
// helpers such as __importDefault check before deciding whether to wrap a module.
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module. The functions are declared further down in the
// file; function declarations are hoisted, so they can be referenced here.
exports.countTokens = countTokens;
exports.generateEmbedding = generateEmbedding;
exports.createEmbeddingCache = createEmbeddingCache;
// src/tokenUtils.ts
const gpt_tokenizer_1 = __importDefault(require("gpt-tokenizer"));
const openai_1 = __importDefault(require("openai"));
const dotenv_1 = __importDefault(require("dotenv"));
const crypto_1 = __importDefault(require("crypto"));
const lru_cache_1 = require("lru-cache");
dotenv_1.default.config();
/**
 * Returns how many tokens the given text encodes to.
 *
 * The text is passed to the default encoder of the `gpt-tokenizer` package and
 * the length of the resulting token list is reported. The function takes no
 * model argument; the package's default encoding is always used.
 *
 * @param text - The text to tokenize
 * @returns Number of tokens produced by the encoder
 */
function countTokens(text) {
    const tokenIds = gpt_tokenizer_1.default.encode(text);
    return tokenIds.length;
}
/**
 * Generates an embedding for a given text using the OpenAI API.
 * This provides the vector representation for semantic distance calculation.
 *
 * When a cache is supplied, the result is looked up and stored under a SHA-256
 * hash of `"<model>:<text>"`, so raw text is never used as a cache key.
 *
 * @param text - The text to embed
 * @param model - Embedding model name; forwarded to the API unchanged
 * @param cache - Optional cache exposing `get(key)` and `set(key, value)`
 * @returns Promise resolving to the embedding as a Float32Array
 * @throws {Error} If the API response carries no usable embedding vector.
 *   Errors raised by the OpenAI client propagate unchanged.
 */
async function generateEmbedding(text, model, cache) {
    // Create a hash-based cache key instead of using raw text
    const cacheKey = cache
        ? crypto_1.default
            .createHash('sha256')
            .update(`${model || 'default'}:${text}`)
            .digest('hex')
        : '';
    // Check cache if provided
    if (cache && cacheKey) {
        const cached = cache.get(cacheKey);
        if (cached)
            return cached;
    }
    const openai = new openai_1.default({ apiKey: process.env.OPENAI_API_KEY });
    const response = await openai.embeddings.create({
        model: model,
        input: text,
        encoding_format: "float",
    });
    // Validate before converting: `new Float32Array(undefined)` would silently
    // yield an empty vector, which would then be cached and returned as if it
    // were a real embedding.
    const values = response?.data?.[0]?.embedding;
    const isNumericList = Array.isArray(values) || ArrayBuffer.isView(values);
    if (!isNumericList || values.length === 0) {
        throw new Error('OpenAI embeddings response did not contain an embedding vector');
    }
    const embedding = new Float32Array(values);
    // Store in cache if provided
    if (cache && cacheKey) {
        cache.set(cacheKey, embedding);
    }
    return embedding;
}
/**
 * Creates a new LRU cache for embeddings with an approximate RAM limit.
 *
 * The entry limit is derived from the RAM budget divided by the size of one
 * embedding stored as 32-bit floats. Only the vector payload is counted; cache
 * keys and bookkeeping overhead are not part of the estimate.
 *
 * @param ramLimitMB - Approximate memory budget in megabytes (default 100)
 * @param ttlSeconds - Time-to-live of each entry in seconds (default 600)
 * @param embeddingDimensions - Floats per embedding (default 3072, the size
 *   of text-embedding-3-large vectors)
 * @returns A new LRUCache instance limited by entry count and TTL
 * @throws {RangeError} If `embeddingDimensions` is not a positive number
 */
function createEmbeddingCache(ramLimitMB = 100, ttlSeconds = 600, embeddingDimensions = 3072) {
    if (!(embeddingDimensions > 0)) {
        throw new RangeError('embeddingDimensions must be a positive number');
    }
    const bytesPerFloat = 4;
    const embeddingSize = embeddingDimensions * bytesPerFloat;
    const budgetBytes = ramLimitMB * 1024 * 1024;
    // Keep at least one entry: a floored value of 0 would switch off the
    // entry limit and leave the cache bounded only by TTL.
    const maxEntries = Math.max(1, Math.floor(budgetBytes / embeddingSize));
    return new lru_cache_1.LRUCache({
        max: maxEntries,
        ttl: ttlSeconds * 1000, // Convert seconds to milliseconds
    });
}
//# sourceMappingURL=tokensUtil.js.map