/*
 * il2cpp-dump-analyzer-mcp
 * Version: (not specified in this listing)
 * Agentic RAG system for analyzing IL2CPP dump.cs files from Unity games
 * 414 lines, 17 kB, JavaScript
 */
;
// Module interop helper (appears to be compiler-generated): reuses an existing
// `this.__importDefault` when one is present, otherwise wraps any module that is
// not flagged `__esModule` so callers can always read its `.default` property.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.SupabaseIL2CPPVectorStore = void 0;
const supabase_js_1 = require("@supabase/supabase-js");
const documents_1 = require("@langchain/core/documents");
const crypto_1 = __importDefault(require("crypto"));
/**
* Vector store for IL2CPP code chunks using Supabase
* Fixed version with improved error handling and consistency
*/
class SupabaseIL2CPPVectorStore {
    /**
     * Initialize the Supabase vector store.
     * The table itself is not touched here; it is verified lazily on first use
     * by ensureInitialized().
     * @param embeddings Embeddings instance to use (must provide embedDocuments / embedQuery)
     * @param supabaseUrl Supabase URL
     * @param supabaseKey Supabase API key
     * @param tableName Table name for vector storage
     */
    constructor(embeddings, supabaseUrl, supabaseKey, tableName = 'il2cpp_documents') {
        this.isInitialized = false;
        this.initializationPromise = null;
        this.embeddings = embeddings;
        this.tableName = tableName;
        // Get the dimensions from the embeddings model with fallback
        this.dimensions = this.getDimensionsFromEmbeddings(embeddings);
        // Create Supabase client
        this.supabaseClient = (0, supabase_js_1.createClient)(supabaseUrl, supabaseKey);
        console.log(`Initialized Supabase vector store with dimensions: ${this.dimensions}`);
    }
    /**
     * Get dimensions from embeddings instance with proper fallback.
     * Tries `getDimension()` first, then a truthy `dimensions` property, and
     * finally falls back to 384 with a warning.
     * @param embeddings Embeddings instance to inspect
     * @returns The embedding dimensionality
     */
    getDimensionsFromEmbeddings(embeddings) {
        // Try different ways to get dimensions
        if (typeof embeddings.getDimension === 'function') {
            return embeddings.getDimension();
        }
        if (embeddings.dimensions) {
            return embeddings.dimensions;
        }
        // Default to 384 for all-MiniLM-L6-v2 model
        console.warn('Could not determine embedding dimensions, defaulting to 384');
        return 384;
    }
    /**
     * Ensure the vector store is properly initialized.
     * Concurrent callers share a single in-flight initializeTable() call. If
     * that call fails, the cached promise is cleared so a later call retries
     * instead of replaying the same rejection forever.
     * @throws Whatever initializeTable() throws
     */
    async ensureInitialized() {
        if (this.isInitialized) {
            return;
        }
        if (!this.initializationPromise) {
            this.initializationPromise = this.initializeTable().then(() => {
                this.isInitialized = true;
            }, (error) => {
                // Forget the failed attempt so the next caller can try again.
                this.initializationPromise = null;
                throw error;
            });
        }
        return this.initializationPromise;
    }
    /**
     * Check that the Supabase table exists and exposes the expected columns
     * (id, content, metadata, embedding, document_hash) by selecting one row.
     * This method never creates the table.
     * @throws Error when the table is missing (Postgres code 42P01), or the
     *         original Supabase error for any other failure
     */
    async initializeTable() {
        try {
            // Selecting the expected columns fails if the table or a column is missing
            const { error: tableError } = await this.supabaseClient
                .from(this.tableName)
                .select('id, content, metadata, embedding, document_hash')
                .limit(1);
            if (tableError) {
                if (tableError.code === '42P01') {
                    console.log(`Table ${this.tableName} doesn't exist. Please run the SQL setup commands.`);
                    console.log('You can find the setup commands in supabase-setup.sql');
                    throw new Error(`Table ${this.tableName} does not exist. Please run the setup SQL commands.`);
                }
                else {
                    console.warn('Error checking table structure:', tableError);
                    throw tableError;
                }
            }
            console.log(`Table ${this.tableName} exists and is accessible.`);
        }
        catch (error) {
            console.error('Error initializing table:', error);
            throw error;
        }
    }
    /**
     * Create a new instance of the vector store from texts.
     * @param texts Array of text contents
     * @param metadatas Array of metadata objects aligned with `texts`; missing
     *        entries (or a missing array) fall back to an empty object
     * @param embeddings Embeddings instance to use
     * @param supabaseUrl Supabase URL
     * @param supabaseKey Supabase API key
     * @param tableName Table name for vector storage
     * @returns The populated vector store
     */
    static async fromTexts(texts, metadatas, embeddings, supabaseUrl, supabaseKey, tableName = 'il2cpp_documents') {
        const vectorStore = new SupabaseIL2CPPVectorStore(embeddings, supabaseUrl, supabaseKey, tableName);
        // Create documents from texts and metadata
        const documents = texts.map((text, index) => {
            return new documents_1.Document({
                pageContent: text,
                // Guard against an omitted metadatas argument
                metadata: (metadatas && metadatas[index]) || {},
            });
        });
        // Add documents to the vector store
        await vectorStore.addDocuments(documents);
        return vectorStore;
    }
    /**
     * Add documents to the vector store.
     * Documents whose hash is already stored (or repeated within `documents`)
     * are skipped; the rest are embedded and inserted in batches.
     * @param documents Documents to add; an empty or missing list is a no-op
     * @throws Any error raised while embedding or inserting
     */
    async addDocuments(documents) {
        if (!documents || documents.length === 0) {
            console.log('No documents to add.');
            return;
        }
        await this.ensureInitialized();
        try {
            console.log(`Processing ${documents.length} documents for insertion...`);
            // Generate document hashes for deduplication
            const documentHashes = documents.map(doc => this.generateDocumentHash(doc));
            // Check for existing documents
            const { newDocuments, existingCount } = await this.filterExistingDocuments(documents, documentHashes);
            if (newDocuments.length === 0) {
                console.log(`All ${documents.length} documents already exist in the database. Skipping insertion.`);
                return;
            }
            if (existingCount > 0) {
                console.log(`Found ${existingCount} existing documents. Adding ${newDocuments.length} new documents.`);
            }
            else {
                console.log(`Adding all ${newDocuments.length} documents to the database.`);
            }
            // Generate embeddings for new documents
            const embeddings = await this.generateEmbeddings(newDocuments);
            // Insert documents in batches
            await this.insertDocumentsBatch(newDocuments, embeddings, documentHashes);
            console.log(`Successfully added ${newDocuments.length} documents to the vector store.`);
        }
        catch (error) {
            console.error('Error adding documents to Supabase:', error);
            throw error;
        }
    }
    /**
     * Filter out documents that already exist in the database, and collapse
     * repeats inside `documents` itself so identical content is embedded once.
     * @param documents Candidate documents
     * @param documentHashes Hashes aligned index-for-index with `documents`
     * @returns `{ newDocuments, existingCount }` where `existingCount` is the
     *          number of skipped documents (already stored or repeated in the input)
     */
    async filterExistingDocuments(documents, documentHashes) {
        try {
            const existingDocuments = new Set();
            // Query in batches to avoid overwhelming the database
            const hashBatchSize = 100;
            for (let i = 0; i < documentHashes.length; i += hashBatchSize) {
                const hashBatch = documentHashes.slice(i, i + hashBatchSize);
                const { data, error } = await this.supabaseClient
                    .from(this.tableName)
                    .select('document_hash')
                    .in('document_hash', hashBatch);
                if (error) {
                    console.warn('Error checking for existing documents:', error);
                    // Stop querying; hashes found so far are still honoured below
                    break;
                }
                if (data && data.length > 0) {
                    data.forEach(item => existingDocuments.add(item.document_hash));
                }
            }
            // Keep the first occurrence of every hash that is not stored yet
            const seenInInput = new Set();
            const newDocuments = [];
            documents.forEach((doc, index) => {
                const hash = documentHashes[index];
                if (existingDocuments.has(hash) || seenInInput.has(hash)) {
                    return;
                }
                seenInInput.add(hash);
                newDocuments.push(doc);
            });
            return {
                newDocuments,
                existingCount: documents.length - newDocuments.length
            };
        }
        catch (error) {
            console.warn('Error during deduplication check:', error);
            console.log('Proceeding without deduplication');
            return { newDocuments: documents, existingCount: 0 };
        }
    }
    /**
     * Generate embeddings for documents and validate count, format and
     * dimensionality of the result.
     * @param documents Documents whose `pageContent` is embedded
     * @returns One numeric vector per document, in input order
     * @throws Error on a count or dimension mismatch, or an invalid vector
     */
    async generateEmbeddings(documents) {
        const texts = documents.map(doc => doc.pageContent);
        const embeddings = await this.embeddings.embedDocuments(texts);
        // Validate embeddings
        if (embeddings.length !== documents.length) {
            throw new Error(`Embedding count mismatch: expected ${documents.length}, got ${embeddings.length}`);
        }
        // Validate each embedding
        return embeddings.map((embedding, index) => {
            const validatedEmbedding = this.validateEmbedding(embedding, index);
            if (validatedEmbedding.length !== this.dimensions) {
                throw new Error(`Embedding dimension mismatch for document ${index}: expected ${this.dimensions}, got ${validatedEmbedding.length}`);
            }
            return validatedEmbedding;
        });
    }
    /**
     * Validate and normalize a single embedding into an array of numbers.
     * @param embedding Array, or a non-null object whose values are the components
     * @param index Document index, used only in error messages
     * @returns The embedding as an array of numbers
     * @throws Error when the format is unsupported or a component is NaN
     */
    validateEmbedding(embedding, index) {
        let validatedEmbedding;
        if (Array.isArray(embedding)) {
            validatedEmbedding = embedding.map(val => Number(val));
        }
        else if (typeof embedding === 'object' && embedding !== null) {
            // Handle case where embedding is an object (e.g., from some models)
            validatedEmbedding = Object.values(embedding).map(val => Number(val));
        }
        else {
            throw new Error(`Invalid embedding format for document ${index}: ${typeof embedding}`);
        }
        // Check for NaN values (every entry is already a number at this point)
        if (validatedEmbedding.some(val => Number.isNaN(val))) {
            throw new Error(`Embedding contains NaN values for document ${index}`);
        }
        return validatedEmbedding;
    }
    /**
     * Insert documents in batches of 20.
     * @param documents Documents to insert
     * @param embeddings Vectors aligned index-for-index with `documents`
     * @param allHashes Not used: addDocuments passes the hashes of its full,
     *        unfiltered input, which do not line up with `documents`, so the
     *        hashes are recomputed per batch instead
     */
    async insertDocumentsBatch(documents, embeddings, allHashes) {
        const batchSize = 20;
        const totalBatches = Math.ceil(documents.length / batchSize);
        console.log(`Inserting ${documents.length} documents in ${totalBatches} batches`);
        for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) {
            const startIdx = batchIndex * batchSize;
            const endIdx = Math.min(startIdx + batchSize, documents.length);
            const batch = documents.slice(startIdx, endIdx);
            const batchEmbeddings = embeddings.slice(startIdx, endIdx);
            const batchHashes = batch.map(doc => this.generateDocumentHash(doc));
            await this.insertSingleBatch(batch, batchEmbeddings, batchHashes, batchIndex + 1, totalBatches);
        }
    }
    /**
     * Insert a single batch of documents with an upsert keyed on
     * `document_hash` that ignores duplicates.
     * @param batch Documents in this batch
     * @param batchEmbeddings Vectors aligned with `batch`
     * @param batchHashes Hashes aligned with `batch`
     * @param batchNumber 1-based batch number, used in log messages
     * @param totalBatches Total number of batches, used in log messages
     * @throws The Supabase error, except duplicate-key (23505) which is only logged
     */
    async insertSingleBatch(batch, batchEmbeddings, batchHashes, batchNumber, totalBatches) {
        const batchData = batch.map((doc, i) => ({
            content: doc.pageContent,
            metadata: doc.metadata || {},
            embedding: batchEmbeddings[i],
            document_hash: batchHashes[i]
        }));
        try {
            // Use upsert with conflict resolution
            const { error } = await this.supabaseClient
                .from(this.tableName)
                .upsert(batchData, {
                onConflict: 'document_hash',
                ignoreDuplicates: true
            });
            if (error) {
                // Handle specific error cases
                if (error.code === '23505') {
                    console.warn(`Batch ${batchNumber}/${totalBatches}: Some documents already exist (duplicate key)`);
                }
                else {
                    console.error(`Batch ${batchNumber}/${totalBatches}: Error inserting documents:`, error);
                    throw error;
                }
            }
            else {
                console.log(`Batch ${batchNumber}/${totalBatches}: Successfully inserted ${batch.length} documents`);
            }
        }
        catch (error) {
            console.error(`Batch ${batchNumber}/${totalBatches}: Failed to insert documents:`, error);
            throw error;
        }
    }
    /**
     * Add code chunks to the vector store.
     * @param chunks Objects with `text` and `metadata` properties
     */
    async addCodeChunks(chunks) {
        const documents = chunks.map(chunk => new documents_1.Document({
            pageContent: chunk.text,
            metadata: chunk.metadata
        }));
        await this.addDocuments(documents);
    }
    /**
     * Search for similar documents based on a query string.
     * @param query Query text
     * @param k Maximum number of results
     * @returns Matching documents without their scores
     */
    async similaritySearch(query, k = 5) {
        const results = await this.similaritySearchWithScore(query, k);
        return results.map(([doc]) => doc);
    }
    /**
     * Search for similar documents with scores.
     * NOTE(review): the `match_documents` RPC is called without `tableName`, so
     * it presumably targets a fixed table; confirm against the SQL setup.
     * @param query Query text
     * @param k Maximum number of results
     * @returns Array of `[document, similarity]` pairs
     * @throws Error when the RPC is missing (42883) or the search fails
     */
    async similaritySearchWithScore(query, k = 5) {
        await this.ensureInitialized();
        try {
            // Generate embedding for the query
            const queryEmbedding = await this.embeddings.embedQuery(query);
            // Validate query embedding (-1 marks "the query" in error messages)
            const validatedQueryEmbedding = this.validateEmbedding(queryEmbedding, -1);
            if (validatedQueryEmbedding.length !== this.dimensions) {
                throw new Error(`Query embedding dimension mismatch: expected ${this.dimensions}, got ${validatedQueryEmbedding.length}`);
            }
            // Search for similar documents using the match_documents function
            const { data, error } = await this.supabaseClient.rpc('match_documents', {
                query_embedding: validatedQueryEmbedding,
                match_threshold: 0.0,
                match_count: k
            });
            if (error) {
                console.error('Error searching for similar documents:', error);
                if (error.code === '42883') {
                    throw new Error('The match_documents function does not exist. Please run the SQL setup commands.');
                }
                throw error;
            }
            // Convert results to documents with scores
            return (data || []).map((item) => {
                const doc = new documents_1.Document({
                    pageContent: item.content,
                    metadata: item.metadata || {}
                });
                return [doc, item.similarity || 0];
            });
        }
        catch (error) {
            console.error('Error in similarity search:', error);
            throw error;
        }
    }
    /**
     * Get the total number of documents in the vector store.
     * @returns The exact row count, or 0 when no count is reported
     */
    async getDocumentCount() {
        await this.ensureInitialized();
        try {
            // head: true asks for the count only, without returning rows
            const { count, error } = await this.supabaseClient
                .from(this.tableName)
                .select('*', { count: 'exact', head: true });
            if (error) {
                console.error('Error getting document count:', error);
                throw error;
            }
            return count || 0;
        }
        catch (error) {
            console.error('Error in getDocumentCount:', error);
            throw error;
        }
    }
    /**
     * Delete all documents from the vector store.
     * NOTE(review): the filter matches rows whose `id` differs from 0, which
     * presumably covers every row; confirm against the table's id column type.
     */
    async deleteAll() {
        await this.ensureInitialized();
        try {
            const { error } = await this.supabaseClient
                .from(this.tableName)
                .delete()
                .neq('id', 0); // Delete all rows
            if (error) {
                console.error('Error deleting all documents:', error);
                throw error;
            }
            console.log('Successfully deleted all documents from the vector store.');
        }
        catch (error) {
            console.error('Error in deleteAll:', error);
            throw error;
        }
    }
    /**
     * Get the dimensionality of the embeddings.
     */
    getDimension() {
        return this.dimensions;
    }
    /**
     * Serialize a JSON-compatible value with object keys sorted at every
     * nesting level, so equal data always yields the same string regardless of
     * key insertion order. Mirrors JSON.stringify: undefined/function object
     * properties are omitted and become `null` inside arrays; `toJSON` is honoured.
     * @param value Value to serialize
     * @returns The canonical JSON text, or undefined for values JSON cannot represent
     */
    static stableStringify(value) {
        if (value === null || typeof value !== 'object') {
            return JSON.stringify(value);
        }
        if (typeof value.toJSON === 'function') {
            return SupabaseIL2CPPVectorStore.stableStringify(value.toJSON());
        }
        if (Array.isArray(value)) {
            const items = value.map((item) => {
                const serialized = SupabaseIL2CPPVectorStore.stableStringify(item);
                return serialized === undefined ? 'null' : serialized;
            });
            return `[${items.join(',')}]`;
        }
        const members = [];
        for (const key of Object.keys(value).sort()) {
            const serialized = SupabaseIL2CPPVectorStore.stableStringify(value[key]);
            if (serialized !== undefined) {
                members.push(`${JSON.stringify(key)}:${serialized}`);
            }
        }
        return `{${members.join(',')}}`;
    }
    /**
     * Generate a SHA-256 hash for a document based on its content and metadata.
     * Metadata is serialized with sorted keys at every depth. A replacer array
     * of top-level keys is deliberately not used: JSON.stringify applies such
     * an array as an allowlist to nested objects too, which would drop nested
     * fields and make documents with different nested metadata collide.
     * @param document Object with `pageContent` and optional `metadata`
     * @returns Hex-encoded SHA-256 digest
     */
    generateDocumentHash(document) {
        // Create a deterministic string representation
        const metadataStr = SupabaseIL2CPPVectorStore.stableStringify(document.metadata || {});
        const contentToHash = `${document.pageContent}|${metadataStr}`;
        // Generate SHA-256 hash
        return crypto_1.default.createHash('sha256').update(contentToHash, 'utf8').digest('hex');
    }
    /**
     * Check if the vector store is properly configured and accessible.
     * Never throws; failures are reported in the returned object.
     * @returns `{ healthy, message }`
     */
    async healthCheck() {
        try {
            await this.ensureInitialized();
            // Try to get document count as a health check
            const count = await this.getDocumentCount();
            return {
                healthy: true,
                message: `Vector store is healthy. Contains ${count} documents.`
            };
        }
        catch (error) {
            return {
                healthy: false,
                message: `Vector store health check failed: ${error instanceof Error ? error.message : 'Unknown error'}`
            };
        }
    }
}
exports.SupabaseIL2CPPVectorStore = SupabaseIL2CPPVectorStore;
//# sourceMappingURL=supabase-vector-store-fixed.js.map