il2cpp-dump-analyzer-mcp

Agentic RAG system for analyzing IL2CPP dump.cs files from Unity games

supabase-vector-store.js
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.SupabaseIL2CPPVectorStore = void 0; const supabase_js_1 = require("@supabase/supabase-js"); const documents_1 = require("@langchain/core/documents"); const crypto_1 = __importDefault(require("crypto")); /** * Vector store for IL2CPP code chunks using Supabase */ class SupabaseIL2CPPVectorStore { /** * Initialize the Supabase vector store * @param embeddings Embeddings instance to use * @param supabaseUrl Supabase URL * @param supabaseKey Supabase API key * @param tableName Table name for vector storage */ constructor(embeddings, supabaseUrl, supabaseKey, tableName = 'il2cpp_documents') { this.embeddings = embeddings; this.tableName = tableName; // Get the dimensions from the embeddings model this.dimensions = embeddings.getDimension?.() || 384; // Create Supabase client with Docker-compatible configuration this.supabaseClient = (0, supabase_js_1.createClient)(supabaseUrl, supabaseKey, { auth: { persistSession: false, autoRefreshToken: false, detectSessionInUrl: false }, // Remove problematic header overrides for Docker networking global: { headers: { 'Content-Type': 'application/json' } } }); console.log(`Initialized Supabase vector store with dimensions: ${this.dimensions}`); // Initialize the table structure this.initializeTable().catch(error => { console.error('Error initializing Supabase table:', error); }); } /** * Initialize the Supabase table with the correct schema * This ensures the document_hash column exists for deduplication */ async initializeTable() { try { // Check if the table exists const { error: checkError } = await this.supabaseClient .from(this.tableName) .select('id') .limit(1); // If the table doesn't exist, we'll need to rely on Supabase's auto-creation // when we first insert data, since we may not have direct SQL execution privileges if (checkError && checkError.code === '42P01') { console.log(`Table ${this.tableName} doesn't exist. 
It will be created automatically on first insert.`); // We'll add document_hash when inserting data } } catch (error) { console.warn('Error checking table structure:', error); console.log('Will attempt to use table as-is or it will be created on first insert'); } } /** * Create a new instance of the vector store from texts * @param texts Array of texts * @param metadatas Array of metadata objects * @param embeddings Embeddings instance * @param supabaseUrl Supabase URL * @param supabaseKey Supabase API key * @param tableName Table name * @returns New SupabaseIL2CPPVectorStore instance */ static async fromTexts(texts, metadatas, embeddings, supabaseUrl, supabaseKey, tableName = 'il2cpp_documents') { // Create the vector store instance const vectorStore = new SupabaseIL2CPPVectorStore(embeddings, supabaseUrl, supabaseKey, tableName); // Create documents from texts and metadata const documents = texts.map((text, index) => { return new documents_1.Document({ pageContent: text, metadata: metadatas[index] || {}, }); }); // Add documents to the vector store await vectorStore.addDocuments(documents); return vectorStore; } /** * Add documents to the vector store * @param documents Array of documents to add */ async addDocuments(documents) { try { // Generate document hashes for deduplication const documentHashes = documents.map(doc => this.generateDocumentHash(doc)); // Track which documents to add let newDocuments = [...documents]; let existingCount = 0; try { // Check which documents already exist in the database const existingDocuments = new Set(); // Query in batches to avoid overwhelming the database const hashBatchSize = 100; for (let i = 0; i < documentHashes.length; i += hashBatchSize) { const hashBatch = documentHashes.slice(i, i + hashBatchSize); // Check for existing documents with these hashes const { data, error } = await this.supabaseClient .from(this.tableName) .select('document_hash') .in('document_hash', hashBatch); if (error) { if (error.code === '42P01') { // Table doesn't exist yet, so no documents exist console.log(`Table ${this.tableName} doesn't exist yet. All documents will be added.`); break; } else { console.warn('Error checking for existing documents:', error); } } else if (data && data.length > 0) { // Add existing hashes to the set data.forEach(item => existingDocuments.add(item.document_hash)); } } if (existingDocuments.size > 0) { // Filter out documents that already exist const filteredDocuments = []; documents.forEach((doc, index) => { const hash = documentHashes[index]; if (!existingDocuments.has(hash)) { filteredDocuments.push(doc); } }); existingCount = documents.length - filteredDocuments.length; newDocuments = filteredDocuments; } } catch (error) { console.warn('Error during deduplication check:', error); console.log('Will proceed with adding all documents and rely on unique constraint'); } // If all documents already exist, we're done if (newDocuments.length === 0) { console.log(`All ${documents.length} documents already exist in the database. Skipping insertion.`); return; } if (existingCount > 0) { console.log(`Found ${existingCount} existing documents. 
Adding ${newDocuments.length} new documents.`); } else { console.log(`Adding all ${newDocuments.length} documents to the database.`); } // Generate embeddings for documents const texts = newDocuments.map(doc => doc.pageContent); const embeddings = await this.embeddings.embedDocuments(texts); // Verify embedding format if (embeddings.length > 0) { const firstEmbedding = embeddings[0]; console.log(`Embedding type: ${typeof firstEmbedding}, isArray: ${Array.isArray(firstEmbedding)}`); console.log(`First embedding length: ${firstEmbedding.length}`); console.log(`First few values: ${firstEmbedding.slice(0, 5)}`); // Ensure embeddings are arrays of numbers const validatedEmbeddings = embeddings.map(emb => { if (typeof emb === 'object' && !Array.isArray(emb)) { // Convert object to array if needed return Object.values(emb).map(val => Number(val)); } return emb; }); // Use validated embeddings embeddings.splice(0, embeddings.length, ...validatedEmbeddings); } // Process in batches to avoid overwhelming the database const batchSize = 20; const totalBatches = Math.ceil(newDocuments.length / batchSize); console.log(`Adding ${newDocuments.length} documents to Supabase in ${totalBatches} batches`); for (let batchIndex = 0; batchIndex < totalBatches; batchIndex++) { const startIdx = batchIndex * batchSize; const endIdx = Math.min(startIdx + batchSize, newDocuments.length); const batch = newDocuments.slice(startIdx, endIdx); const batchEmbeddings = embeddings.slice(startIdx, endIdx); const batchHashes = batch.map(doc => this.generateDocumentHash(doc)); // Prepare batch data const batchData = batch.map((doc, i) => ({ content: doc.pageContent, metadata: doc.metadata, embedding: batchEmbeddings[i], document_hash: batchHashes[i] })); // Insert batch with upsert option (on conflict do nothing) const { error } = await this.supabaseClient .from(this.tableName) .upsert(batchData, { onConflict: 'document_hash', ignoreDuplicates: true }); if (error) { // If upsert fails (e.g., document_hash column doesn't exist yet), try regular insert if (error.code === '42703') { // undefined_column error console.log(`Column 'document_hash' doesn't exist yet. 
Trying regular insert...`); // Try without document_hash for the first batch if (batchIndex === 0) { const simpleBatchData = batch.map((doc, i) => ({ content: doc.pageContent, metadata: doc.metadata, embedding: batchEmbeddings[i] })); const { error: insertError } = await this.supabaseClient .from(this.tableName) .insert(simpleBatchData); if (insertError) { console.error(`Batch ${batchIndex + 1}/${totalBatches}: Error inserting documents:`, JSON.stringify(insertError, null, 2)); console.error(`Error code: ${insertError.code}, message: ${insertError.message || 'No message'}`); console.error(`Error details: ${insertError.details || 'No details'}`); } else { console.log(`Batch ${batchIndex + 1}/${totalBatches}: Successfully added ${batch.length} documents`); } } else { // For subsequent batches, we'll include document_hash since the first batch should have created the table const { error: insertError } = await this.supabaseClient .from(this.tableName) .insert(batchData); if (insertError) { console.error(`Batch ${batchIndex + 1}/${totalBatches}: Error inserting documents:`, JSON.stringify(insertError, null, 2)); console.error(`Error code: ${insertError.code}, message: ${insertError.message || 'No message'}`); console.error(`Error details: ${insertError.details || 'No details'}`); } else { console.log(`Batch ${batchIndex + 1}/${totalBatches}: Successfully added ${batch.length} documents`); } } } else if (error.code === '23505') { // unique_violation console.warn(`Batch ${batchIndex + 1}/${totalBatches}: Duplicate key violation, some documents already exist`); } else { console.error(`Batch ${batchIndex + 1}/${totalBatches}: Error inserting documents:`, JSON.stringify(error, null, 2)); console.error(`Error code: ${error.code}, message: ${error.message || 'No message'}`); console.error(`Error details: ${error.details || 'No details'}`); } } else { console.log(`Batch ${batchIndex + 1}/${totalBatches}: Successfully added ${batch.length} documents`); } } } catch (error) { console.error('Error adding documents to Supabase:', error); throw error; } } /** * Add code chunks to the vector store * @param chunks Array of code chunks to add */ async addCodeChunks(chunks) { const documents = chunks.map(chunk => new documents_1.Document({ pageContent: chunk.text, metadata: chunk.metadata })); await this.addDocuments(documents); } /** * Search for similar documents based on a query string * @param query Query string * @param k Number of results to return * @returns Array of documents with similarity scores */ async similaritySearch(query, k = 5) { // Get results with scores const results = await this.similaritySearchWithScore(query, k); // Return just the documents return results.map(([doc]) => doc); } /** * Search for similar documents with scores * @param query Query string * @param k Number of results to return * @returns Array of documents with similarity scores */ async similaritySearchWithScore(query, k = 5) { try { // Generate embedding for the query const queryEmbedding = await this.embeddings.embedQuery(query); // Search for similar documents using the match_documents function const { data, error } = await this.supabaseClient.rpc('match_documents', { query_embedding: queryEmbedding, match_threshold: 0.0, // Return all results and filter by k match_count: k }); if (error) { console.error('Error searching for similar documents:', error); return []; } // Convert results to documents with scores return (data || []).map((item) => { const doc = new documents_1.Document({ pageContent: item.content, metadata: 
item.metadata }); return [doc, item.similarity]; }); } catch (error) { console.error('Error in similarity search:', error); return []; } } /** * Get the total number of documents in the vector store * @returns Number of documents */ async getDocumentCount() { try { const { count, error } = await this.supabaseClient .from(this.tableName) .select('*', { count: 'exact', head: true }); if (error) { console.error('Error getting document count:', error); return 0; } return count || 0; } catch (error) { console.error('Error in getDocumentCount:', error); return 0; } } /** * Delete all documents from the vector store */ async deleteAll() { try { const { error } = await this.supabaseClient .from(this.tableName) .delete() .neq('id', 0); // Delete all rows if (error) { console.error('Error deleting all documents:', error); throw error; } } catch (error) { console.error('Error in deleteAll:', error); throw error; } } /** * Get the dimensionality of the embeddings * @returns The number of dimensions in the embedding vectors */ getDimension() { return this.dimensions; } /** * Generate a unique hash for a document based on its content and metadata * @param document Document to generate hash for * @returns SHA-256 hash of the document content and metadata */ generateDocumentHash(document) { // Create a string representation of the document that includes content and metadata const metadataStr = JSON.stringify(document.metadata || {}); const contentToHash = `${document.pageContent}|${metadataStr}`; // Generate SHA-256 hash return crypto_1.default.createHash('sha256').update(contentToHash).digest('hex'); } } exports.SupabaseIL2CPPVectorStore = SupabaseIL2CPPVectorStore; //# sourceMappingURL=supabase-vector-store.js.map
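
// --- Assumed database setup (illustrative, not part of this module) ---
// similaritySearchWithScore() calls a `match_documents` RPC, and
// addDocuments() inserts rows with content/metadata/embedding/document_hash
// columns, but this file creates neither the table nor the function. The
// string below is a minimal sketch of what the Supabase side might look
// like, assuming pgvector with cosine distance and the 384-dimension
// default used by the constructor; the actual schema shipped with the
// package may differ.
const exampleSetupSql = `
create extension if not exists vector;

create table if not exists il2cpp_documents (
  id bigserial primary key,
  content text,
  metadata jsonb,
  embedding vector(384),
  document_hash text unique
);

create or replace function match_documents(
  query_embedding vector(384),
  match_threshold float,
  match_count int
) returns table (id bigint, content text, metadata jsonb, similarity float)
language sql stable as $$
  select id, content, metadata,
         1 - (embedding <=> query_embedding) as similarity
  from il2cpp_documents
  where 1 - (embedding <=> query_embedding) > match_threshold
  order by embedding <=> query_embedding
  limit match_count;
$$;
`;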
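
// --- Usage sketch (hypothetical, not part of this module) ---
// The class only requires an embeddings object exposing embedDocuments(texts),
// embedQuery(text), and optionally getDimension(). The stub below is a
// hypothetical stand-in; a real deployment would supply an actual embedding
// model whose dimensionality matches the `embedding` column.
const fakeEmbeddings = {
    getDimension: () => 384,
    // Constant vectors are only for wiring tests; real models produce
    // meaningful similarity scores.
    embedQuery: async (text) => new Array(384).fill(0.1),
    embedDocuments: async (texts) => texts.map(() => new Array(384).fill(0.1)),
};

async function exampleUsage() {
    // SUPABASE_URL / SUPABASE_KEY are placeholder environment variables
    const store = new SupabaseIL2CPPVectorStore(fakeEmbeddings, process.env.SUPABASE_URL, process.env.SUPABASE_KEY);
    // Index a couple of IL2CPP chunks, then run a scored similarity search
    await store.addCodeChunks([
        { text: 'public class PlayerController : MonoBehaviour { }', metadata: { type: 'class', name: 'PlayerController' } },
        { text: 'public class EnemySpawner : MonoBehaviour { }', metadata: { type: 'class', name: 'EnemySpawner' } },
    ]);
    const hits = await store.similaritySearchWithScore('player movement controller', 3);
    for (const [doc, score] of hits) {
        console.log(score.toFixed(3), doc.metadata.name);
    }
}

if (require.main === module) {
    exampleUsage().catch(console.error);
}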