UNPKG

@ocap/indexdb-prune

Version:

Tool to prune records from indexdb that do not exist in statedb

114 lines (89 loc) 3.38 kB
/* eslint-disable no-console, import/no-unresolved */
require('dotenv-flow').config();

// Number of index documents fetched, and checked against statedb, per page.
const BATCH_SIZE = 300;

// A progress line is printed each time this many more records have been processed.
const PROGRESS_LOG_INTERVAL = 2000;

// Table names looked up on both `indexdb` and `statedb`.
const tables = ['tx', 'account', 'asset', 'delegation', 'token', 'factory', 'stake', 'rollup', 'rollupBlock'];

/**
 * Fetch one page of documents from an index table, ordered by `_id`.
 *
 * @param {object} indexTable - Index table exposing `search({ body })`.
 * @param {Array|null} searchAfter - `search_after` cursor from the previous page, or null for the first page.
 * @returns {Promise<Array|undefined>} The `hits.hits` array of the search response.
 */
async function fetchIndexPage(indexTable, searchAfter) {
  const searchBody = {
    size: BATCH_SIZE,
    query: { match_all: {} },
    sort: [{ _id: 'asc' }],
  };
  if (searchAfter) {
    searchBody.search_after = searchAfter;
  }

  const response = await indexTable.search({ body: searchBody });
  return response.hits.hits;
}

/**
 * Look up which of the given ids exist in the statedb table.
 *
 * @param {object} statedb - State database exposing `driver(tableName)`.
 *   NOTE(review): the `select(...).whereIn(...)` chain looks like a knex query builder — confirm.
 * @param {string} tableName - Table to query.
 * @param {string} uniqueIndex - Column holding the record id.
 * @param {string[]} ids - Candidate ids taken from the index.
 * @returns {Promise<Set<string>>} Ids (as strings) that are present in statedb.
 */
async function fetchExistingIds(statedb, tableName, uniqueIndex, ids) {
  const rows = await statedb.driver(tableName).select(uniqueIndex).whereIn(uniqueIndex, ids);

  // A Set (rather than a plain object) keeps ids such as "constructor" or
  // "__proto__" from matching inherited Object.prototype members. Values are
  // normalised to strings so they compare the same way object keys would.
  return new Set(rows.map((row) => String(row[uniqueIndex])));
}

/**
 * Walk every document of one index table and collect the ids that have no
 * matching row in the corresponding statedb table.
 *
 * @param {object} indexdb - Index database; `indexdb[tableName]` must expose `search`.
 * @param {object} statedb - State database; `statedb[tableName].uniqIndex` names the id column.
 * @param {string} tableName - Table to compare.
 * @returns {Promise<string[]>} Index document ids missing from statedb.
 */
async function findOrphanedIds(indexdb, statedb, tableName) {
  const indexTable = indexdb[tableName];
  const uniqueIndex = statedb[tableName].uniqIndex;

  const orphanedIds = [];
  let searchAfter = null;
  let processedCount = 0;
  let hasMore = true;

  // Page through the whole index with search_after.
  while (hasMore) {
    const hits = await fetchIndexPage(indexTable, searchAfter);
    if (!hits || hits.length === 0) {
      break;
    }

    // A short page means the index has been exhausted.
    const isLastPage = hits.length < BATCH_SIZE;
    const previousCount = processedCount;
    processedCount += hits.length;

    // Log when a PROGRESS_LOG_INTERVAL boundary was crossed by this page. A plain
    // `% PROGRESS_LOG_INTERVAL === 0` test would rarely fire because BATCH_SIZE
    // does not divide the interval.
    const crossedInterval =
      Math.floor(processedCount / PROGRESS_LOG_INTERVAL) > Math.floor(previousCount / PROGRESS_LOG_INTERVAL);
    if (crossedInterval || isLastPage) {
      console.log(`Checking ${tableName}: processed ${processedCount} records`);
    }

    const indexIds = hits.map((hit) => hit._id);
    const existingIds = await fetchExistingIds(statedb, tableName, uniqueIndex, indexIds);

    for (const id of indexIds) {
      if (!existingIds.has(String(id))) {
        orphanedIds.push(id);
      }
    }

    if (isLastPage) {
      hasMore = false;
    } else {
      // Continue after the last document of this page.
      const lastHit = hits[hits.length - 1];
      searchAfter = lastHit.sort || [lastHit._id];
    }
  }

  return orphanedIds;
}

/**
 * Find index records that no longer exist in statedb.
 *
 * Nothing is deleted here: the function only collects the ids, prints progress
 * and a summary to the console, and returns the result.
 *
 * @param {object} indexdb - Index database with one searchable table per entry of `tables`.
 * @param {object} statedb - State database with `driver(tableName)` and per-table `uniqIndex`.
 * @returns {Promise<{timestamp: string, tables: Object<string, string[]>}>}
 *   ISO timestamp of the run plus, per table, the index ids missing from statedb.
 */
async function findRecordsToDelete(indexdb, statedb) {
  const recordsToDelete = {
    timestamp: new Date().toISOString(),
    tables: {},
  };

  console.log(`Will check the following tables: ${tables.join(', ')}`);

  // Tables are processed one after another so the console output stays in order.
  for (const tableName of tables) {
    console.log(`Processing table: ${tableName}`);

    const orphanedIds = await findOrphanedIds(indexdb, statedb, tableName);
    recordsToDelete.tables[tableName] = orphanedIds;

    console.log(`Completed table ${tableName}: found ${orphanedIds.length} records to delete`);
  }

  // Output summary
  console.log('\n----- SUMMARY OF RECORDS TO DELETE -----');
  let totalRecords = 0;
  Object.entries(recordsToDelete.tables).forEach(([tableName, ids]) => {
    totalRecords += ids.length;
    console.log(`${tableName}: ${ids.length} records`);
  });

  console.log(`Total: ${totalRecords} records to delete`);
  console.log(`Details: ${JSON.stringify(recordsToDelete, null, 2)}`);

  return recordsToDelete;
}

module.exports = { findRecordsToDelete };