@ocap/indexdb-prune
Version:
Tool to prune records from indexdb that do not exist in statedb
114 lines (89 loc) • 3.38 kB
JavaScript
/* eslint-disable no-console, import/no-unresolved */
require('dotenv-flow').config();
// Number of index documents requested per search page.
const BATCH_SIZE = 300;
// Tables compared between the index database and the state database.
const tables = ['tx', 'account', 'asset', 'delegation', 'token', 'factory', 'stake', 'rollup', 'rollupBlock'];
// A progress line is emitted each time the scanned count crosses a multiple of this.
const PROGRESS_LOG_INTERVAL = 2000;

/**
 * Page through one index table and collect the ids that have no matching row in statedb.
 *
 * @param {string} tableName - Table being scanned; also passed to `statedb.driver()`.
 * @param {object} indexTable - Index table handle exposing `search({ body })`, which must
 *   resolve to `{ hits: { hits: [{ _id, sort? }] } }`.
 * @param {object} statedb - State database handle exposing `driver(tableName)`, whose result
 *   supports `.select(column).whereIn(column, ids)` (looks like a knex query builder — confirm).
 * @param {string} uniqueIndex - Column in the state table that holds the index document id.
 * @returns {Promise<string[]>} Ids present in the index but absent from statedb, in scan order.
 */
async function collectMissingIds(tableName, indexTable, statedb, uniqueIndex) {
  const missingIds = [];
  let searchAfter = null;
  let processedCount = 0;
  let nextProgressAt = PROGRESS_LOG_INTERVAL;
  let hasMore = true;

  // search_after pagination: each page resumes after the sort key of the previous page's last hit.
  while (hasMore) {
    const searchBody = {
      size: BATCH_SIZE,
      query: { match_all: {} },
      sort: [{ _id: 'asc' }],
    };
    if (searchAfter) {
      searchBody.search_after = searchAfter;
    }

    const response = await indexTable.search({ body: searchBody });
    const { hits } = response.hits;
    if (!hits || hits.length === 0) {
      break;
    }

    processedCount += hits.length;
    // A short page means the index has been exhausted.
    const isLastPage = hits.length < BATCH_SIZE;

    // BATCH_SIZE does not divide the interval evenly, so test for crossing the next
    // threshold instead of exact divisibility (which would only fire every 6000 records).
    if (processedCount >= nextProgressAt || isLastPage) {
      console.log(`Checking ${tableName}: processed ${processedCount} records`);
      nextProgressAt = (Math.floor(processedCount / PROGRESS_LOG_INTERVAL) + 1) * PROGRESS_LOG_INTERVAL;
    }

    const indexIds = hits.map((hit) => hit._id);
    const existingRecords = await statedb.driver(tableName).select(uniqueIndex).whereIn(uniqueIndex, indexIds);

    // Use a Set rather than a plain object: ids such as "constructor" or "__proto__" would
    // otherwise resolve to inherited Object.prototype members and look like existing rows.
    // Values are string-coerced so a numeric state column still matches the string index id.
    const existingIds = new Set(existingRecords.map((record) => String(record[uniqueIndex])));

    for (const id of indexIds) {
      if (!existingIds.has(String(id))) {
        missingIds.push(id);
      }
    }

    if (isLastPage) {
      hasMore = false;
    } else {
      const lastHit = hits[hits.length - 1];
      // Fall back to the document id when the hit carries no sort values.
      searchAfter = lastHit.sort || [lastHit._id];
    }
  }

  return missingIds;
}

/**
 * Find the records that exist in indexdb but not in statedb, for every table in `tables`.
 *
 * Nothing is deleted here: the function only reads from both databases, logs progress and a
 * summary to the console, and returns the ids that are candidates for deletion.
 *
 * @param {object} indexdb - Object keyed by table name; each entry exposes `search({ body })`.
 * @param {object} statedb - Object keyed by table name (each entry exposes `uniqIndex`) that
 *   also provides `driver(tableName)` for querying the table.
 * @returns {Promise<{timestamp: string, tables: Object<string, string[]>}>} ISO timestamp of
 *   the run plus, per table, the ids found in the index but missing from statedb.
 * @throws {TypeError} If a table listed in `tables` is missing from `indexdb` or `statedb`.
 */
async function findRecordsToDelete(indexdb, statedb) {
  const recordsToDelete = {
    timestamp: new Date().toISOString(),
    tables: {},
  };

  console.log(`Will check the following tables: ${tables.join(', ')}`);

  // Tables are scanned one after another to keep the load on both databases bounded.
  for (const tableName of tables) {
    console.log(`Processing table: ${tableName}`);

    const stateTable = statedb[tableName];
    const indexTable = indexdb[tableName];
    if (!stateTable || !indexTable) {
      const missingFrom = stateTable ? 'indexdb' : 'statedb';
      throw new TypeError(`Table "${tableName}" is missing from ${missingFrom}`);
    }

    const missingIds = await collectMissingIds(tableName, indexTable, statedb, stateTable.uniqIndex);
    recordsToDelete.tables[tableName] = missingIds;

    console.log(`Completed table ${tableName}: found ${missingIds.length} records to delete`);
  }

  console.log('\n----- SUMMARY OF RECORDS TO DELETE -----');
  let totalRecords = 0;
  for (const [tableName, ids] of Object.entries(recordsToDelete.tables)) {
    totalRecords += ids.length;
    console.log(`${tableName}: ${ids.length} records`);
  }
  console.log(`Total: ${totalRecords} records to delete`);
  console.log(`Details: ${JSON.stringify(recordsToDelete, null, 2)}`);

  return recordsToDelete;
}
// Export the scan function so other modules can require and run it.
module.exports = { findRecordsToDelete };