UNPKG

kotak-payment-gateway

Version:

Kotak Bank Payment Gateway - Automated payment verification using bank statement scraping

966 lines (831 loc) 39.7 kB
/** * Kotak Bank Statement Scraper - Puppeteer Version * Maintains logged-in state and handles CSV file management automatically. */ const puppeteer = require('puppeteer'); const fs = require('fs-extra'); const path = require('path'); const glob = require('glob'); const { fetchKotakOtp } = require('./config'); const { time } = require('console'); class KotakScraper { /** * Single, clean Kotak Bank scraper with session management. */ constructor(username, password, emailConfig = {}, headless = false) { this.username = username; this.password = password; this.emailConfig = emailConfig; this.headless = headless; this.browser = null; this.page = null; this.isLoggedIn = false; this.keepAliveInterval = null; this.downloadPath = process.cwd(); this.loginInProgress = false; this.downloadInProgress = false; this.setupLogging(); } setupLogging() { /** * Setup simple logging. */ this.logger = { info: (msg) => console.log(`INFO: ${msg}`), error: (msg) => console.error(`ERROR: ${msg}`), warning: (msg) => console.warn(`WARNING: ${msg}`) }; } async setupBrowser() { /** * Setup Puppeteer browser with download preferences. */ const browserOptions = { headless: this.headless ? 'new' : false, defaultViewport: { width: 1920, height: 1080 }, args: [ '--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-web-security', '--disable-features=VizDisplayCompositor', '--disable-blink-features=AutomationControlled', '--disable-extensions', '--no-first-run', '--disable-default-apps', '--disable-background-timer-throttling', '--disable-backgrounding-occluded-windows', '--disable-renderer-backgrounding' ] }; this.browser = await puppeteer.launch(browserOptions); // Use the default page that comes with the browser instead of creating a new one const pages = await this.browser.pages(); this.page = pages[0]; // Set download behavior const client = await this.page.target().createCDPSession(); const resolvedDownloadPath = path.resolve(this.downloadPath); this.logger.info(`Setting browser download path to: ${resolvedDownloadPath}`); await client.send('Page.setDownloadBehavior', { behavior: 'allow', downloadPath: resolvedDownloadPath }); // Set user agent and viewport await this.page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36'); // Remove automation indicators await this.page.evaluateOnNewDocument(() => { Object.defineProperty(navigator, 'webdriver', { get: () => undefined, }); }); // Set extra headers await this.page.setExtraHTTPHeaders({ 'Accept-Language': 'en-US,en;q=0.9' }); } async recreateBrowser() { /** * Recreate the browser if it becomes invalid. */ try { this.logger.info("Recreating browser..."); // Stop keep-alive if (this.keepAliveInterval) { clearInterval(this.keepAliveInterval); this.keepAliveInterval = null; } // Close existing browser if (this.browser) { try { // Force close all pages first const pages = await this.browser.pages(); for (const page of pages) { try { await page.close(); } catch (error) { // Ignore individual page close errors } } await this.browser.close(); } catch (error) { // Ignore errors when closing invalid browser this.logger.warning(`Error closing browser: ${error.message}`); } } // Reset state (but preserve loginInProgress if login is in progress) this.isLoggedIn = false; // Don't reset loginInProgress here as it might be managed by login method this.browser = null; this.page = null; // Wait a moment before creating new browser await new Promise(resolve => setTimeout(resolve, 2000)); // Create new browser await this.setupBrowser(); this.logger.info("Browser recreated successfully"); } catch (error) { this.logger.error(`Error recreating browser: ${error}`); throw error; } } async login() { /** * Login to Kotak Net Banking. */ // Prevent concurrent login attempts with timeout if (this.loginInProgress) { this.logger.info("Login already in progress, waiting..."); const startWait = Date.now(); const maxWaitTime = 120000; // 2 minutes timeout while (this.loginInProgress) { if (Date.now() - startWait > maxWaitTime) { this.logger.warning("Login wait timeout reached, forcing reset"); this.loginInProgress = false; break; } await new Promise(resolve => setTimeout(resolve, 1000)); } return this.isLoggedIn; } this.loginInProgress = true; try { // Check if we need to recreate the browser if (!await this.isSessionActive()) { try { if (this.page) { await this.page.url(); } } catch (error) { if (error.message.includes('Target closed') || error.message.includes('Session closed') || error.message.includes('Requesting main frame too early')) { await this.recreateBrowser(); } } } this.logger.info("Logging in to Kotak Net Banking..."); // Navigate to login page with better error handling try { await this.page.goto("https://netbanking.kotak.com/knb2/#/login", { waitUntil: 'networkidle2', timeout: 60000 }); // Wait for the page to be fully interactive await this.page.waitForFunction(() => document.readyState === 'complete', { timeout: 30000 }); } catch (error) { this.logger.error(`Failed to navigate to login page: ${error.message}`); return false; } // Wait for and enter credentials with better error handling try { this.logger.info("Waiting for login form..."); await this.page.waitForSelector('#userName', { visible: true, timeout: 15000 }); await this.page.waitForSelector('#credentialInputField', { visible: true, timeout: 15000 }); // Clear existing values and enter credentials with proper delay await this.page.evaluate(() => { const userField = document.querySelector('#userName'); if (userField) userField.value = ''; const passField = document.querySelector('#credentialInputField'); if (passField) passField.value = ''; }); await this.page.focus('#userName'); await this.page.type('#userName', this.username, { delay: 50 }); await this.page.focus('#credentialInputField'); await this.page.type('#credentialInputField', this.password, { delay: 50 }); } catch (error) { this.logger.error(`Failed to enter credentials: ${error.message}`); return false; } // Click login button with better targeting let currenttime = new Date(); try { this.logger.info("Clicking login button..."); const loginButtonSelector = 'button.btn.btn-primary.float-right.marb16.btnVertualsubmit.mt-3'; await this.page.waitForSelector(loginButtonSelector, { visible: true, timeout: 10000 }); const loginButtons = await this.page.$$(loginButtonSelector); let loginClicked = false; for (const button of loginButtons) { const buttonText = await this.page.evaluate(el => el.textContent.trim(), button); if (buttonText === "Secure login") { currenttime = new Date(); await button.click(); loginClicked = true; break; } } if (!loginClicked) { this.logger.error("Login button not found"); return false; } } catch (error) { this.logger.error(`Failed to click login button: ${error.message}`); return false; } // Handle OTP with better error handling try { this.logger.info("Fetching OTP from email..."); const otp = await fetchKotakOtp(this.emailConfig , currenttime); if (!otp) { this.logger.error("No OTP received from email - login will fail"); return false; } this.logger.info(`OTP received: ${otp}`); // Wait for OTP input field await this.page.waitForSelector('#otpMobile', { visible: true, timeout: 15000 }); // Clear and enter OTP with proper delay await this.page.evaluate(() => { const otpField = document.querySelector('#otpMobile'); if (otpField) otpField.value = ''; }); await this.page.focus('#otpMobile'); await this.page.type('#otpMobile', otp, { delay: 100 }); // Submit OTP const otpSubmitSelector = 'button.btn.btn-primary.float-right.btn-mar-right.ng-star-inserted'; await this.page.waitForSelector(otpSubmitSelector, { visible: true, timeout: 10000 }); const otpSubmit = await this.page.$(otpSubmitSelector); await otpSubmit.click(); // Verify login success by checking for dashboard elements try { await this.page.waitForSelector('span.nav-item.hidemenu.ng-star-inserted', { visible: true, timeout: 10000 }); this.isLoggedIn = true; this.startKeepAlive(); this.logger.info("Login successful!"); return true; } catch (error) { this.logger.error("Login verification failed - dashboard not loaded"); return false; } } catch (error) { this.logger.error(`OTP handling failed: ${error.message}`); return false; } } catch (error) { this.logger.error(`Login failed: ${error}`); return false; } finally { // Always reset loginInProgress flag regardless of success/failure if (this.loginInProgress) { this.logger.info("Resetting loginInProgress flag"); this.loginInProgress = false; } } } async isSessionActive() { /** * Check if session is still active. */ try { if (!this.browser || !this.page) { return false; } // Test if the page is still valid try { await this.page.url(); } catch (error) { if (error.message.includes('Target closed') || error.message.includes('Session closed') || error.message.includes('Requesting main frame too early')) { this.logger.warning("Browser session is invalid"); return false; } throw error; } // Check for login form elements (indicates logged out) try { const loginElements = await this.page.$('#userName'); if (loginElements) { const isVisible = await this.page.evaluate(el => { return el && el.offsetParent !== null; }, loginElements); if (isVisible) { return false; } } } catch (error) { // If we can't check login elements, assume session is invalid this.logger.warning(`Cannot check login elements: ${error.message}`); return false; } // Check for logged-in navigation elements try { const navElements = await this.page.$$('span.nav-item.hidemenu.ng-star-inserted'); if (navElements && navElements.length > 0) { return true; } } catch (error) { this.logger.warning(`Cannot check nav elements: ${error.message}`); return false; } // Check for session timeout messages try { const pageContent = await this.page.content(); const timeoutIndicators = ["session expired", "login again", "timeout", "logged out"]; for (const indicator of timeoutIndicators) { if (pageContent.toLowerCase().includes(indicator)) { return false; } } } catch (error) { this.logger.warning(`Cannot check page content: ${error.message}`); return false; } return true; } catch (error) { this.logger.warning(`Error checking session: ${error}`); return false; } } startKeepAlive() { /** * Start keep-alive interval. */ if (this.keepAliveInterval) { return; } this.keepAliveInterval = setInterval(async () => { try { // Click between tabs to keep session alive const tabs = [ "//li[contains(@class, 'list-item-wrapper') and contains(text(), 'Recent Transactions')]", "//li[contains(@class, 'list-item-wrapper') and contains(text(), 'Annual Account Statements')]" ]; const randomTab = tabs[Math.floor(Math.random() * tabs.length)]; try { const element = await this.page.$x(randomTab); if (element && element.length > 0) { await element[0].click(); await this.page.waitForTimeout(1000); } } catch (error) { // Ignore errors during keep-alive } } catch (error) { // Ignore errors during keep-alive } }, Math.floor(Math.random() * (60000 - 45000)) + 45000); // 45-60 seconds this.logger.info("Keep-alive started"); } async cleanupCsvFiles() { /** * Delete existing CSV files in download directory. */ try { // Check if CSV processing is in progress before cleanup if (isProcessingInProgress()) { this.logger.info("CSV processing is in progress, waiting before cleanup..."); let waitTime = 0; const maxWaitTime = 30000; // Wait max 30 seconds while (isProcessingInProgress() && waitTime < maxWaitTime) { await new Promise(resolve => setTimeout(resolve, 1000)); waitTime += 1000; if (waitTime % 5000 === 0) { // Log every 5 seconds this.logger.info(`Still waiting for CSV processing to complete... (${waitTime/1000}s)`); } } if (isProcessingInProgress()) { this.logger.warning("CSV processing is taking too long, proceeding with caution..."); } else { this.logger.info("CSV processing completed, proceeding with cleanup"); } } this.logger.info(`Cleaning up CSV files in: ${this.downloadPath}`); // Use forward slashes for glob pattern (works on Windows too) const csvPattern = path.join(this.downloadPath, "*.csv").replace(/\\/g, '/'); this.logger.info(`Looking for CSV files with pattern: ${csvPattern}`); // Try glob with forward slashes let csvFiles = glob.sync(csvPattern); // If no files found on Windows, try native path format if (csvFiles.length === 0 && process.platform === 'win32') { this.logger.info("No files found with forward slash pattern, trying Windows native pattern..."); const backslashPattern = path.join(this.downloadPath, "*.csv"); csvFiles = glob.sync(backslashPattern); this.logger.info(`Windows native pattern result: ${csvFiles.length} files`); } // If still no files, try fs.readdir approach if (csvFiles.length === 0) { this.logger.info("Trying fs.readdir approach..."); try { const files = await fs.readdir(this.downloadPath); csvFiles = files .filter(file => file.toLowerCase().endsWith('.csv')) .map(file => path.join(this.downloadPath, file)); this.logger.info(`fs.readdir found ${csvFiles.length} CSV files`); } catch (readdirError) { this.logger.error(`fs.readdir failed: ${readdirError.message}`); } } this.logger.info(`Found ${csvFiles.length} CSV files to clean up`); if (csvFiles.length === 0) { this.logger.info("No CSV files found to clean up"); return; } for (const file of csvFiles) { try { await fs.remove(file); this.logger.info(`✅ Deleted old CSV: ${path.basename(file)}`); } catch (fileError) { this.logger.error(`❌ Failed to delete ${path.basename(file)}: ${fileError.message}`); } } this.logger.info(`Cleanup completed. Processed ${csvFiles.length} files.`); } catch (error) { this.logger.error(`Error during CSV cleanup: ${error.message}`); // Log the full error stack for debugging console.error("Full cleanup error:", error); } } async verifyDownloadSuccess() { /** * Verify that the download was successful by checking for new CSV files. * @returns {boolean} True if CSV files found, false otherwise */ try { this.logger.info("Verifying download success..."); // Use the same robust file detection as cleanup let csvFiles = []; // Try glob with forward slashes first (cross-platform) const csvPattern = path.join(this.downloadPath, "*.csv").replace(/\\/g, '/'); csvFiles = glob.sync(csvPattern); // If no files found, try fs.readdir approach if (csvFiles.length === 0) { try { const files = await fs.readdir(this.downloadPath); csvFiles = files .filter(file => file.toLowerCase().endsWith('.csv')) .map(file => path.join(this.downloadPath, file)); } catch (readdirError) { this.logger.error(`fs.readdir failed during verification: ${readdirError.message}`); } } if (csvFiles.length > 0) { this.logger.info(`✅ Download verification: Found ${csvFiles.length} CSV file(s):`); for (const file of csvFiles) { try { const stats = await fs.stat(file); this.logger.info(` 📄 ${path.basename(file)} (${Math.round(stats.size / 1024)}KB, modified: ${stats.mtime.toLocaleString()})`); } catch (statError) { this.logger.warning(` 📄 ${path.basename(file)} (could not read file stats)`); } } return true; // Success - files found } else { this.logger.warning("⚠️ Download verification: No CSV files found after download"); this.logger.info("This could mean:"); this.logger.info(" 1. Download is still in progress"); this.logger.info(" 2. Download failed silently"); this.logger.info(" 3. Files are being saved to a different location"); return false; // Failed - no files found } } catch (error) { this.logger.error(`Error verifying download: ${error.message}`); return false; // Failed due to error } } async downloadStatement(retryCount = 0, maxRetries = 2) { /** * Download bank statement in CSV format with retry mechanism. * * @param {number} retryCount - Current retry attempt * @param {number} maxRetries - Maximum number of retries * @returns {boolean} True if successful */ // Prevent concurrent downloads within the same scraper instance if (this.downloadInProgress) { this.logger.info("Download already in progress for this scraper instance, waiting..."); while (this.downloadInProgress) { await new Promise(resolve => setTimeout(resolve, 1000)); } return true; // Return true assuming the concurrent download succeeded } this.downloadInProgress = true; this.logger.info(`Download attempt ${retryCount + 1}/${maxRetries + 1}`); try { // Clean up old CSV files first this.logger.info(`Current working directory: ${process.cwd()}`); this.logger.info(`Download path configured: ${this.downloadPath}`); await this.cleanupCsvFiles(); // Double-check cleanup worked let remainingFiles = []; try { const files = await fs.readdir(this.downloadPath); remainingFiles = files.filter(file => file.toLowerCase().endsWith('.csv')); } catch (error) { this.logger.warning(`Could not verify cleanup: ${error.message}`); } if (remainingFiles.length > 0) { this.logger.warning(`After cleanup, still found ${remainingFiles.length} CSV files:`); remainingFiles.forEach(file => this.logger.warning(` - ${file}`)); } else { this.logger.info("✅ Cleanup verification: No CSV files remaining in download directory"); } // Ensure we're logged in if (!this.isLoggedIn || !await this.isSessionActive()) { this.logger.info("Session expired, re-logging in..."); if (!await this.login()) { this.logger.error("Login failed during download attempt"); if (retryCount < maxRetries) { this.logger.info(`Retrying after login failure... (attempt ${retryCount + 2}/${maxRetries + 1})`); this.downloadInProgress = false; await new Promise(resolve => setTimeout(resolve, 5000)); // Wait longer for login retry return await this.downloadStatement(retryCount + 1, maxRetries); } else { this.downloadInProgress = false; return false; } } } this.logger.info("Navigating to statements..."); // Navigate to Statements with better error handling try { await this.page.waitForSelector('span.nav-item.hidemenu.ng-star-inserted', { visible: true, timeout: 10000 }); await this.page.waitForTimeout(1000); const navItems = await this.page.$$('span.nav-item.hidemenu.ng-star-inserted'); let statementsFound = false; for (const item of navItems) { const text = await this.page.evaluate(el => el.textContent.trim(), item); if (text === "Statements") { await item.click(); statementsFound = true; break; } } if (!statementsFound) { this.logger.error("Statements menu not found"); this.downloadInProgress = false; return false; } } catch (error) { this.logger.error(`Failed to navigate to statements: ${error.message}`); this.downloadInProgress = false; return false; } // Click Account Statements try { await this.page.waitForSelector('p.account-new-heading.ng-star-inserted', { visible: true, timeout: 10000 }); await this.page.waitForTimeout(1000); const accountItems = await this.page.$$('p.account-new-heading.ng-star-inserted'); let accountStatementsFound = false; for (const item of accountItems) { const text = await this.page.evaluate(el => el.textContent.trim(), item); if (text === "Account Statements") { await item.click(); accountStatementsFound = true; break; } } if (!accountStatementsFound) { this.logger.error("Account Statements section not found"); this.downloadInProgress = false; return false; } } catch (error) { this.logger.error(`Failed to click Account Statements: ${error.message}`); this.downloadInProgress = false; return false; } // Download CSV statement this.logger.info("Downloading CSV statement..."); try { // Wait for page to load properly await this.page.waitForTimeout(2000); // Find current year sections using XPath (similar to Python selenium approach) const currentYearSections = await this.page.$x( "//span[contains(text(), 'Account Statements for Current Year')]/following-sibling::ul//span[contains(text(), 'Download Calendar Year')]" ); if (currentYearSections && currentYearSections.length > 0) { this.logger.info("Found download option, clicking..."); await currentYearSections[0].click(); await this.page.waitForTimeout(500); // Select CSV format const formatOption = await this.page.$x( "//li[contains(@class, 'list-format') and contains(text(), 'CSV')]" ); if (formatOption && formatOption.length > 0) { this.logger.info("Found CSV format option, clicking..."); await formatOption[0].click(); this.logger.info("CSV statement download initiated"); await this.page.waitForTimeout(5000); // Wait longer for download // Check if download was successful by looking for new CSV files const downloadSuccessful = await this.verifyDownloadSuccess(); if (downloadSuccessful) { this.downloadInProgress = false; return true; } else { // Download verification failed this.logger.warning(`Download verification failed on attempt ${retryCount + 1}`); if (retryCount < maxRetries) { this.logger.info(`Retrying download process... (attempt ${retryCount + 2}/${maxRetries + 1})`); // Reset login state and try again this.isLoggedIn = false; this.downloadInProgress = false; // Wait before retry await new Promise(resolve => setTimeout(resolve, 3000)); // Recursive retry with increased count return await this.downloadStatement(retryCount + 1, maxRetries); } else { this.logger.error(`Download failed after ${maxRetries + 1} attempts`); this.downloadInProgress = false; return false; } } } else { this.logger.error("CSV format option not found"); this.downloadInProgress = false; return false; } } else { this.logger.error("Download option not found"); this.downloadInProgress = false; return false; } } catch (error) { this.logger.error(`Failed during CSV download: ${error.message}`); this.downloadInProgress = false; return false; } } catch (error) { this.logger.error(`Download failed: ${error}`); this.downloadInProgress = false; return false; } finally { // this.downloadInProgress = false; // Moved to specific return points to as after return the finally won't trigger } } async close() { /** * Close the scraper. */ if (this.keepAliveInterval) { clearInterval(this.keepAliveInterval); this.keepAliveInterval = null; } if (this.browser) { await this.browser.close(); this.logger.info("Scraper closed"); } } } // Global scraper instance let _scraper = null; async function getScraper(username, password, emailConfig, headless = false) { /** * Get or create the global scraper instance. */ // If scraper doesn't exist, create it if (_scraper === null) { _scraper = new KotakScraper(username, password, emailConfig, headless); await _scraper.setupBrowser(); return _scraper; } // Check if credentials have changed or headless mode has changed if (_scraper.username !== username || _scraper.password !== password || _scraper.headless !== headless || JSON.stringify(_scraper.emailConfig) !== JSON.stringify(emailConfig)) { _scraper.logger.info(`Credentials or headless mode changed, recreating scraper`); try { await _scraper.close(); } catch (error) { // Ignore close errors } _scraper = new KotakScraper(username, password, emailConfig, headless); await _scraper.setupBrowser(); return _scraper; } // Check if existing scraper has a valid session try { if (!await _scraper.isSessionActive()) { // If session is invalid due to browser issues, recreate try { await _scraper.page.url(); } catch (error) { if (error.message.includes('Target closed') || error.message.includes('Session closed') || error.message.includes('Requesting main frame too early')) { _scraper.logger.warning("Recreating scraper due to invalid session"); await _scraper.recreateBrowser(); } } } } catch (error) { _scraper.logger.warning(`Error checking scraper state: ${error}`); // If there's the "main frame too early" error or other critical errors, recreate completely if (error.message.includes('Requesting main frame too early') || error.message.includes('Target closed') || error.message.includes('Session closed')) { _scraper.logger.info("Recreating scraper completely due to critical error"); try { await _scraper.close(); } catch (closeError) { // Ignore close errors } _scraper = new KotakScraper(username, password, emailConfig, headless); await _scraper.setupBrowser(); } } return _scraper; } async function download(username, password, emailConfig, headless = false) { /** * Download bank statement in CSV format. * * @param {string} username - Kotak username * @param {string} password - Kotak password * @param {Object} emailConfig - Email configuration for OTP * @param {boolean} headless - Run browser in headless mode (invisible) * @returns {boolean} True if successful */ const scraper = await getScraper(username, password, emailConfig, headless); return await scraper.downloadStatement(); } async function closeScraper() { /** * Close the scraper when done. */ if (_scraper) { await _scraper.close(); _scraper = null; } } function isDownloadInProgress() { /** * Check if download is currently in progress. */ return _scraper ? _scraper.downloadInProgress : false; } // Processing status checker function let _processingStatusChecker = null; function setProcessingStatusChecker(checker) { /** * Set a function to check if CSV processing is in progress. * This prevents cleanup during CSV processing. */ _processingStatusChecker = checker; } function isProcessingInProgress() { /** * Check if CSV processing is currently in progress. */ return _processingStatusChecker ? _processingStatusChecker() : false; } // Main execution async function main() { const readline = require('readline'); const rl = readline.createInterface({ input: process.stdin, output: process.stdout }); const question = (prompt) => { return new Promise((resolve) => { rl.question(prompt, resolve); }); }; while (true) { // Simple test console.log("🏦 Kotak Bank Statement Downloader (Node.js/Puppeteer)"); console.log(`📁 Download location: ${process.cwd()}`); // Ask for headless mode preference const headlessInput = await question("Run in headless mode? (y/n, default=n): "); const headlessMode = ['y', 'yes', '1', 'true'].includes(headlessInput.trim().toLowerCase()); const userInput = await question("Press Enter to download CSV or 'exit' to quit: "); if (userInput.trim().toLowerCase() === "exit") { break; } const success = await download(headlessMode); if (success) { console.log("✅ Download successful!"); // List CSV files in current directory const csvFiles = glob.sync("*.csv"); if (csvFiles.length > 0) { console.log(`📄 Downloaded: ${csvFiles[0]}`); } } else { console.log("❌ Download failed"); } } rl.close(); // Uncomment to close scraper (will need to re-login next time) // await closeScraper(); } // Export for use as module module.exports = { KotakScraper, download, closeScraper, isDownloadInProgress, setProcessingStatusChecker }; // Run main if this file is executed directly if (require.main === module) { main().catch(console.error); }