UNPKG

stepwright

Version:

A powerful web scraping library built with Playwright

184 lines 7.87 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.executeTab = executeTab;
const scraper_1 = require("./scraper");
const utils_1 = require("./utils");
const step_executor_1 = require("./step-executor");
// Import global types
require("./global-types");
/**
 * Execute a tab.
 *
 * Runs the template's optional `initSteps` once, then runs the per-page step
 * list (`perPageSteps`, falling back to `steps`) according to the configured
 * pagination mode:
 *   - `pagination.paginateAllFirst`: paginate until exhausted (or `maxPages`),
 *     then run the per-page steps a single time;
 *   - `pagination.paginationFirst`: paginate before the per-page steps on every
 *     iteration except the first;
 *   - otherwise: run the per-page steps, then paginate.
 *
 * Side effect: assigns `global.onResultCallback` (a wrapper around `onResult`,
 * or `null` when no callback is given). The global is not restored on exit.
 *
 * @param {object} page - The page object.
 * @param {object} template - The template object.
 * @param {function} [onResult] - Optional callback invoked as `onResult(record, index)`.
 *
 * @since v1.0.0
 * @author Muhammad Umer Farooq <umer@lablnet.com>
 *
 * @returns {Promise<object[]>} - The collected records.
 * @since v1.0.0
 * @company Framework Island
 */
async function executeTab(page, template, onResult) {
    const results = [];
    // Set global callback for immediate streaming
    if (onResult) {
        global.onResultCallback = async (result, index) => {
            await onResult(result, index);
        };
    }
    else {
        global.onResultCallback = null;
    }
    console.log(`=== TAB ${template.tab} ===`);
    // 1. Execute init steps once if provided
    if (template.initSteps && template.initSteps.length > 0) {
        console.log('--- Running initSteps ---');
        await (0, step_executor_1.executeStepList)(page, template.initSteps, {});
    }
    const { pagination } = template;
    // Helper: pick the step list to run for a page (perPageSteps wins over steps).
    function resolvePageSteps() {
        return template.perPageSteps && template.perPageSteps.length > 0
            ? template.perPageSteps
            : (template.steps ?? []);
    }
    // Helper: append the data gathered by one step-list pass to `results`,
    // notify `onResult`, and return the next free result index.
    async function emitCollected(collected, startIndex) {
        let nextIndex = startIndex;
        if (Object.keys(collected).length === 0) {
            return nextIndex;
        }
        // Indexed items (item_0, item_1, ...) become separate result objects
        const itemKeys = Object.keys(collected).filter((key) => key.startsWith('item_'));
        if (itemKeys.length === 0) {
            // Normal case - push the collected object as is
            results.push(collected);
            if (onResult) {
                await onResult(collected, nextIndex);
            }
            return nextIndex + 1;
        }
        for (const key of itemKeys) {
            const itemData = collected[key];
            if (itemData && Object.keys(itemData).length > 0) {
                results.push(itemData);
                // Only call directly when no global streaming callback is installed.
                // NOTE(review): presumably the step executor already streams item_*
                // records through global.onResultCallback - confirm in step-executor.
                if (onResult && !global.onResultCallback) {
                    await onResult(itemData, nextIndex);
                }
                nextIndex++;
            }
        }
        return nextIndex;
    }
    // Helper to run pagination action (next/scroll).
    // Resolves to true when another page should be processed, false at the end.
    async function runPagination(page, pagination, logPrefix = '') {
        if (pagination.strategy === 'next' && pagination.nextButton) {
            console.log(`${logPrefix}👉 Clicking next button`);
            try {
                // Check if next button exists and is enabled
                const nextButton = (0, utils_1.locatorFor)(page, pagination.nextButton.object_type, pagination.nextButton.object);
                const count = await nextButton.count();
                if (count === 0) {
                    console.log(`${logPrefix}👉 Next button not found - end of pagination`);
                    return false;
                }
                // Best effort: if the disabled state cannot be read, try the click anyway
                const isDisabled = await nextButton.isDisabled().catch(() => false);
                if (isDisabled) {
                    console.log(`${logPrefix}👉 Next button is disabled - end of pagination`);
                    return false;
                }
                await (0, scraper_1.click)(page, pagination.nextButton.object_type, pagination.nextButton.object);
                if (pagination.nextButton.wait) {
                    await page.waitForTimeout(pagination.nextButton.wait);
                }
                else {
                    await page.waitForLoadState('networkidle');
                }
                return true;
            }
            catch (err) {
                // Any failure while clicking/waiting is treated as the end of pagination
                console.log(`${logPrefix}👉 Next button click failed: ${err}`);
                return false;
            }
        }
        else if (pagination.strategy === 'scroll') {
            console.log(`${logPrefix}🖱️ Scrolling for pagination`);
            // Without maxPages nothing else bounds the caller's loop, so the scroll
            // position is used to detect the end of the page.
            const isUnbounded = !pagination.maxPages;
            const startY = isUnbounded ? await page.evaluate(() => window.scrollY) : null;
            const offset = pagination.scroll?.offset ?? (await page.evaluate(() => window.innerHeight));
            await page.evaluate((y) => window.scrollBy(0, y), offset);
            const delay = pagination.scroll?.delay ?? 1000;
            await page.waitForTimeout(delay);
            if (!isUnbounded) {
                // Bounded by maxPages: keep the original "always continue" behavior
                return true;
            }
            const endY = await page.evaluate(() => window.scrollY);
            if (endY === startY) {
                console.log(`${logPrefix}🖱️ Scroll position did not change - end of pagination`);
                return false;
            }
            return true;
        }
        return false;
    }
    // If paginateAllFirst is set, run all pagination actions first
    if (pagination && pagination.paginateAllFirst) {
        let pageIndex = 0;
        while (true) {
            if (pagination.maxPages && pageIndex >= pagination.maxPages) {
                break;
            }
            const paginated = await runPagination(page, pagination, '[paginateAllFirst] ');
            if (!paginated) {
                break;
            }
            pageIndex++;
        }
        // After all pagination, run perPageSteps once
        const collected = {};
        await (0, step_executor_1.executeStepList)(page, resolvePageSteps(), collected);
        await emitCollected(collected, 0);
        return results;
    }
    // Default behavior (pagination per page)
    let pageIndex = 0;
    let resultIndex = 0;
    while (true) {
        console.log(`--- Page iteration ${pageIndex} ---`);
        const collected = {};
        // If paginationFirst is set, run pagination action before perPageSteps (except on first page)
        if (pagination && pagination.paginationFirst && pageIndex > 0) {
            const paginated = await runPagination(page, pagination, '[paginationFirst] ');
            if (!paginated) {
                break;
            }
        }
        await (0, step_executor_1.executeStepList)(page, resolvePageSteps(), collected);
        // after gathering data for this page
        resultIndex = await emitCollected(collected, resultIndex);
        // Handle pagination (if not using paginationFirst, or for loop control)
        if (!pagination) {
            console.log('No pagination configured, finishing tab');
            break; // no pagination -> done
        }
        pageIndex += 1;
        if (pagination.maxPages && pageIndex >= pagination.maxPages) {
            break;
        }
        // If paginationFirst is NOT set, run pagination action after perPageSteps (current behavior)
        if (!pagination.paginationFirst) {
            const paginated = await runPagination(page, pagination, '');
            if (!paginated) {
                break;
            }
        }
    }
    console.log(`=== Finished tab ${template.tab} - collected ${results.length} record(s) ===`);
    return results;
}
//# sourceMappingURL=tab-executor.js.map