UNPKG

@apify-scrapers/shared

Version:

Shared utilities and constants for Apify scrapers

79 lines (66 loc) 2.4 kB
import { PLATFORMS, PLATFORM_DOMAINS } from '../constants/platforms.js';

/**
 * Parse a URL and return its lower-cased hostname.
 *
 * Private helper shared by the detection functions so that they all treat
 * missing or malformed input the same way instead of throwing.
 *
 * @param {string} url - The URL to parse
 * @returns {string|null} Lower-cased hostname, or null when `url` is falsy
 *   or cannot be parsed by the `URL` constructor
 */
function parseHostname(url) {
  if (!url) return null;

  try {
    return new URL(url).hostname.toLowerCase();
  } catch (error) {
    // `new URL()` throws a TypeError on malformed input; callers treat an
    // unparsable URL as "nothing detected" rather than as a fatal error.
    return null;
  }
}

/**
 * Detect platform from URL
 *
 * Shopee domains are checked before Lazada domains; the first match wins.
 *
 * @param {string} url - The URL to analyze
 * @returns {string} Platform name (PLATFORMS.SHOPEE, PLATFORMS.LAZADA, or
 *   PLATFORMS.UNKNOWN when the URL is empty, malformed, or unrecognized)
 */
export function detectPlatform(url) {
  const hostname = parseHostname(url);
  if (hostname === null) return PLATFORMS.UNKNOWN;

  // NOTE(review): this is a substring match, so a hostname that merely
  // contains a platform domain (e.g. as a subdomain of another site) also
  // matches. Tightening it to a suffix match depends on the exact shape of
  // the PLATFORM_DOMAINS entries, which are defined in another module —
  // confirm before changing.
  const matchesPlatform = (platform) =>
    PLATFORM_DOMAINS[platform].some((domain) => hostname.includes(domain));

  if (matchesPlatform(PLATFORMS.SHOPEE)) return PLATFORMS.SHOPEE;
  if (matchesPlatform(PLATFORMS.LAZADA)) return PLATFORMS.LAZADA;

  return PLATFORMS.UNKNOWN;
}

/**
 * Extract country code from URL
 *
 * The hostname is split into dot-separated labels and inspected in this
 * order:
 *   1. A two-letter top-level label (country-code TLD), e.g. `shopee.sg`,
 *      `shopee.com.my`, `shopee.co.id` -> SG, MY, ID.
 *   2. Otherwise, the first two-letter label that is neither the first nor
 *      the last label, e.g. `www.sg.example.com` -> SG.
 *
 * Checking the top-level label first avoids mistaking a generic second-level
 * label such as `co` (in `co.id`, `co.th`) for the country.
 *
 * @param {string} url - The URL to analyze
 * @returns {string|null} Upper-cased two-letter code (SG, MY, ID, etc.), or
 *   null when the URL is empty, malformed, or carries no two-letter label
 */
export function extractCountryFromUrl(url) {
  const hostname = parseHostname(url);
  if (!hostname) return null;

  const labels = hostname.split('.');
  const isCountryLabel = (label) => /^[a-z]{2}$/.test(label);

  // A bare single-label host (e.g. "localhost") has no TLD to inspect.
  const topLevelLabel = labels[labels.length - 1];
  if (labels.length > 1 && isCountryLabel(topLevelLabel)) {
    return topLevelLabel.toUpperCase();
  }

  // Interior labels only: the leading label is skipped so that hosts such
  // as `my.example.com` are not reported as a country.
  const interiorLabel = labels.slice(1, -1).find(isCountryLabel);
  return interiorLabel ? interiorLabel.toUpperCase() : null;
}

/**
 * Generate next page URL for pagination
 *
 * Sets (or replaces) the `page` query parameter on a copy of the current
 * URL; every other part of the URL is preserved.
 *
 * @param {string} currentUrl - Current page URL
 * @param {number} nextPage - Next page number
 * @param {string} platform - Platform name. Currently unused: Shopee and
 *   Lazada both paginate through the same `page` parameter. Kept so existing
 *   callers keep working and a platform-specific scheme can be added later.
 * @returns {string|null} Next page URL or null if error
 */
export function getNextPageUrl(currentUrl, nextPage, platform) {
  try {
    const url = new URL(currentUrl);
    url.searchParams.set('page', nextPage.toString());
    return url.toString();
  } catch (error) {
    console.error('Error creating next page URL:', error);
    return null;
  }
}