@apify-scrapers/shared
Version:
Shared utilities and constants for Apify scrapers
79 lines (66 loc) • 2.4 kB
JavaScript
import { PLATFORMS, PLATFORM_DOMAINS } from '../constants/platforms.js';
/**
 * Detect which platform a URL belongs to.
 *
 * The URL's hostname is lower-cased and compared against the domain lists in
 * PLATFORM_DOMAINS. Shopee is checked before Lazada. A missing, empty, or
 * unparseable URL yields PLATFORMS.UNKNOWN instead of throwing.
 *
 * @param {string} url - The absolute URL to analyze
 * @returns {string} PLATFORMS.SHOPEE, PLATFORMS.LAZADA, or PLATFORMS.UNKNOWN
 */
export function detectPlatform(url) {
  if (!url) return PLATFORMS.UNKNOWN;

  let hostname;
  try {
    hostname = new URL(url).hostname.toLowerCase();
  } catch {
    // new URL() throws on malformed or scheme-less input; report that as "unknown".
    return PLATFORMS.UNKNOWN;
  }

  // Checked in this order, so Shopee wins if a hostname matched both lists.
  // NOTE(review): includes() is a substring match, so a hostname that merely
  // contains a listed domain also matches - confirm this is intended.
  for (const platform of [PLATFORMS.SHOPEE, PLATFORMS.LAZADA]) {
    if (PLATFORM_DOMAINS[platform].some((domain) => hostname.includes(domain))) {
      return platform;
    }
  }

  return PLATFORMS.UNKNOWN;
}
/**
 * Extract a two-letter country code from a URL's hostname.
 *
 * Resolution order:
 *   1. The top-level label, when it is exactly two letters
 *      (shopee.sg -> SG, shopee.co.id -> ID, lazada.com.my -> MY).
 *   2. Otherwise the first inner label (neither first nor last) that is
 *      exactly two letters (www.sg.example.com -> SG).
 *
 * Whole labels are compared, so a second-level label such as "co" is not
 * mistaken for the country when a country TLD follows it, and labels that
 * merely start with a country code (e.g. "ideas") do not match.
 *
 * @param {string} url - The absolute URL to analyze
 * @returns {string|null} Upper-case two-letter code (SG, MY, ID, etc.), or
 *   null when the URL is missing, unparseable, or carries no such label
 */
export function extractCountryFromUrl(url) {
  if (!url) return null;

  let hostname;
  try {
    hostname = new URL(url).hostname.toLowerCase();
  } catch {
    // new URL() throws on malformed or scheme-less input; there is no country to report.
    return null;
  }

  const isCountryLabel = (label) => /^[a-z]{2}$/.test(label);

  // filter(Boolean) drops the empty label left by a trailing-dot hostname ("shopee.sg.").
  const labels = hostname.split('.').filter(Boolean);

  // A single-label host (e.g. "localhost") has no top-level domain to inspect.
  if (labels.length > 1) {
    const topLevel = labels[labels.length - 1];
    if (isCountryLabel(topLevel)) return topLevel.toUpperCase();
  }

  const innerLabel = labels.slice(1, -1).find((label) => isCountryLabel(label));
  return innerLabel ? innerLabel.toUpperCase() : null;
}
/**
 * Build the URL of another results page from the current page's URL.
 *
 * The `page` query parameter is set (or overwritten) with `nextPage`; every
 * other part of the URL is kept. Lazada and all other platforms are handled
 * identically, so `platform` does not currently change the result.
 *
 * @param {string} currentUrl - Current page URL
 * @param {number} nextPage - Next page number
 * @param {string} platform - Platform name
 * @returns {string|null} Next page URL, or null if the URL could not be built
 *   (the error is logged via console.error)
 */
export function getNextPageUrl(currentUrl, nextPage, platform) {
  let nextPageUrl = null;

  try {
    const parsed = new URL(currentUrl);
    const pageValue = nextPage.toString();
    parsed.searchParams.set('page', pageValue);
    nextPageUrl = parsed.toString();
  } catch (error) {
    // Reached for an unparseable currentUrl or a nextPage without toString().
    console.error('Error creating next page URL:', error);
  }

  return nextPageUrl;
}