UNPKG

@casoon/auditmysite

Version:

Professional website analysis suite with robust accessibility testing, Core Web Vitals performance monitoring, SEO analysis, and content optimization insights. Features isolated browser contexts, retry mechanisms, and comprehensive API endpoints for professional workflows.

117 lines 4.93 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.SitemapParser = void 0; const fs_1 = __importDefault(require("fs")); const path_1 = __importDefault(require("path")); const fast_xml_parser_1 = require("fast-xml-parser"); class SitemapParser { constructor() { this.parser = new fast_xml_parser_1.XMLParser({ ignoreAttributes: false, attributeNamePrefix: "@_", }); } async parseSitemap(sitemapUrl) { let xml; // Lade XML von URL oder Datei if (sitemapUrl.startsWith("http")) { const response = await fetch(sitemapUrl); if (!response.ok) { throw new Error(`Failed to fetch sitemap: ${response.statusText}`); } xml = await response.text(); } else { xml = fs_1.default.readFileSync(path_1.default.resolve(sitemapUrl), "utf-8"); } const parsed = this.parser.parse(xml); const urls = []; // Fall 1: Sitemap Index (WordPress/multi-sitemap structure) if (parsed.sitemapindex && parsed.sitemapindex.sitemap) { console.log(`📋 Found sitemap index with ${Array.isArray(parsed.sitemapindex.sitemap) ? parsed.sitemapindex.sitemap.length : 1} sub-sitemaps`); const sitemaps = Array.isArray(parsed.sitemapindex.sitemap) ? 
parsed.sitemapindex.sitemap : [parsed.sitemapindex.sitemap]; // Fetch URLs from each sub-sitemap (limit to first 10 for performance) const sitemapsToProcess = sitemaps.slice(0, 10); for (const sitemap of sitemapsToProcess) { try { const subSitemapUrl = sitemap.loc; if (subSitemapUrl && subSitemapUrl !== sitemapUrl) { // Avoid infinite loops console.log(` 📄 Processing sub-sitemap: ${subSitemapUrl}`); const subUrls = await this.parseSitemap(subSitemapUrl); // Recursive call urls.push(...subUrls); } } catch (error) { console.warn(` ⚠️ Failed to process sub-sitemap ${sitemap.loc}: ${error}`); // Continue with other sitemaps even if one fails } } if (sitemaps.length > 10) { console.log(` 📊 Limited processing to first 10 of ${sitemaps.length} sub-sitemaps for performance`); } return urls; } // Fall 2: Standard sitemap.xml Struktur if (parsed.urlset && parsed.urlset.url) { if (Array.isArray(parsed.urlset.url)) { urls.push(...parsed.urlset.url.map((u) => ({ loc: u.loc, lastmod: u.lastmod, changefreq: u.changefreq, priority: u.priority, }))); } else { urls.push({ loc: parsed.urlset.url.loc, lastmod: parsed.urlset.url.lastmod, changefreq: parsed.urlset.url.changefreq, priority: parsed.urlset.url.priority, }); } } // Fall 3: Falls die URLs im #text Feld sind (wie bei Astro) if (urls.length === 0 && parsed.urlset && parsed.urlset["#text"]) { const textContent = parsed.urlset["#text"]; const urlMatches = textContent.match(/<loc>(.*?)<\/loc>/g); if (urlMatches) { urls.push(...urlMatches.map((match) => ({ loc: match.replace(/<\/?loc>/g, ""), }))); } } return urls; } filterUrls(urls, options) { let filtered = urls; // Filtere nach Ausschluss-Mustern if (options.filterPatterns) { filtered = filtered.filter((url) => !options.filterPatterns.some((pattern) => url.loc.includes(pattern))); } // Filtere nach Einschluss-Mustern if (options.includePatterns) { filtered = filtered.filter((url) => options.includePatterns.some((pattern) => url.loc.includes(pattern))); } return filtered; } 
convertToLocalUrls(urls, baseUrl) { return urls.map((url) => ({ ...url, loc: this.convertUrlToLocal(url.loc, baseUrl), })); } convertUrlToLocal(url, baseUrl) { // Extrahiere Domain aus der URL const urlObj = new URL(url); const domain = urlObj.origin; // Ersetze Domain durch baseUrl return url.replace(domain, baseUrl); } } exports.SitemapParser = SitemapParser; //# sourceMappingURL=sitemap-parser.js.map