UNPKG

plugin-pagenotfound

Version:
392 lines (353 loc) 14.1 kB
import { JSDOM } from 'jsdom';
import { readFileSync } from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import fs from 'node:fs';

/**
 * Folder obtained by resolving `'..'` against this module's URL, i.e. the
 * parent of the directory that contains this file. `package.json` and the
 * `locale/` folder are looked up relative to it.
 */
const BASE_FOLDER = fileURLToPath(new URL('..', import.meta.url));

/** Value stored in the `test` field of every issue produced by this module. */
const TEST_NAME = '404';

/** Elements stripped from the main/body element before its text is measured. */
const TAGS_TO_REMOVE = ['script', 'nav', 'header', 'footer', 'form', 'input', 'button', 'a'];

/**
 * Replaces newlines and tabs with spaces, collapses runs of spaces into one
 * and trims both ends.
 *
 * @param {string} text - Raw text content.
 * @returns {string} The normalised text.
 */
function collapseWhitespace(text) {
    return text
        .replace(/[\n\t]/g, ' ')
        .replace(/ {2,}/g, ' ')
        .trim();
}

/**
 * Builds an issue object carrying exactly one sub issue.
 *
 * @param {string} rule - Rule identifier; used for the issue and its sub issue.
 * @param {string} severity - Severity used for the issue and its sub issue.
 * @param {string} url - URL the sub issue refers to.
 * @param {string} [text] - Optional description; the `text` key is omitted
 *   from the sub issue when this is not supplied.
 * @returns {object} The issue object.
 */
function buildIssue(rule, severity, url, text) {
    // Keys are added in a fixed order (url, rule, category, text, severity) so
    // that serialised output keeps the same layout whether or not text is set.
    const subIssue = { url: url, rule: rule, category: 'standard' };
    if (text !== undefined) {
        subIssue.text = text;
    }
    subIssue.severity = severity;

    return {
        'test': TEST_NAME,
        rule: rule,
        category: 'standard',
        severity: severity,
        'subIssues': [subIssue]
    };
}

/**
 * Parses HTML and copies title, first H1, language and body text into
 * `knowledgeData` (mutated in place).
 *
 * @param {object} knowledgeData - Receives `page-title`, `h1`, `lang` and
 *   `body-text`.
 * @param {string} content - HTML source of the page.
 * @returns {void}
 */
function fillKnowledgeFromHtml(knowledgeData, content) {
    const doc = new JSDOM(content).window.document;

    const title = doc.querySelector('title');
    if (title) {
        knowledgeData['page-title'] = collapseWhitespace(title.textContent);
    }

    const header1 = doc.querySelector('h1');
    if (header1) {
        knowledgeData['h1'] = collapseWhitespace(header1.textContent);
    }

    const html = doc.querySelector('html');
    if (html && html.hasAttribute('lang')) {
        // Only 'en' and 'sv' are distinguished. The match is case-insensitive
        // and anchored at the start of the tag, so 'EN-GB' counts as English
        // while an 'en' appearing inside a later subtag does not.
        const langCode = html.getAttribute('lang');
        knowledgeData['lang'] = /^\s*en/i.test(langCode) ? 'en' : 'sv';
    }

    // Prefer <main>; fall back to <body> when the page has no <main>.
    const body = doc.querySelector('main') || doc.querySelector('body');
    if (body) {
        // Drop navigation/interactive elements so only the message text is left.
        for (const tag of TAGS_TO_REMOVE) {
            for (const element of body.querySelectorAll(tag)) {
                element.remove();
            }
        }
        knowledgeData['body-text'] = collapseWhitespace(body.textContent);
    }
}

/**
 * Analyses HAR data to judge how a site answers a request for a page that
 * should not exist, keeping per-group state between calls.
 */
export class HarAnalyzer {
    /**
     * Sets up the rule table and reads `version` and `dependencies` from the
     * `package.json` found in {@link BASE_FOLDER}.
     *
     * @throws {Error} If `package.json` cannot be read or parsed.
     */
    constructor() {
        this.groups = {};
        this.rules = {
            'has-unexpected-404-response': 'warning',
            'no-valid-title-found': 'warning',
            'no-valid-h1-found': 'warning',
            'no-valid-text-found': 'error',
            'invalid-text-found': 'warning',
            'no-valid-response-status-code': 'error',
            'no-unsupported-locale-use': 'warning',
            'no-valid-not-found-text-in-body': 'warning',
            'no-network': 'warning'
        };

        this.pluginFolder = path.resolve(BASE_FOLDER, '..');
        const packagePath = path.resolve(BASE_FOLDER, 'package.json');
        this.package = JSON.parse(readFileSync(packagePath, 'utf8'));
        this.dependencies = this.package.dependencies;
        this.version = this.package.version;
    }

    /**
     * Builds the test URL for a group and registers it as that group's
     * `page-not-found-url`.
     *
     * @param {string} url - Absolute URL to derive the test URL from.
     * @param {string} uuid - Its first five characters prefix the test path.
     * @param {string} group - Group key.
     * @returns {string|undefined} The test URL, or `undefined` when the group
     *   already has an entry (a group is only tested once).
     * @throws {TypeError} If `url` cannot be parsed by `URL`.
     */
    getAnalyzableUrl(url, uuid, group) {
        if (this.groups[group] !== undefined) {
            // Only test it once for every group
            return undefined;
        }

        const urlObj = new URL(url);
        const prefix = uuid.substring(0, 5);
        // Swedish path segments, roughly "is there a page / at this address /
        // test request".
        let testPath = `${prefix}/finns-det-en-sida/pa-den-har-adressen/testanrop/`;

        // Append to the existing path only while the combined length stays
        // below 200 characters; otherwise the test path replaces it.
        if ((urlObj.pathname.length + testPath.length) < 200) {
            const separator = urlObj.pathname.endsWith('/') ? '' : '/';
            testPath = `${urlObj.pathname}${separator}${testPath}`;
        }

        urlObj.pathname = testPath;
        const testUrl = urlObj.toString();
        this.groups[group] = { 'page-not-found-url': testUrl };
        return testUrl;
    }

    /**
     * Reduces HAR data to the first HTML response plus the URLs of the other
     * responses that returned 404.
     *
     * Entries without a mime type, a positive content size or a status are
     * skipped and do not advance the request index.
     *
     * @param {object} harData - HAR object, either wrapped in `log` or the log
     *   itself.
     * @returns {{page: (object|undefined), '404s': string[]}} `page` holds
     *   `url`, `content`, `status` and `index`; it stays `undefined` when no
     *   usable HTML response exists (including malformed input).
     */
    transform2SimplifiedData(harData) {
        const data = { 'page': undefined, '404s': [] };

        if (harData === null || typeof harData !== 'object') {
            return data;
        }

        const log = 'log' in harData ? harData['log'] : harData;
        const entries = Array.isArray(log?.entries) ? log.entries : [];

        let reqIndex = 1;
        for (const entry of entries) {
            const req = entry?.request;
            const res = entry?.response;
            if (!req || !res) {
                continue;
            }

            const content = res.content;
            if (!content || !content.mimeType || !content.size || content.size <= 0 || !res.status) {
                continue;
            }

            if (data.page === undefined && content.mimeType.includes('html')) {
                data.page = {
                    url: req.url,
                    content: content.text,
                    status: res.status,
                    index: reqIndex
                };
            } else if (res.status === 404) {
                data['404s'].push(req.url);
            }
            reqIndex++;
        }

        return data;
    }

    /**
     * Evaluates the rules against `knowledgeData` and stores the resulting
     * issues in `knowledgeData.issues` (mutated in place).
     *
     * For a page not flagged with `is-page-not-found-page`, only unexpected
     * 404 responses are reported and nothing else is added. For the
     * page-not-found page, every enabled rule without an issue receives a
     * placeholder with severity `resolved`.
     *
     * @param {object} knowledgeData - Object created by
     *   `createKnowledgeFromData`.
     * @returns {void}
     * @throws {Error} If an existing locale file cannot be read or parsed.
     */
    addIssuesFromKnowledgeData(knowledgeData) {
        const url = knowledgeData['url'];
        const issues = knowledgeData['issues'];

        if (!('is-page-not-found-page' in knowledgeData)) {
            if (knowledgeData['other-404-responses'].length > 0) {
                issues['has-unexpected-404-response'] = buildIssue(
                    'has-unexpected-404-response', 'warning', url, `Unexpected 404 response(s) found`);
            }
            return;
        }

        if (!knowledgeData['page-title'] || knowledgeData['page-title'].length < 1) {
            issues['no-valid-title-found'] = buildIssue(
                'no-valid-title-found', 'warning', url, 'No valid page title found');
        }

        if (!knowledgeData['h1'] || knowledgeData['h1'].length < 1) {
            issues['no-valid-h1-found'] = buildIssue(
                'no-valid-h1-found', 'warning', url, 'No valid H1 found');
        }

        if (!knowledgeData['body-text']) {
            issues['no-valid-text-found'] = buildIssue(
                'no-valid-text-found', 'error', url, 'No valid body text found');
        } else if (knowledgeData['body-text'].length < 150) {
            issues['invalid-text-found'] = buildIssue(
                'invalid-text-found', 'warning', url, 'body text found is too short');
        }

        if (knowledgeData['status-code'] !== 404) {
            issues['no-valid-response-status-code'] = buildIssue(
                'no-valid-response-status-code', 'error', url, 'Response status code is not 404');
        }

        // Look up the locale file matching the detected page language.
        const localeFilePath = path.resolve(BASE_FOLDER, 'locale', `${knowledgeData['lang']}.json`);
        if (!fs.existsSync(localeFilePath)) {
            issues['no-unsupported-locale-use'] = buildIssue(
                'no-unsupported-locale-use', 'warning', url);
        } else {
            const localeData = JSON.parse(readFileSync(localeFilePath, 'utf8'));
            // A locale file without a usable list is treated as having no
            // accepted phrases, so the rule below reports instead of throwing.
            const validNotFoundTexts = Array.isArray(localeData['valid-not-found-texts'])
                ? localeData['valid-not-found-texts']
                : [];
            const bodyText = knowledgeData['body-text'] || '';
            const containsValidText = validNotFoundTexts.some(text => bodyText.includes(text));
            if (!containsValidText) {
                issues['no-valid-not-found-text-in-body'] = buildIssue(
                    'no-valid-not-found-text-in-body', 'warning', url);
            }
        }

        // Mark every enabled rule that raised no issue as resolved.
        const enabledRules = Object.keys(this.rules || {}).filter(rule => this.rules[rule] !== 'off');
        for (const rule of enabledRules) {
            if (!issues[rule]) {
                issues[rule] = {
                    'test': TEST_NAME,
                    rule: rule,
                    category: 'standard',
                    severity: 'resolved', // Default severity for missing issues
                    subIssues: []
                };
            }
        }
    }

    /**
     * Builds the knowledge object (page facts plus issues) for one analysed
     * URL.
     *
     * When `url` equals the group's registered `page-not-found-url`, the HTML
     * of the page is parsed and the registered URL is replaced by the URL of
     * the page found in the HAR data.
     *
     * @param {object|undefined} analyzedData - Result of
     *   `transform2SimplifiedData`.
     * @param {string} url - URL that was requested.
     * @param {string} group - Group key.
     * @returns {Promise<object>} The knowledge object.
     */
    async createKnowledgeFromData(analyzedData, url, group) {
        const knowledgeData = {
            'url': url,
            'group': group,
            'issues': {},
            'lang': 'sv',
            'page-title': undefined,
            'h1': undefined,
            'body-text': undefined,
            'status-code': undefined,
            'other-404-responses': []
        };

        if (analyzedData === undefined || analyzedData === null) {
            return knowledgeData;
        }

        if (!('page' in analyzedData) || !analyzedData['page']) {
            knowledgeData['issues']['no-network'] = {
                'test': TEST_NAME,
                'rule': 'no-network',
                'category': 'technical',
                'severity': 'warning',
                'subIssues': [
                    {
                        'url': url,
                        'rule': 'no-network',
                        'category': 'standard',
                        'severity': 'warning',
                        'text': `No HTML content found in the HAR file.`,
                        'line': 0,
                        'column': 0
                    }
                ]
            };
            return knowledgeData;
        }

        const page = analyzedData['page'];
        if (!('url' in page)) {
            return knowledgeData;
        }

        // A group that was never registered cannot have a page-not-found URL;
        // checking first avoids a TypeError when this method is called directly.
        const groupState = this.groups[group];
        if (groupState !== undefined && url === groupState['page-not-found-url']) {
            groupState['page-not-found-url'] = page['url'];
            knowledgeData['is-page-not-found-page'] = true;

            const content = page['content'];
            if (content !== '') {
                fillKnowledgeFromHtml(knowledgeData, content);
            }
        }

        // Store each 404 URL once.
        knowledgeData['other-404-responses'] = [...new Set(analyzedData['404s'] ?? [])];
        knowledgeData['status-code'] = page['status'];

        this.addIssuesFromKnowledgeData(knowledgeData);
        return knowledgeData;
    }

    /**
     * Analyses one HAR capture and records both the simplified data and the
     * knowledge object under the group.
     *
     * @param {string} url - URL that was requested.
     * @param {object} harData - HAR data for the request.
     * @param {string} group - Group key; an entry is created when missing.
     * @returns {Promise<object>} Object with `version`, `dependencies`, `url`,
     *   `analyzedData` and `knowledgeData`.
     */
    async analyzeData(url, harData, group) {
        if (this.groups[group] === undefined) {
            this.groups[group] = {};
        }
        const groupState = this.groups[group];

        const analyzedData = this.transform2SimplifiedData(harData);
        if (!('analyzedData' in groupState)) {
            groupState['analyzedData'] = [];
        }
        groupState['analyzedData'].push(analyzedData);

        const knowledgeData = await this.createKnowledgeFromData(analyzedData, url, group);
        if (!('knowledgeData' in groupState)) {
            groupState['knowledgeData'] = [];
        }
        groupState['knowledgeData'].push(knowledgeData);

        return {
            'version': this.version,
            'dependencies': this.dependencies,
            'url': url,
            'analyzedData': analyzedData,
            'knowledgeData': knowledgeData
        };
    }

    /**
     * Returns the analyzer itself, exposing all accumulated state.
     *
     * @returns {HarAnalyzer} This instance.
     */
    getSummary() {
        return this;
    }
}