UNPKG

relatt-scraper

Version:

Metascarper

127 lines (111 loc) 3.41 kB
const {
  generateLinkRequestLogsLine,
} = require("../utils/generateLinkRequestLogLine");
const { getUrlHeaderData } = require("../utils/getUrlHeaderData");

// Matches a leading "http://" or "https://" scheme. Anchored so that a URL
// embedded later in the string (e.g. in a query parameter) is not mistaken
// for the scheme of the URL itself; case-insensitive because URI schemes are.
const PROTOCOL_PREFIX = /^https?:\/\//i;

// Schemes tried by setProtocol, in order of preference.
const PROTOCOL_CANDIDATES = ["https://", "http://"];

/**
 * Check whether the site is reachable with a "www." subdomain and with an
 * explicit protocol when the given URL lacks them.
 *
 * Runs setSubDomain first, then hands its resulting link and log lines to
 * setProtocol.
 *
 * @param {string} url - URL to analyze; may lack a protocol and/or subdomain.
 * @param {*} operationType - Passed through to the log-line generator (and to
 *   getUrlHeaderData during the subdomain probe).
 * @returns {Promise<{analyzedUrl: string, logs: Array}>} The URL to use and
 *   the log lines collected while probing.
 */
const analyzeSubdomain = async (url, operationType) => {
  const { link, logs } = await setSubDomain({ url, operationType });
  return setProtocol(link, logs, operationType);
};

/**
 * If the URL has no subdomain (per the regular expression below), probe the
 * same URL with "www." inserted and keep that variant when the probe reports
 * a content type.
 *
 * @param {{url: string, operationType: *}} params
 * @returns {Promise<{link: string, logs: Array}>} `link` is the "www." variant
 *   when its probe succeeded, otherwise the original URL; `logs` holds one
 *   entry for the probe, or is empty when no probe was made.
 */
async function setSubDomain({ url, operationType }) {
  const regEx = /(?:http[s]*\:\/\/)*(.*?)\.(?=[^\/]*\..{2,5})/i;
  const linkRequestLogs = [];

  // The URL already has a subdomain: nothing to probe.
  if (url.match(regEx)) {
    return { link: url, logs: linkRequestLogs };
  }

  // Insert "www." right after the scheme if there is one, else at the front.
  const newLink = PROTOCOL_PREFIX.test(url)
    ? url.replace(PROTOCOL_PREFIX, (scheme) => `${scheme}www.`)
    : `www.${url}`;

  try {
    const res = await getUrlHeaderData({
      link: newLink,
      operationType,
      fetchOptions: {
        // NOTE(review): presumably forwarded to the HTTP client, where this
        // would disable TLS certificate verification — confirm it is intended.
        https: {
          rejectUnauthorized: false,
        },
        timeout: 3000,
      },
    });

    if (res.contentType) {
      linkRequestLogs.push(
        generateLinkRequestLogsLine(newLink, operationType, 200)
      );
      return { link: newLink, logs: linkRequestLogs };
    }

    linkRequestLogs.push(
      generateLinkRequestLogsLine(newLink, operationType, 404)
    );
    return { link: url, logs: linkRequestLogs };
  } catch (error) {
    // Probe failed: record why and fall back to the URL as given.
    linkRequestLogs.push(
      generateLinkRequestLogsLine(newLink, operationType, error.message)
    );
    return { link: url, logs: linkRequestLogs };
  }
}

/**
 * If the URL has no leading http(s) scheme, probe it with "https://" and then
 * "http://", returning the first variant whose probe yields a truthy result.
 *
 * Each candidate is probed independently: a probe that throws is logged with
 * the error message and the next candidate is still tried.
 *
 * @param {string} url - URL as returned by setSubDomain.
 * @param {Array} logs - Log lines collected so far; new entries are appended
 *   to this same array.
 * @param {*} operationType - Passed to the log-line generator.
 * @returns {Promise<{analyzedUrl: string, logs: Array}>} `analyzedUrl` is the
 *   first working scheme-prefixed URL, or the unchanged URL when it already
 *   had a scheme or no candidate worked.
 */
async function setProtocol(url, logs, operationType) {
  const linkRequestLogs = logs;

  if (PROTOCOL_PREFIX.test(url)) {
    return { analyzedUrl: url, logs: linkRequestLogs };
  }

  for (const scheme of PROTOCOL_CANDIDATES) {
    const candidate = `${scheme}${url}`;
    try {
      // NOTE(review): unlike setSubDomain, neither operationType nor a timeout
      // is passed to getUrlHeaderData here, and success is judged by a truthy
      // result rather than by `contentType` — confirm this is intentional.
      const result = await getUrlHeaderData({
        link: candidate,
        fetchOptions: {
          https: {
            rejectUnauthorized: false,
          },
        },
      });

      if (result) {
        linkRequestLogs.push(
          generateLinkRequestLogsLine(candidate, operationType, 200)
        );
        return { analyzedUrl: candidate, logs: linkRequestLogs };
      }

      linkRequestLogs.push(
        generateLinkRequestLogsLine(candidate, operationType, 404)
      );
    } catch (error) {
      // Keep going: a failed https probe must not prevent the http attempt.
      linkRequestLogs.push(
        generateLinkRequestLogsLine(candidate, operationType, error.message)
      );
    }
  }

  return { analyzedUrl: url, logs: linkRequestLogs };
}

module.exports = { analyzeSubdomain };