relatt-scraper
Version:
Metascarper
127 lines (111 loc) • 3.41 kB
JavaScript
const {
generateLinkRequestLogsLine,
} = require("../utils/generateLinkRequestLogLine");
const { getUrlHeaderData } = require("../utils/getUrlHeaderData");
/**
 * Resolve the URL to use for a site in two steps: first try adding a "www."
 * subdomain when the URL has none, then pick a protocol when none is given.
 * @param {*} url - URL to analyze; handed to setSubDomain as-is.
 * @param {*} operationType - Forwarded unchanged to both steps.
 * @returns {Promise<*>} Whatever setProtocol resolves to for the link and
 *   logs produced by setSubDomain.
 */
const analyzeSubdomain = async (url, operationType) => {
  const { link, logs } = await setSubDomain({ url, operationType });
  return setProtocol(link, logs, operationType);
};
/**
 * If the URL has no subdomain, probe the same URL with "www." inserted and
 * keep that variant when the probe reports a content type.
 *
 * The "www." prefix is inserted right after a leading http:// or https://
 * scheme (matched case-insensitively, only at the start of the string), or
 * at the very front when the URL has no leading scheme.
 *
 * @param {{ url: string, operationType: * }} params
 * @param {string} params.url - URL to inspect.
 * @param {*} params.operationType - Forwarded to getUrlHeaderData and to
 *   generateLinkRequestLogsLine.
 * @returns {Promise<{ link: string, logs: Array }>} `link` is the "www."
 *   variant when the probe returned a `contentType`, otherwise the original
 *   `url`. `logs` holds one log line for the probe (200, 404, or the error
 *   message) and is empty when no probe was needed.
 */
async function setSubDomain({ url, operationType }) {
  // NOTE(review): this pattern also matches multi-part TLDs such as
  // "example.co.uk", which are then treated as already having a subdomain.
  const subdomainPattern = /(?:http[s]*\:\/\/)*(.*?)\.(?=[^\/]*\..{2,5})/i;
  const linkRequestLogs = [];

  // URL already has a subdomain: nothing to probe.
  if (url.match(subdomainPattern)) {
    return { link: url, logs: linkRequestLogs };
  }

  // Detect the scheme only at the start of the URL and ignore case, so that a
  // scheme embedded later (e.g. in a query string) or written in upper case
  // does not prevent "www." from being inserted in the right place.
  const leadingScheme = /^(https?:\/\/)/i;
  const newLink = leadingScheme.test(url)
    ? url.replace(leadingScheme, "$1www.")
    : `www.${url}`;

  try {
    const res = await getUrlHeaderData({
      link: newLink,
      operationType,
      fetchOptions: {
        https: {
          rejectUnauthorized: false,
        },
        timeout: 3000,
      },
    });

    if (res.contentType) {
      linkRequestLogs.push(
        generateLinkRequestLogsLine(newLink, operationType, 200)
      );
      return { link: newLink, logs: linkRequestLogs };
    }

    // The probe answered but reported no content type: keep the original URL.
    linkRequestLogs.push(
      generateLinkRequestLogsLine(newLink, operationType, 404)
    );
    return { link: url, logs: linkRequestLogs };
  } catch (error) {
    // Best-effort probe: record the failure and fall back to the original URL.
    linkRequestLogs.push(
      generateLinkRequestLogsLine(newLink, operationType, error.message)
    );
    return { link: url, logs: linkRequestLogs };
  }
}
/**
 * Pick a protocol for a URL that does not start with one by probing
 * "https://" first and "http://" second.
 *
 * Each probe is handled independently: a probe that throws is logged with
 * its error message and the next scheme is still tried, so a failing HTTPS
 * request no longer prevents the HTTP fallback.
 *
 * @param {string} url - URL to complete. Returned unchanged when it already
 *   starts with http:// or https:// (case-insensitive) or is not a string.
 * @param {Array} logs - Log lines collected so far; new lines are appended
 *   to this same array. A non-array value is replaced by a fresh array.
 * @param {*} operationType - Forwarded to generateLinkRequestLogsLine.
 * @returns {Promise<{ analyzedUrl: *, logs: Array }>} `analyzedUrl` is the
 *   first scheme-prefixed URL whose probe returned a truthy result, or the
 *   original `url` when neither did.
 */
async function setProtocol(url, logs, operationType) {
  const linkRequestLogs = Array.isArray(logs) ? logs : [];

  // Nothing to probe for non-string input or when a scheme already leads the
  // URL. The check is anchored so a scheme embedded later in the string
  // (e.g. in a query parameter) does not count.
  if (typeof url !== "string" || /^https?:\/\//i.test(url)) {
    return { analyzedUrl: url, logs: linkRequestLogs };
  }

  for (const scheme of ["https://", "http://"]) {
    const candidate = `${scheme}${url}`;
    try {
      // NOTE(review): unlike setSubDomain, no operationType or timeout is
      // passed to getUrlHeaderData here - confirm whether that is intended.
      const result = await getUrlHeaderData({
        link: candidate,
        fetchOptions: {
          https: {
            rejectUnauthorized: false,
          },
        },
      });

      if (result) {
        linkRequestLogs.push(
          generateLinkRequestLogsLine(candidate, operationType, 200)
        );
        return { analyzedUrl: candidate, logs: linkRequestLogs };
      }

      linkRequestLogs.push(
        generateLinkRequestLogsLine(candidate, operationType, 404)
      );
    } catch (error) {
      // Best-effort probe: record why it failed, then try the next scheme.
      const reason = error && error.message ? error.message : String(error);
      linkRequestLogs.push(
        generateLinkRequestLogsLine(candidate, operationType, reason)
      );
    }
  }

  // Neither scheme answered: hand back the URL as it came in.
  return { analyzedUrl: url, logs: linkRequestLogs };
}
// Only analyzeSubdomain is exported; setSubDomain and setProtocol stay private to this module.
module.exports = { analyzeSubdomain };