relatt-scraper
Version:
Metascarper
90 lines (74 loc) • 2.04 kB
JavaScript
const url = require("url");
const psl = require("psl");
const got = require("got");
const {
generateLinkRequestLogsLine,
} = require("../utils/generateLinkRequestLogLine");
// Matches a leading http:// or https:// scheme (case-insensitive).
const HTTP_SCHEME_PREFIX = /^https?:\/\//i;

/**
 * Inserts a `www.` label directly after the http(s) scheme of `link`, or
 * prepends `www.` when `link` carries no http(s) scheme at all.
 *
 * @param {string} link - URL or bare domain without a `www.` label.
 * @returns {string} The same link pointing at the `www.` host.
 */
const addWWWPrefix = (link) =>
  HTTP_SCHEME_PREFIX.test(link)
    ? link.replace(HTTP_SCHEME_PREFIX, (scheme) => `${scheme}www.`)
    : `www.${link}`;

/**
 * Requests `link` once and reports whether the request succeeded, together
 * with the log line describing the outcome.
 *
 * @param {string} link - URL to request.
 * @param {*} operationType - Passed through unchanged to the log-line builder.
 * @returns {Promise<{reachable: boolean, logLine: *}>} Never rejects for a
 *   failed request; the failure is reported through `reachable: false`.
 */
const probeLink = async (link, operationType) => {
  try {
    // NOTE(review): TLS certificate verification is disabled here, presumably
    // because only reachability matters for this check — confirm this is
    // intentional before reusing the response for anything else.
    await got(link, {
      https: {
        rejectUnauthorized: false,
      },
    });
    // A resolved request is logged with a fixed 200 status.
    return {
      reachable: true,
      logLine: generateLinkRequestLogsLine(link, operationType, 200),
    };
  } catch (error) {
    return {
      reachable: false,
      logLine: generateLinkRequestLogsLine(link, operationType, error.message),
    };
  }
};

/**
 * Determines which of the `www.` and bare-domain variants of a URL respond.
 *
 * When the URL already has a subdomain other than `www`, no request is made
 * and the URL is returned as the only available one. Otherwise the `www.`
 * variant is requested first, then the variant without `www.`, and each
 * attempt appends one entry to `logs`.
 *
 * @param {string} _url - URL (with or without http/https scheme) to inspect.
 * @param {*} operationType - Passed through unchanged to the log-line builder.
 * @returns {Promise<{
 *   isWWWSubdomain: boolean,
 *   isOtherSubdomain: boolean,
 *   subdomainRequire: boolean,
 *   availableUrls: string[],
 *   logs: Array
 * }>} `isWWWSubdomain` is true when the `www.` variant responded;
 *   `isOtherSubdomain` is true when the URL has a non-`www` subdomain;
 *   `subdomainRequire` is true when the variant without `www.` failed;
 *   `availableUrls` lists the variants that responded (www first).
 */
const verifySubdomain = async (_url, operationType) => {
  const { hostname } = url.parse(_url);
  // url.parse yields no hostname for scheme-less input, so fall back to the
  // raw string and let psl interpret it as a bare domain.
  const { subdomain } = psl.parse(hostname || _url);

  if (subdomain && subdomain !== "www") {
    return {
      isWWWSubdomain: false,
      isOtherSubdomain: true,
      subdomainRequire: false,
      availableUrls: [_url],
      logs: [],
    };
  }

  const hasWWW = subdomain === "www";
  const linkWithWWW = hasWWW ? _url : addWWWPrefix(_url);
  const linkWithoutWWW = hasWWW ? _url.replace("www.", "") : _url;

  const availableUrls = [];
  const logs = [];

  // The two probes run sequentially so that log and URL ordering is stable:
  // www variant first, bare variant second.
  const wwwProbe = await probeLink(linkWithWWW, operationType);
  logs.push(wwwProbe.logLine);
  if (wwwProbe.reachable) {
    availableUrls.push(linkWithWWW);
  }

  const bareProbe = await probeLink(linkWithoutWWW, operationType);
  logs.push(bareProbe.logLine);
  if (bareProbe.reachable) {
    availableUrls.push(linkWithoutWWW);
  }

  return {
    isWWWSubdomain: wwwProbe.reachable,
    isOtherSubdomain: false,
    subdomainRequire: !bareProbe.reachable,
    availableUrls,
    logs,
  };
};
// Public API of this module: exposes verifySubdomain as a named property.
module.exports = { verifySubdomain };