relatt-scraper
Version:
Metascarper
50 lines (40 loc) • 1.26 kB
JavaScript
const url = require("url");
const psl = require("psl");
const { analyzeSubdomain } = require("./analyzeSubdomain");
/**
 * Splits a link into its domain, subdomain, path, protocol and host parts.
 *
 * @param {Object} options
 * @param {string} options.link - Link to split.
 * @param {boolean} [options.shouldCheckSubdomainAndProtocol=true] - When true,
 *   the link is first passed through `analyzeSubdomain` and the URL it returns
 *   is the one that gets split.
 * @param {*} options.operationType - Forwarded unchanged to `analyzeSubdomain`.
 * @returns {Promise<Object>} Resolves to
 *   `{ domain, subdomain, pathname, fullUrl, protocol, hostname, logs }`.
 *   `pathname` carries the query string when the link has one, `fullUrl` is
 *   `hostname + pathname` (without the protocol), `protocol` falls back to
 *   `"http:"`, and `logs` is a copy of the logs returned by `analyzeSubdomain`
 *   (empty when that check is skipped).
 * @throws {TypeError} When no hostname or no domain can be extracted from the
 *   link.
 */
const partitionLink = async ({
  link,
  shouldCheckSubdomainAndProtocol = true,
  operationType,
}) => {
  let linkToProcess = link;
  let partitionLinkLogs = [];

  if (shouldCheckSubdomainAndProtocol) {
    const { analyzedUrl, logs } = await analyzeSubdomain(link, operationType);
    linkToProcess = analyzedUrl;
    partitionLinkLogs = [...logs];
  }

  const parsedUrl = url.parse(linkToProcess);

  // Fail with a clear message instead of handing a non-string to psl.
  if (typeof parsedUrl.hostname !== "string") {
    throw new TypeError(
      `Unable to extract a hostname from link: ${linkToProcess}`
    );
  }

  const { domain, subdomain } = psl.parse(parsedUrl.hostname);

  // Without a domain the code below could only build a bogus "null" /
  // "undefined.undefined" hostname and then crash on the lower-casing.
  if (domain === null || domain === undefined) {
    throw new TypeError(
      `Unable to determine the domain of hostname: ${parsedUrl.hostname}`
    );
  }

  const hostname = subdomain === null ? `${domain}` : `${subdomain}.${domain}`;
  const protocol = parsedUrl.protocol ? parsedUrl.protocol : "http:";

  /**
   * Remove trailing slashes from path name if it's not equal to /
   */
  let pathname = parsedUrl.pathname;
  if (pathname !== "/") pathname = pathname.replace(/\/+$/, "");

  if (parsedUrl.query) {
    // Keep everything after the FIRST "?" verbatim. Splitting on "?" and
    // taking the second piece would drop the rest of a query that itself
    // contains a "?".
    const rawQuery = linkToProcess.slice(linkToProcess.indexOf("?") + 1);
    pathname = `${pathname}?${rawQuery}`;
  }

  const fullUrl = `${hostname}${pathname}`;

  return {
    domain: domain.toLocaleLowerCase(),
    subdomain,
    pathname,
    fullUrl,
    protocol,
    hostname: hostname.toLocaleLowerCase(),
    logs: partitionLinkLogs,
  };
};
// Expose partitionLink as this module's only export.
module.exports = { partitionLink };