neat-url
Version:
Normalize URLs by removing tracking parameters.
174 lines (147 loc) • 4.97 kB
JavaScript
const psl = require('psl');
const qs = require('qs');
module.exports = function neatURL(params) {
const oUrl = params.url;
const includeHash = (typeof params.includeHash !== "undefined" && params.includeHash || false);
const myURL = new URL(oUrl);
delete myURL.search;
// Remove tracking tokens.
myURL.searchParams.delete("CNDID");
myURL.searchParams.delete("__twitter_impression");
myURL.searchParams.delete("_hsenc");
myURL.searchParams.delete("_openstat");
myURL.searchParams.delete("action_object_map");
myURL.searchParams.delete("action_ref_map");
myURL.searchParams.delete("action_type_map");
myURL.searchParams.delete("amp");
myURL.searchParams.delete("fb_action_ids");
myURL.searchParams.delete("fb_action_types");
myURL.searchParams.delete("fb_ref");
myURL.searchParams.delete("fb_source");
myURL.searchParams.delete("fbclid");
myURL.searchParams.delete("ga_campaign");
myURL.searchParams.delete("ga_content");
myURL.searchParams.delete("ga_medium");
myURL.searchParams.delete("ga_place");
myURL.searchParams.delete("ga_source");
myURL.searchParams.delete("ga_term");
myURL.searchParams.delete("gs_l");
myURL.searchParams.delete("hmb_campaign");
myURL.searchParams.delete("hmb_medium");
myURL.searchParams.delete("hmb_source");
myURL.searchParams.delete("mbid");
myURL.searchParams.delete("mc_cid");
myURL.searchParams.delete("mc_eid");
myURL.searchParams.delete("mkt_tok");
myURL.searchParams.delete("referrer");
myURL.searchParams.delete("spJobID");
myURL.searchParams.delete("spMailingID");
myURL.searchParams.delete("spReportId");
myURL.searchParams.delete("spUserID");
myURL.searchParams.delete("utm_brand");
myURL.searchParams.delete("utm_campaign");
myURL.searchParams.delete("utm_cid");
myURL.searchParams.delete("utm_content");
myURL.searchParams.delete("utm_int");
myURL.searchParams.delete("utm_mailing");
myURL.searchParams.delete("utm_medium");
myURL.searchParams.delete("utm_name");
myURL.searchParams.delete("utm_place");
myURL.searchParams.delete("utm_pubreferrer");
myURL.searchParams.delete("utm_reader");
myURL.searchParams.delete("utm_social");
myURL.searchParams.delete("utm_source");
myURL.searchParams.delete("utm_swu");
myURL.searchParams.delete("utm_term");
myURL.searchParams.delete("utm_userid");
myURL.searchParams.delete("utm_viz_id");
myURL.searchParams.delete("wt_mc_o");
myURL.searchParams.delete("yclid");
myURL.searchParams.delete("WT.mc_id");
myURL.searchParams.delete("WT.mc_ev");
myURL.searchParams.delete("WT.srch");
if (includeHash) {
myURL.hash = cleanHash(myURL.hash);
}
return removeDomainSpecificParameters(myURL).toString();
};
// Host-specific tracking parameters, keyed by the `sld` value reported by psl.
// A Map is used so that labels such as "constructor" cannot hit inherited
// Object properties.
const DOMAIN_TRACKING_PARAMS = new Map([
  ['amazon', [
    "_encoding",
    "pd_rd_r",
    "pd_rd_w",
    "pd_rd_wg",
    "psc",
    "tag",
    "ie",
    "creative",
    "linkCode",
    "creativeASIN",
    "linkId",
  ]],
  ['google', ["ei", "gws_rd", "sei", "ved"]],
  ['bing', ["cvid", "form", "pq", "qs", "sc", "sk", "sp"]],
  ['youtube', ["ab_channel", "attr_tag", "feature", "gclid", "kw"]],
  ['reddit', ["st"]],
  ['twitter', ["s", "ref_src", "ref_url"]],
  ['nytimes', ["emc", "partner"]],
  ['instagram', ["igshid"]],
]);

// Matches a "/ref=xxx" segment at the very end of a pathname.
const AMAZON_REF_SUFFIX = /\/ref=(\w*)$/;

/**
 * Remove tracking parameters that only apply to particular sites.
 *
 * The site is identified by the `sld` field of `psl.parse(hostname)`. When
 * that value has no entry in the table (including when psl reports no `sld`),
 * the URL is returned untouched.
 *
 * @param {URL} myURL - URL object to clean; it is modified in place.
 * @returns {URL} The same `myURL` instance.
 */
function removeDomainSpecificParameters (myURL) {
  const sld = psl.parse(myURL.hostname).sld;

  const paramNames = DOMAIN_TRACKING_PARAMS.get(sld) || [];
  for (const name of paramNames) {
    myURL.searchParams.delete(name);
  }

  if (sld === 'amazon') {
    // Check for /ref=xxx at the end of the pathname.
    const match = AMAZON_REF_SUFFIX.exec(myURL.pathname);
    if (match) {
      // Cut at the position of the anchored match itself. Searching for the
      // matched text again would find an earlier identical segment and
      // truncate the path too far.
      myURL.pathname = myURL.pathname.slice(0, match.index);
    }
  }

  return myURL;
}
/**
 * Drop the `Echobox` key from a URL fragment that is formatted like a
 * query string.
 *
 * Values that are not strings, and strings that do not contain an
 * "Echobox...=" sequence, are returned exactly as given.
 *
 * @param {*} hash - Fragment to clean, with or without the leading "#".
 * @returns {*} The input itself when nothing applies; otherwise the
 *   re-serialized fragment prefixed with "#", or "" when no keys remain.
 */
function cleanHash(hash) {
  const mentionsEchobox = typeof hash === "string" && /Echobox.*=/.test(hash);
  if (!mentionsEchobox) {
    return hash;
  }

  // Treat the fragment (minus its leading "#") as a query string.
  const fragmentParams = qs.parse(hash.replace(/^#/, ''));

  // Remove keys from hash query.
  delete fragmentParams.Echobox;

  const remainder = qs.stringify(fragmentParams);
  return remainder === '' ? '' : '#' + remainder;
}