UNPKG

parse-url

Version:

An advanced url parser supporting git urls too.

239 lines (234 loc) 8.06 kB
'use strict'; var parsePath = require('parse-path'); const DATA_URL_DEFAULT_MIME_TYPE = "text/plain"; const DATA_URL_DEFAULT_CHARSET = "us-ascii"; const testParameter = (name, filters) => filters.some((filter) => filter instanceof RegExp ? filter.test(name) : filter === name); const supportedProtocols = /* @__PURE__ */ new Set([ "https:", "http:", "file:" ]); const hasCustomProtocol = (urlString) => { try { const { protocol } = new URL(urlString); return protocol.endsWith(":") && !protocol.includes(".") && !supportedProtocols.has(protocol); } catch { return false; } }; const normalizeDataURL = (urlString, { stripHash }) => { const match = /^data:(?<type>[^,]*?),(?<data>[^#]*?)(?:#(?<hash>.*))?$/.exec(urlString); if (!match) { throw new Error(`Invalid URL: ${urlString}`); } let { type, data, hash } = match.groups; const mediaType = type.split(";"); hash = stripHash ? "" : hash; let isBase64 = false; if (mediaType[mediaType.length - 1] === "base64") { mediaType.pop(); isBase64 = true; } const mimeType = mediaType.shift()?.toLowerCase() ?? ""; const attributes = mediaType.map((attribute) => { let [key, value = ""] = attribute.split("=").map((string) => string.trim()); if (key === "charset") { value = value.toLowerCase(); if (value === DATA_URL_DEFAULT_CHARSET) { return ""; } } return `${key}${value ? `=${value}` : ""}`; }).filter(Boolean); const normalizedMediaType = [ ...attributes ]; if (isBase64) { normalizedMediaType.push("base64"); } if (normalizedMediaType.length > 0 || mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE) { normalizedMediaType.unshift(mimeType); } return `data:${normalizedMediaType.join(";")},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ""}`; }; function normalizeUrl(urlString, options) { options = { defaultProtocol: "http", normalizeProtocol: true, forceHttp: false, forceHttps: false, stripAuthentication: true, stripHash: false, stripTextFragment: true, stripWWW: true, removeQueryParameters: [/^utm_\w+/i], removeTrailingSlash: true, removeSingleSlash: true, removeDirectoryIndex: false, removeExplicitPort: false, sortQueryParameters: true, ...options }; if (typeof options.defaultProtocol === "string" && !options.defaultProtocol.endsWith(":")) { options.defaultProtocol = `${options.defaultProtocol}:`; } urlString = urlString.trim(); if (/^data:/i.test(urlString)) { return normalizeDataURL(urlString, options); } if (hasCustomProtocol(urlString)) { return urlString; } const hasRelativeProtocol = urlString.startsWith("//"); const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString); if (!isRelativeUrl) { urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol); } const urlObject = new URL(urlString); if (options.forceHttp && options.forceHttps) { throw new Error("The `forceHttp` and `forceHttps` options cannot be used together"); } if (options.forceHttp && urlObject.protocol === "https:") { urlObject.protocol = "http:"; } if (options.forceHttps && urlObject.protocol === "http:") { urlObject.protocol = "https:"; } if (options.stripAuthentication) { urlObject.username = ""; urlObject.password = ""; } if (options.stripHash) { urlObject.hash = ""; } else if (options.stripTextFragment) { urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, ""); } if (urlObject.pathname) { const protocolRegex = /\b[a-z][a-z\d+\-.]{1,50}:\/\//g; let lastIndex = 0; let result = ""; for (; ; ) { const match = protocolRegex.exec(urlObject.pathname); if (!match) { break; } const protocol = match[0]; const protocolAtIndex = match.index; const intermediate = urlObject.pathname.slice(lastIndex, protocolAtIndex); result += intermediate.replace(/\/{2,}/g, "/"); result += protocol; lastIndex = protocolAtIndex + protocol.length; } const remnant = urlObject.pathname.slice(lastIndex, urlObject.pathname.length); result += remnant.replace(/\/{2,}/g, "/"); urlObject.pathname = result; } if (urlObject.pathname) { try { urlObject.pathname = decodeURI(urlObject.pathname); } catch { } } if (options.removeDirectoryIndex === true) { options.removeDirectoryIndex = [/^index\.[a-z]+$/]; } if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) { let pathComponents = urlObject.pathname.split("/"); const lastComponent = pathComponents[pathComponents.length - 1]; if (testParameter(lastComponent, options.removeDirectoryIndex)) { pathComponents = pathComponents.slice(0, -1); urlObject.pathname = pathComponents.slice(1).join("/") + "/"; } } if (urlObject.hostname) { urlObject.hostname = urlObject.hostname.replace(/\.$/, ""); if (options.stripWWW && /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/.test(urlObject.hostname)) { urlObject.hostname = urlObject.hostname.replace(/^www\./, ""); } } if (Array.isArray(options.removeQueryParameters)) { for (const key of [...urlObject.searchParams.keys()]) { if (testParameter(key, options.removeQueryParameters)) { urlObject.searchParams.delete(key); } } } if (!Array.isArray(options.keepQueryParameters) && options.removeQueryParameters === true) { urlObject.search = ""; } if (Array.isArray(options.keepQueryParameters) && options.keepQueryParameters.length > 0) { for (const key of [...urlObject.searchParams.keys()]) { if (!testParameter(key, options.keepQueryParameters)) { urlObject.searchParams.delete(key); } } } if (options.sortQueryParameters) { urlObject.searchParams.sort(); try { urlObject.search = decodeURIComponent(urlObject.search); } catch { } } if (options.removeTrailingSlash) { urlObject.pathname = urlObject.pathname.replace(/\/$/, ""); } if (options.removeExplicitPort && urlObject.port) { urlObject.port = ""; } const oldUrlString = urlString; urlString = urlObject.toString(); if (!options.removeSingleSlash && urlObject.pathname === "/" && !oldUrlString.endsWith("/") && urlObject.hash === "") { urlString = urlString.replace(/\/$/, ""); } if ((options.removeTrailingSlash || urlObject.pathname === "/") && urlObject.hash === "" && options.removeSingleSlash) { urlString = urlString.replace(/\/$/, ""); } if (hasRelativeProtocol && !options.normalizeProtocol) { urlString = urlString.replace(/^http:\/\//, "//"); } if (options.stripProtocol) { urlString = urlString.replace(/^(?:https?:)?\/\//, ""); } return urlString; } const parseUrl = (url, normalize = false) => { const GIT_RE = /^(?:([a-zA-Z_][a-zA-Z0-9_-]{0,31})@|https?:\/\/)([\w\.\-@]+)[\/:](([\~,\.\w,\-,\_,\/,\s]|%[0-9A-Fa-f]{2})+?(?:\.git|\/)?)$/; const throwErr = (msg) => { const err = new Error(msg); err.subject_url = url; throw err; }; if (typeof url !== "string" || !url.trim()) { throwErr("Invalid url."); } if (url.length > parseUrl.MAX_INPUT_LENGTH) { throwErr("Input exceeds maximum length. If needed, change the value of parseUrl.MAX_INPUT_LENGTH."); } if (normalize) { if (typeof normalize !== "object") { normalize = { stripHash: false }; } url = normalizeUrl(url, normalize); } const parsed = parsePath(url); if (parsed.parse_failed) { const matched = parsed.href.match(GIT_RE); if (matched) { parsed.protocols = ["ssh"]; parsed.protocol = "ssh"; parsed.resource = matched[2]; parsed.host = matched[2]; parsed.user = matched[1]; parsed.pathname = `/${matched[3]}`; parsed.parse_failed = false; } else { throwErr("URL parsing failed."); } } return parsed; }; parseUrl.MAX_INPUT_LENGTH = 2048; module.exports = parseUrl;