UNPKG

parse-url

Version:

An advanced url parser supporting git urls too.

396 lines (319 loc) 12.1 kB
import require$$1 from 'parse-path'; function getAugmentedNamespace(n) { if (n.__esModule) return n; var f = n.default; if (typeof f == "function") { var a = function a () { if (this instanceof a) { var args = [null]; args.push.apply(args, arguments); var Ctor = Function.bind.apply(f, args); return new Ctor(); } return f.apply(this, arguments); }; a.prototype = f.prototype; } else a = {}; Object.defineProperty(a, '__esModule', {value: true}); Object.keys(n).forEach(function (k) { var d = Object.getOwnPropertyDescriptor(n, k); Object.defineProperty(a, k, d.get ? d : { enumerable: true, get: function () { return n[k]; } }); }); return a; } var src = {}; // https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain'; const DATA_URL_DEFAULT_CHARSET = 'us-ascii'; const testParameter = (name, filters) => filters.some(filter => filter instanceof RegExp ? filter.test(name) : filter === name); const normalizeDataURL = (urlString, {stripHash}) => { const match = /^data:(?<type>[^,]*?),(?<data>[^#]*?)(?:#(?<hash>.*))?$/.exec(urlString); if (!match) { throw new Error(`Invalid URL: ${urlString}`); } let {type, data, hash} = match.groups; const mediaType = type.split(';'); hash = stripHash ? '' : hash; let isBase64 = false; if (mediaType[mediaType.length - 1] === 'base64') { mediaType.pop(); isBase64 = true; } // Lowercase MIME type const mimeType = (mediaType.shift() || '').toLowerCase(); const attributes = mediaType .map(attribute => { let [key, value = ''] = attribute.split('=').map(string => string.trim()); // Lowercase `charset` if (key === 'charset') { value = value.toLowerCase(); if (value === DATA_URL_DEFAULT_CHARSET) { return ''; } } return `${key}${value ? `=${value}` : ''}`; }) .filter(Boolean); const normalizedMediaType = [ ...attributes, ]; if (isBase64) { normalizedMediaType.push('base64'); } if (normalizedMediaType.length > 0 || (mimeType && mimeType !== DATA_URL_DEFAULT_MIME_TYPE)) { normalizedMediaType.unshift(mimeType); } return `data:${normalizedMediaType.join(';')},${isBase64 ? data.trim() : data}${hash ? `#${hash}` : ''}`; }; function normalizeUrl(urlString, options) { options = { defaultProtocol: 'http:', normalizeProtocol: true, forceHttp: false, forceHttps: false, stripAuthentication: true, stripHash: false, stripTextFragment: true, stripWWW: true, removeQueryParameters: [/^utm_\w+/i], removeTrailingSlash: true, removeSingleSlash: true, removeDirectoryIndex: false, sortQueryParameters: true, ...options, }; urlString = urlString.trim(); // Data URL if (/^data:/i.test(urlString)) { return normalizeDataURL(urlString, options); } if (/^view-source:/i.test(urlString)) { throw new Error('`view-source:` is not supported as it is a non-standard protocol'); } const hasRelativeProtocol = urlString.startsWith('//'); const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString); // Prepend protocol if (!isRelativeUrl) { urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol); } const urlObject = new URL(urlString); if (options.forceHttp && options.forceHttps) { throw new Error('The `forceHttp` and `forceHttps` options cannot be used together'); } if (options.forceHttp && urlObject.protocol === 'https:') { urlObject.protocol = 'http:'; } if (options.forceHttps && urlObject.protocol === 'http:') { urlObject.protocol = 'https:'; } // Remove auth if (options.stripAuthentication) { urlObject.username = ''; urlObject.password = ''; } // Remove hash if (options.stripHash) { urlObject.hash = ''; } else if (options.stripTextFragment) { urlObject.hash = urlObject.hash.replace(/#?:~:text.*?$/i, ''); } // Remove duplicate slashes if not preceded by a protocol // NOTE: This could be implemented using a single negative lookbehind // regex, but we avoid that to maintain compatibility with older js engines // which do not have support for that feature. if (urlObject.pathname) { // TODO: Replace everything below with `urlObject.pathname = urlObject.pathname.replace(/(?<!\b[a-z][a-z\d+\-.]{1,50}:)\/{2,}/g, '/');` when Safari supports negative lookbehind. // Split the string by occurrences of this protocol regex, and perform // duplicate-slash replacement on the strings between those occurrences // (if any). const protocolRegex = /\b[a-z][a-z\d+\-.]{1,50}:\/\//g; let lastIndex = 0; let result = ''; for (;;) { const match = protocolRegex.exec(urlObject.pathname); if (!match) { break; } const protocol = match[0]; const protocolAtIndex = match.index; const intermediate = urlObject.pathname.slice(lastIndex, protocolAtIndex); result += intermediate.replace(/\/{2,}/g, '/'); result += protocol; lastIndex = protocolAtIndex + protocol.length; } const remnant = urlObject.pathname.slice(lastIndex, urlObject.pathname.length); result += remnant.replace(/\/{2,}/g, '/'); urlObject.pathname = result; } // Decode URI octets if (urlObject.pathname) { try { urlObject.pathname = decodeURI(urlObject.pathname); } catch {} } // Remove directory index if (options.removeDirectoryIndex === true) { options.removeDirectoryIndex = [/^index\.[a-z]+$/]; } if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) { let pathComponents = urlObject.pathname.split('/'); const lastComponent = pathComponents[pathComponents.length - 1]; if (testParameter(lastComponent, options.removeDirectoryIndex)) { pathComponents = pathComponents.slice(0, -1); urlObject.pathname = pathComponents.slice(1).join('/') + '/'; } } if (urlObject.hostname) { // Remove trailing dot urlObject.hostname = urlObject.hostname.replace(/\.$/, ''); // Remove `www.` if (options.stripWWW && /^www\.(?!www\.)[a-z\-\d]{1,63}\.[a-z.\-\d]{2,63}$/.test(urlObject.hostname)) { // Each label should be max 63 at length (min: 1). // Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names // Each TLD should be up to 63 characters long (min: 2). // It is technically possible to have a single character TLD, but none currently exist. urlObject.hostname = urlObject.hostname.replace(/^www\./, ''); } } // Remove query unwanted parameters if (Array.isArray(options.removeQueryParameters)) { // eslint-disable-next-line unicorn/no-useless-spread -- We are intentionally spreading to get a copy. for (const key of [...urlObject.searchParams.keys()]) { if (testParameter(key, options.removeQueryParameters)) { urlObject.searchParams.delete(key); } } } if (options.removeQueryParameters === true) { urlObject.search = ''; } // Sort query parameters if (options.sortQueryParameters) { urlObject.searchParams.sort(); // Calling `.sort()` encodes the search parameters, so we need to decode them again. try { urlObject.search = decodeURIComponent(urlObject.search); } catch {} } if (options.removeTrailingSlash) { urlObject.pathname = urlObject.pathname.replace(/\/$/, ''); } const oldUrlString = urlString; // Take advantage of many of the Node `url` normalizations urlString = urlObject.toString(); if (!options.removeSingleSlash && urlObject.pathname === '/' && !oldUrlString.endsWith('/') && urlObject.hash === '') { urlString = urlString.replace(/\/$/, ''); } // Remove ending `/` unless removeSingleSlash is false if ((options.removeTrailingSlash || urlObject.pathname === '/') && urlObject.hash === '' && options.removeSingleSlash) { urlString = urlString.replace(/\/$/, ''); } // Restore relative protocol, if applicable if (hasRelativeProtocol && !options.normalizeProtocol) { urlString = urlString.replace(/^http:\/\//, '//'); } // Remove http/https if (options.stripProtocol) { urlString = urlString.replace(/^(?:https?:)?\/\//, ''); } return urlString; } var normalizeUrl$1 = /*#__PURE__*/Object.freeze({ __proto__: null, 'default': normalizeUrl }); var require$$0 = /*@__PURE__*/getAugmentedNamespace(normalizeUrl$1); Object.defineProperty(src, "__esModule", { value: true }); var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; // Dependencies var _normalizeUrl = require$$0; var _normalizeUrl2 = _interopRequireDefault(_normalizeUrl); var _parsePath = require$$1; var _parsePath2 = _interopRequireDefault(_parsePath); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } /** * parseUrl * Parses the input url. * * **Note**: This *throws* if invalid urls are provided. * * @name parseUrl * @function * @param {String} url The input url. * @param {Boolean|Object} normalize Whether to normalize the url or not. * Default is `false`. If `true`, the url will * be normalized. If an object, it will be the * options object sent to [`normalize-url`](https://github.com/sindresorhus/normalize-url). * * For SSH urls, normalize won't work. * * @return {Object} An object containing the following fields: * * - `protocols` (Array): An array with the url protocols (usually it has one element). * - `protocol` (String): The first protocol, `"ssh"` (if the url is a ssh url) or `"file"`. * - `port` (null|Number): The domain port. * - `resource` (String): The url domain (including subdomains). * - `host` (String): The fully qualified domain name of a network host, or its IP address. * - `user` (String): The authentication user (usually for ssh urls). * - `pathname` (String): The url pathname. * - `hash` (String): The url hash. * - `search` (String): The url querystring value. * - `href` (String): The input url. * - `query` (Object): The url querystring, parsed as object. * - `parse_failed` (Boolean): Whether the parsing failed or not. */ var parseUrl = function parseUrl(url) { var normalize = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false; // Constants /** * ([a-zA-Z_][a-zA-Z0-9_-]{0,31}) Try to match the user * ([\w\.\-@]+) Match the host/resource * (([\~,\.\w,\-,\_,\/,\s]|%[0-9A-Fa-f]{2})+?(?:\.git|\/)?) Match the path, allowing spaces/white */ var GIT_RE = /^(?:([a-zA-Z_][a-zA-Z0-9_-]{0,31})@|https?:\/\/)([\w\.\-@]+)[\/:](([\~,\.\w,\-,\_,\/,\s]|%[0-9A-Fa-f]{2})+?(?:\.git|\/)?)$/; var throwErr = function throwErr(msg) { var err = new Error(msg); err.subject_url = url; throw err; }; if (typeof url !== "string" || !url.trim()) { throwErr("Invalid url."); } if (url.length > parseUrl.MAX_INPUT_LENGTH) { throwErr("Input exceeds maximum length. If needed, change the value of parseUrl.MAX_INPUT_LENGTH."); } if (normalize) { if ((typeof normalize === "undefined" ? "undefined" : _typeof(normalize)) !== "object") { normalize = { stripHash: false }; } url = (0, _normalizeUrl2.default)(url, normalize); } var parsed = (0, _parsePath2.default)(url); // Potential git-ssh urls if (parsed.parse_failed) { var matched = parsed.href.match(GIT_RE); if (matched) { parsed.protocols = ["ssh"]; parsed.protocol = "ssh"; parsed.resource = matched[2]; parsed.host = matched[2]; parsed.user = matched[1]; parsed.pathname = "/" + matched[3]; parsed.parse_failed = false; } else { throwErr("URL parsing failed."); } } return parsed; }; parseUrl.MAX_INPUT_LENGTH = 2048; var _default = src.default = parseUrl; export { _default as default };