UNPKG

tldjs

Version:

JavaScript API to work against complex domain names, subdomains and URIs.

1,829 lines (1,553 loc) 210 kB
(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.tldjs = f()}})(function(){var define,module,exports;return (function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);var f=new Error("Cannot find module '"+o+"'");throw f.code="MODULE_NOT_FOUND",f}var l=n[o]={exports:{}};t[o][0].call(l.exports,function(e){var n=t[o][1][e];return s(n?n:e)},l,l.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o<r.length;o++)s(r[o]);return s})({1:[function(require,module,exports){ 'use strict'; // Load rules var Trie = require('./lib/suffix-trie.js'); var allRules = Trie.fromJson(require('./rules.json')); // Internals var extractHostname = require('./lib/clean-host.js'); var getDomain = require('./lib/domain.js'); var getPublicSuffix = require('./lib/public-suffix.js'); var getSubdomain = require('./lib/subdomain.js'); var isValid = require('./lib/is-valid.js'); var isIp = require('./lib/is-ip.js'); var tldExists = require('./lib/tld-exists.js'); // Flags representing steps in the `parse` function. They are used to implement // a early stop mechanism (simulating some form of laziness) to avoid doing more // work than necessary to perform a given action (e.g.: we don't need to extract // the domain and subdomain if we are only interested in public suffix). var TLD_EXISTS = 1; var PUBLIC_SUFFIX = 2; var DOMAIN = 3; var SUB_DOMAIN = 4; var ALL = 5; /** * Creates a new instance of tldjs * @param {Object.<rules,validHosts>} options [description] * @return {tldjs|Object} [description] */ function factory(options) { var rules = options.rules || allRules || {}; var validHosts = options.validHosts || []; var _extractHostname = options.extractHostname || extractHostname; /** * Process a given url and extract all information. This is a higher level API * around private functions of `tld.js`. It allows to remove duplication (only * extract hostname from url once for all operations) and implement some early * termination mechanism to not pay the price of what we don't need (this * simulates laziness at a lower cost). * * @param {string} url * @param {number|undefined} _step - where should we stop processing * @return {object} */ function parse(url, _step) { var step = _step || ALL; var result = { hostname: _extractHostname(url), isValid: null, isIp: null, tldExists: false, publicSuffix: null, domain: null, subdomain: null, }; if (result.hostname === null) { result.isIp = false; result.isValid = false; return result; } // Check if `hostname` is a valid ip address result.isIp = isIp(result.hostname); if (result.isIp) { result.isValid = true; return result; } // Check if `hostname` is valid result.isValid = isValid(result.hostname); if (result.isValid === false) return result; // Check if tld exists if (step === ALL || step === TLD_EXISTS) { result.tldExists = tldExists(rules, result.hostname); } if (step === TLD_EXISTS) return result; // Extract public suffix result.publicSuffix = getPublicSuffix(rules, result.hostname); if (step === PUBLIC_SUFFIX) return result; // Extract domain result.domain = getDomain(validHosts, result.publicSuffix, result.hostname); if (step === DOMAIN) return result; // Extract subdomain result.subdomain = getSubdomain(result.hostname, result.domain); return result; } return { extractHostname: _extractHostname, isValid: isValid, parse: parse, tldExists: function (url) { return parse(url, TLD_EXISTS).tldExists; }, getPublicSuffix: function (url) { return parse(url, PUBLIC_SUFFIX).publicSuffix; }, getDomain: function (url) { return parse(url, DOMAIN).domain; }, getSubdomain: function (url) { return parse(url, SUB_DOMAIN).subdomain; }, fromUserSettings: factory }; } module.exports = factory({}); },{"./lib/clean-host.js":2,"./lib/domain.js":3,"./lib/is-ip.js":5,"./lib/is-valid.js":6,"./lib/public-suffix.js":7,"./lib/subdomain.js":8,"./lib/suffix-trie.js":9,"./lib/tld-exists.js":10,"./rules.json":17}],2:[function(require,module,exports){ var URL = require('url'); var isValid = require('./is-valid.js'); /** * Utility to cleanup the base host value. Also removes url fragments. * * Works for: * - hostname * - //hostname * - scheme://hostname * - scheme+scheme://hostname * * @param {string} value * @return {String} */ // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) var hasPrefixRE = /^(([a-z][a-z0-9+.-]*)?:)?\/\//; /** * @see https://github.com/oncletom/tld.js/issues/95 * * @param {string} value */ function trimTrailingDots(value) { if (value[value.length - 1] === '.') { return value.substr(0, value.length - 1); } return value; } /** * Fast check to avoid calling `trim` when not needed. * * @param {string} value */ function checkTrimmingNeeded(value) { return ( value.length > 0 && ( value.charCodeAt(0) <= 32 || value.charCodeAt(value.length - 1) <= 32 ) ); } /** * Fast check to avoid calling `toLowerCase` when not needed. * * @param {string} value */ function checkLowerCaseNeeded(value) { for (var i = 0; i < value.length; i += 1) { var code = value.charCodeAt(i); if (code >= 65 && code <= 90) { // [A-Z] return true; } } return false; } module.exports = function extractHostname(value) { // First check if `value` is already a valid hostname. if (isValid(value)) { return trimTrailingDots(value); } var url = value; if (typeof url !== 'string') { url = '' + url; } var needsTrimming = checkTrimmingNeeded(url); if (needsTrimming) { url = url.trim(); } var needsLowerCase = checkLowerCaseNeeded(url); if (needsLowerCase) { url = url.toLowerCase(); } // Try again after `url` has been transformed to lowercase and trimmed. if ((needsLowerCase || needsTrimming) && isValid(url)) { return trimTrailingDots(url); } // Proceed with heavier url parsing to extract the hostname. if (!hasPrefixRE.test(url)) { url = '//' + url; } var parts = URL.parse(url, null, true); if (parts.hostname) { return trimTrailingDots(parts.hostname); } return null; }; },{"./is-valid.js":6,"url":15}],3:[function(require,module,exports){ 'use strict'; /** * Polyfill for `endsWith` * * @param {string} str * @param {string} pattern * @return {boolean} */ function endsWith(str, pattern) { return ( str.lastIndexOf(pattern) === (str.length - pattern.length) ); } /** * Check if `vhost` is a valid suffix of `hostname` (top-domain) * * It means that `vhost` needs to be a suffix of `hostname` and we then need to * make sure that: either they are equal, or the character preceding `vhost` in * `hostname` is a '.' (it should not be a partial label). * * * hostname = 'not.evil.com' and vhost = 'vil.com' => not ok * * hostname = 'not.evil.com' and vhost = 'evil.com' => ok * * hostname = 'not.evil.com' and vhost = 'not.evil.com' => ok * * @param {string} hostname * @param {string} vhost * @return {boolean} */ function shareSameDomainSuffix(hostname, vhost) { if (endsWith(hostname, vhost)) { return ( hostname.length === vhost.length || hostname[hostname.length - vhost.length - 1] === '.' ); } return false; } /** * Given a hostname and its public suffix, extract the general domain. * * @param {string} hostname * @param {string} publicSuffix * @return {string} */ function extractDomainWithSuffix(hostname, publicSuffix) { // Locate the index of the last '.' in the part of the `hostname` preceding // the public suffix. // // examples: // 1. not.evil.co.uk => evil.co.uk // ^ ^ // | | start of public suffix // | index of the last dot // // 2. example.co.uk => example.co.uk // ^ ^ // | | start of public suffix // | // | (-1) no dot found before the public suffix var publicSuffixIndex = hostname.length - publicSuffix.length - 2; var lastDotBeforeSuffixIndex = hostname.lastIndexOf('.', publicSuffixIndex); // No '.' found, then `hostname` is the general domain (no sub-domain) if (lastDotBeforeSuffixIndex === -1) { return hostname; } // Extract the part between the last '.' return hostname.substr(lastDotBeforeSuffixIndex + 1); } /** * Detects the domain based on rules and upon and a host string * * @api * @param {string} host * @return {String} */ module.exports = function getDomain(validHosts, suffix, hostname) { // Check if `hostname` ends with a member of `validHosts`. for (var i = 0; i < validHosts.length; i += 1) { var vhost = validHosts[i]; if (shareSameDomainSuffix(hostname, vhost)) { return vhost; } } // If there is no suffix, there is no hostname if (suffix === null) { return null; } // If `hostname` is a valid public suffix, then there is no domain to return. // Since we already know that `getPublicSuffix` returns a suffix of `hostname` // there is no need to perform a string comparison and we only compare the // size. if (suffix.length === hostname.length) { return null; } // To extract the general domain, we start by identifying the public suffix // (if any), then consider the domain to be the public suffix with one added // level of depth. (e.g.: if hostname is `not.evil.co.uk` and public suffix: // `co.uk`, then we take one more level: `evil`, giving the final result: // `evil.co.uk`). return extractDomainWithSuffix(hostname, suffix); }; },{}],4:[function(require,module,exports){ "use strict"; /** * Utility to extract the TLD from a hostname string * * @param {string} host * @return {String} */ module.exports = function extractTldFromHost(hostname) { var lastDotIndex = hostname.lastIndexOf('.'); if (lastDotIndex === -1) { return null; } return hostname.substr(lastDotIndex + 1); }; },{}],5:[function(require,module,exports){ 'use strict'; /** * Check if a hostname is an IP. You should be aware that this only works * because `hostname` is already garanteed to be a valid hostname! * * @param {string} hostname * @return {boolean} */ function isProbablyIpv4(hostname) { var numberOfDots = 0; for (var i = 0; i < hostname.length; i += 1) { var code = hostname.charCodeAt(i); if (code === 46) { // '.' numberOfDots += 1; } else if (code < 48 || code > 57) { // 48 => '0' // 57 => '9' return false; } } return ( numberOfDots === 3 && hostname[0] !== '.' && hostname[hostname.length - 1] !== '.' ); } /** * Similar to isProbablyIpv4. * * @param {string} hostname * @return {boolean} */ function isProbablyIpv6(hostname) { var hasColon = false; for (var i = 0; i < hostname.length; i += 1) { var code = hostname.charCodeAt(i); if (code === 58) { // ':' hasColon = true; } else if (!( (code >= 48 && code <= 57) || // 0-9 (code >= 97 && code <= 102) // a-f )) { return false; } } return hasColon; } /** * Check if `hostname` is *probably* a valid ip addr (either ipv6 or ipv4). * This *will not* work on any string. We need `hostname` to be a valid * hostname. * * @param {string} hostname * @return {boolean} */ module.exports = function isIp(hostname) { if (typeof hostname !== 'string') { return false; } if (hostname.length === 0) { return false; } return (isProbablyIpv6(hostname) || isProbablyIpv4(hostname)); }; },{}],6:[function(require,module,exports){ "use strict"; /** * Check if the code point is a digit [0-9] * * @param {number} code * @return boolean */ function isDigit(code) { // 48 == '0' // 57 == '9' return code >= 48 && code <= 57; } /** * Check if the code point is a letter [a-zA-Z] * * @param {number} code * @return boolean */ function isAlpha(code) { // 97 === 'a' // 122 == 'z' return code >= 97 && code <= 122; } /** * Check if a hostname string is valid (according to RFC). It's usually a * preliminary check before trying to use getDomain or anything else. * * Beware: it does not check if the TLD exists. * * @api * @param {string} hostname * @return {boolean} */ module.exports = function isValid(hostname) { if (typeof hostname !== 'string') { return false; } if (hostname.length > 255) { return false; } if (hostname.length === 0) { return false; } // Check first character: [a-zA-Z0-9] var firstCharCode = hostname.charCodeAt(0); if (!(isAlpha(firstCharCode) || isDigit(firstCharCode))) { return false; } // Validate hostname according to RFC var lastDotIndex = -1; var lastCharCode; var code; var len = hostname.length; for (var i = 0; i < len; i += 1) { code = hostname.charCodeAt(i); if (code === 46) { // '.' if ( // Check that previous label is < 63 bytes long (64 = 63 + '.') (i - lastDotIndex) > 64 || // Check that previous character was not already a '.' lastCharCode === 46 || // Check that the previous label does not end with a '-' lastCharCode === 45 ) { return false; } lastDotIndex = i; } else if (!(isAlpha(code) || isDigit(code) || code === 45)) { // Check if there is a forbidden character in the label: [^a-zA-Z0-9-] return false; } lastCharCode = code; } return ( // Check that last label is shorter than 63 chars (len - lastDotIndex - 1) <= 63 && // Check that the last character is an allowed trailing label character. // Since we already checked that the char is a valid hostname character, // we only need to check that it's different from '-'. lastCharCode !== 45 ); }; },{}],7:[function(require,module,exports){ 'use strict'; var extractTldFromHost = require('./from-host.js'); /** * Returns the public suffix (including exact matches) * * @api * @since 1.5 * @param {string} hostname * @return {string} */ module.exports = function getPublicSuffix(rules, hostname) { // First check if `hostname` is already a valid top-level Domain. if (rules.hasTld(hostname)) { return hostname; } var candidate = rules.suffixLookup(hostname); if (candidate === null) { // Prevailing rule is '*' so we consider the top-level domain to be the // public suffix of `hostname` (e.g.: 'example.org' => 'org'). return extractTldFromHost(hostname); } return candidate; }; },{"./from-host.js":4}],8:[function(require,module,exports){ 'use strict'; /** * Returns the subdomain of a hostname string * * @api * @param {string} hostname * @param {string} domain - the root domain of the hostname * @return {string|null} a subdomain string if any, blank string if subdomain * is empty, otherwise null. */ module.exports = function getSubdomain(hostname, domain) { // No domain found? Just abort, abort! if (domain === null) { return null; } return hostname.substr(0, hostname.length - domain.length - 1); }; },{}],9:[function(require,module,exports){ "use strict"; var VALID_HOSTNAME_VALUE = 0; /** * Return min(a, b), handling possible `null` values. * * @param {number|null} a * @param {number|null} b * @return {number|null} */ function minIndex(a, b) { if (a === null) { return b; } else if (b === null) { return a; } return a < b ? a : b; } /** * Insert a public suffix rule in the `trie`. * * @param {object} rule * @param {object} trie * @return {object} trie (updated) */ function insertInTrie(rule, trie) { var parts = rule.parts; var node = trie; for (var i = 0; i < parts.length; i += 1) { var part = parts[i]; var nextNode = node[part]; if (nextNode === undefined) { nextNode = Object.create(null); node[part] = nextNode; } node = nextNode; } node.$ = VALID_HOSTNAME_VALUE; return trie; } /** * Recursive lookup of `parts` (starting at `index`) in the tree. * * @param {array} parts * @param {object} trie * @param {number} index - when to start in `parts` (initially: length - 1) * @return {number} size of the suffix found (in number of parts matched) */ function lookupInTrie(parts, trie, index) { var part; var nextNode; var publicSuffixIndex = null; // We have a match! if (trie.$ !== undefined) { publicSuffixIndex = index + 1; } // No more `parts` to look for if (index === -1) { return publicSuffixIndex; } part = parts[index]; // Check branch corresponding to next part of hostname nextNode = trie[part]; if (nextNode !== undefined) { publicSuffixIndex = minIndex( publicSuffixIndex, lookupInTrie(parts, nextNode, index - 1) ); } // Check wildcard branch nextNode = trie['*']; if (nextNode !== undefined) { publicSuffixIndex = minIndex( publicSuffixIndex, lookupInTrie(parts, nextNode, index - 1) ); } return publicSuffixIndex; } /** * Contains the public suffix ruleset as a Trie for efficient look-up. * * @constructor */ function SuffixTrie(rules) { this.exceptions = Object.create(null); this.rules = Object.create(null); if (rules) { for (var i = 0; i < rules.length; i += 1) { var rule = rules[i]; if (rule.exception) { insertInTrie(rule, this.exceptions); } else { insertInTrie(rule, this.rules); } } } } /** * Load the trie from JSON (as serialized by JSON.stringify). */ SuffixTrie.fromJson = function (json) { var trie = new SuffixTrie(); trie.exceptions = json.exceptions; trie.rules = json.rules; return trie; }; /** * Check if `value` is a valid TLD. */ SuffixTrie.prototype.hasTld = function (value) { // All TLDs are at the root of the Trie. return this.rules[value] !== undefined; }; /** * Check if `hostname` has a valid public suffix in `trie`. * * @param {string} hostname * @return {string|null} public suffix */ SuffixTrie.prototype.suffixLookup = function (hostname) { var parts = hostname.split('.'); // Look for a match in rules var publicSuffixIndex = lookupInTrie( parts, this.rules, parts.length - 1 ); if (publicSuffixIndex === null) { return null; } // Look for exceptions var exceptionIndex = lookupInTrie( parts, this.exceptions, parts.length - 1 ); if (exceptionIndex !== null) { return parts.slice(exceptionIndex + 1).join('.'); } return parts.slice(publicSuffixIndex).join('.'); }; module.exports = SuffixTrie; },{}],10:[function(require,module,exports){ 'use strict'; var extractTldFromHost = require('./from-host.js'); /** * Checks if the TLD exists for a given hostname * * @api * @param {string} rules * @param {string} hostname * @return {boolean} */ module.exports = function tldExists(rules, hostname) { // Easy case, it's a TLD if (rules.hasTld(hostname)) { return true; } // Popping only the TLD of the hostname var hostTld = extractTldFromHost(hostname); if (hostTld === null) { return false; } return rules.hasTld(hostTld); }; },{"./from-host.js":4}],11:[function(require,module,exports){ (function (global){ /*! https://mths.be/punycode v1.4.1 by @mathias */ ;(function(root) { /** Detect free variables */ var freeExports = typeof exports == 'object' && exports && !exports.nodeType && exports; var freeModule = typeof module == 'object' && module && !module.nodeType && module; var freeGlobal = typeof global == 'object' && global; if ( freeGlobal.global === freeGlobal || freeGlobal.window === freeGlobal || freeGlobal.self === freeGlobal ) { root = freeGlobal; } /** * The `punycode` object. * @name punycode * @type Object */ var punycode, /** Highest positive signed 32-bit float value */ maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1 /** Bootstring parameters */ base = 36, tMin = 1, tMax = 26, skew = 38, damp = 700, initialBias = 72, initialN = 128, // 0x80 delimiter = '-', // '\x2D' /** Regular expressions */ regexPunycode = /^xn--/, regexNonASCII = /[^\x20-\x7E]/, // unprintable ASCII chars + non-ASCII chars regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g, // RFC 3490 separators /** Error messages */ errors = { 'overflow': 'Overflow: input needs wider integers to process', 'not-basic': 'Illegal input >= 0x80 (not a basic code point)', 'invalid-input': 'Invalid input' }, /** Convenience shortcuts */ baseMinusTMin = base - tMin, floor = Math.floor, stringFromCharCode = String.fromCharCode, /** Temporary variable */ key; /*--------------------------------------------------------------------------*/ /** * A generic error utility function. * @private * @param {String} type The error type. * @returns {Error} Throws a `RangeError` with the applicable error message. */ function error(type) { throw new RangeError(errors[type]); } /** * A generic `Array#map` utility function. * @private * @param {Array} array The array to iterate over. * @param {Function} callback The function that gets called for every array * item. * @returns {Array} A new array of values returned by the callback function. */ function map(array, fn) { var length = array.length; var result = []; while (length--) { result[length] = fn(array[length]); } return result; } /** * A simple `Array#map`-like wrapper to work with domain name strings or email * addresses. * @private * @param {String} domain The domain name or email address. * @param {Function} callback The function that gets called for every * character. * @returns {Array} A new string of characters returned by the callback * function. */ function mapDomain(string, fn) { var parts = string.split('@'); var result = ''; if (parts.length > 1) { // In email addresses, only the domain name should be punycoded. Leave // the local part (i.e. everything up to `@`) intact. result = parts[0] + '@'; string = parts[1]; } // Avoid `split(regex)` for IE8 compatibility. See #17. string = string.replace(regexSeparators, '\x2E'); var labels = string.split('.'); var encoded = map(labels, fn).join('.'); return result + encoded; } /** * Creates an array containing the numeric code points of each Unicode * character in the string. While JavaScript uses UCS-2 internally, * this function will convert a pair of surrogate halves (each of which * UCS-2 exposes as separate characters) into a single code point, * matching UTF-16. * @see `punycode.ucs2.encode` * @see <https://mathiasbynens.be/notes/javascript-encoding> * @memberOf punycode.ucs2 * @name decode * @param {String} string The Unicode input string (UCS-2). * @returns {Array} The new array of code points. */ function ucs2decode(string) { var output = [], counter = 0, length = string.length, value, extra; while (counter < length) { value = string.charCodeAt(counter++); if (value >= 0xD800 && value <= 0xDBFF && counter < length) { // high surrogate, and there is a next character extra = string.charCodeAt(counter++); if ((extra & 0xFC00) == 0xDC00) { // low surrogate output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000); } else { // unmatched surrogate; only append this code unit, in case the next // code unit is the high surrogate of a surrogate pair output.push(value); counter--; } } else { output.push(value); } } return output; } /** * Creates a string based on an array of numeric code points. * @see `punycode.ucs2.decode` * @memberOf punycode.ucs2 * @name encode * @param {Array} codePoints The array of numeric code points. * @returns {String} The new Unicode string (UCS-2). */ function ucs2encode(array) { return map(array, function(value) { var output = ''; if (value > 0xFFFF) { value -= 0x10000; output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800); value = 0xDC00 | value & 0x3FF; } output += stringFromCharCode(value); return output; }).join(''); } /** * Converts a basic code point into a digit/integer. * @see `digitToBasic()` * @private * @param {Number} codePoint The basic numeric code point value. * @returns {Number} The numeric value of a basic code point (for use in * representing integers) in the range `0` to `base - 1`, or `base` if * the code point does not represent a value. */ function basicToDigit(codePoint) { if (codePoint - 48 < 10) { return codePoint - 22; } if (codePoint - 65 < 26) { return codePoint - 65; } if (codePoint - 97 < 26) { return codePoint - 97; } return base; } /** * Converts a digit/integer into a basic code point. * @see `basicToDigit()` * @private * @param {Number} digit The numeric value of a basic code point. * @returns {Number} The basic code point whose value (when used for * representing integers) is `digit`, which needs to be in the range * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is * used; else, the lowercase form is used. The behavior is undefined * if `flag` is non-zero and `digit` has no uppercase form. */ function digitToBasic(digit, flag) { // 0..25 map to ASCII a..z or A..Z // 26..35 map to ASCII 0..9 return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5); } /** * Bias adaptation function as per section 3.4 of RFC 3492. * https://tools.ietf.org/html/rfc3492#section-3.4 * @private */ function adapt(delta, numPoints, firstTime) { var k = 0; delta = firstTime ? floor(delta / damp) : delta >> 1; delta += floor(delta / numPoints); for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) { delta = floor(delta / baseMinusTMin); } return floor(k + (baseMinusTMin + 1) * delta / (delta + skew)); } /** * Converts a Punycode string of ASCII-only symbols to a string of Unicode * symbols. * @memberOf punycode * @param {String} input The Punycode string of ASCII-only symbols. * @returns {String} The resulting string of Unicode symbols. */ function decode(input) { // Don't use UCS-2 var output = [], inputLength = input.length, out, i = 0, n = initialN, bias = initialBias, basic, j, index, oldi, w, k, digit, t, /** Cached calculation results */ baseMinusT; // Handle the basic code points: let `basic` be the number of input code // points before the last delimiter, or `0` if there is none, then copy // the first basic code points to the output. basic = input.lastIndexOf(delimiter); if (basic < 0) { basic = 0; } for (j = 0; j < basic; ++j) { // if it's not a basic code point if (input.charCodeAt(j) >= 0x80) { error('not-basic'); } output.push(input.charCodeAt(j)); } // Main decoding loop: start just after the last delimiter if any basic code // points were copied; start at the beginning otherwise. for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) { // `index` is the index of the next character to be consumed. // Decode a generalized variable-length integer into `delta`, // which gets added to `i`. The overflow checking is easier // if we increase `i` as we go, then subtract off its starting // value at the end to obtain `delta`. for (oldi = i, w = 1, k = base; /* no condition */; k += base) { if (index >= inputLength) { error('invalid-input'); } digit = basicToDigit(input.charCodeAt(index++)); if (digit >= base || digit > floor((maxInt - i) / w)) { error('overflow'); } i += digit * w; t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias); if (digit < t) { break; } baseMinusT = base - t; if (w > floor(maxInt / baseMinusT)) { error('overflow'); } w *= baseMinusT; } out = output.length + 1; bias = adapt(i - oldi, out, oldi == 0); // `i` was supposed to wrap around from `out` to `0`, // incrementing `n` each time, so we'll fix that now: if (floor(i / out) > maxInt - n) { error('overflow'); } n += floor(i / out); i %= out; // Insert `n` at position `i` of the output output.splice(i++, 0, n); } return ucs2encode(output); } /** * Converts a string of Unicode symbols (e.g. a domain name label) to a * Punycode string of ASCII-only symbols. * @memberOf punycode * @param {String} input The string of Unicode symbols. * @returns {String} The resulting Punycode string of ASCII-only symbols. */ function encode(input) { var n, delta, handledCPCount, basicLength, bias, j, m, q, k, t, currentValue, output = [], /** `inputLength` will hold the number of code points in `input`. */ inputLength, /** Cached calculation results */ handledCPCountPlusOne, baseMinusT, qMinusT; // Convert the input in UCS-2 to Unicode input = ucs2decode(input); // Cache the length inputLength = input.length; // Initialize the state n = initialN; delta = 0; bias = initialBias; // Handle the basic code points for (j = 0; j < inputLength; ++j) { currentValue = input[j]; if (currentValue < 0x80) { output.push(stringFromCharCode(currentValue)); } } handledCPCount = basicLength = output.length; // `handledCPCount` is the number of code points that have been handled; // `basicLength` is the number of basic code points. // Finish the basic string - if it is not empty - with a delimiter if (basicLength) { output.push(delimiter); } // Main encoding loop: while (handledCPCount < inputLength) { // All non-basic code points < n have been handled already. Find the next // larger one: for (m = maxInt, j = 0; j < inputLength; ++j) { currentValue = input[j]; if (currentValue >= n && currentValue < m) { m = currentValue; } } // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>, // but guard against overflow handledCPCountPlusOne = handledCPCount + 1; if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) { error('overflow'); } delta += (m - n) * handledCPCountPlusOne; n = m; for (j = 0; j < inputLength; ++j) { currentValue = input[j]; if (currentValue < n && ++delta > maxInt) { error('overflow'); } if (currentValue == n) { // Represent delta as a generalized variable-length integer for (q = delta, k = base; /* no condition */; k += base) { t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias); if (q < t) { break; } qMinusT = q - t; baseMinusT = base - t; output.push( stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0)) ); q = floor(qMinusT / baseMinusT); } output.push(stringFromCharCode(digitToBasic(q, 0))); bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength); delta = 0; ++handledCPCount; } } ++delta; ++n; } return output.join(''); } /** * Converts a Punycode string representing a domain name or an email address * to Unicode. Only the Punycoded parts of the input will be converted, i.e. * it doesn't matter if you call it on a string that has already been * converted to Unicode. * @memberOf punycode * @param {String} input The Punycoded domain name or email address to * convert to Unicode. * @returns {String} The Unicode representation of the given Punycode * string. */ function toUnicode(input) { return mapDomain(input, function(string) { return regexPunycode.test(string) ? decode(string.slice(4).toLowerCase()) : string; }); } /** * Converts a Unicode string representing a domain name or an email address to * Punycode. Only the non-ASCII parts of the domain name will be converted, * i.e. it doesn't matter if you call it with a domain that's already in * ASCII. * @memberOf punycode * @param {String} input The domain name or email address to convert, as a * Unicode string. * @returns {String} The Punycode representation of the given domain name or * email address. */ function toASCII(input) { return mapDomain(input, function(string) { return regexNonASCII.test(string) ? 'xn--' + encode(string) : string; }); } /*--------------------------------------------------------------------------*/ /** Define the public API */ punycode = { /** * A string representing the current Punycode.js version number. * @memberOf punycode * @type String */ 'version': '1.4.1', /** * An object of methods to convert from JavaScript's internal character * representation (UCS-2) to Unicode code points, and back. * @see <https://mathiasbynens.be/notes/javascript-encoding> * @memberOf punycode * @type Object */ 'ucs2': { 'decode': ucs2decode, 'encode': ucs2encode }, 'decode': decode, 'encode': encode, 'toASCII': toASCII, 'toUnicode': toUnicode }; /** Expose `punycode` */ // Some AMD build optimizers, like r.js, check for specific condition patterns // like the following: if ( typeof define == 'function' && typeof define.amd == 'object' && define.amd ) { define('punycode', function() { return punycode; }); } else if (freeExports && freeModule) { if (module.exports == freeExports) { // in Node.js, io.js, or RingoJS v0.8.0+ freeModule.exports = punycode; } else { // in Narwhal or RingoJS v0.7.0- for (key in punycode) { punycode.hasOwnProperty(key) && (freeExports[key] = punycode[key]); } } } else { // in Rhino or a web browser root.punycode = punycode; } }(this)); }).call(this,typeof global !== "undefined" ? global : typeof self !== "undefined" ? self : typeof window !== "undefined" ? window : {}) },{}],12:[function(require,module,exports){ // Copyright Joyent, Inc. and other Node contributors. // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the // "Software"), to deal in the Software without restriction, including // without limitation the rights to use, copy, modify, merge, publish, // distribute, sublicense, and/or sell copies of the Software, and to permit // persons to whom the Software is furnished to do so, subject to the // following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE // USE OR OTHER DEALINGS IN THE SOFTWARE. 'use strict'; // If obj.hasOwnProperty has been overridden, then calling // obj.hasOwnProperty(prop) will break. // See: https://github.com/joyent/node/issues/1707 function hasOwnProperty(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); } module.exports = function(qs, sep, eq, options) { sep = sep || '&'; eq = eq || '='; var obj = {}; if (typeof qs !== 'string' || qs.length === 0) { return obj; } var regexp = /\+/g; qs = qs.split(sep); var maxKeys = 1000; if (options && typeof options.maxKeys === 'number') { maxKeys = options.maxKeys; } var len = qs.length; // maxKeys <= 0 means that we should not limit keys count if (maxKeys > 0 && len > maxKeys) { len = maxKeys; } for (var i = 0; i < len; ++i) { var x = qs[i].replace(regexp, '%20'), idx = x.indexOf(eq), kstr, vstr, k, v; if (idx >= 0) { kstr = x.substr(0, idx); vstr = x.substr(idx + 1); } else { kstr = x; vstr = ''; } k = decodeURIComponent(kstr); v = decodeURIComponent(vstr); if (!hasOwnProperty(obj, k)) { obj[k] = v; } else if (isArray(obj[k])) { obj[k].push(v); } else { obj[k] = [obj[k], v]; } } return obj; }; var isArray = Array.isArray || function (xs) { return Object.prototype.toString.call(xs) === '[object Array]'; }; },{}],13:[function(require,module,exports){ // Copyright Joyent, Inc. and other Node contributors. // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the // "Software"), to deal in the Software without restriction, including // without limitation the rights to use, copy, modify, merge, publish, // distribute, sublicense, and/or sell copies of the Software, and to permit // persons to whom the Software is furnished to do so, subject to the // following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE // USE OR OTHER DEALINGS IN THE SOFTWARE. 'use strict'; var stringifyPrimitive = function(v) { switch (typeof v) { case 'string': return v; case 'boolean': return v ? 'true' : 'false'; case 'number': return isFinite(v) ? v : ''; default: return ''; } }; module.exports = function(obj, sep, eq, name) { sep = sep || '&'; eq = eq || '='; if (obj === null) { obj = undefined; } if (typeof obj === 'object') { return map(objectKeys(obj), function(k) { var ks = encodeURIComponent(stringifyPrimitive(k)) + eq; if (isArray(obj[k])) { return map(obj[k], function(v) { return ks + encodeURIComponent(stringifyPrimitive(v)); }).join(sep); } else { return ks + encodeURIComponent(stringifyPrimitive(obj[k])); } }).join(sep); } if (!name) return ''; return encodeURIComponent(stringifyPrimitive(name)) + eq + encodeURIComponent(stringifyPrimitive(obj)); }; var isArray = Array.isArray || function (xs) { return Object.prototype.toString.call(xs) === '[object Array]'; }; function map (xs, f) { if (xs.map) return xs.map(f); var res = []; for (var i = 0; i < xs.length; i++) { res.push(f(xs[i], i)); } return res; } var objectKeys = Object.keys || function (obj) { var res = []; for (var key in obj) { if (Object.prototype.hasOwnProperty.call(obj, key)) res.push(key); } return res; }; },{}],14:[function(require,module,exports){ 'use strict'; exports.decode = exports.parse = require('./decode'); exports.encode = exports.stringify = require('./encode'); },{"./decode":12,"./encode":13}],15:[function(require,module,exports){ // Copyright Joyent, Inc. and other Node contributors. // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the // "Software"), to deal in the Software without restriction, including // without limitation the rights to use, copy, modify, merge, publish, // distribute, sublicense, and/or sell copies of the Software, and to permit // persons to whom the Software is furnished to do so, subject to the // following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN // NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, // DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE // USE OR OTHER DEALINGS IN THE SOFTWARE. 'use strict'; var punycode = require('punycode'); var util = require('./util'); exports.parse = urlParse; exports.resolve = urlResolve; exports.resolveObject = urlResolveObject; exports.format = urlFormat; exports.Url = Url; function Url() { this.protocol = null; this.slashes = null; this.auth = null; this.host = null; this.port = null; this.hostname = null; this.hash = null; this.search = null; this.query = null; this.pathname = null; this.path = null; this.href = null; } // Reference: RFC 3986, RFC 1808, RFC 2396 // define these here so at least they only have to be // compiled once on the first module load. var protocolPattern = /^([a-z0-9.+-]+:)/i, portPattern = /:[0-9]*$/, // Special case for a simple path URL simplePathPattern = /^(\/\/?(?!\/)[^\?\s]*)(\?[^\s]*)?$/, // RFC 2396: characters reserved for delimiting URLs. // We actually just auto-escape these. delims = ['<', '>', '"', '`', ' ', '\r', '\n', '\t'], // RFC 2396: characters not allowed for various reasons. unwise = ['{', '}', '|', '\\', '^', '`'].concat(delims), // Allowed by RFCs, but cause of XSS attacks. Always escape these. autoEscape = ['\''].concat(unwise), // Characters that are never ever allowed in a hostname. // Note that any invalid chars are also handled, but these // are the ones that are *expected* to be seen, so we fast-path // them. nonHostChars = ['%', '/', '?', ';', '#'].concat(autoEscape), hostEndingChars = ['/', '?', '#'], hostnameMaxLen = 255, hostnamePartPattern = /^[+a-z0-9A-Z_-]{0,63}$/, hostnamePartStart = /^([+a-z0-9A-Z_-]{0,63})(.*)$/, // protocols that can allow "unsafe" and "unwise" chars. unsafeProtocol = { 'javascript': true, 'javascript:': true }, // protocols that never have a hostname. hostlessProtocol = { 'javascript': true, 'javascript:': true }, // protocols that always contain a // bit. slashedProtocol = { 'http': true, 'https': true, 'ftp': true, 'gopher': true, 'file': true, 'http:': true, 'https:': true, 'ftp:': true, 'gopher:': true, 'file:': true }, querystring = require('querystring'); function urlParse(url, parseQueryString, slashesDenoteHost) { if (url && util.isObject(url) && url instanceof Url) return url; var u = new Url; u.parse(url, parseQueryString, slashesDenoteHost); return u; } Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) { if (!util.isString(url)) { throw new TypeError("Parameter 'url' must be a string, not " + typeof url); } // Copy chrome, IE, opera backslash-handling behavior. // Back slashes before the query string get converted to forward slashes // See: https://code.google.com/p/chromium/issues/detail?id=25916 var queryIndex = url.indexOf('?'), splitter = (queryIndex !== -1 && queryIndex < url.indexOf('#')) ? '?' : '#', uSplit = url.split(splitter), slashRegex = /\\/g; uSplit[0] = uSplit[0].replace(slashRegex, '/'); url = uSplit.join(splitter); var rest = url; // trim before proceeding. // This is to support parse stuff like " http://foo.com \n" rest = rest.trim(); if (!slashesDenoteHost && url.split('#').length === 1) { // Try fast path regexp var simplePath = simplePathPattern.exec(rest); if (simplePath) { this.path = rest; this.href = rest; this.pathname = simplePath[1]; if (simplePath[2]) { this.search = simplePath[2]; if (parseQueryString) { this.query = querystring.parse(this.search.substr(1)); } else { this.query = this.search.substr(1); } } else if (parseQueryString) { this.search = ''; this.query = {}; } return this; } } var proto = protocolPattern.exec(rest); if (proto) { proto = proto[0]; var lowerProto = proto.toLowerCase(); this.protocol = lowerProto; rest = rest.substr(proto.length); } // figure out if it's got a host // user@server is *always* interpreted as a hostname, and url // resolution will treat //foo/bar as host=foo,path=bar because that's // how the browser resolves relative URLs. if (slashesDenoteHost || proto || rest.match(/^\/\/[^@\/]+@[^@\/]+/)) { var slashes = rest.substr(0, 2) === '//'; if (slashes && !(proto && hostlessProtocol[proto])) { rest = rest.substr(2); this.slashes = true; } } if (!hostlessProtocol[proto] && (slashes || (proto && !slashedProtocol[proto]))) { // there's a hostname. // the first instance of /, ?, ;, or # ends the host. // // If there is an @ in the hostname, then non-host chars *are* allowed // to the left of the last @ sign, unless some host-ending character // comes *before* the @-sign. // URLs are obnoxious. // // ex: // http://a@b@c/ => user:a@b host:c // http://a@b?@c => user:a host:c path:/?@c // v0.12 TODO(isaacs): This is not quite how Chrome does things. // Review our test case against browsers more comprehensively. // find the first instance of any hostEndingChars var hostEnd = -1; for (var i = 0; i < hostEndingChars.length; i++) { var hec = rest.indexOf(hostEndingChars[i]); if (hec !== -1 && (hostEnd === -1 || hec < hostEnd)) hostEnd = hec; } // at this point, either we have an explicit point where the // auth portion cannot go past, or the last @ char is the decider. var auth, atSign; if (hostEnd === -1) { // atSign can be anywhere. atSign = rest.lastIndexOf('@'); } else { // atSign must be in auth portion. // http://a@b/c@d => host:b auth:a path:/c@d atSign = rest.lastIndexOf('@', hostEnd); } // Now we have a portion which is definitely the auth. // Pull that off. if (atSign !== -1) { auth = rest.slice(0, atSign); rest = rest.slice(atSign + 1); this.auth = decodeURIComponent(auth); } // the host is the remaining to the left of the first non-host char hostEnd = -1; for (var i = 0; i < nonHostChars.length; i++) { var hec = rest.indexOf(nonHostChars[i]); if (hec !== -1 && (hostEnd === -1 || hec < hostEnd)) hostEnd = hec; } // if we still have not hit it, then the entire thing is a host. if (hostEnd === -1) hostEnd = rest.length; this.host = rest.slice(0, hostEnd); rest = rest.slice(hostEnd); // pull out port. this.parseHost(); // we've indicated that there is a hostname, // so even if it's empty, it has to be present. this.hostname = this.hostname || ''; // if hostname begins with [ and ends with ] // assume that it's an IPv6 address. var ipv6Hostname = this.hostname[0] === '[' && this.hostname[this.hostname.length - 1] === ']'; // validate a little. if (!ipv6Hostname) { var hostparts = this.hostname.split(/\./); for (var i = 0, l = hostparts.length; i < l; i++) { var part = hostparts[i]; if (!part) continue; if (!part.match(hostnamePartPattern)) { var newpart = ''; for (var j = 0, k = part.length; j < k; j++) { if (part.charCodeAt(j) > 127) { // we replace non-ASCII char with a temporary placeholder // we need this to make sure size of hostname is not // broken by replacing non-ASCII by nothing newpart += 'x'; } else { newpart += part[j]; } } // we test again with ASCII char only if (!newpart.match(hostnamePartPattern)) { var validParts = hostparts.slice(0, i); var notHost = hostparts.slice(i + 1); var bit = part.match(hostnamePartStart); if (bit) { validParts.push(bit[1]); notHost.unshift(bit[2]); } if (notHost.length) { rest = '/' + notHost.join('.') + rest; } this.hostname = validParts.join('.'); break; } } } } if (this.hostname.length > hostnameMaxLen) { this.hostname = ''; } else { // hostnames are always lower case. this.hostname = this.hostname.toLowerCase(); } if (!ipv6Hostname) { // IDNA Support: Returns a punycoded representation of "domain". // It only converts parts of the domain name that // have non-ASCII characters, i.e. it doesn't matter if // you call it with a domain that already is ASCII-only. this.hostname = punycode.toASCII(this.hostname); } var p = this.port ? ':' + this.port : ''; var h = this.hostname || ''; this.host = h + p; this.href += this.host; // strip [ and ] from