UNPKG

open-graph-scraper

Version:

Node.js scraper module for Open Graph and Twitter Card info

282 lines (279 loc) 9.91 kB
"use strict"; // eslint-disable-next-line @typescript-eslint/ban-ts-comment // @ts-nocheck /* eslint-disable @typescript-eslint/no-unsafe-argument */ /* eslint-disable @typescript-eslint/no-unsafe-call */ /* eslint-disable @typescript-eslint/no-unsafe-member-access */ /* eslint-disable @typescript-eslint/no-unsafe-assignment */ // This is from https://github.com/validatorjs/validator.js version: 13.12.0 Object.defineProperty(exports, "__esModule", { value: true }); exports.default = isURL; // https://github.com/validatorjs/validator.js/blob/master/src/lib/util/assertString.js function assertString(input) { const isString = typeof input === 'string' || input instanceof String; if (!isString) { let invalidType = typeof input; if (input === null) invalidType = 'null'; else if (invalidType === 'object') invalidType = input.constructor.name; throw new TypeError(`Expected a string but received a ${invalidType}`); } } // https://github.com/validatorjs/validator.js/blob/master/src/lib/util/merge.js // eslint-disable-next-line @typescript-eslint/default-param-last function merge(obj = {}, defaults) { // eslint-disable-next-line no-restricted-syntax for (const key in defaults) { if (typeof obj[key] === 'undefined') { obj[key] = defaults[key]; } } return obj; } // https://github.com/validatorjs/validator.js/blob/master/src/lib/isFQDN.js const defaultFqdnOptions = { require_tld: true, allow_underscores: false, allow_trailing_dot: false, allow_numeric_tld: false, allow_wildcard: false, ignore_max_length: false, }; function isFQDN(str, options) { assertString(str); options = merge(options, defaultFqdnOptions); /* Remove the optional trailing dot before checking validity */ if (options.allow_trailing_dot && str[str.length - 1] === '.') { str = str.substring(0, str.length - 1); } /* Remove the optional wildcard before checking validity */ if (options.allow_wildcard === true && str.indexOf('*.') === 0) { str = str.substring(2); } const parts = str.split('.'); const tld = parts[parts.length - 1]; if (options.require_tld) { // disallow fqdns without tld if (parts.length < 2) { return false; } if (!options.allow_numeric_tld // eslint-disable-next-line @typescript-eslint/no-unsafe-argument && !/^([a-z\u00A1-\u00A8\u00AA-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]{2,}|xn[a-z0-9-]{2,})$/i.test(tld)) { return false; } // disallow spaces if (/\s/.test(tld)) { return false; } } // reject numeric TLDs if (!options.allow_numeric_tld && /^\d+$/.test(tld)) { return false; } // eslint-disable-next-line @typescript-eslint/no-unsafe-return return parts.every((part) => { if (part.length > 63 && !options.ignore_max_length) { return false; } if (!/^[a-z_\u00a1-\uffff0-9-]+$/i.test(part)) { return false; } // disallow full-width chars if (/[\uff01-\uff5e]/.test(part)) { return false; } // disallow parts starting or ending with hyphen if (/^-|-$/.test(part)) { return false; } if (!options.allow_underscores && /_/.test(part)) { return false; } return true; }); } // https://github.com/validatorjs/validator.js/blob/master/src/lib/isIP.js const IPv4SegmentFormat = '(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])'; const IPv4AddressFormat = `(${IPv4SegmentFormat}[.]){3}${IPv4SegmentFormat}`; const IPv4AddressRegExp = new RegExp(`^${IPv4AddressFormat}$`); const IPv6SegmentFormat = '(?:[0-9a-fA-F]{1,4})'; const IPv6AddressRegExp = new RegExp('^(' + `(?:${IPv6SegmentFormat}:){7}(?:${IPv6SegmentFormat}|:)|` + `(?:${IPv6SegmentFormat}:){6}(?:${IPv4AddressFormat}|:${IPv6SegmentFormat}|:)|` + `(?:${IPv6SegmentFormat}:){5}(?::${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,2}|:)|` + `(?:${IPv6SegmentFormat}:){4}(?:(:${IPv6SegmentFormat}){0,1}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,3}|:)|` + `(?:${IPv6SegmentFormat}:){3}(?:(:${IPv6SegmentFormat}){0,2}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,4}|:)|` + `(?:${IPv6SegmentFormat}:){2}(?:(:${IPv6SegmentFormat}){0,3}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,5}|:)|` + `(?:${IPv6SegmentFormat}:){1}(?:(:${IPv6SegmentFormat}){0,4}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,6}|:)|` + `(?::((?::${IPv6SegmentFormat}){0,5}:${IPv4AddressFormat}|(?::${IPv6SegmentFormat}){1,7}|:))` + ')(%[0-9a-zA-Z-.:]{1,})?$'); function isIP(str, version = '') { assertString(str); version = String(version); if (!version) { // eslint-disable-next-line @typescript-eslint/no-unsafe-return return isIP(str, 4) || isIP(str, 6); } if (version === '4') { return IPv4AddressRegExp.test(str); } if (version === '6') { return IPv6AddressRegExp.test(str); } return false; } // https://github.com/validatorjs/validator.js/blob/master/src/lib/isURL.js /* options for isURL method require_protocol - if set as true isURL will return false if protocol is not present in the URL require_valid_protocol - isURL will check if the URL's protocol is present in the protocols option protocols - valid protocols can be modified with this option require_host - if set as false isURL will not check if host is present in the URL require_port - if set as true isURL will check if port is present in the URL allow_protocol_relative_urls - if set as true protocol relative URLs will be allowed validate_length - if set as false isURL will skip string length validation (IE maximum is 2083) */ const defaultUrlOptions = { protocols: ['http', 'https', 'ftp'], require_tld: true, require_protocol: false, require_host: true, require_port: false, require_valid_protocol: true, allow_underscores: false, allow_trailing_dot: false, allow_protocol_relative_urls: false, allow_fragments: true, allow_query_components: true, validate_length: true, }; const wrappedIpv6 = /^\[([^\]]+)\](?::([0-9]+))?$/; function isRegExp(obj) { return Object.prototype.toString.call(obj) === '[object RegExp]'; } function checkHost(host, matches) { // eslint-disable-next-line @typescript-eslint/prefer-for-of for (let i = 0; i < matches.length; i += 1) { const match = matches[i]; if (host === match || (isRegExp(match) && match.test(host))) { return true; } } return false; } function isURL(url, options) { assertString(url); if (!url || /[\s<>]/.test(url)) { return false; } if (url.indexOf('mailto:') === 0) { return false; } options = merge(options, defaultUrlOptions); if (options.validate_length && url.length >= 2083) { return false; } if (!options.allow_fragments && url.includes('#')) { return false; } if (!options.allow_query_components && (url.includes('?') || url.includes('&'))) { return false; } // eslint-disable-next-line @typescript-eslint/naming-convention let protocol; let auth; let host; let port; let port_str; let split; let ipv6; split = url.split('#'); url = split.shift(); split = url.split('?'); url = split.shift(); split = url.split('://'); if (split.length > 1) { protocol = split.shift().toLowerCase(); if (options.require_valid_protocol && options.protocols.indexOf(protocol) === -1) { return false; } } else if (options.require_protocol) { return false; } else if (url.slice(0, 2) === '//') { if (!options.allow_protocol_relative_urls) { return false; } split[0] = url.slice(2); } url = split.join('://'); if (url === '') { return false; } split = url.split('/'); url = split.shift(); if (url === '' && !options.require_host) { return true; } split = url.split('@'); if (split.length > 1) { if (options.disallow_auth) { return false; } if (split[0] === '') { return false; } auth = split.shift(); if (auth.indexOf(':') >= 0 && auth.split(':').length > 2) { return false; } const [user, password] = auth.split(':'); if (user === '' && password === '') { return false; } } const hostname = split.join('@'); port_str = null; ipv6 = null; // eslint-disable-next-line @typescript-eslint/naming-convention const ipv6_match = hostname.match(wrappedIpv6); if (ipv6_match) { host = ''; // eslint-disable-next-line prefer-destructuring ipv6 = ipv6_match[1]; port_str = ipv6_match[2] || null; } else { split = hostname.split(':'); host = split.shift(); if (split.length) { port_str = split.join(':'); } } if (port_str !== null && port_str.length > 0) { port = parseInt(port_str, 10); if (!/^[0-9]+$/.test(port_str) || port <= 0 || port > 65535) { return false; } } else if (options.require_port) { return false; } if (options.host_whitelist) { return checkHost(host, options.host_whitelist); } if (host === '' && !options.require_host) { return true; } if (!isIP(host) && !isFQDN(host, options) && (!ipv6 || !isIP(ipv6, 6))) { return false; } host = host || ipv6; if (options.host_blacklist && checkHost(host, options.host_blacklist)) { return false; } return true; }