// open-graph-scraper
// Version:
// Node.js scraper module for Open Graph and Twitter Card info
// 282 lines (279 loc) • 9.91 kB
// JavaScript
;
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-nocheck
/* eslint-disable @typescript-eslint/no-unsafe-argument */
/* eslint-disable @typescript-eslint/no-unsafe-call */
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
// This is from https://github.com/validatorjs/validator.js version: 13.12.0
// Module interop preamble: flag `exports` with `__esModule: true` and publish
// `isURL` (a function declaration further down, hoisted) as `exports.default`.
Object.defineProperty(exports, "__esModule", { value: true });
exports.default = isURL;
// https://github.com/validatorjs/validator.js/blob/master/src/lib/util/assertString.js
/**
 * Guard used by the validators: returns silently for a string primitive or a
 * `String` object and throws for anything else.
 *
 * @param {*} input - Value to check.
 * @returns {void}
 * @throws {TypeError} `Expected a string but received a <type>`, where <type>
 *   is `null`, the constructor name for objects, or the `typeof` result.
 */
function assertString(input) {
    if (typeof input === 'string' || input instanceof String) {
        return;
    }
    let invalidType = typeof input;
    if (input === null) {
        // typeof null is 'object', so name it explicitly.
        invalidType = 'null';
    }
    else if (invalidType === 'object') {
        // Objects without a usable constructor (e.g. Object.create(null)) would
        // otherwise crash here with an unrelated "cannot read properties" error
        // instead of the intended message; fall back to the generic 'object'.
        const ctor = input.constructor;
        const ctorName = ctor != null ? ctor.name : undefined;
        invalidType = typeof ctorName === 'string' && ctorName !== '' ? ctorName : 'object';
    }
    throw new TypeError(`Expected a string but received a ${invalidType}`);
}
// https://github.com/validatorjs/validator.js/blob/master/src/lib/util/merge.js
// eslint-disable-next-line @typescript-eslint/default-param-last
/**
 * Build an options object in which every key of `defaults` that is missing
 * (or `undefined`) in `obj` is filled in with the default value.
 *
 * The result is a new object: a shallow copy of `obj`'s own enumerable
 * properties plus the missing defaults. Neither argument is modified, so an
 * options object supplied by a caller is not altered by validation.
 *
 * @param {object|null} [obj={}] - Caller-supplied options; `undefined` and
 *   `null` are both treated as "no options".
 * @param {object} defaults - Fallback values keyed by option name.
 * @returns {object} New object combining `obj` with the missing defaults.
 */
function merge(obj = {}, defaults) {
    // Object.assign skips null/undefined sources, so a null `obj` becomes {}.
    const merged = Object.assign({}, obj);
    // eslint-disable-next-line no-restricted-syntax
    for (const key in defaults) {
        if (typeof merged[key] === 'undefined') {
            merged[key] = defaults[key];
        }
    }
    return merged;
}
// https://github.com/validatorjs/validator.js/blob/master/src/lib/isFQDN.js
// Defaults applied by isFQDN for any option the caller leaves undefined.
const defaultFqdnOptions = {
    require_tld: true,
    allow_underscores: false,
    allow_trailing_dot: false,
    allow_numeric_tld: false,
    allow_wildcard: false,
    ignore_max_length: false,
};
/**
 * Check whether `str` is a fully qualified domain name.
 *
 * @param {string} str - Candidate host name; a non-string makes assertString throw.
 * @param {object} [options] - Overrides for `defaultFqdnOptions`:
 *   require_tld (need at least two labels and a well-formed last label),
 *   allow_underscores, allow_trailing_dot (ignore one trailing '.'),
 *   allow_numeric_tld, allow_wildcard (ignore a leading '*.'),
 *   ignore_max_length (skip the 63-character per-label limit).
 * @returns {boolean} true when every label passes the checks below.
 */
function isFQDN(str, options) {
    assertString(str);
    const opts = merge(options, defaultFqdnOptions);
    let hostname = str;
    // Drop the optional trailing dot before validating.
    if (opts.allow_trailing_dot && hostname.endsWith('.')) {
        hostname = hostname.slice(0, -1);
    }
    // Drop the optional leading wildcard label before validating.
    if (opts.allow_wildcard === true && hostname.startsWith('*.')) {
        hostname = hostname.slice(2);
    }
    const labels = hostname.split('.');
    const tld = labels[labels.length - 1];
    if (opts.require_tld) {
        // A name with a single label has no TLD.
        if (labels.length < 2) {
            return false;
        }
        const tldLooksValid = /^([a-z\u00A1-\u00A8\u00AA-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]{2,}|xn[a-z0-9-]{2,})$/i.test(tld);
        if (!opts.allow_numeric_tld && !tldLooksValid) {
            return false;
        }
        // No whitespace allowed in the TLD.
        if (/\s/.test(tld)) {
            return false;
        }
    }
    // All-digit TLDs are rejected unless explicitly allowed.
    if (!opts.allow_numeric_tld && /^\d+$/.test(tld)) {
        return false;
    }
    // A label is valid when it is short enough, uses only permitted
    // characters, has no full-width characters, does not start or end with a
    // hyphen, and has no underscore unless underscores are allowed.
    const isValidLabel = (label) => Boolean((label.length <= 63 || opts.ignore_max_length)
        && /^[a-z_\u00a1-\uffff0-9-]+$/i.test(label)
        && !/[\uff01-\uff5e]/.test(label)
        && !/^-|-$/.test(label)
        && (opts.allow_underscores || !/_/.test(label)));
    return labels.every(isValidLabel);
}
// https://github.com/validatorjs/validator.js/blob/master/src/lib/isIP.js
// Regex source fragments for IP address matching. They are plain strings so
// they can be interpolated into the larger patterns below.
// One IPv4 segment: a decimal number 0-255 with no leading zeros.
const IPv4SegmentFormat = '(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])';
// Four dot-separated segments. Note the first three sit in a capturing group.
const IPv4AddressFormat = `(${IPv4SegmentFormat}[.]){3}${IPv4SegmentFormat}`;
// Anchored so the whole input must be an IPv4 address.
const IPv4AddressRegExp = new RegExp(`^${IPv4AddressFormat}$`);
// One IPv6 segment: 1 to 4 hexadecimal digits.
const IPv6SegmentFormat = '(?:[0-9a-fA-F]{1,4})';
// Each alternative fixes the number of leading "segment:" groups (7 down to 0)
// and then lists what may follow: a final segment, a run of ":segment" groups,
// a bare ':' or, from the 6-group alternative on, a dotted IPv4 tail.
// The whole address may be followed by an optional '%' suffix made of
// [0-9a-zA-Z-.:] characters (presumably an IPv6 zone/scope identifier - confirm).
const IPv6AddressRegExp = new RegExp('^('
+ `(?:${IPv6SegmentFormat}:){7}(?:${IPv6SegmentFormat}|:)|`
+ `(?:${IPv6SegmentFormat}:){6}(?:${IPv4AddressFormat}|:${IPv6SegmentFormat}|:)|`
+ `(?:${IPv6SegmentFormat}:){5}(?::${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,2}|:)|`
+ `(?:${IPv6SegmentFormat}:){4}(?:(:${IPv6SegmentFormat}){0,1}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,3}|:)|`
+ `(?:${IPv6SegmentFormat}:){3}(?:(:${IPv6SegmentFormat}){0,2}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,4}|:)|`
+ `(?:${IPv6SegmentFormat}:){2}(?:(:${IPv6SegmentFormat}){0,3}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,5}|:)|`
+ `(?:${IPv6SegmentFormat}:){1}(?:(:${IPv6SegmentFormat}){0,4}:${IPv4AddressFormat}|(:${IPv6SegmentFormat}){1,6}|:)|`
+ `(?::((?::${IPv6SegmentFormat}){0,5}:${IPv4AddressFormat}|(?::${IPv6SegmentFormat}){1,7}|:))`
+ ')(%[0-9a-zA-Z-.:]{1,})?$');
/**
 * Check whether `str` is an IP address.
 *
 * @param {string} str - Candidate address; a non-string makes assertString throw.
 * @param {string|number} [version=''] - 4 or 6 (number or string) to test a
 *   single family; an empty string accepts either. Any other value yields false.
 * @returns {boolean} true when `str` matches the requested address family.
 */
function isIP(str, version = '') {
    assertString(str);
    switch (String(version)) {
        case '':
            // No version requested: accept either family.
            return IPv4AddressRegExp.test(str) || IPv6AddressRegExp.test(str);
        case '4':
            return IPv4AddressRegExp.test(str);
        case '6':
            return IPv6AddressRegExp.test(str);
        default:
            return false;
    }
}
// https://github.com/validatorjs/validator.js/blob/master/src/lib/isURL.js
/*
options for isURL method
protocols - protocols accepted when require_valid_protocol is set
require_protocol - if set as true isURL will return false if protocol is not present in the URL
require_valid_protocol - isURL will check if the URL's protocol is present in the protocols option
require_host - if set as false isURL will not check if host is present in the URL
require_port - if set as true isURL will check if port is present in the URL
allow_protocol_relative_urls - if set as true protocol relative URLs will be allowed
allow_fragments - if set as false isURL will return false when the URL contains '#'
allow_query_components - if set as false isURL will return false when the URL contains '?' or '&'
validate_length - if set as false isURL will skip string length validation (IE maximum is 2083)
require_tld, allow_underscores, allow_trailing_dot - not read by isURL itself; the
merged options object is handed to isFQDN, which reads them when checking the host

isURL also reads three options that have no default here (so they are
undefined unless the caller supplies them):
disallow_auth - if truthy, a URL containing '@' before the first '/' is rejected
host_whitelist - if set, the result is solely whether the host matches an entry
host_blacklist - if set, a host matching an entry is rejected
(whitelist/blacklist entries are compared by === or, for RegExp entries, by test())
*/
const defaultUrlOptions = {
protocols: ['http', 'https', 'ftp'],
require_tld: true,
require_protocol: false,
require_host: true,
require_port: false,
require_valid_protocol: true,
allow_underscores: false,
allow_trailing_dot: false,
allow_protocol_relative_urls: false,
allow_fragments: true,
allow_query_components: true,
validate_length: true,
};
// Matches a bracketed host such as "[...]" with an optional ":digits" suffix.
// Capture 1 is the text inside the brackets, capture 2 the digits after ':'.
const wrappedIpv6 = /^\[([^\]]+)\](?::([0-9]+))?$/;
/**
 * Report whether `obj` is a RegExp, judged by its Object.prototype.toString tag.
 *
 * @param {*} obj - Value to inspect.
 * @returns {boolean} true when the tag is "[object RegExp]".
 */
function isRegExp(obj) {
    return Object.prototype.toString.call(obj) === '[object RegExp]';
}
/**
 * Test a host against a list of allowed/blocked entries.
 *
 * @param {string} host - Host name to look up.
 * @param {Array<string|RegExp>} matches - Entries; a string must equal `host`
 *   exactly, a RegExp must match it.
 * @returns {boolean} true as soon as one entry matches, otherwise false.
 */
function checkHost(host, matches) {
    // eslint-disable-next-line @typescript-eslint/prefer-for-of
    for (let i = 0; i < matches.length; i += 1) {
        const match = matches[i];
        if (host === match) {
            return true;
        }
        if (isRegExp(match)) {
            // A global or sticky RegExp remembers `lastIndex` between test()
            // calls, so a reused pattern would alternate between matching and
            // not matching the same host. Always start from the beginning.
            // Other patterns are left untouched (test() never writes to them).
            if (match.global || match.sticky) {
                match.lastIndex = 0;
            }
            if (match.test(host)) {
                return true;
            }
        }
    }
    return false;
}
/**
 * Check whether `url` is an acceptable URL under the given options.
 *
 * The string is peeled apart step by step: fragment and query are dropped,
 * then the protocol, the path, the optional "user:password@" part and finally
 * the port, leaving a host that must be an IP address or a domain name.
 *
 * @param {string} url - Candidate URL; a non-string makes assertString throw.
 * @param {object} [options] - Overrides for `defaultUrlOptions`, plus the
 *   optional `disallow_auth`, `host_whitelist` and `host_blacklist`.
 * @returns {boolean} true when the URL passes every enabled check.
 */
function isURL(url, options) {
    assertString(url);
    if (!url || /[\s<>]/.test(url)) {
        return false;
    }
    if (url.startsWith('mailto:')) {
        return false;
    }
    const opts = merge(options, defaultUrlOptions);
    if (opts.validate_length && url.length >= 2083) {
        return false;
    }
    if (!opts.allow_fragments && url.includes('#')) {
        return false;
    }
    if (!opts.allow_query_components && (url.includes('?') || url.includes('&'))) {
        return false;
    }
    // Everything from the first '#', then from the first '?', is ignored.
    const withoutFragment = url.split('#')[0];
    const withoutQuery = withoutFragment.split('?')[0];
    // Protocol: the text before the first '://', if there is one.
    const protocolPieces = withoutQuery.split('://');
    if (protocolPieces.length > 1) {
        const protocol = protocolPieces.shift().toLowerCase();
        if (opts.require_valid_protocol && !opts.protocols.includes(protocol)) {
            return false;
        }
    }
    else if (opts.require_protocol) {
        return false;
    }
    else if (withoutQuery.startsWith('//')) {
        if (!opts.allow_protocol_relative_urls) {
            return false;
        }
        protocolPieces[0] = withoutQuery.slice(2);
    }
    const afterProtocol = protocolPieces.join('://');
    if (afterProtocol === '') {
        return false;
    }
    // Authority: everything before the first '/'.
    const authority = afterProtocol.split('/')[0];
    if (authority === '' && !opts.require_host) {
        return true;
    }
    // Optional credentials in front of '@'.
    const atPieces = authority.split('@');
    if (atPieces.length > 1) {
        if (opts.disallow_auth) {
            return false;
        }
        const auth = atPieces.shift();
        if (auth === '') {
            return false;
        }
        const credentials = auth.split(':');
        // At most one ':' may separate user and password.
        if (credentials.length > 2) {
            return false;
        }
        const [user, password] = credentials;
        if (user === '' && password === '') {
            return false;
        }
    }
    const hostname = atPieces.join('@');
    // Split host and port; a bracketed host is kept apart as `ipv6`.
    let host = '';
    let ipv6 = null;
    let portText = null;
    const bracketed = wrappedIpv6.exec(hostname);
    if (bracketed) {
        ipv6 = bracketed[1];
        portText = bracketed[2] || null;
    }
    else {
        const [name, ...portPieces] = hostname.split(':');
        host = name;
        if (portPieces.length) {
            portText = portPieces.join(':');
        }
    }
    if (portText !== null && portText.length > 0) {
        const port = parseInt(portText, 10);
        const portIsValid = /^[0-9]+$/.test(portText) && port > 0 && port <= 65535;
        if (!portIsValid) {
            return false;
        }
    }
    else if (opts.require_port) {
        return false;
    }
    // With a whitelist the answer is solely whether the host is listed.
    if (opts.host_whitelist) {
        return checkHost(host, opts.host_whitelist);
    }
    if (host === '' && !opts.require_host) {
        return true;
    }
    const hostIsValid = isIP(host) || isFQDN(host, opts) || (ipv6 && isIP(ipv6, 6));
    if (!hostIsValid) {
        return false;
    }
    const effectiveHost = host || ipv6;
    if (opts.host_blacklist && checkHost(effectiveHost, opts.host_blacklist)) {
        return false;
    }
    return true;
}