UNPKG

@warren-bank/node-request-cli

Version:

An extremely lightweight HTTP request client for the command-line. Supports: http, https, proxy, redirects, cookies, content-encoding, multipart/form-data, multi-threading, recursive website crawling and mirroring.

148 lines (128 loc) 4.82 kB
const add_argv_flags = (argv_flags) => { Object.assign( argv_flags, { "--spider": {bool: true}, "--mirror": {bool: true}, "--recursive": {bool: true}, "--level": {num: "int"}, "--page-requisites": {bool: true}, "--adjust-extension": {bool: true}, "--convert-links": {bool: true}, "--no-parent": {bool: true}, "--exclude-directory": {many: true}, "--include-directory": {many: true}, "--span-subdomains": {bool: true}, "--span-hosts": {bool: true}, "--exclude-host": {many: true}, "--include-host": {many: true}, "--reject-regex": {regex: "i", many: true}, "--accept-regex": {regex: "i", many: true}, "--no-directories": {bool: true}, "--protocol-directories": {bool: true}, "--no-host-directories": {bool: true}, "--cut-dirs": {num: "int"}, "--force-html": {regex: "i", many: true}, "--base": {} } ) } const add_argv_flag_aliases = (argv_flag_aliases) => { Object.assign( argv_flag_aliases, { "--mirror": ["-m"], "--recursive": ["-r"], "--level": ["-l"], "--page-requisites": ["-p"], "--adjust-extension": ["-E"], "--convert-links": ["-k"], "--no-parent": ["-np"], "--exclude-directory": ["-xD", "--exclude"], // non-standard: "-X" is allocated to "curl" addon, as an alias for "--method" "--include-directory": ["-iD", "--include"], // non-standard: "-I" is allocated to "curl" addon, as an alias for "--head" "--span-subdomains": ["-sD"], "--span-hosts": ["-sH"], // non-standard: "-H" is allocated to "curl" addon, as an alias for "--header" "--exclude-host": ["-xH", "--exclude-domains"], "--include-host": ["-iH", "--domains", "-D"], "--no-directories": ["-nd"], "--no-host-directories": ["-nH"], "--force-html": ["-F"], "--base": ["-B"] } ) } const process_argv_vals = (argv_vals) => { if (argv_vals["--spider"]) { // --mirror --server-response --dry-run argv_vals["--mirror"] = true argv_vals["--server-response"] = true argv_vals["--dry-run"] = true } if (argv_vals["--mirror"]) { // -r --trust-server-names -E -k -l 0 argv_vals["--recursive"] = true argv_vals["--trust-server-names"] = true argv_vals["--adjust-extension"] = true argv_vals["--convert-links"] = true if (argv_vals["--level"] === undefined) { argv_vals["--level"] = Number.MAX_SAFE_INTEGER } } if (!argv_vals["--recursive"] && argv_vals["--page-requisites"]) { argv_vals["--recursive"] = true argv_vals["--level"] = -1 argv_vals["--convert-links"] = true } if (argv_vals["--recursive"]) { argv_vals["--max-concurrency"] = 1 argv_vals["--output-document"] = null argv_vals["--no-clobber"] = true if (argv_vals["--level"] === undefined) { argv_vals["--level"] = 5 } if (argv_vals["--level"] === 0) { argv_vals["--level"] = Number.MAX_SAFE_INTEGER } if (argv_vals["--level"] < 0) { argv_vals["--level"] = 0 } argv_vals["--exclude-host"] = normalize_hosts_list(argv_vals, argv_vals["--exclude-host"]) argv_vals["--include-host"] = normalize_hosts_list(argv_vals, argv_vals["--include-host"]) if (!argv_vals["--force-html"].length) { argv_vals["--force-html"].push( /^[^\?#]+(?:\.(?:cgi|pl|php[3-5]?|py|asp[x]?|[psx]?html?)|\/)(?:[\?#].*)?$/i ) } } } const normalize_hosts_list = (argv_vals, hosts) => { if (Array.isArray(hosts) && hosts.length) { hosts = hosts.map(hostname => normalize_hostname(argv_vals, hostname)) hosts = hosts.filter(hostname => !!hostname) return hosts } else { return [] } } const normalize_hostname = (argv_vals, hostname) => { if (hostname && (typeof hostname === 'string')) { hostname = hostname.toLowerCase() if (argv_vals["--span-subdomains"]) { // normalize hostname to only contain the top 2x levels of the domain hostname = hostname.split('.') if (hostname.length > 2) { hostname = hostname.slice(hostname.length - 2, hostname.length) } hostname = hostname.join('.') } return hostname } return '' } module.exports = { add_argv_flags, add_argv_flag_aliases, process_argv_vals, normalize_hostname }