@warren-bank/node-request-cli
Version:
An extremely lightweight HTTP request client for the command-line. Supports: http, https, proxy, redirects, cookies, content-encoding, multipart/form-data, multi-threading, recursive website crawling and mirroring.
234 lines (188 loc) • 7.97 kB
JavaScript
const mkdirSync = require('@warren-bank/mkdir-sync')
const path = require('path')
const parse_url = require('url').parse
// Regular expressions shared by the helpers in this module.
const regex = {
  // Character classes that are escaped, depending on the modes listed in "--restrict-file-names".
  // All of them carry the "g" flag so that String.prototype.replace() rewrites every occurrence.
  charsets: {
    windows: /([\\\|\/\:\?"\*\<\>])/g,  // ‘\’, ‘|’, ‘/’, ‘:’, ‘?’, ‘"’, ‘*’, ‘<’, ‘>’
    unix:    /([\/])/g,                 // ‘/’
    control: /([\x00-\x1F\x80-\x9F])/g, // 0–31 and 128–159
    ascii:   /([\x80-\xFF])/g           // 128-255
  },
  // Patterns that pull the "filename" parameter out of a Content-Disposition header value.
  content_disposition_header: {
    // group 1: opening quote, group 2: filename.
    // A backreference is not valid inside a character class ("[^\1]" means "not U+0001"),
    // so a negative lookahead is used to stop at the first repetition of the opening quote.
    quoted_filename:   /filename=(['"])((?:(?!\1).)+)\1/i,
    // group 1: filename; ends at whitespace, at the ";" that separates parameters, or at end of input.
    unquoted_filename: /filename=([^\s;]+)(?:[\s;]|$)/i
  }
}
// Rewind the match position of a regular expression so that it can be reused safely.
// Any value that is not a RegExp is passed through untouched.
// Returns the same value that was passed in.
const reset_global_regex = (r) => {
  const is_regex = (r instanceof RegExp)

  if (is_regex)
    r.lastIndex = 0

  return r
}
// -----------------------------------------------------------------------------
// True when a URL path segment is a relative directory reference ("." or "..") rather than a real name.
const is_dot_segment = (segment) => (segment === '.') || (segment === '..')

// Drop empty segments and resolve "." and ".." within a split URL path.
// ".." removes the preceding segment but is clamped at the root, so the result never contains
// a segment that could make the output path climb out of its base directory.
const resolve_dot_segments = (segments) => {
  const resolved = []
  for (const segment of segments) {
    if (!segment || (segment === '.')) continue
    if (segment === '..') {
      resolved.pop()
      continue
    }
    resolved.push(segment)
  }
  return resolved
}

/**
 * Decide where the response for a request should be saved.
 *
 * The first rule that yields a path wins:
 *   1. the 2nd element of `urldata`, when `urldata` is an array
 *   2. "--output-document" (for array `urldata`: only when its value is "-")
 *   3. "--recursive": a directory tree that mirrors the URL (protocol, host, path, filename)
 *   4. "--content-disposition": the filename given by the response header
 *   5. the last segment of the URL path, or the default page name
 *
 * @param {Object} argv_vals - command-line options, keyed by flag name (ex: "--recursive")
 * @param {string|Array} urldata - a URL, or an array: [URL, output filepath]
 * @param {Object} [options]
 * @param {string} [options.url] - URL of the request; takes precedence over the URL in `urldata`
 * @param {Array} [options.redirects] - URLs of followed redirects; the last one is used in place of `url` when "--trust-server-names" is set
 * @param {Object} [options.response] - response; only `response.headers['content-disposition']` is read
 * @param {boolean} [options.is_html] - passed to escape_filename() for filenames derived by "--recursive"
 * @returns {string|undefined} "-" unchanged, otherwise a path resolved against "--directory-prefix"
 *   (or the current working directory); undefined when no rule applies
 */
const get_output_filepath = (argv_vals, urldata, {url, redirects, response, is_html} = {}) => {
  let request_url, output_filepath

  // URL of the request: prefer the explicit `url` (or the final redirect), else fall back to `urldata`
  if (!request_url && url) {
    request_url = (Array.isArray(redirects) && redirects.length && argv_vals["--trust-server-names"])
      ? redirects[redirects.length - 1]
      : url
  }
  if (!request_url && urldata) {
    request_url = Array.isArray(urldata)
      ? urldata[0]
      : urldata
  }

  // rule 1: explicit per-URL output filepath
  if (!output_filepath && Array.isArray(urldata) && (urldata.length >= 2)) {
    output_filepath = urldata[1]
  }

  // rule 2: global output document
  if (!output_filepath && argv_vals["--output-document"] && (!Array.isArray(urldata) || (argv_vals["--output-document"] === "-"))) {
    output_filepath = argv_vals["--output-document"]
  }

  // rule 3: mirror the URL as a directory tree
  if (!output_filepath && argv_vals["--recursive"] && request_url) {
    const parsed_url = parse_url(request_url)
    const escape_dir = (dir) => escape_restricted_charsets(argv_vals, dir)
    const use_windows_names = () => (argv_vals["--restrict-file-names"].indexOf("windows") >= 0)

    let dirs = []
    let fname

    if (!argv_vals["--no-directories"] && argv_vals["--protocol-directories"] && parsed_url.protocol) {
      let dir = parsed_url.protocol
      if (dir[dir.length - 1] === ':') {
        dir = dir.substring(0, dir.length - 1)
      }
      dirs.push(dir)
    }

    if (!argv_vals["--no-directories"] && !argv_vals["--no-host-directories"] && parsed_url.hostname) {
      let dir = parsed_url.hostname
      if (parsed_url.port) {
        dir += use_windows_names()
          ? '+'
          : ':'
        dir += parsed_url.port
      }
      dirs.push(dir)
    }

    {
      let parts = parsed_url.pathname ? parsed_url.pathname.split('/') : []
      let last = parts.pop()

      // a trailing "." or ".." names a directory, not a file:
      // hand it back to the directory segments and save the default page instead
      if (is_dot_segment(last)) {
        parts.push(last)
        last = ''
      }
      fname = last || argv_vals["--default-page"] || 'index.html'

      if (parts.length && !argv_vals["--no-directories"]) {
        // the URL is untrusted input: resolve dot segments before they reach path.resolve(),
        // otherwise "/../../x" would be written outside of the download directory
        parts = resolve_dot_segments(parts)

        if (argv_vals["--cut-dirs"]) {
          parts = (argv_vals["--cut-dirs"] < parts.length)
            ? parts.slice(argv_vals["--cut-dirs"])
            : []
        }
        if (parts.length) {
          dirs.push(...parts)
        }
      }

      if (parsed_url.query && !argv_vals["--no-querystring"]) {
        fname += use_windows_names()
          ? '@'
          : '?'
        fname += parsed_url.query
      }
    }

    if (fname) {
      if (dirs.length) {
        dirs = dirs.map(escape_dir)
      }
      fname = escape_filename(argv_vals, fname, is_html)
      dirs.push(fname)
      output_filepath = dirs.join(path.sep)
    }
  }

  // rule 4: filename supplied by the server
  if (!output_filepath && argv_vals["--content-disposition"] && (response instanceof Object) && (response.headers instanceof Object) && response.headers['content-disposition']) {
    let fname = extract_filename_header(response.headers['content-disposition'])
    if (fname) {
      output_filepath = escape_filename(argv_vals, fname)
    }
  }

  // rule 5: filename taken from the URL.
  // when "--content-disposition" is set, this waits until a response is available.
  if (!output_filepath && request_url && (!argv_vals["--content-disposition"] || (argv_vals["--content-disposition"] && (response instanceof Object)))) {
    const parsed_url = parse_url(request_url)
    const parts = parsed_url.pathname ? parsed_url.pathname.split('/') : []

    let fname = parts.pop()
    if (is_dot_segment(fname)) {
      // would otherwise resolve to a directory rather than to a file
      fname = ''
    }
    fname = fname || argv_vals["--default-page"] || 'index.html'

    if (fname) {
      if (parsed_url.query && !argv_vals["--no-querystring"]) {
        fname += (argv_vals["--restrict-file-names"].indexOf("windows") >= 0)
          ? '@'
          : '?'
        fname += parsed_url.query
      }
      output_filepath = escape_filename(argv_vals, fname)
    }

    // fallback
    if (!output_filepath) {
      output_filepath = get_hash(request_url)
    }
  }

  // relative paths are anchored at "--directory-prefix"; "-" is passed through unchanged
  if (output_filepath && (typeof output_filepath === 'string') && (output_filepath !== "-") && !path.isAbsolute(output_filepath)) {
    const output_dir = argv_vals["--directory-prefix"] || process.cwd()
    output_filepath = path.resolve(output_dir, output_filepath)
  }

  return output_filepath
}
// -----------------------------------------------------------------------------
/**
 * Turn a candidate filename into the name that is written to disk.
 *
 * Steps, in order:
 *   - append ".html" when "--adjust-extension" is set, the content is HTML,
 *     and the name does not already end with ".html" (case-insensitive)
 *   - escape restricted characters
 *   - give the optional "change_filename" plugin a chance to replace the result
 *
 * @param {Object} argv_vals - command-line options, keyed by flag name
 * @param {string} fname - candidate filename; any value that is not a non-empty string is returned as-is
 * @param {boolean} [is_html] - whether the content is HTML
 * @returns {string} the filename to use
 */
const escape_filename = (argv_vals, fname, is_html) => {
  if (!fname || (typeof fname !== 'string'))
    return fname

  let result = fname

  // slice(-5) yields the whole name when it is shorter than 5 characters, which can never equal ".html"
  const has_html_extension = (result.slice(-5).toLowerCase() === '.html')
  if (argv_vals["--adjust-extension"] && is_html && !has_html_extension)
    result = result + '.html'

  result = escape_restricted_charsets(argv_vals, result)

  const plugins = argv_vals["--plugins"]
  const has_plugin = (plugins instanceof Object) && (plugins["change_filename"] instanceof Function)
  if (has_plugin) {
    // a falsy return value from the plugin keeps the current name
    const renamed = plugins["change_filename"](result)
    if (renamed)
      result = renamed
  }

  return result
}
/**
 * Escape the characters that are restricted by the modes listed in "--restrict-file-names",
 * then apply the optional case conversion.
 *
 * Modes: "windows", "unix" and "ascii" each enable a character class;
 * control characters are escaped unless "nocontrol" is listed;
 * "lowercase" and "uppercase" convert the final text (escape sequences included).
 *
 * @param {Object} argv_vals - command-line options; "--restrict-file-names" must support indexOf()
 * @param {string} text - a filename or directory name; any value that is not a non-empty string is returned as-is
 * @returns {string} the escaped text
 */
const escape_restricted_charsets = (argv_vals, text) => {
  if (!text || (typeof text !== 'string'))
    return text

  const modes = argv_vals["--restrict-file-names"]
  const enabled = (mode) => (modes.indexOf(mode) >= 0)

  // order matters: "windows" already covers '/', and every class is applied to the output of the previous one
  const active_charsets = [
    enabled("windows")    && regex.charsets.windows,
    enabled("unix")       && regex.charsets.unix,
    !enabled("nocontrol") && regex.charsets.control,
    enabled("ascii")      && regex.charsets.ascii
  ].filter(Boolean)

  let escaped = text
  for (const charset of active_charsets) {
    escaped = escaped.replace(reset_global_regex(charset), escape_ascii_character)
  }

  if (enabled("lowercase")) escaped = escaped.toLowerCase()
  if (enabled("uppercase")) escaped = escaped.toUpperCase()

  return escaped
}
/**
 * Percent-encode the first UTF-16 code unit of a string.
 *
 * The hexadecimal value is upper-case and left-padded to 2 digits,
 * so that code units below 0x10 produce a well-formed escape (ex: "\n" => "%0A", not "%A").
 *
 * @param {string} c - the character to escape
 * @returns {string} "%" followed by the hexadecimal code
 */
const escape_ascii_character = (c) => {
  const hex = c.charCodeAt(0).toString(16).toUpperCase()

  return '%' + ((hex.length < 2) ? ('0' + hex) : hex)
}
// -----------------------------------------------------------------------------
/**
 * Read the "filename" parameter from the value of a Content-Disposition header.
 * A quoted filename is preferred over an unquoted one.
 *
 * @param {*} header - header value; converted to a string before matching
 * @returns {string|null} the filename, or null when the header is empty or holds no filename
 */
const extract_filename_header = (header) => {
  if (!header)
    return null

  const text = String(header)
  const patterns = regex.content_disposition_header

  // [pattern, index of the capture group that holds the filename]
  const candidates = [
    [patterns.quoted_filename,   2],
    [patterns.unquoted_filename, 1]
  ]

  for (const [pattern, group] of candidates) {
    const found = text.match(pattern)
    if (found && (found.length > group))
      return found[group]
  }

  return null
}
// -----------------------------------------------------------------------------
/**
 * Compute the hexadecimal digest of some data.
 *
 * @param {string|Buffer} data - content to hash
 * @param {string} [algorithm='sha256'] - name of a hash algorithm known to the "crypto" module
 * @returns {string} the digest, encoded as hexadecimal
 */
const get_hash = (data, algorithm='sha256') => {
  // loaded on demand: only the fallback filename needs it
  const {createHash} = require('crypto')

  return createHash(algorithm).update(data).digest('hex')
}
// -----------------------------------------------------------------------------
/**
 * Create the directory that will contain an output file, including any missing ancestors.
 *
 * @param {string} output_filepath - path of the file that is about to be written
 */
const make_parent_directory = (output_filepath) => {
  // no existence check is needed beforehand:
  // the polyfill library silently ignores an error when e.code is 'EEXIST'
  mkdirSync(path.dirname(output_filepath), {recursive: true})
}
// -----------------------------------------------------------------------------
// public API: derive the output filepath for a download, and create the directory that will contain it
module.exports = {get_output_filepath, make_parent_directory}