UNPKG

html-get

Version:

Get the HTML from any website, fine-tuned for correction & speed

45 lines (37 loc) 1.12 kB
#!/usr/bin/env node

'use strict'

const createBrowserless = require('browserless')
const { URL } = require('url')
const mri = require('mri')

const getHTML = require('..')

/**
 * Build the report printed when the `--debug` flag is passed.
 *
 * @param {string} url - Normalized URL that was requested.
 * @param {object} result - Value resolved by `getHTML`.
 * @param {string} result.html - Markup that was retrieved.
 * @param {object} result.stats - Read for its `timing` and `mode` fields.
 * @param {object} [result.headers] - Response headers; `-` is printed when absent.
 * @param {number} result.statusCode - HTTP status code of the response.
 * @returns {string} Multi-line, human readable summary.
 */
const formatDebug = (url, { html, stats, headers, statusCode }) => {
  // Each pair keeps its trailing space so the header line matches the
  // output of the previous reduce-based implementation.
  const headerList = headers
    ? Object.entries(headers)
        .map(([key, value]) => `${key}=${value} `)
        .join('')
    : '-'

  return `
       url: ${url}
      html: ${Buffer.from(html).byteLength} bytes (HTTP ${statusCode})
      time: ${stats.timing} (${stats.mode})
   headers: ${headerList}
`
}

/**
 * Destroy the browser context and close the browser factory.
 *
 * Failures are logged instead of thrown so that a cleanup problem never hides
 * the error (if any) raised by the main request.
 *
 * @param {object} browserlessFactory - Value returned by `createBrowserless()`.
 * @param {object|Promise<object>} browserContext - Value returned by
 *   `browserlessFactory.createContext()`.
 * @returns {Promise<void>}
 */
const releaseBrowser = async (browserlessFactory, browserContext) => {
  try {
    // `await` accepts both a promise and a plain value, so this works
    // regardless of whether `createContext()` is asynchronous.
    const browserless = await browserContext
    await browserless.destroyContext()
  } catch (error) {
    console.error(error)
  }

  try {
    await browserlessFactory.close()
  } catch (error) {
    console.error(error)
  }
}

/**
 * CLI entry point: fetch the HTML of the URL given as first positional
 * argument and print it (or a debug summary when `--debug` is set).
 * Every other flag is forwarded untouched to `getHTML`.
 *
 * @returns {Promise<void>} Rejects when the URL is invalid or the request fails.
 */
const main = async () => {
  const {
    _: [input],
    debug: isDebug,
    ...args
  } = mri(process.argv.slice(2))

  // Validate the URL before creating the browser factory so a missing or
  // malformed argument fails fast; `new URL(undefined)` throws as well.
  const url = new URL(input).toString()

  const browserlessFactory = createBrowserless()
  const browserContext = browserlessFactory.createContext()
  const getBrowserless = () => browserContext

  try {
    const result = await getHTML(url, { getBrowserless, ...args })
    console.log(isDebug ? formatDebug(url, result) : result.html)
  } finally {
    await releaseBrowser(browserlessFactory, browserContext)
  }
}

// Exit explicitly, but only once `main` (cleanup included) has settled:
// calling `process.exit()` earlier would skip the browser teardown.
main()
  .then(() => process.exit())
  .catch(error => {
    console.error(error)
    process.exit(1)
  })