html-get
Version:
Get the HTML from any website, fine-tuned for correctness & speed
45 lines (37 loc) • 1.12 kB
JavaScript
const createBrowserless = require('browserless')
const { URL } = require('url')
const mri = require('mri')
const getHTML = require('..')
// Parse the command line: positional values are collected under `_`, `--debug`
// switches the output to a short summary, and every other flag is gathered
// into `args` (spread into the getHTML options further down).
const { _: positionals, debug: isDebug, ...args } = mri(process.argv.slice(2))

// `_` is an array. Pick the first positional explicitly instead of handing the
// whole array to the URL constructor and relying on its string coercion, which
// only works by accident when exactly one positional is supplied.
const [input] = positionals

if (input === undefined) {
  console.error('Missing URL argument. Usage: html-get <url> [--debug]')
  process.exit(1)
}

// Normalise the target. `new URL` throws on a malformed value; doing this
// before the browserless factory is created means bad input fails first.
const url = new URL(input).toString()

const browserlessFactory = createBrowserless()

// One context is requested up front; getBrowserless hands back that same
// value on every call, whatever arguments it receives.
const browserContext = browserlessFactory.createContext()
const getBrowserless = () => browserContext
/**
 * Build the text printed in `--debug` mode.
 *
 * The output is the same multi-line summary as before: url, HTML size in
 * bytes with the HTTP status, timing with the mode used, and the response
 * headers as space-terminated `key=value` pairs (or `-` when there are none).
 *
 * @param {object} result - The value getHTML resolved with.
 * @returns {string} The summary, with a leading and trailing newline.
 */
const formatSummary = ({ html, stats, headers, statusCode }) => {
  const headerList = headers
    ? Object.keys(headers).reduce(
      (acc, key) => `${acc}${key}=${headers[key]} `,
      ''
    )
    : '-'

  return [
    '',
    `url: ${url}`,
    `html: ${Buffer.from(html).byteLength} bytes (HTTP ${statusCode})`,
    `time: ${stats.timing} (${stats.mode})`,
    `headers: ${headerList}`,
    ''
  ].join('\n')
}

/**
 * Release the browser context and then the factory.
 *
 * getBrowserless ignores its arguments and returns the context value itself,
 * so the context is awaited here and destroyContext is called on it directly.
 * The factory is closed even when destroying the context fails.
 *
 * @returns {Promise<void>}
 */
const releaseBrowser = async () => {
  try {
    const browserless = await getBrowserless()
    await browserless.destroyContext()
  } finally {
    await browserlessFactory.close()
  }
}

/**
 * Fetch the page, print either the HTML or the debug summary, clean up, and
 * exit with 0 on success or 1 when the fetch failed.
 *
 * process.exit is called only after cleanup has finished. Calling it inside
 * the fetch handlers would terminate the process before any cleanup could run.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  let exitCode = 0

  try {
    const result = await getHTML(url, { getBrowserless, ...args })
    console.log(isDebug ? formatSummary(result) : result.html)
  } catch (error) {
    console.error(error)
    exitCode = 1
  }

  try {
    await releaseBrowser()
  } catch (error) {
    // Cleanup is best effort: report the problem but keep the exit code that
    // reflects whether the HTML was actually retrieved.
    console.error(error)
  }

  process.exit(exitCode)
}

main()