mega-scraper
Version:
scrape a website's content.
169 lines (154 loc) • 4.25 kB
HTML
<!doctype html>
<!--
  mega-scraper status page.
  The doctype above switches the browser to standards mode, which the
  Bootstrap 4 stylesheet loaded below expects.
-->
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <meta http-equiv="X-UA-Compatible" content="ie=edge">
  <title>mega-scraper</title>
  <!-- Third-party stylesheet pinned with Subresource Integrity. -->
  <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css" integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T" crossorigin="anonymous">
</head>
<body>
  <!-- Render target: the inline script replaces this element's content on every SSE message. -->
  <main id="main"></main>
</body>
<script>
/*
 * Live status dashboard.
 *
 * Subscribes to the `/sse` endpoint and, for every message, re-renders the
 * `#main` element from the `data` object carried in the JSON payload.
 * The payload contains scraped (untrusted) content, so every dynamic value is
 * HTML-escaped before it is written with `innerHTML`.
 *
 * The EventSource instance stays exposed as `window.eventSource`.
 */
(function () {
  'use strict'

  const main = document.getElementById('main')

  // Hide an optional loading indicator, if the page has one.
  const loading = document.getElementById('loading')
  if (loading) loading.style.display = 'none'

  const ROW_BREAK = '<div class="w-100"></div>'
  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }

  /**
   * Escapes a value for safe use in HTML text and double-quoted attributes.
   * `null` and `undefined` become an empty string instead of literal text.
   */
  function escapeHtml (value) {
    const text = value == null ? '' : String(value)
    return text.replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch])
  }

  /**
   * Builds a new-tab link, or plain escaped text when `url` is not http(s).
   * Refusing other schemes keeps `javascript:` URLs out of `href`.
   */
  function externalLink (url, label) {
    if (!/^https?:\/\//i.test(String(url))) return escapeHtml(label)
    return `<a href="${escapeHtml(url)}" target="_blank" rel="noopener noreferrer">${escapeHtml(label)}</a>`
  }

  /**
   * Formats a timestamp (epoch milliseconds as number or numeric string, or a
   * date string) as ISO 8601. Returns 'n/a' instead of throwing when the value
   * cannot be turned into a valid date.
   */
  function formatTimestamp (value) {
    const numeric = Number(value)
    const date = new Date(Number.isNaN(numeric) ? value : numeric)
    return Number.isNaN(date.getTime()) ? 'n/a' : date.toISOString()
  }

  /**
   * Items per second with one decimal. Falls back to '0.0' when the result is
   * not finite (for example when no time has elapsed yet).
   */
  function perSecond (count, elapsedMs) {
    const rate = Number(count) / (Number(elapsedMs) / 1000)
    return Number.isFinite(rate) ? rate.toFixed(1) : '0.0'
  }

  /**
   * Accepts an array or a JSON-encoded array; anything else yields an empty
   * list so one malformed field does not abort the whole render.
   */
  function parseList (value) {
    if (Array.isArray(value)) return value
    try {
      const parsed = JSON.parse(value)
      return Array.isArray(parsed) ? parsed : []
    } catch (err) {
      console.error('could not parse list field', err)
      return []
    }
  }

  /** One labelled figure. `valueHtml` must already be escaped by the caller. */
  function statCell (label, valueHtml, margin) {
    return `
      <div class="col ${margin}">
        <h6>${label}</h6>
        <h1>${valueHtml}</h1>
      </div>`
  }

  /** Horizontally scrolling strip of screenshot thumbnails. */
  function renderScreenshots (names) {
    const items = names.map((name) => {
      const src = `/screenshots/${escapeHtml(name)}`
      return `
        <div class="col">
          <a href="${src}" target="_blank" rel="noopener noreferrer">
            <img width="250" src="${src}" alt="screenshot ${escapeHtml(name)}">
          </a>
        </div>`
    }).join('')
    return `
      <div class="col m-2">
        <h6>screenshots</h6>
        <div class="container" style="max-height: 30em; overflow-y: scroll;">
          <div class="row" style="margin: 1em auto; flex-wrap: nowrap;">${items}</div>
        </div>
      </div>`
  }

  /** One review row: metadata badges on the left, truncated text on the right. */
  function renderReview (review) {
    const r = review || {}
    // Optional fields are only added when present, so no "undefined" text leaks out.
    const badges = [`${escapeHtml(r.stars || 0)}⭐️`]
    if (r.dateString) badges.push(escapeHtml(r.dateString))
    if (r.hash) badges.push(`${escapeHtml(String(r.hash).substring(0, 10))}..`)
    if (r.asin) badges.push(`asin ${escapeHtml(r.asin)}`)
    if (r.pageNumber) badges.push(`page ${escapeHtml(r.pageNumber)}`)
    if (r.url) badges.push(externalLink(r.url, 'url'))
    const badgeHtml = badges.map((content) => `<span class="badge">${content}</span>`).join('<br>')
    // Truncate first, then escape, so an entity is never cut in half.
    const text = escapeHtml(String(r.text || '').substring(0, 500))
    return `
      <div class="row" style="margin: 1em auto;">
        <div class="col-sm-2">${badgeHtml}</div>
        <div class="col">${text}</div>
      </div>`
  }

  /** Latest reviews followed by a raw JSON dump of the whole payload. */
  function renderReviews (data) {
    const rows = parseList(data.lastTenScrapedReviews).map(renderReview).join('')
    return `
      <div class="col m-2">
        <h6>reviews</h6>
        <div class="container" style="max-height: 30em; overflow-y: scroll;">${rows}</div>
        <br>
        <pre>${escapeHtml(JSON.stringify(data, null, 2))}</pre>
      </div>`
  }

  /** Builds the full dashboard markup for one payload. */
  function render (data) {
    const cells = [
      statCell('start', formatTimestamp(data.start), 'm-1'),
      statCell('elapsed', `${escapeHtml(data.elapsed)}ms`, 'm-1'),
      ROW_BREAK
    ]
    if (data.finish > 0) {
      cells.push(statCell('finish', formatTimestamp(data.finish), 'm-1'), ROW_BREAK)
    }
    cells.push(statCell('url', externalLink(data.url, data.url), 'm-1'), ROW_BREAK)
    if (data.scrapedReviewsCount) {
      cells.push(
        statCell('reviews', escapeHtml(data.scrapedReviewsCount), 'm-2'),
        statCell('reviews / s', perSecond(data.scrapedReviewsCount, data.elapsed), 'm-2')
      )
    }
    if (data.scrapedPages) {
      cells.push(
        ROW_BREAK,
        statCell('scraped pages', escapeHtml(data.scrapedPages), 'm-2'),
        statCell('pages / s', perSecond(data.scrapedPages, data.elapsed), 'm-2')
      )
    }
    if (data.lastTenScreenshots) {
      cells.push(ROW_BREAK, renderScreenshots(parseList(data.lastTenScreenshots)))
    }
    cells.push(ROW_BREAK)
    if (data.lastTenScrapedReviews) {
      cells.push(renderReviews(data))
    }
    return `
      <div class="container">
        <div class="row">${cells.join('')}</div>
      </div>`
  }

  const eventSource = new EventSource('/sse')
  window.eventSource = eventSource
  eventSource.onmessage = (message) => {
    console.log(message)
    let data
    try {
      const payload = JSON.parse(message.data)
      data = payload && payload.data
    } catch (err) {
      console.error('ignoring malformed SSE payload', err)
      return
    }
    // Missing OR empty data means the run has not reported anything yet.
    if (!data || Object.keys(data).length === 0) {
      main.innerHTML = '<h1>starting..</h1>'
      return
    }
    main.innerHTML = render(data)
  }
})()
</script>
</html>