websites
Version:
Top 100,000 websites.
31 lines (26 loc) • 694 B
JavaScript
const request = require('request')
const unzip = require('unzip')
const csv = require('csvtojson')
const fs = require('fs-extra')
const path = require('path')
// Domain names gathered from the ranking CSV, kept in the order they were read.
const domains = []

/**
 * Serialises the collected `domains` array to `websites.json`, located in the
 * same directory as this script.
 *
 * @returns {*} whatever `fs.writeJson` hands back; the caller chains `.then`
 *   on it, so it is expected to be a promise.
 */
const save = () => fs.writeJson(path.join(__dirname, 'websites.json'), domains)
// How many of the top-ranked domains end up in websites.json.
const LIMIT = 1e5

// Flips to true as soon as the final write has been started. Rows that were
// already buffered keep arriving after `entry.pause()`, so this guard stops
// them from being collected or from triggering a second write.
let finished = false

/**
 * Reports a fatal error and exits with a non-zero status.
 *
 * @param {Error} err - the error raised by the download, unzip, CSV or write step.
 */
const fail = err => {
  console.error(err)
  process.exit(1)
}

/**
 * Persists the collected domains exactly once, then terminates the process.
 * Safe to call repeatedly; only the first call has any effect.
 */
const finish = () => {
  if (finished) return
  finished = true
  save()
    .then(() => process.exit(0))
    .catch(fail)
}

// Download the zipped ranking, stream-unzip it and parse each entry as CSV,
// collecting the domain column until LIMIT domains have been gathered.
request.get('http://s3.amazonaws.com/alexa-static/top-1m.csv.zip')
  .on('error', fail)
  .pipe(unzip.Parse())
  .on('error', fail)
  .on('entry', entry => {
    entry
      // The CSV has no header line (`rank,domain` records only). Without
      // `noheader` the first record is swallowed as the header, which drops
      // the rank-1 domain and forces lookups through the accidental nested
      // key `row.google.com`. Explicit column names avoid both problems.
      .pipe(csv({ noheader: true, headers: ['rank', 'domain'] }))
      .subscribe(
        row => {
          if (finished) return
          domains.push(row.domain)
          // `>=` so that exactly LIMIT domains are written, not LIMIT + 1.
          if (domains.length >= LIMIT) {
            entry.pause()
            finish()
          }
        },
        fail,
        // Input ended before LIMIT rows were seen: still write what was collected.
        finish
      )
  })