UNPKG

site-validator-cli

Version:

A command line tool that takes a URL or a file, then uses html-validator (a wrapper for https://validator.w3.org/nu/) to validate each page.

github.com/p1ho/site-validator-cli

p1ho/site-validator-cli

68 lines (56 loc) • 1.92 kB

JavaScript

'use strict' const path = require('path') const Cache = require('../Cache') const Crawler = require('simplecrawler') const { greenOnBlack } = require('../clc') module.exports = (url, cacheTime, clearCache) => { return new Promise(function (resolve, reject) { try { const urlHash = require('crypto').createHash('sha1').update(url).digest('hex') var cachePath = path.resolve(`${__dirname}/../../cache/sitemap/`) var cache = new Cache(urlHash, cachePath, cacheTime) var cacheOld if (clearCache) { cache.remove() console.log(`\nCache cleared for ${url}`) } else { cacheOld = cache.getKey(url) } if (cacheTime !== false && cacheOld !== undefined) { console.log('\n' + greenOnBlack('Success') + ` Site URLs Cache found for ${url}`) console.log('\n' + 'Pages found in cache') cacheOld.forEach(e => { console.log(greenOnBlack(e)) }) return resolve(cacheOld) } else { console.log(`\nFetching Site Urls for ${url} ...`) console.log('This may take a while...\n') } console.log('Pages crawled:') var urls = [] var crawler = Crawler(url) crawler.addDownloadCondition((queueItem, response, callback) => { let shouldDownload try { shouldDownload = queueItem.stateData.contentType.includes('text/html') } catch (err) { shouldDownload = false } callback(null, shouldDownload) }) crawler.on('fetchcomplete', (queueItem, data, res) => { urls.push(queueItem.url) console.log(greenOnBlack(queueItem.url)) }) crawler.on('complete', async () => { if (cacheTime !== false) { cache.setKey(url, urls) cache.save() } return resolve(urls) }) crawler.start() } catch (error) { return reject(error) } }) }