UNPKG

robots-txt-component

Version:

Lightweight robots.txt parsing component without any external dependencies for Node.js.

84 lines (66 loc) 2.06 kB
function prepareUrl(link) { let url = new URL(link); url.search = ''; url.searchParams = new URLSearchParams({}); url.hash = ''; url.pathname = ''; url.port = ''; url.username = ''; url.password = ''; url.pathname = '/robots.txt'; return url; } async function makeRequest(handler, link) { return await new Promise(resolve => { let headers = { 'Accept': '*/*', 'User-Agent': 'NodeAgent' }; handler.get(link,{ headers }, res => { if (res.statusCode === 301) { let locationHeader = res.headers['location']; if (locationHeader !== '' || locationHeader !== null || locationHeader !== undefined) { resolve({ redirection: true, location: locationHeader }); } } if (res.statusCode !== 200) { resolve(null); } res.setEncoding('utf8'); res.on('data', body => { resolve(body); }) }) }) } module.exports = async (link) => { let url = prepareUrl(link); let robotsLink = url.toString(); let request = null; if (url.protocol.startsWith('https')) { request = require('https'); } else { request = require('http'); } let response = await makeRequest(request, robotsLink); if (typeof response === 'string') { return response; } let rawContent = null; if (response !== null && typeof response === 'object') { if (response.redirection === true) { link = response.location; url = prepareUrl(link); if (url.protocol.startsWith('https')) { request = require('https'); } else { request = require('http'); } response = await makeRequest(request, url.toString()); if (typeof response === 'string') { rawContent = response; } } } return rawContent; }