@passmarked/malware
Version:
Rules that check whether the page, or pages it links to (on the same domain or external), contain unwanted software, malware or reported phishing attacks
168 lines (117 loc) • 4.32 kB
JavaScript
/**
* required modules
**/
const cheerio = require('cheerio');
const S = require('string');
const url = require('url');
const async = require('async');
const _ = require('underscore');
const Drivers = require('../drivers');
/**
* Runs the malware checks against linked pages found
* in the HTML provided
**/
module.exports = exports = function(payload, fn) {
// get the data
var data = payload.getData()
// check if external
var uri = url.parse(data.redirected || data.url);
// get the page content
payload.getPageContent(function(err, content) {
// did we get a error ?
if(err) {
// debug
payload.error('Got a error trying to get the Page Content', err);
// done
return fn(err);
}
// page content should not be blank
if(S(content || '').isEmpty() === true) return fn(null);
// list of links we end up checking
var linksToCheck = [];
// load up cheerio
var $ = cheerio.load(content);
// run it
$('a').each(function(index, elem) {
// ref of the link
var href = $(elem).attr('href');
// double check that we got a ref ...
if(S(href || '').isEmpty() === true)
return;
// if it starts with a anchor
if((href || '').indexOf('#') === 0)
return;
// if the link does not point with a protocol
// we can assume this is a local link
if(href.toLowerCase().indexOf('https://') !== 0 &&
href.toLowerCase().indexOf('http://') !== 0 &&
href.toLowerCase().indexOf('//') !== 0 &&
href.toLowerCase().indexOf('/') !== 0) {
// resolve the path to make a complete url
href = url.resolve(uri.protocol + "//" + uri.host, href);
}
// check that this is a actual url, this should stop
// a few other links like #,javascript:void(0); too
if(href.toLowerCase().indexOf('http://') !== 0 &&
href.toLowerCase().indexOf('https://') !== 0)
return;
// check that this is not a js function
if(href.match(/^[A-Za-z]+.*\(/gi) !== null)
return;
// now parse the href
var hrefUri = url.parse(href);
// could we parse the href ?
if(!hrefUri)
return;
// skip links to current page
if(uri.hostname === hrefUri.hostname &&
uri.path.toLowerCase() === hrefUri.path.toLowerCase())
return;
// add to list
linksToCheck.push(href);
});
// if not blank
if(linksToCheck.length > 0) {
// cap the amount of checks we do at 50
linksToCheck = linksToCheck.slice(0, process.env.PASSMARKED_MALWARE_LINK_LIMIT || 50);
// do the check against url
return Drivers.check(payload, linksToCheck, function(err, detections) {
// loop and add all the errors that were found
for(var i = 0; i < (detections || []).length; i++) {
// local reference
var detection = detections[i];
// parse the href
var href = url.parse(detection.url);
// build up the occurence
var occurence = {
display: 'url',
url: detection.preview,
message: '$ reported that the page hosts $',
tool: 'open',
identifiers: [ detection.source, S(detection.type).capitalize().s ]
};
// check if internal
if(href.hostname === uri.hostname) {
// add the rule
payload.addRule({
key: 'link.internal.' + detection.type.toLowerCase(),
message: 'Referenced local pages that host ' + S(detection.type).capitalize().s + ' detected',
type: 'critical'
}, occurence);
} else {
// add the rule
payload.addRule({
key: 'link.external.' + detection.type.toLowerCase(),
message: 'Referenced third party pages that host ' + S(detection.type).capitalize().s + ' detected',
type: 'warning'
}, occurence);
}
}
// finish strong
return fn(null);
});
}
// done
fn(null);
});
};