docparse-scraper-nst
Version:
Node Zombie-based scraper for scraping bills from the NStar (supplier code "NST") website
186 lines (173 loc) • 6.05 kB
JavaScript
var moment = require('moment');
var async = require('async');
var logger = require('../../logger');
var helper = require('../../helper');
var inspect = helper.inspect;
exports.atHomepage = function(bro) {
var nickname = bro.querySelector("td:contains('Account Nickname')");
if (nickname) {
return true;
}
return false;
}
/**
 * Removes the `selected` attribute from every entry of the account dropdown
 * (`select[name="accountNumber"]`) and then hands `cb` to `bro.wait`.
 *
 * The attribute removal runs inside the page through `bro.evaluate`; the
 * select element itself is treated as an array-like list of its options.
 *
 * NOTE(review): only the `selected` attribute is removed; presumably this is
 * enough for a following `bro.select` call to register a change - confirm.
 *
 * @param {Object} bro - Browser object exposing `evaluate(script)` and `wait(callback)`.
 * @param {Function} cb - Passed straight to `bro.wait`, which decides its arguments.
 */
function deselectAllAccounts(bro, cb) {
  // forEach rather than filter: the callback exists purely for its side effect.
  var script =
    'Array.prototype.slice.call(document.querySelector(\'select[name="accountNumber"]\'))' +
    '.forEach(function(op) { op.removeAttribute(\'selected\'); });';
  bro.evaluate(script);
  bro.wait(cb);
}
/**
 * Removes the `selected` attribute from every option of the bill-date
 * dropdown (`select[name=billDate]`).
 *
 * Iteration goes through `Array.prototype.forEach.call` so it works whether
 * `querySelectorAll` hands back a real array or only an array-like list.
 *
 * @param {Object} bro - Browser object exposing `querySelectorAll(selector)`.
 */
function deselectAllBills(bro) {
  var options = bro.querySelectorAll('select[name=billDate] option');
  Array.prototype.forEach.call(options, function (option) {
    option.removeAttribute('selected');
  });
}
exports.getAccountNumbersOnPage = function(bro, cb) {
var message;
inspect('getting account numbers');
var dropdown = bro.querySelector('select[name="accountNumber"]');
if (!dropdown) {
message = 'dropdown not found when getting number of accounts on the account homepage';
inspect(message);
return cb(message);
}
var options = bro.querySelectorAll('select[name=accountNumber] option');
if (!options) {
message = 'options not found when getting number of accounts on the account homepage';
inspect(message);
return cb(message);
}
var accountNumbers = options.map(function (option) {
var value = option.getAttribute('value');
return value;
});
var accountNames = options.map(function (option) {
return option.innerHTML;
});
options.sort(function (a, b) {
return a.getAttribute('value')-b.getAttribute('value');
});
var length = accountNumbers.length;
inspect(length,'number of accounts on homepage');
return cb(null, options);
}
/**
 * Reads the account currently selected in the account dropdown.
 *
 * @param {Object} bro - Browser object exposing `querySelectorAll(selector)`.
 * @returns {?{accountNumber: *, accountName: *}} The selected option's `value`
 *   attribute and `innerHTML`, or `null` when no option or more than one
 *   option matches `option:selected` (the latter is also logged as an error).
 */
function getSelectedAccountData(bro) {
  var matches = bro.querySelectorAll('select[name=accountNumber] option:selected');
  var matchCount = matches.length;

  if (matchCount === 0) {
    inspect('no accounts selected');
    return null;
  }

  if (matchCount > 1) {
    inspect('more than 1 accounts selected');
    logger.error(
      'error getting currently selected account data',
      {error: 'more than 1 account selected on account homepage'}
    );
    return null;
  }

  var chosen = matches[0];
  return {
    accountNumber: chosen.getAttribute('value'),
    accountName: chosen.innerHTML
  };
}
/**
 * Runs one attempt at switching the account dropdown to the desired account:
 * deselect everything, select the wanted option, submit the dropdown's form,
 * then hand `cb` to `bro.wait`.
 *
 * Errors reported by the deselect and select steps are logged but do not stop
 * the attempt; the existing best-effort flow is kept, and `selectAccount`
 * re-checks the selected account after each attempt anyway.
 *
 * @param {Object} data
 * @param {Object} data.bro - Browser object exposing `select`, `evaluate` and `wait`.
 * @param {Object} data.accountNumberOption - Option element whose `value`
 *   attribute identifies the account to select.
 * @param {Function} cb - Passed to the final `bro.wait`, which decides its arguments.
 */
function performSelectAccount(data, cb) {
  var bro = data.bro;

  deselectAllAccounts(bro, function (deselectErr) {
    if (deselectErr) {
      logger.error('error deselecting accounts before selecting account', {error: deselectErr});
    }

    var wantedValue = data.accountNumberOption.getAttribute('value');
    bro.select('select[name="accountNumber"]', wantedValue, function (selectErr) {
      if (selectErr) {
        logger.error('error selecting account option', {error: selectErr});
      }

      bro.evaluate('document.querySelector(\'select[name="accountNumber"]\').form.submit()');
      bro.wait(cb);
    });
  });
}
exports.selectAccount = function (data, cb) {
var maxAttemps = 2;
var attempt = 0;
var desiredAccountNumber = data.accountNumberOption.getAttribute('value');
var desiredAccountName = data.accountNumberOption.innerHTML;
var output = {
username: data.currentLogin.username,
accountName: desiredAccountName,
accountIndex: data.accountIndex,
numAccounts: data.numAccounts,
}
inspect(output, 'selecting account');
var currentAccountNumber, currentAccountName;
async.until(
function () {
var accountData = getSelectedAccountData(data.bro);
if (!accountData) {
return false;
}
currentAccountNumber = accountData.accountNumber
currentAccountName = accountData.accountName
var accountNumberCorrect = false;
var info = {
currentNumber: currentAccountNumber,
desiredNumber: desiredAccountNumber,
currentName: currentAccountName,
desiredName: desiredAccountName,
}
if (desiredAccountNumber === currentAccountNumber) {
accountNumberCorrect = true;
}
var accountNameCorrect = false;
if (desiredAccountName === currentAccountName) {
accountNameCorrect = true;
}
if (accountNumberCorrect && accountNameCorrect) {
return true;
}
return false;
},
function(selectCB) {
attempt++;
if (attempt > maxAttemps) {
return selectCB('failed to select account, max number of attempts reached');
}
performSelectAccount(data, function (err, reply) {
if (err) { return selectCB(err); }
selectCB();
});
},
function (err) {
if (err) {
inspect(err, 'done selecting account error');
return cb(err);
}
cb();
}
);
}
/**
 * Selects a bill date in the `select[name=billDate]` dropdown and submits the
 * dropdown's form.
 *
 * An error reported by `bro.select` is logged but does not stop the flow, so
 * the existing best-effort behaviour is kept; errors from the final
 * `bro.wait` are passed to `cb`.
 *
 * @param {Object} data
 * @param {Object} data.bro - Browser object exposing `select`, `evaluate` and `wait`.
 * @param {string} data.billDateString - Value handed to `bro.select`; required.
 * @param {Function} cb - Called with a message string when `billDateString`
 *   is missing, with the wait error when one occurs, otherwise with no arguments.
 * @returns {*} The result of `cb` for the missing-field case, otherwise `undefined`.
 */
exports.selectBill = function (data, cb) {
  if (!data.billDateString) { return cb('error selecting bill date, data missing "billDateString" field'); }

  deselectAllBills(data.bro);
  data.bro.select('select[name=billDate]', data.billDateString, function (selectErr) {
    if (selectErr) {
      logger.error('error selecting bill date option', {error: selectErr});
    }

    data.bro.evaluate('document.querySelector(\'select[name="billDate"]\').form.submit()');
    data.bro.wait(function (waitErr) {
      if (waitErr) { return cb(waitErr); }
      cb();
    });
  });
};
exports.getBillDates = function (data, callback) {
if (!data.accountNumber) { return callback('error getting bill numbers, "accountNumber" field missing from data'); }
var dates = data.bro.querySelectorAll('select[name=billDate] option' );
var numbers = [];
var invalidDateFound = false;
var output = dates.map(function(option) {
var dateString = option.innerHTML;
var valid = moment(dateString, "MMM DD, YYYY").isValid();
if (!valid) {
invalidDateFound = true;
return null;
}
return dateString
})
if (invalidDateFound) {
return callback('invalid dateString found when getting bill dates on account homepage');
}
return callback(null, output);
}