aps-data-api
Version:
package for data extraction from APS company for omnimetic project
887 lines (734 loc) • 27 kB
text/typescript
import axios, { AxiosRequestConfig, AxiosRequestHeaders } from 'axios';
import config from 'config';
import fs from 'fs';
import path from 'path';
import { PDFExtract } from 'pdf.js-extract';
import puppeteer from 'puppeteer';
import request from 'request';
import tunnel from 'tunnel';
import {
AxiosApiResponse,
BatchResponse,
BrowserInterface,
Cookie,
Errors,
GetDataPayload,
Logger,
LooseObject,
RequestHeaders,
UploadFileCallback,
} from '../../typings';
import { HourlyUtilityData, MonthlyUtilityData } from '../../models';
import {
UtilityServiceAccountResponse,
UtilityUserAccountResponse,
Workflow,
} from '../../services';
import { DateHelper } from '../../helpers';
export class APSWorkflow extends Workflow {
logger?: Logger;
/**
 * Creates the workflow and loads its settings from the `workflow.json`
 * file located in the same directory as this module.
 */
constructor() {
  super();
  const configFile = path.join(path.resolve(__dirname), 'workflow.json');
  this.config = this.readConfig(configFile);
}
/**
 * Launches a Puppeteer browser and stores it on `this.browser`.
 * The browser is started with the sandbox switched off and the
 * `NetworkService` feature enabled.
 */
async initializeBrowser() {
  const launchArgs = [
    '--enable-features=NetworkService',
    '--disable-setuid-sandbox',
    '--no-sandbox',
  ];
  this.browser = await puppeteer.launch({ args: launchArgs });
}
/**
 * Opens the APS login page in a new tab and submits the user's credentials.
 *
 * Launches the browser on first use, enables request interception, installs
 * the request handler through `setPageOnEvent`, navigates to `companyUrl`,
 * fills in the username and password fields and triggers `this.click('Enter')`.
 *
 * @param payload supplies the `username` and `password` that are typed in.
 * @param tunnelingAgentOptions forwarded unchanged to `setPageOnEvent`.
 * @returns resolves with no value once the form has been submitted; rejects
 * with `Errors.DataExtractionError` when any step fails.
 */
async login(payload: GetDataPayload, tunnelingAgentOptions: LooseObject) {
  const { companyUrl, dataExtractionError, passwordCssId, usernameCssId } =
    this.config.login;
  try {
    this.logger.info('[APS] Login to APS process start');
    if (!this.browser) {
      await this.initializeBrowser();
    }
    this.page = await this.browser.newPage();
    const page = this.page;
    page.setDefaultTimeout(config.get('puppeteer.defaultTimeout'));
    await page.setRequestInterception(true);
    // Uncomment below code to see browser console logs
    // page.on('console', (message) => {
    //   this.logger.info(
    //     `[APS] Browser Console: ${message.type()}, ${message.text()}`,
    //   );
    // });
    this.setPageOnEvent(page, tunnelingAgentOptions);
    const { password, username } = payload;
    await page.goto(companyUrl, {
      waitUntil: 'networkidle0',
    });
    await page.waitForSelector(usernameCssId);
    await this.typeValueForField(usernameCssId, username);
    await this.typeValueForField(passwordCssId, password);
    await this.click('Enter');
    this.logger.info('[APS] Login to APS process end');
  } catch (err) {
    // The failure may have happened before a browser existed (for example
    // the launch itself failed). Cleaning up must never replace the original
    // error, so the close is guarded and its own failure is only logged.
    if (this.browser) {
      try {
        await this.browser.close();
      } catch (closeErr) {
        if (this.logger) {
          this.logger.error(`[APS] Browser close error: ${closeErr}`);
        }
      }
    }
    return Promise.reject(
      new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
    );
  }
}
/**
 * Collects the user's account and builds one service account per premise.
 *
 * User details come from `getUserDetails()`; the mailing address is scraped
 * from the customer profile page and shared by every service account that is
 * returned. The browser is closed before this method settles.
 *
 * @returns a `UtilityUserAccountResponse` with the account id, the display
 * name and the list of service accounts.
 * Rejects with whatever `getUserDetails()` rejects with, or with
 * `Errors.DataExtractionError` when scraping the profile page fails.
 */
async getServiceAccounts() {
  const page = this.page;
  const { addressDataSelector, customerProfileUrl, dataExtractionError } =
    this.config.getServiceAccounts;
  let userDetails;
  try {
    userDetails = await this.getUserDetails();
  } catch (err) {
    // getUserDetails() closes the browser only on an authentication failure;
    // on every other failure the browser would otherwise be left running.
    // The close is guarded because the browser may already be closed.
    try {
      await this.browser.close();
    } catch (closeErr) {
      this.logger.error(`[APS] Browser close error: ${closeErr}`);
    }
    // Re-throw unchanged so callers still see the original error type.
    throw err;
  }
  try {
    const { id, name, premiseDetailsList, legalNames, meterNumber } =
      userDetails;
    await page.goto(customerProfileUrl);
    await page.waitForSelector(addressDataSelector);
    const addressData = await page.$eval(addressDataSelector, (element) => {
      return element.textContent;
    });
    // addressData = 'Mailing address416115 N JW Marriot RD # 120-156, SCOTTSDALE,
    // ARIZONA,85641, USA'
    // The first 15 characters are the 'Mailing address' label.
    const address = addressData.slice(15).split(',');
    const city = address[1].trim();
    const postalCode = address[3].trim();
    const state = address[2].trim();
    const street = address[0];
    const serviceAccounts: UtilityServiceAccountResponse[] = [];
    for (const premiseDetails of premiseDetailsList) {
      serviceAccounts.push(
        new UtilityServiceAccountResponse({
          id: premiseDetails.premiseID,
          address: {
            city,
            postalCode,
            state,
            street,
          },
          meterNumber,
          legalNames,
        }),
      );
    }
    const response: UtilityUserAccountResponse =
      new UtilityUserAccountResponse({
        id,
        name,
        serviceAccounts,
      });
    return response;
  } catch (err) {
    return Promise.reject(
      new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
    );
  } finally {
    await this.browser.close();
  }
}
/**
 * Downloads each bill PDF listed on the bill details page and extracts the
 * monthly figures from it.
 *
 * Bills whose month/year label equals the label of the previous row are
 * skipped. A failure on a single bill is logged and that bill is omitted from
 * the result; the remaining bills are still processed. The browser is closed
 * before this method settles.
 *
 * @returns one `MonthlyUtilityData` per successfully processed bill.
 * Rejects with whatever `getUserDetails()` rejects with, or with
 * `Errors.DataExtractionError` when the bill list itself cannot be read.
 */
async getMonthlyUtilityData(): Promise<MonthlyUtilityData[]> {
  const page = this.page;
  const { billDetailsUrl, dataExtractionError } =
    this.config.getMonthlyUtilityData;
  let userDetails;
  try {
    userDetails = await this.getUserDetails();
  } catch (err) {
    // getUserDetails() closes the browser only on an authentication failure;
    // on every other failure the browser would otherwise be left running.
    // The close is guarded because the browser may already be closed.
    try {
      await this.browser.close();
    } catch (closeErr) {
      this.logger.error(`[APS] Browser close error: ${closeErr}`);
    }
    // Re-throw unchanged so callers still see the original error type.
    throw err;
  }
  try {
    const { premiseDetailsList } = userDetails;
    // Only the first premise is used as the service account id.
    const externalServiceAccountId = premiseDetailsList[0].premiseID;
    const { billsData, billsDateString } = await this.getBillsData();
    let i = -1;
    const userMonthlyUtilityDataList: MonthlyUtilityData[] = [];
    for (const bill of billsData) {
      try {
        i += 1;
        // For i === 0 the previous label is undefined, so the first bill is
        // always processed.
        if (billsDateString[i] !== billsDateString[i - 1]) {
          const relativeDownloadFilePath = `service_acccounts/${externalServiceAccountId}/bills/${billsDateString[i]}`;
          // NOTE(review): the second argument starts with '/', so on POSIX
          // the result is that absolute path and __dirname does not
          // contribute to it.
          const downloadPath = path.resolve(
            __dirname,
            `/tmp/downloads/${relativeDownloadFilePath}`,
          );
          const client = await page.target().createCDPSession();
          await client.send('Page.setDownloadBehavior', {
            downloadPath,
            behavior: 'allow',
          });
          await bill.click();
          await page.waitForResponse((request) =>
            request.url().startsWith(billDetailsUrl),
          );
          this.logger.info(`[APS] Downloading pdf ${i}`);
          // Fixed pause (config 'apsDownloadPDF.timeOut') before the
          // downloaded file is read.
          await new Promise((resolve) =>
            setTimeout(resolve, config.get('apsDownloadPDF.timeOut')),
          );
          const monthlyData = await this.extractPdfData(
            downloadPath,
            billsDateString[i],
            externalServiceAccountId,
          );
          userMonthlyUtilityDataList.push(monthlyData);
          fs.unlinkSync(`${downloadPath}/bill.pdf`);
        }
      } catch (err) {
        this.logger.error(`[APS] Downloading PDF error: ${err.message}`);
      }
    }
    return userMonthlyUtilityDataList;
  } catch (err) {
    return Promise.reject(
      new Errors.DataExtractionError(
        `${dataExtractionError}: ${err.message}`,
      ),
    );
  } finally {
    await this.browser.close();
  }
}
/**
 * Captures the request headers and POST body that the site itself sends to
 * the interval-usage endpoint, so the same call can be replayed later.
 *
 * A response listener records the headers and body of any request whose URL
 * starts with `intervalUsageUrl`. After waiting for a navigation and loading
 * `utilityDataUrl`, the `authorization` and `ocp-apim-subscription-key`
 * headers are returned together with a `Cookie` header built from every
 * cookie reported by the browser. The browser is closed before this method
 * settles.
 *
 * @returns the reduced header set and the captured request body.
 * Rejects with `Errors.DataExtractionError` when any step fails.
 */
async getReqHeaderAndBody(): Promise<RequestHeaders> {
  const page = this.page;
  const { utilityDataUrl, dataExtractionError, intervalUsageUrl } =
    this.config.hourlyUtilityDataConfig;
  try {
    this.logger.info('[APS] Fetching request headers and body start');
    let capturedHeaders: Record<string, string> = {};
    let requestBody: string;
    await page.setRequestInterception(true);
    page.on('response', async (response) => {
      if (!response.url().startsWith(intervalUsageUrl)) {
        return;
      }
      const originatingRequest = response.request();
      capturedHeaders = originatingRequest.headers();
      requestBody = originatingRequest.postData();
    });
    await (page as any).waitForNavigation({ waitUntil: 'networkidle2' });
    await page.goto(utilityDataUrl, { waitUntil: 'networkidle0' });
    const cdpSession = await page.target().createCDPSession();
    const { cookies } = await cdpSession.send('Network.getAllCookies');
    // Every entry keeps its trailing '; ' separator, including the last one.
    const cookieHeader = cookies
      .map((cookie) => cookie.name + '=' + cookie.value + '; ')
      .join('');
    const newHeaders: Record<string, string> = {
      authorization: capturedHeaders['authorization'],
      'ocp-apim-subscription-key': capturedHeaders['ocp-apim-subscription-key'],
      Cookie: cookieHeader,
    };
    this.logger.info('[APS] Fetching request headers and body end');
    return { headers: newHeaders, requestBody };
  } catch (err) {
    return Promise.reject(
      new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
    );
  } finally {
    await this.browser.close();
  }
}
/**
 * Requests usage data for a window of days that ends the day before
 * `nextDate`, walking backwards one day per request.
 *
 * Up to `batchesPerLoop` batches are built, each holding up to `batchSize`
 * POST configs. Every config is a fresh parse of `body` with both
 * `billCycleStartDate` and `billCycleEndDate` set to the same formatted day.
 *
 * @param headers headers attached to every request.
 * @param body JSON string used as the template for each request body; its
 * `ratePlan[0].startDate` marks where extraction stops.
 * @param nextDate exclusive upper bound of the window; the first requested
 * day is `nextDate` minus one day.
 * @param finishDate optional day at which extraction also stops.
 * @returns the collected data plus `nextBatchDate`: `nextDate` moved back by
 * `batchSize * batchesPerLoop` days, or `null` once a stop date was reached.
 * Rejects with `Errors.DataExtractionError` when anything throws.
 */
async makeBatchRequest(
headers: AxiosRequestHeaders,
body: string,
nextDate: Date,
finishDate?: Date,
): Promise<BatchResponse> {
const { batchSize, batchesPerLoop, dataExtractionError, requestUrl } =
this.config.hourlyUtilityDataConfig;
try {
this.logger.info('[APS] Batch requests function start');
let batchRequestConfigsArray: AxiosRequestConfig<LooseObject>[][] = [];
// Start date of the next call, assuming no stop date is hit in this one.
let nextDateForExtraction: Date = new Date(nextDate);
nextDateForExtraction.setDate(
nextDate.getDate() - batchSize * batchesPerLoop,
);
for (let i = 0; i < batchesPerLoop; i++) {
let batchRequestConfigs: AxiosRequestConfig<LooseObject>[] = [];
// j is the day offset back from nextDate; batch i covers the offsets
// batchSize * i + 1 through batchSize * (i + 1).
for (let j = batchSize * i + 1; j <= batchSize * i + batchSize; j++) {
let prevDate = new Date(nextDate);
prevDate.setDate(nextDate.getDate() - j);
const prevDateString = DateHelper.formatDate(prevDate);
// Parsed anew on every iteration so each config owns its body object.
let configRequestBody: LooseObject = JSON.parse(body);
if (
prevDateString === configRequestBody.ratePlan[0].startDate ||
prevDateString === DateHelper.formatDate(finishDate)
) {
// Checks if the loop has reached the beginning date of utility service start
// (or finishDate). The stop day itself is not requested.
nextDateForExtraction = null;
break;
}
configRequestBody['billCycleStartDate'] = prevDateString;
configRequestBody['billCycleEndDate'] = prevDateString;
// Note: this local `config` shadows the imported `config` module within
// this loop body.
const config: AxiosRequestConfig<LooseObject> = {
method: 'post',
url: requestUrl,
headers: headers,
data: configRequestBody,
};
batchRequestConfigs.push(config);
}
// The batch is pushed even when the stop date ended it early (it may be
// empty).
batchRequestConfigsArray.push(batchRequestConfigs);
if (nextDateForExtraction === null) break;
}
let batchResponse: HourlyUtilityData[] = [];
// Batches are sent in reverse build order, i.e. the batch with the
// largest day offsets goes first.
let k = batchRequestConfigsArray.length;
while (k >= 1) {
const axiosApiResponseArray: AxiosApiResponse[] =
await this.axiosBatchApiRequest(batchRequestConfigsArray[k - 1], 0);
// k is not decremented here, so an undefined result makes the loop
// request the same batch again.
if (axiosApiResponseArray === undefined) continue;
const hourlyDataFormatted: HourlyUtilityData[] =
await this.formatHourlyData(axiosApiResponseArray);
// Spreading into concat flattens the per-response arrays returned by
// formatHourlyData into a single list.
batchResponse = batchResponse.concat(...hourlyDataFormatted);
k--;
}
this.logger.info('[APS] Batch requests function end');
return { batchResponse, nextBatchDate: nextDateForExtraction };
} catch (err) {
return Promise.reject(
new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
);
}
}
/**
 * Sends every request of a batch concurrently and pairs each response with
 * the `billCycleStartDate` of the request that produced it.
 *
 * If any request fails, the whole batch is retried. The original error is
 * rejected once `retryCount` has reached 2, so a call that starts at 0 makes
 * at most three attempts.
 *
 * @param batchRequestConfigs axios configs to send; each `data` object must
 * carry `billCycleStartDate`.
 * @param retryCount number of retries already performed; callers pass 0.
 * @returns one `{ response, date }` entry per config, in input order.
 */
async axiosBatchApiRequest(
  batchRequestConfigs: AxiosRequestConfig<LooseObject>[],
  retryCount: number,
): Promise<AxiosApiResponse[]> {
  try {
    const res = await axios.all(
      batchRequestConfigs.map(async (requestConfig) => {
        return {
          response: await axios(requestConfig),
          date: requestConfig.data.billCycleStartDate,
        };
      }),
    );
    this.logger.info('[APS] Axios batch request sent');
    return res;
  } catch (err) {
    // `>=` rather than `===` so a caller that starts above 2 cannot recurse
    // without bound.
    if (retryCount >= 2) {
      this.logger.error('[APS] Axios request error');
      return Promise.reject(err);
    }
    // The retry result must be returned; otherwise a successful retry would
    // resolve to undefined and its data would be lost.
    return this.axiosBatchApiRequest(batchRequestConfigs, retryCount + 1);
  }
}
/**
 * Converts raw API responses into `HourlyUtilityData` objects.
 *
 * For each response, every entry of
 * `data.summarizedUsageDataResponse.dailyRatePlanUsage` becomes one
 * `HourlyUtilityData` that carries the entry's fields plus the response's
 * `date`.
 *
 * Note: the result holds one inner array per response (it is not flattened),
 * even though the declared element type is `HourlyUtilityData`.
 *
 * @param axiosApiResponseArray responses paired with their request date.
 */
async formatHourlyData(
  axiosApiResponseArray: AxiosApiResponse[],
): Promise<HourlyUtilityData[]> {
  const formatted: HourlyUtilityData[] = axiosApiResponseArray.map(
    ({ response, date }) => {
      const usageRows =
        response.data.summarizedUsageDataResponse.dailyRatePlanUsage;
      return usageRows.map(
        (usageRow: LooseObject) =>
          new HourlyUtilityData({ ...usageRow, date }),
      );
    },
  );
  return formatted;
}
/**
 * Waits for the login XHR and the user-details response triggered by the
 * login, and extracts the account information from them.
 *
 * @returns `{ id, name, premiseDetailsList, legalNames, meterNumber }` where
 * `name` is re-ordered from 'Last,First' to 'First Last', `legalNames` is the
 * de-duplicated list of person names and `meterNumber` is taken from the
 * first premise only.
 * Rejects with `Errors.AuthenticationError` when the login response reports
 * `isLoginSuccess === false` with `error === accessResult` (the browser is
 * closed first), and with `Errors.DataExtractionError` for any other failure.
 */
private async getUserDetails() {
const page = this.page;
const { accessResult, loginXhrUrl } = this.config.login;
const { userDetailsUrl, dataExtractionError } =
this.config.getServiceAccounts;
try {
const loginXhr = await page.waitForResponse((request) =>
request.url().startsWith(loginXhrUrl),
);
try {
const res = await loginXhr.json();
if (res.isLoginSuccess === false && res.error === accessResult) {
await this.browser.close();
// The rejected promise is returned without `await`, so neither the
// inner nor the outer catch intercepts it: the caller receives the
// AuthenticationError itself.
return Promise.reject(new Errors.AuthenticationError(accessResult));
}
} catch (err) {
// A body that is not valid JSON (or a failing browser close) is only
// logged; processing continues with the user-details response.
this.logger.error(`[APS] Login XHR JSON error: ${err}`);
}
const userDetailsHTTPResponse = await page.waitForResponse(
userDetailsUrl,
);
const userDetails = await userDetailsHTTPResponse.json();
// Example of the parts of the payload read below:
// userDetails = {
// Details: {
// profileData: {
// MainPersonName: 'Holmes,Sherlock',
// },
// AccountDetails: {
// getAccountDetailsResponse: {
// getAccountDetailsRes: {
// getPersonDetails: {
// accountID: '6076a3eb3d755200159dc776',
// },
// getSASPListByAccountID: {
// premiseDetailsList: [
// {
// premiseID: '6076a3eb3d755200159dc908',
// },
// ],
// },
// },
// },
// },
// },
// };
// 'Holmes,Sherlock' -> 'Sherlock Holmes'
let name = userDetails.Details.profileData.MainPersonName.split(',');
name = name[1].concat(' ', name[0]);
// The `await`s below are applied to plain property reads of the parsed JSON.
const accountDetails = await userDetails.Details.AccountDetails
.getAccountDetailsResponse.getAccountDetailsRes.getPersonDetails;
const id = await accountDetails.accountID;
const premiseDetailsList = await userDetails.Details.AccountDetails
.getAccountDetailsResponse.getAccountDetailsRes.getSASPListByAccountID
.premiseDetailsList;
// Keep the first occurrence of each (personName, personID) pair, then
// turn 'Last,First' into 'First Last'; names that do not consist of
// exactly two comma-separated parts are kept as they are.
const legalNames = accountDetails.results
.filter(
(tag: LooseObject, index: number, array: LooseObject[]) =>
array.findIndex(
(t: LooseObject) =>
t.personName === tag.personName && t.personID === tag.personID,
) === index,
)
.map((object: LooseObject) =>
object.personName.split(',').length == 2
? `${object.personName.split(',')[1]} ${
object.personName.split(',')[0]
}`
: object.personName,
);
// Only the first premise is inspected; entries with an empty sATypeDesc
// or sARatePlancCode are ignored.
const meterNumbersList = premiseDetailsList[0].sASPDetails
.filter(
(listItem: LooseObject) =>
listItem.sATypeDesc !== '' && listItem.sARatePlancCode !== '',
)
.map((user: LooseObject) => user.meterBadgeNumber);
// The last remaining badge number is used (undefined when none remain).
const meterNumber = meterNumbersList[meterNumbersList.length - 1];
return { id, name, premiseDetailsList, legalNames, meterNumber };
} catch (err) {
return Promise.reject(
new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
);
}
}
/**
 * Opens the bill details page, selects a page size large enough to list all
 * bills, and returns the bill row handles with a month/year label for each.
 *
 * The page size is the bill count (the 8th space-separated token of the
 * text at `selectValueStringSelector`) rounded up to the next multiple of 10.
 *
 * @returns `billsData` (element handles matching `billsDataSelector`) and
 * `billsDateString` (labels such as 'Sep_2021').
 */
private async getBillsData() {
  const page = this.page;
  const {
    billDetailsPage,
    billDetailsPageSelector,
    billsDataSelector,
    billsDateStringSelector,
    selectStringSelector,
    selectValueStringSelector,
  } = this.config.getMonthlyUtilityData;
  await page.goto(billDetailsPage, { waitUntil: 'networkidle0' });
  await page.waitForSelector(billDetailsPageSelector);
  const summaryText = await page.$eval(
    selectValueStringSelector,
    (node) => node.textContent,
  );
  const billCount = parseInt(summaryText.split(' ')[7], 10);
  const pageSize = (Math.ceil(billCount / 10) * 10).toString();
  await page.select(selectStringSelector, pageSize);
  const billsData = await page.$$(billsDataSelector);
  const rawDateLabels = await page.$$eval(billsDateStringSelector, (nodes) =>
    nodes.map((node) => node.textContent),
  );
  // rawDateLabels = [' Sep 08, 2021 ', ' Aug 09, 2021 ', ' Jul 08, 2021 ', ' Jun 08, 2021 ', ' May 06, 2021 ']
  // Splitting on ' ' gives ['', 'Sep', '08,', '2021', '']; tokens 1 and 3
  // are joined into 'Sep_2021'.
  const billsDateString = rawDateLabels.map((rawLabel) => {
    const tokens = rawLabel.split(' ');
    return tokens[1].concat('_', tokens[3]);
  });
  return { billsData, billsDateString };
}
/**
 * Reads `bill.pdf` from `downloadPath` and extracts the monthly figures from
 * the third page of the document.
 *
 * Values are located by fixed coordinates in the PDF text layer:
 *  - total usage: y = 197.52 or 234.96
 *  - billed amount: x = 273.12 or 268.08 (last match, leading '$' removed)
 *  - on-peak usage: y = 246.96
 *  - off-peak usage: y = 331.92
 * The result is attributed to the month BEFORE the bill date.
 *
 * @param downloadPath directory that contains `bill.pdf`.
 * @param billsDateString bill label in 'Mon_YYYY' form, e.g. 'Sep_2021'.
 * @param externalServiceAccountId stored as `serviceAccountId` on the result.
 * @returns the populated `MonthlyUtilityData`.
 * Rejects with `Errors.DataExtractionError` when reading or parsing fails.
 */
private async extractPdfData(
  downloadPath: string,
  billsDateString: string,
  externalServiceAccountId: string,
) {
  const { dataExtractionError } = this.config.getMonthlyUtilityData;
  try {
    const pdfExtract = new PDFExtract();
    const extractedBillData = await pdfExtract.extract(
      `${downloadPath}/bill.pdf`,
    );
    this.logger.info(`[APS] Extracted data for pdf ${downloadPath}/bill.pdf`);
    const pageContent = extractedBillData.pages[2].content;
    // pageContent = [
    //   { x: 37.20000000000002, y: 520.3199999999998, str: 'Cost of electricity with taxes and fees',
    //     dir: 'ltr', width: 150.00000000000003, height: 10.32, fontName: 'g_d0_f375R' },
    //   { x: 273.12, y: 520.3199999999998, str: '$77.46',
    //     dir: 'ltr', width: 27.84, height: 10.32, fontName: 'g_d0_f375R' },
    //   { x: 314.4, y: 196.80000000000018, str: 'Total electricity you used, in kWh',
    //     dir: 'ltr', width: 144.48, height: 10.799999999999999, fontName: 'g_d0_f6R' },
    //   { x: 566.88, y: 197.5200000000002, str: '448',
    //     dir: 'ltr', width: 16.56, height: 11.76, fontName: 'g_d0_f918R' },
    // ];
    // extracting total usage value
    const totalUsageObject = this.extractfromYCoordinates(pageContent, {
      y1: 197.52,
      y2: 234.96,
    });
    const totalUsage: string = this.convertToString(totalUsageObject);
    // extracting usage Amount value
    const usageAmountObject = this.extractFromXCoordinates(pageContent, {
      x1: 273.12,
      x2: 268.08,
    });
    // Drop the leading '$' and any thousands separators: parseFloat stops at
    // the first comma, so '1,077.46' would otherwise be read as 1.
    const usageAmount = usageAmountObject[usageAmountObject.length - 1].str
      .slice(1)
      .replace(/,/g, '');
    // extracting onPeakUsage value
    const onPeakUsageObject = this.extractfromYCoordinates(pageContent, {
      y1: 246.96,
      y2: null,
    });
    const onPeakUsageInKW: string | null =
      this.convertToString(onPeakUsageObject);
    // extracting offPeakUsage value
    const offPeakUsageObject = this.extractfromYCoordinates(pageContent, {
      y1: 331.92,
      y2: null,
    });
    const offPeakUsageInKW: string | null =
      this.convertToString(offPeakUsageObject);
    // 'Sep_2021' -> month 'Sep', year 2021
    const rawMonth: string = billsDateString.slice(0, 3);
    const rawYear: number = parseInt(billsDateString.slice(4), 10);
    const date: Date = new Date(`${rawMonth},${rawYear}`);
    // Shift back one month (a January bill yields December of the
    // previous year).
    date.setMonth(date.getMonth() - 1);
    const month: string = date.toLocaleString('default', { month: 'long' });
    const year: number = date.getFullYear();
    const amountInCents: number = Math.round(parseFloat(usageAmount) * 100);
    // Values that were not found stay null instead of being converted.
    const energyConsumptionInWatts = totalUsage
      ? this.convertToWatts(totalUsage)
      : totalUsage;
    const offPeakUsageInWatts = offPeakUsageInKW
      ? this.convertToWatts(offPeakUsageInKW)
      : offPeakUsageInKW;
    const onPeakUsageInWatts = onPeakUsageInKW
      ? this.convertToWatts(onPeakUsageInKW)
      : onPeakUsageInKW;
    const monthlyData: MonthlyUtilityData = new MonthlyUtilityData({
      amountInCents,
      energyConsumptionInWatts,
      month,
      offPeakUsageInWatts,
      onPeakUsageInWatts,
      year,
      serviceAccountId: externalServiceAccountId,
    });
    return monthlyData;
  } catch (err) {
    return Promise.reject(
      new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
    );
  }
}
/**
 * Downloads every bill PDF listed on the bill details page and hands each one
 * to the `upload` callback.
 *
 * Bills whose month/year label equals the label of the previous row are
 * skipped. A failure on a single bill is logged and the loop continues. Any
 * other failure is logged as well; this method does not reject. The browser
 * is closed before this method settles.
 *
 * @param serviceAccountId used to build the download and upload paths.
 * @param upload called with the file descriptor (`filename`, `mimetype`,
 * `path` = download directory) and the relative upload path.
 */
async getUtilityBillPDFs(
  serviceAccountId: string,
  upload: UploadFileCallback,
) {
  const page = this.page;
  const { billDetailsUrl } = this.config.getMonthlyUtilityData;
  try {
    await page.waitForNavigation();
    const { billsData, billsDateString } = await this.getBillsData();
    let index = -1;
    for (const bill of billsData) {
      try {
        index += 1;
        // For index === 0 the previous label is undefined, so the first
        // bill is always processed.
        if (billsDateString[index] !== billsDateString[index - 1]) {
          const relativeDownloadFilePath = `service_acccounts/${serviceAccountId}/${DateHelper.formatBillDate(
            billsDateString[index],
          )}`;
          const downloadPath = path.resolve(
            __dirname,
            `/tmp/bills/${relativeDownloadFilePath}`,
          ); // /tmp/bills/service_accounts/634900de7f2d174aacfd29c3/2022_1
          const client = await page.target().createCDPSession();
          await client.send('Page.setDownloadBehavior', {
            downloadPath,
            behavior: 'allow',
          });
          // Clicked inside the page context rather than through the
          // element handle.
          await page.evaluate((b) => b.click(), bill);
          await page.waitForResponse((res) =>
            res.url().startsWith(billDetailsUrl),
          );
          this.logger.info(`[APS] Downloading pdf ${index}`);
          // Fixed pause (config 'apsDownloadPDF.timeOut') before the
          // downloaded file is uploaded.
          await new Promise((resolve) =>
            setTimeout(resolve, config.get('apsDownloadPDF.timeOut')),
          );
          const uploadPath = `bills/${relativeDownloadFilePath}`;
          await upload(
            {
              filename: 'bill.pdf',
              mimetype: 'application/pdf',
              path: downloadPath,
            },
            uploadPath,
          );
          fs.unlinkSync(`${downloadPath}/bill.pdf`);
        }
      } catch (err) {
        this.logger.error(`[APS]: Bill pdf error: ${err.message}`);
      }
    }
  } catch (err) {
    this.logger.error(`Downloading PDF bills err: ${err.message}`);
  } finally {
    // Single, awaited close for both the success and the failure path. A
    // failing close is logged so that it cannot surface as an unhandled
    // rejection.
    if (this.browser) {
      try {
        await this.browser.close();
      } catch (closeErr) {
        this.logger.error(`[APS] Browser close error: ${closeErr}`);
      }
    }
  }
}
/**
 * Installs the page's `request` handler.
 *
 * The request whose URL equals `loginXhrUrl` is not sent by the browser;
 * it is replayed through an HTTPS-over-HTTP tunnel with the `request`
 * library, and the tunnel's answer is fed back to the page. Cookies set by
 * that answer are stored and re-applied to the page before every later
 * request. All other requests continue unchanged.
 *
 * @param page page whose requests are intercepted (interception must already
 * be enabled on it).
 * @param tunnelingAgentOptions options passed to `tunnel.httpsOverHttp`.
 */
private setPageOnEvent(
  page: BrowserInterface.Page,
  tunnelingAgentOptions: LooseObject,
) {
  const { loginXhrUrl } = this.config.login;
  const cookies: Cookie[] = [];
  let cookiesFlag = false;
  const logger: Logger = this.logger;
  // Uncomment below code to see network logs
  // page.on('response', async (interceptedResponse) => {
  //   this.logger.info(
  //     `[SRP] Response per Request: { statusCode: ${interceptedResponse.status()},
  //     statusText: ${interceptedResponse.statusText()},
  //     method: ${interceptedResponse.request().method()},
  //     url: ${interceptedResponse.url()},}`,
  //   );
  // });
  page.on('request', async (interceptedRequest) => {
    if (interceptedRequest.url() === loginXhrUrl) {
      const tunnelingAgent = tunnel.httpsOverHttp(tunnelingAgentOptions);
      const interceptedRequestHeaders = interceptedRequest.headers();
      // Replacing HeadlessChrome with just Chrome. Because HeadlessChrome is resulting as an invalid user-agent header on hitting request.
      interceptedRequestHeaders['user-agent'] = interceptedRequestHeaders[
        'user-agent'
      ].replace('Headless', '');
      const options = {
        uri: interceptedRequest.url(),
        method: interceptedRequest.method(),
        headers: interceptedRequestHeaders,
        agent: tunnelingAgent,
        body: interceptedRequest.postData(),
      };
      request(options, async function (err, res, body) {
        if (err) {
          logger.error(`[APS] Page ON event request error: ${err.message}`);
          // An intercepted request must be resolved one way or another;
          // leaving it pending would stall the page until a timeout fires.
          try {
            await interceptedRequest.abort();
          } catch (abortErr) {
            logger.error(`[APS] Page request abort error: ${abortErr}`);
          }
          return;
        }
        const setCookies: string[] = res.headers['set-cookie'];
        for (let i = 0; setCookies && i < setCookies.length; i++) {
          // Here the string "value" is in this form - "<name>=<value>; ..."
          const nameValuePair: string = setCookies[i].split('; ')[0];
          // Split on the FIRST '=' only: cookie values may themselves contain
          // '=' (for example base64 padding) and must not be truncated.
          const separatorIndex = nameValuePair.indexOf('=');
          const cookieName =
            separatorIndex === -1
              ? nameValuePair
              : nameValuePair.slice(0, separatorIndex);
          const cookieValue =
            separatorIndex === -1
              ? ''
              : nameValuePair.slice(separatorIndex + 1);
          // Considering only the non utm cookies. All the utm cookies will be in the end of setCookies.
          if (cookieName.startsWith('___utm')) {
            break;
          }
          cookies.push({ name: cookieName, value: cookieValue });
        }
        try {
          await page.setCookie(...cookies);
        } catch (err) {
          logger.error(`[APS] Page set cookie error: ${err.message}`);
        }
        cookiesFlag = true;
        interceptedRequest.respond({
          status: res.statusCode,
          contentType: res.headers['content-type'],
          headers: res.headers,
          body: body,
        });
      });
    } else {
      // Once the login answer has been processed, re-apply its cookies
      // before letting any other request through.
      if (cookiesFlag) {
        try {
          await page.setCookie(...cookies);
        } catch (err) {
          logger.error(`[APS] Page set cookie error: ${err.message}`);
        }
      }
      interceptedRequest.continue();
    }
  });
}
/**
 * Returns the entries of `pageContent` whose `y` value, rounded to two
 * decimals, equals `coordinates.y1` or `coordinates.y2`.
 * A `null` coordinate never matches.
 */
private extractfromYCoordinates(
  pageContent: any,
  coordinates: {
    y1: number | null;
    y2: number | null;
  },
) {
  const { y1, y2 } = coordinates;
  return pageContent.filter((entry: LooseObject) => {
    const roundedY = Math.round(entry.y * 100) / 100;
    return roundedY === y1 || roundedY === y2;
  });
}
/**
 * Returns the entries of `pageContent` whose `x` value, rounded to two
 * decimals, equals `coordinates.x1` or `coordinates.x2`.
 * A `null` coordinate never matches.
 */
private extractFromXCoordinates(
  pageContent: any,
  coordinates: {
    x1: number | null;
    x2: number | null;
  },
) {
  const { x1, x2 } = coordinates;
  return pageContent.filter((entry: LooseObject) => {
    const roundedX = Math.round(entry.x * 100) / 100;
    return roundedX === x1 || roundedX === x2;
  });
}
/**
 * Returns the `str` property of the last element of `object`, or `null`
 * when there is no last element to read from (missing or empty input).
 */
private convertToString(object: LooseObject) {
  if (object == null) {
    return null;
  }
  const lastEntry = object[object.length - 1];
  if (lastEntry == null) {
    return null;
  }
  const text: string = lastEntry.str;
  return text;
}
/**
 * Converts a numeric string to a number and multiplies it by 1000.
 *
 * Thousands separators are removed first ('1,234' -> 1234000) and a
 * fractional part is honoured ('448.5' -> 448500).
 *
 * @param data numeric text, e.g. '448'.
 * @returns the scaled value rounded to an integer, or `null` when `data`
 * does not start with a number.
 */
private convertToWatts(data: string) {
  // parseInt/parseFloat never throw; an unparsable input yields NaN, so the
  // result is checked explicitly instead of being wrapped in try/catch.
  const parsed = Number.parseFloat(String(data).replace(/,/g, ''));
  if (!Number.isFinite(parsed)) {
    return null;
  }
  // Rounding removes floating-point noise introduced by the multiplication.
  return Math.round(parsed * 1000);
}
}