UNPKG

aps-data-api

Version:

Package for extracting data from the APS company for the omnimetic project.

887 lines (734 loc) 27 kB
import axios, { AxiosRequestConfig, AxiosRequestHeaders } from 'axios';
import config from 'config';
import fs from 'fs';
import path from 'path';
import { PDFExtract } from 'pdf.js-extract';
import puppeteer from 'puppeteer';
import request from 'request';
import tunnel from 'tunnel';

import {
  AxiosApiResponse,
  BatchResponse,
  BrowserInterface,
  Cookie,
  Errors,
  GetDataPayload,
  Logger,
  LooseObject,
  RequestHeaders,
  UploadFileCallback,
} from '../../typings';

import { HourlyUtilityData, MonthlyUtilityData } from '../../models';
import {
  UtilityServiceAccountResponse,
  UtilityUserAccountResponse,
  Workflow,
} from '../../services';
import { DateHelper } from '../../helpers';

/**
 * Workflow that drives a headless browser against the APS customer portal
 * and extracts account, monthly-bill and hourly-usage data from it.
 *
 * All URLs, CSS selectors and error prefixes are read from the
 * `workflow.json` file that sits next to this module.
 *
 * NOTE(review): `logger` is declared optional but is used unconditionally by
 * every method; callers are expected to assign it before invoking anything.
 */
export class APSWorkflow extends Workflow {
  logger?: Logger;

  /** Loads `workflow.json` from this module's directory into `this.config`. */
  constructor() {
    super();
    const workFlowPath = path.resolve(__dirname);
    const workFlowConfigFilePath = path.join(workFlowPath, 'workflow.json');
    this.config = this.readConfig(workFlowConfigFilePath);
  }

  /** Launches the puppeteer browser and stores it on `this.browser`. */
  async initializeBrowser() {
    this.browser = await puppeteer.launch({
      args: [
        '--enable-features=NetworkService',
        '--disable-setuid-sandbox',
        '--no-sandbox',
      ],
    });
  }

  /**
   * Opens the APS login page, fills in the credentials and submits the form.
   *
   * @param payload - Carries the `username` and `password` to type in.
   * @param tunnelingAgentOptions - Options handed to `tunnel.httpsOverHttp`
   *   for the proxied login XHR (see `setPageOnEvent`).
   * @throws Errors.DataExtractionError (as a rejection) when any step fails;
   *   the browser is closed first if it was launched.
   */
  async login(payload: GetDataPayload, tunnelingAgentOptions: LooseObject) {
    const { companyUrl, dataExtractionError, passwordCssId, usernameCssId } =
      this.config.login;
    try {
      this.logger.info('[APS] Login to APS process start');
      if (!this.browser) {
        await this.initializeBrowser();
      }
      this.page = await this.browser.newPage();
      const page = this.page;
      page.setDefaultTimeout(config.get('puppeteer.defaultTimeout'));
      await page.setRequestInterception(true);
      // Uncomment below code to see browser console logs
      // page.on('console', (message) => {
      //   this.logger.info(
      //     `[APS] Browser Console: ${message.type()}, ${message.text()}`,
      //   );
      // });
      this.setPageOnEvent(page, tunnelingAgentOptions);
      const { password, username } = payload;
      await page.goto(companyUrl, {
        waitUntil: 'networkidle0',
      });
      await page.waitForSelector(usernameCssId);
      await this.typeValueForField(usernameCssId, username);
      await this.typeValueForField(passwordCssId, password);
      await this.click('Enter');
      this.logger.info('[APS] Login to APS process end');
    } catch (err) {
      // The launch itself may be what failed; closing an undefined browser
      // would throw a TypeError and hide the real error.
      if (this.browser) {
        await this.browser.close();
      }
      return Promise.reject(
        new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
      );
    }
  }

  /**
   * Builds the user account response: one service account per premise, all
   * sharing the mailing address scraped from the customer profile page.
   *
   * The browser is closed when the scraping part finishes, whether it
   * succeeded or not.
   *
   * @returns The user account with its service accounts.
   * @throws Errors.DataExtractionError (as a rejection) when scraping fails.
   */
  async getServiceAccounts() {
    const page = this.page;
    const { addressDataSelector, customerProfileUrl, dataExtractionError } =
      this.config.getServiceAccounts;
    // NOTE(review): this call sits outside the try/finally below, so a
    // rejection here propagates unchanged and the browser is not closed by
    // this method. Left as is so the rejection type seen by callers is kept.
    const userDetails = await this.getUserDetails();
    try {
      const { id, name, premiseDetailsList, legalNames, meterNumber } =
        userDetails;
      await page.goto(customerProfileUrl);
      await page.waitForSelector(addressDataSelector);
      const addressData = await page.$eval(addressDataSelector, (element) => {
        return element.textContent;
      });
      // addressData = 'Mailing address416115 N JW Marriot RD # 120-156, SCOTTSDALE,
      // ARIZONA,85641, USA'
      // slice(15) drops the leading 'Mailing address' label.
      const address = addressData.slice(15).split(',');
      const city = address[1].trim();
      const postalCode = address[3].trim();
      const state = address[2].trim();
      const street = address[0];
      const serviceAccounts: UtilityServiceAccountResponse[] = [];
      for (const premiseDetails of premiseDetailsList) {
        const premiseId = premiseDetails.premiseID;
        serviceAccounts.push(
          new UtilityServiceAccountResponse({
            id: premiseId,
            address: {
              city,
              postalCode,
              state,
              street,
            },
            meterNumber,
            legalNames,
          }),
        );
      }
      const response: UtilityUserAccountResponse =
        new UtilityUserAccountResponse({
          id,
          name,
          serviceAccounts,
        });
      return response;
    } catch (err) {
      return Promise.reject(
        new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
      );
    } finally {
      await this.browser.close();
    }
  }

  /**
   * Downloads every distinct bill PDF listed on the bill details page and
   * parses it into a `MonthlyUtilityData` record.
   *
   * A failure on one bill is logged and skipped; the remaining bills are
   * still processed. The browser is closed when the loop finishes.
   *
   * @returns One record per bill that was downloaded and parsed.
   * @throws Errors.DataExtractionError (as a rejection) when the bill list
   *   itself cannot be obtained.
   */
  async getMonthlyUtilityData(): Promise<MonthlyUtilityData[]> {
    const page = this.page;
    const { billDetailsUrl, dataExtractionError } =
      this.config.getMonthlyUtilityData;
    const userDetails = await this.getUserDetails();
    try {
      const { premiseDetailsList } = userDetails;
      const externalServiceAccountId = premiseDetailsList[0].premiseID;
      const billsDataObject = await this.getBillsData();
      const { billsData, billsDateString } = billsDataObject;
      let i = -1;
      const userMonthlyUtilityDataList: MonthlyUtilityData[] = [];
      for (const bill of billsData) {
        try {
          i += 1;
          // Consecutive rows with the same month/year label are processed
          // only once.
          if (billsDateString[i] !== billsDateString[i - 1]) {
            // NOTE(review): 'service_acccounts' looks like a typo, but it is
            // part of the on-disk path, so it is left unchanged here.
            const relativeDownloadFilePath = `service_acccounts/${externalServiceAccountId}/bills/${billsDateString[i]}`;
            const downloadPath = path.resolve(
              __dirname,
              `/tmp/downloads/${relativeDownloadFilePath}`,
            );
            const client = await page.target().createCDPSession();
            await client.send('Page.setDownloadBehavior', {
              downloadPath,
              behavior: 'allow',
            });
            await bill.click();
            await page.waitForResponse((res) =>
              res.url().startsWith(billDetailsUrl),
            );
            this.logger.info(`[APS] Downloading pdf ${i}`);
            // Fixed wait so the download can finish writing to disk.
            await new Promise((resolve) =>
              setTimeout(resolve, config.get('apsDownloadPDF.timeOut')),
            );
            const monthlyData = await this.extractPdfData(
              downloadPath,
              billsDateString[i],
              externalServiceAccountId,
            );
            userMonthlyUtilityDataList.push(monthlyData);
            fs.unlinkSync(`${downloadPath}/bill.pdf`);
          }
        } catch (err) {
          this.logger.error(`[APS] Downloading PDF error: ${err.message}`);
        }
      }
      return userMonthlyUtilityDataList;
    } catch (err) {
      return Promise.reject(
        new Errors.DataExtractionError(
          `${dataExtractionError}: ${err.message}`,
        ),
      );
    } finally {
      await this.browser.close();
    }
  }

  /**
   * Captures the request headers and POST body the portal itself sends to
   * the interval-usage endpoint, plus all browser cookies, so the same call
   * can be replayed outside the browser.
   *
   * The browser is closed before this method settles.
   *
   * @returns `authorization`, `ocp-apim-subscription-key` and `Cookie`
   *   headers together with the captured request body.
   * @throws Errors.DataExtractionError (as a rejection) on any failure.
   */
  async getReqHeaderAndBody(): Promise<RequestHeaders> {
    const page = this.page;
    const { utilityDataUrl, dataExtractionError, intervalUsageUrl } =
      this.config.hourlyUtilityDataConfig;
    try {
      this.logger.info('[APS] Fetching request headers and body start');
      let headers: Record<string, string> = {},
        requestBody: string;
      await page.setRequestInterception(true);
      page.on('response', async (response) => {
        if (response.url().startsWith(intervalUsageUrl)) {
          headers = response.request().headers();
          requestBody = response.request().postData();
        }
      });
      await (page as any).waitForNavigation({ waitUntil: 'networkidle2' });
      await page.goto(utilityDataUrl, { waitUntil: 'networkidle0' });
      const newHeaders: Record<string, string> = {};
      const client = await page.target().createCDPSession();
      const cookies = (await client.send('Network.getAllCookies')).cookies;
      let cookieString = '';
      cookies.forEach((cookie) => {
        cookieString += cookie.name + '=' + cookie.value + '; ';
      });
      newHeaders['authorization'] = headers['authorization'];
      newHeaders['ocp-apim-subscription-key'] =
        headers['ocp-apim-subscription-key'];
      newHeaders['Cookie'] = cookieString;
      this.logger.info('[APS] Fetching request headers and body end');
      return { headers: newHeaders, requestBody };
    } catch (err) {
      return Promise.reject(
        new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
      );
    } finally {
      await this.browser.close();
    }
  }

  /**
   * Requests hourly usage for the `batchSize * batchesPerLoop` days that
   * precede `nextDate`, one request per day, grouped into batches.
   *
   * Day generation stops early when a day equals the rate plan start date
   * found in `body` or the formatted `finishDate`; in that case
   * `nextBatchDate` is `null`.
   *
   * @param headers - Headers sent with every request.
   * @param body - JSON request body template; each request gets its own
   *   parsed copy with `billCycleStartDate`/`billCycleEndDate` set to the day.
   * @param nextDate - Exclusive upper bound of the date range.
   * @param finishDate - Optional day at which to stop going further back.
   * @returns The collected hourly data (batches furthest back in time first)
   *   and the date to pass as `nextDate` on the following call.
   * @throws Errors.DataExtractionError (as a rejection) on any failure.
   */
  async makeBatchRequest(
    headers: AxiosRequestHeaders,
    body: string,
    nextDate: Date,
    finishDate?: Date,
  ): Promise<BatchResponse> {
    const { batchSize, batchesPerLoop, dataExtractionError, requestUrl } =
      this.config.hourlyUtilityDataConfig;
    try {
      this.logger.info('[APS] Batch requests function start');
      const batchRequestConfigsArray: AxiosRequestConfig<LooseObject>[][] = [];
      let nextDateForExtraction: Date = new Date(nextDate);
      nextDateForExtraction.setDate(
        nextDate.getDate() - batchSize * batchesPerLoop,
      );
      for (let i = 0; i < batchesPerLoop; i++) {
        const batchRequestConfigs: AxiosRequestConfig<LooseObject>[] = [];
        for (let j = batchSize * i + 1; j <= batchSize * i + batchSize; j++) {
          const prevDate = new Date(nextDate);
          prevDate.setDate(nextDate.getDate() - j);
          const prevDateString = DateHelper.formatDate(prevDate);
          // Parsed per request because the copy is mutated below.
          const configRequestBody: LooseObject = JSON.parse(body);
          if (
            prevDateString === configRequestBody.ratePlan[0].startDate ||
            prevDateString === DateHelper.formatDate(finishDate)
          ) {
            // Checks if the loop has reached the beginning date of utility service start
            nextDateForExtraction = null;
            break;
          }
          configRequestBody['billCycleStartDate'] = prevDateString;
          configRequestBody['billCycleEndDate'] = prevDateString;
          const requestConfig: AxiosRequestConfig<LooseObject> = {
            method: 'post',
            url: requestUrl,
            headers: headers,
            data: configRequestBody,
          };
          batchRequestConfigs.push(requestConfig);
        }
        batchRequestConfigsArray.push(batchRequestConfigs);
        if (nextDateForExtraction === null) break;
      }

      let batchResponse: HourlyUtilityData[] = [];
      // Walk the batches from last to first. `axiosBatchApiRequest` either
      // resolves with the responses or rejects once its retries are used up,
      // so every batch is attempted exactly once here.
      for (let k = batchRequestConfigsArray.length - 1; k >= 0; k--) {
        const axiosApiResponseArray: AxiosApiResponse[] =
          await this.axiosBatchApiRequest(batchRequestConfigsArray[k], 0);
        const hourlyDataFormatted: HourlyUtilityData[] =
          await this.formatHourlyData(axiosApiResponseArray);
        // `formatHourlyData` yields one array per day at runtime; spreading
        // into concat flattens them into a single list.
        batchResponse = batchResponse.concat(...hourlyDataFormatted);
      }
      this.logger.info('[APS] Batch requests function end');
      return { batchResponse, nextBatchDate: nextDateForExtraction };
    } catch (err) {
      return Promise.reject(
        new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
      );
    }
  }

  /**
   * Sends all requests of one batch concurrently, retrying the whole batch
   * when any request fails.
   *
   * @param batchRequestConfigs - Axios configs; each `data.billCycleStartDate`
   *   is echoed back as the `date` of the matching result.
   * @param retryCount - Attempts already used; the batch is attempted again
   *   until this reaches 2.
   * @returns One `{ response, date }` entry per request, in input order.
   * @throws The last axios error (as a rejection) once retries are exhausted.
   */
  async axiosBatchApiRequest(
    batchRequestConfigs: AxiosRequestConfig<LooseObject>[],
    retryCount: number,
  ): Promise<AxiosApiResponse[]> {
    try {
      const res = await Promise.all(
        batchRequestConfigs.map(async (requestConfigs, index) => {
          return {
            response: await axios(requestConfigs),
            date: batchRequestConfigs[index].data.billCycleStartDate,
          };
        }),
      );
      this.logger.info('[APS] Axios batch request sent');
      return res;
    } catch (err) {
      if (retryCount >= 2) {
        this.logger.error('[APS] Axios request error');
        return Promise.reject(err);
      }
      // Return the retry's outcome; without the `return` a successful retry
      // would resolve this call with `undefined`.
      return this.axiosBatchApiRequest(batchRequestConfigs, retryCount + 1);
    }
  }

  /**
   * Wraps every entry of each response's
   * `summarizedUsageDataResponse.dailyRatePlanUsage` in a `HourlyUtilityData`
   * tagged with the response's date.
   *
   * NOTE(review): at runtime the result holds one inner array per response
   * (it is not flattened), although the declared type is a flat array.
   * `makeBatchRequest` flattens it with `concat(...)`. The shape is kept
   * unchanged because this method is public.
   *
   * @param axiosApiResponseArray - Results of `axiosBatchApiRequest`.
   */
  async formatHourlyData(
    axiosApiResponseArray: AxiosApiResponse[],
  ): Promise<HourlyUtilityData[]> {
    const res: HourlyUtilityData[] = axiosApiResponseArray.map(
      (axiosApiResponse) =>
        axiosApiResponse.response.data.summarizedUsageDataResponse.dailyRatePlanUsage.map(
          (hourlyData: LooseObject) => {
            return new HourlyUtilityData({
              ...hourlyData,
              date: axiosApiResponse.date,
            });
          },
        ),
    );
    return res;
  }

  /**
   * Waits for the login XHR and the user-details response triggered by the
   * login, and extracts the account id, display name, premises, legal names
   * and meter number from them.
   *
   * @throws Errors.AuthenticationError (as a rejection) when the login XHR
   *   reports `isLoginSuccess === false` with the configured access error;
   *   the browser is closed first.
   * @throws Errors.DataExtractionError (as a rejection) on any other failure.
   */
  private async getUserDetails() {
    const page = this.page;
    const { accessResult, loginXhrUrl } = this.config.login;
    const { userDetailsUrl, dataExtractionError } =
      this.config.getServiceAccounts;
    try {
      const loginXhr = await page.waitForResponse((res) =>
        res.url().startsWith(loginXhrUrl),
      );
      try {
        const res = await loginXhr.json();
        if (res.isLoginSuccess === false && res.error === accessResult) {
          await this.browser.close();
          // Returned (not thrown) on purpose: a returned rejection is not
          // intercepted by the surrounding catch blocks, so the caller
          // receives the AuthenticationError itself.
          return Promise.reject(new Errors.AuthenticationError(accessResult));
        }
      } catch (err) {
        this.logger.error(`[APS] Login XHR JSON error: ${err}`);
      }
      const userDetailsHTTPResponse = await page.waitForResponse(
        userDetailsUrl,
      );
      const userDetails = await userDetailsHTTPResponse.json();
      // userDetails = {
      //   Details: {
      //     profileData: {
      //       MainPersonName: 'Holmes,Sherlock',
      //     },
      //     AccountDetails: {
      //       getAccountDetailsResponse: {
      //         getAccountDetailsRes: {
      //           getPersonDetails: {
      //             accountID: '6076a3eb3d755200159dc776',
      //           },
      //           getSASPListByAccountID: {
      //             premiseDetailsList: [
      //               {
      //                 premiseID: '6076a3eb3d755200159dc908',
      //               },
      //             ],
      //           },
      //         },
      //       },
      //     },
      //   },
      // };
      // 'Last,First' -> 'First Last'
      const nameParts =
        userDetails.Details.profileData.MainPersonName.split(',');
      const name = nameParts[1].concat(' ', nameParts[0]);
      const accountDetailsRes =
        userDetails.Details.AccountDetails.getAccountDetailsResponse
          .getAccountDetailsRes;
      const accountDetails = accountDetailsRes.getPersonDetails;
      const id = accountDetails.accountID;
      const premiseDetailsList =
        accountDetailsRes.getSASPListByAccountID.premiseDetailsList;
      const legalNames = accountDetails.results
        // Keep the first occurrence of each (personName, personID) pair.
        .filter(
          (tag: LooseObject, index: number, array: LooseObject[]) =>
            array.findIndex(
              (t: LooseObject) =>
                t.personName === tag.personName &&
                t.personID === tag.personID,
            ) === index,
        )
        // 'Last,First' -> 'First Last'; anything else is kept verbatim.
        .map((object: LooseObject) =>
          object.personName.split(',').length === 2
            ? `${object.personName.split(',')[1]} ${
                object.personName.split(',')[0]
              }`
            : object.personName,
        );
      const meterNumbersList = premiseDetailsList[0].sASPDetails
        .filter(
          (listItem: LooseObject) =>
            listItem.sATypeDesc !== '' && listItem.sARatePlancCode !== '',
        )
        .map((user: LooseObject) => user.meterBadgeNumber);
      const meterNumber = meterNumbersList[meterNumbersList.length - 1];
      return { id, name, premiseDetailsList, legalNames, meterNumber };
    } catch (err) {
      return Promise.reject(
        new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
      );
    }
  }

  /**
   * Opens the bill details page, widens the page-size selector so all bills
   * are listed, and collects the clickable bill elements together with a
   * `Mon_YYYY` label for each.
   *
   * @returns `billsData` (element handles) and `billsDateString` (labels),
   *   index-aligned as long as both selectors match the same rows.
   */
  private async getBillsData() {
    const page = this.page;
    const {
      billDetailsPage,
      billDetailsPageSelector,
      billsDataSelector,
      billsDateStringSelector,
      selectStringSelector,
      selectValueStringSelector,
    } = this.config.getMonthlyUtilityData;
    await page.goto(billDetailsPage, { waitUntil: 'networkidle0' });
    await page.waitForSelector(billDetailsPageSelector);
    const selectValueString = await page.$eval(
      selectValueStringSelector,
      (ele) => {
        return ele.textContent;
      },
    );
    // The 8th space-separated token is read as a count and rounded up to the
    // next multiple of 10 to get the page-size option to select.
    // NOTE(review): presumably that token is the total number of bills —
    // confirm against the page markup.
    const selectValueInt = parseInt(selectValueString.split(' ')[7], 10);
    const selectValueRaw = Math.ceil(selectValueInt / 10) * 10;
    const selectValue = selectValueRaw.toString();
    await page.select(selectStringSelector, selectValue);
    const billsData = await page.$$(billsDataSelector);
    const billsDateStringRaw = await page.$$eval(
      billsDateStringSelector,
      (nodes) => nodes.map((n) => n.textContent),
    );
    // tslint:disable-next-line: max-line-length
    // billsDateStringRaw = [' Sep 08, 2021 ', ' Aug 09, 2021 ', ' Jul 08, 2021 ', ' Jun 08, 2021 ', ' May 06, 2021 ']
    const billsDateString = [];
    for (const date of billsDateStringRaw) {
      // ' Sep 08, 2021 ' -> ['', 'Sep', '08,', '2021', ''] -> 'Sep_2021'
      const monthYear = date.split(' ');
      const monthAndYear = monthYear[1].concat('_').concat(monthYear[3]);
      billsDateString.push(monthAndYear);
    }
    return { billsData, billsDateString };
  }

  /**
   * Parses `<downloadPath>/bill.pdf` and builds the monthly record from text
   * items found at fixed coordinates on the third page.
   *
   * The record is assigned to the month before the bill date.
   * NOTE(review): the coordinates presumably match one specific APS bill
   * layout — verify whenever the bill template changes.
   *
   * @param downloadPath - Directory that contains `bill.pdf`.
   * @param billsDateString - Bill label in `Mon_YYYY` form.
   * @param externalServiceAccountId - Stored as `serviceAccountId`.
   * @throws Errors.DataExtractionError (as a rejection) when parsing fails.
   */
  private async extractPdfData(
    downloadPath: string,
    billsDateString: string,
    externalServiceAccountId: string,
  ) {
    const { dataExtractionError } = this.config.getMonthlyUtilityData;
    try {
      const pdfExtract = new PDFExtract();
      const extractedBillData = await pdfExtract.extract(
        `${downloadPath}/bill.pdf`,
      );
      this.logger.info(`[APS] Extracted data for pdf ${downloadPath}/bill.pdf`);
      const pageContent = extractedBillData.pages[2].content;
      // pageContent = [
      //   {
      //     x: 37.20000000000002,
      //     y: 520.3199999999998,
      //     str: 'Cost of electricity with taxes and fees',
      //     dir: 'ltr',
      //     width: 150.00000000000003,
      //     height: 10.32,
      //     fontName: 'g_d0_f375R',
      //   },
      //   {
      //     x: 273.12,
      //     y: 520.3199999999998,
      //     str: '$77.46',
      //     dir: 'ltr',
      //     width: 27.84,
      //     height: 10.32,
      //     fontName: 'g_d0_f375R',
      //   },
      //   {
      //     x: 314.4,
      //     y: 196.80000000000018,
      //     str: 'Total electricity you used, in kWh',
      //     dir: 'ltr',
      //     width: 144.48,
      //     height: 10.799999999999999,
      //     fontName: 'g_d0_f6R',
      //   },
      //   {
      //     x: 566.88,
      //     y: 197.5200000000002,
      //     str: '448',
      //     dir: 'ltr',
      //     width: 16.56,
      //     height: 11.76,
      //     fontName: 'g_d0_f918R',
      //   },
      // ];

      // extracting total usage value
      const totalUsageObject = this.extractfromYCoordinates(pageContent, {
        y1: 197.52,
        y2: 234.96,
      });
      const totalUsage: string = this.convertToString(totalUsageObject);

      // extracting usage Amount value
      const usageAmountObject = this.extractFromXCoordinates(pageContent, {
        x1: 273.12,
        x2: 268.08,
      });
      // slice(1) drops the leading '$'. Thousands separators are removed
      // because parseFloat stops at the first comma ('1,077.46' -> 1).
      const usageAmount = usageAmountObject[
        usageAmountObject.length - 1
      ].str
        .slice(1)
        .replace(/,/g, '');

      // extracting onPeakUsage value
      const onPeakUsageObject = this.extractfromYCoordinates(pageContent, {
        y1: 246.96,
        y2: null,
      });
      const onPeakUsageInKW: string | null =
        this.convertToString(onPeakUsageObject);

      // extracting offPeakUsage value
      const offPeakUsageObject = this.extractfromYCoordinates(pageContent, {
        y1: 331.92,
        y2: null,
      });
      const offPeakUsageInKW: string | null =
        this.convertToString(offPeakUsageObject);

      const monthAndYear = billsDateString;
      const rawMonth: string = monthAndYear.slice(0, 3);
      const rawYear: number = parseInt(monthAndYear.slice(4), 10);
      const date: Date = new Date(`${rawMonth},${rawYear}`);
      // The record is attributed to the month preceding the bill date.
      date.setMonth(date.getMonth() - 1);
      const month: string = date.toLocaleString('default', { month: 'long' });
      const year: number = date.getFullYear();
      const amountInCents: number = Math.round(parseFloat(usageAmount) * 100);
      const energyConsumptionInWatts = totalUsage
        ? this.convertToWatts(totalUsage)
        : totalUsage;
      const offPeakUsageInWatts = offPeakUsageInKW
        ? this.convertToWatts(offPeakUsageInKW)
        : offPeakUsageInKW;
      const onPeakUsageInWatts = onPeakUsageInKW
        ? this.convertToWatts(onPeakUsageInKW)
        : onPeakUsageInKW;
      const monthlyData: MonthlyUtilityData = new MonthlyUtilityData({
        amountInCents,
        energyConsumptionInWatts,
        month,
        offPeakUsageInWatts,
        onPeakUsageInWatts,
        year,
        serviceAccountId: externalServiceAccountId,
      });
      return monthlyData;
    } catch (err) {
      return Promise.reject(
        new Errors.DataExtractionError(`${dataExtractionError}: ${err}`),
      );
    }
  }

  /**
   * Downloads every distinct bill PDF and hands it to `upload`.
   *
   * Errors are logged, never propagated: a failure on one bill skips that
   * bill, a failure before the loop ends the run. The browser is closed in
   * both cases.
   *
   * @param serviceAccountId - Used to build the download and upload paths.
   * @param upload - Receives the file descriptor (`path` is the download
   *   directory) and the relative upload path.
   */
  async getUtilityBillPDFs(
    serviceAccountId: string,
    upload: UploadFileCallback,
  ) {
    const page = this.page;
    const { billDetailsUrl } = this.config.getMonthlyUtilityData;
    try {
      await page.waitForNavigation();
      const billsDataObject = await this.getBillsData();
      const { billsData, billsDateString } = billsDataObject;
      let index = -1;
      for (const bill of billsData) {
        try {
          index += 1;
          // Consecutive rows with the same month/year label are processed
          // only once.
          if (billsDateString[index] !== billsDateString[index - 1]) {
            // NOTE(review): 'service_acccounts' looks like a typo, but it is
            // part of the upload path, so it is left unchanged here.
            const relativeDownloadFilePath = `service_acccounts/${serviceAccountId}/${DateHelper.formatBillDate(
              billsDateString[index],
            )}`;
            const downloadPath = path.resolve(
              __dirname,
              `/tmp/bills/${relativeDownloadFilePath}`,
            );
            // e.g. /tmp/bills/service_acccounts/634900de7f2d174aacfd29c3/2022_1
            const client = await page.target().createCDPSession();
            await client.send('Page.setDownloadBehavior', {
              downloadPath,
              behavior: 'allow',
            });
            await page.evaluate((b) => b.click(), bill);
            await page.waitForResponse((res) =>
              res.url().startsWith(billDetailsUrl),
            );
            this.logger.info(`[APS] Downloading pdf ${index}`);
            // Fixed wait so the download can finish writing to disk.
            await new Promise((resolve) =>
              setTimeout(resolve, config.get('apsDownloadPDF.timeOut')),
            );
            const uploadPath = `bills/${relativeDownloadFilePath}`;
            await upload(
              {
                filename: 'bill.pdf',
                mimetype: 'application/pdf',
                path: downloadPath,
              },
              uploadPath,
            );
            fs.unlinkSync(`${downloadPath}/bill.pdf`);
          }
        } catch (err) {
          this.logger.error(`[APS]: Bill pdf error: ${err.message}`);
        }
      }
      await this.browser.close();
    } catch (err) {
      this.logger.error(`Downloading PDF bills err: ${err.message}`);
      // Awaited so a failing close surfaces to the caller instead of
      // becoming an unhandled rejection.
      await this.browser.close();
    }
  }

  /**
   * Installs the request interceptor on `page`.
   *
   * The login XHR is not sent by the browser; it is replayed through the
   * tunnelling proxy with the `request` library, its `Set-Cookie` values are
   * copied into the page, and the proxied answer is served back to the page.
   * Every other request continues normally, after the captured cookies have
   * been re-applied.
   *
   * @param page - Page with request interception already enabled.
   * @param tunnelingAgentOptions - Options for `tunnel.httpsOverHttp`.
   */
  private setPageOnEvent(
    page: BrowserInterface.Page,
    tunnelingAgentOptions: LooseObject,
  ) {
    const { loginXhrUrl } = this.config.login;
    const cookies: Cookie[] = [];
    let cookiesFlag = false;
    const logger: Logger = this.logger;

    // Uncomment below code to see network logs
    // page.on('response', async (interceptedResponse) => {
    //   this.logger.info(
    //     `[APS] Response per Request: { statusCode: ${interceptedResponse.status()},
    //     statusText: ${interceptedResponse.statusText()},
    //     method: ${interceptedResponse.request().method()},
    //     url: ${interceptedResponse.url()},}`,
    //   );
    // });

    page.on('request', async (interceptedRequest) => {
      if (interceptedRequest.url() === loginXhrUrl) {
        const tunnelingAgent = tunnel.httpsOverHttp(tunnelingAgentOptions);
        const interceptedRequestHeaders = interceptedRequest.headers();
        // Replacing HeadlessChrome with just Chrome. Because HeadlessChrome is resulting as an invalid user-agent header on hitting request.
        interceptedRequestHeaders['user-agent'] = interceptedRequestHeaders[
          'user-agent'
        ].replace('Headless', '');
        const options = {
          uri: interceptedRequest.url(),
          method: interceptedRequest.method(),
          headers: interceptedRequestHeaders,
          agent: tunnelingAgent,
          body: interceptedRequest.postData(),
        };
        request(options, async function (err, res, body) {
          if (err) {
            logger.error(`[APS] Page ON event request error: ${err.message}`);
            return;
          }
          const setCookies: string[] = res.headers['set-cookie'];
          for (let i = 0; setCookies && i < setCookies.length; i++) {
            // Here the string is in this form - "<name>=<value>; ..."
            const pair: string = setCookies[i].split('; ')[0];
            // Split on the FIRST '=' only: cookie values may themselves
            // contain '=' (e.g. base64 padding).
            const separatorIndex = pair.indexOf('=');
            if (separatorIndex === -1) {
              // Not a name=value pair; nothing usable to set.
              continue;
            }
            const cookieName = pair.slice(0, separatorIndex);
            const cookieValue = pair.slice(separatorIndex + 1);
            // Considering only the non utm cookies. All the utm cookies will be in the end of setCookies.
            if (cookieName.startsWith('___utm')) {
              break;
            }
            cookies.push({ name: cookieName, value: cookieValue });
          }
          try {
            await page.setCookie(...cookies);
          } catch (cookieErr) {
            logger.error(`[APS] Page set cookie error: ${cookieErr.message}`);
          }
          cookiesFlag = true;
          interceptedRequest.respond({
            status: res.statusCode,
            contentType: res.headers['content-type'],
            headers: res.headers,
            body: body,
          });
        });
      } else {
        if (cookiesFlag) {
          try {
            await page.setCookie(...cookies);
          } catch (err) {
            logger.error(`[APS] Page set cookie error: ${err.message}`);
          }
        }
        interceptedRequest.continue();
      }
    });
  }

  /**
   * Returns the items of `pageContent` whose `y`, rounded to two decimals,
   * equals `y1` or `y2`.
   */
  private extractfromYCoordinates(
    pageContent: any,
    coordinates: {
      y1: number | null;
      y2: number | null;
    },
  ) {
    const textValueObject = pageContent.filter((el: LooseObject) => {
      return (
        Math.round(el.y * 100) / 100 === coordinates.y1 ||
        Math.round(el.y * 100) / 100 === coordinates.y2
      );
    });
    return textValueObject;
  }

  /**
   * Returns the items of `pageContent` whose `x`, rounded to two decimals,
   * equals `x1` or `x2`.
   */
  private extractFromXCoordinates(
    pageContent: any,
    coordinates: {
      x1: number | null;
      x2: number | null;
    },
  ) {
    const textValueObject = pageContent.filter((el: LooseObject) => {
      return (
        Math.round(el.x * 100) / 100 === coordinates.x1 ||
        Math.round(el.x * 100) / 100 === coordinates.x2
      );
    });
    return textValueObject;
  }

  /**
   * Returns the `str` of the last item in `object`, or `null` when there is
   * no such item (for example an empty match list).
   */
  private convertToString(object: LooseObject) {
    try {
      const resultantStringValue: string = object[object.length - 1].str;
      return resultantStringValue;
    } catch (err) {
      return null;
    }
  }

  /**
   * Parses the integer at the start of `data` and multiplies it by 1000.
   *
   * Thousands separators are removed first, since `parseInt` stops at the
   * first comma ('1,234' would otherwise yield 1).
   *
   * @returns The scaled value, or `null` when `data` holds no leading integer.
   */
  private convertToWatts(data: string) {
    try {
      const value: number = parseInt(data.replace(/,/g, ''), 10) * 1000;
      // parseInt never throws; a non-numeric input shows up as NaN.
      return Number.isNaN(value) ? null : value;
    } catch (err) {
      return null;
    }
  }
}