UNPKG

jarviscrawlercore

Version:
364 lines (294 loc) 8.32 kB
const log = require('../log'); const {sleep, closeAllPagesEx} = require('../utils'); const {WaitAllResponse} = require('../waitallresponse'); const {WaitFrameNavigated} = require('../waitframenavigated'); const { getProducts, waitAllProducts, checkNeedLogin, login, } = require('./utils'); /** * procMainCategory2 - process main category2 * @param {object} page - page * @param {int} categoryIndex - index in category * @param {int} categoryIndex2 - index in category2 * @param {object} waitchgpage - WaitFrameNavigated * @param {object} waitAllResponse - WaitAllResponse * @param {number} timeout - timeout in microseconds * @return {object} ret - {error, ret} */ async function procMainCategory2( page, categoryIndex, categoryIndex2, waitchgpage, waitAllResponse, timeout, ) { const awaiterr = undefined; const lstcategory2 = await page .$$eval('.pub-threeiO.pub-threeiS', (eles) => { const lstcategory2 = []; if (eles.length > 0) { const lsta = eles[0].getElementsByTagName('a'); for (let i = 0; i < lsta.length; ++i) { const cc = { name: lsta[i].innerText, force: false, }; for (let j = 0; j < lsta[i].classList.length; ++j) { if (lsta[i].classList[j] == 'pub-threeiK') { cc.force = true; } } lstcategory2.push(cc); } } return lstcategory2; }) .catch((err) => { awaiterr = err; }); if (awaiterr) { return {error: awaiterr}; } if (categoryIndex2 >= 0 && categoryIndex2 < lstcategory2.length) { if (!lstcategory2[categoryIndex2].force) { const lstc = await page.$$('.pub-threeiQ').catch((err) => { awaiterr = err; }); if (awaiterr) { return {error: awaiterr}; } if (lstc.length <= categoryIndex2) { return { error: new Error( 'alimamaGetTop.procMainCategory2 invalid $$(.pub-threeiQ).', ), }; } await lstc[categoryIndex2].hover(); await sleep(50); waitchgpage.resetex(); waitAllResponse.reset(); await lstc[categoryIndex2].click(); await waitchgpage.waitDone(timeout); await waitAllResponse.waitDone(timeout); } const retWaitAllProducts = await waitAllProducts( page, waitAllResponse, timeout, ); if (retWaitAllProducts) { await page.close(); return {error: retWaitAllProducts}; } const retp = await getProducts(page); if (retp.error) { return {error: retp.error}; } return { ret: { category: lstcategory2[categoryIndex2].name, products: retp.lst, }, }; } return {error: new Error('alimamaGetTop.procMainCategory2 no data.')}; } /** * procMainCategory - process main category * @param {object} page - page * @param {int} categoryIndex - index in category * @param {object} waitchgpage - WaitFrameNavigated * @param {object} waitAllResponse - WaitAllResponse * @param {number} timeout - timeout in microseconds * @return {object} ret - {error, ret} */ async function procMainCategory( page, categoryIndex, waitchgpage, waitAllResponse, timeout, ) { let awaiterr = undefined; const lstcategory = await page .$$eval('.pub-threeiI', (eles) => { const lstcategory = []; for (let i = 0; i < eles.length; ++i) { const cc = { name: eles[i].innerText, force: false, }; for (let j = 0; j < eles[i].classList.length; ++j) { if (eles[i].classList[j] == 'pub-threeiK') { cc.force = true; } } lstcategory.push(cc); } return lstcategory; }) .catch((err) => { awaiterr = err; }); if (awaiterr) { return {error: awaiterr}; } if (categoryIndex >= 0 && categoryIndex < lstcategory.length) { if (!lstcategory[categoryIndex].force) { const lstc = await page.$$('.pub-threeiI').catch((err) => { awaiterr = err; }); if (awaiterr) { return {error: awaiterr}; } if (lstc.length <= categoryIndex) { return { error: new Error( 'alimamaGetTop.procMainCategory invalid $$(.pub-threeiI).', ), }; } await lstc[categoryIndex].hover(); await sleep(50); waitchgpage.resetex(); waitAllResponse.reset(); await lstc[categoryIndex].click(); await waitchgpage.waitDone(timeout); await waitAllResponse.waitDone(timeout); } const maxcategory2 = await page .$$eval('.pub-threeiO.pub-threeiS', (eles) => { if (eles.length > 0) { const lsta = eles[0].getElementsByTagName('a'); return lsta.length; } return 0; }) .catch((err) => { awaiterr = err; }); if (awaiterr) { return {error: awaiterr}; } const lst = []; for (let i = 0; i < maxcategory2; ++i) { const cr = await procMainCategory2( page, categoryIndex, i, waitchgpage, waitAllResponse, timeout, ); if (cr.error) { return {error: cr.error}; } lst.push({ category: [lstcategory[categoryIndex].name, cr.ret.category], products: cr.ret.products, }); } return {ret: lst}; } return {error: new Error('alimamaGetTop.procMainCategory no data.')}; } /** * alimamaGetTop - alimama get top products * @param {object} browser - browser * @param {object} cfg - alimama config * @param {number} timeout - timeout in microseconds * @return {object} ret - {error, ret} */ async function alimamaGetTop(browser, cfg, timeout) { let awaiterr = undefined; const page = await browser.newPage(); const url = 'https://pub.alimama.com/promo/search/index.htm'; checkNeedLogin(page, url); const waitAllResponse = new WaitAllResponse(page); const mainframe = await page.mainFrame(); const waitchgpage = new WaitFrameNavigated(page, mainframe, async (frame) => { const cururl = frame.url(); return cururl.indexOf(url) == 0; }); await page .setViewport({ width: 1280, height: 600, deviceScaleFactor: 1, }) .catch((err) => { awaiterr = err; }); if (awaiterr) { log.error('alimamaGetTop.setViewport', awaiterr); await page.close(); return {error: awaiterr.toString()}; } await page .goto(url, { timeout: timeout, }) .catch((err) => { awaiterr = err; }); if (awaiterr) { log.error('alimamaGetTop.goto', awaiterr); await page.close(); return {error: awaiterr.toString()}; } const isok = await waitAllResponse.waitDone(timeout); if (!isok) { const err = new Error('alimamaGetTop.waitDone timeout.'); log.error('alimamaGetTop.waitDone ', err); await page.close(); return {error: err}; } waitAllResponse.reset(); if (cfg) { const loginret = await login(page, cfg.username, cfg.password); if (loginret.err) { log.error('alimamaGetTop.login ', loginret.err); await page.close(); return {error: loginret.err}; } const isok = await waitAllResponse.waitDone(timeout); if (!isok) { const err = new Error('alimamaGetTop.waitDone timeout.'); log.error('alimamaGetTop.waitDone ', err); await page.close(); return {error: err}; } waitAllResponse.reset(); } const maxcategory = await page .$$eval('.pub-threeiI', (eles) => { return eles.length; }) .catch((err) => { awaiterr = err; }); const lst = []; for (let i = 0; i < maxcategory; ++i) { const cr = await procMainCategory( page, i, waitchgpage, waitAllResponse, timeout, ); if (cr.error) { return {error: cr.error}; } lst.push(...cr.ret); } console.log(lst); await page.close(); await closeAllPagesEx(browser, 6); return {ret: {lst: lst}}; } exports.alimamaGetTop = alimamaGetTop;