UNPKG

jarviscrawlercore

Version:
1,488 lines (1,227 loc) 33.4 kB
const fs = require('fs');
const path = require('path');
const messages = require('../pbjs/result_pb');
const AdmZip = require('adm-zip');
const crypto = require('crypto');
const log = require('./log');

/**
 * saveMessage - serialize a protobuf message and write it to a file
 * @param {string} filename - output file
 * @param {object} msg - protobuf message (must provide serializeBinary())
 */
function saveMessage(filename, msg) {
  fs.writeFileSync(filename, msg.serializeBinary());
}

/**
 * saveZipMessage - serialize a protobuf message and write it into a zip
 *   archive as the single entry 'msg.pb'
 * @param {string} filename - output zip file
 * @param {object} msg - protobuf message (must provide serializeBinary())
 */
function saveZipMessage(filename, msg) {
  const zip = new AdmZip();
  zip.addFile('msg.pb', msg.serializeBinary());
  zip.writeZip(filename);
}

/**
 * hashMD5 - md5 digest of a buffer
 * @param {buffer} buf - buffer
 * @return {string} md5 - md5 as a hex string
 */
function hashMD5(buf) {
  return crypto.createHash('md5').update(buf).digest('hex');
}

/**
 * setImageInfo - fill img.data and img.hashName from the response map.
 *   The img object is modified in place and returned; it is left untouched
 *   when mapResponse has no (truthy) entry for img.url.
 * @param {object} img - img object, img.url is the lookup key
 * @param {object} mapResponse - map response, url => image data
 * @param {bool} isoutpurimages - also write the image to
 *   ./output/<hashName>.jpg
 * @return {ImageInfo} imginfo - the same img object
 */
function setImageInfo(img, mapResponse, isoutpurimages) {
  const data = mapResponse[img.url];
  if (data) {
    img.data = data;
    img.hashName = hashMD5(data);

    if (isoutpurimages) {
      fs.writeFileSync('./output/' + img.hashName + '.jpg', img.data);
    }
  }

  return img;
}

/**
 * getImageHashName - md5 hash name of the image data stored for url
 * @param {string} url - url
 * @param {object} mapResponse - map response, url => image data
 * @return {string} hashname - undefined when there is no data for url
 */
function getImageHashName(url, mapResponse) {
  if (mapResponse[url]) {
    return hashMD5(mapResponse[url]);
  }

  return undefined;
}

/**
 * new Paragraph with object
 * @param {object} obj - Paragraph object
 * @return {messages.Paragraph} paragraph - Paragraph
 */
function newParagraph(obj) {
  const result = new messages.Paragraph();

  if (obj.pt) {
    result.setPt(obj.pt);
  }

  if (obj.imgHashName) {
    result.setImghashname(obj.imgHashName);
  }

  if (obj.text) {
    result.setText(obj.text);
  }

  if (obj.imgURL) {
    // NOTE(review): every other generated setter used in this file
    // lower-cases the field name (setImghashname, setWritetime, ...), so
    // setImgURL may not exist on the generated class. It is still preferred
    // when present; otherwise fall back to setImgurl. Confirm against
    // result_pb.
    if (typeof result.setImgURL === 'function') {
      result.setImgURL(obj.imgURL);
    } else {
      result.setImgurl(obj.imgURL);
    }
  }

  return result;
}

/**
 * new ImageInfo with object
 * @param {object} obj - ImageInfo object
 * @return {messages.ImageInfo} imginfo - ImageInfo
 */
function newImageInfo(obj) {
  const result = new messages.ImageInfo();

  if (obj.hashName) {
    result.setHashname(obj.hashName);
  }

  if (obj.url) {
    result.setUrl(obj.url);
  }

  if (obj.width) {
    result.setWidth(obj.width);
  }

  if (obj.height) {
    result.setHeight(obj.height);
  }

  if (obj.data) {
    result.setData(obj.data);
  }

  return result;
}

/**
 * new ExportArticleResult with object
 * @param {object} obj - ExportArticleResult object
 * @return {messages.ExportArticleResult} ear - ExportArticleResult
 */
function newExportArticleResult(obj) {
  const result = new messages.ExportArticleResult();

  if (obj.title) {
    result.setTitle(obj.title);
  }

  if (obj.author) {
    result.setAuthor(obj.author);
  }

  if (obj.writeTime) {
    result.setWritetime(obj.writeTime);
  }

  if (obj.article) {
    result.setArticle(obj.article);
  }

  if (obj.url) {
    result.setUrl(obj.url);
  }

  if (obj.imgs) {
    for (let i = 0; i < obj.imgs.length; ++i) {
      result.addImgs(newImageInfo(obj.imgs[i]), i);
    }
  }

  if (obj.titleImage) {
    result.setTitleimage(newImageInfo(obj.titleImage));
  }

  if (obj.tags) {
    for (let i = 0; i < obj.tags.length; ++i) {
      result.addTags(obj.tags[i], i);
    }
  }

  if (obj.paragraphs) {
    for (let i = 0; i < obj.paragraphs.length; ++i) {
      result.addParagraphs(newParagraph(obj.paragraphs[i]), i);
    }
  }

  if (obj.summary) {
    result.setSummary(obj.summary);
  }

  return result;
}

/**
 * new Article with object
 * @param {object} obj - Article object
 * @return {messages.Article} ear - Article
 */
function newArticle(obj) {
  const result = new messages.Article();

  if (obj.title) {
    result.setTitle(obj.title);
  }

  if (obj.author) {
    result.setAuthor(obj.author);
  }

  if (obj.writeTime) {
    result.setWritetime(obj.writeTime);
  }

  if (obj.url) {
    result.setUrl(obj.url);
  }

  if (obj.image) {
    result.setTitleimage(newImageInfo(obj.image));
  }

  if (obj.summary) {
    result.setSummary(obj.summary);
  }

  if (obj.secondTitle) {
    result.setSecondtitle(obj.secondTitle);
  }

  return result;
}

/**
 * new ArticleList with object
 * @param {object} obj - ArticleList object
 * @return {messages.ArticleList} ear - ArticleList
 */
function newArticleList(obj) {
  const result = new messages.ArticleList();

  if (Array.isArray(obj.articles)) {
    for (let i = 0; i < obj.articles.length; ++i) {
      result.addArticles(newArticle(obj.articles[i]), i);
    }
  }

  return result;
}

/**
 * attachJQuery - inject the bundled jQuery into the page unless the page
 *   already defines a function named $. Injection errors are logged, not
 *   thrown.
 * @param {object} page - page
 */
async function attachJQuery(page) {
  const jquery = await page.evaluate(() => {
    return typeof $;
  });

  if (jquery !== 'function') {
    await page
        .addScriptTag({path: './browser/jquery3.3.1.min.js'})
        .catch((err) => {
          log.error('attachJQuery:addScriptTag', err);
        });

    await page.waitForFunction('typeof $ === "function"').catch((err) => {
      log.error('attachJQuery:waitForFunction', err);
    });
  }
}

/**
 * attachJarvisCrawlerCore - inject ./browser/utils.js into the page and wait
 *   until it has defined the jarvisCrawlerCoreVer string. Errors are logged,
 *   not thrown.
 * @param {object} page - page
 */
async function attachJarvisCrawlerCore(page) {
  await page.addScriptTag({path: './browser/utils.js'}).catch((err) => {
    log.error('attachJarvisCrawlerCore:addScriptTag', err);
  });

  await page
      .waitForFunction('typeof jarvisCrawlerCoreVer === "string"')
      .catch((err) => {
        log.error('attachJarvisCrawlerCore:waitForFunction', err);
      });
}

/**
 * new DTBusinessGameReport with object
 * @param {object} obj - DTBusinessGameReport object
 * @return {messages.DTBusinessGameReport} result - DTBusinessGameReport
 */
function newDTBusinessGameReport(obj) {
  const result = new messages.DTBusinessGameReport();

  if (obj.businessid) {
    result.setBusinessid(obj.businessid);
  }

  if (obj.gamecode) {
    result.setGamecode(obj.gamecode);
  }

  if (obj.totalWin) {
    result.setTotalwin(obj.totalWin);
  }

  if (obj.totalBet) {
    result.setTotalbet(obj.totalBet);
  }

  if (obj.gameNums) {
    result.setGamenums(obj.gameNums);
  }

  if (obj.currency) {
    result.setCurrency(obj.currency);
  }

  return result;
}

/**
 * new DTTodayGameData with object
 * @param {object} obj - DTTodayGameData object
 * @return {messages.DTTodayGameData} result - DTTodayGameData
 */
function newDTTodayGameData(obj) {
  const result = new messages.DTTodayGameData();

  if (obj.totalWin) {
    result.setTotalwin(obj.totalWin);
  }

  if (obj.totalBet) {
    result.setTotalbet(obj.totalBet);
  }

  if (obj.gameNums) {
    result.setGamenums(obj.gameNums);
  }

  return result;
}

/**
 * new DTGameResultErr
 * @param {DTGameResultErrCode} errcode - DTGameResultErrCode
 * @param {number} value0 - int64 value
 * @param {number} value1 - int64 value
 * @param {string} strval0 - string value
 * @return {messages.DTGameResultErr} result - DTGameResultErr
 */
function newDTGameResultErr(errcode, value0, value1, strval0) {
  const result = new messages.DTGameResultErr();

  result.setErrcode(errcode);

  if (value0) {
    result.setValue0(value0);
  }

  if (value1) {
    result.setValue1(value1);
  }

  if (strval0) {
    result.setStrval0(strval0);
  }

  return result;
}

/**
 * printDTGameResultErr - log a DTGameResultErr; the value fields are only
 *   printed when at least one of them is set
 * @param {string} str - message prefix
 * @param {DTGameResultErr} err - DTGameResultErr
 */
function printDTGameResultErr(str, err) {
  if (err.getValue0() || err.getValue1() || err.getStrval0()) {
    log.error(
        str +
        ' [ errcode: ' +
        err.getErrcode() +
        ' v0: ' +
        err.getValue0() +
        ' v1: ' +
        err.getValue1() +
        ' strv0: ' +
        err.getStrval0() +
        ' ]',
    );
  } else {
    log.error(str + ' [ errcode: ' + err.getErrcode() + ' ]');
  }
}

/**
 * new DTGPKGameResult with object
 * @param {object} obj - DTGPKGameResult object; obj.children, when an
 *   array, is converted recursively
 * @return {messages.DTGPKGameResult} result - DTGPKGameResult
 */
function newDTGPKGameResult(obj) {
  const result = new messages.DTGPKGameResult();

  if (obj.id) {
    result.setId(obj.id);
  }

  if (obj.businessid) {
    result.setBusinessid(obj.businessid);
  }

  if (obj.playername) {
    result.setPlayername(obj.playername);
  }

  if (obj.gamecode) {
    result.setGamecode(obj.gamecode);
  }

  if (typeof obj.win === 'number') {
    result.setWin(obj.win);
  }

  if (typeof obj.bet === 'number') {
    result.setBet(obj.bet);
  }

  if (typeof obj.off === 'number') {
    result.setOff(obj.off);
  }

  if (typeof obj.lines === 'number') {
    result.setLines(obj.lines);
  }

  if (typeof obj.moneystart === 'number') {
    result.setMoneystart(obj.moneystart);
  }

  if (typeof obj.moneyend === 'number') {
    result.setMoneyend(obj.moneyend);
  }

  if (obj.playerip) {
    result.setPlayerip(obj.playerip);
  }

  if (obj.datastate) {
    result.setDatastate(obj.datastate);
  }

  if (obj.gametime) {
    result.setGametime(obj.gametime);
  }

  if (obj.clienttype) {
    result.setClienttype(obj.clienttype);
  }

  if (obj.currency) {
    result.setCurrency(obj.currency);
  }

  // typeof yields 'boolean' (never 'bool') for true/false
  if (typeof obj.iscomplete === 'boolean') {
    result.setIscomplete(obj.iscomplete);
  }

  if (obj.giftfreeid) {
    result.setGiftfreeid(obj.giftfreeid);
  }

  if (obj.gamedata) {
    result.setGamedata(obj.gamedata);
  }

  if (obj.gameresult) {
    result.setGameresult(obj.gameresult);
  }

  if (typeof obj.hassubgame === 'boolean') {
    result.setHassubgame(obj.hassubgame);
  }

  if (obj.err) {
    result.setErr(obj.err);
  }

  if (obj.dtbaseid) {
    result.setDtbaseid(obj.dtbaseid);
  }

  if (typeof obj.rootgame === 'boolean') {
    result.setRootgame(obj.rootgame);
  }

  if (Array.isArray(obj.children)) {
    for (let i = 0; i < obj.children.length; ++i) {
      result.addChildren(newDTGPKGameResult(obj.children[i]), i);
    }
  }

  return result;
}

/**
 * new DTGPKCheckGameResult with object
 * @param {object} obj - DTGPKCheckGameResult object
 * @return {messages.DTGPKCheckGameResult} result - DTGPKCheckGameResult
 */
function newDTGPKCheckGameResult(obj) {
  const result = new messages.DTGPKCheckGameResult();

  if (Array.isArray(obj.lst)) {
    for (let i = 0; i < obj.lst.length; ++i) {
      result.addLst(newDTGPKGameResult(obj.lst[i]), i);
    }
  }

  if (obj.errnums) {
    result.setErrnums(obj.errnums);
  }

  return result;
}

/**
 * print DTGPKCheckGameResult - log the err of every game result in the list
 *   and of each of its direct children
 * @param {DTGPKCheckGameResult} result - DTGPKCheckGameResult
 */
function printDTGPKCheckGameResult(result) {
  const lst = result.getLstList();
  for (let i = 0; i < lst.length; ++i) {
    if (lst[i].getErr()) {
      printDTGameResultErr(lst[i].getId(), lst[i].getErr());
    }

    const children = lst[i].getChildrenList();
    for (let j = 0; j < children.length; ++j) {
      if (children[j].getErr()) {
        printDTGameResultErr(children[j].getId(), children[j].getErr());
      }
    }
  }
}

/**
 * new CrunchBaseOrganization with object
 * @param {object} obj - CrunchBaseOrganization object
 * @return {messages.CrunchBaseOrganization} result - CrunchBaseOrganization
 */
function newCrunchBaseOrganization(obj) {
  const result = new messages.CrunchBaseOrganization();

  if (obj.name) {
    result.setName(obj.name);
  }

  if (obj.code) {
    result.setCode(obj.code);
  }

  if (Array.isArray(obj.categories)) {
    result.setCategoriesList(obj.categories);
  }

  if (Array.isArray(obj.headquartersRegions)) {
    result.setHeadquartersregionsList(obj.headquartersRegions);
  }

  if (obj.foundeddate) {
    result.setFoundeddate(obj.foundeddate);
  }

  if (Array.isArray(obj.founders)) {
    result.setFoundersList(obj.founders);
  }

  if (obj.operatingstatus) {
    result.setOperatingstatus(obj.operatingstatus);
  }

  if (obj.fundingstatus) {
    result.setFundingstatus(obj.fundingstatus);
  }

  if (obj.lastfundingtype) {
    result.setLastfundingtype(obj.lastfundingtype);
  }

  if (obj.legalname) {
    result.setLegalname(obj.legalname);
  }

  if (obj.stocksymbol) {
    result.setStocksymbol(obj.stocksymbol);
  }

  if (obj.valuationipo) {
    result.setValuationipo(obj.valuationipo);
  }

  if (obj.priceipo) {
    result.setPriceipo(obj.priceipo);
  }

  if (obj.dateipo) {
    result.setDateipo(obj.dateipo);
  }

  if (obj.moneyraisedipo) {
    result.setMoneyraisedipo(obj.moneyraisedipo);
  }

  if (Array.isArray(obj.fundingrounds)) {
    for (let i = 0; i < obj.fundingrounds.length; ++i) {
      result.addFundingrounds(
          newCrunchBaseFundingRound(obj.fundingrounds[i]),
          i,
      );
    }
  }

  return result;
}

/**
 * new CrunchBaseFundingRound with object
 * @param {object} obj - CrunchBaseFundingRound object
 * @return {messages.CrunchBaseFundingRound} result - CrunchBaseFundingRound
 */
function newCrunchBaseFundingRound(obj) {
  const result = new messages.CrunchBaseFundingRound();

  if (obj.announceddate) {
    result.setAnnounceddate(obj.announceddate);
  }

  if (obj.transactionname) {
    result.setTransactionname(obj.transactionname);
  }

  if (obj.moneyraised) {
    result.setMoneyraised(obj.moneyraised);
  }

  if (Array.isArray(obj.investors)) {
    for (let i = 0; i < obj.investors.length; ++i) {
      result.addInvestors(newCrunchBaseInvestor(obj.investors[i]), i);
    }
  }

  return result;
}

/**
 * new CrunchBaseInvestor with object
 * @param {object} obj - CrunchBaseInvestor object
 * @return {messages.CrunchBaseInvestor} result - CrunchBaseInvestor
 */
function newCrunchBaseInvestor(obj) {
  const result = new messages.CrunchBaseInvestor();

  if (obj.investorname) {
    result.setInvestorname(obj.investorname);
  }

  if (obj.leadinvestor) {
    result.setLeadinvestor(obj.leadinvestor);
  }

  if (Array.isArray(obj.partners)) {
    result.setPartnersList(obj.partners);
  }

  return result;
}

/**
 * new ReplyAnalyzePage with object
 * @param {object} obj - ReplyAnalyzePage object
 * @return {messages.ReplyAnalyzePage} result - ReplyAnalyzePage
 */
function newReplyAnalyzePage(obj) {
  const result = new messages.ReplyAnalyzePage();

  if (obj.pageTime) {
    result.setPagetime(obj.pageTime);
  }

  if (obj.pageBytes) {
    result.setPagebytes(obj.pageBytes);
  }

  if (Array.isArray(obj.errs) && obj.errs.length > 0) {
    result.setErrsList(obj.errs);
  }

  if (Array.isArray(obj.reqs)) {
    for (let i = 0; i < obj.reqs.length; ++i) {
      result.addReqs(newAnalyzeReqInfo(obj.reqs[i]), i);
    }
  }

  if (Array.isArray(obj.screenshots)) {
    for (let i = 0; i < obj.screenshots.length; ++i) {
      result.addScreenshots(newAnalyzeScreenshot(obj.screenshots[i]), i);
    }
  }

  if (Array.isArray(obj.logs) && obj.logs.length > 0) {
    result.setLogsList(obj.logs);
  }

  return result;
}

/**
 * new ReplyGeoIP with object
 * @param {object} obj - ReplyGeoIP object
 * @return {messages.ReplyGeoIP} result - ReplyGeoIP
 */
function newReplyGeoIP(obj) {
  const result = new messages.ReplyGeoIP();

  if (obj.latitude) {
    result.setLatitude(obj.latitude);
  }

  if (obj.longitude) {
    result.setLongitude(obj.longitude);
  }

  if (obj.organization) {
    result.setOrganization(obj.organization);
  }

  if (obj.asn) {
    result.setAsn(obj.asn);
  }

  if (obj.continent) {
    result.setContinent(obj.continent);
  }

  if (obj.country) {
    result.setCountry(obj.country);
  }

  if (obj.region) {
    result.setRegion(obj.region);
  }

  if (obj.city) {
    result.setCity(obj.city);
  }

  if (obj.hostname) {
    result.setHostname(obj.hostname);
  }

  return result;
}

/**
 * new TechInAsiaCompany with object
 * @param {object} obj - TechInAsiaCompany object
 * @return {messages.TechInAsiaCompany} result - TechInAsiaCompany
 */
function newTechInAsiaCompany(obj) {
  const result = new messages.TechInAsiaCompany();

  if (obj.name) {
    result.setName(obj.name);
  }

  if (obj.avatar) {
    result.setAvatar(obj.avatar);
  }

  if (obj.organization) {
    result.setOrganization(obj.organization);
  }

  if (Array.isArray(obj.location) && obj.location.length > 0) {
    result.setLocationList(obj.location);
  }

  if (Array.isArray(obj.categories) && obj.categories.length > 0) {
    result.setCategoriesList(obj.categories);
  }

  if (obj.employees) {
    result.setEmployees(obj.employees);
  }

  if (obj.introduction) {
    result.setIntroduction(obj.introduction);
  }

  if (Array.isArray(obj.links) && obj.links.length > 0) {
    result.setLinksList(obj.links);
  }

  if (obj.companyCode) {
    result.setCompanycode(obj.companyCode);
  }

  return result;
}

/**
 * new TechInAsiaJob with object
 * @param {object} obj - TechInAsiaJob object
 * @return {messages.TechInAsiaJob} result - TechInAsiaJob
 */
function newTechInAsiaJob(obj) {
  const result = new messages.TechInAsiaJob();

  if (obj.companyName) {
    result.setCompanyname(obj.companyName);
  }

  if (obj.title) {
    result.setTitle(obj.title);
  }

  if (Array.isArray(obj.location) && obj.location.length > 0) {
    result.setLocationList(obj.location);
  }

  if (obj.minSalary) {
    result.setMinsalary(obj.minSalary);
  }

  if (obj.maxSalary) {
    result.setMaxsalary(obj.maxSalary);
  }

  if (obj.currency) {
    result.setCurrency(obj.currency);
  }

  if (obj.createTime) {
    result.setCreatetime(obj.createTime);
  }

  if (obj.updateTime) {
    result.setUpdatetime(obj.updateTime);
  }

  if (obj.jobFunction) {
    result.setJobfunction(obj.jobFunction);
  }

  if (obj.jobType) {
    result.setJobtype(obj.jobType);
  }

  if (obj.experience) {
    result.setExperience(obj.experience);
  }

  if (obj.vacancies) {
    result.setVacancies(obj.vacancies);
  }

  if (obj.description) {
    result.setDescription(obj.description);
  }

  if (Array.isArray(obj.requiredSkills) && obj.requiredSkills.length > 0) {
    result.setRequiredskillsList(obj.requiredSkills);
  }

  if (obj.culture) {
    result.setCulture(obj.culture);
  }

  if (obj.companyCode) {
    result.setCompanycode(obj.companyCode);
  }

  if (obj.jobCode) {
    result.setJobcode(obj.jobCode);
  }

  if (Array.isArray(obj.subType) && obj.subType.length > 0) {
    result.setSubtypeList(obj.subType);
  }

  return result;
}

/**
 * new TechInAsiaJobList with object
 * @param {object} obj - TechInAsiaJobList object
 * @return {messages.TechInAsiaJobList} result - TechInAsiaJobList
 */
function newTechInAsiaJobList(obj) {
  const result = new messages.TechInAsiaJobList();

  if (Array.isArray(obj.jobs) && obj.jobs.length > 0) {
    for (let i = 0; i < obj.jobs.length; ++i) {
      // the index belongs to addJobs, not to newTechInAsiaJob
      result.addJobs(newTechInAsiaJob(obj.jobs[i]), i);
    }
  }

  return result;
}

/**
 * new TechInAsiaJobTag with object
 * @param {object} obj - TechInAsiaJobTag object
 * @return {messages.TechInAsiaJobTag} result - TechInAsiaJobTag
 */
function newTechInAsiaJobTag(obj) {
  const result = new messages.TechInAsiaJobTag();

  if (obj.tag) {
    result.setTag(obj.tag);
  }

  if (Array.isArray(obj.subTags) && obj.subTags.length > 0) {
    result.setSubtagsList(obj.subTags);
  }

  return result;
}

/**
 * new TechInAsiaJobTagList with object
 * @param {object} obj - TechInAsiaJobTagList object
 * @return {messages.TechInAsiaJobTagList} result - TechInAsiaJobTagList
 */
function newTechInAsiaJobTagList(obj) {
  const result = new messages.TechInAsiaJobTagList();

  if (Array.isArray(obj.tags) && obj.tags.length > 0) {
    for (let i = 0; i < obj.tags.length; ++i) {
      // the index belongs to addTags, not to newTechInAsiaJobTag
      result.addTags(newTechInAsiaJobTag(obj.tags[i]), i);
    }
  }

  return result;
}

/**
 * new ReplyTechInAsia with object
 * @param {number} mode - messages.TechInAsiaMode, selects how obj is
 *   converted; an unknown mode leaves the payload unset
 * @param {object} obj - TechInAsiaJob, TechInAsiaCompany, TechInAsiaJobList
 *   or TechInAsiaJobTagList object, matching mode
 * @return {messages.ReplyTechInAsia} result - ReplyTechInAsia
 */
function newReplyTechInAsia(mode, obj) {
  const result = new messages.ReplyTechInAsia();

  result.setMode(mode);

  // loose equality is kept on purpose so a numeric string still matches
  if (mode == messages.TechInAsiaMode.TIAM_JOB) {
    result.setJob(newTechInAsiaJob(obj));
  } else if (mode == messages.TechInAsiaMode.TIAM_COMPANY) {
    result.setCompany(newTechInAsiaCompany(obj));
  } else if (mode == messages.TechInAsiaMode.TIAM_JOBLIST) {
    result.setJobs(newTechInAsiaJobList(obj));
  } else if (mode == messages.TechInAsiaMode.TIAM_JOBTAG) {
    result.setTags(newTechInAsiaJobTagList(obj));
  }

  return result;
}

/**
 * new AnalyzeReqInfo with object
 * @param {object} obj - AnalyzeReqInfo object
 * @return {messages.AnalyzeReqInfo} result - AnalyzeReqInfo
 */
function newAnalyzeReqInfo(obj) {
  const result = new messages.AnalyzeReqInfo();

  if (obj.url) {
    result.setUrl(obj.url);
  }

  if (obj.downloadTime) {
    result.setDownloadtime(obj.downloadTime);
  }

  if (obj.bufBytes) {
    result.setBufbytes(obj.bufBytes);
  }

  if (obj.status) {
    result.setStatus(obj.status);
  }

  if (obj.startTime) {
    result.setStarttime(obj.startTime);
  }

  if (obj.isGZip) {
    result.setIsgzip(obj.isGZip);
  }

  if (obj.contentType) {
    result.setContenttype(obj.contentType);
  }

  if (obj.ipaddr) {
    result.setIpaddr(obj.ipaddr);
  }

  if (obj.remoteaddr) {
    result.setRemoteaddr(obj.remoteaddr);
  }

  if (obj.imgWidth) {
    result.setImgwidth(obj.imgWidth);
  }

  if (obj.imgHeight) {
    result.setImgheight(obj.imgHeight);
  }

  return result;
}

/**
 * new AnalyzeScreenshot with object
 * @param {object} obj - AnalyzeScreenshot object
 * @return {messages.AnalyzeScreenshot} result - AnalyzeScreenshot
 */
function newAnalyzeScreenshot(obj) {
  const result = new messages.AnalyzeScreenshot();

  if (obj.name) {
    result.setName(obj.name);
  }

  if (obj.type) {
    result.setType(obj.type);
  }

  if (obj.buf) {
    result.setBuf(obj.buf);
  }

  if (obj.status) {
    result.setStatus(obj.status);
  }

  return result;
}

/**
 * sleep
 * @param {number} ms - ms
 * @return {Promise} result - resolves (with no value) after ms milliseconds
 */
async function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * moveMouseToCenter - private helper: move the mouse to the center of the
 *   element's bounding box
 * @param {object} page - page
 * @param {object} ele - ElementHandle
 * @param {string} tag - caller name, used in the log message
 * @return {bool} moved - false when the element has no bounding box
 */
async function moveMouseToCenter(page, ele, tag) {
  const bbox = await ele.boundingBox();
  log.debug(bbox);

  // boundingBox() yields null for an element that is not visible
  if (!bbox) {
    log.error(tag + ' boundingBox is null');

    return false;
  }

  await page.mouse.move(
      Math.floor(bbox.x + bbox.width / 2),
      Math.floor(bbox.y + bbox.height / 2),
  );

  return true;
}

/**
 * findEleInFrames - private helper: first element matching selector and
 *   isThis inside the frames accepted by isFrame
 * @param {array} frames - frames to search, in order
 * @param {string} selector - selector
 * @param {function} isFrame - function (Frame) bool, may be async
 * @param {function} isThis - async function (ElementHandle) bool
 * @param {string} tag - caller name, used in the log message
 * @return {object} ele - ElementHandle, undefined when nothing matches
 */
async function findEleInFrames(frames, selector, isFrame, isThis, tag) {
  for (let i = 0; i < frames.length; ++i) {
    const frame = frames[i];

    // await so that an async isFrame is honored instead of being always
    // truthy (a pending Promise)
    if (!(await isFrame(frame))) {
      continue;
    }

    const eles = await frame.$$(selector).catch((err) => {
      log.error(tag + ':$$(' + selector + ') ' + err);
    });

    // eles is undefined when the query above failed
    if (!Array.isArray(eles)) {
      continue;
    }

    for (let j = 0; j < eles.length; ++j) {
      if (await isThis(eles[j])) {
        return eles[j];
      }
    }
  }

  return undefined;
}

/**
 * mouseMove - dispatch a synthetic mousemove event on document.body
 * @param {object} page - page
 * @param {number} x - screen x
 * @param {number} y - screen y
 * @param {number} cx - client x
 * @param {number} cy - client y
 */
async function mouseMove(page, x, y, cx, cy) {
  await page
      .evaluate(
          (param) => {
            console.log(param);

            const e = new MouseEvent('mousemove', {
              screenX: param.x,
              screenY: param.y,
              clientX: param.cx,
              clientY: param.cy,
            });

            document.body.dispatchEvent(e);
          },
          {x: x, y: y, cx: cx, cy: cy},
      )
      .catch((err) => {
        log.error('mouseMove ' + err);
      });
}

/**
 * mouseMoveToEle - move the mouse to the center of the first element
 *   matching selector; does nothing when there is no such element
 * @param {object} page - page
 * @param {string} selector - selector
 */
async function mouseMoveToEle(page, selector) {
  const ele = await page.$(selector).catch((err) => {
    log.error('mouseMoveToEle ' + err);
  });

  if (ele) {
    await moveMouseToCenter(page, ele, 'mouseMoveToEle');
  }
}

/**
 * mouseMoveToEleEx - move the mouse to the center of the first element
 *   matching selector that isThis accepts
 * @param {object} page - page
 * @param {string} selector - selector
 * @param {function} isThis - async function (ElementHandle) bool
 */
async function mouseMoveToEleEx(page, selector, isThis) {
  const eles = await page.$$(selector).catch((err) => {
    log.error('mouseMoveToEleEx ' + err);
  });

  // eles is undefined when the query above failed
  if (!Array.isArray(eles)) {
    return;
  }

  for (let i = 0; i < eles.length; ++i) {
    if (await isThis(eles[i])) {
      await moveMouseToCenter(page, eles[i], 'mouseMoveToEleEx');

      return;
    }
  }
}

/**
 * mouseMoveToFrameEleEx - like mouseMoveToEleEx, but searches every frame
 *   of the page that isFrame accepts
 * @param {object} page - page
 * @param {string} selector - selector
 * @param {function} isFrame - async function (Frame) bool
 * @param {function} isThis - async function (ElementHandle) bool
 */
async function mouseMoveToFrameEleEx(page, selector, isFrame, isThis) {
  const lstFrames = await page.frames();
  const ele = await findEleInFrames(
      lstFrames,
      selector,
      isFrame,
      isThis,
      'mouseMoveToFrameEleEx',
  );

  if (ele) {
    await moveMouseToCenter(page, ele, 'mouseMoveToFrameEleEx');
  }
}

/**
 * mouseClickEle - move the mouse to the center of the first element
 *   matching selector, then press and release the mouse button
 * @param {object} page - page
 * @param {string} selector - selector
 */
async function mouseClickEle(page, selector) {
  const ele = await page.$(selector).catch((err) => {
    log.error('mouseClickEle ' + err);
  });

  if (ele && (await moveMouseToCenter(page, ele, 'mouseClickEle'))) {
    await page.mouse.down();
    await page.mouse.up();
  }
}

/**
 * mouseClickFrameEleEx - click the first element matching selector and
 *   isThis inside the frames that isFrame accepts
 * @param {object} page - page
 * @param {string} selector - selector
 * @param {function} isFrame - async function (Frame) bool
 * @param {function} isThis - async function (ElementHandle) bool
 */
async function mouseClickFrameEleEx(page, selector, isFrame, isThis) {
  const lstFrames = await page.frames();
  const ele = await findEleInFrames(
      lstFrames,
      selector,
      isFrame,
      isThis,
      'mouseClickFrameEleEx',
  );

  if (ele && (await moveMouseToCenter(page, ele, 'mouseClickFrameEleEx'))) {
    await page.mouse.down();
    await page.mouse.up();
  }
}

/**
 * mouseHoldFrameEleEx - press the mouse button on the first element
 *   matching selector and isThis, keep it down for timeHold ms, then
 *   release. Unlike the other *FrameEleEx helpers the main frame is never
 *   searched.
 * @param {object} page - page
 * @param {string} selector - selector
 * @param {function} isFrame - async function (Frame) bool
 * @param {function} isThis - async function (ElementHandle) bool
 * @param {number} timeHold - time to hold, in ms
 */
async function mouseHoldFrameEleEx(page, selector, isFrame, isThis, timeHold) {
  const lstFrames = await page.frames();
  log.debug('mouseHoldFrameEleEx ' + lstFrames.length);

  const mainFrame = page.mainFrame();
  const subFrames = lstFrames.filter((frame) => frame !== mainFrame);
  const ele = await findEleInFrames(
      subFrames,
      selector,
      isFrame,
      isThis,
      'mouseHoldFrameEleEx',
  );

  if (ele && (await moveMouseToCenter(page, ele, 'mouseHoldFrameEleEx'))) {
    await page.mouse.down();
    await sleep(timeHold);
    await page.mouse.up();
  }
}

/**
 * hasChinese - test for a character in U+4E00-U+9FA5 or U+3000-U+303F
 * @param {string} str - string
 * @return {bool} hasChinese - has Chinese
 */
function hasChinese(str) {
  return /[\u4E00-\u9FA5\u3000-\u303F]/.test(str);
}

/**
 * findFrame - poll page.frames() once per second until funcIsFrame accepts
 *   a frame. There is no timeout: the returned promise never settles when
 *   no frame ever matches.
 * @param {object} page - page
 * @param {function} funcIsFrame - function funcIsFrame(frame) bool
 * @return {object} frame - frame
 */
async function findFrame(page, funcIsFrame) {
  while (true) {
    const frame = await page.frames().find(funcIsFrame);
    if (frame) {
      return frame;
    }

    // must be awaited, otherwise the loop spins without ever pausing
    await sleep(1000);
  }
}

/**
 * clearLocalStorage
 * @param {object} page - page
 * @return {error} err - error, undefined on success
 */
async function clearLocalStorage(page) {
  let awaiterr = undefined;

  await page
      .evaluate(() => {
        localStorage.clear();
      })
      .catch((err) => {
        awaiterr = err;
      });

  return awaiterr;
}

/**
 * clearSessionStorage
 * @param {object} page - page
 * @return {error} err - error, undefined on success
 */
async function clearSessionStorage(page) {
  let awaiterr = undefined;

  await page
      .evaluate(() => {
        sessionStorage.clear();
      })
      .catch((err) => {
        awaiterr = err;
      });

  return awaiterr;
}

/**
 * clearCookies - expire every cookie listed in document.cookie of the page
 * @param {object} page - page
 * @return {error} err - error, undefined on success
 */
async function clearCookies(page) {
  let awaiterr = undefined;

  await page
      .evaluate(() => {
        const cookies = document.cookie.split(';');
        for (let i = 0; i < cookies.length; i++) {
          const cookie = cookies[i];
          const eqPos = cookie.indexOf('=');
          // entries are separated by '; ', so drop the leading blank
          const name = (
            eqPos > -1 ? cookie.substring(0, eqPos) : cookie
          ).trim();

          // document.cookie === '' splits into a single empty entry
          if (name === '') {
            continue;
          }

          document.cookie = name + '=;expires=Thu, 01 Jan 1970 00:00:00 GMT';
        }
      })
      .catch((err) => {
        awaiterr = err;
      });

  return awaiterr;
}

/**
 * clearIndexedDB - request deletion of every database reported by
 *   indexedDB.databases() in the page
 * @param {object} page - page
 * @return {error} err - error, undefined on success
 */
async function clearIndexedDB(page) {
  let awaiterr = undefined;

  await page
      .evaluate(() => {
        // returned so that evaluate() waits for the database list instead
        // of resolving right away
        return window.indexedDB
            .databases()
            .then((dbs) => {
              dbs.forEach((db) => {
                window.indexedDB.deleteDatabase(db.name);
              });
            })
            .catch((err) => {
              console.log('window.indexedDB.databases ', err);
            });
      })
      .catch((err) => {
        awaiterr = err;
      });

  return awaiterr;
}

/**
 * closeNonBlankPages - private helper: close every page whose url is not
 *   about:blank
 * @param {array} pages - pages
 * @return {error} err - last close error, undefined when all succeeded
 */
async function closeNonBlankPages(pages) {
  let awaiterr = undefined;

  for (let i = 0; i < pages.length; ++i) {
    if (pages[i].url() !== 'about:blank') {
      await pages[i].close().catch((err) => {
        awaiterr = err;
      });
    }
  }

  return awaiterr;
}

/**
 * closeAllPages - close all pages except about:blank ones
 * @param {object} browser - browser
 * @return {error} err - error, undefined on success
 */
async function closeAllPages(browser) {
  let awaiterr = undefined;

  const pages = await browser.pages().catch((err) => {
    awaiterr = err;
  });

  // pages is undefined when browser.pages() failed
  if (awaiterr) {
    return awaiterr;
  }

  return closeNonBlankPages(pages);
}

/**
 * closeAllPagesEx - close all pages if pages > nums
 * @param {object} browser - browser
 * @param {int} nums - nums
 * @return {error} err - error, undefined on success
 */
async function closeAllPagesEx(browser, nums) {
  let awaiterr = undefined;

  const pages = await browser.pages().catch((err) => {
    awaiterr = err;
  });

  if (awaiterr) {
    return awaiterr;
  }

  if (pages.length > nums) {
    return closeNonBlankPages(pages);
  }

  return awaiterr;
}

/**
 * printInfo - print information; the version is read from ../package.json
 * @param {string} projname - project's name
 */
function printInfo(projname) {
  const proj = JSON.parse(
      fs.readFileSync(path.join(__dirname, '../package.json'), 'utf8'),
  );

  console.log(projname + ' (jarviscrawlercore) v' + proj.version + '.');
  console.log(
      'The project\'s url is https://github.com/zhs007/jarviscrawlercore .',
  );
  console.log('Author is Zerro Zhao (zerrozhao@gmail.com).');
  console.log('');
}

exports.saveMessage = saveMessage;
exports.saveZipMessage = saveZipMessage;
exports.hashMD5 = hashMD5;
exports.setImageInfo = setImageInfo;
exports.getImageHashName = getImageHashName;
exports.newParagraph = newParagraph;
exports.newImageInfo = newImageInfo;
exports.newExportArticleResult = newExportArticleResult;
exports.newArticle = newArticle;
exports.newArticleList = newArticleList;
exports.attachJQuery = attachJQuery;
exports.attachJarvisCrawlerCore = attachJarvisCrawlerCore;
exports.newDTBusinessGameReport = newDTBusinessGameReport;
exports.newDTTodayGameData = newDTTodayGameData;
exports.newDTGPKGameResult = newDTGPKGameResult;
exports.newDTGPKCheckGameResult = newDTGPKCheckGameResult;
exports.newDTGameResultErr = newDTGameResultErr;
exports.printDTGPKCheckGameResult = printDTGPKCheckGameResult;
exports.newCrunchBaseOrganization = newCrunchBaseOrganization;
exports.newCrunchBaseFundingRound = newCrunchBaseFundingRound;
exports.newCrunchBaseInvestor = newCrunchBaseInvestor;
exports.newReplyAnalyzePage = newReplyAnalyzePage;
exports.mouseMove = mouseMove;
exports.mouseMoveToEle = mouseMoveToEle;
exports.mouseMoveToEleEx = mouseMoveToEleEx;
exports.mouseMoveToFrameEleEx = mouseMoveToFrameEleEx;
exports.mouseClickEle = mouseClickEle;
exports.mouseClickFrameEleEx = mouseClickFrameEleEx;
exports.mouseHoldFrameEleEx = mouseHoldFrameEleEx;
exports.sleep = sleep;
exports.hasChinese = hasChinese;
exports.findFrame = findFrame;
exports.newReplyGeoIP = newReplyGeoIP;
exports.newReplyTechInAsia = newReplyTechInAsia;
exports.clearCookies = clearCookies;
exports.clearSessionStorage = clearSessionStorage;
exports.clearLocalStorage = clearLocalStorage;
exports.clearIndexedDB = clearIndexedDB;
exports.closeAllPages = closeAllPages;
exports.closeAllPagesEx = closeAllPagesEx;
exports.printInfo = printInfo;