UNPKG

crawlerzzz

Version:

48 lines (42 loc) 1.11 kB
import liveCrawler, { LinkType } from '../src/liveCrawler';
import { Page } from 'puppeteer';
import * as cheerio from 'cheerio';
import { propArraySelector } from '../src/htmlSelector';

/**
 * Per-page hook handed to `liveCrawler`.
 *
 * Currently a placeholder: it only awaits the page's HTML and performs no
 * extraction. The actual scraping happens in the `pageSource$` subscriber
 * further down.
 *
 * @param page - Puppeteer page for the link being processed.
 * @param link - The link being processed (unused here).
 */
const processPage = async (page: Page, link: LinkType) => {
  await page.content();
  // console.log('content', content);
};

// Example crawl of a Zhihu collection: log every item title found on a page
// and queue the pagination links so the crawler keeps walking the collection.
// NOTE(review): the exact meaning/units of `rateLimit` and
// `secondsWaitForLogin` are defined by liveCrawler — confirm there.
liveCrawler({
  domain: 'https://www.zhihu.com/collection/80502443',
  // startFrom: '/quotes',
  processPage,
  manualLogin: true,
  rateLimit: 3000,
  secondsWaitForLogin: 10,
  launchConfig: {
    headless: false,
    userDataDir: 'browsercache'
  }
  // NOTE(review): suppression kept from the original; presumably the resolved
  // value of liveCrawler is not typed precisely enough to destructure — confirm.
  //@ts-ignore
}).then(({ pageSource$, queueLink }) => {
  // Queue a link only when the attribute was actually present: cheerio's
  // `attr` yields `undefined` when nothing matched or the attribute is missing.
  const queueIfPresent = (href: string | undefined): void => {
    if (href) {
      queueLink(href);
    }
  };

  //@ts-ignore
  pageSource$.subscribe(res => {
    const $ = cheerio.load(res.src);

    // Last anchor inside the pager block, if any.
    queueIfPresent(
      $('div.zm-invite-pager a')
        .last()
        .attr('href')
    );

    // One entry per `.zm-item`, carrying the trimmed text of its title link.
    const items = propArraySelector($, '.zm-item', {
      content: (q: any) =>
        $('.zm-item-title a', q)
          .text()
          .trim()
    });

    // Explicit "next page" link, if the page has one.
    queueIfPresent($('.next_page').attr('href'));

    console.log(items);
  });
});