scrapper-tools

Version:

Modern way to scrape modern websites

72 lines (56 loc) • 1.84 kB

text/typescript

import { Page } from "playwright"
import _ from "lodash"
import AsyncLock from "async-lock"

// Module-wide lock used to serialize in-page requests per key.
// `maxPending` is passed straight to async-lock (limit on queued tasks per its docs).
const lock = new AsyncLock({ maxPending: 5000 })

// Hooks run, in registration order, before every browserRequest call.
const hooks: Array<(page: Page, config: any) => any> = []

/**
 * Performs a `fetch` inside the given page and resolves with the response body as text.
 *
 * Every registered hook is awaited first with `(page, config)`, so a hook may
 * adjust `config` before the request is built. The final fetch options are the
 * defaults below deep-merged with `config` (`config` wins on conflicts).
 *
 * @param page   Playwright page in whose context the fetch is evaluated.
 * @param config Fetch options plus a mandatory `url`; `method` defaults to "GET".
 * @returns The response body text returned by `res.text()`.
 * @throws The string "URL is not given. Please provide Url" when `config.url` is falsy.
 */
export async function browserRequest(page: Page, config: any = {}) {
  for (const hook of hooks) {
    await hook(page, config)
  }

  if (!config.url) {
    // NOTE(review): a plain string is thrown (not an Error). Kept as-is because
    // existing callers may compare against the thrown value.
    throw "URL is not given. Please provide Url"
  }

  const defaultCfg: any = {
    credentials: "include",
    headers: {},
    body: null,
    redirect: "follow",
    mode: "cors",
  }

  if (!config.method) {
    defaultCfg.method = "GET"
  }

  const fetchConfig = _.merge({}, defaultCfg, config)

  // The callback executes in the page, so it can only use what is passed in as its argument.
  return await page.evaluate(async (fetchConfig) => {
    const res = await fetch(fetchConfig.url, fetchConfig)
    return await res.text()
  }, fetchConfig)
}

/**
 * Registers a hook that is awaited before every `browserRequest`.
 *
 * Async just in case in future we need to add some await here.
 *
 * @param func Callback receiving the page and the request config.
 */
export async function addBrowserRequestHooks(func: (page: Page, config: any) => any) {
  hooks.push(func)
}

/**
 * Same as `browserRequest`, but all calls share one lock key, so they run one at a time.
 *
 * @param page   Playwright page in whose context the fetch is evaluated.
 * @param config Request config forwarded unchanged to `browserRequest`.
 * @returns The response body text.
 */
export async function singleBrowserRequest(page: Page, config: any = {}) {
  return await lock.acquire("singleBrowserRequest", async function singleBrowserRequestLock() {
    return await browserRequest(page, config)
  })
}

// Round-robin cursor selecting which lock key the next concurrent request uses.
let concurrentRequestId = 0

/**
 * Same as `browserRequest`, but spreads calls round-robin over `concurrency`
 * lock keys, so at most that many requests run at the same time.
 *
 * @param page        Playwright page in whose context the fetch is evaluated.
 * @param concurrency Number of lock keys to rotate through. Values that are not
 *                    a finite number >= 1 fall back to 1; fractions are floored.
 * @param config      Request config forwarded unchanged to `browserRequest`.
 * @returns The response body text.
 */
export async function concurrentBrowserRequest(page: Page, concurrency: number, config: any = {}) {
  // `x % 0` and `x % NaN` are NaN, which would stick in the module-level cursor
  // forever and collapse every later call onto a single key.
  const slots = Number.isFinite(concurrency) && concurrency >= 1 ? Math.floor(concurrency) : 1
  concurrentRequestId = (concurrentRequestId + 1) % slots

  return await lock.acquire(
    "singleBrowserRequest" + concurrentRequestId,
    async function singleBrowserRequestLock() {
      return await browserRequest(page, config)
    }
  )
}

/**
 * Calls `browserRequest` with the `content-type` header forced to JSON.
 *
 * The caller's config object is not modified; a shallow copy with a fresh
 * `headers` object is passed on instead.
 *
 * @param a Playwright page in whose context the fetch is evaluated.
 * @param b Request config; any existing `content-type` header is overwritten.
 * @returns The response body text.
 */
export async function jsonBrowserRequest(a: Page, b: any = {}) {
  const jsonConfig = {
    ...b,
    headers: {
      ...b.headers,
      "content-type": "application/json;charset=UTF-8",
    },
  }
  return browserRequest(a, jsonConfig)
}