UNPKG

@crawlee/core

Version:

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

77 lines 3.07 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.GotScrapingHttpClient = void 0; const utils_1 = require("@crawlee/utils"); /** * A HTTP client implementation based on the `got-scraping` library. */ class GotScrapingHttpClient { /** * @inheritDoc */ async sendRequest(request) { const gotResult = await (0, utils_1.gotScraping)({ ...request, // `HttpCrawler` reads the cookies beforehand and sets them in `request.gotOptions`. // Using the `cookieJar` option directly would override that. cookieJar: undefined, retry: { limit: 0, ...request.retry, }, }); return { ...gotResult, body: gotResult.body, request: { url: request.url, ...gotResult.request }, }; } /** * @inheritDoc */ async stream(request, handleRedirect) { // eslint-disable-next-line no-async-promise-executor return new Promise(async (resolve, reject) => { const stream = await Promise.resolve((0, utils_1.gotScraping)({ ...request, isStream: true, cookieJar: undefined })); stream.on('redirect', (updatedOptions, redirectResponse) => { handleRedirect?.(redirectResponse, updatedOptions); }); // We need to end the stream for DELETE requests, otherwise it will hang. if (request.method && ['DELETE', 'delete'].includes(request.method)) { stream.end(); } stream.on('error', reject); stream.on('response', (response) => { const result = { stream, request, redirectUrls: response.redirectUrls, url: response.url, ip: response.ip, statusCode: response.statusCode, headers: response.headers, trailers: response.trailers, complete: response.complete, get downloadProgress() { return stream.downloadProgress; }, get uploadProgress() { return stream.uploadProgress; }, }; Object.assign(result, response); // TODO BC - remove in 4.0 resolve(result); stream.on('end', () => { var _a; result.complete = response.complete; result.trailers ?? (result.trailers = {}); Object.assign(result.trailers, response.trailers); (_a = result).rawTrailers ?? (_a.rawTrailers = []); // TODO BC - remove in 4.0 Object.assign(result.rawTrailers, response.rawTrailers); }); }); }); } } exports.GotScrapingHttpClient = GotScrapingHttpClient; //# sourceMappingURL=got-scraping-http-client.js.map