@crawlee/core
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.
77 lines • 3.07 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.GotScrapingHttpClient = void 0;
const utils_1 = require("@crawlee/utils");
/**
 * A HTTP client implementation based on the `got-scraping` library.
 *
 * Both methods delegate to `gotScraping` and always pass `cookieJar: undefined`,
 * so any `cookieJar` present on the incoming request is ignored.
 */
class GotScrapingHttpClient {
    /**
     * @inheritDoc
     *
     * Performs a buffered request. Retries are disabled by default (`limit: 0`);
     * values in `request.retry` take precedence over that default.
     *
     * @param request Request options; spread as-is into the `gotScraping` call.
     * @returns A shallow copy of the `gotScraping` result whose `request` property is
     *   `{ url: request.url }` merged with the own enumerable properties of the result's `request`.
     */
    async sendRequest(request) {
        const gotResult = await (0, utils_1.gotScraping)({
            ...request,
            // `HttpCrawler` reads the cookies beforehand and sets them in `request.gotOptions`.
            // Using the `cookieJar` option directly would override that.
            cookieJar: undefined,
            retry: {
                limit: 0,
                ...request.retry,
            },
        });
        return {
            ...gotResult,
            body: gotResult.body,
            request: { url: request.url, ...gotResult.request },
        };
    }
    /**
     * @inheritDoc
     *
     * Opens a streaming request and resolves once the `response` event fires.
     * Rejects if creating the stream fails or if the stream emits `error`
     * before the response arrives.
     *
     * @param request Request options; spread into the `gotScraping` call with `isStream: true`.
     * @param handleRedirect Optional callback invoked as `(redirectResponse, updatedOptions)`
     *   on every `redirect` event of the stream.
     * @returns An object exposing the stream, the original `request`, and response metadata.
     *   `complete`, `trailers` and `rawTrailers` are refreshed when the stream emits `end`.
     */
    async stream(request, handleRedirect) {
        // Create the stream in the async method body rather than inside an async promise
        // executor: a throw/rejection here now rejects the returned promise instead of
        // becoming an unhandled rejection that leaves the caller waiting forever.
        const stream = await Promise.resolve((0, utils_1.gotScraping)({ ...request, isStream: true, cookieJar: undefined }));
        return new Promise((resolve, reject) => {
            stream.on('redirect', (updatedOptions, redirectResponse) => {
                handleRedirect?.(redirectResponse, updatedOptions);
            });
            // We need to end the stream for DELETE requests, otherwise it will hang.
            // The comparison is case-insensitive so that e.g. `Delete` is covered too.
            if (typeof request.method === 'string' && request.method.toUpperCase() === 'DELETE') {
                stream.end();
            }
            stream.on('error', reject);
            stream.on('response', (response) => {
                const result = {
                    stream,
                    request,
                    redirectUrls: response.redirectUrls,
                    url: response.url,
                    ip: response.ip,
                    statusCode: response.statusCode,
                    headers: response.headers,
                    trailers: response.trailers,
                    complete: response.complete,
                    // Progress is read lazily from the stream so callers always see the current value.
                    get downloadProgress() {
                        return stream.downloadProgress;
                    },
                    get uploadProgress() {
                        return stream.uploadProgress;
                    },
                };
                Object.assign(result, response); // TODO BC - remove in 4.0
                resolve(result);
                // The values captured above are a snapshot taken at `response` time;
                // copy the final ones over once the stream has ended.
                stream.on('end', () => {
                    result.complete = response.complete;
                    if (result.trailers == null) {
                        result.trailers = {};
                    }
                    Object.assign(result.trailers, response.trailers);
                    // TODO BC - remove in 4.0
                    if (result.rawTrailers == null) {
                        result.rawTrailers = [];
                    }
                    Object.assign(result.rawTrailers, response.rawTrailers);
                });
            });
        });
    }
}
exports.GotScrapingHttpClient = GotScrapingHttpClient;
//# sourceMappingURL=got-scraping-http-client.js.map