@hyperbrowser/sdk

Node SDK for Hyperbrowser API

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.ScrapeService = exports.BatchScrapeService = void 0; const base_1 = require("./base"); const utils_1 = require("../utils"); const client_1 = require("../client"); const constants_1 = require("../types/constants"); class BatchScrapeService extends base_1.BaseService { /** * Start a new batch scrape job * @param params The parameters for the batch scrape job */ async start(params) { try { return await this.request("/scrape/batch", { method: "POST", body: JSON.stringify(params), }); } catch (error) { if (error instanceof client_1.HyperbrowserError) { throw error; } throw new client_1.HyperbrowserError("Failed to start batch scrape job", undefined); } } /** * Get the status of a batch scrape job * @param id The ID of the batch scrape job to get */ async getStatus(id) { try { return await this.request(`/scrape/batch/${id}/status`); } catch (error) { if (error instanceof client_1.HyperbrowserError) { throw error; } throw new client_1.HyperbrowserError(`Failed to get batch scrape job ${id} status`, undefined); } } /** * Get the details of a batch scrape job * @param id The ID of the batch scrape job to get * @param params Optional parameters to filter the batch scrape job */ async get(id, params) { try { return await this.request(`/scrape/batch/${id}`, undefined, { page: params?.page, batchSize: params?.batchSize, }); } catch (error) { if (error instanceof client_1.HyperbrowserError) { throw error; } throw new client_1.HyperbrowserError(`Failed to get batch scrape job ${id}`, undefined); } } /** * Start a batch scrape job and wait for it to complete * @param params The parameters for the batch scrape job * @param returnAllPages Whether to return all pages in the batch scrape job response */ async startAndWait(params, returnAllPages = true) { const job = await this.start(params); const jobId = job.jobId; if (!jobId) { throw new client_1.HyperbrowserError("Failed to start batch scrape job, could not get job ID"); } let failures = 0; let jobStatus = "pending"; while (true) { try { const { status } = await this.getStatus(jobId); if (status === "completed" || status === "failed") { jobStatus = status; break; } failures = 0; } catch (error) { failures++; if (failures >= constants_1.POLLING_ATTEMPTS) { throw new client_1.HyperbrowserError(`Failed to poll batch scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`); } } await (0, utils_1.sleep)(2000); } failures = 0; if (!returnAllPages) { while (true) { try { return await this.get(jobId); } catch (error) { failures++; if (failures >= constants_1.POLLING_ATTEMPTS) { throw new client_1.HyperbrowserError(`Failed to get batch scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`); } } await (0, utils_1.sleep)(500); } } failures = 0; const jobResponse = { jobId, status: jobStatus, data: [], currentPageBatch: 0, totalPageBatches: 0, totalScrapedPages: 0, batchSize: 100, }; let firstCheck = true; while (firstCheck || jobResponse.currentPageBatch < jobResponse.totalPageBatches) { try { const tmpJobResponse = await this.get(jobId, { page: jobResponse.currentPageBatch + 1, batchSize: 100, }); if (tmpJobResponse.data) { jobResponse.data?.push(...tmpJobResponse.data); } jobResponse.currentPageBatch = tmpJobResponse.currentPageBatch; jobResponse.totalScrapedPages = tmpJobResponse.totalScrapedPages; jobResponse.totalPageBatches = tmpJobResponse.totalPageBatches; jobResponse.batchSize = tmpJobResponse.batchSize; failures = 0; firstCheck 
= false; } catch (error) { failures++; if (failures >= constants_1.POLLING_ATTEMPTS) { throw new client_1.HyperbrowserError(`Failed to get batch page ${jobResponse.currentPageBatch + 1} for job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`); } } await (0, utils_1.sleep)(500); } return jobResponse; } } exports.BatchScrapeService = BatchScrapeService; class ScrapeService extends base_1.BaseService { constructor(apiKey, baseUrl, timeout) { super(apiKey, baseUrl, timeout); this.batch = new BatchScrapeService(apiKey, baseUrl, timeout); } /** * Start a new scrape job * @param params The parameters for the scrape job */ async start(params) { try { return await this.request("/scrape", { method: "POST", body: JSON.stringify(params), }); } catch (error) { if (error instanceof client_1.HyperbrowserError) { throw error; } throw new client_1.HyperbrowserError("Failed to start scrape job", undefined); } } /** * Get the status of a scrape job * @param id The ID of the scrape job to get */ async getStatus(id) { try { return await this.request(`/scrape/${id}/status`); } catch (error) { if (error instanceof client_1.HyperbrowserError) { throw error; } throw new client_1.HyperbrowserError(`Failed to get scrape job status ${id}`, undefined); } } /** * Get the details of a scrape job * @param id The ID of the scrape job to get */ async get(id) { try { return await this.request(`/scrape/${id}`); } catch (error) { if (error instanceof client_1.HyperbrowserError) { throw error; } throw new client_1.HyperbrowserError(`Failed to get scrape job ${id}`, undefined); } } /** * Start a scrape job and wait for it to complete * @param params The parameters for the scrape job */ async startAndWait(params) { const job = await this.start(params); const jobId = job.jobId; if (!jobId) { throw new client_1.HyperbrowserError("Failed to start scrape job, could not get job ID"); } let failures = 0; while (true) { try { const { status } = await this.getStatus(jobId); if (status === "completed" || status === "failed") { return await this.get(jobId); } failures = 0; } catch (error) { failures++; if (failures >= constants_1.POLLING_ATTEMPTS) { throw new client_1.HyperbrowserError(`Failed to poll scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`); } } await (0, utils_1.sleep)(2000); } } } exports.ScrapeService = ScrapeService;
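
A minimal usage sketch, not part of this file: it assumes the package's top-level client is the `Hyperbrowser` export and wires these services up as `client.scrape` and `client.scrape.batch` (the constructor above suggests this composition), and that jobs take `url` / `urls` parameters. Verify names and parameter shapes against the package's own exports and type definitions.

// Hypothetical usage of ScrapeService / BatchScrapeService via the SDK client.
const { Hyperbrowser } = require("@hyperbrowser/sdk");

const client = new Hyperbrowser({ apiKey: process.env.HYPERBROWSER_API_KEY });

async function main() {
    // Single-page scrape: start() the job, poll its status every 2s,
    // and return the job details once it completes or fails.
    const result = await client.scrape.startAndWait({ url: "https://example.com" });
    console.log(result.status);

    // Batch scrape: with returnAllPages = true (the default), startAndWait
    // fetches every page batch and accumulates them into one response.
    const batch = await client.scrape.batch.startAndWait({
        urls: ["https://example.com", "https://example.org"],
    });
    console.log(batch.totalScrapedPages);
}

main().catch(console.error);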