@hyperbrowser/sdk
Node SDK for Hyperbrowser API
Object.defineProperty(exports, "__esModule", { value: true });
exports.ScrapeService = exports.BatchScrapeService = void 0;
const base_1 = require("./base");
const utils_1 = require("../utils");
const client_1 = require("../client");
const constants_1 = require("../types/constants");
class BatchScrapeService extends base_1.BaseService {
    /**
     * Start a new batch scrape job
     * @param params The parameters for the batch scrape job
     */
    async start(params) {
        try {
            return await this.request("/scrape/batch", {
                method: "POST",
                body: JSON.stringify(params),
            });
        }
        catch (error) {
            if (error instanceof client_1.HyperbrowserError) {
                throw error;
            }
            throw new client_1.HyperbrowserError("Failed to start batch scrape job", undefined);
        }
    }
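    /*
     * Example (illustrative, not part of the compiled module): kick off a
     * batch job, assuming `batch` is an instance of this service and that
     * the params shape matches the SDK's batch scrape job params type.
     *
     *   const { jobId } = await batch.start({
     *     urls: ["https://example.com", "https://example.org"],
     *   });
     */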
    /**
     * Get the status of a batch scrape job
     * @param id The ID of the batch scrape job to get
     */
    async getStatus(id) {
        try {
            return await this.request(`/scrape/batch/${id}/status`);
        }
        catch (error) {
            if (error instanceof client_1.HyperbrowserError) {
                throw error;
            }
            throw new client_1.HyperbrowserError(`Failed to get batch scrape job ${id} status`, undefined);
        }
    }
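    /*
     * Example (illustrative): poll at a custom interval instead of using
     * startAndWait; `batch` and `jobId` come from the example above.
     *
     *   let status = "pending";
     *   while (status !== "completed" && status !== "failed") {
     *     ({ status } = await batch.getStatus(jobId));
     *     await new Promise((resolve) => setTimeout(resolve, 5000));
     *   }
     */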
    /**
     * Get the details of a batch scrape job
     * @param id The ID of the batch scrape job to get
     * @param params Optional parameters to filter the batch scrape job
     */
    async get(id, params) {
        try {
            return await this.request(`/scrape/batch/${id}`, undefined, {
                page: params?.page,
                batchSize: params?.batchSize,
            });
        }
        catch (error) {
            if (error instanceof client_1.HyperbrowserError) {
                throw error;
            }
            throw new client_1.HyperbrowserError(`Failed to get batch scrape job ${id}`, undefined);
        }
    }
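    /*
     * Example (illustrative): fetch one page batch of results instead of the
     * whole job; the page and batchSize values are arbitrary placeholders.
     *
     *   const pageTwo = await batch.get(jobId, { page: 2, batchSize: 50 });
     */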
    /**
     * Start a batch scrape job and wait for it to complete
     * @param params The parameters for the batch scrape job
     * @param returnAllPages Whether to return all pages in the batch scrape job response
     */
    async startAndWait(params, returnAllPages = true) {
        const job = await this.start(params);
        const jobId = job.jobId;
        if (!jobId) {
            throw new client_1.HyperbrowserError("Failed to start batch scrape job, could not get job ID");
        }
        // Poll the status endpoint every 2 seconds until the job reaches a
        // terminal state, tolerating up to POLLING_ATTEMPTS consecutive failures.
        let failures = 0;
        let jobStatus = "pending";
        while (true) {
            try {
                const { status } = await this.getStatus(jobId);
                if (status === "completed" || status === "failed") {
                    jobStatus = status;
                    break;
                }
                failures = 0;
            }
            catch (error) {
                failures++;
                if (failures >= constants_1.POLLING_ATTEMPTS) {
                    throw new client_1.HyperbrowserError(`Failed to poll batch scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
                }
            }
            await (0, utils_1.sleep)(2000);
        }
        failures = 0;
        // If the caller only wants the first page batch, return it directly.
        if (!returnAllPages) {
            while (true) {
                try {
                    return await this.get(jobId);
                }
                catch (error) {
                    failures++;
                    if (failures >= constants_1.POLLING_ATTEMPTS) {
                        throw new client_1.HyperbrowserError(`Failed to get batch scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
                    }
                }
                await (0, utils_1.sleep)(500);
            }
        }
        failures = 0;
        // Otherwise, page through every batch and aggregate the results into
        // a single response object.
        const jobResponse = {
            jobId,
            status: jobStatus,
            data: [],
            currentPageBatch: 0,
            totalPageBatches: 0,
            totalScrapedPages: 0,
            batchSize: 100,
        };
        let firstCheck = true;
        while (firstCheck || jobResponse.currentPageBatch < jobResponse.totalPageBatches) {
            try {
                const tmpJobResponse = await this.get(jobId, {
                    page: jobResponse.currentPageBatch + 1,
                    batchSize: 100,
                });
                if (tmpJobResponse.data) {
                    jobResponse.data.push(...tmpJobResponse.data);
                }
                jobResponse.currentPageBatch = tmpJobResponse.currentPageBatch;
                jobResponse.totalScrapedPages = tmpJobResponse.totalScrapedPages;
                jobResponse.totalPageBatches = tmpJobResponse.totalPageBatches;
                jobResponse.batchSize = tmpJobResponse.batchSize;
                failures = 0;
                firstCheck = false;
            }
            catch (error) {
                failures++;
                if (failures >= constants_1.POLLING_ATTEMPTS) {
                    throw new client_1.HyperbrowserError(`Failed to get batch page ${jobResponse.currentPageBatch + 1} for job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
                }
            }
            await (0, utils_1.sleep)(500);
        }
        return jobResponse;
    }
}
exports.BatchScrapeService = BatchScrapeService;
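/*
 * Example (illustrative sketch): start a batch scrape and wait for all page
 * batches to be aggregated. Constructing the service directly mirrors how
 * ScrapeService wires it below; in normal use it is reached through the
 * SDK's top-level client. The URLs and params shape are placeholders.
 *
 *   const batch = new BatchScrapeService(process.env.HYPERBROWSER_API_KEY);
 *   const result = await batch.startAndWait({
 *     urls: ["https://example.com", "https://example.org"],
 *   });
 *   console.log(result.status, result.totalScrapedPages, result.data.length);
 */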
class ScrapeService extends base_1.BaseService {
    constructor(apiKey, baseUrl, timeout) {
        super(apiKey, baseUrl, timeout);
        // Expose batch scraping under the same client configuration.
        this.batch = new BatchScrapeService(apiKey, baseUrl, timeout);
    }
    /**
     * Start a new scrape job
     * @param params The parameters for the scrape job
     */
    async start(params) {
        try {
            return await this.request("/scrape", {
                method: "POST",
                body: JSON.stringify(params),
            });
        }
        catch (error) {
            if (error instanceof client_1.HyperbrowserError) {
                throw error;
            }
            throw new client_1.HyperbrowserError("Failed to start scrape job", undefined);
        }
    }
    /**
     * Get the status of a scrape job
     * @param id The ID of the scrape job to get
     */
    async getStatus(id) {
        try {
            return await this.request(`/scrape/${id}/status`);
        }
        catch (error) {
            if (error instanceof client_1.HyperbrowserError) {
                throw error;
            }
            throw new client_1.HyperbrowserError(`Failed to get scrape job ${id} status`, undefined);
        }
    }
    /**
     * Get the details of a scrape job
     * @param id The ID of the scrape job to get
     */
    async get(id) {
        try {
            return await this.request(`/scrape/${id}`);
        }
        catch (error) {
            if (error instanceof client_1.HyperbrowserError) {
                throw error;
            }
            throw new client_1.HyperbrowserError(`Failed to get scrape job ${id}`, undefined);
        }
    }
    /**
     * Start a scrape job and wait for it to complete
     * @param params The parameters for the scrape job
     */
    async startAndWait(params) {
        const job = await this.start(params);
        const jobId = job.jobId;
        if (!jobId) {
            throw new client_1.HyperbrowserError("Failed to start scrape job, could not get job ID");
        }
        // Poll every 2 seconds until the job reaches a terminal state,
        // tolerating up to POLLING_ATTEMPTS consecutive failures.
        let failures = 0;
        while (true) {
            try {
                const { status } = await this.getStatus(jobId);
                if (status === "completed" || status === "failed") {
                    return await this.get(jobId);
                }
                failures = 0;
            }
            catch (error) {
                failures++;
                if (failures >= constants_1.POLLING_ATTEMPTS) {
                    throw new client_1.HyperbrowserError(`Failed to poll scrape job ${jobId} after ${constants_1.POLLING_ATTEMPTS} attempts: ${error}`);
                }
            }
            await (0, utils_1.sleep)(2000);
        }
    }
}
exports.ScrapeService = ScrapeService;
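/*
 * Example (illustrative sketch): scrape a single URL and wait for the result.
 * Assumes the SDK's top-level client exposes this service (e.g. as
 * client.scrape); the URL and params shape are placeholders.
 *
 *   const scrape = new ScrapeService(process.env.HYPERBROWSER_API_KEY);
 *   const result = await scrape.startAndWait({ url: "https://example.com" });
 *   if (result.status === "completed") {
 *     console.log(result.data);
 *   }
 */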