/*
 * oxylabs-ai-studio
 * Version:
 * JavaScript SDK for Oxylabs AI Studio API services
 * 88 lines • 3.26 kB
 * JavaScript
 */
// Output formats for which an OpenAPI schema is attached to the scrape request.
const SCHEMA_OUTPUT_FORMATS = new Set(['json', 'csv', 'toon']);

/**
 * AI-Scraper Service
 *
 * Handles all AI-Scraper related API calls. Every request is delegated to the
 * injected client; this class only shapes payloads and polls for results.
 */
export class AiScraperService {
  /**
   * @param {object} client - HTTP client exposing awaitable `post(path, body)`
   *   and `get(path)` methods.
   */
  constructor(client) {
    this.client = client;
  }

  /**
   * Generate schema for scraping (POST /scrape/schema).
   *
   * @param {object} options - Request body, forwarded to the client unchanged.
   * @returns {Promise<*>} Whatever the client resolves with.
   */
  async generateSchema(options) {
    return await this.client.post('/scrape/schema', options);
  }

  /**
   * Submit scraping request (POST /scrape).
   *
   * @param {object} options
   * @param {string} options.url - Target URL.
   * @param {string} [options.output_format] - Falls back to "markdown" when falsy.
   * @param {boolean} [options.render_javascript] - Falls back to `false` when falsy.
   * @param {string} [options.geo_location] - Sent as `undefined` when falsy.
   * @param {string} [options.user_agent] - Only added to the payload when truthy.
   * @param {object} [options.schema] - Sent as `openapi_schema`, but only when
   *   `output_format` is "json", "csv" or "toon".
   * @returns {Promise<*>} Whatever the client resolves with.
   */
  async submitScrapeRequest(options) {
    const payload = {
      url: options.url,
      output_format: options.output_format || 'markdown',
      render_javascript: options.render_javascript || false,
      geo_location: options.geo_location || undefined,
    };
    if (options.user_agent) {
      payload.user_agent = options.user_agent;
    }
    // Only include openapi_schema for the structured formats (json, csv, toon).
    if (SCHEMA_OUTPUT_FORMATS.has(options.output_format) && options.schema) {
      payload.openapi_schema = options.schema;
    }
    return await this.client.post('/scrape', payload);
  }

  /**
   * Get scraping run data/results (GET /scrape/run/data).
   *
   * @param {string} runId - Run identifier; URL-encoded into the query string.
   * @returns {Promise<*>} Whatever the client resolves with.
   * @throws {Error} When `runId` is falsy.
   */
  async getScrapeRunData(runId) {
    if (!runId) {
      throw new Error('run_id is required');
    }
    const params = new URLSearchParams();
    params.append('run_id', runId);
    return await this.client.get(`/scrape/run/data?${params.toString()}`);
  }

  /**
   * Synchronous scraping: submit a request, then poll until it finishes.
   *
   * @param {object} options - Same options as `submitScrapeRequest`.
   * @param {number} [timeout=60000] - Overall polling budget in milliseconds.
   * @param {number} [pollInterval=5000] - Delay between polls in milliseconds.
   * @returns {Promise<*>} The run data once its `status` is "completed".
   * @throws {Error} When no run ID is returned, when the run reports
   *   "failed", or when the timeout elapses first.
   */
  async scrape(options, timeout = 60000, pollInterval = 5000) {
    const submitResult = await this.submitScrapeRequest(options);
    // Optional chaining so an empty response yields the descriptive error
    // below instead of a TypeError.
    const runId = submitResult?.run_id || submitResult?.id;
    if (!runId) {
      throw new Error('No run ID returned from scrape request');
    }

    const deadline = Date.now() + timeout;
    while (Date.now() < deadline) {
      const runData = await this.getScrapeRunData(runId);
      const status = runData?.status;
      // NOTE(review): status is written to stdout on every poll; kept because
      // callers may rely on it.
      console.log('Run status:', status);
      if (status === 'completed') {
        return runData;
      }
      if (status === 'failed') {
        throw new Error(`Scraping failed: ${runData?.error_code || runData?.message || 'Unknown error'}`);
      }

      // Never sleep past the deadline: no further poll would happen after it,
      // so waiting a full interval would only delay the timeout error.
      const remaining = deadline - Date.now();
      if (remaining <= 0) {
        break;
      }
      await new Promise((resolve) => setTimeout(resolve, Math.min(pollInterval, remaining)));
    }
    throw new Error(`Scraping timeout after ${timeout}ms`);
  }

  /**
   * Complete workflow with auto-schema and sync results.
   *
   * A schema is generated from `options.parse_prompt` only when the requested
   * output format can carry one (json, csv, toon); for other formats the
   * schema would be dropped by `submitScrapeRequest`, so the request is skipped.
   *
   * @param {object} options
   * @param {string} options.url - Target URL.
   * @param {string} [options.parse_prompt] - Sent as `user_prompt` to schema generation.
   * @param {string} [options.output_format] - Falls back to "markdown" when falsy.
   * @param {boolean} [options.render_javascript] - Falls back to `false` when falsy.
   * @param {string} [options.geo_location]
   * @param {string} [options.user_agent] - Forwarded to the scrape request when truthy.
   * @param {number} [timeout=60000] - Polling budget passed to `scrape`.
   * @param {number} [pollInterval=5000] - Poll delay passed to `scrape`.
   * @returns {Promise<*>} The completed run data returned by `scrape`.
   */
  async scrapeWithAutoSchema(options, timeout = 60000, pollInterval = 5000) {
    const outputFormat = options.output_format || 'markdown';
    const scrapeOptions = {
      url: options.url,
      output_format: outputFormat,
      render_javascript: options.render_javascript || false,
      geo_location: options.geo_location || undefined,
    };
    if (options.user_agent) {
      scrapeOptions.user_agent = options.user_agent;
    }

    if (SCHEMA_OUTPUT_FORMATS.has(outputFormat)) {
      const schemaResult = await this.generateSchema({
        user_prompt: options.parse_prompt,
      });
      scrapeOptions.schema = schemaResult.openapi_schema;
    }

    return await this.scrape(scrapeOptions, timeout, pollInterval);
  }
}
//# sourceMappingURL=aiScraper.js.map