parsera-ts
Version:
Official TypeScript SDK for Parsera.org API - Extract structured data from any webpage
300 lines (299 loc) • 9.6 kB
TypeScript
/**
 * Tuning knobs for the automatic retry of failed requests.
 * Every field is optional.
 */
export interface ParseraRetryOptions {
  /**
   * Upper bound on the number of retry attempts for a failed request.
   * Covers network errors, timeouts, and rate limits.
   * @default 3
   */
  maxRetries?: number;

  /**
   * Exponential backoff multiplier applied between retries.
   * Wait before a retry = initialDelay * (backoffFactor to the power of retryCount).
   * With initialDelay 1000 and backoffFactor 2: 1000ms, then 2000ms, then 4000ms.
   * @default 2
   */
  backoffFactor?: number;

  /**
   * Delay, in milliseconds, ahead of the first retry.
   * Later retries scale this value by backoffFactor.
   * @default 1000 (1 second)
   */
  initialDelay?: number;
}
/**
 * One field to pull out of a webpage: a result key plus a
 * natural-language description of the value wanted.
 */
export interface ParseraAttribute {
  /**
   * Attribute name; used as the key in the returned data object.
   */
  name: string;

  /**
   * Plain-language description of the data to extract.
   * The more specific the description, the better.
   */
  description: string;
}
/**
 * A cookie sent along with an extraction request.
 * Apart from the required `sameSite` field, any string-valued
 * property may be supplied.
 */
export interface ParseraCookie {
  /**
   * SameSite attribute of the cookie; governs how the cookie
   * behaves on cross-site requests.
   */
  sameSite: 'None' | 'Lax' | 'Strict';

  /**
   * Any further cookie properties, as string key-value pairs.
   */
  [key: string]: string;
}
/**
 * Settings accepted by the Parsera client constructor.
 * Only `apiKey` is mandatory.
 */
export interface ParseraOptions {
  /**
   * Parsera API key; required to authenticate with the API.
   */
  apiKey: string;

  /**
   * Base URL of the Parsera API.
   * Override only when targeting a custom API endpoint.
   * @default "https://api.parsera.org/v1"
   */
  baseUrl?: string;

  /**
   * Proxy country used when a request does not specify its own proxyCountry.
   * @example "random" | "UnitedStates" | "UnitedKingdom" | "Germany" | "Japan" | "France" | "Canada"
   * @default "UnitedStates"
   * @see https://api.parsera.org/v1/proxy-countries for full list of supported countries
   */
  defaultProxyCountry?: string;

  /**
   * Per-request time limit in milliseconds.
   * A request exceeding it is aborted and retried.
   * @default 30000 (30 seconds)
   */
  timeout?: number;

  /**
   * Retry behavior for failed requests.
   * @see ParseraRetryOptions
   */
  retryOptions?: ParseraRetryOptions;
}
/**
 * Parameters of a single extraction request.
 */
export interface ExtractOptions {
  /**
   * Address of the webpage to extract data from.
   * Must be a valid HTTP/HTTPS URL.
   */
  url: string;

  /**
   * What to extract from the page: either a list of ParseraAttribute
   * objects or a Record mapping each attribute name to its description.
   */
  attributes: ParseraAttribute[] | Record<string, string>;

  /**
   * Proxy server country for this request.
   * Takes precedence over the defaultProxyCountry setting.
   * @example "UnitedStates" | "UnitedKingdom" | "Germany" | "Japan"
   */
  proxyCountry?: string;

  /**
   * Cookies to send with the request, e.g. for pages that
   * require authentication.
   */
  cookies?: ParseraCookie[];

  /**
   * Turns on precision mode for more accurate extractions,
   * possibly at the cost of longer processing time.
   * @default false
   */
  precisionMode?: boolean;

  /**
   * AbortSignal used to cancel the request, either because it
   * exceeded the timeout or because the caller no longer needs it.
   */
  signal?: AbortSignal;
}
/**
 * Event names accepted by the client's `on` / `off` / `removeAllListeners`.
 *
 * The literal members are the built-in event names; any other string is
 * also accepted so that custom events (e.g. 'my:custom:event') can be used.
 *
 * The open-ended member is written as `string & Record<never, never>` rather
 * than plain `string`: a bare `string` absorbs every literal in the union and
 * collapses the whole alias to `string`, which loses editor suggestions for
 * the built-in names. The intersection accepts exactly the same values.
 */
export type ParseraEventType =
  | 'request:start'
  | 'request:end'
  | 'request:retry'
  | 'request:error'
  | 'extract:start'
  | 'extract:complete'
  | 'extract:error'
  | 'rateLimit'
  | 'timeout'
  | (string & Record<never, never>);
/**
 * Payload delivered to event handlers.
 *
 * @typeParam T - Type of the optional `data` carried by the event.
 */
export interface ParseraEvent<T = unknown> {
  /** Name of the event. */
  type: ParseraEventType;

  /** Numeric timestamp of the event (presumably epoch milliseconds; confirm against the implementation). */
  timestamp: number;

  /** Event-specific payload, when there is one. */
  data?: T;

  /** Error associated with the event, when there is one. */
  error?: Error;

  /** Retry attempt number, when the event relates to a retry. */
  retryCount?: number;
}
/**
 * Callback invoked with a ParseraEvent.
 * May be synchronous or return a promise.
 *
 * @typeParam T - Type of the `data` carried by the handled event.
 */
export type ParseraEventHandler<T = unknown> = (
  event: ParseraEvent<T>,
) => void | Promise<void>;
/**
 * Controls how registered event handlers are executed.
 */
export interface ParseraEventOptions {
  /**
   * Run the handler asynchronously.
   * When true, handlers do not block the main execution.
   * @default false
   */
  async?: boolean;

  /**
   * Catch errors thrown by handlers.
   * When true, handler errors do not affect the main execution.
   * @default true
   */
  catchErrors?: boolean;
}
/**
 * Client for the Parsera API.
 * Exposes `extract` (with aliases `run` and `arun`) plus an event API
 * (`on`, `off`, `removeAllListeners`).
 */
export declare class Parsera {
  // Private state and helpers. Their types are not part of this declaration.
  private readonly apiKey;
  private readonly baseUrl;
  private readonly defaultProxyCountry;
  private readonly timeout;
  private readonly retryOptions;
  private lastRequestTime;
  private readonly minRequestInterval;
  private readonly eventHandlers;
  private readonly eventOptions;

  /**
   * Builds a new Parsera client.
   *
   * @example
   * ```typescript
   * const parsera = new Parsera({
   *   apiKey: "your-api-key",
   *   timeout: 60000, // 60 second timeout
   *   retryOptions: {
   *     maxRetries: 3,
   *     backoffFactor: 2,
   *     initialDelay: 1000,
   *   }
   * });
   * ```
   */
  constructor({ apiKey, baseUrl, defaultProxyCountry, timeout, retryOptions }: ParseraOptions);

  private validateApiKey;
  private validateUrl;
  private enforceRateLimit;
  private fetchWithTimeout;
  private retryableRequest;
  private isRetryableError;

  /**
   * Turns a Record<string, string> into a ParseraAttribute[]
   */
  private convertToAttributes;

  /**
   * Subscribes a handler to one event type.
   *
   * @param eventType - Event type to listen for
   * @param handler - Function called with the event
   * @param options - Settings controlling how the handler is run
   *
   * @example
   * ```typescript
   * // Pass the payload type explicitly: `event.data` is `unknown` by default.
   * parsera.on<unknown[]>('extract:complete', (event) => {
   *   console.log(`Extraction completed with ${event.data?.length ?? 0} items`);
   * });
   *
   * parsera.on('request:retry', (event) => {
   *   console.log(`Retrying request (attempt ${event.retryCount})`);
   * });
   *
   * // Custom event
   * parsera.on('my:custom:event', (event) => {
   *   console.log('Custom event data:', event.data);
   * });
   * ```
   */
  on<T = unknown>(
    eventType: ParseraEventType,
    handler: ParseraEventHandler<T>,
    options?: ParseraEventOptions,
  ): void;

  /**
   * Unsubscribes a handler from one event type.
   */
  off<T = unknown>(eventType: ParseraEventType, handler: ParseraEventHandler<T>): void;

  /**
   * Removes every handler registered for the given event type.
   * `eventType` is optional; presumably omitting it clears the handlers of
   * all event types (confirm against the implementation).
   */
  removeAllListeners(eventType?: ParseraEventType): void;

  private emit;

  /**
   * Extracts data from a webpage through the Parsera API.
   *
   * @param options - Settings for this extraction
   * @returns Promise resolving to an array of extracted data objects
   *
   * @throws {Error} When API key is invalid
   * @throws {Error} When URL is invalid
   * @throws {Error} When request times out
   * @throws {Error} When rate limit is exceeded (after retries)
   * @throws {Error} When no data is found
   *
   * @example
   * ```typescript
   * // Basic usage with attribute record
   * const results = await parsera.extract({
   *   url: "https://example.com/products",
   *   attributes: {
   *     title: "Extract the product title",
   *     price: "Get the product price",
   *   }
   * });
   *
   * // Advanced usage with all options
   * const results = await parsera.extract({
   *   url: "https://example.com/products",
   *   attributes: [
   *     { name: "title", description: "Extract the product title" },
   *     { name: "price", description: "Get the product price" }
   *   ],
   *   proxyCountry: "UnitedKingdom",
   *   cookies: [
   *     { name: "session", value: "abc123", sameSite: "Lax" }
   *   ],
   *   precisionMode: true,
   *   signal: abortController.signal
   * });
   *
   * // With request cancellation
   * const controller = new AbortController();
   * const promise = parsera.extract({
   *   url: "https://example.com",
   *   attributes: { title: "Extract the title" },
   *   signal: controller.signal
   * });
   *
   * // Cancel the request after 5 seconds
   * setTimeout(() => controller.abort(), 5000);
   * ```
   *
   * @example
   * Example return value:
   * ```json
   * [
   *   {
   *     "title": "Product Name",
   *     "price": "$99.99"
   *   },
   *   {
   *     "title": "Another Product",
   *     "price": "$149.99"
   *   }
   * ]
   * ```
   */
  extract({
    url,
    attributes,
    proxyCountry,
    cookies,
    precisionMode,
    signal,
  }: ExtractOptions): Promise<Record<string, string>[]>;

  /**
   * Alias of the extract method, provided to match the Python library interface.
   *
   * @see {@link extract} for full documentation and examples
   *
   * @example
   * ```typescript
   * const results = await parsera.run({
   *   url: "https://example.com",
   *   attributes: { title: "Extract the title" }
   * });
   * ```
   */
  run(options: ExtractOptions): Promise<Record<string, string>[]>;

  /**
   * Alias of the extract method, provided to match the Python library interface.
   *
   * @see {@link extract} for full documentation and examples
   *
   * @example
   * ```typescript
   * const results = await parsera.arun({
   *   url: "https://example.com",
   *   attributes: { title: "Extract the title" }
   * });
   * ```
   */
  arun(options: ExtractOptions): Promise<Record<string, string>[]>;

  private handleError;
}