UNPKG

@harvestapi/scraper

Version:

HarvestAPI provides LinkedIn data scraping tools for real-time, high-performance scraping at a low cost. The API lets you search for LinkedIn `jobs`, `companies`, `profiles`, and `posts` using a wide range of filters.

60 lines (59 loc) 2 kB
import { ApiItemResponse, ApiListResponse, ApiPagination } from '../types';

/**
 * Caller-supplied configuration for a listing scraper.
 *
 * `TItemShot` is the element type carried by list responses and
 * `TItemDetails` is the element type carried by single-item responses
 * (see `fetchList` / `fetchItem` on `ListingScraperOptions`).
 * Every member is optional.
 */
export type ListingScraperConfig<TItemShot, TItemDetails> = {
  /** Output mode, selected by name. */
  outputType?: 'json' | 'sqlite' | 'callback';
  /** NOTE(review): presumably the directory output files are written to — confirm. */
  outputDir?: string;
  /** NOTE(review): presumably the output file name — confirm. */
  filename?: string;
  /** NOTE(review): presumably the table used with the 'sqlite' output type — confirm. */
  tableName?: string;
  /** NOTE(review): presumably toggles fetching per-item details — confirm. */
  scrapeDetails?: boolean;
  /**
   * Callback that receives an item (in either shape), the pagination info
   * (or `null`), a logger, and optionally any fields of the item response.
   */
  onItemScraped?: (
    args: {
      pagination: ApiPagination | null;
      item: TItemShot | TItemDetails;
      logger: Required<ScraperOptions>['logger'];
    } & Partial<ApiItemResponse<TItemShot | TItemDetails>>,
  ) => any;
  /** Callback that receives the list response (or `null`); name suggests first page only. */
  onFirstPageFetched?: (args: {
    data: ApiListResponse<TItemShot> | null;
  }) => any;
  /** Callback that receives a page number together with its list response (or `null`). */
  onPageFetched?: (args: {
    page: number;
    data: ApiListResponse<TItemShot> | null;
  }) => any;
  /** Numeric concurrency override; exact scope is not visible here — TODO confirm. */
  overrideConcurrency?: number;
  /** Numeric page-level concurrency override — TODO confirm semantics. */
  overridePageConcurrency?: number;
  /** NOTE(review): presumably an upper bound on the number of scraped items — confirm. */
  maxItems?: number;
  /** NOTE(review): presumably silences regular log output — confirm. */
  disableLog?: boolean;
  /** NOTE(review): presumably silences error log output — confirm. */
  disableErrorLog?: boolean;
  /** Any subset of the full `ListingScraperOptions` for the same item types. */
  optionsOverride?: Partial<ListingScraperOptions<TItemShot, TItemDetails>>;
  /** Session identifier; `fetchList` and `fetchItem` accept a field of the same name. */
  sessionId?: string;
  /** String-to-string header map; presumably sent with listing requests — confirm. */
  addListingHeaders?: Record<string, string>;
  /** String-to-string header map; presumably sent with item requests — confirm. */
  addItemHeaders?: Record<string, string>;
  /** NOTE(review): presumably the number of pages to process — confirm. */
  takePages?: number;
  /** NOTE(review): presumably the page number to start from — confirm. */
  startPage?: number;
};

/**
 * Full option set for a listing scraper: everything in
 * `ListingScraperConfig` plus the fetch functions and entity metadata.
 */
export type ListingScraperOptions<TItemShot, TItemDetails> =
  ListingScraperConfig<TItemShot, TItemDetails> & {
    /** Fetches one page of the listing and resolves to the list response. */
    fetchList: (args: {
      page: number;
      paginationToken?: string | null;
      sessionId?: string;
      addHeaders?: Record<string, string>;
    }) => Promise<ApiListResponse<TItemShot>>;
    /**
     * Fetches a single item given its list entry.
     *
     * May return `null` synchronously, or a promise resolving to an item
     * response, a `{ skipped, done? }` marker object, or `null`.
     */
    fetchItem: (args: {
      item: TItemShot;
      sessionId?: string;
      addHeaders?: Record<string, string>;
    }) => Promise<
      | ApiItemResponse<TItemDetails>
      | { skipped: boolean; done?: boolean }
      | null
    > | null;
    /** NOTE(review): presumably the highest page number that may be requested — confirm. */
    maxPageNumber: number;
    /** Name of the scraped entity; presumably used in log messages — confirm. */
    entityName: string;
    /** NOTE(review): presumably enables a warning tied to `maxPageNumber` — confirm. */
    warnPageLimit?: boolean;
  };

/**
 * Client-level options: a required API key, plus an optional base URL,
 * extra headers and logger.
 */
export type ScraperOptions = {
  /** API key (required). */
  apiKey: string;
  /** Optional base URL; the default is not visible in this file. */
  baseUrl?: string;
  /** Optional string-to-string header map. */
  addHeaders?: Record<string, string>;
  /** Optional logger exposing variadic `log` and `error` methods. */
  logger?: {
    log: (...args: any[]) => void;
    error: (...args: any[]) => void;
  };
};