UNPKG

@harvestapi/scraper

Version:

HarvestAPI provides LinkedIn data scraping tools for real-time, high-performance scraping at a low cost. The API lets you search for LinkedIn `jobs`, `companies`, `profiles`, and `posts` using a wide range of filters.

74 lines (73 loc) 2.46 kB
import { ApiItemResponse, ApiListResponse, ApiPagination } from '../types';

/**
 * Optional, caller-supplied settings for a listing scraper.
 *
 * `TItemShot` is the item shape carried by list responses
 * (`ApiListResponse<TItemShot>`); `TItemDetails` is the item shape carried by
 * item responses (`ApiItemResponse<TItemDetails>`). Every field is optional.
 *
 * NOTE(review): only the type shapes are visible here; remarks below about what
 * a field does at runtime are inferred from its name and should be confirmed
 * against the scraper implementation.
 */
export type ListingScraperConfig<TItemShot, TItemDetails> = {
  /** Output mode; must be one of the three listed literals. */
  outputType?: 'json' | 'sqlite' | 'callback';
  /** Presumably the output location and naming for the file/SQLite modes — TODO confirm. */
  outputDir?: string;
  filename?: string;
  tableName?: string;
  /** Presumably toggles the per-item details fetch — TODO confirm. */
  scrapeDetails?: boolean;
  /** By its name, whether to continue when every item on a page was skipped — TODO confirm. */
  keepScrapingIfAllSkippedOnPage?: boolean;
  /**
   * Callback that receives the current pagination (or `null`), the item in
   * either its short or detailed shape, a logger, and any subset of the
   * `ApiItemResponse` fields. Its return value is typed `any`.
   */
  onItemScraped?: (
    args: {
      pagination: ApiPagination | null;
      item: TItemShot | TItemDetails;
      logger: Required<ScraperOptions>['logger'];
    } & Partial<ApiItemResponse<TItemShot | TItemDetails>>,
  ) => any;
  /** Callback that receives a list response or `null`; by its name, tied to the first page. */
  onFirstPageFetched?: (args: { data: ApiListResponse<TItemShot> | null }) => any;
  /**
   * Callback that receives a page number and that page's list response (or
   * `null`). It may return nothing, or an object with optional `doneAll` /
   * `donePages` flags, either directly or through a promise.
   * NOTE(review): the flags presumably request an early stop — confirm.
   */
  onPageFetched?: (args: {
    page: number;
    data: ApiListResponse<TItemShot> | null;
  }) =>
    | Promise<{
        doneAll?: boolean;
        donePages?: boolean;
      } | void>
    | {
        doneAll?: boolean;
        donePages?: boolean;
      }
    | void;
  /** Numeric concurrency overrides; presumably for item and page fetching respectively. */
  overrideConcurrency?: number;
  overridePageConcurrency?: number;
  /** Presumably an upper bound on the number of items scraped — TODO confirm. */
  maxItems?: number;
  /** Presumably silence regular and error log output respectively — TODO confirm. */
  disableLog?: boolean;
  disableErrorLog?: boolean;
  /** Any subset of the full `ListingScraperOptions` for the same item types. */
  optionsOverride?: Partial<ListingScraperOptions<TItemShot, TItemDetails>>;
  sessionId?: string;
  /** String-to-string header maps; presumably added to list and item requests respectively. */
  addListingHeaders?: Record<string, string>;
  addItemHeaders?: Record<string, string>;
  /** Presumably the number of pages to process and the page to start from — TODO confirm. */
  takePages?: number;
  startPage?: number;
  /** Maps a page number and the current pagination (or `null`) to an arbitrary key/value record. */
  getFetchListParams?: (args: {
    page: number;
    pagination: ApiPagination | null;
  }) => Record<string, any>;
};

/**
 * Optional flags that `fetchItem` (see `ListingScraperOptions`) may resolve
 * with instead of an `ApiItemResponse`.
 * NOTE(review): how each flag is interpreted is not visible in this file.
 */
export type ItemDetailsExtendedProperties = {
  skipResult?: boolean;
  skipCount?: boolean;
  done?: boolean;
  keepScrapingIfAllSkippedOnPage?: boolean;
};

/**
 * Full option set for a listing scraper: everything in `ListingScraperConfig`
 * plus the fetch functions and the required `maxPageNumber` / `entityName`.
 */
export type ListingScraperOptions<TItemShot, TItemDetails> = ListingScraperConfig<
  TItemShot,
  TItemDetails
> & {
  /** Resolves with the list response for the given page. */
  fetchList: (args: {
    page: number;
    paginationToken?: string | null;
    sessionId?: string;
    addHeaders?: Record<string, string>;
  }) => Promise<ApiListResponse<TItemShot>>;
  /**
   * Given a short item, returns `null` directly, or a promise that resolves
   * with an item response, a set of `ItemDetailsExtendedProperties` flags, or
   * `null`.
   */
  fetchItem: (args: {
    item: TItemShot;
    sessionId?: string;
    addHeaders?: Record<string, string>;
  }) => Promise<(ApiItemResponse<TItemDetails> | ItemDetailsExtendedProperties) | null> | null;
  maxPageNumber: number;
  entityName: string;
  warnPageLimit?: boolean;
};

/**
 * Options shared by scrapers: a required API key plus an optional base URL,
 * extra headers, and a logger exposing `log` and `error`.
 */
export type ScraperOptions = {
  apiKey: string;
  baseUrl?: string;
  addHeaders?: Record<string, string>;
  logger?: {
    log: (...args: any[]) => void;
    error: (...args: any[]) => void;
  };
};