@harvestapi/scraper
Version:
HarvestAPI provides LinkedIn data scraping tools for real-time, high-performance scraping at a low cost. The API lets you search for LinkedIn `jobs`, `companies`, `profiles`, and `posts` using a wide range of filters.
60 lines (59 loc) • 2 kB
TypeScript
import { ApiItemResponse, ApiListResponse, ApiPagination } from '../types';
/**
 * Caller-supplied configuration for a listing scraper. Every field is optional.
 *
 * @typeParam TItemShot - Item type carried by list responses
 *   (`ApiListResponse<TItemShot>`).
 * @typeParam TItemDetails - Second item type that `onItemScraped` may receive.
 *   NOTE(review): presumably the detailed form of an item — confirm against
 *   the scraper implementation.
 */
export type ListingScraperConfig<TItemShot, TItemDetails> = {
  // ---- Output ----------------------------------------------------------

  /** Output mode; one of the three listed literals. */
  outputType?: 'json' | 'sqlite' | 'callback';
  /** NOTE(review): presumably the directory output files are written to — confirm. */
  outputDir?: string;
  /** NOTE(review): presumably the output file name — confirm. */
  filename?: string;
  /** NOTE(review): presumably the table used with the 'sqlite' output — confirm. */
  tableName?: string;

  // ---- Scope and paging ------------------------------------------------

  /** NOTE(review): presumably toggles fetching per-item details — confirm. */
  scrapeDetails?: boolean;
  /** NOTE(review): presumably an upper bound on scraped items — confirm. */
  maxItems?: number;
  /** NOTE(review): presumably the first page to request — confirm. */
  startPage?: number;
  /** NOTE(review): presumably the number of pages to process — confirm. */
  takePages?: number;

  // ---- Concurrency -----------------------------------------------------

  /** NOTE(review): presumably overrides item-level concurrency — confirm. */
  overrideConcurrency?: number;
  /** NOTE(review): presumably overrides page-level concurrency — confirm. */
  overridePageConcurrency?: number;

  // ---- Logging ---------------------------------------------------------

  disableLog?: boolean;
  disableErrorLog?: boolean;

  // ---- Request context -------------------------------------------------

  sessionId?: string;
  /** Extra headers; NOTE(review): presumably applied to list requests — confirm. */
  addListingHeaders?: Record<string, string>;
  /** Extra headers; NOTE(review): presumably applied to item requests — confirm. */
  addItemHeaders?: Record<string, string>;

  // ---- Callbacks -------------------------------------------------------

  /**
   * Callback receiving an item together with the pagination state (or `null`),
   * a logger, and any subset of the `ApiItemResponse` fields. Its return type
   * is unconstrained.
   */
  onItemScraped?: (
    args: {
      pagination: ApiPagination | null;
      item: TItemShot | TItemDetails;
      logger: Required<ScraperOptions>['logger'];
    } & Partial<ApiItemResponse<TItemShot | TItemDetails>>,
  ) => any;
  /** Callback receiving a list response, or `null`. */
  onFirstPageFetched?: (args: { data: ApiListResponse<TItemShot> | null }) => any;
  /** Callback receiving a page number and that page's list response, or `null`. */
  onPageFetched?: (args: {
    page: number;
    data: ApiListResponse<TItemShot> | null;
  }) => any;

  // ---- Escape hatch ----------------------------------------------------

  /** Partial set of full scraper options, including the internal ones. */
  optionsOverride?: Partial<ListingScraperOptions<TItemShot, TItemDetails>>;
};
/**
 * Complete option set for a listing scraper: every `ListingScraperConfig`
 * field plus the fetch functions and listing metadata declared here.
 *
 * @typeParam TItemShot - Item type returned inside list responses and passed
 *   to `fetchItem`.
 * @typeParam TItemDetails - Item type carried by the `ApiItemResponse` that
 *   `fetchItem` may resolve to.
 */
export type ListingScraperOptions<TItemShot, TItemDetails> =
  ListingScraperConfig<TItemShot, TItemDetails> & {
    /**
     * Called with a page number and, optionally, a pagination token, session
     * id and extra headers. Resolves to a list response.
     */
    fetchList: (args: {
      page: number;
      paginationToken?: string | null;
      sessionId?: string;
      addHeaders?: Record<string, string>;
    }) => Promise<ApiListResponse<TItemShot>>;

    /**
     * Called with a list item and, optionally, a session id and extra headers.
     * May return `null` directly, or a promise resolving to an item response,
     * a `{ skipped, done? }` marker object, or `null`.
     */
    fetchItem: (args: {
      item: TItemShot;
      sessionId?: string;
      addHeaders?: Record<string, string>;
    }) => Promise<
      | ApiItemResponse<TItemDetails>
      | {
          skipped: boolean;
          done?: boolean;
        }
      | null
    > | null;

    /** NOTE(review): presumably the highest page number that may be requested — confirm. */
    maxPageNumber: number;
    /** NOTE(review): presumably a label for the scraped entity kind — confirm. */
    entityName: string;
    /** NOTE(review): presumably enables a warning tied to `maxPageNumber` — confirm. */
    warnPageLimit?: boolean;
  };
/**
 * Base scraper options. Only `apiKey` is required.
 */
export type ScraperOptions = {
  /** API key string (required). */
  apiKey: string;
  /** Optional base URL string. */
  baseUrl?: string;
  /** Optional map of extra header names to header values. */
  addHeaders?: Record<string, string>;
  /**
   * Optional logger. When provided, both `log` and `error` must be present;
   * each accepts any arguments and returns nothing.
   */
  logger?: {
    log: (...args: any[]) => void;
    error: (...args: any[]) => void;
  };
};