UNPKG

@spider-cloud/spider-client

Version:

Isomorphic JavaScript SDK for Spider Cloud services

192 lines (190 loc) 9.13 kB
import { ChunkCallbackFunction, Collection, QueryRequest, SpiderCoreResponse, SpiderParams } from "./config";

/**
 * Generic params for a core request: every field of `SpiderParams` except
 * `url`, which the request methods take as a separate positional argument.
 */
export type GenericParams = Omit<SpiderParams, "url">;

/**
 * Configuration interface for Spider.
 *
 * `apiKey` is optional and may be explicitly `null`.
 */
export interface SpiderConfig { apiKey?: string | null; }

/**
 * A class to interact with the Spider API.
 *
 * This is a declaration only: the method bodies live in the compiled
 * JavaScript, so behavioural notes below are carried over from the earlier
 * documentation and cannot be checked against this file.
 */
export declare class Spider {
    /** The stored API key. Private; its type is elided in the declaration output. */
    private apiKey?;

    /**
     * Create an instance of Spider.
     * @param {SpiderConfig} [props] - Optional configuration object. `props.apiKey` is the API key used to
     *   authenticate to the Spider API. Per the earlier docs, when it is null the key is sourced from
     *   environment variables.
     * @throws Will throw an error if the API key is not provided.
     */
    constructor(props?: SpiderConfig);

    /**
     * Internal method to handle POST requests.
     *
     * Declared as an untyped private member, so the signature below is taken from the earlier docs.
     * @param {string} endpoint - The API endpoint to which the POST request should be sent.
     * @param {Record<string, any>} data - The JSON data to be sent in the request body.
     * @param {boolean} [stream=false] - Whether to stream the response back without parsing.
     * @returns {Promise<Response | any>} The response in JSON if not streamed, or the Response object if streamed.
     */
    private _apiPost;

    /**
     * Internal method to handle GET requests.
     *
     * Declared as an untyped private member, so the signature below is taken from the earlier docs.
     * @param {string} endpoint - The API endpoint from which data should be retrieved.
     * @returns {Promise<any>} The data returned from the endpoint in JSON format.
     */
    private _apiGet;

    /**
     * Internal method to handle DELETE requests.
     *
     * Declared as an untyped private member, so the signature below is taken from the earlier docs.
     * @param {string} endpoint - The API endpoint the DELETE request is sent to.
     * @returns {Promise<any>} The data returned from the endpoint in JSON format.
     */
    private _apiDelete;

    /**
     * Scrapes data from a specified URL.
     * @param {string} url - The URL to scrape.
     * @param {GenericParams} [params] - Additional parameters for the scraping request.
     * @returns {Promise<any>} The scraped data from the URL.
     */
    scrapeUrl(url: string, params?: GenericParams): Promise<any>;

    /**
     * Initiates a crawling job starting from the specified URL.
     * @param {string} url - The URL to start crawling.
     * @param {GenericParams} [params] - Additional parameters for the crawl.
     * @param {boolean} [stream] - Whether to receive the response as a stream.
     * @param {ChunkCallbackFunction} [cb] - The callback function invoked per chunk when streaming. Per the
     *   earlier docs, if this is set together with `stream` you will not get an end response.
     * @returns {Promise<SpiderCoreResponse[] | void>} The crawl results, or nothing.
     *   NOTE(review): presumably the `void` case is the streaming-with-callback path; confirm against the implementation.
     */
    crawlUrl(url: string, params?: GenericParams, stream?: boolean, cb?: ChunkCallbackFunction): Promise<SpiderCoreResponse[] | void>;

    /**
     * Retrieves all links from the specified URL.
     * @param {string} url - The URL from which to gather links.
     * @param {GenericParams} [params] - Additional parameters for the crawl.
     * @param {boolean} [stream] - Whether to receive the response as a stream.
     * @param {ChunkCallbackFunction} [cb] - The callback function invoked per chunk when streaming. Per the
     *   earlier docs, if this is set together with `stream` you will not get an end response.
     * @returns {Promise<SpiderCoreResponse[] | void>} The gathered results, or nothing.
     *   NOTE(review): presumably the `void` case is the streaming-with-callback path; confirm against the implementation.
     */
    links(url: string, params?: GenericParams, stream?: boolean, cb?: ChunkCallbackFunction): Promise<SpiderCoreResponse[] | void>;

    /**
     * Takes a screenshot of the website starting from this URL.
     * @param {string} url - The URL to start the screenshot.
     * @param {GenericParams} [params] - Configuration parameters for the screenshot.
     * @returns {Promise<any>} The screenshot data.
     */
    screenshot(url: string, params?: GenericParams): Promise<any>;

    /**
     * Perform a search and gather a list of websites to start crawling and collect resources.
     * @param {string} q - The search query.
     * @param {GenericParams} [params] - Configuration parameters for the search.
     * @returns {Promise<any>} The result of the search request.
     */
    search(q: string, params?: GenericParams): Promise<any>;

    /**
     * Transform HTML to Markdown or text. Per the earlier docs, you can send up to 10MB of data at once.
     * @param {{ html: string; url?: string; }[]} data - The data to transform: a list of objects with the
     *   key 'html' and an optional 'url' key for readability.
     * @param {object} [params] - Configuration parameters for the transformation.
     *   NOTE(review): typed as `{}`, which accepts any non-nullish value; a narrower type may be intended.
     * @returns {Promise<any>} The transformation result.
     */
    transform(data: { html: string; url?: string; }[], params?: {}): Promise<any>;

    /**
     * Extracts leads from a website.
     * @param {string} url - The URL from which to extract contacts.
     * @param {GenericParams} [params] - Configuration parameters for the extraction.
     * @returns {Promise<any>} The contact information extracted.
     */
    extractContacts(url: string, params?: GenericParams): Promise<any>;

    /**
     * Applies labeling to data extracted from a specified URL.
     * @param {string} url - The URL to label.
     * @param {GenericParams} [params] - Configuration parameters for labeling.
     * @returns {Promise<any>} The labeled data.
     */
    label(url: string, params?: GenericParams): Promise<any>;

    /**
     * Check the crawl state of the website.
     * @param {string} url - The URL to check.
     * @param {GenericParams} [params] - Configuration parameters for crawl state. Per the earlier docs, you can
     *   also pass in "domain" instead of the url to query.
     * @returns {Promise<any>} The crawl state data.
     */
    getCrawlState(url: string, params?: GenericParams): Promise<any>;

    /**
     * Create a signed url to download files from the storage.
     * @param {string} [url] - NOTE(review): the earlier docs named this parameter "domain" and described it as
     *   "The domain for the user's storage. If not provided, downloads all files."; the declared name is
     *   `url`. Confirm which meaning is intended.
     * @param {Object} [options] - The download options: optional `page`, `limit` and `expiresIn` numbers, and
     *   optional `domain` and `pathname` strings.
     * @returns {Promise<any>} The response from the server.
     *   NOTE(review): the earlier docs described this as a Response containing the file stream; the declared type is `any`.
     */
    createSignedUrl(url?: string, options?: { page?: number; limit?: number; expiresIn?: number; domain?: string; pathname?: string; }): Promise<any>;

    /**
     * Retrieves the number of credits available on the account.
     * @returns {Promise<any>} The current credit balance.
     */
    getCredits(): Promise<any>;

    /**
     * Send a POST request to insert data into a specified table.
     * @param {Collection} collection - The table name in the database.
     * @param {GenericParams | Record<string, any>} data - The data to be inserted.
     * @returns {Promise<any>} The response from the server.
     */
    postData(collection: Collection, data: GenericParams | Record<string, any>): Promise<any>;

    /**
     * Send a GET request to retrieve data from a specified table.
     * @param {Collection} collections - The table name in the database (a single `Collection`, despite the plural name).
     * @param {GenericParams | Record<string, any>} params - The query parameters for data retrieval.
     * @returns {Promise<any>} The response from the server.
     */
    getData(collections: Collection, params: GenericParams | Record<string, any>): Promise<any>;

    /**
     * Download a record. The url is the path of the storage hash returned and not the exact website url.
     * @param {QueryRequest} query - The query parameters for data retrieval.
     * @param {"text" | "blob"} [output] - Output format selector.
     *   NOTE(review): presumably chooses whether the body is read as text or as a Blob; confirm against the implementation.
     * @returns {Promise<any>} The download response from the server.
     */
    download(query: QueryRequest, output?: "text" | "blob"): Promise<any>;

    /**
     * Perform a query to get a document.
     * @param {QueryRequest} query - The query parameters for data retrieval.
     * @returns {Promise<any>} The response from the server.
     */
    query(query: QueryRequest): Promise<any>;

    /**
     * Send a DELETE request to remove data from a specified table.
     * @param {Collection} collection - The table name in the database.
     * @param {GenericParams | Record<string, any>} params - Parameters to identify records to delete.
     * @returns {Promise<any>} The response from the server.
     */
    deleteData(collection: Collection, params: GenericParams | Record<string, any>): Promise<any>;

    /**
     * Prepares common headers for each API request.
     *
     * This is a getter, so it is read as a property rather than called.
     * @returns A headers object with string "Content-Type", Authorization and "User-Agent" entries, for fetch requests.
     */
    get prepareHeaders(): { "Content-Type": string; Authorization: string; "User-Agent": string; };

    /**
     * Prepares common headers for each API request with JSONl content-type suitable for streaming.
     *
     * This is a getter, so it is read as a property rather than called.
     * @returns A headers object with string "Content-Type", Authorization and "User-Agent" entries, for fetch requests.
     */
    get prepareHeadersJsonL(): { "Content-Type": string; Authorization: string; "User-Agent": string; };

    /**
     * Handles errors from API requests.
     * @param {Response} response - The fetch response object.
     * @param {string} action - Description of the attempted action.
     * @throws Will throw an error with detailed status information.
     */
    handleError(response: Response, action: string): void;
}