@spider-cloud/spider-client
Version:
Isomorphic JavaScript SDK for Spider Cloud services
192 lines (190 loc) • 9.13 kB
TypeScript
import { ChunkCallbackFunction, Collection, QueryRequest, SpiderCoreResponse, SpiderParams } from "./config";
/**
* Parameters accepted by the core request methods.
*
* This is every field of `SpiderParams` except `url`; the request methods
* in this file take the target URL as a separate leading argument instead.
*/
export type GenericParams = Omit<SpiderParams, "url">;
/**
* Configuration accepted by the `Spider` constructor.
*/
export interface SpiderConfig {
/**
* API key used to authenticate to the Spider API. Optional; may be
* `null` or omitted.
*/
apiKey?: string | null;
}
/**
* Client class for interacting with the Spider API.
*
* Exposes methods for scraping, crawling, link gathering, screenshots,
* search, HTML transformation, contact extraction, labeling, crawl-state
* checks, signed download URLs, credits, and stored-data access.
*/
export declare class Spider {
/** API key held by the instance; its type is not visible in this declaration. */
private apiKey?;
/**
* Create an instance of Spider.
* @param {SpiderConfig} [props] - Optional configuration. `props.apiKey` is the API key used to authenticate to the Spider API. If null, attempts to source from environment variables.
* @throws Will throw an error if the API key is not provided.
*/
constructor(props?: SpiderConfig);
/**
* Internal method to handle POST requests.
* @param {string} endpoint - The API endpoint to which the POST request should be sent.
* @param {Record<string, any>} data - The JSON data to be sent in the request body.
* @param {boolean} [stream=false] - Whether to stream the response back without parsing.
* @returns {Promise<Response | any>} The response in JSON if not streamed, or the Response object if streamed.
*/
private _apiPost;
/**
* Internal method to handle GET requests.
* @param {string} endpoint - The API endpoint from which data should be retrieved.
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
*/
private _apiGet;
/**
* Internal method to handle DELETE requests.
* @param {string} endpoint - The API endpoint to which the DELETE request should be sent.
* @returns {Promise<any>} The data returned from the endpoint in JSON format.
*/
private _apiDelete;
/**
* Scrapes data from a specified URL.
* @param {string} url - The URL to scrape.
* @param {GenericParams} [params={}] - Additional parameters for the scraping request.
* @returns {Promise<any>} The scraped data from the URL.
*/
scrapeUrl(url: string, params?: GenericParams): Promise<any>;
/**
* Initiates a crawling job starting from the specified URL.
* @param {string} url - The URL to start crawling.
* @param {GenericParams} [params={}] - Additional parameters for the crawl.
* @param {boolean} [stream=false] - Whether to receive the response as a stream.
* @param {ChunkCallbackFunction} [cb] - The callback function invoked per chunk when streaming. If this is set with stream you will not get an end response.
* @returns {Promise<SpiderCoreResponse[] | void>} The crawl results, or nothing when the response is delivered through the streaming callback.
*/
crawlUrl(url: string, params?: GenericParams, stream?: boolean, cb?: ChunkCallbackFunction): Promise<SpiderCoreResponse[] | void>;
/**
* Retrieves all links from the specified URL.
* @param {string} url - The URL from which to gather links.
* @param {GenericParams} [params={}] - Additional parameters for the crawl.
* @param {boolean} [stream=false] - Whether to receive the response as a stream.
* @param {ChunkCallbackFunction} [cb] - The callback function invoked per chunk when streaming. If this is set with stream you will not get an end response.
* @returns {Promise<SpiderCoreResponse[] | void>} The gathered results, or nothing when the response is delivered through the streaming callback.
*/
links(url: string, params?: GenericParams, stream?: boolean, cb?: ChunkCallbackFunction): Promise<SpiderCoreResponse[] | void>;
/**
* Takes a screenshot of the website starting from this URL.
* @param {string} url - The URL to start the screenshot.
* @param {GenericParams} [params={}] - Configuration parameters for the screenshot.
* @returns {Promise<any>} The screenshot data.
*/
screenshot(url: string, params?: GenericParams): Promise<any>;
/**
* Perform a search and gather a list of websites to start crawling and collect resources.
* @param {string} q - The search query.
* @param {GenericParams} [params={}] - Configuration parameters for the search.
* @returns {Promise<any>} The result of the search request.
*/
search(q: string, params?: GenericParams): Promise<any>;
/**
* Transform HTML to Markdown or text. You can send up to 10MB of data at once.
* @param {Array<{html: string, url?: string}>} data - The data to transform, a list of objects with the key 'html' and optional 'url' key for readability.
* @param {object} [params={}] - Configuration parameters for the transformation.
* @returns {Promise<any>} The transformation result.
*/
transform(data: {
html: string;
url?: string;
}[], params?: {}): Promise<any>;
/**
* Extracts leads from a website.
* @param {string} url - The URL from which to extract contacts.
* @param {GenericParams} [params={}] - Configuration parameters for the extraction.
* @returns {Promise<any>} The contact information extracted.
*/
extractContacts(url: string, params?: GenericParams): Promise<any>;
/**
* Applies labeling to data extracted from a specified URL.
* @param {string} url - The URL to label.
* @param {GenericParams} [params={}] - Configuration parameters for labeling.
* @returns {Promise<any>} The labeled data.
*/
label(url: string, params?: GenericParams): Promise<any>;
/**
* Check the crawl state of the website.
* @param {string} url - The URL to check.
* @param {GenericParams} [params={}] - Configuration parameters for crawl state. Can also pass in "domain" instead of the url to query.
* @returns {Promise<any>} The crawl state data.
*/
getCrawlState(url: string, params?: GenericParams): Promise<any>;
/**
* Create a signed url to download files from the storage.
*
* NOTE(review): the earlier doc described the first argument as the storage
* domain and listed a `raw` flag; the signature names the argument `url`,
* has no `raw` parameter, and carries `domain` inside `options` - confirm
* the intended meaning against the implementation.
* @param {string} [url] - Optional first argument; presumably identifies the stored resource or domain - TODO confirm.
* @param {Object} [options] - The download options.
* @param {number} [options.page] - Optional page value.
* @param {number} [options.limit] - Optional limit value.
* @param {number} [options.expiresIn] - Optional expiry value; units are not visible here.
* @param {string} [options.domain] - Optional domain value.
* @param {string} [options.pathname] - Optional pathname value.
* @returns {Promise<any>} The response from the server; presumably contains the signed URL - TODO confirm.
*/
createSignedUrl(url?: string, options?: {
page?: number;
limit?: number;
expiresIn?: number;
domain?: string;
pathname?: string;
}): Promise<any>;
/**
* Retrieves the number of credits available on the account.
* @returns {Promise<any>} The current credit balance.
*/
getCredits(): Promise<any>;
/**
* Send a POST request to insert data into a specified table.
* @param {Collection} collection - The table name in the database.
* @param {GenericParams | Record<string, any>} data - The data to be inserted.
* @returns {Promise<any>} The response from the server.
*/
postData(collection: Collection, data: GenericParams | Record<string, any>): Promise<any>;
/**
* Send a GET request to retrieve data from a specified table.
* @param {Collection} collections - The table name in the database.
* @param {GenericParams | Record<string, any>} params - The query parameters for data retrieval.
* @returns {Promise<any>} The response from the server.
*/
getData(collections: Collection, params: GenericParams | Record<string, any>): Promise<any>;
/**
* Download a record. The url is the path of the storage hash returned and not the exact website url.
* @param {QueryRequest} query - The query parameters for data retrieval.
* @param {"text" | "blob"} [output] - Requested output form of the download; the default is not visible in this declaration.
* @returns {Promise<any>} The download response from the server.
*/
download(query: QueryRequest, output?: "text" | "blob"): Promise<any>;
/**
* Perform a query to get a document.
* @param {QueryRequest} query - The query parameters for data retrieval.
* @returns {Promise<any>} The response from the server.
*/
query(query: QueryRequest): Promise<any>;
/**
* Send a DELETE request to remove data from a specified table.
* @param {Collection} collection - The table name in the database.
* @param {GenericParams | Record<string, any>} params - Parameters to identify records to delete.
* @returns {Promise<any>} The response from the server.
*/
deleteData(collection: Collection, params: GenericParams | Record<string, any>): Promise<any>;
/**
* Prepares common headers for each API request.
* @returns An object with string-valued "Content-Type", Authorization, and "User-Agent" entries, usable as headers for fetch requests.
*/
get prepareHeaders(): {
"Content-Type": string;
Authorization: string;
"User-Agent": string;
};
/**
* Prepares common headers for each API request with JSONl content-type suitable for streaming.
* @returns An object with string-valued "Content-Type", Authorization, and "User-Agent" entries, usable as headers for fetch requests.
*/
get prepareHeadersJsonL(): {
"Content-Type": string;
Authorization: string;
"User-Agent": string;
};
/**
* Handles errors from API requests.
* @param {Response} response - The fetch response object.
* @param {string} action - Description of the attempted action.
* @throws Will throw an error with detailed status information.
*/
handleError(response: Response, action: string): void;
}