@spider-cloud/spider-client
Version:
Isomorphic JavaScript SDK for Spider Cloud services
192 lines (191 loc) • 8.49 kB
JavaScript
;
// Define the `__esModule` marker on the CommonJS exports object
// (conventionally read by ES-module interop helpers in transpiled code).
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder for the named export; it is assigned the real class at the end of the file.
exports.Spider = void 0;
// Supplies APISchema, ApiVersion and APIRoutes, used below to build request URLs.
const config_1 = require("./config");
// Only `version` is read from package.json; it ends up in the User-Agent header.
const package_json_1 = require("../package.json");
// Supplies streamReader, which is handed the streamed Response plus the caller's callback.
const stream_reader_1 = require("./utils/stream-reader");
// Supplies backOff, which wraps every fetch call made by the client.
const exponential_backoff_1 = require("exponential-backoff");
/**
 * Client for the Spider API.
 *
 * Every request is sent with `fetch` to
 * `<APISchema.url>/<ApiVersion.V1>/<endpoint>` and authenticated with a
 * bearer token built from the configured API key.
 */
class Spider {
  /**
   * Create an instance of Spider.
   * @param {{ apiKey?: string | null }} [props] - Client options. When `apiKey` is
   * missing or empty, the `SPIDER_API_KEY` environment variable is used instead.
   * @throws {Error} "No API key provided" when no key can be resolved.
   */
  constructor(props) {
    const explicitKey = props === null || props === undefined ? undefined : props.apiKey;
    // `typeof` is the only safe probe for an undeclared global: touching a bare
    // `process` identifier in a runtime without it (e.g. a browser) raises a
    // ReferenceError, which would hide the "No API key provided" error below.
    const env = typeof process !== "undefined" && process ? process.env : undefined;
    const envKey = env === null || env === undefined ? undefined : env.SPIDER_API_KEY;
    this.apiKey = explicitKey || envKey;
    if (!this.apiKey) {
      throw new Error("No API key provided");
    }
  }

  /**
   * Internal method to handle POST requests.
   * @param {string} endpoint - The API endpoint to which the POST request should be sent.
   * @param {Record<string, any>} data - The JSON data to be sent in the request body.
   * @param {boolean} [stream=false] - Whether to hand back the raw Response without parsing.
   * @param {boolean} [jsonl=false] - Whether to send the JSONL content-type headers instead of the JSON ones.
   * @returns {Promise<Response | any>} The parsed JSON body if not streamed, or the Response object if streamed.
   * @throws {Error} Via `handleError` when the request is not streamed and the response is not ok.
   */
  async _apiPost(endpoint, data, stream = false, jsonl = false) {
    const { backOff } = exponential_backoff_1;
    const headers = jsonl ? this.prepareHeadersJsonL : this.prepareHeaders;
    const url = `${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`;
    const response = await backOff(
      () =>
        fetch(url, {
          method: "POST",
          headers: headers,
          body: JSON.stringify(data),
        }),
      { numOfAttempts: 5 },
    );
    if (!stream) {
      if (response.ok) {
        return response.json();
      }
      this.handleError(response, `post to ${endpoint}`);
    }
    // Streamed responses are returned untouched, including non-ok ones; the
    // caller is responsible for inspecting the status.
    return response;
  }

  /**
   * Internal method to handle GET requests.
   * @param {string} endpoint - The API endpoint from which data should be retrieved.
   * @returns {Promise<any>} The data returned from the endpoint in JSON format.
   * @throws {Error} Via `handleError` when the response is not ok.
   */
  async _apiGet(endpoint) {
    const { backOff } = exponential_backoff_1;
    const headers = this.prepareHeaders;
    const url = `${config_1.APISchema["url"]}/${config_1.ApiVersion.V1}/${endpoint}`;
    const response = await backOff(
      () =>
        fetch(url, {
          method: "GET",
          headers: headers,
        }),
      { numOfAttempts: 5 },
    );
    if (response.ok) {
      return response.json();
    }
    this.handleError(response, `get from ${endpoint}`);
  }

  /**
   * Scrapes data from a specified URL.
   *
   * Posts to the crawl route with `limit: 1`; a `limit` key inside `params`
   * overrides that value because `params` is spread last.
   * @param {string} url - The URL to scrape.
   * @param {GenericParams} [params={}] - Additional parameters for the scraping request.
   * @returns {Promise<any>} The scraped data from the URL.
   */
  async scrapeUrl(url, params = {}) {
    return this._apiPost(config_1.APIRoutes.Crawl, { url: url, limit: 1, ...params });
  }

  /**
   * Initiates a crawling job starting from the specified URL.
   * @param {string} url - The URL to start crawling.
   * @param {GenericParams} [params={}] - Additional parameters for the crawl.
   * @param {boolean} [stream=false] - Whether to receive the response as a stream.
   * @param {function} [cb] - Callback handed to the stream reader when streaming. When both
   * `stream` and `cb` are set, the result of the stream reader is returned instead of the Response.
   * @returns {Promise<any | Response>} The result of the crawl, either structured data or a Response object if streaming.
   */
  async crawlUrl(url, params = {}, stream = false, cb) {
    // JSONL mode needs both a streaming request and a consumer for the chunks.
    const jsonl = stream && cb;
    const res = await this._apiPost(config_1.APIRoutes.Crawl, { url, ...params }, stream, !!jsonl);
    if (jsonl) {
      return await (0, stream_reader_1.streamReader)(res, cb);
    }
    return res;
  }

  /**
   * Retrieves all links from the specified URL.
   * @param {string} url - The URL from which to gather links.
   * @param {GenericParams} [params={}] - Additional parameters for the request.
   * @param {boolean} [stream=false] - Whether to receive the response as a stream.
   * @param {function} [cb] - Callback handed to the stream reader when streaming. When both
   * `stream` and `cb` are set, the result of the stream reader is returned instead of the Response.
   * @returns {Promise<any | Response>} The gathered links, either structured data or a Response object if streaming.
   */
  async links(url, params = {}, stream = false, cb) {
    // JSONL mode needs both a streaming request and a consumer for the chunks.
    const jsonl = stream && cb;
    const res = await this._apiPost(config_1.APIRoutes.Links, { url, ...params }, stream, !!jsonl);
    if (jsonl) {
      return await (0, stream_reader_1.streamReader)(res, cb);
    }
    return res;
  }

  /**
   * Takes a screenshot of the website starting from this URL.
   * @param {string} url - The URL to start the screenshot.
   * @param {GenericParams} [params={}] - Configuration parameters for the screenshot.
   * @returns {Promise<any>} The screenshot data.
   */
  async screenshot(url, params = {}) {
    return this._apiPost(config_1.APIRoutes.Screenshot, { url: url, ...params });
  }

  /**
   * Perform a search and gather a list of websites to start crawling and collect resources.
   * @param {string} q - The search query; sent to the API under the `search` key.
   * @param {GenericParams} [params={}] - Configuration parameters for the search.
   * @returns {Promise<any>} The parsed JSON result of the search request.
   */
  async search(q, params = {}) {
    return this._apiPost(config_1.APIRoutes.Search, { search: q, ...params });
  }

  /**
   * Transform HTML to Markdown or text. You can send up to 10MB of data at once.
   * @param {object} data - The data to transform, a list of objects with the key 'html' and optional 'url' key for readability.
   * @param {object} [params] - Configuration parameters for the transformation. A non-empty
   * array in `params.data` takes precedence over the `data` argument.
   * @returns {Promise<any>} The transformation result.
   */
  async transform(data, params) {
    const options = params ? params : {};
    const hasInlineData = Boolean(params) && Array.isArray(params.data) && params.data.length > 0;
    return this._apiPost(config_1.APIRoutes.Transform, {
      ...options,
      data: hasInlineData ? params.data : data,
    });
  }

  /**
   * Retrieves the number of credits available on the account.
   * @returns {Promise<any>} The current credit balance.
   */
  async getCredits() {
    return this._apiGet(config_1.APIRoutes.DataCredits);
  }

  /**
   * Send a POST request to insert data into a specified collection.
   * @param {string} collection - The collection (table) name, appended to the data route.
   * @param {object} data - The data to be inserted.
   * @returns {Promise<any>} The response from the server.
   */
  async postData(collection, data) {
    return this._apiPost(`${config_1.APIRoutes.Data}/${collection}`, data);
  }

  /**
   * Prepares common headers for each API request.
   * @returns {HeadersInit} A headers object for fetch requests.
   */
  get prepareHeaders() {
    return {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`,
      "User-Agent": `Spider-Client/${package_json_1.version}`,
    };
  }

  /**
   * Prepares common headers for each API request with JSONl content-type suitable for streaming.
   * @returns {HeadersInit} A headers object for fetch requests.
   */
  get prepareHeadersJsonL() {
    return {
      ...this.prepareHeaders,
      "Content-Type": "application/jsonl",
    };
  }

  /**
   * Handles errors from API requests.
   * @param {Response} response - The fetch response object.
   * @param {string} action - Description of the attempted action.
   * @throws {Error} Always; the message carries the action and the HTTP status code.
   */
  handleError(response, action) {
    throw new Error(`Failed to ${action}. Status code: ${response.status}.`);
  }
}
// Publish the Spider class as a named CommonJS export.
exports.Spider = Spider;