openapi-directory
Version:
Building & bundling https://github.com/APIs-guru/openapi-directory for easy use from JS
1 lines • 57.3 kB
JSON
{"openapi":"3.0.0","servers":[{"description":"Production server","url":"https://api.dataflowkit.com/v1"}],"info":{"contact":{"url":"https://dataflowkit.com/"},"description":"Render Javascript driven pages, while we internally manage Headless Chrome and proxies for you. \n\n- Build a custom web scraper with our Visual point-and-click toolkit.\n- Scrape the most popular Search engines result pages (SERP).\n- Convert web pages to PDF and capture screenshots.\n***\n### Authentication\nDataflow Kit API requires you to sign up for an API key in order to use the API. \n\nThe API key can be found in the [DFK Dashboard](https://account.dataflowkit.com) after _free registration_.\n\nPass a secret API Key to all API requests to the server as the `api_key` query parameter. \n","termsOfService":"https://dataflowkit.com/terms","title":"Dataflow Kit Web Scraper","version":"1.3","x-apisguru-categories":["developer_tools"],"x-origin":[{"format":"openapi","url":"https://api.dataflowkit.com/v1/swagger.yaml","version":"3.0"}],"x-providerName":"dataflowkit.com"},"externalDocs":{"description":"swagger-ui documentation","url":"https://dataflowkit.com/open-api"},"security":[{"ApiKeyAuth":[]}],"tags":[{"name":"fetch"},{"name":"serp"},{"name":"parse"},{"name":"url-to-pdf"},{"name":"url-to-screenshot"}],"paths":{"/convert/url/pdf":{"post":{"deprecated":false,"description":"Automate URL to PDF Conversion right in your application.\n\nSpecify request parameters like URL, Proxy, and actions to render web pages to PDF using Headless Chrome.\n\nGet resulted PDF even from websites blocked in your area for some reason utilizing our worldwide pool of proxies.\n\nSimulate real-world human interaction with the page. 
For example, before saving a web page to PDF, you may need to scroll it.\n\nGenerate ready-to-run code for your favorite language at [https://dataflowkit.com/url-to-pdf](https://dataflowkit.com/url-to-pdf)","operationId":"url-to-pdf","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/url2pdfrequest"}}},"required":true},"responses":{"200":{"content":{"application/pdf":{"schema":{"format":"binary","type":"string"}},"text/plain; charset=utf-8":{"example":"https://dfk-storage-ny3.nyc3.digitaloceanspaces.com/5e5d2864ebb755000188c2c5/url_pdf2020-05-06_20%3A00.pdf?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=KMGZH6JMEM75FTB4EEVL%2F20200506%2Fnyc3%2Fs3%2Faws4_request&X-Amz-Date=20200506T200046Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=737740ad471acf45120709a07de7440287e1daa17599a44b8668f6586030e6be"}},"description":"A PDF file."},"400":{"content":{"text/plain; charset=utf-8":{"examples":{"invalidURL":{"summary":"Invalid request URL","value":"Invalid request URL"},"noFields":{"summary":"No fields to scrape","value":"No fields to scrape"}}}},"description":"Bad Request. Invalid payload specified."},"401":{"content":{"text/plain; charset=utf-8":{"examples":{"No_api":{"summary":"No API Key provided","value":"No API Key provided"},"invalid":{"summary":"Invalid API key","value":"Invalid API key"}}}},"description":"Unauthorized. 
`api_key` parameter is missed or incorrect"},"500":{"content":{"text/plain; charset=utf-8":{"examples":{"fetchFailed":{"summary":"Process fetch failed.","value":"Process fetch failed."},"singleProcessFailed":{"summary":"Create single process failed","value":"Create single process failed"}}}},"description":"Internal Server Error is a very general HTTP status code that means something has gone wrong on the web site's server."}},"summary":"Save web page as PDF","tags":["url-to-pdf"]}},"/convert/url/screenshot":{"post":{"deprecated":false,"description":"Automate URL to Screenshot Conversion right in your application.\n\nSpecify request parameters like URL, Proxy, and actions to convert web pages to screenshots using Headless Chrome.\n\nGet resulted pictures in JPG or PNG formats even from websites blocked in your area for some reason utilizing our worldwide pool of proxies.\n\nSimulate real-world human interaction with the page. For example, before capturing a web page, you may need to scroll it.\n\nGenerate ready-to-run code for your favorite language at [https://dataflowkit.com/url-to-screenshot](https://dataflowkit.com/url-to-screenshot)","operationId":"url-to-screenshot","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/url2screenshotrequest"}}},"required":true},"responses":{"200":{"content":{"image/jpeg":{"schema":{"format":"binary","type":"string"}},"image/png":{"schema":{"format":"binary","type":"string"}},"text/plain; charset=utf-8":{"example":"https://dfk-storage-ny3.nyc3.digitaloceanspaces.com/5e5d2864ebb755000188c2c5/url_screenshot2020-05-06_20%3A02.jpeg?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=KMGZH6JMEM75FTB4EEVL%2F20200506%2Fnyc3%2Fs3%2Faws4_request&X-Amz-Date=20200506T200305Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=1c1c39fab3e9b0a8806fc4fd6690b335758d12a926a635c08c7301f87ff9279b"}},"description":"Returns jpg or png file."},"400":{"content":{"text/plain; 
charset=utf-8":{"examples":{"invalidURL":{"summary":"Invalid request URL","value":"Invalid request URL"},"noFields":{"summary":"No fields to scrape","value":"No fields to scrape"}}}},"description":"Bad Request. Invalid payload specified."},"401":{"content":{"text/plain; charset=utf-8":{"examples":{"No_api":{"summary":"No API Key provided","value":"No API Key provided"},"invalid":{"summary":"Invalid API key","value":"Invalid API key"}}}},"description":"Unauthorized. `api_key` parameter is missed or incorrect"},"500":{"content":{"text/plain; charset=utf-8":{"examples":{"fetchFailed":{"summary":"Process fetch failed.","value":"Process fetch failed."},"singleProcessFailed":{"summary":"Create single process failed","value":"Create single process failed"}}}},"description":"Internal Server Error is a very general HTTP status code that means something has gone wrong on the web site's server."}},"summary":"Capture web page Screenshots.","tags":["url-to-screenshot"]}},"/fetch":{"post":{"deprecated":false,"description":"Use fetch endpoint to download web pages.","operationId":"fetch","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/fetchrequest"}}},"description":"- _Base fetcher type_ is the right choice for fetching server-side rendered pages. It takes fewer resources and works faster than rendering HTML with _Chrome fetcher_\n- But for rendering Angular, React, and Vue.js web sites, you should always specify _Chrome fetcher type_. 
In this case, headless chrome fetcher renders dynamic Javascript content in the same way as real web browsers would do it.\n\nGenerate ready-to-run code for your favorite language at [https://dataflowkit.com/render-web](https://dataflowkit.com/render-web)\n","required":true},"responses":{"200":{"content":{"text/html; charset=utf-8":{"example":"<html>\n <head></head>\n <body>{\n \"ip\": \"178.171.21.156\",\n \"city\": \"Singapore\",\n \"region\": null,\n \"region_code\": null,\n \"country\": \"SG\",\n \"country_code\": \"SG\",\n \"country_code_iso3\": \"SGP\",\n \"country_capital\": \"Singapore\",\n \"country_tld\": \".sg\",\n \"country_name\": \"Singapore\",\n \"continent_code\": \"AS\",\n \"in_eu\": false,\n \"postal\": \"18\",\n \"latitude\": 1.2929,\n \"longitude\": 103.8547,\n \"timezone\": \"Asia/Singapore\",\n \"utc_offset\": \"+0800\",\n \"country_calling_code\": \"+65\",\n \"currency\": \"SGD\",\n \"currency_name\": \"Dollar\",\n \"languages\": \"cmn,en-SG,ms-SG,ta-SG,zh-SG\",\n \"country_area\": 692.7,\n \"country_population\": 4701069.0,\n \"asn\": \"AS9009\",\n \"org\": \"M247 Ltd\"\n}</body>\n</html>"},"text/plain; charset=utf-8":{"example":"https://dfk-storage-ny3.nyc3.digitaloceanspaces.com/5e5d2864ebb755000188c2c5/ipapi.co_2020-05-06_19%3A32.html?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=KMGZH6JMEM75FTB4EEVL%2F20200506%2Fnyc3%2Fs3%2Faws4_request&X-Amz-Date=20200506T193209Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=86675afdbabc18027cb1d78cb6faf35df2137940b6b8e9e526006bb66afbfae7"}},"description":"Returns utf8 encoded web page content."},"400":{"content":{"text/plain; charset=utf-8":{"examples":{"invalidURL":{"summary":"Invalid request URL","value":"Invalid request URL"},"noFields":{"summary":"No fields to scrape","value":"No fields to scrape"}}}},"description":"Bad Request. 
Invalid payload specified."},"401":{"content":{"text/plain; charset=utf-8":{"examples":{"No_api":{"summary":"No API Key provided","value":"No API Key provided"},"invalid":{"summary":"Invalid API key","value":"Invalid API key"}}}},"description":"Unauthorized. `api_key` parameter is missed or incorrect"},"500":{"content":{"text/plain; charset=utf-8":{"examples":{"fetchFailed":{"summary":"Process fetch failed.","value":"Process fetch failed."},"singleProcessFailed":{"summary":"Create single process failed","value":"Create single process failed"}}}},"description":"Internal Server Error is a very general HTTP status code that means something has gone wrong on the web site's server."}},"summary":"Download web page content","tags":["fetch"]}},"/parse":{"post":{"deprecated":false,"description":"Dataflow kit uses CSS selectors to find HTML elements in web pages for later data extraction.\n\nOpen [visual point-and-click toolkit](https://dataflowkit.com/dfk) and click desired elements on a page to specify extracting data. \n\n\n Then you can send generated payload to `/parse` endpoint. We crawl web pages and extract data like text, links, or images for you following the specified rules. \n\n\nExtracted data is returned in CSV, MS Excel, JSON, JSON(Lines) or XML format.\n","operationId":"parse","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/parserequest"}}},"description":"### Field types and attributes\n \n- **Text**. Extract human-readable text from the selected element and all its child elements. HTML tags are stripped, and only text returned.\n \n- **Link**. Capture link `href` attribute and link text. Or specify a special _Path_ option for website navigation. When Path option is true, all other selectors ignored, and no results from the current page returned.\n \n- **Image**. 
Image type extracts `src` (URL) and `alt` attributes of an image\n\n\n***\n### Filters\nFilters are used to manipulate text data when extracting.\n\nHere is the list of available filters\n\n\n- **Trim** removes leading and trailing white spaces from the _field text or attribute_\n\n- **Normal** leaves the case and capitalization of text/ attribute exactly as is.\n\n- **UPPERCASE** makes all of the letters in the Field's text/ attribute uppercase.\n\n- **lowercase** makes all of the letters in the Field's text/ attribute lowercase.\n\n- **Capitalize** capitalizes the first letter of each word in the Field's text/ attribute\n\n- **Concatinate** joins text array element into a single string\n\n***\n### Regular Expressions\n\nFor more advanced text formatting regular expression can be used. Some useful examples are listed below\n\n\n| Input text | Regex | Result |\n| ---------- | ----- | ------ |\n| price- 10.99€ | <code>[0-9]+\\.[0-9]+</code> | 10.99 |\n| phone- 0 (944) 244-18-22 | <code>\\w+</code> | 09442441822 |\n\n\n***\n### Details. Chaining.\nThe Link field type serves as a navigation link to a details page containing more data.\nA special _Path_ option is used for navigation only. When the Path option specified, no results from the current page returned. But grouped results from details pages will be pulled instead. 
You can use chaining functionality of Dataflow Kit scraper to retrieve all the detail page data at the same time.\n","required":true},"responses":{"200":{"content":{"application/json":{"example":[{"Name_href":"https://test.dataflowkit.com/persons/1","Name_text":"Ethan Aguirre","Number_text":"1","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-1.svg"},{"Name_href":"https://test.dataflowkit.com/persons/2","Name_text":"Melodie Holder","Number_text":"2","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-2.svg"},{"Name_href":"https://test.dataflowkit.com/persons/3","Name_text":"Meghan Reyes","Number_text":"3","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-3.svg"},{"Name_href":"https://test.dataflowkit.com/persons/4","Name_text":"Lane Vinson","Number_text":"4","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-4.svg"},{"Name_href":"https://test.dataflowkit.com/persons/5","Name_text":"Philip Tillman","Number_text":"5","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-5.svg"},{"Name_href":"https://test.dataflowkit.com/persons/6","Name_text":"Theodore Mcclain","Number_text":"6","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-6.svg"},{"Name_href":"https://test.dataflowkit.com/persons/7","Name_text":"Neville Kane","Number_text":"7","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-7.svg"},{"Name_href":"https://test.dataflowkit.com/persons/8","Name_text":"Lila Vazquez","Number_text":"8","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-8.svg"},{"Name_href":"https://test.dataflowkit.com/persons/9","Name_text":"Ulysses Peters","Number_text":"9","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-9.svg"},{"Name_href":"https://test.dataflowkit.com/persons/10","Name_text":"Camden 
Young","Number_text":"10","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-10.svg"},{"Name_href":"https://test.dataflowkit.com/persons/11","Name_text":"Solomon Petty","Number_text":"11","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-11.svg"},{"Name_href":"https://test.dataflowkit.com/persons/12","Name_text":"Ahmed Robbins","Number_text":"12","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-12.svg"},{"Name_href":"https://test.dataflowkit.com/persons/13","Name_text":"William Olsen","Number_text":"13","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-13.svg"},{"Name_href":"https://test.dataflowkit.com/persons/14","Name_text":"Ahmed Vaughan","Number_text":"14","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-14.svg"},{"Name_href":"https://test.dataflowkit.com/persons/15","Name_text":"Howard Kemp","Number_text":"15","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-15.svg"},{"Name_href":"https://test.dataflowkit.com/persons/16","Name_text":"Channing Flores","Number_text":"16","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-16.svg"},{"Name_href":"https://test.dataflowkit.com/persons/17","Name_text":"Brandon Bauer","Number_text":"17","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-17.svg"},{"Name_href":"https://test.dataflowkit.com/persons/18","Name_text":"Colt Morrow","Number_text":"18","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-18.svg"},{"Name_href":"https://test.dataflowkit.com/persons/19","Name_text":"Kaye Garner","Number_text":"19","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-19.svg"},{"Name_href":"https://test.dataflowkit.com/persons/20","Name_text":"Clayton 
Justice","Number_text":"20","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-20.svg"},{"Name_href":"https://test.dataflowkit.com/persons/21","Name_text":"Hiroko Mills","Number_text":"21","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-21.svg"},{"Name_href":"https://test.dataflowkit.com/persons/22","Name_text":"Melvin Lloyd","Number_text":"22","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-22.svg"},{"Name_href":"https://test.dataflowkit.com/persons/23","Name_text":"Marshall Mayo","Number_text":"23","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-23.svg"},{"Name_href":"https://test.dataflowkit.com/persons/24","Name_text":"Rae Casey","Number_text":"24","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-24.svg"},{"Name_href":"https://test.dataflowkit.com/persons/25","Name_text":"Astra Snyder","Number_text":"25","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-25.svg"},{"Name_href":"https://test.dataflowkit.com/persons/26","Name_text":"Simon Mckinney","Number_text":"26","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-26.svg"},{"Name_href":"https://test.dataflowkit.com/persons/27","Name_text":"Graiden Riggs","Number_text":"27","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-27.svg"},{"Name_href":"https://test.dataflowkit.com/persons/28","Name_text":"Jaden Stewart","Number_text":"28","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-28.svg"},{"Name_href":"https://test.dataflowkit.com/persons/29","Name_text":"Christian Galloway","Number_text":"29","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-29.svg"},{"Name_href":"https://test.dataflowkit.com/persons/30","Name_text":"Signe 
Sykes","Number_text":"30","Picture_alt":"","Picture_src":"https://test.dataflowkit.com/static/img/avataaars-30.svg"}],"schema":{"type":"object"}}},"description":"Returns data in one of the following formats - JSON, JSON Lines, CSV, MS Excel, XML"},"400":{"content":{"text/plain; charset=utf-8":{"examples":{"invalidURL":{"summary":"Invalid request URL","value":"Invalid request URL"},"noFields":{"summary":"No fields to scrape","value":"No fields to scrape"}}}},"description":"Bad Request. Invalid payload specified."},"401":{"content":{"text/plain; charset=utf-8":{"examples":{"No_api":{"summary":"No API Key provided","value":"No API Key provided"},"invalid":{"summary":"Invalid API key","value":"Invalid API key"}}}},"description":"Unauthorized. `api_key` parameter is missed or incorrect"},"500":{"content":{"text/plain; charset=utf-8":{"examples":{"fetchFailed":{"summary":"Process fetch failed.","value":"Process fetch failed."},"singleProcessFailed":{"summary":"Create single process failed","value":"Create single process failed"}}}},"description":"Internal Server Error is a very general HTTP status code that means something has gone wrong on the web site's server."}},"summary":"Extract structured data from web pages","tags":["parse"]}},"/serp":{"post":{"deprecated":false,"description":"To crawl search engine result pages, you can use `/serp` endpoint. SERP collection service extracts a list of organic results, news, images, and more. 
Specify configuration parameters, such as country or languages, to customize output SERP data.\nThe following search engines are supported\n\n- google\n- google-image\n- google-news\n- google-shopping\n- bing\n- duckduckgo\n- baidu\n- yandex\n\n\nGenerate ready-to-run code for your favorite language at [https://dataflowkit.com/serp](https://dataflowkit.com/serp)","operationId":"serp","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/serprequest"}}},"description":"<h2>Search parameters</h2>\n\n> In most cases, you don't need to customize parameters by hand. Use <a href=\"https://dataflowkit.com/serp\" target=\"_blank\">SERP extraction Code generator</a>. It is the easiest way to generate a payload for launching in the Dataflow kit cloud.\n\n<h3>URL GET parameters</h3>\n\n||||\n|-|-|-|\n|q| Parameter defines encoded search term. You can use anything that you would use in a regular Search engines search. (e.g. for Google, <ul> <li><code>link:dataflowkit.com</code>,</li> <li><code>site:twitter.com Bratislava</code>,</li><li><code>inurl:view/view.shtml</code>, etc.)</li></ul> See The Complete List of 42 Advanced <a href=\"https://ahrefs.com/blog/google-advanced-search-operators/\" target=\"_blank\">Google Search Operators</a>|<ul> <li><code>q</code> parameter is used by google, Bing, DuckDuckGo.</li><li> <code>text</code> is used as query holder by Yandex SE.</li><li> Chinese Baidu uses <code>wd</code> for this purpose.</li></ul>|\n|tbm| <code>tbm</code> is a special Google parameter used to differentiate between search types| <ul> <li><code>tbm=isch</code> - Google Images,</li> <li> <code>tbm=nws</code> - Google News</li> <li><code>tbm=shop</code> - Google Shopping</li> </ul>|\n|lr|Restricts the search to documents written in a particular language.|<ul><li>Google uses <code>lang_{two-letter lang code}</code> to specify languages and <code>|</code> as a delimiter. 
(e.g., <code>lang_sk|lang_de</code> will only search Slovak and German pages). See the <a href=\"https://developers.google.com/custom-search/v1/cse/list\">full list</a> of possible values for Google. </li><li>For Bing specify <code>setLang=en</code> parameter.</li><li> In Yandex use <code>lang=ca</code> parameter</li></ul>|\n|gl|Specify the country to search from. It's a two-letter country code. (e.g., <code>sk</code> for Slovakia, or <code>us</code> for the United States).| For Google see the <a href=\"https://developers.google.com/custom-search/docs/xml_results_appendices#countryCodes\">Country Codes</a> page for a list of valid values. For Bing <code>cc=at</code> parameter is used.|\n","required":true},"responses":{"200":{"content":{"application/json":{"example":[{"description_text":"Dataflow kit visits the web on your behalf, processes Javascript driven pages in the cloud, return rendered HTML, capture screenshot or save as PDF. Dataflow Kit services. Headless Chrome as a service. We automate dynamic web content download using the Headless Chrome browser. ...","link_href":"https://dataflowkit.com/","link_text":"Turn Websites into structured data /Dataflow kit"},{"description_text":"Dataflow kit (\"DFK\") is a Web Scraping framework for Gophers. It extracts data from web pages, following the specified CSS Selectors. You can use it in many ways for data mining, data processing or archiving.","link_href":"https://github.com/slotix/dataflowkit","link_text":"GitHub - slotix/dataflowkit: Extract structured data from ..."},{"description_text":"The Department of Health - Abu Dhabi (DoH - Abu Dhabi) leverages the DataFlow Group's specialized Primary Source Verification (PSV) solutions to screen the credentials of professionals working within Abu Dhabi's healthcare sector. 
As the regulative body of the healthcare sector in Abu Dhabi, DoH - Abu Dhabi ensures excellence for the ...","link_href":"https://corp.dataflowgroup.com/verification-services/start-your-verification/healthcare/department-of-health-abu-dhabi/","link_text":"Department of Health - Abu Dhabi - Dataflow Group"},{"description_text":"Dataflow Kit was added by slotix in Apr 2020 and the latest update was made in May 2020. The list of alternatives was updated Apr 2020. It's possible to update the information on Dataflow Kit or report it as discontinued, duplicated or spam.","link_href":"https://alternativeto.net/software/dataflow-kit/","link_text":"Dataflow Kit Alternatives and Similar Websites and Apps ..."},{"description_text":"Dataflow Kit Reloaded. We are so excited to introduce a new, completely re-implemented Dataflow Kit. In particular, we supplement our legacy custom web scraper with more focused and more understandable web services for our users.","link_href":"https://blog.dataflowkit.com/","link_text":"Dataflow Kit Blog"},{"description_text":"The Dubai Health Authority (DHA) leverages the DataFlow Group's specialized Primary Source Verification (PSV) solutions to screen the credentials of professionals working within Dubai's healthcare sector. The DHA is led by a mission to ensure access to health services, maintain and enhance the quality of these services, improve the health ...","link_href":"https://corp.dataflowgroup.com/verification-services/start-your-verification/healthcare/dubai-health-authority/","link_text":"Dubai Health Authority - Dataflow Group"},{"description_text":"Dataflow Kit Reloaded. We are so excited to introduce a new, completely re-implemented Dataflow Kit. 
In particular, we supplement our legacy custom web scraper with more focused and more understandable web services for our users.","link_href":"https://blog.dataflowkit.com/reloaded/","link_text":"Dataflow Kit Reloaded."},{"description_text":"The Dataflow Kit API allows embedding free COVID-19 live statistics web widget into sites. Methods provide data for the USA, Spain, or the World. Developers can access live statistics data through the DFK COVID-19 API for free. They can build widgets, mobile apps, or integrate them into other applications.","link_href":"https://www.programmableweb.com/api/dataflow-kit-rest-api-v1","link_text":"Dataflow Kit REST API v1 | ProgrammableWeb"},{"description_text":"Description. Dataflow kit is a Scraping framework for Gophers. DFK extracts structured data from web pages, following the specified extractors. It can be used in many ways for data mining, data processing or archiving.","link_href":"https://go.libhunt.com/dataflowkit-alternatives","link_text":"Dataflow kit Alternatives - Go Text Processing | LibHunt"},{"description_text":"Point, click and extract. Work on any interactive site Scrape a website behind a login form Extract data from multiple pages. Scrape infinite scrolled pages. Crawl details; Extract and follow lin","link_href":"https://www.startupranking.com/dataflow-kit","link_text":"Dataflow Kit - Fast extraction of structured data from ..."}],"schema":{"type":"object"}},"application/x-ndjson":{"example":"{\"description_text\":\"We offer Dataflow kit Proxies service to get around content download restrictions from specific websites or send requests through proxies to obtain country-specific versions of target websites. Just specify the target country from 100+ supported global locations to send your web/ SERPs scraping API requests.\",\"link_href\":\"https://dataflowkit.com/\",\"link_text\":\"Turn Websites into structured data /Dataflow kit\"}\n{\"description_text\":\"Dataflow kit. 
Dataflow kit (\\\"DFK\\\") is a Web Scraping framework for Gophers. It extracts data from web pages, following the specified CSS Selectors.\",\"link_href\":\"https://github.com/slotix/dataflowkit\",\"link_text\":\"GitHub - slotix/dataflowkit: Extract structured data from ...\"}\n{\"description_text\":\"The Department of Health - Abu Dhabi (DoH - Abu Dhabi) leverages the DataFlow Group's specialized Primary Source Verification (PSV) solutions to screen the credentials of professionals working within Abu Dhabi's healthcare sector. As the regulative body of the healthcare sector in Abu Dhabi, DoH - Abu Dhabi ensures excellence for the ...\",\"link_href\":\"https://corp.dataflowgroup.com/verification-services/start-your-verification/healthcare/department-of-health-abu-dhabi/\",\"link_text\":\"Department of Health - Abu Dhabi - Dataflow Group\"}\n{\"description_text\":\"Dataflow Kit Reloaded. We are so excited to introduce a new, completely re-implemented Dataflow Kit. In particular, we supplement our legacy custom web scraper with more focused and more understandable web services for our users.\",\"link_href\":\"https://blog.dataflowkit.com/\",\"link_text\":\"Dataflow Kit Blog\"}\n{\"description_text\":\"Coronavirus Developer Resource Center. COVID-19 APIs, SDKs, coverage, open source code and other related dev resources »\",\"link_href\":\"https://www.programmableweb.com/api/dataflow-kit-covid-19-tracking\",\"link_text\":\"Dataflow Kit COVID-19 Tracking API | ProgrammableWeb\"}\n{\"description_text\":\"Dataflow Kit Reloaded. We are so excited to introduce a new, completely re-implemented Dataflow Kit. 
In particular, we supplement our legacy custom web scraper with more focused and more understandable web services for our users.\",\"link_href\":\"https://blog.dataflowkit.com/reloaded/\",\"link_text\":\"Dataflow Kit Reloaded.\"}\n{\"description_text\":\"The Dubai Health Authority (DHA) leverages the DataFlow Group's specialized Primary Source Verification (PSV) solutions to screen the credentials of professionals working within Dubai's healthcare sector. The DHA is led by a mission to ensure access to health services, maintain and enhance the quality of these services, improve the health ...\",\"link_href\":\"https://corp.dataflowgroup.com/verification-services/start-your-verification/healthcare/dubai-health-authority/\",\"link_text\":\"Dubai Health Authority - Dataflow Group\"}\n{\"description_text\":\"Dataflow Kit Extract information from web sites with a visual point-and-click toolkit. Turn websites into useful data. Automate data workflows on the web, process, and transform data at any scale.\",\"link_href\":\"https://alternativeto.net/software/dataflow-kit/\",\"link_text\":\"Dataflow Kit Alternatives and Similar Websites and Apps ...\"}\n{\"description_text\":\"The Dataflow Kit API allows embedding free COVID-19 live statistics web widget into sites. Developers can access live statistics data through the DFK COVID-19 API for free. They can build widgets, mobile apps, or integrate them into other applications.\",\"link_href\":\"https://www.programmableweb.com/api/dataflow-kit-0\",\"link_text\":\"Dataflow Kit API | ProgrammableWeb\"}\n{\"description_text\":\"Coronavirus info widgets. 
Embed free COVID-19 live statistics web widget into your site.\",\"link_href\":\"https://covid-19.dataflowkit.com/\",\"link_text\":\"COVID-19 Coronavirus live statistics\"}\n"},"text/csv":{"example":"link_href,link_text,description_text\nhttps://dataflowkit.com/,Turn Websites into structured data /Dataflow kit,\"Dataflow kit visits the web on your behalf, processes Javascript driven pages in the cloud, return rendered HTML, capture screenshot or save as PDF. Dataflow Kit services. Headless Chrome as a service. We automate dynamic web content download using the Headless Chrome browser. ...\"\nhttps://github.com/slotix/dataflowkit,GitHub - slotix/dataflowkit: Extract structured data from ...,\"Dataflow kit (\"\"DFK\"\") is a Web Scraping framework for Gophers. It extracts data from web pages, following the specified CSS Selectors. You can use it in many ways for data mining, data processing or archiving.\"\nhttps://corp.dataflowgroup.com/verification-services/start-your-verification/healthcare/department-of-health-abu-dhabi/,Department of Health - Abu Dhabi - Dataflow Group,\"The Department of Health - Abu Dhabi (DoH - Abu Dhabi) leverages the DataFlow Group's specialized Primary Source Verification (PSV) solutions to screen the credentials of professionals working within Abu Dhabi's healthcare sector. As the regulative body of the healthcare sector in Abu Dhabi, DoH - Abu Dhabi ensures excellence for the ...\"\nhttps://blog.dataflowkit.com/,Dataflow Kit Blog,\"Dataflow Kit Reloaded. We are so excited to introduce a new, completely re-implemented Dataflow Kit. In particular, we supplement our legacy custom web scraper with more focused and more understandable web services for our users.\"\nhttps://www.programmableweb.com/api/dataflow-kit-covid-19-tracking,Dataflow Kit COVID-19 Tracking API | ProgrammableWeb,\"Coronavirus Developer Resource Center. 
COVID-19 APIs, SDKs, coverage, open source code and other related dev resources »\"\nhttps://blog.dataflowkit.com/reloaded/,Dataflow Kit Reloaded.,\"Dataflow Kit Reloaded. We are so excited to introduce a new, completely re-implemented Dataflow Kit. In particular, we supplement our legacy custom web scraper with more focused and more understandable web services for our users.\"\nhttps://corp.dataflowgroup.com/verification-services/start-your-verification/healthcare/dubai-health-authority/,Dubai Health Authority - Dataflow Group,\"The Dubai Health Authority (DHA) leverages the DataFlow Group's specialized Primary Source Verification (PSV) solutions to screen the credentials of professionals working within Dubai's healthcare sector. The DHA is led by a mission to ensure access to health services, maintain and enhance the quality of these services, improve the health ...\"\nhttps://alternativeto.net/software/dataflow-kit/,Dataflow Kit Alternatives and Similar Websites and Apps ...,\"Popular Alternatives to Dataflow Kit for Web, Windows, Mac, Linux, Software as a Service (SaaS) and more. Explore 25 websites and apps like Dataflow Kit, all suggested and ranked by the AlternativeTo user community.\"\nhttps://www.programmableweb.com/api/dataflow-kit-0,Dataflow Kit API | ProgrammableWeb,\"The Dataflow Kit API allows embedding free COVID-19 live statistics web widget into sites. Developers can access live statistics data through the DFK COVID-19 API for free. They can build widgets, mobile apps, or integrate them into other applications.\"\nhttps://covid-19.dataflowkit.com/,COVID-19 Coronavirus live statistics,Coronavirus info widgets. 
Embed free COVID-19 live statistics web widget into your site.\n"}},"description":"Returns data in one of the following formats - JSON, JSON Lines, CSV, MS Excel, XML"},"400":{"content":{"text/plain; charset=utf-8":{"examples":{"invalidURL":{"summary":"Invalid request URL","value":"Invalid request URL"},"noFields":{"summary":"No fields to scrape","value":"No fields to scrape"}}}},"description":"Bad Request. Invalid payload specified."},"401":{"content":{"text/plain; charset=utf-8":{"examples":{"No_api":{"summary":"No API Key provided","value":"No API Key provided"},"invalid":{"summary":"Invalid API key","value":"Invalid API key"}}}},"description":"Unauthorized. `api_key` parameter is missed or incorrect"},"500":{"content":{"text/plain; charset=utf-8":{"examples":{"fetchFailed":{"summary":"Process fetch failed.","value":"Process fetch failed."},"singleProcessFailed":{"summary":"Create single process failed","value":"Create single process failed"}}}},"description":"Internal Server Error is a very general HTTP status code that means something has gone wrong on the web site's server."}},"summary":"Collect search results from search engines","tags":["serp"]}}},"components":{"responses":{"BadRequest":{"content":{"text/plain; charset=utf-8":{"examples":{"invalidURL":{"summary":"Invalid request URL","value":"Invalid request URL"},"noFields":{"summary":"No fields to scrape","value":"No fields to scrape"}}}},"description":"Bad Request. Invalid payload specified."},"InternalServerError":{"content":{"text/plain; charset=utf-8":{"examples":{"fetchFailed":{"summary":"Process fetch failed.","value":"Process fetch failed."},"singleProcessFailed":{"summary":"Create single process failed","value":"Create single process failed"}}}},"description":"Internal Server Error is a very general HTTP status code that means something has gone wrong on the web site's server."},"NotFound":{"content":{"text/plain; charset=utf-8":{"example":"404. 
Not found"}},"description":"The specified resource was not found"},"Unauthorized":{"content":{"text/plain; charset=utf-8":{"examples":{"No_api":{"summary":"No API Key provided","value":"No API Key provided"},"invalid":{"summary":"Invalid API key","value":"Invalid API key"}}}},"description":"Unauthorized. `api_key` parameter is missed or incorrect"}},"schemas":{"Paginator":{"description":"Specify _Next link_ paginator on pages containing a link pointing to the next page. The next page link is extracted from a document by querying href attribute of a given element's CSS selector.","properties":{"nextPageSelector":{"example":".page-link","type":"string"},"pageNum":{"example":10,"type":"integer"}},"type":"object"},"action":{"anyOf":[{"description":"Sets the value of an input field as if you had typed it in. You can also set the value of combo boxes, checkboxes, etc., using this action. In these cases, the value must be the value of the selected option, not visible text.","properties":{"ignoreIfNotPresent":{"example":false,"type":"boolean"},"selector":{"description":"Must be a valid CSS Selector","example":"#search-form-editbox","type":"string"},"value":{"description":"The value to input.","example":"web scraper","type":"string"}},"title":"input","type":"object"},{"description":"The Send Keys action simulates real user input of key by key into a given string. It mimics real user behavior, such as the inability to type into invisible or read-only DOM elements. This action is useful for cases where explicit keystroke events are required, like auto-completing combo boxes. Unlike a similar 'input' action, which forces a specified value directly into an input selector, this action does not overwrite existing content.","properties":{"ignoreIfNotPresent":{"example":false,"type":"boolean"},"selector":{"description":"Must be a valid CSS Selector","example":"#search-form-editbox","type":"string"},"value":{"description":"Sequence of keys to send. 
Keys can include keystrokes such as ALT+A, ENTER, BACKSPACE, etc.","example":"web scraper","type":"string"}},"title":"sendKeys","type":"object"},{"description":"Clicks on a target element (such as a link, button, checkbox, or radio button) with the specified CSS Selector.","properties":{"ignoreIfNotPresent":{"description":"This optional parameter is useful when the target element occasionally may not be present in the DOM.","example":false,"type":"boolean"},"selector":{"description":"Must be a valid CSS Selector","example":".click-me","type":"string"},"skipLastIteration":{"description":"It is used only for click actions inside a loop. Skips the last iteration.","example":true,"type":"boolean"}},"title":"click","type":"object"},{"description":"Double clicks on a target element (such as a link, button, checkbox, or radio button) with the specified CSS Selector.","properties":{"ignoreIfNotPresent":{"description":"This optional parameter is useful when the target element occasionally may not be present in the DOM.","example":false,"type":"boolean"},"selector":{"description":"Must be a valid CSS Selector","example":".double-click-me","type":"string"},"skipLastIteration":{"description":"It is used only for click actions inside a loop. Skips the last iteration.","example":true,"type":"boolean"}},"title":"doubleClick","type":"object"},{"description":"Click on an element with the specified CSS Selector. JS Click internally invokes a script (Javascript) that clicks the element.","properties":{"ignoreIfNotPresent":{"description":"This optional parameter is useful when the target element occasionally may not be present in the DOM.","example":false,"type":"boolean"},"selector":{"description":"Must be a valid CSS Selector for the target element.","example":".js-click-me","type":"string"},"skipLastIteration":{"description":"It is used only for click actions inside a loop. 
Skips the last iteration.","example":true,"type":"boolean"}},"title":"jsclick","type":"object"},{"description":"Submit the specified form. This action is useful for forms without explicit submit buttons, such as single-input Search forms.","properties":{"selector":{"description":"Must be any valid CSS Selector inside the parent form to submit.","example":".some-element-inside-form","type":"string"}},"title":"submit","type":"object"},{"description":"Wait for the target element to become visible on the page.","properties":{"selector":{"description":"Must be a valid CSS Selector for the target element.","example":":root","type":"string"}},"title":"waitVisible","type":"object"},{"description":"Wait for the target element to become invisible on the page.","properties":{"selector":{"description":"Must be a valid CSS Selector for the target element.","example":"#some-element","type":"string"}},"title":"waitNotVisible","type":"object"},{"description":"Wait for the specified amount of time.","properties":{"waitDelay":{"description":"Wait time (in milliseconds).","example":"5000","type":"string"}},"title":"pause","type":"object"},{"description":"Executes the JavaScript passed as the 'script' parameter","properties":{"script":{"description":"The JavaScript snippet to run","example":"console.log(\"It works!\")","type":"string"}},"title":"execute","type":"object"},{"description":"Loop action combines a set of actions and executes it as many times as specified in the \"times\" parameter.","properties":{"actions":{"default":[],"description":"list of actions combined in the loop are executed step-by-step","items":{"$ref":"#/components/schemas/action"},"type":"array"},"times":{"description":"the number of times to execute the wrapped actions within the 'loop .. times' construction.","example":5,"type":"number"}},"title":"loop .. times","type":"object"},{"description":"Sometimes it is necessary to retrieve the HTML content of a web page multiple times in a single request. 
This action is for that.","properties":{"skipLastIteration":{"description":"It is used for loop actions only. Skips the last iteration.","example":true,"type":"boolean"}},"title":"getcontent","type":"object"},{"description":"Scroll a page down to load more content, simulating user interaction with infinite scrolled pages. Or specify the element's CSS Selector to click for loading more content.","properties":{"scrollByPixels":{"description":"Scrolls a web page by the number of pixels specified by 'scrollByPixels' parameter.","example":650,"type":"number"},"scrollingElementSelector":{"description":"Optionally specify here a valid CSS Selector of scrolling element.","example":"#scroll-panel","type":"string"},"selector":{"description":"Some websites require clicking a 'More' button while scrolling a page. Put the 'More' button's valid CSS Selector here.","example":".more-button","type":"string"},"times":{"description":"The number of times to scroll down a web page.","example":3,"type":"integer"}},"title":"scroll","type":"object"}],"title":"Action","type":"object"},"fetchrequest":{"example":{"actions":[{"waitFor":{"waitForSelector":":root"}}],"output":"buffer","proxy":"country-any","type":"base","url":"https://ipapi.co/json/"},"properties":{"actions":{"default":[],"description":"Use actions to automate manual workflows while rendering web pages. They simulate real-world human interaction with pages. _(Chrome fetcher type only)_","items":{"$ref":"#/components/schemas/action"},"type":"array"},"ignoreHTTPStatusErrCodes":{"description":"The HTTP 200 OK success status response code indicates that the request has succeeded. Sometimes a server returns normal HTML content even with an erroneous Non-200 HTTP response status code. The `ignoreHTTPStatusErrCodes` option is useful when you need to force the return of HTML content. 
Defaults to \"false.\"","type":"boolean"},"initialCookies":{"default":[],"description":"The \"Initial Cookies\" option is useful for crawling websites that require a login. The simplest solution to get an array of cookies for specific websites is to use a web browser \"EditThisCookie\" extension. Copy a cookie array with \"EditThisCookie\" and paste it into the \"Initial cookie\" field.","items":{"$ref":"#/components/schemas/initialCookie"},"type":"array"},"output":{"default":"buffer","description":"If set to _file_, the content of downloaded HTML is uploaded to Dataflow Kit Storage first. Then the link to this file is returned. Otherwise, downloaded content is returned in the response body.","enum":["buffer","file"],"type":"string"},"proxy":{"description":"Specify proxy by adding [country ISO code](https://en.wikipedia.org/wiki/ISO_3166-2) to `country-` value to send requests through a proxy in the specified country. Use `country-any` to use random geo-targets.","example":"country-sk","type":"string"},"type":{"description":"If set to `base`, the Base fetcher is used for downloading web page content. Use `chrome` for fetching content with a Headless chrome browser. If omitted `base` fetcher is used by default.","enum":["base","chrome"],"type":"string"},"url":{"description":"Specify URL to download.","type":"string"},"waitDelay":{"description":"Specify a wait delay (in seconds). This may be useful if certain elements of the web site need to be rendered after the initial page load. _(Chrome fetcher type only)_","type":"number"}},"required":["type","url"],"title":"Fetch request","type":"object"},"field":{"properties":{"attrs":{"description":"A set of attributes to extract from a Field. 
Find more information about attributes","items":{"enum":["text","href","src","alt"],"type":"string"},"type":"array"},"details":{"allOf":[{"$ref":"#/components/schemas/parserequest"}],"description":"Details themselves represent an independent Parse request that extracts data from linked pages."},"filters":{"description":"Filters are used to pre-process text data during extraction.","items":{"anyOf":[{"properties":{"name":{"enum":["trim","normal","uppercase","lowercase","capitalize","concatinate"],"type":"string"}},"type":"object"},{"properties":{"name":{"example":"regex","type":"string"},"param":{"example":"[\\\\d.]+","type":"string"}},"type":"object"}]},"type":"array"},"name":{"description":"Field name is used to aggregate results.","type":"string"},"selector":{"description":"Selector represents a CSS selector for data extraction within the given block.","example":"#cards a","type":"string"},"type":{"description":"Selector type. (0 - image, 1 - text, 2 - link)","enum":[0,1,2],"type":"integer"}},"required":["attrs","name","selector","type"],"title":"Field","type":"object"},"initialCookie":{"description":"The InitialCookie structure keeps cookies that can optionally be passed to a new fetcher to crawl a website that requires a login. 
Generate Cookies array with EditThisCookie chrome extension.","properties":{"domain":{"example":".twitter.com","type":"string"},"expirationDate":{"example":1762900726.409761,"type":"number"},"hostOnly":{"example":false,"type":"boolean"},"httpOnly":{"example":false,"type":"boolean"},"id":{"example":1,"type":"number"},"name":{"example":"auth_token","type":"string"},"path":{"example":"/","type":"string"},"sameSite":{"enum":["unspecified","strict","lax","no_restriction"],"type":"string"},"secure":{"example":true,"type":"boolean"},"session":{"example":true,"type":"boolean"},"storeID":{"example":"1","type":"string"},"value":{"example":"46fd9fed1ab8b0b0e231ac3f","type":"string"}},"title":"initialCookie","type":"object"},"parserequest":{"example":{"fields":[{"attrs":["text"],"filters":[{"name":"trim"}],"name":"Number","selector":".badge-primary","type":1},{"attrs":["href","text"],"filters":[{"name":"trim"}],"name":"Name","selector":"#cards a","type":2},{"attrs":["src","alt"],"filters":[{"name":"trim"}],"name":"Picture","selector":".card-img-top","type":0}],"format":"json","name":"test.dataflowkit.com","paginator":{"nextPageSelector":".page-item:nth-child(2) .page-link","pageNum":2},"path":false,"request":{"type":"chrome","url":"https://test.dataflowkit.com/persons/page-0"}},"properties":{"commonParent":{"description":"Specifies common ancestor block for a set of fields used to extract data from a web page. _(CSS Selector)_","example":".common-block","type":"string"},"fields":{"description":"Define a set of fields used to extract data from a web page. 
A Field represents a given chunk of extracted data from every block on each page.\n","items":{"$ref":"#/components/schemas/field"},"type":"array"},"format":{"description":"Extracted data is returned either in CSV, MS Excel, JSON, JSON(Lines) or XML format.","enum":["csv","json","jsonl","excel","xml"],"title":"Format","type":"string"},"name":{"description":"Collection name.","type":"string"},"paginator":{"$ref":"#/components/schemas/Paginator"},"path":{"default":false,"description":"Path is a special parameter specifying navigation pages only. It collects information from detailed pages. No results from the current page return. Defaults to false.","title":"Path","type":"boolean"},"request":{"$ref":"#/components/schemas/fetchrequest"}},"required":["fields","format","name","proxy","type","url"],"title":"Parse request","type":"object"},"serprequest":{"example":{"commonParent":"div[id].result","fields":[{"attrs":["href","text"],"filters":[{"name":"trim"}],"name":"link","selector":".result__a","type":2},{"attrs":["text"],"filters":[{"name":"trim"}],"name":"description","selector":".js-result-snippet","type":1}],"format":"json","name":"duckduckgo","request":{"proxy":"country-any","type":"chrome","url":"https://duckduckgo.com/?q=Dataflow+kit&ia=web"}},"properties":{"fields":{"description":"Specify CSS selectors (patterns) used to gather data from Search Engine Result Pages.\n\nReady-to-use payloads for collecting search results from the most popular Search Engines are available. 
These payloads are customizable, though.\n","items":{"$ref":"#/components/schemas/field"},"type":"array"},"format":{"description":"Extracted data is returned either in CSV, MS Excel, JSON, JSON(Lines) or XML format.","enum":["csv","json","jsonl","excel","xml"],"title":"Format","type":"string"},"name":{"description":"Collection name.","type":"string"},"pageNum":{"default":1,"description":"Specify number of pages to crawl.","type":"integer"},"proxy":{"description":"Always specify proxy for sending SERP requests. Add choosen [country ISO code](https://en.wikipedia.org/wiki/ISO_3166-2) to `country-` value to send requests through a proxy in the specified country. Use `country-any` to use random geo-targets.","example":"country-any","type":"string"},"type":{"description":"For SERP requests you should _always_ use `chrome` type to fetch content with a Headless chrome browser","example":"chrome","type":"string"},"url":{"description":"url holds the li