UNPKG

@pipedream/dataforseo

Version:

Pipedream DataForSEO Components

72 lines (70 loc) 2.29 kB
import { parseObjectEntries } from "../../common/utils.mjs"; import dataforseo from "../../dataforseo.app.mjs"; export default { key: "dataforseo-parse-page-content", name: "Parse Page Content", description: "Parse the content on any page and return its structured content. [See the documentation](https://docs.dataforseo.com/v3/on_page/content_parsing/live/)", version: "0.0.1", type: "action", methods: { parsePageContent(args = {}) { return this.dataforseo._makeRequest({ path: "/on_page/content_parsing/live", method: "post", ...args, }); }, }, props: { dataforseo, url: { type: "string", label: "URL", description: "The URL of the page to parse, e.g. `https://pipedream.com/`", }, customUserAgent: { type: "string", label: "Custom User Agent", description: "Custom user agent for crawling a website. Default is `Mozilla/5.0 (compatible; RSiteAuditor)`", optional: true, }, storeRawHtml: { type: "boolean", label: "Store Raw HTML", description: "Set to `true` if you want to get the HTML of the page using the [https://docs.dataforseo.com/v3/on_page/raw_html/](OnPage Raw HTML endpoint)", optional: true, }, enableJavascript: { type: "boolean", label: "Enable Javascript", description: "Set to `true` if you want to load the scripts available on a page", optional: true, }, additionalOptions: { propDefinition: [ dataforseo, "additionalOptions", ], description: "Additional parameters to send in the request. [See the documentation](https://docs.dataforseo.com/v3/on_page/content_parsing/live/) for all available parameters. Values will be parsed as JSON where applicable.", }, }, async run({ $ }) { const response = await this.parsePageContent({ $, data: [ { url: this.url, custom_user_agent: this.customUserAgent, store_raw_html: this.storeRawHtml, enable_javascript: this.enableJavascript, ...parseObjectEntries(this.additionalOptions), }, ], }); $.export("$summary", "Successfully parsed page content"); return response; }, };