UNPKG

pomljs

Version:

Prompt Orchestration Markup Language

1 lines 7.83 kB
{"version":3,"file":"webpage.cjs","sources":["../../.build/components/webpage.js"],"sourcesContent":["import * as React from 'react';\nimport fs from '../util/fs';\nimport { component, expandRelative, useWithCatch } from \"../base\";\nimport { Text } from \"../essentials\";\nimport * as cheerio from 'cheerio';\nimport { htmlToPoml } from './document';\nasync function fetchWebpage(url) {\n try {\n const response = await fetch(url);\n if (!response.ok) {\n throw new Error(`HTTP error! Status: ${response.status}`);\n }\n return await response.text();\n }\n catch (error) {\n throw new Error(`Error fetching webpage from ${url}: ${error}`);\n }\n}\nasync function extractTextFromHtml(html, selector) {\n const $ = cheerio.load(html);\n // Remove scripts and styles\n $('script').remove();\n $('style').remove();\n // If selector is provided, extract content from matching elements\n if (selector) {\n try {\n const elements = $(selector);\n if (elements.length === 0) {\n return `No elements found matching selector: ${selector}`;\n }\n return elements\n .map((_, el) => $(el).text())\n .get()\n .join('\\n\\n');\n }\n catch (error) {\n throw new Error(`Error with selector \"${selector}\": ${error}`);\n }\n }\n // Get text from body, preserving some structure\n return $('body').text().trim() || '';\n}\nasync function processWebpage(props) {\n const { src, url, buffer, extractText = false, selector } = props;\n let html;\n if (url) {\n html = await fetchWebpage(url);\n }\n else if (src) {\n const filePath = expandRelative(src);\n html = fs.readFileSync(filePath, 'utf-8');\n }\n else if (buffer) {\n if (typeof buffer === 'string') {\n html = buffer;\n }\n else {\n html = buffer.toString('utf-8');\n }\n }\n else {\n throw new Error('Either url, src, or buffer must be provided');\n }\n if (extractText) {\n const text = await extractTextFromHtml(html, selector);\n return React.createElement(Text, { whiteSpace: \"pre\" }, text);\n }\n else {\n // Use the htmlToPoml function to convert HTML to POML components\n const $ = cheerio.load(html);\n let content;\n if (selector) {\n const selected = $(selector);\n if (selected.length === 0) {\n return React.createElement(Text, null,\n \"No elements found matching selector: \",\n selector);\n }\n content = htmlToPoml(selected, $, props);\n }\n else {\n content = htmlToPoml($('body'), $, props);\n }\n return content;\n }\n}\n/**\n * Displays content from a webpage.\n *\n * @param {string} url - The URL of the webpage to fetch and display.\n * @param {string} src - Local file path to an HTML file to display.\n * @param {string|Buffer} buffer - HTML content as string or buffer.\n * @param {string} base64 - Base64 encoded HTML content.\n * @param {boolean} extractText - Whether to extract plain text content (true) or convert HTML to structured POML (false). Default is false.\n * @param {string} selector - CSS selector to extract specific content from the page (e.g., \"article\", \".content\", \"#main\"). Default is \"body\".\n *\n * @see {@link Inline} for other props available.\n *\n * @example\n * Display content from a URL:\n * ```xml\n * <webpage url=\"https://example.com\" />\n * ```\n *\n * Extract only specific content using a selector:\n * ```xml\n * <webpage url=\"https://example.com\" selector=\"main article\" />\n * ```\n *\n * Convert HTML to structured POML components:\n * ```xml\n * <webpage url=\"https://example.com\" extractText=\"false\" />\n * ```\n */\nexport const Webpage = component('Webpage', { asynchorous: true })((props) => {\n let { src, url, buffer, base64, extractText, selector, ...others } = props;\n if (base64) {\n if (buffer !== undefined) {\n throw new Error('Either buffer or base64 should be provided, not both.');\n }\n buffer = Buffer.from(base64, 'base64');\n }\n const content = useWithCatch(processWebpage({ ...props, buffer: buffer }), others);\n return React.createElement(Text, { ...others }, content ?? null);\n});\n//# sourceMappingURL=webpage.js.map"],"names":["cheerio","expandRelative","fs","React","Text","htmlToPoml","component","useWithCatch"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAMA,eAAe,YAAY,CAAC,GAAG,EAAE;AACjC,IAAI,IAAI;AACR,QAAQ,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC;AACzC,QAAQ,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE;AAC1B,YAAY,MAAM,IAAI,KAAK,CAAC,CAAC,oBAAoB,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC;AACrE;AACA,QAAQ,OAAO,MAAM,QAAQ,CAAC,IAAI,EAAE;AACpC;AACA,IAAI,OAAO,KAAK,EAAE;AAClB,QAAQ,MAAM,IAAI,KAAK,CAAC,CAAC,4BAA4B,EAAE,GAAG,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC;AACvE;AACA;AACA,eAAe,mBAAmB,CAAC,IAAI,EAAE,QAAQ,EAAE;AACnD,IAAI,MAAM,CAAC,GAAGA,kBAAO,CAAC,IAAI,CAAC,IAAI,CAAC;AAChC;AACA,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,MAAM,EAAE;AACxB,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE;AACvB;AACA,IAAI,IAAI,QAAQ,EAAE;AAClB,QAAQ,IAAI;AACZ,YAAY,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;AACxC,YAAY,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE;AACvC,gBAAgB,OAAO,CAAC,qCAAqC,EAAE,QAAQ,CAAC,CAAC;AACzE;AACA,YAAY,OAAO;AACnB,iBAAiB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE;AAC5C,iBAAiB,GAAG;AACpB,iBAAiB,IAAI,CAAC,MAAM,CAAC;AAC7B;AACA,QAAQ,OAAO,KAAK,EAAE;AACtB,YAAY,MAAM,IAAI,KAAK,CAAC,CAAC,qBAAqB,EAAE,QAAQ,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;AAC1E;AACA;AACA;AACA,IAAI,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,IAAI,EAAE;AACxC;AACA,eAAe,cAAc,CAAC,KAAK,EAAE;AACrC,IAAI,MAAM,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,WAAW,GAAG,KAAK,EAAE,QAAQ,EAAE,GAAG,KAAK;AACrE,IAAI,IAAI,IAAI;AACZ,IAAI,IAAI,GAAG,EAAE;AACb,QAAQ,IAAI,GAAG,MAAM,YAAY,CAAC,GAAG,CAAC;AACtC;AACA,SAAS,IAAI,GAAG,EAAE;AAClB,QAAQ,MAAM,QAAQ,GAAGC,mBAAc,CAAC,GAAG,CAAC;AAC5C,QAAQ,IAAI,GAAGC,aAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC;AACjD;AACA,SAAS,IAAI,MAAM,EAAE;AACrB,QAAQ,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE;AACxC,YAAY,IAAI,GAAG,MAAM;AACzB;AACA,aAAa;AACb,YAAY,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC;AAC3C;AACA;AACA,SAAS;AACT,QAAQ,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC;AACtE;AACA,IAAI,IAAI,WAAW,EAAE;AACrB,QAAQ,MAAM,IAAI,GAAG,MAAM,mBAAmB,CAAC,IAAI,EAAE,QAAQ,CAAC;AAC9D,QAAQ,OAAOC,gBAAK,CAAC,aAAa,CAACC,eAAI,EAAE,EAAE,UAAU,EAAE,KAAK,EAAE,EAAE,IAAI,CAAC;AACrE;AACA,SAAS;AACT;AACA,QAAQ,MAAM,CAAC,GAAGJ,kBAAO,CAAC,IAAI,CAAC,IAAI,CAAC;AACpC,QAAQ,IAAI,OAAO;AACnB,QAAQ,IAAI,QAAQ,EAAE;AACtB,YAAY,MAAM,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC;AACxC,YAAY,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE;AACvC,gBAAgB,OAAOG,gBAAK,CAAC,aAAa,CAACC,eAAI,EAAE,IAAI;AACrD,oBAAoB,uCAAuC;AAC3D,oBAAoB,QAAQ,CAAC;AAC7B;AACA,YAAY,OAAO,GAAGC,mBAAU,CAAC,QAAQ,EAAE,CAAC,EAAE,KAAK,CAAC;AACpD;AACA,aAAa;AACb,YAAY,OAAO,GAAGA,mBAAU,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,KAAK,CAAC;AACrD;AACA,QAAQ,OAAO,OAAO;AACtB;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACuBC,cAAS,CAAC,SAAS,EAAE,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK;AAC9E,IAAI,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,MAAM,EAAE,GAAG,KAAK;AAC9E,IAAI,IAAI,MAAM,EAAE;AAChB,QAAQ,IAAI,MAAM,KAAK,SAAS,EAAE;AAClC,YAAY,MAAM,IAAI,KAAK,CAAC,uDAAuD,CAAC;AACpF;AACA,QAAQ,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC;AAC9C;AACA,IAAI,MAAM,OAAO,GAAGC,iBAAY,CAAC,cAAc,CAAC,EAAE,GAAG,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,CAAC;AACtF,IAAI,OAAOJ,gBAAK,CAAC,aAAa,CAACC,eAAI,EAAE,EAAE,GAAG,MAAM,EAAE,EAAE,OAAO,IAAI,IAAI,CAAC;AACpE,CAAC;;"}