UNPKG

@llml-browser/types

Version:

TypeScript types and schemas for the @llml-browser API

1 lines 1.91 kB
{"version":3,"sources":["../src/services/cheerio/types.ts"],"sourcesContent":["import type { MetaFiles } from '@/routers/links/types';\nimport type { PageMetadata } from '@/services/metadata/types';\n\n/**\n * Represents data scraped from a webpage using Cheerio.\n * Contains various extracted elements and metadata from the target page.\n *\n * @interface ScrapedData\n *\n * @property title - The title of the webpage extracted from the title tag\n * @property rawHtml - The original unmodified HTML content of the webpage\n * @property description - The meta description of the webpage\n * @property metadata - Optional structured metadata extracted from the page (OpenGraph, Twitter Cards, etc.)\n * @property cleanedHtml - Optional sanitized version of the HTML with unnecessary elements removed\n * @property metaFiles - Optional metadata files like robots.txt and sitemap.xml\n *\n * @example\n * ```typescript\n * const scrapedData: ScrapedData = {\n * title: \"Example Website - Home Page\",\n * rawHtml: \"<html><head><title>Example Website - Home Page</title></head><body>...</body></html>\",\n * description: \"This is an example website demonstrating web scraping capabilities.\",\n * metadata: {\n * title: \"Example Website - Home Page\",\n * description: \"This is an example website demonstrating web scraping capabilities.\",\n * ogTitle: \"Example Website\",\n * // other metadata properties\n * },\n * cleanedHtml: \"<div><h1>Example Website</h1><p>Main content...</p></div>\",\n * metaFiles: {\n * robots: \"User-agent: *\\nDisallow: /admin/\",\n * sitemap: \"<?xml version=\\\"1.0\\\"?><urlset>...</urlset>\"\n * }\n * };\n * ```\n */\nexport interface ScrapedData {\n title: string;\n rawHtml: string;\n description: string;\n metadata?: PageMetadata;\n cleanedHtml?: string;\n metaFiles?: MetaFiles;\n}\n"],"mappings":";AAAA;","names":[]}