@llml-browser/types
Version:
TypeScript types and schemas for the @llml-browser API
1 lines • 6.54 kB
Source Map (JSON)
{"version":3,"sources":["../src/services/html-cleaning/types.ts"],"sourcesContent":["import { z } from 'zod';\n\n/**\n * Schema for HTML cleaning configuration options.\n * Defines the validation rules for HTML sanitization parameters.\n */\nexport const HTMLCleaningOptionsSchema = z\n .object({\n allowedHTMLTags: z.array(z.string()).optional(),\n disallowedHTMLTags: z.array(z.string()).optional(),\n extractMainContent: z.boolean().optional().default(true),\n /* Deprecated property, will be removed in future. add baseUrl to HTMLCleaning as a required parameter instead*/\n documentBaseUrl: z.string().optional(),\n removeBase64Images: z.boolean().optional().default(true),\n })\n .strict();\n\n/**\n * Configuration options for HTML content cleaning and sanitization.\n * Controls which elements are preserved or removed during processing.\n *\n * @property allowedHTMLTags - HTML tags to preserve in the output (whitelist)\n * @property disallowedHTMLTags - HTML tags to remove from the output (blacklist)\n * @property extractMainContent - Whether to extract only the main content area, removing navigation, footers, etc.\n * @property documentBaseUrl - Base URL for resolving relative URLs (deprecated, use baseUrl parameter instead)\n * @property removeBase64Images - Whether to remove base64 encoded images to reduce payload size\n *\n * @example\n * ```typescript\n * const cleaningOptions: HTMLCleaningOptions = {\n * allowedHTMLTags: ['p', 'h1', 'h2', 'h3', 'ul', 'ol', 'li', 'a', 'strong', 'em'],\n * disallowedHTMLTags: ['script', 'style', 'iframe', 'form', 'button'],\n * extractMainContent: true,\n * documentBaseUrl: 'https://example.com',\n * removeBase64Images: true\n * };\n * ```\n */\nexport type HTMLCleaningOptions = z.infer<typeof HTMLCleaningOptionsSchema>;\n\n/**\n * Schema for defining patterns to match DOM elements.\n * Used to create selectors for targeting specific elements during HTML processing.\n */\nexport const ElementPatternSchema = z\n .object({\n tag: z.union([z.string(), z.instanceof(RegExp)]).optional(),\n attributes: z\n .array(\n z\n .object({\n name: z.union([z.string(), z.instanceof(RegExp)]),\n value: z.union([z.string(), z.instanceof(RegExp)]).optional(),\n })\n .strict(),\n )\n .optional(),\n classNames: z.array(z.union([z.string(), z.instanceof(RegExp)])).optional(),\n ids: z.array(z.union([z.string(), z.instanceof(RegExp)])).optional(),\n })\n .strict();\n\n/**\n * Defines a pattern for matching DOM elements based on their properties.\n * Used for targeted element selection during HTML sanitization.\n *\n * @property tag - Element tag name or regex pattern to match (e.g., 'div', /^h[1-6]$/)\n * @property attributes - List of attribute patterns to match by name and optional value\n * @property classNames - List of class names or regex patterns to match in the class attribute\n * @property ids - List of ID patterns to match in the id attribute\n *\n * @example\n * ```typescript\n * const mainContentPattern: ElementPattern = {\n * tag: 'div',\n * attributes: [{ name: 'data-role', value: 'content' }],\n * classNames: ['main-content', /^content-/],\n * ids: ['main', 'article-body']\n * };\n *\n * const navigationPattern: ElementPattern = {\n * tag: /^nav$/i,\n * classNames: [/nav/, /menu/, /header/],\n * ids: ['navigation', 'main-menu']\n * };\n * ```\n */\nexport type ElementPattern = z.infer<typeof ElementPatternSchema>;\n\n/**\n * Schema for metrics collected during HTML cleaning.\n * Tracks performance and effectiveness of the cleaning process.\n */\nexport const HTMLCleaningMetricsSchema = z.object({\n inputSize: z.number(),\n outputSize: z.number(),\n compressionRatio: z.number(),\n});\n\n/**\n * Metrics collected during HTML cleaning and sanitization.\n * Provides insights into the effectiveness of the cleaning process.\n *\n * @property inputSize - Size of input HTML in bytes before cleaning\n * @property outputSize - Size of cleaned HTML in bytes after processing\n * @property compressionRatio - Ratio of output size to input size (lower is better)\n *\n * @example\n * ```typescript\n * const metrics: HTMLCleaningMetrics = {\n * inputSize: 125000, // 125KB input\n * outputSize: 42000, // 42KB output\n * compressionRatio: 0.336 // 33.6% of original size\n * };\n * ```\n */\nexport type HTMLCleaningMetrics = z.infer<typeof HTMLCleaningMetricsSchema>;\n\n/**\n * Schema for the result of HTML cleaning operation.\n * Defines the structure of the cleaning operation output.\n */\nexport const HTMLCleaningResultSchema = z.object({\n cleanedHtml: z.string(),\n metrics: HTMLCleaningMetricsSchema.optional(),\n});\n\n/**\n * Result of HTML cleaning and sanitization.\n * Contains the processed HTML and optional performance metrics.\n *\n * @property cleanedHtml - Cleaned and sanitized HTML content\n * @property metrics - Optional metrics collected during the cleaning process\n *\n * @example\n * ```typescript\n * const result: HTMLCleaningResult = {\n * cleanedHtml: \"<div><h1>Article Title</h1><p>Clean content...</p></div>\",\n * metrics: {\n * inputSize: 125000,\n * outputSize: 42000,\n * compressionRatio: 0.336\n * }\n * };\n * ```\n */\nexport type HTMLCleaningResult = z.infer<typeof HTMLCleaningResultSchema>;\n"],"mappings":";;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,SAAS,SAAS;AAMX,IAAM,4BAA4B,EACtC,OAAO;AAAA,EACN,iBAAiB,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS;AAAA,EAC9C,oBAAoB,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,SAAS;AAAA,EACjD,oBAAoB,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,IAAI;AAAA;AAAA,EAEvD,iBAAiB,EAAE,OAAO,EAAE,SAAS;AAAA,EACrC,oBAAoB,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,IAAI;AACzD,CAAC,EACA,OAAO;AA6BH,IAAM,uBAAuB,EACjC,OAAO;AAAA,EACN,KAAK,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,EAAE,WAAW,MAAM,CAAC,CAAC,EAAE,SAAS;AAAA,EAC1D,YAAY,EACT;AAAA,IACC,EACG,OAAO;AAAA,MACN,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,EAAE,WAAW,MAAM,CAAC,CAAC;AAAA,MAChD,OAAO,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,EAAE,WAAW,MAAM,CAAC,CAAC,EAAE,SAAS;AAAA,IAC9D,CAAC,EACA,OAAO;AAAA,EACZ,EACC,SAAS;AAAA,EACZ,YAAY,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,EAAE,WAAW,MAAM,CAAC,CAAC,CAAC,EAAE,SAAS;AAAA,EAC1E,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,OAAO,GAAG,EAAE,WAAW,MAAM,CAAC,CAAC,CAAC,EAAE,SAAS;AACrE,CAAC,EACA,OAAO;AAiCH,IAAM,4BAA4B,EAAE,OAAO;AAAA,EAChD,WAAW,EAAE,OAAO;AAAA,EACpB,YAAY,EAAE,OAAO;AAAA,EACrB,kBAAkB,EAAE,OAAO;AAC7B,CAAC;AAyBM,IAAM,2BAA2B,EAAE,OAAO;AAAA,EAC/C,aAAa,EAAE,OAAO;AAAA,EACtB,SAAS,0BAA0B,SAAS;AAC9C,CAAC;","names":[]}