UNPKG

@llml-browser/types

Version:

TypeScript types and schemas for the @llml-browser API

212 lines (208 loc) 8.64 kB
import { z } from 'zod';

/**
 * Zod schema type that accepts either a plain string or a `RegExp` instance.
 * Module-local shorthand (not exported) for the matcher union used by every
 * field of `ElementPatternSchema`.
 */
type StringOrRegExpSchema = z.ZodUnion<[z.ZodString, z.ZodType<RegExp, z.ZodTypeDef, RegExp>]>;

/** Value type produced and accepted by `StringOrRegExpSchema`. */
type StringOrRegExp = string | RegExp;

/**
 * Parsed/accepted shape of a single attribute matcher inside an element
 * pattern. Module-local; input and output shapes are the same.
 */
type ElementAttributePatternShape = {
  name: StringOrRegExp;
  value?: StringOrRegExp | undefined;
};

/**
 * Parsed/accepted shape of an element pattern. Module-local; input and
 * output shapes are the same because the schema declares no defaults.
 */
type ElementPatternShape = {
  tag?: StringOrRegExp | undefined;
  attributes?: ElementAttributePatternShape[] | undefined;
  classNames?: StringOrRegExp[] | undefined;
  ids?: StringOrRegExp[] | undefined;
};

/**
 * Zod schema validating the options that drive HTML cleaning.
 *
 * Declared as a "strict" object, i.e. Zod rejects keys that are not listed
 * here. `extractMainContent` and `removeBase64Images` are wrapped in
 * `ZodDefault`, so they are optional on input but always present (as
 * `boolean`) on the parsed output; the default values themselves are not
 * visible in this declaration.
 */
declare const HTMLCleaningOptionsSchema: z.ZodObject<
  {
    allowedHTMLTags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
    disallowedHTMLTags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
    extractMainContent: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    documentBaseUrl: z.ZodOptional<z.ZodString>;
    removeBase64Images: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
  },
  "strict",
  z.ZodTypeAny,
  // Output (after parsing): defaulted flags are required.
  {
    extractMainContent: boolean;
    removeBase64Images: boolean;
    allowedHTMLTags?: string[] | undefined;
    disallowedHTMLTags?: string[] | undefined;
    documentBaseUrl?: string | undefined;
  },
  // Input (before parsing): every field may be omitted.
  {
    allowedHTMLTags?: string[] | undefined;
    disallowedHTMLTags?: string[] | undefined;
    extractMainContent?: boolean | undefined;
    documentBaseUrl?: string | undefined;
    removeBase64Images?: boolean | undefined;
  }
>;

/**
 * Options controlling HTML cleaning and sanitization, i.e. which elements
 * survive processing and which are dropped.
 *
 * @property allowedHTMLTags - Tags to keep in the output (allow-list)
 * @property disallowedHTMLTags - Tags to strip from the output (deny-list)
 * @property extractMainContent - When true, keep only the main content area
 *   (navigation, footers, etc. are removed)
 * @property documentBaseUrl - Base URL used to resolve relative URLs
 *   (deprecated; use the `baseUrl` parameter instead)
 * @property removeBase64Images - When true, drop base64-encoded images to
 *   shrink the payload
 *
 * @example
 * ```typescript
 * const cleaningOptions: HTMLCleaningOptions = {
 *   allowedHTMLTags: ['p', 'h1', 'h2', 'h3', 'ul', 'ol', 'li', 'a', 'strong', 'em'],
 *   disallowedHTMLTags: ['script', 'style', 'iframe', 'form', 'button'],
 *   extractMainContent: true,
 *   documentBaseUrl: 'https://example.com',
 *   removeBase64Images: true
 * };
 * ```
 */
type HTMLCleaningOptions = z.infer<typeof HTMLCleaningOptionsSchema>;

/**
 * Zod schema describing a pattern that matches DOM elements.
 *
 * Every matcher may be a literal string or a `RegExp`. Both the pattern
 * object and each attribute matcher are "strict" objects, i.e. Zod rejects
 * unknown keys.
 */
declare const ElementPatternSchema: z.ZodObject<
  {
    tag: z.ZodOptional<StringOrRegExpSchema>;
    attributes: z.ZodOptional<
      z.ZodArray<
        z.ZodObject<
          {
            name: StringOrRegExpSchema;
            value: z.ZodOptional<StringOrRegExpSchema>;
          },
          "strict",
          z.ZodTypeAny,
          ElementAttributePatternShape,
          ElementAttributePatternShape
        >,
        "many"
      >
    >;
    classNames: z.ZodOptional<z.ZodArray<StringOrRegExpSchema, "many">>;
    ids: z.ZodOptional<z.ZodArray<StringOrRegExpSchema, "many">>;
  },
  "strict",
  z.ZodTypeAny,
  ElementPatternShape,
  ElementPatternShape
>;

/**
 * A pattern for selecting DOM elements by their properties during HTML
 * sanitization.
 *
 * @property tag - Tag name or regex to match (e.g. 'div', /^h[1-6]$/)
 * @property attributes - Attribute matchers, each with a name and an
 *   optional value
 * @property classNames - Class names or regexes to match against the class
 *   attribute
 * @property ids - ID strings or regexes to match against the id attribute
 *
 * @example
 * ```typescript
 * const mainContentPattern: ElementPattern = {
 *   tag: 'div',
 *   attributes: [{ name: 'data-role', value: 'content' }],
 *   classNames: ['main-content', /^content-/],
 *   ids: ['main', 'article-body']
 * };
 *
 * const navigationPattern: ElementPattern = {
 *   tag: /^nav$/i,
 *   classNames: [/nav/, /menu/, /header/],
 *   ids: ['navigation', 'main-menu']
 * };
 * ```
 */
type ElementPattern = z.infer<typeof ElementPatternSchema>;

/**
 * Zod schema for the metrics gathered while cleaning HTML.
 *
 * Declared as a "strip" object, i.e. Zod drops unknown keys instead of
 * rejecting them. All three fields are required numbers.
 */
declare const HTMLCleaningMetricsSchema: z.ZodObject<
  {
    inputSize: z.ZodNumber;
    outputSize: z.ZodNumber;
    compressionRatio: z.ZodNumber;
  },
  "strip",
  z.ZodTypeAny,
  {
    inputSize: number;
    outputSize: number;
    compressionRatio: number;
  },
  {
    inputSize: number;
    outputSize: number;
    compressionRatio: number;
  }
>;

/**
 * Metrics describing how effective a cleaning pass was.
 *
 * @property inputSize - Size of the HTML before cleaning, in bytes
 * @property outputSize - Size of the HTML after cleaning, in bytes
 * @property compressionRatio - Output size divided by input size
 *   (lower is better)
 *
 * @example
 * ```typescript
 * const metrics: HTMLCleaningMetrics = {
 *   inputSize: 125000,       // 125KB input
 *   outputSize: 42000,       // 42KB output
 *   compressionRatio: 0.336  // 33.6% of original size
 * };
 * ```
 */
type HTMLCleaningMetrics = z.infer<typeof HTMLCleaningMetricsSchema>;

/**
 * Zod schema for the value returned by an HTML cleaning operation.
 *
 * Declared as a "strip" object. `metrics` reuses `HTMLCleaningMetricsSchema`
 * and is optional.
 */
declare const HTMLCleaningResultSchema: z.ZodObject<
  {
    cleanedHtml: z.ZodString;
    metrics: z.ZodOptional<typeof HTMLCleaningMetricsSchema>;
  },
  "strip",
  z.ZodTypeAny,
  {
    cleanedHtml: string;
    metrics?: HTMLCleaningMetrics | undefined;
  },
  {
    cleanedHtml: string;
    metrics?: HTMLCleaningMetrics | undefined;
  }
>;

/**
 * Outcome of HTML cleaning: the processed markup plus, optionally, the
 * metrics collected while producing it.
 *
 * @property cleanedHtml - The cleaned and sanitized HTML
 * @property metrics - Metrics for the cleaning pass, when collected
 *
 * @example
 * ```typescript
 * const result: HTMLCleaningResult = {
 *   cleanedHtml: "<div><h1>Article Title</h1><p>Clean content...</p></div>",
 *   metrics: {
 *     inputSize: 125000,
 *     outputSize: 42000,
 *     compressionRatio: 0.336
 *   }
 * };
 * ```
 */
type HTMLCleaningResult = z.infer<typeof HTMLCleaningResultSchema>;

// Prefixed aliases that back the `HTMLCleaningTypes` namespace below; each
// one simply re-points at the declaration of the same (unprefixed) name.
type HTMLCleaningTypes_ElementPattern = ElementPattern;
declare const HTMLCleaningTypes_ElementPatternSchema: typeof ElementPatternSchema;
type HTMLCleaningTypes_HTMLCleaningMetrics = HTMLCleaningMetrics;
declare const HTMLCleaningTypes_HTMLCleaningMetricsSchema: typeof HTMLCleaningMetricsSchema;
type HTMLCleaningTypes_HTMLCleaningOptions = HTMLCleaningOptions;
declare const HTMLCleaningTypes_HTMLCleaningOptionsSchema: typeof HTMLCleaningOptionsSchema;
type HTMLCleaningTypes_HTMLCleaningResult = HTMLCleaningResult;
declare const HTMLCleaningTypes_HTMLCleaningResultSchema: typeof HTMLCleaningResultSchema;

/**
 * Namespace grouping every HTML-cleaning schema and its inferred type under
 * their original, unprefixed names.
 */
declare namespace HTMLCleaningTypes {
  export {
    type HTMLCleaningTypes_ElementPattern as ElementPattern,
    HTMLCleaningTypes_ElementPatternSchema as ElementPatternSchema,
    type HTMLCleaningTypes_HTMLCleaningMetrics as HTMLCleaningMetrics,
    HTMLCleaningTypes_HTMLCleaningMetricsSchema as HTMLCleaningMetricsSchema,
    type HTMLCleaningTypes_HTMLCleaningOptions as HTMLCleaningOptions,
    HTMLCleaningTypes_HTMLCleaningOptionsSchema as HTMLCleaningOptionsSchema,
    type HTMLCleaningTypes_HTMLCleaningResult as HTMLCleaningResult,
    HTMLCleaningTypes_HTMLCleaningResultSchema as HTMLCleaningResultSchema,
  };
}

// Short single-letter export names are part of this chunk's public surface;
// they must stay exactly as-is for sibling bundle files that import them.
export {
  ElementPatternSchema as E,
  HTMLCleaningTypes as H,
  HTMLCleaningOptionsSchema as a,
  type HTMLCleaningOptions as b,
  type ElementPattern as c,
  HTMLCleaningMetricsSchema as d,
  type HTMLCleaningMetrics as e,
  HTMLCleaningResultSchema as f,
  type HTMLCleaningResult as g,
};