@llml-browser/types
Version:
TypeScript types and schemas for the @llml-browser API
212 lines (208 loc) • 8.64 kB
TypeScript
import { z } from 'zod';
/**
* Schema for HTML cleaning configuration options.
* Defines the validation rules for HTML sanitization parameters.
*
* What this declaration states at the type level:
* - `allowedHTMLTags` / `disallowedHTMLTags`: optional arrays of strings.
* - `documentBaseUrl`: optional string.
* - `extractMainContent` / `removeBase64Images`: optional booleans wrapped in
*   `ZodDefault`, so callers may omit them but the parsed result always
*   carries a `boolean`. The default values themselves are not visible in
*   this declaration.
* - Unknown-keys mode is `"strict"`: in Zod, a strict object schema reports
*   keys that are not declared here as errors rather than dropping them.
*
* The two trailing object types are, in order, the parsed (output) shape and
* the accepted (input) shape; they differ only in the two defaulted fields.
*/
declare const HTMLCleaningOptionsSchema: z.ZodObject<{
allowedHTMLTags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
disallowedHTMLTags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
extractMainContent: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
documentBaseUrl: z.ZodOptional<z.ZodString>;
removeBase64Images: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
}, "strict", z.ZodTypeAny, {
extractMainContent: boolean;
removeBase64Images: boolean;
allowedHTMLTags?: string[] | undefined;
disallowedHTMLTags?: string[] | undefined;
documentBaseUrl?: string | undefined;
}, {
allowedHTMLTags?: string[] | undefined;
disallowedHTMLTags?: string[] | undefined;
extractMainContent?: boolean | undefined;
documentBaseUrl?: string | undefined;
removeBase64Images?: boolean | undefined;
}>;
/**
* Configuration options for HTML content cleaning and sanitization.
* Controls which elements are preserved or removed during processing.
*
* This alias is the *parsed* (output) type of `HTMLCleaningOptionsSchema`:
* `extractMainContent` and `removeBase64Images` are required booleans here
* because the schema fills them in with defaults. To type a value that has
* not been parsed yet (where those two fields may be omitted), use
* `z.input<typeof HTMLCleaningOptionsSchema>` instead.
*
* @property allowedHTMLTags - HTML tags to preserve in the output (whitelist)
* @property disallowedHTMLTags - HTML tags to remove from the output (blacklist)
* @property extractMainContent - Whether to extract only the main content area, removing navigation, footers, etc.
* @property documentBaseUrl - Base URL for resolving relative URLs (deprecated, use baseUrl parameter instead)
* @property removeBase64Images - Whether to remove base64 encoded images to reduce payload size
*
* @example
* ```typescript
* const cleaningOptions: HTMLCleaningOptions = {
*   allowedHTMLTags: ['p', 'h1', 'h2', 'h3', 'ul', 'ol', 'li', 'a', 'strong', 'em'],
*   disallowedHTMLTags: ['script', 'style', 'iframe', 'form', 'button'],
*   extractMainContent: true,
*   documentBaseUrl: 'https://example.com',
*   removeBase64Images: true
* };
* ```
*/
type HTMLCleaningOptions = z.infer<typeof HTMLCleaningOptionsSchema>;
/**
* Schema for defining patterns to match DOM elements.
* Used to create selectors for targeting specific elements during HTML processing.
*
* What this declaration states at the type level:
* - Every matcher is either a `string` or a `RegExp`.
* - `tag`: optional single matcher.
* - `attributes`: optional array of `{ name, value? }` objects, where `name`
*   is a required matcher and `value` is an optional matcher.
* - `classNames` / `ids`: optional arrays of matchers.
* - Both the outer object and each `attributes` entry use `"strict"`
*   unknown-keys mode, so in Zod undeclared keys are reported as errors.
* - No field has a default, so the output and input shapes are identical.
*
* NOTE(review): how the four criteria are combined when several are present
* (all must match vs. any may match), and whether string matchers are compared
* exactly or case-insensitively, is not visible in this declaration - confirm
* against the code that consumes the pattern.
*/
declare const ElementPatternSchema: z.ZodObject<{
tag: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodType<RegExp, z.ZodTypeDef, RegExp>]>>;
attributes: z.ZodOptional<z.ZodArray<z.ZodObject<{
name: z.ZodUnion<[z.ZodString, z.ZodType<RegExp, z.ZodTypeDef, RegExp>]>;
value: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodType<RegExp, z.ZodTypeDef, RegExp>]>>;
}, "strict", z.ZodTypeAny, {
name: string | RegExp;
value?: string | RegExp | undefined;
}, {
name: string | RegExp;
value?: string | RegExp | undefined;
}>, "many">>;
classNames: z.ZodOptional<z.ZodArray<z.ZodUnion<[z.ZodString, z.ZodType<RegExp, z.ZodTypeDef, RegExp>]>, "many">>;
ids: z.ZodOptional<z.ZodArray<z.ZodUnion<[z.ZodString, z.ZodType<RegExp, z.ZodTypeDef, RegExp>]>, "many">>;
}, "strict", z.ZodTypeAny, {
tag?: string | RegExp | undefined;
attributes?: {
name: string | RegExp;
value?: string | RegExp | undefined;
}[] | undefined;
classNames?: (string | RegExp)[] | undefined;
ids?: (string | RegExp)[] | undefined;
}, {
tag?: string | RegExp | undefined;
attributes?: {
name: string | RegExp;
value?: string | RegExp | undefined;
}[] | undefined;
classNames?: (string | RegExp)[] | undefined;
ids?: (string | RegExp)[] | undefined;
}>;
/**
* Defines a pattern for matching DOM elements based on their properties.
* Used for targeted element selection during HTML sanitization.
*
* Every field is optional, and each matcher may be given either as a plain
* string or as a `RegExp`.
*
* NOTE(review): the rule for combining several criteria in one pattern is not
* defined by this type - confirm against the consuming implementation.
*
* @property tag - Element tag name or regex pattern to match (e.g., 'div', /^h[1-6]$/)
* @property attributes - List of attribute patterns to match by name and optional value
* @property classNames - List of class names or regex patterns to match in the class attribute
* @property ids - List of ID patterns to match in the id attribute
*
* @example
* ```typescript
* const mainContentPattern: ElementPattern = {
*   tag: 'div',
*   attributes: [{ name: 'data-role', value: 'content' }],
*   classNames: ['main-content', /^content-/],
*   ids: ['main', 'article-body']
* };
*
* const navigationPattern: ElementPattern = {
*   tag: /^nav$/i,
*   classNames: [/nav/, /menu/, /header/],
*   ids: ['navigation', 'main-menu']
* };
* ```
*/
type ElementPattern = z.infer<typeof ElementPatternSchema>;
/**
* Schema for metrics collected during HTML cleaning.
* Tracks performance and effectiveness of the cleaning process.
*
* What this declaration states at the type level:
* - `inputSize`, `outputSize`, `compressionRatio`: required numbers.
* - Unknown-keys mode is `"strip"`: in Zod, undeclared keys are accepted on
*   input and removed from the parsed result.
* - Output and input shapes are identical (no defaults or transforms).
*
* Any range checks (for example non-negative sizes) would not appear in this
* declaration, so none should be assumed from it.
*/
declare const HTMLCleaningMetricsSchema: z.ZodObject<{
inputSize: z.ZodNumber;
outputSize: z.ZodNumber;
compressionRatio: z.ZodNumber;
}, "strip", z.ZodTypeAny, {
inputSize: number;
outputSize: number;
compressionRatio: number;
}, {
inputSize: number;
outputSize: number;
compressionRatio: number;
}>;
/**
* Metrics collected during HTML cleaning and sanitization.
* Provides insights into the effectiveness of the cleaning process.
*
* All three fields are required numbers.
*
* @property inputSize - Size of input HTML in bytes before cleaning
* @property outputSize - Size of cleaned HTML in bytes after processing
* @property compressionRatio - Ratio of output size to input size (lower is better)
*
* @example
* ```typescript
* const metrics: HTMLCleaningMetrics = {
*   inputSize: 125000,        // 125KB input
*   outputSize: 42000,        // 42KB output
*   compressionRatio: 0.336   // 33.6% of original size
* };
* ```
*/
type HTMLCleaningMetrics = z.infer<typeof HTMLCleaningMetricsSchema>;
/**
* Schema for the result of HTML cleaning operation.
* Defines the structure of the cleaning operation output.
*
* What this declaration states at the type level:
* - `cleanedHtml`: required string.
* - `metrics`: optional object with required numeric `inputSize`,
*   `outputSize` and `compressionRatio` - the same three fields, with the same
*   types, as `HTMLCleaningMetricsSchema` (inlined here by the declaration
*   emitter).
* - Both the outer object and the `metrics` object use `"strip"` unknown-keys
*   mode, so in Zod undeclared keys are removed from the parsed result.
* - Output and input shapes are identical.
*/
declare const HTMLCleaningResultSchema: z.ZodObject<{
cleanedHtml: z.ZodString;
metrics: z.ZodOptional<z.ZodObject<{
inputSize: z.ZodNumber;
outputSize: z.ZodNumber;
compressionRatio: z.ZodNumber;
}, "strip", z.ZodTypeAny, {
inputSize: number;
outputSize: number;
compressionRatio: number;
}, {
inputSize: number;
outputSize: number;
compressionRatio: number;
}>>;
}, "strip", z.ZodTypeAny, {
cleanedHtml: string;
metrics?: {
inputSize: number;
outputSize: number;
compressionRatio: number;
} | undefined;
}, {
cleanedHtml: string;
metrics?: {
inputSize: number;
outputSize: number;
compressionRatio: number;
} | undefined;
}>;
/**
* Result of HTML cleaning and sanitization.
* Contains the processed HTML and optional performance metrics.
*
* `cleanedHtml` is always present; `metrics` may be absent, so read it with
* optional chaining or an explicit check.
*
* @property cleanedHtml - Cleaned and sanitized HTML content
* @property metrics - Optional metrics collected during the cleaning process
*
* @example
* ```typescript
* const result: HTMLCleaningResult = {
*   cleanedHtml: "<div><h1>Article Title</h1><p>Clean content...</p></div>",
*   metrics: {
*     inputSize: 125000,
*     outputSize: 42000,
*     compressionRatio: 0.336
*   }
* };
* ```
*/
type HTMLCleaningResult = z.infer<typeof HTMLCleaningResultSchema>;
// Prefixed aliases of this module's declarations. They exist so that the
// `HTMLCleaningTypes` namespace below can re-export each declaration under
// its original, unprefixed name.

// Type aliases.
type HTMLCleaningTypes_ElementPattern = ElementPattern;
type HTMLCleaningTypes_HTMLCleaningMetrics = HTMLCleaningMetrics;
type HTMLCleaningTypes_HTMLCleaningOptions = HTMLCleaningOptions;
type HTMLCleaningTypes_HTMLCleaningResult = HTMLCleaningResult;

// Schema value aliases.
declare const HTMLCleaningTypes_ElementPatternSchema: typeof ElementPatternSchema;
declare const HTMLCleaningTypes_HTMLCleaningMetricsSchema: typeof HTMLCleaningMetricsSchema;
declare const HTMLCleaningTypes_HTMLCleaningOptionsSchema: typeof HTMLCleaningOptionsSchema;
declare const HTMLCleaningTypes_HTMLCleaningResultSchema: typeof HTMLCleaningResultSchema;

/**
 * Namespace that groups the four HTML-cleaning schemas and their inferred
 * types under their original names.
 */
declare namespace HTMLCleaningTypes {
  export {
    type HTMLCleaningTypes_ElementPattern as ElementPattern,
    HTMLCleaningTypes_ElementPatternSchema as ElementPatternSchema,
    type HTMLCleaningTypes_HTMLCleaningMetrics as HTMLCleaningMetrics,
    HTMLCleaningTypes_HTMLCleaningMetricsSchema as HTMLCleaningMetricsSchema,
    type HTMLCleaningTypes_HTMLCleaningOptions as HTMLCleaningOptions,
    HTMLCleaningTypes_HTMLCleaningOptionsSchema as HTMLCleaningOptionsSchema,
    type HTMLCleaningTypes_HTMLCleaningResult as HTMLCleaningResult,
    HTMLCleaningTypes_HTMLCleaningResultSchema as HTMLCleaningResultSchema,
  };
}
// Module exports under single-letter names.
// NOTE(review): these look like bundler-generated short aliases meant to be
// re-exported under full names by another entry file - confirm against the
// package's public index declaration before importing them directly.
export {
  ElementPatternSchema as E,
  HTMLCleaningTypes as H,
  HTMLCleaningOptionsSchema as a,
  type HTMLCleaningOptions as b,
  type ElementPattern as c,
  HTMLCleaningMetricsSchema as d,
  type HTMLCleaningMetrics as e,
  HTMLCleaningResultSchema as f,
  type HTMLCleaningResult as g,
};