UNPKG

@llml-browser/types

Version:

TypeScript types and schemas for the @llml-browser API

662 lines (658 loc) 26.4 kB
import { z } from 'zod';

/**
 * Base interface for all browse response types.
 * Declares the properties shared by both success and error responses.
 *
 * @property success - Indicates whether the operation was successful
 * @property url - The URL that was requested to be browsed
 */
interface BrowseBaseResponse {
  /**
   * Indicates whether the operation was successful.
   * Narrowed to a literal `true` / `false` by the extending interfaces.
   */
  success: boolean;
  /**
   * The URL that was requested to be browsed.
   */
  url: string;
}

/**
 * Response returned when a browse operation is successful.
 * Extends the base response and mixes every `DataFormats` field
 * (markdown, rawHtml, cleanedHtml, links, metadata) directly into the response.
 *
 * @property success - Discriminator field to identify a successful response (always true)
 * @property url - The URL that was requested to be browsed
 * @property status - HTTP status code of the response (if applicable)
 * @property completedTimestamp - ISO timestamp when the browsing operation completed
 * @property browserSessionId - Unique identifier for the browser session that processed this request
 * @property markdown - Optional markdown representation of the page content
 * @property rawHtml - Optional raw HTML of the page as returned by the server
 * @property cleanedHtml - Optional cleaned HTML with unnecessary elements removed
 * @property links - Optional extracted links from the page
 * @property metadata - Optional metadata extracted from the page
 *
 * @example
 * ```typescript
 * const successResponse: BrowseSuccessResponse = {
 *   success: true,
 *   url: "https://example.com",
 *   completedTimestamp: "2025-04-02T14:28:23.000Z",
 *   browserSessionId: "session-123",
 *   markdown: "# Example Page\nContent here..."
 * };
 * ```
 */
interface BrowseSuccessResponse extends BrowseBaseResponse, DataFormats {
  /**
   * Discriminator field to identify a successful response.
   * Will always be `true` for success responses.
   */
  success: true;
  /**
   * HTTP status code of the response (if applicable).
   * Typically 200 for successful requests.
   */
  status?: number;
  /**
   * ISO timestamp when the browsing operation completed.
   * Format: ISO 8601 string (e.g., "2025-04-02T14:28:23.000Z")
   */
  completedTimestamp: string;
  /**
   * Unique identifier for the browser session that processed this request.
   * Can be used for debugging or tracking purposes.
   */
  browserSessionId: string;
}

/**
 * Response returned when a browse operation fails.
 * Contains error information about what went wrong.
 *
 * @property success - Discriminator field to identify an error response (always false)
 * @property url - The URL that was requested to be browsed
 * @property error - Error message describing what went wrong
 *
 * @example
 * ```typescript
 * const errorResponse: BrowseErrorResponse = {
 *   success: false,
 *   url: "https://example.com",
 *   error: "Failed to connect to the server"
 * };
 * ```
 */
interface BrowseErrorResponse extends BrowseBaseResponse {
  /**
   * Discriminator field to identify an error response.
   * Will always be `false` for error responses.
   */
  success: false;
  /**
   * Error message describing what went wrong.
   * Provides details about the failure reason.
   */
  error: string;
}

/**
 * Union type representing either a successful or failed browse operation.
 * Uses a discriminated union pattern with the 'success' property as the discriminator.
 *
 * @example
 * ```typescript
 * function handleResponse(response: BrowseResponse) {
 *   if (response.success) {
 *     // TypeScript knows this is a BrowseSuccessResponse
 *     console.log(response.markdown);
 *   } else {
 *     // TypeScript knows this is a BrowseErrorResponse
 *     console.error(response.error);
 *   }
 * }
 * ```
 */
type BrowseResponse = BrowseSuccessResponse | BrowseErrorResponse;

/**
 * Schema for validating the different data formats that can be returned from a browse operation.
 * Each format is optional and will only be included if requested.
 *
 * Reading the generated type: the type arguments of each `z.ZodObject<...>` are, in order,
 * the field schemas, the unknown-keys mode ("strip" here), the catch-all schema,
 * the parsed output type and the accepted input type (per zod's `ZodObject` generics).
 * Note that `metadata.lastModified` is the only field that also admits `null`.
 */
declare const DataFormatsSchema: z.ZodObject<{
  /**
   * Markdown representation of the page content.
   * Contains the page content converted to Markdown format.
   */
  markdown: z.ZodOptional<z.ZodString>;
  /**
   * Raw HTML of the page as returned by the server.
   * Contains the unmodified HTML response from the target URL.
   */
  rawHtml: z.ZodOptional<z.ZodString>;
  /**
   * Cleaned HTML with unnecessary elements removed.
   * Contains a sanitized version of the HTML with ads, scripts, and other non-content elements removed.
   */
  cleanedHtml: z.ZodOptional<z.ZodString>;
  /**
   * Extracted links from the page.
   * Contains information about links found on the page, grouped into
   * `internal`, `external` and `media` (images / videos / documents) string arrays.
   */
  links: z.ZodOptional<z.ZodObject<{
    internal: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
    external: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
    media: z.ZodOptional<z.ZodObject<{
      images: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
      videos: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
      documents: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
    }, "strip", z.ZodTypeAny, {
      images?: string[] | undefined;
      videos?: string[] | undefined;
      documents?: string[] | undefined;
    }, {
      images?: string[] | undefined;
      videos?: string[] | undefined;
      documents?: string[] | undefined;
    }>>;
  }, "strip", z.ZodTypeAny, {
    internal?: string[] | undefined;
    external?: string[] | undefined;
    media?: {
      images?: string[] | undefined;
      videos?: string[] | undefined;
      documents?: string[] | undefined;
    } | undefined;
  }, {
    internal?: string[] | undefined;
    external?: string[] | undefined;
    media?: {
      images?: string[] | undefined;
      videos?: string[] | undefined;
      documents?: string[] | undefined;
    } | undefined;
  }>>;
  /**
   * Metadata extracted from the page.
   * Contains information like title, description, and other meta tags.
   */
  metadata: z.ZodOptional<z.ZodObject<{
    title: z.ZodOptional<z.ZodString>;
    description: z.ZodOptional<z.ZodString>;
    language: z.ZodOptional<z.ZodString>;
    canonical: z.ZodOptional<z.ZodString>;
    robots: z.ZodOptional<z.ZodString>;
    author: z.ZodOptional<z.ZodString>;
    keywords: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
    lastModified: z.ZodNullable<z.ZodOptional<z.ZodString>>;
    favicon: z.ZodOptional<z.ZodString>;
    ogTitle: z.ZodOptional<z.ZodString>;
    ogDescription: z.ZodOptional<z.ZodString>;
    ogImage: z.ZodOptional<z.ZodString>;
    ogUrl: z.ZodOptional<z.ZodString>;
    ogType: z.ZodOptional<z.ZodString>;
    ogSiteName: z.ZodOptional<z.ZodString>;
    twitterCard: z.ZodOptional<z.ZodString>;
    twitterSite: z.ZodOptional<z.ZodString>;
    twitterCreator: z.ZodOptional<z.ZodString>;
    twitterTitle: z.ZodOptional<z.ZodString>;
    twitterDescription: z.ZodOptional<z.ZodString>;
    twitterImage: z.ZodOptional<z.ZodString>;
    isIframeAllowed: z.ZodOptional<z.ZodBoolean>;
  }, "strip", z.ZodTypeAny, {
    title?: string | undefined;
    description?: string | undefined;
    language?: string | undefined;
    canonical?: string | undefined;
    robots?: string | undefined;
    author?: string | undefined;
    keywords?: string[] | undefined;
    favicon?: string | undefined;
    isIframeAllowed?: boolean | undefined;
    lastModified?: string | null | undefined;
    ogTitle?: string | undefined;
    ogDescription?: string | undefined;
    ogImage?: string | undefined;
    ogUrl?: string | undefined;
    ogType?: string | undefined;
    ogSiteName?: string | undefined;
    twitterCard?: string | undefined;
    twitterSite?: string | undefined;
    twitterCreator?: string | undefined;
    twitterTitle?: string | undefined;
    twitterDescription?: string | undefined;
    twitterImage?: string | undefined;
  }, {
    title?: string | undefined;
    description?: string | undefined;
    language?: string | undefined;
    canonical?: string | undefined;
    robots?: string | undefined;
    author?: string | undefined;
    keywords?: string[] | undefined;
    favicon?: string | undefined;
    isIframeAllowed?: boolean | undefined;
    lastModified?: string | null | undefined;
    ogTitle?: string | undefined;
    ogDescription?: string | undefined;
    ogImage?: string | undefined;
    ogUrl?: string | undefined;
    ogType?: string | undefined;
    ogSiteName?: string | undefined;
    twitterCard?: string | undefined;
    twitterSite?: string | undefined;
    twitterCreator?: string | undefined;
    twitterTitle?: string | undefined;
    twitterDescription?: string | undefined;
    twitterImage?: string | undefined;
  }>>;
}, "strip", z.ZodTypeAny, {
  cleanedHtml?: string | undefined;
  metadata?: {
    title?: string | undefined;
    description?: string | undefined;
    language?: string | undefined;
    canonical?: string | undefined;
    robots?: string | undefined;
    author?: string | undefined;
    keywords?: string[] | undefined;
    favicon?: string | undefined;
    isIframeAllowed?: boolean | undefined;
    lastModified?: string | null | undefined;
    ogTitle?: string | undefined;
    ogDescription?: string | undefined;
    ogImage?: string | undefined;
    ogUrl?: string | undefined;
    ogType?: string | undefined;
    ogSiteName?: string | undefined;
    twitterCard?: string | undefined;
    twitterSite?: string | undefined;
    twitterCreator?: string | undefined;
    twitterTitle?: string | undefined;
    twitterDescription?: string | undefined;
    twitterImage?: string | undefined;
  } | undefined;
  rawHtml?: string | undefined;
  markdown?: string | undefined;
  links?: {
    internal?: string[] | undefined;
    external?: string[] | undefined;
    media?: {
      images?: string[] | undefined;
      videos?: string[] | undefined;
      documents?: string[] | undefined;
    } | undefined;
  } | undefined;
}, {
  cleanedHtml?: string | undefined;
  metadata?: {
    title?: string | undefined;
    description?: string | undefined;
    language?: string | undefined;
    canonical?: string | undefined;
    robots?: string | undefined;
    author?: string | undefined;
    keywords?: string[] | undefined;
    favicon?: string | undefined;
    isIframeAllowed?: boolean | undefined;
    lastModified?: string | null | undefined;
    ogTitle?: string | undefined;
    ogDescription?: string | undefined;
    ogImage?: string | undefined;
    ogUrl?: string | undefined;
    ogType?: string | undefined;
    ogSiteName?: string | undefined;
    twitterCard?: string | undefined;
    twitterSite?: string | undefined;
    twitterCreator?: string | undefined;
    twitterTitle?: string | undefined;
    twitterDescription?: string | undefined;
    twitterImage?: string | undefined;
  } | undefined;
  rawHtml?: string | undefined;
  markdown?: string | undefined;
  links?: {
    internal?: string[] | undefined;
    external?: string[] | undefined;
    media?: {
      images?: string[] | undefined;
      videos?: string[] | undefined;
      documents?: string[] | undefined;
    } | undefined;
  } | undefined;
}>;

/**
 * Represents the various data formats that can be returned from a browse operation.
 * All properties are optional and will only be included if specifically requested.
 * Inferred from `DataFormatsSchema`, so the two cannot drift apart.
 *
 * @property markdown - Optional markdown representation of the page content
 * @property rawHtml - Optional raw HTML of the page as returned by the server
 * @property cleanedHtml - Optional cleaned HTML with unnecessary elements removed
 * @property links - Optional extracted links from the page
 * @property metadata - Optional metadata extracted from the page
 */
type DataFormats = z.infer<typeof DataFormatsSchema>;

/**
 * Enum of available data formats that can be requested in a browse operation.
 * Used to specify which formats should be included in the response.
 * The members match the top-level keys of `DataFormatsSchema`.
 */
declare const DataFormatsEnum: z.ZodEnum<["markdown", "rawHtml", "cleanedHtml", "links", "metadata"]>;

/**
 * Schema for validating browse operation options.
 * Defines the structure of the options object that can be passed to the browse function.
 *
 * Fields wrapped in `z.ZodDefault` are optional on input but always present in the
 * parsed output, which is why the fourth and fifth `z.ZodObject` type arguments
 * (output and input types) differ. `linksOptions` and `cleanedHtmlOptions` use the
 * "strict" unknown-keys mode, while the other objects use "strip".
 */
declare const BrowseOptionsSchema: z.ZodObject<{
  /**
   * Array of data formats to include in the response.
   * If not specified, defaults to ['markdown', 'metadata'].
   *
   * @example
   * ```typescript
   * const options = {
   *   formats: ['markdown', 'links', 'metadata']
   * };
   * ```
   */
  formats: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodEnum<["markdown", "rawHtml", "cleanedHtml", "links", "metadata"]>, "many">>>;
  /**
   * Options for metadata extraction.
   * If not specified, defaults to the default metadata options.
   * Every flag is a boolean with a schema-level default.
   */
  metadataOptions: z.ZodOptional<z.ZodObject<{
    title: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    description: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    language: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    canonical: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    robots: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    author: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    keywords: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    favicon: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    openGraph: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    twitter: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    isIframeAllowed: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
  }, "strip", z.ZodTypeAny, {
    title: boolean;
    description: boolean;
    language: boolean;
    canonical: boolean;
    robots: boolean;
    author: boolean;
    keywords: boolean;
    favicon: boolean;
    openGraph: boolean;
    twitter: boolean;
    isIframeAllowed: boolean;
  }, {
    title?: boolean | undefined;
    description?: boolean | undefined;
    language?: boolean | undefined;
    canonical?: boolean | undefined;
    robots?: boolean | undefined;
    author?: boolean | undefined;
    keywords?: boolean | undefined;
    favicon?: boolean | undefined;
    openGraph?: boolean | undefined;
    twitter?: boolean | undefined;
    isIframeAllowed?: boolean | undefined;
  }>>;
  /**
   * Options for link extraction.
   * If not specified, defaults to the default link extraction options.
   */
  linksOptions: z.ZodOptional<z.ZodObject<{
    includeExternal: z.ZodOptional<z.ZodBoolean>;
    includeMedia: z.ZodOptional<z.ZodBoolean>;
    excludePatterns: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
    removeQueryParams: z.ZodOptional<z.ZodBoolean>;
  }, "strict", z.ZodTypeAny, {
    includeExternal?: boolean | undefined;
    includeMedia?: boolean | undefined;
    excludePatterns?: string[] | undefined;
    removeQueryParams?: boolean | undefined;
  }, {
    includeExternal?: boolean | undefined;
    includeMedia?: boolean | undefined;
    excludePatterns?: string[] | undefined;
    removeQueryParams?: boolean | undefined;
  }>>;
  /**
   * Options for HTML cleaning.
   * If not specified, defaults to the default HTML cleaning options.
   */
  cleanedHtmlOptions: z.ZodOptional<z.ZodObject<{
    allowedHTMLTags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
    disallowedHTMLTags: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
    extractMainContent: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
    documentBaseUrl: z.ZodOptional<z.ZodString>;
    removeBase64Images: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
  }, "strict", z.ZodTypeAny, {
    extractMainContent: boolean;
    removeBase64Images: boolean;
    allowedHTMLTags?: string[] | undefined;
    disallowedHTMLTags?: string[] | undefined;
    documentBaseUrl?: string | undefined;
  }, {
    allowedHTMLTags?: string[] | undefined;
    disallowedHTMLTags?: string[] | undefined;
    extractMainContent?: boolean | undefined;
    documentBaseUrl?: string | undefined;
    removeBase64Images?: boolean | undefined;
  }>>;
}, "strip", z.ZodTypeAny, {
  formats: ("cleanedHtml" | "metadata" | "rawHtml" | "markdown" | "links")[];
  metadataOptions?: {
    title: boolean;
    description: boolean;
    language: boolean;
    canonical: boolean;
    robots: boolean;
    author: boolean;
    keywords: boolean;
    favicon: boolean;
    openGraph: boolean;
    twitter: boolean;
    isIframeAllowed: boolean;
  } | undefined;
  linksOptions?: {
    includeExternal?: boolean | undefined;
    includeMedia?: boolean | undefined;
    excludePatterns?: string[] | undefined;
    removeQueryParams?: boolean | undefined;
  } | undefined;
  cleanedHtmlOptions?: {
    extractMainContent: boolean;
    removeBase64Images: boolean;
    allowedHTMLTags?: string[] | undefined;
    disallowedHTMLTags?: string[] | undefined;
    documentBaseUrl?: string | undefined;
  } | undefined;
}, {
  metadataOptions?: {
    title?: boolean | undefined;
    description?: boolean | undefined;
    language?: boolean | undefined;
    canonical?: boolean | undefined;
    robots?: boolean | undefined;
    author?: boolean | undefined;
    keywords?: boolean | undefined;
    favicon?: boolean | undefined;
    openGraph?: boolean | undefined;
    twitter?: boolean | undefined;
    isIframeAllowed?: boolean | undefined;
  } | undefined;
  linksOptions?: {
    includeExternal?: boolean | undefined;
    includeMedia?: boolean | undefined;
    excludePatterns?: string[] | undefined;
    removeQueryParams?: boolean | undefined;
  } | undefined;
  cleanedHtmlOptions?: {
    allowedHTMLTags?: string[] | undefined;
    disallowedHTMLTags?: string[] | undefined;
    extractMainContent?: boolean | undefined;
    documentBaseUrl?: string | undefined;
    removeBase64Images?: boolean | undefined;
  } | undefined;
  formats?: ("cleanedHtml" | "metadata" | "rawHtml" | "markdown" | "links")[] | undefined;
}>;

/**
 * Represents the options that can be passed to the browse function.
 * Defines the structure of the options object.
 *
 * Because this is `z.infer` (the parsed output type), `formats` is required here
 * even though it is optional on the schema's input side.
 *
 * @property formats - Array of data formats to include in the response
 * @property metadataOptions - Options for metadata extraction
 * @property linksOptions - Options for link extraction
 * @property cleanedHtmlOptions - Options for HTML cleaning
 */
type BrowseOptions = z.infer<typeof BrowseOptionsSchema>;

/**
 * Type representing a successful text response from a GET query.
 * A string containing the response data.
 *
 * Note: the identifier is spelled "Sucess" in the declared API; use it as written.
 */
type BrowseQuerySucessTextResponse = string;

/**
 * Type representing a successful JSON response from a GET query.
 * Includes the browse response data and the request duration.
 *
 * Note: the identifier is spelled "Sucess" in the declared API; use it as written.
 *
 * @property success - Discriminator field to identify a successful response (always true)
 * @property url - The URL that was requested to be browsed
 * @property status - HTTP status code of the response (if applicable)
 * @property completedTimestamp - ISO timestamp when the browsing operation completed
 * @property browserSessionId - Unique identifier for the browser session
 * @property requestDuration - Duration of the request in milliseconds
 * @property markdown - Optional markdown representation of the page content
 * @property rawHtml - Optional raw HTML of the page as returned by the server
 * @property cleanedHtml - Optional cleaned HTML with unnecessary elements removed
 * @property links - Optional extracted links from the page
 * @property metadata - Optional metadata extracted from the page
 *
 * @example
 * ```typescript
 * const jsonResponse: BrowseQuerySucessJsonResponse = {
 *   success: true,
 *   url: "https://example.com",
 *   completedTimestamp: "2025-04-02T14:28:23.000Z",
 *   browserSessionId: "session-123",
 *   markdown: "# Example Page\nContent here...",
 *   requestDuration: "1234ms"
 * };
 * ```
 */
type BrowseQuerySucessJsonResponse = BrowseSuccessResponse & {
  /**
   * Duration of the request in milliseconds.
   * Format: string with "ms" suffix (e.g., "1234ms")
   */
  requestDuration: string;
};

/**
 * Union type representing the possible responses from a GET query.
 * Can be either a successful text response, a successful JSON response, or an error response.
 *
 * The text variant is a bare `string`, so check `typeof response === 'string'`
 * before reading `response.success` or any other property.
 */
type BrowseQueryResponse = BrowseQuerySucessJsonResponse | BrowseQuerySucessTextResponse | BrowseErrorResponse;

/**
 * Type representing an error response from a POST query.
 * Contains an error message describing what went wrong.
 * Unlike `BrowseErrorResponse`, it carries no `url` field.
 *
 * @property success - Discriminator field to identify an error response (always false)
 * @property error - Error message describing what went wrong
 *
 * @example
 * ```typescript
 * const errorResponse: BrowsePostErrorResponse = {
 *   success: false,
 *   error: "Invalid URL format in batch request"
 * };
 * ```
 */
type BrowsePostErrorResponse = {
  /**
   * Discriminator field to identify an error response.
   * Will always be `false` for error responses.
   */
  success: false;
  /**
   * Error message describing what went wrong.
   * Provides details about the failure reason.
   */
  error: string;
};

/**
 * Type representing a successful response from a POST query.
 * Includes the request duration, successful responses, and failed responses.
 * This type declares no `success` property of its own.
 *
 * @property requestDuration - Duration of the request in milliseconds
 * @property successful - Array of successful browse responses
 * @property failed - Array of failed browse responses
 *
 * @example
 * ```typescript
 * const successResponse: BrowsePostSuccessResponse = {
 *   requestDuration: "2345ms",
 *   successful: [
 *     {
 *       success: true,
 *       url: "https://example.com",
 *       completedTimestamp: "2025-04-02T14:28:23.000Z",
 *       browserSessionId: "session-123",
 *       markdown: "# Example Page\nContent here..."
 *     }
 *   ],
 *   failed: [
 *     {
 *       success: false,
 *       url: "https://invalid-url.com",
 *       error: "Failed to connect to the server"
 *     }
 *   ]
 * };
 * ```
 */
type BrowsePostSuccessResponse = {
  /**
   * Duration of the request in milliseconds.
   * Format: string with "ms" suffix (e.g., "2345ms")
   */
  requestDuration: string;
  /**
   * Array of successful browse responses.
   * Contains details for each URL that was successfully processed.
   */
  successful: BrowseSuccessResponse[];
  /**
   * Array of failed browse responses.
   * Contains error details for each URL that failed processing.
   */
  failed: BrowseErrorResponse[];
};

/**
 * Union type representing the possible responses from a POST query.
 * Can be either a successful response or an error response.
 *
 * `BrowsePostSuccessResponse` has no `success` property, so `response.success`
 * cannot be read on the union directly; narrow with the `in` operator instead.
 *
 * @example
 * ```typescript
 * function handleBatchResponse(response: BrowsePostResponse) {
 *   if ('error' in response) {
 *     // This is a BrowsePostErrorResponse
 *     console.error(response.error);
 *   } else {
 *     // This is a BrowsePostSuccessResponse
 *     console.log(`Processed ${response.successful.length} URLs successfully`);
 *     console.log(`Failed to process ${response.failed.length} URLs`);
 *   }
 * }
 * ```
 */
type BrowsePostResponse = BrowsePostSuccessResponse | BrowsePostErrorResponse;

// Prefixed aliases used solely to populate the `BrowseRouterTypes` namespace below
// (local aliases re-exported under their unprefixed names).
type BrowseRouterTypes_BrowseErrorResponse = BrowseErrorResponse;
type BrowseRouterTypes_BrowseOptions = BrowseOptions;
declare const BrowseRouterTypes_BrowseOptionsSchema: typeof BrowseOptionsSchema;
type BrowseRouterTypes_BrowsePostErrorResponse = BrowsePostErrorResponse;
type BrowseRouterTypes_BrowsePostResponse = BrowsePostResponse;
type BrowseRouterTypes_BrowsePostSuccessResponse = BrowsePostSuccessResponse;
type BrowseRouterTypes_BrowseQueryResponse = BrowseQueryResponse;
type BrowseRouterTypes_BrowseQuerySucessJsonResponse = BrowseQuerySucessJsonResponse;
type BrowseRouterTypes_BrowseQuerySucessTextResponse = BrowseQuerySucessTextResponse;
type BrowseRouterTypes_BrowseResponse = BrowseResponse;
type BrowseRouterTypes_BrowseSuccessResponse = BrowseSuccessResponse;
type BrowseRouterTypes_DataFormats = DataFormats;
declare const BrowseRouterTypes_DataFormatsEnum: typeof DataFormatsEnum;

/**
 * Namespace grouping the browse types and the two exported schema values
 * (`BrowseOptionsSchema`, `DataFormatsEnum`) under a single name.
 * `DataFormatsSchema` is not part of this namespace.
 */
declare namespace BrowseRouterTypes {
  export {
    type BrowseRouterTypes_BrowseErrorResponse as BrowseErrorResponse,
    type BrowseRouterTypes_BrowseOptions as BrowseOptions,
    BrowseRouterTypes_BrowseOptionsSchema as BrowseOptionsSchema,
    type BrowseRouterTypes_BrowsePostErrorResponse as BrowsePostErrorResponse,
    type BrowseRouterTypes_BrowsePostResponse as BrowsePostResponse,
    type BrowseRouterTypes_BrowsePostSuccessResponse as BrowsePostSuccessResponse,
    type BrowseRouterTypes_BrowseQueryResponse as BrowseQueryResponse,
    type BrowseRouterTypes_BrowseQuerySucessJsonResponse as BrowseQuerySucessJsonResponse,
    type BrowseRouterTypes_BrowseQuerySucessTextResponse as BrowseQuerySucessTextResponse,
    type BrowseRouterTypes_BrowseResponse as BrowseResponse,
    type BrowseRouterTypes_BrowseSuccessResponse as BrowseSuccessResponse,
    type BrowseRouterTypes_DataFormats as DataFormats,
    BrowseRouterTypes_DataFormatsEnum as DataFormatsEnum
  };
}

// Short export names (B, D, a–l) — presumably assigned by the bundler for
// re-export from the package entry point; verify before importing them directly.
export {
  BrowseRouterTypes as B,
  type DataFormats as D,
  type BrowseSuccessResponse as a,
  type BrowseErrorResponse as b,
  type BrowseResponse as c,
  DataFormatsEnum as d,
  BrowseOptionsSchema as e,
  type BrowseOptions as f,
  type BrowseQuerySucessTextResponse as g,
  type BrowseQuerySucessJsonResponse as h,
  type BrowseQueryResponse as i,
  type BrowsePostErrorResponse as j,
  type BrowsePostSuccessResponse as k,
  type BrowsePostResponse as l
};