UNPKG

@llml-browser/types

Version:

TypeScript types and schemas for the @llml-browser API

195 lines (190 loc) 7.9 kB
"use strict";

// ---------------------------------------------------------------------------
// CommonJS interop helpers.
// NOTE(review): these look like bundler-generated boilerplate; prefer changing
// the TypeScript sources named in the section comments below over hand-editing
// this file.
// ---------------------------------------------------------------------------
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Defines one enumerable getter on `target` for every key of `all`.
 *
 * @param {object} target - Object that receives the getters.
 * @param {Record<string, () => unknown>} all - Map of export name to a thunk
 *   returning the exported value. The thunk is installed as the getter itself,
 *   so the value is only read when the property is accessed.
 */
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};

/**
 * Copies the own properties of `from` onto `to` as forwarding getters.
 *
 * Keys that `to` already owns, and the key equal to `except`, are skipped.
 * Each copied property is enumerable when the source descriptor is missing or
 * is itself enumerable.
 *
 * @param {object} to - Destination object.
 * @param {object|Function} from - Source; anything else is ignored.
 * @param {string} [except] - Single key to leave out.
 * @param {PropertyDescriptor} [desc] - Scratch slot for the source descriptor;
 *   callers in this file do not pass it.
 * @returns {object} `to`.
 */
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};

/**
 * Wraps an exports namespace in a fresh object carrying `__esModule: true`
 * and forwarding getters for every own property of `mod`.
 *
 * @param {object} mod - Namespace object populated through `__export`.
 * @returns {object} Object suitable for assigning to `module.exports`.
 */
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// src/routers/browse/index.ts
var browse_exports = {};
__export(browse_exports, {
  BrowseOptionsSchema: () => BrowseOptionsSchema,
  DataFormatsEnum: () => DataFormatsEnum
});
module.exports = __toCommonJS(browse_exports);

// src/services/html-cleaning/types.ts
var import_zod = require("zod");

/**
 * Options accepted by the HTML cleaning step.
 * Strict: unknown keys are rejected. `extractMainContent` and
 * `removeBase64Images` default to `true` when omitted.
 */
var HTMLCleaningOptionsSchema = import_zod.z.object({
  allowedHTMLTags: import_zod.z.array(import_zod.z.string()).optional(),
  disallowedHTMLTags: import_zod.z.array(import_zod.z.string()).optional(),
  extractMainContent: import_zod.z.boolean().optional().default(true),
  /*
   * Deprecated property, will be removed in the future.
   * Add baseUrl to HTMLCleaning as a required parameter instead.
   */
  documentBaseUrl: import_zod.z.string().optional(),
  removeBase64Images: import_zod.z.boolean().optional().default(true)
}).strict();

/**
 * Pattern describing an element by tag, attributes, class names and/or ids.
 * Every matcher is either a string or a RegExp instance; all parts are
 * optional. Strict at both the top level and for each attribute entry.
 * Not referenced by any other schema in this file.
 */
var ElementPatternSchema = import_zod.z.object({
  tag: import_zod.z.union([import_zod.z.string(), import_zod.z.instanceof(RegExp)]).optional(),
  attributes: import_zod.z.array(
    import_zod.z.object({
      name: import_zod.z.union([import_zod.z.string(), import_zod.z.instanceof(RegExp)]),
      value: import_zod.z.union([import_zod.z.string(), import_zod.z.instanceof(RegExp)]).optional()
    }).strict()
  ).optional(),
  classNames: import_zod.z.array(import_zod.z.union([import_zod.z.string(), import_zod.z.instanceof(RegExp)])).optional(),
  ids: import_zod.z.array(import_zod.z.union([import_zod.z.string(), import_zod.z.instanceof(RegExp)])).optional()
}).strict();

/**
 * Numeric metrics attached to a cleaning result.
 * NOTE(review): units and the direction of `compressionRatio` are not
 * established anywhere in this file — confirm against the cleaning service.
 */
var HTMLCleaningMetricsSchema = import_zod.z.object({
  inputSize: import_zod.z.number(),
  outputSize: import_zod.z.number(),
  compressionRatio: import_zod.z.number()
});

/** Result of HTML cleaning: the cleaned markup plus optional metrics. */
var HTMLCleaningResultSchema = import_zod.z.object({
  cleanedHtml: import_zod.z.string(),
  metrics: HTMLCleaningMetricsSchema.optional()
});

// src/services/link/types.ts
var import_zod2 = require("zod");

/** Options for link extraction. Strict: unknown keys are rejected. */
var LinkExtractionOptionsSchema = import_zod2.z.object({
  // Include links from other domains
  includeExternal: import_zod2.z.boolean().optional(),
  // Include media files (images, videos, docs)
  includeMedia: import_zod2.z.boolean().optional(),
  // Regex patterns to exclude URLs
  excludePatterns: import_zod2.z.array(import_zod2.z.string()).optional(),
  // Remove query parameters from URLs
  removeQueryParams: import_zod2.z.boolean().optional()
}).strict();

/**
 * Links found on a page, grouped into internal, external and media buckets.
 * Every bucket (and every media sub-bucket) is an optional array of strings.
 */
var ExtractedLinksSchema = import_zod2.z.object({
  internal: import_zod2.z.array(import_zod2.z.string()).optional(),
  external: import_zod2.z.array(import_zod2.z.string()).optional(),
  media: import_zod2.z.object({
    images: import_zod2.z.array(import_zod2.z.string()).optional(),
    videos: import_zod2.z.array(import_zod2.z.string()).optional(),
    documents: import_zod2.z.array(import_zod2.z.string()).optional()
  }).optional()
});

// src/services/metadata/types.ts
var import_zod3 = require("zod");

/**
 * Boolean switches for metadata extraction; each one defaults to `true` when
 * omitted. Presumably each flag toggles the same-named field(s) of
 * PageMetadataSchema — verify against the metadata service.
 * NOTE(review): unlike HTMLCleaningOptionsSchema and
 * LinkExtractionOptionsSchema, this schema is not `.strict()` — confirm that
 * is intentional.
 */
var MetadataOptionsSchema = import_zod3.z.object({
  title: import_zod3.z.boolean().optional().default(true),
  description: import_zod3.z.boolean().optional().default(true),
  language: import_zod3.z.boolean().optional().default(true),
  canonical: import_zod3.z.boolean().optional().default(true),
  robots: import_zod3.z.boolean().optional().default(true),
  author: import_zod3.z.boolean().optional().default(true),
  keywords: import_zod3.z.boolean().optional().default(true),
  favicon: import_zod3.z.boolean().optional().default(true),
  openGraph: import_zod3.z.boolean().optional().default(true),
  twitter: import_zod3.z.boolean().optional().default(true),
  isIframeAllowed: import_zod3.z.boolean().optional().default(true)
});

/**
 * Metadata extracted from a page. Every field is optional; `canonical`,
 * `favicon`, `ogImage`, `ogUrl` and `twitterImage` must be valid URLs when
 * present, and `lastModified` may additionally be `null`.
 */
var PageMetadataSchema = import_zod3.z.object({
  // Basic metadata
  title: import_zod3.z.string().optional(),
  description: import_zod3.z.string().optional(),
  language: import_zod3.z.string().optional(),
  canonical: import_zod3.z.string().url().optional(),
  robots: import_zod3.z.string().optional(),
  author: import_zod3.z.string().optional(),
  keywords: import_zod3.z.array(import_zod3.z.string()).optional(),
  lastModified: import_zod3.z.string().optional().nullable(),
  favicon: import_zod3.z.string().url().optional(),
  // OpenGraph metadata (flattened)
  ogTitle: import_zod3.z.string().optional(),
  ogDescription: import_zod3.z.string().optional(),
  ogImage: import_zod3.z.string().url().optional(),
  ogUrl: import_zod3.z.string().url().optional(),
  ogType: import_zod3.z.string().optional(),
  ogSiteName: import_zod3.z.string().optional(),
  // Twitter Card metadata (flattened)
  twitterCard: import_zod3.z.string().optional(),
  twitterSite: import_zod3.z.string().optional(),
  twitterCreator: import_zod3.z.string().optional(),
  twitterTitle: import_zod3.z.string().optional(),
  twitterDescription: import_zod3.z.string().optional(),
  twitterImage: import_zod3.z.string().url().optional(),
  // iframe allowed
  isIframeAllowed: import_zod3.z.boolean().optional()
});

// src/routers/browse/types.ts
var import_zod4 = require("zod");

/**
 * Shape of the per-format payload; every format is optional.
 * Its keys are the same five names listed in DataFormatsEnum below.
 * Not exported from this module.
 */
var DataFormatsSchema = import_zod4.z.object({
  /**
   * Markdown representation of the page content.
   * Contains the page content converted to Markdown format.
   */
  markdown: import_zod4.z.string().optional(),
  /**
   * Raw HTML of the page as returned by the server.
   * Contains the unmodified HTML response from the target URL.
   */
  rawHtml: import_zod4.z.string().optional(),
  /**
   * Cleaned HTML with unnecessary elements removed.
   * Contains a sanitized version of the HTML with ads, scripts, and other non-content elements removed.
   */
  cleanedHtml: import_zod4.z.string().optional(),
  /**
   * Extracted links from the page.
   * Contains information about links found on the page.
   */
  links: ExtractedLinksSchema.optional(),
  /**
   * Metadata extracted from the page.
   * Contains information like title, description, and other meta tags.
   */
  metadata: PageMetadataSchema.optional()
});

/** Names of the data formats a caller may request (exported). */
var DataFormatsEnum = import_zod4.z.enum([
  "markdown",
  "rawHtml",
  "cleanedHtml",
  "links",
  "metadata"
]);

/**
 * Options accepted by the browse endpoint (exported).
 * Combines the requested formats with the per-feature option schemas above.
 */
var BrowseOptionsSchema = import_zod4.z.object({
  /**
   * Array of data formats to include in the response.
   * If not specified, defaults to ['markdown', 'metadata'].
   *
   * @example
   * ```typescript
   * const options = {
   *   formats: ['markdown', 'links', 'metadata']
   * };
   * ```
   */
  formats: import_zod4.z.array(DataFormatsEnum).optional().default(["markdown", "metadata"]),
  /**
   * Options for metadata extraction.
   * If not specified, defaults to the default metadata options.
   */
  metadataOptions: MetadataOptionsSchema.optional(),
  /**
   * Options for link extraction.
   * If not specified, defaults to the default link extraction options.
   */
  linksOptions: LinkExtractionOptionsSchema.optional(),
  /**
   * Options for HTML cleaning.
   * If not specified, defaults to the default HTML cleaning options.
   */
  cleanedHtmlOptions: HTMLCleaningOptionsSchema.optional()
});

// Annotate the CommonJS export names for ESM import in node:
// (the `0 &&` guard means the assignment below never executes)
0 && (module.exports = {
  BrowseOptionsSchema,
  DataFormatsEnum
});
//# sourceMappingURL=browse.js.map