UNPKG

@llml-browser/types

Version:

TypeScript types and schemas for the @llml-browser API

236 lines (231 loc) 8.88 kB
"use strict";

// ---------------------------------------------------------------------------
// Bundler-generated CommonJS interop helpers.
// ---------------------------------------------------------------------------
const __defProp = Object.defineProperty;
const __getOwnPropDesc = Object.getOwnPropertyDescriptor;
const __getOwnPropNames = Object.getOwnPropertyNames;
const __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Installs every entry of `all` on `target` as an enumerable getter, so the
 * exported value is looked up lazily at access time.
 */
const __export = (target, all) => {
  for (const exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as getters, skipping keys
 * that `to` already owns and the single key named by `except`.
 * Returns `to`.
 */
const __copyProps = (to, from, except) => {
  const isCopyable =
    (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    const sourceDesc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      // Keep the source property's enumerability; enumerable if no descriptor.
      enumerable: !sourceDesc || sourceDesc.enumerable,
    });
  }
  return to;
};

/** Wraps a namespace object into a CommonJS export object flagged `__esModule`. */
const __toCommonJS = (mod) => {
  const cjsTarget = __defProp({}, "__esModule", { value: true });
  return __copyProps(cjsTarget, mod);
};

// src/routers/links/index.ts
const links_exports = {};
__export(links_exports, {
  contentOptionsSchema: () => contentOptionsSchema,
  linksOptionsSchema: () => linksOptionsSchema,
  treeOptionsSchema: () => treeOptionsSchema,
});
module.exports = __toCommonJS(links_exports);

// ---------------------------------------------------------------------------
// Shared field factories. Each call returns a fresh schema instance.
// ---------------------------------------------------------------------------
const { z } = require("zod");

/** Optional boolean with no default. */
const optionalFlag = () => z.boolean().optional();

/** Optional boolean that resolves to `true` when omitted. */
const enabledByDefault = () => z.boolean().optional().default(true);

/** Optional string. */
const optionalText = () => z.string().optional();

/** Optional string that must pass zod's URL check. */
const optionalUrl = () => z.string().url().optional();

/** Optional array of strings. */
const optionalTextList = () => z.array(z.string()).optional();

/** Either a literal string or a RegExp instance. */
const textOrPattern = () => z.union([z.string(), z.instanceof(RegExp)]);

/**
 * Boolean flag coerced before validation: everything except the string
 * "false" and the boolean `false` becomes `true` (so a missing value is `true`).
 */
const flagDefaultingTrue = () =>
  z.preprocess(
    (val) => !(val === "false" || val === false),
    z.boolean().optional()
  );

/**
 * Boolean flag coerced before validation: only the string "true" and the
 * boolean `true` become `true` (so a missing value is `false`).
 */
const flagDefaultingFalse = () =>
  z.preprocess(
    (val) => val === "true" || val === true,
    z.boolean().optional()
  );

// src/services/html-cleaning/types.ts
const HTMLCleaningOptionsSchema = z
  .object({
    allowedHTMLTags: optionalTextList(),
    disallowedHTMLTags: optionalTextList(),
    extractMainContent: enabledByDefault(),
    /* Deprecated property, will be removed in future.
       Add baseUrl to HTMLCleaning as a required parameter instead. */
    documentBaseUrl: optionalText(),
    removeBase64Images: enabledByDefault(),
  })
  .strict();

const ElementPatternSchema = z
  .object({
    tag: textOrPattern().optional(),
    attributes: z
      .array(
        z
          .object({
            name: textOrPattern(),
            value: textOrPattern().optional(),
          })
          .strict()
      )
      .optional(),
    classNames: z.array(textOrPattern()).optional(),
    ids: z.array(textOrPattern()).optional(),
  })
  .strict();

const HTMLCleaningMetricsSchema = z.object({
  inputSize: z.number(),
  outputSize: z.number(),
  compressionRatio: z.number(),
});

const HTMLCleaningResultSchema = z.object({
  cleanedHtml: z.string(),
  metrics: HTMLCleaningMetricsSchema.optional(),
});

// src/services/link/types.ts
const LinkExtractionOptionsSchema = z
  .object({
    // Include links from other domains
    includeExternal: optionalFlag(),
    // Include media files (images, videos, docs)
    includeMedia: optionalFlag(),
    // Regex patterns to exclude URLs
    excludePatterns: optionalTextList(),
    // Remove query parameters from URLs
    removeQueryParams: optionalFlag(),
  })
  .strict();

const ExtractedLinksSchema = z.object({
  internal: optionalTextList(),
  external: optionalTextList(),
  media: z
    .object({
      images: optionalTextList(),
      videos: optionalTextList(),
      documents: optionalTextList(),
    })
    .optional(),
});

// src/services/metadata/types.ts
// Every toggle below is optional and resolves to `true` when omitted.
const MetadataOptionsSchema = z.object({
  title: enabledByDefault(),
  description: enabledByDefault(),
  language: enabledByDefault(),
  canonical: enabledByDefault(),
  robots: enabledByDefault(),
  author: enabledByDefault(),
  keywords: enabledByDefault(),
  favicon: enabledByDefault(),
  openGraph: enabledByDefault(),
  twitter: enabledByDefault(),
  isIframeAllowed: enabledByDefault(),
});

const PageMetadataSchema = z.object({
  // Basic metadata
  title: optionalText(),
  description: optionalText(),
  language: optionalText(),
  canonical: optionalUrl(),
  robots: optionalText(),
  author: optionalText(),
  keywords: optionalTextList(),
  lastModified: z.string().optional().nullable(),
  favicon: optionalUrl(),
  // OpenGraph metadata (flattened)
  ogTitle: optionalText(),
  ogDescription: optionalText(),
  ogImage: optionalUrl(),
  ogUrl: optionalUrl(),
  ogType: optionalText(),
  ogSiteName: optionalText(),
  // Twitter Card metadata (flattened)
  twitterCard: optionalText(),
  twitterSite: optionalText(),
  twitterCreator: optionalText(),
  twitterTitle: optionalText(),
  twitterDescription: optionalText(),
  twitterImage: optionalUrl(),
  // iframe allowed
  isIframeAllowed: optionalFlag(),
});

// src/routers/links/types.ts
const contentOptionsSchema = z.object({
  /**
   * Options for metadata extraction.
   * Controls how metadata like title, description, etc. are extracted.
   */
  metadataOptions: MetadataOptionsSchema.optional(),
  /**
   * Options for link extraction.
   * Controls how links are extracted and categorized.
   */
  linksOptions: LinkExtractionOptionsSchema.optional(),
  /**
   * Options for HTML cleaning.
   * Controls how HTML is sanitized and cleaned.
   */
  cleanedHtmlOptions: HTMLCleaningOptionsSchema.optional(),
});

const treeOptionsSchema = z.object({
  /**
   * Whether to place folders before leaf nodes in the tree.
   * Default: true
   */
  folderFirst: flagDefaultingTrue(),
  /**
   * How to order links within each folder:
   * - 'page' preserve the original document order
   * - 'alphabetical' sort A→Z by URL
   * Documented default: 'page'. The schema itself applies no default, so a
   * missing value stays undefined here.
   */
  linksOrder: z.enum(["page", "alphabetical"]).optional(),
  /**
   * Whether to include extracted links for each node in the tree.
   * Default: true
   */
  extractedLinks: flagDefaultingTrue(),
  /**
   * Whether to exclude subdomain as root URL.
   * Default: true
   * e.g., if false: rootUrl: https://swr.vercel.app -> https://vercel.app
   * NOTE(review): the example suggests `true` keeps the subdomain as the
   * root URL, which conflicts with "exclude" above — confirm with the consumer.
   */
  subdomainAsRootUrl: flagDefaultingTrue(),
});

const linksOptionsSchema = z.object({
  /**
   * The URL to scrape.
   * NOTE(review): documented as "must be a valid URL string", but this schema
   * only checks that the value is a string.
   */
  url: z.string(),
  /**
   * Whether to build a site map tree.
   * Default: true
   */
  tree: flagDefaultingTrue(),
  /**
   * Whether to extract metadata from the page.
   * Default: true
   */
  metadata: flagDefaultingTrue(),
  /**
   * Whether to return cleaned HTML.
   * Default: false
   */
  cleanedHtml: flagDefaultingFalse(),
  /**
   * Whether to fetch and parse robots.txt.
   * Default: false
   */
  robots: flagDefaultingFalse(),
  /**
   * Whether to fetch and parse sitemap.xml.
   * Default: false
   */
  sitemapXML: flagDefaultingFalse(),
  // Tree and content options are accepted at the top level as well.
  ...treeOptionsSchema.shape,
  ...contentOptionsSchema.shape,
});

// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  contentOptionsSchema,
  linksOptionsSchema,
  treeOptionsSchema
});
//# sourceMappingURL=links.js.map