@llml-browser/types
Version:
TypeScript types and schemas for the @llml-browser API
236 lines (231 loc) • 8.88 kB
JavaScript
"use strict";
// Short aliases for the Object reflection primitives used by the bundler helpers in this file.
var {
  defineProperty: __defProp,
  getOwnPropertyDescriptor: __getOwnPropDesc,
  getOwnPropertyNames: __getOwnPropNames
} = Object;
// Unbound `hasOwnProperty`, always invoked through `.call(obj, key)`.
var { hasOwnProperty: __hasOwnProp } = Object.prototype;
/**
 * Defines one enumerable accessor on `target` for every key enumerated on `all`.
 * The value stored under each key in `all` is installed as the getter itself,
 * so it is only invoked when the property is read.
 */
var __export = (target, all) => {
  for (var exportName in all) {
    var getter = all[exportName];
    __defProp(target, exportName, { enumerable: true, get: getter });
  }
};
/**
 * Mirrors the own properties of `from` onto `to` as forwarding getters and returns `to`.
 *
 * - Nothing is copied unless `from` is a non-null object or a function.
 * - Keys that `to` already owns, and the key equal to `except`, are skipped.
 * - Each copied property keeps the enumerability of its source descriptor
 *   (enumerable when no descriptor is found).
 *
 * `desc` is only a scratch slot for the current source descriptor.
 */
var __copyProps = (to, from, except, desc) => {
  var isObject = from !== null && typeof from === "object";
  if (!isObject && typeof from !== "function") {
    return to;
  }
  for (let key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    // The getter reads through to `from`, so later changes on the source stay visible.
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};
/**
 * Builds a new object flagged with a non-enumerable `__esModule: true` property
 * and copies the properties of `mod` onto it via `__copyProps`.
 */
var __toCommonJS = (mod) => {
  var cjsExports = __defProp({}, "__esModule", { value: true });
  return __copyProps(cjsExports, mod);
};
// src/routers/links/index.ts
// Export table for this module. Every entry is a getter thunk, so the schema
// variables (declared with `var` later in the file) are only read when a
// property is actually accessed, not while this table is being built.
var links_exports = {};
__export(links_exports, {
contentOptionsSchema: () => contentOptionsSchema,
linksOptionsSchema: () => linksOptionsSchema,
treeOptionsSchema: () => treeOptionsSchema
});
// Publish a new object (flagged `__esModule`) whose properties forward to the getters above.
module.exports = __toCommonJS(links_exports);
// src/services/html-cleaning/types.ts
var import_zod = require("zod");
/**
 * Options object for HTML cleaning.
 * All fields are optional; the two boolean switches default to `true`.
 * Unknown keys are rejected because the object schema is strict.
 */
var HTMLCleaningOptionsSchema = (() => {
  const { z } = import_zod;
  // Factories, so that every field receives its own schema instance.
  const optionalTagList = () => z.array(z.string()).optional();
  const onByDefault = () => z.boolean().optional().default(true);

  return z
    .object({
      allowedHTMLTags: optionalTagList(),
      disallowedHTMLTags: optionalTagList(),
      extractMainContent: onByDefault(),
      // Deprecated: will be removed in the future. Add baseUrl to HTMLCleaning as a required parameter instead.
      documentBaseUrl: z.string().optional(),
      removeBase64Images: onByDefault()
    })
    .strict();
})();
/**
 * Pattern describing an element by tag, attributes, class names and ids.
 * Every matcher accepts either a string or a RegExp instance, every top-level
 * field is optional, and unknown keys are rejected at both object levels.
 */
var ElementPatternSchema = (() => {
  const { z } = import_zod;
  // A single matcher value: a string or a RegExp instance.
  const textOrPattern = () => z.union([z.string(), z.instanceof(RegExp)]);
  // One attribute matcher: `name` is required, `value` may be omitted.
  const attributeMatcher = () =>
    z
      .object({
        name: textOrPattern(),
        value: textOrPattern().optional()
      })
      .strict();

  return z
    .object({
      tag: textOrPattern().optional(),
      attributes: z.array(attributeMatcher()).optional(),
      classNames: z.array(textOrPattern()).optional(),
      ids: z.array(textOrPattern()).optional()
    })
    .strict();
})();
/**
 * Metrics record for an HTML cleaning run: three required numeric fields.
 */
var HTMLCleaningMetricsSchema = (() => {
  const { z } = import_zod;
  const metricsShape = {};
  for (const field of ["inputSize", "outputSize", "compressionRatio"]) {
    metricsShape[field] = z.number();
  }
  return z.object(metricsShape);
})();
/**
 * Result of HTML cleaning: the cleaned markup as a required string, plus an
 * optional metrics record validated by `HTMLCleaningMetricsSchema`.
 */
var HTMLCleaningResultSchema = (() => {
  const { z } = import_zod;
  const resultShape = {
    cleanedHtml: z.string(),
    metrics: HTMLCleaningMetricsSchema.optional()
  };
  return z.object(resultShape);
})();
// src/services/link/types.ts
var import_zod2 = require("zod");
/**
 * Options object for link extraction.
 * Every field is optional and no defaults are applied by the schema itself;
 * unknown keys are rejected because the object schema is strict.
 */
var LinkExtractionOptionsSchema = (() => {
  const { z } = import_zod2;
  const optionalFlag = () => z.boolean().optional();

  return z
    .object({
      // Include links from other domains
      includeExternal: optionalFlag(),
      // Include media files (images, videos, docs)
      includeMedia: optionalFlag(),
      // Regex patterns to exclude URLs
      excludePatterns: z.array(z.string()).optional(),
      // Remove query parameters from URLs
      removeQueryParams: optionalFlag()
    })
    .strict();
})();
/**
 * Extracted links grouped into `internal`, `external` and a nested `media`
 * object (`images`, `videos`, `documents`). Every group is an optional array
 * of strings, and the `media` object itself is optional.
 */
var ExtractedLinksSchema = (() => {
  const { z } = import_zod2;
  const optionalStringList = () => z.array(z.string()).optional();

  const mediaGroups = z.object({
    images: optionalStringList(),
    videos: optionalStringList(),
    documents: optionalStringList()
  });

  return z.object({
    internal: optionalStringList(),
    external: optionalStringList(),
    media: mediaGroups.optional()
  });
})();
// src/services/metadata/types.ts
var import_zod3 = require("zod");
/**
 * Per-field boolean switches for metadata extraction.
 * Every key is optional and defaults to `true` when omitted.
 */
var MetadataOptionsSchema = (() => {
  const { z } = import_zod3;
  // Field names, in the order they appear in the resulting object shape.
  const switchNames = [
    "title",
    "description",
    "language",
    "canonical",
    "robots",
    "author",
    "keywords",
    "favicon",
    "openGraph",
    "twitter",
    "isIframeAllowed"
  ];
  const switches = {};
  for (const switchName of switchNames) {
    switches[switchName] = z.boolean().optional().default(true);
  }
  return z.object(switches);
})();
/**
 * Flattened page metadata record. Every field is optional.
 * Fields built with `optionalUrl()` must additionally pass Zod's URL check;
 * `lastModified` may also be `null`.
 */
var PageMetadataSchema = (() => {
  const { z } = import_zod3;
  const optionalText = () => z.string().optional();
  const optionalUrl = () => z.string().url().optional();

  return z.object({
    // Basic metadata
    title: optionalText(),
    description: optionalText(),
    language: optionalText(),
    canonical: optionalUrl(),
    robots: optionalText(),
    author: optionalText(),
    keywords: z.array(z.string()).optional(),
    lastModified: optionalText().nullable(),
    favicon: optionalUrl(),
    // OpenGraph metadata (flattened)
    ogTitle: optionalText(),
    ogDescription: optionalText(),
    ogImage: optionalUrl(),
    ogUrl: optionalUrl(),
    ogType: optionalText(),
    ogSiteName: optionalText(),
    // Twitter Card metadata (flattened)
    twitterCard: optionalText(),
    twitterSite: optionalText(),
    twitterCreator: optionalText(),
    twitterTitle: optionalText(),
    twitterDescription: optionalText(),
    twitterImage: optionalUrl(),
    // iframe allowed
    isIframeAllowed: z.boolean().optional()
  });
})();
// src/routers/links/types.ts
var import_zod4 = require("zod");
/**
 * Content-related option groups. Each group is optional and is validated by
 * the schema of the service it configures.
 */
var contentOptionsSchema = (() => {
  const contentShape = {
    /** Metadata extraction options (title, description, etc.). */
    metadataOptions: MetadataOptionsSchema.optional(),
    /** Link extraction options: how links are extracted and categorized. */
    linksOptions: LinkExtractionOptionsSchema.optional(),
    /** HTML cleaning options: how HTML is sanitized and cleaned. */
    cleanedHtmlOptions: HTMLCleaningOptionsSchema.optional()
  };
  return import_zod4.z.object(contentShape);
})();
/**
 * Options controlling how the link tree is built.
 *
 * The three boolean options go through a preprocess step that maps every
 * input except `false` and the string `"false"` to `true`, which is how
 * their documented default of `true` comes about.
 */
var treeOptionsSchema = (() => {
  const { z } = import_zod4;
  // True for everything except the boolean false and the string "false".
  const isNotFalse = (val) => !(val === "false" || val === false);
  const trueUnlessFalse = () => z.preprocess(isNotFalse, z.boolean().optional());

  return z.object({
    /**
     * Whether to place folders before leaf nodes in the tree.
     * Default: true
     */
    folderFirst: trueUnlessFalse(),
    /**
     * Ordering of links within each folder:
     * - 'page': preserve the original document order
     * - 'alphabetical': sort A→Z by URL
     * Default: 'page'
     */
    linksOrder: z.enum(["page", "alphabetical"]).optional(),
    /**
     * Whether to include extracted links for each node in the tree.
     * Default: true
     */
    extractedLinks: trueUnlessFalse(),
    /**
     * Whether to treat the subdomain as the root URL.
     * Default: true
     * e.g., if false: rootUrl: https://swr.vercel.app -> https://vercel.app
     */
    subdomainAsRootUrl: trueUnlessFalse()
  });
})();
/**
 * Full option set for the links endpoint: its own fields, followed by every
 * field of `treeOptionsSchema` and then every field of `contentOptionsSchema`.
 *
 * Boolean fields are coerced by a preprocess step rather than validated strictly:
 * - "default true" fields become `true` for any input except `false` / `"false"`;
 * - "default false" fields become `true` only for `true` / `"true"`.
 */
var linksOptionsSchema = (() => {
  const { z } = import_zod4;
  const trueUnlessFalse = () =>
    z.preprocess((val) => !(val === "false" || val === false), z.boolean().optional());
  const trueOnlyIfTrue = () =>
    z.preprocess((val) => val === true || val === "true", z.boolean().optional());

  const ownFields = {
    /**
     * The URL to scrape.
     * Intended to be a valid URL string; this schema only checks that it is a string.
     */
    url: z.string(),
    /**
     * Whether to build a site map tree.
     * Default: true
     */
    tree: trueUnlessFalse(),
    /**
     * Whether to extract metadata from the page.
     * Default: true
     */
    metadata: trueUnlessFalse(),
    /**
     * Whether to return cleaned HTML.
     * Default: false
     */
    cleanedHtml: trueOnlyIfTrue(),
    /**
     * Whether to fetch and parse robots.txt.
     * Default: false
     */
    robots: trueOnlyIfTrue(),
    /**
     * Whether to fetch and parse sitemap.xml.
     * Default: false
     */
    sitemapXML: trueOnlyIfTrue()
  };

  // Spread order matters: later shapes would override earlier keys of the same name.
  return z.object({
    ...ownFields,
    ...treeOptionsSchema.shape,
    ...contentOptionsSchema.shape
  });
})();
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so this assignment never executes. The
// statement exists only as statically readable text listing the export names;
// keep its form unchanged.
0 && (module.exports = {
contentOptionsSchema,
linksOptionsSchema,
treeOptionsSchema
});
//# sourceMappingURL=links.js.map