@llml-browser/types
Version:
TypeScript types and schemas for the @llml-browser API
195 lines (190 loc) • 7.9 kB
JavaScript
"use strict";
// ---------------------------------------------------------------------------
// Bundler-emitted CommonJS interop helpers.
// ---------------------------------------------------------------------------

// Short aliases for the reflection primitives used by the helpers below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Defines one enumerable accessor on `target` for every enumerable key of
 * `all`, using the function stored under that key as the getter.
 *
 * @param {object} target - Object that receives the accessors.
 * @param {Object<string, Function>} all - Map of export name to getter function.
 */
var __export = (target, all) => {
  for (var name in all) {
    const getter = all[name];
    __defProp(target, name, { get: getter, enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 *
 * Keys that `to` already owns, and the key equal to `except`, are skipped.
 * Each mirrored property keeps the enumerability of the source property.
 * Nothing is copied unless `from` is a truthy object or a function.
 *
 * @param {object} to - Destination object; also the return value.
 * @param {*} from - Source whose own property names are mirrored.
 * @param {string} [except] - Single key to leave out.
 * @param {*} [desc] - Unused by callers; retained so the signature is unchanged.
 * @returns {object} The `to` object.
 */
var __copyProps = (to, from, except, desc) => {
  const isObjectLike = from && typeof from === "object";
  if (!isObjectLike && typeof from !== "function") {
    return to;
  }
  __getOwnPropNames(from).forEach((key) => {
    if (__hasOwnProp.call(to, key) || key === except) {
      return;
    }
    const sourceDesc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !sourceDesc || sourceDesc.enumerable
    });
  });
  return to;
};

/**
 * Builds a fresh object flagged with a non-enumerable `__esModule: true`
 * and mirrors every own property of `mod` onto it.
 *
 * @param {object} mod - Namespace object holding the export getters.
 * @returns {object} Object suitable for assignment to `module.exports`.
 */
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
// src/routers/browse/index.ts
// Namespace object that collects the public exports of this entry point.
var browse_exports = {};
// Register each export as an enumerable getter. The getters read the
// variables lazily, so they can refer to `var` bindings that are only
// assigned further down in this file.
__export(browse_exports, {
BrowseOptionsSchema: () => BrowseOptionsSchema,
DataFormatsEnum: () => DataFormatsEnum
});
// Publish a copy of the namespace, flagged with `__esModule`, as the
// CommonJS export object.
module.exports = __toCommonJS(browse_exports);
// src/services/html-cleaning/types.ts
var import_zod = require("zod");
/**
 * Options object for HTML cleaning.
 *
 * The object is strict, so keys other than the ones listed here fail
 * validation. Every field is optional; the two boolean flags parse to
 * `true` when omitted.
 */
var HTMLCleaningOptionsSchema = (() => {
  const { z } = import_zod;
  // Optional array of strings.
  const tagList = () => z.array(z.string()).optional();
  // Optional boolean that resolves to `true` when left out.
  const enabledByDefault = () => z.boolean().optional().default(true);

  return z.object({
    allowedHTMLTags: tagList(),
    disallowedHTMLTags: tagList(),
    extractMainContent: enabledByDefault(),
    /* Deprecated property, will be removed in the future. Pass baseUrl to HTMLCleaning as a required parameter instead. */
    documentBaseUrl: z.string().optional(),
    removeBase64Images: enabledByDefault()
  }).strict();
})();
/**
 * Pattern describing an element by tag, attributes, class names and ids.
 *
 * Every matcher is either a string or a `RegExp` instance. All top-level
 * fields are optional, and both the pattern object and each attribute
 * entry are strict (unknown keys fail validation).
 */
var ElementPatternSchema = (() => {
  const { z } = import_zod;
  // A fresh "string or RegExp" union for each field that needs one.
  const matcher = () => z.union([z.string(), z.instanceof(RegExp)]);

  // One attribute entry: `name` is required, `value` is optional.
  const attributePattern = z.object({
    name: matcher(),
    value: matcher().optional()
  }).strict();

  return z.object({
    tag: matcher().optional(),
    attributes: z.array(attributePattern).optional(),
    classNames: z.array(matcher()).optional(),
    ids: z.array(matcher()).optional()
  }).strict();
})();
/**
 * Metrics reported alongside a cleaning result: three required numbers.
 */
var HTMLCleaningMetricsSchema = (() => {
  const { z } = import_zod;
  return z.object({
    inputSize: z.number(),
    outputSize: z.number(),
    compressionRatio: z.number()
  });
})();
/**
 * Result of HTML cleaning: the cleaned markup as a required string plus an
 * optional metrics object validated by `HTMLCleaningMetricsSchema`.
 */
var HTMLCleaningResultSchema = (() => {
  const { z } = import_zod;
  const optionalMetrics = HTMLCleaningMetricsSchema.optional();
  return z.object({
    cleanedHtml: z.string(),
    metrics: optionalMetrics
  });
})();
// src/services/link/types.ts
var import_zod2 = require("zod");
/**
 * Options object for link extraction.
 *
 * Strict object: unknown keys fail validation. Every field is optional and
 * none carries a default at the schema level.
 */
var LinkExtractionOptionsSchema = (() => {
  const { z } = import_zod2;
  // Optional boolean switch with no schema-level default.
  const flag = () => z.boolean().optional();

  return z.object({
    // Include links from other domains
    includeExternal: flag(),
    // Include media files (images, videos, docs)
    includeMedia: flag(),
    // Regex patterns to exclude URLs
    excludePatterns: z.array(z.string()).optional(),
    // Remove query parameters from URLs
    removeQueryParams: flag()
  }).strict();
})();
/**
 * Links extracted from a page, grouped into `internal`, `external` and a
 * nested `media` object (`images`, `videos`, `documents`).
 *
 * Every group is an optional array of strings; `media` itself is optional.
 */
var ExtractedLinksSchema = (() => {
  const { z } = import_zod2;
  // Optional array of strings, created fresh for each field.
  const linkList = () => z.array(z.string()).optional();

  const mediaLinks = z.object({
    images: linkList(),
    videos: linkList(),
    documents: linkList()
  });

  return z.object({
    internal: linkList(),
    external: linkList(),
    media: mediaLinks.optional()
  });
})();
// src/services/metadata/types.ts
var import_zod3 = require("zod");
/**
 * Switches for metadata extraction, one boolean per metadata field.
 *
 * Each switch is optional and parses to `true` when omitted. The object is
 * not strict, unlike the other option schemas in this file.
 */
var MetadataOptionsSchema = (() => {
  const { z } = import_zod3;
  // Field names in the order they appear on the resulting schema shape.
  const switchNames = [
    "title",
    "description",
    "language",
    "canonical",
    "robots",
    "author",
    "keywords",
    "favicon",
    "openGraph",
    "twitter",
    "isIframeAllowed"
  ];

  const shape = {};
  for (const switchName of switchNames) {
    // Each field gets its own schema instance.
    shape[switchName] = z.boolean().optional().default(true);
  }
  return z.object(shape);
})();
/**
 * Metadata extracted from a page, in a flat layout.
 *
 * Every field is optional. `canonical`, `favicon`, `ogImage`, `ogUrl` and
 * `twitterImage` must be valid URLs when present; `lastModified` may also
 * be `null`.
 */
var PageMetadataSchema = (() => {
  const { z } = import_zod3;
  // Optional free-form string.
  const text = () => z.string().optional();
  // Optional string that must pass URL validation.
  const link = () => z.string().url().optional();

  return z.object({
    // Basic metadata
    title: text(),
    description: text(),
    language: text(),
    canonical: link(),
    robots: text(),
    author: text(),
    keywords: z.array(z.string()).optional(),
    lastModified: z.string().optional().nullable(),
    favicon: link(),

    // OpenGraph metadata (flattened)
    ogTitle: text(),
    ogDescription: text(),
    ogImage: link(),
    ogUrl: link(),
    ogType: text(),
    ogSiteName: text(),

    // Twitter Card metadata (flattened)
    twitterCard: text(),
    twitterSite: text(),
    twitterCreator: text(),
    twitterTitle: text(),
    twitterDescription: text(),
    twitterImage: link(),

    // iframe allowed
    isIframeAllowed: z.boolean().optional()
  });
})();
// src/routers/browse/types.ts
var import_zod4 = require("zod");
/**
 * Shape of the per-format data that can be returned for a page.
 * Every format is optional.
 */
var DataFormatsSchema = (() => {
  const { z } = import_zod4;
  // Optional string payload.
  const textPayload = () => z.string().optional();

  return z.object({
    /**
     * Page content converted to Markdown.
     */
    markdown: textPayload(),
    /**
     * Raw HTML of the page as returned by the server, i.e. the unmodified
     * HTML response from the target URL.
     */
    rawHtml: textPayload(),
    /**
     * Cleaned HTML: a sanitized version of the page with ads, scripts, and
     * other non-content elements removed.
     */
    cleanedHtml: textPayload(),
    /**
     * Links found on the page, validated by `ExtractedLinksSchema`.
     */
    links: ExtractedLinksSchema.optional(),
    /**
     * Metadata extracted from the page (title, description, and other meta
     * tags), validated by `PageMetadataSchema`.
     */
    metadata: PageMetadataSchema.optional()
  });
})();
/**
 * Enum of the data format names a caller can request.
 * The values match the keys of `DataFormatsSchema`.
 */
var DataFormatsEnum = import_zod4.z.enum(
  ["markdown", "rawHtml", "cleanedHtml", "links", "metadata"]
);
/**
 * Options object for a browse request.
 *
 * All fields are optional. Only `formats` carries a schema-level default;
 * the three `*Options` fields parse to `undefined` when omitted.
 */
var BrowseOptionsSchema = import_zod4.z.object({
  /**
   * Array of data formats to include in the response.
   * If not specified, defaults to ['markdown', 'metadata'].
   *
   * The default is supplied through a factory so that every parse receives
   * its own array; passing a single array literal would hand the same
   * instance to every caller, and a mutation by one caller would leak into
   * the default seen by later parses.
   *
   * @example
   * ```typescript
   * const options = {
   *   formats: ['markdown', 'links', 'metadata']
   * };
   * ```
   */
  formats: import_zod4.z.array(DataFormatsEnum).optional().default(() => ["markdown", "metadata"]),
  /**
   * Options for metadata extraction.
   * This schema does not substitute a default: when omitted, the parsed
   * value is `undefined`. Presumably the consumer falls back to the default
   * metadata options - TODO confirm against the caller.
   */
  metadataOptions: MetadataOptionsSchema.optional(),
  /**
   * Options for link extraction.
   * This schema does not substitute a default: when omitted, the parsed
   * value is `undefined`. Presumably the consumer falls back to the default
   * link extraction options - TODO confirm against the caller.
   */
  linksOptions: LinkExtractionOptionsSchema.optional(),
  /**
   * Options for HTML cleaning.
   * This schema does not substitute a default: when omitted, the parsed
   * value is `undefined`. Presumably the consumer falls back to the default
   * HTML cleaning options - TODO confirm against the caller.
   */
  cleanedHtmlOptions: HTMLCleaningOptionsSchema.optional()
});
// Annotate the CommonJS export names for ESM import in node:
// The right-hand side of `0 && ...` is never evaluated because `0` is falsy,
// so this statement has no runtime effect. It exists only so that the export
// names appear literally in the source text. Keep the expression shape and
// the name list in sync with the exports registered on `browse_exports`.
0 && (module.exports = {
BrowseOptionsSchema,
DataFormatsEnum
});
//# sourceMappingURL=browse.js.map