UNPKG

html-content-processor

Version:

A professional library for processing, cleaning, filtering, and converting HTML content to Markdown. Features advanced customization options, presets, plugin support, fluent API, and TypeScript integration for reliable content extraction.

106 lines (105 loc) 9.54 kB
"use strict"; /** * HTML Filter Strategy Library * * A comprehensive library for cleaning, filtering, and converting HTML content * to Markdown with advanced customization options, presets, and plugin support. * * @author HTML Filter Strategy Team * @license MIT */ Object.defineProperty(exports, "__esModule", { value: true }); exports.getVersionInfo = exports.API_VERSION = exports.VERSION = exports.useBuiltinPlugins = exports.builtinPlugins = exports.getPluginStats = exports.clearPlugins = exports.getPluginNames = exports.getAllPlugins = exports.hasPlugin = exports.getPlugin = exports.removePlugin = exports.usePlugin = exports.mergeWithPreset = exports.hasPreset = exports.getPresetNames = exports.getPreset = exports.presets = exports.extractContentAuto = exports.cleanHtmlAuto = exports.htmlToMarkdownAuto = exports.createProcessor = exports.gentleCleanHtml = exports.strictCleanHtml = exports.htmlToNewsMarkdown = exports.htmlToBlogMarkdown = exports.htmlToArticleMarkdown = exports.extractContent = exports.cleanHtml = exports.htmlToText = exports.htmlToMarkdownWithCitations = exports.htmlToMarkdown = exports.pageTypeDetector = exports.PageTypeDetector = exports.PluginError = exports.ConversionError = exports.FilterError = exports.ProcessorError = exports.isBrowser = exports.isNode = exports.getDocument = exports.getDOMParser = exports.parseHTML = exports.domAdapter = exports.DefaultMarkdownGenerator = exports.HtmlFilter = exports.HtmlProcessor = void 0; // Version management const version_1 = require("./version"); Object.defineProperty(exports, "VERSION", { enumerable: true, get: function () { return version_1.VERSION; } }); Object.defineProperty(exports, "API_VERSION", { enumerable: true, get: function () { return version_1.API_VERSION; } }); Object.defineProperty(exports, "getVersionInfo", { enumerable: true, get: function () { return version_1.getVersionInfo; } }); // Core classes and components var html_processor_1 = require("./html-processor"); Object.defineProperty(exports, "HtmlProcessor", { enumerable: true, get: function () { return html_processor_1.HtmlProcessor; } }); var html_filter_1 = require("./html-filter"); Object.defineProperty(exports, "HtmlFilter", { enumerable: true, get: function () { return html_filter_1.HtmlFilter; } }); var markdown_generator_1 = require("./markdown-generator"); Object.defineProperty(exports, "DefaultMarkdownGenerator", { enumerable: true, get: function () { return markdown_generator_1.DefaultMarkdownGenerator; } }); // DOM adapter for cross-environment compatibility var dom_adapter_1 = require("./dom-adapter"); Object.defineProperty(exports, "domAdapter", { enumerable: true, get: function () { return dom_adapter_1.domAdapter; } }); Object.defineProperty(exports, "parseHTML", { enumerable: true, get: function () { return dom_adapter_1.parseHTML; } }); Object.defineProperty(exports, "getDOMParser", { enumerable: true, get: function () { return dom_adapter_1.getDOMParser; } }); Object.defineProperty(exports, "getDocument", { enumerable: true, get: function () { return dom_adapter_1.getDocument; } }); Object.defineProperty(exports, "isNode", { enumerable: true, get: function () { return dom_adapter_1.isNode; } }); Object.defineProperty(exports, "isBrowser", { enumerable: true, get: function () { return dom_adapter_1.isBrowser; } }); // Type definitions var types_1 = require("./types"); Object.defineProperty(exports, "ProcessorError", { enumerable: true, get: function () { return types_1.ProcessorError; } }); Object.defineProperty(exports, "FilterError", { enumerable: true, get: function () { return types_1.FilterError; } }); Object.defineProperty(exports, "ConversionError", { enumerable: true, get: function () { return types_1.ConversionError; } }); Object.defineProperty(exports, "PluginError", { enumerable: true, get: function () { return types_1.PluginError; } }); // Page type detection var page_type_detector_1 = require("./page-type-detector"); Object.defineProperty(exports, "PageTypeDetector", { enumerable: true, get: function () { return page_type_detector_1.PageTypeDetector; } }); Object.defineProperty(exports, "pageTypeDetector", { enumerable: true, get: function () { return page_type_detector_1.pageTypeDetector; } }); // Convenience functions - Main API var convenience_api_1 = require("./convenience-api"); Object.defineProperty(exports, "htmlToMarkdown", { enumerable: true, get: function () { return convenience_api_1.htmlToMarkdown; } }); Object.defineProperty(exports, "htmlToMarkdownWithCitations", { enumerable: true, get: function () { return convenience_api_1.htmlToMarkdownWithCitations; } }); Object.defineProperty(exports, "htmlToText", { enumerable: true, get: function () { return convenience_api_1.htmlToText; } }); Object.defineProperty(exports, "cleanHtml", { enumerable: true, get: function () { return convenience_api_1.cleanHtml; } }); Object.defineProperty(exports, "extractContent", { enumerable: true, get: function () { return convenience_api_1.extractContent; } }); Object.defineProperty(exports, "htmlToArticleMarkdown", { enumerable: true, get: function () { return convenience_api_1.htmlToArticleMarkdown; } }); Object.defineProperty(exports, "htmlToBlogMarkdown", { enumerable: true, get: function () { return convenience_api_1.htmlToBlogMarkdown; } }); Object.defineProperty(exports, "htmlToNewsMarkdown", { enumerable: true, get: function () { return convenience_api_1.htmlToNewsMarkdown; } }); Object.defineProperty(exports, "strictCleanHtml", { enumerable: true, get: function () { return convenience_api_1.strictCleanHtml; } }); Object.defineProperty(exports, "gentleCleanHtml", { enumerable: true, get: function () { return convenience_api_1.gentleCleanHtml; } }); Object.defineProperty(exports, "createProcessor", { enumerable: true, get: function () { return convenience_api_1.createProcessor; } }); Object.defineProperty(exports, "htmlToMarkdownAuto", { enumerable: true, get: function () { return convenience_api_1.htmlToMarkdownAuto; } }); Object.defineProperty(exports, "cleanHtmlAuto", { enumerable: true, get: function () { return convenience_api_1.cleanHtmlAuto; } }); Object.defineProperty(exports, "extractContentAuto", { enumerable: true, get: function () { return convenience_api_1.extractContentAuto; } }); // Presets management var presets_1 = require("./presets"); Object.defineProperty(exports, "presets", { enumerable: true, get: function () { return presets_1.presets; } }); Object.defineProperty(exports, "getPreset", { enumerable: true, get: function () { return presets_1.getPreset; } }); Object.defineProperty(exports, "getPresetNames", { enumerable: true, get: function () { return presets_1.getPresetNames; } }); Object.defineProperty(exports, "hasPreset", { enumerable: true, get: function () { return presets_1.hasPreset; } }); Object.defineProperty(exports, "mergeWithPreset", { enumerable: true, get: function () { return presets_1.mergeWithPreset; } }); // Plugin system var plugin_manager_1 = require("./plugin-manager"); Object.defineProperty(exports, "usePlugin", { enumerable: true, get: function () { return plugin_manager_1.usePlugin; } }); Object.defineProperty(exports, "removePlugin", { enumerable: true, get: function () { return plugin_manager_1.removePlugin; } }); Object.defineProperty(exports, "getPlugin", { enumerable: true, get: function () { return plugin_manager_1.getPlugin; } }); Object.defineProperty(exports, "hasPlugin", { enumerable: true, get: function () { return plugin_manager_1.hasPlugin; } }); Object.defineProperty(exports, "getAllPlugins", { enumerable: true, get: function () { return plugin_manager_1.getAllPlugins; } }); Object.defineProperty(exports, "getPluginNames", { enumerable: true, get: function () { return plugin_manager_1.getPluginNames; } }); Object.defineProperty(exports, "clearPlugins", { enumerable: true, get: function () { return plugin_manager_1.clearPlugins; } }); Object.defineProperty(exports, "getPluginStats", { enumerable: true, get: function () { return plugin_manager_1.getPluginStats; } }); Object.defineProperty(exports, "builtinPlugins", { enumerable: true, get: function () { return plugin_manager_1.builtinPlugins; } }); Object.defineProperty(exports, "useBuiltinPlugins", { enumerable: true, get: function () { return plugin_manager_1.useBuiltinPlugins; } }); // Import core classes for default export const html_processor_2 = require("./html-processor"); const convenience_api_2 = require("./convenience-api"); /** * Main API object - Clean and simple interface */ const htmlFilterAPI = { // Main processor class HtmlProcessor: html_processor_2.HtmlProcessor, // Core convenience functions htmlToMarkdown: convenience_api_2.htmlToMarkdown, htmlToMarkdownWithCitations: convenience_api_2.htmlToMarkdownWithCitations, htmlToText: convenience_api_2.htmlToText, cleanHtml: convenience_api_2.cleanHtml, extractContent: convenience_api_2.extractContent, createProcessor: convenience_api_2.createProcessor, // Version information version: version_1.VERSION, apiVersion: version_1.API_VERSION, getVersionInfo: version_1.getVersionInfo }; // Minimal browser global registration (only if needed) if (typeof window !== 'undefined') { window.htmlFilter = htmlFilterAPI; console.log(`[HTMLFilter] Library loaded - Version: ${version_1.VERSION}`); } // Default export - Clean and simple exports.default = htmlFilterAPI;