html-content-processor
Version:
A professional library for processing, cleaning, filtering, and converting HTML content to Markdown. Features advanced customization options, presets, plugin support, fluent API, and TypeScript integration for reliable content extraction.
106 lines (105 loc) • 9.54 kB
JavaScript
;
/**
* HTML Filter Strategy Library
*
* A comprehensive library for cleaning, filtering, and converting HTML content
* to Markdown with advanced customization options, presets, and plugin support.
*
* @author HTML Filter Strategy Team
* @license MIT
*/
// Flag this CommonJS module with `__esModule = true` (a non-enumerable marker,
// since only `value` is given in the descriptor).
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare every named export as `undefined` in one chained assignment.
// Each of these names is redefined as a getter-backed property further down,
// after the corresponding `require` call has run.
// NOTE(review): this looks like unmodified TypeScript compiler output; tools that
// statically scan for `exports.<name> =` presumably depend on this literal form —
// confirm before restructuring.
exports.getVersionInfo = exports.API_VERSION = exports.VERSION = exports.useBuiltinPlugins = exports.builtinPlugins = exports.getPluginStats = exports.clearPlugins = exports.getPluginNames = exports.getAllPlugins = exports.hasPlugin = exports.getPlugin = exports.removePlugin = exports.usePlugin = exports.mergeWithPreset = exports.hasPreset = exports.getPresetNames = exports.getPreset = exports.presets = exports.extractContentAuto = exports.cleanHtmlAuto = exports.htmlToMarkdownAuto = exports.createProcessor = exports.gentleCleanHtml = exports.strictCleanHtml = exports.htmlToNewsMarkdown = exports.htmlToBlogMarkdown = exports.htmlToArticleMarkdown = exports.extractContent = exports.cleanHtml = exports.htmlToText = exports.htmlToMarkdownWithCitations = exports.htmlToMarkdown = exports.pageTypeDetector = exports.PageTypeDetector = exports.PluginError = exports.ConversionError = exports.FilterError = exports.ProcessorError = exports.isBrowser = exports.isNode = exports.getDocument = exports.getDOMParser = exports.parseHTML = exports.domAdapter = exports.DefaultMarkdownGenerator = exports.HtmlFilter = exports.HtmlProcessor = void 0;
// Version management: re-export VERSION, API_VERSION and getVersionInfo from
// ./version. Each export is an enumerable getter, so every read is forwarded to
// the property on the required module object rather than to a copied value.
// `version_1` is also read later in this file when the default-export object is
// built and when the browser load message is logged.
const version_1 = require("./version");
Object.defineProperty(exports, "VERSION", { enumerable: true, get: function () { return version_1.VERSION; } });
Object.defineProperty(exports, "API_VERSION", { enumerable: true, get: function () { return version_1.API_VERSION; } });
Object.defineProperty(exports, "getVersionInfo", { enumerable: true, get: function () { return version_1.getVersionInfo; } });
// Core classes and components: HtmlProcessor, HtmlFilter and
// DefaultMarkdownGenerator, each loaded from its own module and re-exported
// through an enumerable getter that forwards to the source module's property.
var html_processor_1 = require("./html-processor");
Object.defineProperty(exports, "HtmlProcessor", { enumerable: true, get: function () { return html_processor_1.HtmlProcessor; } });
var html_filter_1 = require("./html-filter");
Object.defineProperty(exports, "HtmlFilter", { enumerable: true, get: function () { return html_filter_1.HtmlFilter; } });
var markdown_generator_1 = require("./markdown-generator");
Object.defineProperty(exports, "DefaultMarkdownGenerator", { enumerable: true, get: function () { return markdown_generator_1.DefaultMarkdownGenerator; } });
// DOM adapter for cross-environment compatibility: re-export the six bindings of
// ./dom-adapter (domAdapter, parseHTML, getDOMParser, getDocument, isNode,
// isBrowser) as enumerable getters forwarding to the source module.
// NOTE(review): whether isNode/isBrowser are functions or boolean flags is not
// visible from this file — check ./dom-adapter before documenting their usage.
var dom_adapter_1 = require("./dom-adapter");
Object.defineProperty(exports, "domAdapter", { enumerable: true, get: function () { return dom_adapter_1.domAdapter; } });
Object.defineProperty(exports, "parseHTML", { enumerable: true, get: function () { return dom_adapter_1.parseHTML; } });
Object.defineProperty(exports, "getDOMParser", { enumerable: true, get: function () { return dom_adapter_1.getDOMParser; } });
Object.defineProperty(exports, "getDocument", { enumerable: true, get: function () { return dom_adapter_1.getDocument; } });
Object.defineProperty(exports, "isNode", { enumerable: true, get: function () { return dom_adapter_1.isNode; } });
Object.defineProperty(exports, "isBrowser", { enumerable: true, get: function () { return dom_adapter_1.isBrowser; } });
// Runtime bindings from ./types: ProcessorError, FilterError, ConversionError and
// PluginError, re-exported as enumerable getters forwarding to the source module.
// NOTE(review): presumably these are Error subclasses, judging by their names —
// confirm in ./types.
var types_1 = require("./types");
Object.defineProperty(exports, "ProcessorError", { enumerable: true, get: function () { return types_1.ProcessorError; } });
Object.defineProperty(exports, "FilterError", { enumerable: true, get: function () { return types_1.FilterError; } });
Object.defineProperty(exports, "ConversionError", { enumerable: true, get: function () { return types_1.ConversionError; } });
Object.defineProperty(exports, "PluginError", { enumerable: true, get: function () { return types_1.PluginError; } });
// Page type detection: re-export both PageTypeDetector and pageTypeDetector from
// ./page-type-detector as enumerable getters. The two names differ only in the
// case of the first letter.
// NOTE(review): presumably the capitalised one is the class and the lower-case
// one a shared instance — confirm in ./page-type-detector.
var page_type_detector_1 = require("./page-type-detector");
Object.defineProperty(exports, "PageTypeDetector", { enumerable: true, get: function () { return page_type_detector_1.PageTypeDetector; } });
Object.defineProperty(exports, "pageTypeDetector", { enumerable: true, get: function () { return page_type_detector_1.pageTypeDetector; } });
// Convenience functions - Main API: all fourteen bindings of ./convenience-api
// are re-exported by name as enumerable getters forwarding to the source module.
// Six of them (htmlToMarkdown, htmlToMarkdownWithCitations, htmlToText, cleanHtml,
// extractContent, createProcessor) are additionally placed on the default-export
// object built near the end of this file.
var convenience_api_1 = require("./convenience-api");
Object.defineProperty(exports, "htmlToMarkdown", { enumerable: true, get: function () { return convenience_api_1.htmlToMarkdown; } });
Object.defineProperty(exports, "htmlToMarkdownWithCitations", { enumerable: true, get: function () { return convenience_api_1.htmlToMarkdownWithCitations; } });
Object.defineProperty(exports, "htmlToText", { enumerable: true, get: function () { return convenience_api_1.htmlToText; } });
Object.defineProperty(exports, "cleanHtml", { enumerable: true, get: function () { return convenience_api_1.cleanHtml; } });
Object.defineProperty(exports, "extractContent", { enumerable: true, get: function () { return convenience_api_1.extractContent; } });
Object.defineProperty(exports, "htmlToArticleMarkdown", { enumerable: true, get: function () { return convenience_api_1.htmlToArticleMarkdown; } });
Object.defineProperty(exports, "htmlToBlogMarkdown", { enumerable: true, get: function () { return convenience_api_1.htmlToBlogMarkdown; } });
Object.defineProperty(exports, "htmlToNewsMarkdown", { enumerable: true, get: function () { return convenience_api_1.htmlToNewsMarkdown; } });
Object.defineProperty(exports, "strictCleanHtml", { enumerable: true, get: function () { return convenience_api_1.strictCleanHtml; } });
Object.defineProperty(exports, "gentleCleanHtml", { enumerable: true, get: function () { return convenience_api_1.gentleCleanHtml; } });
Object.defineProperty(exports, "createProcessor", { enumerable: true, get: function () { return convenience_api_1.createProcessor; } });
Object.defineProperty(exports, "htmlToMarkdownAuto", { enumerable: true, get: function () { return convenience_api_1.htmlToMarkdownAuto; } });
Object.defineProperty(exports, "cleanHtmlAuto", { enumerable: true, get: function () { return convenience_api_1.cleanHtmlAuto; } });
Object.defineProperty(exports, "extractContentAuto", { enumerable: true, get: function () { return convenience_api_1.extractContentAuto; } });
// Presets management: re-export the `presets` binding and its helper bindings
// (getPreset, getPresetNames, hasPreset, mergeWithPreset) from ./presets as
// enumerable getters forwarding to the source module.
var presets_1 = require("./presets");
Object.defineProperty(exports, "presets", { enumerable: true, get: function () { return presets_1.presets; } });
Object.defineProperty(exports, "getPreset", { enumerable: true, get: function () { return presets_1.getPreset; } });
Object.defineProperty(exports, "getPresetNames", { enumerable: true, get: function () { return presets_1.getPresetNames; } });
Object.defineProperty(exports, "hasPreset", { enumerable: true, get: function () { return presets_1.hasPreset; } });
Object.defineProperty(exports, "mergeWithPreset", { enumerable: true, get: function () { return presets_1.mergeWithPreset; } });
// Plugin system: re-export the ten bindings of ./plugin-manager (registration,
// lookup, listing, clearing, stats and the built-in plugin helpers) as enumerable
// getters forwarding to the source module. None of these appear on the
// default-export object; they are reachable only as named exports.
var plugin_manager_1 = require("./plugin-manager");
Object.defineProperty(exports, "usePlugin", { enumerable: true, get: function () { return plugin_manager_1.usePlugin; } });
Object.defineProperty(exports, "removePlugin", { enumerable: true, get: function () { return plugin_manager_1.removePlugin; } });
Object.defineProperty(exports, "getPlugin", { enumerable: true, get: function () { return plugin_manager_1.getPlugin; } });
Object.defineProperty(exports, "hasPlugin", { enumerable: true, get: function () { return plugin_manager_1.hasPlugin; } });
Object.defineProperty(exports, "getAllPlugins", { enumerable: true, get: function () { return plugin_manager_1.getAllPlugins; } });
Object.defineProperty(exports, "getPluginNames", { enumerable: true, get: function () { return plugin_manager_1.getPluginNames; } });
Object.defineProperty(exports, "clearPlugins", { enumerable: true, get: function () { return plugin_manager_1.clearPlugins; } });
Object.defineProperty(exports, "getPluginStats", { enumerable: true, get: function () { return plugin_manager_1.getPluginStats; } });
Object.defineProperty(exports, "builtinPlugins", { enumerable: true, get: function () { return plugin_manager_1.builtinPlugins; } });
Object.defineProperty(exports, "useBuiltinPlugins", { enumerable: true, get: function () { return plugin_manager_1.useBuiltinPlugins; } });
// Members of the default-export facade, unpacked directly from their modules.
// Both modules were already required above, so these calls hit the module cache.
const { HtmlProcessor } = require("./html-processor");
const {
    htmlToMarkdown,
    htmlToMarkdownWithCitations,
    htmlToText,
    cleanHtml,
    extractContent,
    createProcessor,
} = require("./convenience-api");
/**
 * Main API object - a compact facade over the library.
 *
 * Holds the processor class, the six core convenience functions and the
 * version fields. Values are captured once, when this module is evaluated.
 */
const htmlFilterAPI = {
    // Main processor class
    HtmlProcessor,
    // Core convenience functions
    htmlToMarkdown,
    htmlToMarkdownWithCitations,
    htmlToText,
    cleanHtml,
    extractContent,
    createProcessor,
    // Version information
    version: version_1.VERSION,
    apiVersion: version_1.API_VERSION,
    getVersionInfo: version_1.getVersionInfo,
};
// Browser global registration: whenever a `window` global exists, publish the
// API object as `window.htmlFilter` and log the loaded version to the console.
const runningInWindow = typeof window !== 'undefined';
if (runningInWindow) {
    window.htmlFilter = htmlFilterAPI;
    console.log(`[HTMLFilter] Library loaded - Version: ${version_1.VERSION}`);
}
// Default export: the htmlFilterAPI facade object. From CommonJS callers it is
// reached as `require(...).default`; the named exports above remain available
// alongside it.
exports.default = htmlFilterAPI;