html-content-processor
Version:
A professional library for processing, cleaning, filtering, and converting HTML content to Markdown. Features advanced customization options, presets, plugin support, fluent API, and TypeScript integration for reliable content extraction.
300 lines (299 loc) • 10 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.useBuiltinPlugins = exports.builtinPlugins = exports.pluginRegistry = exports.getPluginStats = exports.clearPlugins = exports.getPluginNames = exports.getAllPlugins = exports.hasPlugin = exports.getPlugin = exports.removePlugin = exports.usePlugin = void 0;
const types_1 = require("./types");
const version_1 = require("./version");
/**
 * Registry that stores plugins by name and runs their hooks in registration order.
 *
 * A plugin is a plain object with a unique `name` and optional `version`,
 * `init`, `destroy`, `filter` and `convert` members.
 */
class PluginRegistry {
    constructor() {
        // Plugin name -> plugin object.
        this.plugins = new Map();
        // Plugin names in registration order; drives the order hooks are applied in.
        this.initializationOrder = [];
    }
    /**
     * Register a new plugin, running its optional `init` hook first.
     * @param plugin Plugin to register
     * @throws {PluginError} If a plugin with the same name is already registered,
     * or if the `init` hook throws (the plugin is not registered in that case)
     */
    register(plugin) {
        if (this.plugins.has(plugin.name)) {
            throw new types_1.PluginError(`Plugin with name "${plugin.name}" is already registered`, plugin.name);
        }
        try {
            // Run init before storing so a plugin whose init throws is never registered.
            if (plugin.init) {
                plugin.init();
            }
            this.plugins.set(plugin.name, plugin);
            this.initializationOrder.push(plugin.name);
            console.log(`[PluginManager] Registered plugin: ${plugin.name}${plugin.version ? ` v${plugin.version}` : ''}`);
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            throw new types_1.PluginError(`Failed to initialize plugin "${plugin.name}": ${errorMessage}`, plugin.name, error instanceof Error ? error : undefined);
        }
    }
    /**
     * Unregister a plugin, running its optional `destroy` hook first.
     *
     * The plugin is removed from the registry even when `destroy` throws, so a
     * faulty hook cannot leave the plugin stuck (still applied to content and
     * blocking its name from being registered again).
     * @param name Plugin name to unregister
     * @throws {PluginError} If no plugin with that name is registered, or if the
     * `destroy` hook throws (the plugin is still removed in that case)
     */
    unregister(name) {
        const plugin = this.plugins.get(name);
        if (!plugin) {
            throw new types_1.PluginError(`Plugin "${name}" is not registered`, name);
        }
        try {
            if (plugin.destroy) {
                plugin.destroy();
            }
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            throw new types_1.PluginError(`Failed to destroy plugin "${name}": ${errorMessage}`, name, error instanceof Error ? error : undefined);
        }
        finally {
            // Runs on both the success and the failure path.
            this.plugins.delete(name);
            const index = this.initializationOrder.indexOf(name);
            if (index > -1) {
                this.initializationOrder.splice(index, 1);
            }
        }
        console.log(`[PluginManager] Unregistered plugin: ${name}`);
    }
    /**
     * Get a registered plugin
     * @param name Plugin name
     * @returns Plugin instance or undefined
     */
    get(name) {
        return this.plugins.get(name);
    }
    /**
     * Check if a plugin is registered
     * @param name Plugin name
     * @returns True if plugin is registered
     */
    has(name) {
        return this.plugins.has(name);
    }
    /**
     * Get all registered plugins
     * @returns New array containing every registered plugin
     */
    getAll() {
        return Array.from(this.plugins.values());
    }
    /**
     * Get all plugin names in registration order
     * @returns Copy of the name list; mutating it does not affect the registry
     */
    getNames() {
        return [...this.initializationOrder];
    }
    /**
     * Apply filter plugins to HTML content.
     *
     * Each plugin with a `filter` hook receives the output of the previous one.
     * A hook that throws is logged and skipped; a hook that returns a
     * non-string value is ignored.
     * @param html HTML content to process
     * @param context Plugin context, passed through to every hook
     * @returns Processed HTML content
     */
    applyFilterPlugins(html, context) {
        let result = html;
        for (const name of this.initializationOrder) {
            const plugin = this.plugins.get(name);
            if (plugin && plugin.filter) {
                try {
                    const processed = plugin.filter(result, context);
                    if (typeof processed === 'string') {
                        result = processed;
                    }
                }
                catch (error) {
                    const errorMessage = error instanceof Error ? error.message : String(error);
                    console.warn(`[PluginManager] Filter plugin "${name}" failed:`, errorMessage);
                    // Continue with other plugins instead of throwing
                }
            }
        }
        return result;
    }
    /**
     * Apply conversion plugins to Markdown content.
     *
     * Each plugin with a `convert` hook receives the output of the previous one.
     * A hook that throws is logged and skipped; a hook that returns a
     * non-string value is ignored.
     * @param markdown Markdown content to process
     * @param context Plugin context, passed through to every hook
     * @returns Processed Markdown content
     */
    applyConvertPlugins(markdown, context) {
        let result = markdown;
        for (const name of this.initializationOrder) {
            const plugin = this.plugins.get(name);
            if (plugin && plugin.convert) {
                try {
                    const processed = plugin.convert(result, context);
                    if (typeof processed === 'string') {
                        result = processed;
                    }
                }
                catch (error) {
                    const errorMessage = error instanceof Error ? error.message : String(error);
                    console.warn(`[PluginManager] Convert plugin "${name}" failed:`, errorMessage);
                    // Continue with other plugins instead of throwing
                }
            }
        }
        return result;
    }
    /**
     * Clear all plugins.
     *
     * Plugins are unregistered in reverse registration order so their `destroy`
     * hooks run; a failing hook is logged as a warning and does not stop the
     * remaining plugins from being removed.
     */
    clear() {
        // Iterate over a reversed copy because unregister() mutates the order list.
        for (const name of [...this.initializationOrder].reverse()) {
            try {
                this.unregister(name);
            }
            catch (error) {
                const errorMessage = error instanceof Error ? error.message : String(error);
                console.warn(`[PluginManager] Failed to unregister plugin "${name}" during clear:`, errorMessage);
            }
        }
        // Safety net: guarantee an empty registry whatever happened above.
        this.plugins.clear();
        this.initializationOrder.length = 0;
    }
    /**
     * Get registry statistics
     * @returns Object with the total plugin count and how many plugins define
     * each of the `filter`, `convert`, `init` and `destroy` hooks
     */
    getStats() {
        const plugins = this.getAll();
        return {
            total: plugins.length,
            withFilter: plugins.filter(p => typeof p.filter === 'function').length,
            withConvert: plugins.filter(p => typeof p.convert === 'function').length,
            withInit: plugins.filter(p => typeof p.init === 'function').length,
            withDestroy: plugins.filter(p => typeof p.destroy === 'function').length
        };
    }
}
// Shared registry instance used by the module-level helper functions below.
// It is also exported as `pluginRegistry` for callers that want to work with
// the registry object directly.
const globalRegistry = new PluginRegistry();
exports.pluginRegistry = globalRegistry;
/**
 * Register a plugin on the shared global registry.
 *
 * Delegates to `globalRegistry.register`, which runs the plugin's optional
 * `init` hook before storing the plugin under its `name`.
 * @param plugin Plugin to register
 * @throws {PluginError} If a plugin with the same name is already registered
 * or the plugin's `init` hook throws
 */
function usePlugin(plugin) {
globalRegistry.register(plugin);
}
exports.usePlugin = usePlugin;
/**
 * Unregister a plugin from the shared global registry.
 *
 * Delegates to `globalRegistry.unregister`, which calls the plugin's optional
 * `destroy` hook.
 * @param name Plugin name to unregister
 * @throws {PluginError} If no plugin with that name is registered or the
 * plugin's `destroy` hook throws
 */
function removePlugin(name) {
globalRegistry.unregister(name);
}
exports.removePlugin = removePlugin;
/**
 * Look up a plugin on the shared global registry by name.
 * @param name Plugin name
 * @returns The registered plugin object, or undefined when no plugin with
 * that name is registered
 */
function getPlugin(name) {
return globalRegistry.get(name);
}
exports.getPlugin = getPlugin;
/**
 * Check whether a plugin with the given name is registered on the shared
 * global registry.
 * @param name Plugin name
 * @returns True if a plugin with that name is registered, false otherwise
 */
function hasPlugin(name) {
return globalRegistry.has(name);
}
exports.hasPlugin = hasPlugin;
/**
 * Get every plugin currently registered on the shared global registry.
 * @returns A new array of plugin objects; modifying the array does not change
 * the registry
 */
function getAllPlugins() {
return globalRegistry.getAll();
}
exports.getAllPlugins = getAllPlugins;
/**
 * Get the names of all plugins on the shared global registry, in the order
 * they were registered.
 * @returns A copy of the registry's name list
 */
function getPluginNames() {
return globalRegistry.getNames();
}
exports.getPluginNames = getPluginNames;
/**
 * Remove every plugin from the shared global registry.
 *
 * Plugins are unregistered in reverse registration order. A plugin that fails
 * to unregister is reported with `console.warn` instead of throwing, and the
 * registry is emptied regardless.
 */
function clearPlugins() {
globalRegistry.clear();
}
exports.clearPlugins = clearPlugins;
/**
 * Get statistics about the shared global registry.
 * @returns Object with numeric fields `total`, `withFilter`, `withConvert`,
 * `withInit` and `withDestroy`: the plugin count and how many plugins define
 * each hook as a function
 */
function getPluginStats() {
return globalRegistry.getStats();
}
exports.getPluginStats = getPluginStats;
/**
* Built-in plugins for common use cases
*/
exports.builtinPlugins = {
/**
* Plugin to remove advertisement elements
*/
adRemover: {
name: 'ad-remover',
version: version_1.VERSION,
description: 'Removes advertisement elements from HTML',
filter: (html) => {
return html.replace(/<[^>]*class[^>]*(?:ad|advertisement|banner|sponsored)[^>]*>.*?<\/[^>]+>/gi, '');
}
},
/**
* Plugin to remove social media widgets
*/
socialRemover: {
name: 'social-remover',
version: version_1.VERSION,
description: 'Removes social media widgets and share buttons',
filter: (html) => {
return html.replace(/<[^>]*class[^>]*(?:social|share|tweet|facebook|twitter|linkedin)[^>]*>.*?<\/[^>]+>/gi, '');
}
},
/**
* Plugin to clean up Markdown formatting
*/
markdownCleaner: {
name: 'markdown-cleaner',
version: version_1.VERSION,
description: 'Cleans up redundant Markdown formatting',
convert: (markdown) => {
return markdown
.replace(/\n{3,}/g, '\n\n') // Remove excessive line breaks
.replace(/[ \t]+$/gm, '') // Remove trailing whitespace
.trim();
}
}
};
/**
* Register all built-in plugins
*/
function useBuiltinPlugins() {
Object.values(exports.builtinPlugins).forEach(plugin => {
try {
usePlugin(plugin);
}
catch (error) {
const errorMessage = error instanceof Error ? error.message : String(error);
console.warn(`[PluginManager] Failed to register built-in plugin "${plugin.name}":`, errorMessage);
}
});
}
exports.useBuiltinPlugins = useBuiltinPlugins;