UNPKG

html-content-processor

Version:

A professional library for processing, cleaning, filtering, and converting HTML content to Markdown. Features advanced customization options, presets, plugin support, fluent API, and TypeScript integration for reliable content extraction.

1 lines 51.5 kB
!function(e,t){"object"==typeof exports&&"object"==typeof module?module.exports=t():"function"==typeof define&&define.amd?define([],t):"object"==typeof exports?exports.htmlFilter=t():e.htmlFilter=t()}(this,(()=>(()=>{var e={78:function(e,t,r){"use strict";var n=this&&this.__awaiter||function(e,t,r,n){return new(r||(r=Promise))((function(i,o){function s(e){try{c(n.next(e))}catch(e){o(e)}}function a(e){try{c(n.throw(e))}catch(e){o(e)}}function c(e){var t;e.done?i(e.value):(t=e.value,t instanceof r?t:new r((function(e){e(t)}))).then(s,a)}c((n=n.apply(e,t||[])).next())}))};Object.defineProperty(t,"__esModule",{value:!0}),t.CustomHtml2Text=void 0;const i=r(886);t.CustomHtml2Text=class{constructor(e="",t={}){this.baseUrl=e,this.options=Object.assign({bodyWidth:0,ignoreEmphasis:!1,ignoreLinks:!1,ignoreImages:!1,protectLinks:!1,singleLineBreak:!1,markCode:!0,escapeSnob:!1,skipInternalLinks:!0,includeSuperSub:!1},t)}updateParams(e){this.options=Object.assign(Object.assign({},this.options),e)}handle(e){return n(this,void 0,void 0,(function*(){if(!e)return"";const t=yield(0,i.parseHTML)(e);return this.cleanDocument(t),yield this.domToMarkdown(t.body)}))}cleanDocument(e){e.querySelectorAll("script").forEach((e=>e.remove())),e.querySelectorAll("style").forEach((e=>e.remove())),["iframe","noscript","svg"].forEach((t=>{e.querySelectorAll(t).forEach((e=>e.remove()))}))}domToMarkdown(e,t=0){var r,i;return n(this,void 0,void 0,(function*(){if(!e)return"";let t="";const n=e.tagName.toLowerCase();switch(n){case"h1":case"h2":case"h3":case"h4":case"h5":case"h6":const o=parseInt(n.substring(1));t+="\n"+"#".repeat(o)+" "+this.getTextContent(e).trim()+"\n\n";break;case"p":t+=(yield this.processChildren(e)).trim()+"\n\n";break;case"br":t+="\n";break;case"hr":t+="\n---\n\n";break;case"ul":t+=(yield this.processList(e,"*"))+"\n";break;case"ol":t+=(yield this.processList(e,"1."))+"\n";break;case"li":t+="* "+(yield this.processChildren(e)).trim()+"\n";break;case"blockquote":t+=(yield this.processBlockquote(e))+"\n";break;case"pre":const s=e.querySelector("code"),a=s?s.textContent:e.textContent;t+="\n```"+((null===(r=null==s?void 0:s.className.match(/language-(\S+)/))||void 0===r?void 0:r[1])||"")+"\n"+(a||"").trim()+"\n```\n\n";break;case"code":"pre"!==(null===(i=e.parentElement)||void 0===i?void 0:i.tagName.toLowerCase())?t+="`"+(e.textContent||"").trim()+"`":t+=(e.textContent||"").trim();break;case"a":if(this.options.ignoreLinks)t+=this.getTextContent(e);else{const r=e.getAttribute("href")||"",n=this.getTextContent(e).trim();if(this.options.skipInternalLinks&&(r.startsWith("#")||""===r))t+=n;else{const i=this.resolveUrl(r),o=e.getAttribute("title")?` "${e.getAttribute("title")}"`:"";t+=n?`[${n}](${i}${o})`:`<${i}>`}}break;case"img":if(!this.options.ignoreImages){const r=e.getAttribute("src")||"",n=e.getAttribute("alt")||"",i=e.getAttribute("title")?` "${e.getAttribute("title")}"`:"";t+=`![${n}](${this.resolveUrl(r)}${i})`}break;case"strong":case"b":this.options.ignoreEmphasis?t+=(yield this.processChildren(e)).trim():t+="**"+(yield this.processChildren(e)).trim()+"**";break;case"em":case"i":this.options.ignoreEmphasis?t+=(yield this.processChildren(e)).trim():t+="_"+(yield this.processChildren(e)).trim()+"_";break;case"table":t+=(yield this.processTable(e))+"\n\n";break;case"sup":this.options.includeSuperSub?t+="^"+this.getTextContent(e).trim()+"^":t+=this.getTextContent(e).trim();break;case"sub":this.options.includeSuperSub?t+="~"+this.getTextContent(e).trim()+"~":t+=this.getTextContent(e).trim();break;default:t+=(yield this.processChildren(e))}return t}))}getTextContent(e){return e?(e.textContent||"").trim():""}processChildren(e){return n(this,void 0,void 0,(function*(){if(!e)return"";const t=yield(0,i.getNode)();let r="";for(const n of Array.from(e.childNodes))n.nodeType===t.TEXT_NODE?r+=(n.textContent||"").replace(/\s+/g," "):n.nodeType===t.ELEMENT_NODE&&(r+=(yield this.domToMarkdown(n)));return r}))}processList(e,t){return n(this,void 0,void 0,(function*(){let r="";const n=e.querySelectorAll("li");for(let e=0;e<n.length;e++){const i=n[e],o=(yield this.processChildren(i)).trim();r+="1."===t?`${e+1}. ${o}\n`:`${t} ${o}\n`}return r}))}processBlockquote(e){return n(this,void 0,void 0,(function*(){return(yield this.processChildren(e)).trim().split("\n").map((e=>`> ${e}`)).join("\n")+"\n"}))}processTable(e){return n(this,void 0,void 0,(function*(){const t=e.querySelectorAll("tr");let r="";for(let e=0;e<t.length;e++){const n=t[e].querySelectorAll("td, th"),i=[];for(const e of Array.from(n))i.push((yield this.processChildren(e)).trim().replace(/\|/g,"\\|"));r+="| "+i.join(" | ")+" |\n",0===e&&(r+="| "+i.map((()=>"---")).join(" | ")+" |\n")}return r}))}resolveUrl(e){if(!this.baseUrl||e.startsWith("http")||e.startsWith("//")||e.startsWith("data:"))return e;try{return new URL(e,this.baseUrl).href}catch(t){return e}}}},330:e=>{"use strict";e.exports=JSON.parse('{"name":"html-content-processor","version":"1.0.5","description":"A professional library for processing, cleaning, filtering, and converting HTML content to Markdown. Features advanced customization options, presets, plugin support, fluent API, and TypeScript integration for reliable content extraction.","main":"dist/index.js","types":"dist/index.d.ts","scripts":{"build":"tsc && webpack --mode production","test":"npm run test:detection","test:detection":"node tests/test-detection-accuracy.js","test:detection:add":"node tests/test-detection-accuracy.js add","test:ci":"npm run build && npm run test:detection","test:url:manage":"node tests/test-url-manager.js","prepare":"npm run build","dev":"webpack serve --mode development","update-docs-version":"node scripts/update-docs-version.js","verify-version-sync":"node scripts/verify-version-sync.js","version":"npm run update-docs-version && git add .","preversion":"npm run verify-version-sync","postversion":"npm run update-docs-version && npm run build"},"keywords":["html","markdown","content-filter","html-processor","content-extraction","html-to-markdown","typescript","page-type-detection","cross-environment","web-scraping","content-cleaning","modern-api","html-filter","content-converter","smart-filtering"],"author":"HTML Filter Strategy Team","license":"MIT","devDependencies":{"copy-webpack-plugin":"^11.0.0","ts-loader":"^9.5.0","typescript":"^4.9.5","webpack":"^5.88.2","webpack-cli":"^5.1.4","webpack-dev-server":"^4.15.1"},"optionalDependencies":{"jsdom":"^26.1.0"},"files":["dist/**/*","README.md","LICENSE"],"repository":{"type":"git","url":"git+https://github.com/kamjin3086/html-content-processor.git"},"bugs":{"url":"https://github.com/kamjin3086/html-content-processor/issues"},"homepage":"https://github.com/kamjin3086/html-content-processor#readme","engines":{"node":">=14.0.0"}}')},331:function(e,t,r){"use strict";var n=this&&this.__awaiter||function(e,t,r,n){return new(r||(r=Promise))((function(i,o){function s(e){try{c(n.next(e))}catch(e){o(e)}}function a(e){try{c(n.throw(e))}catch(e){o(e)}}function c(e){var t;e.done?i(e.value):(t=e.value,t instanceof r?t:new r((function(e){e(t)}))).then(s,a)}c((n=n.apply(e,t||[])).next())}))};Object.defineProperty(t,"__esModule",{value:!0}),t.DefaultMarkdownGenerator=void 0;const i=r(78),o=/!?\[([^\]]+)\]\(([^)]+?)(?:\s+"([^"]*)")?\)/g;function s(e,t){if(t.startsWith("http://")||t.startsWith("https://")||t.startsWith("mailto:")||t.startsWith("//")||t.startsWith("data:"))return t;if(t.startsWith("/")){if(!e)return t;try{const r=new URL(e);return`${r.protocol}//${r.host}${t}`}catch(r){return e.endsWith("/")?e.slice(0,-1)+t:e+t}}if(!e)return t;try{return new URL(t,e).href}catch(e){return t}}t.DefaultMarkdownGenerator=class{constructor(e=null,t={}){this.contentFilter=e,this.options=Object.assign({contentSource:"cleaned_html"},t)}convertLinksToRefs(e,t=""){const r=new Map,n=new Map,i=[];let a,c=0,l=1;for(;null!==(a=o.exec(e));){i.push(e.slice(c,a.index));const[o,u,d,h]=a;let m=d;if(t&&!(d.startsWith("http://")||d.startsWith("https://")||d.startsWith("mailto:")||d.startsWith("data:"))&&(n.has(d)||n.set(d,s(t,d)),m=n.get(d)||d),!r.has(m)){const e=[];h&&e.push(h),u&&u.trim()&&u!==h&&e.push(u.trim()),r.set(m,{refNum:l,description:e.length?": "+e.join(" - "):""}),l++}const g=r.get(m);if(g){const e=u.trim()||m;o.startsWith("!")?i.push(`![${e}⟨${g.refNum}⟩]`):i.push(`${e}⟨${g.refNum}⟩`)}c=a.index+o.length}i.push(e.slice(c));const u=i.join("");if(0===r.size)return[u,""];const d=["\n\n## References\n\n"],h=Array.from(r.entries()).sort(((e,t)=>e[1].refNum-t[1].refNum));for(const[e,t]of h)d.push(`⟨${t.refNum}⟩ ${e}${t.description}\n`);return[u,d.join("")]}generateMarkdown(e,t="",r={},o=null,s=!0){return n(this,void 0,void 0,(function*(){try{const n=new i.CustomHtml2Text(t),a={bodyWidth:0,ignoreEmphasis:!1,ignoreLinks:!1,ignoreImages:!1,protectLinks:!1,singleLineBreak:!0,markCode:!0,escapeSnob:!1},c=Object.assign(Object.assign(Object.assign({},a),this.options),r);n.updateParams(c);const l=e||"";let u;try{u=yield n.handle(l)}catch(e){u=`Error converting HTML to markdown: ${e instanceof Error?e.message:String(e)}`}u=u.replace(/^\s*```/gm,"```");let d=u,h="";if(s)try{[d,h]=this.convertLinksToRefs(u,t)}catch(e){h=`\n\nError generating citations: ${e instanceof Error?e.message:String(e)}`}let m="",g="";const p=o||this.contentFilter;if(p)try{g=(yield p.filterContent(l)).join("\n"),m=yield n.handle(g),m=m.replace(/^\s*```/gm,"```")}catch(e){m=`Error generating fit markdown: ${e instanceof Error?e.message:String(e)}`,g=`Error during HTML filtering for fit content: ${e instanceof Error?e.message:String(e)}`}return{rawMarkdown:u||"",markdownWithCitations:d||"",referencesMarkdown:h||"",fitMarkdown:m||u,fitHtml:g||""}}catch(e){const t=`Error in markdown generation: ${e instanceof Error?e.message:String(e)}`;return{rawMarkdown:t,markdownWithCitations:t,referencesMarkdown:"",fitMarkdown:t,fitHtml:t}}}))}}},539:(e,t,r)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.useBuiltinPlugins=t.builtinPlugins=t.pluginRegistry=t.getPluginStats=t.clearPlugins=t.getPluginNames=t.getAllPlugins=t.hasPlugin=t.getPlugin=t.removePlugin=t.usePlugin=void 0;const n=r(613),i=r(672),o=new class{constructor(){this.plugins=new Map,this.initializationOrder=[]}register(e){if(this.plugins.has(e.name))throw new n.PluginError(`Plugin with name "${e.name}" is already registered`,e.name);try{e.init&&e.init(),this.plugins.set(e.name,e),this.initializationOrder.push(e.name),console.log(`[PluginManager] Registered plugin: ${e.name}${e.version?` v${e.version}`:""}`)}catch(t){const r=t instanceof Error?t.message:String(t);throw new n.PluginError(`Failed to initialize plugin "${e.name}": ${r}`,e.name,t instanceof Error?t:void 0)}}unregister(e){const t=this.plugins.get(e);if(!t)throw new n.PluginError(`Plugin "${e}" is not registered`,e);try{t.destroy&&t.destroy(),this.plugins.delete(e);const r=this.initializationOrder.indexOf(e);r>-1&&this.initializationOrder.splice(r,1),console.log(`[PluginManager] Unregistered plugin: ${e}`)}catch(t){const r=t instanceof Error?t.message:String(t);throw new n.PluginError(`Failed to destroy plugin "${e}": ${r}`,e,t instanceof Error?t:void 0)}}get(e){return this.plugins.get(e)}has(e){return this.plugins.has(e)}getAll(){return Array.from(this.plugins.values())}getNames(){return[...this.initializationOrder]}applyFilterPlugins(e,t){let r=e;for(const e of this.initializationOrder){const n=this.plugins.get(e);if(n&&n.filter)try{const e=n.filter(r,t);"string"==typeof e&&(r=e)}catch(t){const r=t instanceof Error?t.message:String(t);console.warn(`[PluginManager] Filter plugin "${e}" failed:`,r)}}return r}applyConvertPlugins(e,t){let r=e;for(const e of this.initializationOrder){const n=this.plugins.get(e);if(n&&n.convert)try{const e=n.convert(r,t);"string"==typeof e&&(r=e)}catch(t){const r=t instanceof Error?t.message:String(t);console.warn(`[PluginManager] Convert plugin "${e}" failed:`,r)}}return r}clear(){for(const e of[...this.initializationOrder].reverse())try{this.unregister(e)}catch(t){const r=t instanceof Error?t.message:String(t);console.warn(`[PluginManager] Failed to unregister plugin "${e}" during clear:`,r)}this.plugins.clear(),this.initializationOrder.length=0}getStats(){const e=this.getAll();return{total:e.length,withFilter:e.filter((e=>"function"==typeof e.filter)).length,withConvert:e.filter((e=>"function"==typeof e.convert)).length,withInit:e.filter((e=>"function"==typeof e.init)).length,withDestroy:e.filter((e=>"function"==typeof e.destroy)).length}}};function s(e){o.register(e)}t.pluginRegistry=o,t.usePlugin=s,t.removePlugin=function(e){o.unregister(e)},t.getPlugin=function(e){return o.get(e)},t.hasPlugin=function(e){return o.has(e)},t.getAllPlugins=function(){return o.getAll()},t.getPluginNames=function(){return o.getNames()},t.clearPlugins=function(){o.clear()},t.getPluginStats=function(){return o.getStats()},t.builtinPlugins={adRemover:{name:"ad-remover",version:i.VERSION,description:"Removes advertisement elements from HTML",filter:e=>e.replace(/<[^>]*class[^>]*(?:ad|advertisement|banner|sponsored)[^>]*>.*?<\/[^>]+>/gi,"")},socialRemover:{name:"social-remover",version:i.VERSION,description:"Removes social media widgets and share buttons",filter:e=>e.replace(/<[^>]*class[^>]*(?:social|share|tweet|facebook|twitter|linkedin)[^>]*>.*?<\/[^>]+>/gi,"")},markdownCleaner:{name:"markdown-cleaner",version:i.VERSION,description:"Cleans up redundant Markdown formatting",convert:e=>e.replace(/\n{3,}/g,"\n\n").replace(/[ \t]+$/gm,"").trim()}},t.useBuiltinPlugins=function(){Object.values(t.builtinPlugins).forEach((e=>{try{s(e)}catch(t){const r=t instanceof Error?t.message:String(t);console.warn(`[PluginManager] Failed to register built-in plugin "${e.name}":`,r)}}))}},542:function(e,t,r){"use strict";var n=this&&this.__awaiter||function(e,t,r,n){return new(r||(r=Promise))((function(i,o){function s(e){try{c(n.next(e))}catch(e){o(e)}}function a(e){try{c(n.throw(e))}catch(e){o(e)}}function c(e){var t;e.done?i(e.value):(t=e.value,t instanceof r?t:new r((function(e){e(t)}))).then(s,a)}c((n=n.apply(e,t||[])).next())}))};Object.defineProperty(t,"__esModule",{value:!0}),t.HtmlFilter=void 0;const i=r(886);t.HtmlFilter=class{constructor(e,t="dynamic",r=.48){this.includedTags=new Set(["p","div","article","section","main","content","h1","h2","h3","h4","h5","h6","blockquote","pre","code","ul","ol","li","table","thead","tbody","tr","td","th","figure","figcaption","img","video","audio","embed","iframe","object","strong","em","b","i","u","mark","small","del","ins","sub","sup","a","span","time","address","cite","q","dfn","abbr","data","var","samp","kbd","br","hr","wbr"]),this.excludedTags=new Set(["nav","header","footer","aside","menu","menuitem","script","style","meta","link","title","head","noscript","template","slot","form","input","textarea","button","select","option","optgroup","label","fieldset","legend","canvas","svg","math"]),this.headerTags=new Set(["h1","h2","h3","h4","h5","h6"]),this.negativePattern=/comment|meta|footer|footnote|sidebar|nav|advertisement|banner|social|share|related|recommended|trending|popular|ads?|popup|modal|overlay|cookie|consent|notification|breadcrumb|pagination|search-suggest|autocomplete/i,this.minWordCount=e||2,this.threshold=r,this.thresholdType=t,this.tagImportance={article:1.5,main:1.5,section:1.2,div:.6,p:1.1,h1:1.4,h2:1.3,h3:1.2,h4:1.15,h5:1.1,h6:1.05,blockquote:1.1,ul:.9,ol:.9,li:.85,table:.9,tr:.8,td:.8,th:.85,figure:1,figcaption:.9,code:.9,pre:.9,strong:.95,em:.95,b:.9,i:.9,a:.8,span:.6},this.metricConfig={textDensity:!0,linkDensity:!0,tagWeight:!0,classIdWeight:!0,textLength:!0},this.metricWeights={textDensity:.35,linkDensity:.15,tagWeight:.25,classIdWeight:.15,textLength:.1}}filterContent(e){return n(this,void 0,void 0,(function*(){if(!e)return[];let t=yield(0,i.parseHTML)(e);t.body||(t=yield(0,i.parseHTML)(`<body>${e}</body>`)),yield this.removeComments(t),this.removeUnwantedTags(t);const r=t.body;this.pruneTree(r);const n=[];return Array.from(r.children).forEach((e=>{e.textContent&&e.textContent.trim().length>0&&n.push(e.outerHTML)})),n}))}filterContentAsString(e){return n(this,void 0,void 0,(function*(){return(yield this.filterContent(e)).join("")}))}removeComments(e){return n(this,void 0,void 0,(function*(){const t=yield(0,i.getNodeFilter)(),r=(yield(0,i.getDocument)()).createNodeIterator(e,t.SHOW_COMMENT,null);let n;const o=[];for(;n=r.nextNode();)n&&o.push(n);o.forEach((e=>{var t;null===(t=e.parentNode)||void 0===t||t.removeChild(e)}))}))}removeUnwantedTags(e){this.excludedTags.forEach((t=>{const r=e.getElementsByTagName(t);Array.from(r).forEach((e=>{var t;null===(t=e.parentNode)||void 0===t||t.removeChild(e)}))})),['[style*="display:none"]','[style*="visibility:hidden"]',"[hidden]",'[class*="ad"]','[id*="ad"]','[class*="advertisement"]','[id*="advertisement"]','[class*="banner"]','[id*="banner"]','[class*="suggest"]','[id*="suggest"]','[class*="autocomplete"]','[id*="autocomplete"]','[class*="dropdown"]','[id*="dropdown"]','[class*="popup"]','[id*="popup"]','[class*="modal"]','[id*="modal"]','[class*="overlay"]','[id*="overlay"]','[class*="breadcrumb"]','[id*="breadcrumb"]','[class*="pagination"]','[id*="pagination"]','[class*="toolbar"]','[id*="toolbar"]','[class*="sidebar"]','[id*="sidebar"]','[class*="cookie"]','[id*="cookie"]','[class*="consent"]','[id*="consent"]','[class*="notification"]','[id*="notification"]'].forEach((t=>{try{const r=e.querySelectorAll(t);Array.from(r).forEach((e=>{var t;this.isMainContentContainer(e)||null===(t=e.parentNode)||void 0===t||t.removeChild(e)}))}catch(e){}}))}isMainContentContainer(e){const t=e.tagName.toLowerCase(),r=e.className.toLowerCase(),n=e.id.toLowerCase();return!!["main","article","section"].includes(t)||["content","main","article","post","entry","text"].some((e=>r.includes(e)||n.includes(e)))}pruneTree(e){if(!e||!e.tagName)return;const t=e.tagName.toLowerCase(),r=e.textContent?e.textContent.trim().length:0,n=e.innerHTML.length,i=Array.from(e.querySelectorAll("a")).reduce(((e,t)=>e+(t.textContent?t.textContent.trim().length:0)),0),o={node:e,tagName:t,textLen:r,tagLen:n,linkTextLen:i},s=this.computeCompositeScore(o,r,n,i);let a=!1;if("fixed"===this.thresholdType)a=s<this.threshold;else{const e=this.tagImportance[o.tagName]||.7,t=n>0?r/n:0;let i=this.threshold;e>1&&(i*=.8),t>.4&&(i*=.9),a=s<i}if(a&&e.parentNode&&e.parentNode!==e.ownerDocument)return void e.parentNode.removeChild(e);const c=Array.from(e.children);for(let e=0;e<c.length;e++)this.pruneTree(c[e]);0===e.children.length&&0===(e.textContent||"").trim().length&&!this.isEssentialTag(t)&&this.countWords(e.textContent||"")<this.minWordCount&&e.parentNode&&e.parentNode!==e.ownerDocument&&e.parentNode.removeChild(e)}isEssentialTag(e){return this.includedTags.has(e)}computeCompositeScore(e,t,r,n){let i=0,o=0;return this.metricConfig.textDensity&&(i+=(r>0?t/r:0)*this.metricWeights.textDensity,o+=this.metricWeights.textDensity),this.metricConfig.linkDensity&&(i-=(t>0?n/t:0)*this.metricWeights.linkDensity,o+=this.metricWeights.linkDensity),this.metricConfig.tagWeight&&(i+=(this.tagImportance[e.tagName]||.5)*this.metricWeights.tagWeight,o+=this.metricWeights.tagWeight),this.metricConfig.classIdWeight&&(i+=this.computeClassIdWeight(e.node)*this.metricWeights.classIdWeight,o+=this.metricWeights.classIdWeight),this.metricConfig.textLength&&(i+=Math.min(1,t/100)*this.metricWeights.textLength,o+=this.metricWeights.textLength),o>0?i/o:0}computeClassIdWeight(e){let t=0;const r=`${e.className} ${e.id}`.toLowerCase();return this.negativePattern.test(r)&&(t-=.5),t}countWords(e){return e?e.trim().split(/\s+/).length:0}}},589:e=>{function t(e){var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}t.keys=()=>[],t.resolve=t,t.id=589,e.exports=t},598:function(e,t,r){"use strict";var n=this&&this.__awaiter||function(e,t,r,n){return new(r||(r=Promise))((function(i,o){function s(e){try{c(n.next(e))}catch(e){o(e)}}function a(e){try{c(n.throw(e))}catch(e){o(e)}}function c(e){var t;e.done?i(e.value):(t=e.value,t instanceof r?t:new r((function(e){e(t)}))).then(s,a)}c((n=n.apply(e,t||[])).next())}))};Object.defineProperty(t,"__esModule",{value:!0}),t.HtmlProcessor=void 0;const i=r(542),o=r(331),s=r(613),a=r(840),c=r(539),l=r(685);class u{constructor(e={}){this.processed=!1,this.dom=null,this.filteredDom=null,this.filterStats=null,this.pageTypeResult=null,this.autoDetectEnabled=!1,this.options=this.resolveOptions(e),this.htmlFilter=this.createHtmlFilter(),this.markdownGenerator=this.createMarkdownGenerator(),this.currentHtml="",this.baseUrl=this.options.baseUrl||""}static from(e,t={}){const r=new u(t);return r.currentHtml=e||"",r}withBaseUrl(e){return this.baseUrl=e,this.options.baseUrl=e,this}withOptions(e){return this.options=Object.assign(Object.assign(Object.assign({},this.options),e),{filter:Object.assign(Object.assign({},this.options.filter),e.filter),converter:Object.assign(Object.assign({},this.options.converter),e.converter)}),this.htmlFilter=this.createHtmlFilter(),this.markdownGenerator=this.createMarkdownGenerator(),this.processed=!1,this}filter(e){var t;return n(this,void 0,void 0,(function*(){try{const r=Date.now();this.autoDetectEnabled&&!this.pageTypeResult&&(yield this.detectPageType());const n=Object.assign(Object.assign(Object.assign({},this.options.filter),(null===(t=this.pageTypeResult)||void 0===t?void 0:t.filterOptions)||{}),e);this.options.filter=n,this.htmlFilter=this.createHtmlFilter();const i={options:this.options,baseUrl:this.baseUrl,originalHtml:this.currentHtml,metadata:{}};let o=c.pluginRegistry.applyFilterPlugins(this.currentHtml,i);const s=yield this.htmlFilter.filterContentAsString(o);this.currentHtml=s||this.currentHtml,this.processed=!0;const a=Date.now()-r;return this.options.debug&&console.log(`[HtmlProcessor] Filtering completed in ${a}ms`),this}catch(e){const t=e instanceof Error?e.message:String(e);throw new s.FilterError(`HTML filtering failed: ${t}`,e instanceof Error?e:void 0)}}))}toMarkdown(e){return n(this,void 0,void 0,(function*(){try{const t=Date.now(),r=Object.assign(Object.assign({},this.options.converter),e),n={ignoreLinks:r.ignoreLinks,ignoreImages:r.ignoreImages,escapeSnob:r.escapeSpecialChars},i=yield this.markdownGenerator.generateMarkdown(this.currentHtml,this.baseUrl,n,null,!1!==r.citations),o={options:this.options,baseUrl:this.baseUrl,originalHtml:this.currentHtml,metadata:{}};let s=c.pluginRegistry.applyConvertPlugins(i.rawMarkdown,o);const a=Date.now()-t,l={wordCount:this.countWords(s),linkCount:this.countMatches(s,/\[([^\]]+)\]\([^)]+\)/g),imageCount:this.countMatches(s,/!\[([^\]]*)\]\([^)]+\)/g),headingCount:this.countMatches(s,/^#+\s/gm),processingTime:a,sourceLength:this.currentHtml.length};return{content:s,contentWithCitations:i.markdownWithCitations,references:i.referencesMarkdown,metadata:l}}catch(e){const t=e instanceof Error?e.message:String(e);throw new s.ConversionError(`Markdown conversion failed: ${t}`,e instanceof Error?e:void 0)}}))}toText(){return n(this,void 0,void 0,(function*(){return(yield this.toMarkdown({ignoreLinks:!0,ignoreImages:!0})).content.replace(/^#+\s*/gm,"").replace(/\*\*(.*?)\*\*/g,"$1").replace(/\*(.*?)\*/g,"$1").replace(/`(.*?)`/g,"$1").replace(/\n{2,}/g,"\n\n").trim()}))}toArray(){return n(this,void 0,void 0,(function*(){if(!this.currentHtml)return[];try{const e=yield this.htmlFilter.filterContent(this.currentHtml);return e&&e.length>0?e:[this.currentHtml]}catch(e){return console.warn("[HtmlProcessor] Failed to convert to array:",e),[this.currentHtml]}}))}toString(){return this.currentHtml}toClean(){return this.toString()}getFilterResult(){return n(this,void 0,void 0,(function*(){const e=this.currentHtml,t=Date.now();try{const r=yield this.toArray(),n=Date.now()-t,i={originalLength:e.length,filteredLength:this.currentHtml.length,reductionPercent:e.length>0?Math.round(100*(1-this.currentHtml.length/e.length)):0,elementsRemoved:this.countElements(e)-this.countElements(this.currentHtml),processingTime:n};return{content:this.currentHtml,fragments:r,original:e,metadata:i}}catch(e){const t=e instanceof Error?e.message:String(e);throw new s.FilterError(`Failed to generate filter result: ${t}`,e instanceof Error?e:void 0)}}))}getOptions(){return JSON.parse(JSON.stringify(this.options))}isProcessed(){return this.processed}getHtml(){return this.currentHtml}getBaseUrl(){return this.baseUrl}resolveOptions(e){return e.preset?(0,a.mergeWithPreset)(e.preset,e):{filter:Object.assign({threshold:2,strategy:"dynamic",ratio:.48,minWords:0,preserveStructure:!1},e.filter),converter:Object.assign({citations:!0,ignoreLinks:!1,ignoreImages:!1,format:"github",linkStyle:"inline",escapeSpecialChars:!1},e.converter),baseUrl:e.baseUrl||"",preset:e.preset}}createHtmlFilter(){const e=this.options.filter;return new i.HtmlFilter(e.threshold||2,e.strategy||"dynamic",e.ratio||.48)}createMarkdownGenerator(){const e=this.options.converter,t={ignoreLinks:e.ignoreLinks||!1,ignoreImages:e.ignoreImages||!1,escapeSnob:e.escapeSpecialChars||!1};return new o.DefaultMarkdownGenerator(this.htmlFilter,t)}countWords(e){return e.trim().split(/\s+/).filter((e=>e.length>0)).length}countMatches(e,t){const r=e.match(t);return r?r.length:0}countElements(e){return this.countMatches(e,/<[^>]+>/g)}withAutoDetection(e){return n(this,void 0,void 0,(function*(){this.autoDetectEnabled=!0;const t=new l.PageTypeDetector;return this.pageTypeResult=yield t.detectPageType(this.currentHtml,e||this.baseUrl),this.options.debug&&this.pageTypeResult&&(console.log(`[HtmlProcessor] Auto-detected page type: ${this.pageTypeResult.type} (confidence: ${(100*this.pageTypeResult.confidence).toFixed(1)}%)`),console.log("[HtmlProcessor] Detection reasons:",this.pageTypeResult.reasons)),this.options.filter=Object.assign(Object.assign({},this.options.filter),this.pageTypeResult.filterOptions),this}))}getPageTypeResult(){return this.pageTypeResult}isAutoDetectionEnabled(){return this.autoDetectEnabled}withPageType(e){return n(this,void 0,void 0,(function*(){this.autoDetectEnabled=!1;const t=new l.PageTypeDetector;this.pageTypeResult=yield t.detectPageType(""),this.pageTypeResult.type=e,this.pageTypeResult.confidence=1,this.pageTypeResult.reasons=[`Manually set to ${e}`];const r=t.getFilterOptionsForType(e,this.pageTypeResult.characteristics);return this.pageTypeResult.filterOptions=r,this.options.filter=Object.assign(Object.assign({},this.options.filter),r),this.options.debug&&console.log(`[HtmlProcessor] Page type manually set to: ${e}`),this}))}detectPageType(){return n(this,void 0,void 0,(function*(){if(!this.pageTypeResult){const e=new l.PageTypeDetector;this.pageTypeResult=yield e.detectPageType(this.currentHtml,this.baseUrl)}}))}}t.HtmlProcessor=u},613:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.PluginError=t.ConversionError=t.FilterError=t.ProcessorError=void 0;class r extends Error{constructor(e,t,r){super(e),this.code=t,this.cause=r,this.name="ProcessorError"}}t.ProcessorError=r,t.FilterError=class extends r{constructor(e,t){super(e,"FILTER_ERROR",t),this.name="FilterError"}},t.ConversionError=class extends r{constructor(e,t){super(e,"CONVERSION_ERROR",t),this.name="ConversionError"}},t.PluginError=class extends r{constructor(e,t,r){super(e,"PLUGIN_ERROR",r),this.pluginName=t,this.name="PluginError"}}},672:(e,t,r)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.version=t.getVersionInfo=t.API_VERSION=t.VERSION=void 0;const n=r(330);t.VERSION=n.version,t.API_VERSION="v1",t.getVersionInfo=function(){return{version:t.VERSION,apiVersion:t.API_VERSION,name:n.name,description:n.description}},t.version=t.VERSION},685:function(e,t,r){"use strict";var n=this&&this.__awaiter||function(e,t,r,n){return new(r||(r=Promise))((function(i,o){function s(e){try{c(n.next(e))}catch(e){o(e)}}function a(e){try{c(n.throw(e))}catch(e){o(e)}}function c(e){var t;e.done?i(e.value):(t=e.value,t instanceof r?t:new r((function(e){e(t)}))).then(s,a)}c((n=n.apply(e,t||[])).next())}))};Object.defineProperty(t,"__esModule",{value:!0}),t.pageTypeDetector=t.PageTypeDetector=void 0;const i=r(886);class o{constructor(){this.detectionRules=[{type:"search-engine",weight:.9,check:(e,t)=>this.hasSearchEngineIndicators(e,t),reason:"Contains search engine indicators (search box, results, suggestions)"},{type:"search-engine",weight:.8,check:e=>/baidu|google|bing|yahoo|duckduckgo|yandex/i.test(e),reason:"Contains search engine domain indicators"},{type:"search-engine",weight:.7,check:(e,t,r)=>r.hasSearch&&r.linkDensity>.6,reason:"High link density with search functionality"},{type:"blog",weight:.8,check:(e,t)=>this.hasBlogIndicators(e,t),reason:"Contains blog-specific elements (posts, archives, categories)"},{type:"blog",weight:.7,check:(e,t,r)=>r.hasArticleContent&&r.hasComments,reason:"Has article content with comments section"},{type:"news",weight:.8,check:(e,t)=>this.hasNewsIndicators(e,t),reason:"Contains news-specific elements (headlines, bylines, timestamps)"},{type:"news",weight:.7,check:e=>/news|breaking|headline|reporter|journalist/i.test(e),reason:"Contains news-related keywords"},{type:"documentation",weight:.9,check:(e,t,r)=>r.hasCodeBlocks&&this.hasDocIndicators(e,t),reason:"Contains code blocks and documentation structure"},{type:"documentation",weight:.8,check:e=>/api|docs|documentation|guide|tutorial|reference/i.test(e),reason:"Contains documentation keywords"},{type:"e-commerce",weight:.8,check:(e,t,r)=>r.hasProductListings,reason:"Contains product listings and pricing"},{type:"e-commerce",weight:.7,check:e=>/price|cart|buy|shop|product|order|checkout/i.test(e),reason:"Contains e-commerce keywords"},{type:"social-media",weight:.8,check:(e,t,r)=>r.hasSocialFeatures,reason:"Contains social media features (likes, shares, follows)"},{type:"social-media",weight:.7,check:e=>/facebook|twitter|instagram|linkedin|social|follow|like|share/i.test(e),reason:"Contains social media indicators"},{type:"forum",weight:.8,check:(e,t)=>this.hasForumIndicators(e,t),reason:"Contains forum structure (threads, posts, users)"},{type:"article",weight:.7,check:(e,t,r)=>r.hasArticleContent&&r.textDensity>.4,reason:"High text density with article structure"},{type:"landing-page",weight:.6,check:(e,t,r)=>r.formCount>0&&r.linkDensity<.3,reason:"Contains forms with low link density (typical of landing pages)"}]}detectPageType(e,t){return n(this,void 0,void 0,(function*(){if(!e||"string"!=typeof e)return this.createUnknownResult("Empty or invalid HTML content");try{const r=yield(0,i.parseHTML)(e),n=this.analyzePageCharacteristics(e,r),o=this.calculateTypeScores(e,r,n,t),s=Object.entries(o).sort((([,e],[,t])=>t.score-e.score))[0];if(!s||s[1].score<.3)return this.createUnknownResult("No clear page type detected");const[a,c]=s,l=this.getFilterOptionsForType(a,n);return{type:a,confidence:c.score,filterOptions:l,reasons:c.reasons,characteristics:n}}catch(e){return console.warn("[PageTypeDetector] Detection failed:",e),this.createUnknownResult("Detection failed due to parsing error")}}))}analyzePageCharacteristics(e,t){const r=t.body||t,n=(r.textContent||"").trim().length,i=e.length,o=i>0?n/i:0,s=r.querySelectorAll("a"),a=Array.from(s).reduce(((e,t)=>{var r;return e+((null===(r=t.textContent)||void 0===r?void 0:r.trim().length)||0)}),0),c=n>0?a/n:0;return{hasSearch:this.hasSearchElements(t),hasNavigation:this.hasNavigationElements(t),hasArticleContent:this.hasArticleElements(t),hasProductListings:this.hasProductElements(t),hasSocialFeatures:this.hasSocialElements(t),hasComments:this.hasCommentElements(t),hasCodeBlocks:this.hasCodeElements(t),linkDensity:c,textDensity:o,formCount:t.querySelectorAll("form").length,imageCount:t.querySelectorAll("img").length}}calculateTypeScores(e,t,r,n){const i={};["search-engine","blog","news","documentation","e-commerce","social-media","forum","landing-page","article"].forEach((e=>{i[e]={score:0,reasons:[]}})),n&&this.applyUrlHints(n,i);for(const n of this.detectionRules)try{n.check(e,t,r)&&(i[n.type].score+=n.weight,i[n.type].reasons.push(n.reason))}catch(e){}return Object.keys(i).forEach((e=>{i[e].score=Math.min(1,i[e].score)})),i}applyUrlHints(e,t){const r=e.toLowerCase(),n=[{pattern:/google|bing|yahoo|baidu|duckduckgo/,type:"search-engine",weight:.5},{pattern:/blog|wordpress|medium|substack/,type:"blog",weight:.4},{pattern:/news|cnn|bbc|reuters|ap/,type:"news",weight:.4},{pattern:/docs|documentation|api|guide/,type:"documentation",weight:.4},{pattern:/shop|store|amazon|ebay|buy/,type:"e-commerce",weight:.4},{pattern:/facebook|twitter|instagram|linkedin/,type:"social-media",weight:.4},{pattern:/forum|reddit|stackoverflow/,type:"forum",weight:.4}];for(const e of n)e.pattern.test(r)&&(t[e.type].score+=e.weight,t[e.type].reasons.push(`URL indicates ${e.type} site`))}getFilterOptionsForType(e,t){const r={threshold:2,strategy:"dynamic",ratio:.48,minWords:2,preserveStructure:!1};switch(e){case"search-engine":return Object.assign(Object.assign({},r),{threshold:8,strategy:"dynamic",ratio:.2,minWords:1,removeElements:["script","style","nav","header","footer","aside","form"]});case"blog":return Object.assign(Object.assign({},r),{threshold:3,strategy:"dynamic",ratio:.4,minWords:5,preserveStructure:!0,keepElements:["article","main","h1","h2","h3","h4","h5","h6","p","blockquote"]});case"news":return Object.assign(Object.assign({},r),{threshold:4,strategy:"dynamic",ratio:.35,minWords:10,preserveStructure:!0,keepElements:["article","main","h1","h2","h3","p","time","figure"]});case"documentation":return Object.assign(Object.assign({},r),{threshold:2,strategy:"fixed",ratio:.3,minWords:3,preserveStructure:!0,keepElements:["article","main","section","h1","h2","h3","h4","h5","h6","p","pre","code","ul","ol","li"]});case"e-commerce":return Object.assign(Object.assign({},r),{threshold:5,strategy:"dynamic",ratio:.25,minWords:2,removeElements:["nav","header","footer","aside","form","script","style"]});case"social-media":return Object.assign(Object.assign({},r),{threshold:6,strategy:"dynamic",ratio:.3,minWords:3,removeElements:["nav","header","footer","aside","script","style","form"]});case"forum":return Object.assign(Object.assign({},r),{threshold:3,strategy:"dynamic",ratio:.35,minWords:5,preserveStructure:!0});case"article":return Object.assign(Object.assign({},r),{threshold:2,strategy:"fixed",ratio:.45,minWords:10,preserveStructure:!0,keepElements:["article","main","h1","h2","h3","h4","h5","h6","p","blockquote","figure"]});case"landing-page":return Object.assign(Object.assign({},r),{threshold:4,strategy:"dynamic",ratio:.3,minWords:5,removeElements:["nav","header","footer","aside","script","style"]});default:return r}}hasSearchElements(e){return['input[type="search"]','input[name*="search"]','input[placeholder*="search"]',".search-box","#search",'[class*="search"]'].some((t=>{try{return null!==e.querySelector(t)}catch(e){return!1}}))}hasNavigationElements(e){return!!(e.querySelector("nav")||e.querySelector('[role="navigation"]')||e.querySelector(".navigation")||e.querySelector(".nav")||e.querySelector("#nav"))}hasArticleElements(e){return!!(e.querySelector("article")||e.querySelector('[role="article"]')||e.querySelector(".article")||e.querySelector(".post")||e.querySelector("main"))}hasProductElements(e){return[".product",".item",'[class*="price"]','[class*="cart"]','[class*="buy"]'].some((t=>{try{return null!==e.querySelector(t)}catch(e){return!1}}))}hasSocialElements(e){return['[class*="like"]','[class*="share"]','[class*="follow"]','[class*="social"]',".fb-like",".twitter-share"].some((t=>{try{return null!==e.querySelector(t)}catch(e){return!1}}))}hasCommentElements(e){return[".comment",".comments","#comments",'[class*="comment"]',".disqus"].some((t=>{try{return null!==e.querySelector(t)}catch(e){return!1}}))}hasCodeElements(e){return!!(e.querySelector("pre")||e.querySelector("code")||e.querySelector(".highlight")||e.querySelector(".code"))}hasSearchEngineIndicators(e,t){return[()=>null!==t.querySelector(".search-result"),()=>null!==t.querySelector(".result"),()=>null!==t.querySelector('[class*="suggest"]'),()=>/search.*result|result.*search/i.test(e),()=>/autocomplete|suggestion/i.test(e),()=>/google.*search|baidu.*search|bing.*search/i.test(e)].some((e=>{try{return e()}catch(e){return!1}}))}hasBlogIndicators(e,t){return[()=>null!==t.querySelector(".post"),()=>null!==t.querySelector(".entry"),()=>null!==t.querySelector(".blog"),()=>/posted.*by|published.*on|author/i.test(e),()=>/category|tag|archive/i.test(e)].some((e=>{try{return e()}catch(e){return!1}}))}hasNewsIndicators(e,t){return[()=>null!==t.querySelector("time"),()=>null!==t.querySelector(".byline"),()=>null!==t.querySelector(".headline"),()=>/breaking|news|reporter|journalist/i.test(e),()=>/published|updated|ago/i.test(e)].some((e=>{try{return e()}catch(e){return!1}}))}hasDocIndicators(e,t){return[()=>null!==t.querySelector(".toc"),()=>null!==t.querySelector(".table-of-contents"),()=>/table.*of.*contents|api.*reference/i.test(e),()=>t.querySelectorAll("h1, h2, h3, h4, h5, h6").length>5].some((e=>{try{return e()}catch(e){return!1}}))}hasForumIndicators(e,t){return[()=>null!==t.querySelector(".thread"),()=>null!==t.querySelector(".post"),()=>null!==t.querySelector(".user"),()=>/thread|forum|discussion|reply/i.test(e),()=>/joined|posts|reputation/i.test(e)].some((e=>{try{return e()}catch(e){return!1}}))}createUnknownResult(e){return{type:"unknown",confidence:0,filterOptions:{threshold:2,strategy:"dynamic",ratio:.48,minWords:2,preserveStructure:!1},reasons:[e],characteristics:{hasSearch:!1,hasNavigation:!1,hasArticleContent:!1,hasProductListings:!1,hasSocialFeatures:!1,hasComments:!1,hasCodeBlocks:!1,linkDensity:0,textDensity:0,formCount:0,imageCount:0}}}}t.PageTypeDetector=o,t.pageTypeDetector=new o},840:(e,t)=>{"use strict";function r(e){const r=t.presets[e];if(!r)throw new Error(`Unknown preset: ${e}`);return JSON.parse(JSON.stringify(r))}Object.defineProperty(t,"__esModule",{value:!0}),t.mergeWithPreset=t.hasPreset=t.getPresetNames=t.getPreset=t.presets=void 0,t.presets={default:{filter:{threshold:2,strategy:"dynamic",ratio:.48,minWords:0,preserveStructure:!1},converter:{citations:!0,ignoreLinks:!1,ignoreImages:!1,format:"github",linkStyle:"inline",escapeSpecialChars:!1}},article:{filter:{threshold:3,strategy:"dynamic",ratio:.55,minWords:10,preserveStructure:!0,removeElements:["nav","aside","footer",".ads",".advertisement",".sidebar"]},converter:{citations:!0,ignoreLinks:!1,ignoreImages:!1,format:"github",linkStyle:"reference",escapeSpecialChars:!1}},blog:{filter:{threshold:2,strategy:"dynamic",ratio:.5,minWords:5,preserveStructure:!0,removeElements:["nav","aside",".comments",".social-share",".ads"]},converter:{citations:!1,ignoreLinks:!1,ignoreImages:!1,format:"github",linkStyle:"inline",escapeSpecialChars:!1}},news:{filter:{threshold:2,strategy:"fixed",ratio:.45,minWords:15,preserveStructure:!0,removeElements:["nav","aside","footer",".ads",".advertisement",".related-articles",".social-share",".comments"]},converter:{citations:!0,ignoreLinks:!1,ignoreImages:!1,format:"commonmark",linkStyle:"reference",escapeSpecialChars:!0}},strict:{filter:{threshold:4,strategy:"fixed",ratio:.6,minWords:20,preserveStructure:!1,removeElements:["nav","aside","footer","header",".ads",".advertisement",".sidebar",".comments",".social-share",".related","script","style"],keepElements:["article","main","section","p","h1","h2","h3","h4","h5","h6"]},converter:{citations:!1,ignoreLinks:!0,ignoreImages:!0,format:"commonmark",linkStyle:"reference",escapeSpecialChars:!0}},loose:{filter:{threshold:1,strategy:"dynamic",ratio:.3,minWords:0,preserveStructure:!0,removeElements:["script","style","noscript"]},converter:{citations:!0,ignoreLinks:!1,ignoreImages:!1,format:"github",linkStyle:"inline",escapeSpecialChars:!1}}},t.getPreset=r,t.getPresetNames=function(){return Object.keys(t.presets)},t.hasPreset=function(e){return e in t.presets},t.mergeWithPreset=function(e,t){const n=r(e);return Object.assign(Object.assign(Object.assign({},n),t),{filter:Object.assign(Object.assign({},n.filter),t.filter),converter:Object.assign(Object.assign({},n.converter),t.converter)})}},846:function(e,t,r){"use strict";var n=this&&this.__awaiter||function(e,t,r,n){return new(r||(r=Promise))((function(i,o){function s(e){try{c(n.next(e))}catch(e){o(e)}}function a(e){try{c(n.throw(e))}catch(e){o(e)}}function c(e){var t;e.done?i(e.value):(t=e.value,t instanceof r?t:new r((function(e){e(t)}))).then(s,a)}c((n=n.apply(e,t||[])).next())}))};Object.defineProperty(t,"__esModule",{value:!0}),t.extractContentAuto=t.cleanHtmlAuto=t.htmlToMarkdownAuto=t.createProcessor=t.gentleCleanHtml=t.strictCleanHtml=t.htmlToNewsMarkdown=t.htmlToBlogMarkdown=t.htmlToArticleMarkdown=t.extractContent=t.cleanHtml=t.htmlToText=t.htmlToMarkdownWithCitations=t.htmlToMarkdown=void 0;const i=r(598);t.htmlToMarkdown=function(e,t){return n(this,void 0,void 0,(function*(){const r=i.HtmlProcessor.from(e,(null==t?void 0:t.baseUrl)?{baseUrl:t.baseUrl}:{});return!t||void 0===t.threshold&&void 0===t.strategy&&void 0===t.ratio?yield r.filter():yield r.filter(t),(yield r.toMarkdown(t)).content}))},t.htmlToMarkdownWithCitations=function(e,t,r){return n(this,void 0,void 0,(function*(){const n=i.HtmlProcessor.from(e,{baseUrl:t});!r||void 0===r.threshold&&void 0===r.strategy&&void 0===r.ratio?yield n.filter():yield n.filter(r);const o=yield n.toMarkdown(Object.assign(Object.assign({},r),{citations:!0}));return o.contentWithCitations+(o.references?"\n\n"+o.references:"")}))},t.htmlToText=function(e,t){return n(this,void 0,void 0,(function*(){const r=i.HtmlProcessor.from(e,{filter:t,converter:{ignoreLinks:!0,ignoreImages:!0}});return yield r.filter(t),yield r.toText()}))},t.cleanHtml=function(e,t){return n(this,void 0,void 0,(function*(){const r=i.HtmlProcessor.from(e,{filter:t});return yield r.filter(t),r.toString()}))},t.extractContent=function(e,t){return n(this,void 0,void 0,(function*(){const r=i.HtmlProcessor.from(e,{filter:t});return yield r.filter(t),yield r.toArray()}))},t.htmlToArticleMarkdown=function(e,t){return n(this,void 0,void 0,(function*(){const r=i.HtmlProcessor.from(e,{preset:"article",baseUrl:t});return yield r.filter(),(yield r.toMarkdown()).content}))},t.htmlToBlogMarkdown=function(e,t){return n(this,void 0,void 0,(function*(){const r=i.HtmlProcessor.from(e,{preset:"blog",baseUrl:t});return yield r.filter(),(yield r.toMarkdown()).content}))},t.htmlToNewsMarkdown=function(e,t){return n(this,void 0,void 0,(function*(){const r=i.HtmlProcessor.from(e,{preset:"news",baseUrl:t});return yield r.filter(),(yield r.toMarkdown()).content}))},t.strictCleanHtml=function(e){return n(this,void 0,void 0,(function*(){const t=i.HtmlProcessor.from(e,{preset:"strict"});return yield t.filter(),t.toString()}))},t.gentleCleanHtml=function(e){return n(this,void 0,void 0,(function*(){const t=i.HtmlProcessor.from(e,{preset:"loose"});return yield t.filter(),t.toString()}))},t.createProcessor=function(e){return new i.HtmlProcessor(e)},t.htmlToMarkdownAuto=function(e,t,r={}){return n(this,void 0,void 0,(function*(){const n=yield i.HtmlProcessor.from(e,r).withAutoDetection(t);return yield n.filter(),yield n.toMarkdown()}))},t.cleanHtmlAuto=function(e,t,r={}){return n(this,void 0,void 0,(function*(){const n=yield i.HtmlProcessor.from(e,r).withAutoDetection(t);return yield n.filter(),n.toString()}))},t.extractContentAuto=function(e,t,r={}){return n(this,void 0,void 0,(function*(){const n=yield i.HtmlProcessor.from(e,r).withAutoDetection(t);return yield n.filter(),{markdown:yield n.toMarkdown(),pageType:n.getPageTypeResult(),cleanHtml:n.toString()}}))}},886:function(e,t,r){"use strict";var n=this&&this.__createBinding||(Object.create?function(e,t,r,n){void 0===n&&(n=r);var i=Object.getOwnPropertyDescriptor(t,r);i&&!("get"in i?!t.__esModule:i.writable||i.configurable)||(i={enumerable:!0,get:function(){return t[r]}}),Object.defineProperty(e,n,i)}:function(e,t,r,n){void 0===n&&(n=r),e[n]=t[r]}),i=this&&this.__setModuleDefault||(Object.create?function(e,t){Object.defineProperty(e,"default",{enumerable:!0,value:t})}:function(e,t){e.default=t}),o=this&&this.__importStar||function(e){if(e&&e.__esModule)return e;var t={};if(null!=e)for(var r in e)"default"!==r&&Object.prototype.hasOwnProperty.call(e,r)&&n(t,e,r);return i(t,e),t},s=this&&this.__awaiter||function(e,t,r,n){return new(r||(r=Promise))((function(i,o){function s(e){try{c(n.next(e))}catch(e){o(e)}}function a(e){try{c(n.throw(e))}catch(e){o(e)}}function c(e){var t;e.done?i(e.value):(t=e.value,t instanceof r?t:new r((function(e){e(t)}))).then(s,a)}c((n=n.apply(e,t||[])).next())}))};Object.defineProperty(t,"__esModule",{value:!0}),t.isBrowser=t.isNode=t.getNode=t.getNodeFilter=t.getDocument=t.getDOMParser=t.parseHTML=t.domAdapter=t.NODE_TYPES=void 0,t.NODE_TYPES={TEXT_NODE:3,ELEMENT_NODE:1,COMMENT_NODE:8};class a{static detectEnvironment(){var e,t,r;if("undefined"!=typeof globalThis){const r=globalThis;if(void 0!==r.document&&void 0!==r.window)return"browser";if(void 0!==r.process&&(null===(t=null===(e=r.process)||void 0===e?void 0:e.versions)||void 0===t?void 0:t.node))return"node";if(void 0!==r.WorkerGlobalScope)return"webworker"}return"undefined"!=typeof window?"browser":"undefined"!=typeof process&&(null===(r=process.versions)||void 0===r?void 0:r.node)?"node":"undefined"!=typeof self&&void 0!==self.importScripts?"webworker":"unknown"}static detectDOMParser(){try{return"undefined"!=typeof DOMParser&&new DOMParser instanceof DOMParser}catch(e){return!1}}static detectDocument(){return"undefined"!=typeof document&&(null===document||void 0===document?void 0:document.createElement)&&"function"==typeof document.createElement}}class c{static isSupported(){return"browser"===a.detectEnvironment()&&a.detectDOMParser()&&a.detectDocument()}static createWindow(){if(!this.isSupported())throw new Error("Browser environment does not support required DOM APIs");return{DOMParser:window.DOMParser,document:window.document,NodeFilter:window.NodeFilter,Node:window.Node}}}class l{static isSupported(){return"node"===a.detectEnvironment()}static createWindow(){return s(this,void 0,void 0,(function*(){if(!this.isSupported())throw new Error("Not in Node.js environment");const e=yield this.loadJSDOM();if(e){const{JSDOM:t}=e,r=new t("<!DOCTYPE html><html><body></body></html>");return{DOMParser:r.window.DOMParser,document:r.window.document,NodeFilter:r.window.NodeFilter,Node:r.window.Node}}return this.createFallbackWindow()}))}static loadJSDOM(){return s(this,void 0,void 0,(function*(){if(!1===this.jsdomAvailable)return null;if(this.jsdomCache)return this.jsdomCache;try{const e=yield this.dynamicImport("jsdom");return this.jsdomCache=e,this.jsdomAvailable=!0,e}catch(e){return console.warn("jsdom not found. Installing jsdom is recommended for better performance: npm install jsdom"),this.jsdomAvailable=!1,null}}))}static dynamicImport(e){return s(this,void 0,void 0,(function*(){var t;try{return yield(t=e,Promise.resolve().then((()=>o(r(589)(t)))))}catch(t){try{return new Function("moduleName","return require(moduleName)")(e)}catch(r){const n=(null==t?void 0:t.message)||"Unknown import error";throw new Error(`Cannot load module ${e}: ${n}`)}}}))}static createFallbackWindow(){return{DOMParser:()=>({parseFromString:(e,t)=>({documentElement:{outerHTML:e,innerHTML:e,textContent:e.replace(/<[^>]*>/g,""),querySelectorAll:()=>[],querySelector:()=>null,remove:()=>{},children:[],childNodes:[],parentNode:null,ownerDocument:null},body:{innerHTML:e,textContent:e.replace(/<[^>]*>/g,""),querySelectorAll:()=>[],querySelector:()=>null,children:[],childNodes:[],getElementsByTagName:()=>[],remove:()=>{}},createNodeIterator:()=>({nextNode:()=>null}),createElement:e=>({tagName:e.toUpperCase(),innerHTML:"",textContent:"",remove:()=>{},children:[],childNodes:[],parentNode:null}),getElementsByTagName:()=>[]})}),document:{createNodeIterator:()=>({nextNode:()=>null}),createElement:e=>({tagName:e,remove:()=>{}}),body:null},NodeFilter:{SHOW_COMMENT:128},Node:t.NODE_TYPES}}static hasJSDOM(){return s(this,void 0,void 0,(function*(){if(!this.isSupported())return!1;try{return yield this.loadJSDOM(),!0===this.jsdomAvailable}catch(e){return!1}}))}}l.jsdomCache=null,l.jsdomAvailable=null;class u{constructor(){this._window=null,this._initialized=!1,this._environment=a.detectEnvironment()}static getInstance(){return u.instance||(u.instance=new u),u.instance}get isNode(){return"node"===this._environment}get isBrowser(){return"browser"===this._environment}get isWebWorker(){return"webworker"===this._environment}ensureInitialized(){return s(this,void 0,void 0,(function*(){if(!this._initialized){if(c.isSupported())this._window=c.createWindow();else{if(!l.isSupported())throw new Error(`Unsupported environment: ${this._environment}`);this._window=yield l.createWindow()}this._initialized=!0}}))}getDOMParser(){return s(this,void 0,void 0,(function*(){if(yield this.ensureInitialized(),!this._window)throw new Error("DOM environment not properly initialized");return new this._window.DOMParser}))}getDocument(){return s(this,void 0,void 0,(function*(){if(yield this.ensureInitialized(),!this._window)throw new Error("DOM environment not properly initialized");return this._window.document}))}getNodeFilter(){return s(this,void 0,void 0,(function*(){if(yield this.ensureInitialized(),!this._window)throw new Error("DOM environment not properly initialized");return this._window.NodeFilter}))}getNode(){return s(this,void 0,void 0,(function*(){if(yield this.ensureInitialized(),!this._window)throw new Error("DOM environment not properly initialized");return this._window.Node||t.NODE_TYPES}))}parseHTML(e){return s(this,void 0,void 0,(function*(){return(yield this.getDOMParser()).parseFromString(e,"text/html")}))}hasJSDOM(){return s(this,void 0,void 0,(function*(){return!!this.isNode&&(yield l.hasJSDOM())}))}getEnvironmentInfo(){return s(this,void 0,void 0,(function*(){return{environment:this._environment,isNode:this.isNode,isBrowser:this.isBrowser,isWebWorker:this.isWebWorker,hasJSDOM:yie