UNPKG

@adobe/spacecat-shared-html-analyzer

Version:

Analyze HTML content visibility for AI crawlers and citations - compare static HTML vs fully rendered content

123 lines (101 loc) 5.19 kB
/*
 * Copyright 2025 Adobe. All rights reserved.
 * This file is licensed to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under
 * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
 * OF ANY KIND, either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

import { expect } from 'chai';
import {
  analyzeTextComparison,
  calculateStats,
  calculateBothScenarioStats,
  stripTagsToText,
} from '../src/index.js';

/**
 * Asserts that `target` exposes every property named in `names`, in order.
 *
 * @param {object} target - Value under test.
 * @param {string[]} names - Property names that must be present.
 */
function expectProperties(target, names) {
  for (const name of names) {
    expect(target).to.have.property(name);
  }
}

/**
 * Asserts that every listed property of `target` is of type `number`.
 *
 * @param {object} target - Value under test.
 * @param {string[]} names - Property names whose values must be numbers.
 */
function expectNumericProperties(target, names) {
  for (const name of names) {
    expect(target[name]).to.be.a('number');
  }
}

/**
 * Asserts that `text` contains each of the given fragments, in order.
 *
 * @param {string} text - Extracted text under test.
 * @param {string[]} fragments - Substrings that must be present.
 */
function expectIncludesAll(text, fragments) {
  for (const fragment of fragments) {
    expect(text).to.include(fragment);
  }
}

/**
 * Asserts that `text` contains none of the given fragments, in order.
 *
 * @param {string} text - Extracted text under test.
 * @param {string[]} fragments - Substrings that must be absent.
 */
function expectIncludesNone(text, fragments) {
  for (const fragment of fragments) {
    expect(text).to.not.include(fragment);
  }
}

describe('HTML Visibility Analyzer', () => {
  // Fixture pair: `richHtml` is `simpleHtml` plus a <script> and an extra <div>.
  const simpleHtml = '<html><body><h1>Title</h1><p>Content here</p></body></html>';
  const richHtml = '<html><body><h1>Title</h1><p>Content here</p><script>console.log("loaded")</script><div class="dynamic">Dynamic content</div></body></html>';

  describe('analyzeTextComparison', () => {
    it('should analyze content differences', async () => {
      const comparison = await analyzeTextComparison(simpleHtml, richHtml);

      expectProperties(comparison, [
        'initialText',
        'finalText',
        'textRetention',
        'wordDiff',
        'lineDiff',
      ]);
    });

    it('should handle identical content', async () => {
      const comparison = await analyzeTextComparison(simpleHtml, simpleHtml);

      // Comparing a document with itself must report full retention.
      expect(comparison.textRetention).to.equal(1);
      expect(comparison.initialText).to.equal(comparison.finalText);
    });

    it('should handle empty content', async () => {
      const comparison = await analyzeTextComparison('', richHtml);

      expect(comparison.initialText).to.equal('');
      expect(comparison.finalText.length).to.be.greaterThan(0);
    });
  });

  describe('calculateStats', () => {
    it('should provide basic comparison statistics', async () => {
      const statFields = [
        'wordCountBefore',
        'wordCountAfter',
        'wordDiff',
        'contentIncreaseRatio',
        'citationReadability',
      ];

      const stats = await calculateStats(simpleHtml, richHtml);

      // Presence is checked for every field first, then the value types.
      expectProperties(stats, statFields);
      expectNumericProperties(stats, statFields);
    });
  });

  describe('calculateBothScenarioStats', () => {
    it('should provide statistics for both scenarios', async () => {
      const scenarios = await calculateBothScenarioStats(simpleHtml, richHtml);

      expectProperties(scenarios, ['withNavFooterIgnored', 'withoutNavFooterIgnored']);

      // Verify withNavFooterIgnored has all required properties
      expectProperties(scenarios.withNavFooterIgnored, [
        'wordCountBefore',
        'wordCountAfter',
        'contentGain',
        'missingWords',
      ]);

      // Verify withoutNavFooterIgnored has all required properties
      // NOTE(review): `contentGain` is asserted for withNavFooterIgnored but not
      // here — confirm against the implementation whether that is intentional.
      expectProperties(scenarios.withoutNavFooterIgnored, [
        'wordCountBefore',
        'wordCountAfter',
        'missingWords',
      ]);
    });
  });

  describe('stripTagsToText', () => {
    // Shared page fixture with <nav> and <footer> wrapped around the main content.
    const pageWithNavFooter = '<html><body><nav>Navigation</nav><h1>Title</h1><p>Content</p><footer>Footer</footer></body></html>';

    it('should extract text content from HTML', async () => {
      const markup = '<div><h1>Title</h1><p>Content with <strong>bold</strong> text</p></div>';

      const extracted = await stripTagsToText(markup);

      expectIncludesAll(extracted, ['Title', 'Content with', 'bold', 'text']);
      // No angle brackets may survive tag stripping.
      expectIncludesNone(extracted, ['<', '>']);
    });

    it('should remove navigation elements when ignoreNavFooter is true', async () => {
      const extracted = await stripTagsToText(pageWithNavFooter, true);

      expectIncludesAll(extracted, ['Title', 'Content']);
      expectIncludesNone(extracted, ['Navigation', 'Footer']);
    });

    it('should keep navigation elements when ignoreNavFooter is false', async () => {
      const extracted = await stripTagsToText(pageWithNavFooter, false);

      expectIncludesAll(extracted, ['Title', 'Content', 'Navigation', 'Footer']);
    });
  });
});