UNPKG

besper-frontend-site-dev-main

Version:

Professional B-esper Frontend Site - Site-wide integration toolkit for full website bot deployment

294 lines (261 loc) 9.63 kB
/**
 * @jest-environment jsdom
 */

/**
 * Tests for knowledge classification fixes in KnowledgeTab.js
 *
 * The two helpers below are local copies of the logic under test. They are
 * defined once and shared by every test so the copies cannot drift apart.
 */

/**
 * Reports whether a knowledge item counts as web knowledge.
 *
 * This is the filtering logic from the fixed frontend code. An item matches
 * when any one of these holds:
 *   - its `source` is exactly 'Website' (new format),
 *   - its `metadata.tags` includes 'web_knowledge' (old format),
 *   - its `metadata.classification` is 'website_page' (old format).
 *
 * @param {object} item - Knowledge item; `source` and `metadata` may be absent.
 * @returns {boolean|undefined} Truthy for web knowledge, falsy otherwise
 *   (`undefined` when the optional chains short-circuit).
 */
const isWebKnowledge = item =>
  item.source === 'Website' ||
  item.metadata?.tags?.includes('web_knowledge') ||
  item.metadata?.classification === 'website_page';

/**
 * Extracts the hostname from a URL string.
 *
 * @param {*} url - Candidate URL; a missing protocol defaults to https.
 * @returns {string} The hostname, or '' when `url` is not a non-blank string
 *   or cannot be parsed.
 */
const getDomainFromUrl = url => {
  if (typeof url !== 'string') {
    return '';
  }

  const trimmedUrl = url.trim();
  if (trimmedUrl === '') {
    return '';
  }

  // Ensure URL has protocol if missing
  const hasProtocol =
    trimmedUrl.startsWith('http://') || trimmedUrl.startsWith('https://');
  const normalizedUrl = hasProtocol ? trimmedUrl : `https://${trimmedUrl}`;

  try {
    return new URL(normalizedUrl).hostname;
  } catch {
    // Deliberate best effort: an unparseable URL simply has no domain
    return '';
  }
};

describe('Knowledge Classification Fixes', () => {
  // Mock knowledge items for testing
  const mockKnowledgeItems = [
    // New format - should be filtered as web knowledge
    {
      source: 'Website',
      url: 'https://example.com/page1',
      title: 'Page 1',
      metadata: { domain: 'example.com' },
    },
    // Old format with metadata tags - should be filtered as web knowledge
    {
      file_name: 'old_page.txt',
      metadata: {
        tags: ['web_knowledge'],
        source: 'enhanced_web_scraping',
      },
    },
    // Old format with classification - should be filtered as web knowledge
    {
      file_name: 'another_page.txt',
      metadata: {
        classification: 'website_page',
        source: 'enhanced_web_scraping',
      },
    },
    // Direct upload - should NOT be filtered as web knowledge
    {
      source: 'Text',
      file_name: 'user_text.txt',
      metadata: { type: 'text_input' },
    },
    // Document upload - should NOT be filtered as web knowledge
    {
      source: 'PDF',
      file_name: 'document.pdf',
      metadata: { type: 'document' },
    },
  ];

  test('should correctly identify web knowledge items (new filtering logic)', () => {
    const webKnowledgeItems = mockKnowledgeItems.filter(isWebKnowledge);
    const directKnowledgeItems = mockKnowledgeItems.filter(
      item => !isWebKnowledge(item)
    );

    // Should identify 3 web knowledge items and 2 direct knowledge items
    expect(webKnowledgeItems).toHaveLength(3);
    expect(directKnowledgeItems).toHaveLength(2);

    // Check specific items
    expect(webKnowledgeItems[0].source).toBe('Website');
    expect(webKnowledgeItems[1].metadata.tags).toContain('web_knowledge');
    expect(webKnowledgeItems[2].metadata.classification).toBe('website_page');

    expect(directKnowledgeItems[0].source).toBe('Text');
    expect(directKnowledgeItems[1].source).toBe('PDF');
  });

  test('should handle old filtering logic (for comparison)', () => {
    // This was the old filtering logic that didn't work properly
    const oldDirectUploads = mockKnowledgeItems.filter(
      item => item.source && !['Website'].includes(item.source)
    );

    // The old logic only kept items with an explicit, non-'Website' source,
    // so every item without a source field was silently dropped.
    // This demonstrates why the fix was needed
    expect(oldDirectUploads).toHaveLength(2); // Only items with explicit source field

    // The old logic would miss web knowledge items that don't have source='Website'
    const oldWebsitePages = mockKnowledgeItems.filter(
      item => item.source === 'Website'
    );

    expect(oldWebsitePages).toHaveLength(1); // Would miss 2 web knowledge items
  });

  test('should correctly count items for statistics display', () => {
    // This simulates the statistics counting logic from the fix
    const countDirectKnowledge = mockKnowledgeItems.filter(
      item => !isWebKnowledge(item)
    ).length;
    const countWebPages = mockKnowledgeItems.filter(isWebKnowledge).length;

    expect(countDirectKnowledge).toBe(2);
    expect(countWebPages).toBe(3);
  });

  test('should handle edge cases gracefully', () => {
    const edgeCaseItems = [
      // Item with no metadata
      { source: 'Text', file_name: 'no_metadata.txt' },
      // Item with empty metadata
      { source: 'Document', metadata: {} },
      // Item with null source
      { source: null, metadata: { tags: ['some_tag'] } },
      // Item with undefined metadata.tags
      { metadata: { classification: 'some_other_type' } },
    ];

    const webItems = edgeCaseItems.filter(isWebKnowledge);
    const directItems = edgeCaseItems.filter(item => !isWebKnowledge(item));

    // None of these edge cases should be classified as web knowledge
    expect(webItems).toHaveLength(0);
    expect(directItems).toHaveLength(4);
  });
});

describe('Website Pages Connection Logic', () => {
  test('getDomainFromUrl should extract domain correctly', () => {
    // Test the domain extraction logic that's used in the fix
    expect(getDomainFromUrl('https://example.com')).toBe('example.com');
    expect(getDomainFromUrl('https://www.example.com')).toBe('www.example.com');
    expect(getDomainFromUrl('http://subdomain.example.com')).toBe(
      'subdomain.example.com'
    );
    expect(getDomainFromUrl('example.com')).toBe('example.com');
    expect(getDomainFromUrl('')).toBe('');
    expect(getDomainFromUrl(null)).toBe('');
    expect(getDomainFromUrl(undefined)).toBe('');
    // A bare word gets the https:// prefix and parses as a hostname
    expect(getDomainFromUrl('not-a-valid-url')).toBe('not-a-valid-url');
  });

  test('website pages filtering logic should work correctly', () => {
    // Test the filtering logic used in getScrapedPagesForWebsite
    const mockWebsites = [
      { url: 'https://example.com/page1', title: 'Page 1' },
      { url: 'https://example.com/page2', title: 'Page 2' },
      { url: 'https://different.com/page3', title: 'Page 3' },
      { url: 'https://www.example.com/page4', title: 'Page 4' },
      { metadata: { domain: 'example.com' }, title: 'Page 5' },
    ];

    const getScrapedPagesForWebsite = websiteUrl => {
      const domain = getDomainFromUrl(websiteUrl);

      return mockWebsites.filter(page => {
        // Check if page URL matches the website domain
        if (page.url) {
          return getDomainFromUrl(page.url) === domain;
        }

        // Also check metadata for domain matching
        if (page.metadata?.domain) {
          return page.metadata.domain === domain;
        }

        return false;
      });
    };

    // Test filtering for example.com domain
    const examplePages = getScrapedPagesForWebsite('https://example.com');
    expect(examplePages).toHaveLength(3); // Two from example.com + one with metadata
    expect(examplePages.map(p => p.title)).toEqual([
      'Page 1',
      'Page 2',
      'Page 5',
    ]);

    // Test filtering for different.com domain
    const differentPages = getScrapedPagesForWebsite('https://different.com');
    expect(differentPages).toHaveLength(1);
    expect(differentPages[0].title).toBe('Page 3');

    // Test filtering for www.example.com (should be different from example.com)
    const wwwPages = getScrapedPagesForWebsite('https://www.example.com');
    expect(wwwPages).toHaveLength(1);
    expect(wwwPages[0].title).toBe('Page 4');
  });

  test('loadWebsitePages API call parameters should be correct', async () => {
    // Test that the API call would be made with correct parameters
    const mockApiCall = jest.fn().mockResolvedValue({
      success: true,
      pages: [],
    });

    const credentials = {
      botId: 'test-bot-id',
      managementSecret: 'test-secret',
      managementId: 'test-guid',
    };

    // Padded on purpose so the trim() below is actually exercised
    const testUrl = '  https://example.com  ';
    const environment = 'test';

    // This simulates what happens in loadWebsitePages
    const makeApiCall = () =>
      mockApiCall(
        'get_website_pages',
        'POST',
        {
          bot_id: credentials.botId,
          botmanagementsecret: credentials.managementSecret,
          guid: credentials.managementId,
          website_url: testUrl.trim(),
        },
        environment
      );

    await makeApiCall();

    expect(mockApiCall).toHaveBeenCalledWith(
      'get_website_pages',
      'POST',
      {
        bot_id: 'test-bot-id',
        botmanagementsecret: 'test-secret',
        guid: 'test-guid',
        website_url: 'https://example.com',
      },
      'test'
    );
  });
});