besper-frontend-site-dev-main
Version:
Professional B-esper Frontend Site - Site-wide integration toolkit for full website bot deployment
294 lines (261 loc) • 9.63 kB
JavaScript
/**
* @jest-environment jsdom
*/
/**
* Tests for knowledge classification fixes in KnowledgeTab.js
*/
describe('Knowledge Classification Fixes', () => {
  /**
   * Predicate deciding whether a knowledge item counts as "web knowledge".
   *
   * An item matches when ANY of the following holds:
   *   - its `source` is exactly 'Website' (new format),
   *   - its `metadata.tags` includes 'web_knowledge' (old format), or
   *   - its `metadata.classification` is exactly 'website_page' (old format).
   *
   * NOTE(review): this is a hand-written copy of the filtering logic described
   * as living in KnowledgeTab.js, not an import of it - confirm it stays in
   * sync with the production implementation.
   *
   * @param {object} item - Knowledge item; `source` and `metadata` may be absent.
   * @returns {boolean} true when the item is classified as web knowledge.
   */
  const isWebKnowledge = item =>
    item.source === 'Website' ||
    item.metadata?.tags?.includes('web_knowledge') ||
    item.metadata?.classification === 'website_page';

  // Fixture covering every classification path exercised by the tests below.
  const mockKnowledgeItems = [
    // New format - should be filtered as web knowledge
    {
      source: 'Website',
      url: 'https://example.com/page1',
      title: 'Page 1',
      metadata: { domain: 'example.com' },
    },
    // Old format with metadata tags - should be filtered as web knowledge
    {
      file_name: 'old_page.txt',
      metadata: {
        tags: ['web_knowledge'],
        source: 'enhanced_web_scraping',
      },
    },
    // Old format with classification - should be filtered as web knowledge
    {
      file_name: 'another_page.txt',
      metadata: {
        classification: 'website_page',
        source: 'enhanced_web_scraping',
      },
    },
    // Direct upload - should NOT be filtered as web knowledge
    {
      source: 'Text',
      file_name: 'user_text.txt',
      metadata: { type: 'text_input' },
    },
    // Document upload - should NOT be filtered as web knowledge
    {
      source: 'PDF',
      file_name: 'document.pdf',
      metadata: { type: 'document' },
    },
  ];

  test('should correctly identify web knowledge items (new filtering logic)', () => {
    const webKnowledgeItems = mockKnowledgeItems.filter(isWebKnowledge);
    const directKnowledgeItems = mockKnowledgeItems.filter(
      item => !isWebKnowledge(item)
    );

    // Should identify 3 web knowledge items and 2 direct knowledge items
    expect(webKnowledgeItems).toHaveLength(3);
    expect(directKnowledgeItems).toHaveLength(2);

    // Check specific items (filter preserves fixture order)
    expect(webKnowledgeItems[0].source).toBe('Website');
    expect(webKnowledgeItems[1].metadata.tags).toContain('web_knowledge');
    expect(webKnowledgeItems[2].metadata.classification).toBe('website_page');
    expect(directKnowledgeItems[0].source).toBe('Text');
    expect(directKnowledgeItems[1].source).toBe('PDF');
  });

  test('should handle old filtering logic (for comparison)', () => {
    // Old "direct uploads" logic: requires a truthy top-level `source` that is
    // not 'Website'. Items WITHOUT a `source` field are therefore excluded.
    const oldDirectUploads = mockKnowledgeItems.filter(
      item => item.source && !['Website'].includes(item.source)
    );
    expect(oldDirectUploads).toHaveLength(2); // Only items with explicit source field

    // Old "website pages" logic: only the top-level `source` was inspected, so
    // old-format web items identified via metadata were missed.
    const oldWebsitePages = mockKnowledgeItems.filter(
      item => item.source === 'Website'
    );
    expect(oldWebsitePages).toHaveLength(1); // Would miss 2 web knowledge items

    // This demonstrates why the fix was needed: the two old-format items fall
    // into NEITHER bucket under the old logic.
    const unclassifiedCount =
      mockKnowledgeItems.length -
      oldDirectUploads.length -
      oldWebsitePages.length;
    expect(unclassifiedCount).toBe(2);
  });

  test('should correctly count items for statistics display', () => {
    // This simulates the statistics counting logic from the fix
    const countDirectKnowledge = mockKnowledgeItems.filter(
      item => !isWebKnowledge(item)
    ).length;
    const countWebPages = mockKnowledgeItems.filter(isWebKnowledge).length;

    expect(countDirectKnowledge).toBe(2);
    expect(countWebPages).toBe(3);
    // Every item must land in exactly one of the two buckets.
    expect(countDirectKnowledge + countWebPages).toBe(
      mockKnowledgeItems.length
    );
  });

  test('should handle edge cases gracefully', () => {
    const edgeCaseItems = [
      // Item with no metadata
      { source: 'Text', file_name: 'no_metadata.txt' },
      // Item with empty metadata
      { source: 'Document', metadata: {} },
      // Item with null source
      { source: null, metadata: { tags: ['some_tag'] } },
      // Item with undefined metadata.tags
      { metadata: { classification: 'some_other_type' } },
    ];

    const webItems = edgeCaseItems.filter(isWebKnowledge);
    const directItems = edgeCaseItems.filter(item => !isWebKnowledge(item));

    // None of these edge cases should be classified as web knowledge
    expect(webItems).toHaveLength(0);
    expect(directItems).toHaveLength(4);
  });
});
describe('Website Pages Connection Logic', () => {
  /**
   * Extracts the hostname from a URL string.
   *
   * A missing `http://` / `https://` prefix is tolerated: `https://` is
   * prepended before parsing. Non-string, empty, or whitespace-only input and
   * any value the URL parser rejects all yield an empty string.
   *
   * NOTE(review): this is a hand-written copy of the domain extraction logic
   * the tests describe as "used in the fix", not an import of it - confirm it
   * stays in sync with the production implementation.
   *
   * @param {*} url - Candidate URL; anything other than a non-blank string returns ''.
   * @returns {string} The parsed hostname, or '' when none can be determined.
   */
  const getDomainFromUrl = url => {
    try {
      if (!url || typeof url !== 'string' || url.trim() === '') {
        return '';
      }
      // Ensure URL has protocol if missing
      const trimmedUrl = url.trim();
      const hasProtocol =
        trimmedUrl.startsWith('http://') || trimmedUrl.startsWith('https://');
      const normalizedUrl = hasProtocol ? trimmedUrl : `https://${trimmedUrl}`;
      return new URL(normalizedUrl).hostname;
    } catch {
      // Return empty string if URL parsing fails
      return '';
    }
  };

  test('getDomainFromUrl should extract domain correctly', () => {
    // Well-formed URLs, with and without protocol
    expect(getDomainFromUrl('https://example.com')).toBe('example.com');
    expect(getDomainFromUrl('https://www.example.com')).toBe('www.example.com');
    expect(getDomainFromUrl('http://subdomain.example.com')).toBe(
      'subdomain.example.com'
    );
    expect(getDomainFromUrl('example.com')).toBe('example.com');
    // Surrounding whitespace is trimmed before parsing
    expect(getDomainFromUrl('  https://example.com  ')).toBe('example.com');

    // Guard clause: empty, blank, and non-string input
    expect(getDomainFromUrl('')).toBe('');
    expect(getDomainFromUrl('   ')).toBe('');
    expect(getDomainFromUrl(null)).toBe('');
    expect(getDomainFromUrl(undefined)).toBe('');
    expect(getDomainFromUrl(42)).toBe('');

    // A bare word is a syntactically valid hostname once a protocol is prepended
    expect(getDomainFromUrl('not-a-valid-url')).toBe('not-a-valid-url');

    // Inputs the URL parser rejects exercise the catch branch
    expect(getDomainFromUrl('https://')).toBe('');
    expect(getDomainFromUrl('exa mple.com')).toBe('');
  });

  test('website pages filtering logic should work correctly', () => {
    // Test the filtering logic used in getScrapedPagesForWebsite
    const mockWebsites = [
      { url: 'https://example.com/page1', title: 'Page 1' },
      { url: 'https://example.com/page2', title: 'Page 2' },
      { url: 'https://different.com/page3', title: 'Page 3' },
      { url: 'https://www.example.com/page4', title: 'Page 4' },
      { metadata: { domain: 'example.com' }, title: 'Page 5' },
    ];

    // Returns the pages whose domain matches the given website URL exactly.
    // A page's own `url` takes precedence; `metadata.domain` is consulted only
    // when the page has no `url`.
    const getScrapedPagesForWebsite = websiteUrl => {
      const domain = getDomainFromUrl(websiteUrl);
      return mockWebsites.filter(page => {
        // Check if page URL matches the website domain
        if (page.url) {
          return getDomainFromUrl(page.url) === domain;
        }
        // Also check metadata for domain matching
        if (page.metadata?.domain) {
          return page.metadata.domain === domain;
        }
        return false;
      });
    };

    // Test filtering for example.com domain
    const examplePages = getScrapedPagesForWebsite('https://example.com');
    expect(examplePages).toHaveLength(3); // Two from example.com + one with metadata
    expect(examplePages.map(p => p.title)).toEqual([
      'Page 1',
      'Page 2',
      'Page 5',
    ]);

    // Test filtering for different.com domain
    const differentPages = getScrapedPagesForWebsite('https://different.com');
    expect(differentPages).toHaveLength(1);
    expect(differentPages[0].title).toBe('Page 3');

    // Test filtering for www.example.com (should be different from example.com)
    const wwwPages = getScrapedPagesForWebsite('https://www.example.com');
    expect(wwwPages).toHaveLength(1);
    expect(wwwPages[0].title).toBe('Page 4');
  });

  test('loadWebsitePages API call parameters should be correct', async () => {
    // Test that the API call would be made with correct parameters
    const apiResponse = { success: true, pages: [] };
    const mockApiCall = jest.fn().mockResolvedValue(apiResponse);
    const credentials = {
      botId: 'test-bot-id',
      managementSecret: 'test-secret',
      managementId: 'test-guid',
    };
    // Padded on purpose so the `.trim()` below is actually exercised.
    const testUrl = '  https://example.com  ';
    const environment = 'test';

    // This simulates what happens in loadWebsitePages
    const makeApiCall = () =>
      mockApiCall(
        'get_website_pages',
        'POST',
        {
          bot_id: credentials.botId,
          botmanagementsecret: credentials.managementSecret,
          guid: credentials.managementId,
          website_url: testUrl.trim(),
        },
        environment
      );

    const response = await makeApiCall();

    expect(mockApiCall).toHaveBeenCalledTimes(1);
    expect(mockApiCall).toHaveBeenCalledWith(
      'get_website_pages',
      'POST',
      {
        bot_id: 'test-bot-id',
        botmanagementsecret: 'test-secret',
        guid: 'test-guid',
        website_url: 'https://example.com',
      },
      'test'
    );
    expect(response).toEqual(apiResponse);
  });
});