UNPKG

chunk-match

Version:

NodeJS library that semantically chunks text and matches it against a user query using cosine similarity for precise and relevant text retrieval

601 lines (515 loc) 22.1 kB
// Default values used to pre-populate the form controls.
import defaultFormValues from './default-form-values.js';

// Model precision names, indexed by the position of the "dtype" range slider.
const DTYPE_NAMES = ['fp32', 'fp16', 'q8', 'q4'];

// Range sliders whose value is a similarity score and therefore gets a
// colour-coded low / moderate / high description next to the number.
const SIMILARITY_SLIDER_IDS = [
  'similarityThreshold',
  'combineChunksSimilarityThreshold',
  'dynamicThresholdLowerBound',
  'dynamicThresholdUpperBound',
  'minSimilarity',
];

// ---------------------------------------------------------------------------
// Initial resource loading
// ---------------------------------------------------------------------------

// Pre-fill the first document's textarea with the bundled sample text.
fetch('sample.txt')
  .then((response) => {
    // Without this check the body of a 404/500 page would land in the textarea.
    if (!response.ok) {
      throw new Error(`Failed to load sample.txt (HTTP ${response.status})`);
    }
    return response.text();
  })
  .then((text) => {
    const firstDocTextarea = document.querySelector(
      '.document-entry textarea[name="documentText"]'
    );
    if (firstDocTextarea) {
      firstDocTextarea.value = text;
    }
  })
  .catch((error) => console.error('Error loading sample text:', error));

// Populate the embedding-model <select> from models.json.
fetch('models.json')
  .then((response) => {
    if (!response.ok) {
      throw new Error(`Failed to load models.json (HTTP ${response.status})`);
    }
    return response.json();
  })
  .then((data) => {
    const select = document.getElementById('onnxEmbeddingModel');
    data.models.forEach((model) => {
      const option = document.createElement('option');
      option.value = model.value;
      option.textContent = model.label;
      select.appendChild(option);
    });
    // The default can only be selected once its <option> exists.
    select.value = defaultFormValues.onnxEmbeddingModel;
  })
  .catch((error) => console.error('Error loading models:', error));

// ---------------------------------------------------------------------------
// Range slider value displays
// ---------------------------------------------------------------------------

document.querySelectorAll('input[type="range"]').forEach((input) => {
  // The element right after each slider is used as its value display.
  const display = input.nextElementSibling;

  // Create the inner number/description spans if the markup lacks them.
  if (!display.querySelector('.number')) {
    const number = document.createElement('span');
    number.className = 'number';
    const description = document.createElement('span');
    description.className = 'description';
    display.appendChild(number);
    display.appendChild(description);
  }

  /**
   * Render the slider's current value, adding a similarity description for
   * the sliders listed in SIMILARITY_SLIDER_IDS.
   * @param {string} value - Current value of the range input.
   */
  function updateSimilarityDisplay(value) {
    const number = display.querySelector('.number');
    const description = display.querySelector('.description');

    if (!SIMILARITY_SLIDER_IDS.includes(input.id)) {
      number.textContent = value;
      description.textContent = '';
      return;
    }

    const val = parseFloat(value);
    let className;
    let text;
    if (val < 0.5) {
      className = 'similarity-low';
      text = 'low similarity';
    } else if (val <= 0.7) {
      className = 'similarity-moderate';
      text = 'moderately similar';
    } else {
      className = 'similarity-high';
      text = 'very similar';
    }

    number.className = `number ${className}`;
    number.textContent = val.toFixed(3);
    description.className = `description ${className}`;
    description.textContent = text;
  }

  updateSimilarityDisplay(input.value);
  input.addEventListener('input', (e) => updateSimilarityDisplay(e.target.value));
});

// ---------------------------------------------------------------------------
// Controls that depend on the "combine chunks" checkbox
// ---------------------------------------------------------------------------

const combineChunksToggle = document.getElementById('combineChunks');
const dependentControls = document.querySelectorAll('.depends-on-combine-chunks');

/**
 * Show and enable, or hide and disable, every control that only applies when
 * chunk combining is switched on.
 */
function updateDependentControls() {
  const isEnabled = combineChunksToggle.checked;

  dependentControls.forEach((control) => {
    if (isEnabled) {
      control.style.display = 'block';
      // Defer removing "hidden" so display: block takes effect first.
      setTimeout(() => {
        // The toggle may have been switched off again during the delay; in
        // that case the hide branch has already run and must not be undone.
        if (!combineChunksToggle.checked) {
          return;
        }
        control.classList.remove('hidden');
        control.querySelectorAll('input').forEach((dependentInput) => {
          dependentInput.disabled = false;
        });
      }, 10);
      return;
    }

    control.classList.add('hidden');
    control.querySelectorAll('input').forEach((dependentInput) => {
      dependentInput.disabled = true;
    });
    // Apply display: none only after the next transition has finished.
    control.addEventListener(
      'transitionend',
      () => {
        if (!combineChunksToggle.checked) {
          control.style.display = 'none';
        }
      },
      { once: true }
    );
  });
}

updateDependentControls();
combineChunksToggle.addEventListener('change', updateDependentControls);

// ---------------------------------------------------------------------------
// Results area
// ---------------------------------------------------------------------------

const form = document.getElementById('chunkForm');
const resultsJson = document.getElementById('resultsJson');
const downloadButton = document.getElementById('downloadButton');
const resultsFooter = document.querySelector('.results-footer');

// Start with an empty results pane and a hidden footer.
resultsJson.textContent = '';
resultsFooter.classList.remove('visible');

// Loading spinner shown in place of the results while a request is running.
const spinner = document.createElement('div');
spinner.className = 'spinner';
resultsJson.parentNode.insertBefore(spinner, resultsJson);

/**
 * On narrow viewports (800px wide or less) scroll the results wrapper into
 * view; does nothing on wider viewports.
 */
function scrollToResults() {
  if (window.innerWidth > 800) {
    return;
  }
  const resultsWrapper = document.querySelector('.results-wrapper');
  if (resultsWrapper) {
    resultsWrapper.scrollIntoView({ behavior: 'smooth' });
  }
}

// ---------------------------------------------------------------------------
// Document list (add / remove entries)
// ---------------------------------------------------------------------------

const documentsContainer = document.getElementById('documents-container');
const addDocumentButton = document.getElementById('addDocument');

addDocumentButton.addEventListener('click', () => {
  const newDoc = document.createElement('div');
  newDoc.className = 'document-entry';
  newDoc.innerHTML = `
    <div class="form-group document-name-group">
      <label for="documentName">Document Name:</label>
      <div class="input-with-buttons">
        <input type="text" name="documentName" required>
        <button type="button" class="remove-document secondary-button">Remove</button>
      </div>
    </div>
    <div class="form-group">
      <label for="documentText">Document Text:</label>
      <textarea name="documentText" required></textarea>
    </div>
  `;
  documentsContainer.appendChild(newDoc);
});

// Delegated handler so "Remove" also works for entries added later.
documentsContainer.addEventListener('click', (e) => {
  if (e.target.classList.contains('remove-document')) {
    e.target.closest('.document-entry').remove();
  }
});

/**
 * Read the name and text of every document entry currently in the form.
 * @returns {{document_name: string, document_text: string}[]}
 */
function collectDocuments() {
  return Array.from(documentsContainer.querySelectorAll('.document-entry')).map(
    (entry) => ({
      document_name: entry.querySelector('[name="documentName"]').value,
      document_text: entry.querySelector('[name="documentText"]').value,
    })
  );
}

// ---------------------------------------------------------------------------
// Form submission
// ---------------------------------------------------------------------------

form.addEventListener('submit', async (e) => {
  e.preventDefault();

  const submitButton = form.querySelector('button[type="submit"]');
  const defaultMessage = document.getElementById('defaultMessage');

  try {
    submitButton.disabled = true;
    downloadButton.disabled = true;
    spinner.classList.add('visible');
    resultsJson.style.display = 'none';
    defaultMessage.style.display = 'none';

    // Scroll as soon as the spinner is visible, not only once results arrive.
    scrollToResults();

    // Slider values are sent exactly as FormData returns them (strings).
    const formData = new FormData(form);
    const data = {
      documents: collectDocuments(),
      query: formData.get('query'),
      maxResults: formData.get('maxResults'),
      minSimilarity: formData.get('minSimilarity'),
      chunkingOptions: {
        maxTokenSize: formData.get('maxTokenSize'),
        similarityThreshold: formData.get('similarityThreshold'),
        dynamicThresholdLowerBound: formData.get('dynamicThresholdLowerBound'),
        dynamicThresholdUpperBound: formData.get('dynamicThresholdUpperBound'),
        numSimilaritySentencesLookahead: formData.get('numSimilaritySentencesLookahead'),
        combineChunks: form.elements.combineChunks.checked,
        combineChunksSimilarityThreshold: formData.get('combineChunksSimilarityThreshold'),
        onnxEmbeddingModel: formData.get('onnxEmbeddingModel'),
        dtype: formData.get('dtype'),
        // Empty prefixes become undefined so JSON.stringify drops the keys.
        chunkPrefixDocument: formData.get('chunkPrefixDocument') || undefined,
        chunkPrefixQuery: formData.get('chunkPrefixQuery') || undefined,
      },
    };

    const startTime = performance.now();
    const response = await fetch('/api/match', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(data),
    });

    if (!response.ok) {
      // The error body is read as JSON with a `details` field. If it is not
      // JSON (e.g. an HTML error page), fall back to the generic message
      // instead of surfacing a JSON parse error to the user.
      const errorBody = await response.json().catch(() => null);
      throw new Error((errorBody && errorBody.details) || 'Failed to process match');
    }

    const result = await response.json();
    const processingTime = ((performance.now() - startTime) / 1000).toFixed(2);

    // Swap the spinner for the highlighted JSON output.
    spinner.classList.remove('visible');
    defaultMessage.style.display = 'none';
    resultsJson.style.display = 'block';

    const codeElement = document.createElement('code');
    codeElement.className = 'language-json';
    codeElement.textContent = JSON.stringify(result, null, 2);
    resultsJson.textContent = '';
    resultsJson.appendChild(codeElement);
    hljs.highlightElement(codeElement);

    // Stats line under the results.
    document.getElementById('processingTime').textContent = `Total Time: ${processingTime}s`;
    document.getElementById('chunkCount').textContent = `Matches: ${result.length}`;

    // Average token length is only shown when the results carry token_length.
    const avgTokenLength = document.getElementById('avgTokenLength');
    if (result.length > 0 && result[0].token_length !== undefined) {
      const totalTokens = result.reduce((sum, chunk) => sum + chunk.token_length, 0);
      avgTokenLength.textContent = `Avg Tokens: ${Math.round(totalTokens / result.length)}`;
    } else {
      avgTokenLength.textContent = '';
    }

    downloadButton.disabled = false;
    scrollToResults();
  } catch (error) {
    console.error('Error:', error);
    showToast(error.message);
    downloadButton.disabled = true;
    spinner.classList.remove('visible');
    resultsJson.style.display = 'block';
  } finally {
    submitButton.disabled = false;
  }
});

// Nothing to download until a request has succeeded.
downloadButton.disabled = true;

// ---------------------------------------------------------------------------
// Sample-document buttons
// ---------------------------------------------------------------------------

// Each button names a sample via data-file; "<name>.txt" is fetched and loaded
// into the document entry that contains the button.
document.querySelectorAll('.document-buttons button').forEach((button) => {
  button.addEventListener('click', async () => {
    const fileType = button.dataset.file;
    const fileName = `${fileType}.txt`;
    const documentEntry = button.closest('.document-entry');

    try {
      const response = await fetch(fileName);
      if (!response.ok) {
        throw new Error(`Failed to load ${fileName}`);
      }
      const text = await response.text();

      const textArea = documentEntry.querySelector('textarea[name="documentText"]');
      const nameInput = documentEntry.querySelector('input[name="documentName"]');
      if (textArea) {
        textArea.value = text;
      }
      if (nameInput) {
        nameInput.value = `${fileType} text`;
      }
    } catch (error) {
      console.error('Error loading text file:', error);
      showToast(`Failed to load ${fileName}`);
    }
  });
});

// ---------------------------------------------------------------------------
// "Get Code" modal
// ---------------------------------------------------------------------------

const modal = document.getElementById('codeModal');
const getCodeBtn = document.getElementById('getCodeButton');
const closeBtn = document.querySelector('.close');
const closeModalBtn = document.getElementById('closeModal');
const copyBtn = document.getElementById('copyCode');
const codeExample = document.querySelector('#codeExample code');

/** Hide the code modal and restore page scrolling. */
function closeModal() {
  modal.style.display = 'none';
  document.body.style.overflow = '';
}

getCodeBtn.onclick = () => {
  // Snapshot the form; checkboxes contribute their checked state, not "on".
  const formData = {};
  for (const element of form.elements) {
    if (element.type === 'checkbox') {
      formData[element.name] = element.checked;
    } else if (element.name) {
      formData[element.name] = element.value;
    }
  }

  codeExample.textContent = generateCode(formData);
  modal.style.display = 'block';
  document.body.style.overflow = 'hidden'; // Prevent body scrolling behind the modal

  // Clear the "highlighted" data flag before highlighting the new content.
  delete codeExample.dataset.highlighted;
  hljs.highlightElement(codeExample);
};

/**
 * Build a ready-to-paste chunk-match usage example from the current form state.
 *
 * All user-entered text is emitted through JSON.stringify so that quotes,
 * backslashes and newlines produce valid string literals in the generated code.
 *
 * @param {Object} formData - Form values keyed by element name (strings for
 *   inputs, booleans for checkboxes).
 * @returns {string} JavaScript source for the example.
 */
function generateCode(formData) {
  // JSON string syntax is also valid JavaScript string-literal syntax.
  const quote = (value) => JSON.stringify(String(value));

  const documentsStr = collectDocuments()
    .map((doc) =>
      [
        '    {',
        `        document_name: ${quote(doc.document_name)},`,
        // Only a 50-character preview of the text is shown in the example.
        `        document_text: ${quote(`${doc.document_text.substring(0, 50)}...`)}`,
        '    }',
      ].join('\n')
    )
    .join(',\n');

  // The dtype slider holds an index into DTYPE_NAMES; fall back to fp32.
  const dtype = DTYPE_NAMES[Number.parseInt(formData.dtype, 10)] || 'fp32';

  // Optional prefixes are appended after dtype only when they are non-empty.
  const prefixDocument = formData.chunkPrefixDocument
    ? `,\n        chunkPrefixDocument: ${quote(formData.chunkPrefixDocument)}`
    : '';
  const prefixQuery = formData.chunkPrefixQuery
    ? `,\n        chunkPrefixQuery: ${quote(formData.chunkPrefixQuery)}`
    : '';

  return [
    '// import the chunk-match library',
    "import { matchChunks } from 'chunk-match';",
    '',
    '// define the documents array',
    'const documents = [',
    documentsStr,
    '];',
    '',
    '// define your search query',
    `const query = ${quote(formData.query)};`,
    '',
    '// define your options',
    'const options = {',
    `    maxResults: ${formData.maxResults},`,
    `    minSimilarity: ${formData.minSimilarity},`,
    '    chunkingOptions: {',
    `        maxTokenSize: ${formData.maxTokenSize},`,
    `        similarityThreshold: ${formData.similarityThreshold},`,
    `        dynamicThresholdLowerBound: ${formData.dynamicThresholdLowerBound},`,
    `        dynamicThresholdUpperBound: ${formData.dynamicThresholdUpperBound},`,
    `        numSimilaritySentencesLookahead: ${formData.numSimilaritySentencesLookahead},`,
    `        combineChunks: ${formData.combineChunks},`,
    `        combineChunksSimilarityThreshold: ${formData.combineChunksSimilarityThreshold},`,
    `        onnxEmbeddingModel: ${quote(formData.onnxEmbeddingModel)},`,
    `        dtype: ${quote(dtype)}${prefixDocument}${prefixQuery}`,
    '    }',
    '};',
    '',
    '// call the matchChunks function',
    'const results = await matchChunks(documents, query, options);',
    '',
    '// results will contain matched chunks with similarity scores',
    'console.log(results);',
  ].join('\n');
}

// Both the "x" icon and the footer button close the modal.
closeBtn.onclick = closeModal;
closeModalBtn.onclick = closeModal;

// A click on the modal element itself (not on its children) also closes it.
window.onclick = (event) => {
  if (event.target === modal) {
    closeModal();
  }
};

copyBtn.onclick = () => {
  navigator.clipboard
    .writeText(codeExample.textContent)
    .then(() => {
      copyBtn.textContent = 'Copied!';
      showToast('Code copied to clipboard!', 'success');
      setTimeout(() => {
        copyBtn.textContent = 'Copy Code';
      }, 2000);
    })
    .catch((err) => {
      console.error('Failed to copy:', err);
      showToast('Failed to copy code to clipboard');
    });
};

// ---------------------------------------------------------------------------
// Toast notifications
// ---------------------------------------------------------------------------

/**
 * Show a single toast message, replacing any toast currently visible.
 *
 * The message is inserted as plain text: it can contain server-supplied error
 * details, so it must never be parsed as HTML.
 *
 * @param {string} message - Text to display.
 * @param {string} [type='error'] - Added as a CSS class next to "toast".
 * @param {number} [duration=7000] - Milliseconds before auto-dismissal.
 */
function showToast(message, type = 'error', duration = 7000) {
  const toastContainer = document.getElementById('toastContainer');
  const toast = document.createElement('div');
  toast.className = `toast ${type}`;
  toast.textContent = message;

  toastContainer.innerHTML = '';
  toastContainer.classList.add('visible');
  toastContainer.appendChild(toast);

  let timeoutId = null;

  // Fade out and remove this toast. The parentNode checks make the function a
  // no-op once a newer toast has replaced this one, so a stale timer from an
  // earlier toast can never clear a later one.
  function dismissToast() {
    clearTimeout(timeoutId);
    if (toast.parentNode !== toastContainer) {
      return;
    }
    toast.style.animation = 'fadeOut 0.3s ease-out forwards';
    setTimeout(() => {
      if (toast.parentNode !== toastContainer) {
        return;
      }
      toastContainer.classList.remove('visible');
      toastContainer.innerHTML = '';
    }, 300);
  }

  toastContainer.onclick = dismissToast;
  toast.onclick = (e) => {
    e.stopPropagation(); // Prevent double-firing with the container click
    dismissToast();
  };

  timeoutId = setTimeout(dismissToast, duration);
}

document.querySelector('.info-icon').addEventListener('click', () => {
  showToast(
    'More model choices can be added by updating the "models.json" file in the "webui" directory.',
    'info',
    7000
  );
});

// ---------------------------------------------------------------------------
// Results text-wrapping toggle
// ---------------------------------------------------------------------------

const processingTimeSpan = document.getElementById('processingTime');

// The toggle button is inserted right after the processing-time label.
const resizeToggle = document.createElement('button');
resizeToggle.className = 'resize-toggle';
resizeToggle.innerHTML = `
  <svg viewBox="0 0 24 24">
    <path d="M17 8.5L20 11.5L17 14.5M7 8.5L4 11.5L7 14.5M5.5 11.5H18.5" stroke="white" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" fill="none"/>
  </svg>
`;
resizeToggle.title = 'Toggle text wrapping';
processingTimeSpan.parentNode.insertBefore(resizeToggle, processingTimeSpan.nextSibling);

resizeToggle.addEventListener('click', () => {
  resultsJson.classList.toggle('wrapped');
  resizeToggle.classList.toggle('wrapped');
});

// ---------------------------------------------------------------------------
// Model precision (dtype) slider display
// ---------------------------------------------------------------------------

const dtypeInput = document.getElementById('dtype');
const dtypeDisplay = dtypeInput.nextElementSibling;

// Label and CSS class for each dtype slider position.
const DTYPE_DISPLAY = {
  0: { text: 'fp32 - Full Precision', class: 'precision-full' },
  1: { text: 'fp16 - Half Precision', class: 'precision-half' },
  2: { text: 'q8 - 8-bit Quantized', class: 'precision-q8' },
  3: { text: 'q4 - 4-bit Quantized', class: 'precision-q4' },
};

/**
 * Render the precision label for the dtype slider.
 * @param {string} value - Slider position; values without an entry in
 *   DTYPE_DISPLAY are ignored rather than throwing.
 */
function updateDtypeDisplay(value) {
  const dtype = DTYPE_DISPLAY[value];
  if (!dtype) {
    return;
  }
  const number = dtypeDisplay.querySelector('.number');
  const description = dtypeDisplay.querySelector('.description');

  number.className = `number ${dtype.class}`;
  number.textContent = value;
  description.className = `description ${dtype.class}`;
  description.textContent = dtype.text;
}

updateDtypeDisplay(dtypeInput.value);
dtypeInput.addEventListener('input', (e) => updateDtypeDisplay(e.target.value));

// ---------------------------------------------------------------------------
// Version label
// ---------------------------------------------------------------------------

fetch('/version')
  .then((response) => {
    if (!response.ok) {
      throw new Error(`Failed to load version (HTTP ${response.status})`);
    }
    return response.json();
  })
  .then((data) => {
    document.getElementById('version').textContent = `v${data.version}`;
  })
  .catch((error) => console.error('Error fetching version:', error));

// ---------------------------------------------------------------------------
// Default form values
// ---------------------------------------------------------------------------

/**
 * Apply defaultFormValues to the form and dispatch the events that refresh the
 * dependent displays (slider labels, combine-chunks controls, dtype label).
 */
function setDefaultFormValues() {
  const rangeInputs = {
    maxResults: defaultFormValues.maxResults,
    minSimilarity: defaultFormValues.minSimilarity,
    maxTokenSize: defaultFormValues.maxTokenSize,
    similarityThreshold: defaultFormValues.similarityThreshold,
    dynamicThresholdLowerBound: defaultFormValues.dynamicThresholdLowerBound,
    dynamicThresholdUpperBound: defaultFormValues.dynamicThresholdUpperBound,
    numSimilaritySentencesLookahead: defaultFormValues.numSimilaritySentencesLookahead,
    combineChunksSimilarityThreshold: defaultFormValues.combineChunksSimilarityThreshold,
  };

  // Dispatch "input" so each slider's display re-renders with the new value.
  Object.entries(rangeInputs).forEach(([id, value]) => {
    const input = document.getElementById(id);
    if (input) {
      input.value = value;
      input.dispatchEvent(new Event('input'));
    }
  });

  const combineChunksCheckbox = document.getElementById('combineChunks');
  if (combineChunksCheckbox) {
    combineChunksCheckbox.checked = defaultFormValues.combineChunks;
    combineChunksCheckbox.dispatchEvent(new Event('change'));
  }

  const textInputs = {
    chunkPrefixDocument: defaultFormValues.chunkPrefixDocument,
    chunkPrefixQuery: defaultFormValues.chunkPrefixQuery,
  };
  Object.entries(textInputs).forEach(([id, value]) => {
    const input = document.getElementById(id);
    // `!= null` skips both null and undefined; assigning undefined to
    // input.value would put the literal text "undefined" into the field.
    if (input && value != null) {
      input.value = value;
    }
  });

  // The dtype slider stores an index; unknown names fall back to 0 (fp32).
  const dtypeSlider = document.getElementById('dtype');
  if (dtypeSlider) {
    dtypeSlider.value = Math.max(DTYPE_NAMES.indexOf(defaultFormValues.dtype), 0);
    dtypeSlider.dispatchEvent(new Event('input'));
  }
}

setDefaultFormValues();