UNPKG

@just-every/ensemble

Version:

LLM provider abstraction layer with unified streaming interface

201 lines 9.56 kB
import fs from 'fs/promises';
import path from 'path';
import crypto from 'crypto';
import { createToolFunction } from './create_tool_function.js';

/** Documents at or below this many characters are returned unsummarized. */
const SUMMARIZE_AT_CHARS = 5000;
/** Maximum number of characters handed to the summarizer function. */
const SUMMARIZE_TRUNCATE_CHARS = 200000;
/** File inside the summaries directory that maps document hash -> summary ID. */
const HASH_MAP_FILENAME = 'summary_hash_map.json';
/** Directory used when the caller does not pass `summariesDir`. */
const DEFAULT_SUMMARIES_DIR = './summaries';
/**
 * Shape of an acceptable summary ID. IDs created by this module come from
 * `crypto.randomUUID()`, which fits this pattern. Anything containing path
 * separators or dots is rejected so an ID can never escape the summaries
 * directory once it is interpolated into a file name.
 */
const SAFE_SUMMARY_ID = /^[A-Za-z0-9_-]+$/;

/**
 * Create `dir` (and any missing parents).
 *
 * @param {string} dir - Directory to create.
 * @returns {Promise<void>}
 * @throws Re-throws any mkdir error whose code is not `EEXIST`.
 */
async function ensureDir(dir) {
    try {
        await fs.mkdir(dir, { recursive: true });
    } catch (error) {
        if (error.code !== 'EEXIST') {
            throw error;
        }
    }
}

/**
 * Load the hash -> summary ID map from disk.
 *
 * Never throws: a missing file yields an empty map silently; any other
 * failure (unreadable file, invalid JSON, JSON that is not a plain object)
 * is logged and also yields an empty map.
 *
 * @param {string} file_path - Location of the JSON map file.
 * @returns {Promise<Object<string, string>>} The parsed map, or `{}`.
 */
async function loadHashMap(file_path) {
    try {
        const data = await fs.readFile(file_path, 'utf-8');
        const parsed = JSON.parse(data);
        // Valid JSON is not necessarily a map: `null` would make later property
        // lookups throw, and arrays/primitives would be silently misused.
        if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
            console.error(
                `Error loading summary hash map from ${file_path}:`,
                new Error('Expected the hash map file to contain a JSON object')
            );
            return {};
        }
        return parsed;
    } catch (error) {
        if (error.code === 'ENOENT') {
            return {};
        }
        console.error(`Error loading summary hash map from ${file_path}:`, error);
        return {};
    }
}

/**
 * Persist the hash -> summary ID map as pretty-printed JSON.
 *
 * Best effort: write failures are logged, not thrown.
 *
 * @param {string} file_path - Location of the JSON map file.
 * @param {Object<string, string>} map - Map to write.
 * @returns {Promise<void>}
 */
async function saveHashMap(file_path, map) {
    try {
        const data = JSON.stringify(map, null, 2);
        await fs.writeFile(file_path, data, 'utf-8');
    } catch (error) {
        console.error(`Error saving summary hash map to ${file_path}:`, error);
    }
}

/**
 * Shorten `text` to roughly `length` characters by keeping its head and tail
 * and replacing the middle with `separator`.
 *
 * About 30% of the budget goes to the head; the remainder (minus the
 * separator) goes to the tail.
 *
 * @param {string} text - Text to shorten; it is trimmed first.
 * @param {number} [length=SUMMARIZE_TRUNCATE_CHARS] - Target length in characters.
 * @param {string} [separator] - Marker inserted where text was removed.
 * @returns {string} The trimmed text if it already fits, else head + separator + tail.
 */
function truncate(text, length = SUMMARIZE_TRUNCATE_CHARS, separator = '\n\n...[truncated for summary]...\n\n') {
    text = text.trim();
    if (text.length <= length) {
        return text;
    }
    // Use whole-number sizes and never let the tail go negative, so the slice
    // indices stay inside the string even for small `length` values.
    const headLength = Math.floor(length * 0.3);
    const tailLength = Math.max(0, length - headLength - separator.length);
    return text.substring(0, headLength) + separator + text.substring(text.length - tailLength);
}

/**
 * Check that a summary ID is safe to interpolate into a file name.
 *
 * @param {*} summary_id - Candidate ID (typically supplied by a tool call).
 * @returns {boolean} True when the ID is a string matching SAFE_SUMMARY_ID.
 */
function isValidSummaryId(summary_id) {
    return typeof summary_id === 'string' && SAFE_SUMMARY_ID.test(summary_id);
}

/**
 * Build the footer appended to a summary describing how much was condensed.
 *
 * @param {{originalLines: number, summaryLines: number, originalChars: number, summaryChars: number}} stats
 *   Line and character counts of the original document and of the summary.
 * @param {string} summaryId - ID under which the original document is stored.
 * @param {boolean} includeExpansionReferences - When true, also mention the
 *   `write_source` / `read_source` tools with this ID.
 * @returns {string} Footer text, beginning with a blank line.
 */
function buildSummaryMetadata(stats, summaryId, includeExpansionReferences) {
    const { originalLines, summaryLines, originalChars, summaryChars } = stats;
    const base = `\n\nSummarized large output to avoid excessive tokens (${originalLines} -> ${summaryLines} lines, ${originalChars} -> ${summaryChars} chars)`;
    if (!includeExpansionReferences) {
        return base;
    }
    return `${base} [Write to file with write_source(${summaryId}, file_path) or read with read_source(${summaryId}, line_start, line_end)]`;
}

/**
 * Summarize a large document, caching the result on disk keyed by the
 * SHA-256 hash of the document.
 *
 * Documents of at most SUMMARIZE_AT_CHARS characters are returned unchanged.
 * Otherwise a cached summary is reused when its files can be read; if not,
 * `summaryFn` is called with a truncated copy of the document, and the
 * summary plus the full original are written to the summaries directory.
 *
 * @param {string} document - Text to summarize.
 * @param {*} context - Passed through untouched as the second argument of `summaryFn`.
 * @param {function(string, *): Promise<string>} summaryFn - Produces the summary text.
 * @param {boolean} [includeExpansionReferences=false] - Append tool hints to the footer.
 * @param {string} [summariesDir] - Storage directory; defaults to `./summaries`.
 * @returns {Promise<string>} The original document (if short), or the summary
 *   followed by a metadata footer. If saving the new summary files fails, the
 *   bare summary is returned without a footer.
 */
export async function createSummary(document, context, summaryFn, includeExpansionReferences = false, summariesDir) {
    if (document.length <= SUMMARIZE_AT_CHARS) {
        return document;
    }
    const finalSummariesDir = summariesDir || DEFAULT_SUMMARIES_DIR;
    await ensureDir(finalSummariesDir);
    const hashMapPath = path.join(finalSummariesDir, HASH_MAP_FILENAME);
    const documentHash = crypto.createHash('sha256').update(document).digest('hex');
    const hashMap = await loadHashMap(hashMapPath);

    if (hashMap[documentHash]) {
        const summaryId = hashMap[documentHash];
        const summaryFilePath = path.join(finalSummariesDir, `summary-${summaryId}.txt`);
        const originalFilePath = path.join(finalSummariesDir, `original-${summaryId}.txt`);
        try {
            const [existingSummary, originalDoc] = await Promise.all([
                fs.readFile(summaryFilePath, 'utf-8'),
                fs.readFile(originalFilePath, 'utf-8'),
            ]);
            const metadata = buildSummaryMetadata(
                {
                    originalLines: originalDoc.split('\n').length,
                    summaryLines: existingSummary.split('\n').length,
                    originalChars: originalDoc.length,
                    summaryChars: existingSummary.length,
                },
                summaryId,
                includeExpansionReferences
            );
            console.log(`Retrieved summary from cache for hash: ${documentHash.substring(0, 8)}...`);
            return existingSummary.trim() + metadata;
        } catch (error) {
            // The map points at files that cannot be read: drop the stale
            // entry and fall through to generate a fresh summary.
            console.error(`Error reading cached summary files for ID ${summaryId}:`, error);
            delete hashMap[documentHash];
            await saveHashMap(hashMapPath, hashMap);
        }
    }

    // Keep the untouched original for storage; only the summarizer sees the
    // truncated copy.
    const originalDocumentForSave = document;
    const originalLines = originalDocumentForSave.split('\n').length;
    document = truncate(document);
    const summary = await summaryFn(document, context);
    const trimmedSummary = summary.trim();
    const summaryLines = trimmedSummary.split('\n').length;
    const newSummaryId = crypto.randomUUID();
    const summaryFilePath = path.join(finalSummariesDir, `summary-${newSummaryId}.txt`);
    const originalFilePath = path.join(finalSummariesDir, `original-${newSummaryId}.txt`);
    try {
        await Promise.all([
            fs.writeFile(summaryFilePath, trimmedSummary, 'utf-8'),
            fs.writeFile(originalFilePath, originalDocumentForSave, 'utf-8'),
        ]);
        hashMap[documentHash] = newSummaryId;
        await saveHashMap(hashMapPath, hashMap);
        console.log(`Saved new summary with ID: ${newSummaryId} for hash: ${documentHash.substring(0, 8)}...`);
    } catch (error) {
        // Without stored files the ID in the footer would be useless, so
        // return the summary on its own.
        console.error(`Error saving new summary files for ID ${newSummaryId}:`, error);
        return trimmedSummary;
    }

    const metadata = buildSummaryMetadata(
        {
            originalLines,
            summaryLines,
            originalChars: originalDocumentForSave.length,
            summaryChars: trimmedSummary.length,
        },
        newSummaryId,
        includeExpansionReferences
    );
    return trimmedSummary + metadata;
}

/**
 * Return the stored original document for a summary, optionally limited to a
 * range of lines.
 *
 * Either bound may be omitted (`undefined` or `null`): a missing start means
 * the first line, a missing end means the last line. Bounds are 0-based and
 * inclusive, and are coerced with `Number()` so numeric strings behave like
 * numbers. Failures are reported as a returned `Error: ...` string rather
 * than thrown.
 *
 * @param {string} summary_id - ID of the summary whose original is wanted.
 * @param {number} [line_start] - First line to return (0-based).
 * @param {number} [line_end] - Last line to return (0-based, inclusive).
 * @param {string} [summariesDir] - Storage directory; defaults to `./summaries`.
 * @returns {Promise<string>} The requested content, or an error message.
 */
export async function read_source(summary_id, line_start, line_end, summariesDir) {
    if (!isValidSummaryId(summary_id)) {
        // Refuse IDs that could point outside the summaries directory.
        return `Error: Invalid summary ID '${summary_id}'.`;
    }
    const finalSummariesDir = summariesDir || DEFAULT_SUMMARIES_DIR;
    const originalFilePath = path.join(finalSummariesDir, `original-${summary_id}.txt`);
    try {
        let content = await fs.readFile(originalFilePath, 'utf-8');
        const hasStart = line_start !== undefined && line_start !== null;
        const hasEnd = line_end !== undefined && line_end !== null;
        if (hasStart || hasEnd) {
            const lines = content.split('\n');
            const requestedStart = hasStart ? Number(line_start) : 0;
            const requestedEnd = hasEnd ? Number(line_end) : lines.length - 1;
            const start = Math.max(0, Math.trunc(requestedStart));
            const end = Math.min(lines.length, Math.trunc(requestedEnd) + 1);
            if (Number.isNaN(requestedStart) || Number.isNaN(requestedEnd) || start >= end) {
                return `Error: Invalid line range requested (${hasStart ? line_start : 0}-${hasEnd ? line_end : lines.length - 1}) for document with ${lines.length} lines.`;
            }
            content = lines.slice(start, end).join('\n');
        }
        return content;
    } catch (error) {
        if (error.code === 'ENOENT') {
            return `Error: Original document for summary ID '${summary_id}' not found at ${originalFilePath}.`;
        }
        console.error(`Error reading original summary source for ID ${summary_id}:`, error);
        return `Error: Could not retrieve original document for summary ID '${summary_id}'.`;
    }
}

/**
 * Copy the stored original document for a summary to `file_path`, creating
 * parent directories as needed.
 *
 * Failures are reported as a returned `Error: ...` string rather than thrown.
 *
 * @param {string} summary_id - ID of the summary whose original is wanted.
 * @param {string} file_path - Destination path for the document.
 * @param {string} [summariesDir] - Storage directory; defaults to `./summaries`.
 * @returns {Promise<string>} A success message including the first 400
 *   characters of the content, or an error message.
 */
export async function write_source(summary_id, file_path, summariesDir) {
    // Validate arguments before touching the file system.
    if (!isValidSummaryId(summary_id)) {
        return `Error: Invalid summary ID '${summary_id}'.`;
    }
    if (!file_path) {
        return 'Error: file_path is required.';
    }
    const finalSummariesDir = summariesDir || DEFAULT_SUMMARIES_DIR;
    const originalFilePath = path.join(finalSummariesDir, `original-${summary_id}.txt`);

    let content;
    try {
        content = await fs.readFile(originalFilePath, 'utf-8');
    } catch (error) {
        if (error.code === 'ENOENT') {
            return `Error: Original document for summary ID '${summary_id}' not found at ${originalFilePath}.`;
        }
        console.error(`Error reading original summary source for ID ${summary_id}:`, error);
        return `Error: Could not retrieve original document for summary ID '${summary_id}'.`;
    }

    try {
        const directory = path.dirname(file_path);
        await fs.mkdir(directory, { recursive: true });
        await fs.writeFile(file_path, content, 'utf-8');
        console.log(`Summary written to file: ${file_path}`);
        return `Successfully wrote ${content.length} chars to file: ${file_path}\n\nStart of content:\n\n${content.substring(0, 400)}...`;
    } catch (writeError) {
        console.error(`Error writing summary to file ${file_path}:`, writeError);
        return `Error: Could not write summary to file ${file_path}.`;
    }
}

/**
 * Build the tool definitions that expose `read_source` and `write_source`,
 * bound to a specific summaries directory.
 *
 * NOTE(review): `createToolFunction` is defined in another module. The wrapper
 * identifiers and parameter names below are kept exactly as they were in case
 * it inspects them - confirm before renaming.
 * NOTE(review): the read tool description promises truncation to 1000
 * characters, but `read_source` in this file does not truncate - presumably
 * that happens elsewhere; verify.
 *
 * @param {string} [summariesDir] - Storage directory forwarded to both tools.
 * @returns {Array} Two tool definitions as returned by `createToolFunction`.
 */
export function getSummaryTools(summariesDir) {
    const readSourceWrapper = async (summary_id, line_start, line_end) => {
        return read_source(summary_id, line_start, line_end, summariesDir);
    };
    const writeSourceWrapper = async (summary_id, file_path) => {
        return write_source(summary_id, file_path, summariesDir);
    };
    return [
        createToolFunction(readSourceWrapper, 'Read the original (not summarized) document content. If possible, limit lines to limit tokens returned. Results will be truncated to 1000 characters - for larger files, use write_source.', {
            summary_id: {
                type: 'string',
                description: 'The unique ID of the summary.',
            },
            line_start: {
                type: 'number',
                description: 'Starting line to retrieve (0-based). Optional.',
                optional: true,
            },
            line_end: {
                type: 'number',
                description: 'Ending line to retrieve (0-based). Optional.',
                optional: true,
            },
        }),
        createToolFunction(writeSourceWrapper, 'Write the original (not summarized) document to a file.', {
            summary_id: {
                type: 'string',
                description: 'The unique ID of the summary.',
            },
            file_path: {
                type: 'string',
                description: 'Relative or absolute path to write the document to.',
            },
        }),
    ];
}

/**
 * Report whether both expansion tools are present in a list of tool names.
 *
 * @param {string[]} toolNames - Names of the available tools.
 * @returns {boolean} True only when both `write_source` and `read_source` are listed.
 */
export function hasExpansionTools(toolNames) {
    return toolNames.includes('write_source') && toolNames.includes('read_source');
}
//# sourceMappingURL=summary_utils.js.map