// web-ai-toolkit
// Version:
// AI powered features on the web made easy
// 159 lines (135 loc) • 4.74 kB
// text/typescript
import { pipeline, env } from '@huggingface/transformers';
import { webGPUCheck } from '../../utils';
import { processChunkedSummarization, splitTextIntoChunks } from './summarization-utils';
/**
 * Module-level cache for the summarization pipeline.
 * Starts out undefined; `loadSummarizer` assigns it and the other functions
 * in this module reuse it for every subsequent call.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- holds the pipeline object returned by `pipeline()`
let summarizer: any;
/**
 * Summarizes `text` with the module-level summarization pipeline, loading the
 * pipeline first when none exists yet.
 *
 * Text whose length does not exceed `maxChunkLength` is handed to the pipeline
 * in a single call and the pipeline's output is returned as-is. Longer text is
 * delegated to `processChunkedSummarization`, which is given callbacks for
 * summarizing one chunk, pulling the summary string out of a pipeline result,
 * and wrapping the combined text as `[{ summary_text }]`.
 *
 * NOTE: the pipeline is cached once created, so `model` only matters on the
 * call that triggers loading; later calls reuse the existing pipeline even if
 * a different model id is passed.
 *
 * @param text The text to summarize.
 * @param model Model id passed to `loadSummarizer` when no pipeline exists yet.
 * @param maxChunkLength Largest `text.length` summarized in a single pass; also
 *   forwarded to `processChunkedSummarization`.
 * @param overlap Forwarded unchanged to `processChunkedSummarization`.
 * @param minChunkLength Forwarded unchanged to `processChunkedSummarization`.
 * @param onProgress Optional progress callback, forwarded to
 *   `processChunkedSummarization` (not invoked on the single-pass path).
 * @returns The pipeline output for short text, or the value produced by
 *   `processChunkedSummarization` for long text.
 * @throws Rejects with whatever error `loadSummarizer`, the pipeline, or
 *   `processChunkedSummarization` rejects with.
 */
export async function runSummarizer(
  text: string,
  model = 'Xenova/distilbart-cnn-6-6',
  maxChunkLength = 1000,
  overlap = 100,
  minChunkLength = 200,
  onProgress?: (progress: number, message: string) => void,
): Promise<unknown> {
  // The function is already async, so errors below reject the returned promise
  // directly; no manual `new Promise` wrapper is needed.
  if (!summarizer) {
    await loadSummarizer(model);
  }

  // Short input: one pipeline call, no chunking.
  if (text.length <= maxChunkLength) {
    return await summarizer(text);
  }

  // Long input: let the chunking helper drive the pipeline chunk by chunk.
  return await processChunkedSummarization(
    text,
    maxChunkLength,
    overlap,
    minChunkLength,
    onProgress,
    // Summarize a single chunk.
    async (chunk: string) => summarizer(chunk),
    // Extract the summary string from either an array result or a bare object.
    (summary: any) => (Array.isArray(summary) ? summary[0].summary_text : summary.summary_text),
    // Wrap the combined text in the same shape the pipeline returns.
    (combinedText: string) => [{ summary_text: combinedText }],
  );
}
/**
 * Creates the module-level summarization pipeline if it does not exist yet.
 *
 * When `summarizer` is already set this is a no-op. Otherwise it sets
 * `env.allowLocalModels` and `env.useBrowserCache` to `false`, picks a device,
 * and stores the result of `pipeline('summarization', …)` in `summarizer`.
 *
 * Written as a plain async function so that a failure in `webGPUCheck()` or
 * `pipeline()` rejects the returned promise. Wrapping the work in
 * `new Promise(async (resolve) => …)` would leave the promise pending forever
 * on error, hanging every caller that awaits it.
 *
 * @param model Model id to load; an empty value falls back to
 *   'Xenova/distilbart-cnn-6-6'.
 * @returns Promise that resolves once `summarizer` is available.
 * @throws Rejects with any error raised while choosing the device or creating
 *   the pipeline.
 */
async function loadSummarizer(model: string): Promise<void> {
  // Already loaded: nothing to do.
  if (summarizer) {
    return;
  }

  // NOTE(review): presumably forces remote model loading and disables the
  // browser cache — confirm against the Transformers.js `env` documentation.
  env.allowLocalModels = false;
  env.useBrowserCache = false;

  // Device preference: 'webnn' when `navigator.ml` is present, otherwise
  // 'webgpu' when `webGPUCheck()` resolves truthy, otherwise 'wasm'.
  const device = (navigator as any).ml ? 'webnn' : (await webGPUCheck()) ? 'webgpu' : 'wasm';

  summarizer = await pipeline('summarization', model || 'Xenova/distilbart-cnn-6-6', {
    dtype: 'fp32',
    device,
  });
}
/** Callback invoked with a numeric progress value and a human-readable message. */
type SummarizationProgressCallback = (progress: number, message: string) => void;

/**
 * Optional settings accepted by `runSummarizerWithOptions` and
 * `summarizeLongText`. Every field may be omitted; those functions substitute
 * their own defaults for missing values.
 */
interface SummarizationOptions {
  /** Model id to load; the consuming functions default it to 'Xenova/distilbart-cnn-6-6'. */
  model?: string;
  /**
   * Text whose length is at most this value is summarized in a single pass;
   * the consuming functions default it to 1000.
   */
  maxChunkLength?: number;
  /**
   * Forwarded to the chunking helpers (default 100).
   * NOTE(review): presumably the overlap between neighbouring chunks — confirm in ./summarization-utils.
   */
  overlap?: number;
  /**
   * Forwarded to the chunking helpers (default 200).
   * NOTE(review): presumably the smallest chunk worth keeping — confirm in ./summarization-utils.
   */
  minChunkLength?: number;
  /** Progress callback; the hierarchical path reports values from 0 up to 1.0. */
  onProgress?: SummarizationProgressCallback;
}
/**
 * Options-object front end for `runSummarizer`.
 *
 * Each option is forwarded positionally. A field that is missing (or
 * `undefined`) falls back to `runSummarizer`'s own parameter default, which
 * matches the values this wrapper has always used: model
 * 'Xenova/distilbart-cnn-6-6', maxChunkLength 1000, overlap 100,
 * minChunkLength 200.
 *
 * @param text The text to summarize.
 * @param options Optional overrides for the summarization settings.
 * @returns Whatever `runSummarizer` resolves to for the given input.
 */
export async function runSummarizerWithOptions(text: string, options: SummarizationOptions = {}) {
  return runSummarizer(
    text,
    options.model,
    options.maxChunkLength,
    options.overlap,
    options.minChunkLength,
    options.onProgress,
  );
}
/**
 * Summarizes text of any length, switching strategy for very long input.
 *
 * Text longer than ten times `maxChunkLength` is routed to
 * `hierarchicalSummarization`; everything else goes to `runSummarizer`.
 * Missing options default to model 'Xenova/distilbart-cnn-6-6',
 * maxChunkLength 1000, overlap 100 and minChunkLength 200.
 *
 * @param text The text to summarize
 * @param options Configuration options for summarization
 * @returns Promise resolving to the summary produced by the chosen strategy
 */
export async function summarizeLongText(text: string, options: SummarizationOptions = {}) {
  const {
    model = 'Xenova/distilbart-cnn-6-6',
    maxChunkLength = 1000,
    overlap = 100,
    minChunkLength = 200,
    onProgress,
  } = options;

  // Above this length the hierarchical strategy takes over.
  const hierarchicalThreshold = maxChunkLength * 10;
  const summarize = text.length > hierarchicalThreshold ? hierarchicalSummarization : runSummarizer;

  return summarize(text, model, maxChunkLength, overlap, minChunkLength, onProgress);
}
/**
 * Two-pass summarization for extremely long documents.
 *
 * The text is split with `splitTextIntoChunks` using five times
 * `maxChunkLength`, twice `overlap` and twice `minChunkLength`. Each resulting
 * section is summarized in order through `runSummarizer` (without a progress
 * callback), and the section summaries are joined with single spaces. The
 * joined text is returned without a further summarization pass.
 *
 * Progress is reported as 0 at start, 0.1 after splitting, 0.1–0.9 while
 * sections complete, 0.95 before joining and 1.0 at the end.
 *
 * @param text The document to summarize.
 * @param model Model id used if the pipeline still has to be loaded, and
 *   forwarded to `runSummarizer`.
 * @param maxChunkLength Base chunk length; see scaling above.
 * @param overlap Base overlap; see scaling above.
 * @param minChunkLength Base minimum chunk length; see scaling above.
 * @param onProgress Optional callback receiving a progress value and message.
 * @returns A one-element array `[{ summary_text }]` holding the joined summaries.
 */
async function hierarchicalSummarization(
  text: string,
  model: string,
  maxChunkLength: number,
  overlap: number,
  minChunkLength: number,
  onProgress?: (progress: number, message: string) => void,
): Promise<any> {
  // Ensure the pipeline exists before any progress is reported.
  if (!summarizer) {
    await loadSummarizer(model);
  }

  onProgress?.(0, 'Starting hierarchical summarization...');

  // Pass 1: carve the document into large sections.
  const sections = splitTextIntoChunks(text, maxChunkLength * 5, overlap * 2, minChunkLength * 2);
  const sectionCount = sections.length;
  onProgress?.(0.1, `Processing ${sectionCount} large sections...`);

  // Pass 2: summarize the sections one after another.
  const partialSummaries = [];
  for (const [index, section] of sections.entries()) {
    const raw = (await runSummarizer(section, model, maxChunkLength, overlap, minChunkLength, undefined)) as any;
    // The result may be an array of results or a single result object.
    const firstResult = Array.isArray(raw) ? raw[0] : raw;
    partialSummaries.push(firstResult.summary_text);

    // Section processing spans the 10%–90% band of overall progress.
    const finished = index + 1;
    const fraction = 0.1 + (finished / sectionCount) * 0.8;
    onProgress?.(fraction, `Processed section ${finished} of ${sectionCount}`);
  }

  onProgress?.(0.95, 'Combining section summaries...');

  // Final step: concatenate the section summaries; no extra model pass.
  const merged = partialSummaries.join(' ');

  onProgress?.(1.0, 'Hierarchical summarization complete!');
  return [{ summary_text: merged }];
}