UNPKG

bilingual-summarizer

Version:

A powerful text summarization package for Arabic and English content with sentiment analysis and topic extraction

238 lines (237 loc) 10.9 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.isGeminiConfigValid = exports.isArabic = exports.extractTopics = void 0; exports.summarize = summarize; exports.summarizeArabic = summarizeArabic; exports.summarizeWithAI = summarizeWithAI; const reading_time_1 = __importDefault(require("reading-time")); const sentimentAnalyzer_1 = require("./analyzers/sentimentAnalyzer"); const languageDetection_1 = require("./utils/languageDetection"); Object.defineProperty(exports, "isArabic", { enumerable: true, get: function () { return languageDetection_1.isArabic; } }); const textPreprocessing_1 = require("./utils/textPreprocessing"); const topicExtraction_1 = require("./utils/topicExtraction"); Object.defineProperty(exports, "extractTopics", { enumerable: true, get: function () { return topicExtraction_1.extractTopics; } }); const summarizer_1 = require("./extractors/summarizer"); const arabicSummarizer_1 = require("./extractors/arabicSummarizer"); const geminiSummarizer_1 = require("./extractors/geminiSummarizer"); Object.defineProperty(exports, "isGeminiConfigValid", { enumerable: true, get: function () { return geminiSummarizer_1.isGeminiConfigValid; } }); /** * Default options for the summarize function */ const DEFAULT_OPTIONS = { sentenceCount: 5, includeTitleFromContent: true, includeImage: true, minLength: 100, maxLength: 2000, responseStructure: null, gemini: null, useAI: false }; /** * Processes the responseStructure option to determine which fields to include or exclude * @param result The full result object * @param responseStructure The responseStructure option value * @returns A filtered result object */ function filterResultFields(result, responseStructure) { // If responseStructure is an array, use it as an include list if (Array.isArray(responseStructure)) { // Always include 'ok' field for error handling, unless explicitly filtered out const includeOk = !responseStructure.includes('ok') && result.ok !== undefined; // Create a new object with only the specified fields const filteredResult = {}; // Add 'ok' if needed if (includeOk) { filteredResult.ok = result.ok; } // Add requested fields responseStructure.forEach(field => { if (field in result) { filteredResult[field] = result[field]; } }); return filteredResult; } // If responseStructure is an object, handle include/exclude options const responseObj = responseStructure; // Validate that both include and exclude aren't used together if (responseObj.include && responseObj.exclude) { throw new Error("Cannot use both 'include' and 'exclude' in responseStructure simultaneously"); } // Handle include option if (responseObj.include && Array.isArray(responseObj.include)) { return filterResultFields(result, responseObj.include); } // Handle exclude option if (responseObj.exclude && Array.isArray(responseObj.exclude)) { const filteredResult = { ...result }; responseObj.exclude.forEach(field => { if (field in filteredResult) { delete filteredResult[field]; } }); return filteredResult; } // If the object doesn't have valid include or exclude properties, return the original result return result; } /** * Summarizes and analyzes the provided text or HTML content * @param content The text or HTML content to summarize * @param options Optional configuration options * @returns A summary result object */ async function summarize(content, options = {}) { try { // Merge default and user options const finalOptions = { ...DEFAULT_OPTIONS, ...options }; // Clean the text (remove HTML, normalize spacing, etc.) const cleanedText = (0, textPreprocessing_1.cleanText)(content); // If content is too short, return it as is if (cleanedText.length < finalOptions.minLength) { const languageResult = (0, languageDetection_1.detectLanguage)(cleanedText); const result = { ok: true, title: options.title || (0, textPreprocessing_1.extractTitleFromHTML)(content) || '', summary: cleanedText, language: languageResult.language, languageName: (0, languageDetection_1.getLanguageName)(languageResult.language), sentiment: (0, sentimentAnalyzer_1.getSentimentLabel)((0, sentimentAnalyzer_1.analyzeSentiment)(cleanedText).score), topics: (0, topicExtraction_1.extractTopics)(cleanedText), relatedTopics: [], words: cleanedText.split(/\s+/).filter(Boolean).length, sentences: (0, textPreprocessing_1.extractSentences)(cleanedText).length, readingTime: Math.ceil((0, reading_time_1.default)(cleanedText).minutes || 1), difficulty: 'easy' }; if (finalOptions.includeImage) { result.image = (0, textPreprocessing_1.extractImageFromHTML)(content); } // Filter result if responseStructure is provided if (finalOptions.responseStructure) { return filterResultFields(result, finalOptions.responseStructure); } return result; } // Detect language of the text const languageResult = (0, languageDetection_1.detectLanguage)(cleanedText); const language = languageResult.language; // Generate summary using the appropriate method let summary; // Check if Gemini AI should be used if (finalOptions.useAI && finalOptions.gemini) { // Validate Gemini configuration if (!(0, geminiSummarizer_1.isGeminiConfigValid)(finalOptions.gemini)) { throw new Error('Invalid Gemini configuration. API key is required.'); } // Generate summary using Gemini AI try { summary = await (0, geminiSummarizer_1.summarizeWithGeminiAI)(cleanedText, finalOptions.sentenceCount, finalOptions.gemini); } catch (aiError) { console.error('Gemini AI summarization failed, falling back to default summarizer:', aiError); // Fall back to traditional summarization if AI fails summary = (0, summarizer_1.summarizeText)(cleanedText, finalOptions.sentenceCount); } } else { // Use traditional summarization summary = (0, summarizer_1.summarizeText)(cleanedText, finalOptions.sentenceCount); } // Extract topics const topics = (0, topicExtraction_1.extractTopics)(cleanedText); // Calculate reading time const readingTimeResult = (0, reading_time_1.default)(cleanedText); // Get sentiment analysis const sentimentResult = (0, sentimentAnalyzer_1.analyzeSentiment)(cleanedText); // Determine difficulty level based on average word length and sentence complexity let difficulty = 'medium'; const avgWordLength = cleanedText.length / cleanedText.split(/\s+/).filter(Boolean).length; if (avgWordLength < 4.5) { difficulty = 'easy'; } else if (avgWordLength > 6) { difficulty = 'hard'; } // Construct the result object const result = { ok: true, title: options.title || (0, textPreprocessing_1.extractTitleFromHTML)(content) || '', summary, language, languageName: (0, languageDetection_1.getLanguageName)(language), sentiment: (0, sentimentAnalyzer_1.getSentimentLabel)(sentimentResult.score), topics, relatedTopics: (0, topicExtraction_1.suggestRelatedTopics)(topics), words: cleanedText.split(/\s+/).filter(Boolean).length, sentences: (0, textPreprocessing_1.extractSentences)(cleanedText).length, readingTime: Math.ceil(readingTimeResult.minutes || 1), difficulty }; // Add image URL if requested if (finalOptions.includeImage) { result.image = (0, textPreprocessing_1.extractImageFromHTML)(content); } // Filter result if responseStructure is provided if (finalOptions.responseStructure) { return filterResultFields(result, finalOptions.responseStructure); } return result; } catch (error) { const errorResult = { ok: false, error: 'Failed to summarize the content', message: error instanceof Error ? error.message : String(error), language: 'en', summary: '', sentiment: 'neutral', topics: [], words: 0, readingTime: 0, difficulty: 'medium' }; // Filter error result if responseStructure is provided if (options.responseStructure) { try { return filterResultFields(errorResult, options.responseStructure); } catch (filterError) { // If filtering itself causes an error (e.g., invalid responseStructure), // return the original error with an additional message errorResult.message = `${errorResult.message}. Additionally: ${filterError instanceof Error ? filterError.message : String(filterError)}`; return errorResult; } } return errorResult; } } /** * Direct API to summarize Arabic text using the specialized Arabic summarizer * @param text The Arabic text to summarize * @param sentenceCount The number of sentences to include in the summary * @returns The summarized text */ function summarizeArabic(text, sentenceCount = 5) { return (0, arabicSummarizer_1.summarizeArabicText)(text, sentenceCount); } /** * Direct API to summarize text using Google's Gemini AI * @param text The text to summarize * @param sentenceCount The number of sentences to include in the summary * @param geminiConfig Configuration for the Gemini API * @returns A promise that resolves to the summarized text */ async function summarizeWithAI(text, sentenceCount = 5, geminiConfig) { if (!(0, geminiSummarizer_1.isGeminiConfigValid)(geminiConfig)) { throw new Error('Invalid Gemini configuration. API key is required.'); } return (0, geminiSummarizer_1.summarizeWithGeminiAI)(text, sentenceCount, geminiConfig); }