/*
 * Package: bilingual-summarizer
 * Version: (not recorded)
 * A powerful text summarization package for Arabic and English content, with sentiment analysis and topic extraction.
 * 238 lines (237 loc) • 10.9 kB
 * JavaScript
 */
;
// Interop helper: reuses an `__importDefault` already present on `this`,
// otherwise defines one that normalises a required module so it can be
// read through a `default` property.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // A module flagged with `__esModule` is handed back untouched.
    if (mod && mod.__esModule) {
        return mod;
    }
    // Anything else (including falsy values) is wrapped as the `default` export.
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.isGeminiConfigValid = exports.isArabic = exports.extractTopics = void 0;
exports.summarize = summarize;
exports.summarizeArabic = summarizeArabic;
exports.summarizeWithAI = summarizeWithAI;
const reading_time_1 = __importDefault(require("reading-time"));
const sentimentAnalyzer_1 = require("./analyzers/sentimentAnalyzer");
const languageDetection_1 = require("./utils/languageDetection");
Object.defineProperty(exports, "isArabic", { enumerable: true, get: function () { return languageDetection_1.isArabic; } });
const textPreprocessing_1 = require("./utils/textPreprocessing");
const topicExtraction_1 = require("./utils/topicExtraction");
Object.defineProperty(exports, "extractTopics", { enumerable: true, get: function () { return topicExtraction_1.extractTopics; } });
const summarizer_1 = require("./extractors/summarizer");
const arabicSummarizer_1 = require("./extractors/arabicSummarizer");
const geminiSummarizer_1 = require("./extractors/geminiSummarizer");
Object.defineProperty(exports, "isGeminiConfigValid", { enumerable: true, get: function () { return geminiSummarizer_1.isGeminiConfigValid; } });
/**
 * Default options for the summarize function.
 *
 * The object is frozen: it is shared by every call to `summarize`, which only
 * ever spreads it into a fresh options object, so nothing should be able to
 * alter the defaults for later calls.
 */
const DEFAULT_OPTIONS = Object.freeze({
    // Number of sentences requested from the summarizer.
    sentenceCount: 5,
    // NOTE(review): not read anywhere in this module — confirm intended use.
    includeTitleFromContent: true,
    // When truthy, the image extracted from the HTML is attached as `result.image`.
    includeImage: true,
    // Cleaned text shorter than this is returned as its own summary.
    minLength: 100,
    // NOTE(review): not read anywhere in this module — confirm intended use.
    maxLength: 2000,
    // Optional include/exclude filter applied to the returned object.
    responseStructure: null,
    // Gemini configuration; only consulted when `useAI` is truthy.
    gemini: null,
    // Opt-in switch for Gemini-based summarization.
    useAI: false
});
/**
 * Processes the responseStructure option to determine which fields to include or exclude.
 *
 * Accepted shapes of `responseStructure`:
 *  - an array of field names: treated as an include list;
 *  - `{ include: [...] }`: same as passing the array directly;
 *  - `{ exclude: [...] }`: returns a shallow copy of `result` without those fields;
 *  - `null`/`undefined`, or anything without a valid `include`/`exclude` array:
 *    `result` is returned unchanged.
 *
 * With an include list, `ok` is always carried over when `result.ok` is defined,
 * whether or not the list names it. Only own properties of `result` are copied,
 * so names such as `toString` or `constructor` never pull in inherited members.
 *
 * @param result The full result object
 * @param responseStructure The responseStructure option value
 * @returns A filtered result object (a new object when filtering happened)
 * @throws {Error} When both `include` and `exclude` are supplied
 */
function filterResultFields(result, responseStructure) {
    // Nothing to filter by; property access on null/undefined would throw below.
    if (responseStructure === null || responseStructure === undefined) {
        return result;
    }
    // If responseStructure is an array, use it as an include list
    if (Array.isArray(responseStructure)) {
        const filteredResult = {};
        // Keep 'ok' available for error handling even when the caller did not list it
        if (!responseStructure.includes('ok') && result.ok !== undefined) {
            filteredResult.ok = result.ok;
        }
        for (const field of responseStructure) {
            // Own-property check: `in` would also match Object.prototype members
            if (Object.prototype.hasOwnProperty.call(result, field)) {
                filteredResult[field] = result[field];
            }
        }
        return filteredResult;
    }
    // Otherwise treat responseStructure as an { include } / { exclude } object
    const { include, exclude } = responseStructure;
    // Validate that both include and exclude aren't used together
    if (include && exclude) {
        throw new Error("Cannot use both 'include' and 'exclude' in responseStructure simultaneously");
    }
    // Handle include option
    if (Array.isArray(include)) {
        return filterResultFields(result, include);
    }
    // Handle exclude option
    if (Array.isArray(exclude)) {
        // Work on a shallow copy so the caller's object is left intact
        const filteredResult = { ...result };
        for (const field of exclude) {
            // `delete` is a no-op for keys the copy does not own
            delete filteredResult[field];
        }
        return filteredResult;
    }
    // If the object doesn't have valid include or exclude properties, return the original result
    return result;
}
/**
 * Summarizes and analyzes the provided text or HTML content.
 *
 * Flow:
 *  1. merge caller options over DEFAULT_OPTIONS;
 *  2. clean the content and detect its language;
 *  3. if the cleaned text is shorter than `minLength`, return it as its own summary;
 *  4. otherwise summarize it — with Gemini when `useAI` and `gemini` are both set
 *     (falling back to the default summarizer if the Gemini call fails), or with
 *     the default summarizer;
 *  5. attach the image when `includeImage` is set and apply `responseStructure`.
 *
 * This function does not throw: any failure, including an invalid Gemini
 * configuration, is reported as an `{ ok: false, ... }` object.
 *
 * NOTE(review): `includeTitleFromContent` and `maxLength` are merged into the
 * options but never read here — confirm whether they should take effect.
 *
 * @param content The text or HTML content to summarize
 * @param options Optional configuration options (`null` is treated as "no options")
 * @returns A promise resolving to a summary result object, or an error result with `ok: false`
 */
async function summarize(content, options = {}) {
    // A default parameter only replaces `undefined`. An explicit `null` used to
    // throw on `options.title` and then throw again inside the catch block,
    // rejecting the promise instead of producing a result.
    const userOptions = options || {};
    try {
        // Merge default and user options
        const finalOptions = {
            ...DEFAULT_OPTIONS,
            ...userOptions
        };
        // Shared tail of both success paths: optional image, then optional field filtering
        const finalize = (result) => {
            if (finalOptions.includeImage) {
                result.image = (0, textPreprocessing_1.extractImageFromHTML)(content);
            }
            if (finalOptions.responseStructure) {
                return filterResultFields(result, finalOptions.responseStructure);
            }
            return result;
        };
        // Clean the text (remove HTML, normalize spacing, etc.)
        const cleanedText = (0, textPreprocessing_1.cleanText)(content);
        // Detect language of the text
        const language = (0, languageDetection_1.detectLanguage)(cleanedText).language;
        // Whitespace-separated token count, reused for `words` and the difficulty estimate
        const wordCount = cleanedText.split(/\s+/).filter(Boolean).length;
        // If content is too short, return it as is
        if (cleanedText.length < finalOptions.minLength) {
            return finalize({
                ok: true,
                title: userOptions.title || (0, textPreprocessing_1.extractTitleFromHTML)(content) || '',
                summary: cleanedText,
                language,
                languageName: (0, languageDetection_1.getLanguageName)(language),
                sentiment: (0, sentimentAnalyzer_1.getSentimentLabel)((0, sentimentAnalyzer_1.analyzeSentiment)(cleanedText).score),
                topics: (0, topicExtraction_1.extractTopics)(cleanedText),
                relatedTopics: [],
                words: wordCount,
                sentences: (0, textPreprocessing_1.extractSentences)(cleanedText).length,
                readingTime: Math.ceil((0, reading_time_1.default)(cleanedText).minutes || 1),
                difficulty: 'easy'
            });
        }
        // Generate summary using the appropriate method
        let summary;
        // Check if Gemini AI should be used
        if (finalOptions.useAI && finalOptions.gemini) {
            // Validate Gemini configuration; this error is reported via the outer catch
            if (!(0, geminiSummarizer_1.isGeminiConfigValid)(finalOptions.gemini)) {
                throw new Error('Invalid Gemini configuration. API key is required.');
            }
            // Generate summary using Gemini AI
            try {
                summary = await (0, geminiSummarizer_1.summarizeWithGeminiAI)(cleanedText, finalOptions.sentenceCount, finalOptions.gemini);
            }
            catch (aiError) {
                console.error('Gemini AI summarization failed, falling back to default summarizer:', aiError);
                // Fall back to traditional summarization if AI fails
                summary = (0, summarizer_1.summarizeText)(cleanedText, finalOptions.sentenceCount);
            }
        }
        else {
            // Use traditional summarization
            summary = (0, summarizer_1.summarizeText)(cleanedText, finalOptions.sentenceCount);
        }
        // Extract topics
        const topics = (0, topicExtraction_1.extractTopics)(cleanedText);
        // Calculate reading time
        const readingTimeResult = (0, reading_time_1.default)(cleanedText);
        // Get sentiment analysis
        const sentimentResult = (0, sentimentAnalyzer_1.analyzeSentiment)(cleanedText);
        // Difficulty heuristic: characters per word. The numerator is the full
        // cleaned-text length, so separators between words are counted too.
        const avgWordLength = cleanedText.length / wordCount;
        let difficulty = 'medium';
        if (avgWordLength < 4.5) {
            difficulty = 'easy';
        }
        else if (avgWordLength > 6) {
            difficulty = 'hard';
        }
        // Construct the result object
        return finalize({
            ok: true,
            title: userOptions.title || (0, textPreprocessing_1.extractTitleFromHTML)(content) || '',
            summary,
            language,
            languageName: (0, languageDetection_1.getLanguageName)(language),
            sentiment: (0, sentimentAnalyzer_1.getSentimentLabel)(sentimentResult.score),
            topics,
            relatedTopics: (0, topicExtraction_1.suggestRelatedTopics)(topics),
            words: wordCount,
            sentences: (0, textPreprocessing_1.extractSentences)(cleanedText).length,
            readingTime: Math.ceil(readingTimeResult.minutes || 1),
            difficulty
        });
    }
    catch (error) {
        const errorResult = {
            ok: false,
            error: 'Failed to summarize the content',
            message: error instanceof Error ? error.message : String(error),
            language: 'en',
            summary: '',
            sentiment: 'neutral',
            topics: [],
            words: 0,
            readingTime: 0,
            difficulty: 'medium'
        };
        // Filter error result if responseStructure is provided
        if (userOptions.responseStructure) {
            try {
                return filterResultFields(errorResult, userOptions.responseStructure);
            }
            catch (filterError) {
                // If filtering itself causes an error (e.g., invalid responseStructure),
                // return the original error with an additional message
                errorResult.message = `${errorResult.message}. Additionally: ${filterError instanceof Error ? filterError.message : String(filterError)}`;
                return errorResult;
            }
        }
        return errorResult;
    }
}
/**
 * Convenience entry point that forwards straight to the Arabic summarizer module.
 * @param text The Arabic text to summarize
 * @param sentenceCount How many sentences the summary should contain (defaults to 5)
 * @returns Whatever `summarizeArabicText` returns for the given input
 */
function summarizeArabic(text, sentenceCount = 5) {
    const { summarizeArabicText } = arabicSummarizer_1;
    const arabicSummary = summarizeArabicText(text, sentenceCount);
    return arabicSummary;
}
/**
 * Convenience entry point for Gemini-backed summarization.
 *
 * The configuration is checked with `isGeminiConfigValid` first; only a
 * configuration that passes is handed to `summarizeWithGeminiAI`.
 *
 * @param text The text to summarize
 * @param sentenceCount How many sentences the summary should contain (defaults to 5)
 * @param geminiConfig Configuration for the Gemini API
 * @returns A promise that resolves to the summarized text
 * @throws {Error} (as a rejected promise) when the configuration fails validation
 */
async function summarizeWithAI(text, sentenceCount = 5, geminiConfig) {
    const configIsUsable = (0, geminiSummarizer_1.isGeminiConfigValid)(geminiConfig);
    if (configIsUsable) {
        return (0, geminiSummarizer_1.summarizeWithGeminiAI)(text, sentenceCount, geminiConfig);
    }
    throw new Error('Invalid Gemini configuration. API key is required.');
}