bilingual-summarizer
Version:
A powerful text summarization package for Arabic and English content with sentiment analysis and topic extraction
142 lines (141 loc) • 4.61 kB
JavaScript
;
/**
 * Interop helper: exposes property `k` of source object `m` on target object `o`
 * under the name `k2` (which defaults to `k`).
 * An already-present `this.__createBinding` is reused instead of redefining the helper.
 * NOTE(review): this has the shape of a compiler-emitted helper — confirm before hand-editing.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Substitute an enumerable getter that reads m[k] on every access when:
//  - m has no own descriptor for k, or
//  - the descriptor is an accessor and m is not flagged __esModule, or
//  - the descriptor is a data property that is writable or configurable.
// Otherwise the original descriptor is copied onto the target as-is.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback used when Object.create is unavailable: copy the current value by plain assignment.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
/**
 * Interop helper: stores `value` as the `default` member of `target`.
 * An already-present `this.__setModuleDefault` is reused; otherwise the
 * implementation is picked once, depending on whether Object.create exists.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        // Define `default` as an enumerable property via defineProperty.
        return function (target, value) {
            Object.defineProperty(target, "default", { enumerable: true, value: value });
        };
    }
    // Environments without Object.create: plain assignment.
    return function (target, value) {
        target["default"] = value;
    };
})();
/**
 * Interop helper used for `require(...)` results (see the langdetect import below).
 * - A module with a truthy `__esModule` flag is returned unchanged.
 * - Any other value is wrapped in a new object that gets a binding (via __createBinding)
 *   for every own property name except "default", and whose `default` member is set to
 *   the original value itself (via __setModuleDefault).
 * An already-present `this.__importStar` is reused instead of redefining the helper.
 */
var __importStar = (this && this.__importStar) || (function () {
// Lists the own property names of `o`. On its first call it replaces itself with
// Object.getOwnPropertyNames, or with a for-in + hasOwnProperty fallback when that
// is unavailable, and then delegates to the chosen implementation.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
// null/undefined modules contribute no bindings; `default` is still set to `mod` below.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
// Flag this module's exports object with `__esModule: true`; the __importStar helper above
// returns modules carrying this flag unchanged instead of wrapping them.
Object.defineProperty(exports, "__esModule", { value: true });
// Public API. The functions are declared further down in this file; function declarations
// are hoisted, so they can already be referenced here.
exports.detectLanguage = detectLanguage;
exports.isArabic = isArabic;
exports.getLanguageName = getLanguageName;
const langdetect = __importStar(require("langdetect"));
// arajs is treated as an optional dependency (it might not be properly initialized in tests):
// use its `eld` detector when the module loads, otherwise fall back to a stand-in.
let eld = (() => {
    try {
        return require('arajs').eld;
    }
    catch (error) {
        // Stand-in detector: reports no language and never claims a reliable result.
        return {
            detect: (text) => ({
                language: '',
                isReliable: () => false
            })
        };
    }
})();
/**
 * Detects the language of a given text using multiple libraries for improved accuracy.
 *
 * Detection order:
 *  1. Any character in the Unicode range U+0600–U+06FF => 'ar' with confidence 0.9.
 *  2. The `eld` detector (arajs), when it reports 'ar' and flags the result as reliable
 *     => 'ar' with confidence 0.9.
 *  3. `langdetect.detect`, whose first entry supplies the language and confidence.
 *  4. Otherwise English with confidence 0.5.
 *
 * Each library stage is isolated: a failure in one detector is logged and the next
 * stage still runs. Any other unexpected error yields English with confidence 0.3.
 *
 * @param text The text to detect the language of
 * @returns An object with the detected language code and confidence level
 */
function detectLanguage(text) {
    try {
        // Check for Arabic text patterns
        if (/[\u0600-\u06FF]/.test(text)) {
            return {
                language: 'ar',
                confidence: 0.9
            };
        }
        // Try arajs next. It is optional, so a throwing or malformed detector
        // must not prevent the langdetect fallback below from running.
        if (eld && typeof eld.detect === 'function') {
            try {
                const arabicResult = eld.detect(text);
                const isReliableArabic = Boolean(arabicResult) &&
                    arabicResult.language === 'ar' &&
                    typeof arabicResult.isReliable === 'function' &&
                    arabicResult.isReliable();
                if (isReliableArabic) {
                    return {
                        language: 'ar',
                        confidence: 0.9 // High confidence for arabic detection with arajs
                    };
                }
            }
            catch (arabicError) {
                console.error('Arabic detection library error:', arabicError);
            }
        }
        // Use langdetect as fallback
        try {
            const results = langdetect.detect(text);
            if (results && results.length > 0) {
                // The first entry is taken as the best match
                // (presumably results are ordered by probability — verify against langdetect).
                const bestResult = results[0];
                return {
                    language: bestResult.lang,
                    confidence: bestResult.prob
                };
            }
        }
        catch (langError) {
            console.error('Language detection library error:', langError);
        }
        // Default to English if nothing detected
        return {
            language: 'en',
            confidence: 0.5
        };
    }
    catch (error) {
        console.error('Language detection error:', error);
        // Default to English on error
        return {
            language: 'en',
            confidence: 0.3
        };
    }
}
/**
 * Tells whether a text should be treated as Arabic.
 * A single character in the range U+0600–U+06FF is enough; only when none is
 * present is the decision delegated to detectLanguage().
 * @param text The text to check
 * @returns True if the text is detected as Arabic
 */
function isArabic(text) {
    const containsArabicScript = /[\u0600-\u06FF]/.test(text);
    // Short-circuit: detectLanguage() is consulted only when the quick script check fails.
    return containsArabicScript || detectLanguage(text).language === 'ar';
}
/**
 * Gets the language name from a language code.
 *
 * The lookup is case-insensitive and uses only the primary subtag, so region-tagged
 * codes such as 'zh-cn' or 'en_US' resolve to their base language. Only the codes
 * listed in the table are recognised; anything else (including names inherited from
 * Object.prototype such as 'constructor') yields 'Unknown'.
 *
 * @param langCode The ISO language code
 * @returns The full language name, or 'Unknown' when the code is not in the table
 */
function getLanguageName(langCode) {
    const languages = {
        'ar': 'Arabic',
        'en': 'English',
        'fr': 'French',
        'es': 'Spanish',
        'de': 'German',
        'zh': 'Chinese',
        'ru': 'Russian',
        'ja': 'Japanese'
    };
    // Normalise: coerce to string (as property lookup did before), trim, lower-case,
    // and keep only the part before the first '-' or '_'.
    const primaryTag = String(langCode).trim().toLowerCase().split(/[-_]/)[0];
    // Own-property check so inherited members (toString, constructor, __proto__) never match.
    if (Object.prototype.hasOwnProperty.call(languages, primaryTag)) {
        return languages[primaryTag];
    }
    return 'Unknown';
}