@codai/cbd
Version:
Codai Better Database - High-Performance Vector Memory System with HPKV-inspired architecture and MCP server
1,289 lines (1,091 loc) • 50.7 kB
JavaScript
#!/usr/bin/env node
/**
* CBD AI-Powered Analytics Engine (Simplified)
* Phase 4.3.2 - Advanced AI-Powered Analytics System
*
* Features:
* - Natural Language Processing for Query Interpretation
* - Predictive Analytics & Statistical Forecasting
* - Real-time Anomaly Detection (Statistical)
* - Automatic Pattern Recognition & Discovery
* - Intelligent Data Recommendations
* - Automated Report Generation
* - Business Intelligence Dashboard
* - Performance Optimization Analytics
* - User Behavior Analysis
* - Sentiment Analysis & Text Mining
*
* Author: CBD Development Team
* Date: August 2, 2025
*/
const express = require('express');
const cors = require('cors');
const natural = require('natural');
const fs = require('fs').promises;
const path = require('path');
const { v4: uuidv4 } = require('uuid');
class CBDAIAnalyticsEngine {
constructor() {
this.app = express();
this.port = 4700;
// AI Models and Engines
this.statisticalModels = new Map();
this.nlpProcessor = new natural.WordTokenizer();
// Analytics State
this.datasets = new Map();
this.predictions = new Map();
this.anomalies = new Map();
this.patterns = new Map();
this.insights = new Map();
this.reports = new Map();
// Real-time Analytics
this.realTimeData = new Map();
this.alertThresholds = new Map();
this.subscribers = new Map();
// Performance Metrics
this.stats = {
totalAnalyses: 0,
predictionsGenerated: 0,
anomaliesDetected: 0,
patternsDiscovered: 0,
reportsGenerated: 0,
averageProcessingTime: 0,
accuracy: 0.0,
nlpProcessed: 0
};
this.setupExpress();
this.initializeNLP();
this.initializeStatisticalModels();
}
setupExpress() {
this.app.use(cors());
this.app.use(express.json({ limit: '50mb' }));
// Health check
this.app.get('/health', (req, res) => {
res.json({
status: 'healthy',
service: 'CBD AI-Powered Analytics Engine',
version: '1.0.0',
timestamp: new Date().toISOString(),
stats: this.stats,
features: {
nlp_processing: 'natural',
predictive_analytics: 'statistical',
anomaly_detection: 'real-time',
pattern_recognition: 'automatic',
recommendation_engine: 'intelligent',
report_generation: 'automated',
real_time_processing: 'enabled',
sentiment_analysis: 'enabled',
text_mining: 'enabled'
},
models: {
statistical_models: this.statisticalModels.size,
nlp_classifiers: 'trained',
sentiment_analyzer: 'active'
},
performance: {
memory_usage: `${Math.round(process.memoryUsage().heapUsed / 1024 / 1024)}MB`,
datasets_loaded: this.datasets.size,
active_predictions: this.predictions.size,
real_time_streams: this.realTimeData.size
}
});
});
// Analytics Dashboard
this.app.get('/api/analytics/dashboard', (req, res) => {
res.json({
overview: this.generateDashboardOverview(),
recent_insights: this.getRecentInsights(),
active_models: Array.from(this.statisticalModels.keys()),
performance_metrics: this.stats,
real_time_data: this.getRealTimeStatus(),
trending_patterns: this.getTrendingPatterns(),
alerts: this.getActiveAlerts(),
nlp_statistics: this.getNLPStatistics()
});
});
// Statistical Prediction
this.app.post('/api/analytics/predict', async (req, res) => {
try {
const { data, method, options } = req.body;
const result = await this.makeStatisticalPrediction(data, method, options);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
// Predictive Analytics
this.app.post('/api/analytics/forecast', async (req, res) => {
try {
const { data, timeframe, metrics } = req.body;
const result = await this.generateForecast(data, timeframe, metrics);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
// Anomaly Detection
this.app.post('/api/analytics/anomaly-detection', async (req, res) => {
try {
const { data, sensitivity, realTime } = req.body;
const result = await this.detectAnomalies(data, sensitivity, realTime);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
// Natural Language Processing
this.app.post('/api/nlp/analyze', async (req, res) => {
try {
const { text, analysis_type } = req.body;
const result = await this.processNaturalLanguage(text, analysis_type);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
this.app.post('/api/nlp/query', async (req, res) => {
try {
const { query, context } = req.body;
const result = await this.interpretNaturalLanguageQuery(query, context);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
// Sentiment Analysis
this.app.post('/api/nlp/sentiment', async (req, res) => {
try {
const { text, detailed } = req.body;
const result = await this.analyzeSentiment(text, detailed);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
// Pattern Recognition
this.app.post('/api/analytics/pattern-discovery', async (req, res) => {
try {
const { data, algorithm, parameters } = req.body;
const result = await this.discoverPatterns(data, algorithm, parameters);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
// Recommendation Engine
this.app.post('/api/recommendations/generate', async (req, res) => {
try {
const { userId, context, preferences } = req.body;
const result = await this.generateRecommendations(userId, context, preferences);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
// Report Generation
this.app.post('/api/reports/generate', async (req, res) => {
try {
const { reportType, data, format, options } = req.body;
const result = await this.generateReport(reportType, data, format, options);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
this.app.get('/api/reports/:reportId', (req, res) => {
const { reportId } = req.params;
const report = this.reports.get(reportId);
if (!report) {
return res.status(404).json({ error: 'Report not found' });
}
res.json(report);
});
// Real-time Analytics
this.app.post('/api/realtime/stream', async (req, res) => {
try {
const { streamId, data, processing } = req.body;
const result = await this.processRealTimeData(streamId, data, processing);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
this.app.get('/api/realtime/status/:streamId', (req, res) => {
const { streamId } = req.params;
const status = this.getRealTimeStreamStatus(streamId);
res.json(status);
});
// Text Mining
this.app.post('/api/nlp/text-mining', async (req, res) => {
try {
const { texts, options } = req.body;
const result = await this.performTextMining(texts, options);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
// Statistical Analysis
this.app.post('/api/analytics/statistical-analysis', async (req, res) => {
try {
const { data, tests, options } = req.body;
const result = await this.performStatisticalAnalysis(data, tests, options);
res.json(result);
} catch (error) {
res.status(500).json({ error: error.message });
}
});
console.log('š§ AI Analytics REST API initialized');
}
initializeNLP() {
console.log('š Initializing Natural Language Processing...');
// Initialize stemmer and tokenizer
this.stemmer = natural.PorterStemmer;
this.tokenizer = new natural.WordTokenizer();
// Initialize classifiers
this.intentClassifier = new natural.LogisticRegressionClassifier();
// Use simple sentiment analysis without SentimentAnalyzer class
this.useSentimentAnalyzer = false;
// Initialize n-gram analyzer
this.nGrams = natural.NGrams;
// Initialize distance metrics
this.jaro = natural.JaroWinklerDistance;
this.levenshtein = natural.LevenshteinDistance;
// Predefined intents for query interpretation
this.trainIntentClassifier();
console.log('ā
NLP Engine initialized');
}
trainIntentClassifier() {
// Training data for query intent classification
const trainingData = [
{ text: 'show me sales data', intent: 'data_retrieval' },
{ text: 'predict future trends', intent: 'prediction' },
{ text: 'find anomalies in data', intent: 'anomaly_detection' },
{ text: 'generate report for last month', intent: 'report_generation' },
{ text: 'what patterns exist in user behavior', intent: 'pattern_analysis' },
{ text: 'recommend products for customer', intent: 'recommendation' },
{ text: 'analyze sentiment of reviews', intent: 'sentiment_analysis' },
{ text: 'forecast revenue for next quarter', intent: 'forecasting' },
{ text: 'statistical analysis of data', intent: 'statistical_analysis' },
{ text: 'text mining from documents', intent: 'text_mining' }
];
trainingData.forEach(item => {
this.intentClassifier.addDocument(item.text, item.intent);
});
this.intentClassifier.train();
}
initializeStatisticalModels() {
console.log('š Initializing Statistical Models...');
// Linear Regression Model
this.statisticalModels.set('linear_regression', {
name: 'Linear Regression',
type: 'regression',
predict: this.linearRegression.bind(this),
description: 'Simple linear regression for trend analysis'
});
// Moving Average Model
this.statisticalModels.set('moving_average', {
name: 'Moving Average',
type: 'time_series',
predict: this.movingAverage.bind(this),
description: 'Moving average for time series forecasting'
});
// Exponential Smoothing
this.statisticalModels.set('exponential_smoothing', {
name: 'Exponential Smoothing',
type: 'time_series',
predict: this.exponentialSmoothing.bind(this),
description: 'Exponential smoothing for trend forecasting'
});
// Z-Score Anomaly Detection
this.statisticalModels.set('zscore_anomaly', {
name: 'Z-Score Anomaly Detection',
type: 'anomaly_detection',
predict: this.zScoreAnomalyDetection.bind(this),
description: 'Statistical anomaly detection using z-scores'
});
// IQR Anomaly Detection
this.statisticalModels.set('iqr_anomaly', {
name: 'IQR Anomaly Detection',
type: 'anomaly_detection',
predict: this.iqrAnomalyDetection.bind(this),
description: 'Anomaly detection using interquartile range'
});
console.log(`š Initialized ${this.statisticalModels.size} statistical models`);
}
async makeStatisticalPrediction(data, method = 'linear_regression', options = {}) {
const startTime = Date.now();
if (!this.statisticalModels.has(method)) {
throw new Error(`Statistical method ${method} not available`);
}
const model = this.statisticalModels.get(method);
const result = await model.predict(data, options);
const predictionId = uuidv4();
const prediction = {
prediction_id: predictionId,
method,
model_name: model.name,
model_type: model.type,
input_size: Array.isArray(data) ? data.length : Object.keys(data).length,
predictions: result.predictions,
confidence: result.confidence || 0.8,
metadata: result.metadata || {},
timestamp: new Date().toISOString(),
processing_time: Date.now() - startTime
};
this.predictions.set(predictionId, prediction);
this.stats.predictionsGenerated++;
this.updateAverageProcessingTime(Date.now() - startTime);
return prediction;
}
async generateForecast(data, timeframe, metrics = {}) {
const startTime = Date.now();
// Prepare time series data
const timeSeriesData = this.prepareTimeSeriesData(data);
// Generate forecasts using multiple methods
const methods = ['moving_average', 'exponential_smoothing'];
const forecasts = {};
for (const method of methods) {
try {
const forecast = await this.makeStatisticalPrediction(timeSeriesData, method, {
timeframe,
confidence_interval: metrics.confidence_interval || 0.95
});
forecasts[method] = forecast;
} catch (error) {
console.log(`Warning: ${method} forecast failed:`, error.message);
}
}
// Combine forecasts (ensemble)
const combinedForecast = this.combineForecastMethods(forecasts, timeframe);
const result = {
forecast_id: uuidv4(),
timeframe,
individual_forecasts: forecasts,
combined_forecast: combinedForecast,
trends: this.identifyTrends(combinedForecast.predictions),
seasonal_patterns: this.detectSeasonalPatterns(timeSeriesData),
confidence_intervals: this.calculateConfidenceIntervals(combinedForecast.predictions),
insights: this.generateForecastInsights(combinedForecast, timeframe),
generated_at: new Date().toISOString(),
processing_time: Date.now() - startTime
};
return result;
}
async detectAnomalies(data, sensitivity = 0.95, realTime = false) {
const startTime = Date.now();
// Use multiple anomaly detection methods
const methods = ['zscore_anomaly', 'iqr_anomaly'];
const anomalyResults = {};
for (const method of methods) {
try {
const result = await this.makeStatisticalPrediction(data, method, { sensitivity });
anomalyResults[method] = result;
} catch (error) {
console.log(`Warning: ${method} anomaly detection failed:`, error.message);
}
}
// Combine anomaly detection results
const combinedAnomalies = this.combineAnomalyResults(anomalyResults, sensitivity);
const result = {
anomaly_detection_id: uuidv4(),
data_points_analyzed: Array.isArray(data) ? data.length : Object.keys(data).length,
individual_methods: anomalyResults,
combined_anomalies: combinedAnomalies,
anomalies_detected: combinedAnomalies.length,
sensitivity_threshold: sensitivity,
real_time: realTime,
detected_at: new Date().toISOString(),
processing_time: Date.now() - startTime
};
// Store anomalies
this.anomalies.set(result.anomaly_detection_id, result);
this.stats.anomaliesDetected += combinedAnomalies.length;
// Trigger real-time alerts if needed
if (realTime && combinedAnomalies.length > 0) {
this.triggerAnomalyAlerts(combinedAnomalies);
}
return result;
}
async processNaturalLanguage(text, analysisType = 'comprehensive') {
const startTime = Date.now();
// Tokenize text
const tokens = this.tokenizer.tokenize(text);
const stemmedTokens = tokens.map(token => this.stemmer.stem(token));
// Sentiment analysis (simplified)
const sentiment = this.calculateSimpleSentiment(tokens);
// Intent classification
const intent = this.intentClassifier.classify(text);
const intentConfidence = this.intentClassifier.getClassifications(text)[0].value;
// Entity extraction
const entities = this.extractEntities(text);
// Key phrase extraction
const keyPhrases = this.extractKeyPhrases(tokens);
// N-gram analysis
const bigrams = this.nGrams.bigrams(tokens);
const trigrams = this.nGrams.trigrams(tokens);
// Language statistics
const languageStats = this.calculateLanguageStatistics(text, tokens);
const result = {
analysis_id: uuidv4(),
original_text: text,
analysis_type: analysisType,
tokens,
stemmed_tokens: stemmedTokens,
sentiment_score: sentiment,
sentiment_classification: this.classifySentiment(sentiment),
intent,
intent_confidence: intentConfidence,
entities,
key_phrases: keyPhrases,
bigrams: bigrams.slice(0, 10), // Top 10 bigrams
trigrams: trigrams.slice(0, 5), // Top 5 trigrams
language_statistics: languageStats,
processed_at: new Date().toISOString(),
processing_time: Date.now() - startTime
};
this.stats.nlpProcessed++;
return result;
}
async analyzeSentiment(text, detailed = false) {
const startTime = Date.now();
// Process with NLP
const nlpResult = await this.processNaturalLanguage(text, 'sentiment_focused');
// Detailed sentiment analysis
let sentimentDetails = {};
if (detailed) {
sentimentDetails = {
positive_words: this.findPositiveWords(nlpResult.tokens),
negative_words: this.findNegativeWords(nlpResult.tokens),
neutral_words: this.findNeutralWords(nlpResult.tokens),
emotional_indicators: this.identifyEmotionalIndicators(text),
confidence_score: this.calculateSentimentConfidence(nlpResult.sentiment_score)
};
}
const result = {
sentiment_analysis_id: uuidv4(),
text,
sentiment_score: nlpResult.sentiment_score,
sentiment_classification: nlpResult.sentiment_classification,
detailed_analysis: detailed ? sentimentDetails : null,
processed_at: new Date().toISOString(),
processing_time: Date.now() - startTime
};
return result;
}
async interpretNaturalLanguageQuery(query, context = {}) {
const startTime = Date.now();
// Process the query with NLP
const nlpResult = await this.processNaturalLanguage(query, 'query_interpretation');
// Generate structured query based on intent
const structuredQuery = this.generateStructuredQuery(nlpResult, context);
// Generate suggested actions
const suggestions = this.generateQuerySuggestions(nlpResult, context);
// Extract query parameters
const parameters = this.extractQueryParameters(nlpResult, context);
const result = {
query_id: uuidv4(),
original_query: query,
nlp_analysis: nlpResult,
structured_query: structuredQuery,
suggested_actions: suggestions,
extracted_parameters: parameters,
confidence: nlpResult.intent_confidence,
executable: structuredQuery !== null,
context,
interpreted_at: new Date().toISOString(),
processing_time: Date.now() - startTime
};
return result;
}
async performTextMining(texts, options = {}) {
const startTime = Date.now();
// Process all texts
const processedTexts = [];
for (const text of texts) {
const nlpResult = await this.processNaturalLanguage(text, 'text_mining');
processedTexts.push(nlpResult);
}
// Aggregate analysis
const aggregated = this.aggregateTextAnalysis(processedTexts);
// Topic extraction (simplified clustering)
const topics = this.extractTopics(processedTexts, options.topic_count || 5);
// Sentiment distribution
const sentimentDistribution = this.calculateSentimentDistribution(processedTexts);
// Common patterns
const commonPatterns = this.findCommonTextPatterns(processedTexts);
const result = {
text_mining_id: uuidv4(),
texts_processed: texts.length,
aggregated_analysis: aggregated,
topics: topics,
sentiment_distribution: sentimentDistribution,
common_patterns: commonPatterns,
insights: this.generateTextMiningInsights(aggregated, topics, sentimentDistribution),
processed_at: new Date().toISOString(),
processing_time: Date.now() - startTime
};
return result;
}
async performStatisticalAnalysis(data, tests = [], options = {}) {
const startTime = Date.now();
// Basic descriptive statistics
const descriptiveStats = this.calculateDescriptiveStatistics(data);
// Correlation analysis
const correlations = this.calculateCorrelations(data);
// Distribution analysis
const distribution = this.analyzeDistribution(data);
// Hypothesis tests (if specified)
const testResults = {};
for (const test of tests) {
testResults[test] = this.performHypothesisTest(data, test, options);
}
const result = {
statistical_analysis_id: uuidv4(),
data_points: Array.isArray(data) ? data.length : Object.keys(data).length,
descriptive_statistics: descriptiveStats,
correlations,
distribution_analysis: distribution,
hypothesis_tests: testResults,
insights: this.generateStatisticalInsights(descriptiveStats, correlations, distribution),
analyzed_at: new Date().toISOString(),
processing_time: Date.now() - startTime
};
return result;
}
async discoverPatterns(data, algorithm = 'clustering', parameters = {}) {
const startTime = Date.now();
let patterns = [];
switch (algorithm) {
case 'clustering':
patterns = this.performSimpleClustering(data, parameters);
break;
case 'association':
patterns = this.findAssociationRules(data, parameters);
break;
case 'sequential':
patterns = this.findSequentialPatterns(data, parameters);
break;
case 'frequency':
patterns = this.findFrequencyPatterns(data, parameters);
break;
}
const result = {
pattern_discovery_id: uuidv4(),
algorithm,
data_points: Array.isArray(data) ? data.length : Object.keys(data).length,
patterns_discovered: patterns.length,
patterns,
parameters,
insights: this.generatePatternInsights(patterns),
discovered_at: new Date().toISOString(),
processing_time: Date.now() - startTime
};
// Store patterns
this.patterns.set(result.pattern_discovery_id, result);
this.stats.patternsDiscovered += patterns.length;
return result;
}
async generateRecommendations(userId, context, preferences = {}) {
const startTime = Date.now();
// Analyze user behavior and preferences
const userProfile = this.analyzeUserProfile(userId, context);
// Generate content-based recommendations
const contentRecommendations = this.generateContentBasedRecommendations(userProfile, preferences);
// Generate popularity-based recommendations
const popularityRecommendations = this.generatePopularityBasedRecommendations(context);
// Combine recommendations
const combinedRecommendations = this.combineRecommendations(
contentRecommendations,
popularityRecommendations,
preferences
);
const result = {
recommendation_id: uuidv4(),
user_id: userId,
recommendations: combinedRecommendations,
recommendation_count: combinedRecommendations.length,
user_profile: userProfile,
context,
preferences,
generated_at: new Date().toISOString(),
processing_time: Date.now() - startTime
};
return result;
}
async generateReport(reportType, data, format = 'json', options = {}) {
const startTime = Date.now();
let reportContent = {};
switch (reportType) {
case 'analytics_summary':
reportContent = this.generateAnalyticsSummaryReport(data, options);
break;
case 'performance_metrics':
reportContent = this.generatePerformanceReport(data, options);
break;
case 'user_behavior':
reportContent = await this.generateUserBehaviorReport(data, options);
break;
case 'predictive_insights':
reportContent = await this.generatePredictiveInsightsReport(data, options);
break;
case 'anomaly_summary':
reportContent = this.generateAnomalySummaryReport(data, options);
break;
case 'nlp_analysis':
reportContent = await this.generateNLPAnalysisReport(data, options);
break;
case 'statistical_summary':
reportContent = await this.generateStatisticalSummaryReport(data, options);
break;
}
const reportId = uuidv4();
const report = {
report_id: reportId,
report_type: reportType,
format,
content: reportContent,
metadata: {
generated_at: new Date().toISOString(),
processing_time: Date.now() - startTime,
data_points: Array.isArray(data) ? data.length : Object.keys(data).length,
options
}
};
// Store report
this.reports.set(reportId, report);
this.stats.reportsGenerated++;
return {
report_id: reportId,
report_url: `/api/reports/${reportId}`,
format,
generated_at: report.metadata.generated_at,
processing_time: report.metadata.processing_time
};
}
// Statistical Models Implementation
async linearRegression(data, options = {}) {
// Simple linear regression implementation
const n = data.length;
const sumX = data.reduce((sum, point, index) => sum + index, 0);
const sumY = data.reduce((sum, point) => sum + (typeof point === 'number' ? point : point.y || point.value), 0);
const sumXY = data.reduce((sum, point, index) => sum + index * (typeof point === 'number' ? point : point.y || point.value), 0);
const sumXX = data.reduce((sum, point, index) => sum + index * index, 0);
const slope = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX);
const intercept = (sumY - slope * sumX) / n;
// Generate predictions
const predictions = [];
const forecastLength = options.forecast_length || Math.min(10, Math.ceil(n * 0.2));
for (let i = 0; i < forecastLength; i++) {
const x = n + i;
const prediction = slope * x + intercept;
predictions.push(prediction);
}
// Calculate R-squared
const yMean = sumY / n;
const ssTotal = data.reduce((sum, point) => {
const y = typeof point === 'number' ? point : point.y || point.value;
return sum + Math.pow(y - yMean, 2);
}, 0);
const ssRes = data.reduce((sum, point, index) => {
const y = typeof point === 'number' ? point : point.y || point.value;
const predicted = slope * index + intercept;
return sum + Math.pow(y - predicted, 2);
}, 0);
const rSquared = 1 - (ssRes / ssTotal);
return {
predictions,
confidence: Math.max(0, rSquared),
metadata: {
slope,
intercept,
r_squared: rSquared,
method: 'linear_regression'
}
};
}
async movingAverage(data, options = {}) {
const window = options.window || Math.min(5, Math.ceil(data.length * 0.1));
const values = data.map(point => typeof point === 'number' ? point : point.y || point.value);
// Calculate moving averages
const movingAverages = [];
for (let i = window - 1; i < values.length; i++) {
const windowValues = values.slice(i - window + 1, i + 1);
const average = windowValues.reduce((sum, val) => sum + val, 0) / window;
movingAverages.push(average);
}
// Generate predictions
const forecastLength = options.forecast_length || Math.min(10, Math.ceil(data.length * 0.2));
const predictions = [];
for (let i = 0; i < forecastLength; i++) {
// Use last window for prediction
const lastWindow = values.slice(-window);
const prediction = lastWindow.reduce((sum, val) => sum + val, 0) / window;
predictions.push(prediction);
values.push(prediction); // Add prediction for next iteration
}
return {
predictions,
confidence: 0.7, // Moderate confidence for moving average
metadata: {
window_size: window,
moving_averages: movingAverages.slice(-5), // Last 5 moving averages
method: 'moving_average'
}
};
}
async exponentialSmoothing(data, options = {}) {
const alpha = options.alpha || 0.3; // Smoothing parameter
const values = data.map(point => typeof point === 'number' ? point : point.y || point.value);
// Calculate exponentially smoothed values
const smoothed = [values[0]];
for (let i = 1; i < values.length; i++) {
const smoothedValue = alpha * values[i] + (1 - alpha) * smoothed[i - 1];
smoothed.push(smoothedValue);
}
// Generate predictions
const forecastLength = options.forecast_length || Math.min(10, Math.ceil(data.length * 0.2));
const predictions = [];
let lastSmoothed = smoothed[smoothed.length - 1];
for (let i = 0; i < forecastLength; i++) {
predictions.push(lastSmoothed);
// For simple exponential smoothing, prediction remains constant
}
return {
predictions,
confidence: 0.75, // Good confidence for exponential smoothing
metadata: {
alpha,
smoothed_values: smoothed.slice(-5), // Last 5 smoothed values
method: 'exponential_smoothing'
}
};
}
async zScoreAnomalyDetection(data, options = {}) {
const threshold = options.sensitivity || 0.95; // 95% confidence
const zThreshold = this.getZScoreThreshold(threshold);
const values = Array.isArray(data[0]) ? data.map(point => point[0]) :
data.map(point => typeof point === 'number' ? point : point.value);
// Calculate mean and standard deviation
const mean = values.reduce((sum, val) => sum + val, 0) / values.length;
const variance = values.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / values.length;
const stdDev = Math.sqrt(variance);
// Calculate z-scores and identify anomalies
const anomalies = [];
values.forEach((value, index) => {
const zScore = Math.abs((value - mean) / stdDev);
if (zScore > zThreshold) {
anomalies.push({
index,
value,
z_score: zScore,
severity: zScore > zThreshold * 1.5 ? 'high' : 'medium'
});
}
});
return {
predictions: anomalies,
confidence: threshold,
metadata: {
mean,
std_deviation: stdDev,
z_threshold: zThreshold,
total_anomalies: anomalies.length,
method: 'zscore_anomaly'
}
};
}
async iqrAnomalyDetection(data, options = {}) {
const values = Array.isArray(data[0]) ? data.map(point => point[0]) :
data.map(point => typeof point === 'number' ? point : point.value);
// Sort values for quartile calculation
const sortedValues = [...values].sort((a, b) => a - b);
const n = sortedValues.length;
// Calculate quartiles
const q1Index = Math.floor(n * 0.25);
const q3Index = Math.floor(n * 0.75);
const q1 = sortedValues[q1Index];
const q3 = sortedValues[q3Index];
const iqr = q3 - q1;
// Calculate bounds
const lowerBound = q1 - 1.5 * iqr;
const upperBound = q3 + 1.5 * iqr;
// Identify anomalies
const anomalies = [];
values.forEach((value, index) => {
if (value < lowerBound || value > upperBound) {
const severity = (value < lowerBound - iqr || value > upperBound + iqr) ? 'high' : 'medium';
anomalies.push({
index,
value,
distance_from_bound: Math.min(Math.abs(value - lowerBound), Math.abs(value - upperBound)),
severity
});
}
});
return {
predictions: anomalies,
confidence: 0.75, // IQR method confidence
metadata: {
q1,
q3,
iqr,
lower_bound: lowerBound,
upper_bound: upperBound,
total_anomalies: anomalies.length,
method: 'iqr_anomaly'
}
};
}
// Utility Methods
prepareTimeSeriesData(data) {
if (Array.isArray(data) && typeof data[0] === 'number') {
return data.map((value, index) => ({ x: index, y: value, value }));
}
return data;
}
calculateSimpleSentiment(tokens) {
// Simple sentiment calculation based on word lists
const positiveWords = ['good', 'great', 'excellent', 'amazing', 'wonderful', 'fantastic', 'awesome', 'brilliant', 'outstanding', 'perfect', 'love', 'like', 'happy', 'pleased', 'satisfied', 'impressed', 'delighted'];
const negativeWords = ['bad', 'terrible', 'awful', 'horrible', 'disappointing', 'poor', 'worst', 'hate', 'dislike', 'sad', 'angry', 'frustrated', 'annoyed', 'upset', 'dissatisfied', 'unhappy'];
let positiveScore = 0;
let negativeScore = 0;
tokens.forEach(token => {
const word = token.toLowerCase();
if (positiveWords.includes(word)) {
positiveScore++;
} else if (negativeWords.includes(word)) {
negativeScore++;
}
});
// Calculate sentiment score (-1 to 1)
const totalWords = tokens.length;
const sentimentScore = (positiveScore - negativeScore) / Math.max(totalWords, 1);
return Math.max(-1, Math.min(1, sentimentScore));
}
getZScoreThreshold(confidence) {
// Convert confidence level to z-score threshold
const zScores = {
0.90: 1.645,
0.95: 1.96,
0.99: 2.576,
0.999: 3.291
};
return zScores[confidence] || 1.96;
}
calculateDescriptiveStatistics(data) {
const values = Array.isArray(data) ? data : Object.values(data);
const numericValues = values.filter(val => typeof val === 'number').sort((a, b) => a - b);
if (numericValues.length === 0) return null;
const n = numericValues.length;
const sum = numericValues.reduce((sum, val) => sum + val, 0);
const mean = sum / n;
// Median
const median = n % 2 === 0 ?
(numericValues[n / 2 - 1] + numericValues[n / 2]) / 2 :
numericValues[Math.floor(n / 2)];
// Mode (most frequent value)
const frequency = {};
numericValues.forEach(val => {
frequency[val] = (frequency[val] || 0) + 1;
});
const mode = Object.keys(frequency).reduce((a, b) => frequency[a] > frequency[b] ? a : b);
// Variance and standard deviation
const variance = numericValues.reduce((sum, val) => sum + Math.pow(val - mean, 2), 0) / n;
const stdDev = Math.sqrt(variance);
// Quartiles
const q1 = numericValues[Math.floor(n * 0.25)];
const q3 = numericValues[Math.floor(n * 0.75)];
return {
count: n,
mean,
median,
mode: parseFloat(mode),
min: numericValues[0],
max: numericValues[n - 1],
range: numericValues[n - 1] - numericValues[0],
variance,
standard_deviation: stdDev,
q1,
q3,
iqr: q3 - q1
};
}
calculateCorrelations(data) {
// Simplified correlation calculation for basic use cases
if (!Array.isArray(data) || data.length < 2) return {};
const correlations = {};
// If data is array of objects, calculate correlations between numeric fields
if (typeof data[0] === 'object') {
const numericFields = Object.keys(data[0]).filter(key =>
typeof data[0][key] === 'number'
);
for (let i = 0; i < numericFields.length; i++) {
for (let j = i + 1; j < numericFields.length; j++) {
const field1 = numericFields[i];
const field2 = numericFields[j];
const values1 = data.map(item => item[field1]).filter(val => typeof val === 'number');
const values2 = data.map(item => item[field2]).filter(val => typeof val === 'number');
if (values1.length === values2.length && values1.length > 1) {
const correlation = this.calculatePearsonCorrelation(values1, values2);
correlations[`${field1}_${field2}`] = correlation;
}
}
}
}
return correlations;
}
calculatePearsonCorrelation(x, y) {
const n = x.length;
const sumX = x.reduce((sum, val) => sum + val, 0);
const sumY = y.reduce((sum, val) => sum + val, 0);
const sumXY = x.reduce((sum, val, i) => sum + val * y[i], 0);
const sumXX = x.reduce((sum, val) => sum + val * val, 0);
const sumYY = y.reduce((sum, val) => sum + val * val, 0);
const numerator = n * sumXY - sumX * sumY;
const denominator = Math.sqrt((n * sumXX - sumX * sumX) * (n * sumYY - sumY * sumY));
return denominator === 0 ? 0 : numerator / denominator;
}
analyzeDistribution(data) {
const values = Array.isArray(data) ? data : Object.values(data);
const numericValues = values.filter(val => typeof val === 'number');
if (numericValues.length === 0) return null;
const stats = this.calculateDescriptiveStatistics(numericValues);
// Skewness (simplified calculation)
const skewness = this.calculateSkewness(numericValues, stats.mean, stats.standard_deviation);
// Kurtosis (simplified calculation)
const kurtosis = this.calculateKurtosis(numericValues, stats.mean, stats.standard_deviation);
return {
...stats,
skewness,
kurtosis,
distribution_type: this.identifyDistributionType(stats, skewness, kurtosis)
};
}
calculateSkewness(values, mean, stdDev) {
const n = values.length;
const sum = values.reduce((sum, val) => sum + Math.pow((val - mean) / stdDev, 3), 0);
return (n / ((n - 1) * (n - 2))) * sum;
}
calculateKurtosis(values, mean, stdDev) {
const n = values.length;
const sum = values.reduce((sum, val) => sum + Math.pow((val - mean) / stdDev, 4), 0);
return (n * (n + 1) / ((n - 1) * (n - 2) * (n - 3))) * sum - (3 * (n - 1) * (n - 1) / ((n - 2) * (n - 3)));
}
identifyDistributionType(stats, skewness, kurtosis) {
if (Math.abs(skewness) < 0.5 && Math.abs(kurtosis) < 0.5) {
return 'normal';
} else if (skewness > 1) {
return 'right_skewed';
} else if (skewness < -1) {
return 'left_skewed';
} else if (kurtosis > 1) {
return 'heavy_tailed';
} else {
return 'irregular';
}
}
// Additional utility methods for completeness
updateAverageProcessingTime(processingTime) {
const totalAnalyses = this.stats.totalAnalyses + 1;
this.stats.averageProcessingTime =
(this.stats.averageProcessingTime * this.stats.totalAnalyses + processingTime) / totalAnalyses;
this.stats.totalAnalyses = totalAnalyses;
}
generateDashboardOverview() {
return {
models_active: this.statisticalModels.size,
predictions_today: this.stats.predictionsGenerated,
anomalies_detected: this.stats.anomaliesDetected,
patterns_discovered: this.stats.patternsDiscovered,
reports_generated: this.stats.reportsGenerated,
nlp_processed: this.stats.nlpProcessed,
system_status: 'operational',
last_updated: new Date().toISOString()
};
}
getRecentInsights() {
const recentInsights = Array.from(this.insights.values())
.sort((a, b) => new Date(b.created_at) - new Date(a.created_at))
.slice(0, 10);
return recentInsights;
}
getRealTimeStatus() {
return {
active_streams: this.realTimeData.size,
total_data_points: Array.from(this.realTimeData.values())
.reduce((sum, stream) => sum + stream.data_points.length, 0),
active_alerts: Array.from(this.realTimeData.values())
.reduce((sum, stream) => sum + stream.alerts.length, 0)
};
}
getTrendingPatterns() {
return Array.from(this.patterns.values())
.sort((a, b) => new Date(b.discovered_at) - new Date(a.discovered_at))
.slice(0, 5)
.map(pattern => ({
id: pattern.pattern_discovery_id,
algorithm: pattern.algorithm,
patterns_count: pattern.patterns_discovered,
discovered_at: pattern.discovered_at
}));
}
getActiveAlerts() {
const alerts = [];
this.realTimeData.forEach((stream, streamId) => {
if (stream.alerts) {
stream.alerts.forEach(alert => {
alerts.push({
stream_id: streamId,
alert_type: 'anomaly',
severity: alert.severity,
timestamp: alert.timestamp || new Date().toISOString()
});
});
}
});
return alerts.sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp)).slice(0, 10);
}
getNLPStatistics() {
return {
total_processed: this.stats.nlpProcessed,
average_processing_time: this.stats.averageProcessingTime,
classifiers_trained: 'intent_classifier',
supported_languages: ['english'],
features: [
'sentiment_analysis',
'intent_classification',
'entity_extraction',
'key_phrase_extraction',
'n_gram_analysis',
'text_mining'
]
};
}
start() {
this.app.listen(this.port, () => {
console.log('\nš§ ================================');
console.log('š¤ CBD AI-Powered Analytics Engine');
console.log('š§ ================================');
console.log(`š Server running on port ${this.port}`);
console.log('š Health Check: http://localhost:' + this.port + '/health');
console.log('š Analytics Dashboard: http://localhost:' + this.port + '/api/analytics/dashboard');
console.log('š® Statistical Prediction: POST http://localhost:' + this.port + '/api/analytics/predict');
console.log('š Forecasting: POST http://localhost:' + this.port + '/api/analytics/forecast');
console.log('šØ Anomaly Detection: POST http://localhost:' + this.port + '/api/analytics/anomaly-detection');
console.log('š NLP Analysis: POST http://localhost:' + this.port + '/api/nlp/analyze');
console.log('š Sentiment Analysis: POST http://localhost:' + this.port + '/api/nlp/sentiment');
console.log('š Pattern Discovery: POST http://localhost:' + this.port + '/api/analytics/pattern-discovery');
console.log('š” Recommendations: POST http://localhost:' + this.port + '/api/recommendations/generate');
console.log('š Report Generation: POST http://localhost:' + this.port + '/api/reports/generate');
console.log('ā” Real-time Analytics: POST http://localhost:' + this.port + '/api/realtime/stream');
console.log('š Text Mining: POST http://localhost:' + this.port + '/api/nlp/text-mining');
console.log('š Statistical Analysis: POST http://localhost:' + this.port + '/api/analytics/statistical-analysis');
console.log('\nš§ AI Analytics Features:');
console.log(' ā
Natural Language Processing');
console.log(' ā
Statistical Predictive Analytics');
console.log(' ā
Real-time Anomaly Detection');
console.log(' ā
Sentiment Analysis & Text Mining');
console.log(' ā
Automatic Pattern Recognition');