UNPKG

datapilot-cli

Version:

Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform

github.com/Mrassimo/datapilot

Mrassimo/datapilot

337 lines (292 loc) • 14.6 kB

JavaScript

"use strict"; /** * Rich Output Formatting for Join Analysis * Phase 1: Foundation Architecture - Comprehensive result formatting */ Object.defineProperty(exports, "__esModule", { value: true }); exports.JoinFormatter = void 0; const types_1 = require("./types"); class JoinFormatter { /** * Format join analysis results according to specified format */ format(result, format) { switch (format.type) { case 'MARKDOWN': return this.formatMarkdown(result); case 'JSON': return this.formatJSON(result); case 'SQL': return this.formatSQL(result); case 'DIAGRAM': return this.formatDiagram(result); default: return this.formatMarkdown(result); } } /** * Format as comprehensive markdown report */ formatMarkdown(result) { const sections = [ this.formatMarkdownHeader(result), this.formatMarkdownSummary(result), this.formatMarkdownJoinCandidates(result), this.formatMarkdownDependencyGraph(result), this.formatMarkdownIntegrityReport(result), this.formatMarkdownBusinessRules(result), this.formatMarkdownRecommendations(result), this.formatMarkdownPerformance(result) ]; return sections.join('\n\n'); } formatMarkdownHeader(result) { return `# DataPilot Join Analysis Report **Generated:** ${new Date().toISOString()} **Analysis Duration:** ${result.summary.analysisTime}ms **Tables Analyzed:** ${result.summary.tablesAnalyzed} **Total Records:** ${result.summary.totalRows.toLocaleString()} ---`; } formatMarkdownSummary(result) { const summary = result.summary; return `## 📊 Executive Summary | Metric | Value | |--------|-------| | **Join Candidates Found** | ${summary.joinCandidatesFound} | | **High Confidence Joins** | ${summary.highConfidenceJoins} | | **Potential Issues** | ${summary.potentialIssues} | | **Overall Complexity** | ${result.performance.overallComplexity} | ### Key Findings ${summary.highConfidenceJoins > 0 ? `✅ **${summary.highConfidenceJoins} high-confidence join relationships** identified` : '⚠️ **No high-confidence joins found** - consider data quality improvements'} ${summary.potentialIssues > 0 ? `⚠️ **${summary.potentialIssues} potential data quality issues** detected` : '✅ **No major data quality issues** identified'} ${result.businessRules.length > 0 ? `🎯 **${result.businessRules.length} business relationship patterns** discovered` : '💡 **No clear business patterns** - manual review recommended'}`; } formatMarkdownJoinCandidates(result) { if (result.candidates.length === 0) { return `## 🔗 Join Candidates No join candidates found with the current confidence threshold.`; } // Separate high-confidence and suggested joins const highConfidenceJoins = result.candidates.filter(candidate => candidate.confidence >= 0.5); const suggestedJoins = result.candidates.filter(candidate => candidate.confidence >= 0.3 && candidate.confidence < 0.5); let output = `## 🔗 Join Candidates\n\n`; // High confidence joins if (highConfidenceJoins.length > 0) { const candidateRows = highConfidenceJoins.map((candidate, index) => { const confidence = `${(candidate.confidence * 100).toFixed(1)}%`; const strategy = this.formatStrategy(candidate.strategy); const cardinality = this.formatCardinality(candidate.cardinality); const dataLoss = `${candidate.qualityMetrics.dataLoss.toFixed(1)}%`; return `| ${index + 1} | \`${candidate.leftTable.tableName}\` | \`${candidate.leftColumn}\` | \`${candidate.rightTable.tableName}\` | \`${candidate.rightColumn}\` | ${confidence} | ${strategy} | ${cardinality} | ${dataLoss} |`; }).join('\n'); output += `### ✅ High Confidence Joins | # | Left Table | Left Column | Right Table | Right Column | Confidence | Strategy | Cardinality | Data Loss | |---|------------|-------------|-------------|--------------|------------|----------|-------------|-----------| ${candidateRows} `; } // Suggested joins if (suggestedJoins.length > 0) { const suggestedRows = suggestedJoins.map((candidate, index) => { const confidence = `${(candidate.confidence * 100).toFixed(1)}%`; const strategy = this.formatStrategy(candidate.strategy); const cardinality = this.formatCardinality(candidate.cardinality); const dataLoss = `${candidate.qualityMetrics.dataLoss.toFixed(1)}%`; return `| ${index + 1} | \`${candidate.leftTable.tableName}\` | \`${candidate.leftColumn}\` | \`${candidate.rightTable.tableName}\` | \`${candidate.rightColumn}\` | ${confidence} | ${strategy} | ${cardinality} | ${dataLoss} |`; }).join('\n'); output += `### 💡 Suggested Joins (Lower Confidence) | # | Left Table | Left Column | Right Table | Right Column | Confidence | Strategy | Cardinality | Data Loss | |---|------------|-------------|-------------|--------------|------------|----------|-------------|-----------| ${suggestedRows} *Note: These joins have lower confidence scores but may still be useful. Consider verifying the relationships manually.* `; } // Show top recommendations from all candidates if (result.candidates.length > 0) { output += `### Top Join Recommendations ${this.formatTopJoins(result.candidates.slice(0, 3))}`; } return output; } formatTopJoins(topCandidates) { return topCandidates.map((candidate, index) => { const performance = candidate.qualityMetrics.performance; return `#### ${index + 1}. ${candidate.leftTable.tableName} ↔ ${candidate.rightTable.tableName} **Join Condition:** \`${candidate.leftTable.tableName}.${candidate.leftColumn} = ${candidate.rightTable.tableName}.${candidate.rightColumn}\` **Quality Metrics:** - **Confidence:** ${(candidate.confidence * 100).toFixed(1)}% - **Data Loss:** ${candidate.qualityMetrics.dataLoss.toFixed(1)}% - **Consistency:** ${candidate.qualityMetrics.consistency.toFixed(1)}% - **Estimated Rows:** ${candidate.estimatedRows.toLocaleString()} **Performance:** - **Estimated Time:** ${performance.estimatedTime}ms - **Memory Usage:** ${(performance.estimatedMemory / 1024 / 1024).toFixed(1)}MB - **Complexity:** ${performance.complexity} ${performance.indexRecommended ? '- **⚡ Index Recommended**' : ''}`; }).join('\n\n'); } formatMarkdownDependencyGraph(result) { const graph = result.dependencyGraph; const nodeList = graph.nodes.map(node => { const type = node.isRoot ? '🌿 Root' : node.isLeaf ? '🍃 Leaf' : '🔗 Node'; return `- **${node.table.tableName}** (${type}, Level ${node.level})`; }).join('\n'); const edgeList = graph.edges.map(edge => { const strength = `${(edge.strength * 100).toFixed(1)}%`; const columns = edge.columns.map(c => `${c.fromColumn}→${c.toColumn}`).join(', '); return `- **${edge.from}** → **${edge.to}** (${strength} confidence, ${columns})`; }).join('\n'); return `## 🕸️ Table Dependency Graph **Graph Depth:** ${graph.depth} **Detected Cycles:** ${graph.cycles.length} ### Tables ${nodeList} ### Relationships ${edgeList} ${graph.cycles.length > 0 ? `### ⚠️ Circular Dependencies ${graph.cycles.map(cycle => cycle.map(n => n.table.tableName).join(' → ')).join('\n')}` : ''}`; } formatMarkdownIntegrityReport(result) { const report = result.integrityReport; return `## 🛡️ Data Integrity Report **Valid Relationships:** ${report.validJoins.length} **Broken Relationships:** ${report.brokenRelationships.length} **Orphaned Records:** ${report.orphanedRecords.length} ${report.brokenRelationships.length > 0 ? `### ⚠️ Referential Integrity Issues ${report.brokenRelationships.map(broken => `- **${broken.fromTable}.${broken.fromColumn}** → **${broken.toTable}.${broken.toColumn}**: ${broken.violationCount} violations`).join('\n')}` : '### ✅ No Integrity Issues Found'} ${report.recommendations.length > 0 ? `### 💡 Integrity Recommendations ${report.recommendations.map(rec => `- ${rec}`).join('\n')}` : ''}`; } formatMarkdownBusinessRules(result) { if (result.businessRules.length === 0) { return `## 🎯 Business Rules No business relationship patterns detected.`; } const rulesList = result.businessRules.map(rule => { const confidence = `${(rule.confidence * 100).toFixed(1)}%`; const tables = rule.tables.map(t => `\`${t}\``).join(', '); return `### ${rule.name} **Description:** ${rule.description} **Confidence:** ${confidence} **Tables:** ${tables} **Source:** ${rule.source} **Conditions:** ${rule.conditions.map(c => `- ${c}`).join('\n')}`; }).join('\n\n'); return `## 🎯 Business Rules ${rulesList}`; } formatMarkdownRecommendations(result) { if (result.recommendations.length === 0) { return `## 💡 Recommendations No specific recommendations at this time.`; } const recommendationsList = result.recommendations.map(rec => { const priority = rec.priority === 'HIGH' ? '🔴' : rec.priority === 'MEDIUM' ? '🟡' : '🟢'; return `### ${priority} ${rec.title} **Type:** ${rec.type} **Priority:** ${rec.priority} **Estimated Effort:** ${rec.estimatedEffort} **Description:** ${rec.description} **Expected Impact:** ${rec.impact} **Implementation:** ${rec.implementation}`; }).join('\n\n'); return `## 💡 Recommendations ${recommendationsList}`; } formatMarkdownPerformance(result) { const perf = result.performance; return `## ⚡ Performance Analysis **Overall Complexity:** ${perf.overallComplexity} ### Current Capacity - **Rows:** ${perf.scalabilityAssessment.currentCapacity.rows.toLocaleString()} - **Size:** ${perf.scalabilityAssessment.currentCapacity.sizeGB.toFixed(2)} GB - **Tables:** ${perf.scalabilityAssessment.currentCapacity.tables} ### Projected Capacity (10x Growth) - **Rows:** ${perf.scalabilityAssessment.projectedCapacity.rows.toLocaleString()} - **Size:** ${perf.scalabilityAssessment.projectedCapacity.sizeGB.toFixed(2)} GB - **Scaling Strategy:** ${perf.scalabilityAssessment.scalingStrategy} ${perf.optimizations.length > 0 ? `### 🚀 Optimization Opportunities ${perf.optimizations.map(opt => `- **${opt.category}:** ${opt.description} (${opt.expectedImprovement})`).join('\n')}` : ''} --- *Generated by DataPilot Join Intelligence Engine v1.0*`; } /** * Format as JSON for programmatic consumption */ formatJSON(result) { return JSON.stringify(result, null, 2); } /** * Generate SQL join statements and optimization hints */ formatSQL(result) { const sqlStatements = result.candidates.map((candidate, index) => { const leftAlias = candidate.leftTable.tableName.substring(0, 1).toLowerCase(); const rightAlias = candidate.rightTable.tableName.substring(0, 1).toLowerCase(); let sql = `-- Join ${index + 1}: ${candidate.leftTable.tableName} ↔ ${candidate.rightTable.tableName}\n`; sql += `-- Confidence: ${(candidate.confidence * 100).toFixed(1)}%, Strategy: ${candidate.strategy}\n`; sql += `SELECT *\n`; sql += `FROM ${candidate.leftTable.tableName} ${leftAlias}\n`; sql += ` ${this.getJoinType(candidate)} ${candidate.rightTable.tableName} ${rightAlias}\n`; sql += ` ON ${leftAlias}.${candidate.leftColumn} = ${rightAlias}.${candidate.rightColumn};\n`; if (candidate.qualityMetrics.performance.indexRecommended) { sql += `\n-- Recommended indexes:\n`; sql += `-- CREATE INDEX idx_${candidate.leftTable.tableName}_${candidate.leftColumn} ON ${candidate.leftTable.tableName}(${candidate.leftColumn});\n`; sql += `-- CREATE INDEX idx_${candidate.rightTable.tableName}_${candidate.rightColumn} ON ${candidate.rightTable.tableName}(${candidate.rightColumn});\n`; } return sql; }).join('\n\n'); return `-- DataPilot Join Analysis SQL Export -- Generated: ${new Date().toISOString()} ${sqlStatements}`; } /** * Generate visual diagram representation */ formatDiagram(result) { // This would generate DOT notation for Graphviz or similar const nodes = result.dependencyGraph.nodes.map(node => ` "${node.table.tableName}" [label="${node.table.tableName}\\n${node.table.rowCount} rows"];`).join('\n'); const edges = result.dependencyGraph.edges.map(edge => ` "${edge.from}" -> "${edge.to}" [label="${edge.columns[0]?.fromColumn || ''}"];`).join('\n'); return `digraph JoinAnalysis { rankdir=TB; node [shape=box, style=rounded]; ${nodes} ${edges} }`; } // Helper methods for formatting formatStrategy(strategy) { const strategyMap = { [types_1.JoinStrategy.EXACT_MATCH]: '🎯 Exact', [types_1.JoinStrategy.FUZZY_MATCH]: '🔍 Fuzzy', [types_1.JoinStrategy.SEMANTIC_MATCH]: '🧠 Semantic', [types_1.JoinStrategy.PATTERN_MATCH]: '🔤 Pattern', [types_1.JoinStrategy.RANGE_OVERLAP]: '📊 Range', [types_1.JoinStrategy.STATISTICAL_MATCH]: '📈 Statistical' }; return strategyMap[strategy] || strategy; } formatCardinality(cardinality) { const cardinalityMap = { [types_1.CardinalityType.ONE_TO_ONE]: '1:1', [types_1.CardinalityType.ONE_TO_MANY]: '1:N', [types_1.CardinalityType.MANY_TO_ONE]: 'N:1', [types_1.CardinalityType.MANY_TO_MANY]: 'N:M' }; return cardinalityMap[cardinality] || cardinality; } getJoinType(candidate) { // Default to INNER JOIN, could be enhanced with logic to determine optimal join type return 'INNER JOIN'; } } exports.JoinFormatter = JoinFormatter; //# sourceMappingURL=join-formatter.js.map