datapilot-cli
Version:
Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform
337 lines (292 loc) • 14.6 kB
JavaScript
;
/**
* Rich Output Formatting for Join Analysis
* Phase 1: Foundation Architecture - Comprehensive result formatting
*/
// Set the `__esModule` flag on the CommonJS exports object (the marker that
// ES-module interop helpers check for).
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export slot up front; the real value is assigned after the class
// definition at the bottom of this file.
exports.JoinFormatter = void 0;
const types_1 = require("./types");
/**
 * Renders a join-analysis result as a Markdown report, JSON, SQL statements,
 * or a Graphviz DOT diagram.
 *
 * Each method reads fields from the `result` object (or from a fragment of it,
 * such as a list of candidates) and returns a string; nothing is written to
 * disk or to the console here.
 */
class JoinFormatter {
    /**
     * Format join analysis results according to the requested output format.
     *
     * @param {object} result - Join analysis result to render.
     * @param {{type: string}} format - Output selector. `type` is one of
     *   'MARKDOWN', 'JSON', 'SQL' or 'DIAGRAM'; any other value (or a missing
     *   `format`) falls back to Markdown.
     * @returns {string} The rendered output.
     */
    format(result, format) {
        switch (format?.type) {
            case 'JSON':
                return this.formatJSON(result);
            case 'SQL':
                return this.formatSQL(result);
            case 'DIAGRAM':
                return this.formatDiagram(result);
            case 'MARKDOWN':
            default:
                return this.formatMarkdown(result);
        }
    }

    /**
     * Format as a comprehensive Markdown report.
     *
     * @param {object} result - Join analysis result.
     * @returns {string} All report sections, separated by blank lines.
     */
    formatMarkdown(result) {
        return [
            this.formatMarkdownHeader(result),
            this.formatMarkdownSummary(result),
            this.formatMarkdownJoinCandidates(result),
            this.formatMarkdownDependencyGraph(result),
            this.formatMarkdownIntegrityReport(result),
            this.formatMarkdownBusinessRules(result),
            this.formatMarkdownRecommendations(result),
            this.formatMarkdownPerformance(result),
        ].join('\n\n');
    }

    /**
     * Report title plus the run metadata read from `result.summary`.
     *
     * @param {object} result - Join analysis result.
     * @returns {string} Markdown header block ending in a horizontal rule.
     */
    formatMarkdownHeader(result) {
        const { analysisTime, tablesAnalyzed, totalRows } = result.summary;
        return [
            '# DataPilot Join Analysis Report',
            `**Generated:** ${new Date().toISOString()}`,
            `**Analysis Duration:** ${analysisTime}ms`,
            `**Tables Analyzed:** ${tablesAnalyzed}`,
            `**Total Records:** ${totalRows.toLocaleString()}`,
            // Blank line: a `---` directly under a paragraph is a setext
            // heading underline in Markdown, not a horizontal rule.
            '',
            '---',
        ].join('\n');
    }

    /**
     * Metric table and three one-line key findings.
     *
     * @param {object} result - Join analysis result; reads `summary`,
     *   `performance.overallComplexity` and `businessRules.length`.
     * @returns {string} Markdown "Executive Summary" section.
     */
    formatMarkdownSummary(result) {
        const summary = result.summary;
        const complexity = result.performance.overallComplexity;

        const joinsFinding = summary.highConfidenceJoins > 0
            ? `✅ **${summary.highConfidenceJoins} high-confidence join relationships** identified`
            : '⚠️ **No high-confidence joins found** - consider data quality improvements';
        const issuesFinding = summary.potentialIssues > 0
            ? `⚠️ **${summary.potentialIssues} potential data quality issues** detected`
            : '✅ **No major data quality issues** identified';
        const rulesFinding = result.businessRules.length > 0
            ? `🎯 **${result.businessRules.length} business relationship patterns** discovered`
            : '💡 **No clear business patterns** - manual review recommended';

        return [
            '## 📊 Executive Summary',
            '| Metric | Value |',
            '|--------|-------|',
            `| **Join Candidates Found** | ${summary.joinCandidatesFound} |`,
            `| **High Confidence Joins** | ${summary.highConfidenceJoins} |`,
            `| **Potential Issues** | ${summary.potentialIssues} |`,
            `| **Overall Complexity** | ${complexity} |`,
            '### Key Findings',
            joinsFinding,
            issuesFinding,
            rulesFinding,
        ].join('\n');
    }

    /**
     * Join-candidate tables (high confidence, then suggested) followed by a
     * detailed view of the first three candidates.
     *
     * @param {object} result - Join analysis result; reads `candidates`.
     * @returns {string} Markdown "Join Candidates" section.
     */
    formatMarkdownJoinCandidates(result) {
        const heading = '## 🔗 Join Candidates';
        const candidates = result.candidates;
        if (candidates.length === 0) {
            return `${heading}\nNo join candidates found with the current confidence threshold.`;
        }

        // Confidence bands: >= 0.5 is "high", [0.3, 0.5) is "suggested".
        // Candidates below 0.3 are listed in neither table.
        const highConfidenceJoins = candidates.filter((c) => c.confidence >= 0.5);
        const suggestedJoins = candidates.filter((c) => c.confidence >= 0.3 && c.confidence < 0.5);

        let output = `${heading}\n\n`;
        if (highConfidenceJoins.length > 0) {
            output += `### ✅ High Confidence Joins\n${this.formatCandidateTable(highConfidenceJoins)}\n`;
        }
        if (suggestedJoins.length > 0) {
            // The blank line before the note ends the table; without it the
            // note would be parsed as one more table row.
            output += `### 💡 Suggested Joins (Lower Confidence)\n${this.formatCandidateTable(suggestedJoins)}\n\n` +
                '*Note: These joins have lower confidence scores but may still be useful. Consider verifying the relationships manually.*\n';
        }
        // Detail view uses the first three candidates in the order given.
        output += `### Top Join Recommendations\n${this.formatTopJoins(candidates.slice(0, 3))}`;
        return output;
    }

    /**
     * Build the Markdown table (header, divider, one row per candidate) shared
     * by the high-confidence and suggested-join listings.
     *
     * @param {object[]} candidates - Candidates to list; rows are numbered from 1.
     * @returns {string} Markdown table without a trailing newline.
     */
    formatCandidateTable(candidates) {
        const header = '| # | Left Table | Left Column | Right Table | Right Column | Confidence | Strategy | Cardinality | Data Loss |';
        const divider = '|---|------------|-------------|-------------|--------------|------------|----------|-------------|-----------|';
        const rows = candidates.map((candidate, index) => {
            const confidence = `${(candidate.confidence * 100).toFixed(1)}%`;
            const strategy = this.formatStrategy(candidate.strategy);
            const cardinality = this.formatCardinality(candidate.cardinality);
            const dataLoss = `${candidate.qualityMetrics.dataLoss.toFixed(1)}%`;
            return `| ${index + 1} | \`${candidate.leftTable.tableName}\` | \`${candidate.leftColumn}\` | \`${candidate.rightTable.tableName}\` | \`${candidate.rightColumn}\` | ${confidence} | ${strategy} | ${cardinality} | ${dataLoss} |`;
        });
        return [header, divider, ...rows].join('\n');
    }

    /**
     * Detailed per-candidate write-up: join condition, quality metrics and
     * estimated performance.
     *
     * @param {object[]} topCandidates - Candidates to describe.
     * @returns {string} One Markdown subsection per candidate, blank-line separated.
     */
    formatTopJoins(topCandidates) {
        return topCandidates.map((candidate, index) => {
            const left = candidate.leftTable.tableName;
            const right = candidate.rightTable.tableName;
            const metrics = candidate.qualityMetrics;
            const performance = metrics.performance;
            // estimatedMemory is divided by 1024 twice and labelled MB.
            const memoryMb = (performance.estimatedMemory / 1024 / 1024).toFixed(1);
            return [
                `#### ${index + 1}. ${left} ↔ ${right}`,
                `**Join Condition:** \`${left}.${candidate.leftColumn} = ${right}.${candidate.rightColumn}\``,
                '**Quality Metrics:**',
                `- **Confidence:** ${(candidate.confidence * 100).toFixed(1)}%`,
                `- **Data Loss:** ${metrics.dataLoss.toFixed(1)}%`,
                `- **Consistency:** ${metrics.consistency.toFixed(1)}%`,
                `- **Estimated Rows:** ${candidate.estimatedRows.toLocaleString()}`,
                // Blank line ends the list so the next label is not absorbed
                // into the last bullet as a lazy continuation line.
                '',
                '**Performance:**',
                `- **Estimated Time:** ${performance.estimatedTime}ms`,
                `- **Memory Usage:** ${memoryMb}MB`,
                `- **Complexity:** ${performance.complexity}`,
                performance.indexRecommended ? '- **⚡ Index Recommended**' : '',
            ].join('\n');
        }).join('\n\n');
    }

    /**
     * Table list, relationship list and (if any) circular dependencies.
     *
     * @param {object} result - Join analysis result; reads `dependencyGraph`
     *   (`nodes`, `edges`, `cycles`, `depth`).
     * @returns {string} Markdown "Table Dependency Graph" section.
     */
    formatMarkdownDependencyGraph(result) {
        const graph = result.dependencyGraph;

        const nodeList = graph.nodes.map((node) => {
            let type = '🔗 Node';
            if (node.isRoot) {
                type = '🌿 Root';
            } else if (node.isLeaf) {
                type = '🍃 Leaf';
            }
            return `- **${node.table.tableName}** (${type}, Level ${node.level})`;
        }).join('\n');

        const edgeList = graph.edges.map((edge) => {
            const strength = `${(edge.strength * 100).toFixed(1)}%`;
            const columns = edge.columns.map((c) => `${c.fromColumn}→${c.toColumn}`).join(', ');
            return `- **${edge.from}** → **${edge.to}** (${strength} confidence, ${columns})`;
        }).join('\n');

        const cycleSection = graph.cycles.length > 0
            ? `### ⚠️ Circular Dependencies\n${graph.cycles.map((cycle) => cycle.map((n) => n.table.tableName).join(' → ')).join('\n')}`
            : '';

        return [
            '## 🕸️ Table Dependency Graph',
            `**Graph Depth:** ${graph.depth}`,
            `**Detected Cycles:** ${graph.cycles.length}`,
            '### Tables',
            nodeList,
            '### Relationships',
            edgeList,
            cycleSection,
        ].join('\n');
    }

    /**
     * Counts of valid/broken relationships and orphaned records, the list of
     * broken relationships, and any integrity recommendations.
     *
     * @param {object} result - Join analysis result; reads `integrityReport`.
     * @returns {string} Markdown "Data Integrity Report" section.
     */
    formatMarkdownIntegrityReport(result) {
        const report = result.integrityReport;

        const issuesSection = report.brokenRelationships.length > 0
            ? `### ⚠️ Referential Integrity Issues\n${report.brokenRelationships.map((broken) => `- **${broken.fromTable}.${broken.fromColumn}** → **${broken.toTable}.${broken.toColumn}**: ${broken.violationCount} violations`).join('\n')}`
            : '### ✅ No Integrity Issues Found';
        const recommendationSection = report.recommendations.length > 0
            ? `### 💡 Integrity Recommendations\n${report.recommendations.map((rec) => `- ${rec}`).join('\n')}`
            : '';

        return [
            '## 🛡️ Data Integrity Report',
            `**Valid Relationships:** ${report.validJoins.length}`,
            `**Broken Relationships:** ${report.brokenRelationships.length}`,
            `**Orphaned Records:** ${report.orphanedRecords.length}`,
            issuesSection,
            recommendationSection,
        ].join('\n');
    }

    /**
     * One subsection per detected business rule.
     *
     * @param {object} result - Join analysis result; reads `businessRules`.
     * @returns {string} Markdown "Business Rules" section.
     */
    formatMarkdownBusinessRules(result) {
        const heading = '## 🎯 Business Rules';
        if (result.businessRules.length === 0) {
            return `${heading}\nNo business relationship patterns detected.`;
        }
        const rulesList = result.businessRules.map((rule) => {
            const confidence = `${(rule.confidence * 100).toFixed(1)}%`;
            const tables = rule.tables.map((t) => `\`${t}\``).join(', ');
            return [
                `### ${rule.name}`,
                `**Description:** ${rule.description}`,
                `**Confidence:** ${confidence}`,
                `**Tables:** ${tables}`,
                `**Source:** ${rule.source}`,
                '**Conditions:**',
                rule.conditions.map((c) => `- ${c}`).join('\n'),
            ].join('\n');
        }).join('\n\n');
        return `${heading}\n${rulesList}`;
    }

    /**
     * One subsection per recommendation, prefixed with a priority marker.
     *
     * @param {object} result - Join analysis result; reads `recommendations`.
     * @returns {string} Markdown "Recommendations" section.
     */
    formatMarkdownRecommendations(result) {
        const heading = '## 💡 Recommendations';
        if (result.recommendations.length === 0) {
            return `${heading}\nNo specific recommendations at this time.`;
        }
        const recommendationsList = result.recommendations.map((rec) => {
            // HIGH -> red, MEDIUM -> yellow, anything else -> green.
            let priority = '🟢';
            if (rec.priority === 'HIGH') {
                priority = '🔴';
            } else if (rec.priority === 'MEDIUM') {
                priority = '🟡';
            }
            return [
                `### ${priority} ${rec.title}`,
                `**Type:** ${rec.type}`,
                `**Priority:** ${rec.priority}`,
                `**Estimated Effort:** ${rec.estimatedEffort}`,
                `**Description:** ${rec.description}`,
                `**Expected Impact:** ${rec.impact}`,
                `**Implementation:** ${rec.implementation}`,
            ].join('\n');
        }).join('\n\n');
        return `${heading}\n${recommendationsList}`;
    }

    /**
     * Complexity, current/projected capacity and optimization opportunities,
     * followed by the report footer.
     *
     * @param {object} result - Join analysis result; reads `performance`.
     * @returns {string} Markdown "Performance Analysis" section.
     */
    formatMarkdownPerformance(result) {
        const perf = result.performance;
        const { currentCapacity, projectedCapacity, scalingStrategy } = perf.scalabilityAssessment;

        const optimizationSection = perf.optimizations.length > 0
            ? `### 🚀 Optimization Opportunities\n${perf.optimizations.map((opt) => `- **${opt.category}:** ${opt.description} (${opt.expectedImprovement})`).join('\n')}`
            : '';

        return [
            '## ⚡ Performance Analysis',
            `**Overall Complexity:** ${perf.overallComplexity}`,
            '### Current Capacity',
            `- **Rows:** ${currentCapacity.rows.toLocaleString()}`,
            `- **Size:** ${currentCapacity.sizeGB.toFixed(2)} GB`,
            `- **Tables:** ${currentCapacity.tables}`,
            '### Projected Capacity (10x Growth)',
            `- **Rows:** ${projectedCapacity.rows.toLocaleString()}`,
            `- **Size:** ${projectedCapacity.sizeGB.toFixed(2)} GB`,
            `- **Scaling Strategy:** ${scalingStrategy}`,
            optimizationSection,
            '---',
            '*Generated by DataPilot Join Intelligence Engine v1.0*',
        ].join('\n');
    }

    /**
     * Format as JSON for programmatic consumption.
     *
     * @param {object} result - Join analysis result.
     * @returns {string} `result` serialized with 2-space indentation.
     */
    formatJSON(result) {
        return JSON.stringify(result, null, 2);
    }

    /**
     * Generate one SQL join statement per candidate, plus commented-out
     * `CREATE INDEX` hints when the candidate recommends an index.
     *
     * Table and column names are interpolated as-is (no identifier quoting).
     *
     * @param {object} result - Join analysis result; reads `candidates`.
     * @returns {string} SQL script with a two-line comment header.
     */
    formatSQL(result) {
        const sqlStatements = result.candidates.map((candidate, index) => {
            const left = candidate.leftTable.tableName;
            const right = candidate.rightTable.tableName;
            const [leftAlias, rightAlias] = this.buildTableAliases(left, right);

            const lines = [
                `-- Join ${index + 1}: ${left} ↔ ${right}`,
                `-- Confidence: ${(candidate.confidence * 100).toFixed(1)}%, Strategy: ${candidate.strategy}`,
                'SELECT *',
                `FROM ${left} ${leftAlias}`,
                ` ${this.getJoinType(candidate)} ${right} ${rightAlias}`,
                ` ON ${leftAlias}.${candidate.leftColumn} = ${rightAlias}.${candidate.rightColumn};`,
            ];
            if (candidate.qualityMetrics.performance.indexRecommended) {
                lines.push(
                    '',
                    '-- Recommended indexes:',
                    `-- CREATE INDEX idx_${left}_${candidate.leftColumn} ON ${left}(${candidate.leftColumn});`,
                    `-- CREATE INDEX idx_${right}_${candidate.rightColumn} ON ${right}(${candidate.rightColumn});`,
                );
            }
            return `${lines.join('\n')}\n`;
        }).join('\n\n');

        return [
            '-- DataPilot Join Analysis SQL Export',
            `-- Generated: ${new Date().toISOString()}`,
            sqlStatements,
        ].join('\n');
    }

    /**
     * Derive a pair of distinct single-statement table aliases.
     *
     * Each alias is the lower-cased first character of the table name when that
     * is an ASCII letter, otherwise `t`. If both sides end up with the same
     * alias (same initial, or a self-join) they are suffixed `1` and `2` so the
     * generated `ON` clause is unambiguous.
     *
     * @param {string} leftName - Left table name.
     * @param {string} rightName - Right table name.
     * @returns {[string, string]} `[leftAlias, rightAlias]`, always different.
     */
    buildTableAliases(leftName, rightName) {
        const toAlias = (name) => {
            const initial = String(name).charAt(0).toLowerCase();
            return /^[a-z]$/.test(initial) ? initial : 't';
        };
        const leftAlias = toAlias(leftName);
        const rightAlias = toAlias(rightName);
        if (leftAlias === rightAlias) {
            return [`${leftAlias}1`, `${rightAlias}2`];
        }
        return [leftAlias, rightAlias];
    }

    /**
     * Generate a Graphviz DOT digraph of the dependency graph: one box per
     * table (labelled with its row count) and one edge per relationship
     * (labelled with the first column pair's source column, if any).
     *
     * @param {object} result - Join analysis result; reads `dependencyGraph`.
     * @returns {string} DOT source.
     */
    formatDiagram(result) {
        // Inside a DOT quoted string only the double quote needs escaping.
        const quote = (value) => String(value).replace(/"/g, '\\"');

        const nodes = result.dependencyGraph.nodes.map((node) => {
            const name = quote(node.table.tableName);
            // `\\n` emits a literal backslash-n, which DOT renders as a line break in labels.
            return ` "${name}" [label="${name}\\n${node.table.rowCount} rows"];`;
        }).join('\n');

        const edges = result.dependencyGraph.edges.map((edge) => {
            const label = quote(edge.columns?.[0]?.fromColumn || '');
            return ` "${quote(edge.from)}" -> "${quote(edge.to)}" [label="${label}"];`;
        }).join('\n');

        return [
            'digraph JoinAnalysis {',
            'rankdir=TB;',
            'node [shape=box, style=rounded];',
            nodes,
            edges,
            '}',
        ].join('\n');
    }

    // Helper methods for formatting

    /**
     * Map a join strategy to its short display label.
     *
     * @param {string} strategy - A `JoinStrategy` value.
     * @returns {string} Display label, or `strategy` itself when unmapped.
     */
    formatStrategy(strategy) {
        const strategyMap = {
            [types_1.JoinStrategy.EXACT_MATCH]: '🎯 Exact',
            [types_1.JoinStrategy.FUZZY_MATCH]: '🔍 Fuzzy',
            [types_1.JoinStrategy.SEMANTIC_MATCH]: '🧠 Semantic',
            [types_1.JoinStrategy.PATTERN_MATCH]: '🔤 Pattern',
            [types_1.JoinStrategy.RANGE_OVERLAP]: '📏 Range',
            [types_1.JoinStrategy.STATISTICAL_MATCH]: '📊 Statistical',
        };
        return strategyMap[strategy] || strategy;
    }

    /**
     * Map a cardinality type to its short notation (`1:1`, `1:N`, `N:1`, `N:M`).
     *
     * @param {string} cardinality - A `CardinalityType` value.
     * @returns {string} Short notation, or `cardinality` itself when unmapped.
     */
    formatCardinality(cardinality) {
        const cardinalityMap = {
            [types_1.CardinalityType.ONE_TO_ONE]: '1:1',
            [types_1.CardinalityType.ONE_TO_MANY]: '1:N',
            [types_1.CardinalityType.MANY_TO_ONE]: 'N:1',
            [types_1.CardinalityType.MANY_TO_MANY]: 'N:M',
        };
        return cardinalityMap[cardinality] || cardinality;
    }

    /**
     * SQL join keyword to use for a candidate.
     *
     * @param {object} candidate - Join candidate (currently not inspected).
     * @returns {string} Always `'INNER JOIN'`.
     */
    getJoinType(candidate) {
        // Default to INNER JOIN, could be enhanced with logic to determine optimal join type
        return 'INNER JOIN';
    }
}
// Publish the class as this module's `JoinFormatter` export.
exports.JoinFormatter = JoinFormatter;
//# sourceMappingURL=join-formatter.js.map