UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

310 lines (304 loc) 13.2 kB
"use strict";
// --- TypeScript-emitted interop helpers (kept semantically unchanged) ---
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.GitHubBot = void 0;
const rest_1 = require("@octokit/rest");
const pr_analyzer_1 = require("./pr-analyzer");
const suggestion_engine_1 = require("./suggestion-engine");
const evidence_logger_1 = require("./evidence-logger");
const anchor_store_1 = require("../core/anchor-store");
/**
 * GitHub integration that analyzes pull requests, posts suggestion comments,
 * accepts suggestions on request, and can open PRs containing semantic
 * mapping files.
 *
 * All GitHub access goes through the Octokit client built in the constructor.
 */
class GitHubBot {
    octokit;
    prAnalyzer;
    suggestionEngine;
    evidenceLogger;
    anchorStore;
    /**
     * @param {object} config
     * @param {string} config.githubToken - Token passed to Octokit as `auth`.
     * @param {string} [config.baseUrl] - Passed to Octokit as `baseUrl`.
     * @param {string} [config.storePath] - Passed to `AnchorStoreManager`.
     */
    constructor(config) {
        this.octokit = new rest_1.Octokit({
            auth: config.githubToken,
            baseUrl: config.baseUrl
        });
        this.anchorStore = new anchor_store_1.AnchorStoreManager(config.storePath);
        this.prAnalyzer = new pr_analyzer_1.PRAnalyzer();
        this.suggestionEngine = new suggestion_engine_1.SuggestionEngine(this.anchorStore);
        this.evidenceLogger = new evidence_logger_1.EvidenceLogger();
    }
    /**
     * Analyzes a pull request and, when the analyzer reports semantic changes,
     * posts a suggestion comment and logs the analysis.
     *
     * Failures are logged (console + evidence logger) and not rethrown.
     *
     * @param {{owner: string, repo: string, pull_number: number}} context
     * @returns {Promise<void>}
     */
    async handlePullRequest(context) {
        const startTime = Date.now();
        try {
            const prData = await this.getPRData(context);
            const analysis = await this.prAnalyzer.analyzePR(prData);
            if (analysis.hasSemanticChanges) {
                const suggestions = await this.suggestionEngine.generateSuggestions(analysis);
                const comment = this.formatPRComment(context, suggestions, analysis);
                await this.postComment(context, comment);
                await this.evidenceLogger.logAnalysis({
                    pr_context: context,
                    analysis_result: analysis,
                    suggestions: suggestions,
                    processing_time_ms: Date.now() - startTime,
                    timestamp: new Date().toISOString()
                });
            }
        }
        catch (error) {
            console.error('Error handling PR:', error);
            await this.evidenceLogger.logError({
                pr_context: context,
                error: error instanceof Error ? error.message : String(error),
                timestamp: new Date().toISOString()
            });
        }
    }
    /**
     * Accepts the given suggestions (or every logged suggestion when the single
     * id is `'all'` or starts with `accept_all_`), posts a confirmation comment
     * and logs the acceptance.
     *
     * Failures are logged (console + evidence logger) and not rethrown.
     *
     * @param {{owner: string, repo: string, pull_number: number}} context
     * @param {string[]} suggestionIds
     * @returns {Promise<void>}
     */
    async handleQuickAccept(context, suggestionIds) {
        try {
            // Attempt to accept by in-memory suggestions first; if missing, load from evidence logs
            const logger = this.evidenceLogger;
            const latest = await logger.getLatestSuggestionsForPR(context);
            const isAcceptAll = suggestionIds.length === 1 &&
                (suggestionIds[0] === 'all' || suggestionIds[0].startsWith('accept_all_'));
            // Number reported in the confirmation comment. For accept-all the id
            // list holds only the "all" token, so count the processed suggestions.
            let acceptedCount = suggestionIds.length;
            if (isAcceptAll) {
                acceptedCount = 0;
                if (latest && latest.length > 0) {
                    for (const m of latest) {
                        await this.suggestionEngine.acceptSuggestionByData(m);
                        acceptedCount += 1;
                    }
                }
            }
            else {
                for (const suggestionId of suggestionIds) {
                    const acceptedInMemory = await this.suggestionEngine.acceptSuggestion(suggestionId);
                    if (!acceptedInMemory && latest) {
                        const match = latest.find((m) => m.id === suggestionId);
                        if (match) {
                            await this.suggestionEngine.acceptSuggestionByData(match);
                        }
                    }
                }
            }
            const comment = this.formatAcceptanceComment(suggestionIds, acceptedCount);
            await this.postComment(context, comment);
            await this.evidenceLogger.logAcceptance({
                pr_context: context,
                accepted_suggestions: suggestionIds,
                timestamp: new Date().toISOString()
            });
        }
        catch (error) {
            console.error('Error handling quick accept:', error);
            await this.evidenceLogger.logError({
                pr_context: context,
                error: error instanceof Error ? error.message : String(error),
                timestamp: new Date().toISOString()
            });
        }
    }
    /**
     * Fetches the PR metadata, its unified diff and its changed-file list in
     * parallel.
     *
     * @param {{owner: string, repo: string, pull_number: number}} context
     * @returns {Promise<{pr: object, diff: string, files: object[]}>}
     *   `diff` is `''` when the diff response body is not a string.
     */
    async getPRData(context) {
        const target = {
            owner: context.owner,
            repo: context.repo,
            pull_number: context.pull_number
        };
        const [pr, diff, files] = await Promise.all([
            this.octokit.pulls.get({ ...target }),
            this.octokit.pulls.get({ ...target, mediaType: { format: 'diff' } }),
            // Request the largest page GitHub allows (default is 30). PRs touching
            // more than 100 files are still truncated to the first page.
            this.octokit.pulls.listFiles({ ...target, per_page: 100 })
        ]);
        return {
            pr: pr.data,
            diff: typeof diff.data === 'string' ? diff.data : '',
            files: files.data
        };
    }
    /**
     * Builds the Markdown body of the analysis comment.
     *
     * @param {{owner: string, repo: string}} context
     * @param {{newMappings?: object[], driftDetections?: object[], healthMetrics?: object}} suggestions
     * @param {object} analysis - Reads `prNumber`, `pr.number` and `processingTime`.
     * @returns {{body: string}}
     */
    formatPRComment(context, suggestions, analysis) {
        const { newMappings, driftDetections, healthMetrics } = suggestions;
        const repoUrl = `https://github.com/${context.owner}/${context.repo}`;
        const workflowUrl = `${repoUrl}/actions/workflows/semantic-bot.yml`;
        let body = `## 🔬 Semantic Analysis for PR #${analysis.prNumber || analysis.pr?.number || 'N/A'}\n\n`;
        if (newMappings && newMappings.length > 0) {
            body += `### ✅ Suggested Mappings (${newMappings.length} new)\n`;
            newMappings.forEach((mapping) => {
                const acceptLink = `[Accept](${workflowUrl})`;
                const command = ` (comment: \`/semantic accept ${mapping.id}\`)`;
                body += `- \`${mapping.column}\` → \`${mapping.semantic_type}\` (${Math.round(mapping.confidence * 100)}% confidence) ${acceptLink}${command}\n`;
            });
            body += '\n';
        }
        if (driftDetections && driftDetections.length > 0) {
            body += `### ⚠️ Drift Detected\n`;
            driftDetections.forEach((drift) => {
                body += `- \`${drift.column}\`: ${drift.description}\n`;
                body += `- Action: Update \`/semantics/${drift.file}\`\n`;
            });
            body += '\n';
        }
        if (healthMetrics) {
            body += `### 📊 Semantic Health\n`;
            body += `- Coverage: ${Math.round(healthMetrics.coverage * 100)}%\n`;
            body += `- Drift Risk: ${healthMetrics.driftRisk}\n`;
            body += `- Quality Score: ${Math.round(healthMetrics.qualityScore * 100)}/100\n\n`;
        }
        body += `[Accept All](${workflowUrl}) (comment: \`/semantic accept-all\`) `;
        // Link to this repository's semantics folder rather than a placeholder repo.
        body += `[Review](${repoUrl}/blob/main/semantics/) `;
        body += `[Docs](https://docs.semantic-toolkit.org/quick-start)\n\n`;
        body += `---\n*🤖 Generated by [Semantic Data Science Toolkit](https://semantic-toolkit.org) in ${analysis.processingTime || '<5'}s*`;
        return { body };
    }
    /**
     * Builds the confirmation comment posted after a quick accept.
     *
     * @param {string[]} suggestionIds - Ids that were requested.
     * @param {number} [acceptedCount=suggestionIds.length] - Number to report;
     *   lets accept-all report the processed suggestions instead of the single
     *   "all" token.
     * @returns {{body: string}}
     */
    formatAcceptanceComment(suggestionIds, acceptedCount = suggestionIds.length) {
        const count = acceptedCount;
        const body = `✅ **Quick Accept Complete**\n\n` +
            `Accepted ${count} semantic mapping${count === 1 ? '' : 's'}. ` +
            `Mappings have been added to the semantic store.\n\n` +
            `*🤖 Generated by [Semantic Data Science Toolkit](https://semantic-toolkit.org)*`;
        return { body };
    }
    /**
     * Posts `comment.body` as an issue comment on the pull request.
     *
     * @param {{owner: string, repo: string, pull_number: number}} context
     * @param {{body: string}} comment
     * @returns {Promise<void>}
     */
    async postComment(context, comment) {
        await this.octokit.issues.createComment({
            owner: context.owner,
            repo: context.repo,
            issue_number: context.pull_number,
            body: comment.body
        });
    }
    /**
     * Reads every `.yml` file directly inside the repository's `semantics`
     * directory.
     *
     * Best effort: any failure is logged and yields an empty list.
     *
     * @param {string} owner
     * @param {string} repo
     * @returns {Promise<Array<{name: string, content: string}>>} Decoded file
     *   contents; always an array.
     */
    async getRepositorySemantics(owner, repo) {
        try {
            const { data } = await this.octokit.repos.getContent({
                owner,
                repo,
                path: 'semantics'
            });
            if (!Array.isArray(data)) {
                // `semantics` exists but is not a directory listing.
                return [];
            }
            const semanticFiles = data.filter(item => item.type === 'file' && item.name.endsWith('.yml'));
            return await Promise.all(semanticFiles.map(async (file) => {
                const fileData = await this.octokit.repos.getContent({
                    owner,
                    repo,
                    path: file.path
                });
                return {
                    name: file.name,
                    content: 'content' in fileData.data
                        ? Buffer.from(fileData.data.content, 'base64').toString()
                        : ''
                };
            }));
        }
        catch (error) {
            if (error && error.status === 404) {
                console.log('No semantics directory found, starting fresh');
            }
            else {
                // Do not report auth/rate-limit/network failures as "not found".
                console.error('Failed to load repository semantics:', error);
            }
            return [];
        }
    }
    /**
     * Looks up the blob SHA of a file on a given ref.
     *
     * @param {string} owner
     * @param {string} repo
     * @param {string} path
     * @param {string} ref - Branch, tag or commit to read from.
     * @returns {Promise<string|undefined>} The SHA, or `undefined` when the path
     *   does not exist (HTTP 404) or is a directory.
     * @throws Rethrows any non-404 request error.
     */
    async getExistingFileSha(owner, repo, path, ref) {
        try {
            const { data } = await this.octokit.repos.getContent({ owner, repo, path, ref });
            return Array.isArray(data) ? undefined : data.sha;
        }
        catch (error) {
            if (error && error.status === 404) {
                return undefined;
            }
            throw error;
        }
    }
    /**
     * Creates a branch off `baseBranch`, commits one `semantics/<dataset>.yml`
     * per mapping, and opens a pull request.
     *
     * @param {string} owner
     * @param {string} repo
     * @param {Array<{dataset: string, columns: object[]}>} mappings
     * @param {string} [baseBranch='main']
     * @returns {Promise<number>} Number of the created pull request.
     */
    async createSemanticPR(owner, repo, mappings, baseBranch = 'main') {
        const branchName = `semantic-mappings-${Date.now()}`;
        const { data: baseRef } = await this.octokit.git.getRef({
            owner,
            repo,
            ref: `heads/${baseBranch}`
        });
        await this.octokit.git.createRef({
            owner,
            repo,
            ref: `refs/heads/${branchName}`,
            sha: baseRef.object.sha
        });
        for (const mapping of mappings) {
            const path = `semantics/${mapping.dataset}.yml`;
            const content = this.generateSemanticYAML(mapping);
            // GitHub requires the current blob SHA when the file already exists
            // (inherited from the base branch or written earlier in this loop).
            const sha = await this.getExistingFileSha(owner, repo, path, branchName);
            await this.octokit.repos.createOrUpdateFileContents({
                owner,
                repo,
                path,
                message: `Add semantic mappings for ${mapping.dataset}`,
                content: Buffer.from(content).toString('base64'),
                branch: branchName,
                ...(sha ? { sha } : {})
            });
        }
        const { data: pr } = await this.octokit.pulls.create({
            owner,
            repo,
            title: `🔬 Add semantic mappings (${mappings.length} datasets)`,
            head: branchName,
            base: baseBranch,
            body: this.generateSemanticPRBody(mappings)
        });
        return pr.number;
    }
    /**
     * Renders one dataset mapping as a YAML document.
     *
     * Values are interpolated as-is (no YAML quoting/escaping is applied).
     *
     * @param {{dataset: string, columns: Array<{name: string, semantic_type: string, confidence: number, anchor_id?: string}>}} mapping
     * @returns {string} YAML text ending with a newline.
     */
    generateSemanticYAML(mapping) {
        const columnEntries = mapping.columns.map((col) => [
            `  - column: ${col.name}`,
            `    semantic_type: ${col.semantic_type}`,
            `    confidence: ${col.confidence}`,
            `    anchor_id: ${col.anchor_id || 'auto-generated'}`
        ].join('\n')).join('\n');
        return [
            `# Semantic mappings for ${mapping.dataset}`,
            `dataset: ${mapping.dataset}`,
            `version: "1.0"`,
            ``,
            `mappings:`,
            columnEntries,
            ``,
            `generated_by: semantic-data-science-toolkit`,
            `generated_at: ${new Date().toISOString()}`,
            ``
        ].join('\n');
    }
    /**
     * Renders the Markdown description of a semantic-mappings pull request.
     *
     * @param {Array<{dataset: string, columns: object[]}>} mappings
     * @returns {string}
     */
    generateSemanticPRBody(mappings) {
        const totalColumns = mappings.reduce((sum, m) => sum + m.columns.length, 0);
        const datasetLines = mappings
            .map(m => `- \`${m.dataset}\` (${m.columns.length} columns)`)
            .join('\n');
        return [
            `## 🔬 Semantic Mappings Auto-Generated`,
            ``,
            `This PR adds semantic type mappings for **${totalColumns} columns** across **${mappings.length} dataset(s)**.`,
            ``,
            `### Datasets Updated:`,
            datasetLines,
            ``,
            `### Benefits:`,
            `- ✅ Enables automatic schema evolution tracking`,
            `- ✅ Improves data discovery and lineage`,
            `- ✅ Reduces manual mapping overhead`,
            `- ✅ Provides semantic consistency across pipelines`,
            ``,
            `### Next Steps:`,
            `1. Review the proposed mappings`,
            `2. Merge to activate semantic tracking`,
            `3. Future PRs will include drift detection and suggestions`,
            ``,
            `---`,
            `*🤖 Generated by [Semantic Data Science Toolkit](https://semantic-toolkit.org)*`
        ].join('\n');
    }
    /**
     * Checks a `sha256=<hex>` webhook signature against the HMAC-SHA256 of the
     * payload.
     *
     * @param {string|Buffer} payload - Raw request body.
     * @param {string} signature - Signature value supplied with the request.
     * @param {string} secret - Shared webhook secret.
     * @returns {Promise<boolean>} `true` only when the signature matches; a
     *   missing, non-string or wrong-length signature yields `false`.
     */
    async validateWebhookSignature(payload, signature, secret) {
        const crypto = await Promise.resolve().then(() => __importStar(require('crypto')));
        if (typeof signature !== 'string') {
            return false;
        }
        const hmac = crypto.createHmac('sha256', secret);
        hmac.update(payload);
        const expectedSignature = `sha256=${hmac.digest('hex')}`;
        const received = Buffer.from(signature);
        const expected = Buffer.from(expectedSignature);
        // timingSafeEqual throws on length mismatch; the expected length is
        // fixed and public, so comparing lengths first leaks nothing useful.
        if (received.length !== expected.length) {
            return false;
        }
        return crypto.timingSafeEqual(received, expected);
    }
}
exports.GitHubBot = GitHubBot;
//# sourceMappingURL=github-bot.js.map