UNPKG

sql-talk

Version:

SQL Talk - 自然言語をSQLに変換するMCPサーバー(安全性保護・SSHトンネル対応) / SQL Talk - MCP Server for Natural Language to SQL conversion with safety guards and SSH tunnel support

477 lines 20.6 kB
import { readFileSync } from 'fs';
import { resolve } from 'path';
import { parse as parseYaml } from 'yaml';
import { configManager } from '../core/config.js';
import { logger } from '../core/logger.js';
import { piiMasker } from '../security/pii-masker.js';

/**
 * Built-in English -> Japanese vocabulary used when no user dictionary
 * translates a token. A Map is used (instead of an object literal) so that
 * tokens such as "constructor" or "toString" can never resolve to inherited
 * Object.prototype members.
 */
const COMMON_TRANSLATIONS = new Map([
    // Business terms
    ['company', '会社'],
    ['business', '事業'],
    ['partner', 'パートナー'],
    ['partners', 'パートナー'],
    ['client', '顧客'],
    ['customer', '顧客'],
    ['supplier', '供給業者'],
    ['vendor', '業者'],
    // Contact information
    ['name', '名前'],
    ['email', 'メールアドレス'],
    ['phone', '電話番号'],
    ['address', '住所'],
    ['contact', '連絡先'],
    ['person', '担当者'],
    // Location
    ['postal', '郵便'],
    ['code', 'コード'],
    ['zip', '郵便番号'],
    // Industry/Business
    ['industry', '業種'],
    ['contract', '契約'],
    ['start', '開始'],
    ['status', '状況'],
    // "degree" reads more naturally than "evaluation" as a suffix
    // (credit rating, satisfaction rating, ...)
    ['rating', '度'],
    ['credit', '信用'],
    // Common fields
    ['created', '作成'],
    ['updated', '更新'],
    ['at', '日時'],
    ['date', '日付'],
    ['time', '時刻'],
    // User related
    ['user', 'ユーザー'],
    ['users', 'ユーザー'],
    ['order', '注文'],
    ['orders', '注文'],
    ['product', '商品'],
    ['products', '商品'],
    ['category', 'カテゴリ'],
    ['categories', 'カテゴリ'],
    // Quantities and amounts
    ['quantity', '数量'],
    ['amount', '金額'],
    ['price', '価格'],
    ['total', '合計'],
    ['unit', '単価'],
    ['subtotal', '小計'],
    // Details and descriptions
    ['detail', '詳細'],
    ['details', '詳細'],
    ['description', '説明'],
    ['shipping', '配送'],
    // Status values
    ['active', 'アクティブ'],
    ['inactive', '非アクティブ'],
    ['pending', '保留中'],
    ['suspended', '停止中'],
    ['completed', '完了'],
    ['processing', '処理中'],
]);

/**
 * Substring patterns tried, in order, when neither the dictionaries nor the
 * tokenizer could translate a table name.
 */
const TABLE_NAME_FALLBACKS = [
    { keywords: ['user', 'member'], comment: 'ユーザー' },
    { keywords: ['order'], comment: '注文' },
    { keywords: ['product', 'item'], comment: '商品' },
    { keywords: ['category'], comment: 'カテゴリ' },
    { keywords: ['partner', 'client', 'customer'], comment: '取引先' },
    { keywords: ['company', 'business'], comment: '会社' },
];

/** Trailing tokens that mark a column as an identifier even without a known entity prefix. */
const ID_TOKENS = new Set(['id', 'uuid', 'guid']);

/** Own-property check that ignores anything inherited from Object.prototype. */
const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);

/** True for non-null, non-array objects (the only shape a dictionary file may have). */
const isPlainObject = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);

/**
 * Reports whether any token begins or ends with `word`, or ends with
 * `abbreviation`. Matching on token edges (rather than anywhere in the raw
 * name) keeps "birthdate", "regdt" and "delflg" while rejecting mid-word hits
 * such as "updated" (date), "width" (dt) or "html" (tm).
 */
function hasKeywordToken(tokens, word, abbreviation) {
    return tokens.some((token) => token.startsWith(word) ||
        token.endsWith(word) ||
        token.endsWith(abbreviation));
}

/**
 * Proposes Japanese comments for tables and columns that have none, based on
 * YAML dictionaries listed in the configuration plus built-in heuristics.
 */
export class CommentGenerator {
    dictionaries = [];
    style = 'concise';

    constructor() {
        this.loadDictionaries();
    }

    /**
     * Reads the comment style and every configured dictionary file.
     * A dictionary that cannot be read or parsed is logged and skipped; a
     * failure to read the configuration itself leaves the generator with no
     * dictionaries.
     */
    loadDictionaries() {
        try {
            const config = configManager.getConfig();
            this.style = config.comment_infer.style;
            for (const dictPath of config.comment_infer.dictionary_paths) {
                try {
                    const fullPath = resolve(process.cwd(), dictPath);
                    const dict = parseYaml(readFileSync(fullPath, 'utf-8'));
                    // An empty YAML file parses to null and a scalar/list file is not a
                    // dictionary; storing either would make every later lookup throw.
                    if (!isPlainObject(dict)) {
                        logger.warn(`Ignoring dictionary ${dictPath}: top-level YAML value is not a mapping`);
                        continue;
                    }
                    this.dictionaries.push(dict);
                    logger.info(`Loaded dictionary: ${dictPath}`);
                }
                catch (error) {
                    logger.warn(`Failed to load dictionary ${dictPath}:`, error);
                }
            }
            logger.info(`Comment generator initialized with ${this.dictionaries.length} dictionaries`);
        }
        catch (error) {
            logger.error('Failed to initialize comment generator:', error);
            // Use default empty dictionaries
            this.dictionaries = [];
        }
    }

    /**
     * Builds a comment proposal for every table whose `table_comment` is empty.
     * @param {Array<object>} tables - table metadata (`schema`, `table`, `table_comment`, `columns`).
     * @returns {Array<object>} proposals with `object`, `fqdn`, `before`, `after`, `rationale`, `pii`.
     */
    generateTableComments(tables) {
        const proposals = [];
        for (const table of tables) {
            if (table.table_comment) {
                continue;
            }
            const comment = this.inferTableComment(table);
            if (!comment) {
                continue;
            }
            proposals.push({
                object: 'table',
                fqdn: `${table.schema}.${table.table}`,
                before: table.table_comment,
                after: comment,
                rationale: this.getTableCommentRationale(table),
                pii: this.containsPII(table.table),
            });
        }
        return proposals;
    }

    /**
     * Builds a comment proposal for every column whose `comment` is empty.
     * @param {Array<object>} tables - table metadata including `columns` (`name`, `type`, `comment`).
     * @returns {Array<object>} proposals with `object`, `fqdn`, `before`, `after`, `rationale`, `pii`.
     */
    generateColumnComments(tables) {
        const proposals = [];
        for (const table of tables) {
            for (const column of table.columns) {
                if (column.comment) {
                    continue;
                }
                const comment = this.inferColumnComment(column, table);
                if (!comment) {
                    continue;
                }
                proposals.push({
                    object: 'column',
                    fqdn: `${table.schema}.${table.table}.${column.name}`,
                    before: column.comment,
                    after: comment,
                    rationale: this.getColumnCommentRationale(column, table),
                    pii: piiMasker.isPIIColumn(column.name),
                });
            }
        }
        return proposals;
    }

    /**
     * Returns the first `[key, value]` entry of the given dictionary section
     * (searched dictionary by dictionary, in load order) whose key satisfies
     * `predicate`, or null when nothing matches.
     */
    findDictionaryEntry(section, predicate) {
        for (const dict of this.dictionaries) {
            const entries = dict[section];
            if (!entries) {
                continue;
            }
            for (const entry of Object.entries(entries)) {
                if (predicate(entry[0])) {
                    return entry;
                }
            }
        }
        return null;
    }

    /**
     * Translates each token, concatenates the successful translations and
     * returns the result, or null when no token could be translated.
     */
    translateTokens(tokens) {
        const translated = tokens
            .map((token) => this.translateToken(token) || this.translateCommonWord(token))
            .filter((part) => part !== null);
        if (translated.length === 0) {
            return null;
        }
        return translated.filter((part) => part.length > 0).join('');
    }

    /**
     * Infers a comment for a table from its name. Always returns a string:
     * the last resort is a generic "data" comment.
     */
    inferTableComment(table) {
        const tableName = table.table.toLowerCase();

        // Dictionary synonyms match anywhere inside the table name.
        const synonym = this.findDictionaryEntry('synonym', (key) => tableName.includes(key.toLowerCase()));
        if (synonym) {
            return this.formatTableComment(synonym[1], table);
        }

        // Tokenize the ORIGINAL name: lowercasing first would erase the
        // camelCase boundaries the tokenizer splits on.
        const fromTokens = this.translateTokens(this.tokenizeIdentifier(table.table));
        if (fromTokens !== null) {
            return this.formatTableComment(fromTokens, table);
        }

        // Fallback: try to translate the whole table name
        const wholeName = this.translateCommonWord(tableName);
        if (wholeName) {
            return this.formatTableComment(wholeName, table);
        }

        // Handle common table name patterns
        const fallback = TABLE_NAME_FALLBACKS.find(({ keywords }) => keywords.some((keyword) => tableName.includes(keyword)));
        // Last resort: generate a generic comment
        return this.formatTableComment(fallback ? fallback.comment : 'データ', table);
    }

    /**
     * Infers a comment for a column from its name and, as a last resort, its
     * data type.
     */
    inferColumnComment(column, table) {
        const columnName = column.name.toLowerCase();
        // Tokens come from the original-cased name so camelCase is split.
        const tokens = this.tokenizeIdentifier(column.name);

        // Dictionary synonyms match anywhere inside the column name.
        const synonym = this.findDictionaryEntry('synonym', (key) => columnName.includes(key.toLowerCase()));
        if (synonym) {
            return this.formatColumnComment(synonym[1], column, table);
        }

        // Abbreviations match the whole name or a trailing "_abbr" segment.
        const abbreviation = this.findDictionaryEntry('abbr', (key) => {
            const lowerKey = key.toLowerCase();
            return columnName === lowerKey || columnName.endsWith('_' + lowerKey);
        });
        if (abbreviation) {
            return this.formatColumnComment(abbreviation[1], column, table);
        }

        // Identifier columns. A bare "...id" suffix only counts when the last
        // token is an id marker or the prefix is a known entity; otherwise
        // words like "paid" or "valid" would be labelled "ID".
        const endsWithIdToken = ID_TOKENS.has(tokens[tokens.length - 1]);
        if (endsWithIdToken || columnName.endsWith('id')) {
            const entityName = columnName.replace(/_?id$/, '');
            const translatedEntity = this.translateToken(entityName) || this.translateCommonWord(entityName);
            if (translatedEntity) {
                return this.formatColumnComment(`${translatedEntity}ID`, column, table);
            }
            if (endsWithIdToken) {
                return this.formatColumnComment('ID', column, table);
            }
        }

        // Keyword heuristics, evaluated on token edges (see hasKeywordToken).
        if (hasKeywordToken(tokens, 'date', 'dt')) {
            return this.formatColumnComment('日付', column, table);
        }
        if (hasKeywordToken(tokens, 'time', 'tm')) {
            return this.formatColumnComment('時刻', column, table);
        }
        if (hasKeywordToken(tokens, 'flag', 'flg')) {
            return this.formatColumnComment('フラグ', column, table);
        }

        // Tokenize and translate
        const fromTokens = this.translateTokens(tokens);
        if (fromTokens !== null) {
            return this.formatColumnComment(fromTokens, column, table);
        }

        // Fallback: try to translate the whole column name as common words
        const wholeName = this.translateCommonWord(columnName);
        if (wholeName) {
            return this.formatColumnComment(wholeName, column, table);
        }

        // Last resort: generate basic comment based on data type
        const typeBasedComment = this.getBasicTypeComment(column.type);
        if (typeBasedComment) {
            return this.formatColumnComment(typeBasedComment, column, table);
        }
        return null;
    }

    /**
     * Splits an identifier into lowercase tokens on underscores, hyphens,
     * whitespace, camelCase boundaries and letter/digit boundaries.
     * Pass the identifier in its original case, or camelCase cannot be detected.
     */
    tokenizeIdentifier(identifier) {
        return identifier
            .replace(/([a-z])([A-Z])/g, '$1_$2') // camelCase to snake_case
            .replace(/([a-zA-Z])(\d)/g, '$1_$2') // letter followed by digit
            .replace(/(\d)([a-zA-Z])/g, '$1_$2') // digit followed by letter
            .toLowerCase()
            .split(/[_\-\s]+/)
            .filter((token) => token.length > 0);
    }

    /**
     * Looks a single token up in the loaded dictionaries: abbreviations first
     * (exact key), then synonyms (case-insensitive key).
     * @returns the dictionary value, or null when the token is unknown.
     */
    translateToken(token) {
        // Check abbreviations first. Only own keys count, so a token like
        // "constructor" cannot resolve to an inherited Object.prototype member.
        for (const dict of this.dictionaries) {
            if (dict.abbr && hasOwn(dict.abbr, token) && dict.abbr[token]) {
                return dict.abbr[token];
            }
        }
        // Check synonyms
        const lowerToken = token.toLowerCase();
        const synonym = this.findDictionaryEntry('synonym', (key) => key.toLowerCase() === lowerToken);
        return synonym ? synonym[1] : null;
    }

    /**
     * Decorates a table comment according to the configured style. In
     * "verbose" style a table suffix (unless already present) and the column
     * count are appended; otherwise the base comment is returned unchanged.
     */
    formatTableComment(baseComment, table) {
        if (this.style !== 'verbose') {
            return baseComment;
        }
        let comment = baseComment;
        // Add table suffix if not present
        if (!comment.includes('テーブル') && !comment.includes('表')) {
            comment += 'テーブル';
        }
        // Add descriptive information
        comment += `(${table.columns.length}列)`;
        return comment;
    }

    /**
     * Decorates a column comment: "verbose" style appends a type description,
     * any other style appends a unit/format hint from the style dictionary.
     */
    formatColumnComment(baseComment, column, table) {
        const suffix = this.style === 'verbose'
            ? this.getTypeDescription(column.type)
            : null;
        if (this.style === 'verbose') {
            return suffix ? `${baseComment}(${suffix})` : baseComment;
        }
        // Concise style - just add units or format hints
        const styleInfo = this.getStyleInfo(column.type, column.name);
        return styleInfo ? baseComment + styleInfo : baseComment;
    }

    /**
     * Maps a SQL data type name to a short Japanese type description, or null
     * when the type is not recognised. Checks are substring based and ordered.
     */
    getTypeDescription(dataType) {
        const lowerType = dataType.toLowerCase();
        if (lowerType.includes('int') || lowerType.includes('number') || lowerType.includes('numeric')) {
            return '数値';
        }
        if (lowerType.includes('varchar') || lowerType.includes('text') || lowerType.includes('char')) {
            return '文字列';
        }
        if (lowerType.includes('date')) {
            return '日付';
        }
        if (lowerType.includes('time')) {
            return '時刻';
        }
        if (lowerType.includes('bool')) {
            return '真偽値';
        }
        return null;
    }

    /**
     * Returns a parenthesised format/unit hint from the first dictionary that
     * defines an applicable `style` entry, or null when none applies.
     */
    getStyleInfo(dataType, columnName) {
        const lowerType = dataType.toLowerCase();
        const lowerName = columnName.toLowerCase();
        const isMoneyColumn = ['amount', 'amt', 'fee'].some((keyword) => lowerName.includes(keyword));
        // Check style dictionary
        for (const dict of this.dictionaries) {
            const style = dict.style;
            if (!style) {
                continue;
            }
            // "datetime"/"timestamp" contain "time"; test it before "date" so a
            // DATETIME column is not given the date-only format. If no datetime
            // format is configured the date format is still used as a fallback.
            if (lowerType.includes('time') && style.datetime) {
                return `(${style.datetime})`;
            }
            if (lowerType.includes('date') && style.date) {
                return `(${style.date})`;
            }
            // Keep searching later dictionaries when this one has no money unit.
            if (isMoneyColumn && style.money_unit) {
                return `(${style.money_unit})`;
            }
        }
        return null;
    }

    /** Describes how a table comment was derived (shown to the reviewer). */
    getTableCommentRationale(table) {
        const tokens = this.tokenizeIdentifier(table.table);
        return `テーブル名 '${table.table}' を分析し、含まれる要素 [${tokens.join(', ')}] から推測`;
    }

    /** Describes how a column comment was derived (shown to the reviewer). */
    getColumnCommentRationale(column, table) {
        const tokens = this.tokenizeIdentifier(column.name);
        const typeInfo = this.getTypeDescription(column.type);
        let rationale = `カラム名 '${column.name}' を分析`;
        if (tokens.length > 1) {
            rationale += `し、要素 [${tokens.join(', ')}] から推測`;
        }
        if (typeInfo) {
            rationale += `。データ型 '${column.type}' も考慮`;
        }
        return rationale;
    }

    /**
     * Translates a word using the built-in vocabulary (case-insensitive).
     * @returns the Japanese term, or null when the word is not in the vocabulary.
     */
    translateCommonWord(word) {
        const translation = COMMON_TRANSLATIONS.get(word.toLowerCase());
        return translation === undefined ? null : translation;
    }

    /**
     * Maps a SQL data type name to a generic Japanese comment. Unlike
     * getTypeDescription this never returns null: unknown types yield a
     * generic "item" comment.
     */
    getBasicTypeComment(dataType) {
        const lowerType = dataType.toLowerCase();
        if (lowerType.includes('int') || lowerType.includes('number') || lowerType.includes('numeric')) {
            return '数値';
        }
        if (lowerType.includes('varchar') || lowerType.includes('text') || lowerType.includes('char')) {
            return '文字列';
        }
        if (lowerType.includes('date')) {
            return '日付';
        }
        if (lowerType.includes('time')) {
            return '日時';
        }
        if (lowerType.includes('bool')) {
            return 'フラグ';
        }
        return '項目';
    }

    /**
     * Generate enhanced comments using LLM context
     * This method provides detailed context to the MCP client for better comment generation
     * @param {Array<object>} tables - table metadata.
     * @param {string} [scope='all'] - 'all', 'tables' or 'columns'; selects which proposals are generated.
     * @returns {{proposals: Array<object>, llmContext?: string}} `llmContext` is present
     *   whenever any table or column in `tables` lacks a comment.
     */
    generateEnhancedComments(tables, scope = 'all') {
        const proposals = [];
        // Generate basic proposals first
        if (scope === 'all' || scope === 'tables') {
            proposals.push(...this.generateTableComments(tables));
        }
        if (scope === 'all' || scope === 'columns') {
            proposals.push(...this.generateColumnComments(tables));
        }
        // Check if we need LLM assistance for missing comments
        const tablesWithoutComments = tables.filter((t) => !t.table_comment);
        const columnsWithoutComments = tables.flatMap((t) => t.columns
            .filter((c) => !c.comment)
            .map((c) => ({ table: t, column: c })));
        const result = { proposals };
        if (tablesWithoutComments.length > 0 || columnsWithoutComments.length > 0) {
            result.llmContext = this.buildLLMContextForComments(tablesWithoutComments, columnsWithoutComments);
        }
        return result;
    }

    /**
     * Builds the Markdown prompt context handed to the MCP client: the
     * uncommented tables, the uncommented columns grouped by table, the
     * generation rules and worked examples.
     * @param {Array<object>} tablesWithoutComments - tables lacking a comment.
     * @param {Array<{table: object, column: object}>} columnsWithoutComments - columns lacking a comment.
     * @returns {string} the context text.
     */
    buildLLMContextForComments(tablesWithoutComments, columnsWithoutComments) {
        let context = `## データベースコメント生成のためのコンテキスト情報\n\n`;
        if (tablesWithoutComments.length > 0) {
            context += `### コメントが必要なテーブル:\n`;
            for (const table of tablesWithoutComments) {
                context += `テーブル: ${table.schema}.${table.table}\n`;
                context += `カラム数: ${table.columns.length}列\n`;
                context += `主要カラム: ${table.columns.slice(0, 5).map((c) => `${c.name}(${c.type})`).join(', ')}\n`;
                if (table.columns.length > 5) {
                    context += `その他 ${table.columns.length - 5} カラム\n`;
                }
                context += `\n`;
            }
        }
        if (columnsWithoutComments.length > 0) {
            context += `### コメントが必要なカラム:\n`;
            // A Map keeps insertion order and is safe for arbitrary key text.
            const groupedByTable = new Map();
            for (const { table, column } of columnsWithoutComments) {
                const key = `${table.schema}.${table.table}`;
                if (!groupedByTable.has(key)) {
                    groupedByTable.set(key, []);
                }
                groupedByTable.get(key).push(column);
            }
            for (const [tableName, columns] of groupedByTable) {
                context += `テーブル: ${tableName}\n`;
                for (const column of columns) {
                    const nullability = column.nullable ? ', NULL可' : ', NOT NULL';
                    // Explicit null/empty check: a default of 0 or false is a real
                    // default and must be shown.
                    const hasDefault = column.default != null && column.default !== '';
                    const defaultInfo = hasDefault ? `, デフォルト: ${column.default}` : '';
                    context += ` - ${column.name} (${column.type}${nullability}${defaultInfo})\n`;
                }
                context += `\n`;
            }
        }
        context += `### コメント生成ルール:\n`;
        context += `1. **JLPT N1レベルの自然な日本語** - ビジネス用語を適切に使用\n`;
        context += `2. **テーブル名** - 「〜テーブル」「〜マスター」「〜管理」等の接尾辞を適切に付与\n`;
        context += `3. **カラム名** - データ型に応じた適切で自然な表現(ID、名前、日付、フラグ等)\n`;
        context += `4. **業務的意味** - システム的な名前ではなく、業務上の意味を重視\n`;
        context += `5. **統一性** - 同じ概念には同じ用語を使用\n`;
        context += `6. **簡潔性** - 冗長な表現を避け、必要最小限の語数で表現\n\n`;
        context += `### 生成例(JLPT N1レベル):\n`;
        context += `- users → ユーザーマスター\n`;
        context += `- business_partners → 取引先マスター\n`;
        context += `- company_name → 会社名(「会社名前」は冗長)\n`;
        context += `- contact_person → 担当者(「連絡先担当者」は冗長)\n`;
        context += `- contract_start_date → 契約開始日(「日付」は不十分)\n`;
        context += `- business_status → 取引状況(「事業状況」より具体的)\n`;
        context += `- credit_rating → 信用度(「信用評価」より簡潔)\n`;
        context += `- created_at → 作成日時\n`;
        context += `- updated_at → 更新日時(「日付」は不十分)\n`;
        context += `- is_active → 有効フラグ\n\n`;
        context += `**上記の情報を元に、適切な日本語コメントを生成してください。**`;
        return context;
    }

    /** Delegates to the PII masker to decide whether an identifier names personal data. */
    containsPII(identifier) {
        return piiMasker.isPIIColumn(identifier);
    }
}

export const commentGenerator = new CommentGenerator();
//# sourceMappingURL=comment-generator.js.map