sql-talk
Version:
SQL Talk - 自然言語をSQLに変換するMCPサーバー(安全性保護・SSHトンネル対応) / SQL Talk - MCP Server for Natural Language to SQL conversion with safety guards and SSH tunnel support
477 lines • 20.6 kB
JavaScript
import { readFileSync } from 'fs';
import { resolve } from 'path';
import { parse as parseYaml } from 'yaml';
import { configManager } from '../core/config.js';
import { logger } from '../core/logger.js';
import { piiMasker } from '../security/pii-masker.js';
/**
 * Rule-based generator of Japanese comment proposals for database tables and
 * columns that do not have a comment yet.
 *
 * Names are translated using, in order of precedence: the YAML dictionaries
 * listed in the `comment_infer` configuration (their `synonym`, `abbr` and
 * `style` sections are read), a few naming-pattern heuristics, and a small
 * built-in English → Japanese vocabulary.
 */
export class CommentGenerator {
    /** Built-in vocabulary used when no dictionary entry matches a token. */
    static #COMMON_TRANSLATIONS = new Map([
        // Business terms
        ['company', '会社'],
        ['business', '事業'],
        ['partner', 'パートナー'],
        ['partners', 'パートナー'],
        ['client', '顧客'],
        ['customer', '顧客'],
        ['supplier', '供給業者'],
        ['vendor', '業者'],
        // Contact information
        ['name', '名前'],
        ['email', 'メールアドレス'],
        ['phone', '電話番号'],
        ['address', '住所'],
        ['contact', '連絡先'],
        ['person', '担当者'],
        // Location
        ['postal', '郵便'],
        ['code', 'コード'],
        ['zip', '郵便番号'],
        // Industry/Business
        ['industry', '業種'],
        ['contract', '契約'],
        ['start', '開始'],
        ['status', '状況'],
        // '度' reads more naturally than '評価' in compounds such as 信用度 / 満足度.
        ['rating', '度'],
        ['credit', '信用'],
        // Common fields
        ['created', '作成'],
        ['updated', '更新'],
        ['at', '日時'],
        ['date', '日付'],
        ['time', '時刻'],
        // User related
        ['user', 'ユーザー'],
        ['users', 'ユーザー'],
        ['order', '注文'],
        ['orders', '注文'],
        ['product', '商品'],
        ['products', '商品'],
        ['category', 'カテゴリ'],
        ['categories', 'カテゴリ'],
        // Quantities and amounts
        ['quantity', '数量'],
        ['amount', '金額'],
        ['price', '価格'],
        ['total', '合計'],
        ['unit', '単価'],
        ['subtotal', '小計'],
        // Details and descriptions
        ['detail', '詳細'],
        ['details', '詳細'],
        ['description', '説明'],
        ['shipping', '配送'],
        // Status values
        ['active', 'アクティブ'],
        ['inactive', '非アクティブ'],
        ['pending', '保留中'],
        ['suspended', '停止中'],
        ['completed', '完了'],
        ['processing', '処理中'],
    ]);

    /** Substring patterns tried, in order, when no token of a table name translates. */
    static #TABLE_NAME_PATTERNS = Object.freeze([
        [['user', 'member'], 'ユーザー'],
        [['order'], '注文'],
        [['product', 'item'], '商品'],
        [['category'], 'カテゴリ'],
        [['partner', 'client', 'customer'], '取引先'],
        [['company', 'business'], '会社'],
    ]);

    /** Matches "int" unless it is part of "point" or "interval". */
    static #INTEGER_TYPE = /(?<!po)int(?!erval)/;

    /** Labels returned by getTypeDescription(), keyed by type category. */
    static #TYPE_DESCRIPTIONS = Object.freeze({
        number: '数値',
        string: '文字列',
        date: '日付',
        time: '時刻',
        bool: '真偽値',
    });

    /** Labels returned by getBasicTypeComment(), keyed by type category. */
    static #BASIC_TYPE_COMMENTS = Object.freeze({
        number: '数値',
        string: '文字列',
        date: '日付',
        time: '日時',
        bool: 'フラグ',
    });

    /** Parsed dictionary objects, in the order their paths appear in the configuration. */
    dictionaries = [];
    /** Comment style; 'verbose' adds type/size details, anything else is treated as concise. */
    style = 'concise';

    /** Loads the configured dictionaries immediately. */
    constructor() {
        this.loadDictionaries();
    }

    /**
     * Maps a SQL type name to a coarse category.
     * @param {string} dataType - Column data type as reported by the database.
     * @returns {'number'|'string'|'date'|'time'|'bool'|null} Category, or null when unrecognised.
     */
    static #classifyType(dataType) {
        const lowerType = String(dataType ?? '').toLowerCase();
        if (CommentGenerator.#INTEGER_TYPE.test(lowerType)
            || lowerType.includes('number')
            || lowerType.includes('numeric')) {
            return 'number';
        }
        // 'char' also covers 'varchar'.
        if (lowerType.includes('char') || lowerType.includes('text')) {
            return 'string';
        }
        if (lowerType.includes('date')) {
            return 'date';
        }
        if (lowerType.includes('time')) {
            return 'time';
        }
        if (lowerType.includes('bool')) {
            return 'bool';
        }
        return null;
    }

    /**
     * Returns the value of the first dictionary synonym whose key occurs
     * anywhere inside `lowerName`. Empty keys (which would match every name)
     * and empty values are skipped.
     * @param {object[]} dictionaries - Dictionaries to search, in order.
     * @param {string} lowerName - Lower-cased identifier.
     * @returns {*} The synonym value, or null when nothing matches.
     */
    static #findContainedSynonym(dictionaries, lowerName) {
        for (const dict of dictionaries) {
            if (!dict?.synonym) {
                continue;
            }
            for (const [key, value] of Object.entries(dict.synonym)) {
                if (key === '' || value == null || value === '') {
                    continue;
                }
                if (lowerName.includes(key.toLowerCase())) {
                    return value;
                }
            }
        }
        return null;
    }

    /**
     * Tests whether any token carries a keyword. The full word may be a prefix
     * or suffix of a token (`birthdate`, `timestamp`); the abbreviation must be
     * a suffix (`del_flg`, `regdt`). Requiring a token boundary avoids hits in
     * the middle of unrelated words (`width`, `department`, `updated`, `tmp`).
     * @param {string[]} tokens - Lower-cased identifier tokens.
     * @param {string} word - Full keyword, e.g. 'date'.
     * @param {string} abbreviation - Abbreviated keyword, e.g. 'dt'.
     * @returns {boolean} True when a token matches.
     */
    static #hasKeyword(tokens, word, abbreviation) {
        return tokens.some((token) => token.startsWith(word)
            || token.endsWith(word)
            || token.endsWith(abbreviation));
    }

    /**
     * Reads the `comment_infer` configuration, sets the style and replaces
     * `this.dictionaries` with the dictionaries that could be loaded.
     * A dictionary that cannot be read, or whose top-level YAML value is not a
     * mapping, is logged and skipped. If the configuration itself cannot be
     * read, the generator falls back to no dictionaries.
     */
    loadDictionaries() {
        const loaded = [];
        try {
            const { style, dictionary_paths: dictionaryPaths } = configManager.getConfig().comment_infer;
            this.style = style ?? this.style;
            for (const dictPath of dictionaryPaths) {
                try {
                    const fullPath = resolve(process.cwd(), dictPath);
                    const dict = parseYaml(readFileSync(fullPath, 'utf-8'));
                    // An empty or scalar YAML document would crash later lookups such as dict.synonym.
                    if (dict === null || typeof dict !== 'object' || Array.isArray(dict)) {
                        logger.warn(`Ignoring dictionary ${dictPath}: top-level YAML value is not a mapping`);
                        continue;
                    }
                    loaded.push(dict);
                    logger.info(`Loaded dictionary: ${dictPath}`);
                }
                catch (error) {
                    logger.warn(`Failed to load dictionary ${dictPath}:`, error);
                }
            }
            this.dictionaries = loaded;
            logger.info(`Comment generator initialized with ${this.dictionaries.length} dictionaries`);
        }
        catch (error) {
            logger.error('Failed to initialize comment generator:', error);
            // Use default empty dictionaries
            this.dictionaries = [];
        }
    }

    /**
     * Proposes a comment for every table that has none.
     * @param {object[]} tables - Table descriptors (`schema`, `table`, `table_comment`, `columns`).
     * @returns {object[]} Proposals with `object`, `fqdn`, `before`, `after`, `rationale` and `pii`.
     */
    generateTableComments(tables) {
        const proposals = [];
        for (const table of tables) {
            if (table.table_comment) {
                continue;
            }
            const comment = this.inferTableComment(table);
            if (!comment) {
                continue;
            }
            proposals.push({
                object: 'table',
                fqdn: `${table.schema}.${table.table}`,
                before: table.table_comment,
                after: comment,
                rationale: this.getTableCommentRationale(table),
                pii: this.containsPII(table.table),
            });
        }
        return proposals;
    }

    /**
     * Proposes a comment for every column that has none.
     * @param {object[]} tables - Table descriptors whose `columns` carry `name`, `type` and `comment`.
     * @returns {object[]} Proposals with `object`, `fqdn`, `before`, `after`, `rationale` and `pii`.
     */
    generateColumnComments(tables) {
        const proposals = [];
        for (const table of tables) {
            for (const column of table.columns) {
                if (column.comment) {
                    continue;
                }
                const comment = this.inferColumnComment(column, table);
                if (!comment) {
                    continue;
                }
                proposals.push({
                    object: 'column',
                    fqdn: `${table.schema}.${table.table}.${column.name}`,
                    before: column.comment,
                    after: comment,
                    rationale: this.getColumnCommentRationale(column, table),
                    // Same PII check as generateTableComments, via the shared wrapper.
                    pii: this.containsPII(column.name),
                });
            }
        }
        return proposals;
    }

    /**
     * Translates each token and concatenates the ones that translate.
     * @param {string[]} tokens - Lower-cased identifier tokens.
     * @returns {string|null} Joined translation, or null when no token translates.
     */
    #translateTokens(tokens) {
        const translated = tokens
            .map((token) => this.translateToken(token) || this.translateCommonWord(token))
            .filter(Boolean);
        return translated.length > 0 ? translated.join('') : null;
    }

    /**
     * Infers a comment for a table from its name.
     * Order: dictionary synonym contained in the name, token-by-token
     * translation, built-in substring patterns, then the generic 'データ'.
     * @param {object} table - Table descriptor; `table.table` is the name.
     * @returns {string} The formatted comment (never empty-handed: falls back to 'データ').
     */
    inferTableComment(table) {
        const tableName = table.table.toLowerCase();
        // Check for direct translations
        const synonym = CommentGenerator.#findContainedSynonym(this.dictionaries, tableName);
        if (synonym !== null) {
            return this.formatTableComment(synonym, table);
        }
        // Tokenize the original-case name so camelCase boundaries are still visible.
        const translated = this.#translateTokens(this.tokenizeIdentifier(table.table));
        if (translated !== null) {
            return this.formatTableComment(translated, table);
        }
        // Handle common table name patterns
        for (const [needles, label] of CommentGenerator.#TABLE_NAME_PATTERNS) {
            if (needles.some((needle) => tableName.includes(needle))) {
                return this.formatTableComment(label, table);
            }
        }
        // Last resort: generate a generic comment
        return this.formatTableComment('データ', table);
    }

    /**
     * Infers a comment for a column from its name and, as a last resort, its type.
     * Order: dictionary synonym contained in the name, dictionary abbreviation
     * (whole name or `_abbr` suffix), ID / date / time / flag naming patterns,
     * token-by-token translation, then a type-based label.
     * @param {object} column - Column descriptor (`name`, `type`).
     * @param {object} table - Owning table descriptor (passed through to the formatter).
     * @returns {string} The formatted comment.
     */
    inferColumnComment(column, table) {
        const columnName = column.name.toLowerCase();
        // Check for direct translations
        const synonym = CommentGenerator.#findContainedSynonym(this.dictionaries, columnName);
        if (synonym !== null) {
            return this.formatColumnComment(synonym, column, table);
        }
        // Check abbreviations
        for (const dict of this.dictionaries) {
            if (!dict?.abbr) {
                continue;
            }
            for (const [abbr, expansion] of Object.entries(dict.abbr)) {
                const lowerAbbr = abbr.toLowerCase();
                if (columnName === lowerAbbr || columnName.endsWith(`_${lowerAbbr}`)) {
                    return this.formatColumnComment(expansion, column, table);
                }
            }
        }
        // Tokenize the original-case name so camelCase boundaries are still visible.
        const tokens = this.tokenizeIdentifier(column.name);
        const lastToken = tokens.length > 0 ? tokens[tokens.length - 1] : '';
        // Handle common patterns first (more specific patterns)
        if (lastToken === 'id') {
            // Explicit ID suffix: id, user_id, userId, userID.
            const entityName = tokens.slice(0, -1).join('_');
            const translatedEntity = this.translateToken(entityName) || this.translateCommonWord(entityName);
            return this.formatColumnComment(translatedEntity ? `${translatedEntity}ID` : 'ID', column, table);
        }
        if (lastToken === 'uuid' || lastToken === 'guid') {
            return this.formatColumnComment('ID', column, table);
        }
        if (lastToken.endsWith('id')) {
            // Run-together names such as "userid" count as IDs only when the stem
            // is a known word; otherwise "valid", "paid", "grid" would be mislabelled.
            const stem = lastToken.slice(0, -2);
            const translatedStem = stem ? (this.translateToken(stem) || this.translateCommonWord(stem)) : null;
            if (translatedStem) {
                return this.formatColumnComment(`${translatedStem}ID`, column, table);
            }
        }
        if (CommentGenerator.#hasKeyword(tokens, 'date', 'dt')) {
            return this.formatColumnComment('日付', column, table);
        }
        if (CommentGenerator.#hasKeyword(tokens, 'time', 'tm')) {
            return this.formatColumnComment('時刻', column, table);
        }
        if (CommentGenerator.#hasKeyword(tokens, 'flag', 'flg')) {
            return this.formatColumnComment('フラグ', column, table);
        }
        // Tokenize and translate
        const translated = this.#translateTokens(tokens);
        if (translated !== null) {
            return this.formatColumnComment(translated, column, table);
        }
        // Last resort: generate basic comment based on data type
        return this.formatColumnComment(this.getBasicTypeComment(column.type), column, table);
    }

    /**
     * Splits an identifier into lower-case tokens at underscores, hyphens,
     * whitespace, lower→upper camelCase boundaries and letter/digit boundaries.
     * @param {string} identifier - Table or column name in its original case.
     * @returns {string[]} Non-empty lower-case tokens.
     */
    tokenizeIdentifier(identifier) {
        return String(identifier)
            .replace(/([a-z])([A-Z])/g, '$1_$2') // camelCase to snake_case
            .replace(/([a-zA-Z])(\d)/g, '$1_$2') // letter followed by digit
            .replace(/(\d)([a-zA-Z])/g, '$1_$2') // digit followed by letter
            .toLowerCase()
            .split(/[_\-\s]+/)
            .filter((token) => token.length > 0);
    }

    /**
     * Looks a token up in the dictionaries: every dictionary's `abbr` section
     * first, then every `synonym` section. Keys are compared case-insensitively
     * and only the mapping's own entries are considered.
     * @param {string} token - Identifier token.
     * @returns {*} The dictionary value, or null when the token is unknown.
     */
    translateToken(token) {
        const lowerToken = String(token).toLowerCase();
        for (const section of ['abbr', 'synonym']) {
            for (const dict of this.dictionaries) {
                const mapping = dict?.[section];
                if (!mapping) {
                    continue;
                }
                for (const [key, value] of Object.entries(mapping)) {
                    if (value && key.toLowerCase() === lowerToken) {
                        return value;
                    }
                }
            }
        }
        return null;
    }

    /**
     * Decorates a table comment. In 'verbose' style a 'テーブル' suffix is added
     * (unless the text already contains 'テーブル' or '表') followed by the
     * column count; otherwise the text is returned unchanged.
     * @param {string} baseComment - Translated table name.
     * @param {object} table - Table descriptor; `columns.length` is used in verbose style.
     * @returns {string} The final comment.
     */
    formatTableComment(baseComment, table) {
        const base = String(baseComment);
        if (this.style !== 'verbose') {
            return base;
        }
        const hasSuffix = base.includes('テーブル') || base.includes('表');
        return `${base}${hasSuffix ? '' : 'テーブル'}(${table.columns.length}列)`;
    }

    /**
     * Decorates a column comment. In 'verbose' style the type description is
     * appended in parentheses; otherwise a dictionary style hint (date format,
     * money unit) is appended when one applies.
     * @param {string} baseComment - Translated column name.
     * @param {object} column - Column descriptor (`name`, `type`).
     * @param {object} table - Owning table descriptor (currently unused).
     * @returns {string} The final comment.
     */
    formatColumnComment(baseComment, column, table) {
        let suffix;
        if (this.style === 'verbose') {
            const typeInfo = this.getTypeDescription(column.type);
            suffix = typeInfo ? `(${typeInfo})` : '';
        }
        else {
            // Concise style - just add units or format hints
            suffix = this.getStyleInfo(column.type, column.name) ?? '';
        }
        return `${baseComment}${suffix}`;
    }

    /**
     * Describes a data type for verbose comments and rationales.
     * @param {string} dataType - Column data type.
     * @returns {string|null} Japanese type label, or null for unrecognised types.
     */
    getTypeDescription(dataType) {
        const category = CommentGenerator.#classifyType(dataType);
        return category === null ? null : CommentGenerator.#TYPE_DESCRIPTIONS[category];
    }

    /**
     * Finds a concise-style hint in the dictionaries' `style` sections.
     * Types containing "time" prefer `style.datetime`, types containing "date"
     * use `style.date`, and columns whose name contains amount/amt/fee use
     * `style.money_unit`. A dictionary lacking the relevant key does not stop
     * the search through later dictionaries.
     * @param {string} dataType - Column data type.
     * @param {string} columnName - Column name.
     * @returns {string|null} Parenthesised hint, or null when none applies.
     */
    getStyleInfo(dataType, columnName) {
        const lowerType = String(dataType ?? '').toLowerCase();
        const lowerName = String(columnName ?? '').toLowerCase();
        const hasTime = lowerType.includes('time');
        const hasDate = lowerType.includes('date');
        const isMoney = ['amount', 'amt', 'fee'].some((keyword) => lowerName.includes(keyword));
        // Check style dictionary
        for (const dict of this.dictionaries) {
            const style = dict?.style;
            if (!style) {
                continue;
            }
            // Checked before the date rule so that DATETIME gets the datetime format.
            if (hasTime && style.datetime) {
                return `(${style.datetime})`;
            }
            if (hasDate && style.date) {
                return `(${style.date})`;
            }
            if (isMoney && style.money_unit) {
                return `(${style.money_unit})`;
            }
        }
        return null;
    }

    /**
     * Explains how a table comment was derived.
     * @param {object} table - Table descriptor.
     * @returns {string} Japanese rationale listing the name tokens.
     */
    getTableCommentRationale(table) {
        const tokens = this.tokenizeIdentifier(table.table);
        return `テーブル名 '${table.table}' を分析し、含まれる要素 [${tokens.join(', ')}] から推測`;
    }

    /**
     * Explains how a column comment was derived.
     * @param {object} column - Column descriptor (`name`, `type`).
     * @param {object} table - Owning table descriptor (currently unused).
     * @returns {string} Japanese rationale; mentions tokens when the name has
     *   more than one and the data type when it is recognised.
     */
    getColumnCommentRationale(column, table) {
        const tokens = this.tokenizeIdentifier(column.name);
        let rationale = `カラム名 '${column.name}' を分析`;
        if (tokens.length > 1) {
            rationale += `し、要素 [${tokens.join(', ')}] から推測`;
        }
        if (this.getTypeDescription(column.type)) {
            rationale += `。データ型 '${column.type}' も考慮`;
        }
        return rationale;
    }

    /**
     * Looks a word up in the built-in vocabulary (case-insensitive).
     * @param {string} word - English word.
     * @returns {string|null} Japanese translation, or null when unknown.
     */
    translateCommonWord(word) {
        return CommentGenerator.#COMMON_TRANSLATIONS.get(String(word).toLowerCase()) ?? null;
    }

    /**
     * Returns a generic label derived only from the data type; used when the
     * column name yields nothing.
     * @param {string} dataType - Column data type.
     * @returns {string} Japanese label; '項目' for unrecognised types.
     */
    getBasicTypeComment(dataType) {
        const category = CommentGenerator.#classifyType(dataType);
        return category === null ? '項目' : CommentGenerator.#BASIC_TYPE_COMMENTS[category];
    }

    /**
     * Generate enhanced comments using LLM context
     * This method provides detailed context to the MCP client for better comment generation
     *
     * @param {object[]} tables - Table descriptors.
     * @param {string} [scope='all'] - 'all', 'tables' or 'columns'; limits both
     *   the proposals and the objects listed in the LLM context.
     * @returns {{proposals: object[], llmContext?: string}} Rule-based proposals,
     *   plus a prompt context when in-scope objects lack comments.
     */
    generateEnhancedComments(tables, scope = 'all') {
        const includeTables = scope === 'all' || scope === 'tables';
        const includeColumns = scope === 'all' || scope === 'columns';
        // Generate basic proposals first
        const proposals = [
            ...(includeTables ? this.generateTableComments(tables) : []),
            ...(includeColumns ? this.generateColumnComments(tables) : []),
        ];
        // Check if we need LLM assistance for missing comments (in-scope objects only)
        const tablesWithoutComments = includeTables
            ? tables.filter((table) => !table.table_comment)
            : [];
        const columnsWithoutComments = includeColumns
            ? tables.flatMap((table) => table.columns
                .filter((column) => !column.comment)
                .map((column) => ({ table, column })))
            : [];
        const result = { proposals };
        if (tablesWithoutComments.length > 0 || columnsWithoutComments.length > 0) {
            result.llmContext = this.buildLLMContextForComments(tablesWithoutComments, columnsWithoutComments);
        }
        return result;
    }

    /**
     * Builds the Japanese Markdown prompt describing the objects that still
     * need comments, followed by fixed generation rules and examples.
     * @param {object[]} tablesWithoutComments - Tables lacking a comment.
     * @param {{table: object, column: object}[]} columnsWithoutComments - Columns lacking a comment.
     * @returns {string} The prompt text.
     */
    buildLLMContextForComments(tablesWithoutComments, columnsWithoutComments) {
        const parts = [`## データベースコメント生成のためのコンテキスト情報\n\n`];
        if (tablesWithoutComments.length > 0) {
            parts.push(`### コメントが必要なテーブル:\n`);
            for (const table of tablesWithoutComments) {
                const leadingColumns = table.columns
                    .slice(0, 5)
                    .map((c) => `${c.name}(${c.type})`)
                    .join(', ');
                parts.push(`テーブル: ${table.schema}.${table.table}\n`);
                parts.push(`カラム数: ${table.columns.length}列\n`);
                parts.push(`主要カラム: ${leadingColumns}\n`);
                if (table.columns.length > 5) {
                    parts.push(`その他 ${table.columns.length - 5} カラム\n`);
                }
                parts.push(`\n`);
            }
        }
        if (columnsWithoutComments.length > 0) {
            parts.push(`### コメントが必要なカラム:\n`);
            // A Map keeps insertion order and is safe for any table name used as key.
            const groupedByTable = new Map();
            for (const { table, column } of columnsWithoutComments) {
                const key = `${table.schema}.${table.table}`;
                if (!groupedByTable.has(key)) {
                    groupedByTable.set(key, []);
                }
                groupedByTable.get(key).push(column);
            }
            for (const [tableName, columns] of groupedByTable) {
                parts.push(`テーブル: ${tableName}\n`);
                for (const column of columns) {
                    const nullability = column.nullable ? ', NULL可' : ', NOT NULL';
                    // Explicit check so defaults such as 0 or false are not dropped.
                    const hasDefault = column.default != null && column.default !== '';
                    const defaultInfo = hasDefault ? `, デフォルト: ${column.default}` : '';
                    parts.push(`  - ${column.name} (${column.type}${nullability}${defaultInfo})\n`);
                }
                parts.push(`\n`);
            }
        }
        parts.push(`### コメント生成ルール:\n`, `1. **JLPT N1レベルの自然な日本語** - ビジネス用語を適切に使用\n`, `2. **テーブル名** - 「〜テーブル」「〜マスター」「〜管理」等の接尾辞を適切に付与\n`, `3. **カラム名** - データ型に応じた適切で自然な表現(ID、名前、日付、フラグ等)\n`, `4. **業務的意味** - システム的な名前ではなく、業務上の意味を重視\n`, `5. **統一性** - 同じ概念には同じ用語を使用\n`, `6. **簡潔性** - 冗長な表現を避け、必要最小限の語数で表現\n\n`);
        parts.push(`### 生成例(JLPT N1レベル):\n`, `- users → ユーザーマスター\n`, `- business_partners → 取引先マスター\n`, `- company_name → 会社名(「会社名前」は冗長)\n`, `- contact_person → 担当者(「連絡先担当者」は冗長)\n`, `- contract_start_date → 契約開始日(「日付」は不十分)\n`, `- business_status → 取引状況(「事業状況」より具体的)\n`, `- credit_rating → 信用度(「信用評価」より簡潔)\n`, `- created_at → 作成日時\n`, `- updated_at → 更新日時(「日付」は不十分)\n`, `- is_active → 有効フラグ\n\n`);
        parts.push(`**上記の情報を元に、適切な日本語コメントを生成してください。**`);
        return parts.join('');
    }

    /**
     * Delegates to `piiMasker.isPIIColumn` for the given identifier.
     * @param {string} identifier - Table or column name.
     * @returns {*} Whatever `piiMasker.isPIIColumn` returns.
     */
    containsPII(identifier) {
        return piiMasker.isPIIColumn(identifier);
    }
}
// Shared module-level instance. Constructing it runs loadDictionaries(), so the
// configuration and dictionary files are read when this module is first imported.
export const commentGenerator = new CommentGenerator();
//# sourceMappingURL=comment-generator.js.map