UNPKG

@codai/glass-mcp

Version:

GlassMCP - Model Context Protocol server for Windows automation integrated with CODAI ecosystem

780 lines (735 loc) 26.9 kB
#!/usr/bin/env node

// src/mcp-server-enhanced.ts
//
// GlassMCP: a Model Context Protocol server (stdio transport) exposing Windows
// window-management, UI text-extraction and clipboard tools. Every tool is
// implemented by writing a PowerShell script to a temporary file and running it.
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
  CallToolRequestSchema,
  ListToolsRequestSchema
} from "@modelcontextprotocol/sdk/types.js";
import { exec } from "child_process";
import { randomUUID } from "crypto";
import { promisify } from "util";
import { writeFileSync, unlinkSync } from "fs";
import { tmpdir } from "os";
import { join } from "path";

const execAsyncPromisified = promisify(exec);

/**
 * Error type for all failures raised by this server.
 *
 * @param {string} message - Human-readable description.
 * @param {string} [code] - Optional machine-readable error code.
 * @param {{ cause?: unknown }} [options] - Forwarded to `Error` so the
 *   underlying failure is preserved as `error.cause`.
 */
class GlassMCPError extends Error {
  constructor(message, code, options) {
    super(message, options);
    this.code = code;
    this.name = "GlassMCPError";
  }
}

// ---------------------------------------------------------------------------
// PowerShell plumbing
// ---------------------------------------------------------------------------

/**
 * C# P/Invoke declarations shared by the scripts that focus a window.
 * NOTE: the closing `"@` of a PowerShell here-string must start its line, so
 * the script templates in this file are deliberately not indented.
 */
const USER32_TYPE_DEFINITION = `Add-Type -TypeDefinition @"
using System;
using System.Runtime.InteropServices;

public static class User32 {
    [DllImport("user32.dll")]
    public static extern bool SetForegroundWindow(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool ShowWindow(IntPtr hWnd, int nCmdShow);
}
"@`;

/** Script that enumerates every titled top-level window and prints JSON. */
const WINDOW_ENUMERATION_SCRIPT = `
Add-Type @"
using System;
using System.Runtime.InteropServices;
using System.Text;
using System.Collections.Generic;

public struct RECT {
    public int Left, Top, Right, Bottom;
}

public static class WindowManager {
    [DllImport("user32.dll")]
    public static extern bool EnumWindows(EnumWindowsProc enumProc, IntPtr lParam);

    [DllImport("user32.dll")]
    public static extern int GetWindowText(IntPtr hWnd, StringBuilder strText, int maxCount);

    [DllImport("user32.dll")]
    public static extern int GetWindowTextLength(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool IsWindowVisible(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool IsIconic(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool IsZoomed(IntPtr hWnd);

    [DllImport("user32.dll")]
    public static extern bool GetWindowRect(IntPtr hwnd, ref RECT rectangle);

    [DllImport("user32.dll")]
    public static extern int GetClassName(IntPtr hWnd, StringBuilder lpClassName, int nMaxCount);

    public delegate bool EnumWindowsProc(IntPtr hWnd, IntPtr lParam);

    public static List<object> GetAllWindows() {
        var windows = new List<object>();
        EnumWindows(delegate(IntPtr hWnd, IntPtr param) {
            var length = GetWindowTextLength(hWnd);
            if (length == 0) return true;

            var builder = new StringBuilder(length + 1);
            GetWindowText(hWnd, builder, builder.Capacity);

            var classBuilder = new StringBuilder(256);
            GetClassName(hWnd, classBuilder, classBuilder.Capacity);

            var rect = new RECT();
            GetWindowRect(hWnd, ref rect);

            windows.Add(new {
                handle = hWnd.ToInt64().ToString(),
                title = builder.ToString(),
                className = classBuilder.ToString(),
                isVisible = IsWindowVisible(hWnd),
                isMinimized = IsIconic(hWnd),
                isMaximized = IsZoomed(hWnd),
                rect = new {
                    left = rect.Left,
                    top = rect.Top,
                    right = rect.Right,
                    bottom = rect.Bottom
                }
            });
            return true;
        }, IntPtr.Zero);
        return windows;
    }
}
"@

$windows = [WindowManager]::GetAllWindows()
$windows | ConvertTo-Json -Depth 3
`;

/**
 * Run a PowerShell script through a temporary `.ps1` file.
 *
 * @param {string} script - Script source.
 * @returns {Promise<{ stdout: string, stderr: string }>} Output of the process.
 * @throws {GlassMCPError} When the PowerShell process fails.
 */
async function execPowerShell(script) {
  // A timestamp alone collides when two tool calls start in the same
  // millisecond; the UUID makes the file name unique per invocation.
  const tempFile = join(tmpdir(), `glass-mcp-${Date.now()}-${randomUUID()}.ps1`);
  writeFileSync(tempFile, script, "utf8");
  try {
    return await execAsyncPromisified(
      `powershell -NoProfile -ExecutionPolicy Bypass -File "${tempFile}"`
    );
  } catch (error) {
    throw new GlassMCPError(
      `PowerShell execution failed: ${error.message}`,
      "POWERSHELL_FAILED",
      { cause: error }
    );
  } finally {
    try {
      unlinkSync(tempFile);
    } catch {
      // Best-effort cleanup: a leftover temp file must not mask the real result.
    }
  }
}

/**
 * Validate a caller-supplied window handle and return it as a decimal string
 * that is safe to splice into a PowerShell script.
 *
 * @param {number|string} windowHandle - Handle as a number or decimal string.
 * @returns {string} Decimal integer literal.
 * @throws {GlassMCPError} When the value is not an integer handle.
 */
function toHandleLiteral(windowHandle) {
  const candidate =
    typeof windowHandle === "number" ? String(windowHandle) : windowHandle;
  // Digits only (optional sign, at most 19 digits for Int64). Anything else
  // would be executed as PowerShell code once interpolated.
  if (typeof candidate !== "string" || !/^-?\d{1,19}$/.test(candidate.trim())) {
    throw new GlassMCPError(
      `Invalid window handle: ${String(windowHandle)}`,
      "INVALID_HANDLE"
    );
  }
  return candidate.trim();
}

/**
 * Build a PowerShell expression that evaluates to exactly `text`.
 *
 * The text travels as base64, so quotes, `$`, backticks and non-ASCII
 * characters cannot be interpreted by PowerShell or altered by the script
 * file's encoding.
 *
 * @param {string} text - Text to embed.
 * @returns {string} PowerShell expression yielding the text.
 * @throws {GlassMCPError} When `text` is not a string.
 */
function toPowerShellStringExpression(text) {
  if (typeof text !== "string") {
    throw new GlassMCPError("text must be a string", "INVALID_TEXT");
  }
  const encoded = Buffer.from(text, "utf8").toString("base64");
  return `[System.Text.Encoding]::UTF8.GetString([System.Convert]::FromBase64String('${encoded}'))`;
}

// ---------------------------------------------------------------------------
// Window and clipboard operations
// ---------------------------------------------------------------------------

/**
 * List all top-level windows that have a non-empty title.
 *
 * @returns {Promise<object[]>} Entries with `handle` (decimal string), `title`,
 *   `className`, `isVisible`, `isMinimized`, `isMaximized` and `rect`.
 * @throws {GlassMCPError} When PowerShell fails or prints invalid JSON.
 */
async function listWindows() {
  const result = await execPowerShell(WINDOW_ENUMERATION_SCRIPT);
  const raw = result.stdout.trim();
  // ConvertTo-Json prints nothing for an empty pipeline and a bare object for
  // a single item; normalise both so callers always receive an array.
  if (raw === "") {
    return [];
  }
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (error) {
    throw new GlassMCPError(
      `Could not parse window list: ${error.message}`,
      "INVALID_WINDOW_LIST",
      { cause: error }
    );
  }
  return Array.isArray(parsed) ? parsed : [parsed];
}

/**
 * Find the first window whose title matches.
 *
 * @param {string} title - Title to look for.
 * @param {boolean} [exact=false] - Exact match instead of a case-insensitive
 *   substring match.
 * @returns {Promise<object>} The matching entry from {@link listWindows}.
 * @throws {GlassMCPError} When `title` is not a string or nothing matches.
 */
async function findWindowByTitle(title, exact = false) {
  if (typeof title !== "string") {
    throw new GlassMCPError("title must be a string", "INVALID_TITLE");
  }
  const windows = await listWindows();
  const needle = title.toLowerCase();
  const match = windows.find((candidate) => {
    const candidateTitle = String(candidate.title);
    return exact
      ? candidateTitle === title
      : candidateTitle.toLowerCase().includes(needle);
  });
  if (!match) {
    throw new GlassMCPError(`Window not found: ${title}`, "WINDOW_NOT_FOUND");
  }
  return match;
}

/**
 * Extract text from a window through UI Automation.
 *
 * The script reports the `Name` of Text, Button and Edit descendants, skips
 * names of 100 characters or more, and stops shortly after 50 entries.
 *
 * @param {number|string} windowHandle - Handle of the window to inspect.
 * @returns {Promise<object>} `{ windowHandle, windowTitle, textElements,
 *   totalTextLength, extractionTimestamp }`.
 * @throws {GlassMCPError} On an invalid handle, a UI Automation error, or
 *   unexpected script output.
 */
async function extractWindowText(windowHandle) {
  const handleLiteral = toHandleLiteral(windowHandle);
  const script = `
try {
    Add-Type -AssemblyName UIAutomationClient
    Add-Type -AssemblyName UIAutomationTypes

    $handle = [IntPtr]::new(${handleLiteral})
    $automation = [System.Windows.Automation.AutomationElement]::FromHandle($handle)

    if ($automation -eq $null) {
        Write-Output "ERROR:Could not get automation element"
        exit
    }

    # Get window title
    $windowTitle = $automation.Current.Name
    if ([string]::IsNullOrEmpty($windowTitle)) {
        $windowTitle = "Unknown Window"
    }

    # Simple text extraction - get all text elements
    $textCondition = [System.Windows.Automation.Condition]::TrueCondition
    $textElements = $automation.FindAll([System.Windows.Automation.TreeScope]::Descendants, $textCondition)

    $allTexts = @()
    $elementCount = 0

    foreach ($element in $textElements) {
        try {
            $text = ""

            # Try to get text from different sources
            if ($element.Current.ControlType -eq [System.Windows.Automation.ControlType]::Text -or
                $element.Current.ControlType -eq [System.Windows.Automation.ControlType]::Button -or
                $element.Current.ControlType -eq [System.Windows.Automation.ControlType]::Edit) {
                # Try name first
                $text = $element.Current.Name
            }

            # Clean and add text
            if (![string]::IsNullOrWhiteSpace($text) -and $text.Length -gt 0) {
                $cleanText = $text -replace '[\\r\\n\\t]', ' ' -replace '\\s+', ' '
                $cleanText = $cleanText.Trim()

                if ($cleanText.Length -gt 0 -and $cleanText.Length -lt 100) {
                    $allTexts += $cleanText
                    $elementCount++

                    if ($elementCount -gt 50) { break }
                }
            }
        } catch {
            continue
        }
    }

    # Output simple format
    Write-Output "SUCCESS"
    Write-Output "TITLE:$windowTitle"
    Write-Output "COUNT:$elementCount"
    Write-Output "TEXTS:"
    foreach ($txt in $allTexts) {
        Write-Output "TEXT:$txt"
    }
} catch {
    Write-Output "ERROR:$($_.Exception.Message)"
}
`;
  const result = await execPowerShell(script);
  const lines = result.stdout
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
  const [status = "", ...payload] = lines;

  // The script reports failures as a single "ERROR:<message>" line. The bare
  // "ERROR" form (message on the next line) is still accepted.
  if (status.startsWith("ERROR:")) {
    throw new GlassMCPError(
      `UI Automation error: ${status.slice("ERROR:".length) || "Unknown error"}`,
      "UI_AUTOMATION_ERROR"
    );
  }
  if (status === "ERROR") {
    throw new GlassMCPError(
      `UI Automation error: ${payload[0] || "Unknown error"}`,
      "UI_AUTOMATION_ERROR"
    );
  }
  if (status !== "SUCCESS") {
    throw new GlassMCPError(`Unexpected response format`, "UNEXPECTED_RESPONSE");
  }

  let windowTitle = "Unknown Window";
  const textElements = [];
  for (const line of payload) {
    if (line.startsWith("TITLE:")) {
      windowTitle = line.slice("TITLE:".length);
    } else if (line.startsWith("TEXT:")) {
      // "TEXTS:" (the section marker) does not match "TEXT:" and is skipped.
      textElements.push({
        id: `elem-${textElements.length}`,
        text: line.slice("TEXT:".length),
        elementType: "Text",
        bounds: { x: 0, y: 0, width: 0, height: 0 },
        isVisible: true,
        isEnabled: true,
        automationId: "",
        className: ""
      });
    }
  }

  return {
    windowHandle,
    windowTitle,
    textElements,
    totalTextLength: textElements.reduce((sum, el) => sum + el.text.length, 0),
    extractionTimestamp: new Date().toISOString()
  };
}

/**
 * Bring a window to the foreground and type text into it with SendKeys.
 *
 * NOTE: the text is handed to `SendKeys.SendWait` unchanged, so SendKeys
 * metacharacters (`+ ^ % ~ ( ) { } [ ]`) keep their special meaning.
 *
 * @param {number|string} windowHandle - Handle of the target window.
 * @param {string} text - Key sequence to send.
 * @returns {Promise<boolean>} Always `true` when the script ran.
 * @throws {GlassMCPError} On invalid arguments or PowerShell failure.
 */
async function sendTextToWindow(windowHandle, text) {
  const handleLiteral = toHandleLiteral(windowHandle);
  const textExpression = toPowerShellStringExpression(text);
  const script = `
Add-Type -AssemblyName System.Windows.Forms

# Focus the window first
$handle = [IntPtr]::new(${handleLiteral})
${USER32_TYPE_DEFINITION}

[User32]::ShowWindow($handle, 1) # SW_SHOWNORMAL
[User32]::SetForegroundWindow($handle)
Start-Sleep -Milliseconds 100

# Send the text
$escapedText = ${textExpression}
[System.Windows.Forms.SendKeys]::SendWait($escapedText)
$true
`;
  await execPowerShell(script);
  return true;
}

/**
 * Read the system clipboard as text.
 *
 * @returns {Promise<string>} Clipboard text with surrounding whitespace trimmed.
 * @throws {GlassMCPError} When PowerShell fails.
 */
async function getClipboardText() {
  const script = `
Add-Type -AssemblyName System.Windows.Forms
[System.Windows.Forms.Clipboard]::GetText()
`;
  const result = await execPowerShell(script);
  return result.stdout.trim();
}

/**
 * Replace the system clipboard content with `text`.
 *
 * @param {string} text - Text to store; an empty string clears the clipboard.
 * @returns {Promise<boolean>} Always `true` when the script ran.
 * @throws {GlassMCPError} On invalid arguments or PowerShell failure.
 */
async function setClipboardText(text) {
  const textExpression = toPowerShellStringExpression(text);
  // Clipboard.SetText rejects empty strings, so an empty value is a Clear().
  const clipboardCall =
    text === ""
      ? "[System.Windows.Forms.Clipboard]::Clear()"
      : "[System.Windows.Forms.Clipboard]::SetText($text)";
  const script = `
Add-Type -AssemblyName System.Windows.Forms
$text = ${textExpression}
${clipboardCall}
$true
`;
  await execPowerShell(script);
  return true;
}

/**
 * Show a window found by title and bring it to the foreground.
 *
 * @param {string} title - Title to look for.
 * @param {boolean} [exact=false] - Exact match instead of substring match.
 * @returns {Promise<boolean>} Always `true` when the script ran.
 * @throws {GlassMCPError} When no window matches or PowerShell fails.
 */
async function focusWindow(title, exact = false) {
  const target = await findWindowByTitle(title, exact);
  const script = `
${USER32_TYPE_DEFINITION}

$handle = [IntPtr]::new(${toHandleLiteral(target.handle)})
[User32]::ShowWindow($handle, 1)
[User32]::SetForegroundWindow($handle)
`;
  await execPowerShell(script);
  return true;
}

// ---------------------------------------------------------------------------
// Help / system-information queries
// ---------------------------------------------------------------------------

const CAPABILITY_KEYWORDS = ["capabilities", "system", "info", "help", "what", "how"];
const EXACT_HELP_QUERIES = new Set([
  "help",
  "capabilities",
  "glass help",
  "glass capabilities"
]);

/**
 * @param {string} query - Free-form query text.
 * @returns {boolean} True when the query mentions "glass" together with one of
 *   the capability keywords.
 */
function isSystemCapabilityQuery(query) {
  const lowerQuery = query.toLowerCase();
  return (
    lowerQuery.includes("glass") &&
    CAPABILITY_KEYWORDS.some((keyword) => lowerQuery.includes(keyword))
  );
}

/**
 * @param {string} query - Free-form query text.
 * @returns {boolean} True for the fixed help phrases, or when the query
 *   contains "how to use" or "usage".
 */
function isHelpQuery(query) {
  const lowerQuery = query.toLowerCase();
  return (
    EXACT_HELP_QUERIES.has(lowerQuery) ||
    lowerQuery.includes("how to use") ||
    lowerQuery.includes("usage")
  );
}

// NOTE(review): the three help texts below are split on "\n" by the caller, so
// they are multi-line. The exact position of blank lines is a reconstruction;
// confirm against the TypeScript source.

/** @returns {string} Multi-line overview of the server and its tools. */
function getSystemInformation() {
  return `GlassMCP Enhanced v5.1.0 - Windows Automation Server
=====================================================

\u{1F680} CORE CAPABILITIES:
\u2022 Windows application automation and control
\u2022 Real-time window management and text extraction
\u2022 Clipboard operations with system integration
\u2022 UI Automation with PowerShell-based Windows API
\u2022 Cross-application text input and interaction
\u2022 Window focus, extraction, and content analysis

\u{1F6E0}\uFE0F AVAILABLE TOOLS:
\u2022 window_list() - List all open windows with properties
\u2022 window_focus(title, exact?) - Focus specific windows by title
\u2022 window_extract_text(windowHandle) - Extract text from windows
\u2022 window_extract_text_by_title(title, exact?) - Extract text by window title
\u2022 window_send_text(windowHandle, text) - Send text input to windows
\u2022 window_send_text_by_title(title, text, exact?) - Send text by window title
\u2022 clipboard_get_text() - Get clipboard content
\u2022 clipboard_set_text(text) - Set clipboard content

\u{1F4CA} PERFORMANCE FEATURES:
\u2022 PowerShell-based Windows API integration
\u2022 Real-time UI Automation capabilities
\u2022 Error handling with detailed diagnostics
\u2022 Safe window detection and manipulation
\u2022 Optimized text extraction algorithms

\u{1F527} PLATFORM SUPPORT:
\u2022 Windows-only (win32) automation server
\u2022 Supports all Windows applications with UI elements
\u2022 Compatible with VS Code, browsers, office apps, and more
\u2022 Works with visible and background windows

For detailed usage examples, query "glass usage" or "glass examples".`;
}

/** @returns {string} Multi-line list of suggested first steps and practices. */
function getSmartSuggestions() {
  return `\u{1F4A1} GLASSMCP SMART SUGGESTIONS:

\u{1F50D} Getting Started:
\u2022 Try: window_list() to see all open windows
\u2022 Try: window_focus("Visual Studio Code") to focus VS Code
\u2022 Try: window_extract_text_by_title("Notepad") to get text content
\u2022 Try: clipboard_get_text() to see current clipboard

\u26A1 Window Management:
\u2022 Use exact=true for precise window title matching
\u2022 Extract text before sending input for context awareness
\u2022 Focus windows before sending text for reliability
\u2022 Use clipboard operations for large text transfers

\u{1F6E0}\uFE0F Advanced Automation:
\u2022 Combine text extraction with analysis for intelligent responses
\u2022 Chain operations: focus \u2192 extract \u2192 analyze \u2192 respond
\u2022 Use window handles for direct window manipulation
\u2022 Monitor clipboard for cross-application workflows

\u{1F4C8} Best Practices:
\u2022 Always check window_list first to identify available windows
\u2022 Use descriptive window titles for reliable automation
\u2022 Handle errors gracefully with fallback strategies
\u2022 Test automation sequences before deployment`;
}

/** @returns {string} Multi-line usage examples for each tool family. */
function getUsageTips() {
  return `\u{1F3AF} GLASSMCP USAGE TIPS:

\u{1FA9F} Window Management:
\u2022 window_list() - Get all windows: [{"handle": "123", "title": "App", "isVisible": true}]
\u2022 window_focus("Chrome") - Focus browser window
\u2022 window_focus("Document.docx", true) - Exact title match

\u{1F4DD} Text Operations:
\u2022 window_extract_text_by_title("Notepad") - Get all text from Notepad
\u2022 window_send_text_by_title("Terminal", "echo Hello") - Send commands
\u2022 window_extract_text(123456) - Extract using window handle

\u{1F4CB} Clipboard Operations:
\u2022 clipboard_get_text() - Read clipboard: {"text": "copied content"}
\u2022 clipboard_set_text("Hello World") - Write to clipboard

\u{1F680} Pro Tips:
\u2022 Use window_list() first to discover available applications
\u2022 Focus windows before text operations for best results
\u2022 Extract text to understand current context before responding
\u2022 Use exact title matching for reliable automation
\u2022 Chain operations for complex automation workflows`;
}

/**
 * Build the payload returned by `window_list` for help/capability queries.
 *
 * @param {string} query - The query that triggered the help response.
 * @returns {object} JSON-serialisable system information.
 */
function buildSystemInfoPayload(query) {
  // One id and timestamp per response, so `debug` and `performance` agree.
  const requestId = `v5.1-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  const timestamp = new Date().toISOString();
  return {
    windows: [],
    count: 0,
    message: "GlassMCP system information and capabilities",
    debug: {
      requestId,
      queryLength: query.length,
      isCapabilityQuery: isSystemCapabilityQuery(query),
      isHelpQuery: isHelpQuery(query),
      timestamp
    },
    performance: {
      responseTime: "0ms",
      requestId,
      serverType: "glass-mcp-enhanced-v5.1.0",
      timestamp
    },
    systemInfo: {
      server: {
        name: "GlassMCP Enhanced",
        version: "5.1.0",
        platform: "Windows (win32)",
        status: "Active and Operational"
      },
      capabilities: {
        coreTools: [
          {
            name: "window_list",
            description: "List all open windows with properties",
            usage: "window_list(query?)",
            features: ["Window discovery", "Property inspection", "System information"]
          },
          {
            name: "window_focus",
            description: "Focus specific windows by title",
            usage: "window_focus(title, exact?)",
            features: ["Window activation", "Exact/fuzzy matching", "Error handling"]
          },
          {
            name: "window_extract_text",
            description: "Extract text content from windows",
            usage: "window_extract_text(windowHandle) / window_extract_text_by_title(title, exact?)",
            features: ["UI Automation", "Text extraction", "Content analysis"]
          },
          {
            name: "window_send_text",
            description: "Send text input to windows",
            usage: "window_send_text(windowHandle, text) / window_send_text_by_title(title, text, exact?)",
            features: ["Text input", "Automation", "Cross-application communication"]
          },
          {
            name: "clipboard_operations",
            description: "Clipboard get/set operations",
            usage: "clipboard_get_text() / clipboard_set_text(text)",
            features: ["System clipboard", "Cross-app data transfer", "Text operations"]
          }
        ],
        advancedFeatures: [
          "PowerShell-based Windows API integration",
          "Real-time UI Automation capabilities",
          "Error handling with detailed diagnostics",
          "Safe window detection and manipulation",
          "Optimized text extraction algorithms",
          "Cross-application automation workflows"
        ],
        platformSupport: [
          "Windows-only (win32) automation server",
          "All Windows applications with UI elements",
          "VS Code, browsers, office apps, and more",
          "Visible and background window support"
        ]
      }
    },
    smartSuggestions: getSmartSuggestions().split("\n"),
    usageTips: getUsageTips().split("\n"),
    systemInformation: getSystemInformation().split("\n")
  };
}

// ---------------------------------------------------------------------------
// Tool catalogue
// ---------------------------------------------------------------------------

const tools = [
  {
    name: "window_list",
    description: "List all open windows with their titles, handles, and properties. Use query parameter for help and system information.",
    inputSchema: {
      type: "object",
      properties: {
        query: {
          type: "string",
          description: 'Optional query for help or system information (e.g., "help", "capabilities")'
        }
      },
      required: []
    }
  },
  {
    name: "window_focus",
    description: "Focus a specific window by title",
    inputSchema: {
      type: "object",
      properties: {
        title: {
          type: "string",
          description: "The title of the window to focus"
        },
        exact: {
          type: "boolean",
          description: "Whether to match the title exactly",
          default: false
        }
      },
      required: ["title"]
    }
  },
  {
    name: "window_extract_text",
    description: "Extract all text content from a window using UI Automation",
    inputSchema: {
      type: "object",
      properties: {
        windowHandle: {
          type: "number",
          description: "The handle of the window to extract text from"
        }
      },
      required: ["windowHandle"]
    }
  },
  {
    name: "window_extract_text_by_title",
    description: "Extract text content from a window by finding it by title",
    inputSchema: {
      type: "object",
      properties: {
        title: {
          type: "string",
          description: "The title of the window to extract text from"
        },
        exact: {
          type: "boolean",
          description: "Whether to match the title exactly",
          default: false
        }
      },
      required: ["title"]
    }
  },
  {
    name: "window_send_text",
    description: "Send text input to a specific window",
    inputSchema: {
      type: "object",
      properties: {
        windowHandle: {
          type: "number",
          description: "The handle of the window to send text to"
        },
        text: {
          type: "string",
          description: "The text to send to the window"
        }
      },
      required: ["windowHandle", "text"]
    }
  },
  {
    name: "window_send_text_by_title",
    description: "Send text input to a window by finding it by title",
    inputSchema: {
      type: "object",
      properties: {
        title: {
          type: "string",
          description: "The title of the window to send text to"
        },
        text: {
          type: "string",
          description: "The text to send to the window"
        },
        exact: {
          type: "boolean",
          description: "Whether to match the title exactly",
          default: false
        }
      },
      required: ["title", "text"]
    }
  },
  {
    name: "clipboard_get_text",
    description: "Get text content from the system clipboard",
    inputSchema: {
      type: "object",
      properties: {},
      required: []
    }
  },
  {
    name: "clipboard_set_text",
    description: "Set text content to the system clipboard",
    inputSchema: {
      type: "object",
      properties: {
        text: {
          type: "string",
          description: "The text to set in the clipboard"
        }
      },
      required: ["text"]
    }
  }
];

// ---------------------------------------------------------------------------
// MCP server wiring
// ---------------------------------------------------------------------------

/** Matches ASCII and C1 control characters; they are replaced by spaces. */
const CONTROL_CHARACTERS = /[\x00-\x1F\x7F-\x9F]/g;

/**
 * Wrap a string in the MCP tool-result envelope.
 *
 * @param {string} text - Text payload.
 * @returns {{ content: Array<{ type: string, text: string }> }} Tool result.
 */
function toTextResult(text) {
  return { content: [{ type: "text", text }] };
}

/** Tool name -> implementation. Each receives the call arguments (never null). */
const toolHandlers = new Map([
  [
    "window_list",
    async ({ query }) => {
      const isInfoQuery =
        typeof query === "string" &&
        query !== "" &&
        (isSystemCapabilityQuery(query) || isHelpQuery(query));
      const payload = isInfoQuery
        ? buildSystemInfoPayload(query)
        : await listWindows();
      return toTextResult(JSON.stringify(payload, null, 2));
    }
  ],
  [
    "window_focus",
    async ({ title, exact = false }) =>
      toTextResult(JSON.stringify(await focusWindow(title, exact)))
  ],
  [
    "window_extract_text",
    async ({ windowHandle }) => {
      const extracted = await extractWindowText(windowHandle);
      // Only control characters are stripped. Quotes and backslashes are left
      // for JSON.stringify, which escapes them exactly once.
      const cleanContent = {
        ...extracted,
        windowTitle: extracted.windowTitle.replace(CONTROL_CHARACTERS, " ").trim(),
        textElements: extracted.textElements.map((el) => ({
          ...el,
          text: el.text.replace(CONTROL_CHARACTERS, " ").trim()
        }))
      };
      return toTextResult(JSON.stringify(cleanContent, null, 2));
    }
  ],
  [
    "window_extract_text_by_title",
    async ({ title, exact = false }) => {
      const target = await findWindowByTitle(title, exact);
      // The handle is passed as the decimal string reported by listWindows, so
      // no precision is lost converting it to a JS number.
      const extracted = await extractWindowText(target.handle);
      const textList = extracted.textElements.map((el) => el.text).join("\n");
      return toTextResult(
        `Window: ${extracted.windowTitle}
Elements: ${extracted.textElements.length}

Text Content:
${textList}`
      );
    }
  ],
  [
    "window_send_text",
    async ({ windowHandle, text }) =>
      toTextResult(JSON.stringify(await sendTextToWindow(windowHandle, text)))
  ],
  [
    "window_send_text_by_title",
    async ({ title, text, exact = false }) => {
      const target = await findWindowByTitle(title, exact);
      return toTextResult(
        JSON.stringify(await sendTextToWindow(target.handle, text))
      );
    }
  ],
  [
    "clipboard_get_text",
    async () => toTextResult(JSON.stringify({ text: await getClipboardText() }))
  ],
  [
    "clipboard_set_text",
    async ({ text }) => toTextResult(JSON.stringify(await setClipboardText(text)))
  ]
]);

const server = new Server(
  {
    name: "GlassMCP Enhanced",
    version: "5.1.0"
  },
  {
    capabilities: {
      tools: {}
    }
  }
);

// Tool failures are reported in-band (`isError: true`) instead of being thrown.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;
  try {
    const handler = toolHandlers.get(name);
    if (handler === undefined) {
      throw new GlassMCPError(`Unknown tool: ${name}`, "UNKNOWN_TOOL");
    }
    // `arguments` may be omitted for tools without required parameters.
    return await handler(args ?? {});
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      content: [
        {
          type: "text",
          text: JSON.stringify({ error: errorMessage })
        }
      ],
      isError: true
    };
  }
});

server.setRequestHandler(ListToolsRequestSchema, async () => {
  return { tools };
});

/** Connect the server to stdio. Logging goes to stderr, not stdout. */
async function main() {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error("Enhanced GlassMCP Server started successfully");
}

main().catch((error) => {
  console.error("Server failed to start:", error);
  process.exit(1);
});
//# sourceMappingURL=mcp-server-enhanced.mjs.map