@guyycodes/plugin-sdk

AI-powered plugin scaffolding tool - Create full-stack applications with 7+ AI models, 50+ business integrations, and production-ready infrastructure

397 lines (333 loc) 16.8 kB
// Inside the /server/chat folder this generator creates two files:
// 1. __init__.py
// 2. chat.py
const fs = require('fs-extra');
const path = require('path');
const chalk = require('chalk');

async function createChatFiles(serverPath) {
  console.log(chalk.blue('🤖 Creating chat files...'));

  // Create chat directory
  const chatPath = path.join(serverPath, 'chat');
  fs.ensureDirSync(chatPath);

  // Create __init__.py
  await createChatInit(chatPath);

  // Create chat.py
  await createChat(chatPath);

  console.log(chalk.green('✅ Chat files created successfully'));
}

async function createChatInit(chatPath) {
  const initPy = `"""
Chat package for the LangGraph chatbot
Provides state management and graph execution for the chatbot
"""`;

  fs.writeFileSync(path.join(chatPath, '__init__.py'), initPy);
}

async function createChat(chatPath) {
  const chatPy = `"""
Chat handling module for FastAPI
"""
import json
from typing import Dict, Any, Optional, List, Union
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from app_state import app_state
from agent.graph import graph
from agent.state import SummarySchema
from PIL import Image
import requests
from io import BytesIO
import base64
import logging
import uuid
from models.model_manager import model_manager

logger = logging.getLogger(__name__)


def preprocess_image_for_phi4(image_input: str, min_size: int = 448, max_size: int = 1344) -> str:
    """
    Preprocess an image to ensure it meets size requirements for Phi4.
    Can be tuned up to Phi4's full 64-patch capacity.
    Phi4 processes images in 448×448 patches and supports up to 64 patches (8×8 grid).
    Higher quality settings can have variable results.

    Resolution vs. quality trade-offs:
    - 896 (2×448): 4 patches - fast, but poor for documents
    - 1344 (3×448): 9 patches - basic quality
    - 1792 (4×448): 16 patches - good quality
    - 2240 (5×448): 25 patches - very good quality
    - 2688 (6×448): 36 patches - excellent quality
    - 3136 (7×448): 49 patches - near maximum quality
    - 3584 (8×448): 64 patches - maximum quality (uses all available patches)

    Args:
        image_input: Image as URL, base64 string, or file path
        min_size: Minimum dimension size (default 448 for Phi4)
        max_size: Maximum dimension size (default 1344; use 3584 = 448*8 for maximum quality)

    Returns:
        Base64 encoded image string that meets size requirements
    """
    try:
        # Load the image
        if isinstance(image_input, str):
            if image_input.startswith('http'):
                # URL
                response = requests.get(image_input)
                pil_img = Image.open(BytesIO(response.content))
            elif image_input.startswith('data:image'):
                # Base64
                base64_data = image_input.split(',')[1]
                pil_img = Image.open(BytesIO(base64.b64decode(base64_data)))
            else:
                # File path
                pil_img = Image.open(image_input)
        else:
            pil_img = image_input

        # Convert to RGB if necessary
        if pil_img.mode not in ('RGB', 'L'):
            pil_img = pil_img.convert('RGB')

        width, height = pil_img.size

        # Calculate scale factor to ensure min_size <= dimensions <= max_size
        # First, ensure minimum size
        scale_factor = max(min_size / width, min_size / height)

        # Then, check if this would exceed maximum size
        new_width = width * scale_factor
        new_height = height * scale_factor

        if new_width > max_size or new_height > max_size:
            # Scale down to fit within max_size
            scale_factor = min(max_size / width, max_size / height)

        # Apply final scale
        new_width = int(width * scale_factor)
        new_height = int(height * scale_factor)

        # Ensure we still meet minimum requirements
        if new_width < min_size or new_height < min_size:
            # This means the aspect ratio is extreme and we need to letterbox
            if width > height:
                new_width = max_size
                new_height = min_size
            else:
                new_width = min_size
                new_height = max_size

            # Create letterbox
            letterboxed = Image.new('RGB', (new_width, new_height), color='white')

            # Scale image to fit
            scale = min(new_width / width, new_height / height)
            resize_width = int(width * scale)
            resize_height = int(height * scale)
            resized = pil_img.resize((resize_width, resize_height), Image.Resampling.LANCZOS)

            # Center paste
            paste_x = (new_width - resize_width) // 2
            paste_y = (new_height - resize_height) // 2
            letterboxed.paste(resized, (paste_x, paste_y))

            final_image = letterboxed
        else:
            # Simple resize without letterboxing
            final_image = pil_img.resize((new_width, new_height), Image.Resampling.LANCZOS)

        # Convert to base64
        buffered = BytesIO()
        final_image.save(buffered, format="PNG")
        img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')

        logger.info(f"Preprocessed image from {width}x{height} to {final_image.size[0]}x{final_image.size[1]}")

        return f"data:image/png;base64,{img_base64}"

    except Exception as e:
        logger.error(f"Error preprocessing image: {e}")
        # Return original if preprocessing fails
        return image_input


async def handle_chat(message: str, session_id: Optional[str] = None, model: Optional[str] = None,
                      images: Optional[List[str]] = None, audios: Optional[List[str]] = None) -> Dict[str, Any]:
    """
    Handle chat messages

    Args:
        message: The user's message
        session_id: Optional session ID for conversation threading
        model: Optional model name (e.g., 'gpt-4o-mini', 'Qwen25Math', 'phi4')
        images: Optional list of images (URLs, base64, or file paths)
        audios: Optional list of audio files (URLs, base64, or file paths)
    """
    try:
        if not message:
            raise HTTPException(status_code=400, detail="Message is required")

        session_id = session_id or "default"
        print(f"Processing message for session {session_id}: {message}")

        if model:
            print(f"Using model: {model}")
            # Only set active model if it's different, to avoid cache clearing
            if app_state.get_active_model() != model:
                app_state.set_active_model(model)
        else:
            model = app_state.get_active_model()
            print(f"Using active model: {app_state.get_active_model()}")

        # Create the initial state with the user's message
        initial_state = {
            "messages": [HumanMessage(content=message)],
            "context": [],
            "searchQuery": "",
            "needsWebSearch": False,
            "maxToolCalls": 2,
            "toolCallCount": 0,
            "summarySchema": SummarySchema(),
            "model": model or "gpt-4o-mini",  # Default to gpt-4o-mini if not specified
            "session_id": session_id  # Add session_id to state
        }

        # Add multimodal inputs to state if using multimodal models
        multimodal_models = ["phi4", "Qwen25VL", "FluxKontext"]
        if model in multimodal_models and (images or audios):
            # Preprocess images to ensure they meet minimum size requirements
            processed_images = []
            if images:
                for img in images:
                    # Only preprocess for phi4; Qwen25VL can handle images directly
                    if model == "phi4":
                        processed_img = preprocess_image_for_phi4(img)
                        processed_images.append(processed_img)
                    else:
                        processed_images.append(img)
                logger.info(f"Processed {len(processed_images)} images for {model}")

            initial_state["multimodal_inputs"] = {
                "images": processed_images,
                "audios": audios or []
            }

        # Run the graph
        result = await graph.ainvoke(initial_state, {
            "configurable": {
                "thread_id": session_id
            }
        })

        ##########################################################################################
        # Call orchestration logic to check if we should pass to next agent
        orchestration_result = await app_state.handle_agent_completion(result)
        if orchestration_result["passed"]:
            logger.info(f"State was passed to next agent in chat.py: {orchestration_result}")

            # Check if we should show the response to the user
            if not result.get("shouldShowResponseToUser", True):
                # Don't show response to user, just return orchestration info
                return {
                    "message": "Request processed and forwarded to next agent.",
                    "messageType": "SystemMessage",
                    "sessionId": session_id,
                    "metadata": {
                        "passedToNextAgent": True,
                        "nextAgentResponse": orchestration_result.get("next_agent_response")
                    }
                }
            # If shouldShowResponseToUser is True, continue to format and return the response below
        ##########################################################################################

        # Debug: Log the messages to understand the flow
        print("Message flow:")
        for idx, msg in enumerate(result["messages"]):
            msg_type = msg.__class__.__name__
            content_preview = msg.content[:100] if msg.content else "(no content)"
            tool_calls = hasattr(msg, 'tool_calls') and msg.tool_calls
            print(f"{idx}: {msg_type} - {content_preview}{'...' if len(msg.content or '') > 100 else ''}{' [has tool calls]' if tool_calls else ''}")

        # Extract the final message from the result
        messages = result["messages"]

        # Find the last AI message with actual content and no pending tool calls
        final_message = messages[-1]
        for i in range(len(messages) - 1, -1, -1):
            msg = messages[i]
            if isinstance(msg, AIMessage) and msg.content:
                # Check if it's an AI message with tool calls
                if not hasattr(msg, 'tool_calls') or not msg.tool_calls:
                    final_message = msg
                    break

        # If we couldn't find a complete AI response, there might be an issue
        if not final_message.content or (isinstance(final_message, AIMessage) and hasattr(final_message, 'tool_calls') and final_message.tool_calls):
            print("Warning: Could not find a complete AI response. Last message has tool calls or no content.")

        # Get the summarySchema
        summary_schema = result.get("summarySchema", SummarySchema())

        # Check if this is a multimodal model with multimodal inputs
        multimodal_models = ["phi4", "Qwen25VL", "FluxKontext"]
        if model in multimodal_models and "multimodal_inputs" in result:
            # Update summarySchema with multimodal information
            multimodal_inputs = result["multimodal_inputs"]
            had_images = len(multimodal_inputs.get("images", [])) > 0
            had_audio = len(multimodal_inputs.get("audios", [])) > 0

            if had_images or had_audio:
                if model == "Qwen25VL":
                    summary_schema.contentSummary = f"Vision-language response: analyzed {len(multimodal_inputs.get('images', []))} image(s)"
                else:
                    summary_schema.contentSummary = f"Multimodal response: {'images' if had_images else ''}{' and ' if had_images and had_audio else ''}{'audio' if had_audio else ''} processed"

            # Format the response for multimodal models
            response = {
                "message": final_message.content or "I've processed your multimodal input.",
                "messageType": final_message.__class__.__name__,
                "sessionId": session_id,
                "metadata": {
                    "toolsUsed": result.get("toolCallCount", 0) > 0,
                    "searchPerformed": len(result.get("context", [])) > 0,
                    "hadImages": had_images,
                    "hadAudio": had_audio
                },
                "summarySchema": summary_schema.model_dump()
            }

        # Check if this is a Flux model and we have generated images
        elif model and "Flux" in model and hasattr(final_message, 'additional_kwargs'):
            generated_image = final_message.additional_kwargs.get('generated_image')
            if generated_image:
                # COMMENTED OUT: Don't add image to summary schema
                # summary_schema.images = [generated_image]
                # Also set a title and summary based on the prompt
                prompt = final_message.additional_kwargs.get('prompt', '')
                summary_schema.title = f"Generated Image: {prompt[:50]}..." if len(prompt) > 50 else f"Generated Image: {prompt}"
                summary_schema.contentSummary = f"AI-generated image based on the prompt: '{prompt}'"

            # Format the response for Flux
            response = {
                "message": final_message.content or "Image generated successfully.",
                "messageType": final_message.__class__.__name__,
                "sessionId": session_id,
                "metadata": {
                    "toolsUsed": False,
                    "searchPerformed": False,
                    "hasImage": bool(generated_image)
                },
                "summarySchema": summary_schema.model_dump()
            }

        # Check if this is a Qwen Math model and we should parse thoughts
        elif model and "Qwen25Math" in model and final_message.content and "\\n" in final_message.content:
            # Parse the Qwen Math response, which contains thoughts separated by newlines
            parts = final_message.content.split("\\n")

            # First part is the main response
            main_response = parts[0]

            # Rest are thoughts
            thoughts = []
            if len(parts) > 1:
                thoughts = [{"content": thought.strip(), "type": "thought"} for thought in parts[1:] if thought.strip()]

            # Update summarySchema with thoughts
            summary_schema.thoughts = thoughts

            # Format the response with parsed thoughts
            response = {
                "message": main_response,
                "messageType": final_message.__class__.__name__,
                "sessionId": session_id,
                "metadata": {
                    "toolsUsed": result.get("toolCallCount", 0) > 0,
                    "searchPerformed": len(result.get("context", [])) > 0,
                    "hasThoughts": len(thoughts) > 0
                },
                "summarySchema": summary_schema.model_dump()
            }

        else:
            # Format the standard response
            response = {
                "message": final_message.content or "I couldn't generate a response. Please try again.",
                "messageType": final_message.__class__.__name__,
                "sessionId": session_id,
                "metadata": {
                    "toolsUsed": result.get("toolCallCount", 0) > 0,
                    "searchPerformed": len(result.get("context", [])) > 0,
                },
                "summarySchema": summary_schema.model_dump()
            }

        return response

    except HTTPException:
        raise
    except Exception as e:
        import traceback
        print(f"Error processing chat: {e}")
        print(f"Full traceback:\\n{traceback.format_exc()}")
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred while processing your message: {str(e)}"
        )`;

  fs.writeFileSync(path.join(chatPath, 'chat.py'), chatPy);
}

module.exports = {
  createChatFiles,
  createChatInit,
  createChat
};