@guyycodes/plugin-sdk
AI-powered plugin scaffolding tool - Create full-stack applications with 7+ AI models, 50+ business integrations, and production-ready infrastructure
397 lines (333 loc) • 16.8 kB
JavaScript
// inside the /server/chat folder there exist 2 files:
// 1. __init__.py
// 2. chat.py
const fs = require('fs-extra');
const path = require('path');
const chalk = require('chalk');
async function createChatFiles(serverPath) {
console.log(chalk.blue('🤖 Creating chat files...'));
// Create chat directory
const chatPath = path.join(serverPath, 'chat');
fs.ensureDirSync(chatPath);
// Create __init__.py
await createChatInit(chatPath);
// Create chat.py
await createChat(chatPath);
console.log(chalk.green('✅ Chat files created successfully'));
}
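// Example (illustrative): how a generator script might invoke this module.
// The serverPath value and the surrounding CLI wiring are assumptions, not part of this package.
//
//   const { createChatFiles } = require('./chat');
//   const serverPath = path.join(process.cwd(), 'server');
//   createChatFiles(serverPath).catch((err) => {
//     console.error(chalk.red('Failed to create chat files:'), err);
//     process.exit(1);
//   });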
async function createChatInit(chatPath) {
const initPy = `"""
Chat package for the LangGraph chatbot
Provides state management and graph execution for chat requests
"""`;
fs.writeFileSync(path.join(chatPath, '__init__.py'), initPy);
}
async function createChat(chatPath) {
const chatPy = `"""
Chat handling module for FastAPI
"""
import json
from typing import Dict, Any, Optional, List, Union
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from app_state import app_state
from agent.graph import graph
from agent.state import SummarySchema
from PIL import Image
import requests
from io import BytesIO
import base64
import logging
import uuid
from models.model_manager import model_manager
logger = logging.getLogger(__name__)
def preprocess_image_for_phi4(image_input: str, min_size: int = 448, max_size: int = 1344) -> str:
"""
Preprocess an image to ensure it meets size requirements for Phi4.
Phi4 processes images in 448×448 patches and supports up to 64 patches (8×8 grid);
the full 64-patch capacity is only used when max_size is raised to 3584.
Higher resolution generally improves quality, but results can vary.
Resolution vs Quality trade-offs:
- 896 (2×448): 4 patches - Fast, but poor for documents
- 1344 (3×448): 9 patches - Basic quality
- 1792 (4×448): 16 patches - Good quality
- 2240 (5×448): 25 patches - Very good quality
- 2688 (6×448): 36 patches - Excellent quality
- 3136 (7×448): 49 patches - Near maximum quality
- 3584 (8×448): 64 patches - Maximum quality (uses all available patches)
Args:
image_input: Image as URL, base64 string, or file path
min_size: Minimum dimension size (default 448 for Phi4)
max_size: Maximum dimension size (default 1344; raise to 3584 = 448*8 for the full 64-patch capacity)
Returns:
Base64 encoded image string that meets size requirements
"""
try:
# Load the image
if isinstance(image_input, str):
if image_input.startswith('http'):
# URL
response = requests.get(image_input, timeout=30)
response.raise_for_status()
pil_img = Image.open(BytesIO(response.content))
elif image_input.startswith('data:image'):
# Base64
base64_data = image_input.split(',')[1]
pil_img = Image.open(BytesIO(base64.b64decode(base64_data)))
else:
# File path
pil_img = Image.open(image_input)
else:
pil_img = image_input
# Convert to RGB if necessary
if pil_img.mode not in ('RGB', 'L'):
pil_img = pil_img.convert('RGB')
width, height = pil_img.size
# Calculate scale factor to ensure min_size <= dimensions <= max_size
# First, ensure minimum size (but never downscale an image that is already large enough)
scale_factor = max(min_size / width, min_size / height, 1.0)
# Then, check if this would exceed maximum size
new_width = width * scale_factor
new_height = height * scale_factor
if new_width > max_size or new_height > max_size:
# Scale down to fit within max_size
scale_factor = min(max_size / width, max_size / height)
# Apply final scale
new_width = int(width * scale_factor)
new_height = int(height * scale_factor)
# Ensure we still meet minimum requirements
if new_width < min_size or new_height < min_size:
# This means the aspect ratio is extreme and we need to letterbox
if width > height:
new_width = max_size
new_height = min_size
else:
new_width = min_size
new_height = max_size
# Create letterbox
letterboxed = Image.new('RGB', (new_width, new_height), color='white')
# Scale image to fit
scale = min(new_width / width, new_height / height)
resize_width = int(width * scale)
resize_height = int(height * scale)
resized = pil_img.resize((resize_width, resize_height), Image.Resampling.LANCZOS)
# Center paste
paste_x = (new_width - resize_width) // 2
paste_y = (new_height - resize_height) // 2
letterboxed.paste(resized, (paste_x, paste_y))
final_image = letterboxed
else:
# Simple resize without letterboxing
final_image = pil_img.resize((new_width, new_height), Image.Resampling.LANCZOS)
# Convert to base64
buffered = BytesIO()
final_image.save(buffered, format="PNG")
img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
logger.info(f"Preprocessed image from {width}x{height} to {final_image.size[0]}x{final_image.size[1]}")
return f"data:image/png;base64,{img_base64}"
except Exception as e:
logger.error(f"Error preprocessing image: {e}")
# Return original if preprocessing fails
return image_input
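# Example (illustrative): calling preprocess_image_for_phi4 before handing an image to Phi4.
# The file path is hypothetical; kept as a comment so nothing runs at import time.
#
#   data_uri = preprocess_image_for_phi4("/tmp/receipt.png", min_size=448, max_size=1344)
#
# Phi4 tiles the result into 448x448 patches, roughly ceil(width/448) * ceil(height/448) of them,
# so a 1344x1344 output occupies about 9 of the 64 available patches.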
async def handle_chat(message: str, session_id: Optional[str] = None, model: Optional[str] = None,
images: Optional[List[str]] = None, audios: Optional[List[str]] = None) -> Dict[str, Any]:
"""
Handle chat messages
Args:
message: The user's message
session_id: Optional session ID for conversation threading
model: Optional model name (e.g., 'gpt-4o-mini', 'Qwen25Math', 'phi4')
images: Optional list of images (URLs, base64, or file paths)
audios: Optional list of audio files (URLs, base64, or file paths)
"""
try:
if not message:
raise HTTPException(status_code=400, detail="Message is required")
session_id = session_id or "default"
print(f"Processing message for session {session_id}: {message}")
if model:
print(f"Using model: {model}")
# Only set active model if it's different to avoid cache clearing
if app_state.get_active_model() != model:
app_state.set_active_model(model)
else:
model = app_state.get_active_model()
print(f"Using active model: {app_state.get_active_model()}")
# Create the initial state with the user's message
initial_state = {
"messages": [HumanMessage(content=message)],
"context": [],
"searchQuery": "",
"needsWebSearch": False,
"maxToolCalls": 2,
"toolCallCount": 0,
"summarySchema": SummarySchema(),
"model": model or "gpt-4o-mini", # Default to gpt-4o-mini if not specified
"session_id": session_id # Add session_id to state
}
# Add multimodal inputs to state if using multimodal models
multimodal_models = ["phi4", "Qwen25VL", "FluxKontext"]
if model in multimodal_models and (images or audios):
# Preprocess images to ensure they meet minimum size requirements
processed_images = []
if images:
for img in images:
# Only preprocess for phi4, Qwen25VL can handle images directly
if model == "phi4":
processed_img = preprocess_image_for_phi4(img)
processed_images.append(processed_img)
else:
processed_images.append(img)
logger.info(f"Processed {len(processed_images)} images for {model}")
initial_state["multimodal_inputs"] = {
"images": processed_images,
"audios": audios or []
}
# Run the graph
result = await graph.ainvoke(initial_state, {
"configurable": {
"thread_id": session_id
}
})
##################################################################################################################
# Call orchestration logic to check if we should pass to next agent
orchestration_result = await app_state.handle_agent_completion(result)
if orchestration_result["passed"]:
logger.info(f"State was passed to next agent in chat.py: {orchestration_result}")
# Check if we should show the response to the user
if not result.get("shouldShowResponseToUser", True):
# Don't show response to user, just return orchestration info
return {
"message": "Request processed and forwarded to next agent.",
"messageType": "SystemMessage",
"sessionId": session_id,
"metadata": {
"passedToNextAgent": True,
"nextAgentResponse": orchestration_result.get("next_agent_response")
}
}
# If shouldShowResponseToUser is True, continue to format and return the response below
##################################################################################################################
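# Illustrative shape of orchestration_result, inferred from its usage above (the exact contents
# are defined by app_state.handle_agent_completion and may differ):
#   {"passed": True, "next_agent_response": {...}}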
# Debug: Log the messages to understand the flow
print("Message flow:")
for idx, msg in enumerate(result["messages"]):
msg_type = msg.__class__.__name__
content_preview = msg.content[:100] if msg.content else "(no content)"
tool_calls = hasattr(msg, 'tool_calls') and msg.tool_calls
print(f"{idx}: {msg_type} - {content_preview}{'...' if len(msg.content or '') > 100 else ''}{' [has tool calls]' if tool_calls else ''}")
# Extract the final message from the result
messages = result["messages"]
# Find the last AI message with actual content and no pending tool calls
final_message = messages[-1]
for i in range(len(messages) - 1, -1, -1):
msg = messages[i]
if isinstance(msg, AIMessage) and msg.content:
# Check if it's an AI message with tool calls
if not hasattr(msg, 'tool_calls') or not msg.tool_calls:
final_message = msg
break
# If we couldn't find a complete AI response, there might be an issue
if not final_message.content or (isinstance(final_message, AIMessage) and hasattr(final_message, 'tool_calls') and final_message.tool_calls):
print("Warning: Could not find a complete AI response. Last message has tool calls or no content.")
# Get the summarySchema
summary_schema = result.get("summarySchema", SummarySchema())
# Check if this is a multimodal model with multimodal inputs
multimodal_models = ["phi4", "Qwen25VL", "FluxKontext"]
if model in multimodal_models and "multimodal_inputs" in result:
# Update summarySchema with multimodal information
multimodal_inputs = result["multimodal_inputs"]
had_images = len(multimodal_inputs.get("images", [])) > 0
had_audio = len(multimodal_inputs.get("audios", [])) > 0
if had_images or had_audio:
if model == "Qwen25VL":
summary_schema.contentSummary = f"Vision-language response: analyzed {len(multimodal_inputs.get('images', []))} image(s)"
else:
summary_schema.contentSummary = f"Multimodal response: {'images' if had_images else ''}{' and ' if had_images and had_audio else ''}{'audio' if had_audio else ''} processed"
# Format the response for multimodal models
response = {
"message": final_message.content or "I've processed your multimodal input.",
"messageType": final_message.__class__.__name__,
"sessionId": session_id,
"metadata": {
"toolsUsed": result.get("toolCallCount", 0) > 0,
"searchPerformed": len(result.get("context", [])) > 0,
"hadImages": had_images,
"hadAudio": had_audio
},
"summarySchema": summary_schema.model_dump()
}
# Check if this is a Flux model and we have generated images
elif model and "Flux" in model and hasattr(final_message, 'additional_kwargs'):
generated_image = final_message.additional_kwargs.get('generated_image')
if generated_image:
# COMMENTED OUT: Don't add image to summary schema
# summary_schema.images = [generated_image]
# Also set a title and summary based on the prompt
prompt = final_message.additional_kwargs.get('prompt', '')
summary_schema.title = f"Generated Image: {prompt[:50]}..." if len(prompt) > 50 else f"Generated Image: {prompt}"
summary_schema.contentSummary = f"AI-generated image based on the prompt: '{prompt}'"
# Format the response for Flux
response = {
"message": final_message.content or "Image generated successfully.",
"messageType": final_message.__class__.__name__,
"sessionId": session_id,
"metadata": {
"toolsUsed": False,
"searchPerformed": False,
"hasImage": bool(generated_image)
},
"summarySchema": summary_schema.model_dump()
}
# Check if this is a Qwen Math model and we should parse thoughts
elif model and "Qwen25Math" in model and final_message.content and "\\n" in final_message.content:
# Parse the Qwen Math response, whose thought steps are separated by newlines
parts = final_message.content.split("\\n")
# First part is the main response
main_response = parts[0]
# Rest are thoughts
thoughts = []
if len(parts) > 1:
thoughts = [{"content": thought.strip(), "type": "thought"} for thought in parts[1:] if thought.strip()]
# Update summarySchema with thoughts
summary_schema.thoughts = thoughts
# Format the response with parsed thoughts
response = {
"message": main_response,
"messageType": final_message.__class__.__name__,
"sessionId": session_id,
"metadata": {
"toolsUsed": result.get("toolCallCount", 0) > 0,
"searchPerformed": len(result.get("context", [])) > 0,
"hasThoughts": len(thoughts) > 0
},
"summarySchema": summary_schema.model_dump()
}
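# Illustrative example of the parsing above (the model output shown is hypothetical): a reply such as
#   "The answer is 4\\nStep 1: divide both sides by 2\\nStep 2: check that 2 * 4 = 8"
# yields main_response = "The answer is 4" and two thought dicts built from the remaining lines.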
else:
# Format the standard response
response = {
"message": final_message.content or "I couldn't generate a response. Please try again.",
"messageType": final_message.__class__.__name__,
"sessionId": session_id,
"metadata": {
"toolsUsed": result.get("toolCallCount", 0) > 0,
"searchPerformed": len(result.get("context", [])) > 0,
},
"summarySchema": summary_schema.model_dump()
}
return response
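# Illustrative example of the payload returned above (values are hypothetical):
#   {
#       "message": "Paris is the capital of France.",
#       "messageType": "AIMessage",
#       "sessionId": "default",
#       "metadata": {"toolsUsed": False, "searchPerformed": True},
#       "summarySchema": {...}
#   }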
except HTTPException:
raise
except Exception as e:
import traceback
print(f"Error processing chat: {e}")
print(f"Full traceback:\\n{traceback.format_exc()}")
raise HTTPException(
status_code=500,
detail=f"An error occurred while processing your message: {str(e)}"
)`;
fs.writeFileSync(path.join(chatPath, 'chat.py'), chatPy);
}
module.exports = {
createChatFiles,
createChatInit,
createChat
};