@guyycodes/plugin-sdk

AI-powered plugin scaffolding tool - Create full-stack applications with 7+ AI models, 50+ business integrations, and production-ready infrastructure

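// The generator below writes four files into an agent/ directory under the
// supplied server path; the resulting layout is:
//
//   <serverPath>/
//   └── agent/
//       ├── __init__.py
//       ├── README.md
//       ├── state.py
//       └── graph.py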
// Inside the /server/agent folder there are two core files:
// 1. state.py
// 2. graph.py
const fs = require('fs-extra');
const path = require('path');
const chalk = require('chalk');

async function createAgentFiles(serverPath) {
  console.log(chalk.blue('🤖 Creating agent files...'));

  // Create agent directory
  const agentPath = path.join(serverPath, 'agent');
  fs.ensureDirSync(agentPath);

  // Create __init__.py
  await createAgentInit(agentPath);

  // Create README.md
  await createAgentReadme(agentPath);

  // Create state.py
  await createAgentState(agentPath);

  // Create graph.py
  await createAgentGraph(agentPath);

  console.log(chalk.green('✅ Agent files created successfully'));
}

async function createAgentInit(agentPath) {
  const initPy = `"""
Agent package for the LangGraph chatbot
Provides state management and graph execution for the chatbot agent
"""`;

  fs.writeFileSync(path.join(agentPath, '__init__.py'), initPy);
}

async function createAgentReadme(agentPath) {
  const readmeMd = `# System Architecture Summary

This multi-agent orchestration system harmonizes AI workflows through modular, independent agents that communicate via API calls. Whether you run cloud-based or local LLMs, this architecture enables integration.

This README focuses primarily on the Python-based backend. A Node.js-based backend also exists; it is similar but provides a smaller set of features.

**Key Features:**
- **Modular Design**: Each agent is self-contained and can be developed and deployed independently
- **Flexible Control**: Three flags provide precise control over execution and visibility
- **API-Based Communication**: Agents may run on different servers, use different languages, scale independently, and use custom tools or existing MCP tools
- **Context Preservation**: SummarySchema carries metadata between agents while maintaining conversation history
- **Model Agnostic**: Each agent can use different models (Phi4, Qwen, DeepSeek, GPT-4, Claude, Llama, Grok) and tools

**Example Workflows:**
- Research Assistant → Fact Checker → Report Writer
- Web Scraper → Data Analyzer → Visualization Generator
- Language Translator → Cultural Adapter → Tone Adjuster

Each specialized agent hands off work seamlessly, creating a powerful team of AI assistants working together.

# Single node graph w/tools

\`\`\`mermaid
graph TD
    Start([Start]) --> analyze[analyze_query]
    analyze -->|needsWebSearch=False| generate_response[generate_response]
    analyze -->|needsWebSearch=True| agent_with_tools[agent_with_tools]
    agent_with_tools -->|has tool_calls| execute_tools[execute_tools]
    agent_with_tools -->|no tool_calls or<br/>exceeded limit| generate_response
    execute_tools --> agent_with_tools
    generate_response --> END([END])

    style analyze fill:#e1f5fe
    style agent_with_tools fill:#fff3e0
    style execute_tools fill:#f3e5f5
    style generate_response fill:#e8f5e9
    style END fill:#ffcdd2
\`\`\`

# Multi-Agent Orchestration Flow

\`\`\`mermaid
graph TD
    subgraph "Agent 1 - Web Search"
        Start1([Start]) --> A1_analyze[analyze_query]
        A1_analyze -->|needs tools| A1_tools[agent_with_tools]
        A1_analyze -->|no tools| A1_gen[generate_response]
        A1_tools --> A1_exec[execute_tools]
        A1_exec --> A1_tools
        A1_tools -->|done| A1_gen
        A1_gen -->|"Sets flags:<br/>agentCompleted=true<br/>shouldPassToNextAgent<br/>shouldShowResponseToUser"| E1([END])
    end

    E1 -->|"Returns State"| Orchestrator1{{"app_state<br/>handle_agent_completion"}}
    Orchestrator1 -->|"shouldPassToNextAgent=false"| User1[User sees response]
    Orchestrator1 -->|"shouldPassToNextAgent=true<br/>shouldShowResponseToUser=true"| ShowAndPass[Show response + Pass to next]
    Orchestrator1 -->|"shouldPassToNextAgent=true<br/>shouldShowResponseToUser=false"| SilentPass[Silent handoff]

    ShowAndPass --> API1[["POST /chat<br/>to NEXT_AGENT_ADDRESS"]]
    SilentPass --> API1
    API1 --> Start2([Start])

    subgraph "Agent 2 - Process Web data"
        Start2 --> A2_analyze[analyze_query]
        A2_analyze -->|needs tools| A2_tools[agent_with_tools]
        A2_analyze -->|no tools| A2_gen[generate_response]
        A2_tools --> A2_exec[execute_tools]
        A2_exec --> A2_tools
        A2_tools -->|done| A2_gen
        A2_gen -->|"Sets flags:<br/>agentCompleted=true<br/>shouldPassToNextAgent<br/>shouldShowResponseToUser"| E2([END])
    end

    E2 -->|"Returns State"| Orchestrator2{{"app_state<br/>handle_agent_completion"}}
    Orchestrator2 -->|"shouldPassToNextAgent=false"| User2[Final Response]
    Orchestrator2 -->|"Both flags true"| API2[["POST /chat<br/>to NEXT_AGENT_ADDRESS"]]
    API2 --> NextAgent[Next Agent...]

    style A1_gen fill:#e8f5e9
    style A2_gen fill:#e8f5e9
    style Orchestrator1 fill:#f0f4c3
    style Orchestrator2 fill:#f0f4c3
    style API1 fill:#fff3e0
    style API2 fill:#fff3e0
    style E1 fill:#ffcdd2
    style E2 fill:#ffcdd2
    style ShowAndPass fill:#e3f2fd
    style SilentPass fill:#f5f5f5
\`\`\`

# Data Flow Through the System

\`\`\`
1. main.py      → Receives POST /chat request
2. chat.py      → Creates initial state, calls graph
3. graph.py     → Runs LLM (for Python runs: memory_optimizer, model_loader, entrypoint) (analyze → tools → response)
4. chat.py      → Checks orchestration flags
5. app_state.py → If needed, forwards to next agent
6. chat.py      → Formats final response
7. main.py      → Returns JSON response to user
\`\`\`

# Multi-Agent Orchestration

- Each agent is a self-contained graph that processes independently and can pass state to the next agent via API calls.
- Each agent is equipped with its own tool wrapper. Some agents were not built to support the 'with_tools' tool-calling interface out of the box, but they can still use tools through prompt engineering. *DeepHermes3 is an example of this.*

**Key Flags:**
- Agents can set these dynamically by manipulating their prompts, or by adding one line of code.
- \`agentCompleted\`: Set to \`true\` by generate_response when the agent finishes
- \`shouldPassToNextAgent\`: Controls whether state is passed to the next agent
- \`shouldShowResponseToUser\`: Controls whether the user sees this agent's response

**Configuration:**
\`\`\`python
# Example 1: Show response AND pass to next agent (default)
initial_state = {
    "messages": [HumanMessage(content="Find AI news")],
    "shouldPassToNextAgent": True,
    "shouldShowResponseToUser": True  # User sees Agent 1's response
}

# Example 2: Silent handoff - user only sees final response
initial_state = {
    "messages": [HumanMessage(content="Process quietly")],
    "shouldPassToNextAgent": True,
    "shouldShowResponseToUser": False  # Skip intermediate responses
}
\`\`\`

**Environment Setup:**
\`\`\`bash
# Set the next agent's address
export NEXT_AGENT_ADDRESS="http://localhost:3001"
\`\`\`

**How It Works:**
1. Agent completes its task and sets \`agentCompleted=true\`
2. If \`shouldPassToNextAgent=true\`, orchestration sends a POST request to \`NEXT_AGENT_ADDRESS\`
3. If \`shouldShowResponseToUser=true\`, the user sees the response before handoff
4. Each agent enriches the summarySchema before passing it along
`;

  fs.writeFileSync(path.join(agentPath, 'README.md'), readmeMd);
}

async function createAgentState(agentPath) {
  const statePy = `"""
State definition for the LangGraph agent
"""
from typing import List, TypedDict, Annotated, Dict, Optional
from langgraph.graph import add_messages
from langchain_core.messages import BaseMessage
from pydantic import BaseModel


class SummarySchema(BaseModel):
    """Summary schema for search results"""
    title: str = ""
    url: str = ""
    contentSummary: str = ""
    images: List[str] = []
    sources: List[str] = []
    files: List[str] = []
    tokens_output: int = 0
    tokens_input: int = 0
    model: str = ""
    thoughts: List[Dict[str, str]] = []  # List of thoughts with content and type


class State(TypedDict):
    """
    The primary state of the agent, storing information between nodes
    """
    # Messages track the primary execution state
    messages: Annotated[List[BaseMessage], add_messages]

    # Context accumulates information from web searches and other tools
    context: List[str]

    # Track the current search query for web searches
    searchQuery: str

    # Track whether a web search is needed
    needsWebSearch: bool

    # Maximum number of tool calls to prevent infinite loops
    maxToolCalls: int

    # Current count of tool calls
    toolCallCount: int

    # Summary schema for search results
    summarySchema: SummarySchema

    # Model to use for generation (optional, defaults to gpt-4o-mini)
    model: str

    # Session ID for WebSocket thought streaming
    session_id: Optional[str]

    # Multimodal inputs for models like Phi4
    multimodal_inputs: Optional[Dict[str, List[str]]]

    # Flag indicating this agent has completed its work
    agentCompleted: Optional[bool]

    # Flag indicating state should be passed to next agent
    shouldPassToNextAgent: Optional[bool]

    # Flag indicating whether to show this agent's response to the user
    shouldShowResponseToUser: Optional[bool]
`;

  fs.writeFileSync(path.join(agentPath, 'state.py'), statePy);
}

async function createAgentGraph(agentPath) {
  const graphPy = `"""
LangGraph Chatbot with Web Search Capabilities

This chatbot can answer questions, search the web, and maintain conversation context.
""" import os import json import logging from typing import List, Dict, Any, Literal from datetime import datetime import re from langgraph.graph import StateGraph, END from langchain_openai import ChatOpenAI from langchain_core.messages import ( AIMessage, HumanMessage, SystemMessage, ToolMessage ) from agent.state import State, SummarySchema from config.env import validate_environment from models.model_loader import model_loader from app_state import app_state from tools.tools import web_search, calculator # Set up logging logger = logging.getLogger(__name__) # Validate environment variables validate_environment() # Configuration constants DEFAULT_MODEL = app_state.get_active_model() or "gpt-4o-mini" # Default model # Environment variables - now guaranteed to be available after validation OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # Tool registry for easy extension AVAILABLE_TOOLS = { "web_search": web_search, "calculator": calculator, # Future tools can be added here: # "code_executor": code_executor_tool, } # Active tools list (can be configured based on needs) tools = [AVAILABLE_TOOLS["web_search"], AVAILABLE_TOOLS["calculator"]] async def analyze_query(state: State) -> Dict[str, Any]: """ Analyze the user's query to determine if web search is needed. This is a lightweight classification step. """ messages = state["messages"] model_name = state.get("model", DEFAULT_MODEL) # For models that don't use tools, skip web search if model_name in ["phi4", "Flux", "FluxKontext", "Qwen25Math", "Qwen25VL"]: return {"needsWebSearch": False} # Get the last human message user_query = None for msg in reversed(messages): if isinstance(msg, HumanMessage): user_query = msg.content break if not user_query: return {"needsWebSearch": False} # Use a cheaper/faster model for classification if available classifier_llm = model_loader.get_model("gpt-4o-mini", temperature=0) classification_prompt = f"""Analyze this query and determine if it requires tools to answer properly. User query: {user_query} Determine if this query needs: 1. Web search for current events information Respond with ONLY 'YES' if any tool is needed. Respond with ONLY 'NO' if it can be answered without tools. Response:""" try: response = await classifier_llm.ainvoke([SystemMessage(content=classification_prompt)]) needs_search = "yes" in response.content.lower() logger.debug(f"Query classification - Needs tools: {needs_search}") return {"needsWebSearch": needs_search} except Exception as e: logger.error(f"Error in query classification: {e}") # Default to not needing search on error return {"needsWebSearch": False} async def call_agent_with_tools(state: State) -> Dict[str, Any]: """ Agent node that uses tools to gather information. This only runs if web search is needed. 
""" messages = state["messages"] model_name = state.get("model", DEFAULT_MODEL) # Get the appropriate LLM with tools llm = model_loader.get_model(model_name, temperature=0) llm_with_tools = llm.bind_tools(tools) # The tool adapter will inject detailed tool instructions # For wrapped models (DeepHermes3, Qwen25Math), the tool adapter handles everything # For native tool models (GPT-4), we just need to ensure they know to use tools # Check if this is a wrapped model model_type = getattr(llm, '_llm_type', '') is_wrapped = 'tool_wrapper' in model_type # Check if we already have tool results (to prevent loops) has_tool_results = any(msg.type == "tool" for msg in messages) messages_to_send = messages.copy() # For wrapped models with tool results, add explicit instruction to stop if is_wrapped and has_tool_results: # Count how many times we've called tools tool_call_count = state.get("toolCallCount", 0) # control this with a dynamic variable to manage tool use for multiple turns. if tool_call_count >= 1: stop_instruction = SystemMessage( content="""IMPORTANT: You have already called tools and received results. DO NOT call any more tools directly. You MUST respond with ONE of these exact phrases: - "Search Again" - if you need more information to fully answer the user's question - "Tool results received" - if you have sufficient information to answer Do not include any other text. Do not synthesize or provide a final answer until told otherwise.""" ) # Add this right before invoking the model messages_to_send.append(stop_instruction) # For native tool support models (like GPT-4), add a hint elif not is_wrapped and model_name.startswith('gpt-'): # Add a system message encouraging tool use if not has_tool_results: tool_encouragement = SystemMessage( content="""You have access to tools that can help answer this query. The system has identified that tools are needed for this request. 
USE THEM to get the most accurate and up-to-date information."""
            )
            # Insert before the last user message
            if messages:
                messages_to_send = messages[:-1] + [tool_encouragement] + [messages[-1]]

    # For wrapped models, the tool adapter will handle everything

    #########################################################################################
    # Invoke the model with tools
    # Pass session_id if available for DeepHermes3 thought streaming websockets
    session_id = state.get("session_id")
    if session_id and model_name == "DeepHermes3":
        response = await llm_with_tools.ainvoke(messages_to_send, session_id=session_id)
    else:
        response = await llm_with_tools.ainvoke(messages_to_send)
    #########################################################################################

    # Update token counts
    updated_summary_schema = state.get("summarySchema", SummarySchema())
    updated_summary_schema.model = model_name

    if hasattr(response, 'response_metadata') and 'token_usage' in response.response_metadata:
        token_usage = response.response_metadata['token_usage']
        updated_summary_schema.tokens_input = token_usage.get('prompt_tokens', 0)
        updated_summary_schema.tokens_output = token_usage.get('completion_tokens', 0)
    elif hasattr(response, 'usage_metadata'):
        updated_summary_schema.tokens_input = getattr(response.usage_metadata, 'input_tokens', 0)
        updated_summary_schema.tokens_output = getattr(response.usage_metadata, 'output_tokens', 0)

    return {
        "messages": [response],
        "summarySchema": updated_summary_schema
    }


async def execute_tools(state: State) -> Dict[str, Any]:
    """
    Tool execution node - same as before but cleaner
    """
    last_message = state["messages"][-1]

    if not isinstance(last_message, AIMessage) or not last_message.tool_calls:
        return {"messages": []}

    summary_schema = state.get("summarySchema", SummarySchema())
    tool_messages = []

    for tool_call in last_message.tool_calls:
        tool_name = tool_call.get("name") if isinstance(tool_call, dict) else tool_call.name
        tool_args = tool_call.get("args") if isinstance(tool_call, dict) else tool_call.args
        tool_id = tool_call.get("id") if isinstance(tool_call, dict) else tool_call.id

        tool_fn = None
        for t in tools:
            if t.name == tool_name:
                tool_fn = t
                break

        if not tool_fn:
            tool_messages.append(
                ToolMessage(
                    content=f"Error: Tool {tool_name} not found",
                    tool_call_id=tool_id
                )
            )
            continue

        try:
            result = await tool_fn.ainvoke(tool_args)

            # Parse search results for summary schema
            if tool_name == "web_search":
                try:
                    parsed_result = json.loads(result)
                    if parsed_result.get("results") and len(parsed_result["results"]) > 0:
                        first_result = parsed_result["results"][0]
                        summary_schema.title = parsed_result.get("query", "Web Search Results")
                        summary_schema.contentSummary = f"Found {len(parsed_result['results'])} results"
                        summary_schema.sources = [r.get("url", "") for r in parsed_result["results"]]
                except Exception as e:
                    logger.error(f"Error parsing search results: {e}")

            tool_messages.append(
                ToolMessage(
                    content=result,
                    tool_call_id=tool_id
                )
            )
        except Exception as e:
            tool_messages.append(
                ToolMessage(
                    content=f"Error executing tool: {str(e)}",
                    tool_call_id=tool_id
                )
            )

    return {
        "messages": tool_messages,
        "toolCallCount": state.get("toolCallCount", 0) + len(tool_messages),
        "context": [msg.content for msg in tool_messages],
        "summarySchema": summary_schema,
    }


async def generate_response(state: State) -> Dict[str, Any]:
    """
    Generate the final response based on all gathered information.
    This node always runs and produces the final answer.
""" messages = state["messages"] model_name = state.get("model", DEFAULT_MODEL) context = state.get("context", []) # Handle multimodal inputs for multimodal models (phi4, Qwen25VL, FluxKontext) multimodal_inputs = state.get("multimodal_inputs") multimodal_models = ["phi4", "Qwen25VL", "FluxKontext"] if model_name in multimodal_models and multimodal_inputs: # Get model with appropriate settings if model_name == "phi4": llm = model_loader.get_model(model_name, temperature=0, disable_hd_transform=False) else: llm = model_loader.get_model(model_name, temperature=0) # Handle different multimodal interfaces if model_name == "FluxKontext" and hasattr(llm, 'with_images'): images = multimodal_inputs.get("images", []) if images: llm = llm.with_images(images=images) elif hasattr(llm, 'with_multimodal_inputs'): images = multimodal_inputs.get("images", []) audios = multimodal_inputs.get("audios", []) llm = llm.with_multimodal_inputs(images=images, audios=audios) else: llm = model_loader.get_model(model_name, temperature=0) # Check if we have search results to incorporate search_results_text = "" has_search_results = False if context and any("web_search" in str(msg) for msg in messages if isinstance(msg, AIMessage)): # We have search results - format them nicely has_search_results = True for ctx in context: try: search_data = json.loads(ctx) if search_data.get("results"): search_results_text = f"\\n\\nSearch Results for '{search_data.get('query', 'your query')}':\\n" for i, result in enumerate(search_data["results"], 1): search_results_text += f"\\n{i}. {result['title']}\\n" search_results_text += f" URL: {result['url']}\\n" search_results_text += f" Content: {result['contentSummary']}\\n" except Exception: search_results_text = f"\\n\\nSearch context: {ctx}\\n" # Prepare messages based on whether we have search results messages_to_send = messages.copy() # For models that don't have their own system prompts or when we have search results if has_search_results and model_name not in ["phi4", "Flux", "FluxKontext"]: # Add search results as a system message for context # For DeepHermes3, encourage thinking when synthesizing results if model_name == "DeepHermes3": search_context_message = SystemMessage( content=f""" You are a Deep Thinking Agent. You have recieved the following web search results:\n {search_results_text}\n IMPORTANT:\n 1. Never mention use of internal tools such as web search tool.\n 2. Do NOT call any more tools.\n 3. ALWAYS Use <think> tags: <think> Your reasoning steps based on the results above go here...</think>.\n 4. CITE YOUR SOURCES at the end of your response.\n INSTRUCTIONS:\n 1. Reason about the results, then promptly synthesize your final answer.\n 2. DO NOT ignore the tool results. DO NOT make up information\n \n """ ) else: search_context_message = SystemMessage( content=f"""Based on the following web search results, provide a comprehensive answer. Prioritize information from these search results over your training data. Cite sources always. 
{search_results_text}"""
            )

        # Insert the search context before the last message
        messages_to_send = messages[:-1] + [search_context_message] + [messages[-1]]

    # Invoke the model
    # Pass session_id if available for DeepHermes3 thought streaming websockets
    #########################################################################################
    session_id = state.get("session_id")
    if session_id and model_name == "DeepHermes3":
        response = await llm.ainvoke(messages_to_send, session_id=session_id)
    else:
        response = await llm.ainvoke(messages_to_send)
    #########################################################################################

    # Update summary schema
    updated_summary_schema = state.get("summarySchema", SummarySchema())
    updated_summary_schema.model = model_name

    # Extract token usage
    if hasattr(response, 'response_metadata') and 'token_usage' in response.response_metadata:
        token_usage = response.response_metadata['token_usage']
        updated_summary_schema.tokens_input += token_usage.get('prompt_tokens', 0)
        updated_summary_schema.tokens_output += token_usage.get('completion_tokens', 0)
    elif hasattr(response, 'usage_metadata'):
        updated_summary_schema.tokens_input += getattr(response.usage_metadata, 'input_tokens', 0)
        updated_summary_schema.tokens_output += getattr(response.usage_metadata, 'output_tokens', 0)

    # Handle image generation/modification responses
    if hasattr(response, 'additional_kwargs') and 'generated_image' in response.additional_kwargs:
        generated_image = response.additional_kwargs.get('generated_image')
        prompt = response.additional_kwargs.get('prompt', '')
        input_image = response.additional_kwargs.get('input_image', None)

        if generated_image:
            updated_summary_schema.images = [generated_image]

            if input_image:
                # This is an image modification (FluxKontext)
                updated_summary_schema.title = f"Modified Image: {prompt[:50]}..." if len(prompt) > 50 else f"Modified Image: {prompt}"
                updated_summary_schema.contentSummary = f"AI-modified image based on the prompt: '{prompt}'"
            else:
                # This is a new image generation (Flux)
                updated_summary_schema.title = f"Generated Image: {prompt[:50]}..." if len(prompt) > 50 else f"Generated Image: {prompt}"
                updated_summary_schema.contentSummary = f"AI-generated image based on the prompt: '{prompt}'"

    # Extract URLs from response for sources
    if hasattr(response, 'content'):
        url_pattern = r'https?://[^\\s)"]+'
        sources = re.findall(url_pattern, response.content)
        if sources:
            updated_summary_schema.sources.extend(sources)

    # DUAL PURPOSE RETURN:
    # 1. Update messages for user response
    # 2. summarySchema is automatically exposed when graph ends
    return {
        "messages": [response],
        "summarySchema": updated_summary_schema,
        # Mark that this agent has completed its work
        "agentCompleted": True,
        # Default to not passing to next agent (can be overridden by caller)
        "shouldPassToNextAgent": state.get("shouldPassToNextAgent", False),
        # Default to showing response to user (can be overridden by caller)
        "shouldShowResponseToUser": state.get("shouldShowResponseToUser", True)
    }


def should_use_tools(state: State) -> Literal["tools", "response"]:
    """
    Routing function: Determines whether to use tools or go directly to response
    """
    if state.get("needsWebSearch", False):
        return "tools"
    return "response"


def should_continue_tools(state: State) -> Literal["execute", "response"]:
    """
    Routing function: After calling agent with tools, check if we need to execute them
    """
    last_message = state["messages"][-1]

    # Check if we just processed tool results
    if (state.get("toolCallCount", 0) > 0 and
            isinstance(last_message, AIMessage) and
            last_message.content):
        content_lower = last_message.content.lower().strip()

        # Use these flags to control tool use for multiple turns.
        # If agent wants more searches, reset to analyze phase
        # if "search again" in content_lower:
        #     # The agent wants to search again - this will trigger a new analyze phase
        #     return "response"

        # If agent has enough information, proceed to final response
        if "tool results received" in content_lower:
            return "response"  # This goes to generate_response for final answer

    # Check if there are tool calls and we haven't exceeded the limit
    if (isinstance(last_message, AIMessage) and
            hasattr(last_message, 'tool_calls') and
            last_message.tool_calls and
            state.get("toolCallCount", 0) < state.get("maxToolCalls", 5)):
        return "execute"

    # No tool calls or exceeded limit - generate response
    return "response"


# Build the graph
builder = StateGraph(State)

# Add nodes
builder.add_node("analyze", analyze_query)
builder.add_node("agent_with_tools", call_agent_with_tools)
builder.add_node("execute_tools", execute_tools)
builder.add_node("generate_response", generate_response)

# Set entry point
builder.set_entry_point("analyze")

# Add edges
# From analyze, route based on whether we need web search
# returns "tools" if web search is needed, "response" if not
builder.add_conditional_edges("analyze", should_use_tools, {
    "tools": "agent_with_tools",
    "response": "generate_response"
})

# From agent_with_tools, either execute tools or generate response
builder.add_conditional_edges("agent_with_tools", should_continue_tools, {
    "execute": "execute_tools",
    "response": "generate_response"
})

# After executing tools, go back to agent to process results
builder.add_edge("execute_tools", "agent_with_tools")

# Generate response always leads to end
builder.add_edge("generate_response", END)

# Compile the graph
graph = builder.compile()

# Set the graph name
graph.name = "Chatbot Agent"`;

  fs.writeFileSync(path.join(agentPath, 'graph.py'), graphPy);
}

module.exports = {
  createAgentFiles,
  createAgentState,
  createAgentGraph
};
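
A minimal usage sketch of the exported helper is shown below. The require path and the surrounding script are assumptions for illustration, not part of this module; point the require at wherever this file lives in your generated project.

```js
// usage-sketch.js - illustrative only; the require path below is hypothetical.
const path = require('path');
const { createAgentFiles } = require('./createAgentFiles');

async function scaffoldAgent(projectRoot) {
  // createAgentFiles expects the path to the generated server directory and writes
  // agent/__init__.py, agent/README.md, agent/state.py, and agent/graph.py into it.
  const serverPath = path.join(projectRoot, 'server');
  await createAgentFiles(serverPath);
}

scaffoldAgent(process.cwd()).catch((err) => {
  console.error('Failed to scaffold agent files:', err);
  process.exit(1);
});
```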