UNPKG

orgo

Version:

Computers for AI agents

903 lines (889 loc) 32.4 kB
'use strict'; var fetch2 = require('cross-fetch'); var Anthropic = require('@anthropic-ai/sdk'); function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; } var fetch2__default = /*#__PURE__*/_interopDefault(fetch2); var Anthropic__default = /*#__PURE__*/_interopDefault(Anthropic); var __defProp = Object.defineProperty; var __getOwnPropNames = Object.getOwnPropertyNames; var __esm = (fn, res) => function __init() { return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res; }; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; // node_modules/tsup/assets/cjs_shims.js var init_cjs_shims = __esm({ "node_modules/tsup/assets/cjs_shims.js"() { } }); // src/utils/auth.ts function getApiKey(apiKey) { const key = apiKey || process.env.ORGO_API_KEY; if (!key) { throw new Error( "API key required. Set ORGO_API_KEY environment variable or pass apiKey parameter. Get a key at https://www.orgo.ai/start" ); } return key; } var init_auth = __esm({ "src/utils/auth.ts"() { init_cjs_shims(); } }); // src/errors.ts exports.OrgoError = void 0; exports.ApiError = void 0; exports.ConfigError = void 0; var init_errors = __esm({ "src/errors.ts"() { init_cjs_shims(); exports.OrgoError = class _OrgoError extends Error { constructor(message) { super(message); this.name = "OrgoError"; Object.setPrototypeOf(this, _OrgoError.prototype); } }; exports.ApiError = class _ApiError extends exports.OrgoError { constructor(message, statusCode, response) { super(message); this.name = "ApiError"; this.statusCode = statusCode; this.response = response; Object.setPrototypeOf(this, _ApiError.prototype); } }; exports.ConfigError = class _ConfigError extends exports.OrgoError { constructor(message) { super(message); this.name = "ConfigError"; Object.setPrototypeOf(this, _ConfigError.prototype); } }; } }); var ApiClient; var init_client = __esm({ "src/api/client.ts"() { init_cjs_shims(); init_auth(); init_errors(); ApiClient = class { constructor(apiKey, baseUrl) { this.apiKey = getApiKey(apiKey); this.baseUrl = baseUrl || "https://www.orgo.ai/api"; this.headers = { "Authorization": `Bearer ${this.apiKey}`, "Content-Type": "application/json", "Accept": "application/json" }; } async request(method, endpoint, data) { const url = `${this.baseUrl}/${endpoint}`; try { const options = { method, headers: this.headers }; if (method === "GET" && data) { const params = new URLSearchParams(data); const urlWithParams = `${url}?${params}`; const response2 = await fetch2__default.default(urlWithParams, options); return this.handleResponse(response2); } else if (data) { options.body = JSON.stringify(data); } const response = await fetch2__default.default(url, options); return this.handleResponse(response); } catch (error) { if (error instanceof exports.ApiError) { throw error; } throw new exports.ApiError(`Connection error: ${error instanceof Error ? error.message : "Unknown error"}`); } } async handleResponse(response) { if (!response.ok) { let errorMessage = `API error: ${response.status}`; try { const errorData = await response.json(); if (errorData.error) { errorMessage += ` - ${errorData.error}`; } } catch { } throw new exports.ApiError(errorMessage, response.status, response); } return response.json(); } // Computer lifecycle methods async createComputer(config) { const payload = {}; if (config) { payload.config = config; } return this.request("POST", "projects", Object.keys(payload).length > 0 ? payload : void 0); } async connectComputer(projectId) { return this.request("GET", `projects/by-name/${projectId}`); } async getStatus(projectId) { return this.request("GET", `projects/by-name/${projectId}`); } async startComputer(projectName) { const project = await this.getStatus(projectName); const projectId = project.id; if (!projectId) { throw new Error(`Could not find ID for project ${projectName}`); } return this.request("POST", `projects/${projectId}/start`); } async stopComputer(projectName) { const project = await this.getStatus(projectName); const projectId = project.id; if (!projectId) { throw new Error(`Could not find ID for project ${projectName}`); } return this.request("POST", `projects/${projectId}/stop`); } async restartComputer(projectName) { const project = await this.getStatus(projectName); const projectId = project.id; if (!projectId) { throw new Error(`Could not find ID for project ${projectName}`); } return this.request("POST", `projects/${projectId}/restart`); } async deleteComputer(projectName) { const project = await this.getStatus(projectName); const projectId = project.id; if (!projectId) { throw new Error(`Could not find ID for project ${projectName}`); } return this.request("POST", `projects/${projectId}/delete`); } // Computer control methods async leftClick(projectId, x, y) { return this.request("POST", `computers/${projectId}/click`, { button: "left", x, y }); } async rightClick(projectId, x, y) { return this.request("POST", `computers/${projectId}/click`, { button: "right", x, y }); } async doubleClick(projectId, x, y) { return this.request("POST", `computers/${projectId}/click`, { button: "left", x, y, double: true }); } async drag(projectId, startX, startY, endX, endY, button = "left", duration = 0.5) { return this.request("POST", `computers/${projectId}/drag`, { start_x: startX, start_y: startY, end_x: endX, end_y: endY, button, duration }); } async scroll(projectId, direction, amount) { return this.request("POST", `computers/${projectId}/scroll`, { direction, amount }); } async typeText(projectId, text) { return this.request("POST", `computers/${projectId}/type`, { text }); } async keyPress(projectId, key) { return this.request("POST", `computers/${projectId}/key`, { key }); } async getScreenshot(projectId) { return this.request("GET", `computers/${projectId}/screenshot`); } async executeBash(projectId, command) { return this.request("POST", `computers/${projectId}/bash`, { command }); } async executePython(projectId, code, timeout = 10) { return this.request("POST", `computers/${projectId}/exec`, { code, timeout }); } async wait(projectId, seconds) { return this.request("POST", `computers/${projectId}/wait`, { seconds }); } // Streaming methods async startStream(projectId, connectionName) { return this.request("POST", `computers/${projectId}/stream/start`, { connection_name: connectionName }); } async stopStream(projectId) { return this.request("POST", `computers/${projectId}/stream/stop`); } async getStreamStatus(projectId) { return this.request("GET", `computers/${projectId}/stream/status`); } }; } }); var AnthropicProvider; var init_anthropic = __esm({ "src/providers/anthropic.ts"() { init_cjs_shims(); init_client(); AnthropicProvider = class { constructor() { try { this.anthropic = Anthropic__default.default; } catch (error) { throw new Error( "Anthropic SDK not installed. Please install with 'npm install @anthropic-ai/sdk'" ); } } async execute(options) { const { computerId, instruction, callback, apiKey = process.env.ANTHROPIC_API_KEY, model = "claude-3-7-sonnet-20250219", displayWidth = 1024, displayHeight = 768, orgoApiKey, orgoBaseUrl, maxSavedScreenshots = 2, thinkingEnabled = false, thinkingBudget = 1024, maxTokens = 4096, maxIterations = 20 } = options; if (!apiKey) { throw new Error( "No Anthropic API key provided. Set ANTHROPIC_API_KEY environment variable or pass apiKey." ); } const client = new this.anthropic({ apiKey }); const messages = [ { role: "user", content: instruction } ]; const systemPrompt = this.getSystemPrompt(displayWidth, displayHeight); try { const tools = [ { type: "computer_20250124", name: "computer", display_width_px: displayWidth, display_height_px: displayHeight, display_number: 1 } ]; if (callback) { callback("status", "Starting conversation with Claude"); } const apiClient = new ApiClient(orgoApiKey, orgoBaseUrl); let iteration = 0; let screenshotCount = 0; while (iteration < maxIterations) { iteration++; if (screenshotCount > maxSavedScreenshots) { this.filterToNMostRecentImages(messages, maxSavedScreenshots); screenshotCount = maxSavedScreenshots; } const requestParams = { model, max_tokens: maxTokens, system: systemPrompt, messages, tools, betas: ["computer-use-2025-01-24"] }; if (thinkingEnabled) { requestParams.thinking = { type: "enabled", budget_tokens: thinkingBudget }; } try { const response = await client.beta.messages.create(requestParams); const responseContent = response.content; const assistantMessage = { role: "assistant", content: responseContent }; messages.push(assistantMessage); for (const block of responseContent) { if (block.type === "text" && callback) { callback("text", block.text); } else if (block.type === "thinking" && callback) { callback("thinking", block.thinking); } else if (block.type === "tool_use" && callback) { const toolParams = { action: block.name.split(".").pop(), ...block.input }; callback("tool_use", toolParams); } } const toolResults = []; for (const block of responseContent) { if (block.type === "tool_use") { const result = await this.executeTool( computerId, block.input, callback, apiClient ); const toolResult = { type: "tool_result", tool_use_id: block.id, content: [] }; if (typeof result === "object" && result !== null && "type" in result && result.type === "image") { toolResult.content = [result]; if (block.input.action === "screenshot") { screenshotCount++; } } else { toolResult.content = [{ type: "text", text: String(result) }]; } toolResults.push(toolResult); } } if (toolResults.length === 0) { if (callback) { callback("status", "Task completed"); } return messages; } messages.push({ role: "user", content: toolResults }); } catch (error) { if (error instanceof Error && error.message.toLowerCase().includes("base64")) { if (callback) { callback("error", "Base64 error detected. Attempting recovery..."); } this.filterToNMostRecentImages(messages, 1); continue; } throw error; } } if (callback) { callback("status", `Reached maximum iterations (${maxIterations})`); } return messages; } catch (error) { if (callback) { callback("error", error instanceof Error ? error.message : String(error)); } throw error; } } getSystemPrompt(displayWidth, displayHeight) { return `You are Claude, an AI assistant that controls a virtual Ubuntu computer with internet access. <SYSTEM_CAPABILITY> * You are utilising an Ubuntu virtual machine with a display resolution of ${displayWidth}x${displayHeight}. * You can take screenshots to see the current state and control the computer by clicking, typing, pressing keys, and scrolling. * The virtual environment is an Ubuntu system with standard applications. * Always start by taking a screenshot to see the current state before performing any actions. </SYSTEM_CAPABILITY> <UBUNTU_DESKTOP_GUIDELINES> * CRITICAL INSTRUCTION: When opening applications or files on the Ubuntu desktop, you MUST USE DOUBLE-CLICK rather than single-click. * Single-click only selects desktop icons but DOES NOT open them. To open desktop icons, you MUST use double-click. * Common desktop interactions: - Desktop icons: DOUBLE-CLICK to open applications and folders - Menu items: SINGLE-CLICK to select options - Taskbar icons: SINGLE-CLICK to open applications - Window buttons: SINGLE-CLICK to use close, minimize, maximize buttons - File browser items: DOUBLE-CLICK to open folders and files - When submitting, use the 'Enter' key, not the 'Return' key. * If you see an icon on the desktop that you need to open, ALWAYS use the double_click action, never use left_click. </UBUNTU_DESKTOP_GUIDELINES> <SCREENSHOT_GUIDELINES> * Be mindful of how many screenshots you take - they consume significant memory. * Only take screenshots when you need to see the current state of the screen. * Try to batch multiple actions before taking another screenshot. * For better performance, limit the number of screenshots you take. </SCREENSHOT_GUIDELINES>`; } filterToNMostRecentImages(messages, maxImages) { const imageBlocks = []; messages.forEach((msg, msgIdx) => { if (msg.role !== "user") return; const content = msg.content; if (!Array.isArray(content)) return; content.forEach((block, contentIdx) => { if (block?.type !== "tool_result") return; const blockContent = block.content || []; blockContent.forEach((item, itemIdx) => { if (item?.type === "image" && item.source) { imageBlocks.push({ msgIdx, contentIdx, contentItemIdx: itemIdx, contentItem: item }); } }); }); }); if (imageBlocks.length > maxImages) { const toRemove = imageBlocks.slice(0, -maxImages); toRemove.forEach(({ contentItem }) => { if (contentItem.source?.data) { contentItem.source.data = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="; contentItem.source.media_type = "image/png"; } }); } } async executeTool(computerId, params, callback, apiClient) { const action = params.action; if (callback) { callback("tool_executing", { action, params }); } try { if (!apiClient) { apiClient = new ApiClient(); } switch (action) { case "screenshot": { const response = await apiClient.getScreenshot(computerId); if (callback) { callback("tool_result", { type: "image", action: "screenshot" }); } const imageUrl = response.image; if (!imageUrl) { throw new Error("No image URL received from API"); } const imgResponse = await fetch(imageUrl); if (!imgResponse.ok) { throw new Error(`Failed to fetch image: ${imgResponse.statusText}`); } const arrayBuffer = await imgResponse.arrayBuffer(); const base64 = Buffer.from(arrayBuffer).toString("base64"); return { type: "image", source: { type: "base64", media_type: "image/jpeg", data: base64 } }; } case "left_click": { const coord = params.coordinate; if (!coord) { throw new Error("Coordinates required for left click"); } const [x, y] = coord; await apiClient.leftClick(computerId, x, y); if (callback) { callback("tool_result", { action: "left_click", x, y }); } return `Left-clicked at (${x}, ${y})`; } case "right_click": { const coord = params.coordinate; if (!coord) { throw new Error("Coordinates required for right click"); } const [x, y] = coord; await apiClient.rightClick(computerId, x, y); if (callback) { callback("tool_result", { action: "right_click", x, y }); } return `Right-clicked at (${x}, ${y})`; } case "double_click": { const coord = params.coordinate; if (!coord) { throw new Error("Coordinates required for double click"); } const [x, y] = coord; await apiClient.doubleClick(computerId, x, y); if (callback) { callback("tool_result", { action: "double_click", x, y }); } return `Double-clicked at (${x}, ${y})`; } case "type": { const text = params.text; if (!text) { throw new Error("Text required for typing"); } await apiClient.typeText(computerId, text); if (callback) { callback("tool_result", { action: "type", text }); } return `Typed: "${text}"`; } case "key": { let key = params.text; if (!key) { throw new Error("Key required for key press"); } if (key.toLowerCase() === "return") { key = "enter"; } await apiClient.keyPress(computerId, key); if (callback) { callback("tool_result", { action: "key", key }); } return `Pressed key: ${key}`; } case "scroll": { const direction = params.scroll_direction; const amount = params.scroll_amount; if (!direction || amount === void 0) { throw new Error("Direction and amount required for scrolling"); } await apiClient.scroll(computerId, direction, amount); if (callback) { callback("tool_result", { action: "scroll", direction, amount }); } return `Scrolled ${direction} by ${amount}`; } case "wait": { const duration = params.duration || 1; await apiClient.wait(computerId, duration); if (callback) { callback("tool_result", { action: "wait", duration }); } return `Waited for ${duration} second(s)`; } default: { const error = `Unsupported action: ${action}`; if (callback) { callback("error", error); } throw new Error(error); } } } catch (error) { const errorMsg = `Error executing ${action}: ${error instanceof Error ? error.message : String(error)}`; if (callback) { callback("error", errorMsg); } return `Error: ${errorMsg}`; } } }; } }); // src/prompt.ts function getProvider(providerName = "anthropic") { const ProviderClass = PROVIDER_MAPPING[providerName]; if (!ProviderClass) { const available = Object.keys(PROVIDER_MAPPING).join(", "); throw new Error(`Unknown provider: ${providerName}. Available providers: ${available}`); } return new ProviderClass(); } var PROVIDER_MAPPING; var init_prompt = __esm({ "src/prompt.ts"() { init_cjs_shims(); init_anthropic(); PROVIDER_MAPPING = { anthropic: AnthropicProvider // Add more providers here as needed }; } }); // src/computer.ts var computer_exports = {}; __export(computer_exports, { Computer: () => exports.Computer }); exports.Computer = void 0; var init_computer = __esm({ "src/computer.ts"() { init_cjs_shims(); init_client(); init_prompt(); exports.Computer = class _Computer { constructor(apiKey, baseApiUrl, projectId, info) { this.apiKey = apiKey; this.baseApiUrl = baseApiUrl; this.api = new ApiClient(this.apiKey, this.baseApiUrl); if (projectId && info) { this.projectId = projectId; this._info = info; } } /** * Create a new Computer instance * * @param options Configuration options * @param options.projectId Existing project ID to connect to (optional) * @param options.apiKey Orgo API key (defaults to ORGO_API_KEY env var) * @param options.config Configuration for new computer (optional) * @param options.baseApiUrl Custom API URL (optional) * @param options.ram RAM in GB for new computer (2 or 4) - only used when creating * @param options.memory Alternative parameter for RAM in GB (2 or 4) - only used when creating * @param options.cpu CPU cores for new computer (2 or 4) - only used when creating * * Note: If both ram and memory are provided, ram takes precedence. */ static async create(options) { const apiKey = options?.apiKey || process.env.ORGO_API_KEY || ""; const computer = new _Computer(apiKey, options?.baseApiUrl); let ram = options?.ram; if (ram === void 0 && options?.memory !== void 0) { ram = options.memory; } const projectId = options?.projectId; if (projectId) { const info = await computer.api.connectComputer(projectId); computer.projectId = projectId; computer._info = info; if (ram !== void 0 || options?.memory !== void 0 || options?.cpu !== void 0) { console.info("Note: ram, memory, and cpu parameters are ignored when connecting to existing computer"); } } else { await computer._createNewComputer(options?.config, ram, options?.cpu); } return computer; } async _createNewComputer(config, ram, cpu) { if (ram !== void 0 && ram !== 2 && ram !== 4) { throw new Error("ram/memory must be either 2 or 4 GB"); } if (cpu !== void 0 && cpu !== 2 && cpu !== 4) { throw new Error("cpu must be either 2 or 4 cores"); } let finalConfig = config; if (ram !== void 0 || cpu !== void 0) { finalConfig = config ? { ...config } : {}; if (ram !== void 0) { finalConfig.ram = ram; } if (cpu !== void 0) { finalConfig.cpu = cpu; } } const response = await this.api.createComputer(finalConfig); const projectId = response.name || response.id || response.projectId; if (typeof projectId !== "string" || !projectId) { throw new Error("Failed to initialize computer: No project ID returned"); } this.projectId = projectId; this._info = response; } get info() { return this._info; } async status() { return this.api.getStatus(this.projectId); } async start() { return this.api.startComputer(this.projectId); } async stop() { return this.api.stopComputer(this.projectId); } async restart() { return this.api.restartComputer(this.projectId); } async destroy() { return this.api.deleteComputer(this.projectId); } // Navigation methods async leftClick(x, y) { return this.api.leftClick(this.projectId, x, y); } async rightClick(x, y) { return this.api.rightClick(this.projectId, x, y); } async doubleClick(x, y) { return this.api.doubleClick(this.projectId, x, y); } /** * Perform a smooth drag operation from start to end coordinates * * @param startX Starting X coordinate * @param startY Starting Y coordinate * @param endX Ending X coordinate * @param endY Ending Y coordinate * @param button Mouse button to use ("left" or "right", default: "left") * @param duration Duration of the drag in seconds (0.1 to 5.0, default: 0.5) * @returns Promise resolving when drag is complete */ async drag(startX, startY, endX, endY, button = "left", duration = 0.5) { return this.api.drag(this.projectId, startX, startY, endX, endY, button, duration); } async scroll(direction = "down", amount = 1) { return this.api.scroll(this.projectId, direction, amount); } // Input methods async type(text) { return this.api.typeText(this.projectId, text); } async key(key) { return this.api.keyPress(this.projectId, key); } // View methods async screenshot(options) { const response = await this.api.getScreenshot(this.projectId); const imageData = response.image || ""; if (imageData.startsWith("http://") || imageData.startsWith("https://")) { const fetch3 = (await import('cross-fetch')).default; const imgResponse = await fetch3(imageData); if (!imgResponse.ok) { throw new Error(`Failed to download screenshot: ${imgResponse.statusText}`); } const arrayBuffer = await imgResponse.arrayBuffer(); const buffer = Buffer.from(arrayBuffer); if (options?.format === "base64") { return buffer.toString("base64"); } return buffer; } else { if (options?.format === "base64") { return imageData; } return Buffer.from(imageData, "base64"); } } async screenshotBase64() { const response = await this.api.getScreenshot(this.projectId); const imageData = response.image || ""; if (imageData.startsWith("http://") || imageData.startsWith("https://")) { const fetch3 = (await import('cross-fetch')).default; const imgResponse = await fetch3(imageData); if (!imgResponse.ok) { throw new Error(`Failed to download screenshot: ${imgResponse.statusText}`); } const arrayBuffer = await imgResponse.arrayBuffer(); return Buffer.from(arrayBuffer).toString("base64"); } else { return imageData; } } // Execution methods async bash(command) { const response = await this.api.executeBash(this.projectId, command); return response.output || ""; } async exec(code, timeout = 10) { return this.api.executePython(this.projectId, code, timeout); } async wait(seconds) { return this.api.wait(this.projectId, seconds); } // Streaming methods /** * Start streaming the computer screen to an RTMP server * * @param connection Name of the RTMP connection configured in settings (e.g., "my-twitch-1") * @returns Promise resolving with streaming status information * * @example * // First configure a connection in settings at https://www.orgo.ai/settings * // Then start streaming * await computer.startStream("my-twitch-1"); * * // Do your demo/automation * await computer.type("Hello viewers!"); * * // Stop streaming when done * await computer.stopStream(); */ async startStream(connection) { return this.api.startStream(this.projectId, connection); } /** * Stop the active stream * * @returns Promise resolving with stop status information */ async stopStream() { return this.api.stopStream(this.projectId); } /** * Get the current streaming status * * @returns Promise resolving with streaming status * @returns.status "idle", "streaming", or "terminated" * @returns.start_time ISO timestamp when stream started (if streaming) * @returns.pid Process ID of ffmpeg (if streaming) */ async streamStatus() { return this.api.getStreamStatus(this.projectId); } // AI control method async prompt(options) { const { instruction, provider = "anthropic", model = "claude-3-7-sonnet-20250219", displayWidth = 1024, displayHeight = 768, callback, thinkingEnabled = false, thinkingBudget = 1024, maxTokens = 4096, maxIterations = 20, maxSavedScreenshots = 5, apiKey } = options; const providerInstance = getProvider(provider); return providerInstance.execute({ computerId: this.projectId, instruction, callback, apiKey, model, displayWidth, displayHeight, thinkingEnabled, thinkingBudget, maxTokens, maxIterations, maxSavedScreenshots, // Pass through the Orgo API client configuration orgoApiKey: this.apiKey, orgoBaseUrl: this.baseApiUrl }); } }; } }); // src/index.ts init_cjs_shims(); init_computer(); init_errors(); async function createComputer(options) { const { Computer: Computer2 } = await Promise.resolve().then(() => (init_computer(), computer_exports)); return Computer2.create(options); } exports.createComputer = createComputer; //# sourceMappingURL=index.js.map //# sourceMappingURL=index.js.map