@langchain/openai
Version:
OpenAI integrations for LangChain.js
1 lines • 17.2 kB
Source Map (JSON)
{"version":3,"file":"computerUse.d.ts","names":["z","OpenAI","OpenAIClient","DynamicStructuredTool","ToolRuntime","ToolMessage","ComputerUseEnvironment","ComputerUseClickAction","Responses","ResponseComputerToolCall","Click","ComputerUseDoubleClickAction","DoubleClick","ComputerUseDragAction","Drag","ComputerUseKeypressAction","Keypress","ComputerUseMoveAction","Move","ComputerUseScreenshotAction","Screenshot","ComputerUseScrollAction","Scroll","ComputerUseTypeAction","Type","ComputerUseWaitAction","Wait","ComputerUseAction","ComputerUseScreenshotActionSchema","ZodLiteral","core","$strip","ZodObject","ComputerUseClickActionSchema","ZodNumber","ZodEnum","ZodDefault","ComputerUseDoubleClickActionSchema","ComputerUseDragActionSchema","ZodArray","ComputerUseKeypressActionSchema","ZodString","ComputerUseMoveActionSchema","ComputerUseScrollActionSchema","ComputerUseTypeActionSchema","ComputerUseWaitActionSchema","ZodOptional","ComputerUseActionSchema","ZodDiscriminatedUnion","ComputerUseScreenshotActionType","infer","ComputerUseClickActionType","ComputerUseDoubleClickActionType","ComputerUseDragActionType","ComputerUseKeypressActionType","ComputerUseMoveActionType","ComputerUseScrollActionType","ComputerUseTypeActionType","ComputerUseWaitActionType","ComputerUseInput","ComputerUseReturnType","Promise","ComputerUseOptions","ComputerUseTool","ComputerTool","computerUse"],"sources":["../../src/tools/computerUse.d.ts"],"sourcesContent":["import { z } from \"zod/v4\";\nimport { OpenAI as OpenAIClient } from \"openai\";\nimport { type DynamicStructuredTool } from \"@langchain/core/tools\";\nimport { type ToolRuntime } from \"@langchain/core/tools\";\nimport { ToolMessage } from \"@langchain/core/messages\";\n/**\n * The type of computer environment to control.\n */\nexport type ComputerUseEnvironment = \"browser\" | \"mac\" | \"windows\" | \"linux\" | \"ubuntu\";\n/**\n * Re-export action types from OpenAI SDK for convenience.\n */\nexport type ComputerUseClickAction = OpenAIClient.Responses.ResponseComputerToolCall.Click;\nexport type ComputerUseDoubleClickAction = OpenAIClient.Responses.ResponseComputerToolCall.DoubleClick;\nexport type ComputerUseDragAction = OpenAIClient.Responses.ResponseComputerToolCall.Drag;\nexport type ComputerUseKeypressAction = OpenAIClient.Responses.ResponseComputerToolCall.Keypress;\nexport type ComputerUseMoveAction = OpenAIClient.Responses.ResponseComputerToolCall.Move;\nexport type ComputerUseScreenshotAction = OpenAIClient.Responses.ResponseComputerToolCall.Screenshot;\nexport type ComputerUseScrollAction = OpenAIClient.Responses.ResponseComputerToolCall.Scroll;\nexport type ComputerUseTypeAction = OpenAIClient.Responses.ResponseComputerToolCall.Type;\nexport type ComputerUseWaitAction = OpenAIClient.Responses.ResponseComputerToolCall.Wait;\n/**\n * Union type of all computer use actions from OpenAI SDK.\n */\nexport type ComputerUseAction = OpenAIClient.Responses.ResponseComputerToolCall[\"action\"];\ndeclare const ComputerUseScreenshotActionSchema: z.ZodObject<{\n type: z.ZodLiteral<\"screenshot\">;\n}, z.core.$strip>;\ndeclare const ComputerUseClickActionSchema: z.ZodObject<{\n type: z.ZodLiteral<\"click\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n button: z.ZodDefault<z.ZodEnum<{\n back: \"back\";\n forward: \"forward\";\n left: \"left\";\n right: \"right\";\n wheel: \"wheel\";\n }>>;\n}, z.core.$strip>;\ndeclare const ComputerUseDoubleClickActionSchema: z.ZodObject<{\n type: z.ZodLiteral<\"double_click\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n button: z.ZodDefault<z.ZodEnum<{\n back: \"back\";\n forward: \"forward\";\n left: \"left\";\n right: \"right\";\n wheel: \"wheel\";\n }>>;\n}, z.core.$strip>;\ndeclare const ComputerUseDragActionSchema: z.ZodObject<{\n type: z.ZodLiteral<\"drag\">;\n path: z.ZodArray<z.ZodObject<{\n x: z.ZodNumber;\n y: z.ZodNumber;\n }, z.core.$strip>>;\n}, z.core.$strip>;\ndeclare const ComputerUseKeypressActionSchema: z.ZodObject<{\n type: z.ZodLiteral<\"keypress\">;\n keys: z.ZodArray<z.ZodString>;\n}, z.core.$strip>;\ndeclare const ComputerUseMoveActionSchema: z.ZodObject<{\n type: z.ZodLiteral<\"move\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n}, z.core.$strip>;\ndeclare const ComputerUseScrollActionSchema: z.ZodObject<{\n type: z.ZodLiteral<\"scroll\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n scroll_x: z.ZodNumber;\n scroll_y: z.ZodNumber;\n}, z.core.$strip>;\ndeclare const ComputerUseTypeActionSchema: z.ZodObject<{\n type: z.ZodLiteral<\"type\">;\n text: z.ZodString;\n}, z.core.$strip>;\ndeclare const ComputerUseWaitActionSchema: z.ZodObject<{\n type: z.ZodLiteral<\"wait\">;\n duration: z.ZodOptional<z.ZodNumber>;\n}, z.core.$strip>;\nexport declare const ComputerUseActionSchema: z.ZodObject<{\n action: z.ZodDiscriminatedUnion<[z.ZodObject<{\n type: z.ZodLiteral<\"screenshot\">;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"click\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n button: z.ZodDefault<z.ZodEnum<{\n back: \"back\";\n forward: \"forward\";\n left: \"left\";\n right: \"right\";\n wheel: \"wheel\";\n }>>;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"double_click\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n button: z.ZodDefault<z.ZodEnum<{\n back: \"back\";\n forward: \"forward\";\n left: \"left\";\n right: \"right\";\n wheel: \"wheel\";\n }>>;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"drag\">;\n path: z.ZodArray<z.ZodObject<{\n x: z.ZodNumber;\n y: z.ZodNumber;\n }, z.core.$strip>>;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"keypress\">;\n keys: z.ZodArray<z.ZodString>;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"move\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"scroll\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n scroll_x: z.ZodNumber;\n scroll_y: z.ZodNumber;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"type\">;\n text: z.ZodString;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"wait\">;\n duration: z.ZodOptional<z.ZodNumber>;\n }, z.core.$strip>]>;\n}, z.core.$strip>;\nexport type ComputerUseScreenshotActionType = z.infer<typeof ComputerUseScreenshotActionSchema>;\nexport type ComputerUseClickActionType = z.infer<typeof ComputerUseClickActionSchema>;\nexport type ComputerUseDoubleClickActionType = z.infer<typeof ComputerUseDoubleClickActionSchema>;\nexport type ComputerUseDragActionType = z.infer<typeof ComputerUseDragActionSchema>;\nexport type ComputerUseKeypressActionType = z.infer<typeof ComputerUseKeypressActionSchema>;\nexport type ComputerUseMoveActionType = z.infer<typeof ComputerUseMoveActionSchema>;\nexport type ComputerUseScrollActionType = z.infer<typeof ComputerUseScrollActionSchema>;\nexport type ComputerUseTypeActionType = z.infer<typeof ComputerUseTypeActionSchema>;\nexport type ComputerUseWaitActionType = z.infer<typeof ComputerUseWaitActionSchema>;\n/**\n * Input structure for the Computer Use tool.\n * The action is wrapped in an `action` property.\n */\nexport interface ComputerUseInput {\n action: ComputerUseAction;\n}\nexport type ComputerUseReturnType = string | Promise<string> | ToolMessage<any> | Promise<ToolMessage<any>>;\n/**\n * Options for the Computer Use tool.\n */\nexport interface ComputerUseOptions {\n /**\n * The width of the computer display in pixels.\n */\n displayWidth: number;\n /**\n * The height of the computer display in pixels.\n */\n displayHeight: number;\n /**\n * The type of computer environment to control.\n * - `browser`: Browser automation (recommended for most use cases)\n * - `mac`: macOS environment\n * - `windows`: Windows environment\n * - `linux`: Linux environment\n * - `ubuntu`: Ubuntu environment\n */\n environment: ComputerUseEnvironment;\n /**\n * Execute function that handles computer action execution.\n * This function receives the action input and should return a base64-encoded\n * screenshot of the result.\n */\n execute: (action: ComputerUseAction, runtime: ToolRuntime<any, any>) => ComputerUseReturnType;\n}\n/**\n * OpenAI Computer Use tool type for the Responses API.\n */\nexport type ComputerUseTool = OpenAIClient.Responses.ComputerTool;\n/**\n * Creates a Computer Use tool that allows models to control computer interfaces\n * and perform tasks by simulating mouse clicks, keyboard input, scrolling, and more.\n *\n * **Computer Use** is a practical application of OpenAI's Computer-Using Agent (CUA)\n * model (`computer-use-preview`), which combines vision capabilities with advanced\n * reasoning to simulate controlling computer interfaces.\n *\n * **How it works**:\n * The tool operates in a continuous loop:\n * 1. Model sends computer actions (click, type, scroll, etc.)\n * 2. Your code executes these actions in a controlled environment\n * 3. You capture a screenshot of the result\n * 4. Send the screenshot back to the model\n * 5. Repeat until the task is complete\n *\n * **Important**: Computer use is in beta and requires careful consideration:\n * - Use in sandboxed environments only\n * - Do not use for high-stakes or authenticated tasks\n * - Always implement human-in-the-loop for important decisions\n * - Handle safety checks appropriately\n *\n * @see {@link https://platform.openai.com/docs/guides/tools-computer-use | OpenAI Computer Use Documentation}\n *\n * @param options - Configuration options for the Computer Use tool\n * @returns A Computer Use tool that can be passed to `bindTools`\n *\n * @example\n * ```typescript\n * import { ChatOpenAI, tools } from \"@langchain/openai\";\n *\n * const model = new ChatOpenAI({ model: \"computer-use-preview\" });\n *\n * // With execute callback for automatic action handling\n * const computer = tools.computerUse({\n * displayWidth: 1024,\n * displayHeight: 768,\n * environment: \"browser\",\n * execute: async (action) => {\n * if (action.type === \"screenshot\") {\n * return captureScreenshot();\n * }\n * if (action.type === \"click\") {\n * await page.mouse.click(action.x, action.y, { button: action.button });\n * return captureScreenshot();\n * }\n * if (action.type === \"type\") {\n * await page.keyboard.type(action.text);\n * return captureScreenshot();\n * }\n * // Handle other actions...\n * return captureScreenshot();\n * },\n * });\n *\n * const llmWithComputer = model.bindTools([computer]);\n * const response = await llmWithComputer.invoke(\n * \"Check the latest news on bing.com\"\n * );\n * ```\n *\n * @example\n * ```typescript\n * // Without execute callback (manual action handling)\n * const computer = tools.computerUse({\n * displayWidth: 1024,\n * displayHeight: 768,\n * environment: \"browser\",\n * });\n *\n * const response = await model.invoke(\"Check the news\", {\n * tools: [computer],\n * });\n *\n * // Access the computer call from the response\n * const computerCall = response.additional_kwargs.tool_outputs?.find(\n * (output) => output.type === \"computer_call\"\n * );\n * if (computerCall) {\n * console.log(\"Action to execute:\", computerCall.action);\n * // Execute the action manually, then send back a screenshot\n * }\n * ```\n *\n * @example\n * ```typescript\n * // For macOS desktop automation with Docker\n * const computer = tools.computerUse({\n * displayWidth: 1920,\n * displayHeight: 1080,\n * environment: \"mac\",\n * execute: async (action) => {\n * if (action.type === \"click\") {\n * await dockerExec(\n * `DISPLAY=:99 xdotool mousemove ${action.x} ${action.y} click 1`,\n * containerName\n * );\n * }\n * // Capture screenshot from container\n * return await getDockerScreenshot(containerName);\n * },\n * });\n * ```\n *\n * @remarks\n * - Only available through the Responses API (not Chat Completions)\n * - Requires `computer-use-preview` model\n * - Actions include: click, double_click, drag, keypress, move, screenshot, scroll, type, wait\n * - Safety checks may be returned that require acknowledgment before proceeding\n * - Use `truncation: \"auto\"` parameter when making requests\n * - Recommended to use with `reasoning.summary` for debugging\n */\nexport declare function computerUse(options: ComputerUseOptions): DynamicStructuredTool<z.ZodObject<{\n action: z.ZodDiscriminatedUnion<[z.ZodObject<{\n type: z.ZodLiteral<\"screenshot\">;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"click\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n button: z.ZodDefault<z.ZodEnum<{\n back: \"back\";\n forward: \"forward\";\n left: \"left\";\n right: \"right\";\n wheel: \"wheel\";\n }>>;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"double_click\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n button: z.ZodDefault<z.ZodEnum<{\n back: \"back\";\n forward: \"forward\";\n left: \"left\";\n right: \"right\";\n wheel: \"wheel\";\n }>>;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"drag\">;\n path: z.ZodArray<z.ZodObject<{\n x: z.ZodNumber;\n y: z.ZodNumber;\n }, z.core.$strip>>;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"keypress\">;\n keys: z.ZodArray<z.ZodString>;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"move\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"scroll\">;\n x: z.ZodNumber;\n y: z.ZodNumber;\n scroll_x: z.ZodNumber;\n scroll_y: z.ZodNumber;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"type\">;\n text: z.ZodString;\n }, z.core.$strip>, z.ZodObject<{\n type: z.ZodLiteral<\"wait\">;\n duration: z.ZodOptional<z.ZodNumber>;\n }, z.core.$strip>]>;\n}, z.core.$strip>, ComputerUseInput, unknown, ComputerUseReturnType>;\nexport {};\n//# sourceMappingURL=computerUse.d.ts.map"],"mappings":";;;;;;;;AAQA;AAIA;AACYW,KALAL,sBAAAA,GAK4B,SAAA,GAAGJ,KAAAA,GAAaM,SAAAA,GAAUC,OAAAA,GAAAA,QAAAA;AAClE;AACA;AACA;AACYU,KALAZ,sBAAAA,GAAyBL,QAAAA,CAAaM,SAAAA,CAAUC,wBAAAA,CAAyBC,KAKpBD;AACrDY,KALAV,4BAAAA,GAA+BT,QAAAA,CAAaM,SAAAA,CAAUC,wBAAAA,CAAyBG,WAKC;AAChFW,KALAV,qBAAAA,GAAwBX,QAAAA,CAAaM,SAAAA,CAAUC,wBAAAA,CAAyBK,IAKAU;AACxEC,KALAV,yBAAAA,GAA4Bb,QAAAA,CAAaM,SAAAA,CAAUC,wBAAAA,CAAyBO,QAKA;AAI5EW,KARAV,qBAAAA,GAAwBf,QAAAA,CAAaM,SAAAA,CAAUC,wBAAAA,CAAyBS,IAQL;AA4H9DyC,KAnILxC,2BAAAA,GAA8BjB,QAAAA,CAAaM,SAAAA,CAAUC,wBAAAA,CAAyBW,UAoI7D;AAEjBwC,KArIAvC,uBAAAA,GAA0BnB,QAAAA,CAAaM,SAAAA,CAAUC,wBAAAA,CAAyBa,MAqIrD;AAAYuC,KApIjCtC,qBAAAA,GAAwBrB,QAAAA,CAAaM,SAAAA,CAAUC,wBAAAA,CAAyBe,IAoIvCqC;AAAkBxD,KAnInDoB,qBAAAA,GAAwBvB,QAAAA,CAAaM,SAAAA,CAAUC,wBAAAA,CAAyBiB,IAmIrBrB;;;AAA0B;AAIxEyD,KAnILnC,iBAAAA,GAAoBzB,QAAAA,CAAaM,SAAAA,CAAUC,wBAmIpB,CAAA,QAAA,CAAA;;;;;UAPlBkD,gBAAAA;UACLhC;;KAEAiC,qBAAAA,YAAiCC,kBAAkBxD,mBAAmBwD,QAAQxD;;;;UAIzEyD,kBAAAA;;;;;;;;;;;;;;;;;eAiBAxD;;;;;;oBAMKqB,4BAA4BvB,0BAA0BwD;;;;;KAKhEG,eAAAA,GAAkB7D,QAAAA,CAAaM,SAAAA,CAAUwD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;iBAiH7BC,WAAAA,UAAqBH,qBAAqB3D,sBAAsBH,CAAAA,CAAEgC;UAC9EhC,CAAAA,CAAEgD,uBAAuBhD,CAAAA,CAAEgC;UACzBhC,CAAAA,CAAE6B;KACT7B,CAAAA,CAAE8B,IAAAA,CAAKC,SAAS/B,CAAAA,CAAEgC;UACXhC,CAAAA,CAAE6B;OACL7B,CAAAA,CAAEkC;OACFlC,CAAAA,CAAEkC;YACGlC,CAAAA,CAAEoC,WAAWpC,CAAAA,CAAEmC;;;;;;;KAOxBnC,CAAAA,CAAE8B,IAAAA,CAAKC,SAAS/B,CAAAA,CAAEgC;UACXhC,CAAAA,CAAE6B;OACL7B,CAAAA,CAAEkC;OACFlC,CAAAA,CAAEkC;YACGlC,CAAAA,CAAEoC,WAAWpC,CAAAA,CAAEmC;;;;;;;KAOxBnC,CAAAA,CAAE8B,IAAAA,CAAKC,SAAS/B,CAAAA,CAAEgC;UACXhC,CAAAA,CAAE6B;UACF7B,CAAAA,CAAEuC,SAASvC,CAAAA,CAAEgC;SACZhC,CAAAA,CAAEkC;SACFlC,CAAAA,CAAEkC;OACNlC,CAAAA,CAAE8B,IAAAA,CAAKC;KACX/B,CAAAA,CAAE8B,IAAAA,CAAKC,SAAS/B,CAAAA,CAAEgC;UACXhC,CAAAA,CAAE6B;UACF7B,CAAAA,CAAEuC,SAASvC,CAAAA,CAAEyC;KACpBzC,CAAAA,CAAE8B,IAAAA,CAAKC,SAAS/B,CAAAA,CAAEgC;UACXhC,CAAAA,CAAE6B;OACL7B,CAAAA,CAAEkC;OACFlC,CAAAA,CAAEkC;KACNlC,CAAAA,CAAE8B,IAAAA,CAAKC,SAAS/B,CAAAA,CAAEgC;UACXhC,CAAAA,CAAE6B;OACL7B,CAAAA,CAAEkC;OACFlC,CAAAA,CAAEkC;cACKlC,CAAAA,CAAEkC;cACFlC,CAAAA,CAAEkC;KACblC,CAAAA,CAAE8B,IAAAA,CAAKC,SAAS/B,CAAAA,CAAEgC;UACXhC,CAAAA,CAAE6B;UACF7B,CAAAA,CAAEyC;KACTzC,CAAAA,CAAE8B,IAAAA,CAAKC,SAAS/B,CAAAA,CAAEgC;UACXhC,CAAAA,CAAE6B;cACE7B,CAAAA,CAAE8C,YAAY9C,CAAAA,CAAEkC;KAC3BlC,CAAAA,CAAE8B,IAAAA,CAAKC;GACX/B,CAAAA,CAAE8B,IAAAA,CAAKC,SAAS4B,2BAA2BC"}