UNPKG

donobu

Version:

Create browser automations with an LLM agent and replay them as Playwright scripts.

101 lines 4.75 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.InputTextTool = exports.InputTextGptSchema = exports.InputTextNonGptSchema = exports.InputTextCoreSchema = void 0; const v4_1 = require("zod/v4"); const MiscUtils_1 = require("../utils/MiscUtils"); const TargetUtils_1 = require("../utils/TargetUtils"); const ReplayableInteraction_1 = require("./ReplayableInteraction"); exports.InputTextCoreSchema = v4_1.z.object({ text: v4_1.z.string().describe('The text to input'), append: v4_1.z .boolean() .optional() .describe('If true, append the given text to the existing contents of the input, rather than clearing it before putting in the text. Defaults to false.'), finalizeWithSubmit: v4_1.z .boolean() .optional() .describe("Attempt to submit the data after inputting the text (i.e. hitting 'Enter' at the end). " + "This can be useful if doing something like using a webpage's search box, etc."), }); exports.InputTextNonGptSchema = v4_1.z.object({ ...ReplayableInteraction_1.SelectorBasedSchema.shape, ...exports.InputTextCoreSchema.shape, }); exports.InputTextGptSchema = v4_1.z.object({ ...ReplayableInteraction_1.AnnotationBasedSchema.shape, ...exports.InputTextCoreSchema.shape, }); class InputTextTool extends ReplayableInteraction_1.ReplayableInteraction { constructor() { super(InputTextTool.NAME, "Input text to a webpage's text input box.", exports.InputTextCoreSchema, exports.InputTextNonGptSchema, exports.InputTextGptSchema); } async invoke(context, parameters, handles) { const element = handles.target; if (!parameters.append) { // Clear any existing text first. await this.clearField(element); } // Focus the element, then type via the page keyboard rather than holding // a reference to a specific DOM node. This survives mid-sequence DOM // replacement: if a reactive widget (e.g. Wikipedia's CDX search) swaps // the <input> on the first keydown, the replacement receives focus and // subsequent keystrokes are delivered there automatically. await element.focus(); const page = (0, TargetUtils_1.webPage)(context); await context.interactionVisualizer.pointAt(page, element); // Segment into grapheme clusters so that composite characters (e.g. ZWJ // emoji sequences like 👨‍👩‍👧, flags, skin-tone variants) are kept intact. const segmenter = new Intl.Segmenter(undefined, { granularity: 'grapheme', }); for (const { segment } of segmenter.segment(parameters.text)) { if (InputTextTool.isKeyboardPressable(segment)) { // Single typeable character — use press() for realistic keydown/keyup. await page.keyboard.press(segment, { delay: MiscUtils_1.MiscUtils.generateHumanLikeKeyPressDurationInMs(segment), }); } else { // Multi-code-point cluster or character Playwright can't press (e.g. // em-dash, smart quotes, emojis). Insert directly, mimicking a // virtual keyboard or input method. await page.keyboard.insertText(segment); } } // Submit if requested if (parameters.finalizeWithSubmit) { const enterKey = 'Enter'; await page.keyboard.press(enterKey, { delay: MiscUtils_1.MiscUtils.generateHumanLikeKeyPressDurationInMs(enterKey), }); } return `Inputted text '${parameters.text}' into: `; } /** * Returns true when the segment is a single character that Playwright's * keyboard.press() can handle: printable ASCII (space through tilde). */ static isKeyboardPressable(segment) { return segment.length === 1 && segment >= ' ' && segment <= '~'; } async clearField(element) { try { const value = await element.inputValue(); if (value !== '') { await element.selectText({ timeout: 3000 }); const backspaceKey = 'Backspace'; await element.press(backspaceKey, { delay: MiscUtils_1.MiscUtils.generateHumanLikeKeyPressDurationInMs(backspaceKey), timeout: 3000, }); } } catch (_e) { // This can happen if the element is not a text element, but still accepts text inputs. // Pass. } } } exports.InputTextTool = InputTextTool; InputTextTool.NAME = 'inputText'; //# sourceMappingURL=InputTextTool.js.map