donobu
Version:
Create browser automations with an LLM agent and replay them as Playwright scripts.
101 lines • 4.75 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.InputTextTool = exports.InputTextGptSchema = exports.InputTextNonGptSchema = exports.InputTextCoreSchema = void 0;
const v4_1 = require("zod/v4");
const MiscUtils_1 = require("../utils/MiscUtils");
const TargetUtils_1 = require("../utils/TargetUtils");
const ReplayableInteraction_1 = require("./ReplayableInteraction");
exports.InputTextCoreSchema = v4_1.z.object({
text: v4_1.z.string().describe('The text to input'),
append: v4_1.z
.boolean()
.optional()
.describe('If true, append the given text to the existing contents of the input, rather than clearing it before putting in the text. Defaults to false.'),
finalizeWithSubmit: v4_1.z
.boolean()
.optional()
.describe("Attempt to submit the data after inputting the text (i.e. hitting 'Enter' at the end). " +
"This can be useful if doing something like using a webpage's search box, etc."),
});
exports.InputTextNonGptSchema = v4_1.z.object({
...ReplayableInteraction_1.SelectorBasedSchema.shape,
...exports.InputTextCoreSchema.shape,
});
exports.InputTextGptSchema = v4_1.z.object({
...ReplayableInteraction_1.AnnotationBasedSchema.shape,
...exports.InputTextCoreSchema.shape,
});
/**
 * Replayable interaction that types text into a target element, optionally
 * clearing the element first and pressing Enter once the text is in.
 */
class InputTextTool extends ReplayableInteraction_1.ReplayableInteraction {
    /**
     * Registers the tool with its name, its description and the three
     * parameter schemas (core, non-GPT, GPT) via the base-class constructor.
     */
    constructor() {
        super(InputTextTool.NAME, "Input text to a webpage's text input box.", exports.InputTextCoreSchema, exports.InputTextNonGptSchema, exports.InputTextGptSchema);
    }
    /**
     * Types `parameters.text` into `handles.target`.
     *
     * @param context Tool call context; supplies the web page (through
     *   `webPage(context)`) and the `interactionVisualizer`.
     * @param parameters Parsed tool parameters: `text`, plus the optional
     *   `append` and `finalizeWithSubmit` flags.
     * @param handles Object whose `target` is the element to type into.
     * @returns A message echoing the inputted text. It ends with `into: `;
     *   presumably the caller appends a description of the target — confirm.
     */
    async invoke(context, parameters, handles) {
        const target = handles.target;
        // Unless the caller asked to append, wipe whatever the field holds.
        if (!parameters.append) {
            await this.clearField(target);
        }
        // Keystrokes go through the page-level keyboard instead of the element
        // handle. Once the element has focus, typing keeps working even when a
        // reactive widget replaces the <input> node part-way through: the
        // replacement receives focus and the remaining keys land there.
        await target.focus();
        const page = (0, TargetUtils_1.webPage)(context);
        await context.interactionVisualizer.pointAt(page, target);
        // Presses a key with a human-like hold duration computed for that key.
        const pressWithHumanDelay = (key) => page.keyboard.press(key, {
            delay: MiscUtils_1.MiscUtils.generateHumanLikeKeyPressDurationInMs(key),
        });
        // Walk the text one grapheme cluster at a time so composite characters
        // (ZWJ emoji sequences, flags, skin-tone variants) are never split.
        const graphemeSegmenter = new Intl.Segmenter(undefined, {
            granularity: 'grapheme',
        });
        for (const cluster of graphemeSegmenter.segment(parameters.text)) {
            const grapheme = cluster.segment;
            if (!InputTextTool.isKeyboardPressable(grapheme)) {
                // Multi-code-point cluster, or a character that press() cannot
                // handle (em-dash, smart quotes, emoji): insert it directly,
                // the way a virtual keyboard or input method would.
                await page.keyboard.insertText(grapheme);
                continue;
            }
            // Plain typeable character: press() yields realistic keydown/keyup.
            await pressWithHumanDelay(grapheme);
        }
        // Optionally finish with Enter to submit.
        if (parameters.finalizeWithSubmit) {
            await pressWithHumanDelay('Enter');
        }
        return `Inputted text '${parameters.text}' into: `;
    }
    /**
     * Tells whether a segment can be sent with Playwright's keyboard.press():
     * it must be exactly one UTF-16 unit long and lie in the printable ASCII
     * range, space through tilde.
     *
     * @param segment The grapheme cluster to test.
     * @returns `true` if the segment is a single printable ASCII character.
     */
    static isKeyboardPressable(segment) {
        if (segment.length !== 1) {
            return false;
        }
        return ' ' <= segment && segment <= '~';
    }
    /**
     * Best-effort removal of the element's current value: select all of its
     * text and press Backspace. Nothing is done when the value is already
     * empty, and any failure is deliberately ignored.
     *
     * @param element The element to clear.
     */
    async clearField(element) {
        const BACKSPACE = 'Backspace';
        const STEP_TIMEOUT_MS = 3000;
        try {
            const existingValue = await element.inputValue();
            if (existingValue === '') {
                return;
            }
            await element.selectText({ timeout: STEP_TIMEOUT_MS });
            await element.press(BACKSPACE, {
                delay: MiscUtils_1.MiscUtils.generateHumanLikeKeyPressDurationInMs(BACKSPACE),
                timeout: STEP_TIMEOUT_MS,
            });
        }
        catch (_e) {
            // Intentionally ignored: this happens when the element is not a text
            // element but still accepts text input.
        }
    }
}
exports.InputTextTool = InputTextTool;
InputTextTool.NAME = 'inputText';
//# sourceMappingURL=InputTextTool.js.map