@eko-ai/eko-nodejs
Version:
Empowering language to transform human words into action.
1,422 lines (1,363 loc) • 294 kB
JavaScript
'use strict';
var fs = require('fs');
var os = require('os');
var path$1 = require('path');
var eko = require('@eko-ai/eko');
var playwright = require('playwright');
var fs$1 = require('fs/promises');
var node_url = require('node:url');
var node_path = require('node:path');
var actualFS = require('node:fs');
var promises = require('node:fs/promises');
var node_events = require('node:events');
var Stream = require('node:stream');
var node_string_decoder = require('node:string_decoder');
var child_process = require('child_process');
function _interopNamespaceDefault(e) {
var n = Object.create(null);
if (e) {
Object.keys(e).forEach(function (k) {
if (k !== 'default') {
var d = Object.getOwnPropertyDescriptor(e, k);
Object.defineProperty(n, k, d.get ? d : {
enumerable: true,
get: function () { return e[k]; }
});
}
});
}
n.default = e;
return Object.freeze(n);
}
var fs__namespace = /*#__PURE__*/_interopNamespaceDefault(fs);
var os__namespace = /*#__PURE__*/_interopNamespaceDefault(os);
var path__namespace = /*#__PURE__*/_interopNamespaceDefault(path$1);
var fs__namespace$1 = /*#__PURE__*/_interopNamespaceDefault(fs$1);
var actualFS__namespace = /*#__PURE__*/_interopNamespaceDefault(actualFS);
async function getCdpWsEndpoint(port) {
// Example => ws://localhost:9222/devtools/browser/{session-id}
const response = await fetch(`http://localhost:${port}/json/version`);
const browserInfo = await response.json();
eko.Log.info("browserInfo: ", browserInfo);
return browserInfo.webSocketDebuggerUrl;
}
function getDefaultChromeUserDataDir(copyToTempDir = false) {
const platform = os__namespace.platform();
const homeDir = os__namespace.homedir();
let defaultPath;
switch (platform) {
case "win32":
// Windows: %LOCALAPPDATA%\Google\Chrome\User Data
const localAppData = process.env.LOCALAPPDATA || path__namespace.join(homeDir, "AppData", "Local");
defaultPath = path__namespace.join(localAppData, "Google", "Chrome", "User Data");
break;
case "darwin":
// macOS: ~/Library/Application Support/Google/Chrome
defaultPath = path__namespace.join(homeDir, "Library", "Application Support", "Google", "Chrome");
break;
case "linux":
// Linux: ~/.config/google-chrome
defaultPath = path__namespace.join(homeDir, ".config", "google-chrome");
break;
}
if (defaultPath && fs__namespace.existsSync(defaultPath)) {
if (copyToTempDir) {
const tempDir = os__namespace.tmpdir();
const tempPath = path__namespace.join(tempDir, "chrome-user-data");
if (fs__namespace.existsSync(tempPath)) {
eko.Log.info(`Removing existing temp directory: ${tempPath}`);
fs__namespace.rmSync(tempPath, { recursive: true, force: true });
}
fs__namespace.cpSync(defaultPath, tempPath, { recursive: true });
// Delete all Chrome locked files and directories to prevent startup conflicts.
removeLockFiles(tempPath);
const defaultProfilePath = path__namespace.join(tempPath, "Default");
if (fs__namespace.existsSync(defaultProfilePath)) {
removeLockFiles(defaultProfilePath);
}
eko.Log.info(`Created clean Chrome user data directory: ${tempPath}`);
return tempPath;
}
else {
return defaultPath;
}
}
return undefined;
}
function removeLockFiles(dirPath) {
try {
const items = fs__namespace.readdirSync(dirPath);
items.forEach((item) => {
const itemPath = path__namespace.join(dirPath, item);
try {
const stat = fs__namespace.statSync(itemPath);
if (stat.isDirectory()) {
removeLockFiles(itemPath);
}
const shouldDelete = item === "SingletonLock" ||
item === "lockfile" ||
item === "RunningChromeVersion" ||
item === "SingletonCookie" ||
item === "SingletonSocket" ||
item === "chrome_debug.log" ||
item === "LOCK" ||
item === "LOG" ||
item === "LOG.old" ||
item.includes(".lock") ||
item.includes("Lock") ||
item.includes("LOCK") ||
item.includes(".tmp") ||
item.includes("Temp") ||
item.endsWith(".pid") ||
item.endsWith(".log") ||
item.includes("chrome_shutdown_ms.txt") ||
item.includes("Crashpad") ||
(stat.isDirectory() &&
(item.includes("CrashReports") ||
item.includes("ShaderCache") ||
item.includes("crashpad_database")));
if (shouldDelete) {
fs__namespace.rmSync(itemPath, { recursive: true, force: true });
}
}
catch (statError) {
if (item.includes("Lock") ||
item.includes("lock") ||
item.includes("LOCK")) {
try {
eko.Log.info(`Force deleting suspected lock file: ${itemPath}`);
fs__namespace.rmSync(itemPath, { recursive: true, force: true });
}
catch (deleteError) {
eko.Log.warn(`Failed to force delete ${itemPath}:`, deleteError);
}
}
}
});
}
catch (error) {
eko.Log.warn(`Error while removing lock files from ${dirPath}:`, error);
}
}
class BrowserAgent extends eko.BaseBrowserLabelsAgent {
constructor() {
super(...arguments);
this.browser = null;
this.browser_context = null;
this.current_page = null;
this.headless = false;
}
setHeadless(headless) {
this.headless = headless;
}
setCdpWsEndpoint(cdpWsEndpoint) {
this.cdpWsEndpoint = cdpWsEndpoint;
}
initUserDataDir(userDataDir) {
if (userDataDir) {
this.userDataDir = userDataDir;
}
else {
this.userDataDir = getDefaultChromeUserDataDir(true);
}
return this.userDataDir;
}
setOptions(options) {
this.options = options;
}
async screenshot(agentContext) {
let page = await this.currentPage();
let screenshotBuffer = await page.screenshot({
fullPage: false,
type: "jpeg",
quality: 60,
});
let base64 = screenshotBuffer.toString("base64");
return {
imageType: "image/jpeg",
imageBase64: base64,
};
}
async navigate_to(agentContext, url) {
let page = await this.open_url(agentContext, url);
await this.sleep(200);
return {
url: page.url(),
title: await page.title(),
};
}
async get_all_tabs(agentContext) {
if (!this.browser_context) {
return [];
}
let result = [];
const pages = await this.browser_context.pages();
for (let i = 0; i < pages.length; i++) {
let page = pages[i];
result.push({
tabId: i,
url: page.url(),
title: await page.title(),
});
}
return result;
}
async switch_tab(agentContext, tabId) {
if (!this.browser_context) {
throw new Error("tabId does not exist: " + tabId);
}
const pages = await this.browser_context.pages();
const page = pages[tabId];
if (!page) {
throw new Error("tabId does not exist: " + tabId);
}
this.current_page = page;
return {
tabId: tabId,
url: page.url(),
title: await page.title(),
};
}
async input_text(agentContext, index, text, enter) {
try {
let elementHandle = await this.get_element(index, true);
await elementHandle.fill("");
await elementHandle.fill(text);
if (enter) {
await elementHandle.press("Enter");
await this.sleep(200);
}
}
catch (e) {
await super.input_text(agentContext, index, text, enter);
}
}
async click_element(agentContext, index, num_clicks, button) {
try {
let elementHandle = await this.get_element(index, true);
await elementHandle.click({
button,
clickCount: num_clicks,
force: true,
});
}
catch (e) {
await super.click_element(agentContext, index, num_clicks, button);
}
}
async hover_to_element(agentContext, index) {
try {
let elementHandle = await this.get_element(index, true);
elementHandle.hover({ force: true });
}
catch (e) {
await super.hover_to_element(agentContext, index);
}
}
async execute_script(agentContext, func, args) {
let page = await this.currentPage();
return await page.evaluate(func, ...args);
}
async open_url(agentContext, url) {
let browser_context = await this.getBrowserContext();
const page = await browser_context.newPage();
// await page.setViewportSize({ width: 1920, height: 1080 });
await page.setViewportSize({ width: 1536, height: 864 });
try {
await page.goto(url, {
waitUntil: "networkidle",
timeout: 10000,
});
await page.waitForLoadState("load", { timeout: 8000 });
}
catch (e) {
if ((e + "").indexOf("Timeout") == -1) {
throw e;
}
}
this.current_page = page;
return page;
}
async currentPage() {
if (this.current_page == null) {
throw new Error("There is no page, please call navigate_to first");
}
let page = this.current_page;
try {
await page.waitForLoadState("domcontentloaded", { timeout: 10000 });
}
catch (e) { }
return page;
}
async get_element(index, findInput) {
let page = await this.currentPage();
return await page.evaluateHandle((params) => {
let element = window.get_highlight_element(params.index);
if (element && params.findInput) {
if (element.tagName != "INPUT" &&
element.tagName != "TEXTAREA" &&
element.childElementCount != 0) {
element =
element.querySelector("input") ||
element.querySelector("textarea") ||
element;
}
}
return element;
}, { index, findInput });
}
sleep(time) {
return new Promise((resolve) => setTimeout(() => resolve(), time));
}
async getBrowserContext() {
if (!this.browser_context) {
this.current_page = null;
this.browser_context = null;
if (this.cdpWsEndpoint) {
this.browser = await playwright.chromium.connectOverCDP(this.cdpWsEndpoint, this.options);
this.browser_context = await this.browser.newContext();
}
else if (this.userDataDir) {
this.browser_context = await playwright.chromium.launchPersistentContext(this.userDataDir, {
headless: this.headless,
// channel: 'chrome',
args: [
"--no-sandbox",
"--remote-allow-origins=*",
"--disable-dev-shm-usage",
"--disable-popup-blocking",
"--enable-automation",
"--ignore-ssl-errors",
"--ignore-certificate-errors",
"--ignore-certificate-errors-spki-list",
"--disable-blink-features=AutomationControlled",
],
...this.options,
});
}
else {
this.browser = await playwright.chromium.launch({
headless: this.headless,
args: [
"--no-sandbox",
"--remote-allow-origins=*",
"--disable-dev-shm-usage",
"--disable-popup-blocking",
"--enable-automation",
"--ignore-ssl-errors",
"--ignore-certificate-errors",
"--ignore-certificate-errors-spki-list",
"--disable-blink-features=AutomationControlled",
],
...this.options,
});
this.browser_context = await this.browser.newContext();
}
// Anti-crawling detection website:
// https://bot.sannysoft.com/
let init_script = await this.initScript();
this.browser_context.addInitScript(init_script);
}
return this.browser_context;
}
async initScript() {
return {
content: `
// Webdriver property
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
});
// Languages
Object.defineProperty(navigator, 'languages', {
get: () => ['en-US']
});
// Plugins
Object.defineProperty(navigator, 'plugins', {
get: () => [{name:"1"}, {name:"2"}, {name:"3"}, {name:"4"}, {name:"5"}]
});
// Chrome runtime
window.chrome = { runtime: {} };
// Permissions
const originalQuery = window.navigator.permissions.query;
window.navigator.permissions.query = (parameters) => (
parameters.name === 'notifications' ?
Promise.resolve({ state: Notification.permission }) :
originalQuery(parameters)
);
(function () {
const originalAttachShadow = Element.prototype.attachShadow;
Element.prototype.attachShadow = function attachShadow(options) {
return originalAttachShadow.call(this, { ...options, mode: "open" });
};
})();
`,
};
}
}
function getDefaultExportFromCjs (x) {
return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? x['default'] : x;
}
var balancedMatch;
var hasRequiredBalancedMatch;
function requireBalancedMatch () {
if (hasRequiredBalancedMatch) return balancedMatch;
hasRequiredBalancedMatch = 1;
balancedMatch = balanced;
function balanced(a, b, str) {
if (a instanceof RegExp) a = maybeMatch(a, str);
if (b instanceof RegExp) b = maybeMatch(b, str);
var r = range(a, b, str);
return r && {
start: r[0],
end: r[1],
pre: str.slice(0, r[0]),
body: str.slice(r[0] + a.length, r[1]),
post: str.slice(r[1] + b.length)
};
}
function maybeMatch(reg, str) {
var m = str.match(reg);
return m ? m[0] : null;
}
balanced.range = range;
function range(a, b, str) {
var begs, beg, left, right, result;
var ai = str.indexOf(a);
var bi = str.indexOf(b, ai + 1);
var i = ai;
if (ai >= 0 && bi > 0) {
if(a===b) {
return [ai, bi];
}
begs = [];
left = str.length;
while (i >= 0 && !result) {
if (i == ai) {
begs.push(i);
ai = str.indexOf(a, i + 1);
} else if (begs.length == 1) {
result = [ begs.pop(), bi ];
} else {
beg = begs.pop();
if (beg < left) {
left = beg;
right = bi;
}
bi = str.indexOf(b, i + 1);
}
i = ai < bi && ai >= 0 ? ai : bi;
}
if (begs.length) {
result = [ left, right ];
}
}
return result;
}
return balancedMatch;
}
var braceExpansion;
var hasRequiredBraceExpansion;
function requireBraceExpansion () {
if (hasRequiredBraceExpansion) return braceExpansion;
hasRequiredBraceExpansion = 1;
var balanced = requireBalancedMatch();
braceExpansion = expandTop;
var escSlash = '\0SLASH'+Math.random()+'\0';
var escOpen = '\0OPEN'+Math.random()+'\0';
var escClose = '\0CLOSE'+Math.random()+'\0';
var escComma = '\0COMMA'+Math.random()+'\0';
var escPeriod = '\0PERIOD'+Math.random()+'\0';
function numeric(str) {
return parseInt(str, 10) == str
? parseInt(str, 10)
: str.charCodeAt(0);
}
function escapeBraces(str) {
return str.split('\\\\').join(escSlash)
.split('\\{').join(escOpen)
.split('\\}').join(escClose)
.split('\\,').join(escComma)
.split('\\.').join(escPeriod);
}
function unescapeBraces(str) {
return str.split(escSlash).join('\\')
.split(escOpen).join('{')
.split(escClose).join('}')
.split(escComma).join(',')
.split(escPeriod).join('.');
}
// Basically just str.split(","), but handling cases
// where we have nested braced sections, which should be
// treated as individual members, like {a,{b,c},d}
function parseCommaParts(str) {
if (!str)
return [''];
var parts = [];
var m = balanced('{', '}', str);
if (!m)
return str.split(',');
var pre = m.pre;
var body = m.body;
var post = m.post;
var p = pre.split(',');
p[p.length-1] += '{' + body + '}';
var postParts = parseCommaParts(post);
if (post.length) {
p[p.length-1] += postParts.shift();
p.push.apply(p, postParts);
}
parts.push.apply(parts, p);
return parts;
}
function expandTop(str) {
if (!str)
return [];
// I don't know why Bash 4.3 does this, but it does.
// Anything starting with {} will have the first two bytes preserved
// but *only* at the top level, so {},a}b will not expand to anything,
// but a{},b}c will be expanded to [a}c,abc].
// One could argue that this is a bug in Bash, but since the goal of
// this module is to match Bash's rules, we escape a leading {}
if (str.substr(0, 2) === '{}') {
str = '\\{\\}' + str.substr(2);
}
return expand(escapeBraces(str), true).map(unescapeBraces);
}
function embrace(str) {
return '{' + str + '}';
}
function isPadded(el) {
return /^-?0\d/.test(el);
}
function lte(i, y) {
return i <= y;
}
function gte(i, y) {
return i >= y;
}
function expand(str, isTop) {
var expansions = [];
var m = balanced('{', '}', str);
if (!m) return [str];
// no need to expand pre, since it is guaranteed to be free of brace-sets
var pre = m.pre;
var post = m.post.length
? expand(m.post, false)
: [''];
if (/\$$/.test(m.pre)) {
for (var k = 0; k < post.length; k++) {
var expansion = pre+ '{' + m.body + '}' + post[k];
expansions.push(expansion);
}
} else {
var isNumericSequence = /^-?\d+\.\.-?\d+(?:\.\.-?\d+)?$/.test(m.body);
var isAlphaSequence = /^[a-zA-Z]\.\.[a-zA-Z](?:\.\.-?\d+)?$/.test(m.body);
var isSequence = isNumericSequence || isAlphaSequence;
var isOptions = m.body.indexOf(',') >= 0;
if (!isSequence && !isOptions) {
// {a},b}
if (m.post.match(/,.*\}/)) {
str = m.pre + '{' + m.body + escClose + m.post;
return expand(str);
}
return [str];
}
var n;
if (isSequence) {
n = m.body.split(/\.\./);
} else {
n = parseCommaParts(m.body);
if (n.length === 1) {
// x{{a,b}}y ==> x{a}y x{b}y
n = expand(n[0], false).map(embrace);
if (n.length === 1) {
return post.map(function(p) {
return m.pre + n[0] + p;
});
}
}
}
// at this point, n is the parts, and we know it's not a comma set
// with a single entry.
var N;
if (isSequence) {
var x = numeric(n[0]);
var y = numeric(n[1]);
var width = Math.max(n[0].length, n[1].length);
var incr = n.length == 3
? Math.abs(numeric(n[2]))
: 1;
var test = lte;
var reverse = y < x;
if (reverse) {
incr *= -1;
test = gte;
}
var pad = n.some(isPadded);
N = [];
for (var i = x; test(i, y); i += incr) {
var c;
if (isAlphaSequence) {
c = String.fromCharCode(i);
if (c === '\\')
c = '';
} else {
c = String(i);
if (pad) {
var need = width - c.length;
if (need > 0) {
var z = new Array(need + 1).join('0');
if (i < 0)
c = '-' + z + c.slice(1);
else
c = z + c;
}
}
}
N.push(c);
}
} else {
N = [];
for (var j = 0; j < n.length; j++) {
N.push.apply(N, expand(n[j], false));
}
}
for (var j = 0; j < N.length; j++) {
for (var k = 0; k < post.length; k++) {
var expansion = pre + N[j] + post[k];
if (!isTop || isSequence || expansion)
expansions.push(expansion);
}
}
}
return expansions;
}
return braceExpansion;
}
var braceExpansionExports = requireBraceExpansion();
var expand = /*@__PURE__*/getDefaultExportFromCjs(braceExpansionExports);
const MAX_PATTERN_LENGTH = 1024 * 64;
const assertValidPattern = (pattern) => {
if (typeof pattern !== 'string') {
throw new TypeError('invalid pattern');
}
if (pattern.length > MAX_PATTERN_LENGTH) {
throw new TypeError('pattern is too long');
}
};
// translate the various posix character classes into unicode properties
// this works across all unicode locales
// { <posix class>: [<translation>, /u flag required, negated]
const posixClasses = {
'[:alnum:]': ['\\p{L}\\p{Nl}\\p{Nd}', true],
'[:alpha:]': ['\\p{L}\\p{Nl}', true],
'[:ascii:]': ['\\x' + '00-\\x' + '7f', false],
'[:blank:]': ['\\p{Zs}\\t', true],
'[:cntrl:]': ['\\p{Cc}', true],
'[:digit:]': ['\\p{Nd}', true],
'[:graph:]': ['\\p{Z}\\p{C}', true, true],
'[:lower:]': ['\\p{Ll}', true],
'[:print:]': ['\\p{C}', true],
'[:punct:]': ['\\p{P}', true],
'[:space:]': ['\\p{Z}\\t\\r\\n\\v\\f', true],
'[:upper:]': ['\\p{Lu}', true],
'[:word:]': ['\\p{L}\\p{Nl}\\p{Nd}\\p{Pc}', true],
'[:xdigit:]': ['A-Fa-f0-9', false],
};
// only need to escape a few things inside of brace expressions
// escapes: [ \ ] -
const braceEscape = (s) => s.replace(/[[\]\\-]/g, '\\$&');
// escape all regexp magic characters
const regexpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
// everything has already been escaped, we just have to join
const rangesToString = (ranges) => ranges.join('');
// takes a glob string at a posix brace expression, and returns
// an equivalent regular expression source, and boolean indicating
// whether the /u flag needs to be applied, and the number of chars
// consumed to parse the character class.
// This also removes out of order ranges, and returns ($.) if the
// entire class just no good.
const parseClass = (glob, position) => {
const pos = position;
/* c8 ignore start */
if (glob.charAt(pos) !== '[') {
throw new Error('not in a brace expression');
}
/* c8 ignore stop */
const ranges = [];
const negs = [];
let i = pos + 1;
let sawStart = false;
let uflag = false;
let escaping = false;
let negate = false;
let endPos = pos;
let rangeStart = '';
WHILE: while (i < glob.length) {
const c = glob.charAt(i);
if ((c === '!' || c === '^') && i === pos + 1) {
negate = true;
i++;
continue;
}
if (c === ']' && sawStart && !escaping) {
endPos = i + 1;
break;
}
sawStart = true;
if (c === '\\') {
if (!escaping) {
escaping = true;
i++;
continue;
}
// escaped \ char, fall through and treat like normal char
}
if (c === '[' && !escaping) {
// either a posix class, a collation equivalent, or just a [
for (const [cls, [unip, u, neg]] of Object.entries(posixClasses)) {
if (glob.startsWith(cls, i)) {
// invalid, [a-[] is fine, but not [a-[:alpha]]
if (rangeStart) {
return ['$.', false, glob.length - pos, true];
}
i += cls.length;
if (neg)
negs.push(unip);
else
ranges.push(unip);
uflag = uflag || u;
continue WHILE;
}
}
}
// now it's just a normal character, effectively
escaping = false;
if (rangeStart) {
// throw this range away if it's not valid, but others
// can still match.
if (c > rangeStart) {
ranges.push(braceEscape(rangeStart) + '-' + braceEscape(c));
}
else if (c === rangeStart) {
ranges.push(braceEscape(c));
}
rangeStart = '';
i++;
continue;
}
// now might be the start of a range.
// can be either c-d or c-] or c<more...>] or c] at this point
if (glob.startsWith('-]', i + 1)) {
ranges.push(braceEscape(c + '-'));
i += 2;
continue;
}
if (glob.startsWith('-', i + 1)) {
rangeStart = c;
i += 2;
continue;
}
// not the start of a range, just a single character
ranges.push(braceEscape(c));
i++;
}
if (endPos < i) {
// didn't see the end of the class, not a valid class,
// but might still be valid as a literal match.
return ['', false, 0, false];
}
// if we got no ranges and no negates, then we have a range that
// cannot possibly match anything, and that poisons the whole glob
if (!ranges.length && !negs.length) {
return ['$.', false, glob.length - pos, true];
}
// if we got one positive range, and it's a single character, then that's
// not actually a magic pattern, it's just that one literal character.
// we should not treat that as "magic", we should just return the literal
// character. [_] is a perfectly valid way to escape glob magic chars.
if (negs.length === 0 &&
ranges.length === 1 &&
/^\\?.$/.test(ranges[0]) &&
!negate) {
const r = ranges[0].length === 2 ? ranges[0].slice(-1) : ranges[0];
return [regexpEscape(r), false, endPos - pos, false];
}
const sranges = '[' + (negate ? '^' : '') + rangesToString(ranges) + ']';
const snegs = '[' + (negate ? '' : '^') + rangesToString(negs) + ']';
const comb = ranges.length && negs.length
? '(' + sranges + '|' + snegs + ')'
: ranges.length
? sranges
: snegs;
return [comb, uflag, endPos - pos, true];
};
/**
* Un-escape a string that has been escaped with {@link escape}.
*
* If the {@link windowsPathsNoEscape} option is used, then square-brace
* escapes are removed, but not backslash escapes. For example, it will turn
* the string `'[*]'` into `*`, but it will not turn `'\\*'` into `'*'`,
* becuase `\` is a path separator in `windowsPathsNoEscape` mode.
*
* When `windowsPathsNoEscape` is not set, then both brace escapes and
* backslash escapes are removed.
*
* Slashes (and backslashes in `windowsPathsNoEscape` mode) cannot be escaped
* or unescaped.
*/
const unescape = (s, { windowsPathsNoEscape = false, } = {}) => {
return windowsPathsNoEscape
? s.replace(/\[([^\/\\])\]/g, '$1')
: s.replace(/((?!\\).|^)\[([^\/\\])\]/g, '$1$2').replace(/\\([^\/])/g, '$1');
};
// parse a single path portion
const types = new Set(['!', '?', '+', '*', '@']);
const isExtglobType = (c) => types.has(c);
// Patterns that get prepended to bind to the start of either the
// entire string, or just a single path portion, to prevent dots
// and/or traversal patterns, when needed.
// Exts don't need the ^ or / bit, because the root binds that already.
const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))';
const startNoDot = '(?!\\.)';
// characters that indicate a start of pattern needs the "no dots" bit,
// because a dot *might* be matched. ( is not in the list, because in
// the case of a child extglob, it will handle the prevention itself.
const addPatternStart = new Set(['[', '.']);
// cases where traversal is A-OK, no dot prevention needed
const justDots = new Set(['..', '.']);
const reSpecials = new Set('().*{}+?[]^$\\!');
const regExpEscape$1 = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
// any single thing other than /
const qmark$1 = '[^/]';
// * => any number of characters
const star$1 = qmark$1 + '*?';
// use + when we need to ensure that *something* matches, because the * is
// the only thing in the path portion.
const starNoEmpty = qmark$1 + '+?';
// remove the \ chars that we added if we end up doing a nonmagic compare
// const deslash = (s: string) => s.replace(/\\(.)/g, '$1')
class AST {
type;
#root;
#hasMagic;
#uflag = false;
#parts = [];
#parent;
#parentIndex;
#negs;
#filledNegs = false;
#options;
#toString;
// set to true if it's an extglob with no children
// (which really means one child of '')
#emptyExt = false;
constructor(type, parent, options = {}) {
this.type = type;
// extglobs are inherently magical
if (type)
this.#hasMagic = true;
this.#parent = parent;
this.#root = this.#parent ? this.#parent.#root : this;
this.#options = this.#root === this ? options : this.#root.#options;
this.#negs = this.#root === this ? [] : this.#root.#negs;
if (type === '!' && !this.#root.#filledNegs)
this.#negs.push(this);
this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0;
}
get hasMagic() {
/* c8 ignore start */
if (this.#hasMagic !== undefined)
return this.#hasMagic;
/* c8 ignore stop */
for (const p of this.#parts) {
if (typeof p === 'string')
continue;
if (p.type || p.hasMagic)
return (this.#hasMagic = true);
}
// note: will be undefined until we generate the regexp src and find out
return this.#hasMagic;
}
// reconstructs the pattern
toString() {
if (this.#toString !== undefined)
return this.#toString;
if (!this.type) {
return (this.#toString = this.#parts.map(p => String(p)).join(''));
}
else {
return (this.#toString =
this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')');
}
}
#fillNegs() {
/* c8 ignore start */
if (this !== this.#root)
throw new Error('should only call on root');
if (this.#filledNegs)
return this;
/* c8 ignore stop */
// call toString() once to fill this out
this.toString();
this.#filledNegs = true;
let n;
while ((n = this.#negs.pop())) {
if (n.type !== '!')
continue;
// walk up the tree, appending everthing that comes AFTER parentIndex
let p = n;
let pp = p.#parent;
while (pp) {
for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) {
for (const part of n.#parts) {
/* c8 ignore start */
if (typeof part === 'string') {
throw new Error('string part in extglob AST??');
}
/* c8 ignore stop */
part.copyIn(pp.#parts[i]);
}
}
p = pp;
pp = p.#parent;
}
}
return this;
}
push(...parts) {
for (const p of parts) {
if (p === '')
continue;
/* c8 ignore start */
if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
throw new Error('invalid part: ' + p);
}
/* c8 ignore stop */
this.#parts.push(p);
}
}
toJSON() {
const ret = this.type === null
? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON()))
: [this.type, ...this.#parts.map(p => p.toJSON())];
if (this.isStart() && !this.type)
ret.unshift([]);
if (this.isEnd() &&
(this === this.#root ||
(this.#root.#filledNegs && this.#parent?.type === '!'))) {
ret.push({});
}
return ret;
}
isStart() {
if (this.#root === this)
return true;
// if (this.type) return !!this.#parent?.isStart()
if (!this.#parent?.isStart())
return false;
if (this.#parentIndex === 0)
return true;
// if everything AHEAD of this is a negation, then it's still the "start"
const p = this.#parent;
for (let i = 0; i < this.#parentIndex; i++) {
const pp = p.#parts[i];
if (!(pp instanceof AST && pp.type === '!')) {
return false;
}
}
return true;
}
isEnd() {
if (this.#root === this)
return true;
if (this.#parent?.type === '!')
return true;
if (!this.#parent?.isEnd())
return false;
if (!this.type)
return this.#parent?.isEnd();
// if not root, it'll always have a parent
/* c8 ignore start */
const pl = this.#parent ? this.#parent.#parts.length : 0;
/* c8 ignore stop */
return this.#parentIndex === pl - 1;
}
copyIn(part) {
if (typeof part === 'string')
this.push(part);
else
this.push(part.clone(this));
}
clone(parent) {
const c = new AST(this.type, parent);
for (const p of this.#parts) {
c.copyIn(p);
}
return c;
}
static #parseAST(str, ast, pos, opt) {
let escaping = false;
let inBrace = false;
let braceStart = -1;
let braceNeg = false;
if (ast.type === null) {
// outside of a extglob, append until we find a start
let i = pos;
let acc = '';
while (i < str.length) {
const c = str.charAt(i++);
// still accumulate escapes at this point, but we do ignore
// starts that are escaped
if (escaping || c === '\\') {
escaping = !escaping;
acc += c;
continue;
}
if (inBrace) {
if (i === braceStart + 1) {
if (c === '^' || c === '!') {
braceNeg = true;
}
}
else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
inBrace = false;
}
acc += c;
continue;
}
else if (c === '[') {
inBrace = true;
braceStart = i;
braceNeg = false;
acc += c;
continue;
}
if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
ast.push(acc);
acc = '';
const ext = new AST(c, ast);
i = AST.#parseAST(str, ext, i, opt);
ast.push(ext);
continue;
}
acc += c;
}
ast.push(acc);
return i;
}
// some kind of extglob, pos is at the (
// find the next | or )
let i = pos + 1;
let part = new AST(null, ast);
const parts = [];
let acc = '';
while (i < str.length) {
const c = str.charAt(i++);
// still accumulate escapes at this point, but we do ignore
// starts that are escaped
if (escaping || c === '\\') {
escaping = !escaping;
acc += c;
continue;
}
if (inBrace) {
if (i === braceStart + 1) {
if (c === '^' || c === '!') {
braceNeg = true;
}
}
else if (c === ']' && !(i === braceStart + 2 && braceNeg)) {
inBrace = false;
}
acc += c;
continue;
}
else if (c === '[') {
inBrace = true;
braceStart = i;
braceNeg = false;
acc += c;
continue;
}
if (isExtglobType(c) && str.charAt(i) === '(') {
part.push(acc);
acc = '';
const ext = new AST(c, part);
part.push(ext);
i = AST.#parseAST(str, ext, i, opt);
continue;
}
if (c === '|') {
part.push(acc);
acc = '';
parts.push(part);
part = new AST(null, ast);
continue;
}
if (c === ')') {
if (acc === '' && ast.#parts.length === 0) {
ast.#emptyExt = true;
}
part.push(acc);
acc = '';
ast.push(...parts, part);
return i;
}
acc += c;
}
// unfinished extglob
// if we got here, it was a malformed extglob! not an extglob, but
// maybe something else in there.
ast.type = null;
ast.#hasMagic = undefined;
ast.#parts = [str.substring(pos - 1)];
return i;
}
static fromGlob(pattern, options = {}) {
const ast = new AST(null, undefined, options);
AST.#parseAST(pattern, ast, 0, options);
return ast;
}
// returns the regular expression if there's magic, or the unescaped
// string if not.
toMMPattern() {
// should only be called on root
/* c8 ignore start */
if (this !== this.#root)
return this.#root.toMMPattern();
/* c8 ignore stop */
const glob = this.toString();
const [re, body, hasMagic, uflag] = this.toRegExpSource();
// if we're in nocase mode, and not nocaseMagicOnly, then we do
// still need a regular expression if we have to case-insensitively
// match capital/lowercase characters.
const anyMagic = hasMagic ||
this.#hasMagic ||
(this.#options.nocase &&
!this.#options.nocaseMagicOnly &&
glob.toUpperCase() !== glob.toLowerCase());
if (!anyMagic) {
return body;
}
const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 'u' : '');
return Object.assign(new RegExp(`^${re}$`, flags), {
_src: re,
_glob: glob,
});
}
get options() {
return this.#options;
}
// returns the string match, the regexp source, whether there's magic
// in the regexp (so a regular expression is required) and whether or
// not the uflag is needed for the regular expression (for posix classes)
// TODO: instead of injecting the start/end at this point, just return
// the BODY of the regexp, along with the start/end portions suitable
// for binding the start/end in either a joined full-path makeRe context
// (where we bind to (^|/), or a standalone matchPart context (where
// we bind to ^, and not /). Otherwise slashes get duped!
//
// In part-matching mode, the start is:
// - if not isStart: nothing
// - if traversal possible, but not allowed: ^(?!\.\.?$)
// - if dots allowed or not possible: ^
// - if dots possible and not allowed: ^(?!\.)
// end is:
// - if not isEnd(): nothing
// - else: $
//
// In full-path matching mode, we put the slash at the START of the
// pattern, so start is:
// - if first pattern: same as part-matching mode
// - if not isStart(): nothing
// - if traversal possible, but not allowed: /(?!\.\.?(?:$|/))
// - if dots allowed or not possible: /
// - if dots possible and not allowed: /(?!\.)
// end is:
// - if last pattern, same as part-matching mode
// - else nothing
//
// Always put the (?:$|/) on negated tails, though, because that has to be
// there to bind the end of the negated pattern portion, and it's easier to
// just stick it in now rather than try to inject it later in the middle of
// the pattern.
//
// We can just always return the same end, and leave it up to the caller
// to know whether it's going to be used joined or in parts.
// And, if the start is adjusted slightly, can do the same there:
// - if not isStart: nothing
// - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$)
// - if dots allowed or not possible: (?:/|^)
// - if dots possible and not allowed: (?:/|^)(?!\.)
//
// But it's better to have a simpler binding without a conditional, for
// performance, so probably better to return both start options.
//
// Then the caller just ignores the end if it's not the first pattern,
// and the start always gets applied.
//
// But that's always going to be $ if it's the ending pattern, or nothing,
// so the caller can just attach $ at the end of the pattern when building.
//
// So the todo is:
// - better detect what kind of start is needed
// - return both flavors of starting pattern
// - attach $ at the end of the pattern when creating the actual RegExp
//
// Ah, but wait, no, that all only applies to the root when the first pattern
// is not an extglob. If the first pattern IS an extglob, then we need all
// that dot prevention biz to live in the extglob portions, because eg
// +(*|.x*) can match .xy but not .yx.
//
// So, return the two flavors if it's #root and the first child is not an
// AST, otherwise leave it to the child AST to handle it, and there,
// use the (?:^|/) style of start binding.
//
// Even simplified further:
// - Since the start for a join is eg /(?!\.) and the start for a part
// is ^(?!\.), we can just prepend (?!\.) to the pattern (either root
// or start or whatever) and prepend ^ or / at the Regexp construction.
toRegExpSource(allowDot) {
const dot = allowDot ?? !!this.#options.dot;
if (this.#root === this)
this.#fillNegs();
if (!this.type) {
const noEmpty = this.isStart() && this.isEnd();
const src = this.#parts
.map(p => {
const [re, _, hasMagic, uflag] = typeof p === 'string'
? AST.#parseGlob(p, this.#hasMagic, noEmpty)
: p.toRegExpSource(allowDot);
this.#hasMagic = this.#hasMagic || hasMagic;
this.#uflag = this.#uflag || uflag;
return re;
})
.join('');
let start = '';
if (this.isStart()) {
if (typeof this.#parts[0] === 'string') {
// this is the string that will match the start of the pattern,
// so we need to protect against dots and such.
// '.' and '..' cannot match unless the pattern is that exactly,
// even if it starts with . or dot:true is set.
const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]);
if (!dotTravAllowed) {
const aps = addPatternStart;
// check if we have a possibility of matching . or ..,
// and prevent that.
const needNoTrav =
// dots are allowed, and the pattern starts with [ or .
(dot && aps.has(src.charAt(0))) ||
// the pattern starts with \., and then [ or .
(src.startsWith('\\.') && aps.has(src.charAt(2))) ||
// the pattern starts with \.\., and then [ or .
(src.startsWith('\\.\\.') && aps.has(src.charAt(4)));
// no need to prevent dots if it can't match a dot, or if a
// sub-pattern will be preventing it anyway.
const needNoDot = !dot && !allowDot && aps.has(src.charAt(0));
start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : '';
}
}
}
// append the "end of path portion" pattern to negation tails
let end = '';
if (this.isEnd() &&
this.#root.#filledNegs &&
this.#parent?.type === '!') {
end = '(?:$|\\/)';
}
const final = start + src + end;
return [
final,
unescape(src),
(this.#hasMagic = !!this.#hasMagic),
this.#uflag,
];
}
// We need to calculate the body *twice* if it's a repeat pattern
// at the start, once in nodot mode, then again in dot mode, so a
// pattern like *(?) can match 'x.y'
const repeated = this.type === '*' || this.type === '+';
// some kind of extglob
const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
let body = this.#partsToRegExp(dot);
if (this.isStart() && this.isEnd() && !body && this.type !== '!') {
// invalid extglob, has to at least be *something* present, if it's
// the entire path portion.
const s = this.toString();
this.#parts = [s];
this.type = null;
this.#hasMagic = undefined;
return [s, unescape(this.toString()), false, false];
}
// XXX abstract out this map method
let bodyDotAllowed = !repeated || allowDot || dot || false
? ''
: this.#partsToRegExp(true);
if (bodyDotAllowed === body) {
bodyDotAllowed = '';
}
if (bodyDotAllowed) {
body = `(?:${body})(?:${bodyDotAllowed})*?`;
}
// an empty !() is exactly equivalent to a starNoEmpty
let final = '';
if (this.type === '!' && this.#emptyExt) {
final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty;
}
else {
const close = this.type === '!'
? // !() must match something,but !(x) can match ''
'))' +
(this.isStart() && !dot && !allowDot ? startNoDot : '') +
star$1 +
')'
: this.type === '@'
? ')'
: this.type === '?'
? ')?'
: this.type === '+' && bodyDotAllowed
? ')'
: this.type === '*' && bodyDotAllowed
? `)?`
: `)${this.type}`;
final = start + body + close;
}
return [
final,
unescape(body),
(this.#hasMagic = !!this.#hasMagic),
this.#uflag,
];
}
#partsToRegExp(dot) {
return this.#parts
.map(p => {
// extglob ASTs should only contain parent ASTs
/* c8 ignore start */
if (typeof p === 'string') {
throw new Error('string type in extglob ast??');
}
/* c8 ignore stop */
// can ignore hasMagic, because extglobs are already always magic
const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot);
this.#uflag = this.#uflag || uflag;
return re;
})
.filter(p => !(this.isStart() && this.isEnd()) || !!p)
.join('|');
}
static #parseGlob(glob, hasMagic, noEmpty = false) {
let escaping = false;
let re = '';
let uflag = false;
for (let i = 0; i < glob.length; i++) {
const c = glob.charAt(i);
if (escaping) {
escaping = false;
re += (reSpecials.has(c) ? '\\' : '') + c;
continue;
}
if (c === '\\') {
if (i === glob.leng