UNPKG

@eko-ai/eko-nodejs

Version:

Empowering language to transform human words into action.

1,422 lines (1,363 loc) 294 kB
'use strict';

var fs = require('fs');
var os = require('os');
var path$1 = require('path');
var eko = require('@eko-ai/eko');
var playwright = require('playwright');
var fs$1 = require('fs/promises');
var node_url = require('node:url');
var node_path = require('node:path');
var actualFS = require('node:fs');
var promises = require('node:fs/promises');
var node_events = require('node:events');
var Stream = require('node:stream');
var node_string_decoder = require('node:string_decoder');
var child_process = require('child_process');

/**
 * Bundler interop helper: builds a frozen, prototype-less namespace object
 * that re-exposes every own key of `e` (except `default`) through getters and
 * stores `e` itself under `default`.
 *
 * @param {object} e - The CommonJS module object to wrap.
 * @returns {object} Frozen namespace object.
 */
function _interopNamespaceDefault(e) {
    var n = Object.create(null);
    if (e) {
        Object.keys(e).forEach(function (k) {
            if (k !== 'default') {
                var d = Object.getOwnPropertyDescriptor(e, k);
                Object.defineProperty(n, k, d.get ? d : {
                    enumerable: true,
                    get: function () { return e[k]; }
                });
            }
        });
    }
    n.default = e;
    return Object.freeze(n);
}

var fs__namespace = /*#__PURE__*/_interopNamespaceDefault(fs);
var os__namespace = /*#__PURE__*/_interopNamespaceDefault(os);
var path__namespace = /*#__PURE__*/_interopNamespaceDefault(path$1);
var fs__namespace$1 = /*#__PURE__*/_interopNamespaceDefault(fs$1);
var actualFS__namespace = /*#__PURE__*/_interopNamespaceDefault(actualFS);

/**
 * Asks the browser listening on `localhost:<port>` for its DevTools
 * WebSocket endpoint by reading `/json/version`.
 *
 * @param {number|string} port - Remote-debugging port of the browser.
 * @returns {Promise<string>} The `webSocketDebuggerUrl` field of the response,
 *   e.g. `ws://localhost:9222/devtools/browser/{session-id}`.
 * @throws {Error} When the HTTP request does not return a 2xx status.
 */
async function getCdpWsEndpoint(port) {
    const response = await fetch(`http://localhost:${port}/json/version`);
    if (!response.ok) {
        // Fail with a readable message instead of a JSON parse error later on.
        throw new Error(`Failed to query CDP endpoint on port ${port}: HTTP ${response.status}`);
    }
    const browserInfo = await response.json();
    eko.Log.info("browserInfo: ", browserInfo);
    return browserInfo.webSocketDebuggerUrl;
}

/**
 * Locates the default Google Chrome user-data directory for the current
 * platform (win32, darwin, linux).
 *
 * @param {boolean} [copyToTempDir=false] - When true, the directory is copied
 *   to `<os.tmpdir()>/chrome-user-data` (any previous copy is removed first),
 *   lock/log/temp artifacts are stripped from the copy, and the copy's path is
 *   returned instead of the original.
 * @returns {string|undefined} The directory path, or `undefined` when the
 *   platform is not handled or the directory does not exist.
 */
function getDefaultChromeUserDataDir(copyToTempDir = false) {
    const platform = os__namespace.platform();
    const homeDir = os__namespace.homedir();
    let defaultPath;
    switch (platform) {
        case "win32": {
            // Windows: %LOCALAPPDATA%\Google\Chrome\User Data
            const localAppData = process.env.LOCALAPPDATA || path__namespace.join(homeDir, "AppData", "Local");
            defaultPath = path__namespace.join(localAppData, "Google", "Chrome", "User Data");
            break;
        }
        case "darwin":
            // macOS: ~/Library/Application Support/Google/Chrome
            defaultPath = path__namespace.join(homeDir, "Library", "Application Support", "Google", "Chrome");
            break;
        case "linux":
            // Linux: ~/.config/google-chrome
            defaultPath = path__namespace.join(homeDir, ".config", "google-chrome");
            break;
    }
    if (!defaultPath || !fs__namespace.existsSync(defaultPath)) {
        return undefined;
    }
    if (!copyToTempDir) {
        return defaultPath;
    }
    const tempPath = path__namespace.join(os__namespace.tmpdir(), "chrome-user-data");
    if (fs__namespace.existsSync(tempPath)) {
        eko.Log.info(`Removing existing temp directory: ${tempPath}`);
        fs__namespace.rmSync(tempPath, { recursive: true, force: true });
    }
    fs__namespace.cpSync(defaultPath, tempPath, { recursive: true });
    // Delete all Chrome lock files and directories to prevent startup
    // conflicts. The sweep is recursive, so the "Default" profile is covered.
    removeLockFiles(tempPath);
    eko.Log.info(`Created clean Chrome user data directory: ${tempPath}`);
    return tempPath;
}

/**
 * Decides whether a directory entry is a lock/log/temp/crash artifact that
 * must be removed from a copied Chrome profile.
 *
 * @param {string} item - Entry name (not a full path).
 * @param {boolean} isDirectory - Whether the entry itself is a directory.
 * @returns {boolean} True when the entry should be deleted.
 */
function isStaleChromeArtifact(item, isDirectory) {
    const exactNames = [
        "SingletonLock", "lockfile", "RunningChromeVersion", "SingletonCookie",
        "SingletonSocket", "chrome_debug.log", "LOCK", "LOG", "LOG.old",
    ];
    const fragments = [".lock", "Lock", "LOCK", ".tmp", "Temp", "chrome_shutdown_ms.txt", "Crashpad"];
    const directoryFragments = ["CrashReports", "ShaderCache", "crashpad_database"];
    return exactNames.includes(item) ||
        fragments.some((fragment) => item.includes(fragment)) ||
        item.endsWith(".pid") ||
        item.endsWith(".log") ||
        (isDirectory && directoryFragments.some((fragment) => item.includes(fragment)));
}

/**
 * Recursively deletes lock, log, temp and crash-report artifacts below
 * `dirPath`. Failures are logged and never thrown.
 *
 * @param {string} dirPath - Directory to sweep.
 * @returns {void}
 */
function removeLockFiles(dirPath) {
    let items;
    try {
        items = fs__namespace.readdirSync(dirPath);
    } catch (error) {
        eko.Log.warn(`Error while removing lock files from ${dirPath}:`, error);
        return;
    }
    for (const item of items) {
        const itemPath = path__namespace.join(dirPath, item);
        try {
            // lstat (not stat): a dangling symlink must still be inspected and
            // removed, and a symlinked directory must not be followed out of
            // the tree being cleaned.
            const isDirectory = fs__namespace.lstatSync(itemPath).isDirectory();
            if (isStaleChromeArtifact(item, isDirectory)) {
                fs__namespace.rmSync(itemPath, { recursive: true, force: true });
            } else if (isDirectory) {
                removeLockFiles(itemPath);
            }
        } catch (statError) {
            // Entry could not be inspected or removed; make one more attempt
            // for anything that looks like a lock file.
            if (item.includes("Lock") || item.includes("lock") || item.includes("LOCK")) {
                try {
                    eko.Log.info(`Force deleting suspected lock file: ${itemPath}`);
                    fs__namespace.rmSync(itemPath, { recursive: true, force: true });
                } catch (deleteError) {
                    eko.Log.warn(`Failed to force delete ${itemPath}:`, deleteError);
                }
            }
        }
    }
}
/**
 * Browser agent backed by Playwright's Chromium driver. Extends
 * `eko.BaseBrowserLabelsAgent` and falls back to the base-class
 * implementation whenever a direct Playwright interaction throws.
 */
class BrowserAgent extends eko.BaseBrowserLabelsAgent {
    /** Chromium command-line switches used for every locally launched browser. */
    static DEFAULT_LAUNCH_ARGS = Object.freeze([
        "--no-sandbox",
        "--remote-allow-origins=*",
        "--disable-dev-shm-usage",
        "--disable-popup-blocking",
        "--enable-automation",
        "--ignore-ssl-errors",
        "--ignore-certificate-errors",
        "--ignore-certificate-errors-spki-list",
        "--disable-blink-features=AutomationControlled",
    ]);

    constructor(...args) {
        super(...args);
        this.browser = null;
        this.browser_context = null;
        this.current_page = null;
        this.headless = false;
    }

    /** @param {boolean} headless - Whether locally launched browsers run headless. */
    setHeadless(headless) {
        this.headless = headless;
    }

    /** @param {string} cdpWsEndpoint - CDP endpoint to connect to instead of launching a browser. */
    setCdpWsEndpoint(cdpWsEndpoint) {
        this.cdpWsEndpoint = cdpWsEndpoint;
    }

    /**
     * Selects the user-data directory for a persistent context.
     *
     * @param {string} [userDataDir] - Explicit directory; when falsy, a temp
     *   copy of the default Chrome profile is used (may be `undefined`).
     * @returns {string|undefined} The directory now stored on the agent.
     */
    initUserDataDir(userDataDir) {
        this.userDataDir = userDataDir ? userDataDir : getDefaultChromeUserDataDir(true);
        return this.userDataDir;
    }

    /** @param {object} options - Extra options spread into the Playwright launch/connect call. */
    setOptions(options) {
        this.options = options;
    }

    /**
     * Captures the visible viewport of the current page as a JPEG (quality 60).
     *
     * @returns {Promise<{imageType: string, imageBase64: string}>}
     */
    async screenshot(agentContext) {
        const page = await this.currentPage();
        const screenshotBuffer = await page.screenshot({
            fullPage: false,
            type: "jpeg",
            quality: 60,
        });
        return {
            imageType: "image/jpeg",
            imageBase64: screenshotBuffer.toString("base64"),
        };
    }

    /**
     * Opens `url` in a new page and reports where the page ended up.
     *
     * @returns {Promise<{url: string, title: string}>}
     */
    async navigate_to(agentContext, url) {
        const page = await this.open_url(agentContext, url);
        await this.sleep(200);
        return {
            url: page.url(),
            title: await page.title(),
        };
    }

    /**
     * Lists the pages of the current browser context; the array index of each
     * page is its `tabId`.
     *
     * @returns {Promise<Array<{tabId: number, url: string, title: string}>>}
     *   Empty when no browser context exists yet.
     */
    async get_all_tabs(agentContext) {
        if (!this.browser_context) {
            return [];
        }
        const pages = await this.browser_context.pages();
        const result = [];
        for (let i = 0; i < pages.length; i++) {
            const page = pages[i];
            result.push({
                tabId: i,
                url: page.url(),
                title: await page.title(),
            });
        }
        return result;
    }

    /**
     * Makes the page at index `tabId` the current page.
     *
     * @returns {Promise<{tabId: number, url: string, title: string}>}
     * @throws {Error} When there is no browser context or no page at `tabId`.
     */
    async switch_tab(agentContext, tabId) {
        if (!this.browser_context) {
            throw new Error("tabId does not exist: " + tabId);
        }
        const pages = await this.browser_context.pages();
        const page = pages[tabId];
        if (!page) {
            throw new Error("tabId does not exist: " + tabId);
        }
        this.current_page = page;
        return {
            tabId: tabId,
            url: page.url(),
            title: await page.title(),
        };
    }

    /**
     * Clears and fills the element with the given highlight index, optionally
     * pressing Enter afterwards. Any failure delegates to the base class.
     */
    async input_text(agentContext, index, text, enter) {
        try {
            const elementHandle = await this.get_element(index, true);
            await elementHandle.fill("");
            await elementHandle.fill(text);
            if (enter) {
                await elementHandle.press("Enter");
                await this.sleep(200);
            }
        } catch (e) {
            await super.input_text(agentContext, index, text, enter);
        }
    }

    /**
     * Force-clicks the element with the given highlight index. Any failure
     * delegates to the base class.
     */
    async click_element(agentContext, index, num_clicks, button) {
        try {
            const elementHandle = await this.get_element(index, true);
            await elementHandle.click({
                button,
                clickCount: num_clicks,
                force: true,
            });
        } catch (e) {
            await super.click_element(agentContext, index, num_clicks, button);
        }
    }

    /**
     * Force-hovers the element with the given highlight index. Any failure
     * delegates to the base class.
     */
    async hover_to_element(agentContext, index) {
        try {
            const elementHandle = await this.get_element(index, true);
            // Must be awaited: otherwise a rejected hover escapes this
            // try/catch and the fallback below can never run.
            await elementHandle.hover({ force: true });
        } catch (e) {
            await super.hover_to_element(agentContext, index);
        }
    }

    /**
     * Evaluates `func` in the current page.
     *
     * @param {Function|string} func - Function to evaluate in the page.
     * @param {Array} [args] - Arguments forwarded to `page.evaluate`; a
     *   missing value is treated as an empty list.
     */
    async execute_script(agentContext, func, args) {
        const page = await this.currentPage();
        return await page.evaluate(func, ...(args ?? []));
    }

    /**
     * Opens `url` in a new 1536x864 page and makes it the current page.
     * Errors whose text contains "Timeout" are tolerated; all others are
     * rethrown.
     *
     * @returns {Promise<object>} The Playwright page.
     */
    async open_url(agentContext, url) {
        const browserContext = await this.getBrowserContext();
        const page = await browserContext.newPage();
        await page.setViewportSize({ width: 1536, height: 864 });
        try {
            await page.goto(url, {
                waitUntil: "networkidle",
                timeout: 10000,
            });
            await page.waitForLoadState("load", { timeout: 8000 });
        } catch (e) {
            if (!String(e).includes("Timeout")) {
                throw e;
            }
        }
        this.current_page = page;
        return page;
    }

    /**
     * Returns the current page after waiting (at most 10 s) for
     * `domcontentloaded`.
     *
     * @throws {Error} When no page has been opened yet.
     */
    async currentPage() {
        if (this.current_page == null) {
            throw new Error("There is no page, please call navigate_to first");
        }
        const page = this.current_page;
        try {
            await page.waitForLoadState("domcontentloaded", { timeout: 10000 });
        } catch (e) {
            // Best effort: the page is returned even if the wait fails.
        }
        return page;
    }

    /**
     * Resolves a highlight index to a JS handle via the page-side
     * `window.get_highlight_element`. With `findInput`, a non-input element
     * that has children is replaced by its first descendant `input` or
     * `textarea` when one exists.
     */
    async get_element(index, findInput) {
        const page = await this.currentPage();
        return await page.evaluateHandle((params) => {
            let element = window.get_highlight_element(params.index);
            if (element && params.findInput) {
                if (element.tagName != "INPUT" &&
                    element.tagName != "TEXTAREA" &&
                    element.childElementCount != 0) {
                    element =
                        element.querySelector("input") ||
                            element.querySelector("textarea") ||
                            element;
                }
            }
            return element;
        }, { index, findInput });
    }

    /** @returns {Promise<void>} Resolves after `time` milliseconds. */
    sleep(time) {
        return new Promise((resolve) => setTimeout(() => resolve(), time));
    }

    /**
     * Lazily creates the browser context: connects over CDP when an endpoint
     * was set, otherwise launches a persistent context when a user-data
     * directory was set, otherwise launches a fresh Chromium. The init script
     * is registered before the context is returned.
     *
     * @returns {Promise<object>} The Playwright browser context.
     */
    async getBrowserContext() {
        if (this.browser_context) {
            return this.browser_context;
        }
        this.current_page = null;
        if (this.cdpWsEndpoint) {
            this.browser = await playwright.chromium.connectOverCDP(this.cdpWsEndpoint, this.options);
            this.browser_context = await this.browser.newContext();
        } else if (this.userDataDir) {
            this.browser_context = await playwright.chromium.launchPersistentContext(this.userDataDir, {
                headless: this.headless,
                args: [...BrowserAgent.DEFAULT_LAUNCH_ARGS],
                ...this.options,
            });
        } else {
            this.browser = await playwright.chromium.launch({
                headless: this.headless,
                args: [...BrowserAgent.DEFAULT_LAUNCH_ARGS],
                ...this.options,
            });
            this.browser_context = await this.browser.newContext();
        }
        // Anti-crawling detection website:
        // https://bot.sannysoft.com/
        const initScript = await this.initScript();
        // Awaited so the script is registered before any page is created.
        await this.browser_context.addInitScript(initScript);
        return this.browser_context;
    }

    /**
     * Builds the script injected into every page: masks common automation
     * fingerprints and forces shadow roots open.
     *
     * @returns {Promise<{content: string}>}
     */
    async initScript() {
        return {
            content: `
      // Webdriver property
      Object.defineProperty(navigator, 'webdriver', {
        get: () => undefined
      });

      // Languages
      Object.defineProperty(navigator, 'languages', {
        get: () => ['en-US']
      });

      // Plugins
      Object.defineProperty(navigator, 'plugins', {
        get: () => [{name:"1"}, {name:"2"}, {name:"3"}, {name:"4"}, {name:"5"}]
      });

      // Chrome runtime
      window.chrome = { runtime: {} };

      // Permissions (native method must stay bound to its receiver)
      const originalQuery = window.navigator.permissions.query;
      window.navigator.permissions.query = (parameters) => (
        parameters.name === 'notifications' ?
          Promise.resolve({ state: Notification.permission }) :
          originalQuery.call(window.navigator.permissions, parameters)
      );

      (function () {
        const originalAttachShadow = Element.prototype.attachShadow;
        Element.prototype.attachShadow = function attachShadow(options) {
          return originalAttachShadow.call(this, { ...options, mode: "open" });
        };
      })();
    `,
        };
    }
}
/**
 * Bundler interop helper: returns `x.default` when `x` is flagged as an ES
 * module and owns a `default` property, otherwise returns `x` unchanged.
 */
function getDefaultExportFromCjs (x) {
    return x && x.__esModule && Object.prototype.hasOwnProperty.call(x, 'default') ? x['default'] : x;
}

var balancedMatch;
var hasRequiredBalancedMatch;

/**
 * Lazily initialises and returns the bundled `balanced-match` function.
 *
 * `balanced(a, b, str)` finds the first balanced `a ... b` pair in `str` and
 * returns `{ start, end, pre, body, post }`, or `undefined` when none exists.
 */
function requireBalancedMatch () {
    if (hasRequiredBalancedMatch) return balancedMatch;
    hasRequiredBalancedMatch = 1;
    balancedMatch = balanced;

    function balanced(a, b, str) {
        if (a instanceof RegExp) a = maybeMatch(a, str);
        if (b instanceof RegExp) b = maybeMatch(b, str);

        var r = range(a, b, str);

        return r && {
            start: r[0],
            end: r[1],
            pre: str.slice(0, r[0]),
            body: str.slice(r[0] + a.length, r[1]),
            post: str.slice(r[1] + b.length)
        };
    }

    // Returns the first match of `reg` in `str`, or null.
    function maybeMatch(reg, str) {
        var m = str.match(reg);
        return m ? m[0] : null;
    }

    balanced.range = range;

    // Returns [openIndex, closeIndex] of the first balanced pair, or undefined.
    function range(a, b, str) {
        var begs, beg, left, right, result;
        var ai = str.indexOf(a);
        var bi = str.indexOf(b, ai + 1);
        var i = ai;

        if (ai >= 0 && bi > 0) {
            if (a === b) {
                return [ai, bi];
            }
            begs = [];
            left = str.length;

            while (i >= 0 && !result) {
                if (i === ai) {
                    begs.push(i);
                    ai = str.indexOf(a, i + 1);
                } else if (begs.length === 1) {
                    result = [ begs.pop(), bi ];
                } else {
                    beg = begs.pop();
                    if (beg < left) {
                        left = beg;
                        right = bi;
                    }

                    bi = str.indexOf(b, i + 1);
                }

                i = ai < bi && ai >= 0 ? ai : bi;
            }

            if (begs.length) {
                result = [ left, right ];
            }
        }

        return result;
    }
    return balancedMatch;
}

var braceExpansion;
var hasRequiredBraceExpansion;

/**
 * Lazily initialises and returns the bundled `brace-expansion` function,
 * which expands Bash-style brace patterns (`a{b,c}d`, `{1..3}`, `{a..e..2}`)
 * into an array of strings.
 */
function requireBraceExpansion () {
    if (hasRequiredBraceExpansion) return braceExpansion;
    hasRequiredBraceExpansion = 1;
    var balanced = requireBalancedMatch();

    braceExpansion = expandTop;

    // Random placeholders that stand in for backslash-escaped characters
    // while the pattern is being expanded.
    var escSlash = '\0SLASH'+Math.random()+'\0';
    var escOpen = '\0OPEN'+Math.random()+'\0';
    var escClose = '\0CLOSE'+Math.random()+'\0';
    var escComma = '\0COMMA'+Math.random()+'\0';
    var escPeriod = '\0PERIOD'+Math.random()+'\0';

    // Integer value of `str` when it is an integer literal, else the char
    // code of its first character. The loose `==` (number vs. string) is
    // intentional.
    function numeric(str) {
        return parseInt(str, 10) == str
            ? parseInt(str, 10)
            : str.charCodeAt(0);
    }

    function escapeBraces(str) {
        return str.split('\\\\').join(escSlash)
            .split('\\{').join(escOpen)
            .split('\\}').join(escClose)
            .split('\\,').join(escComma)
            .split('\\.').join(escPeriod);
    }

    function unescapeBraces(str) {
        return str.split(escSlash).join('\\')
            .split(escOpen).join('{')
            .split(escClose).join('}')
            .split(escComma).join(',')
            .split(escPeriod).join('.');
    }

    // Basically just str.split(","), but handling cases
    // where we have nested braced sections, which should be
    // treated as individual members, like {a,{b,c},d}
    function parseCommaParts(str) {
        if (!str)
            return [''];

        var parts = [];
        var m = balanced('{', '}', str);

        if (!m)
            return str.split(',');

        var pre = m.pre;
        var body = m.body;
        var post = m.post;
        var p = pre.split(',');

        p[p.length-1] += '{' + body + '}';
        var postParts = parseCommaParts(post);
        if (post.length) {
            p[p.length-1] += postParts.shift();
            p.push.apply(p, postParts);
        }

        parts.push.apply(parts, p);

        return parts;
    }

    function expandTop(str) {
        if (!str)
            return [];

        // I don't know why Bash 4.3 does this, but it does.
        // Anything starting with {} will have the first two bytes preserved
        // but *only* at the top level, so {},a}b will not expand to anything,
        // but a{},b}c will be expanded to [a}c,abc].
        // One could argue that this is a bug in Bash, but since the goal of
        // this module is to match Bash's rules, we escape a leading {}
        if (str.slice(0, 2) === '{}') {
            str = '\\{\\}' + str.slice(2);
        }

        return expand(escapeBraces(str), true).map(unescapeBraces);
    }

    function embrace(str) {
        return '{' + str + '}';
    }
    function isPadded(el) {
        return /^-?0\d/.test(el);
    }

    function lte(i, y) {
        return i <= y;
    }
    function gte(i, y) {
        return i >= y;
    }

    function expand(str, isTop) {
        var expansions = [];

        var m = balanced('{', '}', str);
        if (!m) return [str];

        // no need to expand pre, since it is guaranteed to be free of brace-sets
        var pre = m.pre;
        var post = m.post.length
            ? expand(m.post, false)
            : [''];

        if (/\$$/.test(m.pre)) {
            for (var k = 0; k < post.length; k++) {
                var expansion = pre+ '{' + m.body + '}' + post[k];
                expansions.push(expansion);
            }
        } else {
            var isNumericSequence = /^-?\d+\.\.-?\d+(?:\.\.-?\d+)?$/.test(m.body);
            var isAlphaSequence = /^[a-zA-Z]\.\.[a-zA-Z](?:\.\.-?\d+)?$/.test(m.body);
            var isSequence = isNumericSequence || isAlphaSequence;
            var isOptions = m.body.indexOf(',') >= 0;
            if (!isSequence && !isOptions) {
                // {a},b}
                // The (?!,) lookahead keeps this linear on long runs of
                // commas; the unguarded /,.*\}/ backtracks quadratically.
                if (m.post.match(/,(?!,).*\}/)) {
                    str = m.pre + '{' + m.body + escClose + m.post;
                    return expand(str);
                }
                return [str];
            }

            var n;
            if (isSequence) {
                n = m.body.split(/\.\./);
            } else {
                n = parseCommaParts(m.body);
                if (n.length === 1) {
                    // x{{a,b}}y ==> x{a}y x{b}y
                    n = expand(n[0], false).map(embrace);
                    if (n.length === 1) {
                        return post.map(function(p) {
                            return m.pre + n[0] + p;
                        });
                    }
                }
            }

            // at this point, n is the parts, and we know it's not a comma set
            // with a single entry.
            var N;

            if (isSequence) {
                var x = numeric(n[0]);
                var y = numeric(n[1]);
                var width = Math.max(n[0].length, n[1].length);
                // A step of 0 would never terminate the loop below; treat it
                // as 1.
                var incr = n.length === 3
                    ? (Math.abs(numeric(n[2])) || 1)
                    : 1;
                var test = lte;
                var reverse = y < x;
                if (reverse) {
                    incr *= -1;
                    test = gte;
                }
                var pad = n.some(isPadded);

                N = [];

                for (var i = x; test(i, y); i += incr) {
                    var c;
                    if (isAlphaSequence) {
                        c = String.fromCharCode(i);
                        if (c === '\\')
                            c = '';
                    } else {
                        c = String(i);
                        if (pad) {
                            var need = width - c.length;
                            if (need > 0) {
                                var z = new Array(need + 1).join('0');
                                if (i < 0)
                                    c = '-' + z + c.slice(1);
                                else
                                    c = z + c;
                            }
                        }
                    }
                    N.push(c);
                }
            } else {
                N = [];

                for (var j = 0; j < n.length; j++) {
                    N.push.apply(N, expand(n[j], false));
                }
            }

            for (var j = 0; j < N.length; j++) {
                for (var k = 0; k < post.length; k++) {
                    var expansion = pre + N[j] + post[k];
                    if (!isTop || isSequence || expansion)
                        expansions.push(expansion);
                }
            }
        }

        return expansions;
    }
    return braceExpansion;
}

var braceExpansionExports = requireBraceExpansion();
var expand = /*@__PURE__*/getDefaultExportFromCjs(braceExpansionExports);
const MAX_PATTERN_LENGTH = 1024 * 64;

/**
 * Throws unless `pattern` is a string of at most MAX_PATTERN_LENGTH chars.
 *
 * @param {*} pattern - Candidate glob pattern.
 * @throws {TypeError} 'invalid pattern' for non-strings, 'pattern is too long'
 *   for over-long strings.
 */
const assertValidPattern = (pattern) => {
    if (typeof pattern !== 'string') {
        throw new TypeError('invalid pattern');
    }
    if (pattern.length > MAX_PATTERN_LENGTH) {
        throw new TypeError('pattern is too long');
    }
};

// translate the various posix character classes into unicode properties
// this works across all unicode locales
// { <posix class>: [<translation>, /u flag required, negated]
const posixClasses = {
    '[:alnum:]': ['\\p{L}\\p{Nl}\\p{Nd}', true],
    '[:alpha:]': ['\\p{L}\\p{Nl}', true],
    '[:ascii:]': ['\\x' + '00-\\x' + '7f', false],
    '[:blank:]': ['\\p{Zs}\\t', true],
    '[:cntrl:]': ['\\p{Cc}', true],
    '[:digit:]': ['\\p{Nd}', true],
    '[:graph:]': ['\\p{Z}\\p{C}', true, true],
    '[:lower:]': ['\\p{Ll}', true],
    '[:print:]': ['\\p{C}', true],
    '[:punct:]': ['\\p{P}', true],
    '[:space:]': ['\\p{Z}\\t\\r\\n\\v\\f', true],
    '[:upper:]': ['\\p{Lu}', true],
    '[:word:]': ['\\p{L}\\p{Nl}\\p{Nd}\\p{Pc}', true],
    '[:xdigit:]': ['A-Fa-f0-9', false],
};

// only need to escape a few things inside of brace expressions
// escapes: [ \ ] -
const braceEscape = (s) => s.replace(/[[\]\\-]/g, '\\$&');

// escape all regexp magic characters
const regexpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');

// everything has already been escaped, we just have to join
const rangesToString = (ranges) => ranges.join('');

/**
 * Parses the bracket expression that starts at `glob[position]`.
 *
 * This also drops out-of-order ranges, and returns `$.` (a regexp that can
 * never match) when the whole class cannot match anything.
 *
 * @param {string} glob - The glob string being parsed.
 * @param {number} position - Index of the opening `[`.
 * @returns {[string, boolean, number, boolean]} A tuple of
 *   `[regexp source, /u flag needed, characters consumed, is magic]`.
 *   An unterminated class yields `['', false, 0, false]`.
 * @throws {Error} When `glob[position]` is not `[`.
 */
const parseClass = (glob, position) => {
    const pos = position;
    /* c8 ignore start */
    if (glob.charAt(pos) !== '[') {
        throw new Error('not in a brace expression');
    }
    /* c8 ignore stop */
    const ranges = [];
    const negs = [];
    let i = pos + 1;
    let sawStart = false;
    let uflag = false;
    let escaping = false;
    let negate = false;
    let endPos = pos;
    let rangeStart = '';
    WHILE: while (i < glob.length) {
        const c = glob.charAt(i);
        if ((c === '!' || c === '^') && i === pos + 1) {
            negate = true;
            i++;
            continue;
        }
        if (c === ']' && sawStart && !escaping) {
            endPos = i + 1;
            break;
        }
        sawStart = true;
        if (c === '\\') {
            if (!escaping) {
                escaping = true;
                i++;
                continue;
            }
            // escaped \ char, fall through and treat like normal char
        }
        if (c === '[' && !escaping) {
            // either a posix class, a collation equivalent, or just a [
            for (const [cls, [unip, u, neg]] of Object.entries(posixClasses)) {
                if (glob.startsWith(cls, i)) {
                    // invalid, [a-[] is fine, but not [a-[:alpha]]
                    if (rangeStart) {
                        return ['$.', false, glob.length - pos, true];
                    }
                    i += cls.length;
                    if (neg)
                        negs.push(unip);
                    else
                        ranges.push(unip);
                    uflag = uflag || u;
                    continue WHILE;
                }
            }
        }
        // now it's just a normal character, effectively
        escaping = false;
        if (rangeStart) {
            // throw this range away if it's not valid, but others
            // can still match.
            if (c > rangeStart) {
                ranges.push(braceEscape(rangeStart) + '-' + braceEscape(c));
            }
            else if (c === rangeStart) {
                ranges.push(braceEscape(c));
            }
            rangeStart = '';
            i++;
            continue;
        }
        // now might be the start of a range.
        // can be either c-d or c-] or c<more...>] or c] at this point
        if (glob.startsWith('-]', i + 1)) {
            ranges.push(braceEscape(c + '-'));
            i += 2;
            continue;
        }
        if (glob.startsWith('-', i + 1)) {
            rangeStart = c;
            i += 2;
            continue;
        }
        // not the start of a range, just a single character
        ranges.push(braceEscape(c));
        i++;
    }
    if (endPos < i) {
        // didn't see the end of the class, not a valid class,
        // but might still be valid as a literal match.
        return ['', false, 0, false];
    }
    // if we got no ranges and no negates, then we have a range that
    // cannot possibly match anything, and that poisons the whole glob
    if (!ranges.length && !negs.length) {
        return ['$.', false, glob.length - pos, true];
    }
    // if we got one positive range, and it's a single character, then that's
    // not actually a magic pattern, it's just that one literal character.
    // we should not treat that as "magic", we should just return the literal
    // character. [_] is a perfectly valid way to escape glob magic chars.
    if (negs.length === 0 &&
        ranges.length === 1 &&
        /^\\?.$/.test(ranges[0]) &&
        !negate) {
        const r = ranges[0].length === 2 ? ranges[0].slice(-1) : ranges[0];
        return [regexpEscape(r), false, endPos - pos, false];
    }
    const sranges = '[' + (negate ? '^' : '') + rangesToString(ranges) + ']';
    const snegs = '[' + (negate ? '' : '^') + rangesToString(negs) + ']';
    const comb = ranges.length && negs.length
        ? '(' + sranges + '|' + snegs + ')'
        : ranges.length
            ? sranges
            : snegs;
    return [comb, uflag, endPos - pos, true];
};
/**
 * Un-escape a string that has been escaped with {@link escape}.
 *
 * If the {@link windowsPathsNoEscape} option is used, then square-brace
 * escapes are removed, but not backslash escapes.  For example, it will turn
 * the string `'[*]'` into `*`, but it will not turn `'\\*'` into `'*'`,
 * because `\` is a path separator in `windowsPathsNoEscape` mode.
 *
 * When `windowsPathsNoEscape` is not set, then both brace escapes and
 * backslash escapes are removed.
 *
 * Slashes (and backslashes in `windowsPathsNoEscape` mode) cannot be escaped
 * or unescaped.
 *
 * @param {string} s - The escaped string.
 * @param {{windowsPathsNoEscape?: boolean}} [options]
 * @returns {string} The unescaped string.
 */
const unescape = (s, { windowsPathsNoEscape = false, } = {}) => {
    return windowsPathsNoEscape
        ? s.replace(/\[([^\/\\])\]/g, '$1')
        : s.replace(/((?!\\).|^)\[([^\/\\])\]/g, '$1$2').replace(/\\([^\/])/g, '$1');
};

// parse a single path portion
const types = new Set(['!', '?', '+', '*', '@']);
const isExtglobType = (c) => types.has(c);

// Patterns that get prepended to bind to the start of either the
// entire string, or just a single path portion, to prevent dots
// and/or traversal patterns, when needed.
// Exts don't need the ^ or / bit, because the root binds that already.
const startNoTraversal = '(?!(?:^|/)\\.\\.?(?:$|/))';
const startNoDot = '(?!\\.)';

// characters that indicate a start of pattern needs the "no dots" bit,
// because a dot *might* be matched. ( is not in the list, because in
// the case of a child extglob, it will handle the prevention itself.
const addPatternStart = new Set(['[', '.']);

// cases where traversal is A-OK, no dot prevention needed
const justDots = new Set(['..', '.']);
const reSpecials = new Set('().*{}+?[]^$\\!');
const regExpEscape$1 = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');

// any single thing other than /
const qmark$1 = '[^/]';

// * => any number of characters
const star$1 = qmark$1 + '*?';

// use + when we need to ensure that *something* matches, because the * is
// the only thing in the path portion.
const starNoEmpty = qmark$1 + '+?'; // remove the \ chars that we added if we end up doing a nonmagic compare // const deslash = (s: string) => s.replace(/\\(.)/g, '$1') class AST { type; #root; #hasMagic; #uflag = false; #parts = []; #parent; #parentIndex; #negs; #filledNegs = false; #options; #toString; // set to true if it's an extglob with no children // (which really means one child of '') #emptyExt = false; constructor(type, parent, options = {}) { this.type = type; // extglobs are inherently magical if (type) this.#hasMagic = true; this.#parent = parent; this.#root = this.#parent ? this.#parent.#root : this; this.#options = this.#root === this ? options : this.#root.#options; this.#negs = this.#root === this ? [] : this.#root.#negs; if (type === '!' && !this.#root.#filledNegs) this.#negs.push(this); this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0; } get hasMagic() { /* c8 ignore start */ if (this.#hasMagic !== undefined) return this.#hasMagic; /* c8 ignore stop */ for (const p of this.#parts) { if (typeof p === 'string') continue; if (p.type || p.hasMagic) return (this.#hasMagic = true); } // note: will be undefined until we generate the regexp src and find out return this.#hasMagic; } // reconstructs the pattern toString() { if (this.#toString !== undefined) return this.#toString; if (!this.type) { return (this.#toString = this.#parts.map(p => String(p)).join('')); } else { return (this.#toString = this.type + '(' + this.#parts.map(p => String(p)).join('|') + ')'); } } #fillNegs() { /* c8 ignore start */ if (this !== this.#root) throw new Error('should only call on root'); if (this.#filledNegs) return this; /* c8 ignore stop */ // call toString() once to fill this out this.toString(); this.#filledNegs = true; let n; while ((n = this.#negs.pop())) { if (n.type !== '!') continue; // walk up the tree, appending everthing that comes AFTER parentIndex let p = n; let pp = p.#parent; while (pp) { for (let i = p.#parentIndex + 1; !pp.type && i < 
pp.#parts.length; i++) { for (const part of n.#parts) { /* c8 ignore start */ if (typeof part === 'string') { throw new Error('string part in extglob AST??'); } /* c8 ignore stop */ part.copyIn(pp.#parts[i]); } } p = pp; pp = p.#parent; } } return this; } push(...parts) { for (const p of parts) { if (p === '') continue; /* c8 ignore start */ if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) { throw new Error('invalid part: ' + p); } /* c8 ignore stop */ this.#parts.push(p); } } toJSON() { const ret = this.type === null ? this.#parts.slice().map(p => (typeof p === 'string' ? p : p.toJSON())) : [this.type, ...this.#parts.map(p => p.toJSON())]; if (this.isStart() && !this.type) ret.unshift([]); if (this.isEnd() && (this === this.#root || (this.#root.#filledNegs && this.#parent?.type === '!'))) { ret.push({}); } return ret; } isStart() { if (this.#root === this) return true; // if (this.type) return !!this.#parent?.isStart() if (!this.#parent?.isStart()) return false; if (this.#parentIndex === 0) return true; // if everything AHEAD of this is a negation, then it's still the "start" const p = this.#parent; for (let i = 0; i < this.#parentIndex; i++) { const pp = p.#parts[i]; if (!(pp instanceof AST && pp.type === '!')) { return false; } } return true; } isEnd() { if (this.#root === this) return true; if (this.#parent?.type === '!') return true; if (!this.#parent?.isEnd()) return false; if (!this.type) return this.#parent?.isEnd(); // if not root, it'll always have a parent /* c8 ignore start */ const pl = this.#parent ? 
this.#parent.#parts.length : 0; /* c8 ignore stop */ return this.#parentIndex === pl - 1; } copyIn(part) { if (typeof part === 'string') this.push(part); else this.push(part.clone(this)); } clone(parent) { const c = new AST(this.type, parent); for (const p of this.#parts) { c.copyIn(p); } return c; } static #parseAST(str, ast, pos, opt) { let escaping = false; let inBrace = false; let braceStart = -1; let braceNeg = false; if (ast.type === null) { // outside of a extglob, append until we find a start let i = pos; let acc = ''; while (i < str.length) { const c = str.charAt(i++); // still accumulate escapes at this point, but we do ignore // starts that are escaped if (escaping || c === '\\') { escaping = !escaping; acc += c; continue; } if (inBrace) { if (i === braceStart + 1) { if (c === '^' || c === '!') { braceNeg = true; } } else if (c === ']' && !(i === braceStart + 2 && braceNeg)) { inBrace = false; } acc += c; continue; } else if (c === '[') { inBrace = true; braceStart = i; braceNeg = false; acc += c; continue; } if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') { ast.push(acc); acc = ''; const ext = new AST(c, ast); i = AST.#parseAST(str, ext, i, opt); ast.push(ext); continue; } acc += c; } ast.push(acc); return i; } // some kind of extglob, pos is at the ( // find the next | or ) let i = pos + 1; let part = new AST(null, ast); const parts = []; let acc = ''; while (i < str.length) { const c = str.charAt(i++); // still accumulate escapes at this point, but we do ignore // starts that are escaped if (escaping || c === '\\') { escaping = !escaping; acc += c; continue; } if (inBrace) { if (i === braceStart + 1) { if (c === '^' || c === '!') { braceNeg = true; } } else if (c === ']' && !(i === braceStart + 2 && braceNeg)) { inBrace = false; } acc += c; continue; } else if (c === '[') { inBrace = true; braceStart = i; braceNeg = false; acc += c; continue; } if (isExtglobType(c) && str.charAt(i) === '(') { part.push(acc); acc = ''; const ext = new 
AST(c, part); part.push(ext); i = AST.#parseAST(str, ext, i, opt); continue; } if (c === '|') { part.push(acc); acc = ''; parts.push(part); part = new AST(null, ast); continue; } if (c === ')') { if (acc === '' && ast.#parts.length === 0) { ast.#emptyExt = true; } part.push(acc); acc = ''; ast.push(...parts, part); return i; } acc += c; } // unfinished extglob // if we got here, it was a malformed extglob! not an extglob, but // maybe something else in there. ast.type = null; ast.#hasMagic = undefined; ast.#parts = [str.substring(pos - 1)]; return i; } static fromGlob(pattern, options = {}) { const ast = new AST(null, undefined, options); AST.#parseAST(pattern, ast, 0, options); return ast; } // returns the regular expression if there's magic, or the unescaped // string if not. toMMPattern() { // should only be called on root /* c8 ignore start */ if (this !== this.#root) return this.#root.toMMPattern(); /* c8 ignore stop */ const glob = this.toString(); const [re, body, hasMagic, uflag] = this.toRegExpSource(); // if we're in nocase mode, and not nocaseMagicOnly, then we do // still need a regular expression if we have to case-insensitively // match capital/lowercase characters. const anyMagic = hasMagic || this.#hasMagic || (this.#options.nocase && !this.#options.nocaseMagicOnly && glob.toUpperCase() !== glob.toLowerCase()); if (!anyMagic) { return body; } const flags = (this.#options.nocase ? 'i' : '') + (uflag ? 
'u' : ''); return Object.assign(new RegExp(`^${re}$`, flags), { _src: re, _glob: glob, }); } get options() { return this.#options; } // returns the string match, the regexp source, whether there's magic // in the regexp (so a regular expression is required) and whether or // not the uflag is needed for the regular expression (for posix classes) // TODO: instead of injecting the start/end at this point, just return // the BODY of the regexp, along with the start/end portions suitable // for binding the start/end in either a joined full-path makeRe context // (where we bind to (^|/), or a standalone matchPart context (where // we bind to ^, and not /). Otherwise slashes get duped! // // In part-matching mode, the start is: // - if not isStart: nothing // - if traversal possible, but not allowed: ^(?!\.\.?$) // - if dots allowed or not possible: ^ // - if dots possible and not allowed: ^(?!\.) // end is: // - if not isEnd(): nothing // - else: $ // // In full-path matching mode, we put the slash at the START of the // pattern, so start is: // - if first pattern: same as part-matching mode // - if not isStart(): nothing // - if traversal possible, but not allowed: /(?!\.\.?(?:$|/)) // - if dots allowed or not possible: / // - if dots possible and not allowed: /(?!\.) // end is: // - if last pattern, same as part-matching mode // - else nothing // // Always put the (?:$|/) on negated tails, though, because that has to be // there to bind the end of the negated pattern portion, and it's easier to // just stick it in now rather than try to inject it later in the middle of // the pattern. // // We can just always return the same end, and leave it up to the caller // to know whether it's going to be used joined or in parts. 
// And, if the start is adjusted slightly, can do the same there: // - if not isStart: nothing // - if traversal possible, but not allowed: (?:/|^)(?!\.\.?$) // - if dots allowed or not possible: (?:/|^) // - if dots possible and not allowed: (?:/|^)(?!\.) // // But it's better to have a simpler binding without a conditional, for // performance, so probably better to return both start options. // // Then the caller just ignores the end if it's not the first pattern, // and the start always gets applied. // // But that's always going to be $ if it's the ending pattern, or nothing, // so the caller can just attach $ at the end of the pattern when building. // // So the todo is: // - better detect what kind of start is needed // - return both flavors of starting pattern // - attach $ at the end of the pattern when creating the actual RegExp // // Ah, but wait, no, that all only applies to the root when the first pattern // is not an extglob. If the first pattern IS an extglob, then we need all // that dot prevention biz to live in the extglob portions, because eg // +(*|.x*) can match .xy but not .yx. // // So, return the two flavors if it's #root and the first child is not an // AST, otherwise leave it to the child AST to handle it, and there, // use the (?:^|/) style of start binding. // // Even simplified further: // - Since the start for a join is eg /(?!\.) and the start for a part // is ^(?!\.), we can just prepend (?!\.) to the pattern (either root // or start or whatever) and prepend ^ or / at the Regexp construction. toRegExpSource(allowDot) { const dot = allowDot ?? !!this.#options.dot; if (this.#root === this) this.#fillNegs(); if (!this.type) { const noEmpty = this.isStart() && this.isEnd(); const src = this.#parts .map(p => { const [re, _, hasMagic, uflag] = typeof p === 'string' ? 
AST.#parseGlob(p, this.#hasMagic, noEmpty) : p.toRegExpSource(allowDot); this.#hasMagic = this.#hasMagic || hasMagic; this.#uflag = this.#uflag || uflag; return re; }) .join(''); let start = ''; if (this.isStart()) { if (typeof this.#parts[0] === 'string') { // this is the string that will match the start of the pattern, // so we need to protect against dots and such. // '.' and '..' cannot match unless the pattern is that exactly, // even if it starts with . or dot:true is set. const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]); if (!dotTravAllowed) { const aps = addPatternStart; // check if we have a possibility of matching . or .., // and prevent that. const needNoTrav = // dots are allowed, and the pattern starts with [ or . (dot && aps.has(src.charAt(0))) || // the pattern starts with \., and then [ or . (src.startsWith('\\.') && aps.has(src.charAt(2))) || // the pattern starts with \.\., and then [ or . (src.startsWith('\\.\\.') && aps.has(src.charAt(4))); // no need to prevent dots if it can't match a dot, or if a // sub-pattern will be preventing it anyway. const needNoDot = !dot && !allowDot && aps.has(src.charAt(0)); start = needNoTrav ? startNoTraversal : needNoDot ? startNoDot : ''; } } } // append the "end of path portion" pattern to negation tails let end = ''; if (this.isEnd() && this.#root.#filledNegs && this.#parent?.type === '!') { end = '(?:$|\\/)'; } const final = start + src + end; return [ final, unescape(src), (this.#hasMagic = !!this.#hasMagic), this.#uflag, ]; } // We need to calculate the body *twice* if it's a repeat pattern // at the start, once in nodot mode, then again in dot mode, so a // pattern like *(?) can match 'x.y' const repeated = this.type === '*' || this.type === '+'; // some kind of extglob const start = this.type === '!' ? 
'(?:(?!(?:' : '(?:'; let body = this.#partsToRegExp(dot); if (this.isStart() && this.isEnd() && !body && this.type !== '!') { // invalid extglob, has to at least be *something* present, if it's // the entire path portion. const s = this.toString(); this.#parts = [s]; this.type = null; this.#hasMagic = undefined; return [s, unescape(this.toString()), false, false]; } // XXX abstract out this map method let bodyDotAllowed = !repeated || allowDot || dot || false ? '' : this.#partsToRegExp(true); if (bodyDotAllowed === body) { bodyDotAllowed = ''; } if (bodyDotAllowed) { body = `(?:${body})(?:${bodyDotAllowed})*?`; } // an empty !() is exactly equivalent to a starNoEmpty let final = ''; if (this.type === '!' && this.#emptyExt) { final = (this.isStart() && !dot ? startNoDot : '') + starNoEmpty; } else { const close = this.type === '!' ? // !() must match something,but !(x) can match '' '))' + (this.isStart() && !dot && !allowDot ? startNoDot : '') + star$1 + ')' : this.type === '@' ? ')' : this.type === '?' ? ')?' : this.type === '+' && bodyDotAllowed ? ')' : this.type === '*' && bodyDotAllowed ? `)?` : `)${this.type}`; final = start + body + close; } return [ final, unescape(body), (this.#hasMagic = !!this.#hasMagic), this.#uflag, ]; } #partsToRegExp(dot) { return this.#parts .map(p => { // extglob ASTs should only contain parent ASTs /* c8 ignore start */ if (typeof p === 'string') { throw new Error('string type in extglob ast??'); } /* c8 ignore stop */ // can ignore hasMagic, because extglobs are already always magic const [re, _, _hasMagic, uflag] = p.toRegExpSource(dot); this.#uflag = this.#uflag || uflag; return re; }) .filter(p => !(this.isStart() && this.isEnd()) || !!p) .join('|'); } static #parseGlob(glob, hasMagic, noEmpty = false) { let escaping = false; let re = ''; let uflag = false; for (let i = 0; i < glob.length; i++) { const c = glob.charAt(i); if (escaping) { escaping = false; re += (reSpecials.has(c) ? 
'\\' : '') + c; continue; } if (c === '\\') { if (i === glob.leng