UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

411 lines (410 loc) 14.6 kB
/**
 * SSRF Guard — Safe URL Validation Utility
 *
 * Prevents Server-Side Request Forgery by:
 *  1. Enforcing HTTPS-only (no plain HTTP).
 *  2. Normalising encoded IPv4 forms (octal, hex, decimal integer, IPv4-mapped IPv6)
 *     to canonical dotted-decimal before range-checking.
 *  3. Resolving the hostname for **both** A and AAAA families and rejecting
 *     requests to RFC 1918 private ranges, loopback, link-local, CGNAT,
 *     IPv6 link-local/ULA, and cloud metadata endpoints
 *     (AWS / GCP / Azure / Alibaba).
 *  4. Re-throwing on DNS failure rather than silently allowing the request.
 *
 * **DNS rebinding residual race:** `assertSafeUrl` validates the IP at the
 * moment of the lookup. If the resolver returns a public IP here and a private
 * IP at the actual `fetch()` call, the guard is bypassed. To eliminate the
 * race, use the companion `safeDownload` helper in `safeFetch.ts` which pins
 * the resolved IP onto the request via an undici Agent dispatcher.
 *
 * Usage:
 *   await assertSafeUrl(url);
 *   // ... or, for actual downloads: ...
 *   await safeDownload(url, { maxBytes, label });
 *
 * @module utils/ssrfGuard
 */
import { lookup } from "node:dns/promises";
import { isIP } from "node:net";

/**
 * Blocked IPv4 CIDRs.
 *
 * Each entry is a `[network, prefix]` pair. Membership is computed by
 * bitwise comparison of the 32-bit address vs the masked network.
 */
const BLOCKED_V4_CIDRS = [
  ["0.0.0.0", 8], // "this network"
  ["10.0.0.0", 8], // RFC 1918
  ["100.64.0.0", 10], // CGNAT (RFC 6598)
  ["127.0.0.0", 8], // loopback
  ["169.254.0.0", 16], // link-local (AWS/GCP/Azure metadata + APIPA)
  ["172.16.0.0", 12], // RFC 1918
  ["192.0.0.0", 24], // protocol assignments
  ["192.168.0.0", 16], // RFC 1918
  ["198.18.0.0", 15], // benchmarking
  // Alibaba Cloud metadata. This address already falls inside 100.64.0.0/10;
  // it is listed explicitly so the intent survives edits to the CGNAT entry.
  ["100.100.100.200", 32],
  ["224.0.0.0", 4], // multicast
  ["240.0.0.0", 4], // reserved
];

/**
 * `BLOCKED_V4_CIDRS` pre-compiled to `{ mask, base }` pairs of unsigned 32-bit
 * integers so the per-address check does no string parsing of the table.
 * (`ipv4ToInt` is a hoisted function declaration, so it is callable here.)
 */
const BLOCKED_V4_RANGES = BLOCKED_V4_CIDRS.map(([network, prefix]) => {
  const mask = prefix === 0 ? 0 : (0xffffffff << (32 - prefix)) >>> 0;
  return { mask, base: (ipv4ToInt(network) & mask) >>> 0 };
});

/**
 * Blocked IPv6 prefixes.
 *
 * Compared by lowercase prefix match on the expanded address form.
 * (`expandIPv6` normalizes `::1` to `0000:0000:...:0001` for unambiguous
 * prefix matching.) Full-length (39-character) entries are matched exactly.
 */
const BLOCKED_V6_PREFIXES = [
  "0000:0000:0000:0000:0000:0000:0000:0000", // :: (unspecified)
  "0000:0000:0000:0000:0000:0000:0000:0001", // ::1 (loopback)
  "fc", // fc00::/7 unique-local — first half (fcxx)
  "fd", // fc00::/7 unique-local — second half (fdxx)
  "fe8", // fe80::/10 link-local spans fe8x, fe9x, feax and febx
  "fe9",
  "fea",
  "feb",
];

/**
 * Parse one dotted-quad component in hex (`0x7f`), octal (`0177`) or decimal.
 *
 * @param {string} s - A single component, without dots.
 * @returns {number|null} The numeric value (not range-checked), or `null`
 *   when `s` is empty or is not a number in any of the three notations.
 */
function parseOctet(s) {
  if (s.length === 0) {
    return null;
  }
  if (/^0x[0-9a-f]+$/i.test(s)) {
    return Number.parseInt(s.slice(2), 16);
  }
  // Plain "0" is valid decimal zero; leading-zero forms (`0177`) are octal.
  if (s.length > 1 && s.startsWith("0") && /^0[0-7]+$/.test(s)) {
    return Number.parseInt(s.slice(1), 8);
  }
  if (/^\d+$/.test(s)) {
    return Number.parseInt(s, 10);
  }
  return null;
}

/**
 * Normalize any IPv4-like host string to canonical dotted-decimal form, or
 * return `null` if it's not parseable as IPv4.
 *
 * Handles:
 *   - 127.0.0.1     (canonical)
 *   - 0177.0.0.1    (octal octets)
 *   - 0x7f.0.0.1    (hex octets)
 *   - 0x7f.0.0.01   (mixed encodings)
 *   - 0x7f000001    (hex integer)
 *   - 2130706433    (decimal integer)
 *
 * Two- and three-part shorthand (`127.1`) is not handled here and yields `null`.
 *
 * @param {string} host
 * @returns {string|null}
 */
function normalizeIPv4(host) {
  if (host.length === 0) {
    return null;
  }
  const parts = host.split(".");
  if (parts.length === 4) {
    const octets = parts.map(parseOctet);
    if (octets.some((o) => o === null || o < 0 || o > 255)) {
      return null;
    }
    return octets.join(".");
  }
  // Single integer form: 2130706433 or 0x7f000001
  if (parts.length === 1) {
    let n;
    if (/^0x[0-9a-f]+$/i.test(host)) {
      n = Number.parseInt(host.slice(2), 16);
    } else if (/^\d+$/.test(host)) {
      n = Number.parseInt(host, 10);
    } else {
      return null;
    }
    if (Number.isNaN(n) || n < 0 || n > 0xffffffff) {
      return null;
    }
    return [(n >>> 24) & 0xff, (n >>> 16) & 0xff, (n >>> 8) & 0xff, n & 0xff].join(".");
  }
  return null;
}

/**
 * Expand a compressed IPv6 address (`::1`) to full 8-group form
 * (`0000:0000:0000:0000:0000:0000:0000:0001`) for unambiguous prefix matching.
 *
 * @param {string} host
 * @returns {string|null} The expanded lowercased string, or `null` if `host`
 *   isn't IPv6 (per `net.isIP`) or does not expand to exactly eight groups.
 */
function expandIPv6(host) {
  if (isIP(host) !== 6) {
    return null;
  }
  // Handle IPv4-mapped IPv6: ::ffff:127.0.0.1 → expand the IPv4 part to two groups
  const v4MappedMatch = host.match(/^::ffff:(\d+\.\d+\.\d+\.\d+)$/i);
  let groups;
  if (v4MappedMatch) {
    const v4 = normalizeIPv4(v4MappedMatch[1]);
    if (!v4) {
      return null;
    }
    const v4Octets = v4.split(".").map((n) => Number.parseInt(n, 10));
    const high = ((v4Octets[0] << 8) | v4Octets[1]).toString(16);
    const low = ((v4Octets[2] << 8) | v4Octets[3]).toString(16);
    groups = ["0", "0", "0", "0", "0", "ffff", high, low];
  } else {
    // Everything left of "::" is the head, everything right is the tail; the
    // gap between them is filled with zero groups.
    const [head, tail = ""] = host.split("::");
    const headParts = head ? head.split(":") : [];
    const tailParts = tail ? tail.split(":") : [];
    const missing = 8 - headParts.length - tailParts.length;
    if (missing < 0) {
      return null;
    }
    groups = [...headParts, ...Array(missing).fill("0"), ...tailParts];
  }
  if (groups.length !== 8) {
    return null;
  }
  return groups.map((g) => g.toLowerCase().padStart(4, "0")).join(":");
}

/**
 * If `host` is an IPv4-mapped IPv6 address, return the embedded IPv4 in
 * canonical dotted-decimal form, or `null` otherwise.
 *
 * Handles both forms:
 *   - dotted-decimal IPv4 part: `::ffff:127.0.0.1`
 *   - hex-encoded IPv4 part:    `::ffff:7f00:1` / `::ffff:7f00:0001`
 *
 * Node's `URL` parser normalises bracketed `::ffff:127.0.0.1` to
 * `[::ffff:7f00:1]`, so the hex form is the one we actually receive after
 * `URL.hostname` + bracket stripping. Both paths must be covered.
 *
 * @param {string} host
 * @returns {string|null}
 */
function extractIPv4FromMapped(host) {
  // Form 1: `::ffff:127.0.0.1`
  const dottedMatch = host.match(/^::ffff:(\d+\.\d+\.\d+\.\d+)$/i);
  if (dottedMatch) {
    return normalizeIPv4(dottedMatch[1]);
  }
  // Form 2: `::ffff:7f00:1` (two hex groups, optionally zero-padded)
  const hexMatch = host.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i);
  if (hexMatch) {
    const high = Number.parseInt(hexMatch[1], 16);
    const low = Number.parseInt(hexMatch[2], 16);
    if (Number.isNaN(high) || Number.isNaN(low) || high > 0xffff || low > 0xffff) {
      return null;
    }
    return [(high >> 8) & 0xff, high & 0xff, (low >> 8) & 0xff, low & 0xff].join(".");
  }
  return null;
}

/**
 * Convert canonical dotted-decimal IPv4 to an unsigned 32-bit integer.
 *
 * @param {string} ip - Four decimal octets separated by dots.
 * @returns {number}
 */
function ipv4ToInt(ip) {
  const [a, b, c, d] = ip.split(".").map((n) => Number.parseInt(n, 10));
  // `>>> 0` converts the signed result of the bitwise ops back to unsigned.
  return ((a << 24) | (b << 16) | (c << 8) | d) >>> 0;
}

/**
 * @param {string} ip - Canonical dotted-decimal IPv4.
 * @returns {boolean} `true` when `ip` lies inside any `BLOCKED_V4_CIDRS` entry.
 */
function isBlockedIPv4(ip) {
  const ipInt = ipv4ToInt(ip);
  return BLOCKED_V4_RANGES.some(({ mask, base }) => ((ipInt & mask) >>> 0) === base);
}

/**
 * @param {string} expanded - Output of `expandIPv6` (full lowercase form).
 * @returns {boolean} `true` when the address matches a `BLOCKED_V6_PREFIXES` entry.
 */
function isBlockedIPv6(expanded) {
  return BLOCKED_V6_PREFIXES.some((prefix) => {
    if (prefix.length === 39) {
      // full-form exact match
      return expanded === prefix;
    }
    return expanded.startsWith(prefix);
  });
}

/**
 * Strip the IPv6 brackets that `URL.hostname` returns for IPv6 hosts
 * (Node behaviour varies — sometimes `[::1]`, sometimes `::1`).
 *
 * @param {string} host
 * @returns {string}
 */
function stripBrackets(host) {
  if (host.startsWith("[") && host.endsWith("]")) {
    return host.slice(1, -1);
  }
  return host;
}

/**
 * Internal check: given a host string (already bracket-stripped, lowercased),
 * return a reject reason or null if safe.
 *
 * Detects IP literals via every encoded form. Does NOT do DNS — that's the
 * caller's job.
 *
 * @param {string} host
 * @returns {string|null} `null` for a routable IP literal, the sentinel
 *   `"not-an-ip"` when `host` is not an IP literal at all, otherwise a
 *   human-readable rejection reason.
 */
function checkHostLiteral(host) {
  // IPv4 (including encoded forms)
  const v4 = normalizeIPv4(host);
  if (v4) {
    if (isBlockedIPv4(v4)) {
      return `IPv4 ${host} → ${v4} is in a blocked range`;
    }
    return null;
  }
  // IPv6 (including IPv4-mapped)
  if (host.includes(":")) {
    // First, check IPv4-mapped: convert and re-check via v4 path
    const v4FromMapped = extractIPv4FromMapped(host);
    if (v4FromMapped) {
      if (isBlockedIPv4(v4FromMapped)) {
        return `IPv4-mapped IPv6 ${host} → ${v4FromMapped} is in a blocked range`;
      }
      return null;
    }
    const expanded = expandIPv6(host);
    if (!expanded) {
      return `IPv6 ${host} could not be parsed`;
    }
    if (isBlockedIPv6(expanded)) {
      return `IPv6 ${host} is in a blocked range`;
    }
    return null;
  }
  // Not an IP literal — caller should fall through to DNS resolution
  return "not-an-ip";
}

/**
 * Parse `url`, enforce HTTPS, and return the bracket-stripped lowercase host.
 *
 * @param {string} url
 * @returns {string}
 * @throws {Error} when `url` does not parse or its protocol is not `https:`.
 */
function parseHttpsHost(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    throw new Error(`Invalid URL: "${url}"`);
  }
  if (parsed.protocol !== "https:") {
    throw new Error(`Only HTTPS URLs are permitted; got "${parsed.protocol}//" in "${url}"`);
  }
  return stripBrackets(parsed.hostname).toLowerCase();
}

/**
 * Throw if a single resolver answer for `host` is in a blocked range.
 *
 * @param {string} url - Original URL, used only in the error message.
 * @param {string} host - Hostname that was resolved.
 * @param {string} address - Resolved IP address.
 * @param {number} family - `4` routes to the IPv4 range check; anything else
 *   goes through `checkHostLiteral`, which also catches IPv4-mapped IPv6.
 * @throws {Error} when `address` is blocked.
 */
function assertResolvedAddressSafe(url, host, address, family) {
  if (family === 4) {
    if (isBlockedIPv4(address)) {
      throw new Error(`URL "${url}" rejected: hostname ${host} resolves to ${address} (IPv4 in blocked range)`);
    }
    return;
  }
  const reason = checkHostLiteral(address.toLowerCase());
  if (reason && reason !== "not-an-ip") {
    throw new Error(`URL "${url}" rejected: hostname ${host} resolves to ${address} (IPv6 ${reason})`);
  }
}

/**
 * Validate every entry of one resolver answer and return the first entry as
 * the address to pin.
 *
 * Validating the *same* answer that is returned is the point: an address that
 * was never checked must not be handed to the caller.
 *
 * @param {string} url
 * @param {string} host
 * @param {Array<{address: string, family: number}>} entries - Result of
 *   `lookup(host, { all: true })`.
 * @returns {{url: string, ip: string, family: number}}
 * @throws {Error} when `entries` is empty or any entry is in a blocked range.
 */
function pickValidatedAddress(url, host, entries) {
  if (entries.length === 0) {
    throw new Error(`URL "${url}" rejected: hostname ${host} could not be resolved (no addresses returned)`);
  }
  for (const { address, family } of entries) {
    assertResolvedAddressSafe(url, host, address, family);
  }
  const [first] = entries;
  return { url, ip: first.address, family: first.family };
}

/**
 * Assert that `url` is safe to fetch server-side.
 *
 * @param {string} url
 * @returns {Promise<void>}
 * @throws {Error} when the URL is non-HTTPS, parses as a blocked IP literal,
 *   or resolves (A or AAAA) to a blocked IP. **Also throws on DNS lookup
 *   failure** (the previous behaviour of silently allowing was a bypass —
 *   an attacker-controlled resolver could force NXDOMAIN here and a private
 *   IP at the actual fetch).
 */
export async function assertSafeUrl(url) {
  const host = parseHttpsHost(url);
  // First, try as an IP literal (covers encoded forms + IPv4-mapped IPv6).
  const literalCheck = checkHostLiteral(host);
  if (literalCheck === null) {
    return; // routable IP literal — safe
  }
  if (literalCheck !== "not-an-ip") {
    throw new Error(`URL "${url}" rejected: ${literalCheck}`);
  }
  // Hostname — resolve BOTH A and AAAA. Reject if either family yields a
  // blocked address (closes off the "publish AAAA public, A private" attack).
  const [a, aaaa] = await Promise.allSettled([
    lookup(host, { family: 4, all: true }),
    lookup(host, { family: 6, all: true }),
  ]);
  if (a.status === "rejected" && aaaa.status === "rejected") {
    // BOTH lookups failed — the host doesn't resolve at all. Re-throw with
    // a clear message rather than silently allowing the fetch.
    const describe = (reason) => (reason instanceof Error ? reason.message : String(reason));
    throw new Error(
      `URL "${url}" rejected: hostname ${host} could not be resolved (A: ${describe(a.reason)}; AAAA: ${describe(aaaa.reason)})`,
    );
  }
  const v4Entries = a.status === "fulfilled" ? a.value : [];
  const v6Entries = aaaa.status === "fulfilled" ? aaaa.value : [];
  for (const entry of v4Entries) {
    assertResolvedAddressSafe(url, host, entry.address, 4);
  }
  for (const entry of v6Entries) {
    // Goes through the literal-check pipeline so IPv4-mapped resolved
    // addresses are caught.
    assertResolvedAddressSafe(url, host, entry.address, 6);
  }
}

/**
 * Validate `url` and return the resolved IP that should be used for the
 * actual fetch (companion to `safeFetch.ts:safeDownload`).
 *
 * For IP-literal hosts, returns the normalised IP and family. For hostnames,
 * returns the first IP of a resolver answer in which every address passed the
 * blocked-range check. Same throw semantics as {@link assertSafeUrl}.
 *
 * This is the canonical entry point for binary downloads where DNS-rebinding
 * pinning matters — see `safeFetch.ts`.
 *
 * @param {string} url
 * @returns {Promise<{url: string, ip: string, family: number}>}
 * @throws {Error} on invalid/non-HTTPS URLs, blocked literals, blocked or
 *   failed resolution.
 */
export async function validateAndResolveUrl(url) {
  const host = parseHttpsHost(url);
  // IP literal — normalise + check, return canonical form
  const v4 = normalizeIPv4(host);
  if (v4) {
    if (isBlockedIPv4(v4)) {
      throw new Error(`URL "${url}" rejected: IPv4 ${host} → ${v4} is in a blocked range`);
    }
    return { url, ip: v4, family: 4 };
  }
  if (host.includes(":")) {
    const v4FromMapped = extractIPv4FromMapped(host);
    if (v4FromMapped) {
      if (isBlockedIPv4(v4FromMapped)) {
        throw new Error(`URL "${url}" rejected: IPv4-mapped IPv6 ${host} → ${v4FromMapped} is in a blocked range`);
      }
      return { url, ip: v4FromMapped, family: 4 };
    }
    const expanded = expandIPv6(host);
    if (!expanded) {
      throw new Error(`URL "${url}" rejected: IPv6 ${host} could not be parsed`);
    }
    if (isBlockedIPv6(expanded)) {
      throw new Error(`URL "${url}" rejected: IPv6 ${host} is in a blocked range`);
    }
    return { url, ip: host, family: 6 };
  }
  // Hostname — run the full A + AAAA policy check first.
  await assertSafeUrl(url);
  // The address handed back for pinning must itself be validated: a rebinding
  // resolver may answer this query differently from the ones above, so check
  // every entry of this answer and return its first entry.
  const entries = await lookup(host, { all: true });
  return pickValidatedAddress(url, host, entries);
}