UNPKG

@helia/verified-fetch

Version:

A fetch-like API for obtaining verified & trustless IPFS content on the web

264 lines 11.9 kB
import { AbortError } from '@libp2p/interface';
import { CID } from 'multiformats/cid';
import { getPeerIdFromString } from './get-peer-id-from-string.js';
import { serverTiming } from './server-timing.js';
import { TLRU } from './tlru.js';

/**
 * Shared cache of resolve results for `ipns` URLs, keyed by the
 * `cidOrPeerIdOrDnsLink` segment exactly as it appeared in the URL.
 */
export const ipnsCache = new TLRU(1000);

// ipfs://<cid>/<path>?<query> and ipns://<name>/<path>?<query>
const URL_REGEX = /^(?<protocol>ip[fn]s):\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/;
// /ipfs/<cid>/<path>?<query> and /ipns/<name>/<path>?<query>
const PATH_REGEX = /^\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/;
// http(s)://<gateway>/ipfs/<cid>/<path>?<query> and the /ipns/ equivalent
const PATH_GATEWAY_REGEX = /^https?:\/\/(.*[^/])\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/;
// http(s)://<cid-or-name>.ipfs.<gateway>/<path>?<query> and the .ipns. equivalent
const SUBDOMAIN_GATEWAY_REGEX = /^https?:\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\.(?<protocol>ip[fn]s)\.([^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/;

/**
 * Checks that the named groups of a regex match have the shape this module
 * relies on: a protocol of `ipfs` or `ipns`, a string
 * `cidOrPeerIdOrDnsLink`, and optional string `path` / `queryString`.
 *
 * @param {Record<string, unknown> | undefined} groups - `match.groups` of a URL regex match
 * @returns {boolean} true when the groups are usable
 */
function matchUrlGroupsGuard(groups) {
  const protocol = groups?.protocol;
  if (protocol == null) {
    return false;
  }
  const cidOrPeerIdOrDnsLink = groups?.cidOrPeerIdOrDnsLink;
  if (cidOrPeerIdOrDnsLink == null) {
    return false;
  }
  const path = groups?.path;
  const queryString = groups?.queryString;
  return ['ipns', 'ipfs'].includes(protocol) &&
    typeof cidOrPeerIdOrDnsLink === 'string' &&
    (path == null || typeof path === 'string') &&
    (queryString == null || typeof queryString === 'string');
}

/**
 * Splits a URL string into protocol, cid/peer id/DNSLink name, path and
 * query string. Patterns are tried in a fixed order and the first match wins.
 *
 * @param {string} urlString - ipfs://, ipns://, /ipfs/, /ipns/ or gateway URL
 * @returns {{ protocol: string, cidOrPeerIdOrDnsLink: string, path?: string, queryString?: string }}
 * @throws {TypeError} when no pattern matches
 */
export function matchURLString(urlString) {
  for (const pattern of [SUBDOMAIN_GATEWAY_REGEX, URL_REGEX, PATH_GATEWAY_REGEX, PATH_REGEX]) {
    const match = urlString.match(pattern);
    if (matchUrlGroupsGuard(match?.groups)) {
      return match.groups;
    }
  }
  throw new TypeError(`Invalid URL: ${urlString}, please use ipfs://, ipns://, or gateway URLs only`);
}

/**
 * determines the TTL for the resolved resource that will be used for the `Cache-Control` header's `max-age` directive.
 * max-age is in seconds
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
 *
 * If we have ipnsTtlNs, it will be a BigInt representing "nanoseconds". We need to convert it back to seconds.
 *
 * For more TTL nuances:
 *
 * @see https://github.com/ipfs/js-ipns/blob/16e0e10682fa9a663e0bb493a44d3e99a5200944/src/index.ts#L200
 * @see https://github.com/ipfs/js-ipns/pull/308
 * @returns the ttl in seconds
 */
function calculateTtl(resolveResult) {
  if (resolveResult == null) {
    return undefined;
  }
  const dnsLinkTtl = resolveResult.answer?.TTL;
  const ipnsTtlNs = resolveResult.record?.ttl;
  const ipnsTtl = ipnsTtlNs != null ? Number(ipnsTtlNs / BigInt(1e9)) : undefined;
  // a DNSLink answer TTL takes precedence over an IPNS record TTL
  return dnsLinkTtl ?? ipnsTtl;
}

/**
 * For DNSLink see https://specs.ipfs.tech/http-gateways/subdomain-gateway/#host-request-header
 * DNSLink names include . which means they must be inlined into a single DNS label to provide unique origin and work with wildcard TLS certificates.
 */
// DNS label can have up to 63 characters, consisting of alphanumeric
// characters or hyphens -, but it must not start or end with a hyphen.
const dnsLabelRegex = /^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$/;

/**
 * Checks if label looks like inlined DNSLink.
 * (https://specs.ipfs.tech/http-gateways/subdomain-gateway/#host-request-header)
 *
 * @param {string} label
 * @returns {boolean} true for a valid DNS label that contains `-` and no `.`
 */
function isInlinedDnsLink(label) {
  return dnsLabelRegex.test(label) && label.includes('-') && !label.includes('.');
}

/**
 * DNSLink label decoding
 * - Every standalone - is replaced with .
 * - Every remaining -- is replaced with -
 *
 * @example en-wikipedia--on--ipfs-org.ipns.example.net -> example.net/ipns/en.wikipedia-on-ipfs.org
 */
function dnsLinkLabelDecoder(linkLabel) {
  // '%' is a temporary stand-in for '--'; callers only pass labels that
  // matched dnsLabelRegex, which cannot contain '%'
  return linkLabel.replace(/--/g, '%').replace(/-/g, '.').replace(/%/g, '-');
}

/**
 * Parses the raw query string (without the leading `?`) into a plain object.
 *
 * Each `&`-separated segment is split on its FIRST `=` only, so values that
 * themselves contain `=` are kept intact. A segment without `=` yields an
 * empty-string value, and empty segments (`a=1&&b=2`, trailing `&`) are
 * skipped. Values are percent-decoded; keys are used as-is.
 *
 * `download` is coerced to a boolean (`true` only for the literal string
 * `'true'`) and `filename` is coerced to a string.
 *
 * @param {string | undefined} queryString
 * @returns {Record<string, string | boolean>}
 * @throws {URIError} when a value contains a malformed percent-escape
 */
function parseQueryString(queryString) {
  const query = {};
  if (queryString == null || queryString.length === 0) {
    return query;
  }
  for (const part of queryString.split('&')) {
    if (part.length === 0) {
      continue;
    }
    const separatorIndex = part.indexOf('=');
    const key = separatorIndex === -1 ? part : part.slice(0, separatorIndex);
    const value = separatorIndex === -1 ? '' : part.slice(separatorIndex + 1);
    query[key] = decodeURIComponent(value);
  }
  if (query.download != null) {
    query.download = query.download === 'true';
  }
  if (query.filename != null) {
    query.filename = query.filename.toString();
  }
  return query;
}

/**
 * Runs `resolve`, optionally wrapped in `serverTiming`.
 *
 * When `withServerTiming` is true the timing entry is appended to
 * `serverTimings` (even when the resolve failed) and any error captured by
 * `serverTiming` is re-thrown so callers handle both modes identically.
 *
 * @param {object} args
 * @param {boolean} args.withServerTiming - whether to record a timing entry
 * @param {unknown[]} args.serverTimings - list the timing entry is pushed onto
 * @param {string} args.name - server timing metric name
 * @param {string} args.description - server timing metric description
 * @param {() => Promise<unknown>} args.resolve - the resolve operation to run
 * @returns {Promise<unknown>} whatever `resolve` resolves to
 */
async function resolveWithOptionalTiming({ withServerTiming, serverTimings, name, description, resolve }) {
  if (!withServerTiming) {
    return await resolve();
  }
  const timed = await serverTiming(name, description, resolve);
  serverTimings.push(timed);
  if (timed.error != null) {
    throw timed.error;
  }
  return timed.result;
}

/**
 * A function that parses ipfs:// and ipns:// URLs, returning an object with easily recognizable properties.
 *
 * After determining the protocol successfully, we process the cidOrPeerIdOrDnsLink:
 * - If it's ipfs, it parses the CID or throws Error[]
 * - If it's ipns, it attempts to resolve the PeerId and then the DNSLink. If both fail, Error[] is thrown.
 *
 * Successful ipns resolutions are stored in `ipnsCache`; cache hits are
 * returned without being re-stored, so an entry is only ever given the TTL it
 * was cached with.
 *
 * @todo we need to break out the remaining steps of this function (cid parsing, ipns resolving, dnslink resolving) into separate functions and then remove the eslint-disable comment
 *
 * @param {object} args
 * @param {string} args.urlString - the URL to parse
 * @param {object} args.ipns - resolver exposing `resolve` and `resolveDNSLink`
 * @param {object} args.logger - component logger factory (`forComponent`)
 * @param {boolean} [args.withServerTiming=false] - collect server timing entries for resolve calls
 * @param {object} [options] - passed through to the resolvers; `options.signal` is checked for abort
 * @returns {Promise<{ protocol: string, cid: unknown, path: string, query: object, ttl: number | undefined, ipfsPath: string, serverTimings: unknown[] }>}
 * @throws {Error} the single collected error when exactly one step failed
 * @throws {Error[]} all collected errors when more than one step failed
 * @throws {AbortError} when `options.signal` is aborted during resolution
 */
// eslint-disable-next-line complexity
export async function parseUrlString({ urlString, ipns, logger, withServerTiming = false }, options) {
  const log = logger.forComponent('helia:verified-fetch:parse-url-string');
  const { protocol, cidOrPeerIdOrDnsLink, path: urlPath, queryString } = matchURLString(urlString);
  let cid;
  let resolvedPath;
  const errors = [];
  let resolveResult;
  let resolvedFromCache = false;
  const serverTimings = [];

  if (protocol === 'ipfs') {
    try {
      cid = CID.parse(cidOrPeerIdOrDnsLink);
      /**
       * no ttl set. @link {setCacheControlHeader}
       */
    } catch (err) {
      log.error(err);
      errors.push(new TypeError('Invalid CID for ipfs://<cid> URL'));
    }
  } else {
    // protocol is ipns
    resolveResult = ipnsCache.get(cidOrPeerIdOrDnsLink);
    if (resolveResult != null) {
      resolvedFromCache = true;
      cid = resolveResult.cid;
      resolvedPath = resolveResult.path;
      log.trace('resolved %s to %c from cache', cidOrPeerIdOrDnsLink, cid);
    } else {
      log.trace('Attempting to resolve PeerId for %s', cidOrPeerIdOrDnsLink);
      let peerId;
      try {
        // try resolving as an IPNS name
        peerId = getPeerIdFromString(cidOrPeerIdOrDnsLink);
        const pubKey = peerId?.publicKey;
        if (pubKey == null) {
          throw new TypeError('cidOrPeerIdOrDnsLink contains no public key');
        }
        resolveResult = await resolveWithOptionalTiming({
          withServerTiming,
          serverTimings,
          name: 'ipns.resolve',
          description: `Resolve IPNS name ${cidOrPeerIdOrDnsLink}`,
          resolve: async () => ipns.resolve(pubKey, options)
        });
        cid = resolveResult?.cid;
        resolvedPath = resolveResult?.path;
        log.trace('resolved %s to %c', cidOrPeerIdOrDnsLink, cid);
      } catch (err) {
        if (options?.signal?.aborted) {
          throw new AbortError(options?.signal?.reason);
        }
        // peerId is only assigned once the string parsed, so it tells the
        // two failure modes (parse vs. resolve) apart
        if (peerId == null) {
          log.error('could not parse PeerId string "%s"', cidOrPeerIdOrDnsLink, err);
          errors.push(new TypeError(`Could not parse PeerId in ipns url "${cidOrPeerIdOrDnsLink}", ${err.message}`));
        } else {
          log.error('could not resolve PeerId %c', peerId, err);
          errors.push(new TypeError(`Could not resolve PeerId "${cidOrPeerIdOrDnsLink}": ${err.message}`));
        }
      }

      if (cid == null) {
        // cid is still null, try resolving as a DNSLink
        let decodedDnsLinkLabel = cidOrPeerIdOrDnsLink;
        if (isInlinedDnsLink(cidOrPeerIdOrDnsLink)) {
          decodedDnsLinkLabel = dnsLinkLabelDecoder(cidOrPeerIdOrDnsLink);
          log.trace('decoded dnslink from "%s" to "%s"', cidOrPeerIdOrDnsLink, decodedDnsLinkLabel);
        }
        log.trace('Attempting to resolve DNSLink for %s', decodedDnsLinkLabel);
        try {
          resolveResult = await resolveWithOptionalTiming({
            withServerTiming,
            serverTimings,
            name: 'ipns.resolveDNSLink',
            description: `Resolve DNSLink ${decodedDnsLinkLabel}`,
            resolve: ipns.resolveDNSLink.bind(ipns, decodedDnsLinkLabel, options)
          });
          cid = resolveResult?.cid;
          resolvedPath = resolveResult?.path;
          log.trace('resolved %s to %c', decodedDnsLinkLabel, cid);
        } catch (err) {
          if (options?.signal?.aborted) {
            throw new AbortError(options?.signal?.reason);
          }
          log.error('could not resolve DnsLink for "%s"', cidOrPeerIdOrDnsLink, err);
          errors.push(err);
        }
      }
    }
  }

  if (cid == null) {
    if (errors.length === 1) {
      throw errors[0];
    }
    errors.push(new Error(`Invalid resource. Cannot determine CID from URL "${urlString}".`));
    // eslint-disable-next-line @typescript-eslint/only-throw-error
    throw errors;
  }

  let ttl = calculateTtl(resolveResult);
  if (resolveResult != null) {
    // use the ttl for the resolved resource for the cache, but fallback to 2 minutes if not available
    ttl = ttl ?? 60 * 2;
    // Only store freshly resolved results: re-storing a cache hit would hand
    // the entry a brand new TTL on every lookup.
    if (!resolvedFromCache) {
      log.trace('caching %s resolved to %s with TTL: %s', cidOrPeerIdOrDnsLink, cid, ttl);
      // convert ttl from seconds to ms for the cache
      ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, ttl * 1000);
    }
  }

  const query = parseQueryString(queryString);

  return {
    protocol,
    cid,
    path: joinPaths(resolvedPath, urlPath ?? ''),
    query,
    ttl,
    ipfsPath: `/${protocol}/${cidOrPeerIdOrDnsLink}${urlPath != null && urlPath !== '' ? `/${urlPath}` : ''}`,
    serverTimings
  };
}

/**
 * join the path from resolve result & given path.
 * e.g. /ipns/<peerId>/ that is resolved to /ipfs/<cid>/<path1>, when requested as /ipns/<peerId>/<path2>, should be
 * resolved to /ipfs/<cid>/<path1>/<path2>
 *
 * @param {string | undefined} resolvedPath - path component of the resolve result, if any
 * @param {string} urlPath - path component of the requested URL
 * @returns {string} the joined path without a leading slash, each segment percent-decoded
 */
function joinPaths(resolvedPath, urlPath) {
  let path = '';
  if (resolvedPath != null) {
    path += resolvedPath;
  }
  if (urlPath.length > 0) {
    path = `${path.length > 0 ? `${path}/` : path}${urlPath}`;
  }
  // replace duplicate forward slashes
  path = path.replace(/\/(\/)+/g, '/');
  // strip leading forward slash if present
  if (path.startsWith('/')) {
    path = path.substring(1);
  }
  return path.split('/').map(decodeURIComponent).join('/');
}
//# sourceMappingURL=parse-url-string.js.map