@helia/verified-fetch
Version:
A fetch-like API for obtaining verified & trustless IPFS content on the web
264 lines • 11.9 kB
JavaScript
import { AbortError } from '@libp2p/interface';
import { CID } from 'multiformats/cid';
import { getPeerIdFromString } from './get-peer-id-from-string.js';
import { serverTiming } from './server-timing.js';
import { TLRU } from './tlru.js';
// Module-level cache of IPNS / DNSLink resolution results. parseUrlString keys it by the
// name taken from the URL (`cidOrPeerIdOrDnsLink`) and stores each entry with a TTL in ms.
// NOTE(review): `1000` is presumably the maximum number of cached entries - confirm against TLRU.
export const ipnsCache = new TLRU(1000);
// All four patterns expose the same named groups: `protocol` ("ipfs" | "ipns"),
// `cidOrPeerIdOrDnsLink`, `path` (without a leading slash) and `queryString` (without the "?").
//
// Native URLs: `ipfs://<cid>[/path][?query]` or `ipns://<name>[/path][?query]`
const URL_REGEX = /^(?<protocol>ip[fn]s):\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/;
// Bare paths: `/ipfs/<cid>[/path][?query]` or `/ipns/<name>[/path][?query]`
const PATH_REGEX = /^\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/;
// Path gateway URLs: `http(s)://<gateway>/ipfs/<cid>[/path][?query]` (or `/ipns/<name>`)
const PATH_GATEWAY_REGEX = /^https?:\/\/(.*[^/])\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/;
// Subdomain gateway URLs: `http(s)://<cid>.ipfs.<gateway>[/path][?query]` (or `<name>.ipns.<gateway>`)
const SUBDOMAIN_GATEWAY_REGEX = /^https?:\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\.(?<protocol>ip[fn]s)\.([^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/;
/**
 * Validates the named capture groups of a URL pattern match.
 *
 * The groups are accepted when `protocol` is "ipfs" or "ipns", `cidOrPeerIdOrDnsLink`
 * is a string, and `path` / `queryString` are each either absent or a string.
 *
 * @param {Record<string, unknown> | null | undefined} groups - `match.groups` of a regex match (may be missing)
 * @returns {boolean} true when the groups have the expected shape
 */
function matchUrlGroupsGuard(groups) {
  const { protocol, cidOrPeerIdOrDnsLink, path, queryString } = groups ?? {};

  // both mandatory groups must be present
  if (protocol == null || cidOrPeerIdOrDnsLink == null) {
    return false;
  }
  if (protocol !== 'ipns' && protocol !== 'ipfs') {
    return false;
  }
  if (typeof cidOrPeerIdOrDnsLink !== 'string') {
    return false;
  }

  // optional groups may be missing, but never a non-string value
  const isOptionalString = (value) => value == null || typeof value === 'string';
  return isOptionalString(path) && isOptionalString(queryString);
}
/**
 * Extracts the protocol, CID/PeerId/DNSLink name, path and query string from a URL string.
 *
 * The patterns are tried in a fixed order (subdomain gateway, native ipfs:// / ipns://,
 * path gateway, bare path) and the groups of the first acceptable match are returned.
 *
 * @param {string} urlString - the URL to inspect
 * @returns {Record<string, string>} the named capture groups of the first matching pattern
 * @throws {TypeError} when no pattern yields valid groups
 */
export function matchURLString(urlString) {
  const candidatePatterns = [SUBDOMAIN_GATEWAY_REGEX, URL_REGEX, PATH_GATEWAY_REGEX, PATH_REGEX];

  for (let index = 0; index < candidatePatterns.length; index++) {
    const result = urlString.match(candidatePatterns[index]);
    const isValid = matchUrlGroupsGuard(result?.groups);
    if (isValid) {
      return result.groups;
    }
  }

  throw new TypeError(`Invalid URL: ${urlString}, please use ipfs://, ipns://, or gateway URLs only`);
}
/**
 * Works out the TTL (in seconds) of a resolved resource. The value is used for the
 * `max-age` directive of the `Cache-Control` header, which is expressed in seconds.
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
 *
 * A DNSLink answer's `TTL` is returned as-is and takes precedence. An IPNS record's `ttl`
 * is a BigInt holding nanoseconds, so it is converted to whole seconds (BigInt division
 * truncates any fractional second).
 *
 * For more TTL nuances:
 *
 * @see https://github.com/ipfs/js-ipns/blob/16e0e10682fa9a663e0bb493a44d3e99a5200944/src/index.ts#L200
 * @see https://github.com/ipfs/js-ipns/pull/308
 * @param resolveResult - result of an IPNS or DNSLink resolution, or null/undefined
 * @returns the ttl in seconds, or undefined when neither source provides one
 */
function calculateTtl(resolveResult) {
  if (resolveResult == null) {
    return undefined;
  }

  const { answer, record } = resolveResult;
  const dnsLinkSeconds = answer?.TTL;
  const recordTtlNs = record?.ttl;

  // convert the IPNS value eagerly, even when the DNSLink TTL ends up being used
  let ipnsSeconds;
  if (recordTtlNs != null) {
    const nanosecondsPerSecond = BigInt(1e9);
    ipnsSeconds = Number(recordTtlNs / nanosecondsPerSecond);
  }

  if (dnsLinkSeconds !== null && dnsLinkSeconds !== undefined) {
    return dnsLinkSeconds;
  }
  return ipnsSeconds;
}
/**
 * Matches a single DNS label.
 *
 * For DNSLink see https://specs.ipfs.tech/http-gateways/subdomain-gateway/#host-request-header
 * DNSLink names include `.` which means they must be inlined into a single DNS label to provide
 * a unique origin and to work with wildcard TLS certificates.
 *
 * A DNS label can have up to 63 characters, consisting of alphanumeric characters or
 * hyphens (`-`), but it must not start or end with a hyphen. The pattern encodes this as:
 * one alphanumeric character, optionally followed by up to 61 alphanumerics/hyphens and
 * one final alphanumeric character (1 + 61 + 1 = 63).
 */
const dnsLabelRegex = /^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$/;
/**
 * Tells whether a label looks like an inlined DNSLink name, i.e. it is a valid
 * single DNS label that contains at least one hyphen and no dot.
 * (https://specs.ipfs.tech/http-gateways/subdomain-gateway/#host-request-header)
 *
 * @param {string} label - the candidate label
 * @returns {boolean} true when the label should be decoded as an inlined DNSLink
 */
function isInlinedDnsLink(label) {
  // the DNS label check runs first, exactly as a short-circuiting && chain would
  if (!dnsLabelRegex.test(label)) {
    return false;
  }
  if (!label.includes('-')) {
    return false;
  }
  return !label.includes('.');
}
/**
 * DNSLink label decoding
 * - Every standalone - is replaced with .
 * - Every -- is replaced with -
 *
 * The label is decoded in a single left-to-right pass. No placeholder character is
 * used, so every character other than `-` (including a literal `%`) is passed through
 * unchanged.
 *
 * @example en-wikipedia--on--ipfs-org.ipns.example.net -> example.net/ipns/en.wikipedia-on-ipfs.org
 * @param {string} linkLabel - the inlined DNSLink label
 * @returns {string} the decoded DNSLink name
 */
function dnsLinkLabelDecoder(linkLabel) {
  // `--` is listed first in the alternation so a pair of hyphens is consumed as one unit
  return linkLabel.replace(/--|-/g, (hyphens) => (hyphens === '--' ? '-' : '.'));
}
/**
 * Parses the raw query string of a URL (the part after `?`, without the `?`) into a plain object.
 *
 * - pairs are separated by `&`; empty pairs (e.g. from `a=1&&b=2` or a trailing `&`) are skipped
 * - each pair is split on its FIRST `=` only, so a value may itself contain `=`
 * - a key without a value (e.g. `?download`) yields an empty string, not the string "undefined"
 * - values are percent-decoded with decodeURIComponent; keys are used as-is
 * - `download` is normalised to a boolean (true only for the exact value "true")
 * - `filename` is normalised to a string
 *
 * @param {string | undefined} queryString - raw query string, may be empty or missing
 * @returns {Record<string, any>} the parsed query
 * @throws {URIError} when a value contains a malformed percent-encoding
 */
function parseQueryString(queryString) {
  const query = {};
  if (queryString == null || queryString.length === 0) {
    return query;
  }
  for (const part of queryString.split('&')) {
    if (part.length === 0) {
      continue;
    }
    const separatorIndex = part.indexOf('=');
    const hasValue = separatorIndex !== -1;
    const key = hasValue ? part.slice(0, separatorIndex) : part;
    const value = hasValue ? part.slice(separatorIndex + 1) : '';
    query[key] = decodeURIComponent(value);
  }
  if (query.download != null) {
    query.download = query.download === 'true';
  }
  if (query.filename != null) {
    query.filename = query.filename.toString();
  }
  return query;
}
/**
 * Runs a resolver function, optionally measuring it with `serverTiming`.
 *
 * When timing is enabled the timing entry is appended to `serverTimings` (also when the
 * resolver failed) and a captured error is re-thrown so callers see the same failure
 * they would have seen without timing.
 *
 * @param {object} timing
 * @param {boolean} timing.withServerTiming - whether to wrap the call with `serverTiming`
 * @param {Array} timing.serverTimings - list the timing entry is pushed onto
 * @param {string} timing.name - server-timing metric name
 * @param {string} timing.description - server-timing metric description
 * @param {() => Promise<any>} resolveFn - performs the actual resolution
 * @returns {Promise<any>} whatever `resolveFn` resolves to
 */
async function resolveWithOptionalTiming({ withServerTiming, serverTimings, name, description }, resolveFn) {
  if (!withServerTiming) {
    return resolveFn();
  }
  const timedResult = await serverTiming(name, description, resolveFn);
  serverTimings.push(timedResult);
  if (timedResult.error != null) {
    throw timedResult.error;
  }
  return timedResult.result;
}
/**
 * A function that parses ipfs:// and ipns:// URLs (and the gateway forms accepted by
 * matchURLString), returning an object with easily recognizable properties.
 *
 * After determining the protocol successfully, we process the cidOrPeerIdOrDnsLink:
 * - If it's ipfs, it parses the CID or throws
 * - If it's ipns, the cache is consulted first; otherwise it attempts to resolve the name as a
 *   PeerId and, if that yields no CID, as a DNSLink (decoding an inlined DNSLink label first).
 *   Freshly resolved results are cached using their TTL, falling back to 2 minutes.
 *
 * @todo break out the remaining steps of this function (cid parsing, ipns resolving, dnslink resolving) into separate functions and then remove the eslint-disable comment
 *
 * @param {object} init
 * @param {string} init.urlString - the URL to parse
 * @param {object} init.ipns - resolver exposing `resolve(publicKey, options)` and `resolveDNSLink(domain, options)`
 * @param {object} init.logger - component logger factory exposing `forComponent(name)`
 * @param {boolean} [init.withServerTiming=false] - collect server-timing entries for the resolution calls
 * @param {object} [options] - passed through to the resolvers; `options.signal` is checked for abortion
 * @returns {Promise<object>} `{ protocol, cid, path, query, ttl, ipfsPath, serverTimings }`; `ttl` is in
 * seconds and undefined for ipfs:// URLs
 * @throws {TypeError} when the URL matches no supported pattern
 * @throws {AbortError} when `options.signal` is aborted while a resolution fails
 * @throws {Error} the single collected error when exactly one step failed
 * @throws {Error[]} all collected errors when no CID could be determined otherwise
 */
// eslint-disable-next-line complexity
export async function parseUrlString({ urlString, ipns, logger, withServerTiming = false }, options) {
  const log = logger.forComponent('helia:verified-fetch:parse-url-string');
  const { protocol, cidOrPeerIdOrDnsLink, path: urlPath, queryString } = matchURLString(urlString);
  let cid;
  let resolvedPath;
  const errors = [];
  let resolveResult;
  // true when resolveResult was read from ipnsCache instead of being freshly resolved
  let resolvedFromCache = false;
  const serverTimings = [];
  if (protocol === 'ipfs') {
    try {
      cid = CID.parse(cidOrPeerIdOrDnsLink);
      /**
       * no ttl set. @link {setCacheControlHeader}
       */
    }
    catch (err) {
      log.error(err);
      errors.push(new TypeError('Invalid CID for ipfs://<cid> URL'));
    }
  }
  else {
    // protocol is ipns
    resolveResult = ipnsCache.get(cidOrPeerIdOrDnsLink);
    if (resolveResult != null) {
      resolvedFromCache = true;
      cid = resolveResult.cid;
      resolvedPath = resolveResult.path;
      log.trace('resolved %s to %c from cache', cidOrPeerIdOrDnsLink, cid);
    }
    else {
      log.trace('Attempting to resolve PeerId for %s', cidOrPeerIdOrDnsLink);
      let peerId;
      try {
        // try resolving as an IPNS name
        peerId = getPeerIdFromString(cidOrPeerIdOrDnsLink);
        const pubKey = peerId?.publicKey;
        if (pubKey == null) {
          throw new TypeError('cidOrPeerIdOrDnsLink contains no public key');
        }
        resolveResult = await resolveWithOptionalTiming({
          withServerTiming,
          serverTimings,
          name: 'ipns.resolve',
          description: `Resolve IPNS name ${cidOrPeerIdOrDnsLink}`
        }, async () => {
          return ipns.resolve(pubKey, options);
        });
        cid = resolveResult?.cid;
        resolvedPath = resolveResult?.path;
        log.trace('resolved %s to %c', cidOrPeerIdOrDnsLink, cid);
      }
      catch (err) {
        if (options?.signal?.aborted) {
          throw new AbortError(options?.signal?.reason);
        }
        // peerId is only assigned once the string parsed, so it tells the two failure modes apart
        if (peerId == null) {
          log.error('could not parse PeerId string "%s"', cidOrPeerIdOrDnsLink, err);
          errors.push(new TypeError(`Could not parse PeerId in ipns url "${cidOrPeerIdOrDnsLink}", ${err.message}`));
        }
        else {
          log.error('could not resolve PeerId %c', peerId, err);
          errors.push(new TypeError(`Could not resolve PeerId "${cidOrPeerIdOrDnsLink}": ${err.message}`));
        }
      }
      if (cid == null) {
        // cid is still null, try resolving as a DNSLink
        let decodedDnsLinkLabel = cidOrPeerIdOrDnsLink;
        if (isInlinedDnsLink(cidOrPeerIdOrDnsLink)) {
          decodedDnsLinkLabel = dnsLinkLabelDecoder(cidOrPeerIdOrDnsLink);
          log.trace('decoded dnslink from "%s" to "%s"', cidOrPeerIdOrDnsLink, decodedDnsLinkLabel);
        }
        log.trace('Attempting to resolve DNSLink for %s', decodedDnsLinkLabel);
        try {
          resolveResult = await resolveWithOptionalTiming({
            withServerTiming,
            serverTimings,
            name: 'ipns.resolveDNSLink',
            description: `Resolve DNSLink ${decodedDnsLinkLabel}`
          }, ipns.resolveDNSLink.bind(ipns, decodedDnsLinkLabel, options));
          cid = resolveResult?.cid;
          resolvedPath = resolveResult?.path;
          log.trace('resolved %s to %c', decodedDnsLinkLabel, cid);
        }
        catch (err) {
          // eslint-disable-next-line max-depth
          if (options?.signal?.aborted) {
            throw new AbortError(options?.signal?.reason);
          }
          log.error('could not resolve DnsLink for "%s"', cidOrPeerIdOrDnsLink, err);
          errors.push(err);
        }
      }
    }
  }
  if (cid == null) {
    // a single failure is thrown as-is; several failures are thrown together as an array
    if (errors.length === 1) {
      throw errors[0];
    }
    errors.push(new Error(`Invalid resource. Cannot determine CID from URL "${urlString}".`));
    // eslint-disable-next-line @typescript-eslint/only-throw-error
    throw errors;
  }
  let ttl = calculateTtl(resolveResult);
  if (resolveResult != null) {
    // use the ttl for the resolved resource for the cache, but fallback to 2 minutes if not available
    ttl = ttl ?? 60 * 2;
    // Only freshly resolved results are written to the cache. Re-inserting an entry that was
    // just read from the cache would presumably restart its expiry on every hit, so a
    // frequently requested name would never be re-resolved.
    // NOTE(review): confirm TLRU.set() resets the expiry of an existing key.
    if (!resolvedFromCache) {
      log.trace('caching %s resolved to %s with TTL: %s', cidOrPeerIdOrDnsLink, cid, ttl);
      // convert ttl from seconds to ms for the cache
      ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, ttl * 1000);
    }
  }
  const query = parseQueryString(queryString);
  return {
    protocol,
    cid,
    path: joinPaths(resolvedPath, urlPath ?? ''),
    query,
    ttl,
    ipfsPath: `/${protocol}/${cidOrPeerIdOrDnsLink}${urlPath != null && urlPath !== '' ? `/${urlPath}` : ''}`,
    serverTimings
  };
}
/**
 * Joins the path from a resolve result with the path given in the URL.
 * e.g. /ipns/<peerId>/ that is resolved to /ipfs/<cid>/<path1>, when requested as /ipns/<peerId>/<path2>, should be
 * resolved to /ipfs/<cid>/<path1>/<path2>
 *
 * Runs of forward slashes are collapsed into one, a single leading slash is removed and
 * every path segment is percent-decoded.
 *
 * @param {string | null | undefined} resolvedPath - path carried by the resolve result, if any
 * @param {string} urlPath - path taken from the requested URL (may be empty)
 * @returns {string} the combined, decoded path without a leading slash
 */
function joinPaths(resolvedPath, urlPath) {
  const base = resolvedPath != null ? `${resolvedPath}` : '';

  let joined = base;
  if (urlPath.length > 0) {
    joined = base.length > 0 ? `${base}/${urlPath}` : `${urlPath}`;
  }

  // replace duplicate forward slashes
  const collapsed = joined.replace(/\/(\/)+/g, '/');
  // strip the leading forward slash if present
  const relative = collapsed.startsWith('/') ? collapsed.slice(1) : collapsed;

  return relative
    .split('/')
    .map((segment) => decodeURIComponent(segment))
    .join('/');
}
//# sourceMappingURL=parse-url-string.js.map