/*
 * @helia/verified-fetch
 * Version:
 * A fetch-like API for obtaining verified & trustless IPFS content on the web
 * 344 lines (297 loc) • 12.9 kB
 * text/typescript
 */
import { AbortError } from '@libp2p/interface'
import { CID } from 'multiformats/cid'
import { getPeerIdFromString } from './get-peer-id-from-string.js'
import { serverTiming } from './server-timing.js'
import { TLRU } from './tlru.js'
import type { ServerTimingResult } from './server-timing.js'
import type { RequestFormatShorthand } from '../types.js'
import type { DNSLinkResolveResult, IPNS, IPNSResolveResult, IPNSRoutingEvents, ResolveDNSLinkProgressEvents, ResolveProgressEvents, ResolveResult } from '@helia/ipns'
import type { AbortOptions, ComponentLogger, PeerId } from '@libp2p/interface'
import type { ProgressOptions } from 'progress-events'
/**
* Module-level cache of IPNS / DNSLink resolution results.
*
* `parseUrlString` reads and writes it using the raw `cidOrPeerIdOrDnsLink`
* segment of the requested URL as the key, and passes the entry lifetime to
* `set` in milliseconds.
*
* NOTE(review): `1000` is presumably the maximum number of entries the TLRU
* keeps - confirm against `./tlru.js`.
*/
export const ipnsCache = new TLRU<DNSLinkResolveResult | IPNSResolveResult>(1000)
/**
* Input accepted by `parseUrlString`.
*/
export interface ParseUrlStringInput {
/**
* The URL to parse: an `ipfs://` or `ipns://` URL, an `/ipfs/...` or
* `/ipns/...` path, or an http(s) path/subdomain gateway URL.
*/
urlString: string
/**
* Used to resolve IPNS names (`resolve`) and DNSLink domains
* (`resolveDNSLink`) when the URL uses the `ipns` protocol.
*/
ipns: IPNS
/**
* Used to create the component logger for this module.
*/
logger: ComponentLogger
/**
* When `true`, IPNS / DNSLink resolution is wrapped in `serverTiming` and the
* measurements are returned in `serverTimings`. Defaults to `false`.
*/
withServerTiming?: boolean
}
/**
* Progress and abort options for `parseUrlString`.
*
* The options object is forwarded unchanged to `ipns.resolve` and
* `ipns.resolveDNSLink`. If a resolution attempt fails while `signal` is
* aborted, `parseUrlString` throws an `AbortError` instead of collecting the
* failure.
*/
export interface ParseUrlStringOptions extends ProgressOptions<ResolveProgressEvents | IPNSRoutingEvents | ResolveDNSLinkProgressEvents>, AbortOptions {
}
/**
* Query parameters extracted from the requested URL.
*
* Every value is passed through `decodeURIComponent`; keys other than the ones
* listed below are kept as decoded strings.
*/
export interface ParsedUrlQuery extends Record<string, string | unknown> {
/**
* Value of the `format` query parameter. It is not validated by the parser.
*/
format?: RequestFormatShorthand
/**
* `true` only when the `download` query parameter is exactly `true`, `false`
* for any other supplied value.
*/
download?: boolean
/**
* Value of the `filename` query parameter.
*/
filename?: string
/**
* Value of the `dag-scope` query parameter.
*/
'dag-scope'?: string
}
/**
* Result of `parseUrlString`.
*
* NOTE(review): `cid` and `path` are presumably inherited from `ResolveResult`
* - confirm against `@helia/ipns`.
*/
export interface ParsedUrlStringResults extends ResolveResult {
/**
* The protocol that was matched in the requested URL.
*/
protocol: 'ipfs' | 'ipns'
/**
* The parsed query string of the requested URL (empty object when there is
* no query string).
*/
query: ParsedUrlQuery
/**
* The value for the IPFS gateway spec compliant header `X-Ipfs-Path` on the
* response.
* The value of this header should be the original requested content path,
* prior to any path resolution or traversal.
*
* It is built as `/<protocol>/<cidOrPeerIdOrDnsLink>`, followed by
* `/<path>` when the requested URL has a non-empty path.
*
* @see https://specs.ipfs.tech/http-gateways/path-gateway/#x-ipfs-path-response-header
*/
ipfsPath: string
/**
* TTL, in seconds, of the IPNS record or DNSLink answer the URL was resolved
* from. Falls back to 120 when the resolve result carries no TTL, and is
* `undefined` for `ipfs` URLs because nothing is resolved for them.
*/
ttl?: number
/**
* serverTiming items: one entry per IPNS / DNSLink resolution that was
* attempted with `withServerTiming` enabled, otherwise empty.
*/
serverTimings: Array<ServerTimingResult<any>>
}
/**
 * The patterns below all expose the same named groups:
 * - `protocol`: `ipfs` or `ipns`
 * - `cidOrPeerIdOrDnsLink`: the first segment after the protocol
 * - `path`: everything after that segment up to the first `?` (may be empty)
 * - `queryString`: everything after the first `?` (may be empty)
 */

/**
 * Native URLs: `ipfs://<cid>/<path>?<query>` and `ipns://<name>/<path>?<query>`
 */
const URL_REGEX = /^(?<protocol>ip[fn]s):\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/

/**
 * Bare content paths: `/ipfs/<cid>/<path>?<query>` and `/ipns/<name>/<path>?<query>`
 */
const PATH_REGEX = /^\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/

/**
 * Path gateway URLs: `http(s)://<gateway>/ipfs/<cid>/<path>?<query>`
 *
 * The gateway prefix is matched lazily so that the FIRST `/ipfs/` or `/ipns/`
 * segment in the URL is treated as the protocol marker. A greedy prefix would
 * anchor on the last one, so a content path or query string that happens to
 * contain `/ipfs/` or `/ipns/` (e.g. `/ipfs/<cid>/docs/ipfs/readme.md`) would
 * be parsed with the wrong CID.
 */
const PATH_GATEWAY_REGEX = /^https?:\/\/(.*?[^/])\/(?<protocol>ip[fn]s)\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/

/**
 * Subdomain gateway URLs: `http(s)://<cid>.ipfs.<gateway>/<path>?<query>`
 */
const SUBDOMAIN_GATEWAY_REGEX = /^https?:\/\/(?<cidOrPeerIdOrDnsLink>[^/?]+)\.(?<protocol>ip[fn]s)\.([^/?]+)\/?(?<path>[^?]*)\??(?<queryString>.*)$/
/**
* The named capture groups shared by the URL patterns in this module, as
* returned by `matchURLString`.
*/
interface MatchUrlGroups {
/**
* The protocol segment that was matched
*/
protocol: 'ipfs' | 'ipns'
/**
* The segment following the protocol. Not yet interpreted: it may be a CID,
* a PeerId / IPNS name, or a (possibly inlined) DNSLink domain.
*/
cidOrPeerIdOrDnsLink: string
/**
* The part of the URL after `cidOrPeerIdOrDnsLink` and before any `?`
*/
path?: string
/**
* The part of the URL after the first `?`
*/
queryString?: string
}
/**
 * Type guard that checks whether the `groups` of a regex match have the shape
 * of `MatchUrlGroups`.
 *
 * `protocol` and `cidOrPeerIdOrDnsLink` are required; `protocol` must be
 * `ipns` or `ipfs`. `path` and `queryString` may be absent, but must be
 * strings when present.
 *
 * @param groups - the `groups` property of a regex match, if any
 * @returns `true` when `groups` can be treated as `MatchUrlGroups`
 */
function matchUrlGroupsGuard (groups?: null | { [key in string]: string; } | MatchUrlGroups): groups is MatchUrlGroups {
  if (groups == null) {
    return false
  }

  const { protocol, cidOrPeerIdOrDnsLink, path, queryString } = groups

  // both mandatory groups have to be present
  if (protocol == null || cidOrPeerIdOrDnsLink == null) {
    return false
  }

  if (protocol !== 'ipns' && protocol !== 'ipfs') {
    return false
  }

  if (typeof cidOrPeerIdOrDnsLink !== 'string') {
    return false
  }

  const isOptionalString = (value: unknown): boolean => value == null || typeof value === 'string'

  return isOptionalString(path) && isOptionalString(queryString)
}
/**
 * Splits a URL string into its protocol, CID / PeerId / DNSLink segment, path
 * and query string.
 *
 * The supported formats are tried in this order: subdomain gateway URL,
 * `ipfs://` / `ipns://` URL, path gateway URL, bare `/ipfs/` or `/ipns/` path.
 * The first pattern whose groups pass `matchUrlGroupsGuard` wins.
 *
 * @param urlString - the URL to match
 * @returns the named groups of the first matching pattern
 * @throws {TypeError} when none of the supported formats match
 */
export function matchURLString (urlString: string): MatchUrlGroups {
  const candidates = [SUBDOMAIN_GATEWAY_REGEX, URL_REGEX, PATH_GATEWAY_REGEX, PATH_REGEX]

  for (const candidate of candidates) {
    const groups = candidate.exec(urlString)?.groups

    if (!matchUrlGroupsGuard(groups)) {
      continue
    }

    return groups satisfies MatchUrlGroups
  }

  throw new TypeError(`Invalid URL: ${urlString}, please use ipfs://, ipns://, or gateway URLs only`)
}
/**
 * Works out the TTL, in seconds, of a resolved IPNS name or DNSLink domain.
 * The value is used for the `max-age` directive of the `Cache-Control` header,
 * which is expressed in seconds.
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control#response_directives
 *
 * A DNSLink result carries its TTL on `answer.TTL` and it is returned as-is.
 * An IPNS result carries its TTL on `record.ttl` as a BigInt number of
 * nanoseconds, which is converted to whole seconds (BigInt division truncates).
 * When both are present the DNSLink TTL wins.
 *
 * For more TTL nuances:
 *
 * @see https://github.com/ipfs/js-ipns/blob/16e0e10682fa9a663e0bb493a44d3e99a5200944/src/index.ts#L200
 * @see https://github.com/ipfs/js-ipns/pull/308
 * @returns the ttl in seconds, or `undefined` when there is no resolve result or it has no TTL
 */
function calculateTtl (resolveResult?: IPNSResolveResult | DNSLinkResolveResult): number | undefined {
  if (resolveResult == null) {
    return undefined
  }

  const dnsSeconds = (resolveResult as DNSLinkResolveResult).answer?.TTL
  const ttlNanoseconds = (resolveResult as IPNSResolveResult).record?.ttl

  let ipnsSeconds: number | undefined

  if (ttlNanoseconds != null) {
    // nanoseconds -> seconds
    ipnsSeconds = Number(ttlNanoseconds / BigInt(1e9))
  }

  if (dnsSeconds != null) {
    return dnsSeconds
  }

  return ipnsSeconds
}
/**
* Matches a single valid DNS label.
*
* DNSLink names include `.`, which means that on subdomain gateways they must
* be inlined into a single DNS label to provide a unique origin and work with
* wildcard TLS certificates.
*
* A DNS label can have up to 63 characters, consisting of alphanumeric
* characters or hyphens `-`, but it must not start or end with a hyphen.
*
* @see https://specs.ipfs.tech/http-gateways/subdomain-gateway/#host-request-header
*/
const dnsLabelRegex = /^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?$/
/**
 * Checks whether a label looks like a DNSLink name that has been inlined into
 * a single DNS label: it contains no `.`, contains at least one `-`, and is a
 * valid DNS label.
 *
 * @see https://specs.ipfs.tech/http-gateways/subdomain-gateway/#host-request-header
 */
function isInlinedDnsLink (label: string): boolean {
  if (label.includes('.') || !label.includes('-')) {
    return false
  }

  return dnsLabelRegex.test(label)
}
/**
 * Decodes an inlined DNSLink label back into a domain name:
 * - every `--` becomes `-`
 * - every remaining standalone `-` becomes `.`
 *
 * `%` is used as a temporary placeholder for `--` while the single hyphens
 * are converted.
 *
 * @example en-wikipedia--on--ipfs-org.ipns.example.net -> example.net/ipns/en.wikipedia-on-ipfs.org
 */
function dnsLinkLabelDecoder (linkLabel: string): string {
  // park the escaped hyphens so the next step only sees separators
  const escapedHyphensMasked = linkLabel.replace(/--/g, '%')
  const separatorsRestored = escapedHyphensMasked.replace(/-/g, '.')

  return separatorsRestored.replace(/%/g, '-')
}
/**
 * A function that parses ipfs:// and ipns:// URLs (as well as `/ipfs/`,
 * `/ipns/` paths and gateway URLs), returning an object with easily
 * recognizable properties.
 *
 * After determining the protocol successfully, we process the cidOrPeerIdOrDnsLink:
 * - If it's ipfs, it parses the CID.
 * - If it's ipns, `ipnsCache` is consulted first. On a cache miss it attempts
 *   to resolve the value as a PeerId (IPNS name) and, if that yields no CID,
 *   as a DNSLink. Inlined DNSLink labels are decoded before resolution.
 *
 * IPNS / DNSLink results are stored in `ipnsCache` under the raw
 * `cidOrPeerIdOrDnsLink` using the resolved TTL (2 minutes when the result
 * carries none).
 *
 * The query string is split on `&`; each part is split on its FIRST `=` so
 * values may themselves contain `=`. Values are passed through
 * `decodeURIComponent`.
 *
 * @todo we need to break out each step of this function (cid parsing, ipns resolving, dnslink resolving) into separate functions and then remove the eslint-disable comment
 *
 * @throws {TypeError} if the URL does not match any supported format
 * @throws {AbortError} if IPNS or DNSLink resolution fails while `options.signal` is aborted
 * @throws {Error} if no CID could be determined and exactly one error was collected
 * @throws {Error[]} if no CID could be determined and zero or several errors were collected
 * @throws {URIError} if the query string or path contains a malformed percent-encoded sequence
 */
// eslint-disable-next-line complexity
export async function parseUrlString ({ urlString, ipns, logger, withServerTiming = false }: ParseUrlStringInput, options?: ParseUrlStringOptions): Promise<ParsedUrlStringResults> {
  const log = logger.forComponent('helia:verified-fetch:parse-url-string')
  const { protocol, cidOrPeerIdOrDnsLink, path: urlPath, queryString } = matchURLString(urlString)

  let cid: CID | undefined
  let resolvedPath: string | undefined
  const errors: Error[] = []
  let resolveResult: IPNSResolveResult | DNSLinkResolveResult | undefined
  const serverTimings: Array<ServerTimingResult<any>> = []

  if (protocol === 'ipfs') {
    try {
      cid = CID.parse(cidOrPeerIdOrDnsLink)
      /**
       * no ttl set. @link {setCacheControlHeader}
       */
    } catch (err) {
      log.error(err)
      errors.push(new TypeError('Invalid CID for ipfs://<cid> URL'))
    }
  } else {
    // protocol is ipns
    resolveResult = ipnsCache.get(cidOrPeerIdOrDnsLink)

    if (resolveResult != null) {
      cid = resolveResult.cid
      resolvedPath = resolveResult.path
      log.trace('resolved %s to %c from cache', cidOrPeerIdOrDnsLink, cid)
    } else {
      log.trace('Attempting to resolve PeerId for %s', cidOrPeerIdOrDnsLink)
      let peerId: PeerId | undefined

      try {
        // try resolving as an IPNS name
        peerId = getPeerIdFromString(cidOrPeerIdOrDnsLink)
        const pubKey = peerId?.publicKey

        if (pubKey == null) {
          throw new TypeError('cidOrPeerIdOrDnsLink contains no public key')
        }

        if (withServerTiming) {
          const resolveIpns = async (): Promise<IPNSResolveResult> => {
            return ipns.resolve(pubKey, options)
          }

          const resolveResultWithServerTiming = await serverTiming('ipns.resolve', `Resolve IPNS name ${cidOrPeerIdOrDnsLink}`, resolveIpns)
          serverTimings.push(resolveResultWithServerTiming)

          // the timing is recorded even for failures, then the failure is
          // surfaced to the catch block below
          // eslint-disable-next-line max-depth
          if (resolveResultWithServerTiming.error != null) {
            throw resolveResultWithServerTiming.error
          }

          resolveResult = resolveResultWithServerTiming.result
        } else {
          resolveResult = await ipns.resolve(pubKey, options)
        }

        cid = resolveResult?.cid
        resolvedPath = resolveResult?.path
        log.trace('resolved %s to %c', cidOrPeerIdOrDnsLink, cid)
      } catch (err) {
        if (options?.signal?.aborted) {
          throw new AbortError(options?.signal?.reason)
        }

        // `peerId` is only unset when the string could not be parsed at all
        if (peerId == null) {
          log.error('could not parse PeerId string "%s"', cidOrPeerIdOrDnsLink, err)
          errors.push(new TypeError(`Could not parse PeerId in ipns url "${cidOrPeerIdOrDnsLink}", ${(err as Error).message}`))
        } else {
          log.error('could not resolve PeerId %c', peerId, err)
          errors.push(new TypeError(`Could not resolve PeerId "${cidOrPeerIdOrDnsLink}": ${(err as Error).message}`))
        }
      }

      if (cid == null) {
        // cid is still null, try resolving as a DNSLink
        let decodedDnsLinkLabel = cidOrPeerIdOrDnsLink

        if (isInlinedDnsLink(cidOrPeerIdOrDnsLink)) {
          decodedDnsLinkLabel = dnsLinkLabelDecoder(cidOrPeerIdOrDnsLink)
          log.trace('decoded dnslink from "%s" to "%s"', cidOrPeerIdOrDnsLink, decodedDnsLinkLabel)
        }

        log.trace('Attempting to resolve DNSLink for %s', decodedDnsLinkLabel)

        try {
          // eslint-disable-next-line max-depth
          if (withServerTiming) {
            const resolveResultWithServerTiming = await serverTiming('ipns.resolveDNSLink', `Resolve DNSLink ${decodedDnsLinkLabel}`, ipns.resolveDNSLink.bind(ipns, decodedDnsLinkLabel, options))
            serverTimings.push(resolveResultWithServerTiming)

            // eslint-disable-next-line max-depth
            if (resolveResultWithServerTiming.error != null) {
              throw resolveResultWithServerTiming.error
            }

            resolveResult = resolveResultWithServerTiming.result
          } else {
            resolveResult = await ipns.resolveDNSLink(decodedDnsLinkLabel, options)
          }

          cid = resolveResult?.cid
          resolvedPath = resolveResult?.path
          log.trace('resolved %s to %c', decodedDnsLinkLabel, cid)
        } catch (err: any) {
          // eslint-disable-next-line max-depth
          if (options?.signal?.aborted) {
            throw new AbortError(options?.signal?.reason)
          }

          log.error('could not resolve DnsLink for "%s"', cidOrPeerIdOrDnsLink, err)
          errors.push(err)
        }
      }
    }
  }

  if (cid == null) {
    // a single failure is thrown as-is, anything else is thrown as a list
    if (errors.length === 1) {
      throw errors[0]
    }

    errors.push(new Error(`Invalid resource. Cannot determine CID from URL "${urlString}".`))
    // eslint-disable-next-line @typescript-eslint/only-throw-error
    throw errors
  }

  let ttl = calculateTtl(resolveResult)

  if (resolveResult != null) {
    // use the ttl for the resolved resource for the cache, but fallback to 2 minutes if not available
    ttl = ttl ?? 60 * 2
    log.trace('caching %s resolved to %s with TTL: %s', cidOrPeerIdOrDnsLink, cid, ttl)
    // NOTE(review): this also runs when `resolveResult` came from `ipnsCache`,
    // so a cache hit is written back with its full TTL. If `TLRU.set` restarts
    // the expiry clock, a frequently requested name would never be
    // re-resolved - confirm this is intended.
    // convert ttl from seconds to ms for the cache
    ipnsCache.set(cidOrPeerIdOrDnsLink, resolveResult, ttl * 1000)
  }

  // parsed before the result object is assembled so that a malformed query
  // string is reported before a malformed path
  const query = parseQueryString(queryString)

  return {
    protocol,
    cid,
    path: joinPaths(resolvedPath, urlPath ?? ''),
    query,
    ttl,
    ipfsPath: `/${protocol}/${cidOrPeerIdOrDnsLink}${urlPath != null && urlPath !== '' ? `/${urlPath}` : ''}`,
    serverTimings
  } satisfies ParsedUrlStringResults
}

/**
 * Turns the raw query string of a URL (without the leading `?`) into a
 * key/value object.
 *
 * - parts are separated by `&`; empty parts (leading, trailing or doubled `&`)
 *   are ignored
 * - each part is split on its first `=` only, so `filename=a=b.txt` yields
 *   `a=b.txt`
 * - a key without `=` gets an empty string value
 * - values are decoded with `decodeURIComponent`; keys are used as-is
 * - `download` is converted to a boolean (`true` only for the value `true`)
 *
 * @param queryString - the raw query string, may be empty or undefined
 * @returns the parsed query, an empty object when there is nothing to parse
 * @throws {URIError} if a value contains a malformed percent-encoded sequence
 */
function parseQueryString (queryString?: string): ParsedUrlQuery {
  const query: Record<string, any> = {}

  if (queryString == null || queryString.length === 0) {
    return query
  }

  for (const part of queryString.split('&')) {
    if (part.length === 0) {
      continue
    }

    const separatorIndex = part.indexOf('=')
    const key = separatorIndex === -1 ? part : part.substring(0, separatorIndex)
    const rawValue = separatorIndex === -1 ? '' : part.substring(separatorIndex + 1)

    query[key] = decodeURIComponent(rawValue)
  }

  if (query.download != null) {
    query.download = query.download === 'true'
  }

  if (query.filename != null) {
    query.filename = query.filename.toString()
  }

  return query
}
/**
 * Joins the path returned by IPNS / DNSLink resolution with the path from the
 * requested URL.
 *
 * e.g. /ipns/<peerId>/ that is resolved to /ipfs/<cid>/<path1>, when requested
 * as /ipns/<peerId>/<path2>, should be resolved to /ipfs/<cid>/<path1>/<path2>
 *
 * Runs of forward slashes are collapsed into one, a leading forward slash is
 * removed, and every path segment is passed through `decodeURIComponent`.
 *
 * @param resolvedPath - path from the resolve result, if any
 * @param urlPath - path from the requested URL, may be empty
 * @returns the joined, decoded path without a leading slash
 */
function joinPaths (resolvedPath: string | undefined, urlPath: string): string {
  const base = resolvedPath ?? ''
  let combined: string

  if (urlPath.length === 0) {
    combined = base
  } else if (base.length === 0) {
    combined = urlPath
  } else {
    combined = `${base}/${urlPath}`
  }

  // replace duplicate forward slashes
  const collapsed = combined.replace(/\/(\/)+/g, '/')

  // strip leading forward slash if present
  const relative = collapsed.startsWith('/') ? collapsed.substring(1) : collapsed

  return relative
    .split('/')
    .map((segment) => decodeURIComponent(segment))
    .join('/')
}