UNPKG

@fedify/fedify

Version:

An ActivityPub server framework

357 lines (356 loc) • 13.9 kB
import * as dntShim from "../_dnt.shims.js";
import { isDeno, isNode } from "../deps/jsr.io/@david/which-runtime/0.2.1/mod.js";
import { HTTPHeaderLink } from "@hugoalh/http-header-link";
import { getLogger } from "@logtape/logtape";
import process from "node:process";
import metadata from "../deno.js";
import { signRequest } from "../sig/http.js";
import { validateCryptoKey } from "../sig/key.js";
import preloadedContexts from "./contexts.js";
import { UrlError, validatePublicUrl } from "./url.js";
const logger = getLogger(["fedify", "runtime", "docloader"]);
/**
 * Error thrown when fetching a JSON-LD document failed.
 */
export class FetchError extends Error {
    /**
     * The URL that failed to fetch.
     */
    url;
    /**
     * Constructs a new `FetchError`.
     *
     * @param url The URL that failed to fetch.  A string is converted to
     *            a `URL` object, so it has to be an absolute URL.
     * @param message Error message.  When omitted, the URL itself becomes
     *                the error message.
     */
    constructor(url, message) {
        super(message == null ? url.toString() : `${url}: ${message}`);
        this.name = "FetchError";
        this.url = typeof url === "string" ? new URL(url) : url;
    }
}
/**
 * Builds the `GET` request used to fetch a JSON-LD document.
 *
 * @param url The URL of the document to request.
 * @param options Request options.  `options.userAgent` is either
 *                a ready-made `User-Agent` string or the options object
 *                accepted by {@link getUserAgent}.
 * @returns A request that asks for ActivityStreams/JSON-LD content and
 *          does not follow redirects automatically.
 */
function createRequest(url, options = {}) {
    return new Request(url, {
        headers: {
            Accept: "application/activity+json, application/ld+json",
            "User-Agent": typeof options.userAgent === "string"
                ? options.userAgent
                : getUserAgent(options.userAgent),
        },
        // Redirects are followed by the loaders themselves so that every hop
        // goes through the same checks as the initial URL:
        redirect: "manual",
    });
}
/**
 * Writes a debug log record describing an outgoing request.
 *
 * @param request The request that is about to be sent.
 */
function logRequest(request) {
    logger.debug("Fetching document: {method} {url} {headers}", {
        method: request.method,
        url: request.url,
        headers: Object.fromEntries(request.headers.entries()),
    });
}
/**
 * Determines where a redirect response points to.
 *
 * @param url The URL that was requested.
 * @param response The response to inspect.
 * @returns The absolute URL to follow, or `null` if the response is not
 *          a 3xx response carrying a non-empty `Location` header.
 */
function getRedirectTarget(url, response) {
    if (response.status < 300 || response.status >= 400)
        return null;
    const location = response.headers.get("Location");
    // An empty reference would resolve to the very URL that has just been
    // requested, so it is treated like a missing header:
    if (location == null || location === "")
        return null;
    // The `Location` header may hold a relative reference, which has to be
    // resolved against the URL that produced the response:
    const base = response.url === "" ? url : response.url;
    return new URL(location, base).href;
}
/**
 * Turns an HTTP response into a remote document.
 *
 * If the response is not JSON-LD but advertises an alternate
 * ActivityStreams/JSON-LD representation, either through an HTTP `Link`
 * header or through `<a>`/`<link>` elements of an HTML/XHTML body, that
 * alternate URL is loaded through `fetch` instead.
 *
 * @param url The URL that was requested.  Used as the document URL when
 *            the response does not report its own URL.
 * @param response The response to the request.
 * @param fetch The document loader used to load an alternate document.
 * @returns An object having `contextUrl`, `document`, and `documentUrl`.
 * @throws {FetchError} If the response status is not successful.
 */
async function getRemoteDocument(url, response, fetch) {
    const documentUrl = response.url === "" ? url : response.url;
    const docUrl = new URL(documentUrl);
    if (!response.ok) {
        logger.error("Failed to fetch document: {status} {url} {headers}", {
            status: response.status,
            url: documentUrl,
            headers: Object.fromEntries(response.headers.entries()),
        });
        throw new FetchError(documentUrl, `HTTP ${response.status}: ${documentUrl}`);
    }
    const contentType = response.headers.get("Content-Type");
    // A response without a Content-Type header is treated as JSON-LD:
    const jsonLd = contentType == null ||
        contentType === "application/activity+json" ||
        contentType.startsWith("application/activity+json;") ||
        contentType === "application/ld+json" ||
        contentType.startsWith("application/ld+json;");
    const linkHeader = response.headers.get("Link");
    let contextUrl = null;
    if (linkHeader != null) {
        let link;
        try {
            link = new HTTPHeaderLink(linkHeader);
        }
        catch (e) {
            if (e instanceof SyntaxError) {
                // A malformed Link header is treated as if it had no entries:
                link = new HTTPHeaderLink();
            }
            else {
                throw e;
            }
        }
        if (jsonLd) {
            const entries = link.getByRel("http://www.w3.org/ns/json-ld#context");
            for (const [uri, params] of entries) {
                if ("type" in params && params.type === "application/ld+json") {
                    contextUrl = uri;
                    break;
                }
            }
        }
        else {
            const entries = link.getByRel("alternate");
            for (const [uri, params] of entries) {
                const altUri = new URL(uri, docUrl);
                // The alternate has to differ from the document itself;
                // otherwise the same URL would be loaded over and over:
                if ("type" in params &&
                    (params.type === "application/activity+json" ||
                        params.type === "application/ld+json" ||
                        params.type.startsWith("application/ld+json;")) &&
                    altUri.href !== docUrl.href) {
                    logger.debug("Found alternate document: {alternateUrl} from {url}", { alternateUrl: altUri.href, url: documentUrl });
                    return await fetch(altUri.href);
                }
            }
        }
    }
    let document;
    if (!jsonLd &&
        (contentType === "text/html" ||
            contentType?.startsWith("text/html;") ||
            contentType === "application/xhtml+xml" ||
            contentType?.startsWith("application/xhtml+xml;"))) {
        // Matches an <a> or <link> tag and captures its raw attribute list:
        const p = /<(a|link)((\s+[a-z][a-z:_-]*=("[^"]*"|'[^']*'|[^\s>]+))+)\/?>/ig;
        // Matches a single attribute (double-quoted, single-quoted, or bare):
        const p2 = /\s+([a-z][a-z:_-]*)=("([^"]*)"|'([^']*)'|([^\s>]+))/ig;
        const html = await response.text();
        let m;
        const rawAttribs = [];
        while ((m = p.exec(html)) !== null)
            rawAttribs.push(m[2]);
        for (const rawAttrs of rawAttribs) {
            let m2;
            const attribs = {};
            while ((m2 = p2.exec(rawAttrs)) !== null) {
                const key = m2[1].toLowerCase();
                const value = m2[3] ?? m2[4] ?? m2[5] ?? "";
                attribs[key] = value;
            }
            if (attribs.rel === "alternate" && "type" in attribs &&
                (attribs.type === "application/activity+json" ||
                    attribs.type === "application/ld+json" ||
                    attribs.type.startsWith("application/ld+json;")) &&
                "href" in attribs &&
                new URL(attribs.href, docUrl).href !== docUrl.href) {
                logger.debug("Found alternate document: {alternateUrl} from {url}", { alternateUrl: attribs.href, url: documentUrl });
                return await fetch(new URL(attribs.href, docUrl).href);
            }
        }
        // No alternate was advertised; try the body itself as JSON:
        document = JSON.parse(html);
    }
    else {
        document = await response.json();
    }
    logger.debug("Fetched document: {status} {url} {headers}", {
        status: response.status,
        url: documentUrl,
        headers: Object.fromEntries(response.headers.entries()),
    });
    return { contextUrl, document, documentUrl };
}
/**
 * Creates a JSON-LD document loader that utilizes the browser's `fetch` API.
 *
 * The created loader preloads the below frequently used contexts by default
 * (unless `options.skipPreloadedContexts` is set to `true`):
 *
 * - <https://www.w3.org/ns/activitystreams>
 * - <https://w3id.org/security/v1>
 * - <https://w3id.org/security/data-integrity/v1>
 * - <https://www.w3.org/ns/did/v1>
 * - <https://w3id.org/security/multikey/v1>
 * - <https://purl.archive.org/socialweb/webfinger>
 * - <http://schema.org/>
 * @param options Options for the document loader.
 * @returns The document loader.
 * @since 1.3.0
 */
export function getDocumentLoader({ allowPrivateAddress, skipPreloadedContexts, userAgent } = {}) {
    async function load(url) {
        // Only own keys count as preloaded contexts; the `in` operator would
        // also match members inherited from `Object.prototype`:
        if (!skipPreloadedContexts && Object.hasOwn(preloadedContexts, url)) {
            logger.debug("Using preloaded context: {url}.", { url });
            return {
                contextUrl: null,
                document: preloadedContexts[url],
                documentUrl: url,
            };
        }
        if (!allowPrivateAddress) {
            try {
                await validatePublicUrl(url);
            }
            catch (error) {
                if (error instanceof UrlError) {
                    logger.error("Disallowed private URL: {url}", { url, error });
                }
                throw error;
            }
        }
        const request = createRequest(url, { userAgent });
        logRequest(request);
        const response = await fetch(request, {
            // Since Bun has a bug that ignores the `Request.redirect` option,
            // to work around it we specify `redirect: "manual"` here too:
            // https://github.com/oven-sh/bun/issues/10754
            redirect: "manual",
        });
        // Follow redirects manually to get the final URL:
        const redirectTarget = getRedirectTarget(url, response);
        if (redirectTarget != null) {
            return load(redirectTarget);
        }
        return getRemoteDocument(url, response, load);
    }
    return load;
}
const _fetchDocumentLoader = getDocumentLoader();
const _fetchDocumentLoader_allowPrivateAddress = getDocumentLoader({
    allowPrivateAddress: true,
});
/**
 * A JSON-LD document loader that utilizes the browser's `fetch` API.
 *
 * This loader preloads the below frequently used contexts:
 *
 * - <https://www.w3.org/ns/activitystreams>
 * - <https://w3id.org/security/v1>
 * - <https://w3id.org/security/data-integrity/v1>
 * - <https://www.w3.org/ns/did/v1>
 * - <https://w3id.org/security/multikey/v1>
 * - <https://purl.archive.org/socialweb/webfinger>
 * - <http://schema.org/>
 * @param url The URL of the document to load.
 * @param allowPrivateAddress Whether to allow fetching private network
 *                            addresses.  Turned off by default.
 * @returns The remote document.
 * @deprecated Use {@link getDocumentLoader} instead.
 */
export function fetchDocumentLoader(url, allowPrivateAddress = false) {
    logger.warn("fetchDocumentLoader() function is deprecated.  " +
        "Use getDocumentLoader() function instead.");
    return (allowPrivateAddress
        ? _fetchDocumentLoader_allowPrivateAddress
        : _fetchDocumentLoader)(url);
}
/**
 * Gets an authenticated {@link DocumentLoader} for the given identity.
 * Note that an authenticated document loader intentionally does not cache
 * the fetched documents.
 * @param identity The identity to get the document loader for.
 *                 The actor's key pair.
 * @param options The options for the document loader.
 * @returns The authenticated document loader.
 * @throws {TypeError} If the key is invalid or unsupported.
 * @since 0.4.0
 */
export function getAuthenticatedDocumentLoader(identity, { allowPrivateAddress, userAgent } = {}) {
    validateCryptoKey(identity.privateKey);
    async function load(url) {
        if (!allowPrivateAddress) {
            try {
                await validatePublicUrl(url);
            }
            catch (error) {
                if (error instanceof UrlError) {
                    logger.error("Disallowed private URL: {url}", { url, error });
                }
                throw error;
            }
        }
        let request = createRequest(url, { userAgent });
        request = await signRequest(request, identity.privateKey, identity.keyId);
        logRequest(request);
        const response = await fetch(request, {
            // Since Bun has a bug that ignores the `Request.redirect` option,
            // to work around it we specify `redirect: "manual"` here too:
            // https://github.com/oven-sh/bun/issues/10754
            redirect: "manual",
        });
        // Follow redirects manually to get the final URL:
        const redirectTarget = getRedirectTarget(url, response);
        if (redirectTarget != null) {
            return load(redirectTarget);
        }
        return getRemoteDocument(url, response, load);
    }
    return load;
}
/**
 * Decorates a {@link DocumentLoader} with a cache backed by a key–value
 * store.
 * @param parameters The parameters for the cache:
 *                   `loader` is the document loader to decorate,
 *                   `kv` is the store accessed through `get()` and `set()`,
 *                   `prefix` is the key prefix (`["_fedify", "remoteDocument"]`
 *                   by default), and `rules` is a list of
 *                   `[pattern, duration]` pairs where a pattern is a string,
 *                   a `URL`, or a `URLPattern` (by default every URL is
 *                   cached for 5 minutes).  The first matching rule wins,
 *                   and a URL matching no rule bypasses the cache.
 * @returns The decorated document loader which is cache-enabled.
 * @throws {TypeError} If a rule has a duration longer than 30 days.
 */
export function kvCache({ loader, kv, prefix, rules }) {
    const keyPrefix = prefix ?? ["_fedify", "remoteDocument"];
    rules ??= [
        [new dntShim.URLPattern({}), dntShim.Temporal.Duration.from({ minutes: 5 })],
    ];
    for (const [p, duration] of rules) {
        if (dntShim.Temporal.Duration.compare(duration, { days: 30 }) > 0) {
            throw new TypeError("The maximum cache duration is 30 days: " +
                (p instanceof dntShim.URLPattern
                    ? `${p.protocol}://${p.username}:${p.password}@${p.hostname}:${p.port}/${p.pathname}?${p.search}#${p.hash}`
                    : p.toString()));
        }
    }
    function matchRule(url) {
        for (const [pattern, duration] of rules) {
            if (typeof pattern === "string") {
                if (url === pattern)
                    return duration;
                continue;
            }
            if (pattern instanceof URL) {
                if (pattern.href === url)
                    return duration;
                continue;
            }
            if (pattern.test(url))
                return duration;
        }
        return null;
    }
    return async (url) => {
        const match = matchRule(url);
        if (match == null)
            return await loader(url);
        const key = [...keyPrefix, url];
        let cache = undefined;
        try {
            cache = await kv.get(key);
        }
        catch (error) {
            // A failing cache must not break loading; fall back to the loader:
            logger.warn("Failed to get the document of {url} from the KV cache: {error}", { url, error });
        }
        if (cache == null) {
            const remoteDoc = await loader(url);
            try {
                await kv.set(key, remoteDoc, { ttl: match });
            }
            catch (error) {
                logger.warn("Failed to save the document of {url} to the KV cache: {error}", { url, error });
            }
            return remoteDoc;
        }
        return cache;
    };
}
/**
 * Gets the user agent string for the given application and URL.
 * @param options The options for making the user agent string.
 *                `options.software` is placed in front of the Fedify
 *                product token, and `options.url` is appended as `+URL`.
 * @returns The user agent string.
 * @since 1.3.0
 */
export function getUserAgent({ software, url } = {}) {
    const fedify = `Fedify/${metadata.version}`;
    const runtime = isDeno
        ? `Deno/${dntShim.Deno.version.deno}`
        : "Bun" in dntShim.dntGlobalThis
            // @ts-ignore: `Bun` is a global variable in Bun
            ? `Bun/${Bun.version}`
            : isNode
                ? `Node.js/${process.version}`
                : null;
    const userAgent = software == null ? [fedify] : [software, fedify];
    if (runtime != null)
        userAgent.push(runtime);
    if (url != null)
        userAgent.push(`+${url.toString()}`);
    // The first token is the product; the rest goes into the comment part:
    const first = userAgent.shift();
    return `${first} (${userAgent.join("; ")})`;
}