@fedify/fedify
Version:
An ActivityPub server framework
357 lines (356 loc) • 13.9 kB
JavaScript
import * as dntShim from "../_dnt.shims.js";
import { isDeno, isNode } from "../deps/jsr.io/@david/which-runtime/0.2.1/mod.js";
import { HTTPHeaderLink } from "@hugoalh/http-header-link";
import { getLogger } from "@logtape/logtape";
import process from "node:process";
import metadata from "../deno.js";
import { signRequest } from "../sig/http.js";
import { validateCryptoKey } from "../sig/key.js";
import preloadedContexts from "./contexts.js";
import { UrlError, validatePublicUrl } from "./url.js";
// Module-level LogTape logger registered under the
// ["fedify", "runtime", "docloader"] category; the loaders below log
// requests, failures, and cache problems through it.
const logger = getLogger(["fedify", "runtime", "docloader"]);
/**
 * Raised when a JSON-LD document could not be fetched.
 *
 * The error message is the URL alone when no message is supplied, and
 * `"<url>: <message>"` otherwise.
 */
export class FetchError extends Error {
  /**
   * The URL whose fetch failed.  A string argument is parsed into a `URL`;
   * any other value is stored as given.
   */
  url;

  /**
   * Builds a new `FetchError`.
   *
   * @param url The URL that failed to fetch (a string or a `URL`).
   * @param message Optional detail appended after the URL.
   * @throws {TypeError} If `url` is a string that cannot be parsed as a URL.
   */
  constructor(url, message) {
    let text;
    if (message == null) {
      text = url.toString();
    } else {
      text = `${url}: ${message}`;
    }
    super(text);
    this.name = "FetchError";
    if (typeof url === "string") {
      this.url = new URL(url);
    } else {
      this.url = url;
    }
  }
}
/**
 * Builds the request used to fetch a remote document.
 *
 * The request asks for ActivityStreams / JSON-LD representations and never
 * follows redirects on its own (`redirect: "manual"`).
 *
 * @param url The URL to request.
 * @param options Request options.  `options.userAgent` is used verbatim as
 *                the `User-Agent` header when it is a string; otherwise it
 *                is handed to `getUserAgent()` to build the header value.
 * @returns The prepared `Request`.
 */
function createRequest(url, options = {}) {
  let agent = options.userAgent;
  if (typeof agent !== "string") {
    agent = getUserAgent(agent);
  }
  const init = {
    headers: {
      Accept: "application/activity+json, application/ld+json",
      "User-Agent": agent,
    },
    redirect: "manual",
  };
  return new Request(url, init);
}
/**
 * Writes a debug log entry describing an outgoing request: its method, URL,
 * and headers (flattened into a plain object).
 *
 * @param request The request that is about to be sent.
 */
function logRequest(request) {
  const { method, url } = request;
  const headers = Object.fromEntries(request.headers.entries());
  logger.debug("Fetching document: {method} {url} {headers}", { method, url, headers });
}
/**
 * Turns an HTTP response into a remote JSON-LD document.
 *
 * - A non-OK response is logged and reported as a {@link FetchError}.
 * - A response with no `Content-Type`, or with `application/activity+json` /
 *   `application/ld+json`, is parsed as JSON.  A `Link` header with
 *   `rel="http://www.w3.org/ns/json-ld#context"` and
 *   `type="application/ld+json"` supplies `contextUrl`.
 * - For any other content type, the first `rel="alternate"` link that has an
 *   ActivityStreams/JSON-LD type and points somewhere other than the document
 *   itself is loaded through `fetch` and its result is returned.  It is looked
 *   up in the `Link` header and, for HTML/XHTML, in `<a>`/`<link>` tags.
 * - If no alternate is found the body is parsed as JSON anyway (which rejects
 *   with a `SyntaxError` for ordinary HTML).
 *
 * @param url The URL that was requested; used when `response.url` is empty.
 * @param response The HTTP response to interpret.
 * @param fetch The document loader used to follow an alternate link.
 * @returns `{ contextUrl, document, documentUrl }`, or whatever `fetch`
 *          resolves to when an alternate document is followed.
 * @throws {FetchError} If the response status is not OK.
 */
async function getRemoteDocument(url, response, fetch) {
  // `response.url` is an empty string on responses that carry no URL, so fall
  // back to the URL the caller asked for.
  const documentUrl = response.url === "" ? url : response.url;
  const docUrl = new URL(documentUrl);
  if (!response.ok) {
    logger.error("Failed to fetch document: {status} {url} {headers}", {
      status: response.status,
      url: documentUrl,
      headers: Object.fromEntries(response.headers.entries()),
    });
    throw new FetchError(documentUrl, `HTTP ${response.status}: ${documentUrl}`);
  }
  const contentType = response.headers.get("Content-Type");
  // A missing Content-Type is treated as JSON-LD.
  const jsonLd = contentType == null ||
    contentType === "application/activity+json" ||
    contentType.startsWith("application/activity+json;") ||
    contentType === "application/ld+json" ||
    contentType.startsWith("application/ld+json;");
  const linkHeader = response.headers.get("Link");
  let contextUrl = null;
  if (linkHeader != null) {
    let link;
    try {
      link = new HTTPHeaderLink(linkHeader);
    } catch (e) {
      if (!(e instanceof SyntaxError)) {
        throw e;
      }
      // A malformed Link header is treated as if it had no links at all.
      link = new HTTPHeaderLink();
    }
    if (jsonLd) {
      const entries = link.getByRel("http://www.w3.org/ns/json-ld#context");
      for (const [uri, params] of entries) {
        if ("type" in params && params.type === "application/ld+json") {
          contextUrl = uri;
          break;
        }
      }
    } else {
      const entries = link.getByRel("alternate");
      for (const [uri, params] of entries) {
        const altUri = new URL(uri, docUrl);
        if (
          "type" in params &&
          (params.type === "application/activity+json" ||
            params.type === "application/ld+json" ||
            params.type.startsWith("application/ld+json;")) &&
          altUri.href !== docUrl.href
        ) {
          logger.debug("Found alternate document: {alternateUrl} from {url}", {
            alternateUrl: altUri.href,
            url: documentUrl,
          });
          return await fetch(altUri.href);
        }
      }
    }
  }
  let document;
  const html = !jsonLd &&
    (contentType === "text/html" || contentType?.startsWith("text/html;") ||
      contentType === "application/xhtml+xml" ||
      contentType?.startsWith("application/xhtml+xml;"));
  if (html) {
    // The body comes from an untrusted server, so these patterns must not be
    // ambiguous.  An unquoted value may not start with a quote (otherwise
    // `""` matches both the quoted and the unquoted alternative, which makes
    // an unterminated tag backtrack exponentially) and may not contain `<`
    // (otherwise one attribute run can swallow every following tag, which
    // makes the scan quadratic).
    const tagPattern =
      /<(a|link)((\s+[a-z][a-z:_-]*=("[^"]*"|'[^']*'|[^\s"'<>][^\s<>]*))+)\/?>/ig;
    const attrPattern =
      /\s+([a-z][a-z:_-]*)=("([^"]*)"|'([^']*)'|([^\s"'<>][^\s<>]*))/ig;
    const text = await response.text();
    const rawAttribs = [];
    let tag;
    while ((tag = tagPattern.exec(text)) !== null) {
      rawAttribs.push(tag[2]);
    }
    for (const rawAttrs of rawAttribs) {
      const attribs = {};
      let attr;
      // Each loop runs until `exec()` returns null, which resets `lastIndex`,
      // so the global pattern is safe to reuse for the next tag.
      while ((attr = attrPattern.exec(rawAttrs)) !== null) {
        attribs[attr[1].toLowerCase()] = attr[3] ?? attr[4] ?? attr[5] ?? "";
      }
      if (
        attribs.rel === "alternate" && "type" in attribs &&
        (attribs.type === "application/activity+json" ||
          attribs.type === "application/ld+json" ||
          attribs.type.startsWith("application/ld+json;")) &&
        "href" in attribs &&
        new URL(attribs.href, docUrl).href !== docUrl.href
      ) {
        logger.debug("Found alternate document: {alternateUrl} from {url}", {
          alternateUrl: attribs.href,
          url: documentUrl,
        });
        return await fetch(new URL(attribs.href, docUrl).href);
      }
    }
    // No alternate was advertised: the body has already been consumed as
    // text, so try to parse that text as JSON.
    document = JSON.parse(text);
  } else {
    document = await response.json();
  }
  logger.debug("Fetched document: {status} {url} {headers}", {
    status: response.status,
    url: documentUrl,
    headers: Object.fromEntries(response.headers.entries()),
  });
  return { contextUrl, document, documentUrl };
}
/**
 * Creates a JSON-LD document loader that utilizes the browser's `fetch` API.
 *
 * The created loader preloads the below frequently used contexts by default
 * (unless `options.skipPreloadedContexts` is set to `true`):
 *
 * - <https://www.w3.org/ns/activitystreams>
 * - <https://w3id.org/security/v1>
 * - <https://w3id.org/security/data-integrity/v1>
 * - <https://www.w3.org/ns/did/v1>
 * - <https://w3id.org/security/multikey/v1>
 * - <https://purl.archive.org/socialweb/webfinger>
 * - <http://schema.org/>
 *
 * Redirects are followed manually so that every hop goes through the same
 * checks.  A `Location` header may be relative; it is resolved against the
 * URL that was requested.  At most 20 redirects are followed for one load.
 *
 * @param options Options for the document loader.
 * @param options.allowPrivateAddress When truthy, URLs are not checked with
 *        `validatePublicUrl()` before being fetched.
 * @param options.skipPreloadedContexts When truthy, the preloaded contexts
 *        are ignored and every URL is fetched.
 * @param options.userAgent The `User-Agent` header, either as a string or as
 *        options for `getUserAgent()`.
 * @returns The document loader.  It rejects with a {@link FetchError} when
 *          a redirect chain is longer than 20 hops, and rethrows whatever
 *          `validatePublicUrl()` throws for a disallowed URL.
 * @since 1.3.0
 */
export function getDocumentLoader({ allowPrivateAddress, skipPreloadedContexts, userAgent } = {}) {
  // The same cap the Fetch Standard puts on automatically followed redirects.
  const maxRedirects = 20;

  // Loads `url`; `redirects` counts the hops already followed for this load.
  async function fetchDocument(url, redirects) {
    // `Object.hasOwn` rather than `in`, so that inherited keys such as
    // "toString" are never mistaken for preloaded contexts.
    if (!skipPreloadedContexts && Object.hasOwn(preloadedContexts, url)) {
      logger.debug("Using preloaded context: {url}.", { url });
      return {
        contextUrl: null,
        document: preloadedContexts[url],
        documentUrl: url,
      };
    }
    if (!allowPrivateAddress) {
      try {
        await validatePublicUrl(url);
      } catch (error) {
        if (error instanceof UrlError) {
          logger.error("Disallowed private URL: {url}", { url, error });
        }
        throw error;
      }
    }
    const request = createRequest(url, { userAgent });
    logRequest(request);
    const response = await fetch(request, {
      // Since Bun has a bug that ignores the `Request.redirect` option,
      // to work around it we specify `redirect: "manual"` here too:
      // https://github.com/oven-sh/bun/issues/10754
      redirect: "manual",
    });
    // Follow redirects manually to get the final URL:
    if (
      response.status >= 300 && response.status < 400 &&
      response.headers.has("Location")
    ) {
      if (redirects >= maxRedirects) {
        throw new FetchError(url, `Too many redirections (${redirects + 1})`);
      }
      // `Location` may be a relative reference; resolve it against the URL
      // that was actually requested.
      const target = new URL(response.headers.get("Location"), request.url);
      return fetchDocument(target.href, redirects + 1);
    }
    return getRemoteDocument(url, response, load);
  }

  // The public loader keeps the single-argument signature; the redirect
  // counter stays internal.
  function load(url) {
    return fetchDocument(url, 0);
  }
  return load;
}
// Shared loader instances backing the deprecated fetchDocumentLoader()
// function: one built with the default options, and one that skips the
// public-address check.  Both are created once, at module load time.
const _fetchDocumentLoader = getDocumentLoader();
const _fetchDocumentLoader_allowPrivateAddress = getDocumentLoader({
allowPrivateAddress: true,
});
/**
 * A JSON-LD document loader that utilizes the browser's `fetch` API.
 *
 * Every call logs a deprecation warning and then delegates to one of two
 * module-level loaders created with {@link getDocumentLoader}.
 *
 * This loader preloads the below frequently used contexts:
 *
 * - <https://www.w3.org/ns/activitystreams>
 * - <https://w3id.org/security/v1>
 * - <https://w3id.org/security/data-integrity/v1>
 * - <https://www.w3.org/ns/did/v1>
 * - <https://w3id.org/security/multikey/v1>
 * - <https://purl.archive.org/socialweb/webfinger>
 * - <http://schema.org/>
 * @param url The URL of the document to load.
 * @param allowPrivateAddress Whether to allow fetching private network
 *                            addresses.  Turned off by default.
 * @returns The remote document.
 * @deprecated Use {@link getDocumentLoader} instead.
 */
export function fetchDocumentLoader(url, allowPrivateAddress = false) {
  logger.warn("fetchDocumentLoader() function is deprecated. " +
    "Use getDocumentLoader() function instead.");
  let loader = _fetchDocumentLoader;
  if (allowPrivateAddress) {
    loader = _fetchDocumentLoader_allowPrivateAddress;
  }
  return loader(url);
}
/**
 * Gets an authenticated {@link DocumentLoader} for the given identity.
 * Note that an authenticated document loader intentionally does not cache
 * the fetched documents.
 *
 * Every request, including each hop of a redirect chain, is signed with the
 * identity's private key.  Redirects are followed manually: a `Location`
 * header may be relative and is resolved against the URL that was requested,
 * and at most 20 redirects are followed for one load.
 *
 * @param identity The identity to get the document loader for.
 *                 The actor's key pair (`privateKey` and `keyId` are used).
 * @param options The options for the document loader.
 * @param options.allowPrivateAddress When truthy, URLs are not checked with
 *        `validatePublicUrl()` before being fetched.
 * @param options.userAgent The `User-Agent` header, either as a string or as
 *        options for `getUserAgent()`.
 * @returns The authenticated document loader.  It rejects with a
 *          {@link FetchError} when a redirect chain is longer than 20 hops.
 * @throws {TypeError} If the key is invalid or unsupported.
 * @since 0.4.0
 */
export function getAuthenticatedDocumentLoader(identity, { allowPrivateAddress, userAgent } = {}) {
  validateCryptoKey(identity.privateKey);
  // The same cap the Fetch Standard puts on automatically followed redirects.
  const maxRedirects = 20;

  // Loads `url`; `redirects` counts the hops already followed for this load.
  async function fetchSigned(url, redirects) {
    if (!allowPrivateAddress) {
      try {
        await validatePublicUrl(url);
      } catch (error) {
        if (error instanceof UrlError) {
          logger.error("Disallowed private URL: {url}", { url, error });
        }
        throw error;
      }
    }
    const unsigned = createRequest(url, { userAgent });
    const request = await signRequest(unsigned, identity.privateKey, identity.keyId);
    logRequest(request);
    const response = await fetch(request, {
      // Since Bun has a bug that ignores the `Request.redirect` option,
      // to work around it we specify `redirect: "manual"` here too:
      // https://github.com/oven-sh/bun/issues/10754
      redirect: "manual",
    });
    // Follow redirects manually to get the final URL:
    if (
      response.status >= 300 && response.status < 400 &&
      response.headers.has("Location")
    ) {
      if (redirects >= maxRedirects) {
        throw new FetchError(url, `Too many redirections (${redirects + 1})`);
      }
      // `Location` may be a relative reference; resolve it against the URL
      // that was actually requested.
      const target = new URL(response.headers.get("Location"), unsigned.url);
      return fetchSigned(target.href, redirects + 1);
    }
    return getRemoteDocument(url, response, load);
  }

  // The public loader keeps the single-argument signature; the redirect
  // counter stays internal.
  function load(url) {
    return fetchSigned(url, 0);
  }
  return load;
}
/**
 * Decorates a {@link DocumentLoader} with a cache backed by a key-value
 * store.
 *
 * A URL is cached only when it matches one of the rules; the first matching
 * rule decides the time-to-live.  Cache failures never fail the load: a
 * failed read falls back to the wrapped loader and a failed write is only
 * logged.
 *
 * @param parameters The parameters for the cache.
 * @param parameters.loader The document loader to decorate.
 * @param parameters.kv The store; its `get(key)` and
 *        `set(key, value, { ttl })` methods are used.
 * @param parameters.prefix The key prefix.  Defaults to
 *        `["_fedify", "remoteDocument"]`.
 * @param parameters.rules Pairs of a pattern (an exact URL string, a `URL`,
 *        or a `URLPattern`) and a cache duration.  Defaults to a single
 *        empty `URLPattern` rule with a duration of five minutes.
 * @returns The decorated document loader which is cache-enabled.
 * @throws {TypeError} If any rule has a duration longer than 30 days.
 */
export function kvCache({ loader, kv, prefix, rules }) {
  const keyPrefix = prefix ?? ["_fedify", "remoteDocument"];
  rules ??= [
    [new dntShim.URLPattern({}), dntShim.Temporal.Duration.from({ minutes: 5 })],
  ];
  for (const [p, duration] of rules) {
    if (dntShim.Temporal.Duration.compare(duration, { days: 30 }) > 0) {
      throw new TypeError("The maximum cache duration is 30 days: " +
        (p instanceof dntShim.URLPattern
          ? `${p.protocol}://${p.username}:${p.password}@${p.hostname}:${p.port}/${p.pathname}?${p.search}#${p.hash}`
          : p.toString()));
    }
  }

  // Returns the duration of the first rule matching `url`, or null.
  function matchRule(url) {
    for (const [pattern, duration] of rules) {
      if (typeof pattern === "string") {
        if (url === pattern) {
          return duration;
        }
        continue;
      }
      if (pattern instanceof URL) {
        if (pattern.href === url) {
          return duration;
        }
        continue;
      }
      if (pattern.test(url)) {
        return duration;
      }
    }
    return null;
  }

  return async (url) => {
    const match = matchRule(url);
    if (match == null) {
      // No rule applies: bypass the cache entirely.
      return await loader(url);
    }
    const key = [...keyPrefix, url];
    let cache;
    try {
      cache = await kv.get(key);
    } catch (error) {
      // Log whatever was thrown, Error instance or not, so that a broken
      // store is never silently ignored; then fall through to the loader.
      logger.warn("Failed to get the document of {url} from the KV cache: {error}", { url, error });
    }
    if (cache != null) {
      return cache;
    }
    const remoteDoc = await loader(url);
    try {
      await kv.set(key, remoteDoc, { ttl: match });
    } catch (error) {
      logger.warn("Failed to save the document of {url} to the KV cache: {error}", { url, error });
    }
    return remoteDoc;
  };
}
/**
 * Builds the `User-Agent` string for outgoing requests.
 *
 * The result has the form `<product> (<comment>; <comment>; ...)`.  The
 * product is `software` when given, otherwise `Fedify/<version>`.  The
 * comments are, in order: `Fedify/<version>` (only when `software` is
 * given), the detected runtime (Deno, Bun, or Node.js, when one is
 * detected), and `+<url>` (only when `url` is given).
 *
 * @param options The options for making the user agent string.
 * @param options.software An optional application token placed first.
 * @param options.url An optional URL (string or `URL`) advertised with a
 *                    leading `+`.
 * @returns The user agent string.
 * @since 1.3.0
 */
export function getUserAgent({ software, url } = {}) {
  const fedify = `Fedify/${metadata.version}`;

  let runtime = null;
  if (isDeno) {
    runtime = `Deno/${dntShim.Deno.version.deno}`;
  } else if ("Bun" in dntShim.dntGlobalThis) {
    // @ts-ignore: `Bun` is a global variable in Bun
    runtime = `Bun/${Bun.version}`;
  } else if (isNode) {
    runtime = `Node.js/${process.version}`;
  }

  const product = software == null ? fedify : software;
  const comments = [];
  if (software != null) {
    comments.push(fedify);
  }
  if (runtime != null) {
    comments.push(runtime);
  }
  if (url != null) {
    comments.push(`+${url.toString()}`);
  }
  return `${product} (${comments.join("; ")})`;
}