UNPKG

@specprotected/spec-proxy-service-worker

Version:

Service Worker API implementation for integrating with Spec Proxy from an Edge Worker

430 lines (400 loc) 15.5 kB
/*
 * This is the common Service Worker API library to support the Spec Proxy product.
 * If you are a user, you likely are looking for a platform-specific library like
 * spec-proxy-cloudflare-worker or spec-proxy-fastly-worker.
 */

import { parse as parseCookies, serialize as serializeSetCookie } from "cookie";

// path prefix to always route traffic to Spec Proxy
const SPEC_PATH_PREFIX = "/spec_traffic";
// header that controls request forwarding for Spec Proxy
const SPEC_HEADER_FORWARD_ORIGIN = "x-spec-forward-origin";
// cookie key for the Spec ID
const SPEC_COOKIE_ID = "x-spec-id";
// header that carries the customer key (see SpecConfiguration.customerKey)
const SPEC_HEADER_CUSTOMER_KEY = "x-spec-customer-authorization";
// the Set-Cookie header
const HEADER_SET_COOKIE = "set-cookie";
// the Cookie header
const HEADER_COOKIE = "cookie";
// standard Host header
const HEADER_HOST = "host";
// standard X-Forwarded-For header
const HEADER_X_FORWARDED_FOR = "x-forwarded-for";
// header that indicates successful processing by the Spec platform
const HEADER_SPEC_ACTIVITY = "x-atvak-activity-count";

// Matches an optional (lazy) subdomain prefix, then a sequence of non-"."
// characters, a ".", and one of the known public suffixes to comprise the apex
// domain (named group `domain`). If `.spec-internal.com` is present, the final
// group consumes it so that it is excluded from the apex domain.
const APEX_DOMAIN_PATTERN =
  /^(.*?\.)?(?<domain>[^.]+\.(com|co|org|edu|net|int|gov|mil|uk|co\.uk|ac\.uk|gov\.uk|ltd\.uk|me\.uk|net\.uk|nhs\.uk|org\.uk|plc\.uk|police\.uk))(\.spec-internal\.com)?$/;

// Constructor used to build outgoing requests. Platform libraries may supply
// their own; it additionally receives the original request and the config.
// Note: type RequestInfo = string | Request;
export type PlatformRequest = new (
  url: RequestInfo,
  info?: RequestInit,
  original?: Request,
  config?: SpecConfiguration,
) => Request;

// Spec Proxy configuration object
export interface SpecConfiguration {
  /**
   * When true, disable Spec Proxy, this library and all functionality is disabled
   */
  disableSpecProxy?: boolean;
  /**
   * When true, the request returned by this function is modified
   * to make a request to Spec Proxy, which will result in Spec Proxy making
   * the request to the customer origin itself
   */
  inlineMode?: boolean;
  /**
   * A number between 0 and 100 that identifies the percentage of IP traffic
   * the SpecTrust platform should process.
   */
  percentageOfIPs?: number;
  /**
   * A key provided by Spec, which validates traffic as originating from
   * the customer when in mirror mode.
   */
  customerKey?: string;
  /**
   * Disables routing traffic prefixed with /spec_traffic to Spec
   */
  disableSpecTraffic?: boolean;
  /**
   * A prefix to add to the `.spec-internal.com` suffix this library adds to
   * the incoming request's hostname.
   * ex: domainOverride: "prefix" -> "prefix.spec-internal.com"
   */
  domainOverride?: string;
}

/**
 * This is one of two lower-level entrypoints into the Spec Proxy framework.
 * This function takes the Service Worker `event` object that's provided as an
 * argument to the 'fetch' event and a configuration object that describes how
 * the library should behave.
 *
 * Clients of this library should use the returned request as if it were the request
 * originally provided by your Edge Worker.
 *
 * The response obtained for the returned request should always be processed with
 * `specProxyProcessResponse` before being returned.
 *
 * Use this lower-level API if you need to do additional work between the
 * request and response, such as integrate additional libraries in your worker
 * function.
 *
 * ```javascript
 * import {
 *   specProxyProcessRequest,
 *   specProxyProcessResponse,
 * } from '@specprotected/spec-proxy-service-worker';
 *
 * addEventListener('fetch', event => {
 *   // if we don't catch the exception, fail open to original traffic path
 *   event.passThroughOnException();
 *
 *   event.respondWith(handleEvent(event));
 * })
 *
 * const specConfig = { inlineMode: false };
 *
 * async function handleEvent(event) {
 *   let request = specProxyProcessRequest(event, specConfig);
 *
 *   let response = await fetch(request);
 *
 *   // do more work, if required
 *
 *   // call the Spec Proxy response method to enact potential response changes
 *   return specProxyProcessResponse(request, response, specConfig);
 * }
 * ```
 *
 * @param event - the FetchEvent created by the Fetch API
 * @param config - configuration object to adjust Spec Proxy behavior
 * @param requestConstructor - constructor used to build outgoing requests;
 *   defaults to the global `Request`
 * @returns the modified Request object, or `event.request` itself when the
 *   request is not handled by Spec Proxy
 */
export function specProxyProcessRequest(
  event: FetchEvent,
  config: SpecConfiguration = {},
  requestConstructor: PlatformRequest = Request,
): Request {
  const request = event.request;

  // first check to see if we shouldn't process anything at all
  if (config.disableSpecProxy) {
    return request;
  }

  const specUrl = new URL(request.url);

  // check if we should handle this http request
  if (!shouldHandleRequest(specUrl, request.headers, config)) {
    return request;
  }

  // save it in case we're in Inline mode
  // Note: this was just `.host` before, but for local development sometimes
  // ports will be included, using hostname avoids overwriting port values
  const originalHost = specUrl.hostname;

  // write to spec proxy
  specUrl.hostname = config.domainOverride
    ? `${config.domainOverride}.spec-internal.com`
    : `${originalHost}.spec-internal.com`;

  // For all requests destined for spec proxy, add the customer key to the
  // headers if provided.
  const specHeaders = new Headers(request.headers);
  if (config.customerKey) {
    specHeaders.set(SPEC_HEADER_CUSTOMER_KEY, config.customerKey);
  }

  const isSpecTrafficPath =
    !config.disableSpecTraffic && specUrl.pathname.startsWith(SPEC_PATH_PREFIX);

  if (config.inlineMode || isSpecTrafficPath) {
    // use this header to signal that Spec Proxy is responsible for forwarding
    // the request on to the customer's servers.
    // Note: /spec_traffic ignores forwarding directives
    specHeaders.set(SPEC_HEADER_FORWARD_ORIGIN, originalHost);

    return new requestConstructor(
      specUrl.toString(),
      {
        body: request.body,
        headers: specHeaders,
        method: request.method,
        redirect: request.redirect,
      },
      request,
      config,
    );
  }

  // Otherwise, we'll mirror the request. We will register this request promise
  // to be awaited in the background by the Service Worker API and in order to
  // do that without reading the body, we need to duplicate the body stream.

  // clone the ReadableStream, if it exists.
  let teedBody: [ReadableStream | null, ReadableStream | null] = [null, null];
  if (request.body !== null && request.body !== undefined) {
    teedBody = request.body.tee();
  }

  // build a request with the new url and the first teed ReadableStream
  const specRequest = new requestConstructor(
    specUrl.toString(),
    {
      body: teedBody[0],
      headers: specHeaders,
      method: request.method,
      redirect: request.redirect,
    },
    request,
    config,
  );

  // background the request to spec proxy, but ask service worker to wait
  // until it's finished even though we don't await it
  event.waitUntil(fetch(specRequest));

  // rebuild the request to the original URL with the original headers, using
  // the second teed ReadableStream as its body
  return new requestConstructor(
    request.url,
    {
      body: teedBody[1],
      headers: request.headers,
      method: request.method,
      redirect: request.redirect,
    },
    request,
    config,
  );
}

/**
 * Process the Response as it returns to the originator of the Request. This
 * function generally adds any details the SpecTrust platform requires to identify
 * site visitors, such as the Spec Cookie.
 *
 * The response is returned untouched when Spec Proxy is disabled, when running
 * in inline mode, or when the request is not one Spec Proxy handles.
 *
 * Note: this function does not process the Body of a Response, so won't require
 * awaiting while reading the body stream, which enables efficient processing.
 *
 * @param request - the request object that was sent to customer servers
 * @param response - the response object that was returned from customer servers
 * @param config - the configuration object that defines how this library should behave
 * @returns - the modified response object
 */
export function specProxyProcessResponse(
  request: Request,
  response: Response,
  config: SpecConfiguration = {},
): Response {
  if (
    config.disableSpecProxy ||
    config.inlineMode ||
    !shouldHandleRequest(new URL(request.url), request.headers, config)
  ) {
    return response;
  }

  const cookies: Record<string, string | undefined> = request.headers
    ? parseCookies(request.headers.get(HEADER_COOKIE) || "")
    : {};

  // Note: falsy check because we set our cookie on undefined or "" values
  if (cookies[SPEC_COOKIE_ID]) {
    return response;
  }

  const specId = crypto.randomUUID();
  const setCookie = serializeSetCookie(SPEC_COOKIE_ID, specId, {
    // Note: 10 years long, essentially a "very long time"
    maxAge: 320000000,
    // the || will turn the (empty string | null) into undefined
    domain: extractTopLevelDomain(request.headers) || undefined,
    // valid for all paths
    path: "/",
    sameSite: "none",
    secure: true,
  });

  // reconstruct Response to avoid immutability
  const mutableResponse = new Response(response.body, response);
  mutableResponse.headers.append(HEADER_SET_COOKIE, setCookie);
  return mutableResponse;
}

/**
 * This is the simplest method in the Spec Proxy Service Worker library.
 * If the edge worker you're writing only requires integrating Spec Proxy and
 * no other additional libraries, this is the easiest way to use Spec Proxy.
 *
 * It only requires one method, so we'll show an example:
 *
 * ```javascript
 * import { specProxyProcess } from '@specprotected/spec-proxy-service-worker';
 *
 * addEventListener('fetch', event => {
 *   // if we don't catch the exception, fail open to original traffic path
 *   event.passThroughOnException();
 *
 *   event.respondWith(specProxyProcess(event, {
 *     // set to false to mirror traffic to Spec Proxy instead of redirecting it
 *     inlineMode: true
 *   }));
 * })
 * ```
 *
 * @param event - the FetchEvent originating from the Fetch API
 * @param config - configuration object to control Spec Proxy behavior
 * @param requestConstructor - constructor used to build outgoing requests;
 *   defaults to the global `Request`
 * @returns - a Promise to resolve to the modified response from the customer server
 */
export async function specProxyProcess(
  event: FetchEvent,
  config: SpecConfiguration = {},
  requestConstructor: PlatformRequest = Request,
): Promise<Response> {
  const request = specProxyProcessRequest(event, config, requestConstructor);
  const response = await specMakeRequestWithFallback(event, request, config);
  return specProxyProcessResponse(request, response, config);
}

/**
 * Makes a request with intelligent fallback behavior based on the configuration mode.
 * This function is responsible for handling the actual network request to either the
 * Spec Proxy service or directly to the customer origin, depending on the response
 * from the initial request and the configured mode.
 *
 * In inline mode, this function first attempts to make a request through the Spec Proxy
 * service. If the response contains the "x-atvak-activity-count" header, it indicates
 * successful processing by the Spec platform, and the response is returned after
 * removing the internal header. If this header is not present, it falls back to
 * making a direct request to the customer origin using the original request from
 * the FetchEvent.
 *
 * When `request` is the very same object as `event.request` (which is what
 * `specProxyProcessRequest` returns when the library is disabled or the request
 * is not selected for processing), the request already targets the customer
 * origin, so it is fetched exactly once and no fallback is attempted.
 *
 * In mirror mode, there is no fallback mechanism since the request body has been teed
 * (duplicated) and the original request cannot be safely reused. The function simply
 * makes the request as provided.
 *
 * @param event - The FetchEvent originating from the Fetch API, used for fallback in inline mode
 * @param request - The potentially modified Request object to be sent to Spec Proxy
 * @param config - Configuration object that controls Spec Proxy behavior
 * @returns - A Promise that resolves to the Response from either Spec Proxy or the customer origin
 */
export async function specMakeRequestWithFallback(
  event: FetchEvent,
  request: Request,
  config: SpecConfiguration,
): Promise<Response> {
  // In mirror mode, the request "is the original" except for the teed body,
  // so there's no way to fallback. Likewise, an unmodified request is already
  // headed to the customer origin: its response never carries the Spec header,
  // and "falling back" would only re-send the very same request a second time.
  if (!config.inlineMode || request === event.request) {
    return await fetch(request);
  }

  const response = await fetch(request);

  // This is the correct case, we received a header that the Spec platform sets.
  if (response.headers.get(HEADER_SPEC_ACTIVITY)) {
    const newResponse = new Response(response.body, response);
    newResponse.headers.delete(HEADER_SPEC_ACTIVITY);
    return newResponse;
  }

  // This is the fallback case, we did not receive a header that the Spec platform sets.
  // In inline mode we don't tee the body, so we use the request on the event directly.
  // NOTE(review): the inline request is built from `event.request.body`, so for
  // requests that carry a body the original stream may already be consumed at
  // this point — confirm the fallback behaves as intended for such traffic.
  return await fetch(event.request);
}

/**
 * Function that determines if Spec Proxy should handle the incoming request.
 * This involves observing the configuration object and resolving whether or not
 * we should process this Request under the given configuration values.
 *
 * @param url - parsed URL of the originating request, used to detect the
 *   `/spec_traffic` path prefix
 * @param headers - Header map from the originating request
 * @param config - configuration object to control Spec Proxy behavior
 * @returns - true if we should process the request
 */
function shouldHandleRequest(
  url: URL,
  headers: Headers,
  config: SpecConfiguration,
): boolean {
  // If we're directing /spec_traffic to Spec, we should always do that!
  if (!config.disableSpecTraffic && url.pathname.startsWith(SPEC_PATH_PREFIX)) {
    return true;
  }

  // if we're not filtering out a percentage of IPs, or the filter is 100%
  // we should always handle traffic.
  if (config.percentageOfIPs === undefined || config.percentageOfIPs >= 100) {
    return true;
  }

  // early abort if it's impossible to match
  if (config.percentageOfIPs <= 0) {
    return false;
  }

  // split up the ip address into octets, convert them to integers, and then sum them.
  // default the string to 99 so if, for some reason, there's a problem the traffic
  // doesn't go through unless it's at 100%. Note: 99 because there's 100 numbers in
  // [0, 99]!
  // NOTE(review): the header value is not split on commas, so for a multi-hop
  // X-Forwarded-For list the hops after the first also influence the sum —
  // confirm this matches how the Spec platform samples traffic.
  let ipOctetSum = (headers.get(HEADER_X_FORWARDED_FOR) || "99")
    .split(".")
    // explicit radix so octets are always read as decimal
    .map((octet) => parseInt(octet, 10))
    .reduce((acc, n) => acc + n);

  // if we don't know what number this is...don't assume anything
  if (isNaN(ipOctetSum)) {
    ipOctetSum = 99;
  }

  // not `<=` because it's a percentage, e.g. "allow 1%" would allow
  // IP octet sums that result in `0`, which is 1 slice in the range [0, 99]
  return ipOctetSum % 100 < config.percentageOfIPs;
}

/**
 * Extract the top-level (apex) domain from the Host header.
 * This will exclude the `.spec-internal.com` domain if it is present.
 * Any `:port` suffix on the Host header is removed first, because the result
 * is used as a cookie Domain attribute, which cannot carry a port.
 * In the event that we do not match on the Host header for any reason,
 * the value of the header (without the port) is returned.
 *
 * @param headers - Header map from the originating request
 * @returns - top-level domain if the Host header was present, otherwise null
 */
function extractTopLevelDomain(headers: Headers): string | null {
  const rawHost = headers.get(HEADER_HOST);
  if (!rawHost) {
    // preserve the header's own "absent" value (null, or "" when empty)
    return rawHost;
  }

  // drop a trailing ":port" (e.g. "example.com:8080" during local development)
  const host = rawHost.replace(/:\d+$/, "");

  const matches = APEX_DOMAIN_PATTERN.exec(host);
  return matches?.groups?.domain ?? host;
}