@specprotected/spec-proxy-service-worker
Version:
Server Worker API implementation for integrating with Spec Proxy from an Edge Worker
430 lines (400 loc) • 15.5 kB
text/typescript
/*
* This is the common Service Worker API library to support the Spec Proxy product.
* If you are a user, you likely are looking for a platform-specific library like
* spec-proxy-cloudflare-worker or spec-proxy-fastly-worker.
*/
import { parse as parseCookies, serialize as serializeSetCookie } from "cookie";
// path prefix to always route traffic to Spec Proxy
const SPEC_PATH_PREFIX = "/spec_traffic";
// header that controls request forwarding for Spec Proxy; it is set to the
// original hostname when Spec Proxy should forward the request to the origin
const SPEC_HEADER_FORWARD_ORIGIN = "x-spec-forward-origin";
// cookie key for the Spec ID
const SPEC_COOKIE_ID = "x-spec-id";
// header carrying the customer key (config.customerKey) on requests sent to Spec Proxy
const SPEC_HEADER_CUSTOMER_KEY = "x-spec-customer-authorization";
// the Set-Cookie header
const HEADER_SET_COOKIE = "set-cookie";
// the Cookie header
const HEADER_COOKIE = "cookie";
// standard Host header
const HEADER_HOST = "host";
// standard X-Forwarded-For header
const HEADER_X_FORWARDED_FOR = "x-forwarded-for";
// header that indicates successful processing by the Spec platform
const HEADER_SPEC_ACTIVITY = "x-atvak-activity-count";
// Note: type RequestInfo = string | Request;
/**
* Constructor signature used by this library to build outgoing requests.
* The global `Request` constructor satisfies it and is the default used by
* `specProxyProcessRequest`; platform-specific libraries may supply their own.
*
* - `url`: the URL (or request) the new request targets
* - `info`: the usual `RequestInit` options (body, headers, method, redirect)
* - `original`: the incoming request the new request was derived from
* - `config`: the Spec Proxy configuration in effect for this request
*/
export type PlatformRequest = new (
url: RequestInfo,
info?: RequestInit,
original?: Request,
config?: SpecConfiguration,
) => Request;
// Spec Proxy configuration object. Every field is optional.
export interface SpecConfiguration {
/**
* When true, Spec Proxy is disabled: the request and response processing
* functions return their inputs unchanged.
*/
disableSpecProxy?: boolean;
/**
* When true, the request returned by this library is modified
* to make a request to Spec Proxy, which will result in Spec Proxy making
* the request to the customer origin itself.
* When false or unset, the request is mirrored instead: a copy is sent to
* Spec Proxy in the background while the original goes to the origin.
*/
inlineMode?: boolean;
/**
* A number between 0 and 100 that identifies the percentage of IP traffic
* the SpecTrust platform should process. Selection is derived from the
* X-Forwarded-For header. When unset, all traffic is processed.
*/
percentageOfIPs?: number;
/**
* A key provided by Spec, which validates traffic as originating from
* the customer when in mirror mode. When set, it is sent as the
* `x-spec-customer-authorization` header on requests to Spec Proxy.
*/
customerKey?: string;
/**
* Disables routing traffic prefixed with /spec_traffic to Spec
*/
disableSpecTraffic?: boolean;
/**
* Replaces the incoming request's hostname as the label placed in front of
* the `.spec-internal.com` suffix when building the Spec Proxy hostname.
* ex: domainOverride: "prefix" -> "prefix.spec-internal.com"
*/
domainOverride?: string;
}
/**
 * Lower-level entrypoint that prepares the request side of a Spec Proxy
 * integration. It receives the Service Worker `event` handed to the 'fetch'
 * listener plus a configuration object, and returns the request the worker
 * should fetch in place of the one it originally received.
 *
 * The response obtained for the returned request should always be passed
 * through `specProxyProcessResponse` before it is returned to the client.
 *
 * Reach for this API when the worker needs to do additional work between the
 * request and the response, e.g. to integrate other libraries.
 *
 * ```javascript
 * import {
 *   specProxyProcessRequest,
 *   specProxyProcessResponse,
 * } from '@specprotected/spec-proxy-service-worker';
 *
 * addEventListener('fetch', event => {
 *   // if we don't catch the exception, fail open to original traffic path
 *   event.passThroughOnException();
 *
 *   event.respondWith(handleEvent(event));
 * })
 *
 * const specConfig = { inlineMode: false };
 *
 * async function handleEvent(event) {
 *   let request = specProxyProcessRequest(event, specConfig);
 *
 *   let response = await fetch(request);
 *
 *   // do more work, if required
 *
 *   // call the Spec Proxy response method to enact potential response changes
 *   return specProxyProcessResponse(request, response, specConfig);
 * }
 * ```
 *
 * @param event - the FetchEvent created by the Fetch API
 * @param config - configuration object to adjust Spec Proxy behavior
 * @param requestConstructor - constructor used to build outgoing requests;
 *   defaults to the global `Request`
 * @returns the incoming request untouched when Spec Proxy is disabled or the
 *   request is filtered out; otherwise a newly constructed request
 */
export function specProxyProcessRequest(
  event: FetchEvent,
  config: SpecConfiguration = {},
  requestConstructor: PlatformRequest = Request,
): Request {
  const incoming = event.request;

  // Leave the request alone when the library is switched off or when this
  // particular request is not selected for processing.
  if (
    config.disableSpecProxy ||
    !shouldHandleRequest(new URL(incoming.url), incoming.headers, config)
  ) {
    return incoming;
  }

  // Derive the Spec Proxy URL. `hostname` (not `host`) is used so that any
  // port present on the incoming URL, e.g. in local development, is kept.
  const proxyUrl = new URL(incoming.url);
  const originHostname = proxyUrl.hostname;
  const hostLabel = config.domainOverride
    ? config.domainOverride
    : originHostname;
  proxyUrl.hostname = `${hostLabel}.spec-internal.com`;

  // Every request bound for Spec Proxy carries the customer key when provided.
  const proxyHeaders = new Headers(incoming.headers);
  if (config.customerKey) {
    proxyHeaders.set(SPEC_HEADER_CUSTOMER_KEY, config.customerKey);
  }

  // All outgoing requests share the method/redirect of the incoming request
  // and are handed the incoming request and the config as extra arguments.
  const build = (
    url: string,
    body: ReadableStream | null,
    headers: Headers,
  ): Request =>
    new requestConstructor(
      url,
      {
        body,
        headers,
        method: incoming.method,
        redirect: incoming.redirect,
      },
      incoming,
      config,
    );

  const isSpecTrafficPath =
    !config.disableSpecTraffic &&
    proxyUrl.pathname.startsWith(SPEC_PATH_PREFIX);

  if (config.inlineMode || isSpecTrafficPath) {
    // Spec Proxy is responsible for forwarding to the customer's servers, so
    // tell it which origin host to use.
    // Note: /spec_traffic ignores forwarding directives
    proxyHeaders.set(SPEC_HEADER_FORWARD_ORIGIN, originHostname);
    return build(proxyUrl.toString(), incoming.body, proxyHeaders);
  }

  // Mirror mode: the body stream (when there is one) is split so that one
  // copy can travel to Spec Proxy while the other goes to the origin.
  const sourceBody = incoming.body;
  const bodies: [ReadableStream | null, ReadableStream | null] =
    sourceBody == null ? [null, null] : sourceBody.tee();
  const [mirrorBody, originBody] = bodies;

  // Send the mirrored copy in the background; waitUntil asks the worker
  // runtime to keep running until that fetch settles even though it is not
  // awaited here.
  const mirrorRequest = build(proxyUrl.toString(), mirrorBody, proxyHeaders);
  event.waitUntil(fetch(mirrorRequest));

  // The caller receives a request for the original URL with the original
  // headers, backed by the second half of the split body.
  return build(incoming.url, originBody, incoming.headers);
}
/**
 * Post-processes the Response on its way back to the originator of the
 * Request. When the request did not carry a Spec ID cookie, a new one is
 * generated and attached to the response through a Set-Cookie header.
 *
 * The response is returned untouched when Spec Proxy is disabled, when
 * running in inline mode, or when the request is not selected for handling.
 *
 * Note: the Body of the Response is never read here, so nothing needs to be
 * awaited.
 *
 * @param request - the request object that was sent to customer servers
 * @param response - the response object that was returned from customer servers
 * @param config - the configuration object that defines how this library should behave
 * @returns - the response, either as given or re-created with the Spec cookie added
 */
export function specProxyProcessResponse(
  request: Request,
  response: Response,
  config: SpecConfiguration = {},
): Response {
  if (config.disableSpecProxy) {
    return response;
  }
  if (config.inlineMode === true) {
    return response;
  }
  if (!shouldHandleRequest(new URL(request.url), request.headers, config)) {
    return response;
  }

  let jar: Record<string, string | undefined> = {};
  if (request.headers) {
    jar = parseCookies(request.headers.get(HEADER_COOKIE) || "");
  }

  // Truthiness test on purpose: both a missing and an empty cookie value
  // cause a fresh Spec ID to be issued.
  if (jar[SPEC_COOKIE_ID]) {
    return response;
  }

  const freshId = crypto.randomUUID();
  const cookieHeader = serializeSetCookie(SPEC_COOKIE_ID, freshId, {
    // roughly ten years, i.e. "a very long time"
    maxAge: 320000000,
    // an empty string or null from the helper becomes undefined
    domain: extractTopLevelDomain(request.headers) || undefined,
    // valid for all paths
    path: "/",
    sameSite: "none",
    secure: true,
  });

  // The response is re-created so that its headers can be modified.
  const withCookie = new Response(response.body, response);
  withCookie.headers.append(HEADER_SET_COOKIE, cookieHeader);
  return withCookie;
}
/**
 * This is the simplest method in the Spec Proxy Service Worker library.
 * If the edge worker you're writing only requires integrating Spec Proxy and
 * no other additional libraries, this is the easiest way to use Spec Proxy.
 *
 * It runs the three steps of the integration in order: it prepares the
 * request with `specProxyProcessRequest`, performs the network call with
 * `specMakeRequestWithFallback`, and post-processes the result with
 * `specProxyProcessResponse`.
 *
 * ```javascript
 * import { specProxyProcess } from '@specprotected/spec-proxy-service-worker';
 *
 * addEventListener('fetch', event => {
 *   // if we don't catch the exception, fail open to original traffic path
 *   event.passThroughOnException();
 *
 *   event.respondWith(specProxyProcess(event, {
 *     // set to false to mirror traffic to Spec Proxy instead of redirecting it
 *     inlineMode: true
 *   }));
 * })
 * ```
 *
 * @param event - the FetchEvent originating from the Fetch API
 * @param config - configuration object to control Spec Proxy behavior;
 *   defaults to an empty configuration
 * @param requestConstructor - constructor used to build outgoing requests;
 *   defaults to the global `Request`
 * @returns - a Promise to resolve to the modified response from the customer server
 */
export async function specProxyProcess(
  event: FetchEvent,
  // Defaults mirror those of specProxyProcessRequest so that the two-argument
  // call shown in the example above works, and so that an omitted config does
  // not reach the later steps as `undefined`.
  config: SpecConfiguration = {},
  requestConstructor: PlatformRequest = Request,
): Promise<Response> {
  const request = specProxyProcessRequest(event, config, requestConstructor);
  const response = await specMakeRequestWithFallback(event, request, config);
  return specProxyProcessResponse(request, response, config);
}
/**
 * Performs the network request for a prepared request, falling back to the
 * customer origin when Spec Proxy did not process an inline request.
 *
 * Inline mode: the request is sent to Spec Proxy first. A response carrying
 * the `x-atvak-activity-count` header was processed by the Spec platform; it
 * is returned with that internal header removed. Without the header, the
 * original request from the FetchEvent is fetched directly instead.
 *
 * If `request` is the very same object as `event.request`, it was never
 * rewritten to target Spec Proxy (the library is disabled or the request was
 * filtered out). It is then fetched exactly once; fetching it, finding no
 * marker header and "falling back" would send the same request to the origin
 * a second time.
 *
 * Mirror mode: there is no fallback. The request already targets the origin
 * and its body has been teed, so it is simply fetched as provided.
 *
 * @param event - The FetchEvent originating from the Fetch API, used for fallback in inline mode
 * @param request - The potentially modified Request object to be sent
 * @param config - Configuration object that controls Spec Proxy behavior
 * @returns - A Promise that resolves to the Response from either Spec Proxy or the customer origin
 */
export async function specMakeRequestWithFallback(
  event: FetchEvent,
  request: Request,
  config: SpecConfiguration,
): Promise<Response> {
  // Mirror mode, or an inline-mode request that was left untouched: there is
  // only one place to send it and nothing to fall back to.
  if (!config.inlineMode || request === event.request) {
    return fetch(request);
  }

  const response = await fetch(request);

  // Expected case: the marker header set by the Spec platform is present.
  if (response.headers.get(HEADER_SPEC_ACTIVITY)) {
    // re-create the response so the internal header can be removed
    const cleaned = new Response(response.body, response);
    cleaned.headers.delete(HEADER_SPEC_ACTIVITY);
    return cleaned;
  }

  // Fallback case: Spec Proxy did not process the request, go to the origin.
  // NOTE(review): the proxied request may have been built from the body
  // stream of event.request; confirm this fallback works for requests that
  // carry a body.
  return fetch(event.request);
}
/**
 * Decides whether Spec Proxy should handle the incoming request under the
 * given configuration.
 *
 * Requests whose path starts with the Spec traffic prefix are always handled
 * unless that routing is disabled. Otherwise the decision is driven by
 * `percentageOfIPs`: unset or >= 100 handles everything, <= 0 handles nothing,
 * and anything in between buckets the request by the X-Forwarded-For header.
 *
 * @param url - parsed URL of the originating request
 * @param headers - Header map from the originating request
 * @param config - configuration object to control Spec Proxy behavior
 * @returns - true if we should process the request
 */
function shouldHandleRequest(
  url: URL,
  headers: Headers,
  config: SpecConfiguration,
): boolean {
  // /spec_traffic always goes to Spec while that routing is enabled
  const specTrafficEnabled = !config.disableSpecTraffic;
  if (specTrafficEnabled && url.pathname.startsWith(SPEC_PATH_PREFIX)) {
    return true;
  }

  const percentage = config.percentageOfIPs;
  // no filter configured, or the filter lets everything through
  if (percentage === undefined || percentage >= 100) {
    return true;
  }
  // nothing can match, stop early
  if (percentage <= 0) {
    return false;
  }

  // Add up the dot-separated pieces of the forwarded-for value. A missing
  // header is treated as "99", the last of the 100 buckets [0, 99], so such
  // traffic is only handled when the percentage is 100.
  const forwardedFor = headers.get(HEADER_X_FORWARDED_FOR) || "99";
  let octetTotal = 0;
  for (const piece of forwardedFor.split(".")) {
    octetTotal += parseInt(piece);
  }

  // an unparseable value makes no assumptions and lands in the last bucket
  const bucket = isNaN(octetTotal) ? 99 : octetTotal % 100;

  // strictly less-than: "allow 1%" admits only bucket 0, which is one slice
  // of the range [0, 99]
  return bucket < percentage;
}
// Suffix this library appends to hostnames that are routed through Spec Proxy.
const SPEC_INTERNAL_DOMAIN_SUFFIX = ".spec-internal.com";

// Single-label suffixes recognised when deriving the apex domain.
const SINGLE_LABEL_SUFFIXES: ReadonlySet<string> = new Set([
  "com",
  "co",
  "org",
  "edu",
  "net",
  "int",
  "gov",
  "mil",
  "uk",
]);

// Two-label suffixes recognised when deriving the apex domain. These take
// precedence over the single-label ones so that "example.co.uk" is not
// reduced to "co.uk".
const TWO_LABEL_SUFFIXES: ReadonlySet<string> = new Set([
  "co.uk",
  "ac.uk",
  "gov.uk",
  "ltd.uk",
  "me.uk",
  "net.uk",
  "nhs.uk",
  "org.uk",
  "plc.uk",
  "police.uk",
]);

/**
 * Returns the apex domain of `hostname` (one label plus a recognised suffix),
 * or null when the hostname does not end in a recognised suffix.
 */
function findApexDomain(hostname: string): string | null {
  const labels = hostname.split(".");
  const count = labels.length;

  // longest suffix first: "<label>.<two-label suffix>"
  if (
    count >= 3 &&
    labels[count - 3] !== "" &&
    TWO_LABEL_SUFFIXES.has(`${labels[count - 2]}.${labels[count - 1]}`)
  ) {
    return labels.slice(count - 3).join(".");
  }

  // then "<label>.<single-label suffix>"
  if (
    count >= 2 &&
    labels[count - 2] !== "" &&
    SINGLE_LABEL_SUFFIXES.has(labels[count - 1] ?? "")
  ) {
    return labels.slice(count - 2).join(".");
  }

  return null;
}

/**
 * Extract the top-level (apex) domain from the Host header.
 *
 * Any `:port` at the end of the header value is dropped first. A trailing
 * `.spec-internal.com` is then excluded when an apex domain can be found in
 * front of it; otherwise the apex domain of the full hostname is used. When
 * no recognised suffix matches at all, the hostname (without port) is
 * returned as-is.
 *
 * @param headers - Header map from the originating request
 * @returns - the apex domain if the Host header was present, otherwise the
 *   header's own empty value (null when the header is absent)
 */
function extractTopLevelDomain(headers: Headers): string | null {
  const hostHeader = headers.get(HEADER_HOST);
  if (!hostHeader) {
    // null when the header is missing, "" when it is present but empty
    return hostHeader;
  }

  // the result is used as a cookie Domain, so a trailing port is removed
  const hostname = hostHeader.replace(/:\d+$/, "");

  const withoutSpecSuffix = hostname.endsWith(SPEC_INTERNAL_DOMAIN_SUFFIX)
    ? hostname.slice(0, -SPEC_INTERNAL_DOMAIN_SUFFIX.length)
    : hostname;

  return (
    findApexDomain(withoutSpecSuffix) ?? findApexDomain(hostname) ?? hostname
  );
}