@webrecorder/awp-sw
Version:
This library has been factored out of [ArchiveWeb.page](https://github.com/webrecorder/archiveweb.page) and represents the core service worker implementation necessary for high-fidelity web archiving.
320 lines (264 loc) • 8.35 kB
text/typescript
import {
type ADBType,
ArchiveDB,
type ArchiveRequest,
type ArchiveResponse,
type CollectionLoader,
type PageEntry,
LiveProxy,
SWCollections,
randomId,
} from "@webrecorder/wabac/swlib";
//declare let self: ServiceWorkerGlobalScope;
import { type IDBPDatabase, type IDBPTransaction } from "idb";
import { postToGetUrl } from "warcio";
//export interface RecDBType extends ADBType {
/**
 * Database schema type used by the recording proxy: everything in `ADBType`
 * plus an extra `rec` entry declared with a string key.
 *
 * In this file the `rec` object store is created by `RecProxy._initDB()` and
 * is used to hold the pending-request counter under the key `"numPending"`.
 */
export type RecDBType = ADBType & {
rec: {
key: string;
};
};
/**
 * A `PageEntry` extended with additional page metadata fields.
 *
 * In this file it is the type assumed for records read back from the
 * `"pages"` store in `RecProxy.updateFavIcon()`, which sets `favIconUrl`.
 * `favIconUrl` and `text` are optional; the other fields are required.
 */
export type ExtPageEntry = PageEntry & {
id: string;
title: string;
size: number;
// NOTE(review): presumably epoch milliseconds — doRecord() passes
// response.date.getTime() as `ts` when adding pages; confirm.
ts: number;
favIconUrl?: string;
text?: string;
};
// ===========================================================================
/**
 * An `ArchiveDB` that, while recording is enabled, fetches resources through
 * a `LiveProxy` and stores each fetched response (and, where applicable, a
 * page entry) into the database.
 *
 * When `isRecording` is false, lookups are delegated to the base
 * `ArchiveDB.getResource()` instead.
 */
export class RecProxy extends ArchiveDB {
  collLoader: CollectionLoader;

  /** When false, responses from hosts listed in `liveProxy.hostProxy` are not recorded. */
  recordProxied: boolean;

  liveProxy: LiveProxy;

  /** Id assigned to resources recorded next; regenerated on each top-level navigation. */
  pageId: string;

  /** True while the current `pageId` has not yet had a page entry added for it. */
  isNew = true;

  /** When true, `isNew` is not reset on later navigations (see `doRecord`). */
  firstPageOnly: boolean;

  /** Number of requests that were started and have not yet been finished/recorded. */
  counter = 0;

  isRecording = true;

  /** Maps a recorded page URL to the page id it was stored under. */
  allPages = new Map<string, string>();

  /**
   * @param config - Collection config; reads `dbname` and `extraConfig`
   *   (`recordProxied`, `firstPageOnly`, and whatever `LiveProxy` consumes).
   * @param collLoader - Loader used to report size updates after each recording.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  constructor(config: any, collLoader: CollectionLoader) {
    // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
    super(config.dbname);

    // The collection name is the db name without its first three characters.
    this.name = config.dbname.slice(3);

    this.collLoader = collLoader;

    this.recordProxied = config.extraConfig.recordProxied || false;

    // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
    this.liveProxy = new LiveProxy(config.extraConfig, {
      cloneResponse: true,
      allowBody: true,
    });

    this.pageId = randomId();
    this.firstPageOnly = config.extraConfig.firstPageOnly || false;
  }

  /**
   * Runs the base schema setup, then creates the additional `"rec"` object
   * store used for recording bookkeeping.
   */
  override _initDB(
    db: IDBPDatabase<ADBType>,
    oldV: number,
    newV: number | null,
    tx: IDBPTransaction<
      ADBType,
      (keyof ADBType)[],
      "readwrite" | "versionchange"
    >,
  ) {
    super._initDB(db, oldV, newV, tx);
    //TODO: fix
    (db as unknown as IDBPDatabase<RecDBType>).createObjectStore("rec");
  }

  /**
   * Decrements the in-memory pending counter and writes the new value to the
   * `"rec"` store under `"numPending"`.
   *
   * Note that only decrements are persisted; the increment in
   * `getResource()` updates the in-memory value only.
   */
  async decCounter() {
    this.counter--;
    //console.log("rec counter", this.counter);
    //TODO: fix
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await (this.db! as any).put("rec", this.counter, "numPending");
  }

  /**
   * Reads the last persisted pending counter.
   *
   * @returns The value stored under `"numPending"` in the `"rec"` store, or
   *   `undefined` if nothing is stored there.
   */
  async getCounter(): Promise<number | undefined> {
    //TODO: fix
    // eslint-disable-next-line @typescript-eslint/no-explicit-any, @typescript-eslint/no-unsafe-return
    return await (this.db! as any).get("rec", "numPending");
  }

  /**
   * Serves a request. While recording, the resource is fetched via the live
   * proxy and recorded in the background; otherwise the base implementation
   * is used.
   *
   * @returns The live response, or `null` if the live fetch threw or produced
   *   no response.
   */
  override async getResource(
    request: ArchiveRequest,
    prefix: string,
    event: FetchEvent,
  ) {
    if (!this.isRecording) {
      return await super.getResource(request, prefix, event);
    }

    // doRecord() reads the body of POST/PUT requests via request.text(), so
    // those are recorded from a clone taken before the live fetch.
    const req =
      request.method === "POST" || request.method === "PUT"
        ? request.request.clone()
        : request.request;

    let response: ArchiveResponse | null = null;

    try {
      this.counter++;
      response = await this.liveProxy.getResource(request, prefix);
    } catch (_e) {
      await this.decCounter();
      return null;
    }

    // No response at all: nothing to record. Handle this explicitly so the
    // pending counter is always decremented, instead of dereferencing null
    // further down.
    if (!response) {
      await this.decCounter();
      return null;
    }

    // error response, don't record
    if (response.noRW && response.status >= 400) {
      await this.decCounter();
      return response;
    }

    // don't record content proxied from specified hosts
    // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
    if (!this.recordProxied && this.liveProxy.hostProxy) {
      const parsedUrl = new URL(response.url);
      if (this.liveProxy.hostProxy[parsedUrl.host]) {
        await this.decCounter();
        return response;
      }
    }

    // Record in the background; recording failures are deliberately ignored
    // so they never affect the response, and the counter is always released.
    this.doRecord(response, req, request.mod)
      .catch(() => {})
      .finally(async () => this.decCounter());

    return response;
  }

  /**
   * Stores one fetched response as a resource and, if it qualifies as a page
   * (see `isPage`), also adds a page entry.
   *
   * Partial range responses are skipped: only a `content-range` that covers
   * exactly the full payload (`bytes 0-(len-1)/len`) is recorded.
   *
   * @param response - Live response; its `clonedResponse` body is read as the payload.
   * @param request - The request (or its clone) used for headers, referrer and POST/PUT body.
   * @param mod - Replay modifier of the request; `"mp_"` is treated as a top-level page load.
   */
  async doRecord(response: ArchiveResponse, request: Request, mod: string) {
    let url = response.url;
    const ts = response.date.getTime();

    const mime = (response.headers.get("content-type") || "").split(";")[0];
    const range = response.headers.get("content-range");

    // Cheap early check before reading the body: must start at byte 0.
    if (range && !range.startsWith("bytes 0-")) {
      console.log("skip range request: " + range);
      return;
    }

    const status = response.status;
    const statusText = response.statusText;

    const respHeaders = Object.fromEntries(response.headers.entries());
    const reqHeaders = Object.fromEntries(request.headers.entries());

    const payload = new Uint8Array(
      await response.clonedResponse!.arrayBuffer(),
    );

    if (range) {
      // Now that the payload size is known, require the range to span it exactly.
      const expectedRange = `bytes 0-${payload.length - 1}/${payload.length}`;
      if (range !== expectedRange) {
        console.log("skip range request: " + range);
        return;
      }
    }

    // A top-level navigation starts a new page id.
    if (request.mode === "navigate" && mod === "mp_") {
      this.pageId = randomId();
      if (!this.firstPageOnly) {
        this.isNew = true;
      }
    }

    const pageId = this.pageId;
    const referrer = request.referrer;

    if (request.method === "POST" || request.method === "PUT") {
      const postInfo = {
        method: request.method,
        postData: await request.text(),
        headers: request.headers,
        url,
      };

      // postToGetUrl() rewrites postInfo.url in place when it returns true.
      if (postToGetUrl(postInfo)) {
        url = new URL(postInfo.url).href;
      }
    }

    const data = {
      url,
      ts,
      status,
      statusText,
      pageId,
      payload,
      mime,
      respHeaders,
      reqHeaders,
      referrer,
    };

    await this.addResource(data);

    await this.collLoader.updateSize(this.name, payload.length, payload.length);

    // don't add page for redirects
    if (this.isPage(url, request, status, referrer, mod)) {
      await this.addPages([{ id: pageId, url, ts }]);
      this.allPages.set(url, pageId);
      this.isNew = false;
    } else {
      console.log("not page", url);
    }
  }

  /**
   * Decides whether a recorded response should also be added as a page.
   *
   * Returns false when a page was already added for the current page id, for
   * statuses 301-399 and 204, and for anything that is not a `"navigate"`
   * request with the `"mp_"` modifier. Otherwise the referrer decides:
   * - no referrer: page;
   * - referrer contains `mp_/`: page only if the URL after `mp_/` equals
   *   `url` or is an already-recorded page;
   * - referrer contains `if_/`: not a page;
   * - referrer contains `?source=`: page;
   * - anything else: not a page.
   */
  isPage(
    url: string,
    request: Request,
    status: number,
    referrer: string,
    mod: string,
  ) {
    if (!this.isNew) {
      return false;
    }

    if ((status >= 301 && status < 400) || status === 204) {
      return false;
    }

    if (request.mode !== "navigate" || mod !== "mp_") {
      return false;
    }

    if (!referrer) {
      return true;
    }

    const inx = referrer.indexOf("mp_/");
    if (inx > 0) {
      const refUrl = referrer.slice(inx + 4);
      return url === refUrl || this.allPages.has(refUrl);
    } else if (referrer.indexOf("if_/") > 0) {
      return false;
    } else if (referrer.indexOf("?source=") > 0) {
      // indexOf() returns -1 (truthy) when the marker is absent, so the
      // result must be compared explicitly, like the checks above.
      return true;
    } else {
      return false;
    }
  }

  /**
   * Sets `favIconUrl` on the stored page entry for `url`.
   *
   * Does nothing if `url` is not a page recorded by this instance or if the
   * page entry is no longer in the `"pages"` store. Write errors are ignored.
   */
  async updateFavIcon(url: string, favIconUrl: string) {
    const pageId = this.allPages.get(url);
    if (!pageId) {
      return;
    }

    const page = (await this.db!.get("pages", pageId)) as
      | ExtPageEntry
      | undefined;
    if (!page) {
      return;
    }

    page.favIconUrl = favIconUrl;

    try {
      await this.db!.put("pages", page);
    } catch (_e: unknown) {
      // ignore
    }
  }
}
// ===========================================================================
/**
 * `SWCollections` variant that knows how to create `RecProxy` stores and
 * handles the recording-related service worker messages.
 */
export class RecordingCollections extends SWCollections {
  /**
   * Creates the store for a collection. The `"recordingproxy"` type yields a
   * `RecProxy` (returned once its `initing` promise has settled); every other
   * type is handled by the base class.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  override async _initStore(type: string, config: any) {
    if (type === "recordingproxy") {
      const recStore = new RecProxy(config, this);
      await recStore.initing;
      return recStore;
    }

    // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
    return await super._initStore(type, config);
  }

  /**
   * Handles `"toggle-record"` and `"update-favicon"` messages for collections
   * backed by a `RecProxy`; all other message types go to the base class.
   */
  override async _handleMessage(event: MessageEvent) {
    const msgType = event.data.msg_type;

    if (msgType === "toggle-record") {
      // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
      const target = await this.getColl(event.data.id);
      if (target && target.store instanceof RecProxy) {
        console.log("Recording Toggled!", event.data.isRecording);
        target.store.isRecording = event.data.isRecording;
      }
    } else if (msgType === "update-favicon") {
      // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
      const target = await this.getColl(event.data.id);
      if (target && target.store instanceof RecProxy) {
        // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
        await target.store.updateFavIcon(event.data.url, event.data.favIconUrl);
      }
    } else {
      return await super._handleMessage(event);
    }
  }
}