@webrecorder/awp-sw
Version:
This library has been factored out of [ArchiveWeb.page](https://github.com/webrecorder/archiveweb.page) and represents the core service worker implementation necessary for high-fidelity web archiving.
484 lines (392 loc) • 12.6 kB
text/typescript
import { API, type SWCollections, tsToDate } from "@webrecorder/wabac/swlib";
import { Downloader, type Metadata } from "./downloader";
import { Signer } from "./keystore";
import { ipfsAdd, ipfsRemove, setAutoIPFSUrl } from "./ipfsutils";
import { RecProxy } from "./recproxy";
import { type Collection } from "@webrecorder/wabac/swlib";
// Route parameters handed to the API handlers in this file. The handlers read
// `coll` (collection id), `_route` (matched route name) and `_query` (query
// parameters accessed via `.get(...)`) from it; values are otherwise untyped.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type RouteMatch = Record<string, any>;
// Re-type the global `self` as the service worker global scope so that
// service-worker-only members such as `self.clients` type-check.
declare let self: ServiceWorkerGlobalScope;
// Default software identification string, appended to (or replaced by) the
// caller-supplied string in the ExtAPI constructor.
// NOTE(review): __AWP_VERSION__ and __WARCIO_VERSION__ are not declared in this
// file; presumably they are injected at build time — confirm against the bundler config.
const DEFAULT_SOFTWARE_STRING = `Webrecorder ArchiveWeb.page ${__AWP_VERSION__}, using warcio.js ${__WARCIO_VERSION__}`;
// ===========================================================================
/**
 * Service worker API that extends the base `API` with routes for downloading
 * and uploading collections, reporting upload progress, counting pending
 * recording writes, updating page titles, configuring the IPFS daemon URL and
 * exposing the signing public key.
 */
class ExtAPI extends API {
  /** Software identification string passed to the downloader and the signer. */
  softwareString = "";

  /** Upload progress trackers, keyed by collection id. */
  uploading = new Map<string, CountingStream>();

  /**
   * @param collections - collection store passed through to the base `API`
   * @param options.softwareString - custom software string; prefixed to the
   *   default string unless `replaceSoftwareString` is set
   * @param options.replaceSoftwareString - when true, `softwareString` is used
   *   verbatim instead of being combined with the default string
   */
  constructor(
    collections: SWCollections,
    { softwareString = "", replaceSoftwareString = false } = {},
  ) {
    super(collections);
    this.softwareString = replaceSoftwareString
      ? softwareString
      : softwareString + DEFAULT_SOFTWARE_STRING;
  }

  /**
   * Route table: the base routes plus the routes handled by this class.
   * A plain string is a route pattern; a tuple is `[pattern, HTTP method]`.
   */
  override get routes(): Record<string, string | [string, string]> {
    return {
      ...super.routes,
      downloadPages: "c/:coll/dl",
      upload: ["c/:coll/upload", "POST"],
      uploadStatus: "c/:coll/upload",
      uploadDelete: ["c/:coll/upload", "DELETE"],
      recPending: "c/:coll/recPending",
      pageTitle: ["c/:coll/pageTitle", "POST"],
      ipfsAdd: ["c/:coll/ipfs", "POST"],
      ipfsRemove: ["c/:coll/ipfs", "DELETE"],
      ipfsDaemonUrl: ["ipfs/daemonUrl", "POST"],
      publicKey: "publicKey",
    };
  }

  /** Builds the options shared by every `Downloader` created by this API. */
  downloaderOpts() {
    const softwareString = this.softwareString;
    const signer = new Signer(softwareString, { cacheSig: true });
    return { softwareString, signer };
  }

  /**
   * Dispatches a matched API request to its handler based on `params._route`.
   * Routes not handled here are delegated to the base class.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  override async handleApi(request: Request, params: any, event: FetchEvent) {
    switch (params._route) {
      case "downloadPages":
        return await this.handleDownload(params);

      case "upload":
        return await this.handleUpload(params, request, event);

      case "uploadStatus":
        return await this.getUploadStatus(params);

      case "uploadDelete":
        return await this.deleteUpload(params);

      case "recPending":
        return await this.recordingPending(params);

      case "pageTitle":
        return await this.updatePageTitle(params.coll, request);

      case "publicKey":
        return await this.getPublicKey();

      case "ipfsAdd":
        // IPFS add is currently disabled; the route answers with an empty object.
        //return await this.startIpfsAdd(event, request, params.coll);
        return {};

      case "ipfsRemove":
        // IPFS remove is currently disabled; the route answers with an empty object.
        //return await this.ipfsRemove(request, params.coll);
        return {};

      case "ipfsDaemonUrl":
        return await this.setIPFSDaemonUrlFromBody(request);

      default:
        return await super.handleApi(request, params, event);
    }
  }

  /** Handles a download request: returns the download result or an error object. */
  async handleDownload(params: RouteMatch) {
    const { dl, error } = await this.getDownloader(params);
    if (error) {
      return error;
    }
    return dl.download();
  }

  /**
   * Creates a `Downloader` for the collection named by `params.coll`.
   *
   * Query parameters read from `params._query`:
   * - `pages`: `"all"` or a comma-separated list of page ids; a missing
   *   parameter is treated like `"all"` instead of throwing
   * - `format`: download format, defaults to `"wacz"`
   * - `filename`: optional file name passed to the downloader
   *
   * @returns `{ dl }` on success, or `{ error }` if the collection is unknown
   */
  async getDownloader(params: RouteMatch) {
    const coll = await this.collections.loadColl(params.coll);
    if (!coll) {
      return { error: { error: "collection_not_found" } };
    }

    const query = params["_query"];
    const pageQ = query.get("pages");
    // `null` page list selects all pages; guard against an absent parameter,
    // which previously crashed on `null.split`.
    const pageList =
      pageQ === null || pageQ === undefined || pageQ === "all"
        ? null
        : pageQ.split(",");
    const format = query.get("format") || "wacz";
    const filename = query.get("filename");

    return {
      dl: new Downloader({
        ...this.downloaderOpts(),
        coll,
        format,
        filename,
        pageList,
      }),
    };
  }

  /**
   * Starts (or aborts) an upload of the collection to a remote URL.
   *
   * The JSON request body provides `url`, `headers` and an optional
   * `abortUpload` flag. The download stream is piped through a
   * `CountingStream` and sent with an HTTP PUT; the request is not awaited
   * here, its completion is handled by `uploadFinished`.
   *
   * @returns `{ uploading: true }` once the upload was started,
   *   `{ aborted: true }` after aborting a running upload, or an error object
   */
  async handleUpload(params: RouteMatch, request: Request, event: FetchEvent) {
    const uploading = this.uploading;
    const prevUpload = uploading.get(params.coll);

    const { url, headers, abortUpload } = await request.json();

    if (prevUpload && prevUpload.status === "uploading") {
      if (abortUpload) {
        prevUpload.abort();
        return { aborted: true };
      }
      return { error: "already_uploading" };
    } else if (abortUpload) {
      return { error: "not_uploading" };
    }

    const { dl, error } = await this.getDownloader(params);
    if (error) {
      return error;
    }

    const dlResp = await dl.download();
    if (!(dlResp instanceof Response)) {
      return dlResp;
    }

    const filename = dlResp.filename || "";
    const abort = new AbortController();
    const signal = abort.signal;
    const counter = new CountingStream(dl.metadata.size, abort);
    const body = dlResp.body!.pipeThrough(counter.transformStream());

    try {
      const urlObj = new URL(url);
      urlObj.searchParams.set("filename", filename || "");
      urlObj.searchParams.set("name", dl.metadata["title"] || filename || "");

      // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
      const fetchPromise = fetch(urlObj.href, {
        method: "PUT",
        headers,
        duplex: "half",
        body,
        signal,
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
      } as any);

      uploading.set(params.coll, counter);

      // Always attach the completion handler so the counter status is updated
      // and a rejected fetch never goes unhandled, even without `waitUntil`.
      const finished = this.uploadFinished(
        fetchPromise,
        params.coll,
        dl.metadata,
        filename,
        counter,
      );

      if (event.waitUntil) {
        // Keep the service worker alive until the upload has settled.
        event.waitUntil(finished);
      }

      return { uploading: true };
    } catch (e: unknown) {
      // Only synchronous failures (e.g. an invalid upload URL) end up here.
      uploading.delete(params.coll);
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      return { error: "upload_failed", details: (e as any).toString() };
    }
  }

  /**
   * Waits for the upload request to settle and records the outcome.
   *
   * On success the collection metadata gets `uploadTime` and `uploadId`
   * (and `mtime`/`ctime` if unset) and the counter status becomes `"done"`.
   * On any failure, including a non-OK HTTP response, the status becomes
   * `"aborted"` or `"failed"`. This method never rejects.
   */
  async uploadFinished(
    fetchPromise: Promise<Response>,
    collId: string,
    metadata: Metadata,
    filename: string,
    counter: CountingStream,
  ) {
    try {
      const resp = await fetchPromise;
      if (!resp.ok) {
        // A 4xx/5xx answer must not be recorded as a successful upload.
        throw new Error(`Upload request failed with status ${resp.status}`);
      }
      const json = await resp.json();

      console.log(`Upload finished for ${filename} ${collId}`);

      metadata.uploadTime = new Date().getTime();
      metadata.uploadId = json.id;
      if (!metadata.mtime) {
        metadata.mtime = metadata.uploadTime;
      }
      if (!metadata.ctime) {
        metadata.ctime = metadata.uploadTime;
      }

      await this.collections.updateMetadata(
        collId,
        metadata as Record<string, string>,
      );
      counter.status = "done";
    } catch (e) {
      console.log(`Upload failed for ${filename} ${collId}`);
      console.log(e);
      counter.status = counter.aborted ? "aborted" : "failed";
    }
  }

  /**
   * Forgets the upload tracker for the collection and clears `uploadTime` and
   * `uploadId` in its metadata.
   *
   * @returns `{ deleted: true }` if metadata was updated, else `{ deleted: false }`
   */
  async deleteUpload(params: RouteMatch) {
    const collId = params.coll;

    this.uploading.delete(collId);

    const coll = await this.collections.loadColl(collId);
    if (coll?.metadata) {
      coll.metadata.uploadTime = null;
      coll.metadata.uploadId = null;
      await this.collections.updateMetadata(collId, coll.metadata);
      return { deleted: true };
    }

    return { deleted: false };
  }

  /**
   * Reports the upload state for a collection.
   *
   * Returns `status: "idle"` when no tracker exists, otherwise the tracker's
   * `status`, `size` and `totalSize`. A tracker that is no longer uploading is
   * removed after being reported once. Upload and modification times from the
   * collection metadata are included when available.
   */
  async getUploadStatus(params: RouteMatch) {
    let result: Metadata;
    const counter = this.uploading.get(params.coll);

    if (!counter) {
      result = { status: "idle" };
    } else {
      const { size, totalSize, status } = counter;
      result = { status, size, totalSize };

      if (status !== "uploading") {
        this.uploading.delete(params.coll);
      }
    }

    const coll = await this.collections.loadColl(params.coll);
    if (coll?.metadata) {
      result.uploadTime = coll.metadata.uploadTime;
      result.uploadId = coll.metadata.uploadId;
      result.ctime = coll.metadata.ctime;
      result.mtime = coll.metadata.mtime;
    }

    return result;
  }

  /**
   * Returns the pending counter of a recording collection as `{ numPending }`.
   * Fails with an error object if the collection is unknown or its store is
   * not a `RecProxy`.
   */
  async recordingPending(params: RouteMatch) {
    const coll = await this.collections.loadColl(params.coll);
    if (!coll) {
      return { error: "collection_not_found" };
    }

    if (!(coll.store instanceof RecProxy)) {
      return { error: "invalid_collection" };
    }

    const numPending = await coll.store.getCounter();
    return { numPending };
  }

  /**
   * Loads the collection and applies the IPFS daemon URL from the request body.
   *
   * @returns `{ coll, body }`, or `{ error }` if the collection is unknown
   */
  async prepareColl(collId: string, request: Request) {
    const coll = await this.collections.loadColl(collId);
    if (!coll) {
      return { error: "collection_not_found" };
    }

    const body = await this.setIPFSDaemonUrlFromBody(request);
    return { coll, body };
  }

  /**
   * Parses the JSON request body and, if it carries `ipfsDaemonUrl`, applies
   * it via `setAutoIPFSUrl`.
   *
   * @returns the parsed body, or `{}` if parsing or applying the URL throws
   */
  async setIPFSDaemonUrlFromBody(request: Request) {
    let body;

    try {
      body = await request.json();
      if (body.ipfsDaemonUrl) {
        setAutoIPFSUrl(body.ipfsDaemonUrl);
      }
    } catch (_e: unknown) {
      // Deliberate best-effort: an unreadable body is treated as empty.
      body = {};
    }

    return body;
  }

  /**
   * Adds the collection to IPFS, reporting progress to the requesting client.
   *
   * @returns `{ collId }` on success, `{ error: "ipfs_not_available" }` if the
   *   add fails, or the `prepareColl` error if the collection is unknown
   */
  async startIpfsAdd(event: FetchEvent, request: Request, collId: string) {
    const { coll, body, error } = await this.prepareColl(collId, request);
    if (!coll) {
      // Previously an unknown collection was passed on as `undefined`.
      return { error: error ?? "collection_not_found" };
    }

    const client = await self.clients.get(event.clientId);

    const p = runIPFSAdd(
      collId,
      coll,
      client,
      this.downloaderOpts(),
      this.collections,
      body,
    );

    if (event.waitUntil) {
      event.waitUntil(p);
    }

    try {
      await p;
    } catch (_e) {
      return { error: "ipfs_not_available" };
    }

    return { collId };
  }

  /**
   * Removes the collection from IPFS and persists the updated metadata.
   *
   * @returns `{ removed: true }` if the removal succeeded, `{ removed: false }`
   *   otherwise, or the `prepareColl` error if the collection is unknown
   */
  async ipfsRemove(request: Request, collId: string) {
    const { coll, error } = await this.prepareColl(collId, request);
    if (!coll) {
      // Previously an unknown collection crashed on `coll.name` below.
      return { error: error ?? "collection_not_found" };
    }

    if (await ipfsRemove(coll)) {
      // Same guard as runIPFSAdd: only persist metadata that actually exists.
      if (coll.config.metadata) {
        await this.collections.updateMetadata(coll.name, coll.config.metadata);
      }
      return { removed: true };
    }

    return { removed: false };
  }

  /**
   * Sets the title of a recorded page.
   *
   * The JSON body provides `url`, `ts` and `title`. The title is only updated
   * when the looked-up resource matches the URL exactly and its timestamp,
   * truncated to whole seconds, equals the requested one.
   *
   * @returns `{ added: true }` on success, otherwise an error object
   */
  async updatePageTitle(collId: string, request: Request) {
    const json = await request.json();
    const { url, title } = json;
    const ts = tsToDate(json.ts).getTime();

    const coll = await this.collections.loadColl(collId);
    if (!coll) {
      return { error: "collection_not_found" };
    }

    //await coll.store.db.init();

    const result = await coll.store.lookupUrl(url, ts);
    if (!result) {
      return { error: "page_not_found" };
    }

    // drop to second precision for comparison
    const roundedTs = Math.floor(result.ts / 1000) * 1000;
    if (url !== result.url || ts !== roundedTs) {
      return { error: "no_exact_match" };
    }

    const page = await coll.store.db.getFromIndex("pages", "url", url);
    if (!page) {
      return { error: "page_not_found" };
    }

    page.title = title;
    await coll.store.db.put("pages", page);
    return { added: true };
  }

  /**
   * Returns `{ publicKey }` with the signer's public key, or `{}` if no keys
   * could be loaded.
   */
  async getPublicKey() {
    const { signer } = this.downloaderOpts();
    const keys = await signer.loadKeys();
    return keys?.public ? { publicKey: keys.public } : {};
  }
}
// ===========================================================================
/**
 * Adds a collection to IPFS and keeps the requesting client informed.
 *
 * While `ipfsAdd` runs, every progress callback posts an `"ipfsProgress"`
 * message carrying the accumulated size and the latest total size. When the
 * add completes, an `"ipfsAdd"` message with `{ cid, ipfsURL }` is posted and
 * the collection metadata, if present, is persisted.
 *
 * @param collId - collection id echoed in every posted message
 * @param coll - collection to add
 * @param client - client to notify; messages are skipped when undefined
 * @param opts - downloader options forwarded to `ipfsAdd`
 * @param collections - store used to persist the updated metadata
 * @param replayOpts - replay options forwarded to `ipfsAdd`
 */
async function runIPFSAdd(
  collId: string,
  coll: Collection,
  client: Client | undefined,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  opts: any,
  collections: SWCollections,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  replayOpts: any,
) {
  // Running totals shared between the progress callback and the notifier.
  const progress = { size: 0, totalSize: 0 };

  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const notifyClient = (type: string, result: any = null): void => {
    client?.postMessage({
      type,
      collId,
      size: progress.size,
      result,
      totalSize: progress.totalSize,
    });
  };

  const onProgress = (incSize: number, latestTotal: number): void => {
    progress.size += incSize;
    progress.totalSize = latestTotal;
    notifyClient("ipfsProgress");
  };

  const added = await ipfsAdd(coll, opts, replayOpts, onProgress);

  notifyClient("ipfsAdd", { cid: added.cid, ipfsURL: added.url });

  const metadata = coll.config.metadata;
  if (metadata) {
    await collections.updateMetadata(coll.name, metadata);
  }
}
// ===========================================================================
/**
 * Tracks the progress of a streamed upload: counts the bytes that pass
 * through its transform stream and carries the upload status together with
 * an optional abort controller.
 */
class CountingStream {
  /** Expected total size; 0 when no (or a falsy) size was given. */
  totalSize: number;
  /** Upload state; starts out as "uploading". */
  status = "uploading";
  /** Sum of the `length` of all chunks seen so far. */
  size = 0;
  /** Controller used to cancel the upload, if one was supplied. */
  _abort?: AbortController;
  /** Set once `abort()` has actually triggered the controller. */
  aborted = false;

  constructor(totalSize?: number, abort?: AbortController) {
    this.totalSize = totalSize || 0;
    this._abort = abort;
  }

  /** Aborts the upload; a no-op when no abort controller was supplied. */
  abort() {
    const controller = this._abort;
    if (!controller) {
      return;
    }
    controller.abort();
    this.aborted = true;
  }

  /**
   * Creates a pass-through `TransformStream` that resets `size` when it
   * starts and adds each chunk's `length` to `size` before forwarding it.
   */
  transformStream() {
    return new TransformStream({
      start: () => {
        this.size = 0;
      },

      transform: (chunk, controller) => {
        this.size += chunk.length;
        //console.log(`Uploaded: ${this.size}`);
        controller.enqueue(chunk);
      },
    });
  }
}
export { ExtAPI };