@webrecorder/awp-sw
Version:
This library has been factored out of [ArchiveWeb.page](https://webrecorder/archiveweb.page) and represents the core service worker implementation necessarily for high-fidelity web archiving.
1,112 lines (911 loc) • 27.4 kB
text/typescript
import { makeZip } from "client-zip";
import { Deflate } from "pako";
import { v5 as uuidv5 } from "uuid";
import { createSHA256 } from "hash-wasm";
import { type IHasher } from "hash-wasm/dist/lib/WASMInterface.js";
import { getSurt, WARCRecord, WARCSerializer } from "warcio";
import {
getTSMillis,
getStatusText,
digestMessage,
type Collection,
type ArchiveDB,
type ResourceEntry,
} from "@webrecorder/wabac/swlib";
import { type DataSignature, type Signer } from "./keystore";
import { type ExtPageEntry } from "./recproxy";
export type SizeCallback = (size: number) => void;
export type ResponseWithFilename = Response & {
filename?: string;
};
type ClientZipEntry = {
name: string;
lastModified: Date;
input: AsyncGenerator<Uint8Array>;
};
type FileStats = {
filename: string;
size: number;
hash?: string;
};
export type DownloaderOpts = {
coll: Collection;
format: string;
filename?: string;
pageList?: string[];
signer?: Signer;
softwareString?: string;
gzip?: boolean;
uuidNamespace?: string;
markers?: Markers;
};
export type Markers = {
ZIP?: Uint8Array;
WARC_PAYLOAD?: Uint8Array;
WARC_GROUP?: Uint8Array;
};
type DLResourceEntry = ResourceEntry & {
offset?: number;
length?: number;
timestamp?: string;
skipped?: boolean;
text?: string;
pageId: string;
digest: string;
};
type CDXJEntry = {
url: string;
digest: string;
mime: string;
offset: number;
length: number;
recordDigest: string;
status: number;
method?: string;
filename?: string;
requestBody?: string;
};
type DLPageData = {
title: string;
url: string;
id: string;
size: number;
ts: string;
favIconUrl?: string;
text?: string;
};
type Gen =
| AsyncGenerator<Uint8Array>
| AsyncGenerator<string>
| Generator<Uint8Array>
| Generator<string>;
type WARCVersion = "WARC/1.0" | "WARC/1.1";
type DigestCache = {
url: string;
date: string;
payloadDigest?: string;
};
type DataPackageJSON = {
profile: string;
resources: {
name: string;
path: string;
hash: string;
bytes: number;
}[];
wacz_version: string;
software: string;
created: string;
title?: string;
description?: string;
modified?: string;
};
export type Metadata = {
uploadId?: string;
uploadTime?: number;
ctime?: number;
mtime?: number;
size?: number;
title?: string;
desc?: string;
status?: string;
totalSize?: number;
};
// ===========================================================================
const WACZ_VERSION = "1.1.1";
const SPLIT_REQUEST_Q_RX = /(.*?)[?&](?:__wb_method=|__wb_post=)[^&]+&(.*)/;
const LINES_PER_BLOCK = 1024;
const RESOURCE_BATCH_SIZE = LINES_PER_BLOCK * 8;
const DEFAULT_UUID_NAMESPACE = "f9ec3936-7f66-4461-bec4-34f4495ea242";
const DATAPACKAGE_FILENAME = "datapackage.json";
const DIGEST_FILENAME = "datapackage-digest.json";
const encoder = new TextEncoder();
const EMPTY = new Uint8Array([]);
async function* getPayload(payload: Uint8Array) {
yield payload;
}
async function* hashingGen(
gen: Gen,
stats: FileStats,
hasher: IHasher,
sizeCallback: SizeCallback | null,
zipMarker?: Uint8Array,
) {
stats.size = 0;
hasher.init();
if (zipMarker) {
yield zipMarker;
}
for await (let chunk of gen) {
if (typeof chunk === "string") {
chunk = encoder.encode(chunk);
}
yield chunk;
stats.size += chunk.byteLength;
if (sizeCallback) {
sizeCallback(chunk.byteLength);
}
hasher.update(chunk);
}
if (zipMarker) {
yield zipMarker;
}
stats.hash = hasher.digest("hex");
}
// ===========================================================================
class Downloader {
db: ArchiveDB;
pageList: string[] | null;
collId: string;
metadata: Metadata;
gzip: boolean;
markers: Markers;
warcName: string;
alreadyDecoded: boolean;
softwareString: string;
uuidNamespace: string;
createdDateDt: Date;
createdDate: string;
modifiedDate: string | null;
format: string;
warcVersion: WARCVersion;
digestOpts: {
algo: string;
prefix: string;
base32?: boolean;
};
filename: string;
signer: Signer | null;
offset = 0;
firstResources: ResourceEntry[] = [];
textResources: DLResourceEntry[] = [];
cdxjLines: string[] = [];
// compressed index (idx) entries
indexLines: string[] = [];
digestsVisted: Record<string, DigestCache> = {};
fileHasher: IHasher | null = null;
recordHasher: IHasher | null = null;
datapackageDigest = "";
fileStats: FileStats[] = [];
hashType = "";
lastUrl?: string;
lastPageId?: string;
constructor({
coll,
format = "wacz",
filename,
pageList,
signer,
softwareString,
gzip = true,
uuidNamespace,
markers,
}: DownloaderOpts) {
this.db = coll.store;
this.pageList = pageList || null;
this.collId = coll.name;
this.metadata = coll.config.metadata || {};
this.gzip = gzip;
this.markers = markers || {};
this.warcName = this.gzip ? "data.warc.gz" : "data.warc";
this.alreadyDecoded = !coll.config["decode"] && !coll.config["loadUrl"];
this.softwareString = softwareString || "ArchiveWeb.page";
this.uuidNamespace = uuidNamespace || DEFAULT_UUID_NAMESPACE;
this.createdDateDt = new Date(coll.config.ctime!);
this.createdDate = this.createdDateDt.toISOString();
this.modifiedDate = coll.config.metadata!.mtime
? new Date(coll.config.metadata!.mtime).toISOString()
: null;
this.format = format;
this.warcVersion = format === "warc1.0" ? "WARC/1.0" : "WARC/1.1";
if (format === "warc1.0") {
this.digestOpts = { algo: "sha-1", prefix: "sha1:", base32: true };
} else {
this.digestOpts = { algo: "sha-256", prefix: "sha256:" };
}
// determine filename from title, if it exists
if (!filename && coll.config.metadata!.title) {
filename = encodeURIComponent(
coll.config.metadata!.title.toLowerCase().replace(/\s/g, "-"),
);
}
if (!filename) {
filename = "webarchive";
}
this.filename = filename;
this.signer = signer || null;
}
async download(sizeCallback: SizeCallback | null = null) {
switch (this.format) {
case "wacz":
return this.downloadWACZ(this.filename, sizeCallback);
case "warc":
case "warc1.0":
return this.downloadWARC(this.filename, sizeCallback);
default:
return { error: "invalid 'format': must be wacz or warc" };
}
}
downloadWARC(filename: string, sizeCallback: SizeCallback | null = null) {
filename = (filename || "webarchive").split(".")[0] + ".warc";
// eslint-disable-next-line @typescript-eslint/no-this-alias
const dl = this;
const rs = new ReadableStream({
async start(controller) {
await dl.queueWARC(controller, filename, sizeCallback);
},
});
const headers = {
"Content-Disposition": `attachment; filename="${filename}"`,
"Content-Type": "application/octet-stream",
};
const resp: ResponseWithFilename = new Response(rs, { headers });
resp.filename = filename;
return resp;
}
async loadResourcesBlock(
start: [string, number] | [] = [],
): Promise<ResourceEntry[]> {
return await this.db.db!.getAll(
"resources",
IDBKeyRange.lowerBound(start, true),
RESOURCE_BATCH_SIZE,
);
}
async *iterResources(resources: ResourceEntry[]) {
let start: [string, number] | [] = [];
//let count = 0;
while (resources.length) {
const last: ResourceEntry = resources[resources.length - 1]!;
if (this.pageList) {
resources = resources.filter((res) =>
this.pageList!.includes(res.pageId || ""),
);
}
//count += resources.length;
yield* resources;
start = [last.url, last.ts];
resources = await this.loadResourcesBlock(start);
}
// if (count !== this.numResources) {
// console.warn(`Iterated ${count}, but expected ${this.numResources}`);
// }
}
async queueWARC(
controller: ReadableStreamDefaultController,
filename: string,
sizeCallback: SizeCallback | null,
) {
this.firstResources = await this.loadResourcesBlock();
for await (const chunk of this.generateWARC(filename)) {
controller.enqueue(chunk);
if (sizeCallback) {
sizeCallback(chunk.length);
}
}
for await (const chunk of this.generateTextWARC(filename)) {
controller.enqueue(chunk);
if (sizeCallback) {
sizeCallback(chunk.length);
}
}
controller.close();
}
addFile(
zip: ClientZipEntry[],
filename: string,
generator: Gen,
sizeCallback: SizeCallback | null,
) {
const stats: FileStats = { filename, size: 0 };
if (filename !== DATAPACKAGE_FILENAME && filename !== DIGEST_FILENAME) {
this.fileStats.push(stats);
}
zip.push({
name: filename,
lastModified: this.createdDateDt,
input: hashingGen(
generator,
stats,
this.fileHasher!,
sizeCallback,
this.markers.ZIP,
),
});
}
recordDigest(data: Uint8Array | string) {
this.recordHasher!.init();
this.recordHasher!.update(data);
return this.hashType + ":" + this.recordHasher!.digest("hex");
}
getWARCRecordUUID(name: string) {
return `<urn:uuid:${uuidv5(name, this.uuidNamespace)}>`;
}
async downloadWACZ(filename: string, sizeCallback: SizeCallback | null) {
filename = (filename || "webarchive").split(".")[0] + ".wacz";
this.fileHasher = await createSHA256();
this.recordHasher = await createSHA256();
this.hashType = "sha256";
const zip: ClientZipEntry[] = [];
this.firstResources = await this.loadResourcesBlock();
this.addFile(zip, "pages/pages.jsonl", this.generatePages(), sizeCallback);
this.addFile(
zip,
`archive/${this.warcName}`,
this.generateWARC(filename + `#/archive/${this.warcName}`, true),
sizeCallback,
);
//this.addFile(zip, "archive/text.warc", this.generateTextWARC(filename + "#/archive/text.warc"), false);
// don't use compressed index if we'll have a single block, need to have at least enough for 2 blocks
if (this.firstResources.length < 2 * LINES_PER_BLOCK) {
this.addFile(zip, "indexes/index.cdx", this.generateCDX(), sizeCallback);
} else {
this.addFile(
zip,
"indexes/index.cdx.gz",
this.generateCompressedCDX("index.cdx.gz"),
sizeCallback,
);
this.addFile(zip, "indexes/index.idx", this.generateIDX(), sizeCallback);
}
this.addFile(
zip,
DATAPACKAGE_FILENAME,
this.generateDataPackage(),
sizeCallback,
);
this.addFile(
zip,
DIGEST_FILENAME,
this.generateDataManifest(),
sizeCallback,
);
const headers = {
"Content-Disposition": `attachment; filename="${filename}"`,
"Content-Type": "application/zip",
};
const rs = makeZip(zip);
const response: ResponseWithFilename = new Response(rs, { headers });
response.filename = filename;
return response;
}
async *generateWARC(
filename: string,
digestRecordAndCDX = false,
): AsyncGenerator<Uint8Array> {
try {
let offset = 0;
// if filename provided, add warcinfo
if (filename) {
const warcinfo = await this.createWARCInfo(filename);
yield warcinfo;
offset += warcinfo.length;
}
if (this.markers.WARC_GROUP) {
yield this.markers.WARC_GROUP;
}
for await (const res of this.iterResources(this.firstResources)) {
const resource: DLResourceEntry = res as DLResourceEntry;
resource.offset = offset;
const records = await this.createWARCRecord(resource);
if (!records) {
resource.skipped = true;
continue;
}
// response record
const responseData: { length: number; digest?: string } = { length: 0 };
yield* this.emitRecord(records[0]!, digestRecordAndCDX, responseData);
offset += responseData.length;
resource.length = responseData.length;
if (digestRecordAndCDX && !resource.recordDigest) {
//resource.recordDigest = this.recordDigest(records[0]);
resource.recordDigest = responseData.digest;
}
// request record, if any
if (records.length > 1) {
const requestData = { length: 0 };
yield* this.emitRecord(records[1]!, false, requestData);
offset += requestData.length;
}
if (digestRecordAndCDX) {
this.cdxjLines.push(this.getCDXJ(resource, this.warcName));
}
if (this.markers.WARC_GROUP) {
yield this.markers.WARC_GROUP;
}
}
} catch (e) {
console.warn(e);
}
}
async *emitRecord(
record: WARCRecord,
doDigest: boolean,
output: { length: number; digest?: string },
) {
const opts = { gzip: this.gzip, digest: this.digestOpts };
const s = new WARCSerializer(record, opts);
const chunks = [];
if (doDigest) {
this.recordHasher!.init();
}
for await (const chunk of s) {
if (doDigest) {
this.recordHasher!.update(chunk as Uint8Array);
}
chunks.push(chunk);
output.length += chunk.length;
}
if (doDigest) {
output.digest = this.hashType + ":" + this.recordHasher!.digest("hex");
}
if (
!this.gzip &&
this.markers.WARC_PAYLOAD &&
record.warcType !== "request" &&
(chunks.length === 5 || chunks.length === 4)
) {
if (chunks.length === 5) {
yield chunks[0];
yield chunks[1];
yield chunks[2];
yield this.markers.WARC_PAYLOAD;
if (chunks[3].length) {
yield chunks[3];
yield this.markers.WARC_PAYLOAD;
}
yield chunks[4];
} else {
yield chunks[0];
yield chunks[1];
yield this.markers.WARC_PAYLOAD;
if (chunks[2].length) {
yield chunks[2];
yield this.markers.WARC_PAYLOAD;
}
yield chunks[3];
}
} else {
for (const chunk of chunks) {
yield chunk;
}
}
}
async *generateTextWARC(filename: string) {
try {
let offset = 0;
// if filename provided, add warcinfo
if (filename) {
const warcinfo = await this.createWARCInfo(filename);
yield warcinfo;
offset += warcinfo.length;
}
for (const resource of this.textResources) {
resource.offset = offset;
const chunk = await this.createTextWARCRecord(resource);
yield chunk;
offset += chunk.length;
resource.length = chunk.length;
}
} catch (e) {
console.warn(e);
}
}
getCDXJ(resource: DLResourceEntry, filename: string): string {
const data: CDXJEntry = {
url: resource.url,
digest: resource.digest,
mime: resource.mime!,
offset: resource.offset!,
length: resource.length!,
recordDigest: resource.recordDigest!,
status: resource.status!,
};
if (filename) {
data.filename = filename;
}
if (resource.method && resource.method !== "GET") {
const m = resource.url.match(SPLIT_REQUEST_Q_RX);
if (m) {
data.url = m[1]!;
// resource.requestBody is the raw payload, use the converted one from the url for the cdx
data.requestBody = m[2];
}
data.method = resource.method;
}
return `${getSurt(resource.url)} ${resource.timestamp} ${JSON.stringify(data)}\n`;
}
*generateCDX() {
this.cdxjLines.sort();
yield* this.cdxjLines;
}
*generateCompressedCDX(filename: string) {
let offset = 0;
let chunkDeflater: Deflate | null = null;
let count = 0;
let key = "";
// eslint-disable-next-line @typescript-eslint/no-this-alias
const dl = this;
const finishChunk = () => {
const data = chunkDeflater!.result as Uint8Array;
const length = data.length;
const digest = dl.recordDigest(data);
const idx =
key + " " + JSON.stringify({ offset, length, digest, filename });
dl.indexLines.push(idx);
offset += length;
chunkDeflater = null;
count = 0;
key = "";
return data;
};
for (const cdx of this.generateCDX()) {
if (!chunkDeflater) {
chunkDeflater = new Deflate({ gzip: true });
}
if (!key) {
key = cdx.split(" {", 1)[0] || "";
}
if (++count === LINES_PER_BLOCK) {
chunkDeflater.push(cdx, true);
yield finishChunk();
} else {
chunkDeflater.push(cdx);
}
}
if (chunkDeflater) {
chunkDeflater.push(EMPTY, true);
yield finishChunk();
}
}
async *generateDataManifest() {
const hash = this.datapackageDigest;
const path = DATAPACKAGE_FILENAME;
const data: { path: string; hash: string; signedData?: DataSignature } = {
path,
hash,
};
if (this.signer) {
try {
data.signedData = await this.signer.sign(hash, this.createdDate);
this.signer.close();
this.signer = null;
} catch (e) {
// failed to sign
console.log(e);
}
}
const res = JSON.stringify(data, null, 2);
yield res;
}
async *generateDataPackage() {
const root: DataPackageJSON = {
profile: "data-package",
resources: this.fileStats.map((stats) => {
const path = stats.filename;
return {
name: path.slice(path.lastIndexOf("/") + 1),
path,
hash: this.hashType + ":" + stats.hash,
bytes: stats.size,
};
}),
wacz_version: WACZ_VERSION,
software: this.softwareString,
created: this.createdDate,
};
if (this.metadata.title) {
root.title = this.metadata.title;
}
if (this.metadata.desc) {
root.description = this.metadata.desc;
}
if (this.modifiedDate) {
root.modified = this.modifiedDate;
}
const datapackageText = JSON.stringify(root, null, 2);
this.datapackageDigest = this.recordDigest(datapackageText);
yield datapackageText;
}
async *generatePages() {
const pageIter: ExtPageEntry[] = (
this.pageList
? await this.db.getPages(this.pageList)
: await this.db.getAllPages()
) as ExtPageEntry[];
yield JSON.stringify({
format: "json-pages-1.0",
id: "pages",
title: "All Pages",
hasText: true,
});
for (const page of pageIter) {
const ts = new Date(page.ts).toISOString();
const pageData: DLPageData = {
title: page.title,
url: page.url,
id: page.id,
size: page.size,
ts,
};
if (page.favIconUrl) {
pageData.favIconUrl = page.favIconUrl;
}
if (page.text) {
pageData.text = page.text;
}
yield "\n" + JSON.stringify(pageData);
if (page.text) {
this.textResources.push({
url: page.url,
ts: page.ts,
text: page.text,
pageId: page.id,
digest: "",
});
}
}
}
/*
async getLists() {
try {
const lists = await this.db.getAllCuratedByList();
console.log(lists);
return yaml.safeDump(lists, {skipInvalid: true});
} catch (e) {
console.log(e);
}
}
*/
async *generateIDX() {
yield this.indexLines.join("\n");
}
async createWARCInfo(filename: string) {
const warcVersion = this.warcVersion;
const type = "warcinfo";
const info = {
software: this.softwareString,
format:
warcVersion === "WARC/1.0"
? "WARC File Format 1.0"
: "WARC File Format 1.1",
isPartOf: this.metadata["title"] || this.collId,
};
//info["json-metadata"] = JSON.stringify(metadata);
const warcHeaders = {
"WARC-Record-ID": this.getWARCRecordUUID(JSON.stringify(info)),
};
const date = this.createdDate;
const record = WARCRecord.createWARCInfo(
{ filename, type, date, warcHeaders, warcVersion },
info,
);
const buffer = await WARCSerializer.serialize(record, {
gzip: this.gzip,
digest: this.digestOpts,
});
return buffer;
}
fixupHttpHeaders(headersMap: Record<string, string>, length: number) {
// how many headers are we parsing here
const numHeaders = this.alreadyDecoded ? 3 : 1;
let count = 0;
for (const [name] of Object.entries(headersMap)) {
const lowerName = name.toLowerCase();
switch (lowerName) {
case "content-encoding":
case "transfer-encoding":
if (this.alreadyDecoded) {
headersMap["x-orig-" + name] = headersMap[name]!;
delete headersMap[name];
++count;
}
break;
case "content-length":
headersMap[name] = "" + length;
++count;
break;
}
if (count === numHeaders) {
break;
}
}
}
async createWARCRecord(resource: DLResourceEntry) {
let url = resource.url;
const date = new Date(resource.ts).toISOString();
resource.timestamp = getTSMillis(date);
const httpHeaders = resource.respHeaders || {};
const warcVersion = this.warcVersion;
const pageId = resource.pageId;
let payload: Uint8Array | null | undefined = resource.payload;
let type: "response" | "request" | "resource" | "revisit";
let refersToUrl, refersToDate;
let refersToDigest;
let storeDigest: DigestCache | null = null;
let method = "GET";
let requestBody;
// non-GET request/response:
// if original request body + original requestURL is preserved, write that with original method
// otherwise, just serialize the converted-to-GET form
if (
resource.method &&
resource.method !== "GET" &&
resource.requestBody &&
resource.requestUrl
) {
// ensure payload is an arraybuffer
requestBody =
typeof resource.requestBody === "string"
? encoder.encode(resource.requestBody)
: resource.requestBody;
method = resource.method;
url = resource.requestUrl;
} else {
requestBody = new Uint8Array([]);
}
if (!resource.digest && resource.payload) {
resource.digest = await digestMessage(resource.payload, "sha-256");
}
const digestOriginal = this.digestsVisted[resource.digest];
if (resource.digest && digestOriginal) {
// if exact resource in a row, and same page, then just skip instead of writing revisit
if (
url === this.lastUrl &&
method === "GET" &&
pageId === this.lastPageId
) {
//console.log("Skip Dupe: " + url);
return null;
}
type = "revisit";
resource.mime = "warc/revisit";
payload = EMPTY;
refersToUrl = digestOriginal.url;
refersToDate = digestOriginal.date;
refersToDigest = digestOriginal.payloadDigest || resource.digest;
} else if (resource.origURL && resource.origTS) {
if (!resource.digest || !digestOriginal) {
//console.log("Skip fuzzy resource with no digest");
return null;
}
type = "revisit";
resource.mime = "warc/revisit";
payload = EMPTY;
refersToUrl = resource.origURL;
refersToDate = new Date(resource.origTS).toISOString();
refersToDigest = digestOriginal.payloadDigest || resource.digest;
} else {
type = "response";
if (!payload) {
payload = (await this.db.loadPayload(
resource,
{},
)) as Uint8Array | null;
}
if (!payload) {
//console.log("Skipping No Payload For: " + url, resource);
return null;
}
if (method === "GET") {
storeDigest = { url, date };
this.digestsVisted[resource.digest] = storeDigest;
}
}
const status = resource.status || 200;
const statusText = resource.statusText || getStatusText(status);
const statusline = `HTTP/1.1 ${status} ${statusText}`;
const responseRecordId = this.getWARCRecordUUID(
type + ":" + resource.timestamp + "/" + resource.url,
);
const warcHeaders: Record<string, string> = {
"WARC-Record-ID": responseRecordId,
};
if (pageId) {
warcHeaders["WARC-Page-ID"] = pageId;
}
if (resource.extraOpts && Object.keys(resource.extraOpts).length) {
warcHeaders["WARC-JSON-Metadata"] = JSON.stringify(resource.extraOpts);
}
if (refersToDigest) {
warcHeaders["WARC-Payload-Digest"] = refersToDigest;
}
// remove encoding, set content-length as encoding never preserved in browser-based capture
this.fixupHttpHeaders(httpHeaders, payload.length);
const record = WARCRecord.create(
{
url,
date,
type,
warcVersion,
warcHeaders,
statusline,
httpHeaders,
refersToUrl,
refersToDate,
},
getPayload(payload),
);
//const buffer = await WARCSerializer.serialize(record, {gzip: this.gzip, digest: this.digestOpts});
if (!resource.digest && record.warcPayloadDigest) {
resource.digest = record.warcPayloadDigest;
}
if (storeDigest && record.warcPayloadDigest) {
storeDigest.payloadDigest = record.warcPayloadDigest;
}
this.lastPageId = pageId;
this.lastUrl = url;
const records = [record];
if (resource.reqHeaders) {
const type = "request";
const reqWarcHeaders: Record<string, string> = {
"WARC-Record-ID": this.getWARCRecordUUID(
type + ":" + resource.timestamp + "/" + resource.url,
),
"WARC-Page-ID": pageId,
"WARC-Concurrent-To": responseRecordId,
};
const urlParsed = new URL(url);
const statusline = `${method} ${url.slice(urlParsed.origin.length)} HTTP/1.1`;
const reqRecord = WARCRecord.create(
{
url,
date,
warcVersion,
type,
warcHeaders: reqWarcHeaders,
httpHeaders: resource.reqHeaders,
statusline,
},
getPayload(requestBody),
);
//records.push(await WARCSerializer.serialize(reqRecord, {gzip: this.gzip, digest: this.digestOpts}));
records.push(reqRecord);
}
return records;
}
async createTextWARCRecord(resource: DLResourceEntry) {
const date = new Date(resource.ts).toISOString();
const timestamp = getTSMillis(date);
resource.timestamp = timestamp;
const url = `urn:text:${timestamp}/${resource.url}`;
resource.url = url;
const type = "resource";
const warcHeaders = { "Content-Type": 'text/plain; charset="UTF-8"' };
const warcVersion = this.warcVersion;
const payload = getPayload(encoder.encode(resource.text));
const record = WARCRecord.create(
{ url, date, warcHeaders, warcVersion, type },
payload,
);
const buffer = await WARCSerializer.serialize(record, {
gzip: this.gzip,
digest: this.digestOpts,
});
if (!resource.digest && record.warcPayloadDigest) {
resource.digest = record.warcPayloadDigest;
}
return buffer;
}
}
export { Downloader };