@lsdsoftware/s3-log-store
Version:
Efficiently store and retrieve log-structured data (append only) in S3
62 lines (59 loc) • 2.49 kB
text/typescript
import { promisify } from "util"
import { gzip } from "zlib"
import { makeCheckpointFile } from "./checkpoint-file.js"
import { makeS3Store } from "./s3-store.js"
import { makeWorkDir } from "./work-dir.js"
export function makeBackupTask({ workDir, checkpointFile, s3Store, chunkSize }: {
workDir: ReturnType<typeof makeWorkDir>
checkpointFile: ReturnType<typeof makeCheckpointFile>
s3Store: ReturnType<typeof makeS3Store>
chunkSize: number
}) {
return {
async run() {
const checkpointTime = await checkpointFile.mtime()
if (checkpointTime) {
const fileNames = await workDir.listChanges(checkpointFile.filePath)
await checkpointFile.touch()
const filesBackedUp = []
for (const fileName of fileNames) {
filesBackedUp.push(await backup(fileName))
}
// Note that if any file is rewritten during backup (because exceed chunkSize or missing header),
// its mtime will be after the checkpoint time, i.e. it will be processed again in the next
// backup cycle (even if it hasn't changed). This is a tolerated redundancy.
return { lastCheckpoint: checkpointTime, filesBackedUp }
} else {
await workDir.ensureEmpty()
await checkpointFile.create()
return { lastCheckpoint: undefined }
}
}
}
async function backup(fileName: string) {
const workFile = workDir.makeWorkFile(fileName)
const { header, payload } = await workFile.read()
if (payload.length) {
const payloadGzipped = await promisify(gzip)(payload)
if (header) {
await s3Store.putFile(fileName, header.seqNum, payloadGzipped)
if (payloadGzipped.length >= chunkSize) {
await workFile.write(header.seqNum + 1)
}
return { fileName, hasHeader: true, seqNum: header.seqNum, size: payload.length, gzipSize: payloadGzipped.length }
} else {
const seqNum = (await s3Store.getMaxSeqNum(fileName)) + 1
await s3Store.putFile(fileName, seqNum, payloadGzipped)
if (payloadGzipped.length >= chunkSize) {
await workFile.write(seqNum + 1)
} else {
//rewrite file with header info
await workFile.write(seqNum, payload)
}
return { fileName, hasHeader: false, seqNum, size: payload.length, gzipSize: payloadGzipped.length }
}
} else {
return { fileName, hasHeader: header != undefined, seqNum: header?.seqNum, size: 0, gzipSize: 0 }
}
}
}