@lsdsoftware/s3-log-store
Efficiently store and retrieve log-structured (append-only) data in S3
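Entries are appended to a local work file and periodically backed up to S3 as gzipped, sequence-numbered chunks; retrieval reads the work file first and walks backwards through earlier chunks as needed. Each entry is stored as one JSON object followed by ",\n", so a chunk payload looks like the sketch below (entry contents are illustrative):

{"level":"info","message":"service started"},
{"level":"warn","message":"retrying upload"},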
import * as s3 from "@aws-sdk/client-s3"
import { Fetch } from "multilayer-async-cache-builder"
import path from "path"
import * as rxjs from "rxjs"
import util from "util"
import zlib from "zlib"
import { makeBackupTask } from "./backup-task.js"
import { makeCheckpointFile } from "./checkpoint-file.js"
import { makeDeleteInactiveTask } from "./delete-inactive-task.js"
import { AccessTracker, makeRetrievalCache } from "./retrieval-cache.js"
import { makeS3Store } from "./s3-store.js"
import { makeWorkDir } from "./work-dir.js"
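/**
 * Create a log store that appends entries to local work files, periodically
 * backs them up to S3 as gzipped, sequence-numbered chunks, and serves reads
 * from the work file plus a disk cache of previously fetched chunks.
 */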
export function makeLogStore<T>({
workDirConfig,
s3StoreConfig,
retrievalCacheConfig,
}: {
workDirConfig: {
dirPath: string
syncInterval: number
chunkSize: number
inactiveTtlDays: number
}
s3StoreConfig: {
clientConfig: s3.S3ClientConfig
bucket: string
folder: string
}
retrievalCacheConfig: {
cacheFolder: string
cleanupInterval: number
makeAccessTracker(): AccessTracker
}
}) {
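  // Local staging directory for appends, plus a checkpoint file used by the backup task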
const workDir = makeWorkDir(workDirConfig.dirPath)
const checkpointFile = makeCheckpointFile(path.join(workDirConfig.dirPath, 'checkpoint'))
const s3Store = makeS3Store(s3StoreConfig)
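  // Background tasks: delete work files inactive for more than inactiveTtlDays,
  // and back up work-file data to S3 in chunkSize increments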
const deleteInactiveTask = makeDeleteInactiveTask({ workDir, inactiveTtlDays: workDirConfig.inactiveTtlDays })
const backupTask = makeBackupTask({ workDir, checkpointFile, s3Store, chunkSize: workDirConfig.chunkSize })
const retrievalCache = makeRetrievalCache(retrievalCacheConfig)
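  // Fetch a gzipped chunk from S3 and decompress it to text; results are cached
  // in the retrieval cache, and concurrent requests for the same key are deduped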
const getChunk = new Fetch(async (key: { fileName: string, seqNum: number, hashKey: string }) =>
s3Store.getFile(key.fileName, key.seqNum)
.then(util.promisify(zlib.gunzip))
.then(bytes => bytes.toString())
)
.cache(retrievalCache)
.dedupe()
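  // Live subscribers per file name, notified on every append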
const subscriberMap = new Map<string, Set<rxjs.Subscriber<T>>>()
return {
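    // Runs cleanup then backup on a timer; exhaustMap skips ticks while a run is still in progress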
syncJob$: rxjs.timer(0, workDirConfig.syncInterval).pipe(
rxjs.exhaustMap(async () => {
const deleteInactiveStatus = await deleteInactiveTask.run()
const backupStatus = await backupTask.run()
return { deleteInactiveStatus, backupStatus }
})
),
retrievalCacheCleanupJob$: retrievalCache.cleanupJob$,
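    // Append one entry as `JSON.stringify(entry) + ',\n'` and push it to live subscribers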
async append(fileName: string, entry: T) {
const workFile = workDir.makeWorkFile(fileName)
await workFile.append(JSON.stringify(entry) + ',\n')
subscriberMap.get(fileName)?.forEach(x => x.next(entry))
},
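    // Collect offset + limit entries: start with the local work file, then walk
    // backwards through earlier S3 chunks; return up to `limit` entries, newest
    // first, after skipping the newest `offset`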
async retrieve(fileName: string, offset: number, limit: number) {
const workFile = workDir.makeWorkFile(fileName)
const { header, payload } = await workFile.read()
.catch(err => {
if (err.code == 'ENOENT') return { header: undefined, payload: '' }
else throw err
})
let seqNum = header?.seqNum
let entries = parseChunk(payload)
while (entries.length < offset + limit) {
        if (seqNum == undefined) {
          // New work file: claim the next sequence number after the newest chunk in S3
          seqNum = await s3Store.getMaxSeqNum(fileName) + 1
          await workFile.write(seqNum, payload)
        }
if (seqNum <= 1) break
seqNum--
const chunk = await getChunk({ fileName, seqNum, hashKey: `${fileName}-${seqNum}` })
const earlierEntries = parseChunk(chunk)
entries = earlierEntries.concat(entries)
}
return entries.slice(-(offset + limit), -offset || undefined).reverse()
},
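    // Observable of entries appended to fileName after subscription time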
subscribe(fileName: string) {
return new rxjs.Observable<T>(subscriber => {
        // Use const so the narrowing survives into the teardown closure below
        const subscribers = subscriberMap.get(fileName) ?? new Set<rxjs.Subscriber<T>>()
        subscriberMap.set(fileName, subscribers)
subscribers.add(subscriber)
return () => {
subscribers.delete(subscriber)
if (subscribers.size == 0) subscriberMap.delete(fileName)
}
})
}
}
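  // A chunk payload is `{json},\n` per entry; strip the trailing comma and wrap
  // the whole payload in brackets to parse it as a JSON array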
function parseChunk(payload: string): T[] {
return JSON.parse('[' + payload.replace(/,\n$/, '') + ']')
}
}
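A minimal usage sketch, assuming illustrative paths, intervals, and bucket names; the makeAccessTracker factory is a hypothetical placeholder, since the AccessTracker interface is defined in ./retrieval-cache.js and not shown above.

import { makeLogStore } from "@lsdsoftware/s3-log-store"

// Hypothetical placeholder: supply a real AccessTracker implementation
declare const makeMyAccessTracker: () => any

const logStore = makeLogStore<{ level: string, message: string }>({
  workDirConfig: {
    dirPath: "/var/lib/my-app/logs",   // hypothetical path
    syncInterval: 60_000,              // ms between maintenance runs
    chunkSize: 1024 * 1024,            // assumed to be bytes
    inactiveTtlDays: 30,
  },
  s3StoreConfig: {
    clientConfig: { region: "us-east-1" },
    bucket: "my-log-bucket",           // hypothetical bucket
    folder: "logs",
  },
  retrievalCacheConfig: {
    cacheFolder: "/var/cache/my-app/chunks",
    cleanupInterval: 3_600_000,        // assumed to be ms
    makeAccessTracker: makeMyAccessTracker,
  },
})

// syncJob$ is cold: backups and inactive-file cleanup run only while subscribed
logStore.syncJob$.subscribe()
logStore.retrievalCacheCleanupJob$.subscribe()

await logStore.append("service-a", { level: "info", message: "started" })
const latest = await logStore.retrieve("service-a", 0, 10)  // up to 10 entries, newest first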