UNPKG

whalesong

Version:

Distributed container registry built on hypercores and IPFS

320 lines (278 loc) 12.1 kB
/*
 * Distributed (hyperbee(hypercore) + ipfs) based storage.
 *
 * Hyperbee is used to store the mutable state, i.e. tags and manifests. It also stores
 * mappings between docker digests and ipfs digests.
 *
 * IPFS is used to store immutable blobs, such as the layers themselves.
 */

import crypto from 'crypto'
import Debug from 'debug'
import { getHyperbee, getNewHyperbee, getSettingsHyperbee, getIpfsClient, setup, shutdown } from './distributed-clients.js'
import Settings from './settings.js'
import UploadStore from './upload-store.js'
import { pipeline as pipelineCb, PassThrough } from 'stream'
import { pipeline } from 'stream/promises'

const debug = Debug('whalesong:storage')

// After syncing a new hyperbee, wait for a couple of seconds to allow for an initial sync.
// This is to avoid whalesong returning 404 Not Found for a manifest even though it actually exists.
// The value is in seconds (it is multiplied by 1000 before being handed to setTimeout).
// TODO: actually detect if we have synced something, and then stop waiting to avoid unnecessary wait,
// or wait even a bit longer if nothing has been synced yet.
const INITIAL_SYNC_SETTLE = 10

/**
 * Storage backend that keeps mutable state (tags, digest -> location mappings) in one
 * hyperbee per organization and the immutable blob contents in IPFS.
 *
 * `init()` must be awaited before any other method is used: it is what assigns
 * `this.ipfs` and `this.settings`.
 */
class DistributedStorage {
  constructor () {
    this.ipfs = null
    this.settings = null
    // org key -> hyperbee instance
    this.hyperbees = new Map()
    this.uploadStore = new UploadStore()
  }

  /**
   * Returns the blob sub-database of the hyperbee registered for `org`.
   *
   * @param {string} org - key under which the organization's hyperbee is registered
   * @returns the `blob` sub-database, or undefined when no hyperbee is known for `org`
   */
  blobStore (org) {
    const bee = this.hyperbees.get(org)
    debug(`returning blob store for org ${org}, got bee ${bee}`)
    return bee?.sub('blob')
  }

  /**
   * Returns the manifest sub-database for repository `name` of the hyperbee registered for `org`.
   *
   * @param {string} org - key under which the organization's hyperbee is registered
   * @param {string} name - repository name, used as a nested sub-database below `manifest`
   * @returns the nested sub-database, or undefined when no hyperbee is known for `org`
   */
  manifestStore (org, name) {
    debug(`returning manifest store for org ${org}`)
    const bee = this.hyperbees.get(org)
    return bee?.sub('manifest').sub(name)
  }

  /**
   * Looks up a blob by docker digest and streams its content from IPFS.
   *
   * The content is hashed while it flows through the returned stream; a digest mismatch
   * is only logged, it does not fail the stream.
   *
   * @param {string} org - key under which the organization's hyperbee is registered
   * @param {string} name - repository name (not used by this method)
   * @param {string} dockerDigest - digest of the form `sha256:<hex>`
   * @returns {Promise<{stream: (PassThrough|null), size: *, contentType: *}>}
   *   all three fields are null when the org, the blob record or its IPFS location is unknown
   */
  async getBlob (org, name, dockerDigest) {
    const blobs = this.blobStore(org)
    // TODO: should we make an initial sync here too just like for manifests if org. does not exist?
    if (!blobs) {
      return { stream: null, size: null, contentType: null }
    }

    debug(`getting blob ${dockerDigest}`)
    const blobObjectNode = await blobs.get(dockerDigest)
    const blobObject = blobObjectNode?.value
    debug(`blob lookup resulted in ${JSON.stringify(blobObject)}`)
    if (!blobObject) {
      return { stream: null, size: null, contentType: null }
    }

    const { contentType, size } = blobObject
    // Currently, support only ipfs cid and a single location for every blob.
    // A record with a missing or empty `locations` list is treated as "not found" instead of throwing.
    const ipfsCid = blobObject.locations?.[0]?.ref
    debug(`blob lookup yielded ipfs cid ${ipfsCid}`)
    if (!ipfsCid) {
      return { stream: null, size: null, contentType: null }
    }

    debug(`retrieving blob stream from IPFS with CID ${ipfsCid}, based on digest ${dockerDigest}. size ${size}, content type: ${contentType}`)

    // Now get the blob from ipfs.
    const data = this.ipfs.cat(ipfsCid)

    // Pin it so we keep it in storage.
    // TODO: sometime in the future (tm) we should unpin them so storage doesn't grow indefinitely.
    debug('pinning ipfs cid %s', ipfsCid)
    await this.ipfs.pin.add(ipfsCid)

    // Skip the up-front sanity check here, we trust that we have stored the correct hash in ipfs.
    const pass = new PassThrough()
    const hash = crypto.createHash('sha256')
    pipelineCb(
      data,
      async function * (source) {
        for await (const chunk of source) {
          hash.update(chunk)
          yield chunk
        }
      },
      pass,
      (err) => {
        if (err) {
          console.error('Error in get blob stream pipeline:', err)
          return
        }

        // Trust but verify: the caller is handed the requested digest as the digest of the data,
        // but we verify it in the background once the stream has finished and log any mismatch.
        const actual = `sha256:${hash.digest('hex')}`
        if (actual !== dockerDigest) {
          console.error(`Mismatching digest when fetching from IPFS, expected ${dockerDigest}, actual ${actual}`)
        }
      }
    )

    return { stream: pass, size, contentType }
  }

  /**
   * Checks whether a blob record exists without touching IPFS.
   *
   * @param {string} org - key under which the organization's hyperbee is registered
   * @param {string} name - repository name (not used by this method)
   * @param {string} digest - docker digest to look up
   * @returns {Promise<{contentType: *, size: *}>} both null when the org or the record is unknown
   */
  async hasBlob (org, name, digest) {
    const blobs = this.blobStore(org)
    debug(`trying to check if blob ${digest} exists`)
    const blobObjectNode = await blobs?.get(digest)
    debug(`blob object node is ${JSON.stringify(blobObjectNode)}`)

    const blobObject = blobObjectNode?.value
    if (!blobObject) {
      return { contentType: null, size: null }
    }
    const { contentType, size } = blobObject
    return { contentType, size }
  }

  /**
   * Streams content into IPFS while computing its sha256 digest and size, then records
   * the digest -> IPFS CID mapping in the org's blob store.
   *
   * Note that the content is added to IPFS before the org is checked for existence.
   *
   * @param {string} org - key under which the organization's hyperbee is registered
   * @param {string} name - repository name (not used by this method)
   * @param stream - readable source of the blob content
   * @param {(string|null)} contentType - stored verbatim alongside the blob record
   * @returns {Promise<string>} the docker digest, `sha256:<hex>`
   * @throws {Error} when no hyperbee is registered for `org`
   */
  async _putBlob (org, name, stream, contentType) {
    // Hook into the stream to calculate the digest as data flows to ipfs.
    const hash = crypto.createHash('sha256')
    const pass = new PassThrough()
    let streamSize = 0
    const pipe = pipeline(
      stream,
      async function * (source) {
        for await (const chunk of source) {
          hash.update(chunk)
          streamSize += chunk.byteLength
          yield chunk
        }
      },
      pass
    )

    // Add to IPFS and get CID. Both promises are awaited together so that a failure
    // in either the pipeline or the add is surfaced.
    const [, ipfsAdded] = await Promise.all([pipe, this.ipfs.add(pass)])
    const ipfsCid = ipfsAdded.cid.toString()
    const digest = `sha256:${hash.digest('hex')}`
    debug(`stored blob in IPFS with CID ${ipfsCid} and digest ${digest}`)

    // Store the mapping between digest and cid in the hyperbee for that org.
    const blobs = this.blobStore(org)
    if (!blobs) {
      throw new Error('tried to put blob into non-existing (thus not writable) org')
    }
    await blobs.put(digest, {
      size: streamSize,
      contentType,
      locations: [{ type: 'ipfs', ref: ipfsCid }]
    })

    return digest
  }

  /**
   * Returns the manifest store for `org`/`name`, first fetching and registering the org's
   * hyperbee (and waiting INITIAL_SYNC_SETTLE seconds) when it is not known yet.
   *
   * The bee is registered under the `key` returned by `getHyperbee`, while the store is
   * looked up under `org`; the result is therefore nullish if the two differ.
   *
   * TODO: how is this affected if pubkey doesn't exist? sync is slow? etc.
   *
   * @returns the manifest sub-database, or undefined when it is still unavailable after syncing
   */
  async _syncedManifestStore (org, name) {
    const known = this.manifestStore(org, name)
    if (known) {
      return known
    }

    debug(`we did not have hyperbee for org ${org}, so we try to sync it now.`)
    const { key, bee } = await getHyperbee(org)
    debug('storing reference to hyperbee for key %s. Allowing %d seconds for initial sync.', key, INITIAL_SYNC_SETTLE)
    this.hyperbees.set(key, bee)
    await this.settings.addSubscribedFeed(org)
    const manifests = this.manifestStore(org, name)
    await new Promise(resolve => setTimeout(resolve, INITIAL_SYNC_SETTLE * 1000))
    debug('Finished waiting for initial sync of %s.', key)
    return manifests
  }

  /**
   * Resolves a tag or digest to a manifest digest, syncing the org's hyperbee first if needed.
   *
   * Lookup by tag follows org[tag] -> { digest: dockerdigest } -> org[dockerdigest] -> ipfs cid;
   * only the first hop happens here.
   *
   * @returns {Promise<(string|undefined)>} `tagOrDigest` itself when it starts with `sha256:`,
   *   otherwise the digest stored for the tag, or undefined when the tag (or the store) is unknown
   */
  async _resolveManifestDigest (org, name, tagOrDigest) {
    const manifests = await this._syncedManifestStore(org, name)
    if (tagOrDigest.startsWith('sha256:')) {
      return tagOrDigest
    }

    debug('looking up manifest by tag for tag %s', tagOrDigest)
    // A store that is still missing after the sync is treated as "tag not found",
    // matching how a lookup by digest behaves for an unknown org.
    const manifestDigestNode = await manifests?.get(tagOrDigest)
    const manifestDigest = manifestDigestNode?.value?.digest
    debug(`got manifest for ${org}/${name}:${tagOrDigest} returned digest ${manifestDigest}`)
    return manifestDigest
  }

  /**
   * Fetches a manifest by tag or digest.
   *
   * @param {string} org - key under which the organization's hyperbee is registered
   * @param {string} name - repository name
   * @param {string} tagOrDigest - a tag, or a digest starting with `sha256:`
   * @returns {Promise<{digest: (string|null), stream: (PassThrough|null), size: *, contentType: *}>}
   *   all fields are null when the manifest cannot be found
   */
  async getManifest (org, name, tagOrDigest) {
    const manifestDigest = await this._resolveManifestDigest(org, name, tagOrDigest)
    debug(`trying to get manifestDigest ${manifestDigest}`)

    if (manifestDigest) {
      // We now know the digest to use. Grab it!
      const { stream, size, contentType } = await this.getBlob(org, name, manifestDigest)
      debug(`got blob with size ${size}`)
      if (stream != null) {
        return { digest: manifestDigest, stream, size, contentType }
      }
    }

    return { digest: null, stream: null, size: null, contentType: null }
  }

  /**
   * Checks for a manifest by tag or digest without streaming its content.
   *
   * TODO: should we even sync for a HEAD? I think yes but i'm not sure.
   *
   * @param {string} org - key under which the organization's hyperbee is registered
   * @param {string} name - repository name
   * @param {string} tagOrDigest - a tag, or a digest starting with `sha256:`
   * @returns {Promise<{digest: (string|null), size: *, contentType: *}>}
   *   all fields are null when the manifest cannot be found
   */
  async hasManifest (org, name, tagOrDigest) {
    const manifestDigest = await this._resolveManifestDigest(org, name, tagOrDigest)

    if (manifestDigest) {
      // We now know the digest to use. Grab it!
      const { size, contentType } = await this.hasBlob(org, name, manifestDigest)
      if (size != null) {
        return { digest: manifestDigest, size, contentType }
      }
    }

    return { digest: null, size: null, contentType: null }
  }

  /**
   * Stores a manifest and points `tag` at it.
   *
   * The manifest is stored as a blob itself inside ipfs; the KV mapping is
   * tag-name -> { digest: dockerdigest } (i.e., not an ipfs cid as for blobs).
   *
   * @returns {Promise<string>} the docker digest of the stored manifest
   * @throws {Error} when no hyperbee is registered for `org`
   */
  async putManifest (org, name, tag, stream, contentType) {
    const digest = await this._putBlob(org, name, stream, contentType)

    const manifests = this.manifestStore(org, name)
    if (!manifests) {
      throw new Error('tried to put manifest into non-existing (thus not writable) org')
    }
    await manifests.put(tag, { digest })
    debug(`stored manifest for ${org}/${name}:${tag} as digest ${digest} with contentType ${contentType}`)

    return digest
  }

  /** Starts a new upload; delegates to the upload store (`org` and `name` are not used). */
  async newUpload (org, name) {
    return this.uploadStore.newUpload()
  }

  /** Appends `stream` to the upload identified by `uuid`; delegates to the upload store. */
  async patchUpload (org, name, uuid, stream) {
    return this.uploadStore.appendUpload(uuid, stream)
  }

  /**
   * Appends the final chunk to an upload, stores the complete upload as a blob and
   * concludes the upload.
   *
   * @returns {Promise<{digest: string, uploaded: *}>} the blob digest and whatever
   *   `appendUpload` reported for the final chunk
   */
  async putUpload (org, name, uuid, inStream) {
    const uploaded = await this.uploadStore.appendUpload(uuid, inStream)
    const stream = await this.uploadStore.getUpload(uuid)
    const digest = await this._putBlob(org, name, stream, null) // TODO: content-type for regular blobs too?
    debug(`Concluding upload ${uuid}`)
    // NOTE(review): the result of finishUpload is not awaited — confirm whether it returns a promise.
    this.uploadStore.finishUpload(uuid)
    return { digest, uploaded }
  }

  /**
   * Sets up the distributed clients, loads previously subscribed feeds, makes sure a
   * writable hyperbee exists and initializes the upload store.
   */
  async init () {
    await setup()
    this.ipfs = getIpfsClient()

    // Get our own settings bee.
    const { bee: settingsBee } = await getSettingsHyperbee()
    this.settings = new Settings(settingsBee)

    // Get a list of previously subscribed feeds, and start listening for them too.
    const feeds = await this.settings.getSubscribedFeeds()
    for (const feed of feeds) {
      if (this.hyperbees.has(feed)) {
        // Already loaded (e.g. listed twice); do not fetch it again.
        continue
      }
      debug(`loading prev subscribed feed ${feed}.`)
      const { key, bee } = await getHyperbee(feed)
      this.hyperbees.set(key, bee)
    }
    console.log(`Loaded ${feeds.length} previously subscribed feeds.`)

    // Check if the list of subscribed feeds contains any writable feed, if not, create a new one.
    if (this.getMyPubKey() === null) {
      // Now initialize a hyperbee (our own, writable).
      const { key, bee } = await getNewHyperbee()
      this.hyperbees.set(key, bee)
      await this.settings.addSubscribedFeed(key)
    }

    // Init upload store.
    await this.uploadStore.init()
  }

  /** Shuts down the distributed clients. */
  async shutdown () {
    console.debug('Shutting down storage')
    await shutdown()
    console.debug('Shut down complete')
  }

  /**
   * Returns the key of the first registered hyperbee whose feed is writable.
   *
   * @returns {(string|null)} the key, or null when no registered hyperbee is writable
   */
  getMyPubKey () {
    for (const [key, bee] of this.hyperbees) {
      if (bee.feed.writable) {
        return key
      }
    }
    return null
  }
}

export default DistributedStorage