/*
 * @telios/nebula
 * Version:
 * Real-time distributed file and data storage.
 * 996 lines (824 loc) • 27.8 kB
 * JavaScript
 */
const fs = require('fs')
const EventEmitter = require('events')
const getDirName = require('path').dirname
const path = require('path')
const Database = require('./lib/database')
const Hyperbee = require('hyperbee')
const Hypercore = require('hypercore')
const pump = require('pump')
const Crypto = require('./lib/crypto')
const Swarm = require('./lib/swarm')
const stream = require('stream')
const blake = require('blakejs')
const Hyperswarm = require('hyperswarm')
const DHT = require('@hyperswarm/dht')
const MemoryStream = require('memorystream')
const { v4: uuidv4 } = require('uuid')
const FixedChunker = require('./util/fixedChunker.js')
const RequestChunker = require('./util/requestChunker.js')
const WorkerKeyPairs = require('./util/workerKeyPairs.js')
const isOnline = require('is-online')
const FileDB = require('./util/filedb.util')
const BSON = require('bson')
// Digest length handed to blake2b for file content hashes and fetch topics.
const HASH_OUTPUT_LENGTH = 32 // bytes
// Chunk size given to FixedChunker before a file is encrypted (see writeFile).
const MAX_PLAINTEXT_BLOCK_SIZE = 65536
// Chunk size given to FixedChunker before a file is decrypted (see readFile).
// NOTE(review): 17 bytes larger than the plaintext block; presumably the per-block
// overhead added by Crypto.encryptStream — confirm against ./lib/crypto.
const MAX_ENCRYPTED_BLOCK_SIZE = 65553
// Used as a setTimeout delay, i.e. milliseconds.
const FILE_TIMEOUT = 2000 // How long to wait for the on data event when downloading a file from a remote drive.
const FILE_RETRY_ATTEMPTS = 3 // Fail to fetch file after n attempts
// Also sizes the pool of worker key pairs created in the Drive constructor.
const FILE_BATCH_SIZE = 10 // How many parallel requests are made in each file request batch
/**
 * A drive that stores files on disk under `<drivePath>/Files`, keeps their
 * metadata in a Database instance and exchanges both with peers over Hyperswarm.
 *
 * Events emitted by this class (all visible in the code below):
 * 'file-add', 'file-sync', 'file-unlink', 'sync', 'collection-update',
 * 'network-updated', 'peer-connected', 'peer-disconnected',
 * 'remote-cores-downloaded', 'newMessage' and 'message'.
 */
class Drive extends EventEmitter {
  /**
   * @param {string} drivePath - Directory that holds this drive's data. Created if missing.
   * @param {string} [peerPubKey] - Key used to clone and seed drive. Should only be shared with trusted sources.
   * @param {object} [opts]
   * @param {*} [opts.storage] - Custom storage handed to Database instead of drivePath.
   * @param {object} [opts.keyPair] - ed25519 keypair to listen on. Generated with DHT.keyPair() when omitted.
   * @param {boolean} [opts.writable]
   * @param {object} [opts.swarmOpts] - May carry `server`, `client` and `acl`.
   * @param {*} [opts.encryptionKey]
   * @param {number} [opts.fileTimeout] - Milliseconds to wait for data from a remote drive before retrying.
   * @param {number} [opts.fileRetryAttempts] - Total number of attempts made before a file fetch fails.
   * @param {boolean} [opts.checkNetworkStatus] - Poll internet connectivity and emit 'network-updated'.
   * @param {boolean} [opts.joinSwarm=true]
   * @param {boolean} [opts.fullTextSearch] - Initialize a corestore to support full text search indexes.
   * @param {boolean} [opts.blind] - Set to true if blind mirroring another drive (you don't have the encryption key).
   * @param {number} [opts.storageMaxBytes] - Max size this drive will store in bytes before turning off replication/file syncing.
   * @param {boolean} [opts.syncFiles=true]
   * @param {string[]} [opts.includeFiles] - When set, only these metadata paths are queued for download.
   * @param {boolean} [opts.broadcast=true] - Tell the other peer drives about this drive.
   */
  constructor(
    drivePath,
    peerPubKey,
    {
      storage,
      keyPair,
      writable,
      swarmOpts,
      encryptionKey,
      fileTimeout,
      fileRetryAttempts,
      checkNetworkStatus,
      joinSwarm,
      fullTextSearch,
      blind,
      storageMaxBytes,
      syncFiles = true,
      includeFiles,
      broadcast = true
    } = {} // Defaulted so `new Drive(path, key)` no longer throws while destructuring.
  ) {
    super()

    this.storage = storage
    this.encryptionKey = encryptionKey
    this.database = null
    this.db = null;
    this.drivePath = drivePath
    this.swarmOpts = swarmOpts
    this.publicKey = null
    this.peerPubKey = peerPubKey
    this.peerWriterKey = null
    this.keyPair = keyPair ? keyPair : DHT.keyPair()
    this.writable = writable
    this.fullTextSearch = fullTextSearch
    this.fileTimeout = fileTimeout || FILE_TIMEOUT
    // Stored as "highest allowed zero-based attempt index". The previous
    // `fileRetryAttempts-1 || default` form turned a configured 1 (-> 0) into the default.
    this.fileRetryAttempts = typeof fileRetryAttempts === 'number' && !Number.isNaN(fileRetryAttempts)
      ? fileRetryAttempts - 1
      : FILE_RETRY_ATTEMPTS - 1
    this.requestQueue = new RequestChunker(null, FILE_BATCH_SIZE)
    this.checkNetworkStatus = checkNetworkStatus
    this.joinSwarm = typeof joinSwarm === 'boolean' ? joinSwarm : true
    this.peers = new Set()
    this.network = {
      internet: false,
      drive: false
    }
    this.blind = blind ? blind : false
    this.storageMaxBytes = storageMaxBytes || false
    this.syncFiles = syncFiles
    this.includeFiles = includeFiles
    this.opened = false
    this.broadcast = broadcast

    // When using custom storage, transform drive path into beginning of the storage namespace
    this.storageName = drivePath.slice(drivePath.lastIndexOf('/') + 1, drivePath.length)

    this._localCore = null
    this._localHB = null // Optional datastore for storing encrypted data locally. This will not sync with peers or be replicated.
    this._swarm = null
    this._workerKeyPairs = new WorkerKeyPairs(FILE_BATCH_SIZE)
    this._collections = {}
    this._filesDir = path.join(drivePath, `./Files`)
    this._localDB = null // Local Key value datastore only
    this._lastSeq = null
    this._checkInternetInt = null
    this._checkInternetInProgress = false
    this._fileStatPath = this.drivePath + '/Files/file_stat.txt'
    this._stat = {
      file_bytes: 0,
      core_bytes: 0,
      total_bytes: 0
    }
    this._dbVersion = '2.0'
    this.indexCreated = false

    // `recursive` also makes these calls a no-op when the directory already exists.
    fs.mkdirSync(drivePath, { recursive: true })
    fs.mkdirSync(this._filesDir, { recursive: true })

    // Download every queued remote file into the Files directory.
    this.requestQueue.on('process-queue', async files => {
      this.requestQueue.reset()

      await this.fetchFileBatch(files, (stream, file) => {
        return new Promise((resolve, reject) => {
          // file.path comes from a peer's metadata, so keep it inside the Files directory.
          const dest = this._safeFilePath(file.path)

          if (!dest) {
            return reject(new Error('Refusing to write a synced file outside of the drive.'))
          }

          fs.mkdirSync(path.dirname(dest), { recursive: true })

          const writeStream = fs.createWriteStream(dest)

          pump(stream, writeStream, (err) => {
            // A failed download must not be announced or recorded as synced.
            if (err) return reject(err)

            setTimeout(async () => {
              if (this.opened) {
                this.emit('file-sync', file)
                const filePath = file.encrypted ? `/${file.uuid}` : file.path
                await this._localHB.put(filePath, {})
              }
            })

            resolve()
          })
        })
      })
    })

    // Periodically check this drive's connection to the internet.
    // When the internet is down, emit a network status updated event.
    if (this.checkNetworkStatus) {
      this._checkInternetInt = setInterval(async () => {
        if (!this._checkInternetInProgress) {
          this._checkInternetInProgress = true
          await this._checkInternet();
          this._checkInternetInProgress = false
        }
      }, 1500)
    }
  }

  /**
   * Opens the underlying stores, derives the drive keys, optionally joins the
   * swarm and starts relaying database updates. Sets `opened` to true when done.
   *
   * Side effect: when the process has no 'uncaughtException' listener yet, one
   * is installed here. Its body takes no action, so uncaught exceptions are
   * silently absorbed for the whole process.
   */
  async ready() {
    const uncaughtCount = process.listenerCount('uncaughtException')

    if (uncaughtCount === 0) {
      process.on('uncaughtException', (err) => {
        // Thrown values are not guaranteed to carry a string message.
        const message = err && typeof err.message === 'string' ? err.message : ''

        if (
          message.indexOf('PEER_NOT_FOUND') === -1 &&
          message.indexOf('PeerDiscovery') === -1 &&
          message.indexOf('connection reset') === -1 &&
          message.indexOf('Linearization') === -1
        ) {
          // uncaught error
        }
      })
    }

    await this._bootstrap()

    const stat = this._localDB.get('stat')

    if (!stat) {
      await this.database._updateStatBytes(0)
      this._localDB.put('stat', { ...this._stat })
    } else {
      this._stat = stat
    }

    this.publicKey = this.database.localMetaCore.key.toString('hex')

    if (!this.blind) {
      this.peerWriterKey = this.database.localInput.key.toString('hex')
    }

    if (this.peerPubKey) {
      this.discoveryKey = createTopicHash(this.peerPubKey).toString('hex')
    } else {
      this.discoveryKey = createTopicHash(this.publicKey).toString('hex')
    }

    if (this.keyPair && this.joinSwarm) {
      await this.connect()
    }

    // Data here can only be read by peer drives
    // that are sharing the same drive secret
    if (!this.blind) {
      this._collections.files = await this.database.collection('file')
      // This drastically speeds up queries and is necessary for sorting by fields
      this._collections.files.createIndex(['path'])
    }

    this.database.on('collection-update', async data => {
      if (!data) {
        this.emit('collection-update')
        // Nothing left to inspect; previously execution fell through and threw on data.value.
        return
      }

      const ownKey = this.keyPair.publicKey.toString('hex')

      // Ignore updates that originate from this drive.
      if (
        data.value.author === ownKey ||
        data.value.peerPubKey === ownKey ||
        data.value.peer === ownKey
      ) return

      if (data.collection === 'metadb' && !data.value.cores) {
        await this._update(data)
      } else {
        if (data.collection !== 'metadb') {
          // If this drive can't decipher the data inside the remote hypercore's then just listen for when those cores are updated.
          this.emit('collection-update', !this.blind ? data : null)
        }
      }
    })

    this.opened = true
  }

  /**
   * Connect to the Hyperswarm network. Closes any previous swarm first.
   */
  async connect() {
    if (this._swarm) {
      await this._swarm.close()
    }

    // swarmOpts is optional (see _bootstrap), so never dereference it directly.
    const swarmOpts = this.swarmOpts || {}

    this._swarm = new Swarm({
      keyPair: this.keyPair,
      blind: this.blind,
      workerKeyPairs: this._workerKeyPairs.keyPairs,
      topic: this.discoveryKey,
      publicKey: this.peerPubKey || this.publicKey,
      isServer: swarmOpts.server,
      isClient: swarmOpts.client,
      acl: swarmOpts.acl,
    })

    // Announce this drive's cores to every peer that connects.
    this._swarm.on('peer-connected', async socket => {
      if (this.broadcast) {
        socket.write(JSON.stringify({
          type: 'sync',
          meta: {
            __version: this._dbVersion,
            drivePubKey: this.peerPubKey || this.publicKey,
            peerPubKey: this.keyPair.publicKey.toString('hex'),
            blind: this.blind,
            writer: this.peerWriterKey,
            meta: this.publicKey
          }
        }))
      }
    })

    if (this.checkNetworkStatus) {
      this._swarm.on('disconnected', () => {
        if (this.network.drive) {
          this.network.drive = false
          this.emit('network-updated', { drive: this.network.drive })
        }
      })

      this._swarm.on('connected', () => {
        if (!this.network.drive) {
          this.network.drive = true
          this.emit('network-updated', { drive: this.network.drive })
        }
      })
    }

    this._swarm.on('message', async (peerPubKey, data) => {
      const drivePubKey = this.peerPubKey || this.publicKey

      try {
        const msg = JSON.parse(data.toString())

        if (msg && msg.type === 'sync') {
          this.emit('newMessage', msg)

          if (msg.meta.drivePubKey === drivePubKey) {
            await this.addPeer(msg.meta)
          } else {
            // ACCESS DENIED
          }
        }

        this.emit('message', peerPubKey, data)
      } catch (err) {
        console.log(err)
      }
    })

    // Serve a stored file to a peer that asks for it by content hash.
    this._swarm.on('file-requested', socket => {
      socket.once('data', async data => {
        const fileHash = data.toString('utf-8')
        let file

        try {
          file = await this.metadb.findOne({ hash: fileHash })
        } catch (err) {
          // Not Found
        }

        // Only serve files that resolve to a location inside the Files directory.
        const servedPath = file ? this._safeFilePath(file.path) : null

        if (!servedPath) {
          let err = new Error()
          err.message = 'Requested file was not found on drive'
          socket.destroy(err)
        } else {
          const readStream = fs.createReadStream(servedPath)

          pump(readStream, socket, (err) => {
            // handle done
          })
        }
      })

      socket.on('error', (err) => {
        // handle errors
      })
    })

    await this._swarm.ready()
  }

  /**
   * Registers a remote peer unless it is already known or speaks a different
   * database version.
   *
   * @param {object} peer - The `meta` payload of a 'sync' message
   *   (`__version`, `peerPubKey`, `blind`, `writer`, `meta`).
   */
  async addPeer(peer) {
    let known = null

    try {
      known = await this.database.metadb.findOne({ peerPubKey: peer.peerPubKey })
    } catch (err) {
      // A lookup failure is treated as "peer not stored yet".
    }

    // Handles both a thrown and a falsy "not found" from findOne.
    if (known) return
    if (peer.__version !== this._dbVersion) return

    await this.database.metadb.insert({
      __version: this._dbVersion,
      blacklisted: false,
      peerPubKey: peer.peerPubKey,
      blind: peer.blind,
      cores: {
        writer: peer.writer,
        meta: peer.meta
      }
    })

    await this.database.addRemotePeer(peer)
  }

  /**
   * Removes a remote peer's cores and flags the peer as blacklisted in metadb.
   *
   * @param {object} peer - Expects `publicKey`, `blind`, `writer` and `meta`.
   */
  async removePeer(peer) {
    await this.database.removeRemotePeer({
      peerPubKey: peer.publicKey,
      blind: peer.blind,
      writer: peer.writer,
      meta: peer.meta
    })

    await this.database.metadb.update({ peerPubKey: peer.publicKey }, { blacklisted: true })
  }

  /**
   * Writes a stream to the drive and records its metadata.
   *
   * Encrypted files are stored as `Files/<uuid>`; plain files are stored under
   * their own path inside `Files`.
   *
   * @param {string} path - Logical file path, with or without a leading '/'.
   * @param {stream.Readable} readStream - Source of the file content.
   * @param {object} [opts]
   * @param {boolean} [opts.encrypted] - Store under a uuid name and, unless skipEncryption is set, encrypt.
   * @param {boolean} [opts.skipEncryption] - Write the stream as-is even when `encrypted` is set.
   * @param {*} [opts.customData] - Stored as `custom_data` on the metadata records.
   * @returns {Promise<object>} The file metadata that was stored and emitted as 'file-add'.
   *   Rejects with the string 'No bytes were written.' for an empty plain file
   *   (kept as a string for backward compatibility), or with the underlying
   *   stream/database error.
   */
  async writeFile(path, readStream, opts = {}) {
    // NOTE: the `path` parameter shadows the `path` module inside this method.
    const uuid = uuidv4()
    const filePath = path[0] === '/' ? path.slice(1, path.length) : path
    let dest

    if (opts.encrypted) {
      dest = `${this._filesDir}/${uuid}`
    } else {
      // Built from the slash-less form so a path without a leading '/' still
      // lands inside Files rather than next to it.
      dest = `${this._filesDir}/${filePath}`
      fs.mkdirSync(getDirName(dest), { recursive: true })
    }

    const fullFile = filePath.slice(filePath.lastIndexOf('/') + 1)
    let fileName
    let fileExt

    // Split on the last dot so "report.final.pdf" yields the extension "pdf".
    const dotIdx = fullFile.lastIndexOf('.')

    if (dotIdx > -1) {
      fileName = fullFile.slice(0, dotIdx)
      fileExt = fullFile.slice(dotIdx + 1)
    }

    const writeStream = fs.createWriteStream(dest)
    const peer = this.keyPair.publicKey.toString('hex')

    if (opts.encrypted && !opts.skipEncryption) {
      const fixedChunker = new FixedChunker(readStream, MAX_PLAINTEXT_BLOCK_SIZE)
      const { key, header, file } = await Crypto.encryptStream(fixedChunker, writeStream)

      await this.database._updateStatBytes(file.size)

      await this.metadb.update(
        {
          path: `/${uuid}`
        },
        {
          uuid,
          size: file.size,
          hash: file.hash,
          path: `/${uuid}`,
          peer,
          discovery_key: this.discoveryKey,
          custom_data: opts.customData
        },
        {
          upsert: true
        }
      )

      const fileMeta = {
        uuid,
        name: fileName,
        size: file.size,
        mimetype: fileExt,
        encrypted: true,
        key: key.toString('hex'),
        header: header.toString('hex'),
        hash: file.hash,
        path: filePath,
        peer,
        discovery_key: this.discoveryKey,
        custom_data: opts.customData
      }

      await this._collections.files.update(
        {
          path: filePath
        },
        {
          ...fileMeta,
          updatedAt: new Date().toISOString()
        },
        {
          upsert: true
        }
      )

      this.emit('file-add', fileMeta)

      return {
        key: key.toString('hex'),
        header: header.toString('hex'),
        ...fileMeta
      }
    }

    let bytes = 0
    const hash = blake.blake2bInit(HASH_OUTPUT_LENGTH, null)

    // Pass-through stage that counts bytes and feeds the running blake2b hash.
    const calcHash = new stream.Transform({
      transform(chunk, encoding, callback) {
        bytes += chunk.byteLength
        blake.blake2bUpdate(hash, chunk)
        callback(null, chunk)
      }
    })

    await new Promise((resolve, reject) => {
      pump(readStream, calcHash, writeStream, (err) => {
        // A broken pipeline must not be recorded as a successfully written file.
        if (err) return reject(err)
        resolve()
      })
    })

    // Preserves the original one-tick deferral before the metadata is written.
    await new Promise(resolve => setTimeout(resolve))

    const _hash = Buffer.from(blake.blake2bFinal(hash)).toString('hex')

    if (bytes <= 0) {
      // eslint-disable-next-line no-throw-literal
      throw 'No bytes were written.'
    }

    await this.database._updateStatBytes(bytes)

    await this.metadb.update(
      {
        path
      },
      {
        uuid,
        size: bytes,
        hash: _hash,
        path,
        peer,
        discovery_key: this.discoveryKey,
        custom_data: opts.customData
      },
      {
        upsert: true
      }
    )

    const fileMeta = {
      uuid,
      name: fileName,
      size: bytes,
      mimetype: fileExt,
      hash: _hash,
      path: filePath,
      peer,
      discovery_key: this.discoveryKey,
      custom_data: opts.customData
    }

    await this._collections.files.update(
      {
        path: filePath
      },
      {
        ...fileMeta,
        updatedAt: new Date().toISOString()
      },
      {
        upsert: true
      }
    )

    this.emit('file-add', fileMeta)

    return fileMeta
  }

  /**
   * Opens a stored file for reading, decrypting it when the file record
   * carries a key and header.
   *
   * @param {string} path - Logical file path, with or without a leading '/'.
   * @returns {Promise<stream.Readable>} Stream of the (decrypted) file content.
   * @throws {Error} 'File does not exist.' when there is no record or no file on disk.
   */
  async readFile(path) {
    // NOTE: the `path` parameter shadows the `path` module inside this method.
    const filePath = path[0] === '/' ? path.slice(1, path.length) : path
    const file = await this._collections.files.findOne({ path: filePath })

    if (!file) {
      throw new Error('File does not exist.')
    }

    // Plain files live under their own path, not under their uuid.
    const storedPath = this._storedFilePath(file)

    if (!fs.existsSync(storedPath)) {
      throw new Error('File does not exist.')
    }

    const stream = fs.createReadStream(storedPath)

    // If key then decipher file
    if (file.encrypted && file.key && file.header) {
      const fixedChunker = new FixedChunker(stream, MAX_ENCRYPTED_BLOCK_SIZE)
      return Crypto.decryptStream(fixedChunker, file.key, file.header)
    }

    return stream
  }

  /**
   * Wraps an encrypted stream so that it yields decrypted content.
   *
   * @param {stream.Readable} stream - Encrypted source.
   * @param {string} key
   * @param {string} header
   * @returns {*} Whatever Crypto.decryptStream returns for the chunked stream.
   */
  decryptFileStream(stream, key, header) {
    const fixedChunker = new FixedChunker(stream, MAX_ENCRYPTED_BLOCK_SIZE)
    return Crypto.decryptStream(fixedChunker, key, header)
  }

  // TODO: Implement this
  fetchFileByHash(fileHash) {
  }

  /**
   * Requests a file from a remote drive and returns a stream of its content.
   *
   * @param {string} discoveryKey - Discovery key of the drive that holds the file.
   * @param {string} fileHash - Content hash sent to the remote drive.
   * @param {object} [opts]
   * @param {object} [opts.keyPair] - Key pair for the temporary swarm. Defaults to the drive's own.
   * @param {string} [opts.key] - With `header`, decrypts the returned stream.
   * @param {string} [opts.header]
   * @returns {Promise<stream.Readable>} A stream that is destroyed with the failure
   *   when the file could not be fetched.
   * @throws {Error} When `fileHash` or `discoveryKey` is missing or not a string.
   */
  async fetchFileByDriveHash(discoveryKey, fileHash, opts = {}) {
    // Validate before touching the arguments. The old code called an undefined
    // `reject` and only after already hashing the discovery key.
    if (!fileHash || typeof fileHash !== 'string') {
      throw new Error('File hash is required before making a request.')
    }

    if (!discoveryKey || typeof discoveryKey !== 'string') {
      throw new Error('Discovery key cannot be null and must be a string.')
    }

    const keyPair = opts.keyPair || this.keyPair
    const memStream = new MemoryStream()
    const topic = blake.blake2bHex(discoveryKey, null, HASH_OUTPUT_LENGTH)

    try {
      await this._initFileSwarm(memStream, topic, fileHash, 0, { keyPair })
    } catch (e) {
      // Deferred so the caller can attach an 'error' listener to the returned stream first.
      setTimeout(() => {
        memStream.destroy(e)
      })

      return memStream
    }

    if (opts.key && opts.header) {
      return this.decryptFileStream(memStream, opts.key, opts.header)
    }

    return memStream
  }

  /**
   * Downloads files in batches of FILE_BATCH_SIZE and hands each stream to `cb`.
   * Each file's size is added to the byte stat before the storage limit is
   * checked; files over the limit are skipped. Failures are swallowed so the
   * remaining batches still run.
   *
   * @param {object[]} files - File metadata (`discovery_key`, `hash`, `size`, `key`, `header`).
   * @param {function(stream.Readable, object): Promise} cb - Consumes one downloaded file.
   */
  async fetchFileBatch(files, cb) {
    const batches = new RequestChunker(files, FILE_BATCH_SIZE)

    for (let batch of batches) {
      const requests = []

      for (let file of batch) {
        if (typeof file.size === 'number') await this.database._updateStatBytes(file.size)

        const stat = this._localDB.get('stat')

        if (stat.total_bytes <= this.storageMaxBytes || !this.storageMaxBytes) {
          requests.push((async () => {
            if (!file.discovery_key) {
              // TODO: Fetch files by hash
              throw new Error('Files without a discovery key cannot be fetched yet.')
            }

            const keyPair = this._workerKeyPairs.getKeyPair()
            const stream = await this.fetchFileByDriveHash(file.discovery_key, file.hash, { key: file.key, header: file.header, keyPair })

            await cb(stream, file)
          })())
        }
      }

      try {
        await Promise.all(requests)
      } catch (err) {
        // Could not download some files. Will try again.
      }

      this.requestQueue.queue = []
    }
  }

  /**
   * Joins `topic` as a client, asks the first connecting peer for `fileHash`
   * and pipes the response into `stream`. Retries when no data arrived within
   * `fileTimeout` milliseconds.
   *
   * @param {stream.Writable} stream - Receives the file content.
   * @param {string} topic - Hex encoded swarm topic.
   * @param {string} fileHash
   * @param {number} attempts - Zero-based attempt counter.
   * @param {object} opts
   * @param {object} opts.keyPair - Key pair for the temporary swarm.
   * @returns {Promise<void>} Resolves once the first chunk of data arrives.
   */
  async _initFileSwarm(stream, topic, fileHash, attempts, { keyPair }) {
    // Checked before the Promise is created: throwing inside an async executor
    // would leave the returned promise pending forever.
    if (!this.opened) throw new Error('Drive is closed.')

    if (attempts > this.fileRetryAttempts) {
      const err = new Error('Unable to make a connection or receive data within the allotted time.')
      err.fileHash = fileHash
      this._workerKeyPairs.release(keyPair.publicKey.toString('hex'))
      stream.destroy(err)
      throw err
    }

    return new Promise((resolve, reject) => {
      const swarm = new Hyperswarm({ keyPair })

      let connected = false
      let receivedData = false
      let streamError = false

      swarm.join(Buffer.from(topic, 'hex'), { server: false, client: true })

      swarm.on('connection', async (socket, info) => {
        receivedData = false

        if (!connected) {
          connected = true

          // Tell the host drive which file we want
          socket.write(fileHash)

          socket.on('data', (data) => {
            resolve()
            stream.write(data)
            receivedData = true
          })

          socket.once('end', () => {
            if (receivedData) {
              this._workerKeyPairs.release(keyPair.publicKey.toString('hex'))
              stream.end()
              swarm.destroy()
            }
          })

          socket.once('error', (err) => {
            stream.destroy(err)
            streamError = true
            reject(err)
          })
        }
      })

      setTimeout(async () => {
        if (!connected || streamError || !receivedData) {
          try {
            await swarm.destroy()
            await this._initFileSwarm(stream, topic, fileHash, attempts + 1, { keyPair })
            resolve()
          } catch (e) {
            reject(e)
          }
        }
      }, this.fileTimeout)
    })
  }

  /**
   * Probes internet connectivity and emits 'network-updated' when the state flips.
   * Always resolves, so the polling guard in the constructor cannot get stuck.
   */
  async _checkInternet() {
    let online

    try {
      online = Boolean(await isOnline())
    } catch (err) {
      // A failed probe is reported as offline instead of leaving the promise pending.
      online = false
    }

    if (online !== this.network.internet) {
      this.network.internet = online
      this.emit('network-updated', { internet: this.network.internet })
    }
  }

  /**
   * Deletes a file from disk and removes its metadata records.
   * Does nothing when no record exists for the path.
   *
   * @param {string} filePath - Logical file path, with or without a leading '/'.
   */
  async unlink(filePath) {
    let fp = filePath

    if (fp[0] === '/') {
      fp = filePath.slice(1, fp.length)
    }

    const file = await this._collections.files.findOne({ path: fp })

    if (!file) return

    fs.unlinkSync(this._storedFilePath(file))

    await this._collections.files.delete({ _id: file._id })
    await this.database._updateStatBytes(-Math.abs(file.size))
    await this.metadb.delete({ hash: file.hash })

    this.emit('file-unlink', file)
  }

  // NOTE(review): `this.bee` and `this._clearStorage` are not defined anywhere in
  // this class, so calling this method as-is throws. It looks like legacy code —
  // confirm before relying on it.
  async destroyHyperfile(path) {
    const filePath = await this.bee.get(path)
    const file = await this.bee.get(filePath.value.hash)
    await this._clearStorage(file.value)
  }

  /**
   * Creates the local core, the local key/value stores and the Database, and
   * forwards the database's connection and peer events.
   */
  async _bootstrap() {
    this._localCore = new Hypercore(path.join(this.drivePath, `./LocalCore`))

    await this._localCore.ready()

    this._localHB = new Hyperbee(this._localCore, { keyEncoding: 'utf8', valueEncoding: 'json' })
    this._localDB = new FileDB(`${this.drivePath}/LocalDS`)

    this.database = new Database(this.storage || this.drivePath, {
      localDB: this._localDB,
      keyPair: this.keyPair,
      storageName: this.storageName,
      encryptionKey: this.encryptionKey,
      peerPubKey: this.peerPubKey,
      acl: this.swarmOpts && this.swarmOpts.acl ? this.swarmOpts.acl : null,
      joinSwarm: this.joinSwarm,
      fts: this.fullTextSearch,
      blind: this.blind,
      stat: this._stat,
      storageMaxBytes: this.storageMaxBytes,
      fileStatPath: this._fileStatPath,
      broadcast: this.broadcast,
      dbVersion: this._dbVersion
    })

    // Registered once. A second identical 'disconnected' handler used to be added
    // when checkNetworkStatus was set, but it could never observe a state change.
    this.database.on('disconnected', () => {
      if (this.network.drive) {
        this.network.drive = false
        this.emit('network-updated', { drive: this.network.drive })
      }
    })

    this.database.on('connected', () => {
      if (!this.network.drive) {
        this.network.drive = true
        this.emit('network-updated', { drive: this.network.drive })
      }
    })

    this.database.on('remote-cores-downloaded', () => {
      this.emit('remote-cores-downloaded')
    })

    // this.peers de-duplicates connect/disconnect notifications per peer key.
    this.database.on('peer-connected', (peer) => {
      if (!this.peers.has(peer.peerPubKey)) {
        this.emit('peer-connected', peer)
        this.peers.add(peer.peerPubKey)
      }
    })

    this.database.on('peer-disconnected', (peer) => {
      if (this.peers.has(peer.peerPubKey)) {
        this.emit('peer-disconnected', peer)
        this.peers.delete(peer.peerPubKey)
      }
    })

    await this.database.ready()

    this.db = this.database
    this.metadb = this.database.metadb
  }

  /**
   * Reacts to a metadb change from a remote peer: queues new files for download
   * (or records them as seen) and removes files the peer deleted.
   *
   * @param {object} data - Update with `type` and a `value` holding the file metadata.
   */
  async _update(data) {
    this.emit('sync', data)

    const fileHash = await this._localHB.get(data.value.path)
    const fromRemotePeer = data.value.peer !== this.keyPair.publicKey.toString('hex')

    if (data.type !== 'del' && fromRemotePeer && !fileHash) {
      // Download when syncing everything, or when the path is explicitly included.
      const shouldDownload =
        (this.syncFiles && !this.includeFiles && data.value.hash) ||
        (this.includeFiles && this.includeFiles.indexOf(data.value.path) > -1 && data.value.hash)

      if (shouldDownload) {
        const stat = this._localDB.get('stat')

        if (stat.total_bytes <= this.storageMaxBytes || !this.storageMaxBytes) {
          this.requestQueue.addFile(data.value)
        }
      } else {
        if (data.value.path) {
          const filePath = data.value.encrypted ? `/${data.value.uuid}` : data.value.path
          await this._localHB.put(filePath, {})
        }
      }
    }

    if (data.type === 'del' && fromRemotePeer) {
      try {
        // The path is chosen by the remote peer; never delete outside the Files directory.
        const filePath = this._safeFilePath(`${data.value.path}`)

        if (filePath && fs.existsSync(filePath)) {
          fs.unlinkSync(filePath)

          const _file = await this._localHB.get(data.value.path)

          if (_file) {
            await this._localHB.del(data.value.path)
          }

          await this.database._updateStatBytes(-Math.abs(data.value.size))

          setTimeout(() => {
            this.emit('file-unlink', data.value)
          })
        }
      } catch (err) {
        console.log(err)
        throw err
      }
    }
  }

  /**
   * Resolves a drive-relative path against the Files directory.
   *
   * @param {string} relPath - Path relative to the Files directory (leading '/' allowed).
   * @returns {string|null} The joined path, or null when `relPath` is not a
   *   non-empty string or would resolve to the Files directory itself or outside it.
   */
  _safeFilePath(relPath) {
    if (typeof relPath !== 'string' || relPath.length === 0) return null

    const target = path.join(this._filesDir, relPath)
    const rel = path.relative(this._filesDir, target)

    if (rel === '' || rel === '..' || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel)) {
      return null
    }

    return target
  }

  /**
   * Locates the on-disk copy that belongs to a file record.
   *
   * @param {object} file - Record from the `file` collection (`uuid`, `path`, `encrypted`).
   * @returns {string} `Files/<uuid>` for encrypted files or when such a file
   *   exists (e.g. written with skipEncryption); otherwise `Files/<path>`.
   */
  _storedFilePath(file) {
    const uuidPath = path.join(this._filesDir, String(file.uuid))

    if (file.encrypted || (file.uuid && fs.existsSync(uuidPath))) {
      return uuidPath
    }

    return path.join(this._filesDir, file.path)
  }

  // Deprecated
  info() {
    const bytes = getTotalSize(this.drivePath)
    return {
      size: bytes
    }
  }

  /**
   * @returns {Promise<object>} The 'stat' entry of the local key/value store.
   */
  async stat() {
    return this._localDB.get('stat')
  }

  /**
   * Close drive and disconnect from all Hyperswarm topics
   */
  async close() {
    this.opened = false

    // _swarm and database only exist after ready(); closing an unopened drive must not throw.
    if (this.joinSwarm && this._swarm) {
      await this._swarm.close()
    }

    if (this._localCore) {
      await this._localCore.close()
    }

    if (this.database) {
      await this.database.close()
      this.database = null
    }

    clearInterval(this._checkInternetInt)

    this.network = {
      internet: false,
      drive: false
    }

    // Emitted exactly once; the checkNetworkStatus branch used to repeat it.
    this.emit('network-updated', this.network)

    if (this.joinSwarm) {
      this.removeAllListeners()
      this.requestQueue.removeAllListeners()

      if (this._swarm) {
        this._swarm.removeAllListeners()
      }
    }
  }
}
/**
 * Hashes a topic with SHA-256.
 *
 * @param {string|Buffer} topic - Value to hash.
 * @returns {Buffer} The raw 32 byte digest.
 */
function createTopicHash(topic) {
  const { createHash } = require('crypto')
  const hasher = createHash('sha256')

  hasher.update(topic)

  return hasher.digest()
}
/**
 * Consumes a stream and compares its blake2b hash with an expected hash.
 *
 * @param {stream.Readable} stream - Content to verify.
 * @param {string} remoteHash - Expected hash as a hex string.
 * @returns {Promise<void>} Resolves when the hashes match. Rejects with the
 *   stream's error, or with the string 'Hashes do not match'.
 */
async function auditFile(stream, remoteHash) {
  return new Promise((resolve, reject) => {
    const hashState = blake.blake2bInit(HASH_OUTPUT_LENGTH, null)

    const onChunk = (chunk) => {
      blake.blake2bUpdate(hashState, chunk)
    }

    const onEnd = () => {
      const digestHex = Buffer.from(blake.blake2bFinal(hashState)).toString('hex')

      if (digestHex !== remoteHash) {
        reject('Hashes do not match')
      } else {
        resolve()
      }
    }

    stream.on('error', (err) => reject(err))
    stream.on('data', onChunk)
    stream.on('end', onEnd)
  })
}
/**
 * Recursively lists every file below a directory.
 *
 * @param {string} dirPath - Directory to walk.
 * @param {string[]} [arrayOfFiles] - Accumulator that results are appended to.
 * @returns {string[]} Paths of all non-directory entries, joined onto dirPath.
 */
const getAllFiles = function (dirPath, arrayOfFiles) {
  const collected = arrayOfFiles || []
  // Declared locally: the previous bare `files = ...` created an implicit global
  // (a ReferenceError in strict mode / ES modules).
  const entries = fs.readdirSync(dirPath)

  for (const entry of entries) {
    const entryPath = path.join(dirPath, entry)

    if (fs.statSync(entryPath).isDirectory()) {
      getAllFiles(entryPath, collected)
    } else {
      collected.push(entryPath)
    }
  }

  return collected
}
/**
 * Sums the sizes of all files below a directory.
 *
 * @param {string} directoryPath - Directory to measure.
 * @returns {number} Combined size in bytes as reported by fs.statSync.
 */
const getTotalSize = function (directoryPath) {
  return getAllFiles(directoryPath).reduce(
    (runningTotal, filePath) => runningTotal + fs.statSync(filePath).size,
    0
  )
}
// The Drive class is this module's only export.
module.exports = Drive