
pixl-server-storage

A key/value/list storage component for the pixl-server framework.
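
Before the source itself, a minimal setup sketch showing how this component is typically loaded into a pixl-server instance. This follows the package's documented bootstrap pattern; the config values and filesystem paths below are placeholders, not defaults.

// Minimal setup sketch (illustrative config values)
var PixlServer = require('pixl-server');

var server = new PixlServer({
    __name: 'MyServer',
    config: {
        "log_dir": "/var/log",
        "debug_level": 9,
        "Storage": {
            "engine": "Filesystem",
            "Filesystem": { "base_dir": "/var/data/myserver" }
        }
    },
    components: [ require('pixl-server-storage') ]
});

server.startup( function() {
    // server is ready; the component is available as server.Storage
    var storage = server.Storage;
} );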

// PixlServer Storage System - Hash Mixin
// Copyright (c) 2016 Joseph Huckaby
// Released under the MIT License

var util = require("util");
var async = require('async');
var Class = require("pixl-class");
var Tools = require("pixl-tools");

module.exports = Class.create({

    hashCreate: function(path, opts, callback) {
        // Create new hash table
        var self = this;
        if (!opts) opts = {};
        if (!opts.page_size) opts.page_size = this.hashItemsPerPage;
        opts.length = 0;
        opts.type = 'hash';

        this.logDebug(9, "Creating new hash: " + path, opts);

        this.get(path, function(err, hash) {
            if (hash) {
                // hash already exists
                self.logDebug(9, "Hash already exists: " + path, hash);
                return callback(null, hash);
            }
            self.put( path, opts, function(err) {
                if (err) return callback(err);

                // create first page
                self.put( path + '/data', { type: 'hash_page', length: 0, items: {} }, function(err) {
                    if (err) return callback(err);
                    else callback(null, opts);
                } ); // put
            } ); // header created
        } ); // get check
    },

    _hashLoad: function(path, create_opts, callback) {
        // Internal method: load hash root, optionally creating it if it doesn't exist
        var self = this;
        if (create_opts && (typeof(create_opts) != 'object')) create_opts = {};

        this.logDebug(9, "Loading hash: " + path);

        this.get(path, function(err, hash) {
            if (hash) {
                // hash already exists
                callback(null, hash);
            }
            else if (create_opts && err && (err.code == "NoSuchKey")) {
                // create new hash, ONLY if record was not found (and not some other error)
                self.logDebug(9, "Hash not found, creating it: " + path);
                self.hashCreate(path, create_opts, function(err, hash) {
                    if (err) callback(err);
                    else callback( null, hash );
                } );
            }
            else {
                // hash doesn't exist and no create was requested, or some other error occurred
                self.logDebug(9, "Hash could not be loaded: " + path + ": " + err);
                callback(err);
            }
        } ); // get
    },

    _hashLock: function(key, wait, callback) {
        // internal hash lock wrapper
        // uses unique key prefix so won't deadlock with user locks
        this.lock( '|'+key, wait, callback );
    },

    _hashUnlock: function(key) {
        // internal hash unlock wrapper
        this.unlock( '|'+key );
    },

    _hashShareLock: function(key, wait, callback) {
        // internal hash shared lock wrapper
        // uses unique key prefix so won't deadlock with user locks
        this.shareLock( 'C|'+key, wait, callback );
    },

    _hashShareUnlock: function(key) {
        // internal hash shared unlock wrapper
        this.shareUnlock( 'C|'+key );
    },

    hashPut: function(path, hkey, hvalue, create_opts, callback) {
        // store key/value pair into hash table
        var self = this;
        if (!callback && (typeof(create_opts) == 'function')) {
            callback = create_opts;
            create_opts = {};
        }
        if (!path) return callback(new Error("Hash path must be a valid string."));
        if (!hkey) return callback(new Error("Hash key must be a valid string."));
        if (typeof(hvalue) == 'undefined') return callback(new Error("Hash value must not be undefined."));

        this.logDebug(9, "Storing hash key: " + path + ": " + hkey, this.debugLevel(10) ? hvalue : null);

        // lock hash for this operation
        this._hashLock(path, true, function() {
            // load header
            self._hashLoad(path, create_opts, function(err, hash) {
                if (err) {
                    self._hashUnlock(path);
                    return callback(err);
                }
                var state = {
                    path: path,
                    data_path: path + '/data',
                    hkey: ''+hkey,
                    hvalue: hvalue,
                    hash: hash,
                    index_depth: -1,
                    key_digest: Tools.digestHex(hkey, 'md5')
                };
                self._hashPutKey(state, function(err) {
                    // done
                    self._hashUnlock(path);
                    return callback(err);
                }); // _hashPutKey
            }); // load
        }); // lock
    },
    _hashPutKey: function(state, callback) {
        // internal hash put method, store at one hashing level
        // recurse for deeper indexes
        var self = this;

        self.get(state.data_path, function(err, data) {
            if (err) data = { type: 'hash_page', length: 0, items: {} };

            if (data.type == 'hash_index') {
                // recurse for deeper level
                state.index_depth++;
                state.data_path += '/' + state.key_digest.substring(state.index_depth, state.index_depth + 1);
                return self._hashPutKey(state, callback);
            }
            else {
                // got page, store at this level
                var new_key = false;
                data.items = Tools.copyHashRemoveProto( data.items );

                if (!(state.hkey in data.items)) {
                    data.length++;
                    state.hash.length++;
                    new_key = true;
                }
                data.items[state.hkey] = state.hvalue;

                var finish = function(err) {
                    if (err) return callback(err);

                    if (data.length > state.hash.page_size) {
                        // enqueue page reindex task
                        self.logDebug(9, "Hash page has grown beyond max keys, running index split: " + state.data_path, {
                            num_keys: data.length,
                            page_size: state.hash.page_size
                        });
                        self._hashSplitIndex(state, callback);
                    } // reindex
                    else {
                        // no reindex needed
                        callback();
                    }
                }; // finish

                // save page and possibly hash header
                self.put(state.data_path, data, function(err) {
                    if (err) return callback(err);
                    if (new_key) self.put(state.path, state.hash, finish);
                    else finish();
                }); // put
            } // hash_page
        }); // get
    },

    _hashSplitIndex: function(state, callback) {
        // hash split index
        // split hash level into 16 new index buckets
        var self = this;
        state.index_depth++;

        this.logDebug(9, "Splitting hash data into new index: " + state.data_path + " (" + state.index_depth + ")");

        // load data page which will be converted to a hash index
        self.get(state.data_path, function(err, data) {
            // check for error or if someone stepped on our toes
            if (err) {
                // normal, hash may have been deleted
                self.logError('hash', "Failed to fetch data record for hash split: " + state.data_path + ": " + err);
                return callback();
            }
            if (data.type == 'hash_index') {
                // normal, hash may already have been indexed
                self.logDebug(9, "Data page has been reindexed already, skipping: " + state.data_path, data);
                return callback();
            }

            // rehash keys at new index depth
            var pages = {};
            data.items = Tools.copyHashRemoveProto( data.items );

            for (var hkey in data.items) {
                var key_digest = Tools.digestHex(hkey, 'md5');
                var ch = key_digest.substring(state.index_depth, state.index_depth + 1);

                if (!pages[ch]) pages[ch] = { type: 'hash_page', length: 0, items: {} };
                pages[ch].items[hkey] = data.items[hkey];
                pages[ch].length++;

                // Note: In the very rare case where a subpage also overflows,
                // the next hashPut will take care of the nested reindex.
            } // foreach key

            // save all pages in parallel, then rewrite data page as an index
            async.forEachOfLimit(pages, self.concurrency,
                function(page, ch, callback) {
                    self.put( state.data_path + '/' + ch, page, callback );
                },
                function(err) {
                    if (err) {
                        return callback( new Error("Failed to write data records for hash split: " + state.data_path + "/*: " + err.message) );
                    }

                    // final conversion of original data path
                    self.put( state.data_path, { type: 'hash_index' }, function(err) {
                        if (err) {
                            return callback( new Error("Failed to write data record for hash split: " + state.data_path + ": " + err.message) );
                        }
                        self.logDebug(9, "Hash split complete: " + state.data_path);
                        callback();
                    }); // final put
                } // complete
            ); // forEachOf
        }); // get
    },
    hashPutMulti: function(path, records, create_opts, callback) {
        // put multiple hash records at once, given object of keys and values
        // need concurrency limit of 1 because hashPut locks
        var self = this;
        if (!callback && (typeof(create_opts) == 'function')) {
            callback = create_opts;
            create_opts = {};
        }

        async.eachLimit(Object.keys(records), 1,
            function(hkey, callback) {
                // iterator for each key
                self.hashPut(path, hkey, records[hkey], create_opts, function(err) {
                    callback(err);
                } );
            },
            function(err) {
                // all keys stored
                callback(err);
            }
        );
    },

    hashGet: function(path, hkey, callback) {
        // fetch key/value pair from hash table
        var self = this;

        var state = {
            path: path,
            data_path: path + '/data',
            hkey: hkey,
            index_depth: -1,
            key_digest: Tools.digestHex(hkey, 'md5')
        };

        this.logDebug(9, "Fetching hash key: " + path + ": " + hkey);

        this._hashShareLock(path, true, function() {
            // share locked
            self._hashGetKey(state, function(err, value) {
                // done
                self._hashShareUnlock(path);
                callback(err, value);
            }); // _hashGetKey
        } ); // _hashShareLock
    },

    _hashGetKey: function(state, callback) {
        // internal hash get method, fetch at one hashing level
        // recurse for deeper indexes
        var self = this;

        self.get(state.data_path, function(err, data) {
            if (err) return callback(err);

            if (data.type == 'hash_index') {
                // recurse for deeper level
                state.index_depth++;
                state.data_path += '/' + state.key_digest.substring(state.index_depth, state.index_depth + 1);
                return self._hashGetKey(state, callback);
            }
            else {
                // got page, fetch at this level
                data.items = Tools.copyHashRemoveProto( data.items );

                if (!(state.hkey in data.items)) {
                    // key not found
                    var err = new Error("Failed to fetch hash key: " + state.path + ": " + state.hkey + ": Not found");
                    err.code = "NoSuchKey";
                    return callback(err);
                }

                callback(null, data.items[state.hkey]);
            } // hash_page
        }); // get
    },

    hashGetMulti: function(path, hkeys, callback) {
        // fetch multiple hash records at once, given array of keys
        // callback is provided an array of values in matching order to keys
        var self = this;
        var records = Object.create(null);

        async.eachLimit(hkeys, this.concurrency,
            function(hkey, callback) {
                // iterator for each key
                self.hashGet(path, hkey, function(err, value) {
                    if (err) return callback(err);
                    records[hkey] = value;
                    callback();
                } );
            },
            function(err) {
                if (err) return callback(err);

                // sort records into array of values ordered by keys
                var values = [];
                for (var idx = 0, len = hkeys.length; idx < len; idx++) {
                    values.push( records[hkeys[idx]] );
                }

                callback(null, values);
            }
        );
    },

    hashUpdate: function(path, hkey, updates, callback) {
        // update existing key/value pair in hash table
        var self = this;
        if (!path) return callback(new Error("Hash path must be a valid string."));
        if (!hkey) return callback(new Error("Hash key must be a valid string."));
        if (!Tools.isaHash(updates)) return callback(new Error("Hash updates must be an object."));

        this.logDebug(9, "Updating hash key: " + path + ": " + hkey, this.debugLevel(10) ? updates : null);

        // lock hash for this operation
        this._hashLock(path, true, function() {
            // load header, do not create new
            self._hashLoad(path, false, function(err, hash) {
                if (err) {
                    self._hashUnlock(path);
                    return callback(err);
                }
                var state = {
                    path: path,
                    data_path: path + '/data',
                    hkey: ''+hkey,
                    updates: updates,
                    hash: hash,
                    index_depth: -1,
                    key_digest: Tools.digestHex(hkey, 'md5')
                };
                self._hashUpdateKey(state, function(err) {
                    // done
                    self._hashUnlock(path);
                    return callback(err);
                }); // _hashUpdateKey
            }); // load
        }); // lock
    },
    _hashUpdateKey: function(state, callback) {
        // internal hash update method, store at one hashing level
        // recurse for deeper indexes
        var self = this;

        self.get(state.data_path, function(err, data) {
            if (err) data = { type: 'hash_page', length: 0, items: {} };

            if (data.type == 'hash_index') {
                // recurse for deeper level
                state.index_depth++;
                state.data_path += '/' + state.key_digest.substring(state.index_depth, state.index_depth + 1);
                return self._hashUpdateKey(state, callback);
            }
            else {
                // got page, our key should be at this level
                data.items = Tools.copyHashRemoveProto( data.items );

                if (!(state.hkey in data.items)) {
                    // key not found
                    var err = new Error("Failed to fetch hash key: " + state.path + ": " + state.hkey + ": Not found");
                    err.code = "NoSuchKey";
                    return callback(err);
                }

                var hvalue = data.items[state.hkey];

                // apply updates directly to the value, in place
                for (var key in state.updates) {
                    Tools.setPath( hvalue, key, state.updates[key] );
                }

                // save page
                self.put(state.data_path, data, callback);
            } // hash_page
        }); // get
    },

    hashUpdateMulti: function(path, records, callback) {
        // update multiple hash records at once, given object of keys and values
        // need concurrency limit of 1 because hashUpdate locks
        var self = this;

        async.eachLimit(Object.keys(records), 1,
            function(hkey, callback) {
                // iterator for each key
                self.hashUpdate(path, hkey, records[hkey], function(err) {
                    callback(err);
                } );
            },
            function(err) {
                // all keys updated
                callback(err);
            }
        );
    },

    hashEachPage: function(path, iterator, callback) {
        // call user iterator for each populated hash page, data only
        // iterator will be passed page items hash object
        var self = this;

        this._hashShareLock(path, true, function() {
            // share locked
            self._hashEachPage(path + '/data',
                function(data, callback) {
                    if ((data.type == 'hash_page') && (data.length > 0)) {
                        data.items = Tools.copyHashRemoveProto( data.items );
                        iterator(data.items, callback);
                    }
                    else callback();
                },
                function(err) {
                    self._hashShareUnlock(path);
                    callback(err);
                }
            ); // _hashEachPage
        } ); // _hashShareLock
    },

    _hashEachPage: function(data_path, iterator, callback) {
        // internal method for iterating over hash pages
        // invokes iterator for both index and data pages
        var self = this;

        self.get(data_path, function(err, data) {
            if (err) return callback(); // normal, page may not exist
            data.path = data_path;

            iterator(data, function(err) {
                if (err) return callback(err); // abnormal

                if (data.type == 'hash_index') {
                    // recurse for deeper level
                    async.eachSeries( [0,1,2,3,4,5,6,7,8,9,'a','b','c','d','e','f'],
                        function(ch, callback) {
                            self._hashEachPage( data_path + '/' + ch, iterator, callback );
                        },
                        callback
                    );
                }
                else callback();
            }); // complete
        }); // get
    },

    hashGetAll: function(path, callback) {
        // return ALL keys/values as a single, in-memory hash
        var self = this;
        var everything = Object.create(null);
        this._hashShareLock(path, true, function() {
            // share locked
            self._hashEachPage( path + '/data',
                function(page, callback) {
                    // called for each hash page (index or data)
                    if (page.type == 'hash_page') {
                        page.items = Tools.copyHashRemoveProto( page.items );
                        Tools.mergeHashInto( everything, page.items );
                    }
                    callback();
                },
                function(err) {
                    self._hashShareUnlock(path);
                    callback(err, err ? null : everything);
                } // done
            ); // _hashEachPage
        } ); // _hashShareLock
    },

    hashEach: function(path, iterator, callback) {
        // iterate over hash and invoke function for every key/value
        // iterator function is asynchronous (callback), like async.forEachOfSeries
        var self = this;

        this._hashShareLock(path, true, function() {
            // share locked
            self._hashEachPage( path + '/data',
                function(page, callback) {
                    // called for each hash page (index or data)
                    if (page.type == 'hash_page') {
                        page.items = Tools.copyHashRemoveProto( page.items );
                        async.forEachOfSeries( page.items,
                            function(hvalue, hkey, callback) {
                                // swap places of hkey,hvalue in iterator args because I HATE how async does it
                                iterator(hkey, hvalue, callback);
                            },
                            callback
                        ); // forEachOfSeries
                    } // hash_page
                    else callback();
                }, // page
                function(err) {
                    self._hashShareUnlock(path);
                    callback(err);
                }
            ); // _hashEachPage
        } ); // _hashShareLock
    },

    hashEachSync: function(path, iterator, callback) {
        // iterate over hash and invoke function for every key/value
        // iterator function is synchronous (no callback), like Array.forEach()
        var self = this;

        this._hashShareLock(path, true, function() {
            // share locked
            self._hashEachPage( path + '/data',
                function(page, callback) {
                    // called for each hash page (index or data)
                    if (page.type == 'hash_page') {
                        page.items = Tools.copyHashRemoveProto( page.items );
                        for (var hkey in page.items) {
                            if (iterator( hkey, page.items[hkey] ) === false) {
                                // user abort
                                return callback( new Error("User Abort") );
                            }
                        }
                    } // hash_page
                    callback();
                }, // page
                function(err) {
                    self._hashShareUnlock(path);
                    callback(err);
                }
            ); // _hashEachPage
        } ); // _hashShareLock
    },

    hashCopy: function(old_path, new_path, callback) {
        // copy entire hash to new location
        var self = this;
        this.logDebug(9, "Copying hash: " + old_path + " to " + new_path);

        this._hashLock( new_path, true, function() {
            // copy header
            self.copy( old_path, new_path, function(err) {
                if (err) {
                    self._hashUnlock(new_path);
                    return callback(err);
                }

                // iterate over each page
                self._hashEachPage( old_path + '/data',
                    function(page, callback) {
                        // called for each hash page (index or data)
                        var new_page_path = page.path.replace( old_path, new_path );

                        // copy page
                        self.copy(page.path, new_page_path, callback);
                    }, // page
                    function(err) {
                        // all pages copied
                        self._hashUnlock(new_path);
                        callback(err);
                    }
                ); // _hashEachPage
            } ); // copy header
        }); // lock
    },

    hashRename: function(old_path, new_path, callback) {
        // Copy, then delete hash (and all keys)
        var self = this;
        this.logDebug(9, "Renaming hash: " + old_path + " to " + new_path);

        this.hashCopy( old_path, new_path, function(err) {
            // copy complete, now delete old hash
            if (err) return callback(err);

            self.hashDeleteAll( old_path, true, callback );
        } ); // copied
    },

    hashDeleteAll: function(path, entire, callback) {
        // delete entire hash
        var self = this;

        // support 2-arg calling convention (no entire)
        if (!callback && (typeof(entire) == 'function')) {
            callback = entire;
            entire = false;
        }

        this.logDebug(9, "Deleting hash: " + path);

        this._hashLock( path, true, function() {
            // load header
            self._hashLoad(path, false, function(err, hash) {
                if (err) {
                    self._hashUnlock(path);
                    return callback(err);
                }

                // iterate over each page
                self._hashEachPage( path + '/data',
                    function(page, callback) {
                        // called for each hash page (index or data)
                        self.delete(page.path, callback);
                    }, // page
                    function(err) {
                        // all pages deleted
                        if (err) {
                            self._hashUnlock(path);
                            return callback(err);
                        }

                        if (entire) {
                            // delete hash header as well
                            self.delete( path, function(err) {
                                self._hashUnlock(path);
                                callback(err);
                            } ); // delete
                        }
                        else {
                            // reset hash for future use
                            hash.length = 0;
                            self.put( path, hash, function(err) {
                                self._hashUnlock(path);
                                callback(err);
                            } ); // put
                        }
                    } // complete
                ); // _hashEachPage
            }); // _hashLoad
        }); // lock
    },
    hashDelete: function(path, hkey, entire, callback) {
        // delete single key from hash
        var self = this;

        // support 3-arg calling convention (no entire)
        if (!callback && (typeof(entire) == 'function')) {
            callback = entire;
            entire = false;
        }

        this.logDebug(9, "Deleting hash key: " + path + ": " + hkey);

        // lock hash for this operation
        this._hashLock(path, true, function() {
            // load header
            self._hashLoad(path, false, function(err, hash) {
                if (err) {
                    self._hashUnlock(path);
                    return callback(err);
                }
                var state = {
                    path: path,
                    data_path: path + '/data',
                    hkey: hkey,
                    hash: hash,
                    index_depth: -1,
                    key_digest: Tools.digestHex(hkey, 'md5'),
                    entire: entire
                };
                self._hashDeleteKey(state, function(err) {
                    // done
                    self._hashUnlock(path);
                    return callback(err);
                }); // _hashDeleteKey
            }); // load
        }); // lock
    },

    _hashDeleteKey: function(state, callback) {
        // internal hash delete method, delete from one hashing level
        // recurse for deeper indexes
        var self = this;

        self.get(state.data_path, function(err, data) {
            if (err) return callback(err);

            if (data.type == 'hash_index') {
                // recurse for deeper level
                state.index_depth++;
                state.data_path += '/' + state.key_digest.substring(state.index_depth, state.index_depth + 1);
                return self._hashDeleteKey(state, callback);
            }
            else {
                // got page, delete from this level
                data.items = Tools.copyHashRemoveProto( data.items );

                if (!(state.hkey in data.items)) {
                    var err = new Error("Failed to delete hash key: " + state.path + ": " + state.hkey + ": Not found");
                    err.code = 'NoSuchKey';
                    self.logError('hash', err.message);
                    return callback(err);
                }

                data.length--;
                state.hash.length--;
                delete data.items[state.hkey];

                // check for delete entire on empty
                if (!state.hash.length && state.entire) {
                    self.delete(state.data_path, function(err) {
                        if (err) return callback(err);
                        self.delete(state.path, callback);
                    }); // delete
                    return;
                }

                // save page and hash header
                self.put(state.data_path, data, function(err) {
                    if (err) return callback(err);

                    self.put(state.path, state.hash, function(err) {
                        if (err) return callback(err);

                        // index unsplit time?
                        if (!data.length && (state.index_depth > -1)) {
                            // index unsplit task
                            self.logDebug(9, "Hash page has no more keys, running unsplit check: " + state.data_path);
                            self._hashUnsplitIndexCheck(state, callback);
                        } // unsplit
                        else {
                            // no unsplit check needed
                            callback();
                        }
                    }); // put
                }); // put
            } // hash_page
        }); // get
    },
    _hashUnsplitIndexCheck: function(state, callback) {
        // unsplit hash index
        // check if all sub-pages are empty, and if so, delete all and convert index back into page
        var self = this;
        var data_path = state.data_path.replace(/\/\w+$/, '');
        var found_keys = false;
        var sub_pages = [];

        this.logDebug(9, "Checking all hash index sub-pages for unsplit: " + data_path + "/*");

        // make sure page is still an index
        self.get(data_path, function(err, data) {
            if (err) {
                self.logDebug(9, "Hash page could not be loaded, aborting unsplit: " + data_path);
                return callback();
            }
            if (data.type != 'hash_index') {
                self.logDebug(9, "Hash page is no longer an index, aborting unsplit: " + data_path);
                return callback();
            }

            // test each sub-page, counting keys
            // abort on first key (i.e. no need to load all pages in that case)
            async.eachLimit( [0,1,2,3,4,5,6,7,8,9,'a','b','c','d','e','f'], self.concurrency,
                function(ch, callback) {
                    self.get( data_path + '/' + ch, function(err, data) {
                        if (data) sub_pages.push( ch );
                        if (data && ((data.type != 'hash_page') || data.length)) {
                            self.logDebug(9, "Index page still has keys: " + data_path + '/' + ch);
                            found_keys = true;
                            callback( new Error("ABORT") );
                        }
                        else callback();
                    } );
                },
                function(err) {
                    // scanned all pages
                    if (found_keys || !sub_pages.length) {
                        // nothing to be done
                        self.logDebug(9, "Nothing to do, aborting unsplit: " + data_path);
                        return callback();
                    }

                    self.logDebug(9, "Proceeding with unsplit: " + data_path);

                    // proceed with unsplit
                    async.eachLimit( sub_pages, self.concurrency,
                        function(ch, callback) {
                            self.delete( data_path + '/' + ch, callback );
                        },
                        function(err) {
                            // all pages deleted, now rewrite index
                            if (err) {
                                // this should never happen, but we must continue the op.
                                // we cannot leave the index in a partially unsplit state.
                                self.logError('hash', "Failed to delete index sub-pages: " + data_path + "/*: " + err);
                            }

                            self.put( data_path, { type: 'hash_page', length: 0, items: {} }, function(err) {
                                // all done
                                if (err) {
                                    self.logError('hash', "Failed to put index page: " + data_path + ": " + err);
                                }
                                else {
                                    self.logDebug(9, "Unsplit operation complete: " + data_path);
                                }
                                callback();
                            } ); // put
                        } // pages deleted
                    ); // eachLimit
                } // key check
            ); // eachLimit
        } ); // load
    },

    hashDeleteMulti: function(path, hkeys, callback) {
        // delete multiple hash records at once, given array of keys
        // need concurrency limit of 1 because hashDelete locks
        var self = this;

        async.eachLimit(hkeys, 1,
            function(hkey, callback) {
                // iterator for each key
                self.hashDelete(path, hkey, function(err) {
                    callback(err);
                } );
            },
            function(err) {
                // all keys deleted
                callback(err);
            }
        );
    },

    hashGetInfo: function(path, callback) {
        // Return info about hash (number of items, etc.)
        this._hashLoad( path, false, callback );
    }

});
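
For reference, a short usage sketch of the hash API defined above. A hash lives as a header record at `path` plus one or more `hash_page` records under `path + '/data'`; when a page grows past `page_size` keys it is split into a 16-way `hash_index` keyed by successive hex digits of the key's MD5 digest. The sketch assumes a started server whose Storage component is available as `storage` (as in the setup sketch near the top); the 'users' path and record contents are illustrative.

// Usage sketch (illustrative): store, fetch, and iterate hash keys
storage.hashPut( 'users', 'jsmith', { name: "Jane Smith", age: 30 }, function(err) {
    if (err) throw err;

    storage.hashGet( 'users', 'jsmith', function(err, value) {
        if (err) throw err; // err.code == "NoSuchKey" if the key is missing
        console.log(value); // { name: "Jane Smith", age: 30 }

        // asynchronous iteration over all keys/values
        storage.hashEach( 'users',
            function(hkey, hvalue, callback) {
                console.log(hkey, hvalue);
                callback(); // pass an error here to abort the loop
            },
            function(err) {
                if (err) throw err;
            }
        );
    } );
} );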