// federer
// Version:
// Experiments in asynchronous federated learning and decentralized learning
// 44 lines • 1.97 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.zipfSplits = exports.zipf = void 0;
const assert = require("assert");
const common_1 = require("../../common");
/**
 * Given a number of splits, determines the fraction of data in each split.
 * Zipf's law is given by f(k; s, N) = (1 / k**s) / sum_{n=1..N} (1 / n**s).
 * Each rank's weight 1 / k**s is divided by the sum of all weights.
 *
 * @param N Number of splits
 * @param s The value of the exponent characterizing the distribution
 * @returns Array of split fractions, one per rank produced by `range(1, N)`;
 *          an empty array when no ranks are produced
 */
function zipf(N, s) {
    // NOTE(review): presumably range(1, N) yields the ranks 1..N — confirm against ../../common.
    const weights = common_1.range(1, N).map((rank) => 1 / rank ** s);
    // Seed the sum with 0 so an empty rank list yields [] rather than a
    // "Reduce of empty array with no initial value" TypeError. Adding 0 first
    // does not change the sum for non-empty input.
    const total = weights.reduce((sum, weight) => sum + weight, 0);
    return weights.map((weight) => weight / total);
}
exports.zipf = zipf;
/**
 * Splits `size` samples into `numberOfBatches` batches whose sizes follow the
 * fractions returned by `zipf(numberOfBatches, s)`.
 *
 * Each fraction is scaled by `size` and rounded to the nearest integer; the
 * rounding surplus or deficit is then folded into the first batch so that the
 * returned sizes add up to exactly `size`.
 *
 * @param size total number of samples to be split
 * @param numberOfBatches total number of batches to be formed
 * @param s exponent in the function
 * @returns sample size for each batch
 * @throws Error when size or numberOfBatches is not an integer >= 1
 * @throws AssertionError when s < 1
 * @assumes batch size = 0 is okay since these batches are used in conjunction
 * with uniform batches
 */
function zipfSplits(size, numberOfBatches, s) {
    const isPositiveInteger = (value) => value >= 1 && Number.isInteger(value);
    assert(isPositiveInteger(size), Error("Total number of samples in batches should be an integer and value greater than 0"));
    assert(isPositiveInteger(numberOfBatches), Error("Number of batches should be an integer with value greater than 0"));
    const exponentMessage = "While zipf's law can have value less than 1. " +
        "Value greater than 1 is required for proper error correction";
    assert(s >= 1, exponentMessage);
    const fractions = zipf(numberOfBatches, s);
    const batchSizes = fractions.map((fraction) => Math.round(size * fraction));
    const allocated = batchSizes.reduce((total, count) => total + count);
    // With s >= 1 the first batch is expected to be the largest, so it absorbs
    // the rounding error. Original author's note: for size in [1000, 100000]
    // and s = 3 the error ranges over [-3, +3].
    batchSizes[0] += size - allocated;
    return batchSizes;
}
exports.zipfSplits = zipfSplits;
//# sourceMappingURL=datasplits.js.map