federer
Experiments in asynchronous federated learning and decentralized learning
import * as tf from "@tensorflow/tfjs-node";
import { Dataset, DataSubset, DataSubsetFilepaths } from "../../common";
import { DataDistributionStats } from "./distribution-stats";
/** Result of preprocessing a dataset. */
export interface PreprocessResult {
    /** Test set, in memory. */
    testSet: DataSubset;
    /** Paths to files of the saved test set. */
    testSetFiles: DataSubsetFilepaths;
    /**
     * Paths to the training sets of each client. This array has one entry per
     * client.
     */
    clientTrainFiles: DataSubsetFilepaths[];
    /** Statistics on how the data was distributed among clients. */
    dataDistribution: DataDistributionStats;
}
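/*
 * Illustrative sketch only: how a coordinator that receives a PreprocessResult
 * (e.g. from `PreprocessPipeline.run` below) might use it. `globalModel`,
 * `evaluate`, and `sendShardPathsToClient` are hypothetical helpers, not part
 * of this package.
 *
 *   const result: PreprocessResult = await pipeline.run();
 *   await evaluate(globalModel, result.testSet);           // held-out evaluation
 *   result.clientTrainFiles.forEach((files, clientId) =>
 *     sendShardPathsToClient(clientId, files));            // one shard per client
 *   console.log(result.dataDistribution);
 */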
/**
 * Represents a full preprocessing pipeline for Federated Learning.
 *
 * For FL, preprocessing must split the raw data into "shards"; each FL client's
 * training set is equivalent to one shard.
 *
 * The preprocessing must also return a test set, used by the coordinator to
 * evaluate the performance of the global model.
 */
export declare class PreprocessPipeline {
    private readonly numberLabelClasses;
    private readonly pipeline;
    /** Path of the directory containing cached results of a previous run. */
    private readonly rootDir;
    /** Path of the file containing paths to the cached results of a previous run. */
    private readonly pathsFile;
    /** Paths of the directories to which files should be saved. */
    private readonly directories;
    constructor(experimentName: string, pipelineName: string, numberLabelClasses: number, pipeline: Readonly<PreprocessPipelineFunctions>);
    run(allowReadFromCache?: boolean): Promise<PreprocessResult>;
    private runAndCache;
    private runPipeline;
    private createTestSet;
    private createClientTrainSets;
    private preprocess;
    private readCachedResults;
    private cacheResults;
    private saveDistributionMatrix;
    private saveShards;
}
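/*
 * Usage sketch (not part of the API surface above): constructing and running a
 * pipeline. The experiment and pipeline names are placeholder values, and
 * `myPipelineFns` is assumed to be a PreprocessPipelineFunctions object such as
 * the one sketched after that interface below.
 *
 *   const pipeline = new PreprocessPipeline(
 *     "mnist-async-fl",   // experimentName (placeholder)
 *     "iid-10-clients",   // pipelineName (placeholder)
 *     10,                 // numberLabelClasses
 *     myPipelineFns);
 *   // Reuse cached shards and test set from a previous run when available.
 *   const result = await pipeline.run(true);
 */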
/**
 * Set of functions that implement the steps of a preprocessing pipeline for
 * federated learning.
 */
export interface PreprocessPipelineFunctions {
    /** Function that reads the raw data. Will only be called if necessary. */
    readRawData: () => Promise<Dataset>;
    /**
     * An initial filtering of the data. This is useful to determine what subset
     * of the raw data source should be used for an experiment.
     *
     * This function is optional; if `undefined`, no filtering will be done.
     */
    filter?: FilterFn;
    /** Function that splits the data into shards. */
    shard: ShardFn;
    /**
     * Functions for preprocessing the data into a format that is suitable for
     * consumption by an ML model. In a production environment, these
     * preprocessing functions would run on the clients, but for the sake of
     * simplicity, in this implementation we run them on the coordinator ahead of
     * time.
     */
    preprocess: {
        preprocessItems: PreprocessFn;
        preprocessLabels: PreprocessFn;
    };
}
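/*
 * Illustrative sketch of a PreprocessPipelineFunctions object. The real shapes
 * of `Dataset` and `DataSubset` are defined in "../../common"; purely for
 * illustration, this sketch assumes a DataSubset carries `items` and `labels`
 * tensors that can be sliced along the first axis, and that labels are 1D
 * class indices. `loadRawDataFromDisk` is a hypothetical loader.
 *
 *   const myPipelineFns: PreprocessPipelineFunctions = {
 *     readRawData: () => loadRawDataFromDisk(),
 *     // Optional `filter` omitted: use the whole raw dataset.
 *     // Split the data into `numClients` equally sized IID shards.
 *     shard: async (data) => {
 *       const numClients = 10;
 *       const shardSize = Math.floor(data.items.shape[0] / numClients);
 *       return Array.from({ length: numClients }, (_, i) => ({
 *         ...data,
 *         items: data.items.slice(i * shardSize, shardSize),
 *         labels: data.labels.slice(i * shardSize, shardSize),
 *       }));
 *     },
 *     preprocess: {
 *       preprocessItems: (t) => t.div(255),  // scale pixel values to [0, 1]
 *       preprocessLabels: (t) => tf.oneHot(t.cast("int32") as tf.Tensor1D, 10),
 *     },
 *   };
 */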
/**
* Function that filters a dataset, discarding datapoints that should not be
* used.
*/
export declare type FilterFn = (data: DataSubset) => Promise<DataSubset>;
/** Function that splits a full dataset into shards. */
export declare type ShardFn = (data: DataSubset) => Promise<DataSubset[]>;
/** Function that transforms a tensor to prepare it for a model. */
export declare type PreprocessFn = (tensor: tf.Tensor) => tf.Tensor;
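/*
 * Example PreprocessFn (illustrative): flatten 28x28 grayscale images into
 * 784-element vectors so a dense model can consume them.
 *
 *   const flattenImages: PreprocessFn = (t) => t.reshape([t.shape[0], 28 * 28]);
 */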
//# sourceMappingURL=PreprocessPipeline.d.ts.map