diff --git a/build/crd-samples/sedna/federatedlearningjob_v1alpha1.yaml b/build/crd-samples/sedna/federatedlearningjob_v1alpha1.yaml index 0066365fe..632c2eaa1 100644 --- a/build/crd-samples/sedna/federatedlearningjob_v1alpha1.yaml +++ b/build/crd-samples/sedna/federatedlearningjob_v1alpha1.yaml @@ -3,6 +3,29 @@ kind: FederatedLearningJob metadata: name: surface-defect-detection spec: + stopCondition: + operator: "or" # and + conditions: + - operator: ">" + threshold: 100 + metric: rounds + - operator: ">" + threshold: 0.95 + metric: target_accuracy + - operator: "<" + threshold: 0.03 + metric: delta_loss + transmitter: + transmitterAlgorithms: + - name: "adaptive_freezing" # simple, adaptive_freezing, adaptive_sync ... + parameters: + - key: "sync_frequency" # parameters are ParaSpec key/value pairs + value: "10" + aggregationTrigger: + condition: + operator: ">" + threshold: 5 + metric: num_of_ready_clients aggregationWorker: model: name: "surface-defect-detection-model" @@ -11,12 +34,18 @@ spec: nodeName: "cloud" containers: - image: kubeedge/sedna-example-federated-learning-surface-defect-detection-aggregation:v0.1.0 - name: agg-worker + name: agg-worker imagePullPolicy: IfNotPresent env: # user defined environments - - name: "exit_round" - value: "3" - resources: # user defined resources + - name: "cut_layer" + value: "4" + - name: "epsilon" + value: "100" + - name: "aggregation_algorithm" + value: "mistnet" + - name: "batch_size" + value: "10" + resources: # user defined resources limits: memory: 2Gi trainingWorkers: @@ -27,34 +56,34 @@ spec: nodeName: "edge1" containers: - image: kubeedge/sedna-example-federated-learning-surface-defect-detection-train:v0.1.0 - name: train-worker + name: train-worker imagePullPolicy: IfNotPresent - env: # user defined environments + env: # user defined environments - name: "batch_size" value: "32" - name: "learning_rate" value: "0.001" - name: "epochs" value: "1" - resources: # user defined resources + resources: # user defined resources limits: memory: 2Gi - dataset: - name: 
"edge2-surface-defect-detection-dataset" + name: "edge2-surface-defect-detection-dataset" template: spec: nodeName: "edge2" containers: - image: kubeedge/sedna-example-federated-learning-surface-defect-detection-train:v0.1.0 - name: train-worker + name: train-worker imagePullPolicy: IfNotPresent - env: # user defined environments + env: # user defined environments - name: "batch_size" value: "32" - name: "learning_rate" value: "0.001" - name: "epochs" value: "1" - resources: # user defined resources + resources: # user defined resources limits: memory: 2Gi diff --git a/examples/federated_learning/mistnet/README.md b/examples/federated_learning/mistnet/README.md new file mode 100644 index 000000000..3ef7dc18c --- /dev/null +++ b/examples/federated_learning/mistnet/README.md @@ -0,0 +1,211 @@ +# Collaboratively Train Using MistNet on MNIST Dataset +This case introduces how to use a federated learning job with an aggregation algorithm named MistNet in an MNIST handwritten digit classification scenario. Data is scattered across different places (such as server nodes, cameras or others) and cannot be aggregated due to data privacy and bandwidth constraints. As a result, we cannot use all the data for training. In some special cases, edge nodes have limited computing resources or even no training capability. MistNet solves this problem by completing federated learning without requiring training capability on the edge nodes. + + MistNet partitions a DNN model into two parts, a lightweight feature extractor at the edge side to generate meaningful features from the raw training data, and a classifier comprising most of the model layers at the cloud to be iteratively trained for specific tasks. MistNet achieves acceptable model utility while greatly reducing privacy leakage from the released intermediate features. + + + + +## Surface Defect Detection Experiment +> Assume that there are two edge nodes and a cloud node. Data on the edge nodes cannot be migrated to the cloud due to privacy issues. 
+> Based on this scenario, we will demonstrate the surface inspection. + +### Prepare Nodes +``` +CLOUD_NODE="cloud-node-name" +EDGE1_NODE="edge1-node-name" +EDGE2_NODE="edge2-node-name" +``` + +### Install Sedna + +Follow the [Sedna installation document](/docs/setup/install.md) to install Sedna. + +### Prepare Dataset + +Download [dataset](https://data.deepai.org/mnist.zip) to `/data` of ```EDGE1_NODE```. +``` +mkdir -p /data +cd /data +wget https://data.deepai.org/mnist.zip +TODO: command to place part 1 of dataset +``` + +Download [dataset](https://data.deepai.org/mnist.zip) to `/data` of ```EDGE2_NODE```. +``` +mkdir -p /data +cd /data +wget https://data.deepai.org/mnist.zip +TODO: command to place part 2 of dataset +``` + +### Prepare Images +This example uses these images: +1. aggregation worker: ```kubeedge/sedna-example-federated-learning-mistnet:v0.4.0``` +2. train worker: ```kubeedge/sedna-example-federated-learning-mistnet:v0.4.0``` + +These images are generated by the script [build_image.sh](/examples/build_image.sh). 
+ +### Create Federated Learning Job + +#### Create Dataset + +Create dataset for `$EDGE1_NODE` +``` +kubectl create -f - <" + threshold: 100 + metric: rounds + - operator: ">" + threshold: 0.95 + metric: targetAccuracy + - operator: "<" + threshold: 0.03 + metric: deltaLoss + aggregationTrigger: + condition: + operator: ">" + threshold: 5 + metric: num_of_ready_clients + aggregationWorker: + model: + name: "mistnet-on-mnist-model" + template: + spec: + nodeName: $CLOUD_NODE + containers: + - image: kubeedge/sedna-example-federated-learning-mistnet-on-mnist-dataset-aggregation:v0.4.0 + name: agg-worker + imagePullPolicy: IfNotPresent + env: # user defined environments + - name: "cut_layer" + value: "4" + - name: "epsilon" + value: "100" + - name: "aggregation_algorithm" + value: "mistnet" + - name: "batch_size" + value: "10" + resources: # user defined resources + limits: + memory: 2Gi + trainingWorkers: + - dataset: + name: "edge1-surface-defect-detection-dataset" + template: + spec: + nodeName: $EDGE1_NODE + containers: + - image: kubeedge/sedna-example-federated-learning-mistnet-on-mnist-dataset-train:v0.4.0 + name: train-worker + imagePullPolicy: IfNotPresent + env: # user defined environments + - name: "batch_size" + value: "32" + - name: "learning_rate" + value: "0.001" + - name: "epochs" + value: "2" + resources: # user defined resources + limits: + memory: 2Gi + - dataset: + name: "edge2-surface-defect-detection-dataset" + template: + spec: + nodeName: $EDGE2_NODE + containers: + - image: kubeedge/sedna-example-federated-learning-mistnet-on-mnist-dataset-train:v0.4.0 + name: train-worker + imagePullPolicy: IfNotPresent + env: # user defined environments + - name: "batch_size" + value: "32" + - name: "learning_rate" + value: "0.001" + - name: "epochs" + value: "2" + resources: # user defined resources + limits: + memory: 2Gi +EOF +``` + +``` +TODO: show the benefit of MistNet, for example by comparing the results of FedAvg and MistNet. 
+ +``` + +### Check Federated Learning Status + +``` +kubectl get federatedlearningjob surface-defect-detection +``` + +### Check Federated Learning Train Result +After the job has completed, you will find the generated model in the `/model` directory on `$EDGE1_NODE` and `$EDGE2_NODE`. diff --git a/pkg/apis/sedna/v1alpha1/common_types.go b/pkg/apis/sedna/v1alpha1/common_types.go index 5388a5193..051064c50 100644 --- a/pkg/apis/sedna/v1alpha1/common_types.go +++ b/pkg/apis/sedna/v1alpha1/common_types.go @@ -27,3 +27,10 @@ type ParaSpec struct { Key string `json:"key"` Value string `json:"value"` } + +// Condition describes a single comparison: a Metric name, a comparison Operator and a Threshold value. +type Condition struct { + Operator string `json:"operator"` + Threshold float64 `json:"threshold"` + Metric string `json:"metric"` +} diff --git a/pkg/apis/sedna/v1alpha1/federatedlearningjob_types.go b/pkg/apis/sedna/v1alpha1/federatedlearningjob_types.go index 5a41f731b..2e04bfdb6 100644 --- a/pkg/apis/sedna/v1alpha1/federatedlearningjob_types.go +++ b/pkg/apis/sedna/v1alpha1/federatedlearningjob_types.go @@ -37,10 +37,39 @@ type FederatedLearningJob struct { // FLJobSpec is a description of a federatedlearning job type FLJobSpec struct { + StopCondition Condition `json:"stopCondition,omitempty"` // NOTE(review): typed as Condition although a StopCondition type is declared below; confirm which shape is intended + Transmitter Transmitter `json:"transmitter,omitempty"` + AggregationTrigger AggregationTrigger `json:"aggregationTrigger,omitempty"` AggregationWorker AggregationWorker `json:"aggregationWorker"` TrainingWorkers []TrainingWorker `json:"trainingWorkers"` } +// StopCondition describes the exit conditions for federated learning job +type StopCondition struct { + CompositeOperator CompositeOperator `json:"operator"` +} + +// CompositeOperator groups the conditions that a StopCondition is composed of. +type CompositeOperator struct { + Conditions []Condition `json:"conditions"` +} + +// Transmitter lists the transmitter algorithms configured for a federated learning job. +type Transmitter struct { + TransmitterAlgorithms []TransmitterAlgorithm `json:"transmitterAlgorithms"` +} + +// TransmitterAlgorithm names a transmitter algorithm and carries its optional key/value parameters. +type TransmitterAlgorithm struct { + Name string `json:"name"` + Parameters []ParaSpec `json:"parameters,omitempty"` +} + +// AggregationTrigger holds the condition configured under the job's aggregationTrigger field. +type AggregationTrigger 
struct { + Condition Condition `json:"condition"` +} + // AggregationWorker describes the data an aggregation worker should have type AggregationWorker struct { Model modelRefer `json:"model"` diff --git a/pkg/apis/sedna/v1alpha1/incrementallearningjob_types.go b/pkg/apis/sedna/v1alpha1/incrementallearningjob_types.go index 2f7236bad..fc96b7159 100644 --- a/pkg/apis/sedna/v1alpha1/incrementallearningjob_types.go +++ b/pkg/apis/sedna/v1alpha1/incrementallearningjob_types.go @@ -79,11 +79,6 @@ type Timer struct { End string `json:"end"` } -type Condition struct { - Operator string `json:"operator"` - Threshold float64 `json:"threshold"` - Metric string `json:"metric"` -} type ILDataset struct { Name string `json:"name"`