add example of mistnet on MNIST dataset

Signed-off-by: Jie Pu <pujie2@huawei.com>
kubeedge · Jul 27, 2021 · 8aa9a9e · 8aa9a9e
1 parent e67e4ac
commit 8aa9a9e
Show file tree

Hide file tree

Showing 5 changed files with 281 additions and 16 deletions.
diff --git a/build/crd-samples/sedna/federatedlearningjob_v1alpha1.yaml b/build/crd-samples/sedna/federatedlearningjob_v1alpha1.yaml
@@ -3,6 +3,29 @@ kind: FederatedLearningJob
 metadata:
   name: surface-defect-detection
 spec:
+  stopCondition:
+    operator: "or" # alternative: "and"
+    conditions: # sibling of operator; a scalar cannot carry nested keys
+      - operator: ">"
+        threshold: 100
+        metric: rounds
+      - operator: ">"
+        threshold: 0.95
+        metric: target_accuracy
+      - operator: "<"
+        threshold: 0.03
+        metric: delta_loss
+  transmitter:
+    transmitterAlgorithms:
+      - name: "adaptive_freezing" # simple, adaptive_freezing, adaptive_sync ...
+        parameters: # entries follow ParaSpec, which serializes as key/value
+          - key: "sync_frequency"
+            value: "10"
+  aggregationTrigger:
+    condition:
+      operator: ">"
+      threshold: 5
+      metric: num_of_ready_clients
   aggregationWorker:
     model:
       name: "surface-defect-detection-model"
@@ -11,12 +34,17 @@ spec:
         nodeName: "cloud"
         containers:
           - image: kubeedge/sedna-example-federated-learning-surface-defect-detection-aggregation:v0.1.0
-            name:  agg-worker
+            name: agg-worker
             imagePullPolicy: IfNotPresent
             env: # user defined environments
-              - name: "exit_round"
-                value: "3"
-            resources:  # user defined resources
+              - name: "cut_layer"
+                value: "4"
+              - name: "epsilon"
+                value: "100"
+              - name: "aggregation_algorithm"
+                value: "mistnet"
+              - name: "batch_size"
+                value: "10"
+            resources: # user defined resources
               limits:
                 memory: 2Gi
   trainingWorkers:
@@ -27,34 +55,34 @@ spec:
           nodeName: "edge1"
           containers:
             - image: kubeedge/sedna-example-federated-learning-surface-defect-detection-train:v0.1.0
-              name:  train-worker
+              name: train-worker
               imagePullPolicy: IfNotPresent
-              env:  # user defined environments
+              env: # user defined environments
                 - name: "batch_size"
                   value: "32"
                 - name: "learning_rate"
                   value: "0.001"
                 - name: "epochs"
                   value: "1"
-              resources:  # user defined resources
+              resources: # user defined resources
                 limits:
                   memory: 2Gi
     - dataset:
-          name: "edge2-surface-defect-detection-dataset"
+        name: "edge2-surface-defect-detection-dataset"
       template:
         spec:
           nodeName: "edge2"
           containers:
             - image: kubeedge/sedna-example-federated-learning-surface-defect-detection-train:v0.1.0
-              name:  train-worker
+              name: train-worker
               imagePullPolicy: IfNotPresent
-              env:  # user defined environments
+              env: # user defined environments
                 - name: "batch_size"
                   value: "32"
                 - name: "learning_rate"
                   value: "0.001"
                 - name: "epochs"
                   value: "1"
-              resources:  # user defined resources
+              resources: # user defined resources
                 limits:
                   memory: 2Gi
diff --git a/examples/federated_learning/mistnet/README.md b/examples/federated_learning/mistnet/README.md
@@ -0,0 +1,211 @@
+# Collaboratively Train Using MistNet on MNIST Dataset
+This case introduces how to use a federated learning job with an aggregation algorithm named MistNet in the MNIST handwritten digit classification scenario. Data is scattered in different places (such as server nodes, cameras or others) and cannot be aggregated due to data privacy and bandwidth constraints. As a result, we cannot use all the data for training. In some special cases, edge nodes have limited computing resources or even no training capability. MistNet solves this problem by making it possible to complete federated learning without training capability on the edge nodes.
+
+MistNet partitions a DNN model into two parts: a lightweight feature extractor at the edge side that generates meaningful features from the raw training data, and a classifier containing most of the model layers in the cloud, which is iteratively trained for specific tasks. MistNet achieves acceptable model utility while greatly reducing privacy leakage from the released intermediate features.
+
+
+
+
+## MNIST Handwritten Digit Classification Experiment
+> Assume that there are two edge nodes and a cloud node. Data on the edge nodes cannot be migrated to the cloud due to privacy issues.
+> Based on this scenario, we will demonstrate handwritten digit classification.
+
+### Prepare Nodes
+```
+CLOUD_NODE="cloud-node-name"
+EDGE1_NODE="edge1-node-name"
+EDGE2_NODE="edge2-node-name"
+```
+
+### Install Sedna
+
+Follow the [Sedna installation document](/docs/setup/install.md) to install Sedna.
+
+### Prepare Dataset
+
+Download [dataset](https://data.deepai.org/mnist.zip)  to `/data` of ```EDGE1_NODE```.  
+```
+mkdir -p /data
+cd /data
+wget https://data.deepai.org/mnist.zip
+TODO: command to place part 1 of dataset
+```
+
+Download [dataset](https://data.deepai.org/mnist.zip)  to `/data` of ```EDGE2_NODE```.
+```
+mkdir -p /data
+cd /data
+wget https://data.deepai.org/mnist.zip
+TODO: command to place part 2 of dataset
+```
+
+### Prepare Images
+This example uses these images:
+1. aggregation worker: ```kubeedge/sedna-example-federated-learning-mistnet:v0.4.0```
+2. train worker: ```kubeedge/sedna-example-federated-learning-mistnet:v0.4.0```
+
+These images are generated by the script [build_image.sh](/examples/build_image.sh).
+
+### Create Federated Learning Job 
+
+#### Create Dataset
+
+create dataset for `$EDGE1_NODE`
+```
+kubectl create -f - <<EOF
+apiVersion: sedna.io/v1alpha1
+kind: Dataset
+metadata:
+  name: "edge1-mistnet-on-mnist-dataset"
+spec:
+  url: "/data/1.txt"
+  format: "txt"
+  nodeName: $EDGE1_NODE
+EOF
+```
+
+create dataset for `$EDGE2_NODE`
+```
+kubectl create -f - <<EOF
+apiVersion: sedna.io/v1alpha1
+kind: Dataset
+metadata:
+  name: "edge2-mistnet-on-mnist-dataset"
+spec:
+  url: "/data/2.txt"
+  format: "txt"
+  nodeName: $EDGE2_NODE
+EOF
+```
+
+#### Create Model
+
+create the directory `/model` in the host of `$EDGE1_NODE`
+```
+mkdir /model
+```
+create the directory `/model` in the host of `$EDGE2_NODE`
+```
+mkdir /model
+```
+
+```
+TODO: put pretrained model on nodes.
+```
+
+create model
+```
+kubectl create -f - <<EOF
+apiVersion: sedna.io/v1alpha1
+kind: Model
+metadata:
+  name: "mistnet-on-mnist-pretrained-model"
+spec:
+  url: "/model"
+  format: "pb"
+EOF
+```
+
+#### Start Federated Learning Job
+
+```
+kubectl create -f - <<EOF
+apiVersion: sedna.io/v1alpha1
+kind: FederatedLearningJob
+metadata:
+  name: mistnet-on-mnist-dataset
+spec:
+  stopCondition:
+    operator: "or" # alternative: "and"
+    conditions: # sibling of operator; a scalar cannot carry nested keys
+      - operator: ">"
+        threshold: 100
+        metric: rounds
+      - operator: ">"
+        threshold: 0.95
+        metric: targetAccuracy
+      - operator: "<"
+        threshold: 0.03
+        metric: deltaLoss
+  aggregationTrigger:
+    condition:
+      operator: ">"
+      threshold: 5
+      metric: num_of_ready_clients
+  aggregationWorker:
+    model:
+      name: "mistnet-on-mnist-pretrained-model"
+    template:
+      spec:
+        nodeName: $CLOUD_NODE
+        containers:
+          - image: kubeedge/sedna-example-federated-learning-mistnet-on-mnist-dataset-aggregation:v0.4.0
+            name:  agg-worker
+            imagePullPolicy: IfNotPresent
+            env: # user defined environments
+              - name: "cut_layer"
+                value: "4"
+              - name: "epsilon"
+                value: "100"
+              - name: "aggregation_algorithm"
+                value: "mistnet"
+              - name: "batch_size"
+                value: "10"
+            resources:  # user defined resources
+              limits:
+                memory: 2Gi
+  trainingWorkers:
+    - dataset:
+        name: "edge1-mistnet-on-mnist-dataset"
+      template:
+        spec:
+          nodeName: $EDGE1_NODE
+          containers:
+            - image: kubeedge/sedna-example-federated-learning-mistnet-on-mnist-dataset-train:v0.4.0
+              name:  train-worker
+              imagePullPolicy: IfNotPresent
+              env:  # user defined environments
+                - name: "batch_size"
+                  value: "32"
+                - name: "learning_rate"
+                  value: "0.001"
+                - name: "epochs"
+                  value: "2"
+              resources:  # user defined resources
+                limits:
+                  memory: 2Gi
+    - dataset:
+        name: "edge2-mistnet-on-mnist-dataset"
+      template:
+        spec:
+          nodeName: $EDGE2_NODE
+          containers:
+            - image: kubeedge/sedna-example-federated-learning-mistnet-on-mnist-dataset-train:v0.4.0
+              name:  train-worker
+              imagePullPolicy: IfNotPresent
+              env:  # user defined environments
+                - name: "batch_size"
+                  value: "32"
+                - name: "learning_rate"
+                  value: "0.001"
+                - name: "epochs"
+                  value: "2"
+              resources:  # user defined resources
+                limits:
+                  memory: 2Gi
+EOF
+```
+
+```
+TODO: show the benefit of MistNet, for example by comparing the results of FedAvg and MistNet.
+
+```
+
+### Check Federated Learning Status
+
+```
+kubectl get federatedlearningjob mistnet-on-mnist-dataset
+```
+
+### Check Federated Learning Train Result
+After the job completes, you will find the generated model in the directory `/model` on `$EDGE1_NODE` and `$EDGE2_NODE`.
diff --git a/pkg/apis/sedna/v1alpha1/common_types.go b/pkg/apis/sedna/v1alpha1/common_types.go
@@ -27,3 +27,9 @@ type ParaSpec struct {
 	Key   string `json:"key"`
 	Value string `json:"value"`
 }
+
+// Condition describes a single comparison: the named Metric is checked
+// against Threshold using Operator (the sample manifests use ">" and "<").
+// NOTE(review): how a Condition is evaluated is not defined in this file.
+type Condition struct {
+	Operator  string  `json:"operator"`
+	Threshold float64 `json:"threshold"`
+	Metric    string  `json:"metric"`
+}
diff --git a/pkg/apis/sedna/v1alpha1/federatedlearningjob_types.go b/pkg/apis/sedna/v1alpha1/federatedlearningjob_types.go
@@ -37,10 +37,35 @@ type FederatedLearningJob struct {
 
 // FLJobSpec is a description of a federatedlearning job
 type FLJobSpec struct {
+    StopCondition     Condition     `json:"stopCondition,omitempty"`
+    Transmitter       Transmitter       `json:"transmitter,omitempty"`
+    AggregationTrigger  AggregationTrigger `json:"aggregationTrigger,omitempty"`
 	AggregationWorker AggregationWorker `json:"aggregationWorker"`
 	TrainingWorkers   []TrainingWorker  `json:"trainingWorkers"`
 }
 
+// StopCondition describes the exit conditions for a federated learning job.
+// The composite operator is serialized under the JSON key "operator".
+type StopCondition struct {
+	CompositeOperator CompositeOperator `json:"operator"`
+}
+
+// CompositeOperator holds the list of conditions of a stop condition.
+// NOTE(review): the struct carries no field saying how the conditions are
+// combined ("or"/"and" in the sample manifests) — confirm where that lives.
+type CompositeOperator struct {
+	Conditions []Condition `json:"conditions"`
+}
+
+// Transmitter lists the transmitter algorithms configured for a job.
+type Transmitter struct {
+	// The tag value must be quoted; an unquoted tag is ignored by
+	// encoding/json and the field would serialize as "TransmitterAlgorithms".
+	TransmitterAlgorithms []TransmitterAlgorithm `json:"transmitterAlgorithms"`
+}
+
+// TransmitterAlgorithm names a transmitter algorithm and carries its
+// optional parameters as ParaSpec key/value pairs.
+type TransmitterAlgorithm struct {
+	Name       string     `json:"name"`
+	Parameters []ParaSpec `json:"parameters,omitempty"`
+}
+
+// AggregationTrigger holds the condition attached to aggregation (the sample
+// manifests compare num_of_ready_clients against a threshold).
+type AggregationTrigger struct {
+	Condition Condition `json:"condition"`
+}
+
 // AggregationWorker describes the data an aggregation worker should have
 type AggregationWorker struct {
 	Model    modelRefer         `json:"model"`

diff --git a/pkg/apis/sedna/v1alpha1/incrementallearningjob_types.go b/pkg/apis/sedna/v1alpha1/incrementallearningjob_types.go
@@ -79,11 +79,6 @@ type Timer struct {
 	End   string `json:"end"`
 }
 
-type Condition struct {
-	Operator  string  `json:"operator"`
-	Threshold float64 `json:"threshold"`
-	Metric    string  `json:"metric"`
-}
 
 type ILDataset struct {
 	Name      string  `json:"name"`