add federated learning implementation by plato

Signed-off-by: XinYao1994 <xyao@cs.hku.hk>
kubeedge · Sep 8, 2021 · 972af2a · 972af2a
1 parent cabdfe0
commit 972af2a
Show file tree

Hide file tree

Showing 19 changed files with 430 additions and 146 deletions.
diff --git a/build/crd-samples/sedna/federatedlearningjob_yolo_v1alpha1.yaml b/build/crd-samples/sedna/federatedlearningjob_yolo_v1alpha1.yaml
@@ -5,7 +5,7 @@ metadata:
 spec:
   pretrainedModel: # option
     name: "yolo-v5-pretrained-model"
-  transimitter: # option
+  transmitter: # option
     ws: { } # option, by default
     s3: # option, but at least one
       aggDataPath: "s3://sedna/fl/aggregation_data"
@@ -17,7 +17,7 @@ spec:
       spec:
         nodeName: "sedna-control-plane"
         containers:
-          - image: kubeedge/sedna-fl-aggregation:mistnetyolo
+          - image: kubeedge/sedna-example-federated-learning-mistnet-yolo-aggregator:v0.4.0
             name: agg-worker
             imagePullPolicy: IfNotPresent
             env: # user defined environments
@@ -28,21 +28,54 @@ spec:
               - name: "aggregation_algorithm"
                 value: "mistnet"
               - name: "batch_size"
+                value: "32"
             resources: # user defined resources
               limits:
                 memory: 8Gi
   trainingWorkers:
     - dataset:
-        name: "coco-dataset"
+        name: "coco-dataset-1"
       template:
         spec:
           nodeName: "edge-node"
           containers:
-            - image: kubeedge/sedna-fl-train:mistnetyolo
+            - image: kubeedge/sedna-example-federated-learning-mistnet-yolo-client:v0.4.0
               name: train-worker
               imagePullPolicy: IfNotPresent
               args: [ "-i", "1" ]
               env: # user defined environments
+                - name: "cut_layer"
+                  value: "4"
+                - name: "epsilon"
+                  value: "100"
+                - name: "aggregation_algorithm"
+                  value: "mistnet"
+                - name: "batch_size"
+                  value: "32"
+                - name: "learning_rate"
+                  value: "0.001"
+                - name: "epochs"
+                  value: "1"
+              resources: # user defined resources
+                limits:
+                  memory: 2Gi
+    - dataset:
+        name: "coco-dataset-2"
+      template:
+        spec:
+          nodeName: "edge-node"
+          containers:
+            - image: kubeedge/sedna-example-federated-learning-mistnet-yolo-client:v0.4.0
+              name: train-worker
+              imagePullPolicy: IfNotPresent
+              args: [ "-i", "2" ]
+              env: # user defined environments
+                - name: "cut_layer"
+                  value: "4"
+                - name: "epsilon"
+                  value: "100"
+                - name: "aggregation_algorithm"
+                  value: "mistnet"
                 - name: "batch_size"
                   value: "32"
                 - name: "learning_rate"

diff --git a/examples/build_image.sh b/examples/build_image.sh
@@ -17,11 +17,13 @@
 cd "$(dirname "${BASH_SOURCE[0]}")"
 
 IMAGE_REPO=${IMAGE_REPO:-kubeedge}
-IMAGE_TAG=${IMAGE_TAG:-v0.3.0}
+IMAGE_TAG=${IMAGE_TAG:-v0.4.0}
 
 EXAMPLE_REPO_PREFIX=${IMAGE_REPO}/sedna-example-
 
 dockerfiles=(
+federated-learning-mistnet-yolo-aggregator.Dockerfile
+federated-learning-mistnet-yolo-client.Dockerfile
 federated-learning-surface-defect-detection-aggregation.Dockerfile
 federated-learning-surface-defect-detection-train.Dockerfile
 incremental-learning-helmet-detection.Dockerfile

diff --git a/examples/federated-learning-mistnet-yolo-aggregator.Dockerfile b/examples/federated-learning-mistnet-yolo-aggregator.Dockerfile
@@ -0,0 +1,23 @@
+FROM tensorflow/tensorflow:1.15.4
+
+RUN apt update \
+  && apt install -y libgl1-mesa-glx git
+
+COPY ./lib/requirements.txt /home
+
+RUN python -m pip install --upgrade pip
+
+RUN pip install -r /home/requirements.txt
+
+ENV PYTHONPATH "/home/lib:/home/plato:/home/plato/packages/yolov5"
+
+COPY ./lib /home/lib
+RUN git clone https://github.com/TL-System/plato.git /home/plato
+
+RUN pip install -r /home/plato/requirements.txt
+RUN pip install -r /home/plato/packages/yolov5/requirements.txt
+
+WORKDIR /home/work
+COPY examples/federated_learning/yolov5_coco128_mistnet  /home/work/
+
+CMD ["/bin/sh", "-c", "ulimit -n 50000; python aggregate.py"]
diff --git a/examples/federated-learning-mistnet-yolo-client.Dockerfile b/examples/federated-learning-mistnet-yolo-client.Dockerfile
@@ -0,0 +1,23 @@
+FROM tensorflow/tensorflow:1.15.4
+
+RUN apt update \
+  && apt install -y libgl1-mesa-glx git
+
+COPY ./lib/requirements.txt /home
+
+RUN python -m pip install --upgrade pip
+
+RUN pip install -r /home/requirements.txt
+
+ENV PYTHONPATH "/home/lib:/home/plato:/home/plato/packages/yolov5"
+
+COPY ./lib /home/lib
+RUN git clone https://github.com/TL-System/plato.git /home/plato
+
+RUN pip install -r /home/plato/requirements.txt
+RUN pip install -r /home/plato/packages/yolov5/requirements.txt
+
+WORKDIR /home/work
+COPY examples/federated_learning/yolov5_coco128_mistnet   /home/work/
+
+ENTRYPOINT ["python", "train.py"]
diff --git a/examples/federated_learning/surface_defect_detection/training_worker/train.py b/examples/federated_learning/surface_defect_detection/training_worker/train.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import os
 
 import numpy as np
@@ -74,6 +73,7 @@ def main():
         learning_rate=learning_rate,
         validation_split=validation_split
     )
+
     return train_jobs