Skip to content

Commit

Permalink
Add federated learning implementation by Plato
Browse files Browse the repository at this point in the history
Signed-off-by: XinYao1994 <xyao@cs.hku.hk>
  • Loading branch information
XinYao1994 committed Sep 8, 2021
1 parent cabdfe0 commit 972af2a
Show file tree
Hide file tree
Showing 19 changed files with 430 additions and 146 deletions.
41 changes: 37 additions & 4 deletions build/crd-samples/sedna/federatedlearningjob_yolo_v1alpha1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
spec:
pretrainedModel: # option
name: "yolo-v5-pretrained-model"
transimitter: # option
transmitter: # option
ws: { } # option, by default
s3: # option, but at least one
aggDataPath: "s3://sedna/fl/aggregation_data"
Expand All @@ -17,7 +17,7 @@ spec:
spec:
nodeName: "sedna-control-plane"
containers:
- image: kubeedge/sedna-fl-aggregation:mistnetyolo
- image: kubeedge/sedna-example-federated-learning-mistnet-yolo-aggregator:v0.4.0
name: agg-worker
imagePullPolicy: IfNotPresent
env: # user defined environments
Expand All @@ -28,21 +28,54 @@ spec:
- name: "aggregation_algorithm"
value: "mistnet"
- name: "batch_size"
value: "32"
resources: # user defined resources
limits:
memory: 8Gi
trainingWorkers:
- dataset:
name: "coco-dataset"
name: "coco-dataset-1"
template:
spec:
nodeName: "edge-node"
containers:
- image: kubeedge/sedna-fl-train:mistnetyolo
- image: kubeedge/sedna-example-federated-learning-mistnet-yolo-client:v0.4.0
name: train-worker
imagePullPolicy: IfNotPresent
args: [ "-i", "1" ]
env: # user defined environments
- name: "cut_layer"
value: "4"
- name: "epsilon"
value: "100"
- name: "aggregation_algorithm"
value: "mistnet"
- name: "batch_size"
value: "32"
- name: "learning_rate"
value: "0.001"
- name: "epochs"
value: "1"
resources: # user defined resources
limits:
memory: 2Gi
- dataset:
name: "coco-dataset-2"
template:
spec:
nodeName: "edge-node"
containers:
- image: kubeedge/sedna-example-federated-learning-mistnet-yolo-client:v0.4.0
name: train-worker
imagePullPolicy: IfNotPresent
args: [ "-i", "2" ]
env: # user defined environments
- name: "cut_layer"
value: "4"
- name: "epsilon"
value: "100"
- name: "aggregation_algorithm"
value: "mistnet"
- name: "batch_size"
value: "32"
- name: "learning_rate"
Expand Down
4 changes: 3 additions & 1 deletion examples/build_image.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
cd "$(dirname "${BASH_SOURCE[0]}")"

IMAGE_REPO=${IMAGE_REPO:-kubeedge}
IMAGE_TAG=${IMAGE_TAG:-v0.3.0}
IMAGE_TAG=${IMAGE_TAG:-v0.4.0}

EXAMPLE_REPO_PREFIX=${IMAGE_REPO}/sedna-example-

dockerfiles=(
federated-learning-mistnet-yolo-aggregator.Dockerfile
federated-learning-mistnet-yolo-client.Dockerfile
federated-learning-surface-defect-detection-aggregation.Dockerfile
federated-learning-surface-defect-detection-train.Dockerfile
incremental-learning-helmet-detection.Dockerfile
Expand Down
23 changes: 23 additions & 0 deletions examples/federated-learning-mistnet-yolo-aggregator.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Aggregator image for the MistNet YOLOv5 federated-learning example.
# Paths in COPY are relative to the build context, which must contain both
# ./lib and ./examples.
FROM tensorflow/tensorflow:1.15.4

# git is required by the `git clone` step below.
# libgl1-mesa-glx: presumably a runtime dependency of the image libraries
# pulled in by the yolov5 requirements -- TODO confirm.
# apt-get is used instead of apt because apt's CLI is not meant for scripts;
# the package lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y libgl1-mesa-glx git \
    && rm -rf /var/lib/apt/lists/*

# Copy only the requirements file first so the dependency layers below stay
# cached when other files under ./lib change.
COPY ./lib/requirements.txt /home

RUN python -m pip install --no-cache-dir --upgrade pip

RUN pip install --no-cache-dir -r /home/requirements.txt

# Make the copied lib, the cloned plato checkout and its bundled yolov5
# package importable without installing them.
ENV PYTHONPATH="/home/lib:/home/plato:/home/plato/packages/yolov5"

COPY ./lib /home/lib
# NOTE(review): this clones the default branch head, so image contents depend
# on build time. Consider pinning to a known-good tag or commit.
RUN git clone https://github.com/TL-System/plato.git /home/plato

RUN pip install --no-cache-dir -r /home/plato/requirements.txt
RUN pip install --no-cache-dir -r /home/plato/packages/yolov5/requirements.txt

WORKDIR /home/work
COPY examples/federated_learning/yolov5_coco128_mistnet /home/work/

# Raise the open-file limit for the shell before launching the aggregator.
CMD ["/bin/sh", "-c", "ulimit -n 50000; python aggregate.py"]
23 changes: 23 additions & 0 deletions examples/federated-learning-mistnet-yolo-client.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Client (training worker) image for the MistNet YOLOv5 federated-learning
# example. Paths in COPY are relative to the build context, which must contain
# both ./lib and ./examples.
FROM tensorflow/tensorflow:1.15.4

# git is required by the `git clone` step below.
# libgl1-mesa-glx: presumably a runtime dependency of the image libraries
# pulled in by the yolov5 requirements -- TODO confirm.
# apt-get is used instead of apt because apt's CLI is not meant for scripts;
# the package lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y libgl1-mesa-glx git \
    && rm -rf /var/lib/apt/lists/*

# Copy only the requirements file first so the dependency layers below stay
# cached when other files under ./lib change.
COPY ./lib/requirements.txt /home

RUN python -m pip install --no-cache-dir --upgrade pip

RUN pip install --no-cache-dir -r /home/requirements.txt

# Make the copied lib, the cloned plato checkout and its bundled yolov5
# package importable without installing them.
ENV PYTHONPATH="/home/lib:/home/plato:/home/plato/packages/yolov5"

COPY ./lib /home/lib
# NOTE(review): this clones the default branch head, so image contents depend
# on build time. Consider pinning to a known-good tag or commit.
RUN git clone https://github.com/TL-System/plato.git /home/plato

RUN pip install --no-cache-dir -r /home/plato/requirements.txt
RUN pip install --no-cache-dir -r /home/plato/packages/yolov5/requirements.txt

WORKDIR /home/work
COPY examples/federated_learning/yolov5_coco128_mistnet /home/work/

# Exec-form entrypoint: container args (e.g. `-i 1`) are appended to train.py.
ENTRYPOINT ["python", "train.py"]
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import numpy as np
Expand Down Expand Up @@ -74,6 +73,7 @@ def main():
learning_rate=learning_rate,
validation_split=validation_split
)

return train_jobs


Expand Down
Loading

0 comments on commit 972af2a

Please sign in to comment.