From a726b7f02ee48e92e3c784a03670ee4c2446cf5a Mon Sep 17 00:00:00 2001
From: cuiboyuan <boyuanbryan.cui@mail.utoronto.ca>
Date: Sat, 7 May 2022 19:49:53 -0400
Subject: [PATCH 1/8] Add CelebA dataset and tests

---
 celeba_test.py              | 22 ++++++++++++++
 plato/datasources/celeba.py | 59 +++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 celeba_test.py
 create mode 100644 plato/datasources/celeba.py

diff --git a/celeba_test.py b/celeba_test.py
new file mode 100644
index 000000000..f926e2a42
--- /dev/null
+++ b/celeba_test.py
@@ -0,0 +1,22 @@
+import torch
+from plato.datasources import celeba
+
+if __name__ == '__main__':
+    ds = celeba.DataSource()
+    total_num = 0
+    train_loader = torch.utils.data.DataLoader(ds.trainset, batch_size=1)
+    test_loader = torch.utils.data.DataLoader(ds.testset, batch_size=1)
+
+    all_data = None
+    for batch_id, (examples, labels) in enumerate(train_loader):
+        if all_data is None:
+            all_data = examples
+        else:
+            all_data = torch.cat((all_data, examples), 0)
+
+    for batch_id, (examples, labels) in enumerate(test_loader):
+        all_data = torch.cat((all_data, examples), 0)
+
+    print(all_data.shape)
+    print(torch.mean(all_data, [0, 2, 3]))
+    print(torch.std(all_data, [0, 2, 3]))
diff --git a/plato/datasources/celeba.py b/plato/datasources/celeba.py
new file mode 100644
index 000000000..b91a74c4a
--- /dev/null
+++ b/plato/datasources/celeba.py
@@ -0,0 +1,59 @@
+"""
+The CelebA dataset from the torchvision package.
+"""
+
+from torchvision import datasets, transforms
+
+import zipfile
+import os
+import logging
+from plato.config import Config
+from plato.datasources import base
+
+
+class CelebA(datasets.CelebA):
+
+    def _check_integrity(self):
+        return True
+
+
+class DataSource(base.DataSource):
+    """The CelebA dataset."""
+
+    def __init__(self):
+        super().__init__()
+        _path = Config().data.data_path
+
+        DataSource.download_celeba(_path)
+
+        _transform = transforms.Compose([transforms.ToTensor()])
+        self.trainset = CelebA(root=_path,
+                               split='train',
+                               target_type=['attr', 'identity'],
+                               download=False,
+                               transform=_transform)
+        self.testset = CelebA(root=_path,
+                              split='test',
+                              target_type=['attr', 'identity'],
+                              download=False,
+                              transform=_transform)
+
+    @staticmethod
+    def download_celeba(root_path):
+        """ Download and unzip all CelebA data points. """
+        datapath = os.path.join(root_path, "celeba")
+        filename = os.path.join(datapath, "img_align_celeba.zip")
+        extracted_path, _ = os.path.splitext(filename)
+        if not os.path.exists(extracted_path):
+            logging.info("Extracting all images in %s to %s.",
+                         "img_align_celeba.zip", extracted_path)
+            with zipfile.ZipFile(filename, 'r') as zip_ref:
+                zip_ref.extractall(datapath)
+        else:
+            logging.info("Path %s already exists.", extracted_path)
+
+    def num_train_examples(self):
+        return 162770
+
+    def num_test_examples(self):
+        return 19962

From 324c86945d6902f039c0141f0fabd3685c070557 Mon Sep 17 00:00:00 2001
From: cuiboyuan <boyuanbryan.cui@mail.utoronto.ca>
Date: Sat, 7 May 2022 21:46:43 -0400
Subject: [PATCH 2/8] Update CelebA impl and remove temp tests

---
 celeba_test.py              | 22 -------------
 plato/datasources/celeba.py | 62 +++++++++++++++++--------------------
 2 files changed, 29 insertions(+), 55 deletions(-)
 delete mode 100644 celeba_test.py

diff --git a/celeba_test.py b/celeba_test.py
deleted file mode 100644
index f926e2a42..000000000
--- a/celeba_test.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import torch
-from plato.datasources import celeba
-
-if __name__ == '__main__':
-    ds = celeba.DataSource()
-    total_num = 0
-    train_loader = torch.utils.data.DataLoader(ds.trainset, batch_size=1)
-    test_loader = torch.utils.data.DataLoader(ds.testset, batch_size=1)
-
-    all_data = None
-    for batch_id, (examples, labels) in enumerate(train_loader):
-        if all_data is None:
-            all_data = examples
-        else:
-            all_data = torch.cat((all_data, examples), 0)
-
-    for batch_id, (examples, labels) in enumerate(test_loader):
-        all_data = torch.cat((all_data, examples), 0)
-
-    print(all_data.shape)
-    print(torch.mean(all_data, [0, 2, 3]))
-    print(torch.std(all_data, [0, 2, 3]))
diff --git a/plato/datasources/celeba.py b/plato/datasources/celeba.py
index b91a74c4a..588c8c012 100644
--- a/plato/datasources/celeba.py
+++ b/plato/datasources/celeba.py
@@ -2,19 +2,25 @@
 The CelebA dataset from the torchvision package.
 """
 
+from typing import Callable, List, Optional, Union
 from torchvision import datasets, transforms
-
-import zipfile
-import os
-import logging
 from plato.config import Config
 from plato.datasources import base
 
 
 class CelebA(datasets.CelebA):
 
-    def _check_integrity(self):
-        return True
+    def __init__(self,
+                 root: str,
+                 split: str = "train",
+                 target_type: Union[List[str], str] = "attr",
+                 transform: Optional[Callable] = None,
+                 target_transform: Optional[Callable] = None,
+                 download: bool = False) -> None:
+        super().__init__(root, split, target_type, transform, target_transform,
+                         download)
+        self.targets = self.identity.flatten().tolist()
+        self.classes = [f'Celebrity #{i}' for i in range(10177 + 1)]
 
 
 class DataSource(base.DataSource):
@@ -24,33 +30,23 @@ def __init__(self):
         super().__init__()
         _path = Config().data.data_path
 
-        DataSource.download_celeba(_path)
-
-        _transform = transforms.Compose([transforms.ToTensor()])
-        self.trainset = CelebA(root=_path,
-                               split='train',
-                               target_type=['attr', 'identity'],
-                               download=False,
-                               transform=_transform)
-        self.testset = CelebA(root=_path,
-                              split='test',
-                              target_type=['attr', 'identity'],
-                              download=False,
-                              transform=_transform)
-
-    @staticmethod
-    def download_celeba(root_path):
-        """ Download and unzip all CelebA data points. """
-        datapath = os.path.join(root_path, "celeba")
-        filename = os.path.join(datapath, "img_align_celeba.zip")
-        extracted_path, _ = os.path.splitext(filename)
-        if not os.path.exists(extracted_path):
-            logging.info("Extracting all images in %s to %s.",
-                         "img_align_celeba.zip", extracted_path)
-            with zipfile.ZipFile(filename, 'r') as zip_ref:
-                zip_ref.extractall(datapath)
-        else:
-            logging.info("Path %s already exists.", extracted_path)
+        _transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+        ])
+
+        self.trainset = CelebA(
+            root=_path,
+            split='train',
+            target_type=['attr', 'identity', 'bbox', 'landmarks'],
+            download=True,
+            transform=_transform)
+        self.testset = CelebA(
+            root=_path,
+            split='test',
+            target_type=['attr', 'identity', 'bbox', 'landmarks'],
+            download=True,
+            transform=_transform)
 
     def num_train_examples(self):
         return 162770

From d3cf45934785a433d172b3fcf2c2848243fe9f6c Mon Sep 17 00:00:00 2001
From: cuiboyuan <boyuanbryan.cui@mail.utoronto.ca>
Date: Sun, 8 May 2022 13:12:16 -0400
Subject: [PATCH 3/8] Update CelebA transforms and add some docs

---
 plato/datasources/celeba.py | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/plato/datasources/celeba.py b/plato/datasources/celeba.py
index 588c8c012..20aaa7435 100644
--- a/plato/datasources/celeba.py
+++ b/plato/datasources/celeba.py
@@ -9,6 +9,11 @@
 
 
 class CelebA(datasets.CelebA):
+    """
+    A wrapper around torchvision's CelebA dataset class that
+    adds <targets> and <classes> attributes based on celebrity
+    identity, which are used by non-IID samplers.
+    """
 
     def __init__(self,
                  root: str,
@@ -30,23 +35,24 @@ def __init__(self):
         super().__init__()
         _path = Config().data.data_path
 
+        image_size = 64
         _transform = transforms.Compose([
+            transforms.Resize(image_size),
+            transforms.CenterCrop(image_size),
             transforms.ToTensor(),
             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
         ])
 
-        self.trainset = CelebA(
-            root=_path,
-            split='train',
-            target_type=['attr', 'identity', 'bbox', 'landmarks'],
-            download=True,
-            transform=_transform)
-        self.testset = CelebA(
-            root=_path,
-            split='test',
-            target_type=['attr', 'identity', 'bbox', 'landmarks'],
-            download=True,
-            transform=_transform)
+        self.trainset = CelebA(root=_path,
+                               split='train',
+                               target_type=['attr', 'identity'],
+                               download=True,
+                               transform=_transform)
+        self.testset = CelebA(root=_path,
+                              split='test',
+                              target_type=['attr', 'identity'],
+                              download=True,
+                              transform=_transform)
 
     def num_train_examples(self):
         return 162770

From 59111b540e33ac13b0cb814a9edbed5f4ea6d5d6 Mon Sep 17 00:00:00 2001
From: cuiboyuan <boyuanbryan.cui@mail.utoronto.ca>
Date: Sun, 8 May 2022 14:38:09 -0400
Subject: [PATCH 4/8] Add CelebA to registry and add target transform to CelebA
 dataset

---
 plato/datasources/celeba.py   | 26 ++++++++++++++++++++++++--
 plato/datasources/registry.py |  4 +++-
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/plato/datasources/celeba.py b/plato/datasources/celeba.py
index 20aaa7435..500503e1c 100644
--- a/plato/datasources/celeba.py
+++ b/plato/datasources/celeba.py
@@ -2,6 +2,7 @@
 The CelebA dataset from the torchvision package.
 """
 
+import torch
 from typing import Callable, List, Optional, Union
 from torchvision import datasets, transforms
 from plato.config import Config
@@ -47,12 +48,33 @@ def __init__(self):
                                split='train',
                                target_type=['attr', 'identity'],
                                download=True,
-                               transform=_transform)
+                               transform=_transform,
+                               target_transform=DataSource._target_transform)
         self.testset = CelebA(root=_path,
                               split='test',
                               target_type=['attr', 'identity'],
                               download=True,
-                              transform=_transform)
+                              transform=_transform,
+                              target_transform=DataSource._target_transform)
+
+    @staticmethod
+    def _target_transform(label):
+        """
+        When more than one target type is specified, the labels come
+        as a tuple of tensors, so we need to convert the tuple to
+        tensors. Here, we just merge two tensors by adding identity
+        as the 41st attribute
+        """
+        attr, identity = label
+        return torch.cat((attr.reshape([
+            -1,
+        ]), identity.reshape([
+            -1,
+        ])))
+
+    @staticmethod
+    def input_shape():
+        return [162770, 2, 64, 64]
 
     def num_train_examples(self):
         return 162770
diff --git a/plato/datasources/registry.py b/plato/datasources/registry.py
index 0b97642fd..d1e3498fd 100644
--- a/plato/datasources/registry.py
+++ b/plato/datasources/registry.py
@@ -29,7 +29,8 @@
 else:
     from plato.datasources import (mnist, fashion_mnist, emnist, cifar10,
                                    cinic10, huggingface, pascal_voc,
-                                   tiny_imagenet, femnist, feature, qoenflx)
+                                   tiny_imagenet, femnist, feature, qoenflx,
+                                   celeba)
 
     registered_datasources = OrderedDict([
         ('MNIST', mnist),
@@ -42,6 +43,7 @@
         ('TinyImageNet', tiny_imagenet),
         ('Feature', feature),
         ('QoENFLX', qoenflx),
+        ('CelebA', celeba),
     ])
 
     registered_partitioned_datasources = OrderedDict([('FEMNIST', femnist)])

From 96df41e86267bc66226aaf1fbcfad45713eadc05 Mon Sep 17 00:00:00 2001
From: cuiboyuan <boyuanbryan.cui@mail.utoronto.ca>
Date: Sun, 8 May 2022 21:48:02 -0400
Subject: [PATCH 5/8] Add yaml file for CelebA testing and update ResNet to
 support custom number of classes

---
 configs/CelebA/fedavg_resnet18.yml | 74 ++++++++++++++++++++++++++++++
 plato/datasources/celeba.py        | 36 +++++++++++----
 plato/models/resnet.py             | 16 +++++--
 3 files changed, 111 insertions(+), 15 deletions(-)
 create mode 100644 configs/CelebA/fedavg_resnet18.yml

diff --git a/configs/CelebA/fedavg_resnet18.yml b/configs/CelebA/fedavg_resnet18.yml
new file mode 100644
index 000000000..d5160c2d2
--- /dev/null
+++ b/configs/CelebA/fedavg_resnet18.yml
@@ -0,0 +1,74 @@
+clients:
+    # Type
+    type: simple
+
+    # The total number of clients
+    total_clients: 3
+
+    # The number of clients selected in each round
+    per_round: 1
+
+    # Should the clients compute test accuracy locally?
+    do_test: false
+
+server:
+    address: 127.0.0.1
+    port: 8000
+
+data:
+    # The training and testing dataset
+    datasource: CelebA
+
+    # Only add face identity as labels for training
+    celeba_targets:
+        attr: false
+        identity: true
+    
+    # Number of identity classes in CelebA (labels 0 through 10177)
+    num_classes: 10178
+
+    # Where the dataset is located
+    data_path: ./data
+
+    # Number of samples in each partition
+    partition_size: 20000
+
+    # IID or non-IID?
+    sampler: iid
+
+    # The concentration parameter for the Dirichlet distribution
+    concentration: 0.5
+
+    # The random seed for sampling data
+    random_seed: 1
+
+trainer:
+    # The type of the trainer
+    type: basic
+
+    # The maximum number of training rounds
+    rounds: 5
+
+    # Whether the training should use multiple GPUs if available
+    parallelized: false
+
+    # The maximum number of clients running concurrently
+    max_concurrency: 3
+
+    # The target accuracy
+    target_accuracy: 0.94
+
+    # Number of epochs for local training in each communication round
+    epochs: 5
+    batch_size: 32
+    optimizer: SGD
+    learning_rate: 0.01
+    momentum: 0.9
+    weight_decay: 0.0
+
+    # The machine learning model
+    model_name: resnet_18
+
+algorithm:
+    # Aggregation algorithm
+    type: fedavg
diff --git a/plato/datasources/celeba.py b/plato/datasources/celeba.py
index 500503e1c..3640dde9d 100644
--- a/plato/datasources/celeba.py
+++ b/plato/datasources/celeba.py
@@ -36,7 +36,17 @@ def __init__(self):
         super().__init__()
         _path = Config().data.data_path
 
-        image_size = 64
+        target_types = []
+        if hasattr(Config().data, "celeba_targets"):
+            targets = Config().data.celeba_targets
+            if hasattr(targets, "attr") and targets.attr:
+                target_types.append("attr")
+            if hasattr(targets, "identity") and targets.identity:
+                target_types.append("identity")
+        else:
+            target_types = ['attr', 'identity']
+
+        image_size = 32
         _transform = transforms.Compose([
             transforms.Resize(image_size),
             transforms.CenterCrop(image_size),
@@ -46,13 +56,13 @@ def __init__(self):
 
         self.trainset = CelebA(root=_path,
                                split='train',
-                               target_type=['attr', 'identity'],
+                               target_type=target_types,
                                download=True,
                                transform=_transform,
                                target_transform=DataSource._target_transform)
         self.testset = CelebA(root=_path,
                               split='test',
-                              target_type=['attr', 'identity'],
+                              target_type=target_types,
                               download=True,
                               transform=_transform,
                               target_transform=DataSource._target_transform)
@@ -65,16 +75,22 @@ def _target_transform(label):
         tensors. Here, we just merge two tensors by adding identity
         as the 41st attribute
         """
-        attr, identity = label
-        return torch.cat((attr.reshape([
-            -1,
-        ]), identity.reshape([
-            -1,
-        ])))
+        if isinstance(label, tuple):
+            if len(label) == 1:
+                return label[0]
+            elif len(label) == 2:
+                attr, identity = label
+                return torch.cat((attr.reshape([
+                    -1,
+                ]), identity.reshape([
+                    -1,
+                ])))
+        else:
+            return label
 
     @staticmethod
     def input_shape():
-        return [162770, 2, 64, 64]
+        return [162770, 3, 32, 32]
 
     def num_train_examples(self):
         return 162770
diff --git a/plato/models/resnet.py b/plato/models/resnet.py
index 7d712d6b8..f15b7df76 100644
--- a/plato/models/resnet.py
+++ b/plato/models/resnet.py
@@ -9,6 +9,8 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
+from plato.config import Config
+
 
 class BasicBlock(nn.Module):
     expansion = 1
@@ -176,13 +178,17 @@ def get_model(model_type):
 
         resnet_type = int(model_type.split('_')[1])
 
+        num_classes = 10
+        if hasattr(Config().data, 'num_classes'):
+            num_classes = Config().data.num_classes
+
         if resnet_type == 18:
-            return Model(BasicBlock, [2, 2, 2, 2])
+            return Model(BasicBlock, [2, 2, 2, 2], num_classes)
         elif resnet_type == 34:
-            return Model(BasicBlock, [3, 4, 6, 3])
+            return Model(BasicBlock, [3, 4, 6, 3], num_classes)
         elif resnet_type == 50:
-            return Model(Bottleneck, [3, 4, 6, 3])
+            return Model(Bottleneck, [3, 4, 6, 3], num_classes)
         elif resnet_type == 101:
-            return Model(Bottleneck, [3, 4, 23, 3])
+            return Model(Bottleneck, [3, 4, 23, 3], num_classes)
         elif resnet_type == 152:
-            return Model(Bottleneck, [3, 8, 36, 3])
+            return Model(Bottleneck, [3, 8, 36, 3], num_classes)

From af07466e7d99824bc8d27e1d9c6e74602ab80814 Mon Sep 17 00:00:00 2001
From: cuiboyuan <boyuanbryan.cui@mail.utoronto.ca>
Date: Thu, 12 May 2022 12:05:21 -0400
Subject: [PATCH 6/8] Download celeba from our own web server

---
 plato/datasources/celeba.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/plato/datasources/celeba.py b/plato/datasources/celeba.py
index 3640dde9d..521b2c076 100644
--- a/plato/datasources/celeba.py
+++ b/plato/datasources/celeba.py
@@ -1,10 +1,13 @@
 """
 The CelebA dataset from the torchvision package.
 """
+import logging
+import os
+from typing import Callable, List, Optional, Union
 
 import torch
-from typing import Callable, List, Optional, Union
 from torchvision import datasets, transforms
+
 from plato.config import Config
 from plato.datasources import base
 
@@ -36,6 +39,13 @@ def __init__(self):
         super().__init__()
         _path = Config().data.data_path
 
+        if not os.path.exists(os.path.join(_path, 'celeba')):
+            celeba_url = ''
+            DataSource.download(celeba_url, _path)
+        else:
+            logging.info("CelebA data already decompressed under %s",
+                         os.path.join(_path, 'celeba'))
+
         target_types = []
         if hasattr(Config().data, "celeba_targets"):
             targets = Config().data.celeba_targets
@@ -57,13 +67,13 @@ def __init__(self):
         self.trainset = CelebA(root=_path,
                                split='train',
                                target_type=target_types,
-                               download=True,
+                               download=False,
                                transform=_transform,
                                target_transform=DataSource._target_transform)
         self.testset = CelebA(root=_path,
                               split='test',
                               target_type=target_types,
-                              download=True,
+                              download=False,
                               transform=_transform,
                               target_transform=DataSource._target_transform)
 

From 0d9c713501148288e060f8a39866f4155b79686f Mon Sep 17 00:00:00 2001
From: cuiboyuan <boyuanbryan.cui@mail.utoronto.ca>
Date: Thu, 12 May 2022 12:34:55 -0400
Subject: [PATCH 7/8] Provide working CelebA download URL

---
 configs/CelebA/fedavg_resnet18.yml | 5 +++--
 plato/datasources/celeba.py        | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/configs/CelebA/fedavg_resnet18.yml b/configs/CelebA/fedavg_resnet18.yml
index d5160c2d2..ee3db1e3c 100644
--- a/configs/CelebA/fedavg_resnet18.yml
+++ b/configs/CelebA/fedavg_resnet18.yml
@@ -21,6 +21,7 @@ data:
 
     # Only add face identity as labels for training
     celeba_targets:
+        # For ResNet, keep <attr> set to false: attribute labels do not match ResNet's expected output
         attr: false
         identity: true
     
@@ -34,7 +35,7 @@ data:
     partition_size: 20000
 
     # IID or non-IID?
-    sampler: iid
+    sampler: noniid
 
     # The concentration parameter for the Dirichlet distribution
     concentration: 0.5
@@ -50,7 +51,7 @@ trainer:
     rounds: 5
 
     # Whether the training should use multiple GPUs if available
-    parallelized: false
+    parallelized: true
 
     # The maximum number of clients running concurrently
     max_concurrency: 3
diff --git a/plato/datasources/celeba.py b/plato/datasources/celeba.py
index 521b2c076..d94f23173 100644
--- a/plato/datasources/celeba.py
+++ b/plato/datasources/celeba.py
@@ -40,7 +40,7 @@ def __init__(self):
         _path = Config().data.data_path
 
         if not os.path.exists(os.path.join(_path, 'celeba')):
-            celeba_url = ''
+            celeba_url = 'http://iqua.ece.toronto.edu/baochun/celeba.tar.gz'
             DataSource.download(celeba_url, _path)
         else:
             logging.info("CelebA data already decompressed under %s",

From 1e8d332fbe4851ce4ca854a5993d19f1896bd97f Mon Sep 17 00:00:00 2001
From: cuiboyuan <boyuanbryan.cui@mail.utoronto.ca>
Date: Thu, 12 May 2022 12:39:52 -0400
Subject: [PATCH 8/8] Update CINIC-10 download url

---
 configs/CINIC10/fedavg_vgg16.yml | 2 +-
 plato/datasources/cinic10.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/CINIC10/fedavg_vgg16.yml b/configs/CINIC10/fedavg_vgg16.yml
index 575c6791e..05bcd77b4 100644
--- a/configs/CINIC10/fedavg_vgg16.yml
+++ b/configs/CINIC10/fedavg_vgg16.yml
@@ -23,7 +23,7 @@ data:
     data_path: ./data/CINIC-10
 
     #
-    download_url: https://iqua.ece.toronto.edu/~bli/CINIC-10.tar.gz
+    download_url: http://iqua.ece.toronto.edu/baochun/CINIC-10.tar.gz
     # Number of samples in each partition
     partition_size: 20000
 
diff --git a/plato/datasources/cinic10.py b/plato/datasources/cinic10.py
index 1d654f832..4c04c82d4 100644
--- a/plato/datasources/cinic10.py
+++ b/plato/datasources/cinic10.py
@@ -26,7 +26,7 @@ def __init__(self):
                 "Downloading the CINIC-10 dataset. This may take a while.")
             url = Config().data.download_url if hasattr(
                 Config().data, 'download_url'
-            ) else 'https://iqua.ece.toronto.edu/~bli/CINIC-10.tar.gz'
+            ) else 'http://iqua.ece.toronto.edu/baochun/CINIC-10.tar.gz'
             DataSource.download(url, _path)
 
         _transform = transforms.Compose([