From 959880fae9b5e86dc157c37ac16a53462c326179 Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Thu, 5 Aug 2021 10:55:19 +0000 Subject: [PATCH 01/16] Added LFW Dataset --- torchvision/datasets/lfw.py | 260 ++++++++++++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 torchvision/datasets/lfw.py diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py new file mode 100644 index 00000000000..069d1cfa361 --- /dev/null +++ b/torchvision/datasets/lfw.py @@ -0,0 +1,260 @@ +import os +from typing import Any, Callable, Optional, Tuple +from PIL import Image +from torchvision.datasets import VisionDataset +from torchvision.datasets.utils import check_integrity, download_and_extract_archive, download_url, verify_str_arg + + +class LFW_People(VisionDataset): + """`LFW `_ Dataset. + Args: + root (string): Root directory of dataset where directory + ``lfw-py`` exists or will be saved to if download is set to True. + train (bool, optional): If True, creates dataset from "DevTrain" set, otherwise + creates from "DevTest" set. + image_set (str, optional): Type of image funneling to use, ``lfw``, ``lfw-funneled`` or + ``lfw-deepfunneled``. Defaults to ``lfw``. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. 
+ """ + + base_folder = 'lfw-py' + download_url_prefix = "http://vis-www.cs.umass.edu/lfw/" + + file_dict = { + 'lfw': ("lfw.tgz", "a17d05bd522c52d84eca14327a23d494"), + 'lfw_funneled': ("lfw-funneled.tgz", "1b42dfed7d15c9b2dd63d5e5840c86ad"), + 'lfw-deepfunneled': ("lfw-deepfunneled.tgz", "68331da3eb755a505a502b5aacb3c201") + } + + peopleDevTrain = " peopleDevTrain.txt" + peopleDevTest = " peopleDevTest.txt" + + def __init__( + self, + root: str, + train: bool = True, + image_set: str = "lfw", + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + download: bool = False, + ): + super(LFW_People, self).__init__(os.path.join(root, self.base_folder), + transform=transform, target_transform=target_transform) + + self.filename, self.md5 = self.file_dict[verify_str_arg(image_set.lower(), 'image_set', self.file_dict.keys())] + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' + + ' You can use download=True to download it') + + self.images_dir = os.path.join(self.root, image_set) + + if train: + self.split = "Train" + else: + self.split = "Test" + download_url(f"{self.download_url_prefix}peopleDev{self.split}.txt", self.root) + self.people_file = os.path.join(self.root, f"peopleDev{self.split}.txt") + + self.cls_to_names, self.data, self.targets = self._get_people(self.images_dir, self.people_file) + + def _get_people(self, images_dir, people_file): + with open(people_file, 'r') as f: + lines = f.readlines() + n_lines = int(lines[0]) + people = [line.strip().split("\t") for line in lines[1: n_lines + 1]] + + cls_to_names = [] + data = [] + targets = [] + for cls, (identity, num_imgs) in enumerate(people): + cls_to_names.append(identity) + for num in range(1, int(num_imgs) + 1): + img = os.path.join(images_dir, identity, "{}_{:04d}.jpg".format( + identity, num)) + if os.path.exists(img): + data.append(img) + targets.append(cls) + + return cls_to_names, data, targets 
+ + def __len__(self): + return self.data.__len__() + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + img = self.loader(self.data[index]) + target = self.targets[index] + + if self.transform is not None: + img = self.transform(img) + + if self.target_transform is not None: + target = self.target_transform(target) + + return img, target + + def loader(self, path: str) -> Image.Image: + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + + def _check_integrity(self): + fpath = os.path.join(self.root, self.filename) + if not check_integrity(fpath, self.md5): + return False + return True + + def download(self): + if self._check_integrity(): + print('Files already downloaded and verified') + return + url = f"{self.download_url_prefix}{self.filename}" + download_and_extract_archive(url, self.root, filename=self.filename, md5=self.md5) + # download_url("http://vis-www.cs.umass.edu/lfw/lfw-names.txt", self.root) + + def extra_repr(self) -> str: + return "Split: {} \nNo. of classes: {}".format(self.split, len(self.cls_to_names)) + + +class LFW_Pairs(VisionDataset): + """`LFW `_ Dataset. + Args: + root (string): Root directory of dataset where directory + ``lfw-py`` exists or will be saved to if download is set to True. + train (bool, optional): If True, creates dataset from "DevTrain" set, otherwise + creates from "DevTest" set. + image_set (str, optional): Type of image funneling to use, ``lfw``, ``lfw-funneled`` or + ``lfw-deepfunneled``. Defaults to ``lfw``. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. 
+ """ + + base_folder = 'lfw-py' + download_url_prefix = "http://vis-www.cs.umass.edu/lfw/" + + file_dict = { + 'lfw': ("lfw.tgz", "a17d05bd522c52d84eca14327a23d494"), + 'lfw_funneled': ("lfw-funneled.tgz", "1b42dfed7d15c9b2dd63d5e5840c86ad"), + 'lfw-deepfunneled': ("lfw-deepfunneled.tgz", "68331da3eb755a505a502b5aacb3c201") + } + + pairsDevTrain = "pairsDevTrain.txt" + pairsDevTest = "pairsDevTest.txt" + + def __init__( + self, + root: str, + train: bool = True, + image_set: str = "lfw", + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + download: bool = False, + ): + super(LFW_Pairs, self).__init__(os.path.join(root, self.base_folder), + transform=transform, target_transform=target_transform) + + self.filename, self.md5 = self.file_dict[verify_str_arg(image_set.lower(), 'image_set', self.file_dict.keys())] + + if download: + self.download() + + if not self._check_integrity(): + raise RuntimeError('Dataset not found or corrupted.' + + ' You can use download=True to download it') + + self.images_dir = os.path.join(self.root, image_set) + + if train: + self.split = "Train" + else: + self.split = "Test" + download_url(f"{self.download_url_prefix}pairsDev{self.split}.txt", self.root) + self.pairs_file = os.path.join(self.root, f"pairsDev{self.split}.txt") + + self.pair_names, self.data, self.targets = self._get_pairs(self.images_dir, self.pairs_file) + + def _get_pairs(self, images_dir, pairs_file): + with open(pairs_file, 'r') as f: + lines = f.readlines() + n_pairs = int(lines[0]) + matched_pairs = [line.strip().split("\t") for line in lines[1: n_pairs + 1]] + unmatched_pairs = [line.strip().split("\t") for line in lines[n_pairs + 1: 2 * n_pairs + 1]] + + pair_names = [] + data = [] + targets = [] + for pair in matched_pairs: + img1 = os.path.join(images_dir, pair[0], "{}_{:04d}.jpg".format( + pair[0], int(pair[1]))) + img2 = os.path.join(images_dir, pair[0], "{}_{:04d}.jpg".format( + pair[0], int(pair[2]))) + same = 1 # same = 
True + if os.path.exists(img1) and os.path.exists(img2): + pair_names.append((pair[0], pair[0])) + data.append((img1, img2)) + targets.append(same) + for pair in unmatched_pairs: + img1 = os.path.join(images_dir, pair[0], "{}_{:04d}.jpg".format( + pair[0], int(pair[1]))) + img2 = os.path.join(images_dir, pair[2], "{}_{:04d}.jpg".format( + pair[2], int(pair[3]))) + same = 0 # same = False + if os.path.exists(img1) and os.path.exists(img2): + pair_names.append((pair[0], pair[2])) + data.append((img1, img2)) + targets.append(same) + + return pair_names, data, targets + + def __len__(self): + return self.data.__len__() + + def __getitem__(self, index: int) -> Tuple[Any, Any]: + pass + img1, img2 = self.data[index] + img1, img2 = self.loader(img1), self.loader(img2) + target = self.targets[index] + + if self.transform is not None: + img1, img2 = self.transform(img1), self.transform(img2) + + if self.target_transform is not None: + target = self.target_transform(target) + + return (img1, img2), target + + def loader(self, path: str) -> Image.Image: + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + + def _check_integrity(self): + fpath = os.path.join(self.root, self.filename) + if not check_integrity(fpath, self.md5): + return False + return True + + def download(self): + if self._check_integrity(): + print('Files already downloaded and verified') + return + url = f"{self.download_url_prefix}{self.filename}" + download_and_extract_archive(url, self.root, filename=self.filename, md5=self.md5) + # download_url("http://vis-www.cs.umass.edu/lfw/lfw-names.txt", self.root) + + def extra_repr(self) -> str: + return "Split: {}".format(self.split) From ac4b4ad4b6563276b1d123c1068988ddd06ec39d Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Mon, 23 Aug 2021 05:06:33 +0000 Subject: [PATCH 02/16] Added dataset to list in __init__.py --- torchvision/datasets/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/torchvision/datasets/__init__.py b/torchvision/datasets/__init__.py index 3b4a5408ecf..b4298486491 100644 --- a/torchvision/datasets/__init__.py +++ b/torchvision/datasets/__init__.py @@ -26,6 +26,7 @@ from .places365 import Places365 from .kitti import Kitti from .inaturalist import INaturalist +from .lfw import LFWPeople, LFWPairs __all__ = ('LSUN', 'LSUNClass', 'ImageFolder', 'DatasetFolder', 'FakeData', @@ -36,5 +37,5 @@ 'VOCSegmentation', 'VOCDetection', 'Cityscapes', 'ImageNet', 'Caltech101', 'Caltech256', 'CelebA', 'WIDERFace', 'SBDataset', 'VisionDataset', 'USPS', 'Kinetics400', "Kinetics", 'HMDB51', 'UCF101', - 'Places365', 'Kitti', "INaturalist" + 'Places365', 'Kitti', "INaturalist", "LFWPeople", "LFWPairs" ) From cfef8c8f816014a69d137542d81d895fbe2c6c04 Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Mon, 23 Aug 2021 05:25:40 +0000 Subject: [PATCH 03/16] Updated lfw.py * Created a common superclass for people and pairs type datatsets * corrected the .download() method --- torchvision/datasets/lfw.py | 200 ++++++++++++++---------------------- 1 file changed, 79 insertions(+), 121 deletions(-) diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index 069d1cfa361..979610df456 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -5,49 +5,35 @@ from torchvision.datasets.utils import check_integrity, download_and_extract_archive, download_url, verify_str_arg -class LFW_People(VisionDataset): - """`LFW `_ Dataset. - Args: - root (string): Root directory of dataset where directory - ``lfw-py`` exists or will be saved to if download is set to True. - train (bool, optional): If True, creates dataset from "DevTrain" set, otherwise - creates from "DevTest" set. - image_set (str, optional): Type of image funneling to use, ``lfw``, ``lfw-funneled`` or - ``lfw-deepfunneled``. Defaults to ``lfw``. - transform (callable, optional): A function/transform that takes in an PIL image - and returns a transformed version. 
E.g, ``transforms.RandomCrop`` - target_transform (callable, optional): A function/transform that takes in the - target and transforms it. - download (bool, optional): If true, downloads the dataset from the internet and - puts it in root directory. If dataset is already downloaded, it is not - downloaded again. - """ +class _LFW(VisionDataset): base_folder = 'lfw-py' download_url_prefix = "http://vis-www.cs.umass.edu/lfw/" file_dict = { - 'lfw': ("lfw.tgz", "a17d05bd522c52d84eca14327a23d494"), - 'lfw_funneled': ("lfw-funneled.tgz", "1b42dfed7d15c9b2dd63d5e5840c86ad"), - 'lfw-deepfunneled': ("lfw-deepfunneled.tgz", "68331da3eb755a505a502b5aacb3c201") + 'original': ("lfw", "lfw.tgz", "a17d05bd522c52d84eca14327a23d494"), + 'funneled': ("lfw_funneled", "lfw-funneled.tgz", "1b42dfed7d15c9b2dd63d5e5840c86ad"), + 'deepfunneled': ("lfw-deepfunneled", "lfw-deepfunneled.tgz", "68331da3eb755a505a502b5aacb3c201") } - peopleDevTrain = " peopleDevTrain.txt" - peopleDevTest = " peopleDevTest.txt" - def __init__( self, root: str, - train: bool = True, - image_set: str = "lfw", + train: bool, + image_set: str, + view: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False, ): - super(LFW_People, self).__init__(os.path.join(root, self.base_folder), - transform=transform, target_transform=target_transform) + super(_LFW, self).__init__(os.path.join(root, self.base_folder), + transform=transform, target_transform=target_transform) - self.filename, self.md5 = self.file_dict[verify_str_arg(image_set.lower(), 'image_set', self.file_dict.keys())] + image_set = verify_str_arg(image_set.lower(), 'image_set', self.file_dict.keys()) + images_dir, self.filename, self.md5 = self.file_dict[image_set] + + self.view = verify_str_arg(view.lower(), 'view', ['people', 'pairs']) + self.split = "Train" if train else "Test" if download: self.download() @@ -56,15 +42,60 @@ def __init__( raise RuntimeError('Dataset not found or corrupted.' 
+ ' You can use download=True to download it') - self.images_dir = os.path.join(self.root, image_set) + self.images_dir = os.path.join(self.root, images_dir) - if train: - self.split = "Train" - else: - self.split = "Test" - download_url(f"{self.download_url_prefix}peopleDev{self.split}.txt", self.root) - self.people_file = os.path.join(self.root, f"peopleDev{self.split}.txt") + def loader(self, path: str) -> Image.Image: + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + + def _check_integrity(self): + fpath = os.path.join(self.root, self.filename) + fname = os.path.join(self.root, f"{self.view}Dev{self.split}.txt") + if not check_integrity(fpath, self.md5) or not check_integrity(fname): + return False + return True + + def download(self): + if self._check_integrity(): + print('Files already downloaded and verified') + return + url = f"{self.download_url_prefix}{self.filename}" + download_and_extract_archive(url, self.root, filename=self.filename, md5=self.md5) + download_url(f"{self.download_url_prefix}{self.view}Dev{self.split}.txt", self.root) + + +class LFWPeople(_LFW): + """`LFW `_ Dataset. + Args: + root (string): Root directory of dataset where directory + ``lfw-py`` exists or will be saved to if download is set to True. + train (bool, optional): If True, creates dataset from "DevTrain" set, otherwise + creates from "DevTest" set. + image_set (str, optional): Type of image funneling to use, ``original``, ``funneled`` or + ``deepfunneled``. Defaults to ``original``. + transform (callable, optional): A function/transform that takes in an PIL image + and returns a transformed version. E.g, ``transforms.RandomCrop`` + target_transform (callable, optional): A function/transform that takes in the + target and transforms it. + download (bool, optional): If true, downloads the dataset from the internet and + puts it in root directory. If dataset is already downloaded, it is not + downloaded again. 
+ """ + def __init__( + self, + root: str, + train: bool = True, + image_set: str = "original", + transform: Optional[Callable] = None, + target_transform: Optional[Callable] = None, + download: bool = False, + ): + super(LFWPeople, self).__init__(root, train, image_set, "people", + transform, target_transform, download) + + self.people_file = os.path.join(self.root, f"peopleDev{self.split}.txt") self.cls_to_names, self.data, self.targets = self._get_people(self.images_dir, self.people_file) def _get_people(self, images_dir, people_file): @@ -79,8 +110,7 @@ def _get_people(self, images_dir, people_file): for cls, (identity, num_imgs) in enumerate(people): cls_to_names.append(identity) for num in range(1, int(num_imgs) + 1): - img = os.path.join(images_dir, identity, "{}_{:04d}.jpg".format( - identity, num)) + img = os.path.join(images_dir, identity, f"{identity}_{num:04d}.jpg") if os.path.exists(img): data.append(img) targets.append(cls) @@ -88,7 +118,7 @@ def _get_people(self, images_dir, people_file): return cls_to_names, data, targets def __len__(self): - return self.data.__len__() + return len(self.data) def __getitem__(self, index: int) -> Tuple[Any, Any]: img = self.loader(self.data[index]) @@ -102,38 +132,19 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]: return img, target - def loader(self, path: str) -> Image.Image: - with open(path, 'rb') as f: - img = Image.open(f) - return img.convert('RGB') - - def _check_integrity(self): - fpath = os.path.join(self.root, self.filename) - if not check_integrity(fpath, self.md5): - return False - return True - - def download(self): - if self._check_integrity(): - print('Files already downloaded and verified') - return - url = f"{self.download_url_prefix}{self.filename}" - download_and_extract_archive(url, self.root, filename=self.filename, md5=self.md5) - # download_url("http://vis-www.cs.umass.edu/lfw/lfw-names.txt", self.root) - def extra_repr(self) -> str: return "Split: {} \nNo. 
of classes: {}".format(self.split, len(self.cls_to_names)) -class LFW_Pairs(VisionDataset): +class LFWPairs(_LFW): """`LFW `_ Dataset. Args: root (string): Root directory of dataset where directory ``lfw-py`` exists or will be saved to if download is set to True. train (bool, optional): If True, creates dataset from "DevTrain" set, otherwise creates from "DevTest" set. - image_set (str, optional): Type of image funneling to use, ``lfw``, ``lfw-funneled`` or - ``lfw-deepfunneled``. Defaults to ``lfw``. + image_set (str, optional): Type of image funneling to use, ``original``, ``funneled`` or + ``deepfunneled``. Defaults to ``original``. transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. E.g, ``transforms.RandomCrop`` target_transform (callable, optional): A function/transform that takes in the @@ -143,48 +154,19 @@ class LFW_Pairs(VisionDataset): downloaded again. """ - base_folder = 'lfw-py' - download_url_prefix = "http://vis-www.cs.umass.edu/lfw/" - - file_dict = { - 'lfw': ("lfw.tgz", "a17d05bd522c52d84eca14327a23d494"), - 'lfw_funneled': ("lfw-funneled.tgz", "1b42dfed7d15c9b2dd63d5e5840c86ad"), - 'lfw-deepfunneled': ("lfw-deepfunneled.tgz", "68331da3eb755a505a502b5aacb3c201") - } - - pairsDevTrain = "pairsDevTrain.txt" - pairsDevTest = "pairsDevTest.txt" - def __init__( self, root: str, train: bool = True, - image_set: str = "lfw", + image_set: str = "original", transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False, ): - super(LFW_Pairs, self).__init__(os.path.join(root, self.base_folder), - transform=transform, target_transform=target_transform) + super(LFWPairs, self).__init__(root, train, image_set, "pairs", + transform, target_transform, download) - self.filename, self.md5 = self.file_dict[verify_str_arg(image_set.lower(), 'image_set', self.file_dict.keys())] - - if download: - self.download() - - if not self._check_integrity(): - raise 
RuntimeError('Dataset not found or corrupted.' + - ' You can use download=True to download it') - - self.images_dir = os.path.join(self.root, image_set) - - if train: - self.split = "Train" - else: - self.split = "Test" - download_url(f"{self.download_url_prefix}pairsDev{self.split}.txt", self.root) self.pairs_file = os.path.join(self.root, f"pairsDev{self.split}.txt") - self.pair_names, self.data, self.targets = self._get_pairs(self.images_dir, self.pairs_file) def _get_pairs(self, images_dir, pairs_file): @@ -198,20 +180,16 @@ def _get_pairs(self, images_dir, pairs_file): data = [] targets = [] for pair in matched_pairs: - img1 = os.path.join(images_dir, pair[0], "{}_{:04d}.jpg".format( - pair[0], int(pair[1]))) - img2 = os.path.join(images_dir, pair[0], "{}_{:04d}.jpg".format( - pair[0], int(pair[2]))) + img1 = os.path.join(images_dir, pair[0], f"{pair[0]}_{int(pair[1]):04d}.jpg") + img2 = os.path.join(images_dir, pair[0], f"{pair[0]}_{int(pair[2]):04d}.jpg") same = 1 # same = True if os.path.exists(img1) and os.path.exists(img2): pair_names.append((pair[0], pair[0])) data.append((img1, img2)) targets.append(same) for pair in unmatched_pairs: - img1 = os.path.join(images_dir, pair[0], "{}_{:04d}.jpg".format( - pair[0], int(pair[1]))) - img2 = os.path.join(images_dir, pair[2], "{}_{:04d}.jpg".format( - pair[2], int(pair[3]))) + img1 = os.path.join(images_dir, pair[0], f"{pair[0]}_{int(pair[1]):04d}.jpg") + img2 = os.path.join(images_dir, pair[2], f"{pair[2]}_{int(pair[3]):04d}.jpg") same = 0 # same = False if os.path.exists(img1) and os.path.exists(img2): pair_names.append((pair[0], pair[2])) @@ -221,10 +199,9 @@ def _get_pairs(self, images_dir, pairs_file): return pair_names, data, targets def __len__(self): - return self.data.__len__() + return len(self.data) def __getitem__(self, index: int) -> Tuple[Any, Any]: - pass img1, img2 = self.data[index] img1, img2 = self.loader(img1), self.loader(img2) target = self.targets[index] @@ -237,24 +214,5 @@ def 
__getitem__(self, index: int) -> Tuple[Any, Any]: return (img1, img2), target - def loader(self, path: str) -> Image.Image: - with open(path, 'rb') as f: - img = Image.open(f) - return img.convert('RGB') - - def _check_integrity(self): - fpath = os.path.join(self.root, self.filename) - if not check_integrity(fpath, self.md5): - return False - return True - - def download(self): - if self._check_integrity(): - print('Files already downloaded and verified') - return - url = f"{self.download_url_prefix}{self.filename}" - download_and_extract_archive(url, self.root, filename=self.filename, md5=self.md5) - # download_url("http://vis-www.cs.umass.edu/lfw/lfw-names.txt", self.root) - def extra_repr(self) -> str: return "Split: {}".format(self.split) From c7bf4aeb4e186d1600324c1f9bf14477c9dd48e7 Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Thu, 26 Aug 2021 17:48:14 +0530 Subject: [PATCH 04/16] Added docstrings and updated datasets.rst --- docs/source/datasets.rst | 11 +++++++++++ torchvision/datasets/lfw.py | 23 +++++++++++++++++------ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst index 90cc4458d7d..050622625ec 100644 --- a/docs/source/datasets.rst +++ b/docs/source/datasets.rst @@ -147,6 +147,17 @@ KMNIST .. autoclass:: KMNIST +LFW +~~~~~ + +.. autoclass:: LFWPeople + :members: __getitem__ + :special-members: + +.. autoclass:: LFWPairs + :members: __getitem__ + :special-members: + LSUN ~~~~ diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index 979610df456..19ce1d70386 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -64,6 +64,9 @@ def download(self): download_and_extract_archive(url, self.root, filename=self.filename, md5=self.md5) download_url(f"{self.download_url_prefix}{self.view}Dev{self.split}.txt", self.root) + def __len__(self): + return len(self.data) + class LFWPeople(_LFW): """`LFW `_ Dataset. 
@@ -117,10 +120,14 @@ def _get_people(self, images_dir, people_file): return cls_to_names, data, targets - def __len__(self): - return len(self.data) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + """ + Args: + index (int): Index + + Returns: + tuple: Tuple (image, target) where target is the identity of the person. + """ img = self.loader(self.data[index]) target = self.targets[index] @@ -198,10 +205,14 @@ def _get_pairs(self, images_dir, pairs_file): return pair_names, data, targets - def __len__(self): - return len(self.data) - def __getitem__(self, index: int) -> Tuple[Any, Any]: + """ + Args: + index (int): Index + + Returns: + tuple: (image1, image2, target) where target is `0` for different indentities and `1` for same identities. + """ img1, img2 = self.data[index] img1, img2 = self.loader(img1), self.loader(img2) target = self.targets[index] From 749308a3d8a3f77fc5e7dc668e8674d1ca33b7c2 Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Sat, 28 Aug 2021 21:55:55 +0530 Subject: [PATCH 05/16] Wrote tests for LFWPeople and LFWPairs --- test/test_datasets.py | 64 ++++++++++++++++++++++++++++++++++++- torchvision/datasets/lfw.py | 7 ++-- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 5b7eabc4cb1..cd456a01d67 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -719,7 +719,7 @@ class CocoDetectionTestCase(datasets_utils.ImageDatasetTestCase): _IMAGE_FOLDER = "images" _ANNOTATIONS_FOLDER = "annotations" _ANNOTATIONS_FILE = "annotations.json" - +` def dataset_args(self, tmpdir, config): tmpdir = pathlib.Path(tmpdir) root = tmpdir / self._IMAGE_FOLDER @@ -1790,5 +1790,67 @@ def test_targets(self): assert item[6] == i // 3 +class _LFWPeopleTestCase(datasets_utils.DatasetTestCase): + DATASET_CLASS = datasets.LFWPeople + FEATURE_TYPES = (PIL.Image.Image, int) + DEFAULT_CONFIG = dict(image_set="deepfunneled") + ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + train=(True, 
False), + image_set=('original', 'funneled', 'deepfunneled') + ) + _IMAGES_DIR = { + "original": "lfw", + "funneled": "lfw_funneled", + "deepfunneled": "lfw-deepfunneled" + } + + def inject_fake_data(self, tmpdir: str, config: Dict[str, Any]) -> Union[int, Dict[str, Any]]: + tmpdir = pathlib.Path(tmpdir) / "lfw-py" + os.makedirs(tmpdir, exist_ok=True) + split = "Train" if config["train"] else "Test" + self._create_images_dir(tmpdir, self._IMAGES_DIR[config["image_set"]], split) + + def _create_images_dir(self, root, idir, split): + num_people = 5 + with open(pathlib.Path(root) / f"peopleDev{split}.txt", "w") as file: + file.write(num_people) + for i in num_people: + name = datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ + + datasets_utils.create_random_string(random.randint(4, 7)) + no = random.randint(1, 100) + file.write(f"\n{name}\t{no}") + datasets_utils.create_image_file(os.path.join(root, idir), f"{name}_{no:04d}.jpg", 250) + + return num_people + + +class _LFWPairsTestCase(datasets_utils.DatasetTestCase): + DATASET_CLASS = datasets.LFWPairs + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, int) + + def _create_images_dir(self, root, idir, split): + num_pairs = 7 # effectively 7*2 = 14 + with open(pathlib.Path(root) / f"pairsDev{split}.txt", "w") as file: + file.write(num_pairs) + for i in num_pairs: + name1 = datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ + + datasets_utils.create_random_string(random.randint(4, 7)) + no1, no2 = random.randint(1, 100), random.randint(1, 100) + file.write(f"\n{name1}\t{no1}\t{no2}") + datasets_utils.create_image_file(os.path.join(root, idir), f"{name1}_{no1:04d}.jpg", 250) + datasets_utils.create_image_file(os.path.join(root, idir), f"{name1}_{no2:04d}.jpg", 250) + for i in num_pairs: + name1 = datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ + + datasets_utils.create_random_string(random.randint(4, 7)) + name2 = datasets_utils.create_random_string(random.randint(5, 7)) 
+ '_' \ + + datasets_utils.create_random_string(random.randint(4, 7)) + no1, no2 = random.randint(1, 100), random.randint(1, 100) + file.write(f"\n{name1}\t{no1}\t{name2}\t{no2}") + datasets_utils.create_image_file(os.path.join(root, idir), f"{name1}_{no1:04d}.jpg", 250) + datasets_utils.create_image_file(os.path.join(root, idir), f"{name2}_{no2:04d}.jpg", 250) + + return num_pairs * 2 + + if __name__ == "__main__": unittest.main() diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index 19ce1d70386..3abf0c67792 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -34,6 +34,7 @@ def __init__( self.view = verify_str_arg(view.lower(), 'view', ['people', 'pairs']) self.split = "Train" if train else "Test" + self.data = [] if download: self.download() @@ -44,7 +45,7 @@ def __init__( self.images_dir = os.path.join(self.root, images_dir) - def loader(self, path: str) -> Image.Image: + def _loader(self, path: str) -> Image.Image: with open(path, 'rb') as f: img = Image.open(f) return img.convert('RGB') @@ -128,7 +129,7 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]: Returns: tuple: Tuple (image, target) where target is the identity of the person. """ - img = self.loader(self.data[index]) + img = self._loader(self.data[index]) target = self.targets[index] if self.transform is not None: @@ -214,7 +215,7 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]: tuple: (image1, image2, target) where target is `0` for different indentities and `1` for same identities. 
""" img1, img2 = self.data[index] - img1, img2 = self.loader(img1), self.loader(img2) + img1, img2 = self._loader(img1), self._loader(img2) target = self.targets[index] if self.transform is not None: From 6590da3cb50a0b83897bbbe5b15a14f3f2f6d7e6 Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Sun, 29 Aug 2021 00:17:53 +0530 Subject: [PATCH 06/16] Resolved mypy error: Need type annotation for "data" --- test/test_datasets.py | 2 +- torchvision/datasets/lfw.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index cd456a01d67..9b7d849e9ef 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -719,7 +719,7 @@ class CocoDetectionTestCase(datasets_utils.ImageDatasetTestCase): _IMAGE_FOLDER = "images" _ANNOTATIONS_FOLDER = "annotations" _ANNOTATIONS_FILE = "annotations.json" -` + def dataset_args(self, tmpdir, config): tmpdir = pathlib.Path(tmpdir) root = tmpdir / self._IMAGE_FOLDER diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index 3abf0c67792..2e71d37a25c 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -1,5 +1,5 @@ import os -from typing import Any, Callable, Optional, Tuple +from typing import Any, Callable, List, Optional, Tuple from PIL import Image from torchvision.datasets import VisionDataset from torchvision.datasets.utils import check_integrity, download_and_extract_archive, download_url, verify_str_arg @@ -34,7 +34,7 @@ def __init__( self.view = verify_str_arg(view.lower(), 'view', ['people', 'pairs']) self.split = "Train" if train else "Test" - self.data = [] + self.data: List[Any] = [] if download: self.download() From acb68ae90680ea1cbc4e395eb246988666f097c1 Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Sun, 29 Aug 2021 00:36:56 +0530 Subject: [PATCH 07/16] Updated inject_fake_data method for LFWPeople --- test/test_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_datasets.py 
b/test/test_datasets.py index 9b7d849e9ef..9c8027f787d 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1804,7 +1804,7 @@ class _LFWPeopleTestCase(datasets_utils.DatasetTestCase): "deepfunneled": "lfw-deepfunneled" } - def inject_fake_data(self, tmpdir: str, config: Dict[str, Any]) -> Union[int, Dict[str, Any]]: + def inject_fake_data(self, tmpdir, config): tmpdir = pathlib.Path(tmpdir) / "lfw-py" os.makedirs(tmpdir, exist_ok=True) split = "Train" if config["train"] else "Test" From 10fffb098a0bf55b8482197b5d98044722daf2ec Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Sun, 29 Aug 2021 13:31:09 +0530 Subject: [PATCH 08/16] Updated tests for LFW --- test/test_datasets.py | 52 ++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 9c8027f787d..a166a9cb2dd 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1790,7 +1790,7 @@ def test_targets(self): assert item[6] == i // 3 -class _LFWPeopleTestCase(datasets_utils.DatasetTestCase): +class LFWPeopleTestCase(datasets_utils.DatasetTestCase): DATASET_CLASS = datasets.LFWPeople FEATURE_TYPES = (PIL.Image.Image, int) DEFAULT_CONFIG = dict(image_set="deepfunneled") @@ -1808,46 +1808,56 @@ def inject_fake_data(self, tmpdir, config): tmpdir = pathlib.Path(tmpdir) / "lfw-py" os.makedirs(tmpdir, exist_ok=True) split = "Train" if config["train"] else "Test" - self._create_images_dir(tmpdir, self._IMAGES_DIR[config["image_set"]], split) + return dict( + num_examples=self._create_images_dir(tmpdir, self._IMAGES_DIR[config["image_set"]], split), + split=split + ) def _create_images_dir(self, root, idir, split): - num_people = 5 - with open(pathlib.Path(root) / f"peopleDev{split}.txt", "w") as file: - file.write(num_people) - for i in num_people: - name = datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ - + datasets_utils.create_random_string(random.randint(4, 7)) - no = 
random.randint(1, 100) - file.write(f"\n{name}\t{no}") - datasets_utils.create_image_file(os.path.join(root, idir), f"{name}_{no:04d}.jpg", 250) + idir = os.path.join(root, idir) + os.makedirs(idir, exist_ok=True) + num_people = 4 + num_examples = 0 + flines = [str(num_people)] + for i in range(num_people): + name = datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ + + datasets_utils.create_random_string(random.randint(4, 7)) + no = random.randint(1, 10) + flines.append(f"\n{name}\t{no}") + datasets_utils.create_image_folder(idir, name, lambda n: f"{name}_{n+1:04d}.jpg", no, 250) + num_examples += no + with open(pathlib.Path(root) / f"peopleDev{split}.txt", "w") as f: + f.writelines(flines) - return num_people + return num_examples -class _LFWPairsTestCase(datasets_utils.DatasetTestCase): +class LFWPairsTestCase(LFWPeopleTestCase): DATASET_CLASS = datasets.LFWPairs - FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, int) + FEATURE_TYPES = ((PIL.Image.Image, PIL.Image.Image), int) def _create_images_dir(self, root, idir, split): + idir = os.path.join(root, idir) + os.makedirs(idir, exist_ok=True) num_pairs = 7 # effectively 7*2 = 14 with open(pathlib.Path(root) / f"pairsDev{split}.txt", "w") as file: - file.write(num_pairs) - for i in num_pairs: + file.write(str(num_pairs)) + for i in range(num_pairs): name1 = datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ + datasets_utils.create_random_string(random.randint(4, 7)) no1, no2 = random.randint(1, 100), random.randint(1, 100) file.write(f"\n{name1}\t{no1}\t{no2}") - datasets_utils.create_image_file(os.path.join(root, idir), f"{name1}_{no1:04d}.jpg", 250) - datasets_utils.create_image_file(os.path.join(root, idir), f"{name1}_{no2:04d}.jpg", 250) - for i in num_pairs: + datasets_utils.create_image_folder(idir, name1, lambda _: f"{name1}_{no1:04d}.jpg", 1, 250) + datasets_utils.create_image_folder(idir, name1, lambda _: f"{name1}_{no2:04d}.jpg", 1, 250) + for i in range(num_pairs): name1 
= datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ + datasets_utils.create_random_string(random.randint(4, 7)) name2 = datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ + datasets_utils.create_random_string(random.randint(4, 7)) no1, no2 = random.randint(1, 100), random.randint(1, 100) file.write(f"\n{name1}\t{no1}\t{name2}\t{no2}") - datasets_utils.create_image_file(os.path.join(root, idir), f"{name1}_{no1:04d}.jpg", 250) - datasets_utils.create_image_file(os.path.join(root, idir), f"{name2}_{no2:04d}.jpg", 250) + datasets_utils.create_image_folder(idir, name1, lambda _: f"{name1}_{no1:04d}.jpg", 1, 250) + datasets_utils.create_image_folder(idir, name2, lambda _: f"{name2}_{no2:04d}.jpg", 1, 250) return num_pairs * 2 From 87197f54ab95d90e435b14cb9d4d3d427d590ad6 Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Tue, 31 Aug 2021 17:23:48 +0000 Subject: [PATCH 09/16] Updated LFW tests and minor changes in lfw.py --- test/test_datasets.py | 45 +++++++++++++++++++------------------ torchvision/datasets/lfw.py | 12 ++++++---- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index a166a9cb2dd..e4ed2772291 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1793,7 +1793,6 @@ def test_targets(self): class LFWPeopleTestCase(datasets_utils.DatasetTestCase): DATASET_CLASS = datasets.LFWPeople FEATURE_TYPES = (PIL.Image.Image, int) - DEFAULT_CONFIG = dict(image_set="deepfunneled") ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( train=(True, False), image_set=('original', 'funneled', 'deepfunneled') @@ -1820,8 +1819,7 @@ def _create_images_dir(self, root, idir, split): num_examples = 0 flines = [str(num_people)] for i in range(num_people): - name = datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ - + datasets_utils.create_random_string(random.randint(4, 7)) + name = self._create_random_id() no = random.randint(1, 10) flines.append(f"\n{name}\t{no}") 
datasets_utils.create_image_folder(idir, name, lambda n: f"{name}_{n+1:04d}.jpg", no, 250) @@ -1831,36 +1829,39 @@ def _create_images_dir(self, root, idir, split): return num_examples + def _create_random_id(self): + return datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ + + datasets_utils.create_random_string(random.randint(4, 7)) + class LFWPairsTestCase(LFWPeopleTestCase): DATASET_CLASS = datasets.LFWPairs - FEATURE_TYPES = ((PIL.Image.Image, PIL.Image.Image), int) + FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, int) def _create_images_dir(self, root, idir, split): idir = os.path.join(root, idir) os.makedirs(idir, exist_ok=True) num_pairs = 7 # effectively 7*2 = 14 - with open(pathlib.Path(root) / f"pairsDev{split}.txt", "w") as file: - file.write(str(num_pairs)) - for i in range(num_pairs): - name1 = datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ - + datasets_utils.create_random_string(random.randint(4, 7)) - no1, no2 = random.randint(1, 100), random.randint(1, 100) - file.write(f"\n{name1}\t{no1}\t{no2}") - datasets_utils.create_image_folder(idir, name1, lambda _: f"{name1}_{no1:04d}.jpg", 1, 250) - datasets_utils.create_image_folder(idir, name1, lambda _: f"{name1}_{no2:04d}.jpg", 1, 250) - for i in range(num_pairs): - name1 = datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ - + datasets_utils.create_random_string(random.randint(4, 7)) - name2 = datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ - + datasets_utils.create_random_string(random.randint(4, 7)) - no1, no2 = random.randint(1, 100), random.randint(1, 100) - file.write(f"\n{name1}\t{no1}\t{name2}\t{no2}") - datasets_utils.create_image_folder(idir, name1, lambda _: f"{name1}_{no1:04d}.jpg", 1, 250) - datasets_utils.create_image_folder(idir, name2, lambda _: f"{name2}_{no2:04d}.jpg", 1, 250) + flines = [str(num_pairs)] + for i in range(num_pairs): + name1 = self._create_random_id() + no1, no2 = random.randint(1, 100), 
random.randint(1, 100) + flines.append(f"\n{name1}\t{no1}\t{no2}") + self._create_id_folder(idir, name1, no1, name1, no2) + for i in range(num_pairs): + name1, name2 = self._create_random_id(), self._create_random_id() + no1, no2 = random.randint(1, 100), random.randint(1, 100) + flines.append(f"\n{name1}\t{no1}\t{name2}\t{no2}") + self._create_id_folder(idir, name1, no1, name2, no2) + with open(pathlib.Path(root) / f"pairsDev{split}.txt", "w") as f: + f.writelines(flines) return num_pairs * 2 + def _create_id_folder(self, root, name1, no1, name2, no2): + datasets_utils.create_image_folder(root, name1, lambda _: f"{name1}_{no1:04d}.jpg", 1, 250) + datasets_utils.create_image_folder(root, name2, lambda _: f"{name2}_{no2:04d}.jpg", 1, 250) + if __name__ == "__main__": unittest.main() diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index 2e71d37a25c..f88e6f6b363 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -1,8 +1,8 @@ import os from typing import Any, Callable, List, Optional, Tuple from PIL import Image -from torchvision.datasets import VisionDataset -from torchvision.datasets.utils import check_integrity, download_and_extract_archive, download_url, verify_str_arg +from .vision import VisionDataset +from .utils import check_integrity, download_and_extract_archive, download_url, verify_str_arg class _LFW(VisionDataset): @@ -71,6 +71,7 @@ def __len__(self): class LFWPeople(_LFW): """`LFW `_ Dataset. + Args: root (string): Root directory of dataset where directory ``lfw-py`` exists or will be saved to if download is set to True. @@ -85,6 +86,7 @@ class LFWPeople(_LFW): download (bool, optional): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. + """ def __init__( @@ -146,6 +148,7 @@ def extra_repr(self) -> str: class LFWPairs(_LFW): """`LFW `_ Dataset. 
+ Args: root (string): Root directory of dataset where directory ``lfw-py`` exists or will be saved to if download is set to True. @@ -160,6 +163,7 @@ class LFWPairs(_LFW): download (bool, optional): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. + """ def __init__( @@ -206,7 +210,7 @@ def _get_pairs(self, images_dir, pairs_file): return pair_names, data, targets - def __getitem__(self, index: int) -> Tuple[Any, Any]: + def __getitem__(self, index: int) -> Tuple[Any, Any, int]: """ Args: index (int): Index @@ -224,7 +228,7 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]: if self.target_transform is not None: target = self.target_transform(target) - return (img1, img2), target + return img1, img2, target def extra_repr(self) -> str: return "Split: {}".format(self.split) From 754951786cdcaf29c3ab16ea673db9e4e24a56f2 Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Sat, 4 Sep 2021 15:34:35 +0530 Subject: [PATCH 10/16] Updated LFW * Added functionality for 10-fold validation view * Optimized the code so as to replace repeated lines by method in super class * Updated LFWPeople to get classes from all lfw-names.txt rather than just the classes from trainset --- torchvision/datasets/lfw.py | 136 ++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 59 deletions(-) diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index f88e6f6b363..7f245d0efac 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -16,10 +16,13 @@ class _LFW(VisionDataset): 'deepfunneled': ("lfw-deepfunneled", "lfw-deepfunneled.tgz", "68331da3eb755a505a502b5aacb3c201") } + annot_file = {'10fold': '', 'train': 'DevTrain', 'test': 'DevTest'} + names = "lfw-names.txt" + def __init__( self, root: str, - train: bool, + split: str, image_set: str, view: str, transform: Optional[Callable] = None, @@ -33,7 +36,8 @@ def __init__( images_dir, self.filename,
self.md5 = self.file_dict[image_set] self.view = verify_str_arg(view.lower(), 'view', ['people', 'pairs']) - self.split = "Train" if train else "Test" + self.split = verify_str_arg(split.lower(), 'split', ['10fold', 'train', 'test']) + self.labels_file = f"{self.view}{self.annot_file[self.split]}.txt" self.data: List[Any] = [] if download: @@ -51,10 +55,10 @@ def _loader(self, path: str) -> Image.Image: return img.convert('RGB') def _check_integrity(self): - fpath = os.path.join(self.root, self.filename) - fname = os.path.join(self.root, f"{self.view}Dev{self.split}.txt") - if not check_integrity(fpath, self.md5) or not check_integrity(fname): + if not check_integrity(os.path.join(self.root, self.filename), self.md5) or not check_integrity(os.path.join(self.root, self.labels_file)): return False + if self.view == "people": + return check_integrity(os.path.join(self.root, self.names)) return True def download(self): @@ -63,7 +67,15 @@ def download(self): return url = f"{self.download_url_prefix}{self.filename}" download_and_extract_archive(url, self.root, filename=self.filename, md5=self.md5) - download_url(f"{self.download_url_prefix}{self.view}Dev{self.split}.txt", self.root) + download_url(f"{self.download_url_prefix}{self.labels_file}", self.root) + if self.view == "people": + download_url(f"{self.download_url_prefix}{self.names}", self.root) + + def _get_path(self, identity, no): + return os.path.join(self.images_dir, identity, f"{identity}_{int(no):04d}.jpg") + + def extra_repr(self) -> str: + return "Split: {}".format(self.split) def __len__(self): return len(self.data) @@ -92,36 +104,43 @@ class LFWPeople(_LFW): def __init__( self, root: str, - train: bool = True, - image_set: str = "original", + split: str = "10fold", + image_set: str = "funneled", transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False, ): - super(LFWPeople, self).__init__(root, train, image_set, "people", + super(LFWPeople, 
self).__init__(root, split, image_set, "people", transform, target_transform, download) - self.people_file = os.path.join(self.root, f"peopleDev{self.split}.txt") - self.cls_to_names, self.data, self.targets = self._get_people(self.images_dir, self.people_file) + self.class_to_idx = self._get_classes() + self.data, self.targets = self._get_people() - def _get_people(self, images_dir, people_file): - with open(people_file, 'r') as f: + def _get_people(self): + with open(os.path.join(self.root, self.labels_file), 'r') as f: + lines = f.readlines() + n_folds, s = int(lines[0]), 1 if self.split == "10fold" else 1, 0 + + data, target = [], [] + for fold in range(n_folds): + n_lines = int(lines[s]) + people = [line.strip().split("\t") for line in lines[s + 1: s + n_lines + 2]] + s += n_lines + 2 + for i, (identity, num_imgs) in enumerate(people): + for num in range(1, int(num_imgs) + 1): + img = self._get_path(identity, num) + if os.path.exists(img): + data.append(img) + targets.append(self.class_to_idx[identity]) + + return data, targets + + def _get_classes(self): + with open(os.path.join(self.root, self.names), 'r') as f: lines = f.readlines() - n_lines = int(lines[0]) - people = [line.strip().split("\t") for line in lines[1: n_lines + 1]] - - cls_to_names = [] - data = [] - targets = [] - for cls, (identity, num_imgs) in enumerate(people): - cls_to_names.append(identity) - for num in range(1, int(num_imgs) + 1): - img = os.path.join(images_dir, identity, f"{identity}_{num:04d}.jpg") - if os.path.exists(img): - data.append(img) - targets.append(cls) - - return cls_to_names, data, targets + names = [line.strip().split()[0] for line in lines] + class_to_idx = {name: i for i, name in enumerate(names)} + return class_to_idx def __getitem__(self, index: int) -> Tuple[Any, Any]: """ @@ -143,7 +162,7 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]: return img, target def extra_repr(self) -> str: - return "Split: {} \nNo. 
of classes: {}".format(self.split, len(self.cls_to_names)) + return "No. of classes: {}".format(len(self.class_to_idx)) class LFWPairs(_LFW): @@ -169,44 +188,43 @@ class LFWPairs(_LFW): def __init__( self, root: str, - train: bool = True, - image_set: str = "original", + split: str = "10fold", + image_set: str = "funneled", transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False, ): - super(LFWPairs, self).__init__(root, train, image_set, "pairs", + super(LFWPairs, self).__init__(root, split, image_set, "pairs", transform, target_transform, download) - self.pairs_file = os.path.join(self.root, f"pairsDev{self.split}.txt") - self.pair_names, self.data, self.targets = self._get_pairs(self.images_dir, self.pairs_file) + self.pair_names, self.data, self.targets = self._get_pairs(self.images_dir) - def _get_pairs(self, images_dir, pairs_file): - with open(pairs_file, 'r') as f: + def _get_pairs(self, images_dir): + with open(os.path.join(self.root, self.labels_file), 'r') as f: lines = f.readlines() - n_pairs = int(lines[0]) - matched_pairs = [line.strip().split("\t") for line in lines[1: n_pairs + 1]] - unmatched_pairs = [line.strip().split("\t") for line in lines[n_pairs + 1: 2 * n_pairs + 1]] - - pair_names = [] - data = [] - targets = [] - for pair in matched_pairs: - img1 = os.path.join(images_dir, pair[0], f"{pair[0]}_{int(pair[1]):04d}.jpg") - img2 = os.path.join(images_dir, pair[0], f"{pair[0]}_{int(pair[2]):04d}.jpg") - same = 1 # same = True - if os.path.exists(img1) and os.path.exists(img2): - pair_names.append((pair[0], pair[0])) - data.append((img1, img2)) - targets.append(same) - for pair in unmatched_pairs: - img1 = os.path.join(images_dir, pair[0], f"{pair[0]}_{int(pair[1]):04d}.jpg") - img2 = os.path.join(images_dir, pair[2], f"{pair[2]}_{int(pair[3]):04d}.jpg") - same = 0 # same = False - if os.path.exists(img1) and os.path.exists(img2): - pair_names.append((pair[0], pair[2])) - data.append((img1, 
img2)) - targets.append(same) + if self.split == "10fold": + n_folds, n_pairs = lines[0].split("\t") + n_folds, n_pairs = int(n_folds), int(n_pairs) + else: + n_folds, n_pairs = 1, int(lines[0]) + s = 1 + pair_names, data, targets = [], [], [] + for fold in range(n_folds): + matched_pairs = [line.strip().split("\t") for line in lines[s: s + n_pairs]] + unmatched_pairs = [line.strip().split("\t") for line in lines[s + n_pairs: s + (2 * n_pairs)]] + s += (2 * n_pairs) + for pair in matched_pairs: + img1, img2, same = self._get_path(pair[0], pair[1]), self._get_path(pair[0], pair[2]), 1 + if os.path.exists(img1) and os.path.exists(img2): + pair_names.append((pair[0], pair[0])) + data.append((img1, img2)) + targets.append(same) + for pair in unmatched_pairs: + img1, img2, same = self._get_path(pair[0], pair[1]), self._get_path(pair[2], pair[3]), 0 + if os.path.exists(img1) and os.path.exists(img2): + pair_names.append((pair[0], pair[2])) + data.append((img1, img2)) + targets.append(same) return pair_names, data, targets From c52c891deb6e39b0e4f6f4bfe1ddd7fb678551bd Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Thu, 9 Sep 2021 17:21:31 +0530 Subject: [PATCH 11/16] Updated lfw.py and tests * Updated inject_fake_data method to create 10fold fake data * Minor changes in docstring and extra_repr --- test/test_datasets.py | 71 +++++++++++++++++++++---------------- torchvision/datasets/lfw.py | 38 ++++++++++---------- 2 files changed, 58 insertions(+), 51 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index e4ed2772291..7e28f441b11 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1794,7 +1794,7 @@ class LFWPeopleTestCase(datasets_utils.DatasetTestCase): DATASET_CLASS = datasets.LFWPeople FEATURE_TYPES = (PIL.Image.Image, int) ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - train=(True, False), + split=('10fold', 'train', 'test'), image_set=('original', 'funneled', 'deepfunneled') ) _IMAGES_DIR = { @@ -1802,36 +1802,43 @@ class 
LFWPeopleTestCase(datasets_utils.DatasetTestCase): "funneled": "lfw_funneled", "deepfunneled": "lfw-deepfunneled" } + _split = {'10fold': '', 'train': 'DevTrain', 'test': 'DevTest'} def inject_fake_data(self, tmpdir, config): tmpdir = pathlib.Path(tmpdir) / "lfw-py" os.makedirs(tmpdir, exist_ok=True) - split = "Train" if config["train"] else "Test" return dict( - num_examples=self._create_images_dir(tmpdir, self._IMAGES_DIR[config["image_set"]], split), - split=split + num_examples=self._create_images_dir(tmpdir, self._IMAGES_DIR[config["image_set"]], config["split"]), + split=config["split"] ) def _create_images_dir(self, root, idir, split): idir = os.path.join(root, idir) os.makedirs(idir, exist_ok=True) - num_people = 4 + n, flines = (10, ["10\n"]) if split == "10fold" else (1, []) num_examples = 0 - flines = [str(num_people)] - for i in range(num_people): - name = self._create_random_id() - no = random.randint(1, 10) - flines.append(f"\n{name}\t{no}") - datasets_utils.create_image_folder(idir, name, lambda n: f"{name}_{n+1:04d}.jpg", no, 250) - num_examples += no - with open(pathlib.Path(root) / f"peopleDev{split}.txt", "w") as f: + names = [] + for _ in range(n): + num_people = random.randint(2, 5) + flines.append(f"{num_people}\n") + for i in range(num_people): + name = self._create_random_id() + no = random.randint(1, 10) + flines.append(f"{name}\t{no}\n") + names.append(f"{name}\t{no}\n") + datasets_utils.create_image_folder(idir, name, lambda n: f"{name}_{n+1:04d}.jpg", no, 250) + num_examples += no + with open(pathlib.Path(root) / f"people{self._split[split]}.txt", "w") as f: f.writelines(flines) + with open(pathlib.Path(root) / "lfw-names.txt", "w") as f: + f.writelines(names) return num_examples def _create_random_id(self): - return datasets_utils.create_random_string(random.randint(5, 7)) + '_' \ - + datasets_utils.create_random_string(random.randint(4, 7)) + part1 = datasets_utils.create_random_string(random.randint(5, 7)) + part2 = 
datasets_utils.create_random_string(random.randint(4, 7)) + return f"{part1}_{part2}" class LFWPairsTestCase(LFWPeopleTestCase): @@ -1841,26 +1848,28 @@ class LFWPairsTestCase(LFWPeopleTestCase): def _create_images_dir(self, root, idir, split): idir = os.path.join(root, idir) os.makedirs(idir, exist_ok=True) - num_pairs = 7 # effectively 7*2 = 14 - flines = [str(num_pairs)] + num_pairs = 7 # effectively 7*2*n = 14*n + n, self.flines = (10, [f"10\t{num_pairs}"]) if split == "10fold" else (1, [str(num_pairs)]) + for _ in range(n): + self._inject_pairs(idir, num_pairs, True) + self._inject_pairs(idir, num_pairs, False) + with open(pathlib.Path(root) / f"pairs{self._split[split]}.txt", "w") as f: + f.writelines(self.flines) + + return num_pairs * 2 * n + + def _inject_pairs(self, root, num_pairs, same): for i in range(num_pairs): name1 = self._create_random_id() + name2 = name1 if same else self._create_random_id() no1, no2 = random.randint(1, 100), random.randint(1, 100) - flines.append(f"\n{name1}\t{no1}\t{no2}") - self._create_id_folder(idir, name1, no1, name1, no2) - for i in range(num_pairs): - name1, name2 = self._create_random_id(), self._create_random_id() - no1, no2 = random.randint(1, 100), random.randint(1, 100) - flines.append(f"\n{name1}\t{no1}\t{name2}\t{no2}") - self._create_id_folder(idir, name1, no1, name2, no2) - with open(pathlib.Path(root) / f"pairsDev{split}.txt", "w") as f: - f.writelines(flines) - - return num_pairs * 2 + if same: + self.flines.append(f"\n{name1}\t{no1}\t{no2}") + else: + self.flines.append(f"\n{name1}\t{no1}\t{name2}\t{no2}") - def _create_id_folder(self, root, name1, no1, name2, no2): - datasets_utils.create_image_folder(root, name1, lambda _: f"{name1}_{no1:04d}.jpg", 1, 250) - datasets_utils.create_image_folder(root, name2, lambda _: f"{name2}_{no2:04d}.jpg", 1, 250) + datasets_utils.create_image_folder(root, name1, lambda _: f"{name1}_{no1:04d}.jpg", 1, 250) + datasets_utils.create_image_folder(root, name2, lambda _: 
f"{name2}_{no2:04d}.jpg", 1, 250) if __name__ == "__main__": diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index 7f245d0efac..440fa765848 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -32,8 +32,8 @@ def __init__( super(_LFW, self).__init__(os.path.join(root, self.base_folder), transform=transform, target_transform=target_transform) - image_set = verify_str_arg(image_set.lower(), 'image_set', self.file_dict.keys()) - images_dir, self.filename, self.md5 = self.file_dict[image_set] + self.image_set = verify_str_arg(image_set.lower(), 'image_set', self.file_dict.keys()) + images_dir, self.filename, self.md5 = self.file_dict[self.image_set] self.view = verify_str_arg(view.lower(), 'view', ['people', 'pairs']) self.split = verify_str_arg(split.lower(), 'split', ['10fold', 'train', 'test']) @@ -75,7 +75,7 @@ def _get_path(self, identity, no): return os.path.join(self.images_dir, identity, f"{identity}_{int(no):04d}.jpg") def extra_repr(self) -> str: - return "Split: {}".format(self.split) + return f"Alignment: {self.image_set}\nSplit: {self.split}" def __len__(self): return len(self.data) @@ -87,12 +87,12 @@ class LFWPeople(_LFW): Args: root (string): Root directory of dataset where directory ``lfw-py`` exists or will be saved to if download is set to True. - train (bool, optional): If True, creates dataset from "DevTrain" set, otherwise - creates from "DevTest" set. + split (string, optional): The image split to use. Can be one of ``train`` (default), ``test``, + ``10fold``. image_set (str, optional): Type of image funneling to use, ``original``, ``funneled`` or ``deepfunneled``. Defaults to ``original``. transform (callable, optional): A function/transform that takes in an PIL image - and returns a transformed version. E.g, ``transforms.RandomCrop`` + and returns a transformed version. 
E.g, ``transforms.RandomRotation`` target_transform (callable, optional): A function/transform that takes in the target and transforms it. download (bool, optional): If true, downloads the dataset from the internet and @@ -104,7 +104,7 @@ class LFWPeople(_LFW): def __init__( self, root: str, - split: str = "10fold", + split: str = "train", image_set: str = "funneled", transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, @@ -117,15 +117,15 @@ def __init__( self.data, self.targets = self._get_people() def _get_people(self): + data, targets = [], [] with open(os.path.join(self.root, self.labels_file), 'r') as f: lines = f.readlines() - n_folds, s = int(lines[0]), 1 if self.split == "10fold" else 1, 0 + n_folds, s = (int(lines[0]), 1) if self.split == "10fold" else (1, 0) - data, target = [], [] for fold in range(n_folds): n_lines = int(lines[s]) - people = [line.strip().split("\t") for line in lines[s + 1: s + n_lines + 2]] - s += n_lines + 2 + people = [line.strip().split("\t") for line in lines[s + 1: s + n_lines + 1]] + s += n_lines + 1 for i, (identity, num_imgs) in enumerate(people): for num in range(1, int(num_imgs) + 1): img = self._get_path(identity, num) @@ -162,7 +162,7 @@ def __getitem__(self, index: int) -> Tuple[Any, Any]: return img, target def extra_repr(self) -> str: - return "No. of classes: {}".format(len(self.class_to_idx)) + return super().extra_repr() + "\nClasses (identities): {}".format(len(self.class_to_idx)) class LFWPairs(_LFW): @@ -171,12 +171,12 @@ class LFWPairs(_LFW): Args: root (string): Root directory of dataset where directory ``lfw-py`` exists or will be saved to if download is set to True. - train (bool, optional): If True, creates dataset from "DevTrain" set, otherwise - creates from "DevTest" set. + split (string, optional): The image split to use. Can be one of ``train``, ``test``, + ``10fold``. Defaults to ``10fold``. 
image_set (str, optional): Type of image funneling to use, ``original``, ``funneled`` or ``deepfunneled``. Defaults to ``original``. transform (callable, optional): A function/transform that takes in an PIL image - and returns a transformed version. E.g, ``transforms.RandomCrop`` + and returns a transformed version. E.g, ``transforms.RandomRotation`` target_transform (callable, optional): A function/transform that takes in the target and transforms it. download (bool, optional): If true, downloads the dataset from the internet and @@ -200,6 +200,7 @@ def __init__( self.pair_names, self.data, self.targets = self._get_pairs(self.images_dir) def _get_pairs(self, images_dir): + pair_names, data, targets = [], [], [] with open(os.path.join(self.root, self.labels_file), 'r') as f: lines = f.readlines() if self.split == "10fold": @@ -208,7 +209,7 @@ def _get_pairs(self, images_dir): else: n_folds, n_pairs = 1, int(lines[0]) s = 1 - pair_names, data, targets = [], [], [] + for fold in range(n_folds): matched_pairs = [line.strip().split("\t") for line in lines[s: s + n_pairs]] unmatched_pairs = [line.strip().split("\t") for line in lines[s + n_pairs: s + (2 * n_pairs)]] @@ -246,7 +247,4 @@ def __getitem__(self, index: int) -> Tuple[Any, Any, int]: if self.target_transform is not None: target = self.target_transform(target) - return img1, img2, target - - def extra_repr(self) -> str: - return "Split: {}".format(self.split) + return img1, img2, target \ No newline at end of file From df96b442b301fbe32f1dd46163b2a4237596477b Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Thu, 9 Sep 2021 18:50:44 +0530 Subject: [PATCH 12/16] resolved py lint errors --- test/test_datasets.py | 2 +- torchvision/datasets/lfw.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 7e28f441b11..32572c07469 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1838,7 +1838,7 @@ def _create_images_dir(self, root, idir, 
split): def _create_random_id(self): part1 = datasets_utils.create_random_string(random.randint(5, 7)) part2 = datasets_utils.create_random_string(random.randint(4, 7)) - return f"{part1}_{part2}" + return f"{part1}_{part2}" class LFWPairsTestCase(LFWPeopleTestCase): diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index 440fa765848..60512ae14e9 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -55,7 +55,8 @@ def _loader(self, path: str) -> Image.Image: return img.convert('RGB') def _check_integrity(self): - if not check_integrity(os.path.join(self.root, self.filename), self.md5) or not check_integrity(os.path.join(self.root, self.labels_file)): + if not check_integrity(os.path.join(self.root, self.filename), self.md5) \ + or not check_integrity(os.path.join(self.root, self.labels_file)): return False if self.view == "people": return check_integrity(os.path.join(self.root, self.names)) @@ -209,7 +210,7 @@ def _get_pairs(self, images_dir): else: n_folds, n_pairs = 1, int(lines[0]) s = 1 - + for fold in range(n_folds): matched_pairs = [line.strip().split("\t") for line in lines[s: s + n_pairs]] unmatched_pairs = [line.strip().split("\t") for line in lines[s + n_pairs: s + (2 * n_pairs)]] @@ -247,4 +248,4 @@ def __getitem__(self, index: int) -> Tuple[Any, Any, int]: if self.target_transform is not None: target = self.target_transform(target) - return img1, img2, target \ No newline at end of file + return img1, img2, target From 4feed66b69bce440f4e77f40ebf309112b044421 Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Thu, 9 Sep 2021 22:34:36 +0530 Subject: [PATCH 13/16] Added checksums for annotation files --- torchvision/datasets/lfw.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index 60512ae14e9..5e063ed807b 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -15,7 +15,15 @@ class 
_LFW(VisionDataset): 'funneled': ("lfw_funneled", "lfw-funneled.tgz", "1b42dfed7d15c9b2dd63d5e5840c86ad"), 'deepfunneled': ("lfw-deepfunneled", "lfw-deepfunneled.tgz", "68331da3eb755a505a502b5aacb3c201") } - + checksums = { + 'pairs.txt': '9f1ba174e4e1c508ff7cdf10ac338a7d', + 'pairsDevTest.txt': '5132f7440eb68cf58910c8a45a2ac10b', + 'pairsDevTrain.txt': '4f27cbf15b2da4a85c1907eb4181ad21', + 'people.txt': '450f0863dd89e85e73936a6d71a3474b', + 'peopleDevTest.txt': 'e4bf5be0a43b5dcd9dc5ccfcb8fb19c5', + 'peopleDevTrain.txt': '54eaac34beb6d042ed3a7d883e247a21', + 'lfw-names.txt': 'a6d0a479bd074669f656265a6e693f6d' + } annot_file = {'10fold': '', 'train': 'DevTrain', 'test': 'DevTest'} names = "lfw-names.txt" @@ -55,11 +63,12 @@ def _loader(self, path: str) -> Image.Image: return img.convert('RGB') def _check_integrity(self): - if not check_integrity(os.path.join(self.root, self.filename), self.md5) \ - or not check_integrity(os.path.join(self.root, self.labels_file)): + st1 = check_integrity(os.path.join(self.root, self.filename), self.md5) + st2 = check_integrity(os.path.join(self.root, self.labels_file), self.checksums[self.labels_file]) + if not st1 or not st2: return False if self.view == "people": - return check_integrity(os.path.join(self.root, self.names)) + return check_integrity(os.path.join(self.root, self.names), self.checksums[self.names]) return True def download(self): From e9cb48e37c44d460a3219aecbf075b34adb46373 Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Thu, 9 Sep 2021 23:30:25 +0530 Subject: [PATCH 14/16] Minor changes in test --- test/test_datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index da5d0fd8df7..5e0ee2fafc9 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1842,7 +1842,7 @@ def _create_images_dir(self, root, idir, split): with open(pathlib.Path(root) / f"people{self._split[split]}.txt", "w") as f: f.writelines(flines) with open(pathlib.Path(root) / 
"lfw-names.txt", "w") as f: - f.writelines(names) + f.writelines(sorted(names)) return num_examples From cc475cccd94ecac96492d8d0802ce10f7e6f1761 Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Fri, 10 Sep 2021 16:59:11 +0530 Subject: [PATCH 15/16] Updated docstrings, defaults and minor changes in test --- test/test_datasets.py | 6 +++--- torchvision/datasets/lfw.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/test/test_datasets.py b/test/test_datasets.py index 5e0ee2fafc9..2f4662fbac9 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -1813,7 +1813,7 @@ class LFWPeopleTestCase(datasets_utils.DatasetTestCase): "funneled": "lfw_funneled", "deepfunneled": "lfw-deepfunneled" } - _split = {'10fold': '', 'train': 'DevTrain', 'test': 'DevTest'} + _file_id = {'10fold': '', 'train': 'DevTrain', 'test': 'DevTest'} def inject_fake_data(self, tmpdir, config): tmpdir = pathlib.Path(tmpdir) / "lfw-py" @@ -1839,7 +1839,7 @@ def _create_images_dir(self, root, idir, split): names.append(f"{name}\t{no}\n") datasets_utils.create_image_folder(idir, name, lambda n: f"{name}_{n+1:04d}.jpg", no, 250) num_examples += no - with open(pathlib.Path(root) / f"people{self._split[split]}.txt", "w") as f: + with open(pathlib.Path(root) / f"people{self._file_id[split]}.txt", "w") as f: f.writelines(flines) with open(pathlib.Path(root) / "lfw-names.txt", "w") as f: f.writelines(sorted(names)) @@ -1864,7 +1864,7 @@ def _create_images_dir(self, root, idir, split): for _ in range(n): self._inject_pairs(idir, num_pairs, True) self._inject_pairs(idir, num_pairs, False) - with open(pathlib.Path(root) / f"pairs{self._split[split]}.txt", "w") as f: + with open(pathlib.Path(root) / f"pairs{self._file_id[split]}.txt", "w") as f: f.writelines(self.flines) return num_pairs * 2 * n diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index 5e063ed807b..725c19f17e0 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -97,10 
+97,10 @@ class LFWPeople(_LFW): Args: root (string): Root directory of dataset where directory ``lfw-py`` exists or will be saved to if download is set to True. - split (string, optional): The image split to use. Can be one of ``train`` (default), ``test``, - ``10fold``. + split (string, optional): The image split to use. Can be one of ``train``, ``test``, + ``10fold`` (default). image_set (str, optional): Type of image funneling to use, ``original``, ``funneled`` or - ``deepfunneled``. Defaults to ``original``. + ``deepfunneled``. Defaults to ``funneled``. transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. E.g, ``transforms.RandomRotation`` target_transform (callable, optional): A function/transform that takes in the @@ -114,7 +114,7 @@ class LFWPeople(_LFW): def __init__( self, root: str, - split: str = "train", + split: str = "10fold", image_set: str = "funneled", transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, @@ -184,7 +184,7 @@ class LFWPairs(_LFW): split (string, optional): The image split to use. Can be one of ``train``, ``test``, ``10fold``. Defaults to ``10fold``. image_set (str, optional): Type of image funneling to use, ``original``, ``funneled`` or - ``deepfunneled``. Defaults to ``original``. + ``deepfunneled``. Defaults to ``funneled``. transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. 
E.g, ``transforms.RandomRotation`` target_transform (callable, optional): A function/transform that takes in the From 146633480bff486b81fed99c2f3f626aa68eee6a Mon Sep 17 00:00:00 2001 From: ABD-01 Date: Fri, 10 Sep 2021 18:09:22 +0530 Subject: [PATCH 16/16] Removed 'os.path.exists' check --- torchvision/datasets/lfw.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/torchvision/datasets/lfw.py b/torchvision/datasets/lfw.py index 725c19f17e0..ecd5c3820ed 100644 --- a/torchvision/datasets/lfw.py +++ b/torchvision/datasets/lfw.py @@ -139,9 +139,8 @@ def _get_people(self): for i, (identity, num_imgs) in enumerate(people): for num in range(1, int(num_imgs) + 1): img = self._get_path(identity, num) - if os.path.exists(img): - data.append(img) - targets.append(self.class_to_idx[identity]) + data.append(img) + targets.append(self.class_to_idx[identity]) return data, targets @@ -226,16 +225,14 @@ def _get_pairs(self, images_dir): s += (2 * n_pairs) for pair in matched_pairs: img1, img2, same = self._get_path(pair[0], pair[1]), self._get_path(pair[0], pair[2]), 1 - if os.path.exists(img1) and os.path.exists(img2): - pair_names.append((pair[0], pair[0])) - data.append((img1, img2)) - targets.append(same) + pair_names.append((pair[0], pair[0])) + data.append((img1, img2)) + targets.append(same) for pair in unmatched_pairs: img1, img2, same = self._get_path(pair[0], pair[1]), self._get_path(pair[2], pair[3]), 0 - if os.path.exists(img1) and os.path.exists(img2): - pair_names.append((pair[0], pair[2])) - data.append((img1, img2)) - targets.append(same) + pair_names.append((pair[0], pair[2])) + data.append((img1, img2)) + targets.append(same) return pair_names, data, targets