Skip to content

Commit

Permalink
Merge pull request #24 from Xu-Justin/feature/pascal-voc-2007
Browse files Browse the repository at this point in the history
add pascal voc 2007 dataset
  • Loading branch information
Xu-Justin authored Nov 26, 2022
2 parents 09ea0c2 + e8b49f0 commit c4773c2
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 9 deletions.
83 changes: 76 additions & 7 deletions Streamlit/section.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os, sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

import patchmentation
from patchmentation.collections import Image, Dataset, ImagePatch, Patch
from patchmentation.utils import loader
from patchmentation.utils import transform
Expand All @@ -27,15 +28,21 @@
# Relative paths to the COCO-format sample dataset.
COCO_FOLDER_IMAGES = 'dataset/sample_format_coco/images/'
COCO_FILE_ANNOTATIONS = 'dataset/sample_format_coco/annotations/instances_default.json'

# Relative paths to the Pascal VOC-format sample dataset.
PASCAL_VOC_FOLDER_IMAGES = 'dataset/sample_format_pascal_voc/JPEGImages/'
PASCAL_VOC_FOLDER_ANNOTATIONS = 'dataset/sample_format_pascal_voc/Annotations/'
PASCAL_VOC_FILE_IMAGESETS = 'dataset/sample_format_pascal_voc/ImageSets/Main/default.txt'
# The same Pascal VOC sample paths under SAMPLE_-prefixed names; dataset_pascal_voc
# passes these as the defaults for both its sample and custom sources.
SAMPLE_PASCAL_VOC_FOLDER_IMAGES = 'dataset/sample_format_pascal_voc/JPEGImages/'
SAMPLE_PASCAL_VOC_FOLDER_ANNOTATIONS = 'dataset/sample_format_pascal_voc/Annotations/'
SAMPLE_PASCAL_VOC_FILE_IMAGESETS = 'dataset/sample_format_pascal_voc/ImageSets/Main/default.txt'

# Display labels for the dataset choices.
# NOTE(review): the code consuming these labels is outside this view - confirm usage.
DATASET_SAMPLE = 'Sample'
DATASET_FORMAT_YOLO = 'YOLO'
DATASET_FORMAT_COCO = 'COCO'
DATASET_FORMAT_PASCAL_VOC = 'Pascal VOC'

# Options offered by the 'Dataset Source' radio in dataset_pascal_voc.
DATASET_SOURCE_SAMPLE = 'Sample'
DATASET_SOURCE_CUSTOM = 'Custom'
DATASET_SOURCE_PASCAL_VOC_2007_TRAIN = 'Pascal VOC 2007 - Train'
DATASET_SOURCE_PASCAL_VOC_2007_VAL = 'Pascal VOC 2007 - Val'
DATASET_SOURCE_PASCAL_VOC_2007_TEST = 'Pascal VOC 2007 - Test'

TRANSFORM_RESIZE = 'Resize'
TRANSFORM_RANDOM_RESIZE = 'Random Resize'
TRANSFORM_SCALE = 'Scale'
Expand Down Expand Up @@ -106,9 +113,71 @@ def dataset_coco(key: str) -> Dataset:
return loader.load_coco_dataset(folder_images, file_annotations)

def dataset_pascal_voc(key: str) -> Dataset:
    """Render the Pascal VOC source picker and load the selected dataset.

    A radio button chooses between the sample dataset, the three Pascal VOC
    2007 splits and a custom location. The default paths of the chosen source
    are shown in text inputs (editable only for the custom source) and the
    resulting paths are loaded through ``_dataset_pascal_voc``.

    Args:
        key: Prefix for the keys of every Streamlit widget created here.

    Returns:
        The dataset loaded from the paths of the selected source.

    Raises:
        ValueError: If the radio yields a value that is not one of the
            offered sources.
    """
    source = st.radio('Dataset Source', [
        DATASET_SOURCE_SAMPLE,
        DATASET_SOURCE_PASCAL_VOC_2007_TRAIN,
        DATASET_SOURCE_PASCAL_VOC_2007_VAL,
        DATASET_SOURCE_PASCAL_VOC_2007_TEST,
        DATASET_SOURCE_CUSTOM],
        key=f'{key}-source')

    if source == DATASET_SOURCE_SAMPLE:
        return _dataset_pascal_voc(
            SAMPLE_PASCAL_VOC_FOLDER_IMAGES,
            SAMPLE_PASCAL_VOC_FOLDER_ANNOTATIONS,
            SAMPLE_PASCAL_VOC_FILE_IMAGESETS,
            disabled=True,
            key=f'{key}-sample'
        )

    if source == DATASET_SOURCE_CUSTOM:
        # The custom inputs get their own key suffix, like every other source,
        # so paths typed here are never tied to the read-only sample widgets.
        return _dataset_pascal_voc(
            SAMPLE_PASCAL_VOC_FOLDER_IMAGES,
            SAMPLE_PASCAL_VOC_FOLDER_ANNOTATIONS,
            SAMPLE_PASCAL_VOC_FILE_IMAGESETS,
            disabled=False,
            key=f'{key}-custom'
        )

    # The Pascal VOC 2007 splits share the image and annotation folders and
    # differ only in the image-set file and in the widget key suffix.
    voc_2007_splits = {
        DATASET_SOURCE_PASCAL_VOC_2007_TRAIN: (patchmentation.dataset._PASCAL_VOC_2007_IMAGESETS_TRAIN, 'train'),
        DATASET_SOURCE_PASCAL_VOC_2007_VAL: (patchmentation.dataset._PASCAL_VOC_2007_IMAGESETS_VAL, 'val'),
        DATASET_SOURCE_PASCAL_VOC_2007_TEST: (patchmentation.dataset._PASCAL_VOC_2007_IMAGESETS_TEST, 'test'),
    }
    if source in voc_2007_splits:
        file_imagesets, split = voc_2007_splits[source]
        # Download and extract the dataset the first time any split is requested.
        if not os.path.exists(patchmentation.dataset._PASCAL_VOC_2007_FOLDER):
            patchmentation.dataset._download_pascal_voc_2007()
        return _dataset_pascal_voc(
            patchmentation.dataset._PASCAL_VOC_2007_FOLDER_IMAGES,
            patchmentation.dataset._PASCAL_VOC_2007_FOLDER_ANNOTATIONS,
            file_imagesets,
            disabled=True,
            key=f'{key}-pascal-voc-2007-{split}'
        )

    raise ValueError(f'Unexpected source value {source}')

def _dataset_pascal_voc(default_folder_images: str, default_folder_annotations: str, default_file_imagesets: str, disabled: bool, key: str) -> Dataset:
    """Show the three Pascal VOC path inputs and load the dataset they point to.

    Args:
        default_folder_images: Initial value of the images folder input.
        default_folder_annotations: Initial value of the annotations folder input.
        default_file_imagesets: Initial value of the image-set file input.
        disabled: Passed to every text input as its ``disabled`` flag.
        key: Prefix for the keys of the three text inputs.

    Returns:
        The result of ``loader.load_pascal_voc_dataset`` for the entered paths.
    """
    # (label, default value, widget key) in the order the inputs are rendered.
    fields = (
        ('Path to Pascal VOC Images', default_folder_images, f'{key}-pascal-voc-folder_images'),
        ('Path to Pascal VOC Annotations', default_folder_annotations, f'{key}-pascal-voc-folder_annotations'),
        ('Path to Pascal VOC Image Sets', default_file_imagesets, f'{key}-pascal-voc-file_imagesets'),
    )
    folder_images, folder_annotations, file_imagesets = (
        st.text_input(label, default, disabled=disabled, key=widget_key)
        for label, default, widget_key in fields
    )
    return loader.load_pascal_voc_dataset(folder_images, folder_annotations, file_imagesets)

def background_image(key: str) -> Image:
Expand All @@ -121,8 +190,8 @@ def display_dataset(dataset: Dataset, key: str) -> None:
col_images, col_right = st.columns([5, 1])

with col_right:
classes = input_classes(dataset.classes, key=f'{key}-classes')
index = st.number_input('Index', min_value=0, max_value=len(dataset.image_patches)-1, step=1, key=f'{key}-index_input')
classes = input_classes(dataset.classes, key=f'{key}-classes')

with col_images:
image_array = dataset.image_patches[index].image_array(classes)
Expand Down
1 change: 1 addition & 0 deletions patchmentation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from . import collections
from . import utils
from . import dataset
from .patchmentation import patch_augmentation
91 changes: 91 additions & 0 deletions patchmentation/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import os
import wget
import tarfile
from appdirs import user_cache_dir

from typing import Dict
from patchmentation.utils import loader
from patchmentation.collections import Dataset

# Dataset identifier accepted by load().
PASCAL_VOC_2007 = 'PASCAL_VOC_2007'

# Cache directory for downloaded datasets, as resolved by appdirs' user_cache_dir.
_CACHEDIR = user_cache_dir('patchmentation-dataset')
# Dataset root; its existence is what load_pascal_voc_2007 checks before downloading.
# Presumably both archives unpack to VOCdevkit/VOC2007 under _CACHEDIR - confirm against the tar contents.
_PASCAL_VOC_2007_FOLDER = os.path.join(_CACHEDIR, 'VOCdevkit', 'VOC2007')

# Download URLs of the train/val and test archives.
# NOTE(review): plain http, and nothing in this module verifies the downloaded files.
_PASCAL_VOC_2007_URL_TRAIN_VAL = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar'
_PASCAL_VOC_2007_URL_TEST = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar'
# Local paths of the downloaded archives; they are deleted again after extraction.
_PASCAL_VOC_2007_TAR_TRAIN_VAL = os.path.join(_CACHEDIR, 'VOCtrainval_06-Nov-2007.tar')
_PASCAL_VOC_2007_TAR_TEST = os.path.join(_CACHEDIR, 'VOCtest_06-Nov-2007.tar')
# Locations inside the dataset root that are handed to loader.load_pascal_voc_dataset.
_PASCAL_VOC_2007_FOLDER_IMAGES = os.path.join(_PASCAL_VOC_2007_FOLDER, 'JPEGImages')
_PASCAL_VOC_2007_FOLDER_ANNOTATIONS = os.path.join(_PASCAL_VOC_2007_FOLDER, 'Annotations')
_PASCAL_VOC_2007_IMAGESETS = os.path.join(_PASCAL_VOC_2007_FOLDER, 'ImageSets', 'Main')
# One image-set file per split returned by load_pascal_voc_2007.
_PASCAL_VOC_2007_IMAGESETS_TRAIN = os.path.join(_PASCAL_VOC_2007_IMAGESETS, 'train.txt')
_PASCAL_VOC_2007_IMAGESETS_VAL = os.path.join(_PASCAL_VOC_2007_IMAGESETS, 'val.txt')
_PASCAL_VOC_2007_IMAGESETS_TEST = os.path.join(_PASCAL_VOC_2007_IMAGESETS, 'test.txt')

def _download(source, target):
    """Announce the transfer on stdout, then fetch ``source`` to ``target`` with wget.

    Args:
        source: URL handed to ``wget.download``.
        target: Output location handed to ``wget.download``.
    """
    announcement = f'download from {source} to {target}'
    print(announcement)
    wget.download(source, target)

def _extract_tar(source, target):
    """Extract the tar archive ``source`` into the directory ``target``.

    Every member name is checked before anything is written: an entry whose
    resolved destination lies outside ``target`` (absolute names, ``..``
    components) aborts the extraction, because the archives handled by this
    module are downloaded from the network.

    Args:
        source: Path of the tar archive to read.
        target: Directory the members are extracted into.

    Raises:
        tarfile.TarError: If a member would be written outside ``target``.
    """
    print(f'extract from {source} to {target}')
    root = os.path.realpath(target)
    with tarfile.open(source) as archive:
        for member in archive.getmembers():
            destination = os.path.realpath(os.path.join(root, member.name))
            try:
                inside = os.path.commonpath([root, destination]) == root
            except ValueError:
                # commonpath refuses paths without a common anchor
                # (e.g. different drives); such a member is outside the target.
                inside = False
            if not inside:
                raise tarfile.TarError(f'unsafe member path in {source}: {member.name}')
        # Only member names are validated above; link targets are left to
        # tarfile's own extraction filters where the running Python has them.
        archive.extractall(target)

def _remove_file(file):
    """Report the removal on stdout, then delete ``file`` from disk.

    Args:
        file: Path of the file to delete; ``os.remove`` raises if it is missing.
    """
    notice = f'removing {file}'
    print(notice)
    os.remove(file)

def load(dataset) -> Dict[str, Dataset]:
    """Load a built-in dataset selected by its identifier.

    Args:
        dataset: Dataset identifier; only ``PASCAL_VOC_2007`` is recognised.

    Returns:
        The mapping produced by ``load_pascal_voc_2007``.

    Raises:
        ValueError: If ``dataset`` is not a recognised identifier.
    """
    if dataset != PASCAL_VOC_2007:
        raise ValueError(f'Unexpected dataset value : {dataset}')
    return load_pascal_voc_2007()

def load_pascal_voc_2007() -> Dict[str, Dataset]:
    """Load the Pascal VOC 2007 train, val and test splits, downloading if needed.

    The download is triggered whenever one of the three image-set files is
    missing, not merely when the dataset folder is absent, so an interrupted
    or partial extraction is repaired on the next call instead of failing
    permanently.

    Returns:
        A dict with the keys ``'train'``, ``'val'`` and ``'test'``, each
        mapped to the dataset loaded from the corresponding image-set file.
    """
    os.makedirs(_CACHEDIR, exist_ok=True)
    imagesets = {
        'train': _PASCAL_VOC_2007_IMAGESETS_TRAIN,
        'val': _PASCAL_VOC_2007_IMAGESETS_VAL,
        'test': _PASCAL_VOC_2007_IMAGESETS_TEST,
    }
    # train.txt / val.txt and test.txt come from different archives, so all
    # three must be present before the cached copy counts as complete.
    if not all(os.path.isfile(path) for path in imagesets.values()):
        _download_pascal_voc_2007()
    return {
        split: loader.load_pascal_voc_dataset(
            _PASCAL_VOC_2007_FOLDER_IMAGES,
            _PASCAL_VOC_2007_FOLDER_ANNOTATIONS,
            file_imagesets,
        )
        for split, file_imagesets in imagesets.items()
    }

def _download_pascal_voc_2007():
    """Fetch, unpack and clean up both Pascal VOC 2007 archives.

    Archives already present on disk are not downloaded again. Both archives
    are extracted before either one is deleted.
    """
    fetchers = (
        (_PASCAL_VOC_2007_TAR_TRAIN_VAL, _download_pascal_voc_2007_train_val),
        (_PASCAL_VOC_2007_TAR_TEST, _download_pascal_voc_2007_test),
    )
    for archive, fetch in fetchers:
        if not os.path.exists(archive):
            fetch()

    # Extraction first, removal afterwards - same order for both archives.
    follow_up = (
        _extract_pascal_voc_2007_train_val,
        _extract_pascal_voc_2007_test,
        _remove_tar_pascal_voc_2007_train_val,
        _remove_tar_pascal_voc_2007_test,
    )
    for step in follow_up:
        step()

def _download_pascal_voc_2007_train_val():
    """Download the train/val archive into the cache directory.

    Raises:
        FileExistsError: If the archive is already present at its target path.
    """
    os.makedirs(_CACHEDIR, exist_ok=True)
    archive = _PASCAL_VOC_2007_TAR_TRAIN_VAL
    if not os.path.exists(archive):
        _download(_PASCAL_VOC_2007_URL_TRAIN_VAL, archive)
        return
    raise FileExistsError(archive)

def _download_pascal_voc_2007_test():
    """Download the test archive into the cache directory.

    Raises:
        FileExistsError: If the archive is already present at its target path.
    """
    os.makedirs(_CACHEDIR, exist_ok=True)
    archive = _PASCAL_VOC_2007_TAR_TEST
    if not os.path.exists(archive):
        _download(_PASCAL_VOC_2007_URL_TEST, archive)
        return
    raise FileExistsError(archive)

def _extract_pascal_voc_2007_train_val():
    """Unpack the train/val archive into the cache directory, creating it if needed."""
    destination = _CACHEDIR
    os.makedirs(destination, exist_ok=True)
    _extract_tar(_PASCAL_VOC_2007_TAR_TRAIN_VAL, destination)

def _extract_pascal_voc_2007_test():
    """Unpack the test archive into the cache directory, creating it if needed."""
    destination = _CACHEDIR
    os.makedirs(destination, exist_ok=True)
    _extract_tar(_PASCAL_VOC_2007_TAR_TEST, destination)

def _remove_tar_pascal_voc_2007_train_val():
    """Delete the train/val archive if it is present; do nothing otherwise."""
    archive = _PASCAL_VOC_2007_TAR_TRAIN_VAL
    if not os.path.exists(archive):
        return
    _remove_file(archive)

def _remove_tar_pascal_voc_2007_test():
    """Delete the test archive if it is present; do nothing otherwise."""
    archive = _PASCAL_VOC_2007_TAR_TEST
    if not os.path.exists(archive):
        return
    _remove_file(archive)
4 changes: 3 additions & 1 deletion requirements/requirements-main.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ matplotlib>=3.5.3
numpy>=1.23.1
opencv-python>=4.6.0.66
typing-extensions>=4.3.0
scipy>=1.9.3
scipy>=1.9.3
appdirs==1.4.4
wget==3.2
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@ def read(file):
'numpy>=1.23.1',
'opencv-python>=4.6.0.66',
'typing-extensions>=4.3.0',
'scipy>=1.9.3'
'scipy>=1.9.3',
'appdirs==1.4.4',
'wget==3.2'
]
CLASSIFIERS = [
'Programming Language :: Python :: 3',
Expand Down

0 comments on commit c4773c2

Please sign in to comment.