From de109519c9ab288c5d3e638fe90205732b250b38 Mon Sep 17 00:00:00 2001 From: KerekesDavid Date: Tue, 24 Sep 2024 13:24:40 +0200 Subject: [PATCH 01/10] Replace rasterio with tifffile for mados --- datasets/mados.py | 94 ++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/datasets/mados.py b/datasets/mados.py index 62a4826d..cd889cfc 100644 --- a/datasets/mados.py +++ b/datasets/mados.py @@ -1,6 +1,6 @@ -''' +""" Adapted from: https://github.com/gkakogeorgiou/mados -''' +""" import os import time @@ -10,16 +10,10 @@ import zipfile from glob import glob -import rasterio +import tifffile import numpy as np -import warnings - -warnings.filterwarnings("ignore", category=rasterio.errors.NotGeoreferencedWarning) - import torch -import torchvision.transforms.functional as TF -import torchvision.transforms as T from .utils import DownloadProgressBar from utils.registry import DATASET_REGISTRY @@ -29,39 +23,45 @@ # MADOS DATASET # ############################################################### + @DATASET_REGISTRY.register() class MADOS(torch.utils.data.Dataset): def __init__(self, cfg, split, is_train=True): - self.root_path = cfg['root_path'] - self.data_mean = cfg['data_mean'] - self.data_std = cfg['data_std'] - self.classes = cfg['classes'] + self.root_path = cfg["root_path"] + self.data_mean = cfg["data_mean"] + self.data_std = cfg["data_std"] + self.classes = cfg["classes"] self.class_num = len(self.classes) self.split = split self.is_train = is_train - self.ROIs_split = np.genfromtxt(os.path.join(self.root_path, 'splits', f'{split}_X.txt'), dtype='str') + self.ROIs_split = np.genfromtxt( + os.path.join(self.root_path, "splits", f"{split}_X.txt"), dtype="str" + ) self.image_list = [] self.target_list = [] - self.tiles = sorted(glob(os.path.join(self.root_path, '*'))) + self.tiles = sorted(glob(os.path.join(self.root_path, "*"))) for tile in self.tiles: - splits = [f.split('_cl_')[-1] for f in glob(os.path.join(tile, '10', '*_cl_*'))] + splits = [ + f.split("_cl_")[-1] for f in glob(os.path.join(tile, "10", "*_cl_*")) + ] for crop in splits: - crop_name = os.path.basename(tile) + '_' + crop.split('.tif')[0] + crop_name = os.path.basename(tile) + "_" + crop.split(".tif")[0] if crop_name in self.ROIs_split: - all_bands = glob(os.path.join(tile, '*', '*L2R_rhorc*_' + crop)) + all_bands = glob(os.path.join(tile, "*", "*L2R_rhorc*_" + crop)) all_bands = sorted(all_bands, key=self.get_band) - # all_bands = np.array(all_bands) self.image_list.append(all_bands) - cl_path = os.path.join(tile, '10', os.path.basename(tile) + '_L2R_cl_' + crop) + cl_path = os.path.join( + tile, "10", os.path.basename(tile) + "_L2R_cl_" + crop + ) self.target_list.append(cl_path) def __len__(self): @@ -72,42 +72,41 @@ def getnames(self): def __getitem__(self, index): - all_bands = self.image_list[index] + band_paths = self.image_list[index] current_image = [] - for c, band in enumerate(all_bands): - upscale_factor = int(os.path.basename(os.path.dirname(band))) // 10 - with rasterio.open(band, mode='r') as src: - this_band = src.read(1, - out_shape=(int(src.height * upscale_factor), int(src.width * upscale_factor)), - resampling=rasterio.enums.Resampling.nearest - ) - this_band = torch.from_numpy(this_band) - #this_band[torch.isnan(this_band)] = self.data_mean['optical'][c] - current_image.append(this_band) + for path in band_paths: + upscale_factor = int(os.path.basename(os.path.dirname(path))) // 10 + + band = tifffile.imread(path) + band = np.transpose(band, (2, 0, 1)) + band_tensor = torch.from_numpy(band) + band_tensor.unsqueeze_(0) + band_tensor = torch.nn.functional.interpolate( + band_tensor, scale_factor=upscale_factor, mode="nearest" + ) + current_image.append(band_tensor) image = torch.stack(current_image) invalid_mask = torch.isnan(image) image[invalid_mask] = 0 - - - with rasterio.open(self.target_list[index], mode='r') as src: - target = src.read(1) + target = tifffile.imread(self.target_list[index]) + target = np.transpose(target, (2, 0, 1)) target = torch.from_numpy(target.astype(np.int64)) target = target - 1 output = { - 'image': { - 'optical': image, + "image": { + "optical": image, }, - 'target': target, - 'metadata': {} + "target": target, + "metadata": {}, } return output @staticmethod def get_band(path): - return int(path.split('_')[-2]) + return int(path.split("_")[-2]) @staticmethod def download(dataset_config: dict, silent=False): @@ -128,15 +127,17 @@ def download(dataset_config: dict, silent=False): try: urllib.request.urlretrieve(url, output_path / temp_file_name, pbar) except urllib.error.HTTPError as e: - print('Error while downloading dataset: The server couldn\'t fulfill the request.') - print('Error code: ', e.code) + print( + "Error while downloading dataset: The server couldn't fulfill the request." + ) + print("Error code: ", e.code) return except urllib.error.URLError as e: - print('Error while downloading dataset: Failed to reach a server.') - print('Reason: ', e.reason) + print("Error while downloading dataset: Failed to reach a server.") + print("Reason: ", e.reason) return - with zipfile.ZipFile(output_path / temp_file_name, 'r') as zip_ref: + with zipfile.ZipFile(output_path / temp_file_name, "r") as zip_ref: print(f"Extracting to {output_path} ...") # Remove top-level dir in ZIP file for nicer data dir structure members = [] @@ -155,4 +156,5 @@ def get_splits(dataset_config): dataset_train = MADOS(cfg=dataset_config, split="train", is_train=True) dataset_val = MADOS(cfg=dataset_config, split="val", is_train=False) dataset_test = MADOS(cfg=dataset_config, split="test", is_train=False) - return dataset_train, dataset_val, dataset_test \ No newline at end of file + return dataset_train, dataset_val, dataset_test + From e37bf332c7fe5db3798cf24232e59d1c19c01df0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kerekes=20D=C3=A1vid?= Date: Tue, 24 Sep 2024 15:07:13 +0200 Subject: [PATCH 02/10] Fix tensor dimensions --- .gitignore | 3 ++- datasets/mados.py | 8 +++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index ae0740ae..08d56225 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ **/__pycache__/ +*.pyc old_files/ pretrained/ @@ -15,4 +16,4 @@ data/mados/splits/* !data/mados/splits/tiny_X.txt .vscode -.idea \ No newline at end of file +.idea diff --git a/datasets/mados.py b/datasets/mados.py index cd889cfc..d4988a7e 100644 --- a/datasets/mados.py +++ b/datasets/mados.py @@ -78,19 +78,17 @@ def __getitem__(self, index): upscale_factor = int(os.path.basename(os.path.dirname(path))) // 10 band = tifffile.imread(path) - band = np.transpose(band, (2, 0, 1)) band_tensor = torch.from_numpy(band) - band_tensor.unsqueeze_(0) + band_tensor.unsqueeze_(0).unsqueeze_(0) band_tensor = torch.nn.functional.interpolate( band_tensor, scale_factor=upscale_factor, mode="nearest" - ) + ).squeeze_(0) current_image.append(band_tensor) - image = torch.stack(current_image) + image = torch.cat(current_image) invalid_mask = torch.isnan(image) image[invalid_mask] = 0 target = tifffile.imread(self.target_list[index]) - target = np.transpose(target, (2, 0, 1)) target = torch.from_numpy(target.astype(np.int64)) target = target - 1 From c103c66a3e0c98c0002eaf4eb790fa1b7ded658e Mon Sep 17 00:00:00 2001 From: KerekesDavid Date: Tue, 24 Sep 2024 17:04:22 +0200 Subject: [PATCH 03/10] Replace biomassters, fivebillionpixels, hlsburn, pastis, some utils --- datasets/biomassters.py | 12 ++++-------- datasets/fivebillionpixels.py | 1 - datasets/hlsburnscars.py | 10 +++------- datasets/pastis.py | 14 ++++++-------- datasets/utils.py | 13 ++----------- utils/compute_norm_std.py | 7 +++---- 6 files changed, 18 insertions(+), 39 deletions(-) diff --git a/datasets/biomassters.py b/datasets/biomassters.py index b2244c17..61379f5a 100644 --- a/datasets/biomassters.py +++ b/datasets/biomassters.py @@ -2,10 +2,7 @@ import torch import pandas as pd import pathlib -import rasterio -from tifffile import imread -from os.path import join as opj -from .utils import read_tif +import tifffile from utils.registry import DATASET_REGISTRY def read_imgs(multi_temporal, temp , fname, data_dir, img_size): @@ -22,7 +19,7 @@ def read_imgs(multi_temporal, temp , fname, data_dir, img_size): s1_filepath = data_dir.joinpath(s1_fname) if s1_filepath.exists(): - img_s1 = imread(s1_filepath) + img_s1 = tifffile.imread(s1_filepath) m = img_s1 == -9999 img_s1 = img_s1.astype('float32') img_s1 = np.where(m, 0, img_s1) @@ -31,7 +28,7 @@ def read_imgs(multi_temporal, temp , fname, data_dir, img_size): s2_filepath = data_dir.joinpath(s2_fname) if s2_filepath.exists(): - img_s2 = imread(s2_filepath) + img_s2 = tifffile.imread(s2_filepath) img_s2 = img_s2.astype('float32') else: img_s2 = np.zeros((img_size, img_size) + (11,), dtype='float32') @@ -77,8 +74,7 @@ def __getitem__(self, index): fname = str(chip_id)+'_agbm.tif' imgs_s1, imgs_s2, mask = read_imgs(self.multi_temporal, self.temp, fname, self.dir_features, self.img_size) - with rasterio.open(self.dir_labels.joinpath(fname)) as lbl: - target = lbl.read(1) + target = tifffile.imread(self.dir_labels.joinpath(fname), key=0) target = np.nan_to_num(target) imgs_s1 = torch.from_numpy(imgs_s1).float() diff --git a/datasets/fivebillionpixels.py b/datasets/fivebillionpixels.py index dedd6ef1..3dfe2f77 100644 --- a/datasets/fivebillionpixels.py +++ b/datasets/fivebillionpixels.py @@ -2,7 +2,6 @@ import time import torch import numpy as np -import rasterio import random from glob import glob diff --git a/datasets/hlsburnscars.py b/datasets/hlsburnscars.py index 91dfb579..212d641b 100644 --- a/datasets/hlsburnscars.py +++ b/datasets/hlsburnscars.py @@ -2,10 +2,9 @@ import time import torch import numpy as np -import rasterio +import tifffile from glob import glob -import torch import torchvision.transforms.functional as TF import torchvision.transforms as T @@ -41,11 +40,8 @@ def __len__(self): return len(self.image_list) def __getitem__(self, index): - with rasterio.open(self.image_list[index]) as src: - image = src.read() - with rasterio.open(self.target_list[index]) as src: - target = src.read(1) - + image = tifffile.imread(self.image_list[index]) + target = tifffile.imread(self.target_list[index], key=0) image = torch.from_numpy(image) target = torch.from_numpy(target.astype(np.int64)) diff --git a/datasets/pastis.py b/datasets/pastis.py index 5313a951..f63b9916 100644 --- a/datasets/pastis.py +++ b/datasets/pastis.py @@ -10,7 +10,7 @@ import geopandas as gpd import numpy as np import pandas as pd -import rasterio +import tifffile import torch from einops import rearrange from omegaconf import OmegaConf @@ -142,17 +142,15 @@ def __getitem__(self, i): for modality in self.modalities: if modality == "aerial": - with rasterio.open( - os.path.join( + path = os.path.join( self.path, "DATA_SPOT/PASTIS_SPOT6_RVB_1M00_2019/SPOT6_RVB_1M00_2019_" + str(name) + ".tif", - ) - ) as f: - output["aerial"] = split_image( - torch.FloatTensor(f.read()), self.nb_split, part - ) + ) + output["aerial"] = split_image( + torch.FloatTensor(tifffile.imread(path), self.nb_split, part) + ) elif modality == "s1-median": modality_name = "s1a" images = split_image( diff --git a/datasets/utils.py b/datasets/utils.py index 9b28cd88..3c12064a 100644 --- a/datasets/utils.py +++ b/datasets/utils.py @@ -1,6 +1,6 @@ import os import tqdm -import rasterio +import tifffile import pathlib import concurrent.futures from google.cloud.storage import Client @@ -83,14 +83,5 @@ def download_blob_file_pair(blob_file_pair): def read_tif(file: pathlib.Path): - with rasterio.open(file) as dataset: - arr = dataset.read() # (bands X height X width) + arr = tifffile.imread(file) return arr.transpose((1, 2, 0)) - - -def read_tif_with_metadata(file: pathlib.Path): - with rasterio.open(file) as dataset: - arr = dataset.read() # (bands X height X width) - transform = dataset.transform - crs = dataset.crs - return arr.transpose((1, 2, 0)), transform, crs \ No newline at end of file diff --git a/utils/compute_norm_std.py b/utils/compute_norm_std.py index 06e734a3..d08ada51 100644 --- a/utils/compute_norm_std.py +++ b/utils/compute_norm_std.py @@ -1,7 +1,7 @@ import glob import numpy as np import os -import rasterio +import tifffile split_file = os.path.join("data/sen1floods11_v1.1/v1.1", f"splits/flood_handlabeled/flood_train_data.csv") @@ -20,9 +20,8 @@ sum_sq = np.zeros(2).astype(np.float64) data_list = [] for i, img in enumerate(path[:]): - with rasterio.open(img) as src: - data = src.read() - data = np.nan_to_num(data) + data = tifffile.imread(img) + data = np.nan_to_num(data) data = data.reshape((2, -1))#.astype(np.float64) data_list.append(data) From 638aadedac8d68ee6e8481c324100b95e5c911ff Mon Sep 17 00:00:00 2001 From: KerekesDavid Date: Wed, 25 Sep 2024 09:49:43 +0200 Subject: [PATCH 04/10] Remove rasterio completely - sen1floods11, spacanet7, utae --- datasets/sen1floods11.py | 14 +++++--------- datasets/spacenet7.py | 11 ++++++----- datasets/utae_dynamicen.py | 28 +++++++++++++--------------- environment.yaml | 4 +--- 4 files changed, 25 insertions(+), 32 deletions(-) diff --git a/datasets/sen1floods11.py b/datasets/sen1floods11.py index ceefeaa4..ff3003f3 100644 --- a/datasets/sen1floods11.py +++ b/datasets/sen1floods11.py @@ -4,7 +4,7 @@ import geopandas import numpy as np import pandas as pd -import rasterio +import tifffile import torch from .utils import download_bucket_concurrently @@ -59,16 +59,12 @@ def _get_date(self, index): return date_np def __getitem__(self, index): - with rasterio.open(self.s2_image_list[index]) as src: - s2_image = src.read() + s2_image = tifffile.imread(self.s2_image_list[index]) - with rasterio.open(self.s1_image_list[index]) as src: - s1_image = src.read() - # Convert the missing values (clouds etc.) - s1_image = np.nan_to_num(s1_image) + s1_image = tifffile.imread(self.s1_image_list[index]) + s1_image = np.nan_to_num(s1_image) - with rasterio.open(self.target_list[index]) as src: - target = src.read(1) + target = tifffile.imread(self.target_list[index], key=0) timestamp = self._get_date(index) diff --git a/datasets/spacenet7.py b/datasets/spacenet7.py index c0af09f4..419dde61 100644 --- a/datasets/spacenet7.py +++ b/datasets/spacenet7.py @@ -13,7 +13,8 @@ import json from glob import glob -import rasterio +import cv2 +import tifffile import numpy as np import torch @@ -132,8 +133,8 @@ def __len__(self) -> int: def load_planet_mosaic(self, aoi_id: str, year: int, month: int) -> np.ndarray: folder = self.root_path / 'train' / aoi_id / 'images_masked' file = folder / f'global_monthly_{year}_{month:02d}_mosaic_{aoi_id}.tif' - with rasterio.open(str(file), mode='r') as src: - img = src.read(out_shape=(1024, 1024), resampling=rasterio.enums.Resampling.nearest) + img = tifffile.imread(file) + img = cv2.resize(img, dsize=(1024,1024), interpolation=cv2.INTER_NEAREST) # 4th band (last oen) is alpha band img = img[:-1] return img.astype(np.float32) @@ -141,8 +142,8 @@ def load_planet_mosaic(self, aoi_id: str, year: int, month: int) -> np.ndarray: def load_building_label(self, aoi_id: str, year: int, month: int) -> np.ndarray: folder = self.root_path / 'train' / aoi_id / 'labels_raster' file = folder / f'global_monthly_{year}_{month:02d}_mosaic_{aoi_id}_Buildings.tif' - with rasterio.open(str(file), mode='r') as src: - label = src.read(out_shape=(1024, 1024), resampling=rasterio.enums.Resampling.nearest) + label = tifffile.imread(file) + label = cv2.resize(label, dsize=(1024,1024), interpolation=cv2.INTER_NEAREST) label = (label > 0).squeeze() return label.astype(np.int64) diff --git a/datasets/utae_dynamicen.py b/datasets/utae_dynamicen.py index ca478da4..70b8df96 100644 --- a/datasets/utae_dynamicen.py +++ b/datasets/utae_dynamicen.py @@ -1,6 +1,6 @@ import os import numpy as np -import rasterio +import tifffile import torch from torch.utils.data import Dataset from torchvision import transforms @@ -89,19 +89,18 @@ def load_data(self, index): cur_images, cur_dates = [], [] if self.mode == 'daily': for i in range(1, self.all_days[index][0]+1): - img = rasterio.open(os.path.join(self.root_path, self.all_days[index][i][0][1:])) - red = img.read(3) - green = img.read(2) - blue = img.read(1) - nir = img.read(4) + with tifffile.TiffFile.open(os.path.join(self.root_path, self.all_days[index][i][0][1:])) as img: + red = img.pages[2].asarray() + green = img.pages[1].asarray() + blue = img.pages[0].asarray() + nir = img.pages[3].asarray() image = np.dstack((red, green, blue, nir)) cur_images.append(np.expand_dims(np.asarray(image, dtype=np.float32), axis=0)) # np.array already\ cur_dates.append(self.all_days[index][i][1]) image_stack = np.concatenate(cur_images, axis=0) dates = torch.from_numpy(np.array(cur_dates, dtype=np.int32)) - label = rasterio.open(os.path.join(self.root_path, self.labels[index][1:])) - label = label.read() + label = tifffile.imread(os.path.join(self.root_path, self.labels[index][1:])) mask = np.zeros((label.shape[1], label.shape[2]), dtype=np.int32) for i in range(self.class_num + 1): @@ -115,17 +114,16 @@ def load_data(self, index): else: for i in range(len(self.dates)): # read .tif - img = rasterio.open(os.path.join(self.root_path, self.planet_day[index][i][1:])) - red = img.read(3) - green = img.read(2) - blue = img.read(1) - nir = img.read(4) + with tifffile.TiffFile.open(os.path.join(self.root_path, self.planet_day[index][i][1:])) as img: + red = img.pages[2].asarray() + green = img.pages[1].asarray() + blue = img.pages[0].asarray() + nir = img.pages[3].asarray() image = np.dstack((red, green, blue, nir)) cur_images.append(np.expand_dims(np.asarray(image, dtype=np.float32), axis=0)) # np.array already\ image_stack = np.concatenate(cur_images, axis=0) dates = torch.from_numpy(np.array(self.planet_day[index][len(self.dates):], dtype=np.int32)) - label = rasterio.open(os.path.join(self.root_path, self.labels[index][1:])) - label = label.read() + label = tifffile.imread(os.path.join(self.root_path, self.labels[index][1:])) mask = np.zeros((label.shape[1], label.shape[2]), dtype=np.int32) for i in range(self.class_num + 1): diff --git a/environment.yaml b/environment.yaml index 1c486cf8..5837a7d2 100644 --- a/environment.yaml +++ b/environment.yaml @@ -10,10 +10,8 @@ dependencies: - numpy>=2.0 - pillow - pytorch>=2.1 - - rasterio - scikit-learn - tensorboard - - torchaudio - torchvision - tqdm - tifffile @@ -28,4 +26,4 @@ dependencies: - google-cloud-storage - omegaconf - pydataverse - - pytest \ No newline at end of file + - pytest From 34786ec3c135fe2b37230728283ad2fabe87c51f Mon Sep 17 00:00:00 2001 From: SebastianHafner Date: Thu, 26 Sep 2024 09:59:33 +0200 Subject: [PATCH 05/10] fixed resampling with tifffile --- datasets/spacenet7.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datasets/spacenet7.py b/datasets/spacenet7.py index 419dde61..18ae0c40 100644 --- a/datasets/spacenet7.py +++ b/datasets/spacenet7.py @@ -134,16 +134,16 @@ def load_planet_mosaic(self, aoi_id: str, year: int, month: int) -> np.ndarray: folder = self.root_path / 'train' / aoi_id / 'images_masked' file = folder / f'global_monthly_{year}_{month:02d}_mosaic_{aoi_id}.tif' img = tifffile.imread(file) - img = cv2.resize(img, dsize=(1024,1024), interpolation=cv2.INTER_NEAREST) - # 4th band (last oen) is alpha band - img = img[:-1] + img = cv2.resize(img, dsize=(self.img_size, self.img_size), interpolation=cv2.INTER_NEAREST) + # 4th band (last one) is alpha band + img = img.transpose(2, 0, 1)[:-1] return img.astype(np.float32) def load_building_label(self, aoi_id: str, year: int, month: int) -> np.ndarray: folder = self.root_path / 'train' / aoi_id / 'labels_raster' file = folder / f'global_monthly_{year}_{month:02d}_mosaic_{aoi_id}_Buildings.tif' label = tifffile.imread(file) - label = cv2.resize(label, dsize=(1024,1024), interpolation=cv2.INTER_NEAREST) + label = cv2.resize(label, dsize=(self.img_size, self.img_size), interpolation=cv2.INTER_NEAREST) label = (label > 0).squeeze() return label.astype(np.int64) From f3317c7e0108a9f159b671399d086c96da28d52c Mon Sep 17 00:00:00 2001 From: KerekesDavid Date: Tue, 1 Oct 2024 17:08:41 +0200 Subject: [PATCH 06/10] Redo some changes lost in the merge --- pangaea/datasets/biomassters.py | 10 ++++------ pangaea/datasets/fivebillionpixels.py | 1 - pangaea/datasets/hlsburnscars.py | 1 - pangaea/datasets/mados.py | 10 ++++------ pangaea/datasets/sen1floods11.py | 18 ++++++++---------- pangaea/utils/compute_norm_std.py | 8 ++++---- requirements.txt | 1 - 7 files changed, 20 insertions(+), 29 deletions(-) diff --git a/pangaea/datasets/biomassters.py b/pangaea/datasets/biomassters.py index 8e98b785..bcf90ba4 100644 --- a/pangaea/datasets/biomassters.py +++ b/pangaea/datasets/biomassters.py @@ -2,8 +2,7 @@ import torch import pandas as pd import pathlib -import rasterio -from tifffile import imread +import tifffile from os.path import join as opj from pangaea.datasets.utils import read_tif @@ -23,7 +22,7 @@ def read_imgs(multi_temporal, temp , fname, data_dir, img_size): s1_filepath = data_dir.joinpath(s1_fname) if s1_filepath.exists(): - img_s1 = imread(s1_filepath) + img_s1 = tifffile.imread(s1_filepath) m = img_s1 == -9999 img_s1 = img_s1.astype('float32') img_s1 = np.where(m, 0, img_s1) @@ -32,7 +31,7 @@ def read_imgs(multi_temporal, temp , fname, data_dir, img_size): s2_filepath = data_dir.joinpath(s2_fname) if s2_filepath.exists(): - img_s2 = imread(s2_filepath) + img_s2 = tifffile.imread(s2_filepath) img_s2 = img_s2.astype('float32') else: img_s2 = np.zeros((img_size, img_size) + (11,), dtype='float32') @@ -155,8 +154,7 @@ def __getitem__(self, index): fname = str(chip_id)+'_agbm.tif' imgs_s1, imgs_s2, mask = read_imgs(self.multi_temporal, self.temp, fname, self.dir_features, self.img_size) - with rasterio.open(self.dir_labels.joinpath(fname)) as lbl: - target = lbl.read(1) + target = tifffile.imread(self.dir_labels.joinpath(fname), key=0) target = np.nan_to_num(target) imgs_s1 = torch.from_numpy(imgs_s1).float() diff --git a/pangaea/datasets/fivebillionpixels.py b/pangaea/datasets/fivebillionpixels.py index 4cae19ef..0f656fd1 100644 --- a/pangaea/datasets/fivebillionpixels.py +++ b/pangaea/datasets/fivebillionpixels.py @@ -2,7 +2,6 @@ import time import torch import numpy as np -import rasterio import random from glob import glob diff --git a/pangaea/datasets/hlsburnscars.py b/pangaea/datasets/hlsburnscars.py index c2a2e0b9..6b254beb 100644 --- a/pangaea/datasets/hlsburnscars.py +++ b/pangaea/datasets/hlsburnscars.py @@ -2,7 +2,6 @@ import time import torch import numpy as np -# import rasterio import tifffile as tiff from typing import Sequence, Dict, Any, Union, Literal, Tuple from sklearn.model_selection import train_test_split diff --git a/pangaea/datasets/mados.py b/pangaea/datasets/mados.py index 1181333c..c0067377 100644 --- a/pangaea/datasets/mados.py +++ b/pangaea/datasets/mados.py @@ -6,6 +6,7 @@ import zipfile from glob import glob +import cv2 import tifffile import numpy as np @@ -148,11 +149,8 @@ def __getitem__(self, index): upscale_factor = int(os.path.basename(os.path.dirname(path))) // 10 band = tifffile.imread(path) - band_tensor = torch.from_numpy(band) - band_tensor.unsqueeze_(0).unsqueeze_(0) - band_tensor = torch.nn.functional.interpolate( - band_tensor, scale_factor=upscale_factor, mode="nearest" - ).squeeze_(0) + band = cv2.resize(band, fx=upscale_factor, fy=upscale_factor, interpolation=cv2.INTER_NEAREST) + band_tensor = torch.from_numpy(band).unsqueeze(0) current_image.append(band_tensor) image = torch.cat(current_image) @@ -217,4 +215,4 @@ def download(self, silent=False): zip_ref.extractall(output_path, members) print("done.") - (output_path / temp_file_name).unlink() \ No newline at end of file + (output_path / temp_file_name).unlink() diff --git a/pangaea/datasets/sen1floods11.py b/pangaea/datasets/sen1floods11.py index 76680c7d..9ca53941 100644 --- a/pangaea/datasets/sen1floods11.py +++ b/pangaea/datasets/sen1floods11.py @@ -4,7 +4,7 @@ import geopandas import numpy as np import pandas as pd -import rasterio +import tifffile import torch from pangaea.datasets.utils import download_bucket_concurrently @@ -138,17 +138,15 @@ def _get_date(self, index): return date_np def __getitem__(self, index): - with rasterio.open(self.s2_image_list[index]) as src: - s2_image = src.read() + s2_image = tifffile.imread(self.s2_image_list[index]) + s2_image = s2_image.transpose(2, 0, 1) - with rasterio.open(self.s1_image_list[index]) as src: - s1_image = src.read() - # Convert the missing values (clouds etc.) - s1_image = np.nan_to_num(s1_image) - - with rasterio.open(self.target_list[index]) as src: - target = src.read(1) + s1_image = tifffile.imread(self.s1_image_list[index]) + # Convert the missing values (clouds etc.) + s1_image = np.nan_to_num(s1_image) + s1_image = s1_image.transpose(2, 0, 1) + target = tifffile.imread(self.target_list[index], key=0) timestamp = self._get_date(index) s2_image = torch.from_numpy(s2_image).float() diff --git a/pangaea/utils/compute_norm_std.py b/pangaea/utils/compute_norm_std.py index d16d94a1..b5a5c1ea 100644 --- a/pangaea/utils/compute_norm_std.py +++ b/pangaea/utils/compute_norm_std.py @@ -1,7 +1,7 @@ import glob import numpy as np import os -import rasterio +import tifffile def compute_norm_std(split_file_path, data_root_path): @@ -26,9 +26,9 @@ def compute_norm_std(split_file_path, data_root_path): data_list = [] for img in path: - with rasterio.open(img) as src: - data = src.read() - data = np.nan_to_num(data) + data = tifffile.imread(img) + data = data.transpose(2, 0, 1) + data = np.nan_to_num(data) data = data.reshape((2, -1)) data_list.append(data) diff --git a/requirements.txt b/requirements.txt index 015715f5..58fb9a25 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ torch>=2.1.0 torchvision geopandas -rasterio pillow scikit-learn tensorboard From a7fe81994e8109806505f8a28adba5339f7a13a1 Mon Sep 17 00:00:00 2001 From: KerekesDavid Date: Wed, 2 Oct 2024 00:50:01 +0200 Subject: [PATCH 07/10] Fix relative resizing in mados --- pangaea/datasets/mados.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pangaea/datasets/mados.py b/pangaea/datasets/mados.py index c0067377..3d7bc5d2 100644 --- a/pangaea/datasets/mados.py +++ b/pangaea/datasets/mados.py @@ -149,7 +149,7 @@ def __getitem__(self, index): upscale_factor = int(os.path.basename(os.path.dirname(path))) // 10 band = tifffile.imread(path) - band = cv2.resize(band, fx=upscale_factor, fy=upscale_factor, interpolation=cv2.INTER_NEAREST) + band = cv2.resize(band, dsize=None, fx=upscale_factor, fy=upscale_factor, interpolation=cv2.INTER_NEAREST) band_tensor = torch.from_numpy(band).unsqueeze(0) current_image.append(band_tensor) From 4ca4fa691cfd7692b2a05b0ee29f5844b4cc36d5 Mon Sep 17 00:00:00 2001 From: KerekesDavid Date: Wed, 2 Oct 2024 01:15:32 +0200 Subject: [PATCH 08/10] Use rasterio compat nearest resize --- pangaea/datasets/mados.py | 2 +- pangaea/datasets/spacenet7.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pangaea/datasets/mados.py b/pangaea/datasets/mados.py index 3d7bc5d2..e7c8f3fe 100644 --- a/pangaea/datasets/mados.py +++ b/pangaea/datasets/mados.py @@ -149,7 +149,7 @@ def __getitem__(self, index): upscale_factor = int(os.path.basename(os.path.dirname(path))) // 10 band = tifffile.imread(path) - band = cv2.resize(band, dsize=None, fx=upscale_factor, fy=upscale_factor, interpolation=cv2.INTER_NEAREST) + band = cv2.resize(band, dsize=None, fx=upscale_factor, fy=upscale_factor, interpolation=cv2.INTER_NEAREST_EXACT) band_tensor = torch.from_numpy(band).unsqueeze(0) current_image.append(band_tensor) diff --git a/pangaea/datasets/spacenet7.py b/pangaea/datasets/spacenet7.py index 6af0c11b..0b37c405 100644 --- a/pangaea/datasets/spacenet7.py +++ b/pangaea/datasets/spacenet7.py @@ -221,7 +221,7 @@ def load_planet_mosaic(self, aoi_id: str, year: int, month: int) -> np.ndarray: folder = self.root_path / 'train' / aoi_id / 'images_masked' file = folder / f'global_monthly_{year}_{month:02d}_mosaic_{aoi_id}.tif' img = tifffile.imread(file) - img = cv2.resize(img, dsize=(self.img_size, self.img_size), interpolation=cv2.INTER_NEAREST) + img = cv2.resize(img, dsize=(self.img_size, self.img_size), interpolation=cv2.INTER_NEAREST_EXACT) # 4th band (last one) is alpha band img = img.transpose(2, 0, 1)[:-1] return img.astype(np.float32) @@ -230,7 +230,7 @@ def load_building_label(self, aoi_id: str, year: int, month: int) -> np.ndarray: folder = self.root_path / 'train' / aoi_id / 'labels_raster' file = folder / f'global_monthly_{year}_{month:02d}_mosaic_{aoi_id}_Buildings.tif' label = tifffile.imread(file) - label = cv2.resize(label, dsize=(self.img_size, self.img_size), interpolation=cv2.INTER_NEAREST) + label = cv2.resize(label, dsize=(self.img_size, self.img_size), interpolation=cv2.INTER_NEAREST_EXACT) label = (label > 0).squeeze() return label.astype(np.int64) From 9d8b39e3c8eacedec64af40765446a319a1c6bda Mon Sep 17 00:00:00 2001 From: KerekesDavid Date: Wed, 2 Oct 2024 14:23:14 +0200 Subject: [PATCH 09/10] Add transpose to remaining datasets --- pangaea/datasets/pastis.py | 2 +- pangaea/datasets/utae_dynamicen.py | 2 ++ pangaea/datasets/utils.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pangaea/datasets/pastis.py b/pangaea/datasets/pastis.py index e27dfee6..cf5ca64f 100644 --- a/pangaea/datasets/pastis.py +++ b/pangaea/datasets/pastis.py @@ -210,7 +210,7 @@ def __getitem__(self, i: int) -> dict[str, torch.Tensor | dict[str, torch.Tensor + ".tif", ) output["aerial"] = split_image( - torch.FloatTensor(tifffile.imread(path), self.nb_split, part) + torch.FloatTensor(tifffile.imread(path).transpose(2,0,1), self.nb_split, part) ) elif modality == "s1-median": modality_name = "s1a" diff --git a/pangaea/datasets/utae_dynamicen.py b/pangaea/datasets/utae_dynamicen.py index fbc99380..0092cf10 100644 --- a/pangaea/datasets/utae_dynamicen.py +++ b/pangaea/datasets/utae_dynamicen.py @@ -166,6 +166,7 @@ def load_data(self, index): image_stack = np.concatenate(cur_images, axis=0) dates = torch.from_numpy(np.array(cur_dates, dtype=np.int32)) label = tifffile.imread(os.path.join(self.root_path, self.labels[index][1:])) + label = label.transpose(2, 0, 1) mask = np.zeros((label.shape[1], label.shape[2]), dtype=np.int32) for i in range(self.num_classes + 1): @@ -189,6 +190,7 @@ def load_data(self, index): image_stack = np.concatenate(cur_images, axis=0) dates = torch.from_numpy(np.array(self.planet_day[index][len(self.dates):], dtype=np.int32)) label = tifffile.imread(os.path.join(self.root_path, self.labels[index][1:])) + label = label.transpose(2, 0, 1) mask = np.zeros((label.shape[1], label.shape[2]), dtype=np.int32) for i in range(self.num_classes + 1): diff --git a/pangaea/datasets/utils.py b/pangaea/datasets/utils.py index 3c12064a..680263b9 100644 --- a/pangaea/datasets/utils.py +++ b/pangaea/datasets/utils.py @@ -84,4 +84,4 @@ def download_blob_file_pair(blob_file_pair): def read_tif(file: pathlib.Path): arr = tifffile.imread(file) - return arr.transpose((1, 2, 0)) + return arr.transpose(2, 0, 1) From 70d823caf66cdc1d05006b548b78288527813c5b Mon Sep 17 00:00:00 2001 From: KerekesDavid Date: Wed, 2 Oct 2024 15:49:58 +0200 Subject: [PATCH 10/10] fix channel order in sen1floods11 --- pangaea/datasets/sen1floods11.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pangaea/datasets/sen1floods11.py b/pangaea/datasets/sen1floods11.py index 9ca53941..937fc688 100644 --- a/pangaea/datasets/sen1floods11.py +++ b/pangaea/datasets/sen1floods11.py @@ -139,12 +139,10 @@ def _get_date(self, index): def __getitem__(self, index): s2_image = tifffile.imread(self.s2_image_list[index]) - s2_image = s2_image.transpose(2, 0, 1) s1_image = tifffile.imread(self.s1_image_list[index]) # Convert the missing values (clouds etc.) s1_image = np.nan_to_num(s1_image) - s1_image = s1_image.transpose(2, 0, 1) target = tifffile.imread(self.target_list[index], key=0) timestamp = self._get_date(index)