-
Notifications
You must be signed in to change notification settings - Fork 1
/
DataLoader.py
140 lines (119 loc) · 4.92 KB
/
DataLoader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os
import numpy as np
import torch
def load_constant_mask(
    patch_size,
    folder_path="/home/hk-project-epais/ke4365/pangu-weather/constant_masks/",
):
    """
    Load the constant masks applied in the patch embedding layer.

    Only ``land_mask.npy``, ``soil_type.npy`` and ``topography.npy`` are read
    from ``folder_path``; any other file in that directory is ignored.
    ``soil_type`` and ``topography`` are standardized (mean subtracted, divided
    by the standard deviation, both taken over the whole array) while
    ``land_mask`` is used as stored. All three are converted to float32 tensors
    and zero-padded so that each of the two dimensions becomes a multiple of
    the corresponding patch size.

    patch_size: Tuple(int, int, int)
        Number of pixels in (vert, lat, lon) dimensions per patch.
        Only the lat and lon entries (indices 1 and 2) are used here.
    folder_path: String
        Path to directory containing constant masks

    Returns
    -------
    land_mask: Tensor
        of shape (n_lat, n_lon) after padding
    soil_type: Tensor
        of shape (n_lat, n_lon) after padding
    topography: Tensor
        of shape (n_lat, n_lon) after padding

    Raises
    ------
    FileNotFoundError
        If one of the three mask files is missing from ``folder_path``.
    ValueError
        If the three masks do not share the same shape.
    """
    mask_names = ("land_mask", "soil_type", "topography")

    # Read exactly the files that are needed instead of every *.npy in the
    # directory: unrelated (or unreadable) arrays must not cost memory or
    # break loading.
    arrays = {
        name: np.load(os.path.join(folder_path, name + ".npy"))
        for name in mask_names
    }

    # Standardize the two continuous fields; the land mask stays untouched.
    for name in ("soil_type", "topography"):
        field = arrays[name]
        arrays[name] = (field - np.mean(field)) / np.std(field)

    # Torch tensors
    land_mask = torch.tensor(arrays["land_mask"]).to(torch.float32)
    soil_type = torch.tensor(arrays["soil_type"]).to(torch.float32)
    topography = torch.tensor(arrays["topography"]).to(torch.float32)

    # Explicit raise rather than `assert`, so the check survives `python -O`.
    if not (land_mask.shape == soil_type.shape == topography.shape):
        raise ValueError("Shapes of the three constant masks are not equal.")

    # Shapes are equal, so land_mask's shape is representative.
    # (-n) % p is the number of pixels missing to reach the next multiple of p
    # (0 when n is already a multiple).
    lat_total = (-land_mask.shape[0]) % patch_size[1]
    lon_total = (-land_mask.shape[1]) % patch_size[2]

    # Split the padding as evenly as possible; the odd pixel goes to the end.
    x1_pad = lat_total // 2
    x2_pad = lat_total - x1_pad
    y1_pad = lon_total // 2
    y2_pad = lon_total - y1_pad

    # torch.nn.functional.pad takes the padding for the LAST dimension first,
    # hence (lon_front, lon_back, lat_front, lat_back).
    # Pad the same way as input shape (ensure code is cohesive)
    padding = (y1_pad, y2_pad, x1_pad, x2_pad)
    land_mask = torch.nn.functional.pad(
        land_mask, pad=padding, mode="constant", value=0
    )
    soil_type = torch.nn.functional.pad(
        soil_type, pad=padding, mode="constant", value=0
    )
    topography = torch.nn.functional.pad(
        topography, pad=padding, mode="constant", value=0
    )

    return land_mask, soil_type, topography
def load_constant_mask_2d(
    patch_size,
    folder_path="/home/hk-project-epais/ke4365/pangu-weather/constant_masks/",
):
    """
    Load the constant masks applied in the 2D patch embedding layer.

    Only ``land_mask.npy``, ``soil_type.npy`` and ``topography.npy`` are read
    from ``folder_path``; any other file in that directory is ignored.
    ``soil_type`` and ``topography`` are standardized (mean subtracted, divided
    by the standard deviation, both taken over the whole array) while
    ``land_mask`` is used as stored. All three are converted to float32 tensors
    and zero-padded so that each of the two dimensions becomes a multiple of
    the corresponding patch size.

    patch_size: Tuple(int, int)
        Number of pixels in (lat, lon) dimensions per patch
    folder_path: String
        Path to directory containing constant masks

    Returns
    -------
    land_mask: Tensor
        of shape (n_lat, n_lon) after padding
    soil_type: Tensor
        of shape (n_lat, n_lon) after padding
    topography: Tensor
        of shape (n_lat, n_lon) after padding

    Raises
    ------
    FileNotFoundError
        If one of the three mask files is missing from ``folder_path``.
    ValueError
        If the three masks do not share the same shape.
    """
    # Read exactly the three required files; scanning the directory for every
    # *.npy would load unrelated arrays and fail on unreadable ones.
    raw_land = np.load(os.path.join(folder_path, "land_mask.npy"))
    raw_soil = np.load(os.path.join(folder_path, "soil_type.npy"))
    raw_topo = np.load(os.path.join(folder_path, "topography.npy"))

    # Standardize the two continuous fields; the land mask stays untouched.
    raw_soil = (raw_soil - np.mean(raw_soil)) / np.std(raw_soil)
    raw_topo = (raw_topo - np.mean(raw_topo)) / np.std(raw_topo)

    # Torch tensors
    land_mask = torch.tensor(raw_land).to(torch.float32)
    soil_type = torch.tensor(raw_soil).to(torch.float32)
    topography = torch.tensor(raw_topo).to(torch.float32)

    # Explicit raise rather than `assert`, so the check survives `python -O`.
    if not (land_mask.shape == soil_type.shape == topography.shape):
        raise ValueError("Shapes of the three constant masks are not equal.")

    # Shapes are equal, so land_mask's shape is representative.
    lat_patch, lon_patch = patch_size[0], patch_size[1]

    # (-n) % p is the number of pixels missing to reach the next multiple of p
    # (0 when n is already a multiple).
    lat_total = (-land_mask.shape[0]) % lat_patch
    lon_total = (-land_mask.shape[1]) % lon_patch

    # Split the padding as evenly as possible; the odd pixel goes to the end.
    x1_pad = lat_total // 2
    x2_pad = lat_total - x1_pad
    y1_pad = lon_total // 2
    y2_pad = lon_total - y1_pad

    # torch.nn.functional.pad takes the padding for the LAST dimension first,
    # hence (lon_front, lon_back, lat_front, lat_back).
    # Pad the same way as input shape (ensure code is cohesive)
    padding = (y1_pad, y2_pad, x1_pad, x2_pad)
    land_mask = torch.nn.functional.pad(
        land_mask, pad=padding, mode="constant", value=0
    )
    soil_type = torch.nn.functional.pad(
        soil_type, pad=padding, mode="constant", value=0
    )
    topography = torch.nn.functional.pad(
        topography, pad=padding, mode="constant", value=0
    )

    return land_mask, soil_type, topography