From 3c4a7269de20c211e77f9e85b75f7bf9f145e6d6 Mon Sep 17 00:00:00 2001
From: zabboud
Date: Thu, 1 Jun 2023 14:11:55 -0400
Subject: [PATCH 1/2] Fixes #2083 - explain model.eval, torch.no_grad

---
 beginner_source/basics/optimization_tutorial.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/beginner_source/basics/optimization_tutorial.py b/beginner_source/basics/optimization_tutorial.py
index 0fb508d1cc..a1603510b9 100644
--- a/beginner_source/basics/optimization_tutorial.py
+++ b/beginner_source/basics/optimization_tutorial.py
@@ -149,6 +149,9 @@ def forward(self, x):
 
 def train_loop(dataloader, model, loss_fn, optimizer):
     size = len(dataloader.dataset)
+    # Set the model to training mode - important for batch normalization and dropout layers
+    # Unnecessary in this situation but added for best practices
+    model.train()
     for batch, (X, y) in enumerate(dataloader):
         # Compute prediction and loss
         pred = model(X)
@@ -165,10 +168,15 @@ def train_loop(dataloader, model, loss_fn, optimizer):
 
 
 def test_loop(dataloader, model, loss_fn):
+    # Set the model to evaluation mode - important for batch normalization and dropout layers
+    # Unnecessary in this situation but added for best practices
+    model.eval()
     size = len(dataloader.dataset)
     num_batches = len(dataloader)
     test_loss, correct = 0, 0
 
+    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
+    # and also reduces unnecessary gradient computation and memory usage for tensors with requires_grad=True
     with torch.no_grad():
         for X, y in dataloader:
             pred = model(X)
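A quick illustration of the behavior the new comments describe: dropout (and
batch normalization) layers act differently under model.train() and
model.eval(), while gradient tracking is only switched off by torch.no_grad().
The snippet below is a standalone sketch, not part of the patch, and the toy
Sequential model is a hypothetical stand-in for the tutorial's network:

    import torch
    from torch import nn

    # Hypothetical toy model containing a dropout layer
    net = nn.Sequential(nn.Linear(4, 4), nn.Dropout(p=0.5))
    x = torch.ones(1, 4)

    net.train()               # training mode: dropout randomly zeroes activations
    net.eval()                # evaluation mode: dropout becomes a no-op

    out = net(x)
    print(out.requires_grad)  # True - eval mode alone does not disable autograd

    with torch.no_grad():     # no graph is recorded: less compute and memory
        out = net(x)
    print(out.requires_grad)  # False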
From d1b9d71f60c9d363bd5156eed32625202eb1e45f Mon Sep 17 00:00:00 2001
From: zabboud
Date: Thu, 1 Jun 2023 20:40:42 -0400
Subject: [PATCH 2/2] set norm to mean & std of CIFAR10 (pytorch#1818)

---
 beginner_source/introyt/introyt1_tutorial.py | 23 ++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/beginner_source/introyt/introyt1_tutorial.py b/beginner_source/introyt/introyt1_tutorial.py
index f52c3902c0..a5d65bcab1 100644
--- a/beginner_source/introyt/introyt1_tutorial.py
+++ b/beginner_source/introyt/introyt1_tutorial.py
@@ -288,7 +288,7 @@ def num_flat_features(self, x):
 
 transform = transforms.Compose(
     [transforms.ToTensor(),
-     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])
 
 
 ##########################################################################
@@ -297,9 +297,28 @@ def num_flat_features(self, x):
 # - ``transforms.ToTensor()`` converts images loaded by Pillow into
 # PyTorch tensors.
 # - ``transforms.Normalize()`` adjusts the values of the tensor so
-# that their average is zero and their standard deviation is 0.5. Most
+# that their average is zero and their standard deviation is 1.0. Most
 # activation functions have their strongest gradients around x = 0, so
 # centering our data there can speed learning.
+# The values passed to the transform are the means (first tuple) and the
+# standard deviations (second tuple) of the RGB values of the images in
+# the dataset. You can calculate these values yourself by running these
+# few lines of code:
+# ```
+# from torch.utils.data import ConcatDataset
+# transform = transforms.Compose([transforms.ToTensor()])
+# trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
+#                                         download=True, transform=transform)
+#
+# # stack all train images together into a tensor of shape
+# # (50000, 3, 32, 32)
+# x = torch.stack([sample[0] for sample in ConcatDataset([trainset])])
+#
+# # get the mean of each channel
+# mean = torch.mean(x, dim=(0, 2, 3))  # tensor([0.4914, 0.4822, 0.4465])
+# std = torch.std(x, dim=(0, 2, 3))    # tensor([0.2470, 0.2435, 0.2616])
+#
+# ```
 #
 # There are many more transforms available, including cropping, centering,
 # rotation, and reflection.
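As a sanity check on the statistics used above, the following standalone
sketch (not part of the patch; it assumes torchvision is installed and
downloads CIFAR-10 into ./data, like the snippet in the comment) normalizes
the training set with these per-channel values and confirms that the channels
come out with approximately zero mean and unit standard deviation:

    import torch
    import torchvision
    import torchvision.transforms as transforms

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform)

    # Stack the normalized images and recompute the per-channel statistics
    x = torch.stack([sample[0] for sample in trainset])
    print(torch.mean(x, dim=(0, 2, 3)))  # ~ tensor([0., 0., 0.])
    print(torch.std(x, dim=(0, 2, 3)))   # ~ tensor([1., 1., 1.])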