Commit 645745f

seems nice
Mamoanwar97 committed Jan 7, 2021
1 parent f829f39 commit 645745f
Showing 6 changed files with 47 additions and 13 deletions.
6 changes: 4 additions & 2 deletions nn/Linear.py
@@ -20,8 +20,8 @@ def init_weights(self,indim, outdim):
self.weights['b'] = np.zeros((outdim, 1))

def forward(self,X):

# output dims = (output_layer x features) . (features x batch_size) = (output_layer x batch_size)
# print(self.weights['b'].shape)
output = np.dot(self.weights['w'].T ,X) + self.weights['b']
self.cache['x'] = X
self.cache['output'] = output
@@ -30,11 +30,13 @@ def forward(self,X):

def backward(self,global_grad):
dX = np.dot(self.local_grads['x'], global_grad )
# print("local grad x")
# print(self.local_grads['x'].shape)
# dW has the same dims as W, because the update is w = w - lr * dW
# note that dW is the global gradient; the local gradient (dY/dW) has different dims because it is a function of the input
dW = np.dot(np.array(self.local_grads['w']) , global_grad.T )
# same as dW above
db = np.sum(global_grad, axis = 0, keepdims = True)
db = np.sum(global_grad, axis = 1, keepdims = True)
self.weights_global_grads = {'w': dW, 'b': db}
return dX

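For reference, the axis fix for db follows from the column-major layout used throughout: X is (features x batch), so each sample's gradient lives in a column and the bias gradient must be summed across columns (axis 1), not rows. A minimal shape check, assuming local_grads['x'] holds W and local_grads['w'] holds the cached input X (which is how they are used above); the dims and batch size are made up:

```python
import numpy as np

# Illustrative shapes only.
indim, outdim, batch = 784, 10, 32
W = np.random.randn(indim, outdim)
b = np.zeros((outdim, 1))
X = np.random.randn(indim, batch)

Y = np.dot(W.T, X) + b                  # (outdim x batch), b broadcasts over the batch
dY = np.random.randn(outdim, batch)     # stand-in for the incoming global_grad

dX = np.dot(W, dY)                      # (indim x batch)  -> same shape as X
dW = np.dot(X, dY.T)                    # (indim x outdim) -> same shape as W
db = np.sum(dY, axis=1, keepdims=True)  # (outdim x 1)     -> same shape as b, hence axis=1

assert dX.shape == X.shape and dW.shape == W.shape and db.shape == b.shape
```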
7 changes: 5 additions & 2 deletions nn/MNIST_test.py
@@ -19,10 +19,13 @@

model.set_loss(CrossEntropyLoss())

optimizer = GradientDecent(model.parameters(), learning_rate=0.001)

optimizer = GradientDecent(model.parameters(), learning_rate = 0.001)

i = 0
for image, label in dataloader:
if i == 3000:
break
i = i + 1
predicted = model(image)
loss = model.loss(predicted, label)
model.backward()
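Side note on the cap: the manual counter plus break works, and itertools.islice expresses the same 3000-iteration limit a bit more compactly. A sketch, assuming dataloader is any iterable of (image, label) pairs and model is the one built above:

```python
from itertools import islice

# Same 3000-batch cap as the i/break pattern above, without the counter.
for image, label in islice(dataloader, 3000):
    predicted = model(image)
    loss = model.loss(predicted, label)
    model.backward()
```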
11 changes: 8 additions & 3 deletions nn/loss.py
@@ -34,13 +34,12 @@ def forward(self, Y_hat, Y):
# calculating crossentropy
exp_x = np.exp(Y_hat)
probs = exp_x / np.sum(exp_x, axis=0, keepdims=True)

log_probs = -np.log(probs)

# ........... Problem ...............
# Y_hat is very large at the beginning (on the order of 8k), so e^8000 = inf and the loss blows up

crossentropy_loss = np.mean(log_probs,axis=0, keepdims=True) # average over axis 0; axis 1 is summed on the next line
crossentropy_loss = np.sum(crossentropy_loss, axis=1, keepdims=True)
#print("Dims", probs.shape)
print('Label =',Y)
print('Prediction = ',np.argmax(probs,axis=0))
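The overflow flagged in the comment above is the usual softmax issue: np.exp of raw logits around 8000 is inf. The standard remedy is to subtract the per-column max before exponentiating, which leaves the softmax unchanged. A minimal sketch (not the repo's API), assuming Y_hat is (classes x batch) logits and Y is a vector of integer labels:

```python
import numpy as np

def stable_softmax_crossentropy(Y_hat, Y):
    # Shifting by the column-wise max keeps np.exp in a safe range;
    # softmax is invariant to adding a constant per column.
    shifted = Y_hat - np.max(Y_hat, axis=0, keepdims=True)
    exp_x = np.exp(shifted)
    probs = exp_x / np.sum(exp_x, axis=0, keepdims=True)
    # Negative log-probability of the true class, averaged over the batch.
    return -np.mean(np.log(probs[Y, np.arange(Y_hat.shape[1])]))

logits = np.array([[8000.0, 1.0],
                   [  10.0, 2.0],
                   [   5.0, 3.0]])        # (classes x batch)
labels = np.array([0, 2])
print(stable_softmax_crossentropy(logits, labels))   # finite, no overflow
```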
@@ -57,8 +56,14 @@ def calculate_local_grads(self, X, Y):
b[np.arange(Y.shape[1]),Y] = 1
b = b.T
probs = np.subtract(probs,b) / float(Y.shape[0])
# probs = np.sum(probs, axis=1, keepdims=True)

#probs = probs.mean(axis=1,keepdims=True)
return {'x':probs*X}
# print("back loss")
# print(probs.shape)
# print("What is X ?")
# print(X.shape)
return {'x':probs}



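For comparison, the textbook gradient of the batch-averaged softmax cross-entropy with respect to the logits is (softmax - one_hot) / batch_size, which is the quantity the `np.subtract(probs, b)` line above is building (up to the choice of normalizer). A self-contained sketch under the same layout assumptions (classes x batch, integer labels):

```python
import numpy as np

def softmax_crossentropy_grad(Y_hat, Y):
    # Softmax with the same max-shift trick as in the forward pass.
    shifted = Y_hat - np.max(Y_hat, axis=0, keepdims=True)
    exp_x = np.exp(shifted)
    probs = exp_x / np.sum(exp_x, axis=0, keepdims=True)
    # One-hot encode the labels in the same (classes x batch) layout.
    one_hot = np.zeros_like(probs)
    one_hot[Y, np.arange(Y_hat.shape[1])] = 1
    # d(mean cross-entropy)/d(logits) = (softmax - one_hot) / batch_size
    return (probs - one_hot) / Y_hat.shape[1]
```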
2 changes: 1 addition & 1 deletion nn/model.py
@@ -45,4 +45,4 @@ def train_mode(self):
def eval_mode(self):
self.is_train_mode = False
def parameters(self):
return [layer for layer in self.layers if isinstance(layer, Layer)]
return [layer for layer in self.layers if isinstance(layer, Layer)]
8 changes: 5 additions & 3 deletions nn/optim.py
@@ -31,11 +31,13 @@ def optimize(self, w, dw):
"""
Optimization equation for different types of gradient descent
"""
pass
return w

class GradientDecent(Optimizer):
def optimize(self, w, dw):
w = w - self.lr * np.mean(dw,axis=1,keepdims=True)
# dw = np.mean(dw, axis=1, keepdims=True)
dw = dw / np.max(dw)
w = w - self.lr * dw
return w

class SGD(Optimizer):
@@ -123,4 +125,4 @@ def step(self):
self.step_count = self.step_count + 1

if self.step_count % self.step_size == 0:
self.optimizer.lr = self.optimizer.lr * self.gamma
self.optimizer.lr = self.optimizer.lr * self.gamma
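Worth noting about the updated optimize step: dividing by np.max(dw) rescales the step by whatever the largest gradient entry happens to be, and if that entry is negative the division flips the sign of the whole gradient, turning the step into ascent. A minimal sketch of the plain w - lr * dw update for comparison (illustrative class name, not the repo's API):

```python
import numpy as np

class PlainGradientDescent:
    """Vanilla update w <- w - lr * dw; no per-call rescaling of the gradient."""
    def __init__(self, lr=0.001):
        self.lr = lr

    def optimize(self, w, dw):
        # Step length is controlled only by the learning rate,
        # so identical gradients always produce identical steps.
        return w - self.lr * np.asarray(dw)
```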
26 changes: 24 additions & 2 deletions nn/test.py
@@ -21,18 +21,40 @@ def init_weights(self,indim, outdim):
self.weights['b'] = np.zeros((outdim, 1))

def forward(self,X):
# print("weight transpose")
# print(self.weights['w'].T.shape)
# print("Input")
# print(X.shape)
# print("bias transpose")
# print(self.weights['b'].shape)
output = np.dot(self.weights['w'].T ,X) + self.weights['b']
# print("Output")
# print(output.shape)
self.cache['x'] = X
self.cache['output'] = output

return output

def backward(self,global_grad):

dX = np.dot(self.local_grads['x'], global_grad )
# print("local grad x")
# print(self.local_grads['x'].shape)
# print("global grad")
# print(global_grad.shape)
# print("dX")
# print(dX.shape)
dW = np.dot(np.array(self.local_grads['w']) , global_grad.T )
db = np.sum(global_grad, axis = 0, keepdims = True)
# print("local grad w")
# print(self.local_grads['w'].shape)
# print("global grad Transpose")
# print(global_grad.T.shape)
# print("dW")
# print(dW.shape)
db = np.sum(global_grad, axis = 1, keepdims = True)
# print("global grad")
# print(global_grad.shape)
# print("db")
# print(db.shape)
self.weights_global_grads = {'w': dW, 'b': db}
return dX

