@@ -1,6 +1,7 @@
 import argparse
 import numba
 import numpy as np
+import matplotlib.pyplot as plt
 import sys
 import warnings
 warnings.filterwarnings("ignore")
@@ -14,57 +15,42 @@
 if numba.cuda.is_available():
     GPUBackend = minitorch.TensorBackend(minitorch.CudaOps)
 
-C = 10
-
 H, W = 28, 28
+C = 10
 
 
-def mnist_transform(image):
-    """Normalize MNIST image from uint8 [0, 255] to float [0, 1]"""
+def preprocess(image):  # normalize uint8 [0, 255] to float [0, 1]
     return image.astype(np.float64) / 255.0
 
 
-class Network(minitorch.Module):
-    """
-    Implement a CNN for MNist classification based on LeNet.
-    This model should implement the following procedure:
-    1. Apply a convolution with 4 output channels and a 3x3 kernel followed by a ReLU (save to self.mid)
-    2. Apply a convolution with 8 output channels and a 3x3 kernel followed by a ReLU (save to self.out)
-    3. Apply 2D pooling (either Avg or Max) with 4x4 kernel.
-    4. Flatten channels, height, and width. (Should be size BATCHx392)
-    5. Apply a Linear to size 64 followed by a ReLU and Dropout with rate 25%
-    6. Apply a Linear to size C (number of classes).
-    7. Apply a logsoftmax over the class dimension.
-    """
+class Network(minitorch.Module):  # LeNet-5 style CNN for MNIST
 
     def __init__(self, backend=FastTensorBackend):
         super().__init__()
+        self.conv1 = minitorch.Conv2d(1, 6, 5, 5, backend=backend)  # 1 -> 6 channels; kh, kw are positional, stride is fixed at 1
+        self.conv2 = minitorch.Conv2d(6, 16, 5, 5, backend=backend)  # 6 -> 16 channels, 5x5 kernel
 
-        # For vis
-        self.mid = None
-        self.out = None
-
-        self.conv1 = minitorch.Conv2d(1, 4, 3, 3, backend=backend)
-        self.conv2 = minitorch.Conv2d(4, 8, 3, 3, backend=backend)
-        self.linear1 = minitorch.Linear(392, 64, backend=backend)
-        self.linear2 = minitorch.Linear(64, C, backend=backend)
+        self.fc1 = minitorch.Linear(16 * 4 * 4, 120, backend=backend)  # 16 channels of 4x4 after two conv/pool stages
+        self.fc2 = minitorch.Linear(120, 84, backend=backend)
+        self.fc3 = minitorch.Linear(84, C, backend=backend)
 
     def forward(self, x):
         batch_size = x.shape[0]
         x = self.conv1(x).relu()
-        self.mid = x
+        x = minitorch.avgpool2d(x, (2, 2))  # minitorch pooling is non-overlapping, so stride (2, 2) is implicit; 24x24 -> 12x12
         x = self.conv2(x).relu()
-        self.out = x
-        x = minitorch.avgpool2d(x, (4, 4))
-        x = x.view(batch_size, 392)
-        x = self.linear1(x).relu()
-        x = minitorch.dropout(x, 0.25, not self.training)
-        x = self.linear2(x)
+        x = minitorch.avgpool2d(x, (2, 2))  # 8x8 -> 4x4
+        x = x.view(batch_size, 16 * 4 * 4)  # flatten to BATCH x 256
+        x = self.fc1(x).relu()
+        x = minitorch.dropout(x, 0.2, not self.training)
+        x = self.fc2(x).relu()
+        x = minitorch.dropout(x, 0.2, not self.training)
+        x = self.fc3(x)
         x = minitorch.logsoftmax(x, dim=1)
         return x
 
 
-def default_log_fn(epoch, total_loss, correct, total):
+def default_log_fn(epoch, total_loss, correct, total, loss_list):
     print(
         f"Epoch {epoch} | loss {total_loss / total:.2f} | valid acc {correct / total:.2f}"
     )
@@ -74,11 +60,11 @@ def train(
     model,
     train_loader,
     val_loader,
-    learning_rate,
+    learning_rate=1e-2,
     max_epochs=50,
     log_fn=default_log_fn,
 ):
-    optim = minitorch.SGD(model.parameters(), learning_rate)
+    optim = minitorch.RMSProp(model.parameters(), learning_rate)
     for epoch in range(1, max_epochs + 1):
         total_loss = 0.0
         model.train()
@@ -133,14 +119,14 @@ def train(
         batch_size=args.batch_size,
         shuffle=True,
         backend=backend,
-        transform=mnist_transform
+        transform=preprocess
     )
     val_loader = DataLoader(
         mnist_val,
         batch_size=args.batch_size,
         shuffle=False,
         backend=backend,
-        transform=mnist_transform
+        transform=preprocess
     )
 
     model = Network(backend=backend)