
Commit ecda33c

Add layers implementations for Minitorch
1 parent 471d350 commit ecda33c

4 files changed: +59 -174 lines changed


minitorch/backends/cuda_conv.py

Lines changed: 0 additions & 4 deletions
@@ -33,9 +33,6 @@ def _tensor_conv1d_kernel(
     weight_strides: Strides,
     reverse: bool,
 ) -> None:
-    """
-    CUDA 1D Convolution implementation.
-    """
     i = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
     if i >= out_size:
         return
@@ -155,7 +152,6 @@ def _tensor_conv2d_kernel(
     if i >= out_size:
         return

-    # Deconstruct i into batch, out_channel, out_width
     out_index = cuda.local.array(MAX_DIMS, numba.int32)
     to_index(i, out_shape, out_index)

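For context: the removed comment in _tensor_conv2d_kernel described the step immediately below it, where each CUDA thread turns its flat global index i (computed as cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x, with the i >= out_size guard covering threads launched past the end of the output) into a multi-dimensional output position via to_index. A minimal pure-Python sketch of that decomposition, assuming row-major (C-style) ordering; to_index_sketch is a hypothetical stand-in for minitorch's to_index, which writes into a preallocated index array instead of returning one:

def to_index_sketch(pos, shape):
    # Decompose a flat position into a multi-dimensional index, assuming
    # row-major (C-style) layout: the last dimension varies fastest.
    index = [0] * len(shape)
    for dim in range(len(shape) - 1, -1, -1):
        index[dim] = pos % shape[dim]
        pos //= shape[dim]
    return index

# For an output shape (batch, out_channels, height, width) = (2, 3, 4, 4),
# flat position 27 maps to (batch 0, channel 1, row 2, column 3).
assert to_index_sketch(27, (2, 3, 4, 4)) == [0, 1, 2, 3]
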
minitorch/nn/layers.py

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
+from ..tensor.operators import TensorBackend
+from ..tensor.functions import rand, zeros
+from .module import Module, Parameter
+from ..backends import fast_conv, fast_ops
+
+BACKEND = TensorBackend(fast_ops.FastOps)
+
+
+class Linear(Module):
+    def __init__(self, in_size, out_size):
+        super().__init__()
+
+        # He initialization
+        scale = (2.0 / in_size) ** 0.5
+        self.weights = Parameter(scale * rand((in_size, out_size), backend=BACKEND))
+        self.bias = Parameter(zeros((out_size,), backend=BACKEND))
+        self.out_size = out_size
+
+    def forward(self, x):
+        batch, in_size = x.shape
+        return (
+            x.view(batch, in_size) @ self.weights.value.view(in_size, self.out_size)
+        ).view(batch, self.out_size) + self.bias.value
+
+
+class Conv1d(Module):
+    def __init__(self, in_channels, out_channels, kernel_width):
+        super().__init__()
+
+        # He initialization
+        fan_in = in_channels * kernel_width
+        scale = (2.0 / fan_in) ** 0.5
+        self.weights = Parameter(
+            scale * rand((out_channels, in_channels, kernel_width), backend=BACKEND)
+        )
+        self.bias = Parameter(zeros((1, out_channels, 1), backend=BACKEND))
+
+    def forward(self, input):
+        out = fast_conv.conv1d(input, self.weights.value) + self.bias.value
+        return out
+
+
+class Conv2d(Module):
+    def __init__(self, in_channels, out_channels, kh, kw):
+        super().__init__()
+
+        # He initialization
+        fan_in = in_channels * kh * kw
+        scale = (2.0 / fan_in) ** 0.5
+        self.weights = Parameter(
+            scale * rand((out_channels, in_channels, kh, kw), backend=BACKEND)
+        )
+        self.bias = Parameter(zeros((out_channels, 1, 1), backend=BACKEND))
+
+    def forward(self, input):
+        out = fast_conv.conv2d(input, self.weights.value) + self.bias.value
+        return out

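A hedged usage sketch of the new layers. The module path minitorch/nn/layers.py, the class names, and the constructor signatures come from the diff above; the top-level import spelling, rand's default backend, and the output spatial sizes are assumptions:

from minitorch.nn.layers import Linear, Conv2d
from minitorch.tensor.functions import rand

# Linear maps (batch, in_size) -> (batch, out_size) via x @ W + bias.
linear = Linear(in_size=64, out_size=10)
x = rand((8, 64))
out = linear.forward(x)    # shape (8, 10)

# Conv2d stores weights of shape (out_channels, in_channels, kh, kw);
# its (out_channels, 1, 1) bias broadcasts over the (batch, C, H, W) output.
conv = Conv2d(in_channels=1, out_channels=4, kh=3, kw=3)
img = rand((8, 1, 28, 28))
feat = conv.forward(img)   # spatial size depends on fast_conv.conv2d's
                           # boundary/padding convention

The (2.0 / fan_in) ** 0.5 scale in each constructor is He initialization: it keeps activation variance roughly constant through ReLU layers, but only if the base weight samples are zero-mean with unit variance, which is exactly what the rand change below provides.
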
minitorch/tensor/functions.py

Lines changed: 2 additions & 2 deletions
@@ -298,7 +298,7 @@ def rand(
     requires_grad: bool = False,
 ) -> Tensor:
     """
-    Produce a random tensor of size `shape`.
+    Produce a standard random distribution tensor of size `shape`.

     Args:
         shape : shape of tensor
@@ -308,7 +308,7 @@ def rand(
     Returns:
         :class:`Tensor` : new tensor
     """
-    vals = [random.random() for _ in range(int(common_operators.prod(shape)))]
+    vals = [np.random.randn() for _ in range(int(common_operators.prod(shape)))]
     tensor = minitorch.Tensor.make(vals, shape, backend=backend)
     tensor.requires_grad_(requires_grad)
     return tensor

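This swap changes rand from uniform samples on [0, 1) (random.random, mean 0.5) to standard normal samples (np.random.randn, mean 0, variance 1), assuming functions.py already imports numpy as np, which this hunk does not show. Zero-mean, unit-variance samples are what make the He scaling in the new layers come out right: scale * rand(...) then has variance 2 / fan_in. A NumPy-only illustration, with arbitrary seed and sizes:

import numpy as np

rng = np.random.default_rng(0)
uniform = rng.random(100_000)            # old behavior: mean ~0.5, std ~0.29
normal = rng.standard_normal(100_000)    # new behavior: mean ~0.0, std ~1.0

# Scaling unit-variance samples by (2 / fan_in) ** 0.5 yields He-initialized
# weights with variance 2 / fan_in; a uniform [0, 1) base would instead give
# positively biased weights with far smaller variance.
fan_in = 64
w = (2.0 / fan_in) ** 0.5 * rng.standard_normal((fan_in, 10))
print(float(w.var()))                    # ~0.0312, i.e. about 2 / 64
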
project/run_mnist.py

Lines changed: 0 additions & 168 deletions
This file was deleted.
