Commit 3f3e5a3: Decouple weight initialization

1 parent: ecda33c

6 files changed: 48 additions, 30 deletions


minitorch/datasets/mnist.py

Whitespace-only changes.

minitorch/nn/init.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+import math
+from ..tensor import tensor
+
+
+def kaiming_uniform(tensor, fan_in, **kwargs):
+    bound = math.sqrt(6 / fan_in)
+    tensor.uniform_(-bound, bound)
+
+
+def glorot_uniform(tensor, fan_in, fan_out):
+    bound = math.sqrt(6 / (fan_in + fan_out))
+    tensor.uniform_(-bound, bound)
+
+
+def lecun_uniform(tensor, fan_in, **kwargs):
+    bound = math.sqrt(3 / fan_in)
+    tensor.uniform_(-bound, bound)
+
+
+def zero(tensor, **kwargs):
+    tensor.fill_(0.0)
+
+
+def one(tensor, **kwargs):
+    tensor.fill_(1.0)
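For reference, these are the standard uniform variants: Kaiming uses a bound of sqrt(6 / fan_in) (gain sqrt(2) for ReLU times sqrt(3 / fan_in)), Glorot uses sqrt(6 / (fan_in + fan_out)), and LeCun uses sqrt(3 / fan_in). A minimal sketch of the same math in plain NumPy (function name and shapes here are illustrative, not part of the commit):

    import math
    import numpy as np

    def kaiming_uniform_np(shape, fan_in):
        # bound = sqrt(2) * sqrt(3 / fan_in) = sqrt(6 / fan_in)
        bound = math.sqrt(6 / fan_in)
        return np.random.uniform(-bound, bound, size=shape)

    w = kaiming_uniform_np((64, 32), fan_in=64)
    assert np.abs(w).max() <= math.sqrt(6 / 64)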

minitorch/nn/layers.py

Lines changed: 13 additions & 25 deletions
@@ -1,19 +1,15 @@
-from ..tensor.operators import TensorBackend
 from ..tensor.functions import rand, zeros
 from .module import Module, Parameter
 from ..backends import fast_conv, fast_ops
-
-BACKEND = TensorBackend(fast_ops.FastOps)
+from . import init


 class Linear(Module):
-    def __init__(self, in_size, out_size):
+    def __init__(self, in_size, out_size, initializer=init.kaiming_uniform):
         super().__init__()
-
-        # He initialization
-        scale = (2.0 / in_size) ** 0.5
-        self.weights = Parameter(scale * rand((in_size, out_size), backend=BACKEND))
-        self.bias = Parameter(zeros((out_size,), backend=BACKEND))
+        self.weights = Parameter(rand((in_size, out_size)))
+        initializer(self.weights.value, in_size)
+        self.bias = Parameter(zeros((out_size,)))
         self.out_size = out_size

     def forward(self, x):
@@ -24,33 +20,25 @@ def forward(self, x):


 class Conv1d(Module):
-    def __init__(self, in_channels, out_channels, kernel_width):
+    def __init__(self, in_channels, out_channels, kernel_width, initializer=init.kaiming_uniform):
         super().__init__()
-
-        # He initialization
+        self.weights = Parameter(rand((out_channels, in_channels, kernel_width)))
         fan_in = in_channels * kernel_width
-        scale = (2.0 / fan_in) ** 0.5
-        self.weights = Parameter(
-            scale * rand((out_channels, in_channels, kernel_width), backend=BACKEND)
-        )
-        self.bias = Parameter(zeros((1, out_channels, 1), backend=BACKEND))
+        initializer(self.weights.value, fan_in)
+        self.bias = Parameter(zeros((1, out_channels, 1)))

     def forward(self, input):
         out = fast_conv.conv1d(input, self.weights.value) + self.bias.value
         return out


 class Conv2d(Module):
-    def __init__(self, in_channels, out_channels, kh, kw):
+    def __init__(self, in_channels, out_channels, kh, kw, initializer=init.kaiming_uniform):
         super().__init__()
-
-        # He initialization
+        self.weights = Parameter(rand((out_channels, in_channels, kh, kw)))
         fan_in = in_channels * kh * kw
-        scale = (2.0 / fan_in) ** 0.5
-        self.weights = Parameter(
-            scale * rand((out_channels, in_channels, kh, kw), backend=BACKEND)
-        )
-        self.bias = Parameter(zeros((out_channels, 1, 1), backend=BACKEND))
+        initializer(self.weights.value, fan_in)
+        self.bias = Parameter(zeros((out_channels, 1, 1)))

     def forward(self, input):
         out = fast_conv.conv2d(input, self.weights.value) + self.bias.value
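With initialization decoupled, any callable taking (tensor, fan_in) can be passed per layer; each layer computes its own fan_in and applies the initializer to the freshly allocated weights. A hypothetical usage sketch (module paths inferred from the file layout, sizes illustrative):

    from minitorch.nn import init
    from minitorch.nn.layers import Linear, Conv2d

    fc = Linear(64, 32)                                         # default: init.kaiming_uniform
    conv = Conv2d(3, 16, 3, 3, initializer=init.lecun_uniform)  # swap the scheme per layer

Note that glorot_uniform also expects fan_out, which these layers do not supply, so it would need a small wrapper providing fan_out before it could be passed as the initializer argument.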

minitorch/nn/nn.py

Lines changed: 4 additions & 4 deletions
@@ -121,10 +121,10 @@ def logsoftmax(input: Tensor, dim: int) -> Tensor:
     Returns:
         log of softmax tensor
     """
-    exp_input = input.exp()
+    m = max(input, dim)
+    exp_input = (input - m).exp()
     sum_exp = exp_input.sum(dim)
-    log_sum_exp = sum_exp.log()
-    return input - m - sum_exp.log()
+    return input - m - sum_exp.log()


 def maxpool2d(input: Tensor, kernel: Tuple[int, int]) -> Tensor:
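This is the usual log-sum-exp stabilization: subtracting the per-dim maximum m leaves logsoftmax(x) = x - m - log(sum(exp(x - m))) mathematically unchanged, but keeps exp from overflowing on large inputs. A quick NumPy illustration of the identity (not part of the commit):

    import numpy as np

    x = np.array([1000.0, 1001.0, 1002.0])

    # naive form: exp(1000) overflows to inf, every entry collapses to -inf
    naive = x - np.log(np.sum(np.exp(x)))

    # stabilized form: subtract the max first
    m = x.max()
    stable = x - m - np.log(np.sum(np.exp(x - m)))

    print(naive)   # [-inf -inf -inf] (with overflow warnings)
    print(stable)  # [-2.4076... -1.4076... -0.4076...]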
@@ -161,5 +161,5 @@ def dropout(input: Tensor, rate: float, ignore: bool = False) -> Tensor:
     if rate == 1.0:
         return input * 0
     p_keep = 1.0 - rate
-    mask = rand(input.shape) > rate
+    mask = tensor([1.0]) - (rand(input.shape) < rate)
     return input * mask * (1.0 / p_keep)
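Both the old and the new expressions build a {0, 1} keep mask in which each unit survives with probability p_keep = 1 - rate; surviving activations are then scaled by 1 / p_keep so the expected value matches the no-dropout case (inverted dropout). A small NumPy sketch of the same idea (illustrative only):

    import numpy as np

    def dropout_np(x, rate):
        if rate == 1.0:
            return np.zeros_like(x)
        p_keep = 1.0 - rate
        # keep mask: 1 with probability p_keep, 0 with probability rate
        mask = (np.random.uniform(size=x.shape) >= rate).astype(x.dtype)
        return x * mask / p_keep

    x = np.ones((100000,))
    print(dropout_np(x, 0.3).mean())  # close to 1.0 in expectation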

minitorch/tensor/functions.py

Lines changed: 1 addition & 1 deletion
@@ -308,7 +308,7 @@ def rand(
     Returns:
         :class:`Tensor` : new tensor
     """
-    vals = [np.random.randn() for _ in range(int(common_operators.prod(shape)))]
+    vals = [np.random.uniform() for _ in range(int(common_operators.prod(shape)))]
    tensor = minitorch.Tensor.make(vals, shape, backend=backend)
     tensor.requires_grad_(requires_grad)
     return tensor
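With this change rand matches its name: it draws from U[0, 1) rather than a standard normal, which is what the dropout mask above (rand(input.shape) < rate) relies on for its probability to equal rate. A quick check (module path inferred from the file layout; the storage access mirrors how the commit's own uniform_ touches it):

    from minitorch.tensor.functions import rand

    t = rand((2, 3))
    assert all(0.0 <= v < 1.0 for v in t._tensor._storage)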

minitorch/tensor/tensor.py

Lines changed: 5 additions & 0 deletions
@@ -372,3 +372,8 @@ def zero_grad_(self) -> None:  # pragma: no cover
         Reset the derivative on this variable.
         """
         self.grad = None
+
+    def uniform_(self, low=0.0, high=1.0):
+        self._tensor._storage[:] = np.random.uniform(
+            low, high, size=len(self._tensor._storage)
+        )
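The new uniform_ overwrites the tensor's existing storage in place with U[low, high) samples; it is the hook the initializers in init.py call, alongside the fill_ used by init.zero and init.one. A short usage sketch (illustrative):

    from minitorch.tensor.functions import rand

    w = rand((4, 4))
    w.uniform_(-0.1, 0.1)  # refill storage in place; no autograd history is recorded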
