Skip to content

Commit 1c3b5a1

Browse files
committed
- Add tensor slicing
- Implement RNN layer
- Fix GitHub CI/CD
1 parent 49f172f commit 1c3b5a1

File tree

8 files changed

+579
-22
lines changed

8 files changed

+579
-22
lines changed

.github/workflows/minitorch.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ jobs:
88
runs-on: ubuntu-latest
99
strategy:
1010
matrix:
11-
python-version: [3.8]
11+
python-version: ['3.10']
1212

1313
steps:
14-
- uses: actions/checkout@v2
14+
- uses: actions/checkout@v4
1515
- name: Set up Python ${{ matrix.python-version }}
16-
uses: actions/setup-python@v2
16+
uses: actions/setup-python@v4
1717
with:
1818
python-version: ${{ matrix.python-version }}
1919
- name: Install dependencies

minitorch/nn/layers.py

Lines changed: 79 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,25 @@
99
cuda_conv = None
1010

1111

12+
__all__ = ['Linear', 'Conv1d', 'Conv2d', 'RNN', 'tanh']
13+
14+
15+
def tanh(x):
    """
    Apply the hyperbolic tangent element-wise.

    The value is obtained through the sigmoid identity

        tanh(x) = 2 * sigmoid(2x) - 1

    which is algebraically the same as (exp(2x) - 1) / (exp(2x) + 1).

    Args:
        x: Input tensor; it must support scalar multiplication, scalar
           subtraction and a ``sigmoid()`` method.

    Returns:
        Tensor holding tanh of every element of ``x``
    """
    # The sigmoid form is kept from the original implementation, whose
    # author chose it for numerical stability.
    doubled = 2.0 * x
    gate = doubled.sigmoid()
    return 2.0 * gate - 1.0
29+
30+
1231
class Linear(Module):
1332
def __init__(self, in_size, out_size, backend, initializer=init.kaiming_uniform):
1433
super().__init__()
@@ -46,17 +65,17 @@ def forward(self, input):
4665
class Conv2d(Module):
4766
def __init__(self, in_channels, out_channels, kernel, backend, stride=1, initializer=init.kaiming_uniform):
4867
super().__init__()
49-
68+
5069
self.stride = stride if isinstance(stride, tuple) else (stride, stride)
5170
kernel = kernel if isinstance(kernel, tuple) else (kernel, kernel)
5271
kh, kw = kernel
53-
72+
5473
self.weights = Parameter(rand((out_channels, in_channels, kh, kw), backend=backend))
5574
fan_in = in_channels * kh * kw
5675
initializer(self.weights.value, fan_in)
57-
76+
5877
self.bias = Parameter(zeros((out_channels, 1, 1), backend=backend))
59-
78+
6079
self.backend = backend
6180

6281
def forward(self, input):
@@ -65,3 +84,59 @@ def forward(self, input):
6584
else:
6685
out = fast_conv.conv2d(input, self.weights.value, self.stride) + self.bias.value
6786
return out
87+
88+
89+
class RNN(Module):
    """
    Single-layer recurrent network with a tanh non-linearity.

    Every time step computes

        h_t = tanh(x_t @ W_ih + h_{t-1} @ W_hh + bias)

    Parameters created in ``__init__``:
        W_ih: (input_size, hidden_size) input-to-hidden weights
        W_hh: (hidden_size, hidden_size) hidden-to-hidden weights
        bias: (hidden_size,) bias, initialised to zeros
    """

    def __init__(self, input_size, hidden_size, backend, initializer=init.glorot_uniform):
        """
        Args:
            input_size: Number of features in each input step
            hidden_size: Number of features in the hidden state
            backend: Tensor backend used for parameters and the initial state
            initializer: Callable invoked as
                ``initializer(weights, fan_in, fan_out)`` on each weight matrix
        """
        super().__init__()

        # Input-to-hidden weights
        self.W_ih = Parameter(rand((input_size, hidden_size), backend=backend))
        initializer(self.W_ih.value, input_size, hidden_size)

        # Hidden-to-hidden weights
        self.W_hh = Parameter(rand((hidden_size, hidden_size), backend=backend))
        initializer(self.W_hh.value, hidden_size, hidden_size)

        # Bias
        self.bias = Parameter(zeros((hidden_size,), backend=backend))

        self.hidden_size = hidden_size
        self.input_size = input_size
        self.backend = backend

    def forward(self, x, h=None):
        """
        Run the recurrence over every step of ``x``.

        Args:
            x: Tensor of shape (batch_size, seq_len, input_size)
            h: Optional initial hidden state of shape
               (batch_size, hidden_size); zeros are used when omitted

        Returns:
            (output, h) where ``output`` has shape
            (batch_size, seq_len, hidden_size) and ``h`` is the hidden state
            after the last step.

        Raises:
            AssertionError: if the last dimension of ``x`` is not
                ``self.input_size``.
        """
        batch_size, seq_len, input_size = x.shape
        assert input_size == self.input_size, f"Expected input size {self.input_size}, got {input_size}"

        if h is None:
            h = zeros((batch_size, self.hidden_size), backend=self.backend)

        # These views do not depend on the time step, so build them once.
        w_ih = self.W_ih.value.view(self.input_size, self.hidden_size)
        w_hh = self.W_hh.value.view(self.hidden_size, self.hidden_size)
        bias = self.bias.value.view(1, self.hidden_size)

        outputs = []
        for t in range(seq_len):
            x_t = x[:, t, :]
            h = tanh(
                x_t.view(batch_size, self.input_size) @ w_ih
                + h.view(batch_size, self.hidden_size) @ w_hh
                + bias
            )
            outputs.append(h)

        if seq_len == 1:
            # A single step only needs the extra time axis added.
            output = outputs[0].view(batch_size, 1, self.hidden_size)
        else:
            # Flatten in (batch, time, hidden) order, then reshape.
            # NOTE(review): the values are read element by element, so the
            # resulting tensor is presumably detached from the autograd
            # history of the per-step hidden states - confirm before
            # relying on gradients through `output`.
            output_list = [
                outputs[t][b, h_idx]
                for b in range(batch_size)
                for t in range(seq_len)
                for h_idx in range(self.hidden_size)
            ]

            # Imported here, as in the original, rather than at module level.
            from ..tensor.functions import tensor
            output = tensor(output_list, backend=self.backend).view(batch_size, seq_len, self.hidden_size)

        return output, h

minitorch/tensor/data.py

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,41 @@ def strides_from_shape(shape: UserShape) -> UserStrides:
143143
return tuple(reversed(layout[:-1]))
144144

145145

146+
def normalize_slice(s: slice, dim_size: int) -> Tuple[int, int, int]:
    """
    Normalize a slice object to concrete (start, stop, step) values.

    Missing fields are filled in and negative or out-of-range bounds are
    clamped exactly as Python does for sequences of length ``dim_size``.
    For a negative step this means ``start`` is clamped to
    ``[-1, dim_size - 1]`` and an omitted ``stop`` becomes ``-1``.

    Args:
        s: slice object
        dim_size: size of the dimension being sliced

    Returns:
        (start, stop, step) tuple such that ``range(start, stop, step)``
        enumerates the selected positions

    Raises:
        IndexingError: if the slice step is zero
    """
    # Keep the project-specific error; slice.indices would raise ValueError.
    if s.step == 0:
        raise IndexingError("slice step cannot be zero")

    # slice.indices implements the full clamping rules, including the
    # negative-step cases the hand-written version got wrong.
    return s.indices(dim_size)
179+
180+
146181
class TensorData:
147182
_storage: Storage
148183
_strides: Strides
@@ -175,7 +210,8 @@ def __init__(
175210
self.dims = len(strides)
176211
self.size = int(prod(shape))
177212
self.shape = shape
178-
assert len(self._storage) == self.size
213+
# Note: Storage can be larger than size for non-contiguous views
214+
# assert len(self._storage) == self.size
179215

180216
def to_cuda_(self) -> None: # pragma: no cover
181217
if not numba.cuda.is_cuda_array(self._storage):
@@ -260,6 +296,55 @@ def permute(self, *order: int) -> TensorData:
260296
new_strides = tuple(self.strides[i] for i in order)
261297
return TensorData(self._storage, new_shape, new_strides)
262298

299+
def slice(self, key: Union[int, slice, Sequence[Union[int, slice]]]) -> TensorData:
    """
    Build a sliced view of this tensor data.

    Integer entries select one position and drop that dimension; slice
    entries keep the dimension with a new size and stride. Dimensions not
    covered by ``key`` are kept whole.

    Args:
        key: int, slice, or sequence of ints/slices, one per leading dimension

    Returns:
        New TensorData for the selection; when every dimension is indexed
        by an int, a one-element TensorData of shape (1,) holding the value

    Raises:
        IndexingError: on too many indices, an out-of-range integer index,
            or an entry that is neither int nor slice
    """
    if isinstance(key, (int, slice)):
        key = (key,)

    rank = len(self.shape)
    if len(key) > rank:
        raise IndexingError(f"Too many indices {len(key)} for tensor of dimension {len(self.shape)}")

    # Pad with full slices so every dimension has an entry.
    padded_key = tuple(key) + (slice(None),) * (rank - len(key))

    kept_shape = []
    kept_strides = []
    offset = 0

    for axis, (entry, extent, stride) in enumerate(zip(padded_key, self.shape, self.strides)):
        if isinstance(entry, int):
            # Integer index: move the offset and drop this dimension.
            position = entry + extent if entry < 0 else entry
            if not 0 <= position < extent:
                raise IndexingError(f"Index {entry} out of range for dimension {axis} with size {extent}")
            offset += position * stride
            continue

        if not isinstance(entry, slice):
            raise IndexingError(f"Unsupported index type: {type(entry)}")

        start, stop, step = normalize_slice(entry, extent)
        # Number of positions visited walking start -> stop by step.
        kept_shape.append(len(range(start, stop, step)))
        kept_strides.append(stride * step)
        offset += start * stride

    if not kept_shape:
        # Every dimension was indexed by an int: wrap the single value.
        return TensorData([self._storage[offset]], (1,), (1,))

    return _make_tensor_data_view(self._storage, tuple(kept_shape), tuple(kept_strides), offset)
347+
263348
def to_string(self) -> str:
264349
s = ""
265350
for index in self.indices():
@@ -283,3 +368,30 @@ def to_string(self) -> str:
283368
else:
284369
s += " "
285370
return s
371+
372+
373+
def _make_tensor_data_view(
    storage: Storage, shape: UserShape, strides: UserStrides, offset: int
) -> TensorData:
    """
    Create a TensorData for a selection that starts ``offset`` elements
    into ``storage``.

    Args:
        storage: The underlying storage array
        shape: Shape of the view
        strides: Strides for the view
        offset: Offset into the storage where the view starts

    Returns:
        TensorData representing the selection. With non-negative strides
        the storage is trimmed with ``storage[offset:]``; with any negative
        stride the selected elements are gathered into new contiguous
        storage instead.
    """
    if len(shape) == 0 or prod(shape) == 0:
        # Empty tensor
        return TensorData([], shape, strides)

    if any(stride < 0 for stride in strides):
        # A negative stride walks backwards from `offset`, i.e. to positions
        # before the start of `storage[offset:]`. TensorData carries no base
        # offset, so such a selection cannot be a trimmed view: gather the
        # elements in row-major order and use contiguous strides.
        from itertools import product

        gathered = [
            storage[offset + sum(i * stride for i, stride in zip(index, strides))]
            for index in product(*(range(extent) for extent in shape))
        ]
        return TensorData(gathered, shape, strides_from_shape(shape))

    view_storage = storage[offset:] if offset > 0 else storage
    return TensorData(view_storage, shape, strides)

minitorch/tensor/tensor.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -285,9 +285,27 @@ def contiguous(self) -> Tensor:
285285
def __repr__(self) -> str:
286286
return self._tensor.to_string()
287287

288-
def __getitem__(self, key: Union[int, UserIndex]) -> float:
289-
key2 = (key,) if isinstance(key, int) else key
290-
return self._tensor.get(key2)
288+
def __getitem__(self, key: Union[int, slice, UserIndex]) -> Union[float, Tensor]:
    """
    Index or slice the tensor.

    Args:
        key: an int, a slice, or a tuple of ints and/or slices

    Returns:
        * a float when ``key`` is an int on a 1-D tensor, or a tuple made
          only of ints (delegated to ``self._tensor.get``);
        * a Tensor on the same backend in every other supported case:
          an int on a multi-dimensional tensor, a slice, or a tuple
          containing at least one slice.

        A key containing a slice always yields a Tensor, even when the
        selection has a single element, so ``x[0:1]`` and ``x[0, 0:1]``
        agree on their return type.

    Raises:
        TypeError: if ``key`` is not an int, slice or tuple.
    """
    if isinstance(key, int):
        if len(self.shape) == 1:
            return self._tensor.get((key,))
        # Selecting along the first dimension of an N-D tensor leaves a
        # sub-tensor, not a scalar.
        return Tensor(self._tensor.slice(key), backend=self.backend)

    if isinstance(key, slice):
        return Tensor(self._tensor.slice(key), backend=self.backend)

    if isinstance(key, tuple):
        if any(isinstance(k, slice) for k in key):
            # Do not collapse one-element results to float here: a slice
            # keeps its dimension regardless of how many elements it selects.
            return Tensor(self._tensor.slice(key), backend=self.backend)
        return self._tensor.get(key)

    raise TypeError(f"Unsupported index type: {type(key)}")
291309

292310
def __setitem__(self, key: Union[int, UserIndex], val: float) -> None:
293311
key2 = (key,) if isinstance(key, int) else key

requirements.txt

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
colorama==0.4.3
2-
hypothesis == 6.54
3-
mypy == 0.971
4-
numba==0.62
5-
numpy==2.2.6
6-
pre-commit==2.20.0
7-
pytest==7.1.2
8-
pytest-env
9-
pytest-runner==5.2
1+
colorama==0.4.6
2+
hypothesis==6.54
3+
mypy==1.7.0
4+
numba==0.60.0
5+
numpy==1.26.4
6+
pre-commit==3.5.0
7+
pytest==7.4.3
8+
pytest-env==1.1.3
9+
pytest-runner==6.0.1
1010
tensorboardX==2.6.4
11-
tensorboard==2.20.0
12-
typing_extensions
11+
tensorboard==2.15.1
12+
typing_extensions==4.9.0

0 commit comments

Comments
 (0)