This is a mini, torch-like deep learning library. The goal is to let you build deep learning models with syntax similar to PyTorch. The project's starting point is the minitorch exercises; after completing them, I wanted to turn the result into a functional deep learning library that can utilize the GPU for model training, and to refactor the code to make it clearer. This project is my attempt to do so.
Install the dependencies:

```bash
pip install -r requirements.txt
```

Basic tensor usage mirrors PyTorch:

```python
import minitorch

# Initialize backends
FastTensorBackend = minitorch.TensorBackend(minitorch.FastOps)
GPUBackend = minitorch.TensorBackend(minitorch.CudaOps)  # if CUDA is available

# Create tensors
x = minitorch.tensor([1, 2, 3, 4], backend=FastTensorBackend)
y = minitorch.tensor([5, 6, 7, 8], backend=FastTensorBackend)

# Perform operations
z = x + y
result = z.sum()
```
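Tensors also track gradients for autodiff. A minimal sketch, assuming the standard minitorch `requires_grad` argument to `tensor()` and the `.grad` attribute populated by `backward()`:

```python
# Assumes tensor(..., requires_grad=True) and the .grad attribute, as in standard minitorch
x = minitorch.tensor([1.0, 2.0, 3.0], backend=FastTensorBackend, requires_grad=True)
y = (x * x).sum()   # scalar result, so backward() needs no explicit gradient
y.backward()
print(x.grad)       # gradient of sum(x^2) with respect to x, i.e. 2 * x
```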
Create custom models by subclassing `minitorch.Module`:

```python
class Network(minitorch.Module):
    def __init__(self, backend):
        super().__init__()
        self.fc1 = minitorch.Linear(784, 128, backend=backend)
        self.fc2 = minitorch.Linear(128, 10, backend=backend)

    def forward(self, x):
        x = self.fc1(x).relu()
        x = minitorch.dropout(x, 0.2, not self.training)  # dropout is skipped in eval mode
        x = self.fc2(x)
        return minitorch.logsoftmax(x, dim=1)
```
A convolutional model can be defined the same way:

```python
class CNN(minitorch.Module):
    def __init__(self, backend=FastTensorBackend):
        super().__init__()
        self.conv1 = minitorch.Conv2d(in_channels=1, out_channels=8, kernel=(3, 3), stride=1, backend=backend)
        self.conv2 = minitorch.Conv2d(in_channels=8, out_channels=16, kernel=(3, 3), stride=1, backend=backend)
        self.fc = minitorch.Linear(16 * 5 * 5, 10, backend=backend)  # 10 output classes

    def forward(self, x):
        batch_size = x.shape[0]
        x = self.conv1(x).relu()
        x = minitorch.maxpool2d(x, kernel=(2, 2), stride=(2, 2))
        x = self.conv2(x).relu()
        x = minitorch.maxpool2d(x, kernel=(2, 2), stride=(2, 2))
        x = x.view(batch_size, 16 * 5 * 5)
        x = self.fc(x)
        x = minitorch.logsoftmax(x, dim=1)
        return x
```
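A quick sanity check of the shapes above (a sketch; `minitorch.rand` and the MNIST-style `(batch, 1, 28, 28)` input shape, for which the `16 * 5 * 5` flattened size was chosen, are assumptions here):

```python
# Forward a random batch through the CNN and inspect the output shape
cnn = CNN(backend=FastTensorBackend)
dummy = minitorch.rand((4, 1, 28, 28), backend=FastTensorBackend)  # rand is assumed, as in standard minitorch
out = cnn(dummy)
print(out.shape)  # expected: (4, 10)
```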
Train and evaluate a model:

```python
model = Network(backend=FastTensorBackend)
optimizer = minitorch.RMSProp(model.parameters(), lr=0.01)

# Training
model.train()
for epoch in range(num_epochs):
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        output = model(X_batch)
        loss = minitorch.nll_loss(output, y_batch)
        loss.backward()
        optimizer.step()

# Evaluation
model.eval()
for X_batch, y_batch in val_loader:
    output = model(X_batch)
    predictions = minitorch.argmax(output, dim=1)
```
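To turn those predictions into a validation accuracy, one option is to drop to NumPy (a sketch; it assumes `Tensor.to_numpy()` is available, as in standard minitorch, and that `y_batch` holds integer class labels):

```python
correct, total = 0, 0
for X_batch, y_batch in val_loader:
    output = model(X_batch)
    predictions = minitorch.argmax(output, dim=1)
    # to_numpy() is an assumption; compare predicted and true labels elementwise
    correct += int((predictions.to_numpy().flatten() == y_batch.to_numpy().flatten()).sum())
    total += X_batch.shape[0]
print(f"Validation accuracy: {correct / total:.2%}")
```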
Load data with the built-in MNIST dataset and DataLoader:

```python
import numpy as np

from minitorch.datasets import mnist
from minitorch.dataloader import DataLoader

# Load the dataset
mnist_train = mnist.MNISTDataset("/path/to/data", train=True)

# Create a dataloader
train_loader = DataLoader(
    mnist_train,
    batch_size=32,
    shuffle=True,
    backend=FastTensorBackend,
    transform=lambda x: x.astype(np.float64) / 255.0,
)
```
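The `val_loader` used in the training example can be built the same way (`train=False` and `shuffle=False` here are assumptions about the dataset and loader API):

```python
# Validation split: train=False is an assumed counterpart of train=True above
mnist_val = mnist.MNISTDataset("/path/to/data", train=False)
val_loader = DataLoader(
    mnist_val,
    batch_size=32,
    shuffle=False,
    backend=FastTensorBackend,
    transform=lambda x: x.astype(np.float64) / 255.0,
)
```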
```python
import numba

# Check CUDA availability and pick the backend accordingly
if numba.cuda.is_available():
    backend = minitorch.TensorBackend(minitorch.CudaOps)
    print("Using GPU backend")
else:
    backend = minitorch.TensorBackend(minitorch.FastOps)
    print("Using CPU backend")

model = Network(backend=backend)
```
```python
# Save model weights
model.save_weights("model.npz")

# Load model weights
model.load_weights("model.npz")
```
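A typical restore-and-predict flow then looks like the following sketch (it reuses only calls shown above; `X_batch` stands in for any input batch produced by the DataLoader):

```python
# Rebuild the model, restore trained weights, and run inference in eval mode
model = Network(backend=FastTensorBackend)
model.load_weights("model.npz")
model.eval()
output = model(X_batch)
predictions = minitorch.argmax(output, dim=1)
```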
Optimizers:

- `SGD(parameters, lr, momentum)` - Stochastic Gradient Descent (momentum supported)
- `RMSProp(parameters, lr, decay_rate, eps)` - RMSProp optimizer

Loss functions (used as shown in the sketch after this list):

- `nll_loss(output, target)` - Negative Log Likelihood Loss
- `bce_loss(output, target)` - Binary Cross Entropy Loss
- `cross_entropy_loss(output, target)` - Cross Entropy Loss
- `mse_loss(output, target)` - Mean Squared Error Loss
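A tiny end-to-end sketch combining `SGD` and `mse_loss` (values are illustrative; `minitorch.rand` and `mse_loss` reducing to a scalar are assumptions):

```python
# Fit a single Linear layer to random targets with SGD + momentum
layer = minitorch.Linear(3, 1, backend=FastTensorBackend)
optimizer = minitorch.SGD(layer.parameters(), lr=0.05, momentum=0.9)

x = minitorch.rand((8, 3), backend=FastTensorBackend)
target = minitorch.rand((8, 1), backend=FastTensorBackend)

for step in range(100):
    optimizer.zero_grad()
    loss = minitorch.mse_loss(layer(x), target)  # assumed to reduce to a scalar
    loss.backward()
    optimizer.step()
```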
See `examples/train_mnist.py` for a complete example of training a simple CNN on the MNIST dataset:

```bash
python examples/train_mnist.py --backend gpu --batch_size 32 --epochs 10 --lr 0.01
```

TODO:

- Implement vanilla RNN layer
- Implement Adam optimizer