# MNIST Digit Classifier Lab Handout

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torch.nn.functional as F

In [None]:
# Choose the compute backend in priority order: an NVIDIA GPU via CUDA,
# then Apple's GPU via MPS, and finally the CPU, which will be much slower
# (but good enough for this task).
if torch.cuda.is_available():
    device, _banner = torch.device("cuda:0"), "Running on gpu"
elif torch.backends.mps.is_available():
    device, _banner = torch.device("mps"), "Running on mps"
else:
    device, _banner = torch.device("cpu"), "Running on cpu"
# Report which backend was selected.
print(_banner)

In [None]:
# Fixed values: leave these alone or later cells may break.
input_size = 28 * 28  # images are 28 x 28, i.e. 784 values each
num_classes = 10

# Tunable hyperparameters: feel free to experiment with these.
hidden1_size, hidden2_size = 256, 64
num_epochs = 10
batch_size = 128
learning_rate = 1e-4

In [None]:
# Build the MNIST training split first, then the test split. Both are rooted
# at ./data, requested with download=True, and use a ToTensor() transform.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(
        root='./data',
        train=is_train,
        transform=transforms.ToTensor(),
        download=True,
    )
    for is_train in (True, False)
)

In [None]:
# Mini-batch loaders for both splits. Only the training loader shuffles its
# data; the test loader is created with shuffle=False.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Pull a single mini-batch from the training loader and print the shapes of
# the image and label tensors as a sanity check.
samples, labels = next(iter(train_loader))
print(samples.shape, labels.shape)

# Preview up to eight images on a 2 x 4 grid. The count is capped at the
# batch length so that choosing a batch_size below 8 does not raise an
# IndexError.
for i in range(min(8, len(samples))):
    plt.subplot(2, 4, i + 1)
    # index 0 of each sample is presumably the single grayscale channel
    plt.imshow(samples[i][0], cmap='gray')
plt.show()

In [None]:
class NeuralNet(nn.Module):
    """Exercise skeleton for the digit classifier.

    Both ``__init__`` and ``forward`` still contain ``pass`` placeholders that
    the student is expected to replace. Until that is done the model has no
    layers and ``forward`` returns ``None``.
    """

    def __init__(self, input_size, num_classes, hidden1_size, hidden2_size):
        """Create the layers of the network (to be written by the student).

        Args:
            input_size: Number of values per input sample (the notebook
                passes 784, i.e. 28 * 28).
            num_classes: Number of output classes (the notebook passes 10).
            hidden1_size: Intended size of the first hidden layer.
            hidden2_size: Intended size of the second hidden layer.
        """
        super(NeuralNet, self).__init__()
        
        # TODO define the layers of the model
        pass
        # end code region
        # Hints:
        # - nn.Linear(in_size, out_size) creates a fully connected layer
        # - nn.ReLU() creates a ReLU activation function, or you can use F.relu(input) directly
        # - What should the output size be for the last layer given that we write in base 10?
        # - You can also use nn.Sequential() to combine layers

    def forward(self, x):
        """Compute the prediction for an input batch (to be written by the student).

        Args:
            x: Input batch. The rest of the notebook passes images reshaped
                with ``reshape(-1, 28*28)``.

        Returns:
            Currently ``None`` (placeholder). The rest of the notebook feeds
            the result to ``torch.max(outputs, 1)``, so the finished version
            presumably returns one score per class for every sample.
        """
        
        # TODO given input batch x, compute the prediction
        pass
        # end code region
        # Hints:
        # - The input should somehow make its way through all the layers, with activations in between, right?

In [None]:
# Instantiate the network with the sizes chosen earlier in the notebook, then
# move it to the selected device.
model = NeuralNet(
    input_size=input_size,
    num_classes=num_classes,
    hidden1_size=hidden1_size,
    hidden2_size=hidden2_size,
)
model = model.to(device)

In [None]:
# History of loss values; the training loop appends to it.
losses = []

# Cross-entropy loss as the training objective.
loss_fn = nn.CrossEntropyLoss()

# Adam optimizer over all model parameters, using the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
# Training loop (exercise: replace the three "change me" placeholders below).
# Each epoch iterates once over every mini-batch produced by train_loader.
n_total_steps = len(train_loader)  # number of mini-batches per epoch
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # flatten each image into a row of 28*28 values and move the batch to the device
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)

        # TODO do forward pass on images
        outputs = "change me"
        # TODO calculate loss, i.e. how far outputs is from labels. Hint: use loss_fn
        loss = "change me"

        # clear gradient
        optimizer.zero_grad()
        # TODO perform backward propagation on loss
        "change me"
        # nudge parameters a little bit to optimize model according to gradient
        optimizer.step()

        # report progress every 100 mini-batches
        if (i + 1) % 100 == 0:
            print (f'epoch = [{epoch+1}/{num_epochs}], step = [{i+1}/{n_total_steps}], loss = {loss.item()}')
    # NOTE: this line sits outside the inner loop, so it runs once per epoch and
    # records only the loss of that epoch's final mini-batch
    losses.append(loss.item())

In [None]:
# Plot the loss values collected in `losses` during training.
plt.plot(losses)

In [None]:
# Evaluation on the test dataset.
# Remember the current mode and switch to eval mode, so that layers such as
# dropout or batch norm (if any were added to the model) behave correctly
# while scoring. The previous mode is restored afterwards, so re-running the
# training cell is unaffected.
was_training = model.training
model.eval()
try:
    # gradients are not needed for scoring
    with torch.no_grad():
        n_correct = 0
        n_samples = 0
        for images, labels in test_loader:
            images = images.reshape(-1, 28*28).to(device)
            labels = labels.to(device)

            outputs = model(images)

            # the predicted class is the index of the largest score along dim 1
            predictions = outputs.argmax(dim=1)
            n_samples += labels.size(0)
            n_correct += (predictions == labels).sum().item()
finally:
    model.train(was_training)

accuracy = n_correct / n_samples
# Fixed-point formatting: the previous ':.3' spec meant 3 significant digits
# in general format, which prints a perfect score as '1e+02%'.
print(f'{accuracy * 100:.2f}%')