# Load dataset
import torch
import numpy as np
from torch import nn
import matplotlib.pyplot as plt
import torchvision
# MNIST: download once into ./files/ and convert images to tensors
# normalized with this dataset's conventional mean/std (0.1307, 0.3081).
mnist_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.1307,), (0.3081,)),
])
train_dataset = torchvision.datasets.MNIST(
    './files/', train=True, download=True, transform=mnist_transform)
test_dataset = torchvision.datasets.MNIST(
    './files/', train=False, download=True, transform=mnist_transform)
# Create training dataset and cross-validation dataset (80/20 split).
train_batch_size = 100  # minibatch size for optimization steps
test_batch_size = 500   # batch size for evaluation-only passes
# Compute the split sizes in integer arithmetic. random_split raises
# ValueError unless the sizes sum exactly to len(train_dataset); the
# original float expression int(n - .2*n) only happens to round to the
# right value, so derive one size from the other instead.
n_cv = int(0.2 * len(train_dataset))
train_dataset_small, cv_dataset = torch.utils.data.random_split(
    train_dataset, [len(train_dataset) - n_cv, n_cv])
train_loader = torch.utils.data.DataLoader(
    train_dataset_small, batch_size=train_batch_size, shuffle=True)
# No shuffle for the CV loader: it is only read under no_grad.
cv_loader = torch.utils.data.DataLoader(cv_dataset, batch_size=test_batch_size)
#Define neuro, activation function, and layers
class Network(nn.Module):
    """Three-layer fully connected MLP for flattened 28x28 MNIST digits.

    Returns raw class logits. nn.CrossEntropyLoss (used by this script)
    applies log-softmax internally, so the model must NOT softmax its
    output — the original softmax-on-output produced a double softmax
    that flattens gradients. The original also used Softmax as the
    *hidden* activation, which confines every hidden layer to a
    probability simplex and starves the network of gradient; ReLU is the
    appropriate hidden nonlinearity. Downstream argmax-based accuracy is
    unaffected (argmax of logits == argmax of softmax).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super(Network, self).__init__()
        # Fully connected layers with ReLU activations and dropout p=0.1.
        self.layer1 = nn.Linear(input_dim, hidden_dim)
        self.act1 = nn.ReLU()
        self.layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.act2 = nn.ReLU()
        self.layer3 = nn.Linear(hidden_dim, output_dim)
        # Shared dropout module; only active in train() mode.
        self.dropout = nn.Dropout(p=0.1)

    def forward(self, input):
        """Map a (batch, input_dim) tensor to (batch, output_dim) logits."""
        # Layer 1
        x = self.dropout(self.act1(self.layer1(input)))
        # Layer 2
        x = self.dropout(self.act2(self.layer2(x)))
        # Layer 3: raw logits, consumed directly by the loss and argmax.
        return self.layer3(x)
# Training configuration. The hidden width 387 is the mean of the
# 784-dim input layer and the 10-class output layer.
epochs = 100
loss_fnc = nn.CrossEntropyLoss()
model = Network(input_dim=784, hidden_dim=387, output_dim=10)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Train the model, recording per-batch training loss and per-batch
# cross-validation loss/accuracy.
Llist_Train = []  # training loss, one entry per training minibatch
Llist_Test = []   # cross-validation loss, one entry per CV batch
Alist = []        # cross-validation accuracy, one entry per CV batch
for epoch in range(epochs):
    print(epoch)
    model.train()  # re-enable dropout for the optimization passes
    for train_sample in train_loader:
        # Flatten (B, 1, 28, 28) images to (B, 784) vectors.
        X = torch.reshape(train_sample[0], (train_sample[0].shape[0], 784))
        y = train_sample[1]
        y_pred = model(X)
        # Training loss on this minibatch
        loss = loss_fnc(y_pred, y)
        Llist_Train.append(loss.item())
        # Backprop and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Evaluate on the held-out split. eval() is required in addition to
    # no_grad(): no_grad() only disables autograd, while eval() turns
    # off dropout, which would otherwise randomly perturb the
    # validation metrics (the original left dropout active here).
    model.eval()
    with torch.no_grad():
        for cv_sample in cv_loader:
            X = torch.reshape(cv_sample[0], (cv_sample[0].shape[0], 784))
            y = cv_sample[1]
            y_pred = model(X)
            loss_test = loss_fnc(y_pred, y)
            Llist_Test.append(loss_test.item())
            # Fraction of correct argmax predictions in this batch;
            # store a plain float rather than a 0-d tensor.
            correct = (torch.argmax(y_pred, dim=1) == y).type(torch.FloatTensor)
            Alist.append(correct.mean().item())
# Final loss/accuracy on the whole test set.
model.eval()  # disable dropout for evaluation
with torch.no_grad():
    # test_dataset.data is the RAW uint8 tensor (values 0-255): reading
    # it directly bypasses the dataset's transform pipeline, so apply
    # the same scaling and (0.1307, 0.3081) normalization by hand — the
    # original fed unnormalized pixels to a model trained on
    # normalized inputs.
    X_test = test_dataset.data.flatten(start_dim=1).type(torch.FloatTensor)
    X_test = (X_test / 255.0 - 0.1307) / 0.3081
    y_test = test_dataset.targets
    y_test_pred = model(X_test)
    loss_test = loss_fnc(y_test_pred, y_test)
    correct = (torch.argmax(y_test_pred, dim=1) == y_test).type(torch.FloatTensor)
    # The original appended to `testaccuracy`, a name that was never
    # defined (NameError); compute and report the scalar instead.
    test_accuracy = correct.mean().item()
    print(f"Test loss: {loss_test.item():.4f}, test accuracy: {test_accuracy:.4f}")
# Plot each metric in its own figure. The original drew all three
# curves on one implicit Axes, so each title/label overwrote the
# previous one and curves with different x-scales were superimposed.
# Train-set loss
plt.figure()
plt.title("Training Loss")
plt.xlabel("Training Samples Seen")
plt.ylabel("Loss")
plt.plot(Llist_Train)
# Cross-validation set loss
plt.figure()
plt.title("Cross-Validation Set Loss")
plt.xlabel("Cross-Validation Batches Seen")
plt.ylabel("Loss")
plt.plot(Llist_Test)
# Cross-validation set accuracy
plt.figure()
plt.title("Cross-Validation Set Accuracy")
plt.xlabel("Cross-Validation Batches Seen")
plt.ylabel("Accuracy")
plt.plot(Alist)
plt.show()  # render all figures (no-op under non-interactive backends)