! pip install torch torchvision
! pip install datasets
! python -m pip install -U matplotlib

import torch

tns0 = torch.tensor(1) # --> a rank 0 tensor
tns0.shape, tns0

(torch.Size([]), tensor(1))

tns1 = torch.tensor([1,2]) # --> a rank 1 tensor, with shape 2
tns1.shape, tns1

(torch.Size([2]), tensor([1, 2]))

tns2 = torch.tensor([
    [1,2],
    [1,2]]) # --> a rank 2 tensor with shape 2 * 2
tns2.shape, tns2

(torch.Size([2, 2]),
 tensor([[1, 2],
         [1, 2]]))

# A rank 3 tensor: three 2 x 2 matrices stacked together, so its shape is 3 * 2 * 2
tns3 = torch.tensor(
    [
        [[1, 2], [1, 2]],
        [[1, 2], [1, 2]],
        [[1, 2], [1, 2]],
    ]
)
tns3.shape, tns3

(torch.Size([3, 2, 2]),
 tensor([[[1, 2],
          [1, 2]],
 
         [[1, 2],
          [1, 2]],
 
         [[1, 2],
          [1, 2]]]))

from datasets import load_dataset

ds = load_dataset("ylecun/mnist")
ds

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 60000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 10000
    })
})

test_set = ds.get("test")

train_dict = ds.get("train")
train_dict

Dataset({
    features: ['image', 'label'],
    num_rows: 60000
})

test_obj = train_dict[0]
test_obj

{'image': <PIL.PngImagePlugin.PngImageFile image mode=L size=28x28>,
 'label': 5}

from PIL import Image

img = test_obj.get("image")
img.resize((200,200))

import torch
import torchvision.transforms as transforms
import numpy as np

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

rng = np.random.default_rng()

train_vald_imgs_arr = [item.get('image') for item in train_dict]
train_vald_lbls_arr = [item.get('label') for item in train_dict]
train_vald = list(zip(train_vald_imgs_arr, train_vald_lbls_arr))
train_vald_arr = np.array(object = train_vald, dtype = tuple)
train_vald_arr[0:4]

array([[<PIL.PngImagePlugin.PngImageFile image mode=L size=28x28 at 0x132971950>,
        5],
       [<PIL.PngImagePlugin.PngImageFile image mode=L size=28x28 at 0x132971BD0>,
        0],
       [<PIL.PngImagePlugin.PngImageFile image mode=L size=28x28 at 0x132946B10>,
        4],
       [<PIL.PngImagePlugin.PngImageFile image mode=L size=28x28 at 0x132946EA0>,
        1]], dtype=object)

def valid_split(arr, split=0.2, generator=None):
    """Shuffles ARR in place and splits it into a training part and a validation part.

    ARR: array --> the samples to split; shuffled in place along its first axis
    SPLIT: float --> the fraction of the samples that is held out for validation
    GENERATOR: numpy random Generator --> optional source of randomness (pass a seeded one for a
               reproducible split); the notebook-level `rng` is used when it is omitted

    Returns a (training, validation) pair of slices of ARR. Together they contain every sample
    exactly once: the validation slice holds int(SPLIT * len(ARR)) samples and the training slice
    holds all the others."""
    shuffler = rng if generator is None else generator
    shuffler.shuffle(arr)
    split_ix = int(split * len(arr))
    # The first `split_ix` shuffled samples are held out for validation. The training slice runs to
    # the very end of the array; stopping at -1 would silently drop the last sample.
    return arr[split_ix:], arr[:split_ix]

training_set, validation_set = valid_split(train_vald_arr)

# Each sample is an (image, label) pair. Every image is transformed to a normalized tensor and the
# stacked result is flattened to one row of 28*28 values per image.
# The images are inputs, not parameters, so they are deliberately NOT marked with requires_grad_():
# only the weights and biases need gradients, and tracking the data would make every backward pass
# accumulate a gradient for the images as well.
training_img = torch.stack([transform(img_lbl_tpl[0]) for img_lbl_tpl in training_set]).view(-1, 28*28).float()
validation_img = torch.stack([transform(img_lbl_tpl[0]) for img_lbl_tpl in validation_set]).view(-1, 28*28).float()

training_lbls = torch.tensor([img_lbl_tpl[1] for img_lbl_tpl in training_set])
validation_lbls = torch.tensor([img_lbl_tpl[1] for img_lbl_tpl in validation_set])

# Pair every flattened image with its label so the lists can be fed to a DataLoader
train = list(zip(training_img, training_lbls))
valid = list(zip(validation_img, validation_lbls))

#Checking... #FerrariCore

# Each dataset element is an (image tensor, label tensor) tuple; print the types and shapes to confirm
print(f"The type of the first element of both sets are: train'{type(train[0])}', valid'{type(valid[0])}'")
print(f"The first element of each is a tensor: train'{type(train[0][0])}', valid '{type(valid[0][0])}'")
# The labels were wrapped with torch.tensor, so each one is a tensor rather than a plain integer
print(f"The second element of each tuple is a tensor holding the label: train '{type(train[0][1])}', valid '{type(valid[0][1])}'")
print(f"The shape of both tensors are: train '{training_img.shape}', valid '{validation_img.shape}' ")

The type of the first element of both sets are: train'<class 'tuple'>', valid'<class 'tuple'>'
The first element of each is a tensor: train'<class 'torch.Tensor'>', valid '<class 'torch.Tensor'>'
The second element of each tuple is an integer: train '<class 'torch.Tensor'>', valid '<class 'torch.Tensor'>'
The shape of both tensors are: train 'torch.Size([12000, 784]), valid 'torch.Size([47999, 784])'

# Helper functions 
def init_params(size, std=1.0):
    """Creates a randomly initialized parameter tensor that tracks gradients.

    SIZE: int or tuple --> the dimensions of the tensor to create
    STD: float --> the factor that every standard-normal sample is multiplied by"""
    samples = torch.randn(size)
    scaled = samples * std
    # Switch on gradient tracking in place and hand the same tensor back
    return scaled.requires_grad_()

def first(dl):
    """Returns the first item produced by iterating over DL.

    DL: iterable --> e.g. a DataLoader, in which case the first batch is returned"""
    iterator = iter(dl)
    return next(iterator)

#Initializing weights and biases
weights = init_params((28*28,1))
bias = init_params(1)

#Calculating prediction for one image using a linear function
(train[0][0] * weights.T).sum() + bias

tensor([-0.7730], grad_fn=<AddBackward0>)

def linear1(xb):
    """A linear model built on the notebook-level `weights` and `bias` tensors.

    XB: tensor --> our input batch; it is matrix-multiplied by `weights` and `bias` is added to the result"""
    projected = xb @ weights
    return projected + bias

def mse(pred, targets):
    """Mean squared error between PRED and TARGETS.

    PRED: tensor --> the model's outputs
    TARGETS: tensor --> the expected values

    When TARGETS differs from PRED only by dimensions of size 1 (e.g. PRED is N x 1 and TARGETS
    has shape N) it is reshaped to PRED's shape first. Without that step broadcasting would pair
    every prediction with every target and average an N x N matrix instead of N differences.
    Any other combination of shapes is left to the usual broadcasting rules."""
    if (
        torch.is_tensor(pred)
        and torch.is_tensor(targets)
        and pred.shape != targets.shape
        and pred.squeeze().shape == targets.squeeze().shape
    ):
        targets = targets.reshape(pred.shape)
    return torch.mean((pred - targets) ** 2)

preds = linear1(training_img)
preds

tensor([[ -0.7730],
        [-12.4203],
        [-15.4166],
        ...,
        [-24.7586],
        [-31.4506],
        [ -6.3199]], grad_fn=<AddBackward0>)

from torch.utils.data import DataLoader 

train_dl = DataLoader(train, batch_size=256)
input, target = first(train_dl)
input.shape, target.shape

(torch.Size([256, 784]), torch.Size([256]))

valid_dl = DataLoader(valid, batch_size=256)

batch_img = training_img[0:4]
batch_lbls = training_lbls[0:4]

# How to calculate gradients with PyTorch
def calc_grad(x, target, model):
    """Runs MODEL on X, scores the result against TARGET with `mse` and backpropagates.

    X: tensor --> the inputs handed to MODEL
    TARGET: tensor --> the expected outputs
    MODEL: callable --> maps X to predictions

    Nothing is returned; the backward pass is run purely for the gradients it computes."""
    mse(model(x), target).backward()

calc_grad(batch_img, batch_lbls, linear1)
weights.grad.mean(), bias.grad

(tensor(-0.6674), tensor([-36.5286]))

# Re-initialize our parameter's gradients: PyTorch adds to the last calculated gradient
weights.grad.zero_()
bias.grad.zero_()

tensor([0.])

# linear model from pytorch modules
import torch.nn as nn

linear_model = nn.Linear(28*28,1)

w,b = linear_model.parameters()
w.shape, b.shape

(torch.Size([1, 784]), torch.Size([1]))

#Creating a SGD optimizer

class BasicOptim:
    """A minimal stochastic gradient descent optimizer.

    PARAMS: iterable of tensors --> the parameters to update; copied into a list so that a
            generator such as model.parameters() can be walked again on every step
    LR: float --> the learning rate every gradient is multiplied by"""

    def __init__(self, params, lr):
        self.params = list(params)
        self.lr = lr

    def step(self, *args, **kwargs):
        """Moves every parameter against its gradient, scaled by the learning rate.

        Parameters that currently have no gradient (never part of a backward pass, or cleared by
        zero_grad) are skipped instead of raising. Extra arguments are accepted and ignored."""
        # The update itself must not be recorded by autograd
        with torch.no_grad():
            for p in self.params:
                if p.grad is None:
                    continue
                p.sub_(p.grad * self.lr)

    def zero_grad(self, *args, **kwargs):
        """Drops the gradient of every parameter so the next backward pass starts from scratch.

        Extra arguments are accepted and ignored."""
        for p in self.params:
            p.grad = None

learning_rate = 0.1
opt = BasicOptim(linear_model.parameters(), learning_rate)

#simplified training loop using our optimizer
def train_epoch(model):
    """Runs one pass over the notebook-level `train_dl`, updating after every batch.

    MODEL: callable --> the model handed to `calc_grad` for each batch"""
    for batch in train_dl:
        inputs, labels = batch
        calc_grad(inputs, labels, model)
        # Apply the update first, then clear the gradients before the next batch
        opt.step()
        opt.zero_grad()

validate_epoch = lambda model: "0.43"

def train_model(model, epochs: int):
    """Trains MODEL for EPOCHS epochs, printing the validation result after each one.

    MODEL: callable --> passed on to `train_epoch` and `validate_epoch`
    EPOCHS: int --> how many passes to run

    The results are printed on a single line, separated by spaces."""
    for _epoch in range(epochs):
        train_epoch(model)
        score = validate_epoch(model)
        print(score, end=' ')

train_model(linear_model, 20)

0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43 0.43

w1, b1, w2, b2 = init_params((28*28,30)), init_params(30), init_params((30,1)), init_params(1)

def simple_net(xb):
    """A two-layer network built on the notebook-level `w1`, `b1`, `w2` and `b2` tensors.

    XB: tensor --> our input batch"""
    hidden = xb @ w1 + b1
    # Element-wise maximum with zero --> this is the non-linearity, the ReLU function
    activated = torch.max(hidden, torch.tensor(0.0))
    return activated @ w2 + b2

import matplotlib.pyplot as plt

x = np.linspace(-2, 3, 6)
y = np.maximum(0, x)

ReLU = plt.plot(x, y)
plt.setp(ReLU, color = 'navy')
plt.xlabel("x")
plt.ylabel("ReLU(x)")
plt.title("ReLU Function Visualization")
plt.grid(True)
plt.show()
plt.style.use("tableau-colorblind10")

#Using Pytorch modules --> preferred
simple_net = nn.Sequential(
    nn.Linear(28*28,30),
    nn.ReLU(),
    nn.Linear(30,1), 
)

w1, b1, w2, b2 = simple_net.parameters()
w1.shape, b1.shape, w2.shape, b2.shape

(torch.Size([30, 784]), torch.Size([30]), torch.Size([1, 30]), torch.Size([1]))

#more complicated neural net example - 4 Layers

complex_net = nn.Sequential(
    nn.Linear(28*28,30),
    nn.ReLU(),
    nn.Linear(30, 30),
    nn.ReLU(),
    nn.Linear(30,1)
)

class Learner:
    """Bundles a model with its data, an optimizer factory, a loss and a validation metric.

    DATALOADERS: sequence --> (training, validation) iterables that yield (inputs, targets) batches
    MODEL: callable --> maps a batch of inputs to predictions
    OPTIMZER: callable --> factory called as optimzer(model, learning_rate); it must return an
              object with zero_grad() and step(). The misspelt parameter name is kept so that
              existing keyword callers keep working
    LOSS: callable --> called as loss(predictions, targets); the result needs backward() and item()
    METRICS: callable --> called once with the validation iterable; it must return a function
             that scores a model"""

    def __init__(self, dataloaders, model, optimzer, loss, metrics):
        self.train = dataloaders[0]
        self.valid = dataloaders[1]
        self.model = model
        self.opt_type = optimzer
        self.loss_fn = loss
        self.metric_fn = metrics(self.valid)

    @staticmethod
    def _name_of(obj):
        """Returns OBJ's __name__, or its class name when it has none (e.g. a loss module instance)."""
        return getattr(obj, "__name__", type(obj).__name__)

    def train_epoch(self, opt):
        """Runs one pass over the training batches and returns the per-batch losses.

        OPT: optimizer --> its gradients are cleared before, and its step taken after, every backward pass

        Returns a float NumPy array with one loss value per batch (empty if there were no batches)."""
        batch_losses = []
        for xb, yb in self.train:
            opt.zero_grad()
            logits = self.model(xb)
            loss = self.loss_fn(logits, yb)
            loss.backward()
            batch_losses.append(loss.item())
            opt.step()
        return np.array(batch_losses, dtype=float)

    def fit(self, epochs:int, learning_rate):
        """Trains the model for EPOCHS epochs, printing the metric and mean loss after each one.

        EPOCHS: int --> the number of passes over the training data
        LEARNING_RATE: float --> handed to the optimizer factory together with the model"""
        validate_epoch = self.metric_fn
        opt = self.opt_type(self.model, learning_rate)
        for ep in range(epochs):
            ep_mean_loss = np.mean(self.train_epoch(opt))
            print(f"Epoch #{ep} || Accuracy: {validate_epoch(self.model)} || Epoch Mean Loss: {ep_mean_loss}")

    #hide
    def __repr__(self):
        # Uses the attributes that __init__ actually stores (opt_type, loss_fn, metric_fn)
        return f"Learner({(self.train, self.valid)}, {self.model}, {self.opt_type}, {self.loss_fn}, {self.metric_fn})"

    def __str__(self):
        return (f"""<Learner for {self.model}: \n  
                        Optimizer: {self._name_of(self.opt_type)} \n
                        Loss_func: {self._name_of(self.loss_fn)} \n
                        Metrics: {self.metric_fn}
                """)

# Plot -log(x) for x in [0.01, 1.0]; the range starts just above zero because log(0) is undefined
x = np.linspace(0.01,1.0)
y = -np.log(x)

cross_entropy = plt.plot(x,y)
plt.setp(cross_entropy, color = 'navy')
plt.xlabel("x")
plt.ylabel("Loss")
plt.title("Cross Entropy (Log) Loss Function Visualization")
plt.grid(True)
plt.show()
# NOTE(review): the style is switched only after the figure has been shown, so it presumably
# affects later figures rather than this one; confirm that this is intended
plt.style.use("tableau-colorblind10")

#prepare our data using the torchvision datasets
from torchvision import transforms, datasets
from torch.utils.data import Dataset, random_split, DataLoader

mnist_train = datasets.MNIST(
    root=".", train=True, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),                
        transforms.Normalize((0.1307,), (0.3081,))  
    ])
)

mnist_test = datasets.MNIST(
    root=".", train=False, download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),                
        transforms.Normalize((0.1307,), (0.3081,))  
    ])
)

# Keep 80% of the training data for training and hold the rest out for validation
train_size = int(len(mnist_train)*0.8)
# Take the remainder instead of truncating a second product: the two sizes then always add up to
# len(mnist_train), which random_split needs when it is given absolute lengths
val_size = len(mnist_train) - train_size

train_set, valid_set = random_split(mnist_train, [train_size, val_size])

train_dl = DataLoader(train_set, batch_size=64, shuffle=True)
valid_dl = DataLoader(valid_set, batch_size=64, shuffle=True)

test = DataLoader(mnist_test, batch_size = 10, shuffle=True)

#utils
def inference(xb):
    """Turns a batch of model outputs into predicted class indices.

    XB: tensor --> the model outputs, with the classes along dimension 1

    Returns, for every row, the index of the class with the highest softmax probability."""
    return torch.argmax(torch.softmax(xb, dim=1), dim=1)

@torch.no_grad()
def metric_MNIST(xb, yb):
    """Accuracy of one batch: the fraction of predictions that equal their label.

    XB: tensor --> the model outputs, turned into class predictions by `inference`
    YB: tensor --> the true labels

    Runs with gradient tracking switched off."""
    hits = inference(xb) == yb
    return hits.float().mean()

def make_metrics_fn(metrics_fn):
    """Builds a two-stage factory around a per-batch metric.

    METRICS_FN: callable --> called as metrics_fn(model(inputs), labels); must return a tensor

    The returned function takes a validation set (an iterable of (inputs, labels) batches) and
    returns a `validate_epoch(model)` function. That function averages METRICS_FN over all the
    batches, with gradient tracking switched off, and returns the mean rounded to 4 decimals."""
    def make_validate_epoch(validation_set):
        @torch.no_grad()
        def validate_epoch(model):
            per_batch = []
            for inputs, labels in validation_set:
                per_batch.append(metrics_fn(model(inputs), labels))
            overall = torch.stack(per_batch).float().mean()
            return round(overall.item(), 4)
        return validate_epoch
    return make_validate_epoch

mnist_metrics = make_metrics_fn(metric_MNIST)
dataloaders = train_dl, valid_dl

#testing scripts
from IPython.display import display

transform_PIL = transforms.Compose([transforms.ToPILImage()])

xb, yb = next(iter(test))

def test_MNIST(model):
    """Shows MODEL's prediction for every image in the notebook-level test batch `xb`.

    MODEL: callable --> maps one image tensor to class scores

    For each image the prediction is printed, the image is displayed enlarged to 150 x 150 and
    the prediction is compared with the matching label from `yb`."""
    for i, image in enumerate(xb):
        pred = inference(model(image)).item()
        target = yb[i].item()
        print(f"Our model predicted for the following input this: {pred}")
        display(transform_PIL(image).resize((150,150)))
        if pred == target:
            print(f"Correct! It was {pred}! \n")
        else:
            print(f"It was actually {target} :( \n")

simple_net = nn.Sequential(
    nn.Flatten(), 
    nn.Linear(28*28, 128),
    nn.ReLU(),
    nn.Linear(128, 10)
)

def sgd(model, lr):
    """Builds a torch.optim.SGD optimizer over MODEL's parameters.

    MODEL: module --> anything that exposes parameters()
    LR: float --> the learning rate"""
    trainable = model.parameters()
    return torch.optim.SGD(trainable, lr)

simple_learn = Learner(dataloaders, simple_net, sgd, nn.CrossEntropyLoss(), mnist_metrics)
simple_learn.fit(20, 0.1)

Epoch #0 || Accuracy: 0.9524 || Epoch Mean Loss: 0.28338200656572976
Epoch #1 || Accuracy: 0.9664 || Epoch Mean Loss: 0.12684618176395696
Epoch #2 || Accuracy: 0.9688 || Epoch Mean Loss: 0.0871885798505197
Epoch #3 || Accuracy: 0.9718 || Epoch Mean Loss: 0.06692528115523358
Epoch #4 || Accuracy: 0.9728 || Epoch Mean Loss: 0.05303540338886281
Epoch #5 || Accuracy: 0.974 || Epoch Mean Loss: 0.041634780219135185
Epoch #6 || Accuracy: 0.9746 || Epoch Mean Loss: 0.03470087833174815
Epoch #7 || Accuracy: 0.9758 || Epoch Mean Loss: 0.026925769412269197
Epoch #8 || Accuracy: 0.9755 || Epoch Mean Loss: 0.022359725966506327
Epoch #9 || Accuracy: 0.9758 || Epoch Mean Loss: 0.01742701446156328
Epoch #10 || Accuracy: 0.9761 || Epoch Mean Loss: 0.014651690504513681
Epoch #11 || Accuracy: 0.9776 || Epoch Mean Loss: 0.011901324820394318
Epoch #12 || Accuracy: 0.9766 || Epoch Mean Loss: 0.009739438372819374
Epoch #13 || Accuracy: 0.9773 || Epoch Mean Loss: 0.007814280425120767
Epoch #14 || Accuracy: 0.9773 || Epoch Mean Loss: 0.006423461076764701
Epoch #15 || Accuracy: 0.9786 || Epoch Mean Loss: 0.005473268447094597
Epoch #16 || Accuracy: 0.978 || Epoch Mean Loss: 0.004561760225110144
Epoch #17 || Accuracy: 0.9786 || Epoch Mean Loss: 0.004014504220239663
Epoch #18 || Accuracy: 0.9784 || Epoch Mean Loss: 0.00348928046420527
Epoch #19 || Accuracy: 0.9781 || Epoch Mean Loss: 0.00311433167430611

test_MNIST(simple_net)

Our model predicted for the following input this: 5

Correct! It was 5! 

Our model predicted for the following input this: 0

Correct! It was 0! 

Our model predicted for the following input this: 6

Correct! It was 6! 

Our model predicted for the following input this: 3

Correct! It was 3! 

Our model predicted for the following input this: 2

Correct! It was 2! 

Our model predicted for the following input this: 6

Correct! It was 6! 

Our model predicted for the following input this: 3

Correct! It was 3! 

Our model predicted for the following input this: 2

Correct! It was 2! 

Our model predicted for the following input this: 4

Correct! It was 4! 

Our model predicted for the following input this: 7

complex_net = nn.Sequential(
    nn.Flatten(), 
    nn.Linear(28*28, 128),
    nn.ReLU(),
    nn.Linear(128, 256),
    nn.ReLU(),
    nn.Linear(256,10)
)

complex_learn = Learner(dataloaders, complex_net, sgd, nn.CrossEntropyLoss(), mnist_metrics)
complex_learn.fit(20, 0.1)

Epoch #0 || Accuracy: 0.9547 || Epoch Mean Loss: 0.3226238980094592
Epoch #1 || Accuracy: 0.965 || Epoch Mean Loss: 0.12212716623395681
Epoch #2 || Accuracy: 0.9666 || Epoch Mean Loss: 0.08247767006667951
Epoch #3 || Accuracy: 0.9727 || Epoch Mean Loss: 0.061977105421945455
Epoch #4 || Accuracy: 0.974 || Epoch Mean Loss: 0.046655442216433585
Epoch #5 || Accuracy: 0.9737 || Epoch Mean Loss: 0.03490441683772951
Epoch #6 || Accuracy: 0.9751 || Epoch Mean Loss: 0.026278750751633197
Epoch #7 || Accuracy: 0.9702 || Epoch Mean Loss: 0.01987650533602573
Epoch #8 || Accuracy: 0.9772 || Epoch Mean Loss: 0.014694855815459353
Epoch #9 || Accuracy: 0.9762 || Epoch Mean Loss: 0.010513619389404387
Epoch #10 || Accuracy: 0.9777 || Epoch Mean Loss: 0.008572090015736953
Epoch #11 || Accuracy: 0.9784 || Epoch Mean Loss: 0.006331878423050511
Epoch #12 || Accuracy: 0.9788 || Epoch Mean Loss: 0.004126554309244966
Epoch #13 || Accuracy: 0.9786 || Epoch Mean Loss: 0.0026409956792776937
Epoch #14 || Accuracy: 0.9793 || Epoch Mean Loss: 0.002008390039525693
Epoch #15 || Accuracy: 0.9794 || Epoch Mean Loss: 0.0013605867226918538
Epoch #16 || Accuracy: 0.9787 || Epoch Mean Loss: 0.0010543642543246582
Epoch #17 || Accuracy: 0.9792 || Epoch Mean Loss: 0.0009271361964953637
Epoch #18 || Accuracy: 0.9792 || Epoch Mean Loss: 0.0007572702713490192
Epoch #19 || Accuracy: 0.9786 || Epoch Mean Loss: 0.0006665066784601853

test_MNIST(complex_net)

Our model predicted for the following input this: 5

Correct! It was 5! 

Our model predicted for the following input this: 0

Correct! It was 0! 

Our model predicted for the following input this: 6

Correct! It was 6! 

Our model predicted for the following input this: 3

Correct! It was 3! 

Our model predicted for the following input this: 2

Correct! It was 2! 

Our model predicted for the following input this: 6

Correct! It was 6! 

Our model predicted for the following input this: 3

Correct! It was 3! 

Our model predicted for the following input this: 2

Correct! It was 2! 

Our model predicted for the following input this: 4

Correct! It was 4! 

Our model predicted for the following input this: 7

Ch 4 - How Neural Networks Work

Exploration and Learning about Tensors

Tensors

PyTorch Tensors

Exploration of our dataset and its structure

Creating a validation set

Building Up The Neural Network

Creating a Stochastic Gradient Descent (SGD) Optimizer

The Neural Network

Creating Our Own Implementation of The Learner Class

Creating A Digit Classifier From The MNIST Dataset

Our model

Our loss function

Our inference function

Code

Trying this on a more complex neural network (4 layers)