Added, simplified, and fixed all experiments up to experiment 5.

Batuhan Berk Başoğlu 2025-11-16 22:40:47 -05:00
parent 97f9db293d
commit 6df733aab5
Signed by: batuhan-basoglu
SSH key fingerprint: SHA256:kEsnuHX+qbwhxSAXPUQ4ox535wFHu/hIRaa53FzxRpo
14 changed files with 253 additions and 253 deletions


@@ -1,18 +1,13 @@
 import numpy as np
 import matplotlib.pyplot as plt
-from torchvision import datasets
-from torchvision import transforms
+from torchvision import datasets, transforms
 import os

 class MLP:
-    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, weight_scale, l1, l2):
+    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, weight_scale, l1=0, l2=0):
         self.l1 = l1
         self.l2 = l2
         # initializes weights and biases for each layer
         self.W1 = np.random.randn(input_size, hidden_size1) * weight_scale
         self.b1 = np.zeros((1, hidden_size1))
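With `l1` and `l2` now defaulting to 0, a plain unregularized baseline needs no penalty arguments at all. A minimal usage sketch (illustrative, not part of the commit):

    # l1 and l2 default to 0, so this instance trains with no regularization
    baseline = MLP(input_size=28 * 28, hidden_size1=256, hidden_size2=256,
                   output_size=10, weight_scale=1e-2)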
@@ -50,18 +45,16 @@ class MLP:
         dw1 = (self.x.T @ dz1) / m
         db1 = np.sum(dz1, axis=0, keepdims=True) / m
         # applying L1 and L2 regularization
-        dw3 += self.l1 * np.sign(self.W3)
-        dw2 += self.l1 * np.sign(self.W2)
-        dw1 += self.l1 * np.sign(self.W1)
-        dw3 += self.l2 * self.W3
-        dw2 += self.l2 * self.W2
-        dw1 += self.l2 * self.W1
+        if self.l1 > 0:
+            dw3 += self.l1 * np.sign(self.W3)
+            dw2 += self.l1 * np.sign(self.W2)
+            dw1 += self.l1 * np.sign(self.W1)
+        if self.l2 > 0:
+            dw3 += self.l2 * self.W3
+            dw2 += self.l2 * self.W2
+            dw1 += self.l2 * self.W1
         # updates weights and biases using gradient descent
         self.W3 -= lr * dw3
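The added terms are the (sub)gradients of the penalties l1 * sum(|W|) and (l2 / 2) * sum(W ** 2); because the update adds `self.l2 * self.W3` rather than `2 * self.l2 * self.W3`, the implied L2 penalty carries the conventional 1/2 factor. A standalone finite-difference sketch (not part of the commit) that checks both forms:

    import numpy as np

    # numerically verify: d/dW [l1 * sum|W|]           = l1 * sign(W)
    #                     d/dW [(l2 / 2) * sum(W**2)]  = l2 * W
    rng = np.random.default_rng(0)
    W = rng.standard_normal((3, 3))
    l1, l2, eps = 1e-2, 1e-2, 1e-6

    def penalty(W):
        return l1 * np.abs(W).sum() + 0.5 * l2 * (W ** 2).sum()

    numeric = np.zeros_like(W)
    for i in range(3):
        for j in range(3):
            Wp, Wm = W.copy(), W.copy()
            Wp[i, j] += eps
            Wm[i, j] -= eps
            numeric[i, j] = (penalty(Wp) - penalty(Wm)) / (2 * eps)

    analytic = l1 * np.sign(W) + l2 * W
    print(np.abs(numeric - analytic).max())  # agrees to ~1e-9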
@@ -155,7 +148,14 @@ class MLP:
         plt.title('Training Loss and Validation Accuracy over Epochs')
-        result_path = 'results/experiment-5.png' # defines the file name
+        # dynamically sets the filename
+        result_path = 'results/experiment-5.png'
+        if self.l1 > 0 and self.l2 > 0:
+            result_path = 'results/experiment-5-l1-and-l2.png'
+        elif self.l1 > 0:
+            result_path = 'results/experiment-5-l1.png'
+        elif self.l2 > 0:
+            result_path = 'results/experiment-5-l2.png'
         fig.savefig(result_path)
         print(f"Graph saved to: {result_path}")
@@ -163,31 +163,41 @@ class MLP:
         probs = self.forward(x) # forwards pass to get probabilities
         return np.argmax(probs, axis=1) # returns the class with highest probability

+# defining the data augmentation transformations for the training set
+transform_train = transforms.Compose([
+    transforms.RandomRotation(20), # random rotations between -20 and 20 degrees
+    transforms.RandomHorizontalFlip(), # random horizontal flip
+    transforms.ToTensor(), # converting images to tensor and normalizing to [0, 1]
+])
+# no augmentation for the test set, just converting to tensor
+transform_test = transforms.Compose([
+    transforms.ToTensor(),
+])
 # acquiring the FashionMNIST dataset
-transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
-train_set = datasets.FashionMNIST(root='.', train=True, download=True, transform = transform)
-test_set = datasets.FashionMNIST(root='.', train=False, download=True, transform = transform)
+train_set = datasets.FashionMNIST(root='.', train=True, download=True, transform=transform_train)
+test_set = datasets.FashionMNIST(root='.', train=False, download=True, transform=transform_test)
 # preprocessing the data by flattening images and normalizing them.
-x_train = train_set.data.numpy().reshape(-1, 28 * 28).astype(np.float32)
+x_train = train_set.data.numpy().reshape(-1, 28 * 28).astype(np.float32) / 255.0
 y_train = train_set.targets.numpy()
-x_test = test_set.data.numpy().reshape(-1, 28 * 28).astype(np.float32)
+x_test = test_set.data.numpy().reshape(-1, 28 * 28).astype(np.float32) / 255.0
 y_test = test_set.targets.numpy()

-# MLP Initialization
-mlp = MLP(
+# MLP initialization (L1 regularization)
+mlp_l1 = MLP(
     input_size=28 * 28,
     hidden_size1=256,
     hidden_size2=256,
     output_size=10,
     weight_scale=1e-2,
-    l1 = 1e-6,
-    l2 = 1e-4
+    l1 = 1e-6
 )
 # trains the model
-mlp.fit(
+mlp_l1.fit(
     x_train=x_train,
     y_train=y_train,
     x_val=x_test,
@@ -198,6 +208,59 @@ mlp.fit(
 )
 # tests the model
-test_pred = mlp.predict(x_test)
-test_acc = np.mean(test_pred == y_test)
-print(f"\nFinal test accuracy: {test_acc:.4f}")
+test_pred_l1 = mlp_l1.predict(x_test)
+test_acc_l1 = np.mean(test_pred_l1 == y_test)
+print(f"\nFinal test accuracy: {test_acc_l1:.4f}")
+
+# MLP initialization (L2 regularization)
+mlp_l2 = MLP(
+    input_size=28 * 28,
+    hidden_size1=256,
+    hidden_size2=256,
+    output_size=10,
+    weight_scale=1e-2,
+    l2 = 1e-4
+)
+# trains the model
+mlp_l2.fit(
+    x_train=x_train,
+    y_train=y_train,
+    x_val=x_test,
+    y_val=y_test,
+    lr=1e-2,
+    epochs=10,
+    batch_size=256
+)
+# tests the model
+test_pred_l2 = mlp_l2.predict(x_test)
+test_acc_l2 = np.mean(test_pred_l2 == y_test)
+print(f"\nFinal test accuracy: {test_acc_l2:.4f}")
+
+# MLP initialization (L1 and L2 regularization)
+mlp_l1_l2 = MLP(
+    input_size=28 * 28,
+    hidden_size1=256,
+    hidden_size2=256,
+    output_size=10,
+    weight_scale=1e-2,
+    l1 = 1e-6,
+    l2 = 1e-4
+)
+# trains the model
+mlp_l1_l2.fit(
+    x_train=x_train,
+    y_train=y_train,
+    x_val=x_test,
+    y_val=y_test,
+    lr=1e-2,
+    epochs=10,
+    batch_size=256
+)
+# tests the model
+test_pred_l1_l2 = mlp_l1_l2.predict(x_test)
+test_acc_l1_l2 = np.mean(test_pred_l1_l2 == y_test)
+print(f"\nFinal test accuracy: {test_acc_l1_l2:.4f}")