Added, simplified, and fixed all experiments up to experiment 5.
This commit is contained in:
parent
97f9db293d
commit
6df733aab5
14 changed files with 253 additions and 253 deletions
100
experiment-4.py
100
experiment-4.py
|
|
@ -3,9 +3,8 @@ import matplotlib.pyplot as plt
|
|||
from torchvision import datasets
|
||||
import os
|
||||
|
||||
|
||||
class MLP:
|
||||
def __init__(self, input_size, hidden_size1, hidden_size2, output_size, weight_scale, l1, l2):
|
||||
def __init__(self, input_size, hidden_size1, hidden_size2, output_size, weight_scale, l1=0, l2=0):
|
||||
self.l1 = l1
|
||||
self.l2 = l2
|
||||
|
||||
|
|
@ -46,18 +45,16 @@ class MLP:
|
|||
dw1 = (self.x.T @ dz1) / m
|
||||
db1 = np.sum(dz1, axis=0, keepdims=True) / m
|
||||
|
||||
# applying L1 and L2 regularization
|
||||
if self.l1 > 0:
|
||||
dw3 += self.l1 * np.sign(self.W3)
|
||||
dw2 += self.l1 * np.sign(self.W2)
|
||||
dw1 += self.l1 * np.sign(self.W1)
|
||||
|
||||
|
||||
dw3 += self.l2 * self.W3
|
||||
dw2 += self.l2 * self.W2
|
||||
dw1 += self.l2 * self.W1
|
||||
|
||||
|
||||
dw3 += self.l1 * np.sign(self.W3)
|
||||
|
||||
dw2 += self.l1 * np.sign(self.W2)
|
||||
|
||||
dw1 += self.l1 * np.sign(self.W1)
|
||||
if self.l2 > 0:
|
||||
dw3 += self.l2 * self.W3
|
||||
dw2 += self.l2 * self.W2
|
||||
dw1 += self.l2 * self.W1
|
||||
|
||||
# updates weights and biases using gradient descent
|
||||
self.W3 -= lr * dw3
|
||||
|
|
@ -151,7 +148,14 @@ class MLP:
|
|||
|
||||
plt.title('Training Loss and Validation Accuracy over Epochs')
|
||||
|
||||
result_path = 'results/experiment-4.png' # defines the file name
|
||||
# dynamically sets the filename
|
||||
if self.l1 > 0 and self.l2 > 0:
|
||||
result_path = 'results/experiment-4-l1-and-l2.png'
|
||||
elif self.l1 > 0:
|
||||
result_path = 'results/experiment-4-l1.png'
|
||||
elif self.l2 > 0:
|
||||
result_path = 'results/experiment-4-l2.png'
|
||||
|
||||
fig.savefig(result_path)
|
||||
print(f"Graph saved to: {result_path}")
|
||||
|
||||
|
|
@ -170,19 +174,18 @@ y_train = train_set.targets.numpy()
|
|||
x_test = test_set.data.numpy().reshape(-1, 28 * 28).astype(np.float32)
|
||||
y_test = test_set.targets.numpy()
|
||||
|
||||
# MLP Initialization
|
||||
mlp = MLP(
|
||||
# MLP initialization (L1 regularization)
|
||||
mlp_l1 = MLP(
|
||||
input_size=28 * 28,
|
||||
hidden_size1=256,
|
||||
hidden_size2=256,
|
||||
output_size=10,
|
||||
weight_scale=1e-2,
|
||||
l1 = 1e-6,
|
||||
l2 = 1e-4
|
||||
l1 = 1e-6
|
||||
)
|
||||
|
||||
# trains the model
|
||||
mlp.fit(
|
||||
mlp_l1.fit(
|
||||
x_train=x_train,
|
||||
y_train=y_train,
|
||||
x_val=x_test,
|
||||
|
|
@ -193,6 +196,59 @@ mlp.fit(
|
|||
)
|
||||
|
||||
# tests the model
|
||||
test_pred = mlp.predict(x_test)
|
||||
test_acc = np.mean(test_pred == y_test)
|
||||
print(f"\nFinal test accuracy: {test_acc:.4f}")
|
||||
test_pred_l1 = mlp_l1.predict(x_test)
|
||||
test_acc_l1 = np.mean(test_pred_l1 == y_test)
|
||||
print(f"\nFinal test accuracy: {test_acc_l1:.4f}")
|
||||
|
||||
# MLP initialization (L2 regularization)
|
||||
mlp_l2 = MLP(
|
||||
input_size=28 * 28,
|
||||
hidden_size1=256,
|
||||
hidden_size2=256,
|
||||
output_size=10,
|
||||
weight_scale=1e-2,
|
||||
l2 = 1e-4
|
||||
)
|
||||
|
||||
# trains the model
|
||||
mlp_l2.fit(
|
||||
x_train=x_train,
|
||||
y_train=y_train,
|
||||
x_val=x_test,
|
||||
y_val=y_test,
|
||||
lr=1e-2,
|
||||
epochs=10,
|
||||
batch_size=256
|
||||
)
|
||||
|
||||
# tests the model
|
||||
test_pred_l2 = mlp_l2.predict(x_test)
|
||||
test_acc_l2 = np.mean(test_pred_l2 == y_test)
|
||||
print(f"\nFinal test accuracy: {test_acc_l2:.4f}")
|
||||
|
||||
# MLP initialization (L1 and L2 regularization)
|
||||
mlp_l1_l2 = MLP(
|
||||
input_size=28 * 28,
|
||||
hidden_size1=256,
|
||||
hidden_size2=256,
|
||||
output_size=10,
|
||||
weight_scale=1e-2,
|
||||
l1 = 1e-6,
|
||||
l2 = 1e-4
|
||||
)
|
||||
|
||||
# trains the model
|
||||
mlp_l1_l2.fit(
|
||||
x_train=x_train,
|
||||
y_train=y_train,
|
||||
x_val=x_test,
|
||||
y_val=y_test,
|
||||
lr=1e-2,
|
||||
epochs=10,
|
||||
batch_size=256
|
||||
)
|
||||
|
||||
# tests the model
|
||||
test_pred_l1_l2 = mlp_l1_l2.predict(x_test)
|
||||
test_acc_l1_l2 = np.mean(test_pred_l1_l2 == y_test)
|
||||
print(f"\nFinal test accuracy: {test_acc_l1_l2:.4f}")
|
||||
Loading…
Add table
Add a link
Reference in a new issue