diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 55c29c7..b4fe5ff 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -5,8 +5,9 @@
   </component>
   <component name="ChangeListManager">
     <list default="true" id="53d2c8fc-09f6-4596-950a-66eac2662d99" name="Changes" comment="">
+      <change afterPath="$PROJECT_DIR$/hyperparameter-training.py" afterDir="false" />
       <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
-      <change beforePath="$PROJECT_DIR$/experiment-8.py" beforeDir="false" afterPath="$PROJECT_DIR$/experiment-8.py" afterDir="false" />
+      <change beforePath="$PROJECT_DIR$/results/experiment-8.png" beforeDir="false" afterPath="$PROJECT_DIR$/results/experiment-8.png" afterDir="false" />
     </list>
     <option name="SHOW_DIALOG" value="false" />
     <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -31,33 +32,33 @@
     <option name="hideEmptyMiddlePackages" value="true" />
     <option name="showLibraryContents" value="true" />
   </component>
-  <component name="PropertiesComponent"><![CDATA[{
-  "keyToString": {
-    "ModuleVcsDetector.initialDetectionPerformed": "true",
-    "Python.Unnamed.executor": "Run",
-    "Python.experiment-1-1.executor": "Run",
-    "Python.experiment-1-2.executor": "Run",
-    "Python.experiment-1.executor": "Run",
-    "Python.experiment-2-leaky-relu.executor": "Run",
-    "Python.experiment-2-tanh.executor": "Run",
-    "Python.experiment-2.executor": "Run",
-    "Python.experiment-3-l1.executor": "Run",
-    "Python.experiment-3-l2.executor": "Run",
-    "Python.experiment-3.executor": "Run",
-    "Python.experiment-4.executor": "Run",
-    "Python.experiment-5.executor": "Run",
-    "Python.experiment-6-convolutional-neural-network.executor": "Run",
-    "Python.experiment-7.executor": "Run",
-    "Python.experiment-8.executor": "Run",
-    "Python.multilayer-perceptron.executor": "Run",
-    "RunOnceActivity.ShowReadmeOnStart": "true",
-    "RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager.252": "true",
-    "RunOnceActivity.git.unshallow": "true",
-    "git-widget-placeholder": "master",
-    "last_opened_file_path": "/home/arctichawk1/Desktop/Projects/Private/Classification-of-Image-Data-with-MLP-and-CNN",
-    "settings.editor.selected.configurable": "com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable"
+  <component name="PropertiesComponent">{
+  &quot;keyToString&quot;: {
+    &quot;ModuleVcsDetector.initialDetectionPerformed&quot;: &quot;true&quot;,
+    &quot;Python.Unnamed.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-1-1.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-1-2.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-1.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-2-leaky-relu.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-2-tanh.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-2.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-3-l1.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-3-l2.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-3.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-4.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-5.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-6-convolutional-neural-network.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-7.executor&quot;: &quot;Run&quot;,
+    &quot;Python.experiment-8.executor&quot;: &quot;Run&quot;,
+    &quot;Python.multilayer-perceptron.executor&quot;: &quot;Run&quot;,
+    &quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
+    &quot;RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager.252&quot;: &quot;true&quot;,
+    &quot;RunOnceActivity.git.unshallow&quot;: &quot;true&quot;,
+    &quot;git-widget-placeholder&quot;: &quot;master&quot;,
+    &quot;last_opened_file_path&quot;: &quot;/home/arctichawk1/Desktop/Projects/Private/Classification-of-Image-Data-with-MLP-and-CNN&quot;,
+    &quot;settings.editor.selected.configurable&quot;: &quot;com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable&quot;
   }
-}]]></component>
+}</component>
   <component name="SharedIndexes">
     <attachedChunks>
       <set>
diff --git a/hyperparameter-training.py b/hyperparameter-training.py
new file mode 100644
index 0000000..70ff266
--- /dev/null
+++ b/hyperparameter-training.py
@@ -0,0 +1,210 @@
+"""Grid search over learning rate and batch size for a NumPy MLP on FashionMNIST.
+
+Each (learning rate, batch size) pair trains a fresh network on the training
+split, evaluates it on the test split after every epoch, and saves its own
+loss/accuracy plot under results/.
+"""
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+from torchvision import datasets
+
+# plot location used when the caller does not supply one
+DEFAULT_RESULT_PATH = 'results/hyperparameter-training-output.png'
+
+
+class MLP:
+    """Fully connected network: two ReLU hidden layers and a softmax output layer."""
+
+    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, weight_scale):
+        # initializes weights (standard normal scaled by weight_scale) and biases (zeros) for each layer
+        self.W1 = np.random.randn(input_size, hidden_size1) * weight_scale
+        self.b1 = np.zeros((1, hidden_size1))
+        self.W2 = np.random.randn(hidden_size1, hidden_size2) * weight_scale
+        self.b2 = np.zeros((1, hidden_size2))
+        self.W3 = np.random.randn(hidden_size2, output_size) * weight_scale
+        self.b3 = np.zeros((1, output_size))
+
+    def forward(self, x):
+        # forward pass through the network; intermediates are cached on self for backward()
+        self.x = x  # input for backpropagation
+        self.z1 = x @ self.W1 + self.b1  # linear transformation for layer 1
+        self.a1 = self.relu(self.z1)  # ReLU activation
+        self.z2 = self.a1 @ self.W2 + self.b2  # linear transformation for layer 2
+        self.a2 = self.relu(self.z2)  # ReLU activation
+        self.z3 = self.a2 @ self.W3 + self.b3  # linear transformation for layer 3
+        self.a3 = self.softmax(self.z3)  # applies softmax to get class probabilities
+        return self.a3  # output of the network
+
+    def backward(self, y, lr):
+        # backward pass for weight updates using gradient descent;
+        # reads the values cached by the most recent forward() call
+        m = y.shape[0]
+        y_one_hot = self.one_hot_encode(y, self.W3.shape[1])  # converts labels to one-hot encoding
+
+        # computes gradients for each layer
+        dz3 = self.a3 - y_one_hot  # gradient for output layer
+        dw3 = (self.a2.T @ dz3) / m
+        db3 = np.sum(dz3, axis=0, keepdims=True) / m
+
+        dz2 = (dz3 @ self.W3.T) * self.relu_deriv(self.z2)  # gradient for layer 2
+        dw2 = (self.a1.T @ dz2) / m
+        db2 = np.sum(dz2, axis=0, keepdims=True) / m
+
+        dz1 = (dz2 @ self.W2.T) * self.relu_deriv(self.z1)  # gradient for layer 1
+        dw1 = (self.x.T @ dz1) / m
+        db1 = np.sum(dz1, axis=0, keepdims=True) / m
+
+        # updates weights and biases using gradient descent
+        self.W3 -= lr * dw3
+        self.b3 -= lr * db3
+        self.W2 -= lr * dw2
+        self.b2 -= lr * db2
+        self.W1 -= lr * dw1
+        self.b1 -= lr * db1
+
+    @staticmethod
+    def relu(x):
+        # ReLU activation
+        return np.maximum(0, x)
+
+    @staticmethod
+    def relu_deriv(x):
+        # derivative of the ReLU activation for backpropagation (1 where x > 0, else 0)
+        return (x > 0).astype(float)
+
+    @staticmethod
+    def softmax(x):
+        # softmax normalizes each row to probabilities; the row maximum is subtracted before exponentiating
+        e_x = np.exp(x - np.max(x, axis=1, keepdims=True))  # exponentiates inputs
+        return e_x / np.sum(e_x, axis=1, keepdims=True)  # normalizes to get probabilities
+
+    @staticmethod
+    def one_hot_encode(y, num_classes):
+        # converts integer labels to one-hot encoded rows
+        return np.eye(num_classes)[y]
+
+    @staticmethod
+    def cross_entropy_loss(y, y_hat):
+        # computes mean cross-entropy loss between true labels and predicted probabilities
+        m = y.shape[0]
+        eps = 1e-12
+        y_hat_clipped = np.clip(y_hat, eps, 1. - eps)  # keeps log() away from 0
+        log_probs = -np.log(y_hat_clipped[np.arange(m), y])
+        return np.mean(log_probs)
+
+    def fit(self, x_train, y_train, x_val, y_val, lr, epochs, batch_size,
+            result_path=DEFAULT_RESULT_PATH):
+        """Trains with shuffled mini-batches and returns the final validation accuracy.
+
+        Per epoch, the mean batch loss and the accuracy on (x_val, y_val) are recorded
+        and printed; once training ends both curves are saved to result_path.
+        """
+        train_losses = []
+        val_accuracies = []
+
+        for epoch in range(1, epochs + 1):
+            perm = np.random.permutation(x_train.shape[0])  # Shuffle the training data
+            x_train_shuffled, y_train_shuffled = x_train[perm], y_train[perm]
+
+            epoch_loss = 0.0
+            num_batches = int(np.ceil(x_train.shape[0] / batch_size))
+
+            for i in range(num_batches):
+                start = i * batch_size
+                end = start + batch_size
+                x_batch = x_train_shuffled[start:end]  # batch of inputs
+                y_batch = y_train_shuffled[start:end]  # batch of labels
+
+                # Forward pass, backward pass, and weight update
+                self.forward(x_batch)
+                self.backward(y_batch, lr)
+
+                # loss uses the probabilities cached by forward(), i.e. computed before this batch's update
+                epoch_loss += self.cross_entropy_loss(y_batch, self.a3)
+
+            epoch_loss /= num_batches  # average loss over the batches of this epoch
+            train_losses.append(epoch_loss)
+
+            val_pred = self.predict(x_val)
+            val_acc = np.mean(val_pred == y_val)
+            val_accuracies.append(val_acc)
+
+            print(f"Epoch {epoch:02d} | Training Loss: {epoch_loss:.4f} | Value Accuracy: {val_acc:.4f}")
+
+        self.plot_graph(train_losses, val_accuracies, result_path)
+        return val_accuracies[-1]
+
+    def plot_graph(self, train_losses, val_accuracies, result_path=DEFAULT_RESULT_PATH):
+        # saves training loss (left axis) and validation accuracy (right axis) per epoch to result_path
+        result_dir = os.path.dirname(result_path)
+        if result_dir:
+            os.makedirs(result_dir, exist_ok=True)  # creates the output directory if needed
+
+        fig, ax1 = plt.subplots()  # initializes the plot
+        try:
+            ax1.set_xlabel('Epochs')
+            ax1.set_ylabel('Training Loss', color='tab:blue')
+            ax1.plot(range(1, len(train_losses) + 1), train_losses, color='tab:blue', label='Training Loss')
+            ax1.tick_params(axis='y', labelcolor='tab:blue')  # defines loss subplot
+
+            ax2 = ax1.twinx()
+            ax2.set_ylabel('Validation Accuracy', color='tab:orange')
+            ax2.plot(range(1, len(val_accuracies) + 1), val_accuracies, color='tab:orange', label='Validation Accuracy')
+            ax2.tick_params(axis='y', labelcolor='tab:orange')  # defines accuracy subplot
+
+            plt.title('Training Loss and Validation Accuracy over Epochs')
+
+            fig.savefig(result_path)
+        finally:
+            plt.close(fig)  # pyplot keeps figures open until closed; one is created per configuration
+        print(f"Graph saved to: {result_path}")
+
+    def predict(self, x):  # predicts class labels for the input data
+        probs = self.forward(x)  # forward pass to get probabilities
+        return np.argmax(probs, axis=1)  # returns the class with highest probability
+
+
+# acquiring the FashionMNIST dataset once; it does not depend on the hyperparameters
+train_set = datasets.FashionMNIST(root='.', train=True, download=True)
+test_set = datasets.FashionMNIST(root='.', train=False, download=True)
+
+# preprocessing the data by flattening images and normalizing them.
+x_train = train_set.data.numpy().reshape(-1, 28 * 28).astype(np.float32) / 255.0
+y_train = train_set.targets.numpy()
+
+x_test = test_set.data.numpy().reshape(-1, 28 * 28).astype(np.float32) / 255.0
+y_test = test_set.targets.numpy()
+
+# NOTE(review): the test split doubles as the validation set here, so the grid
+# search is effectively tuned on test data; consider a held-out validation split
+# learn rate and batch sizes
+for learn_rate in [1e-2, 1e-3, 1e-4]:
+    for batch_size in [64, 128, 256]:
+        # MLP initialization (fresh weights for every configuration)
+        mlp = MLP(
+            input_size=28 * 28,
+            hidden_size1=256,
+            hidden_size2=256,
+            output_size=10,
+            weight_scale=1e-2
+        )
+
+        # trains the model; each configuration writes its own plot instead of overwriting one file
+        mlp.fit(
+            x_train=x_train,
+            y_train=y_train,
+            x_val=x_test,
+            y_val=y_test,
+            lr=learn_rate,
+            epochs=100,
+            batch_size=batch_size,
+            result_path=f"results/hyperparameter-training-lr{learn_rate}-bs{batch_size}.png"
+        )
+
+        # tests the model
+        test_pred = mlp.predict(x_test)
+        test_acc = np.mean(test_pred == y_test)
+        print("Test:" + str(learn_rate) + " "+  str(batch_size))
+        print(f"\nFinal test accuracy: {test_acc:.4f}")
\ No newline at end of file
diff --git a/results/experiment-8.png b/results/experiment-8.png
index c8cc6d7..0365ed5 100644
Binary files a/results/experiment-8.png and b/results/experiment-8.png differ
diff --git a/results/hyperparameter-training-output.png b/results/hyperparameter-training-output.png
new file mode 100644
index 0000000..c2fb3a2
Binary files /dev/null and b/results/hyperparameter-training-output.png differ