From 4ed70f6bd461096ee373ee381e8c53a9af80d03a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Batuhan=20Berk=20Ba=C5=9Fo=C4=9Flu?=
Date: Mon, 29 Sep 2025 12:15:54 -0400
Subject: [PATCH] Added Non-Linear base and Momentum

---
 linear-regression-parkinsons.py                | 16 ++++++++++++++--
 logistic-regression-wdbc.py                    | 10 +++++++++-
 mini-batch-sgd-linear-regression-parkinsons.py | 16 +++++++++++++++-
 mini-batch-sgd-logistic-regression-wdbc.py     | 11 ++++++++++-
 4 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/linear-regression-parkinsons.py b/linear-regression-parkinsons.py
index d9881f4..08dc105 100644
--- a/linear-regression-parkinsons.py
+++ b/linear-regression-parkinsons.py
@@ -6,8 +6,9 @@ class LinearRegression:
     Constructor for the linear regression with analytical solution. It uses bias.
     It also initializes the weight, mean and standard deviation.
     '''
-    def __init__(self, add_bias):
+    def __init__(self, add_bias): # add degree as a parameter for the polynomial features
         self.add_bias = add_bias # bias to prepend a column of ones (the intercept term)
+        #self.degree = degree # degree for polynomial expansion (non-linear base)
         self.w = None # weight/coefficient
         self.mean = None # used for standardisation
         self.std = None # standard deviation
@@ -30,8 +31,18 @@ class LinearRegression:
 
         if self.add_bias: # adding bias
             x['bias'] = 1.0
-        return x
 
+        '''
+        # applying polynomial transformation for non-linear bases
+        if self.degree > 1:
+            poly_features = pd.DataFrame()
+            # create polynomial features of the given degree
+            for col in x.columns:
+                for d in range(2, self.degree + 1):
+                    poly_features[f"{col}^{d}"] = x[col] ** d
+            x = pd.concat([x, poly_features], axis=1)
+        '''
+        return x
 
     def fit(self, x: pd.DataFrame, y: pd.Series) -> "LinearRegression":
         '''
@@ -218,6 +229,7 @@ if __name__ == "__main__":
 
     # training of the model
     model = LinearRegression(add_bias=True)
+    #model = LinearRegression(add_bias=True, degree=2) # using polynomial degree for non-linear base calculation.
     model.fit(x_train, y_train)
 
     # evaluation of the model
diff --git a/logistic-regression-wdbc.py b/logistic-regression-wdbc.py
index fb2efb3..c78f777 100644
--- a/logistic-regression-wdbc.py
+++ b/logistic-regression-wdbc.py
@@ -10,17 +10,19 @@ class LogisticRegression:
     tolerance and verbose.
     It also initializes the weight, loss, x, y, mean and std.
     '''
-    def __init__(self, learning_rate: float, n_iter: int, tolerance: float, verbose: bool) -> None:
+    def __init__(self, learning_rate: float, n_iter: int, tolerance: float, verbose: bool) -> None: # add momentum as a parameter for the gradient descent
         self.lr = learning_rate
         self.n_iter = n_iter
         self.tol = tolerance
         self.verbose = verbose
+        #self.momentum = momentum # momentum parameter
         self.w: np.ndarray | None = None # weight/coefficient (bias as first element)
         self.loss: list[float] = [] # loss per iteration
         self.x: np.ndarray | None = None # matrix of inputs after standardisation
         self.y: np.ndarray | None = None # target vector
         self.mean: np.ndarray | None = None # used for standardisation
         self.std: np.ndarray | None = None # standard deviation
+        #self.v: np.ndarray | None = None # velocity term for momentum
 
     @staticmethod
     def sigmoid(z: np.ndarray) -> np.ndarray:
@@ -70,12 +72,16 @@ class LogisticRegression:
         if self.x is None or self.y is None: # if x or y are empty, throw error
             raise RuntimeError("Model is not fitted yet. Call `fit` first.")
 
+        #self.v = np.zeros_like(self.w) # initializing the velocity
+
         for i in range(1, self.n_iter + 1):
             z = self.x.dot(self.w) # linear prediction
             p = self.sigmoid(z) # probabilities of the model predictions
 
             gradient = self.x.T.dot(p - self.y) / self.y.size # for logistic regression X^T*(p - y)
 
+            #self.v = self.momentum * self.v + gradient # incorporating momentum
+            #self.w -= self.lr * self.v
             self.w -= self.lr * gradient # gradient multiplied by learning rate is removed from weight
 
             loss = self.cost(self.y, p) # cost is calculated through cross‑entropy and added for the current range
@@ -338,6 +344,8 @@ if __name__ == "__main__":
     # training of the model
     model = LogisticRegression(learning_rate=0.00005, n_iter=5000, tolerance=1e-6, verbose=True)
     # other values could be used, for example (lr=0.01, n_iter=2000, tolerance=1e-3, verbose=False)
+    #model = LogisticRegression(learning_rate=0.00005, n_iter=5000, tolerance=1e-6, verbose=True, momentum=0.9)
+    # using momentum for gradient descent calculation
     model.prepare(df_train, target_col="Diagnosis")
     model.fit()
 
diff --git a/mini-batch-sgd-linear-regression-parkinsons.py b/mini-batch-sgd-linear-regression-parkinsons.py
index f76520e..845628a 100644
--- a/mini-batch-sgd-linear-regression-parkinsons.py
+++ b/mini-batch-sgd-linear-regression-parkinsons.py
@@ -6,12 +6,13 @@ class LinearRegression:
     Constructor for the linear regression with mini‑batch stochastic gradient descent. It uses learning rate, iteration number, batch size, bias and verbose.
     It also initializes the weight, mean and standard deviation.
     '''
-    def __init__(self, lr, n_iter, batch_size, add_bias, verbose):
+    def __init__(self, lr, n_iter, batch_size, add_bias, verbose): # add degree as a parameter for the polynomial features
         self.lr = lr # learning rate
         self.n_iter = n_iter # number of gradient-descent iterations
         self.batch_size = batch_size # row number for each gradient step
         self.add_bias = add_bias # bias to prepend a column of ones (the intercept term)
         self.verbose = verbose # if true, prints the mean‑squared error every 100 iterations
+        #self.degree = degree # degree for polynomial expansion (non-linear base)
         self.w = None # weight/coefficient
         self.mean = None # used for standardisation
         self.std = None # standard deviation
@@ -33,6 +34,17 @@ class LinearRegression:
 
         if self.add_bias: # adding bias
             x['bias'] = 1.0
+        '''
+        # applying polynomial transformation for non-linear bases
+        if self.degree > 1:
+            poly_features = pd.DataFrame()
+            # create polynomial features of the given degree
+            for col in x.columns:
+                for d in range(2, self.degree + 1):
+                    poly_features[f"{col}^{d}"] = x[col] ** d
+            x = pd.concat([x, poly_features], axis=1)
+        '''
+
 
         return x
 
@@ -213,6 +225,8 @@ if __name__ == "__main__":
     # training of the model
     model = LinearRegression(lr=0.0001, n_iter=5000, batch_size=64, add_bias=True, verbose=True)
     # other values could be used, for example (lr=0.01, n_iter=2000, batch_size=None, add_bias=True, verbose=False)
+    #model = LinearRegression(lr=0.0001, n_iter=5000, batch_size=64, add_bias=True, verbose=True, degree=2)
+    # using polynomial degree for non-linear base calculation.
     model.fit(x_train, y_train)
 
     # evaluation of the model
diff --git a/mini-batch-sgd-logistic-regression-wdbc.py b/mini-batch-sgd-logistic-regression-wdbc.py
index 5d466bd..1fbe6ce 100644
--- a/mini-batch-sgd-logistic-regression-wdbc.py
+++ b/mini-batch-sgd-logistic-regression-wdbc.py
@@ -6,18 +6,20 @@ class LogisticRegression:
     Constructor for the logistic regression with gradient descent. It uses learning rate, iteration number, tolerance and verbose.
     It also initializes the weight, loss, x, y, mean and std.
     '''
-    def __init__(self, learning_rate: float, n_iter: int, batch_size: int, tolerance: float, verbose: bool) -> None:
+    def __init__(self, learning_rate: float, n_iter: int, batch_size: int, tolerance: float, verbose: bool) -> None: # add momentum as a parameter for the gradient descent
         self.lr = learning_rate
         self.n_iter = n_iter
         self.batch_size = batch_size
         self.tol = tolerance
         self.verbose = verbose
+        #self.momentum = momentum # momentum parameter
         self.w: np.ndarray | None = None # weight/coefficient (bias as first element)
         self.loss: list[float] = [] # loss per iteration
         self.x: np.ndarray | None = None # matrix of inputs after standardisation
         self.y: np.ndarray | None = None # target vector
         self.mean: np.ndarray | None = None # used for standardisation
         self.std: np.ndarray | None = None # standard deviation
+        #self.v: np.ndarray | None = None # velocity term for momentum
 
     @staticmethod
     def sigmoid(z: np.ndarray) -> np.ndarray:
@@ -75,6 +77,8 @@ class LogisticRegression:
         # number of batches per iteration
         n_batches = int(np.ceil(n_samples / batch_size))
 
+        #self.v = np.zeros_like(self.w) # initializing the velocity
+
         for epoch in range(1, self.n_iter + 1):
             shuffled_idx = np.random.permutation(n_samples) # random permutation of the indices
             for b in range(n_batches):
@@ -90,6 +94,9 @@ class LogisticRegression:
                 p = self.sigmoid(z) # probabilities of the model predictions
 
                 grad = x_batch.T.dot(p - y_batch) / y_batch.size # for logistic regression X^T*(p - y)
+
+                #self.v = self.momentum * self.v + grad # incorporating momentum
+                #self.w -= self.lr * self.v
                 self.w -= self.lr * grad # gradient multiplied by learning rate is removed from weight
 
                 # cost is calculated through cross‑entropy and added for the current range
@@ -245,6 +252,8 @@ if __name__ == "__main__":
     # training of the model
     model = LogisticRegression(learning_rate=0.00005, n_iter=5000, batch_size=64, tolerance=1e-6, verbose=True)
     # other values could be used, for example (lr=0.01, n_iter=2000, tolerance=1e-3, verbose=False)
+    #model = LogisticRegression(learning_rate=0.00005, n_iter=5000, batch_size=64, tolerance=1e-6, verbose=True, momentum=0.9)
+    # using momentum for gradient descent calculation
     model.prepare(df_train, target_col="Diagnosis")
     model.fit()
 
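
Illustrative sketch (reviewer note, not part of the patch): the hunks above only add the momentum and non-linear-base logic in commented-out form. Assuming the same NumPy/pandas setup as the patched files, the enabled logic could look roughly like the snippet below; the names degree, momentum and v mirror the commented-out lines and are illustrative rather than a finalized API.

import numpy as np
import pandas as pd

# Non-linear base: raise each column to the powers 2..degree and append the new
# columns, mirroring the commented-out polynomial block above.
def add_polynomial_features(x: pd.DataFrame, degree: int) -> pd.DataFrame:
    if degree > 1:
        poly_features = pd.DataFrame(index=x.index)
        for col in x.columns:
            for d in range(2, degree + 1):
                poly_features[f"{col}^{d}"] = x[col] ** d
        x = pd.concat([x, poly_features], axis=1)
    return x

# Momentum: one gradient-descent step for logistic regression, mirroring the
# commented-out momentum lines above: v <- momentum * v + gradient, w <- w - lr * v.
def momentum_step(w: np.ndarray, v: np.ndarray, x: np.ndarray, y: np.ndarray,
                  lr: float = 0.00005, momentum: float = 0.9):
    p = 1.0 / (1.0 + np.exp(-x.dot(w)))   # sigmoid of the linear prediction
    gradient = x.T.dot(p - y) / y.size    # X^T*(p - y) averaged over the batch
    v = momentum * v + gradient           # velocity accumulates past gradients
    w = w - lr * v                        # update uses the velocity, not the raw gradient
    return w, v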