From 91e98ba8bc3e875a75f75e41a606d07da7e5543a Mon Sep 17 00:00:00 2001
From: ShaaniBel <bellemare.sks@gmail.com>
Date: Tue, 30 Sep 2025 18:59:15 -0400
Subject: [PATCH] removed dead commented-out code and 'Model not fitted' checks

---
 linear-regression-parkinsons.py               | 25 --------------
 logistic-regression-wdbc.py                   | 34 -------------------
 ...-batch-sgd-linear-regression-parkinsons.py | 24 -------------
 mini-batch-sgd-logistic-regression-wdbc.py    | 34 -------------------
 4 files changed, 117 deletions(-)

diff --git a/linear-regression-parkinsons.py b/linear-regression-parkinsons.py
index 83e2634..1ca9f0e 100644
--- a/linear-regression-parkinsons.py
+++ b/linear-regression-parkinsons.py
@@ -169,31 +169,6 @@ if __name__ == "__main__":
     print("\nCorrelation with target variable descending order:")
     print(target_corr)
 
-    '''
-    # repeated fields —> for now I removed them since might not be too relevant (need testing to see if we keep it later)
-    Parkinson = Parkinson.drop(Parkinson.columns[0:3], axis=1)
-
-    # ____________________________________________________________________________________
-    # HANDLE OUTLIERS AND INCONSISTENCIES
-    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
-    # if z-score more than 3 --> outllier
-    # print(Parkinson.head().to_string())
-
-    # ____________________________________________________________________________________
-
-    # normalize / scale features? if not already done
-    # !!!!!!!!!!only for X not y!!!!!!!!!!!
-    # normalize = Parkinson.drop(Parkinson.columns[0:6], axis=1)
-    # normalize = (normalize - normalize.mean()) / normalize.std()
-    # Parkinson[Parkinson.columns[6:]] = normalize
-
-    # turn into array for regression
-    x = x.to_numpy()
-    y = y.to_numpy()
-
-    # split data into train 80% / tests datasets 20%
-    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
-'''
     for col in df:
         df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values
 
diff --git a/logistic-regression-wdbc.py b/logistic-regression-wdbc.py
index 52d283d..725abcb 100644
--- a/logistic-regression-wdbc.py
+++ b/logistic-regression-wdbc.py
@@ -200,10 +200,6 @@ class LogisticRegression:
         """
         if isinstance(x, pd.DataFrame):
             x = x.values
-
-        if self.w is None:
-            raise ValueError("Model not fitted yet")
-
         # Add bias term if needed
         if x.shape[1] == len(self.w) - 1:
             x = np.column_stack([np.ones(x.shape[0]), x])
@@ -250,37 +246,7 @@ if __name__ == "__main__":
     df = df.drop_duplicates()
     # check data types: --> everything is good
     # print(df.dtypes)
-    '''
-    # ____________________________________________________________________________________
-    # HANDLE OUTLIERS AND INCONSISTENCIES
-    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
-    # if z-score more than 3 --> outllier
-    # print(cancer.head().to_string())
 
-    # ____________________________________________________________________________________
-
-    # separate dependent VS independent variables
-    x = cancer.drop(cancer.columns[0], axis=1)
-    y = cancer[1]
-
-    # print(x.head().to_string())
-
-    # normalize data
-    # normalize = cancer.drop(cancer.columns[0], axis=1)
-    # normalize = (normalize - normalize.mean()) / normalize.std()
-    # cancer[cancer.columns[1:]] = normalize
-    # print(cancer.head().to_string())
-
-    # turn into array for regression
-    x = x.to_numpy()
-    y = y.to_numpy()
-
-    # cancer_y = np.asarray(cancer2[0].tolist())
-    # cancer2.drop(cancer2[0], axis = 1, inplace = True)
-
-    # split data into train / tests datasets
-    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
-'''
     missing_rows = df[df.isin(['?', 'NA', 'na', '']).any(axis=1)]  # checks null values
     print(f"Rows with null values: {len(missing_rows)}")
 
diff --git a/mini-batch-sgd-linear-regression-parkinsons.py b/mini-batch-sgd-linear-regression-parkinsons.py
index f022d7d..5cae5fd 100644
--- a/mini-batch-sgd-linear-regression-parkinsons.py
+++ b/mini-batch-sgd-linear-regression-parkinsons.py
@@ -171,31 +171,7 @@ if __name__ == "__main__":
     print("\nCorrelation with target variable descending order:")
     print(target_corr)
 
-    '''
-    # repeated fields —> for now I removed them since might not be too relevant (need testing to see if we keep it later)
-    Parkinson = Parkinson.drop(Parkinson.columns[0:3], axis=1)
 
-    # ____________________________________________________________________________________
-    # HANDLE OUTLIERS AND INCONSISTENCIES
-    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
-    # if z-score more than 3 --> outllier
-    # print(Parkinson.head().to_string())
-
-    # ____________________________________________________________________________________
-
-    # normalize / scale features? if not already done
-    # !!!!!!!!!!only for X not y!!!!!!!!!!!
-    # normalize = Parkinson.drop(Parkinson.columns[0:6], axis=1)
-    # normalize = (normalize - normalize.mean()) / normalize.std()
-    # Parkinson[Parkinson.columns[6:]] = normalize
-
-    # turn into array for regression
-    x = x.to_numpy()
-    y = y.to_numpy()
-
-    # split data into train 80% / tests datasets 20%
-    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
-'''
     for col in df:
         df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values
 
diff --git a/mini-batch-sgd-logistic-regression-wdbc.py b/mini-batch-sgd-logistic-regression-wdbc.py
index 5941d59..f0126c3 100644
--- a/mini-batch-sgd-logistic-regression-wdbc.py
+++ b/mini-batch-sgd-logistic-regression-wdbc.py
@@ -219,9 +219,6 @@ class LogisticRegression:
         if isinstance(x, pd.DataFrame):
             x = x.values
 
-        if self.w is None:
-            raise ValueError("Model not fitted yet")
-
         # Add bias term if needed
         if x.shape[1] == len(self.w) - 1:
             x = np.column_stack([np.ones(x.shape[0]), x])
@@ -259,37 +256,6 @@ if __name__ == "__main__":
     # check data types: --> everything is good
     # print(df.dtypes)
 
-    '''
-    # ____________________________________________________________________________________
-    # HANDLE OUTLIERS AND INCONSISTENCIES
-    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
-    # if z-score more than 3 --> outllier
-    # print(cancer.head().to_string())
-
-    # ____________________________________________________________________________________
-
-    # separate dependent VS independent variables
-    x = cancer.drop(cancer.columns[0], axis=1)
-    y = cancer[1]
-
-    # print(X.head().to_string())
-
-    # normalize data
-    # normalize = cancer.drop(cancer.columns[0], axis=1)
-    # normalize = (normalize - normalize.mean()) / normalize.std()
-    # cancer[cancer.columns[1:]] = normalize
-    # print(cancer.head().to_string())
-
-    # turn into array for regression
-    x = x.to_numpy()
-    y = y.to_numpy()
-
-    # cancer_y = np.asarray(cancer2[0].tolist())
-    # cancer2.drop(cancer2[0], axis = 1, inplace = True)
-
-    # split data into train / tests datasets
-    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
-'''
     missing_rows = df[df.isin(['?', 'NA', 'na', '']).any(axis=1)]  # checks null values
     print(f"Rows with null values: {len(missing_rows)}")