From 91e98ba8bc3e875a75f75e41a606d07da7e5543a Mon Sep 17 00:00:00 2001 From: ShaaniBel Date: Tue, 30 Sep 2025 18:59:15 -0400 Subject: [PATCH] removed unnecessary comments --- linear-regression-parkinsons.py | 25 -------------- logistic-regression-wdbc.py | 34 ------------------- ...-batch-sgd-linear-regression-parkinsons.py | 24 ------------- mini-batch-sgd-logistic-regression-wdbc.py | 34 ------------------- 4 files changed, 117 deletions(-) diff --git a/linear-regression-parkinsons.py b/linear-regression-parkinsons.py index 83e2634..1ca9f0e 100644 --- a/linear-regression-parkinsons.py +++ b/linear-regression-parkinsons.py @@ -169,31 +169,6 @@ if __name__ == "__main__": print("\nCorrelation with target variable descending order:") print(target_corr) - ''' - # repeated fields —> for now I removed them since might not be too relevant (need testing to see if we keep it later) - Parkinson = Parkinson.drop(Parkinson.columns[0:3], axis=1) - - # ____________________________________________________________________________________ - # HANDLE OUTLIERS AND INCONSISTENCIES - # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3 - # if z-score more than 3 --> outllier - # print(Parkinson.head().to_string()) - - # ____________________________________________________________________________________ - - # normalize / scale features? if not already done - # !!!!!!!!!!only for X not y!!!!!!!!!!! - # normalize = Parkinson.drop(Parkinson.columns[0:6], axis=1) - # normalize = (normalize - normalize.mean()) / normalize.std() - # Parkinson[Parkinson.columns[6:]] = normalize - - # turn into array for regression - x = x.to_numpy() - y = y.to_numpy() - - # split data into train 80% / tests datasets 20% - x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y) -''' for col in df: df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values diff --git a/logistic-regression-wdbc.py b/logistic-regression-wdbc.py index 52d283d..725abcb 100644 --- a/logistic-regression-wdbc.py +++ b/logistic-regression-wdbc.py @@ -200,10 +200,6 @@ class LogisticRegression: """ if isinstance(x, pd.DataFrame): x = x.values - - if self.w is None: - raise ValueError("Model not fitted yet") - # Add bias term if needed if x.shape[1] == len(self.w) - 1: x = np.column_stack([np.ones(x.shape[0]), x]) @@ -250,37 +246,7 @@ if __name__ == "__main__": df = df.drop_duplicates() # check data types: --> everything is good # print(df.dtypes) - ''' - # ____________________________________________________________________________________ - # HANDLE OUTLIERS AND INCONSISTENCIES - # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3 - # if z-score more than 3 --> outllier - # print(cancer.head().to_string()) - # ____________________________________________________________________________________ - - # separate dependent VS independent variables - x = cancer.drop(cancer.columns[0], axis=1) - y = cancer[1] - - # print(x.head().to_string()) - - # normalize data - # normalize = cancer.drop(cancer.columns[0], axis=1) - # normalize = (normalize - normalize.mean()) / normalize.std() - # cancer[cancer.columns[1:]] = normalize - # print(cancer.head().to_string()) - - # turn into array for regression - x = x.to_numpy() - y = y.to_numpy() - - # cancer_y = np.asarray(cancer2[0].tolist()) - # cancer2.drop(cancer2[0], axis = 1, inplace = True) - - # split data into train / tests datasets - x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y) -''' missing_rows = df[df.isin(['?', 'NA', 'na', '']).any(axis=1)] # checks null values print(f"Rows with null values: {len(missing_rows)}") diff --git a/mini-batch-sgd-linear-regression-parkinsons.py b/mini-batch-sgd-linear-regression-parkinsons.py index f022d7d..5cae5fd 100644 --- a/mini-batch-sgd-linear-regression-parkinsons.py +++ b/mini-batch-sgd-linear-regression-parkinsons.py @@ -171,31 +171,7 @@ if __name__ == "__main__": print("\nCorrelation with target variable descending order:") print(target_corr) - ''' - # repeated fields —> for now I removed them since might not be too relevant (need testing to see if we keep it later) - Parkinson = Parkinson.drop(Parkinson.columns[0:3], axis=1) - # ____________________________________________________________________________________ - # HANDLE OUTLIERS AND INCONSISTENCIES - # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3 - # if z-score more than 3 --> outllier - # print(Parkinson.head().to_string()) - - # ____________________________________________________________________________________ - - # normalize / scale features? if not already done - # !!!!!!!!!!only for X not y!!!!!!!!!!! - # normalize = Parkinson.drop(Parkinson.columns[0:6], axis=1) - # normalize = (normalize - normalize.mean()) / normalize.std() - # Parkinson[Parkinson.columns[6:]] = normalize - - # turn into array for regression - x = x.to_numpy() - y = y.to_numpy() - - # split data into train 80% / tests datasets 20% - x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y) -''' for col in df: df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values diff --git a/mini-batch-sgd-logistic-regression-wdbc.py b/mini-batch-sgd-logistic-regression-wdbc.py index 5941d59..f0126c3 100644 --- a/mini-batch-sgd-logistic-regression-wdbc.py +++ b/mini-batch-sgd-logistic-regression-wdbc.py @@ -219,9 +219,6 @@ class LogisticRegression: if isinstance(x, pd.DataFrame): x = x.values - if self.w is None: - raise ValueError("Model not fitted yet") - # Add bias term if needed if x.shape[1] == len(self.w) - 1: x = np.column_stack([np.ones(x.shape[0]), x]) @@ -259,37 +256,6 @@ if __name__ == "__main__": # check data types: --> everything is good # print(df.dtypes) - ''' - # ____________________________________________________________________________________ - # HANDLE OUTLIERS AND INCONSISTENCIES - # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3 - # if z-score more than 3 --> outllier - # print(cancer.head().to_string()) - - # ____________________________________________________________________________________ - - # separate dependent VS independent variables - x = cancer.drop(cancer.columns[0], axis=1) - y = cancer[1] - - # print(X.head().to_string()) - - # normalize data - # normalize = cancer.drop(cancer.columns[0], axis=1) - # normalize = (normalize - normalize.mean()) / normalize.std() - # cancer[cancer.columns[1:]] = normalize - # print(cancer.head().to_string()) - - # turn into array for regression - x = x.to_numpy() - y = y.to_numpy() - - # cancer_y = np.asarray(cancer2[0].tolist()) - # cancer2.drop(cancer2[0], axis = 1, inplace = True) - - # split data into train / tests datasets - x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y) -''' missing_rows = df[df.isin(['?', 'NA', 'na', '']).any(axis=1)] # checks null values print(f"Rows with null values: {len(missing_rows)}")