Remove commented-out preprocessing blocks and the `self.w is None` ("Model not fitted yet") checks in LogisticRegression

2025-09-30 18:59:15 -04:00 · 2025-09-30 18:59:15 -04:00 · 91e98ba8bc
commit 91e98ba8bc
parent 193dcabbff
4 changed files with 0 additions and 117 deletions
--- a/linear-regression-parkinsons.py
+++ b/linear-regression-parkinsons.py
@@ -169,31 +169,6 @@ if __name__ == "__main__":
    print("\nCorrelation with target variable descending order:")
    print(target_corr)

-    '''
-    # repeated fields —> for now I removed them since might not be too relevant (need testing to see if we keep it later)
-    Parkinson = Parkinson.drop(Parkinson.columns[0:3], axis=1)
-
-    # ____________________________________________________________________________________
-    # HANDLE OUTLIERS AND INCONSISTENCIES
-    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
-    # if z-score more than 3 --> outllier
-    # print(Parkinson.head().to_string())
-
-    # ____________________________________________________________________________________
-
-    # normalize / scale features? if not already done
-    # !!!!!!!!!!only for X not y!!!!!!!!!!!
-    # normalize = Parkinson.drop(Parkinson.columns[0:6], axis=1)
-    # normalize = (normalize - normalize.mean()) / normalize.std()
-    # Parkinson[Parkinson.columns[6:]] = normalize
-
-    # turn into array for regression
-    x = x.to_numpy()
-    y = y.to_numpy()
-
-    # split data into train 80% / tests datasets 20%
-    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
-'''
    for col in df:
        df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values

--- a/logistic-regression-wdbc.py
+++ b/logistic-regression-wdbc.py
@@ -200,10 +200,6 @@ class LogisticRegression:
        """
        if isinstance(x, pd.DataFrame):
            x = x.values
-
-        if self.w is None:
-            raise ValueError("Model not fitted yet")
-
        # Add bias term if needed
        if x.shape[1] == len(self.w) - 1:
            x = np.column_stack([np.ones(x.shape[0]), x])
@@ -250,37 +246,7 @@ if __name__ == "__main__":
    df = df.drop_duplicates()
    # check data types: --> everything is good
    # print(df.dtypes)
-    '''
-    # ____________________________________________________________________________________
-    # HANDLE OUTLIERS AND INCONSISTENCIES
-    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
-    # if z-score more than 3 --> outllier
-    # print(cancer.head().to_string())

-    # ____________________________________________________________________________________
-
-    # separate dependent VS independent variables
-    x = cancer.drop(cancer.columns[0], axis=1)
-    y = cancer[1]
-
-    # print(x.head().to_string())
-
-    # normalize data
-    # normalize = cancer.drop(cancer.columns[0], axis=1)
-    # normalize = (normalize - normalize.mean()) / normalize.std()
-    # cancer[cancer.columns[1:]] = normalize
-    # print(cancer.head().to_string())
-
-    # turn into array for regression
-    x = x.to_numpy()
-    y = y.to_numpy()
-
-    # cancer_y = np.asarray(cancer2[0].tolist())
-    # cancer2.drop(cancer2[0], axis = 1, inplace = True)
-
-    # split data into train / tests datasets
-    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
-'''
    missing_rows = df[df.isin(['?', 'NA', 'na', '']).any(axis=1)]  # checks null values
    print(f"Rows with null values: {len(missing_rows)}")

--- a/mini-batch-sgd-linear-regression-parkinsons.py
+++ b/mini-batch-sgd-linear-regression-parkinsons.py
@@ -171,31 +171,7 @@ if __name__ == "__main__":
    print("\nCorrelation with target variable descending order:")
    print(target_corr)

-    '''
-    # repeated fields —> for now I removed them since might not be too relevant (need testing to see if we keep it later)
-    Parkinson = Parkinson.drop(Parkinson.columns[0:3], axis=1)

-    # ____________________________________________________________________________________
-    # HANDLE OUTLIERS AND INCONSISTENCIES
-    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
-    # if z-score more than 3 --> outllier
-    # print(Parkinson.head().to_string())
-
-    # ____________________________________________________________________________________
-
-    # normalize / scale features? if not already done
-    # !!!!!!!!!!only for X not y!!!!!!!!!!!
-    # normalize = Parkinson.drop(Parkinson.columns[0:6], axis=1)
-    # normalize = (normalize - normalize.mean()) / normalize.std()
-    # Parkinson[Parkinson.columns[6:]] = normalize
-
-    # turn into array for regression
-    x = x.to_numpy()
-    y = y.to_numpy()
-
-    # split data into train 80% / tests datasets 20%
-    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
-'''
    for col in df:
        df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values

--- a/mini-batch-sgd-logistic-regression-wdbc.py
+++ b/mini-batch-sgd-logistic-regression-wdbc.py
@@ -219,9 +219,6 @@ class LogisticRegression:
        if isinstance(x, pd.DataFrame):
            x = x.values

-        if self.w is None:
-            raise ValueError("Model not fitted yet")
-
        # Add bias term if needed
        if x.shape[1] == len(self.w) - 1:
            x = np.column_stack([np.ones(x.shape[0]), x])
@@ -259,37 +256,6 @@ if __name__ == "__main__":
    # check data types: --> everything is good
    # print(df.dtypes)

-    '''
-    # ____________________________________________________________________________________
-    # HANDLE OUTLIERS AND INCONSISTENCIES
-    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
-    # if z-score more than 3 --> outllier
-    # print(cancer.head().to_string())
-
-    # ____________________________________________________________________________________
-
-    # separate dependent VS independent variables
-    x = cancer.drop(cancer.columns[0], axis=1)
-    y = cancer[1]
-
-    # print(X.head().to_string())
-
-    # normalize data
-    # normalize = cancer.drop(cancer.columns[0], axis=1)
-    # normalize = (normalize - normalize.mean()) / normalize.std()
-    # cancer[cancer.columns[1:]] = normalize
-    # print(cancer.head().to_string())
-
-    # turn into array for regression
-    x = x.to_numpy()
-    y = y.to_numpy()
-
-    # cancer_y = np.asarray(cancer2[0].tolist())
-    # cancer2.drop(cancer2[0], axis = 1, inplace = True)
-
-    # split data into train / tests datasets
-    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
-'''
    missing_rows = df[df.isin(['?', 'NA', 'na', '']).any(axis=1)]  # checks null values
    print(f"Rows with null values: {len(missing_rows)}")