removed unnecessary comments

2025-09-30 18:59:15 -04:00 · 2025-09-30 18:59:15 -04:00 · 91e98ba8bc
commit 91e98ba8bc
parent 193dcabbff
4 changed files with 0 additions and 117 deletions
--- a/linear-regression-parkinsons.py
+++ b/linear-regression-parkinsons.py
@@ -169,31 +169,6 @@ if __name__ == "__main__":
    print("\nCorrelation with target variable descending order:")
    print(target_corr)
    '''
    # repeated fields —> for now I removed them since might not be too relevant (need testing to see if we keep it later)
    Parkinson = Parkinson.drop(Parkinson.columns[0:3], axis=1)
    # ____________________________________________________________________________________
    # HANDLE OUTLIERS AND INCONSISTENCIES
    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
    # if z-score more than 3 --> outllier
    # print(Parkinson.head().to_string())
    # ____________________________________________________________________________________
    # normalize / scale features? if not already done
    # !!!!!!!!!!only for X not y!!!!!!!!!!!
    # normalize = Parkinson.drop(Parkinson.columns[0:6], axis=1)
    # normalize = (normalize - normalize.mean()) / normalize.std()
    # Parkinson[Parkinson.columns[6:]] = normalize
    # turn into array for regression
    x = x.to_numpy()
    y = y.to_numpy()
    # split data into train 80% / tests datasets 20%
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
 '''
    for col in df:
        df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values
--- a/logistic-regression-wdbc.py
+++ b/logistic-regression-wdbc.py
@@ -200,10 +200,6 @@ class LogisticRegression:
        """
        if isinstance(x, pd.DataFrame):
            x = x.values
        if self.w is None:
            raise ValueError("Model not fitted yet")
        # Add bias term if needed
        if x.shape[1] == len(self.w) - 1:
            x = np.column_stack([np.ones(x.shape[0]), x])
@@ -250,37 +246,7 @@ if __name__ == "__main__":
    df = df.drop_duplicates()
    # check data types: --> everything is good
    # print(df.dtypes)
    '''
    # ____________________________________________________________________________________
    # HANDLE OUTLIERS AND INCONSISTENCIES
    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
    # if z-score more than 3 --> outllier
    # print(cancer.head().to_string())
    # ____________________________________________________________________________________
    # separate dependent VS independent variables
    x = cancer.drop(cancer.columns[0], axis=1)
    y = cancer[1]
    # print(x.head().to_string())
    # normalize data
    # normalize = cancer.drop(cancer.columns[0], axis=1)
    # normalize = (normalize - normalize.mean()) / normalize.std()
    # cancer[cancer.columns[1:]] = normalize
    # print(cancer.head().to_string())
    # turn into array for regression
    x = x.to_numpy()
    y = y.to_numpy()
    # cancer_y = np.asarray(cancer2[0].tolist())
    # cancer2.drop(cancer2[0], axis = 1, inplace = True)
    # split data into train / tests datasets
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
 '''
    missing_rows = df[df.isin(['?', 'NA', 'na', '']).any(axis=1)]  # checks null values
    print(f"Rows with null values: {len(missing_rows)}")
--- a/mini-batch-sgd-linear-regression-parkinsons.py
+++ b/mini-batch-sgd-linear-regression-parkinsons.py
@@ -171,31 +171,7 @@ if __name__ == "__main__":
    print("\nCorrelation with target variable descending order:")
    print(target_corr)
    '''
    # repeated fields —> for now I removed them since might not be too relevant (need testing to see if we keep it later)
    Parkinson = Parkinson.drop(Parkinson.columns[0:3], axis=1)
    # ____________________________________________________________________________________
    # HANDLE OUTLIERS AND INCONSISTENCIES
    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
    # if z-score more than 3 --> outllier
    # print(Parkinson.head().to_string())
    # ____________________________________________________________________________________
    # normalize / scale features? if not already done
    # !!!!!!!!!!only for X not y!!!!!!!!!!!
    # normalize = Parkinson.drop(Parkinson.columns[0:6], axis=1)
    # normalize = (normalize - normalize.mean()) / normalize.std()
    # Parkinson[Parkinson.columns[6:]] = normalize
    # turn into array for regression
    x = x.to_numpy()
    y = y.to_numpy()
    # split data into train 80% / tests datasets 20%
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
 '''
    for col in df:
        df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values
--- a/mini-batch-sgd-logistic-regression-wdbc.py
+++ b/mini-batch-sgd-logistic-regression-wdbc.py
@@ -219,9 +219,6 @@ class LogisticRegression:
        if isinstance(x, pd.DataFrame):
            x = x.values
        if self.w is None:
            raise ValueError("Model not fitted yet")
        # Add bias term if needed
        if x.shape[1] == len(self.w) - 1:
            x = np.column_stack([np.ones(x.shape[0]), x])
@@ -259,37 +256,6 @@ if __name__ == "__main__":
    # check data types: --> everything is good
    # print(df.dtypes)
    '''
    # ____________________________________________________________________________________
    # HANDLE OUTLIERS AND INCONSISTENCIES
    # https://medium.com/@heyamit10/pandas-outlier-detection-techniques-e9afece3d9e3
    # if z-score more than 3 --> outllier
    # print(cancer.head().to_string())
    # ____________________________________________________________________________________
    # separate dependent VS independent variables
    x = cancer.drop(cancer.columns[0], axis=1)
    y = cancer[1]
    # print(X.head().to_string())
    # normalize data
    # normalize = cancer.drop(cancer.columns[0], axis=1)
    # normalize = (normalize - normalize.mean()) / normalize.std()
    # cancer[cancer.columns[1:]] = normalize
    # print(cancer.head().to_string())
    # turn into array for regression
    x = x.to_numpy()
    y = y.to_numpy()
    # cancer_y = np.asarray(cancer2[0].tolist())
    # cancer2.drop(cancer2[0], axis = 1, inplace = True)
    # split data into train / tests datasets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42, stratify=y)
 '''
    missing_rows = df[df.isin(['?', 'NA', 'na', '']).any(axis=1)]  # checks null values
    print(f"Rows with null values: {len(missing_rows)}")