diff --git a/logistic-regression-wdbc.py b/logistic-regression-wdbc.py index b0ec4e7..820006e 100644 --- a/logistic-regression-wdbc.py +++ b/logistic-regression-wdbc.py @@ -43,9 +43,6 @@ if __name__ == "__main__": for col in num_cols: df = df[df[col] >= 0] - df['Diagnosis'] = df['Diagnosis'].map({'M': 1, 'B': 0}) # making diagnosis numeric - df['Diagnosis'] = df['Diagnosis'].astype('category') - # sanity checks for data validity df = df[(df['radius_mean'] > 0) & (df['radius_mean'] <= 30)] df = df[(df['radius_worst'] > 0) & (df['radius_worst'] <= 30)] @@ -59,4 +56,5 @@ if __name__ == "__main__": # check if there are still null values assert df.isna().sum().sum() == 0, "There are still some null values." + df['Diagnosis'] = df['Diagnosis'].map({'M': 1, 'B': 0}) # making diagnosis numeric df['Diagnosis'] = df['Diagnosis'].astype('category') \ No newline at end of file diff --git a/mini-batch-sgd-logistic-regression-wdbc.py b/mini-batch-sgd-logistic-regression-wdbc.py index b0ec4e7..820006e 100644 --- a/mini-batch-sgd-logistic-regression-wdbc.py +++ b/mini-batch-sgd-logistic-regression-wdbc.py @@ -43,9 +43,6 @@ if __name__ == "__main__": for col in num_cols: df = df[df[col] >= 0] - df['Diagnosis'] = df['Diagnosis'].map({'M': 1, 'B': 0}) # making diagnosis numeric - df['Diagnosis'] = df['Diagnosis'].astype('category') - # sanity checks for data validity df = df[(df['radius_mean'] > 0) & (df['radius_mean'] <= 30)] df = df[(df['radius_worst'] > 0) & (df['radius_worst'] <= 30)] @@ -59,4 +56,5 @@ if __name__ == "__main__": # check if there are still null values assert df.isna().sum().sum() == 0, "There are still some null values." + df['Diagnosis'] = df['Diagnosis'].map({'M': 1, 'B': 0}) # making diagnosis numeric df['Diagnosis'] = df['Diagnosis'].astype('category') \ No newline at end of file