Fixed duplicate diagnosis method.

This commit is contained in:
Batuhan Berk Başoğlu 2025-09-17 17:57:37 -04:00
parent 8a01e1bef6
commit 2a7c9cd28b
Signed by: batuhan-basoglu
SSH key fingerprint: SHA256:kEsnuHX+qbwhxSAXPUQ4ox535wFHu/hIRaa53FzxRpo
2 changed files with 2 additions and 6 deletions

View file

@@ -43,9 +43,6 @@ if __name__ == "__main__":
for col in num_cols:
df = df[df[col] >= 0]
df['Diagnosis'] = df['Diagnosis'].map({'M': 1, 'B': 0}) # making diagnosis numeric
df['Diagnosis'] = df['Diagnosis'].astype('category')
# sanity checks for data validity
df = df[(df['radius_mean'] > 0) & (df['radius_mean'] <= 30)]
df = df[(df['radius_worst'] > 0) & (df['radius_worst'] <= 30)]
@@ -59,4 +56,5 @@ if __name__ == "__main__":
# check if there are still null values
assert df.isna().sum().sum() == 0, "There are still some null values."
df['Diagnosis'] = df['Diagnosis'].map({'M': 1, 'B': 0}) # making diagnosis numeric
df['Diagnosis'] = df['Diagnosis'].astype('category')

View file

@@ -43,9 +43,6 @@ if __name__ == "__main__":
for col in num_cols:
df = df[df[col] >= 0]
df['Diagnosis'] = df['Diagnosis'].map({'M': 1, 'B': 0}) # making diagnosis numeric
df['Diagnosis'] = df['Diagnosis'].astype('category')
# sanity checks for data validity
df = df[(df['radius_mean'] > 0) & (df['radius_mean'] <= 30)]
df = df[(df['radius_worst'] > 0) & (df['radius_worst'] <= 30)]
@@ -59,4 +56,5 @@ if __name__ == "__main__":
# check if there are still null values
assert df.isna().sum().sum() == 0, "There are still some null values."
df['Diagnosis'] = df['Diagnosis'].map({'M': 1, 'B': 0}) # making diagnosis numeric
df['Diagnosis'] = df['Diagnosis'].astype('category')