Updated the data cleaning.

This commit is contained in:
Batuhan Berk Başoğlu 2025-09-17 17:51:33 -04:00
parent 7734802cd1
commit 8a01e1bef6
Signed by: batuhan-basoglu
SSH key fingerprint: SHA256:kEsnuHX+qbwhxSAXPUQ4ox535wFHu/hIRaa53FzxRpo
4 changed files with 44 additions and 0 deletions

View file

@@ -135,6 +135,15 @@ if __name__ == "__main__":
# Remove every row containing a null value (in place) and report what is left.
df.dropna(inplace=True)
print(f"Rows remaining after drop of the null values: {len(df)}")

# Sanity checks for data validity: keep only the rows whose value in each
# listed column lies inside the inclusive [lowest, highest] range. The ranges
# are applied one after another, in the order given here.
# NOTE(review): the limits are carried over unchanged -- confirm them against
# the dataset's documentation.
valid_ranges = (
    ('age', 18, 95),
    ('motor_UPDRS', 0, 100),
    ('total_UPDRS', 0, 100),
    ('Jitter(%)', 0, 10),
    ('Shimmer(dB)', 0, 10),
)
for column, lowest, highest in valid_ranges:
    within_range = (df[column] >= lowest) & (df[column] <= highest)
    df = df[within_range]
print(f"Rows after sanity checks: {len(df)}")

# Check that no null values are left anywhere in the frame.
null_count = df.isna().sum().sum()
assert null_count == 0, "There are still some null values."