ShaaniBel 2025-09-28 23:40:35 -04:00
parent 5d1e5e75c2
commit f261b06dff
2 changed files with 17 additions and 9 deletions

View file

@@ -101,14 +101,22 @@ class LinearRegression:
     def rmse(self, x: pd.DataFrame, y: pd.Series) -> float:
         '''
         Root Mean Squared Error
+        Square root of the MSE, in the same units as the target variable.
+        More interpretable than MSE while still penalizing larger errors.
+        Lower values indicate better performance.
         '''
         y_hat = self.predict(x)
         y_true = pd.Series(y).astype('float64')
         return (((y_true - y_hat) ** 2).mean()) ** 0.5
+
+    def regression_report(self, x: pd.DataFrame, y: pd.Series) -> dict:
+        """
+        Comprehensive regression report
+        """
+        return {
+            'R^2': self.score(x, y),
+            'MAE': self.mae(x, y),
+            'MSE': self.mse(x, y),
+            'RMSE': self.rmse(x, y)
+        }
 
 if __name__ == "__main__":
     df = pd.read_csv('parkinsons_updrs.data', dtype=str)
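As a rough usage sketch of the new regression_report method (illustration only, not part of the committed code): the fit() call, the module name, and the 'motor_UPDRS' target column are assumptions and are not shown in this diff.

    # Illustration only: fit(), the module name, and the target column are assumed.
    import pandas as pd
    from linear_regression import LinearRegression  # hypothetical module name

    df = pd.read_csv('parkinsons_updrs.data').astype('float64')
    X, y = df.drop(columns=['motor_UPDRS']), df['motor_UPDRS']

    model = LinearRegression()
    model.fit(X, y)
    print(model.regression_report(X, y))   # {'R^2': ..., 'MAE': ..., 'MSE': ..., 'RMSE': ...}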

View file

@ -91,7 +91,7 @@ class LogisticRegression:
print(f"Converged after {i} iterations.") print(f"Converged after {i} iterations.")
break # loss is stopped so further training would be unnecessary break # loss is stopped so further training would be unnecessary
def predict(self, x: np.ndarray | pd.DataFrame) -> np.ndarray: def predict(self, x: pd.DataFrame) -> np.ndarray:
""" """
Predict method is used to test trained data to do Y prediction by multiplying X and weight vectors Predict method is used to test trained data to do Y prediction by multiplying X and weight vectors
and then calculates the model probability by applying sigmoid function. and then calculates the model probability by applying sigmoid function.
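For context on the convergence check and break above, a small self-contained sketch of gradient descent that stops once the loss plateaus; this uses synthetic data and is not this repository's fit() implementation.

    import numpy as np

    # Self-contained sketch: stop training once the log-loss stops improving.
    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 3))
    y = ((X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.5, size=200)) > 0).astype(float)

    w = np.zeros(3)
    lr, tol, prev_loss = 0.1, 1e-6, np.inf
    for i in range(10_000):
        p = 1.0 / (1.0 + np.exp(-(X @ w)))                      # sigmoid probabilities
        loss = -np.mean(y * np.log(p + 1e-12) + (1 - y) * np.log(1 - p + 1e-12))
        if abs(prev_loss - loss) < tol:                          # loss has stopped improving
            print(f"Converged after {i} iterations.")
            break
        prev_loss = loss
        w -= lr * X.T @ (p - y) / len(y)                         # gradient step on the log-loss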
@@ -104,7 +104,7 @@ class LogisticRegression:
         probs = self.sigmoid(z)  # probabilities via the sigmoid function
         return (probs >= 0.5).astype(int)  # 0.5 is the conventional threshold for the positive class
 
-    def score(self, x: np.ndarray | pd.DataFrame, y: np.ndarray | pd.Series) -> float:
+    def score(self, x: pd.DataFrame, y: pd.Series) -> float:
         """
         Calculates mean accuracy by comparing predicted y values against the actual y values.
         """
@@ -112,7 +112,7 @@ class LogisticRegression:
         y_true = np.asarray(y).astype(int)
         return np.mean(y_pred == y_true)  # fraction of predictions that match the true labels
 
-    def confusion_matrix(self, x: np.ndarray | pd.DataFrame, y: np.ndarray | pd.Series,
+    def confusion_matrix(self, x: pd.DataFrame, y: pd.Series,
                          normalize: bool = False) -> np.ndarray:
         """
         Confusion Matrix
@ -128,7 +128,7 @@ class LogisticRegression:
return cm return cm
def plot_confusion_matrix(self, x: np.ndarray | pd.DataFrame, y: np.ndarray | pd.Series, def plot_confusion_matrix(self, x: pd.DataFrame, y: pd.Series,
normalize: bool = False, title: str = "Confusion Matrix", sns=None) -> None: normalize: bool = False, title: str = "Confusion Matrix", sns=None) -> None:
""" """
Plot confusion matrix as a heatmap Plot confusion matrix as a heatmap
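For reference, a minimal sketch of the 2x2 confusion matrix (and its row-normalized form) that confusion_matrix and plot_confusion_matrix are built around; the [[TN, FP], [FN, TP]] layout is an assumption for this sketch, not necessarily the layout the class itself uses.

    import numpy as np

    y_true = np.array([1, 0, 1, 1, 0, 0, 1, 0])
    y_pred = np.array([1, 0, 0, 1, 0, 1, 1, 0])

    # Rows = actual class, columns = predicted class: [[TN, FP], [FN, TP]]
    cm = np.zeros((2, 2), dtype=float)
    for t, p in zip(y_true, y_pred):
        cm[t, p] += 1
    print(cm)                                       # [[3. 1.] [1. 3.]]

    cm_norm = cm / cm.sum(axis=1, keepdims=True)    # normalize each row to proportions
    print(cm_norm)                                  # [[0.75 0.25] [0.25 0.75]]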
@ -145,7 +145,7 @@ class LogisticRegression:
plt.xlabel('Predicted Label') plt.xlabel('Predicted Label')
plt.show() plt.show()
def precision(self, x: np.ndarray | pd.DataFrame, y: np.ndarray | pd.Series) -> float: def precision(self, x: pd.DataFrame, y: pd.Series) -> float:
""" """
Precision = TP / (TP + FP) Precision = TP / (TP + FP)
Measures how many of the predicted positives are actually positive Measures how many of the predicted positives are actually positive
@@ -158,7 +158,7 @@ class LogisticRegression:
         return tp / (tp + fp)
 
-    def recall(self, x: np.ndarray | pd.DataFrame, y: np.ndarray | pd.Series) -> float:
+    def recall(self, x: pd.DataFrame, y: pd.Series) -> float:
         """
         Recall = TP / (TP + FN)
         Ratio of true positives to all positives in the ground truth
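To make the two formulas concrete, a quick worked example with hypothetical counts (not tied to this model's output):

    # Hypothetical counts from a 2x2 confusion matrix.
    tp, fp, fn = 30, 10, 20

    precision = tp / (tp + fp)   # 30 / 40 = 0.75: share of predicted positives that are correct
    recall = tp / (tp + fn)      # 30 / 50 = 0.60: share of actual positives that were found
    print(precision, recall)     # 0.75 0.6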