Finished the implementation of the Python code.

2025-09-18 20:58:02 -04:00 · 2025-09-18 20:58:02 -04:00 · 455b48c89b
commit 455b48c89b
parent 5702c3c1b8
6 changed files with 540 additions and 159 deletions
--- a/mini-batch-sgd-linear-regression-parkinsons.py
+++ b/mini-batch-sgd-linear-regression-parkinsons.py
@@ -3,8 +3,8 @@ import pandas as pd

 class LinearRegression:
    '''
-        Constructor for the Linear Regression with mini‑batch stochastic gradient descent. It uses learning rate,
-        iteration number, batch size, bias and verbose. It also initializes the weight, mean and std.
+        Constructor for the linear regression with mini‑batch stochastic gradient descent. It uses learning rate,
+        iteration number, batch size, bias and verbose. It also initializes the weight, mean and standard deviation.
    '''
    def __init__(self, lr, n_iter, batch_size, add_bias, verbose):
        self.lr = lr  # learning rate
@@ -90,7 +90,7 @@ class LinearRegression:

    def predict(self, x: pd.DataFrame) -> pd.Series:
        '''
-            Predict method makes X prediction by multiplying X and weight vectors.
+            Predict method uses the trained weights to predict Y by multiplying X and the weight vector.
        '''
        if self.w is None:  # if weight is empty, throw error
            raise RuntimeError("Model is not fitted yet. Call `fit` first.")
@@ -101,7 +101,7 @@ class LinearRegression:
    def score(self, x: pd.DataFrame, y: pd.Series) -> float:
        '''
            This method is used to calculate coefficient of determination to assess the goodness
-            of fit from a regression model
+            of fit of the linear regression model
        '''
        y_pred = self.predict(x)  # predicts Y value with X predict method.
        y = pd.Series(y).astype('float64')
@@ -133,7 +133,7 @@ if __name__ == "__main__":
        df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values

    df.dropna(inplace=True) # remove null values
-    print(f"Rows remaining after drop of the null values: {len(df)}")
+    print(f"Rows remaining after drop of the null values: {len(df)}\n")

    # sanity checks for data validity - realistic parkinson data range estimations
    df = df[(df['age'] >= 18) & (df['age'] <= 95)]
@@ -164,12 +164,9 @@ if __name__ == "__main__":

    # evaluation of the model
    print("\nR² on training data:", model.score(x_train, y_train))
-    print("\nR² on testing data:", model.score(x_test, y_test))
+    print("R² on testing data:", model.score(x_test, y_test))

    # predict Y values using the trained data
    preds = model.predict(x_test)
-    print("\nFirst 5 predictions:")
-    print(preds.head())
-
-    print("\nWeights:")
-    print(model.w.round(4))
+    print("\nFirst 10 predictions:")
+    print(preds.head(10))