Finished the implementation of the Python code.
This commit is contained in:
parent
5702c3c1b8
commit
455b48c89b
6 changed files with 540 additions and 159 deletions
|
|
@ -3,8 +3,8 @@ import pandas as pd
|
|||
|
||||
class LinearRegression:
|
||||
'''
|
||||
Constructor for the Linear Regression with mini‑batch stochastic gradient descent. It uses learning rate,
|
||||
iteration number, batch size, bias and verbose. It also initializes the weight, mean and std.
|
||||
Constructor for the linear regression with mini‑batch stochastic gradient descent. It uses learning rate,
|
||||
iteration number, batch size, bias and verbose. It also initializes the weight, mean and standard deviation.
|
||||
'''
|
||||
def __init__(self, lr, n_iter, batch_size, add_bias, verbose):
|
||||
self.lr = lr # learning rate
|
||||
|
|
@ -90,7 +90,7 @@ class LinearRegression:
|
|||
|
||||
def predict(self, x: pd.DataFrame) -> pd.Series:
|
||||
'''
|
||||
Predict method makes X prediction by multiplying X and weight vectors.
|
||||
The predict method uses the fitted model to predict Y by multiplying X by the weight vector.
|
||||
'''
|
||||
if self.w is None: # if weight is empty, throw error
|
||||
raise RuntimeError("Model is not fitted yet. Call `fit` first.")
|
||||
|
|
@ -101,7 +101,7 @@ class LinearRegression:
|
|||
def score(self, x: pd.DataFrame, y: pd.Series) -> float:
|
||||
'''
|
||||
This method is used to calculate coefficient of determination to assess the goodness
|
||||
of fit from a regression model
|
||||
of fit from the linear regression model
|
||||
'''
|
||||
y_pred = self.predict(x) # predicts Y value with X predict method.
|
||||
y = pd.Series(y).astype('float64')
|
||||
|
|
@ -133,7 +133,7 @@ if __name__ == "__main__":
|
|||
df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values
|
||||
|
||||
df.dropna(inplace=True) # remove null values
|
||||
print(f"Rows remaining after drop of the null values: {len(df)}")
|
||||
print(f"Rows remaining after drop of the null values: {len(df)}\n")
|
||||
|
||||
# sanity checks for data validity - realistic parkinson data range estimations
|
||||
df = df[(df['age'] >= 18) & (df['age'] <= 95)]
|
||||
|
|
@ -164,12 +164,9 @@ if __name__ == "__main__":
|
|||
|
||||
# evaluation of the model
|
||||
print("\nR² on training data:", model.score(x_train, y_train))
|
||||
print("\nR² on testing data:", model.score(x_test, y_test))
|
||||
print("R² on testing data:", model.score(x_test, y_test))
|
||||
|
||||
# predict Y values for the test data using the trained model
|
||||
preds = model.predict(x_test)
|
||||
print("\nFirst 5 predictions:")
|
||||
print(preds.head())
|
||||
|
||||
print("\nWeights:")
|
||||
print(model.w.round(4))
|
||||
print("\nFirst 10 predictions:")
|
||||
print(preds.head(10))
|
||||
Loading…
Add table
Add a link
Reference in a new issue