Added Non-Linear base and Momentum

Batuhan Berk Başoğlu 2025-09-29 12:15:54 -04:00
parent f261b06dff
commit 4ed70f6bd4
Signed by: batuhan-basoglu
SSH key fingerprint: SHA256:kEsnuHX+qbwhxSAXPUQ4ox535wFHu/hIRaa53FzxRpo
4 changed files with 48 additions and 5 deletions

View file

@@ -6,8 +6,9 @@ class LinearRegression:
     Constructor for the linear regression with analytical solution. It uses bias. It also
     initializes the weight, mean and standard deviation.
     '''
-    def __init__(self, add_bias):
+    def __init__(self, add_bias): # add degree as value for the polynomial features
         self.add_bias = add_bias # bias to prepend a column of ones (the intercept term)
+        #self.degree = degree # degree for polynomial expansion (non-linear base)
         self.w = None # weight/coefficient
         self.mean = None # used for standardisation
         self.std = None # standard deviation
@@ -30,8 +31,18 @@ class LinearRegression:
         if self.add_bias: # adding bias
             x['bias'] = 1.0
-        return x
+        '''
+        # applying polynomial transformation for non-linear bases
+        if self.degree > 1:
+            poly_features = pd.DataFrame()
+            # create polynomial features of the given degree
+            for col in x.columns:
+                for d in range(2, self.degree + 1):
+                    poly_features[f"{col}^{d}"] = x[col] ** d
+            x = pd.concat([x, poly_features], axis=1)
+        '''
+        return x

     def fit(self, x: pd.DataFrame, y: pd.Series) -> "LinearRegression":
         '''
@@ -218,6 +229,7 @@ if __name__ == "__main__":
     # training of the model
     model = LinearRegression(add_bias=True)
+    #model = LinearRegression(add_bias=True, degree=2) # using polynomial degree for non-linear base calculation.
    model.fit(x_train, y_train)
     # evaluation of the model
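For reference, a minimal standalone sketch of the polynomial expansion that the commented-out block describes (column-wise powers, no cross terms). The helper name `expand_polynomial` and the toy columns are illustrative only, not part of the repository:

import pandas as pd

def expand_polynomial(x: pd.DataFrame, degree: int) -> pd.DataFrame:
    """Append x^2 .. x^degree for every column, mirroring the commented block."""
    if degree <= 1:
        return x
    powers = {f"{col}^{d}": x[col] ** d
              for col in x.columns for d in range(2, degree + 1)}
    return pd.concat([x, pd.DataFrame(powers, index=x.index)], axis=1)

# toy usage
df = pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})
print(expand_polynomial(df, degree=3).columns.tolist())
# ['a', 'b', 'a^2', 'a^3', 'b^2', 'b^3']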

View file

@@ -10,17 +10,19 @@ class LogisticRegression:
     tolerance and verbose. It also initializes the weight, loss, x, y, mean and std.
     '''
-    def __init__(self, learning_rate: float, n_iter: int, tolerance: float, verbose: bool) -> None:
+    def __init__(self, learning_rate: float, n_iter: int, tolerance: float, verbose: bool) -> None: # add momentum as value for the gradient descent
         self.lr = learning_rate
         self.n_iter = n_iter
         self.tol = tolerance
         self.verbose = verbose
+        #self.momentum = momentum # momentum parameter
         self.w: np.ndarray | None = None # weight/coefficient (bias as first element)
         self.loss: list[float] = [] # loss per iteration
         self.x: np.ndarray | None = None # matrix of inputs after standardisation
         self.y: np.ndarray | None = None # target vector
         self.mean: np.ndarray | None = None # used for standardisation
         self.std: np.ndarray | None = None # standard deviation
+        #self.v: np.ndarray | None = None # velocity term for momentum

     @staticmethod
     def sigmoid(z: np.ndarray) -> np.ndarray:
@@ -70,12 +72,16 @@ class LogisticRegression:
         if self.x is None or self.y is None: # if x or y are empty, throw error
             raise RuntimeError("Model is not fitted yet. Call `fit` first.")
+        #self.v = np.zeros_like(self.w) # initiating the velocity
         for i in range(1, self.n_iter + 1):
             z = self.x.dot(self.w) # linear prediction
             p = self.sigmoid(z) # probabilities of the model predictions
             gradient = self.x.T.dot(p - self.y) / self.y.size # for logistic regression X^T*(p - y)
+            #self.v = self.momentum * self.v + gradient # incorporating momentum
+            #self.w -= self.lr * self.v
             self.w -= self.lr * gradient # gradient multiplied by learning rate is removed from weight
             loss = self.cost(self.y, p) # cost is calculated through crossentropy and added for the current range
@@ -338,6 +344,8 @@ if __name__ == "__main__":
     # training of the model
     model = LogisticRegression(learning_rate=0.00005, n_iter=5000, tolerance=1e-6, verbose=True)
     # other values could be used, for example (lr=0.01, n_iter=2000, tolerance=1e-3, verbose=False)
+    #model = LogisticRegression(learning_rate=0.00005, n_iter=5000, tolerance=1e-6, verbose=True, momentum=0.9)
+    # using momentum for gradient descent calculation
     model.prepare(df_train, target_col="Diagnosis")
     model.fit()
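A minimal sketch of the classical (heavy-ball) momentum update that the commented-out lines describe, shown on a toy quadratic loss. It is standalone and illustrative; the variable names only mirror the class attributes:

import numpy as np

def grad(w: np.ndarray) -> np.ndarray:
    # gradient of f(w) = 0.5 * ||w||^2 is just w; stands in for X^T (p - y) / n
    return w

w = np.array([5.0, -3.0])
v = np.zeros_like(w)  # velocity, initialised once before the loop
lr, momentum = 0.1, 0.9

for _ in range(100):
    v = momentum * v + grad(w)  # exponentially decaying sum of past gradients
    w -= lr * v                 # step along the velocity instead of the raw gradient

print(w)  # approaches the minimiser [0, 0]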

View file

@@ -6,12 +6,13 @@ class LinearRegression:
     Constructor for the linear regression with minibatch stochastic gradient descent. It uses learning rate,
     iteration number, batch size, bias and verbose. It also initializes the weight, mean and standard deviation.
     '''
-    def __init__(self, lr, n_iter, batch_size, add_bias, verbose):
+    def __init__(self, lr, n_iter, batch_size, add_bias, verbose): # add degree as value for the polynomial features
         self.lr = lr # learning rate
         self.n_iter = n_iter # number of gradient-descent iterations
         self.batch_size = batch_size # row number for each gradient step
         self.add_bias = add_bias # bias to prepend a column of ones (the intercept term)
         self.verbose = verbose # if true, prints the mean squared error every 100 iterations
+        #self.degree = degree # degree for polynomial expansion (non-linear base)
         self.w = None # weight/coefficient
         self.mean = None # used for standardisation
         self.std = None # standard deviation
@@ -33,6 +34,17 @@ class LinearRegression:
         if self.add_bias: # adding bias
             x['bias'] = 1.0
+        '''
+        # applying polynomial transformation for non-linear bases
+        if self.degree > 1:
+            poly_features = pd.DataFrame()
+            # create polynomial features of the given degree
+            for col in x.columns:
+                for d in range(2, self.degree + 1):
+                    poly_features[f"{col}^{d}"] = x[col] ** d
+            x = pd.concat([x, poly_features], axis=1)
+        '''
         return x
@@ -213,6 +225,8 @@ if __name__ == "__main__":
     # training of the model
     model = LinearRegression(lr=0.0001, n_iter=5000, batch_size=64, add_bias=True, verbose=True)
     # other values could be used, for example (lr=0.01, n_iter=2000, batch_size=None, add_bias=True, verbose=False)
+    #model = LinearRegression(lr=0.0001, n_iter=5000, batch_size=64, add_bias=True, verbose=True, degree=2)
+    # using polynomial degree for non-linear base calculation.
     model.fit(x_train, y_train)
     # evaluation of the model
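One detail to watch if the commented-out block is ever enabled: in both LinearRegression files it runs after the bias column has been appended, so the constant column would also be raised to powers, producing duplicate columns of 1.0 (and, for the analytical solver, a singular-prone design matrix). A small reordering sketch under that assumption; `add_features` is a hypothetical helper, not the repository's code:

import pandas as pd

def add_features(x: pd.DataFrame, degree: int, add_bias: bool) -> pd.DataFrame:
    """Hypothetical helper: expand polynomial features first, append the intercept last."""
    out = x.copy()
    if degree > 1:
        powers = {f"{c}^{d}": out[c] ** d
                  for c in out.columns for d in range(2, degree + 1)}
        out = pd.concat([out, pd.DataFrame(powers, index=out.index)], axis=1)
    if add_bias:
        out['bias'] = 1.0  # appended after expansion so it is never raised to a power
    return out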

View file

@@ -6,18 +6,20 @@ class LogisticRegression:
     Constructor for the logistic regression with gradient descent. It uses learning rate, iteration number,
     tolerance and verbose. It also initializes the weight, loss, x, y, mean and std.
     '''
-    def __init__(self, learning_rate: float, n_iter: int, batch_size: int, tolerance: float, verbose: bool) -> None:
+    def __init__(self, learning_rate: float, n_iter: int, batch_size: int, tolerance: float, verbose: bool) -> None: # add momentum as value for the gradient descent
         self.lr = learning_rate
         self.n_iter = n_iter
         self.batch_size = batch_size
         self.tol = tolerance
         self.verbose = verbose
+        #self.momentum = momentum # momentum parameter
         self.w: np.ndarray | None = None # weight/coefficient (bias as first element)
         self.loss: list[float] = [] # loss per iteration
         self.x: np.ndarray | None = None # matrix of inputs after standardisation
         self.y: np.ndarray | None = None # target vector
         self.mean: np.ndarray | None = None # used for standardisation
         self.std: np.ndarray | None = None # standard deviation
+        #self.v: np.ndarray | None = None # velocity term for momentum

     @staticmethod
     def sigmoid(z: np.ndarray) -> np.ndarray:
@@ -75,6 +77,8 @@ class LogisticRegression:
         # number of batches per iteration
         n_batches = int(np.ceil(n_samples / batch_size))
+        #self.v = np.zeros_like(self.w) # initiating the velocity
         for epoch in range(1, self.n_iter + 1):
             shuffled_idx = np.random.permutation(n_samples) # random permutation of the indices
             for b in range(n_batches):
@@ -90,6 +94,9 @@ class LogisticRegression:
                 p = self.sigmoid(z) # probabilities of the model predictions
                 grad = x_batch.T.dot(p - y_batch) / y_batch.size # for logistic regression X^T*(p - y)
+                #self.v = self.momentum * self.v + grad # incorporating momentum
+                #self.w -= self.lr * self.v
                 self.w -= self.lr * grad # gradient multiplied by learning rate is removed from weight

                 # cost is calculated through crossentropy and added for the current range
@@ -245,6 +252,8 @@ if __name__ == "__main__":
     # training of the model
     model = LogisticRegression(learning_rate=0.00005, n_iter=5000, batch_size=64, tolerance=1e-6, verbose=True)
     # other values could be used, for example (lr=0.01, n_iter=2000, tolerance=1e-3, verbose=False)
+    #model = LogisticRegression(learning_rate=0.00005, n_iter=5000, batch_size=64, tolerance=1e-6, verbose=True, momentum=0.9)
+    # using momentum for gradient descent calculation
     model.prepare(df_train, target_col="Diagnosis")
     model.fit()
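In the minibatch variant the commented-out velocity is initialised once before the epoch loop and carried across batches, so each step follows an exponentially weighted average of recent minibatch gradients, which smooths SGD noise. A self-contained sketch of that loop on synthetic data; all names and values here are illustrative, not the repository's code:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(200, 3))
y = (x @ np.array([1.5, -2.0, 0.5]) > 0).astype(float)
xb = np.hstack([np.ones((200, 1)), x])        # bias as first column, as in the class

w = np.zeros(4)
v = np.zeros_like(w)                          # velocity initialised once, reused across batches
lr, momentum, batch_size = 0.1, 0.9, 32

for epoch in range(200):
    idx = rng.permutation(200)
    for start in range(0, 200, batch_size):
        b = idx[start:start + batch_size]
        p = 1.0 / (1.0 + np.exp(-xb[b] @ w))  # sigmoid probabilities
        grad = xb[b].T @ (p - y[b]) / b.size  # X^T (p - y) / n on the minibatch
        v = momentum * v + grad               # same update as the commented-out lines
        w -= lr * v

print(np.round(w, 2))  # weights roughly aligned with the generating direction [0, 1.5, -2.0, 0.5]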