Finished the implementation of the Python code.
This commit is contained in:
parent 5702c3c1b8
commit 455b48c89b
6 changed files with 540 additions and 159 deletions
@@ -27,8 +27,8 @@ class LinearRegression:
class LinearRegression:
    '''
    Constructor for the Linear Regression with analytical. It uses bias. It also
    initializes the weight, mean and std.
    Constructor for the linear regression with analytical solution. It uses bias. It also
    initializes the weight, mean and standard deviation.
    '''
    def __init__(self, add_bias):
        self.add_bias = add_bias # bias to prepend a column of ones (the intercept term)

@@ -60,7 +60,8 @@ class LinearRegression:
    def fit(self, x: pd.DataFrame, y: pd.Series) -> "LinearRegression":
        '''
        Fit method to fit X and Y datas through pandas and train the linear model by analytical solution.
        It uses pandas DataFrame for the X and Series for the Y.
        It uses pandas DataFrame for the X and Series for the Y. It uses the linear regression formula
        to calculate weight
        '''
        x = self.prepare(x)
        y = pd.Series(y).astype("float64")

@@ -84,7 +85,7 @@ class LinearRegression:
    def predict(self, x: pd.DataFrame) -> pd.Series:
        '''
        Predict method is used to test trained data to do X prediction by multiplying X and weight vectors.
        Predict method is used to test trained data to do Y prediction by multiplying X and weight vectors.
        '''
        if self.w is None: # if weight is empty, throw error
            raise RuntimeError("Model is not fitted yet. Call `fit` first.")

@@ -95,7 +96,7 @@ class LinearRegression:
    def score(self, x: pd.DataFrame, y: pd.Series) -> float:
        '''
        This method is used to calculate coefficient of determination to assess the goodness
        of fit from a regression model
        of fit from the linear regression model
        '''
        y_pred = self.predict(x) # predicts Y value with X predict method.
        y = pd.Series(y).astype('float64')

@@ -127,7 +128,7 @@ if __name__ == "__main__":
        df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values

    df.dropna(inplace=True) # remove null values
    print(f"Rows remaining after drop of the null values: {len(df)}")
    print(f"Rows remaining after drop of the null values: {len(df)}\n")

    # sanity checks for data validity - realistic parkinson data range estimations
    df = df[(df['age'] >= 18) & (df['age'] <= 95)]

@@ -157,12 +158,9 @@ if __name__ == "__main__":

    # evaluation of the model
    print("\nR² on training data:", model.score(x_train, y_train))
    print("\nR² on testing data:", model.score(x_test, y_test))
    print("R² on testing data:", model.score(x_test, y_test))

    # predict Y values using the trained data
    preds = model.predict(x_test)
    print("\nFirst 5 predictions:")
    print(preds.head())

    print("\nWeights:")
    print(model.w.round(4))
    print("\nFirst 10 predictions:")
    print(preds.head(10))

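Note on the analytical solution referenced in the fit docstring above: the weights come from the normal equation, w = (X^T X)^(-1) X^T y. A minimal standalone sketch of that calculation (NumPy only; the function name and the toy data are illustrative, not part of this commit):

import numpy as np

def fit_normal_equation(x: np.ndarray, y: np.ndarray, add_bias: bool = True) -> np.ndarray:
    """Least-squares weights via the normal equation w = (X^T X)^-1 X^T y."""
    if add_bias:
        x = np.hstack([np.ones((x.shape[0], 1)), x])  # prepend the intercept column of ones
    # pinv instead of inv so the sketch also copes with a singular X^T X
    return np.linalg.pinv(x.T @ x) @ (x.T @ y)

# tiny usage example with made-up numbers: y = 2x, so expect intercept ~0 and slope ~2
x = np.array([[1.0], [2.0], [3.0]])
y = np.array([2.0, 4.0, 6.0])
print(fit_normal_equation(x, y))
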
@@ -1,144 +1,126 @@
import numpy as np
import pandas as pd

class LogisticRegressionGD:
    """Binary logistic regression trained with batch gradient descent."""
    def __init__(self,
                 learning_rate: float = 0.01,
                 n_iter: int = 1000,
                 tolerance: float = 1e-5,
                 verbose: bool = False):
        """
        Parameters
        ----------
        learning_rate : float
            Step size for weight updates.
        n_iter : int
            Maximum number of iterations.
        tolerance : float
            Stopping criterion: if the change in loss is < tolerance, stop.
        verbose : bool
            If True, prints loss at every 100 iterations.
        """

class LogisticRegression:
    '''
    Constructor for the logistic regression with gradient descent. It uses learning rate, iteration number,
    tolerance and verbose. It also initializes the weight, loss, x, y, mean and std.
    '''

    def __init__(self, learning_rate: float, n_iter: int, tolerance: float, verbose: bool) -> None:
        self.lr = learning_rate
        self.n_iter = n_iter
        self.tol = tolerance
        self.verbose = verbose
        self.w: np.ndarray | None = None # weight/coefficient (bias as first element)
        self.loss: list[float] = [] # loss per iteration
        self.x: np.ndarray | None = None # matrix of inputs after standardisation
        self.y: np.ndarray | None = None # target vector
        self.mean: np.ndarray | None = None # used for standardisation
        self.std: np.ndarray | None = None # standard deviation

        # placeholders that will be filled during training
        self.w_ = None # weights (including bias as w[0])
        self.loss_history_ = [] # loss at each iteration
        self.X_ = None # feature matrix (after standardisation)
        self.y_ = None # target vector (0/1)

    # ------------------------------------------------------------------
    # 2. Sigmoid helper (vectorised)
    # ------------------------------------------------------------------
    @staticmethod
    def _sigmoid(z: np.ndarray) -> np.ndarray:
        return 1.0 / (1.0 + np.exp(-z))
    def sigmoid(z: np.ndarray) -> np.ndarray:
        """Sigmoid method for the logistic regression method."""
        return 1.0 / (1.0 + np.exp(-z)) # 1/(1+exp(-z))

    # ------------------------------------------------------------------
    # 3. Cost function (cross‑entropy)
    # ------------------------------------------------------------------
    @staticmethod
    def _cost(y: np.ndarray, p: np.ndarray) -> float:
        # avoid log(0) by clipping
    def cost(y: np.ndarray, p: np.ndarray) -> float:
        """Cross‑entropy loss is used for the cost calculation"""
        eps = 1e-15
        p = np.clip(p, eps, 1 - eps)
        return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))

    # ------------------------------------------------------------------
    # 4. Data preparation – this is where we split X / y, scale, etc.
    # ------------------------------------------------------------------
    def prepare(self, df: pd.DataFrame, target_col: str = 'Diagnosis') -> None:
    def prepare(self, df: pd.DataFrame, target_col: str) -> None:
        """
        Splits `df` into X and y, standardises X (mean=0, std=1),
        and stores the result in the class attributes.

        Parameters
        ----------
        df : pd.DataFrame
            Cleaned data – *already* contains a numeric target in `target_col`.
        target_col : str
            Name of the binary target column.
        Preparation method splits df into x and y. It does define X and Y values from the dataframe and target column.
        Then it does standardisation, adds bias and initializes the weight/coefficient.

        """
        # target must be a 0/1 array
        self.y_ = df[target_col].values.astype(np.int64)
        if target_col not in df.columns:
            raise ValueError(f"Target column '{target_col}' not found in DataFrame.")

        # X – all columns except the target
        X_raw = df.drop(columns=[target_col]).values.astype(np.float64)
        self.y = df[target_col].values.astype(np.int64)

        # -----------------------------------------------------------------
        # 3.1 Feature scaling – we put the bias in the first column
        # -----------------------------------------------------------------
        # compute mean / std on the whole training set (no train/val split yet)
        self.mean_ = X_raw.mean(axis=0)
        self.std_ = X_raw.std(axis=0)
        # avoid division by zero
        self.std_[self.std_ == 0] = 1.0
        x_raw = df.drop(columns=[target_col]).values.astype(np.float64)

        X_scaled = (X_raw - self.mean_) / self.std_
        # add bias column (all ones)
        X_scaled = np.hstack([np.ones((X_scaled.shape[0], 1)), X_scaled])
        # standardisation
        self.mean = x_raw.mean(axis=0)
        self.std = x_raw.std(axis=0)
        self.std[self.std == 0] = 1.0

        self.X_ = X_scaled
        self.w_ = np.zeros(X_scaled.shape[1]) # initialise weights
        x_scaled = (x_raw - self.mean) / self.std # standardisation formula


        bias = np.ones((x_scaled.shape[0], 1), dtype=np.float64) # adding bias
        self.x = np.hstack((bias, x_scaled))

        self.w = np.zeros(self.x.shape[1], dtype=np.float64) # initialize weight as zero

    # ------------------------------------------------------------------
    # 4. Fit – batch gradient descent
    # ------------------------------------------------------------------
    def fit(self) -> None:
        """Runs batch gradient descent for `n_iter` epochs."""
        """

        Fit method to fit X and Y datas through pandas and train the linear model by gradient descent.
        For the n iterations, it finds probabilities through sigmoid of linear prediction and does the
        gradient to calculate the loss.

        """
        if self.x is None or self.y is None: # if x or y are empty, throw error
            raise RuntimeError("Model is not fitted yet. Call `fit` first.")

        for i in range(1, self.n_iter + 1):
            z = np.dot(self.X_, self.w_) # linear part
            p = self._sigmoid(z) # predicted probabilities
            z = self.x.dot(self.w) # linear prediction
            p = self.sigmoid(z) # probabilities of the model predictions

            # gradient of the log‑likelihood (including bias)
            gradient = np.dot(self.X_.T, (p - self.y_)) / self.y_.size
            gradient = self.x.T.dot(p - self.y) / self.y.size # gradient calculation formula

            # weight update
            self.w_ -= self.lr * gradient
            self.w -= self.lr * gradient # gradient multiplied by learning rate is removed from weight

            # record cost and check stopping criterion
            loss = self._cost(self.y_, p)
            self.loss_history_.append(loss)
            loss = self.cost(self.y, p) # cost is calculated through cross‑entropy and added for the current range
            self.loss.append(loss)

            # if verbose, it shows the loss every 100 iterations and displays it
            if self.verbose and i % 100 == 0:
                print(f"Iteration {i:4d} – loss: {loss:.6f}")
                print(f"Iter {i:4d} – loss: {loss:.6f}")

            if i > 1 and abs(self.loss_history_[-2] - loss) < self.tol:
            # tests whether the absolute change in loss is smaller than the tolerance
            if i > 1 and abs(self.loss[-2] - loss) < self.tol:
                if self.verbose:
                    print(f"Converged after {i} iterations.")
                break
                break # loss is stopped so further training would be unnecessary

    # ------------------------------------------------------------------
    # 5. Predict – binary class labels
    # ------------------------------------------------------------------
    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return 0/1 predictions for a new X matrix (already scaled)."""
        z = np.dot(X, self.w_)
        probs = self._sigmoid(z)
        return (probs >= 0.5).astype(int)

    # ------------------------------------------------------------------
    # 6. Score – accuracy on a given (X, y) pair
    # ------------------------------------------------------------------
    def score(self, X: np.ndarray, y: np.ndarray) -> float:
        """Return the classification accuracy."""
        y_pred = self.predict(X)
        return np.mean(y_pred == y)
    def predict(self, x: np.ndarray | pd.DataFrame) -> np.ndarray:
        """
        Predict method is used to test trained data to do Y prediction by multiplying X and weight vectors
        and then calculates the model probability by applying sigmoid function.
        """
        if isinstance(x, pd.DataFrame): # verifies value type
            x = x.values.astype(np.float64)
        if x.ndim == 1:
            x = x.reshape(1, -1)
        z = x.dot(self.w)
        probs = self.sigmoid(z) # probability calculation through sigmoid method
        return (probs >= 0.5).astype(int) # 0.5 is commonly used to define positivity of the probability

    def score(self, x: np.ndarray | pd.DataFrame, y: np.ndarray | pd.Series) -> float:
        """
        This method is used to calculate mean accuracy with the prediction of Y and actual Y values.
        """
        y_pred = self.predict(x)
        y_true = np.asarray(y).astype(int)
        return np.mean(y_pred == y_true) # mean is calculated if Y values match

if __name__ == "__main__":
    columns = [
        'ID', 'Diagnosis',
        'radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean', 'smoothness_mean',
        'compactness_mean', 'concavity_mean', 'concave_points_mean', 'symmetry_mean', 'fractal_dimension_mean',
        'compactness_mean', 'concavitymean', 'concave_points_mean', 'symmetrymean', 'fractal_dimension_mean',
        'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
        'compactness_se', 'concavity_se', 'concave_points_se', 'symmetry_se', 'fractal_dimension_se',
        'compactness_se', 'concavityse', 'concave_points_se', 'symmetryse', 'fractal_dimension_se',
        'radius_worst', 'texture_worst', 'perimeter_worst', 'area_worst', 'smoothness_worst',
        'compactness_worst', 'concavity_worst', 'concave_points_worst', 'symmetry_worst', 'fractal_dimension_worst'
        'compactness_worst', 'concavityworst', 'concave_points_worst', 'symmetryworst', 'fractal_dimension_worst'
    ]

    df = pd.read_csv('wdbc.data', header=None, names=columns, dtype=str)

@@ -155,7 +137,7 @@ if __name__ == "__main__":
        df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values

    df.dropna(inplace=True) # remove null values
    print(f"Rows remaining after drop of the null values: {len(df)}")
    print(f"Rows remaining after drop of the null values: {len(df)}\n")
    for col in num_cols:
        df = df[df[col] >= 0]

@@ -172,33 +154,40 @@ if __name__ == "__main__":
    # check if there are still null values
    assert df.isna().sum().sum() == 0, "There are still some null values."

    df['Diagnosis'] = df['Diagnosis'].map({'M': 1, 'B': 0}) # making diagnosis numeric
    df['Diagnosis'] = df['Diagnosis'].astype('category')
    # making diagnosis numeric
    df["Diagnosis"] = df["Diagnosis"].map({"M": 1, "B": 0}).astype("category")

    # ---- 7.2 Instantiate and train ------------------------------------
    model = LogisticRegressionGD(learning_rate=0.05,
                                 n_iter=5000,
                                 tolerance=1e-6,
                                 verbose=True)
    rng = np.random.default_rng(seed=42)
    n_train = len(df)
    indices = rng.permutation(n_train)
    train_size = int(0.8 * n_train)

    # we need to split X / y here
    X = df.drop(columns=['Diagnosis'])
    y = df['Diagnosis'].cat.codes.values # 0/1 array
    train_idx = indices[:train_size]
    test_idx = indices[train_size:]

    # Standardise X inside the model for us – we’ll do it in `prepare`
    model.X_ = (X - X.mean()) / X.std() # bias‑column will be added later
    model.X_ = np.hstack([np.ones((model.X_.shape[0], 1)), model.X_]) # add bias
    model.y_ = y
    df_train = df.iloc[train_idx].reset_index(drop=True)
    df_test = df.iloc[test_idx].reset_index(drop=True)

    # Fit the model
    # training of the model
    model = LogisticRegression(learning_rate=0.00005, n_iter=5000, tolerance=1e-6, verbose=True)
    # other values could be used, for example (lr=0.01, n_iter=2000, tolerance=1e-3, verbose=False)
    model.prepare(df_train, target_col="Diagnosis")
    model.fit()

    # -------------------------------------------------
    # 8. Evaluate on the same data (you could split)
    # -------------------------------------------------
    acc = model.score(model.X_, model.y_)
    print(f"Training accuracy (on the whole cleaned set): {acc:.4f}")
    # evaluation of the model
    train_acc = model.score(model.x, model.y)
    print(f"\nMean accuracy on training data: {train_acc:.4f}")

    # Example: predict on the first 10 samples
    y_hat = model.predict(model.X_[:10])
    print("First 10 predictions:", y_hat)
    # copied prepare method for building test X data
    x_test_raw = df_test.drop(columns=['Diagnosis']).values.astype(np.float64)
    x_test_scaled = (x_test_raw - model.mean) / model.std
    bias_test = np.ones((x_test_scaled.shape[0], 1), dtype=np.float64)
    X_test = np.hstack((bias_test, x_test_scaled))
    y_test = df_test['Diagnosis'].values.astype(int)
    test_acc = model.score(X_test, y_test)
    print(f"Mean accuracy on testing data: {test_acc:.4f}")

    # predict Y values using the trained data
    first_10 = X_test[:10]
    y_hat = model.predict(first_10)
    print("\nFirst 10 predictions:", y_hat.ravel())

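For reference, the quantities computed by cost and by the gradient step in fit above are the binary cross-entropy and its gradient; written out in LaTeX (p is the vector of sigmoid outputs, m the number of samples, and eta the learning rate self.lr):

L(w) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y_i \log p_i + (1 - y_i) \log(1 - p_i) \right], \qquad p = \sigma(Xw)

\nabla_w L = \frac{1}{m} X^{\top} (p - y), \qquad w \leftarrow w - \eta \, \nabla_w L
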
@@ -3,8 +3,8 @@ import pandas as pd

class LinearRegression:
    '''
    Constructor for the Linear Regression with mini‑batch stochastic gradient descent. It uses learning rate,
    iteration number, batch size, bias and verbose. It also initializes the weight, mean and std.
    Constructor for the linear regression with mini‑batch stochastic gradient descent. It uses learning rate,
    iteration number, batch size, bias and verbose. It also initializes the weight, mean and standard deviation.
    '''
    def __init__(self, lr, n_iter, batch_size, add_bias, verbose):
        self.lr = lr # learning rate

@@ -90,7 +90,7 @@ class LinearRegression:

    def predict(self, x: pd.DataFrame) -> pd.Series:
        '''
        Predict method makes X prediction by multiplying X and weight vectors.
        Predict method is used to test trained data to do Y prediction by multiplying X and weight vectors.
        '''
        if self.w is None: # if weight is empty, throw error
            raise RuntimeError("Model is not fitted yet. Call `fit` first.")

@@ -101,7 +101,7 @@ class LinearRegression:
    def score(self, x: pd.DataFrame, y: pd.Series) -> float:
        '''
        This method is used to calculate coefficient of determination to assess the goodness
        of fit from a regression model
        of fit from the linear regression model
        '''
        y_pred = self.predict(x) # predicts Y value with X predict method.
        y = pd.Series(y).astype('float64')

@@ -133,7 +133,7 @@ if __name__ == "__main__":
        df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values

    df.dropna(inplace=True) # remove null values
    print(f"Rows remaining after drop of the null values: {len(df)}")
    print(f"Rows remaining after drop of the null values: {len(df)}\n")

    # sanity checks for data validity - realistic parkinson data range estimations
    df = df[(df['age'] >= 18) & (df['age'] <= 95)]

@@ -164,12 +164,9 @@ if __name__ == "__main__":

    # evaluation of the model
    print("\nR² on training data:", model.score(x_train, y_train))
    print("\nR² on testing data:", model.score(x_test, y_test))
    print("R² on testing data:", model.score(x_test, y_test))

    # predict Y values using the trained data
    preds = model.predict(x_test)
    print("\nFirst 5 predictions:")
    print(preds.head())

    print("\nWeights:")
    print(model.w.round(4))
    print("\nFirst 10 predictions:")
    print(preds.head(10))

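The score docstrings in both linear regression files refer to the coefficient of determination, which the main blocks print as R². A minimal self-contained sketch of the standard formula (NumPy only; the function name and toy numbers are illustrative, not the committed implementation):

import numpy as np

def r2_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """R^2 = 1 - SS_res / SS_tot."""
    ss_res = np.sum((y_true - y_pred) ** 2)           # residual sum of squares
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)  # total sum of squares
    return 1.0 - ss_res / ss_tot

# quick check with made-up numbers: a near-perfect fit should give R^2 close to 1
print(r2_score(np.array([1.0, 2.0, 3.0]), np.array([1.1, 1.9, 3.2])))
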
@@ -1,28 +1,143 @@
import numpy as np
import pandas as pd

'''

class LogisticRegression:
    def __init__(self):

    def prepare(self):

    def fit(self):

    def predict(self):

    def score(self):
    '''
    Constructor for the logistic regression with gradient descent. It uses learning rate, iteration number,
    tolerance and verbose. It also initializes the weight, loss, x, y, mean and std.
    '''

    def __init__(self, learning_rate: float, n_iter: int, batch_size: int, tolerance: float, verbose: bool) -> None:
        self.lr = learning_rate
        self.n_iter = n_iter
        self.batch_size = batch_size
        self.tol = tolerance
        self.verbose = verbose
        self.w: np.ndarray | None = None # weight/coefficient (bias as first element)
        self.loss: list[float] = [] # loss per iteration
        self.x: np.ndarray | None = None # matrix of inputs after standardisation
        self.y: np.ndarray | None = None # target vector
        self.mean: np.ndarray | None = None # used for standardisation
        self.std: np.ndarray | None = None # standard deviation

    @staticmethod
    def sigmoid(z: np.ndarray) -> np.ndarray:
        """Sigmoid method for the logistic regression method."""
        return 1.0 / (1.0 + np.exp(-z)) # 1/(1+exp(-z))

    @staticmethod
    def cost(y: np.ndarray, p: np.ndarray) -> float:
        """Cross‑entropy loss is used for the cost calculation"""
        eps = 1e-15
        p = np.clip(p, eps, 1 - eps)
        return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))

    def prepare(self, df: pd.DataFrame, target_col: str) -> None:
        """

        Preparation method splits df into x and y. It does define X and Y values from the dataframe and target column.
        Then it does standardisation, adds bias and initializes the weight/coefficient.

        """
        if target_col not in df.columns:
            raise ValueError(f"Target column '{target_col}' not found in DataFrame.")

        self.y = df[target_col].values.astype(np.int64)

        x_raw = df.drop(columns=[target_col]).values.astype(np.float64)

        # standardisation
        self.mean = x_raw.mean(axis=0)
        self.std = x_raw.std(axis=0)
        self.std[self.std == 0] = 1.0

        x_scaled = (x_raw - self.mean) / self.std # standardisation formula


        bias = np.ones((x_scaled.shape[0], 1), dtype=np.float64) # adding bias
        self.x = np.hstack((bias, x_scaled))

        self.w = np.zeros(self.x.shape[1], dtype=np.float64) # initialize weight as zero

    def fit(self) -> None:
        """

        Fit method to fit X and Y datas through pandas and train the linear model by gradient descent.
        For the n iterations, it finds probabilities through sigmoid of linear prediction and does the
        gradient to calculate the loss.

        """
        if self.x is None or self.y is None: # if x or y are empty, throw error
            raise RuntimeError("Model is not fitted yet. Call `prepare` first.")

        n_samples = self.x.shape[0]
        batch_size = self.batch_size or n_samples

        for epoch in range(1, self.n_iter + 1):
            shuffled_idx = np.random.permutation(n_samples) # random permutation of the indices
            x_shuffled = self.x[shuffled_idx]
            y_shuffled = self.y[shuffled_idx]

            # process execution for each mini‑batch
            for b in range(0, n_samples, batch_size):
                start = b * batch_size
                end = start + batch_size
                idx = shuffled_idx[start:end]

                x_batch = x_shuffled[idx]
                y_batch = y_shuffled[idx]

                z = x_batch.dot(self.w)
                p = self.sigmoid(z)

                grad = x_batch.T.dot(p - y_batch) / y_batch.size # gradient calculation formula
                self.w -= self.lr * grad # gradient multiplied by learning rate is removed from weight

            # cost is calculated through cross‑entropy and added for the current range
            loss = self.cost(self.y, self.sigmoid(self.x.dot(self.w)))
            self.loss.append(loss)

            # if verbose, it shows the loss every 100 iterations and displays it
            if self.verbose and epoch % 100 == 0:
                print(f"Iter {epoch:4d} – loss: {loss:.6f}")

            # tests whether the absolute change in loss is smaller than the tolerance
            if epoch > 1 and abs(self.loss[-2] - loss) < self.tol:
                if self.verbose:
                    print(f"Converged after {epoch} iterations.")
                break

    def predict(self, x: np.ndarray | pd.DataFrame) -> np.ndarray:
        """
        Predict method is used to test trained data to do Y prediction by multiplying X and weight vectors
        and then calculates the model probability by applying sigmoid function.
        """
        if isinstance(x, pd.DataFrame): # verifies value type
            x = x.values.astype(np.float64)
        if x.ndim == 1:
            x = x.reshape(1, -1)
        z = x.dot(self.w)
        probs = self.sigmoid(z) # probability calculation through sigmoid method
        return (probs >= 0.5).astype(int) # 0.5 is commonly used to define positivity of the probability

    def score(self, x: np.ndarray | pd.DataFrame, y: np.ndarray | pd.Series) -> float:
        """
        This method is used to calculate mean accuracy with the prediction of Y and actual Y values.
        """
        y_pred = self.predict(x)
        y_true = np.asarray(y).astype(int)
        return np.mean(y_pred == y_true) # mean is calculated if Y values match

if __name__ == "__main__":
    columns = [
        'ID', 'Diagnosis',
        'radius_mean', 'texture_mean', 'perimeter_mean', 'area_mean', 'smoothness_mean',
        'compactness_mean', 'concavity_mean', 'concave_points_mean', 'symmetry_mean', 'fractal_dimension_mean',
        'compactness_mean', 'concavitymean', 'concave_points_mean', 'symmetrymean', 'fractal_dimension_mean',
        'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
        'compactness_se', 'concavity_se', 'concave_points_se', 'symmetry_se', 'fractal_dimension_se',
        'compactness_se', 'concavityse', 'concave_points_se', 'symmetryse', 'fractal_dimension_se',
        'radius_worst', 'texture_worst', 'perimeter_worst', 'area_worst', 'smoothness_worst',
        'compactness_worst', 'concavity_worst', 'concave_points_worst', 'symmetry_worst', 'fractal_dimension_worst'
        'compactness_worst', 'concavityworst', 'concave_points_worst', 'symmetryworst', 'fractal_dimension_worst'
    ]

    df = pd.read_csv('wdbc.data', header=None, names=columns, dtype=str)

@@ -39,7 +154,7 @@ if __name__ == "__main__":
        df[col] = pd.to_numeric(df[col], errors='coerce') # convert columns to numeric values

    df.dropna(inplace=True) # remove null values
    print(f"Rows remaining after drop of the null values: {len(df)}")
    print(f"Rows remaining after drop of the null values: {len(df)}\n")
    for col in num_cols:
        df = df[df[col] >= 0]

@@ -56,5 +171,40 @@ if __name__ == "__main__":
    # check if there are still null values
    assert df.isna().sum().sum() == 0, "There are still some null values."

    df['Diagnosis'] = df['Diagnosis'].map({'M': 1, 'B': 0}) # making diagnosis numeric
    df['Diagnosis'] = df['Diagnosis'].astype('category')
    # making diagnosis numeric
    df["Diagnosis"] = df["Diagnosis"].map({"M": 1, "B": 0}).astype("category")

    rng = np.random.default_rng(seed=42)
    n_samples = len(df)
    indices = rng.permutation(n_samples)
    train_size = int(0.8 * n_samples)

    train_idx = indices[:train_size]
    test_idx = indices[train_size:]

    df_train = df.iloc[train_idx].reset_index(drop=True)
    df_test = df.iloc[test_idx].reset_index(drop=True)

    # training of the model
    model = LogisticRegression(learning_rate=0.00005, n_iter=5000, batch_size=64, tolerance=1e-6, verbose=True)
    # other values could be used, for example (lr=0.01, n_iter=2000, tolerance=1e-3, verbose=False)
    model.prepare(df_train, target_col="Diagnosis")
    model.fit()

    # evaluation of the model
    train_acc = model.score(model.x, model.y)
    print(f"\nMean accuracy on training data: {train_acc:.4f}")

    # copied prepare method for building test X data
    x_test_raw = df_test.drop(columns=['Diagnosis']).values.astype(np.float64)
    x_test_scaled = (x_test_raw - model.mean) / model.std
    bias_test = np.ones((x_test_scaled.shape[0], 1), dtype=np.float64)
    X_test = np.hstack((bias_test, x_test_scaled))
    y_test = df_test['Diagnosis'].values.astype(int)
    test_acc = model.score(X_test, y_test)
    print(f"Mean accuracy on testing data: {test_acc:.4f}")

    # predict Y values using the trained data
    first_10 = X_test[:10]
    y_hat = model.predict(first_10)
    print("\nFirst 10 predictions:", y_hat.ravel())

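One remark on the mini-batch loop in fit above: the loop variable b already advances in steps of batch_size, so start = b * batch_size appears to jump past the data after the first batch, and indexing x_shuffled with shuffled_idx[start:end] permutes rows that were already shuffled. A self-contained sketch of the more conventional contiguous slicing (illustrative names, not the committed code):

import numpy as np

def minibatch_slices(n_samples: int, batch_size: int):
    """Yield (start, end) bounds that cover a shuffled dataset in contiguous batches."""
    for start in range(0, n_samples, batch_size):
        yield start, min(start + batch_size, n_samples)

rng = np.random.default_rng(0)
x = rng.normal(size=(10, 3))
y = rng.integers(0, 2, size=10).astype(float)
perm = rng.permutation(len(x))
x_shuffled, y_shuffled = x[perm], y[perm]  # shuffle once per epoch

for start, end in minibatch_slices(len(x), batch_size=4):
    x_batch, y_batch = x_shuffled[start:end], y_shuffled[start:end]
    print(start, end, x_batch.shape)  # batches cover rows 0-4, 4-8, 8-10
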
parkinsons_updrs.names (executable file, 107 lines)

@@ -0,0 +1,107 @@
Parkinsons Telemonitoring Data Set

Abstract: Oxford Parkinson's Disease Telemonitoring Dataset

============================================================

Data Set Characteristics: Multivariate
Attribute Characteristics: Integer, Real
Associated Tasks: Regression
Number of Instances: 5875
Number of Attributes: 26
Area: Life
Date Donated: 2009-10-29

============================================================

SOURCE:

The dataset was created by Athanasios Tsanas (tsanasthanasis '@' gmail.com)
and Max Little (littlem '@' physics.ox.ac.uk) of the University of Oxford, in
collaboration with 10 medical centers in the US and Intel Corporation who
developed the telemonitoring device to record the speech signals. The
original study used a range of linear and nonlinear regression methods to
predict the clinician's Parkinson's disease symptom score on the UPDRS scale.


============================================================

DATA SET INFORMATION:

This dataset is composed of a range of biomedical voice measurements from 42
people with early-stage Parkinson's disease recruited to a six-month trial of
a telemonitoring device for remote symptom progression monitoring. The
recordings were automatically captured in the patient's homes.

Columns in the table contain subject number, subject age, subject gender,
time interval from baseline recruitment date, motor UPDRS, total UPDRS, and
16 biomedical voice measures. Each row corresponds to one of 5,875 voice
recording from these individuals. The main aim of the data is to predict the
motor and total UPDRS scores ('motor_UPDRS' and 'total_UPDRS') from the 16
voice measures.

The data is in ASCII CSV format. The rows of the CSV file contain an instance
corresponding to one voice recording. There are around 200 recordings per
patient, the subject number of the patient is identified in the first column.
For further information or to pass on comments, please contact Athanasios
Tsanas (tsanasthanasis '@' gmail.com) or Max Little (littlem '@'
physics.ox.ac.uk).

Further details are contained in the following reference -- if you use this
dataset, please cite:
Athanasios Tsanas, Max A. Little, Patrick E. McSharry, Lorraine O. Ramig (2009),
'Accurate telemonitoring of Parkinson.s disease progression by non-invasive
speech tests',
IEEE Transactions on Biomedical Engineering (to appear).

Further details about the biomedical voice measures can be found in:
Max A. Little, Patrick E. McSharry, Eric J. Hunter, Lorraine O. Ramig (2009),
'Suitability of dysphonia measurements for telemonitoring of Parkinson's
disease',
IEEE Transactions on Biomedical Engineering, 56(4):1015-1022


===========================================================

ATTRIBUTE INFORMATION:

subject# - Integer that uniquely identifies each subject
age - Subject age
sex - Subject gender '0' - male, '1' - female
test_time - Time since recruitment into the trial. The integer part is the
number of days since recruitment.
motor_UPDRS - Clinician's motor UPDRS score, linearly interpolated
total_UPDRS - Clinician's total UPDRS score, linearly interpolated
Jitter(%),Jitter(Abs),Jitter:RAP,Jitter:PPQ5,Jitter:DDP - Several measures of
variation in fundamental frequency
Shimmer,Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,Shimmer:APQ11,Shimmer:DDA -
Several measures of variation in amplitude
NHR,HNR - Two measures of ratio of noise to tonal components in the voice
RPDE - A nonlinear dynamical complexity measure
DFA - Signal fractal scaling exponent
PPE - A nonlinear measure of fundamental frequency variation


===========================================================

RELEVANT PAPERS:

Little MA, McSharry PE, Hunter EJ, Ramig LO (2009),
'Suitability of dysphonia measurements for telemonitoring of Parkinson's
disease',
IEEE Transactions on Biomedical Engineering, 56(4):1015-1022

Little MA, McSharry PE, Roberts SJ, Costello DAE, Moroz IM.
'Exploiting Nonlinear Recurrence and Fractal Scaling Properties for Voice
Disorder Detection',
BioMedical Engineering OnLine 2007, 6:23 (26 June 2007)

===========================================================

CITATION REQUEST:

If you use this dataset, please cite the following paper:
A Tsanas, MA Little, PE McSharry, LO Ramig (2009)
'Accurate telemonitoring of Parkinson.s disease progression by non-invasive
speech tests',
IEEE Transactions on Biomedical Engineering (to appear).

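A minimal sketch of loading the CSV described above with pandas, using the attribute names listed in the file (the companion data file name parkinsons_updrs.data and the presence of a header row are assumptions, not stated in this commit):

import pandas as pd

# assumption: parkinsons_updrs.data is in the working directory and starts with a header row
df = pd.read_csv("parkinsons_updrs.data")
print(len(df))  # 5875 instances per the description above
print(df[["age", "motor_UPDRS", "total_UPDRS"]].head())  # targets are motor_UPDRS and total_UPDRS
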
wdbc.names (executable file, 140 lines)

@@ -0,0 +1,140 @@
1. Title: Wisconsin Diagnostic Breast Cancer (WDBC)

2. Source Information

a) Creators:

Dr. William H. Wolberg, General Surgery Dept., University of
Wisconsin, Clinical Sciences Center, Madison, WI 53792
wolberg@eagle.surgery.wisc.edu

W. Nick Street, Computer Sciences Dept., University of
Wisconsin, 1210 West Dayton St., Madison, WI 53706
street@cs.wisc.edu 608-262-6619

Olvi L. Mangasarian, Computer Sciences Dept., University of
Wisconsin, 1210 West Dayton St., Madison, WI 53706
olvi@cs.wisc.edu

b) Donor: Nick Street

c) Date: November 1995

3. Past Usage:

first usage:

W.N. Street, W.H. Wolberg and O.L. Mangasarian
Nuclear feature extraction for breast tumor diagnosis.
IS&T/SPIE 1993 International Symposium on Electronic Imaging: Science
and Technology, volume 1905, pages 861-870, San Jose, CA, 1993.

OR literature:

O.L. Mangasarian, W.N. Street and W.H. Wolberg.
Breast cancer diagnosis and prognosis via linear programming.
Operations Research, 43(4), pages 570-577, July-August 1995.

Medical literature:

W.H. Wolberg, W.N. Street, and O.L. Mangasarian.
Machine learning techniques to diagnose breast cancer from
fine-needle aspirates.
Cancer Letters 77 (1994) 163-171.

W.H. Wolberg, W.N. Street, and O.L. Mangasarian.
Image analysis and machine learning applied to breast cancer
diagnosis and prognosis.
Analytical and Quantitative Cytology and Histology, Vol. 17
No. 2, pages 77-87, April 1995.

W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian.
Computerized breast cancer diagnosis and prognosis from fine
needle aspirates.
Archives of Surgery 1995;130:511-516.

W.H. Wolberg, W.N. Street, D.M. Heisey, and O.L. Mangasarian.
Computer-derived nuclear features distinguish malignant from
benign breast cytology.
Human Pathology, 26:792--796, 1995.

See also:
http://www.cs.wisc.edu/~olvi/uwmp/mpml.html
http://www.cs.wisc.edu/~olvi/uwmp/cancer.html

Results:

- predicting field 2, diagnosis: B = benign, M = malignant
- sets are linearly separable using all 30 input features
- best predictive accuracy obtained using one separating plane
in the 3-D space of Worst Area, Worst Smoothness and
Mean Texture. Estimated accuracy 97.5% using repeated
10-fold crossvalidations. Classifier has correctly
diagnosed 176 consecutive new patients as of November
1995.

4. Relevant information

Features are computed from a digitized image of a fine needle
aspirate (FNA) of a breast mass. They describe
characteristics of the cell nuclei present in the image.
A few of the images can be found at
http://www.cs.wisc.edu/~street/images/

Separating plane described above was obtained using
Multisurface Method-Tree (MSM-T) [K. P. Bennett, "Decision Tree
Construction Via Linear Programming." Proceedings of the 4th
Midwest Artificial Intelligence and Cognitive Science Society,
pp. 97-101, 1992], a classification method which uses linear
programming to construct a decision tree. Relevant features
were selected using an exhaustive search in the space of 1-4
features and 1-3 separating planes.

The actual linear program used to obtain the separating plane
in the 3-dimensional space is that described in:
[K. P. Bennett and O. L. Mangasarian: "Robust Linear
Programming Discrimination of Two Linearly Inseparable Sets",
Optimization Methods and Software 1, 1992, 23-34].


This database is also available through the UW CS ftp server:

ftp ftp.cs.wisc.edu
cd math-prog/cpo-dataset/machine-learn/WDBC/

5. Number of instances: 569

6. Number of attributes: 32 (ID, diagnosis, 30 real-valued input features)

7. Attribute information

1) ID number
2) Diagnosis (M = malignant, B = benign)
3-32)

Ten real-valued features are computed for each cell nucleus:

a) radius (mean of distances from center to points on the perimeter)
b) texture (standard deviation of gray-scale values)
c) perimeter
d) area
e) smoothness (local variation in radius lengths)
f) compactness (perimeter^2 / area - 1.0)
g) concavity (severity of concave portions of the contour)
h) concave points (number of concave portions of the contour)
i) symmetry
j) fractal dimension ("coastline approximation" - 1)

Several of the papers listed above contain detailed descriptions of
how these features are computed.

The mean, standard error, and "worst" or largest (mean of the three
largest values) of these features were computed for each image,
resulting in 30 features. For instance, field 3 is Mean Radius, field
13 is Radius SE, field 23 is Worst Radius.

All feature values are recoded with four significant digits.

8. Missing attribute values: none

9. Class distribution: 357 benign, 212 malignant

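As a quick illustration of field 2 described above, a hedged sketch that reads wdbc.data with pandas and checks the stated class distribution (357 benign, 212 malignant); the presence of wdbc.data in the working directory is an assumption:

import pandas as pd

# field 1 is the ID and field 2 the diagnosis, per the attribute information above
df = pd.read_csv("wdbc.data", header=None)
diagnosis = df[1].map({"M": 1, "B": 0})  # M = malignant, B = benign, as in the scripts in this commit
print(diagnosis.value_counts())          # expected: 357 zeros (benign) and 212 ones (malignant)
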