diff --git a/.idea/-Comp-551-Assignment-2.iml b/.idea/-Comp-551-Assignment-2.iml
index 3d7297e..89b2bd1 100644
--- a/.idea/-Comp-551-Assignment-2.iml
+++ b/.idea/-Comp-551-Assignment-2.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/A2.py b/A2.py
deleted file mode 100644
index 465b739..0000000
--- a/A2.py
+++ /dev/null
@@ -1,363 +0,0 @@
-import numpy as np
-import matplotlib.pyplot as plt
-import warnings
-from sklearn.model_selection import train_test_split
-warnings.filterwarnings('ignore')
-
-#reproducibility
-np.random.seed(2)
-
-#__________________________________________________________________________________
-#Task 1
-#1.1
-def generate_data(n_samples=100, noise_std=1.0):
-    """Generates synthetic data with noise"""
-    # generate x values uniformly in [0, 10]
-    x = np.linspace(0, 10, n_samples)
-
-    #y values without noise
-    y_clean = np.log(x + 1) * np.cos(x) + np.sin(2*x)
-
-    #noise
-    noise = np.random.normal(0, noise_std, n_samples)
-    y_noisy = y_clean + noise
-
-    return x, y_clean, y_noisy
-
-# generate data
-x, y_clean, y_noisy = generate_data(100)
-
-# Plot clean and noisy data
-plt.plot(x, y_clean, 'b-', label='Clean Data', linewidth=2)
-plt.plot(x, y_noisy, 'ro', label='Noisy Data', alpha=0.6, markersize=4)
-plt.xlabel('x')
-plt.ylabel('y')
-plt.title('Clean vs Noisy Data')
-plt.legend()
-plt.grid(True, alpha=0.3)
-plt.show()
-
-
-#__________________________________________________________________________________
-#1.2
-def gaussian_basis(x, mu, sigma=1.0):
-    """Gaussian basis function"""
-    return np.exp(-(x - mu)**2 / sigma**2)
-
-def gaussian_features(x, D, sigma=1.0):
-    """Create Gaussian basis features"""
-    if D == 0:
-        return np.ones((len(x), 1))
-
-    x_min, x_max = np.min(x), np.max(x)
-
-    if D == 1:
-        mu_i = np.array([(x_min + x_max) / 2])
-    else:
-        mu_i = x_min + (x_max - x_min) / (D - 1) * np.arange(D)
-
-    features = np.ones((len(x), D + 1))  # with bias term
-
-    for i, mu in enumerate(mu_i):
-        features[:, i+1] = gaussian_basis(x, mu, sigma).flatten()
-
-    return features
-
-
-# Plot Gaussian basis functions for different D values
-D_values_to_plot = [5, 15, 30,45]
-x_plot = np.linspace(0, 10, 200)
-
-plt.figure(figsize=(15, 4))
-
-for i, D in enumerate(D_values_to_plot, 1):
-    plt.subplot(1, 4, i)
-
-    # Calculate means
-    x_min, x_max = np.min(x_plot), np.max(x_plot)
-    mu_i = x_min + (x_max - x_min) / (D - 1) * np.arange(D)
-
-    # Plot each Gaussian basis
-    for mu in mu_i:
-        phi = gaussian_basis(x_plot, mu)
-        plt.plot(x_plot, phi, alpha=0.7)
-
-    plt.title(f'Gaussian Basis Functions (D={D})')
-    plt.xlabel('x')
-    plt.ylabel('$\phi(x)$')
-    plt.grid(True, alpha=0.3)
-
-plt.tight_layout()
-plt.show()
-
-#__________________________________________________________________________________
-#1.3 Model fitting
-#for now I used the whole data but idk we that's what they asked for that part
-class GaussianRegression:
-    """Linear Regression with Gaussian Basis Functions"""
-
-    def __init__(self, sigma=1.0):
-        self.sigma = sigma
-        self.w = None
-        self.D = None
-
-    def fit(self, x, y, D):
-        # Store D for later use in predict
-        self.D = D
-        # create features for training and fit using least squares
-        X = gaussian_features(x, D, self.sigma)
-        #self.w = np.linalg.lstsq(X, y, rcond=None)[0]
-        self.w = np.linalg.pinv(X.T @ X) @ (X.T @ y)
-
-        return self
-
-    def predict(self, x):
-        # create features for prediction and predict
-        X = gaussian_features(x, self.D, self.sigma)
-        yh = X @ self.w
-
-        return yh
-
-
-def true_function(x):
-    return np.log(x + 1) * np.cos(x) + np.sin(2*x)
-
-# fit models with different numbers of basis functions and plot
-D_values = [0, 5, 10, 13, 15, 17, 20, 25, 30, 45]
-x_plot = np.linspace(0, 10, 300)
-
-plt.figure(figsize=(20, 8))
-
-for i, D in enumerate(D_values):
-    plt.subplot(2, 5, i+1)
-
-    # Create new model for each D value, fit and get predictions
-    model = GaussianRegression(sigma=1.0)
-    model.fit(x, y_noisy, D)
-    y_hat = model.predict(x_plot)
-
-    # Ensure y_hat is 1D and has same length as x_plot
-    y_hat = y_hat.flatten() if y_hat.ndim > 1 else y_hat
-
-    # Plot
-    plt.plot(x_plot, true_function(x_plot), 'b-', label='True Function', linewidth=2, alpha=0.7)
-    plt.plot(x, y_noisy, 'ro', label='Noisy Data', alpha=0.4, markersize=3)
-    plt.plot(x_plot, y_hat, 'g-', label=f'Fitted (D={D})', linewidth=2)
-
-    plt.ylim(-6, 6)
-    plt.title(f'D = {D}')
-    plt.grid(True, alpha=0.3)
-    plt.legend(fontsize=8)
-
-    # x and y labels
-    if i % 3 == 0:
-        plt.ylabel('y')
-    if i >= 9:
-        plt.xlabel('x')
-
-plt.tight_layout()
-plt.show()
-
-#__________________________________________________________________________________
-#1.4 Model Selection
-
-# Split the data into training and validation sets
-x_train, x_val, y_train, y_val = train_test_split(x, y_noisy, test_size=0.3, random_state=100)
-
-# range of basis functions to test
-D_values = list(range(0, 46))  # 0 to 45
-
-# Initialize arrays to store errors
-train_sse = []
-val_sse = []
-
-# For each number of basis functions
-for D in D_values:
-    # Create and fit the model
-    model = GaussianRegression(sigma=1.0)
-    model.fit(x_train, y_train, D)
-
-    # predict on training and validation set
-    yh_train = model.predict(x_train)
-    yh_val = model.predict(x_val)
-
-    # compute SSE
-    sse_train = np.sum((y_train - yh_train)**2)
-    sse_val = np.sum((y_val - yh_val)**2)
-
-    train_sse.append(sse_train)
-    val_sse.append(sse_val)
-
-    print(f"D={D}: Train SSE={sse_train:.0f}, Val SSE={sse_val:.0f}")
-
-optimal_D = D_values[int(np.argmin(val_sse))]
-print(f"Optimal D on single split = {optimal_D}")
-#optimal_sse = np.min(val_sse)
-#MAYBE CAN ADD A MANUAL LOWER BOUND
-
-# Plot training and validation SSE vs D for this single split
-plt.figure(figsize=(12, 6))
-plt.plot(D_values, train_sse, 'b-', label='Train SSE', linewidth=2, marker='o', markersize=4)
-plt.plot(D_values, val_sse, 'r-', label='Validation SSE', linewidth=2, marker='s', markersize=4)
-plt.axvline(x=optimal_D, color='g', linestyle='--', label=f'Optimal D = {optimal_D}')
-#plt.scatter([optimal_D], [val_sse[optimal_D]], label=f"Opt D = {optimal_D}", zorder=5)
-plt.xlabel('Number of Gaussian bases (D)')
-plt.ylabel('Sum of Squared Errors (SSE)')
-plt.title('Train and Validation SSE vs D (single split)')
-plt.legend()
-plt.grid(True, alpha=0.3)
-plt.yscale('log')
-plt.show()
-
-# plot optimal model fit
-plt.figure(figsize=(10, 4))
-optimal_model = GaussianRegression(sigma=1.0)
-yh_opt = optimal_model.fit(x_train, y_train, optimal_D)
-yh_opt = optimal_model.predict(x_plot)
-
-plt.plot(x_plot, true_function(x_plot), 'b-', label='True Function', linewidth=2)
-plt.plot(x_train, y_train, 'bo', label='Training Data', alpha=0.6, markersize=4)
-plt.plot(x_val, y_val, 'ro', label='Validation Data', alpha=0.6, markersize=4)
-plt.plot(x_plot, yh_opt, 'g-', label=f'Optimal Model (D={optimal_D})', linewidth=2)
-plt.ylim(-6, 6)
-plt.title(f'Optimal Model with {optimal_D} Gaussian Basis Functions')
-plt.ylabel('y')
-plt.legend()
-plt.grid(True, alpha=0.3)
-
-plt.tight_layout()
-plt.show()
-
-#__________________________________________________________________________________
-#2. Bias-Variance Tradeoff with Multiple Fits
-#sigma = (x.max() - x.min()) / D
-
-n_repetitions = 10
-D_values = [0, 5, 7, 10, 12, 15, 20, 25, 30, 45]
-x = np.linspace(0, 10, 300)
-
-# Initialize arrays to store results
-train_errs = np.zeros((n_repetitions, len(D_values)))
-test_errs = np.zeros((n_repetitions, len(D_values)))
-predictions = np.zeros((n_repetitions, len(D_values), len(x)))
-
-for rep in range(n_repetitions):
-    #create new dataset
-    x_data, y_true, y_data = generate_data(100, noise_std=1.0)
-
-    # split into train and test
-    x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, random_state=rep)
-
-    for D_i, D in enumerate(D_values):
-        # fit model
-        model = GaussianRegression(sigma=1.0)
-        model.fit(x_train, y_train, D)
-
-        # predict on both sets
-        yh_train = model.predict(x_train)
-        yh_test = model.predict(x_test)
-
-        # compute and store errors (MSE)
-        train_err = np.mean((y_train - yh_train)**2)
-        test_err = np.mean((y_test - yh_test)**2)
-
-        train_errs[rep, D_i] = train_err
-        test_errs[rep, D_i] = test_err
-
-        # predict for visualization
-        yh_cont = model.predict(x)
-        predictions[rep, D_i, :] = yh_cont
-
-# Plot 1: fitted models on the same plot, bias-variance tradeoff visualization
-fig, axes = plt.subplots(2, 5, figsize=(25, 10))
-axes = axes.flatten()
-
-for D_i, D in enumerate(D_values):
-    ax = axes[D_i]
-
-    # plot individual fits
-    for rep in range(n_repetitions):
-        if train_errs[rep, D_i] != np.inf:
-            ax.plot(x, predictions[rep, D_i, :], color='green', alpha=0.3, linewidth=1)
-
-    # plot true function
-    ax.plot(x, true_function(x), 'b-', linewidth=3, label='True Function')
-
-    #plot average prediction
-    valid_predictions = [predictions[rep, D_i, :]
-                         for rep in range(n_repetitions)
-                         if train_errs[rep, D_i] != np.inf]
-
-    if valid_predictions:
-        avg_prediction = np.mean(valid_predictions, axis=0)
-        ax.plot(x, avg_prediction, 'r-', linewidth=2, label='Average Prediction')
-
-    ax.set_title(f'D = {D} Gaussian Bases')
-    ax.set_xlabel('x')
-    ax.set_ylabel('y')
-    ax.set_ylim(-4, 4)
-    ax.grid(True, alpha=0.3)
-
-    if D_i == 0:
-        ax.legend()
-
-plt.tight_layout()
-plt.suptitle('Bias-Variance tradeoff with 10 different fits',
-             fontsize=16, y=1.02)
-plt.show()
-
-# Plot 2: average training and test errors
-plt.figure(figsize=(12, 6))
-
-# Compute mean and std
-avg_train_errors = np.mean(train_errs, axis=0)
-avg_test_errors = np.mean(test_errs, axis=0)
-std_train_errors = np.std(train_errs, axis=0)
-std_test_errors = np.std(test_errs, axis=0)
-
-# Plot with error bars
-plt.errorbar(D_values, avg_train_errors, yerr=std_train_errors, label='Average Training Error', marker='o', capsize=5, linewidth=2)
-plt.errorbar(D_values, avg_test_errors, yerr=std_test_errors, label='Average Test Error', marker='s', capsize=5, linewidth=2)
-
-plt.xlabel('Number of Gaussian Basis Functions (D)')
-plt.ylabel('Mean Squared Error')
-plt.title('Average Training and Test Errors Across 10 Repetitions')
-plt.legend()
-plt.grid(True, alpha=0.3)
-plt.yscale('log')
-plt.xticks(D_values)
-plt.show()
diff --git a/results/task1-data-generation.png b/results/task1-data-generation.png
new file mode 100644
index 0000000..83e3ccc
Binary files /dev/null and b/results/task1-data-generation.png differ
diff --git a/results/task1-model-fitting.png b/results/task1-model-fitting.png
new file mode 100644
index 0000000..3967bb7
Binary files /dev/null and b/results/task1-model-fitting.png differ
diff --git a/results/task1-model-selection-optimal-model.png b/results/task1-model-selection-optimal-model.png
new file mode 100644
index 0000000..f9a1f0c
Binary files /dev/null and b/results/task1-model-selection-optimal-model.png differ
diff --git a/results/task1-model-selection-single-split.png b/results/task1-model-selection-single-split.png
new file mode 100644
index 0000000..bc27b27
Binary files /dev/null and b/results/task1-model-selection-single-split.png differ
diff --git a/results/task1-non-linear-basis-functions.png b/results/task1-non-linear-basis-functions.png
new file mode 100644
index 0000000..355f828
Binary files /dev/null and b/results/task1-non-linear-basis-functions.png differ
diff --git a/results/task2-plotting-multiple-fits.png b/results/task2-plotting-multiple-fits.png
new file mode 100644
index 0000000..a98ebac
Binary files /dev/null and b/results/task2-plotting-multiple-fits.png differ
diff --git a/results/task2-plotting-train-and-test-errors.png b/results/task2-plotting-train-and-test-errors.png
new file mode 100644
index 0000000..f8aff72
Binary files /dev/null and b/results/task2-plotting-train-and-test-errors.png differ
diff --git a/tasks-1-&-2.py b/tasks-1-&-2.py
index e69de29..615af68 100644
--- a/tasks-1-&-2.py
+++ b/tasks-1-&-2.py
@@ -0,0 +1,363 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import warnings
+from sklearn.model_selection import train_test_split
+warnings.filterwarnings('ignore')
+
+#reproducibility
+np.random.seed(2)
+
+#__________________________________________________________________________________
+#Task 1
+#1.1
+def generate_data(n_samples=100, noise_std=1.0):
+    """Generates synthetic data with noise"""
+    # generate x values uniformly in [0, 10]
+    x = np.linspace(0, 10, n_samples)
+
+    #y values without noise
+    y_clean = np.log(x + 1) * np.cos(x) + np.sin(2*x)
+
+    #noise
+    noise = np.random.normal(0, noise_std, n_samples)
+    y_noisy = y_clean + noise
+
+    return x, y_clean, y_noisy
+
+# generate data
+x, y_clean, y_noisy = generate_data(100)
+
+# Plot clean and noisy data
+plt.plot(x, y_clean, 'b-', label='Clean Data', linewidth=2)
+plt.plot(x, y_noisy, 'ro', label='Noisy Data', alpha=0.6, markersize=4)
+plt.xlabel('x')
+plt.ylabel('y')
+plt.title('Clean vs Noisy Data')
+plt.legend()
+plt.grid(True, alpha=0.3)
+plt.savefig('results/task1-data-generation.png')
+
+
+#__________________________________________________________________________________
+#1.2
+def gaussian_basis(x, mu, sigma=1.0):
+    """Gaussian basis function"""
+    return np.exp(-(x - mu)**2 / sigma**2)
+
+def gaussian_features(x, D, sigma=1.0):
+    """Create Gaussian basis features"""
+    if D == 0:
+        return np.ones((len(x), 1))
+
+    x_min, x_max = np.min(x), np.max(x)
+
+    if D == 1:
+        mu_i = np.array([(x_min + x_max) / 2])
+    else:
+        mu_i = x_min + (x_max - x_min) / (D - 1) * np.arange(D)
+
+    features = np.ones((len(x), D + 1))  # with bias term
+
+    for i, mu in enumerate(mu_i):
+        features[:, i+1] = gaussian_basis(x, mu, sigma).flatten()
+
+    return features
+
+
+# Plot Gaussian basis functions for different D values
+D_values_to_plot = [5, 15, 30, 45]
+x_plot = np.linspace(0, 10, 200)
+
+plt.figure(figsize=(15, 4))
+
+for i, D in enumerate(D_values_to_plot, 1):
+    plt.subplot(1, 4, i)
+
+    # Calculate means
+    x_min, x_max = np.min(x_plot), np.max(x_plot)
+    mu_i = x_min + (x_max - x_min) / (D - 1) * np.arange(D)
+
+    # Plot each Gaussian basis
+    for mu in mu_i:
+        phi = gaussian_basis(x_plot, mu)
+        plt.plot(x_plot, phi, alpha=0.7)
+
+    plt.title(f'Gaussian Basis Functions (D={D})')
+    plt.xlabel('x')
+    plt.ylabel(r'$\phi(x)$')
+    plt.grid(True, alpha=0.3)
+
+plt.tight_layout()
+plt.savefig('results/task1-non-linear-basis-functions.png')
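+
+# Aside (illustrative sketch, not part of the assignment): the loop in
+# gaussian_features can also be written with NumPy broadcasting. With the same
+# evenly spaced centers mu_i and width sigma as above, the following would build
+# an identical (len(x), D + 1) design matrix with a leading bias column:
+#
+#     Phi = np.exp(-(x[:, None] - mu_i[None, :])**2 / sigma**2)
+#     features = np.hstack([np.ones((len(x), 1)), Phi])
+#
+# Quick shape check of the helper used above (D=5 -> bias + 5 Gaussian columns):
+assert gaussian_features(x_plot, 5).shape == (len(x_plot), 6)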
+
+#__________________________________________________________________________________
+#1.3 Model fitting
+# Note: this part fits on the whole dataset; I'm not sure that's exactly what was
+# asked here, but the proper train/validation split is done in 1.4 below.
+class GaussianRegression:
+    """Linear Regression with Gaussian Basis Functions"""
+
+    def __init__(self, sigma=1.0):
+        self.sigma = sigma
+        self.w = None
+        self.D = None
+
+    def fit(self, x, y, D):
+        # Store D for later use in predict
+        self.D = D
+        # create features for training and fit using least squares
+        X = gaussian_features(x, D, self.sigma)
+        #self.w = np.linalg.lstsq(X, y, rcond=None)[0]
+        self.w = np.linalg.pinv(X.T @ X) @ (X.T @ y)
+
+        return self
+
+    def predict(self, x):
+        # create features for prediction and predict
+        X = gaussian_features(x, self.D, self.sigma)
+        yh = X @ self.w
+
+        return yh
+
+
+def true_function(x):
+    return np.log(x + 1) * np.cos(x) + np.sin(2*x)
+
+# fit models with different numbers of basis functions and plot
+D_values = [0, 5, 10, 13, 15, 17, 20, 25, 30, 45]
+x_plot = np.linspace(0, 10, 300)
+
+plt.figure(figsize=(20, 8))
+
+for i, D in enumerate(D_values):
+    plt.subplot(2, 5, i+1)
+
+    # Create new model for each D value, fit and get predictions
+    model = GaussianRegression(sigma=1.0)
+    model.fit(x, y_noisy, D)
+    y_hat = model.predict(x_plot)
+
+    # Ensure y_hat is 1D and has same length as x_plot
+    y_hat = y_hat.flatten() if y_hat.ndim > 1 else y_hat
+
+    # Plot
+    plt.plot(x_plot, true_function(x_plot), 'b-', label='True Function', linewidth=2, alpha=0.7)
+    plt.plot(x, y_noisy, 'ro', label='Noisy Data', alpha=0.4, markersize=3)
+    plt.plot(x_plot, y_hat, 'g-', label=f'Fitted (D={D})', linewidth=2)
+
+    plt.ylim(-6, 6)
+    plt.title(f'D = {D}')
+    plt.grid(True, alpha=0.3)
+    plt.legend(fontsize=8)
+
+    # x and y labels (2x5 grid: left column is i % 5 == 0, bottom row is i >= 5)
+    if i % 5 == 0:
+        plt.ylabel('y')
+    if i >= 5:
+        plt.xlabel('x')
+
+plt.tight_layout()
+plt.savefig('results/task1-model-fitting.png')
+
+#__________________________________________________________________________________
+#1.4 Model Selection
+
+# Split the data into training and validation sets
+x_train, x_val, y_train, y_val = train_test_split(x, y_noisy, test_size=0.3, random_state=100)
+
+# range of basis functions to test
+D_values = list(range(0, 46))  # 0 to 45
+
+# Initialize arrays to store errors
+train_sse = []
+val_sse = []
+
+# For each number of basis functions
+for D in D_values:
+    # Create and fit the model
+    model = GaussianRegression(sigma=1.0)
+    model.fit(x_train, y_train, D)
+
+    # predict on training and validation set
+    yh_train = model.predict(x_train)
+    yh_val = model.predict(x_val)
+
+    # compute SSE
+    sse_train = np.sum((y_train - yh_train)**2)
+    sse_val = np.sum((y_val - yh_val)**2)
+
+    train_sse.append(sse_train)
+    val_sse.append(sse_val)
+
+    print(f"D={D}: Train SSE={sse_train:.0f}, Val SSE={sse_val:.0f}")
+
+optimal_D = D_values[int(np.argmin(val_sse))]
+print(f"Optimal D on single split = {optimal_D}")
+#optimal_sse = np.min(val_sse)
+# (a manual lower bound on D could be enforced here if the noisy minimum lands too low)
+
+# Plot training and validation SSE vs D for this single split
+plt.figure(figsize=(12, 6))
+plt.plot(D_values, train_sse, 'b-', label='Train SSE', linewidth=2, marker='o', markersize=4)
+plt.plot(D_values, val_sse, 'r-', label='Validation SSE', linewidth=2, marker='s', markersize=4)
+plt.axvline(x=optimal_D, color='g', linestyle='--', label=f'Optimal D = {optimal_D}')
+#plt.scatter([optimal_D], [val_sse[optimal_D]], label=f"Opt D = {optimal_D}", zorder=5)
+plt.xlabel('Number of Gaussian bases (D)')
+plt.ylabel('Sum of Squared Errors (SSE)')
+plt.title('Train and Validation SSE vs D (single split)')
+plt.legend()
+plt.grid(True, alpha=0.3)
+plt.yscale('log')
+plt.savefig('results/task1-model-selection-single-split.png')
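+
+# Aside (illustrative sketch, not required here): a single split can be noisy,
+# so the selected D may change with random_state. k-fold cross-validation
+# averages the validation SSE over several splits; a minimal version using
+# sklearn's KFold (sklearn is already a dependency above) would be:
+#
+#     from sklearn.model_selection import KFold
+#     kf = KFold(n_splits=5, shuffle=True, random_state=0)
+#     cv_sse = []
+#     for D in D_values:
+#         total = 0.0
+#         for tr, va in kf.split(x):
+#             m = GaussianRegression(sigma=1.0).fit(x[tr], y_noisy[tr], D)
+#             total += np.sum((y_noisy[va] - m.predict(x[va]))**2)
+#         cv_sse.append(total / kf.get_n_splits())
+#     optimal_D_cv = D_values[int(np.argmin(cv_sse))]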
+
+# plot optimal model fit
+plt.figure(figsize=(10, 4))
+optimal_model = GaussianRegression(sigma=1.0)
+optimal_model.fit(x_train, y_train, optimal_D)  # fit() returns self, no need to capture it
+yh_opt = optimal_model.predict(x_plot)
+
+plt.plot(x_plot, true_function(x_plot), 'b-', label='True Function', linewidth=2)
+plt.plot(x_train, y_train, 'bo', label='Training Data', alpha=0.6, markersize=4)
+plt.plot(x_val, y_val, 'ro', label='Validation Data', alpha=0.6, markersize=4)
+plt.plot(x_plot, yh_opt, 'g-', label=f'Optimal Model (D={optimal_D})', linewidth=2)
+plt.ylim(-6, 6)
+plt.title(f'Optimal Model with {optimal_D} Gaussian Basis Functions')
+plt.ylabel('y')
+plt.legend()
+plt.grid(True, alpha=0.3)
+
+plt.tight_layout()
+plt.savefig('results/task1-model-selection-optimal-model.png')
+
+#__________________________________________________________________________________
+#2. Bias-Variance Tradeoff with Multiple Fits
+#sigma = (x.max() - x.min()) / D
+
+n_repetitions = 10
+D_values = [0, 5, 7, 10, 12, 15, 20, 25, 30, 45]
+x = np.linspace(0, 10, 300)
+
+# Initialize arrays to store results
+train_errs = np.zeros((n_repetitions, len(D_values)))
+test_errs = np.zeros((n_repetitions, len(D_values)))
+predictions = np.zeros((n_repetitions, len(D_values), len(x)))
+
+for rep in range(n_repetitions):
+    #create new dataset
+    x_data, y_true, y_data = generate_data(100, noise_std=1.0)
+
+    # split into train and test
+    x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, random_state=rep)
+
+    for D_i, D in enumerate(D_values):
+        # fit model
+        model = GaussianRegression(sigma=1.0)
+        model.fit(x_train, y_train, D)
+
+        # predict on both sets
+        yh_train = model.predict(x_train)
+        yh_test = model.predict(x_test)
+
+        # compute and store errors (MSE)
+        train_err = np.mean((y_train - yh_train)**2)
+        test_err = np.mean((y_test - yh_test)**2)
+
+        train_errs[rep, D_i] = train_err
+        test_errs[rep, D_i] = test_err
+
+        # predict for visualization
+        yh_cont = model.predict(x)
+        predictions[rep, D_i, :] = yh_cont
+
+# Plot 1: fitted models on the same plot, bias-variance tradeoff visualization
+fig, axes = plt.subplots(2, 5, figsize=(25, 10))
+axes = axes.flatten()
+
+for D_i, D in enumerate(D_values):
+    ax = axes[D_i]
+
+    # plot individual fits
+    for rep in range(n_repetitions):
+        if train_errs[rep, D_i] != np.inf:
+            ax.plot(x, predictions[rep, D_i, :], color='green', alpha=0.3, linewidth=1)
+
+    # plot true function
+    ax.plot(x, true_function(x), 'b-', linewidth=3, label='True Function')
+
+    #plot average prediction
+    valid_predictions = [predictions[rep, D_i, :]
+                         for rep in range(n_repetitions)
+                         if train_errs[rep, D_i] != np.inf]
+
+    if valid_predictions:
+        avg_prediction = np.mean(valid_predictions, axis=0)
+        ax.plot(x, avg_prediction, 'r-', linewidth=2, label='Average Prediction')
+
+    ax.set_title(f'D = {D} Gaussian Bases')
+    ax.set_xlabel('x')
+    ax.set_ylabel('y')
+    ax.set_ylim(-4, 4)
+    ax.grid(True, alpha=0.3)
+
+    if D_i == 0:
+        ax.legend()
+
+plt.tight_layout()
+plt.suptitle('Bias-Variance tradeoff with 10 different fits',
+             fontsize=16, y=1.02)
+# bbox_inches='tight' keeps the suptitle (placed above the axes) from being clipped
+plt.savefig('results/task2-plotting-multiple-fits.png', bbox_inches='tight')
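+
+# Aside (illustrative sketch, not required): the stored predictions allow a
+# direct estimate of the bias-variance decomposition. For each D, the squared
+# bias is roughly the average squared gap between the mean prediction and the
+# true function, and the variance is the average squared spread of individual
+# fits around their mean:
+#
+#     avg_pred = predictions.mean(axis=0)               # (len(D_values), len(x))
+#     bias_sq = ((avg_pred - true_function(x)[None, :])**2).mean(axis=1)
+#     variance = ((predictions - avg_pred[None, :, :])**2).mean(axis=(0, 2))
+#
+# As D grows, bias_sq should shrink while variance grows, matching the fits above.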
+
+# Plot 2: average training and test errors
+plt.figure(figsize=(12, 6))
+
+# Compute mean and std
+avg_train_errors = np.mean(train_errs, axis=0)
+avg_test_errors = np.mean(test_errs, axis=0)
+std_train_errors = np.std(train_errs, axis=0)
+std_test_errors = np.std(test_errs, axis=0)
+
+# Plot with error bars
+plt.errorbar(D_values, avg_train_errors, yerr=std_train_errors, label='Average Training Error', marker='o', capsize=5, linewidth=2)
+plt.errorbar(D_values, avg_test_errors, yerr=std_test_errors, label='Average Test Error', marker='s', capsize=5, linewidth=2)
+
+plt.xlabel('Number of Gaussian Basis Functions (D)')
+plt.ylabel('Mean Squared Error')
+plt.title('Average Training and Test Errors Across 10 Repetitions')
+plt.legend()
+plt.grid(True, alpha=0.3)
+plt.yscale('log')
+plt.xticks(D_values)
+plt.savefig('results/task2-plotting-train-and-test-errors.png')
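+
+# Aside (illustrative sketch, not part of the assignment): an alternative to
+# limiting D is L2 (ridge) regularization, which keeps many bases but shrinks
+# the weights. The closed-form fit in GaussianRegression would become:
+#
+#     def ridge_fit(x, y, D, lam=0.1, sigma=1.0):
+#         X = gaussian_features(x, D, sigma)
+#         I = np.eye(X.shape[1])
+#         I[0, 0] = 0.0  # leave the bias weight unpenalized
+#         return np.linalg.solve(X.T @ X + lam * I, X.T @ y)
+#
+# Sweeping lam at a fixed large D traces an under/overfitting curve similar to
+# sweeping D in the plots above.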