diff --git a/.idea/-Comp-551-Assignment-2.iml b/.idea/-Comp-551-Assignment-2.iml
index 3d7297e..89b2bd1 100644
--- a/.idea/-Comp-551-Assignment-2.iml
+++ b/.idea/-Comp-551-Assignment-2.iml
@@ -4,7 +4,7 @@
-
+
\ No newline at end of file
diff --git a/A2.py b/A2.py
deleted file mode 100644
index 465b739..0000000
--- a/A2.py
+++ /dev/null
@@ -1,363 +0,0 @@
-import numpy as np
-import matplotlib.pyplot as plt
-import warnings
-from sklearn.model_selection import train_test_split
-warnings.filterwarnings('ignore')
-
-#reproducibility
-np.random.seed(2)
-
-#__________________________________________________________________________________
-#Task 1
-#1.1
-def generate_data(n_samples=100, noise_std=1.0):
- """Generates synthetic data with noise"""
-    # generate x values evenly spaced in [0, 10]
- x = np.linspace(0, 10, n_samples)
-
- #y values without noise
- y_clean = np.log(x + 1) * np.cos(x) + np.sin(2*x)
-
- #noise
- noise = np.random.normal(0, noise_std, n_samples)
- y_noisy = y_clean + noise
-
- return x, y_clean, y_noisy
-
-# generate data
-x, y_clean, y_noisy = generate_data(100)
-
-# Plot clean and noisy data
-plt.plot(x, y_clean, 'b-', label='Clean Data', linewidth=2)
-plt.plot(x, y_noisy, 'ro', label='Noisy Data', alpha=0.6, markersize=4)
-plt.xlabel('x')
-plt.ylabel('y')
-plt.title('Clean vs Noisy Data')
-plt.legend()
-plt.grid(True, alpha=0.3)
-plt.show()
-
-
-#__________________________________________________________________________________
-#1.2
-def gaussian_basis(x, mu, sigma=1.0):
- """Gaussian basis function"""
- return np.exp(-(x - mu)**2 / sigma**2)
-
-def gaussian_features(x, D, sigma=1.0, x_range=None):
-    """Create Gaussian basis features; x_range pins the centre placement"""
-    if D == 0:
-        return np.ones((len(x), 1))
-
-    # use the supplied range, if any, so train and predict features share centres
-    x_min, x_max = (np.min(x), np.max(x)) if x_range is None else x_range
-
- if D == 1:
- mu_i = np.array([(x_min + x_max) / 2])
- else:
- mu_i = x_min + (x_max - x_min) / (D - 1) * np.arange(D)
-
- features = np.ones((len(x), D + 1)) # with bias term
-
- for i, mu in enumerate(mu_i):
- features[:, i+1] = gaussian_basis(x, mu, sigma).flatten()
-
- return features
-
-
-# Plot Gaussian basis functions for different D values
-D_values_to_plot = [5, 15, 30, 45]
-x_plot = np.linspace(0, 10, 200)
-
-plt.figure(figsize=(15, 4))
-
-for i, D in enumerate(D_values_to_plot, 1):
- plt.subplot(1, 4, i)
-
- # Calculate means
- x_min, x_max = np.min(x_plot), np.max(x_plot)
- mu_i = x_min + (x_max - x_min) / (D - 1) * np.arange(D)
-
- # Plot each Gaussian basis
- for mu in mu_i:
- phi = gaussian_basis(x_plot, mu)
- plt.plot(x_plot, phi, alpha=0.7)
-
- plt.title(f'Gaussian Basis Functions (D={D})')
- plt.xlabel('x')
- plt.ylabel('$\phi(x)$')
- plt.grid(True, alpha=0.3)
-
-plt.tight_layout()
-plt.show()
-
-#__________________________________________________________________________________
-#1.3 Model fitting
-# For this part the model is fit on the whole dataset; a train/validation split is introduced in 1.4.
-class GaussianRegression:
- """Linear Regression with Gaussian Basis Functions"""
-
- def __init__(self, sigma=1.0):
- self.sigma = sigma
- self.w = None
-        self.D = None
-        self.x_range = None
-
-    def fit(self, x, y, D):
-        # Store D and the training input range so predict reuses the same centres
-        self.D = D
-        self.x_range = (np.min(x), np.max(x))
-        # create features for training and fit using least squares
-        X = gaussian_features(x, D, self.sigma, x_range=self.x_range)
-        #self.w = np.linalg.lstsq(X, y, rcond=None)[0]
-        self.w = np.linalg.pinv(X.T @ X) @ (X.T @ y)
-
- return self
-
-
-    def predict(self, x):
-        # create features with the centres fixed at fit time, then predict
-        X = gaussian_features(x, self.D, self.sigma, x_range=self.x_range)
- yh = X @ self.w
-
- return yh
-
-
-
-def true_function(x):
- return np.log(x + 1) * np.cos(x) + np.sin(2*x)
-
-# fit models with different numbers of basis functions and plot
-D_values = [0, 5, 10, 13, 15, 17, 20, 25, 30, 45]
-x_plot = np.linspace(0, 10, 300)
-
-plt.figure(figsize=(20, 8))
-
-for i, D in enumerate(D_values):
- plt.subplot(2, 5, i+1)
-
- # Create new model for each D value, fit and get predictions
- model = GaussianRegression(sigma=1.0)
- model.fit(x, y_noisy, D)
- y_hat = model.predict(x_plot)
-
- # Ensure y_hat is 1D and has same length as x_plot
- y_hat = y_hat.flatten() if y_hat.ndim > 1 else y_hat
-
- # Plot
- plt.plot(x_plot, true_function(x_plot), 'b-', label='True Function', linewidth=2, alpha=0.7)
- plt.plot(x, y_noisy, 'ro', label='Noisy Data', alpha=0.4, markersize=3)
- plt.plot(x_plot, y_hat, 'g-', label=f'Fitted (D={D})', linewidth=2)
-
- plt.ylim(-6, 6)
- plt.title(f'D = {D}')
- plt.grid(True, alpha=0.3)
- plt.legend(fontsize=8)
-
-    # y labels on the left column, x labels on the bottom row of the 2x5 grid
-    if i % 5 == 0:
-        plt.ylabel('y')
-    if i >= 5:
-        plt.xlabel('x')
-
-plt.tight_layout()
-plt.show()
-
-#__________________________________________________________________________________
-#1.4 Model Selection
-
-# Split the data into training and validation sets
-x_train, x_val, y_train, y_val = train_test_split(x, y_noisy, test_size=0.3, random_state=100)
-
-# range of basis functions to test
-D_values = list(range(0, 46)) # 0 to 45
-
-# Initialize arrays to store errors
-train_sse = []
-val_sse = []
-
-
-# For each number of basis functions
-for D in D_values:
- # Create and fit the model
- model = GaussianRegression(sigma=1.0)
- model.fit(x_train, y_train, D)
-
- # predict on training and validation set
- yh_train = model.predict(x_train)
- yh_val = model.predict(x_val)
-
- # compute SSE
- sse_train = np.sum((y_train - yh_train)**2)
- sse_val = np.sum((y_val - yh_val)**2)
-
- train_sse.append(sse_train)
- val_sse.append(sse_val)
-
- print(f"D={D}: Train SSE={sse_train:.0f}, Val SSE={sse_val:.0f}")
-
-
-optimal_D = D_values[int(np.argmin(val_sse))]
-print(f"Optimal D on single split = {optimal_D}")
-#optimal_sse = np.min(val_sse)
-# A manual lower bound on D could be enforced here if the argmin ever picks a degenerate model.
-
-
-# Plot training and validation SSE vs D for this single split
-plt.figure(figsize=(12, 6))
-plt.plot(D_values, train_sse, 'b-', label='Train SSE', linewidth=2, marker='o', markersize=4)
-plt.plot(D_values, val_sse, 'r-', label='Validation SSE', linewidth=2, marker='s', markersize=4)
-plt.axvline(x=optimal_D, color='g', linestyle='--', label=f'Optimal D = {optimal_D}')
-#plt.scatter([optimal_D], [val_sse[optimal_D]], label=f"Opt D = {optimal_D}", zorder=5)
-plt.xlabel('Number of Gaussian bases (D)')
-plt.ylabel('Sum of Squared Errors (SSE)')
-plt.title('Train and Validation SSE vs D (single split)')
-plt.legend()
-plt.grid(True, alpha=0.3)
-plt.yscale('log')
-plt.show()
-
-
-# plot optimal model fit
-plt.figure(figsize=(10, 4))
-optimal_model = GaussianRegression(sigma=1.0)
-optimal_model.fit(x_train, y_train, optimal_D)  # fit returns self, not predictions
-yh_opt = optimal_model.predict(x_plot)
-
-plt.plot(x_plot, true_function(x_plot), 'b-', label='True Function', linewidth=2)
-plt.plot(x_train, y_train, 'bo', label='Training Data', alpha=0.6, markersize=4)
-plt.plot(x_val, y_val, 'ro', label='Validation Data', alpha=0.6, markersize=4)
-plt.plot(x_plot, yh_opt, 'g-', label=f'Optimal Model (D={optimal_D})', linewidth=2)
-plt.ylim(-6, 6)
-plt.title(f'Optimal Model with {optimal_D} Gaussian Basis Functions')
-plt.ylabel('y')
-plt.legend()
-plt.grid(True, alpha=0.3)
-
-plt.tight_layout()
-plt.show()
-
-#__________________________________________________________________________________
-#2. Bias-Variance Tradeoff with Multiple Fits
-#sigma = (x.max() - x.min()) / D
-
-n_repetitions = 10
-D_values = [0, 5, 7, 10, 12, 15, 20, 25, 30, 45]
-x = np.linspace(0, 10, 300)
-
-# Initialize arrays to store results
-train_errs = np.zeros((n_repetitions, len(D_values)))
-test_errs = np.zeros((n_repetitions, len(D_values)))
-predictions = np.zeros((n_repetitions, len(D_values), len(x)))
-
-for rep in range(n_repetitions):
- #create new dataset
- x_data, y_true, y_data = generate_data(100, noise_std=1.0)
-
- # split into train and test
- x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, random_state=rep)
-
- for D_i, D in enumerate(D_values):
- # fit model
- model = GaussianRegression(sigma=1.0)
- model.fit(x_train, y_train, D)
-
- # predict on both sets
- yh_train = model.predict(x_train)
- yh_test = model.predict(x_test)
-
- # compute and store errors (MSE)
- train_err = np.mean((y_train - yh_train)**2)
- test_err = np.mean((y_test - yh_test)**2)
-
- train_errs[rep, D_i] = train_err
- test_errs[rep, D_i] = test_err
-
- # predict for visualization
- yh_cont = model.predict(x)
- predictions[rep, D_i, :] = yh_cont
-
-
-# Plot 1: fitted models on the same plot, bias-variance tradeoff visualization
-fig, axes = plt.subplots(2, 5, figsize=(25, 10))
-axes = axes.flatten()
-
-for D_i, D in enumerate(D_values):
- ax = axes[D_i]
-
- # plot individual fits
- for rep in range(n_repetitions):
- if train_errs[rep, D_i] != np.inf:
- ax.plot(x, predictions[rep, D_i, :], color='green', alpha=0.3, linewidth=1)
-
- # plot true function
- ax.plot(x, true_function(x), 'b-', linewidth=3, label='True Function')
-
- #plot average prediction
- valid_predictions = [predictions[rep, D_i, :]
- for rep in range(n_repetitions)
- if train_errs[rep, D_i] != np.inf]
-
- if valid_predictions:
- avg_prediction = np.mean(valid_predictions, axis=0)
- ax.plot(x, avg_prediction, 'r-', linewidth=2, label='Average Prediction')
-
- ax.set_title(f'D = {D} Gaussian Bases')
- ax.set_xlabel('x')
- ax.set_ylabel('y')
- ax.set_ylim(-4, 4)
- ax.grid(True, alpha=0.3)
-
- if D_i == 0:
- ax.legend()
-
-plt.tight_layout()
-plt.suptitle('Bias-Variance tradeoff with 10 different fits',
- fontsize=16, y=1.02)
-plt.show()
-
-
-# Plot 2: average training and test errors
-plt.figure(figsize=(12, 6))
-
-# Compute mean and std
-avg_train_errors = np.mean(train_errs, axis=0)
-avg_test_errors = np.mean(test_errs, axis=0)
-std_train_errors = np.std(train_errs, axis=0)
-std_test_errors = np.std(test_errs, axis=0)
-
-
-# Plot with error bars
-plt.errorbar(D_values, avg_train_errors, yerr=std_train_errors, label='Average Training Error', marker='o', capsize=5, linewidth=2)
-plt.errorbar(D_values, avg_test_errors, yerr=std_test_errors, label='Average Test Error', marker='s', capsize=5, linewidth=2)
-
-plt.xlabel('Number of Gaussian Basis Functions (D)')
-plt.ylabel('Mean Squared Error')
-plt.title('Average Training and Test Errors Across 10 Repetitions')
-plt.legend()
-plt.grid(True, alpha=0.3)
-plt.yscale('log')
-plt.xticks(D_values)
-plt.show()
-
diff --git a/results/task1-data-generation.png b/results/task1-data-generation.png
new file mode 100644
index 0000000..83e3ccc
Binary files /dev/null and b/results/task1-data-generation.png differ
diff --git a/results/task1-model-fitting.png b/results/task1-model-fitting.png
new file mode 100644
index 0000000..3967bb7
Binary files /dev/null and b/results/task1-model-fitting.png differ
diff --git a/results/task1-model-selection-optimal-model.png b/results/task1-model-selection-optimal-model.png
new file mode 100644
index 0000000..f9a1f0c
Binary files /dev/null and b/results/task1-model-selection-optimal-model.png differ
diff --git a/results/task1-model-selection-single-split.png b/results/task1-model-selection-single-split.png
new file mode 100644
index 0000000..bc27b27
Binary files /dev/null and b/results/task1-model-selection-single-split.png differ
diff --git a/results/task1-non-linear-basis-functions.png b/results/task1-non-linear-basis-functions.png
new file mode 100644
index 0000000..355f828
Binary files /dev/null and b/results/task1-non-linear-basis-functions.png differ
diff --git a/results/task2-plotting-multiple-fits.png b/results/task2-plotting-multiple-fits.png
new file mode 100644
index 0000000..a98ebac
Binary files /dev/null and b/results/task2-plotting-multiple-fits.png differ
diff --git a/results/task2-plotting-train-and-test-errors.png b/results/task2-plotting-train-and-test-errors.png
new file mode 100644
index 0000000..f8aff72
Binary files /dev/null and b/results/task2-plotting-train-and-test-errors.png differ
diff --git a/tasks-1-&-2.py b/tasks-1-&-2.py
index e69de29..615af68 100644
--- a/tasks-1-&-2.py
+++ b/tasks-1-&-2.py
@@ -0,0 +1,363 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import warnings
+from sklearn.model_selection import train_test_split
+warnings.filterwarnings('ignore')
+
+#reproducibility
+np.random.seed(2)
+
+#__________________________________________________________________________________
+#Task 1
+#1.1
+def generate_data(n_samples=100, noise_std=1.0):
+ """Generates synthetic data with noise"""
+    # generate x values evenly spaced in [0, 10]
+ x = np.linspace(0, 10, n_samples)
+
+ #y values without noise
+ y_clean = np.log(x + 1) * np.cos(x) + np.sin(2*x)
+
+ #noise
+ noise = np.random.normal(0, noise_std, n_samples)
+ y_noisy = y_clean + noise
+
+ return x, y_clean, y_noisy
+
+# generate data
+x, y_clean, y_noisy = generate_data(100)
+
+# Plot clean and noisy data
+plt.plot(x, y_clean, 'b-', label='Clean Data', linewidth=2)
+plt.plot(x, y_noisy, 'ro', label='Noisy Data', alpha=0.6, markersize=4)
+plt.xlabel('x')
+plt.ylabel('y')
+plt.title('Clean vs Noisy Data')
+plt.legend()
+plt.grid(True, alpha=0.3)
+plt.savefig('results/task1-data-generation.png')
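+
+# Hedged aside (an assumption about intent, not part of the original code):
+# np.linspace gives an evenly spaced grid; if x were instead meant to be drawn
+# uniformly at random from [0, 10], a sorted random draw would do it:
+#   x = np.sort(np.random.uniform(0, 10, n_samples))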
+
+
+#__________________________________________________________________________________
+#1.2
+def gaussian_basis(x, mu, sigma=1.0):
+ """Gaussian basis function"""
+ return np.exp(-(x - mu)**2 / sigma**2)
+
+def gaussian_features(x, D, sigma=1.0, x_range=None):
+    """Create Gaussian basis features; x_range pins the centre placement"""
+    if D == 0:
+        return np.ones((len(x), 1))
+
+    # use the supplied range, if any, so train and predict features share centres
+    x_min, x_max = (np.min(x), np.max(x)) if x_range is None else x_range
+
+ if D == 1:
+ mu_i = np.array([(x_min + x_max) / 2])
+ else:
+ mu_i = x_min + (x_max - x_min) / (D - 1) * np.arange(D)
+
+ features = np.ones((len(x), D + 1)) # with bias term
+
+ for i, mu in enumerate(mu_i):
+ features[:, i+1] = gaussian_basis(x, mu, sigma).flatten()
+
+ return features
+
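+# A minimal vectorized sketch of the same feature map (an equivalence check,
+# not a replacement; assumes the evenly spaced centres used above):
+def gaussian_features_vec(x, D, sigma=1.0, x_range=None):
+    """Vectorized Gaussian features; should match gaussian_features."""
+    if D == 0:
+        return np.ones((len(x), 1))
+    x = np.asarray(x, dtype=float)
+    x_min, x_max = (x.min(), x.max()) if x_range is None else x_range
+    mu = np.array([(x_min + x_max) / 2]) if D == 1 else np.linspace(x_min, x_max, D)
+    # broadcast (n, 1) against (1, D) to build all basis columns at once
+    phi = np.exp(-(x[:, None] - mu[None, :])**2 / sigma**2)
+    return np.hstack([np.ones((len(x), 1)), phi])
+
+# sanity check against the loop-based version on a small grid
+_xg = np.linspace(0, 10, 50)
+assert np.allclose(gaussian_features_vec(_xg, 8), gaussian_features(_xg, 8))
+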
+
+# Plot Gaussian basis functions for different D values
+D_values_to_plot = [5, 15, 30, 45]
+x_plot = np.linspace(0, 10, 200)
+
+plt.figure(figsize=(15, 4))
+
+for i, D in enumerate(D_values_to_plot, 1):
+ plt.subplot(1, 4, i)
+
+ # Calculate means
+ x_min, x_max = np.min(x_plot), np.max(x_plot)
+ mu_i = x_min + (x_max - x_min) / (D - 1) * np.arange(D)
+
+ # Plot each Gaussian basis
+ for mu in mu_i:
+ phi = gaussian_basis(x_plot, mu)
+ plt.plot(x_plot, phi, alpha=0.7)
+
+ plt.title(f'Gaussian Basis Functions (D={D})')
+ plt.xlabel('x')
+ plt.ylabel(r'$\phi(x)$')
+ plt.grid(True, alpha=0.3)
+
+plt.tight_layout()
+plt.savefig('results/task1-non-linear-basis-functions.png')
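+
+# Hedged observation: with sigma fixed at 1.0 the bases overlap heavily once
+# the centre spacing (x_max - x_min)/(D - 1) drops well below sigma, e.g.
+# spacing ~0.23 at D=45. Tying the width to the spacing, e.g.
+#   sigma = (x_max - x_min) / (D - 1)
+# is a common alternative (see the commented-out sigma line in Task 2 below).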
+
+#__________________________________________________________________________________
+#1.3 Model fitting
+# For this part the model is fit on the whole dataset; a train/validation split is introduced in 1.4.
+class GaussianRegression:
+ """Linear Regression with Gaussian Basis Functions"""
+
+ def __init__(self, sigma=1.0):
+ self.sigma = sigma
+ self.w = None
+        self.D = None
+        self.x_range = None
+
+    def fit(self, x, y, D):
+        # Store D and the training input range so predict reuses the same centres
+        self.D = D
+        self.x_range = (np.min(x), np.max(x))
+        # create features for training and fit using least squares
+        X = gaussian_features(x, D, self.sigma, x_range=self.x_range)
+        #self.w = np.linalg.lstsq(X, y, rcond=None)[0]
+        self.w = np.linalg.pinv(X.T @ X) @ (X.T @ y)
+
+ return self
+
+
+    def predict(self, x):
+        # create features with the centres fixed at fit time, then predict
+        X = gaussian_features(x, self.D, self.sigma, x_range=self.x_range)
+ yh = X @ self.w
+
+ return yh
+
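+# Hedged numerical note: forming the normal equations squares the condition
+# number of X, so for large D the pinv-based solve above can lose precision;
+# np.linalg.lstsq on X directly (the commented-out line in fit) is the usual
+# stable alternative. Quick agreement check on a well-conditioned problem:
+_X = gaussian_features(x, 5, 1.0)
+_w_normal = np.linalg.pinv(_X.T @ _X) @ (_X.T @ y_noisy)
+_w_lstsq = np.linalg.lstsq(_X, y_noisy, rcond=None)[0]
+assert np.allclose(_w_normal, _w_lstsq, rtol=1e-4, atol=1e-6)
+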
+
+
+def true_function(x):
+ return np.log(x + 1) * np.cos(x) + np.sin(2*x)
+
+# fit models with different numbers of basis functions and plot
+D_values = [0, 5, 10, 13, 15, 17, 20, 25, 30, 45]
+x_plot = np.linspace(0, 10, 300)
+
+plt.figure(figsize=(20, 8))
+
+for i, D in enumerate(D_values):
+ plt.subplot(2, 5, i+1)
+
+ # Create new model for each D value, fit and get predictions
+ model = GaussianRegression(sigma=1.0)
+ model.fit(x, y_noisy, D)
+ y_hat = model.predict(x_plot)
+
+ # Ensure y_hat is 1D and has same length as x_plot
+ y_hat = y_hat.flatten() if y_hat.ndim > 1 else y_hat
+
+ # Plot
+ plt.plot(x_plot, true_function(x_plot), 'b-', label='True Function', linewidth=2, alpha=0.7)
+ plt.plot(x, y_noisy, 'ro', label='Noisy Data', alpha=0.4, markersize=3)
+ plt.plot(x_plot, y_hat, 'g-', label=f'Fitted (D={D})', linewidth=2)
+
+ plt.ylim(-6, 6)
+ plt.title(f'D = {D}')
+ plt.grid(True, alpha=0.3)
+ plt.legend(fontsize=8)
+
+ # x and y labels
+ if i % 3 == 0:
+ plt.ylabel('y')
+ if i >= 9:
+ plt.xlabel('x')
+
+plt.tight_layout()
+plt.savefig('results/task1-model-fitting.png')
+
+#__________________________________________________________________________________
+#1.4 Model Selection
+
+# Split the data into training and validation sets
+x_train, x_val, y_train, y_val = train_test_split(x, y_noisy, test_size=0.3, random_state=100)
+
+# range of basis functions to test
+D_values = list(range(0, 46)) # 0 to 45
+
+# Initialize arrays to store errors
+train_sse = []
+val_sse = []
+
+
+# For each number of basis functions
+for D in D_values:
+ # Create and fit the model
+ model = GaussianRegression(sigma=1.0)
+ model.fit(x_train, y_train, D)
+
+ # predict on training and validation set
+ yh_train = model.predict(x_train)
+ yh_val = model.predict(x_val)
+
+ # compute SSE
+ sse_train = np.sum((y_train - yh_train)**2)
+ sse_val = np.sum((y_val - yh_val)**2)
+
+ train_sse.append(sse_train)
+ val_sse.append(sse_val)
+
+ print(f"D={D}: Train SSE={sse_train:.0f}, Val SSE={sse_val:.0f}")
+
+
+optimal_D = D_values[int(np.argmin(val_sse))]
+print(f"Optimal D on single split = {optimal_D}")
+#optimal_sse = np.min(val_sse)
+# A manual lower bound on D could be enforced here if the argmin ever picks a degenerate model.
+
+
+# Plot training and validation SSE vs D for this single split
+plt.figure(figsize=(12, 6))
+plt.plot(D_values, train_sse, 'b-', label='Train SSE', linewidth=2, marker='o', markersize=4)
+plt.plot(D_values, val_sse, 'r-', label='Validation SSE', linewidth=2, marker='s', markersize=4)
+plt.axvline(x=optimal_D, color='g', linestyle='--', label=f'Optimal D = {optimal_D}')
+#plt.scatter([optimal_D], [val_sse[optimal_D]], label=f"Opt D = {optimal_D}", zorder=5)
+plt.xlabel('Number of Gaussian bases (D)')
+plt.ylabel('Sum of Squared Errors (SSE)')
+plt.title('Train and Validation SSE vs D (single split)')
+plt.legend()
+plt.grid(True, alpha=0.3)
+plt.yscale('log')
+plt.savefig('results/task1-model-selection-single-split.png')
+
+
+# plot optimal model fit
+plt.figure(figsize=(10, 4))
+optimal_model = GaussianRegression(sigma=1.0)
+optimal_model.fit(x_train, y_train, optimal_D)  # fit returns self, not predictions
+yh_opt = optimal_model.predict(x_plot)
+
+plt.plot(x_plot, true_function(x_plot), 'b-', label='True Function', linewidth=2)
+plt.plot(x_train, y_train, 'bo', label='Training Data', alpha=0.6, markersize=4)
+plt.plot(x_val, y_val, 'ro', label='Validation Data', alpha=0.6, markersize=4)
+plt.plot(x_plot, yh_opt, 'g-', label=f'Optimal Model (D={optimal_D})', linewidth=2)
+plt.ylim(-6, 6)
+plt.title(f'Optimal Model with {optimal_D} Gaussian Basis Functions')
+plt.ylabel('y')
+plt.legend()
+plt.grid(True, alpha=0.3)
+
+plt.tight_layout()
+plt.savefig('results/task1-model-selection-optimal-model.png')
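+
+# A minimal K-fold cross-validation sketch (a hedged extension; the single
+# split above is what the task used, averaging over folds just makes the
+# choice of D less sensitive to one particular split):
+from sklearn.model_selection import KFold  # local import keeps the sketch self-contained
+
+kf = KFold(n_splits=5, shuffle=True, random_state=100)
+cv_sse = np.zeros(len(D_values))
+for D_i, D in enumerate(D_values):
+    for tr_idx, va_idx in kf.split(x):
+        m = GaussianRegression(sigma=1.0).fit(x[tr_idx], y_noisy[tr_idx], D)
+        cv_sse[D_i] += np.sum((y_noisy[va_idx] - m.predict(x[va_idx]))**2)
+cv_sse /= kf.get_n_splits()
+print(f"Optimal D by 5-fold CV = {D_values[int(np.argmin(cv_sse))]}")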
+
+#__________________________________________________________________________________
+#2. Bias-Variance Tradeoff with Multiple Fits
+#sigma = (x.max() - x.min()) / D
+
+n_repetitions = 10
+D_values = [0, 5, 7, 10, 12, 15, 20, 25, 30, 45]
+x = np.linspace(0, 10, 300)
+
+# Initialize arrays to store results
+train_errs = np.zeros((n_repetitions, len(D_values)))
+test_errs = np.zeros((n_repetitions, len(D_values)))
+predictions = np.zeros((n_repetitions, len(D_values), len(x)))
+
+for rep in range(n_repetitions):
+ #create new dataset
+ x_data, y_true, y_data = generate_data(100, noise_std=1.0)
+
+ # split into train and test
+ x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, random_state=rep)
+
+ for D_i, D in enumerate(D_values):
+ # fit model
+ model = GaussianRegression(sigma=1.0)
+ model.fit(x_train, y_train, D)
+
+ # predict on both sets
+ yh_train = model.predict(x_train)
+ yh_test = model.predict(x_test)
+
+ # compute and store errors (MSE)
+ train_err = np.mean((y_train - yh_train)**2)
+ test_err = np.mean((y_test - yh_test)**2)
+
+ train_errs[rep, D_i] = train_err
+ test_errs[rep, D_i] = test_err
+
+ # predict for visualization
+ yh_cont = model.predict(x)
+ predictions[rep, D_i, :] = yh_cont
+
+
+# Plot 1: fitted models on the same plot, bias-variance tradeoff visualization
+fig, axes = plt.subplots(2, 5, figsize=(25, 10))
+axes = axes.flatten()
+
+for D_i, D in enumerate(D_values):
+ ax = axes[D_i]
+
+ # plot individual fits
+ for rep in range(n_repetitions):
+ if train_errs[rep, D_i] != np.inf:
+ ax.plot(x, predictions[rep, D_i, :], color='green', alpha=0.3, linewidth=1)
+
+ # plot true function
+ ax.plot(x, true_function(x), 'b-', linewidth=3, label='True Function')
+
+ #plot average prediction
+ valid_predictions = [predictions[rep, D_i, :]
+ for rep in range(n_repetitions)
+ if train_errs[rep, D_i] != np.inf]
+
+ if valid_predictions:
+ avg_prediction = np.mean(valid_predictions, axis=0)
+ ax.plot(x, avg_prediction, 'r-', linewidth=2, label='Average Prediction')
+
+ ax.set_title(f'D = {D} Gaussian Bases')
+ ax.set_xlabel('x')
+ ax.set_ylabel('y')
+ ax.set_ylim(-4, 4)
+ ax.grid(True, alpha=0.3)
+
+ if D_i == 0:
+ ax.legend()
+
+plt.tight_layout()
+plt.suptitle('Bias-Variance tradeoff with 10 different fits',
+ fontsize=16, y=1.02)
+plt.savefig('results/task2-plotting-multiple-fits.png', bbox_inches='tight')  # tight bbox so the suptitle at y=1.02 is not clipped
+
+
+# Plot 2: average training and test errors
+plt.figure(figsize=(12, 6))
+
+# Compute mean and std
+avg_train_errors = np.mean(train_errs, axis=0)
+avg_test_errors = np.mean(test_errs, axis=0)
+std_train_errors = np.std(train_errs, axis=0)
+std_test_errors = np.std(test_errs, axis=0)
+
+
+# Plot with error bars
+plt.errorbar(D_values, avg_train_errors, yerr=std_train_errors, label='Average Training Error', marker='o', capsize=5, linewidth=2)
+plt.errorbar(D_values, avg_test_errors, yerr=std_test_errors, label='Average Test Error', marker='s', capsize=5, linewidth=2)
+
+plt.xlabel('Number of Gaussian Basis Functions (D)')
+plt.ylabel('Mean Squared Error')
+plt.title('Average Training and Test Errors Across 10 Repetitions')
+plt.legend()
+plt.grid(True, alpha=0.3)
+plt.yscale('log')
+plt.xticks(D_values)
+plt.savefig('results/task2-plotting-train-and-test-errors.png')
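+
+# Hedged extension (not required above): with the true function known, the
+# stored predictions give an explicit bias^2 / variance decomposition per D,
+# averaged over the 10 repetitions and over the x grid.
+f_true = true_function(x)
+mean_pred = predictions.mean(axis=0)              # (len(D_values), len(x))
+bias_sq = ((mean_pred - f_true)**2).mean(axis=1)  # squared bias for each D
+variance = predictions.var(axis=0).mean(axis=1)   # variance for each D
+for D, b2, v in zip(D_values, bias_sq, variance):
+    print(f"D={D}: bias^2={b2:.3f}, variance={v:.3f}")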
+