diff --git a/A2.py b/A2.py
index f03f6a4..465b739 100644
--- a/A2.py
+++ b/A2.py
@@ -50,7 +50,11 @@ def gaussian_features(x, D, sigma=1.0):
         return np.ones((len(x), 1))
     x_min, x_max = np.min(x), np.max(x)
-    mu_i = x_min + (x_max - x_min) / (D - 1) * np.arange(D)
+
+    if D == 1:
+        mu_i = np.array([(x_min + x_max) / 2])
+    else:
+        mu_i = x_min + (x_max - x_min) / (D - 1) * np.arange(D)
 
     features = np.ones((len(x), D + 1))  # with bias term
@@ -102,7 +106,8 @@ class GaussianRegression:
         self.D = D
         # create features for training and fit using least squares
         X = gaussian_features(x, D, self.sigma)
-        self.w = np.linalg.lstsq(X, y, rcond=None)[0]
+        # solve the normal equations directly; pinv handles a singular X.T @ X
+        self.w = np.linalg.pinv(X.T @ X) @ (X.T @ y)
 
         return self
@@ -159,7 +164,7 @@ plt.show()
 #1.4 Model Selection
 
 # Split the data into training and validation sets
-x_train, x_val, y_train, y_val = train_test_split(x, y_noisy, test_size=0.2, random_state=42)
+x_train, x_val, y_train, y_val = train_test_split(x, y_noisy, test_size=0.3, random_state=100)
 
 # range of basis functions to test
 D_values = list(range(0, 46))  # 0 to 45
@@ -175,12 +180,9 @@ for D in D_values:
     model = GaussianRegression(sigma=1.0)
     model.fit(x_train, y_train, D)
 
-    # predict on training then validation
+    # predict on training and validation sets
     yh_train = model.predict(x_train)
-    yh_train = yh_train.flatten() if yh_train.ndim > 1 else yh_train
-
     yh_val = model.predict(x_val)
-    yh_val = yh_val.flatten() if yh_val.ndim > 1 else yh_val
 
     # compute SSE
     sse_train = np.sum((y_train - yh_train)**2)
@@ -189,7 +191,172 @@ for D in D_values:
     train_sse.append(sse_train)
     val_sse.append(sse_val)
-    print(f"D={D:2d}: Train SSE = {sse_train:8.2f}, Val SSE = {sse_val:8.2f}")
+    print(f"D={D}: Train SSE={sse_train:.0f}, Val SSE={sse_val:.0f}")
+
+
+optimal_D = D_values[int(np.argmin(val_sse))]
+print(f"Optimal D on single split = {optimal_D}")
+# note: a manual lower bound on D could be imposed here if needed
+
+
+# Plot training and validation SSE vs D for this single split
+plt.figure(figsize=(12, 6))
+plt.plot(D_values, train_sse, 'b-', label='Train SSE', linewidth=2, marker='o', markersize=4)
+plt.plot(D_values, val_sse, 'r-', label='Validation SSE', linewidth=2, marker='s', markersize=4)
+plt.axvline(x=optimal_D, color='g', linestyle='--', label=f'Optimal D = {optimal_D}')
+plt.xlabel('Number of Gaussian bases (D)')
+plt.ylabel('Sum of Squared Errors (SSE)')
+plt.title('Train and Validation SSE vs D (single split)')
+plt.legend()
+plt.grid(True, alpha=0.3)
+plt.yscale('log')
+plt.show()
+
+
+# plot optimal model fit
+plt.figure(figsize=(10, 4))
+optimal_model = GaussianRegression(sigma=1.0)
+optimal_model.fit(x_train, y_train, optimal_D)
+yh_opt = optimal_model.predict(x_plot)
+
+plt.plot(x_plot, true_function(x_plot), 'b-', label='True Function', linewidth=2)
+plt.plot(x_train, y_train, 'bo', label='Training Data', alpha=0.6, markersize=4)
+plt.plot(x_val, y_val, 'ro', label='Validation Data', alpha=0.6, markersize=4)
+plt.plot(x_plot, yh_opt, 'g-', label=f'Optimal Model (D={optimal_D})', linewidth=2)
+plt.ylim(-6, 6)
+plt.title(f'Optimal Model with {optimal_D} Gaussian Basis Functions')
+plt.ylabel('y')
+plt.legend()
+plt.grid(True, alpha=0.3)
+
+plt.tight_layout()
+plt.show()
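+
+# Optional sanity-check sketch: with X of full column rank, the pinv
+# normal-equation solution used in fit() should agree with np.linalg.lstsq
+# up to numerical precision. X_chk, w_pinv, and w_lstsq are illustrative names.
+X_chk = gaussian_features(x_train, optimal_D, sigma=1.0)
+w_pinv = np.linalg.pinv(X_chk.T @ X_chk) @ (X_chk.T @ y_train)
+w_lstsq = np.linalg.lstsq(X_chk, y_train, rcond=None)[0]
+print("max |w_pinv - w_lstsq| =", np.max(np.abs(w_pinv - w_lstsq)))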
+
+#__________________________________________________________________________________
+#2. Bias-Variance Tradeoff with Multiple Fits
+
+# alternative bandwidth choice: sigma = (x.max() - x.min()) / D
+
+n_repetitions = 10
+D_values = [0, 5, 7, 10, 12, 15, 20, 25, 30, 45]
+x = np.linspace(0, 10, 300)  # dense grid for plotting; note this overwrites the earlier x
+
+# Initialize arrays to store results
+train_errs = np.zeros((n_repetitions, len(D_values)))
+test_errs = np.zeros((n_repetitions, len(D_values)))
+predictions = np.zeros((n_repetitions, len(D_values), len(x)))
+
+for rep in range(n_repetitions):
+    # create a new dataset
+    x_data, y_true, y_data = generate_data(100, noise_std=1.0)
+
+    # split into train and test
+    x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, random_state=rep)
+
+    for D_i, D in enumerate(D_values):
+        # fit model
+        model = GaussianRegression(sigma=1.0)
+        model.fit(x_train, y_train, D)
+
+        # predict on both sets
+        yh_train = model.predict(x_train)
+        yh_test = model.predict(x_test)
+
+        # compute and store errors (MSE)
+        train_errs[rep, D_i] = np.mean((y_train - yh_train)**2)
+        test_errs[rep, D_i] = np.mean((y_test - yh_test)**2)
+
+        # predict on the dense grid for visualization
+        predictions[rep, D_i, :] = model.predict(x)
+
+
+# Plot 1: fitted models on shared axes, bias-variance tradeoff visualization
+fig, axes = plt.subplots(2, 5, figsize=(25, 10))
+axes = axes.flatten()
+
+for D_i, D in enumerate(D_values):
+    ax = axes[D_i]
+
+    # plot the individual fits
+    for rep in range(n_repetitions):
+        ax.plot(x, predictions[rep, D_i, :], color='green', alpha=0.3, linewidth=1)
+
+    # plot the true function
+    ax.plot(x, true_function(x), 'b-', linewidth=3, label='True Function')
+
+    # plot the average prediction across repetitions
+    avg_prediction = np.mean(predictions[:, D_i, :], axis=0)
+    ax.plot(x, avg_prediction, 'r-', linewidth=2, label='Average Prediction')
+
+    ax.set_title(f'D = {D} Gaussian Bases')
+    ax.set_xlabel('x')
+    ax.set_ylabel('y')
+    ax.set_ylim(-4, 4)
+    ax.grid(True, alpha=0.3)
+
+    if D_i == 0:
+        ax.legend()
+
+plt.tight_layout()
+plt.suptitle('Bias-Variance Tradeoff with 10 Different Fits', fontsize=16, y=1.02)
+plt.show()
+
+
+# Plot 2: average training and test errors
+plt.figure(figsize=(12, 6))
+
+# Compute mean and standard deviation across repetitions
+avg_train_errors = np.mean(train_errs, axis=0)
+avg_test_errors = np.mean(test_errs, axis=0)
+std_train_errors = np.std(train_errs, axis=0)
+std_test_errors = np.std(test_errs, axis=0)
+
+# Plot with error bars
+plt.errorbar(D_values, avg_train_errors, yerr=std_train_errors, label='Average Training Error', marker='o', capsize=5, linewidth=2)
+plt.errorbar(D_values, avg_test_errors, yerr=std_test_errors, label='Average Test Error', marker='s', capsize=5, linewidth=2)
+
+plt.xlabel('Number of Gaussian Basis Functions (D)')
+plt.ylabel('Mean Squared Error')
+plt.title('Average Training and Test Errors Across 10 Repetitions')
+plt.legend()
+plt.grid(True, alpha=0.3)
+plt.yscale('log')
+plt.xticks(D_values)
+plt.show()
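+
+# Follow-up sketch: empirical bias-variance decomposition from the stored
+# predictions (assumes predictions, D_values, x, and true_function as above);
+# bias^2 and variance are averaged over the x grid.
+avg_pred = np.mean(predictions, axis=0)                      # shape (len(D_values), len(x))
+bias_sq = np.mean((avg_pred - true_function(x))**2, axis=1)  # squared bias per D
+variance = np.mean(np.var(predictions, axis=0), axis=1)      # variance per D
+for D_i, D in enumerate(D_values):
+    print(f"D={D:2d}: bias^2 = {bias_sq[D_i]:.3f}, variance = {variance[D_i]:.3f}")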