Updated the code: merged the Task 4 script (effect of L1 and L2 regularization on the loss landscape) into the Task 3 script, removed the standalone Task 4 file, changed the project SDK name in .idea/misc.xml from "Python 3.14 (-Comp-551-Assignment-2)" to "Python 3.14", and regenerated the result plots.
.idea/misc.xml (generated)

@@ -3,5 +3,5 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.14 (-Comp-551-Assignment-2)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.14 (-Comp-551-Assignment-2)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.14" project-jdk-type="Python SDK" />
 </project>
Deleted file (the standalone Task 4 script; its contents move into the Task 3 script below)

@@ -1,72 +0,0 @@
-import numpy as np
-import matplotlib.pyplot as plt
-
-
-# Generating Synthetic Data
-def generate_linear_data(n):
-    x = np.random.uniform(0, 10, n)  # sample x uniformly on [0, 10]
-    eps = np.random.normal(0, 1, n)  # standard normal noise
-    y = -3 * x + 8 + 2 * eps  # y = −3x + 8 + 2ϵ
-    return x.reshape(-1, 1), y
-
-
-# Gradient Descent with L1/L2
-def gradient_descent(x, y, lam, reg_type, lr, iters):
-    x_b = np.hstack([np.ones_like(x), x])  # prepend a bias column to x
-    w = np.zeros(x_b.shape[1])  # initialize weights at zero
-    path = [w.copy()]
-
-    for i in range(iters):
-        pred = x_b @ w  # linear regression prediction
-        error = pred - y  # residuals
-        grad = x_b.T @ error / len(y)  # gradient of the squared-error term
-
-        if reg_type == 'l2':
-            grad += lam * w  # L2 penalty gradient
-        elif reg_type == 'l1':
-            grad += lam * np.sign(w)  # L1 penalty subgradient
-
-        w -= lr * grad  # gradient descent update
-        path.append(w.copy())
-
-    return w, np.array(path)
-
-
-# Plotting the loss
-def plot_contour(x, y, reg_type, lam):
-    x_b = np.hstack([np.ones_like(x), x])  # prepend a bias column to x
-    w0, w1 = np.meshgrid(np.linspace(-10, 10, 100), np.linspace(-10, 10, 100))  # grid over intercept and slope
-    loss = np.zeros_like(w0)  # loss surface over the grid
-
-    for i in range(w0.shape[0]):
-        for j in range(w0.shape[1]):
-            w = np.array([w0[i, j], w1[i, j]])
-            error = y - x_b @ w  # residuals
-            mse = np.mean(error ** 2)  # mean squared error
-            reg = lam * (np.sum(w ** 2) if reg_type == 'l2' else np.sum(np.abs(w)))  # regularization term
-            loss[i, j] = mse + reg  # regularized loss
-
-    _, path = gradient_descent(x, y, lam, reg_type, 0.01, 500)
-
-    # plotting the figure
-    plt.figure(figsize=(6, 5))
-    plt.contour(w0, w1, loss, levels=50, cmap='viridis')
-    plt.plot(path[:, 0], path[:, 1], 'ro-', markersize=2, label='Gradient Descent Path')
-    plt.title(f"{reg_type.upper()} Regularization (λ={lam})")
-    plt.xlabel("w0 (intercept)")
-    plt.ylabel("w1 (slope)")
-    plt.grid(True)
-    plt.legend()
-    plt.tight_layout()
-    plt.savefig('results/task4-effect-of-regularization-on-loss-' + reg_type + '-' + str(lam) + '.png')
-
-
-if __name__ == "__main__":
-    print("Running Task 4: Effect of L1 and L2 Regularization on Loss Landscape")
-
-    # Generate dataset
-    x, y = generate_linear_data(30)
-
-    # Values of lambda to visualize
-    lambda_values = [0.01, 0.1, 1.0]
-
-    # Plot for both L1 and L2 regularization
-    for reg_type in ['l1', 'l2']:
-        for lam in lambda_values:
-            plot_contour(x, y, reg_type, lam)
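A reference note on the objective (derived from the code above, not part of the diff): the surface that plot_contour draws over the (w0, w1) grid is

\[
L(w_0, w_1) = \frac{1}{n}\sum_{i=1}^{n}\bigl(y_i - w_0 - w_1 x_i\bigr)^2 + \lambda R(w),
\qquad R(w) = \lVert w\rVert_2^2 \;(\text{L2}) \quad\text{or}\quad \lVert w\rVert_1 \;(\text{L1}),
\]

while gradient_descent steps along

\[
g = \tfrac{1}{n} X_b^\top (X_b w - y) + \lambda w \;(\text{L2})
\qquad\text{or}\qquad
g = \tfrac{1}{n} X_b^\top (X_b w - y) + \lambda\,\operatorname{sign}(w) \;(\text{L1}),
\]

which is the (sub)gradient of \(\tfrac{1}{2}\mathrm{MSE} + \tfrac{\lambda}{2}\lVert w\rVert_2^2\) (L2) or \(\tfrac{1}{2}\mathrm{MSE} + \lambda\lVert w\rVert_1\) (L1). For L2 this is exactly half the plotted surface, so the descent path heads toward the same minimum; for L1 the path effectively follows the plotted surface with \(\lambda\) doubled.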
[10 image files replaced; file sizes before → after: 38 → 40 KiB, 38 → 34 KiB, 26 → 26 KiB, 21 → 21 KiB, 127 → 130 KiB, 127 → 131 KiB, 128 → 132 KiB, 128 → 131 KiB, 130 → 133 KiB, 141 → 145 KiB]
Modified file (the Task 3 script)

@@ -137,6 +137,63 @@ def bias_variance_decomp(reg_type, lam_values, num_datasets, N, D):
     plt.grid(True)
     plt.savefig('results/task3-bias-decomposition-' + reg_type + '.png')
 
 
+# Generating Synthetic Data
+def generate_linear_data(n):
+    x = np.random.uniform(0, 10, n)  # sample x uniformly on [0, 10]
+    eps = np.random.normal(0, 1, n)  # standard normal noise
+    y = -3 * x + 8 + 2 * eps  # y = −3x + 8 + 2ϵ
+    return x.reshape(-1, 1), y
+
+
+# Gradient Descent with L1/L2
+def gradient_descent(x, y, lam, reg_type, lr, iters):
+    x_b = np.hstack([np.ones_like(x), x])  # prepend a bias column to x
+    w = np.zeros(x_b.shape[1])  # initialize weights at zero
+    path = [w.copy()]
+
+    for i in range(iters):
+        pred = x_b @ w  # linear regression prediction
+        error = pred - y  # residuals
+        grad = x_b.T @ error / len(y)  # gradient of the squared-error term
+
+        if reg_type == 'l2':
+            grad += lam * w  # L2 penalty gradient
+        elif reg_type == 'l1':
+            grad += lam * np.sign(w)  # L1 penalty subgradient
+
+        w -= lr * grad  # gradient descent update
+        path.append(w.copy())
+
+    return w, np.array(path)
+
+
+# Plotting the loss
+def plot_contour(x, y, reg_type, lam):
+    x_b = np.hstack([np.ones_like(x), x])  # prepend a bias column to x
+    w0, w1 = np.meshgrid(np.linspace(-10, 10, 100), np.linspace(-10, 10, 100))  # grid over intercept and slope
+    loss = np.zeros_like(w0)  # loss surface over the grid
+
+    for i in range(w0.shape[0]):
+        for j in range(w0.shape[1]):
+            w = np.array([w0[i, j], w1[i, j]])
+            error = y - x_b @ w  # residuals
+            mse = np.mean(error ** 2)  # mean squared error
+            reg = lam * (np.sum(w ** 2) if reg_type == 'l2' else np.sum(np.abs(w)))  # regularization term
+            loss[i, j] = mse + reg  # regularized loss
+
+    _, path = gradient_descent(x, y, lam, reg_type, 0.01, 500)
+
+    # plotting the figure
+    plt.figure(figsize=(6, 5))
+    plt.contour(w0, w1, loss, levels=50, cmap='viridis')
+    plt.plot(path[:, 0], path[:, 1], 'ro-', markersize=2, label='Gradient Descent Path')
+    plt.title(f"{reg_type.upper()} Regularization (λ={lam})")
+    plt.xlabel("w0 (intercept)")
+    plt.ylabel("w1 (slope)")
+    plt.grid(True)
+    plt.legend()
+    plt.tight_layout()
+    plt.savefig('results/task4-effect-of-regularization-on-loss-' + reg_type + '-' + str(lam) + '.png')
+
+
 if __name__ == "__main__":
     print("Running Task 3: Regularization with Cross-Validation")
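A note on output paths (not from the diff): matplotlib's savefig does not create missing directories, so the merged script assumes a results/ folder already exists. A minimal guard, placed before the plotting calls, would be:

import os
os.makedirs('results', exist_ok=True)   # create the output folder if it does not exist yet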
@@ -146,4 +203,17 @@ if __name__ == "__main__":
     train_validation_err('l1', lam_values, 50, 20, 45)
 
     bias_variance_decomp('l2', lam_values, 50, 20, 45)
     bias_variance_decomp('l1', lam_values, 50, 20, 45)
+
+    print("Running Task 4: Effect of L1 and L2 Regularization on Loss Landscape")
+
+    # Generate dataset
+    x, y = generate_linear_data(30)
+
+    # Values of lambda to visualize
+    lambda_values = [0.01, 0.1, 1.0]
+
+    # Plot for both L1 and L2 regularization
+    for reg_type in ['l1', 'l2']:
+        for lam in lambda_values:
+            plot_contour(x, y, reg_type, lam)
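A quick sanity check of the merged gradient_descent (a minimal standalone sketch, not part of the diff; names here are illustrative): setting its L2 gradient to zero gives a ridge-style closed form, so for a small λ that solution, and therefore the descent endpoint if run long enough, should land roughly at the true intercept 8 and slope −3 of the data model y = −3x + 8 + 2ϵ.

import numpy as np

# Standalone check (illustrative): the L2 gradient used in gradient_descent vanishes at
#   (X^T X / n + lam * I) w = X^T y / n,
# so for small lam the solution should sit near the true [intercept, slope] = [8, -3].
rng = np.random.default_rng(0)
n, lam = 30, 0.01
x = rng.uniform(0, 10, n)
y = -3 * x + 8 + 2 * rng.standard_normal(n)   # same data model as generate_linear_data
X = np.column_stack([np.ones(n), x])          # bias column + feature, like x_b in the script

w_closed = np.linalg.solve(X.T @ X / n + lam * np.eye(2), X.T @ y / n)
print(w_closed)                               # expected to be roughly [8, -3] for small lam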