Skip to content

Commit

Permalink
adding a new quantile regression experiment script
Browse files Browse the repository at this point in the history
  • Loading branch information
brifordwylie committed Jun 23, 2024
1 parent e0cf24d commit f1c3f15
Showing 1 changed file with 93 additions and 0 deletions.
93 changes: 93 additions & 0 deletions experiments/quantile_regression_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import numpy as np
import matplotlib.pyplot as plt
from xgboost import XGBRegressor

# Generate synthetic data with even spacing from -10 to 5 and sparse spacing from 5 to 10
x_even = np.linspace(-10, 5, 800) # Evenly spaced from -10 to 5
x_sparse = 5 + (np.linspace(0, 1, 50) ** 2) * 5 # Increasingly sparse from 5 to 10
x = np.concatenate([x_even, x_sparse])

# Ensure no values are exactly zero or negative in the input to the log function
epsilon = 1e-6 # Small value to avoid log(0)
x_adjusted = np.where(x >= 0, x + 1 + epsilon, -x + 1 + epsilon)

# Generate non-linear 'S' shape y values
y = np.where(x >= 0, np.log(x_adjusted), -np.log(x_adjusted)) + np.random.normal(scale=0.05 * np.abs(x), size=x.shape)

# Reshape the data
X_train = x.reshape(-1, 1)
y_train = y


# Function to train and predict using the prediction model
def train_prediction_model(X, y):
model = XGBRegressor(objective="reg:squarederror")
model.fit(X, y)
return model


# Function to train quantile models and store predictions
def train_quantile_models(X, y, quantiles, n_estimators=200):
quantile_models = {}
for quantile in quantiles:
model = XGBRegressor(
objective="reg:quantileerror", quantile_alpha=quantile, n_estimators=n_estimators, max_depth=1
)
model.fit(X, y)
quantile_models[quantile] = model
return quantile_models


# Calculate confidence based on the quantile predictions
def calculate_confidence(preds, quantile_models, X):
lower_05 = quantile_models[0.05].predict(X)
lower_25 = quantile_models[0.25].predict(X)
quant_50 = quantile_models[0.50].predict(X)
upper_75 = quantile_models[0.75].predict(X)
upper_95 = quantile_models[0.95].predict(X)

# Domain specific logic for calculating confidence
# If the interval with is greater than 1 with have 0 confidence
# anything below that is a linear scale from 0 to 1
interval_width = upper_95 - lower_05
confidence = np.clip((1 - interval_width) / 1, 0, 1)
return confidence

"""
confidence = np.zeros_like(preds)
for i, pred in enumerate(preds):
if pred < lower_25[i]:
confidence[i] = (pred - lower_05[i]) / (lower_25[i] - lower_05[i])
elif pred > upper_75[i]:
confidence[i] = (upper_95[i] - pred) / (upper_95[i] - upper_75[i])
else:
confidence[i] = 1.0 # High confidence if the prediction is between the 25th and 75th percentiles
return confidence
"""


# Train models
prediction_model = train_prediction_model(X_train, y_train)
quantiles = [0.05, 0.25, 0.50, 0.75, 0.95]
quantile_models = train_quantile_models(X_train, y_train, quantiles)

# Predictions
rmse_predictions = prediction_model.predict(X_train)

# Calculate confidence for the array of predictions
confidence_values = calculate_confidence(rmse_predictions, quantile_models, X_train)

# Plot the results
plt.figure(figsize=(10, 6))
sc = plt.scatter(X_train, y_train, c=confidence_values, cmap="coolwarm", label="Data", alpha=0.5)
# sc = plt.scatter(X_train, y_train, cmap='coolwarm', label='Data', alpha=0.5)
plt.colorbar(sc, label="Confidence")
for q in quantiles:
plt.plot(X_train, quantile_models[q].predict(X_train), label=f"Quantile {int(q * 100)}")
plt.plot(X_train, rmse_predictions, label="RMSE Prediction", color="black", linestyle="--")
plt.xlabel("X")
plt.ylabel("Y")
plt.title("Quantile Regression and Confidence with XGBoost")
plt.legend()
plt.show()

0 comments on commit f1c3f15

Please sign in to comment.