compare-bayesian-regressors

Runtime error

App Files Files Community

NTaylor commited on Apr 27, 2023

Commit

06870e1

0 Parent(s):

Duplicate from NTaylor/compare-bayesian-regressors

Browse files

Files changed (4) hide show

.gitattributes +34 -0
README.md +13 -0
app.py +295 -0
requirements.txt +4 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: Compare Bayesian Regressors
+emoji: 🐨
+colorFrom: yellow
+colorTo: pink
+sdk: gradio
+sdk_version: 3.27.0
+app_file: app.py
+pinned: false
+duplicated_from: NTaylor/compare-bayesian-regressors
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,295 @@

+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import PolynomialFeatures, StandardScaler
+import numpy as np
+from sklearn.datasets import make_regression
+import pandas as pd
+from sklearn.linear_model import ARDRegression, LinearRegression, BayesianRidge
+import matplotlib.pyplot as plt
+from matplotlib.colors import SymLogNorm
+import gradio as gr
+import seaborn as sns
+X, y, true_weights = make_regression(
+    n_samples=100,
+    n_features=100,
+    n_informative=10,
+    noise=8,
+    coef=True,
+    random_state=42,
+)
+# Fit the regressors
+# ------------------
+#
+# We now fit both Bayesian models and the OLS to later compare the models'
+# coefficients.
+def fit_regression_models(n_iter=30, X=X, y=y, true_weights=true_weights):
+    olr = LinearRegression().fit(X, y)
+    print(f"inside fit_regression n_iter={n_iter}")
+    brr = BayesianRidge(compute_score=True, n_iter=n_iter).fit(X, y)
+    ard = ARDRegression(compute_score=True, n_iter=n_iter).fit(X, y)
+    df = pd.DataFrame(
+        {
+            "Weights of true generative process": true_weights,
+            "ARDRegression": ard.coef_,
+            "BayesianRidge": brr.coef_,
+            "LinearRegression": olr.coef_,
+        }
+    )
+    return df, olr, brr, ard
+# %%
+# Plot the true and estimated coefficients
+# ----------------------------------------
+#
+# Now we compare the coefficients of each model with the weights of
+# the true generative model.
+def visualize_coefficients(df=None):
+    fig = plt.figure(figsize=(10, 6))
+    ax = sns.heatmap(
+        df.T,
+        norm=SymLogNorm(linthresh=10e-4, vmin=-80, vmax=80),
+        cbar_kws={"label": "coefficients' values"},
+        cmap="seismic_r",
+    )
+    plt.ylabel("linear model")
+    plt.xlabel("coefficients")
+    plt.tight_layout(rect=(0, 0, 1, 0.95))
+    _ = plt.title("Models' coefficients")
+    return fig
+# %%
+# Due to the added noise, none of the models recover the true weights. Indeed,
+# all models always have more than 10 non-zero coefficients. Compared to the OLS
+# estimator, the coefficients using a Bayesian Ridge regression are slightly
+# shifted toward zero, which stabilises them. The ARD regression provides a
+# sparser solution: some of the non-informative coefficients are set exactly to
+# zero, while shifting others closer to zero. Some non-informative coefficients
+# are still present and retain large values.
+# %%
+# Plot the marginal log-likelihood
+# --------------------------------
+def plot_marginal_log_likelihood(ard=None, brr=None, n_iter=30):
+    fig = plt.figure(figsize=(10, 6))
+    ard_scores = -np.array(ard.scores_)
+    brr_scores = -np.array(brr.scores_)
+    # print(f"ard_scores = {ard_scores}")
+    # print(f"brr_scores = {brr_scores}")
+    plt.plot(ard_scores, color="navy", label="ARD")
+    plt.plot(brr_scores, color="red", label="BayesianRidge")
+    plt.ylabel("Log-likelihood")
+    plt.xlabel("Iterations")
+    plt.xlim(1, n_iter)
+    plt.legend()
+    _ = plt.title("Models log-likelihood")
+    print("fig inside plot marginal = ", fig)
+    return fig
+def make_regression_comparison_plot(n_iter=30):
+    # print(f"n_iter = {n_iter}")
+    # fit models
+    df, olr, brr, ard = fit_regression_models(n_iter=n_iter, X=X, y=y, true_weights=true_weights)
+    # print(f"df = {df}")
+    # get figure
+    fig = visualize_coefficients(df=df)
+    return fig
+def make_log_likelihood_plot(n_iter=30):
+    # print(f"n_iter = {n_iter}")
+    # fit models
+    df, olr, brr, ard = fit_regression_models(n_iter=n_iter, X=X, y=y, true_weights=true_weights)
+    # print(f"df = {df}")
+    # get figure
+    fig = plot_marginal_log_likelihood(ard=ard, brr=brr, n_iter=n_iter)
+    print(f"fig = {fig}")
+    return fig
+    # visualize coefficients
+# # %%
+# # Indeed, both models minimize the log-likelihood up to an arbitrary cutoff
+# # defined by the `n_iter` parameter.
+# #
+# # Bayesian regressions with polynomial feature expansion
+# # ======================================================
+# Generate synthetic dataset
+# --------------------------
+# We create a target that is a non-linear function of the input feature.
+# Gaussian noise (standard normal draws scaled by 1.35) is added.
+# Fixed seed so the generated data is the same on every start.
+rng = np.random.RandomState(0)
+n_samples = 110
+# Inputs lie in (0, 10]; sort the data to make plotting easier later.
+g_X = np.sort(-10 * rng.rand(n_samples) + 10)
+# Noise is drawn from a normal distribution and scaled by 1.35.
+noise = rng.normal(0, 1, n_samples) * 1.35
+g_y = np.sqrt(g_X) * np.sin(g_X) + noise
+# Keep a tidy copy (1-D columns) for the seaborn scatter plot.
+full_data = pd.DataFrame({"input_feature": g_X, "target": g_y})
+# Estimators expect a 2-D feature matrix: one column, one row per sample.
+g_X = g_X.reshape((-1, 1))
+# extrapolation: 10 extra inputs on [10, 10.4], just past the training range
+X_plot = np.linspace(10, 10.4, 10)
+y_plot = np.sqrt(X_plot) * np.sin(X_plot)
+# Plotting grid = training inputs followed by the extrapolation inputs.
+X_plot = np.concatenate((g_X, X_plot.reshape((-1, 1))))
+# Ground truth on that grid: subtracting the noise from the training targets
+# recovers the noise-free signal for the training part.
+y_plot = np.concatenate((g_y - noise, y_plot))
+# %%
+# Fit the regressors
+# ------------------
+#
+# Here we try a degree 10 polynomial to potentially overfit, though the bayesian
+# linear models regularize the size of the polynomial coefficients. As
+# `fit_intercept=True` by default for
+# :class:`~sklearn.linear_model.ARDRegression` and
+# :class:`~sklearn.linear_model.BayesianRidge`, then
+# :class:`~sklearn.preprocessing.PolynomialFeatures` should not introduce an
+# additional bias feature. By setting `return_std=True`, the bayesian regressors
+# also return the standard deviation of the predictive distribution at each
+# query point.
+#TODO - make this function that can be adapted with the gr.slider
+def generate_polynomial_dataset(degree = 10):
+    ard_poly = make_pipeline(
+        PolynomialFeatures(degree=degree, include_bias=False),
+        StandardScaler(),
+        ARDRegression(),
+    ).fit(g_X, g_y)
+    brr_poly = make_pipeline(
+        PolynomialFeatures(degree=degree, include_bias=False),
+        StandardScaler(),
+        BayesianRidge(),
+    ).fit(g_X, g_y)
+    y_ard, y_ard_std = ard_poly.predict(X_plot, return_std=True)
+    y_brr, y_brr_std = brr_poly.predict(X_plot, return_std=True)
+    return y_ard, y_ard_std, y_brr, y_brr_std
+# %%
+# Plotting polynomial regressions with std errors of the scores
+# -------------------------------------------------------------
+def visualize_bayes_regressions_polynomial_features(degree = 10):
+    #TODO - get data dynamically from the gr.slider
+    y_ard, y_ard_std, y_brr, y_brr_std = generate_polynomial_dataset(degree)
+    fig = plt.figure(figsize=(10, 6))
+    ax = sns.scatterplot(
+        data=full_data, x="input_feature", y="target", color="black", alpha=0.75)
+    ax.plot(X_plot, y_plot, color="black", label="Ground Truth")
+    ax.plot(X_plot, y_brr, color="red", label="BayesianRidge with polynomial features")
+    ax.plot(X_plot, y_ard, color="navy", label="ARD with polynomial features")
+    ax.fill_between(
+        X_plot.ravel(),
+        y_ard - y_ard_std,
+        y_ard + y_ard_std,
+        color="navy",
+        alpha=0.3,
+    )
+    ax.fill_between(
+        X_plot.ravel(),
+        y_brr - y_brr_std,
+        y_brr + y_brr_std,
+        color="red",
+        alpha=0.3,
+    )
+    ax.legend()
+    _ = ax.set_title("Polynomial fit of a non-linear feature")
+    # print(f"ax = {ax}")
+    return fig
+# def make_polynomial_comparison_plot():
+#     return fig
+title = " Illustration of Comparing Linear Bayesian Regressors with synthetic data"
+with gr.Blocks(title=title) as demo:
+    gr.Markdown(f"# {title}")
+    gr.Markdown(""" This example shows a comparison of two different bayesian regressors:
+        Automatic Relevance Determination - ARD see [sklearn-docs](https://scikit-learn.org/stable/modules/linear_model.html#automatic-relevance-determination)
+         Bayesian Ridge Regression -  see [sklearn-docs](https://scikit-learn.org/stable/modules/linear_model.html#bayesian-ridge-regression)
+        The tutorial is split into sections, with the first comparing model coeffecients produced by Ordinary Least Squares (OLS), Bayesian Ridge Regression, and ARD with the known true coefficients. For this
+        We generated a dataset where X and y are linearly linked: 10 of the features of X will be used to generate y. The other features are not useful at predicting y.
+        n addition, we generate a dataset where n_samples == n_features. Such a setting is challenging for an OLS model and leads potentially to arbitrary large weights.
+        Having a prior on the weights and a penalty alleviates the problem. Finally, gaussian noise is added.
+        For the final tab, we investigate bayesian regressors with polynomial features and generate an additional dataset where the target is a non-linear function of the input feature, with
+        added noise following a standard uniform distribution.
+     For further details please see the sklearn docs:
+    """)
+    gr.Markdown(" **[Demo is based on sklearn docs found here](https://scikit-learn.org/stable/auto_examples/linear_model/plot_ard.html#sphx-glr-auto-examples-linear-model-plot-ard-py)** <br>")
+    with gr.Tab("# Plot true and estimated coefficients"):
+        with gr.Row():
+            n_iter = gr.Slider(value=5, minimum=5, maximum=50, step=1, label="n_iterations")
+        btn = gr.Button(value="Plot true and estimated coefficients")
+        btn.click(make_regression_comparison_plot, inputs = [n_iter], outputs= gr.Plot(label='Plot true and estimated coefficients') )
+        gr.Markdown(
+        """
+        # Details
+         One can observe that with the added noise, none of the models can perfectly recover the coefficients of the original model. All models have more thab 10 non-zero coefficients,
+        where only 10 are useful. The Bayesian Ridge Regression manages to recover most of the coefficients, while the ARD is more conservative.
+        """)
+    with gr.Tab("# Plot marginal log likelihoods"):
+        with gr.Row():
+            n_iter = gr.Slider(value=5, minimum=5, maximum=50, step=1, label="n_iterations")
+        btn = gr.Button(value="Plot marginal log likelihoods")
+        btn.click(make_log_likelihood_plot, inputs = [n_iter], outputs= gr.Plot(label='Plot marginal log likelihoods') )
+        gr.Markdown(
+        """
+        # Confirm with marginal log likelihoods
+        Both ARD and Bayesian Ridge minimized the log-likelihood upto an arbitrary cuttoff defined the the n_iter parameter.
+        """
+        )
+    with gr.Tab("# Plot bayesian regression with polynomial features"):
+        with gr.Row():
+            degree = gr.Slider(value=5, minimum=5, maximum=50, step=1, label="n_degrees")
+        btn = gr.Button(value="Plot bayesian regression with polynomial features")
+        btn.click(visualize_bayes_regressions_polynomial_features, inputs = [degree], outputs= gr.Plot(label='Plot bayesian regression with polynomial features') )
+        gr.Markdown(
+        """
+        # Details
+        Here we try a degree 10 polynomial to potentially overfit, though the bayesian linear models regularize the size of the polynomial coefficients.
+        As fit_intercept=True by default for ARDRegression and BayesianRidge, then PolynomialFeatures should not introduce an additional bias feature. By setting return_std=True,
+        the bayesian regressors return the standard deviation of the posterior distribution for the model parameters.
+        """)
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+scikit-learn==1.2.2
+matplotlib==3.5.1
+numpy==1.21.6
+seaborn==0.11.2