import streamlit as st
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential


def soft_quantized_influence_measure(y_true, y_pred, threshold=0.1):
    """Custom loss: split errors at `threshold`, weight each squared error by
    the squared count of its group, and normalize by y_std ** 4."""
    y_std = tf.math.reduce_std(y_true)
    error = y_true - y_pred
    abs_error = tf.abs(error)
    is_small_error = abs_error <= threshold
    # n1 / n2: how many errors fall inside / outside the threshold.
    n1 = tf.reduce_sum(tf.cast(is_small_error, tf.float32))
    n2 = tf.reduce_sum(tf.cast(~is_small_error, tf.float32))
    true_error_loss = tf.square(error) * n1 ** 2
    false_error_loss = tf.square(error) * n2 ** 2
    final = tf.where(is_small_error, true_error_loss, false_error_loss)
    # tf.square(y_std) ** 2 is y_std ** 4.
    final = tf.reduce_mean(final) / (tf.square(y_std) ** 2)
    return final


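# A quick sanity check for the loss above; a minimal sketch with toy values
# (the helper name and numbers are illustrative, not from the original app).
# With every error inside the threshold, n2 == 0, so the loss reduces to
# mean(error ** 2) * n1 ** 2 / y_std ** 4.
def _demo_soft_quantized_loss():
    y_true = tf.constant([0.0, 1.0, 1.0, 0.0])
    y_pred = tf.constant([0.05, 0.95, 1.02, 0.03])
    # All four errors are <= 0.1, so n1 == 4 and the small-error branch applies.
    return soft_quantized_influence_measure(y_true, y_pred, threshold=0.1)

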
def create_datasets(n_samples=1500):
    """Build five toy datasets: circles, moons, blobs, uniform noise, and
    anisotropically distorted blobs."""
    noisy_circles = datasets.make_circles(n_samples=n_samples, factor=0.5, noise=0.05, random_state=170)
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=0.05, random_state=170)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=170)
    rng = np.random.RandomState(170)
    # Uniform noise has no labels, hence the `None` target.
    no_structure = rng.rand(n_samples, 2), None
    # Reuse the blob data and stretch it with a linear transformation.
    X, y = blobs
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    X_aniso = np.dot(X, transformation)
    aniso = (X_aniso, y)
    return [noisy_circles, noisy_moons, blobs, no_structure, aniso]


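# Assumed usage (illustrative helper, not part of the original app): each entry
# of the returned list is an (X, y) pair, and y is None for the uniform noise.
def _peek_datasets():
    for name, (X, y) in zip(["circles", "moons", "blobs", "noise", "aniso"], create_datasets()):
        print(name, X.shape, None if y is None else y.shape)

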
def create_model(input_shape, n_outputs=1):
    # A single dense layer; the output width must match the (one-hot) target,
    # otherwise the loss silently broadcasts a (n, 1) output against (n, k) labels.
    model = Sequential([Dense(n_outputs, input_shape=input_shape, activation='sigmoid')])
    return model


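# Example (assumed shapes, illustrative only): a model for 2-D inputs whose
# output width matches a 3-class one-hot target.
#
#   model = create_model((2,), n_outputs=3)
#   model.compile(optimizer='sgd', loss='mean_squared_error')

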
def main():
    st.title("Classification Data and Model Training Visualization")

    dataset_list = create_datasets()  # avoid shadowing sklearn's `datasets` module
    dataset_names = ["Noisy Circles", "Noisy Moons", "Blobs", "No Structure", "Anisotropic"]

    # NOTE: the grid below renders every dataset; the selection is not yet used.
    selected_dataset = st.selectbox("Select a Dataset", options=dataset_names)

    # One column per dataset: row 0 scatter plots, rows 1-2 training curves.
    fig, axs = plt.subplots(3, len(dataset_list), figsize=(15, 9))

    # scikit-learn >= 1.2 renamed `sparse` to `sparse_output`.
    encoder = OneHotEncoder(sparse_output=False)

    for i, dataset in enumerate(dataset_list):
        X, y = dataset
        if y is not None:
            y = encoder.fit_transform(y.reshape(-1, 1))
            axs[0, i].scatter(X[:, 0], X[:, 1], c=np.argmax(y, axis=1))
        else:
            axs[0, i].scatter(X[:, 0], X[:, 1])
        axs[0, i].set_title(dataset_names[i])

        # The "No Structure" dataset has no labels, so there is nothing to train on.
        if y is None:
            continue

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Train one model per loss; rebuilding the model gives each run fresh
        # weights instead of continuing from the previous run's training.
        model = create_model((2,), n_outputs=y_train.shape[1])
        model.compile(optimizer='sgd', loss='mean_squared_error')
        history_mse = model.fit(X_train, y_train, validation_split=0.2, epochs=20, verbose=0)

        model = create_model((2,), n_outputs=y_train.shape[1])
        model.compile(optimizer='sgd',
                      loss=lambda y_true, y_pred: soft_quantized_influence_measure(y_true, y_pred, 0.1))
        history_sqim = model.fit(X_train, y_train, validation_split=0.2, epochs=20, verbose=0)

        axs[1, i].plot(history_mse.history['loss'], label='train')