Spaces:
Running
Running
mervenoyan
committed on
Commit
·
9541eae
1
Parent(s):
ac70dee
initial commit
Browse files
app.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
from huggingface_hub.hf_api import create_repo, upload_folder, upload_file
|
4 |
+
from huggingface_hub.repository import Repository
|
5 |
+
import subprocess
|
6 |
+
import os
|
7 |
+
import tempfile
|
8 |
+
from uuid import uuid4
|
9 |
+
import pickle
|
10 |
+
import sweetviz as sv
|
11 |
+
import dabl
|
12 |
+
import re
|
13 |
+
|
14 |
+
|
15 |
+
def analyze_datasets(dataset, dataset_name, username, token, column=None, pairwise="off"):
    """Build a Sweetviz report for an uploaded CSV and publish it as a static Space.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is read as the
            path of the CSV file.
        dataset_name: Name of the Space created under ``username``.
        username: Hugging Face Hub user name that owns the Space.
        token: Hugging Face Hub token forwarded to every Hub call.
        column: Optional target column to compare the other features against.
            ``None`` or a blank string means "no target".
        pairwise: Value forwarded as Sweetviz's ``pairwise_analysis``.
            ``None`` (nothing selected in the UI) falls back to ``"off"``.

    Returns:
        A message embedding the value returned by ``create_repo``.
    """
    df = pd.read_csv(dataset.name)

    # A text box left empty yields "" rather than None, so test truthiness
    # instead of identity; otherwise an empty target name reaches Sweetviz.
    target = column.strip() if isinstance(column, str) else column
    pairwise = pairwise or "off"
    analyze_report = sv.analyze(
        df,
        target_feat=target or None,
        pairwise_analysis=pairwise,
    )

    repo_id = f"{username}/{dataset_name}"
    readme = (
        f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\n"
        "sdk: static\npinned: false\ntags:\n- dataset-report\n---"
    )

    # Work in a private temporary directory: writing index.html / README.md
    # into the shared working directory lets concurrent requests clobber
    # each other's files before they are uploaded.
    with tempfile.TemporaryDirectory() as workdir:
        report_path = os.path.join(workdir, "index.html")
        readme_path = os.path.join(workdir, "README.md")

        analyze_report.show_html(report_path, open_browser=False)
        repo_url = create_repo(
            repo_id,
            repo_type="space",
            token=token,
            space_sdk="static",
            private=False,
        )
        upload_file(
            path_or_fileobj=report_path,
            path_in_repo="index.html",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )

        # Explicit UTF-8: the front matter contains an emoji, which the
        # platform default encoding may not be able to represent.
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(readme)
        upload_file(
            path_or_fileobj=readme_path,
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )

    return f"Your dataset report will be ready at {repo_url}"
|
31 |
+
|
32 |
+
|
33 |
+
from sklearn.utils import estimator_html_repr
|
34 |
+
|
35 |
+
|
36 |
+
def extract_estimator_config(model):
    """Render an estimator's hyperparameters as a two-column Markdown table.

    Args:
        model: Object exposing ``get_params(deep=True)`` that returns a
            mapping of hyperparameter name to value.

    Returns:
        A Markdown table string: a header row, an alignment row, then one
        row per hyperparameter, each terminated by a newline.
    """

    def _cell(value):
        # A Markdown table row must stay on one line and "|" delimits cells,
        # so fold multi-line reprs into one line and escape pipes.
        text = " ".join(line.strip() for line in str(value).splitlines())
        return text.replace("|", "\\|")

    header = "| Hyperparameters | Value |\n| :-- | :-- |\n"
    rows = [
        f"| {_cell(name)} | {_cell(value)} |\n"
        for name, value in model.get_params(deep=True).items()
    ]
    return header + "".join(rows)
|
42 |
+
|
43 |
+
|
44 |
+
def train_baseline(dataset, username, dataset_name, token, column):
    """Train a dabl baseline classifier on a CSV and push it to the Hub.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is read as the
            path of the CSV file.
        username: Hugging Face Hub user name that owns the model repo.
        dataset_name: Name of the repo created under ``username``.
        token: Hugging Face Hub token forwarded to every Hub call.
        column: Name of the target column to predict.

    Returns:
        A message embedding the value returned by ``create_repo``.

    Raises:
        KeyError: If ``column`` is not a column of the cleaned data frame.
    """
    from contextlib import redirect_stdout

    df = pd.read_csv(dataset.name)
    fc = dabl.SimpleClassifier(random_state=0)
    df_clean = dabl.clean(df)

    # Fail early with an actionable message instead of the bare pandas
    # KeyError that ``drop`` would raise for an unknown column.
    if column not in df_clean.columns:
        raise KeyError(
            f"Target column {column!r} not found in the cleaned dataset; "
            f"available columns: {list(df_clean.columns)}"
        )
    X = df_clean.drop(column, axis=1)
    y = df_clean[column]

    repo_id = f"{username}/{dataset_name}"
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Anything printed while fitting is captured in logs.txt.
        # NOTE(review): logs.txt lives in the working directory, not in
        # tmpdirname, so it is not uploaded — confirm that is intended.
        with open('logs.txt', 'w') as log_file, redirect_stdout(log_file):
            print('Logging training')
            fc.fit(X, y)
        repo_url = create_repo(repo_id=repo_id, token=token)

        # Assemble the model card: YAML front matter, title, the textual
        # summary of the best model (one paragraph per line), then the
        # estimator's HTML diagram with newline+indent runs removed.
        readme_parts = [
            "---\nlicense: apache-2.0\nlibrary_name: sklearn\n---\n\n",
            f"## Baseline Model trained on {dataset_name} to predict {column}\n\n",
            "Metrics of the best model:\n\n",
        ]
        readme_parts.extend(
            f"{elem}\n\n" for elem in str(fc.current_best_).split("\n")
        )
        readme_parts.append("\n\nSee model plot below:\n\n")
        readme_parts.append(
            re.sub(r"\n\s+", "", str(estimator_html_repr(fc.est_)))
        )
        readme = "".join(readme_parts)

        # Explicit UTF-8 so the card does not depend on the platform's
        # default encoding.
        with open(os.path.join(tmpdirname, "README.md"), "w", encoding="utf-8") as f:
            f.write(readme)
        with open(os.path.join(tmpdirname, "clf.pkl"), mode="bw") as f:
            pickle.dump(fc, file=f)
        upload_folder(
            repo_id=repo_id,
            folder_path=tmpdirname,
            repo_type="model",
            token=token,
            path_in_repo="./",
        )

    return f"Your model will be ready at {repo_url}"
|
74 |
+
|
75 |
+
|
76 |
+
|
77 |
+
# Gradio UI: one Blocks app with two tabs. The "Baseline Trainer" tab calls
# train_baseline and the "Analyze" tab calls analyze_datasets.
# NOTE(review): the original indentation was not available; the nesting below
# (in particular whether each `outcome` box sits in the Row or in the Column)
# is reconstructed from statement order — confirm against the real file.
with gr.Blocks() as demo:
    main_title = gr.Markdown("""# Baseline Trainer πͺπβ¨""")
    main_desc = gr.Markdown("""This app trains a baseline model for a given dataset and pushes it to your Hugging Face Hub Profile with a model card.""")


    with gr.Tabs():
        with gr.TabItem("Baseline Trainer") as baseline_trainer:
            with gr.Row():
                with gr.Column():
                    title = gr.Markdown(""" ## Train a supervised baseline model""")
                    description = gr.Markdown("This app trains a model and pushes it to your Hugging Face Hub Profile.")
                    dataset = gr.File(label = "Dataset")
                    column = gr.Text(label = "Enter target variable:")
                    dataset_name = gr.Text(label = "Enter dataset name:")
                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
                    token = gr.Textbox(label = "Your Hugging Face Token")
                    username = gr.Textbox(label = "Your Hugging Face User Name")
                    inference_run = gr.Button("Train")
                    inference_progress = gr.StatusTracker(cover_container=True)

                outcome = gr.outputs.Textbox(label = "Progress")
                # The inputs list follows train_baseline's parameter order:
                # (dataset, username, dataset_name, token, column).
                inference_run.click(
                    train_baseline,
                    inputs=[dataset, username, dataset_name, token, column],
                    outputs=outcome,
                    status_tracker=inference_progress,
                )
        with gr.TabItem("Analyze") as analyze:
            with gr.Row():
                with gr.Column():
                    # The names below (dataset, column, token, ...) are rebound
                    # to this tab's components. The first tab's click handler
                    # was already wired above with its own components.
                    title = gr.Markdown(""" ## Analyze Dataset """)
                    description = gr.Markdown("Analyze a dataset or predictive variables against a target variable in a dataset (enter a column name to column section if you want to compare against target value). You can also do pairwise analysis, but it has quadratic complexity.")
                    dataset = gr.File(label = "Dataset")
                    column = gr.Text(label = "Compare dataset against a target variable (Optional)")
                    pairwise = gr.Radio(["off", "on"], label = "Enable pairwise analysis")
                    token = gr.Textbox(label = "Your Hugging Face Token")
                    username = gr.Textbox(label = "Your Hugging Face User Name")
                    dataset_name = gr.Textbox(label = "Dataset Name")
                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
                    inference_run = gr.Button("Infer")
                    inference_progress = gr.StatusTracker(cover_container=True)
                outcome = gr.outputs.Textbox()
                # The inputs list follows analyze_datasets' parameter order:
                # (dataset, dataset_name, username, token, column, pairwise).
                inference_run.click(
                    analyze_datasets,
                    inputs=[dataset, dataset_name, username, token, column, pairwise],
                    outputs=outcome,
                    status_tracker=inference_progress,
                )

# Start the app when the module is executed.
demo.launch(debug=True)
|