mervenoyan committed on
Commit
9541eae
·
1 Parent(s): ac70dee

initial commit

Browse files
Files changed (1) hide show
  1. app.py +126 -0
app.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from huggingface_hub.hf_api import create_repo, upload_folder, upload_file
4
+ from huggingface_hub.repository import Repository
5
+ import subprocess
6
+ import os
7
+ import tempfile
8
+ from uuid import uuid4
9
+ import pickle
10
+ import sweetviz as sv
11
+ import dabl
12
+ import re
13
+
14
+
15
def analyze_datasets(dataset, dataset_name, username, token, column=None, pairwise="off"):
    """Build a Sweetviz report for an uploaded CSV and publish it as a static Space.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is read as the
            path of the CSV file.
        dataset_name: Name of the Space repository to create under ``username``.
        username: Hub namespace the Space is created in.
        token: Hub access token forwarded to every Hub call.
        column: Optional target column the other features are analyzed
            against. ``None``, an empty string and a whitespace-only string
            all mean "no target".
        pairwise: Forwarded to Sweetviz as ``pairwise_analysis``. ``None``
            falls back to ``"off"``.

    Returns:
        A message embedding the value returned by ``create_repo``.

    Raises:
        ValueError: If no dataset was provided.
    """
    if dataset is None:
        raise ValueError("Please upload a dataset before running the analysis.")

    # The target field is optional in the UI and may arrive as an empty string
    # rather than None, so test for real content instead of identity with None.
    has_target = bool(column and column.strip())
    # An unselected choice may arrive as None; fall back to the declared default.
    pairwise = pairwise or "off"
    repo_id = f"{username}/{dataset_name}"

    df = pd.read_csv(dataset.name)
    if has_target:
        analyze_report = sv.analyze(df, target_feat=column, pairwise_analysis=pairwise)
    else:
        analyze_report = sv.analyze(df, pairwise_analysis=pairwise)

    readme = f"---\ntitle: {dataset_name}\nemoji: ✨\ncolorFrom: green\ncolorTo: red\nsdk: static\npinned: false\ntags:\n- dataset-report\n---"

    # Work in a private temporary folder so that simultaneous requests do not
    # overwrite each other's index.html / README.md in the working directory.
    with tempfile.TemporaryDirectory() as workdir:
        report_path = os.path.join(workdir, "index.html")
        readme_path = os.path.join(workdir, "README.md")

        analyze_report.show_html(report_path, open_browser=False)
        # The front matter contains an emoji, so the encoding must be explicit.
        with open(readme_path, "w", encoding="utf-8") as f:
            f.write(readme)

        repo_url = create_repo(
            repo_id,
            repo_type="space",
            token=token,
            space_sdk="static",
            private=False,
        )
        upload_file(
            path_or_fileobj=report_path,
            path_in_repo="index.html",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )
        upload_file(
            path_or_fileobj=readme_path,
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="space",
            token=token,
        )

    return f"Your dataset report will be ready at {repo_url}"
31
+
32
+
33
+ from sklearn.utils import estimator_html_repr
34
+
35
+
36
def extract_estimator_config(model):
    """Render an estimator's hyperparameters as a two-column Markdown table.

    ``model`` must provide ``get_params(deep=True)`` returning a mapping of
    hyperparameter names to values. The result is the table header followed
    by one ``| name | value |`` row per entry, each terminated by a newline.
    """
    header = "| Hyperparameters | Value |\n| :-- | :-- |\n"
    params = model.get_params(deep=True)
    rows = [f"| {name} | {setting} |\n" for name, setting in params.items()]
    return header + "".join(rows)
42
+
43
+
44
def train_baseline(dataset, username, dataset_name, token, column):
    """Train a dabl baseline classifier on an uploaded CSV and push it to the Hub.

    Args:
        dataset: Uploaded file object; its ``name`` attribute is read as the
            path of the CSV file.
        username: Hub namespace the repository is created in.
        dataset_name: Name used for the repository and in the model card.
        token: Hub access token forwarded to every Hub call.
        column: Name of the target column to predict.

    Returns:
        A message embedding the value returned by ``create_repo``.

    Raises:
        ValueError: If no dataset was provided, or if ``column`` is not a
            column of the cleaned data frame.
    """
    from contextlib import redirect_stdout

    if dataset is None:
        raise ValueError("Please upload a dataset before training.")

    df = pd.read_csv(dataset.name)
    fc = dabl.SimpleClassifier(random_state=0)
    df_clean = dabl.clean(df)
    if column not in df_clean.columns:
        raise ValueError(
            f"Target column {column!r} was not found in the cleaned dataset."
        )
    X = df_clean.drop(column, axis=1)
    y = df_clean[column]
    repo_id = f"{username}/{dataset_name}"

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Keep the training log inside the temporary folder: it is then part
        # of the uploaded folder and parallel runs no longer share a single
        # logs.txt in the working directory.
        log_path = os.path.join(tmpdirname, "logs.txt")
        with open(log_path, "w", encoding="utf-8") as log_file:
            with redirect_stdout(log_file):
                print('Logging training')
                fc.fit(X, y)

        repo_url = create_repo(repo_id=repo_id, token=token)

        # Model card: YAML front matter, a heading, one paragraph per line of
        # the best-result summary, then the estimator's HTML diagram with the
        # newline + indentation runs removed.
        readme_parts = [
            "---\nlicense: apache-2.0\nlibrary_name: sklearn\n---\n\n",
            f"## Baseline Model trained on {dataset_name} to predict {column}\n\n",
            "Metrics of the best model:\n\n",
        ]
        readme_parts.extend(
            f"{line}\n\n" for line in str(fc.current_best_).split("\n")
        )
        readme_parts.append("\n\nSee model plot below:\n\n")
        readme_parts.append(re.sub(r"\n\s+", "", str(estimator_html_repr(fc.est_))))
        readme = "".join(readme_parts)

        # Explicit UTF-8 so the card does not depend on the platform's
        # default text encoding.
        with open(os.path.join(tmpdirname, "README.md"), "w", encoding="utf-8") as f:
            f.write(readme)
        with open(os.path.join(tmpdirname, "clf.pkl"), mode="bw") as f:
            pickle.dump(fc, file=f)

        upload_folder(
            repo_id=repo_id,
            folder_path=tmpdirname,
            repo_type="model",
            token=token,
            path_in_repo="./",
        )

    return f"Your model will be ready at {repo_url}"
74
+
75
+
76
+
77
# Gradio UI: two tabs, one wired to train_baseline and one to analyze_datasets.
# NOTE(review): indentation was reconstructed from a flattened diff; in
# particular, confirm whether each `outcome` box belongs beside the input
# column (as written here) or inside it.
with gr.Blocks() as demo:
    # Title emojis restored from a mis-decoded UTF-8 sequence.
    main_title = gr.Markdown("""# Baseline Trainer 🪄🌟✨""")
    main_desc = gr.Markdown("""This app trains a baseline model for a given dataset and pushes it to your Hugging Face Hub Profile with a model card.""")

    with gr.Tabs():
        with gr.TabItem("Baseline Trainer") as baseline_trainer:
            with gr.Row():
                with gr.Column():
                    title = gr.Markdown(""" ## Train a supervised baseline model""")
                    description = gr.Markdown("This app trains a model and pushes it to your Hugging Face Hub Profile.")
                    dataset = gr.File(label = "Dataset")
                    column = gr.Text(label = "Enter target variable:")
                    dataset_name = gr.Text(label = "Enter dataset name:")
                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
                    # NOTE(review): the token is entered in a plain text box;
                    # consider a masked input if the installed Gradio
                    # version supports one.
                    token = gr.Textbox(label = "Your Hugging Face Token")
                    username = gr.Textbox(label = "Your Hugging Face User Name")
                    inference_run = gr.Button("Train")
                    inference_progress = gr.StatusTracker(cover_container=True)

                outcome = gr.outputs.Textbox(label = "Progress")
            # Input order must match train_baseline's positional parameters.
            inference_run.click(
                train_baseline,
                inputs=[dataset, username, dataset_name, token, column],
                outputs=outcome,
                status_tracker=inference_progress,
            )
        with gr.TabItem("Analyze") as analyze:
            with gr.Row():
                with gr.Column():
                    title = gr.Markdown(""" ## Analyze Dataset """)
                    description = gr.Markdown("Analyze a dataset or predictive variables against a target variable in a dataset (enter a column name to column section if you want to compare against target value). You can also do pairwise analysis, but it has quadratic complexity.")
                    dataset = gr.File(label = "Dataset")
                    column = gr.Text(label = "Compare dataset against a target variable (Optional)")
                    pairwise = gr.Radio(["off", "on"], label = "Enable pairwise analysis")
                    # NOTE(review): same plain-text token input as above.
                    token = gr.Textbox(label = "Your Hugging Face Token")
                    username = gr.Textbox(label = "Your Hugging Face User Name")
                    dataset_name = gr.Textbox(label = "Dataset Name")
                    pushing_desc = gr.Markdown("This app needs your Hugging Face Hub user name, token and a unique name for your dataset report.")
                    inference_run = gr.Button("Infer")
                    inference_progress = gr.StatusTracker(cover_container=True)
                outcome = gr.outputs.Textbox()
            # Input order must match analyze_datasets' positional parameters.
            inference_run.click(
                analyze_datasets,
                inputs=[dataset, dataset_name, username, token, column, pairwise],
                outputs=outcome,
                status_tracker=inference_progress,
            )

demo.launch(debug=True)