# Spaces: Sleeping
# streamlit_app.py
import os
import queue
import threading

import pandas as pd
import streamlit as st
from loguru import logger
from syngen.ml.worker import Worker
# Use streamlit to run the application
if __name__ == "__main__":
    # Page header: only rendered when the file is executed as a script.
    st.title("HuggingFace Streamlit App with Syngen")
    st.write("Upload CSV files, define relationships, and train your model.")

# Create a queue for the logs.
# The loguru sink (producer, possibly running on the training thread) puts
# message strings here; the Streamlit script drains them to display progress.
log_queue = queue.Queue()
def log_sink(message) -> None:
    """Forward the text of a log message to ``log_queue``.

    Intended to be registered as a logger sink: it reads the ``"message"``
    entry of ``message.record`` and enqueues it, so the formatted prefix
    (timestamp, level, ...) is not included in what gets displayed.

    Args:
        message: Object exposing a ``record`` mapping with a ``"message"`` key.
    """
    log_queue.put(message.record["message"])
# Register the log sink, replacing the one added by the previous run of this
# session. Streamlit re-executes the script on every interaction while the
# loguru logger is process-global, so an unconditional logger.add() would
# stack up one more handler (bound to a stale queue) per rerun.
previous_sink_id = st.session_state.get("log_sink_id")
if previous_sink_id is not None:
    try:
        logger.remove(previous_sink_id)
    except ValueError:
        # The handler is already gone; nothing left to clean up.
        pass
st.session_state["log_sink_id"] = logger.add(log_sink)

# Path to store the uploaded files
UPLOAD_DIRECTORY = "uploaded_files"
# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)

# Define file uploader
uploaded_files = st.file_uploader(
    "Upload CSV files", type="csv", accept_multiple_files=True)

# Parsed uploads, keyed by the name reported by the uploader.
dataframes = {}
if uploaded_files:
    for uploaded_file in uploaded_files:
        # Save file to local directory. The file name is supplied by the
        # client, so keep only its final component to stay inside
        # UPLOAD_DIRECTORY.
        file_path = os.path.join(
            UPLOAD_DIRECTORY, os.path.basename(uploaded_file.name))
        with open(file_path, 'wb') as f:
            f.write(uploaded_file.getvalue())
        df = pd.read_csv(file_path)
        dataframes[uploaded_file.name] = df
        st.write(f"Preview of {uploaded_file.name}:", df.head())

# YAML Configuration Editor
st.subheader('YAML Configuration')
yaml_config = st.text_area(
    "Define the relationships between the CSV files:", "")
if yaml_config:
    # Echo the text back with YAML syntax highlighting.
    st.code(yaml_config, language="yaml")
def train_model() -> None:
    """Train one Syngen model per uploaded CSV file.

    Reads the module-level ``uploaded_files`` and ``UPLOAD_DIRECTORY`` and
    launches a ``Worker`` training run for each file saved there. Progress
    and failures are reported through ``logger``.

    Raises:
        Exception: Whatever ``Worker`` raises is logged and re-raised.
    """
    logger.info("Starting model training...")
    try:
        for uploaded_file in uploaded_files:
            # Keep only the final path component: the name is client-supplied.
            file_path = os.path.join(
                UPLOAD_DIRECTORY, os.path.basename(uploaded_file.name))
            settings = {
                "source": file_path,
                "epochs": 2,
                "drop_null": False,
                "print_report": False,
                "row_limit": None,
                "batch_size": 32,
            }
            worker = Worker(
                table_name=uploaded_file.name,
                settings=settings,
                metadata_path=None,
                log_level='DEBUG',
                type="train"
            )
            worker.launch_train()
    except Exception:
        # This function is used as a thread target; without this the failure
        # would never reach the logger and therefore never reach the UI log.
        logger.exception("Model training failed.")
        raise
    logger.info("Model training completed.")
# Training Button
if st.button('Start Model Training'):
    if uploaded_files and yaml_config:
        # Save the YAML configuration to a file.
        # NOTE(review): train_model() passes metadata_path=None, so this file
        # does not appear to be consumed by training yet - confirm intent.
        with open("config.yaml", "w", encoding="utf-8") as f:
            f.write(yaml_config)

        log_display = st.empty()  # create an empty slot to display logs
        training_errors = []

        def _run_training():
            """Run train_model() and keep any failure for the script thread."""
            try:
                train_model()
            except Exception as exc:  # thread boundary: surfaced in the UI below
                training_errors.append(exc)

        training_thread = threading.Thread(target=_run_training)
        training_thread.start()
        st.info("Training started. Please wait...")

        # Stream log lines until training has finished AND the queue is
        # drained. Checking only `log_queue.empty()` right after start() would
        # usually end the loop before the first message had been produced.
        while training_thread.is_alive() or not log_queue.empty():
            try:
                log_display.text(log_queue.get(timeout=0.2))
            except queue.Empty:
                continue
        training_thread.join()

        if training_errors:
            st.exception(training_errors[0])
        else:
            # TODO: save the model binaries, e.g. model.save("model.bin")
            # Display links to download files
            st.markdown(
                "Download [Generated File](./generated_file.csv) and [Model Binaries](./model.bin).")
    else:
        st.warning(
            "Please upload CSV files and provide a valid YAML configuration.")