import hashlib import json import os import random import uuid from datetime import datetime from pathlib import Path import gradio as gr from huggingface_hub import CommitScheduler, get_token, login from openai import OpenAI from prompts import basic_prompt, detailed_genre_description_prompt from theme import TufteInspired # Ensure you're logged in to Hugging Face login(os.getenv("HF_TOKEN")) # Define available models MODELS = [ "meta-llama/Meta-Llama-3-70B-Instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1", "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", ] def get_random_model(): return random.choice(MODELS) def create_client(model_id): return OpenAI( base_url=f"https://api-inference.huggingface.co/models/{model_id}/v1", api_key=get_token(), ) # Set up dataset storage dataset_folder = Path("dataset") dataset_folder.mkdir(exist_ok=True) # Function to get the latest dataset file def get_latest_dataset_file(): files = list(dataset_folder.glob("data_*.jsonl")) return max(files, key=os.path.getctime) if files else None # Check for existing dataset and create or append to it if latest_file := get_latest_dataset_file(): dataset_file = latest_file print(f"Appending to existing dataset file: {dataset_file}") else: dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl" print(f"Creating new dataset file: {dataset_file}") # Set up CommitScheduler for dataset uploads repo_id = "davanstrien/summer-reading-preferences" scheduler = CommitScheduler( repo_id=repo_id, repo_type="dataset", folder_path=dataset_folder, path_in_repo="data", every=1, # Upload every minute ) # Global dictionary to store votes votes = {} def generate_prompt(): if random.choice([True, False]): return detailed_genre_description_prompt() else: return basic_prompt() def get_and_store_prompt(): prompt = generate_prompt() print(prompt) # Keep this for debugging return prompt def generate_blurb(prompt): model_id = get_random_model() client = create_client(model_id) max_tokens = random.randint(100, 1000) chat_completion = client.chat.completions.create( model="tgi", messages=[ {"role": "user", "content": prompt}, ], stream=True, max_tokens=max_tokens, ) full_text = "" for message in chat_completion: full_text += message.choices[0].delta.content yield full_text, model_id return full_text, model_id # Return final result with model_id def generate_vote_id(user_id, blurb): return hashlib.md5(f"{user_id}:{blurb}".encode()).hexdigest() def log_blurb_and_vote( prompt, blurb, vote, user_info: gr.OAuthProfile | None, model_id, *args ): user_id = user_info.username if user_info is not None else str(uuid.uuid4()) vote_id = generate_vote_id(user_id, blurb) if vote_id in votes: gr.Info("You've already voted on this blurb!") return None, gr.Row.update(visible=False) votes[vote_id] = vote log_entry = { "timestamp": datetime.now().isoformat(), "prompt": prompt, "blurb": blurb, "vote": vote, "user_id": user_id, "model_id": model_id, } with scheduler.lock: with dataset_file.open("a") as f: f.write(json.dumps(log_entry) + "\n") gr.Info("Thank you for voting! Your feedback will be synced to the dataset.") return f"Logged: {vote} by user {user_id}", gr.Row.update(visible=False) # Create custom theme tufte_theme = TufteInspired() # Create Gradio interface with gr.Blocks(theme=tufte_theme) as demo: gr.Markdown("
Looking for your next summer read?
Would you read a book based on this LLM generated blurb?
Your vote will be added to this Hugging Face dataset