# --- Hugging Face file-viewer chrome captured with the file (not code) ---
# davanstrien's picture
# davanstrien HF staff
# chore: Modify log_blurb_and_vote function to handle already voted responses
# 98127fe
# raw / history / blame — 6.47 kB
import gradio as gr
import json
from datetime import datetime
from theme import TufteInspired
import uuid
from huggingface_hub import InferenceClient, CommitScheduler, hf_hub_download
from openai import OpenAI
from huggingface_hub import get_token, login
from prompts import detailed_genre_description_prompt, basic_prompt
import random
import os
from pathlib import Path
# Ensure you're logged in to Hugging Face
login(get_token())

# Models sampled uniformly at random for each blurb generation request.
MODELS = [
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
]

# Local folder where vote records are appended before being committed to the Hub.
dataset_folder = Path("dataset")
dataset_folder.mkdir(exist_ok=True)
# Function to get the latest dataset file
def get_latest_dataset_file(folder=None):
    """Return the most recently created ``data_*.jsonl`` shard in *folder*.

    Args:
        folder: Directory to search. Defaults to the module-level
            ``dataset_folder`` (backward compatible with the original
            zero-argument call).

    Returns:
        ``Path`` of the newest matching file, or ``None`` when no shard exists.
    """
    folder = dataset_folder if folder is None else Path(folder)
    files = list(folder.glob("data_*.jsonl"))
    # "Latest" is by filesystem creation time; ties resolve to the first match.
    return max(files, key=os.path.getctime) if files else None
# Check for existing dataset and create or append to it.
# Reusing the newest shard keeps all of one process's votes in a single file;
# a fresh UUID-named shard avoids collisions with files synced from the Hub.
latest_file = get_latest_dataset_file()
if latest_file:
    dataset_file = latest_file
    print(f"Appending to existing dataset file: {dataset_file}")
else:
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")
# Set up CommitScheduler for periodic dataset uploads to the Hub.
repo_id = (
    "davanstrien/summer-reading-preference"  # Replace with your desired dataset repo
)
scheduler = CommitScheduler(
    repo_id=repo_id,
    repo_type="dataset",
    folder_path=dataset_folder,
    path_in_repo="data",
    every=1,  # `every` is in minutes — upload every 1 minute
)
# Function to download existing dataset files
def download_existing_dataset():
    """Sync previously committed ``*.jsonl`` shards from the Hub into ``dataset_folder``.

    Best-effort: any failure (missing repo, network error) is logged and
    swallowed so app startup still succeeds.

    Note: the original implementation called ``hf_hub_download`` with a
    ``recursive=True`` keyword that the function does not accept and a
    ``filename`` that names a directory, so it always raised and no files
    were ever downloaded. ``snapshot_download`` with ``allow_patterns`` is
    the supported way to fetch a folder of files.
    """
    # Local import: only needed once at startup.
    from huggingface_hub import snapshot_download

    try:
        repo_path = snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            allow_patterns=["data/*.jsonl"],
        )
        for file in (Path(repo_path) / "data").glob("*.jsonl"):
            dest_file = dataset_folder / file.name
            if not dest_file.exists():
                dest_file.write_bytes(file.read_bytes())
                print(f"Downloaded existing dataset file: {dest_file}")
    except Exception as e:
        print(f"Error downloading existing dataset: {e}")
# Download existing dataset files at startup so local shards include
# votes committed by previous runs of this Space.
download_existing_dataset()
def get_random_model():
    """Pick one of MODELS at random, record it in the CHOSEN_MODEL global, and return it."""
    global CHOSEN_MODEL
    CHOSEN_MODEL = random.choice(MODELS)
    return CHOSEN_MODEL
def create_client(model_id):
    """Build an OpenAI-compatible client targeting the HF Inference API for *model_id*."""
    endpoint = f"https://api-inference.huggingface.co/models/{model_id}/v1"
    return OpenAI(base_url=endpoint, api_key=get_token())
# NOTE(review): this module-level client appears unused — generate_blurb()
# creates its own per-request client via create_client(), shadowing this name.
# Kept for backward compatibility; confirm nothing else imports it before removing.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-70B-Instruct/v1",
    api_key=get_token(),
)
def generate_prompt():
    """Return either the detailed genre-description prompt or the basic prompt, 50/50."""
    prompt_builder = random.choice([detailed_genre_description_prompt, basic_prompt])
    return prompt_builder()
def get_and_store_prompt():
    """Create a fresh prompt, echoing it to stdout for debugging before returning it."""
    new_prompt = generate_prompt()
    print(new_prompt)  # debugging aid
    return new_prompt
def generate_blurb(prompt):
    """Stream a book blurb for *prompt* from a randomly chosen model.

    Yields the accumulated text after each streamed chunk, so Gradio can
    render the blurb progressively. The randomized ``max_tokens`` varies
    blurb length between requests.

    Fix: OpenAI streaming responses include chunks whose ``delta.content``
    is ``None`` (e.g. the initial role chunk and the final stop chunk);
    concatenating those raised ``TypeError`` mid-stream. They are skipped.
    """
    model_id = get_random_model()
    client = create_client(model_id)
    max_tokens = random.randint(100, 1000)
    chat_completion = client.chat.completions.create(
        model="tgi",  # TGI endpoints ignore the model name; routing is via base_url
        messages=[
            {"role": "user", "content": prompt},
        ],
        stream=True,
        max_tokens=max_tokens,
    )
    full_text = ""
    for message in chat_completion:
        delta = message.choices[0].delta.content
        if delta:  # role-only / final chunks carry content=None
            full_text += delta
            yield full_text
# Modified function to log blurb and vote
def log_blurb_and_vote(
    prompt, blurb, vote, user_info: gr.OAuthProfile | None, has_voted, *args
):
    """Append one vote record to the local JSONL shard and update UI state.

    Returns a 3-tuple matching the click-handler outputs:
    (status message, new has_voted flag, voting-row visibility update).
    Anonymous users (no OAuth profile) get a random UUID as user_id.
    """
    # Guard against double-voting on the same blurb within a session.
    if has_voted:
        return (
            "You've already voted on this response.",
            has_voted,
            gr.Row.update(visible=True),
        )
    user_id = user_info.username if user_info is not None else str(uuid.uuid4())
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "prompt": prompt,
        "blurb": blurb,
        "vote": vote,
        "user_id": user_id,
        "model": CHOSEN_MODEL,  # global set by get_random_model() during generation
    }
    # Hold the scheduler's lock so an upload never sees a half-written line.
    with scheduler.lock:
        with dataset_file.open("a") as f:
            f.write(json.dumps(log_entry) + "\n")
    gr.Info("Thank you for voting!")
    # has_voted=True and hiding the row prevent a second vote on this blurb.
    return f"Logged: {vote} by user {user_id}", True, gr.Row.update(visible=False)
# Create custom theme
tufte_theme = TufteInspired()

# Create Gradio interface
with gr.Blocks(theme=tufte_theme) as demo:
    gr.Markdown("<h1 style='text-align: center;'>Would you read this book?</h1>")
    # NOTE(review): the link below points at a placeholder repo
    # ("your-username/your-dataset-repo") rather than repo_id — confirm/update.
    gr.Markdown(
        """<p style='text-align: center;'>Looking for your next summer read?
Would you read a book based on this LLM generated blurb? <br> Your vote will be added to <a href="https://huggingface.co/datasets/your-username/your-dataset-repo">this</a> Hugging Face dataset</p>"""
    )
    # Add the login button
    login_btn = gr.LoginButton()
    with gr.Row():
        generate_btn = gr.Button("Create a book", variant="primary")
    prompt_state = gr.State()  # prompt used for the current blurb
    blurb_output = gr.Markdown(label="Book blurb")
    has_voted = gr.State(False)  # per-session flag preventing double votes
    # Voting controls stay hidden until a blurb has finished streaming.
    with gr.Row(visible=False) as voting_row:
        upvote_btn = gr.Button("👍 would read")
        downvote_btn = gr.Button("👎 wouldn't read")
    vote_output = gr.Textbox(label="Vote Status", interactive=False, visible=False)

    def generate_and_show(prompt):
        # Placeholder text while streaming starts; hides voting and resets the flag.
        # NOTE(review): the `prompt` input is unused here — presumably kept to
        # match the event wiring; confirm.
        return "Generating...", gr.Row.update(visible=False), False

    def show_voting_buttons(blurb):
        # Re-show the voting row once the full blurb is available.
        return blurb, gr.Row.update(visible=True), False

    # Chain: build prompt -> show placeholder -> stream blurb -> reveal vote buttons.
    generate_btn.click(get_and_store_prompt, outputs=prompt_state).then(
        generate_and_show,
        inputs=prompt_state,
        outputs=[blurb_output, voting_row, has_voted],
    ).then(generate_blurb, inputs=prompt_state, outputs=blurb_output).then(
        show_voting_buttons,
        inputs=blurb_output,
        outputs=[blurb_output, voting_row, has_voted],
    )
    # Hidden textboxes supply the constant vote label to the shared handler.
    upvote_btn.click(
        log_blurb_and_vote,
        inputs=[
            prompt_state,
            blurb_output,
            gr.Textbox(value="upvote", visible=False),
            login_btn,
            has_voted,
        ],
        outputs=[vote_output, has_voted, voting_row],
    )
    downvote_btn.click(
        log_blurb_and_vote,
        inputs=[
            prompt_state,
            blurb_output,
            gr.Textbox(value="downvote", visible=False),
            login_btn,
            has_voted,
        ],
        outputs=[vote_output, has_voted, voting_row],
    )
# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch(debug=True)