# Hugging Face Space app by davanstrien (commit f0f10e0, "fix id")
# Generates LLM book blurbs and collects would-read votes into a Hub dataset.
import json
import os
import random
import uuid
from datetime import datetime
from pathlib import Path

import gradio as gr
from huggingface_hub import (
    CommitScheduler,
    InferenceClient,
    get_token,
    hf_hub_download,
    login,
    snapshot_download,
)
from openai import OpenAI

from prompts import detailed_genre_description_prompt, basic_prompt
from theme import TufteInspired
# Ensure you're logged in to Hugging Face
# NOTE(review): get_token() returns None when no token is cached; login(None)
# would then fail — assumes the Space/host already has HF credentials set up.
login(get_token())
# Define available models
# Pool of chat models; one is picked uniformly at random per generation
# (see get_random_model below).
MODELS = [
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
]
# Mutated by get_random_model(); read by log_blurb_and_vote() so each vote
# record captures which model produced the blurb being voted on.
CHOSEN_MODEL = None
# Set up dataset storage
# Local folder whose contents the CommitScheduler periodically pushes to the Hub.
dataset_folder = Path("dataset")
dataset_folder.mkdir(exist_ok=True)
# Function to get the latest dataset file
def get_latest_dataset_file():
    """Return the most recently created data_*.jsonl file, or None if none exist."""
    candidates = sorted(dataset_folder.glob("data_*.jsonl"), key=os.path.getctime)
    return candidates[-1] if candidates else None
# Check for existing dataset and create or append to it
latest_file = get_latest_dataset_file()
if latest_file:
    # Reuse the newest existing shard so restarts keep appending to one file.
    dataset_file = latest_file
    print(f"Appending to existing dataset file: {dataset_file}")
else:
    # Fresh shard named with a UUID so concurrent/new instances never collide.
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")
# Set up CommitScheduler for dataset uploads
repo_id = (
    "davanstrien/summer-reading-preference"  # Replace with your desired dataset repo
)
# Background job: every 5 minutes commit the contents of dataset_folder to the
# Hub dataset repo under "data/". scheduler.lock must be held while appending
# locally (see log_blurb_and_vote) so writes don't race an in-flight commit.
scheduler = CommitScheduler(
    repo_id=repo_id,
    repo_type="dataset",
    folder_path=dataset_folder,
    path_in_repo="data",
    every=5,  # Upload every 5 minutes
)
# Function to download existing dataset files
def download_existing_dataset():
    """Sync dataset files already in the Hub repo into the local dataset folder.

    Uses ``snapshot_download`` restricted to ``data/*.jsonl``. The previous
    implementation called ``hf_hub_download(..., recursive=True)``, but
    ``hf_hub_download`` fetches a single file and has no ``recursive``
    parameter — the call raised ``TypeError``, which the broad except
    swallowed, so nothing was ever downloaded. Best-effort: any error is
    logged and startup continues with an empty/partial local folder.
    """
    try:
        snapshot_path = snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            allow_patterns=["data/*.jsonl"],
        )
        for file in (Path(snapshot_path) / "data").glob("*.jsonl"):
            dest_file = dataset_folder / file.name
            # Only copy shards we don't already have; local files may be newer
            # (they are the ones being appended to and pushed by the scheduler).
            if not dest_file.exists():
                dest_file.write_bytes(file.read_bytes())
                print(f"Downloaded existing dataset file: {dest_file}")
    except Exception as e:
        print(f"Error downloading existing dataset: {e}")


# Download existing dataset files at startup
download_existing_dataset()
def get_random_model():
    """Pick a model uniformly at random and record it in the CHOSEN_MODEL global."""
    global CHOSEN_MODEL
    CHOSEN_MODEL = random.choice(MODELS)
    return CHOSEN_MODEL
def create_client(model_id):
    """Build an OpenAI-compatible client targeting the HF Inference API for *model_id*."""
    endpoint = f"https://api-inference.huggingface.co/models/{model_id}/v1"
    return OpenAI(base_url=endpoint, api_key=get_token())
# NOTE(review): this module-level client appears unused — generate_blurb()
# builds its own per-request client via create_client(). Candidate for removal
# once confirmed nothing outside this view references it.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-70B-Instruct/v1",
    api_key=get_token(),
)
def generate_prompt():
    """Return either the detailed-genre prompt or the basic prompt, by coin flip."""
    pick_detailed = random.choice([True, False])
    return detailed_genre_description_prompt() if pick_detailed else basic_prompt()
def get_and_store_prompt():
    """Generate a prompt, echo it to stdout for debugging, and return it."""
    prompt = generate_prompt()
    print(prompt)  # Keep this for debugging
    return prompt
def generate_blurb(prompt):
    """Stream a book blurb for *prompt* from a randomly chosen model.

    Yields the accumulated text after each streamed chunk so Gradio renders
    progressive output. The token budget is randomized to vary blurb length.

    Fix: streamed chunks can arrive with an empty ``choices`` list or with
    ``delta.content is None`` (typical of role-only and final chunks); the
    previous ``full_text += message.choices[0].delta.content`` then raised
    ``TypeError``/``IndexError`` mid-stream. Such chunks are now skipped.
    """
    model_id = get_random_model()
    client = create_client(model_id)
    max_tokens = random.randint(100, 1000)
    chat_completion = client.chat.completions.create(
        model="tgi",  # TGI-backed endpoint: routing is via base_url, name is ignored
        messages=[
            {"role": "user", "content": prompt},
        ],
        stream=True,
        max_tokens=max_tokens,
    )
    full_text = ""
    for message in chat_completion:
        if not message.choices:
            continue
        delta = message.choices[0].delta.content
        if delta:
            full_text += delta
            yield full_text
# Function to log blurb and vote
def log_blurb_and_vote(prompt, blurb, vote, user_info: gr.OAuthProfile | None, *args):
    """Append one vote record to the local JSONL dataset file.

    Anonymous visitors (no OAuth profile) are tagged with a fresh random UUID.
    The scheduler lock is held while writing so the append cannot race the
    background commit job that pushes this folder to the Hub.
    """
    voter = str(uuid.uuid4()) if user_info is None else user_info.username
    record = {
        "timestamp": datetime.now().isoformat(),
        "prompt": prompt,
        "blurb": blurb,
        "vote": vote,
        "user_id": voter,
        "model": CHOSEN_MODEL,
    }
    with scheduler.lock:
        with dataset_file.open("a") as out:
            out.write(json.dumps(record) + "\n")
    gr.Info("Thank you for voting!")
    return f"Logged: {vote} by user {voter}"
# Create custom theme
tufte_theme = TufteInspired()

# Create Gradio interface
with gr.Blocks(theme=tufte_theme) as demo:
    gr.Markdown("<h1 style='text-align: center;'>Would you read this book?</h1>")
    # Fix: the dataset link previously pointed at the placeholder
    # "your-username/your-dataset-repo"; it now derives from repo_id, the repo
    # the CommitScheduler actually pushes votes to.
    gr.Markdown(
        f"""<p style='text-align: center;'>Looking for your next summer read?
        Would you read a book based on this LLM generated blurb? <br> Your vote will be added to <a href="https://huggingface.co/datasets/{repo_id}">this</a> Hugging Face dataset</p>"""
    )
    # Add the login button
    login_btn = gr.LoginButton()

    with gr.Row():
        generate_btn = gr.Button("Create a book", variant="primary")
    prompt_state = gr.State()
    blurb_output = gr.Markdown(label="Book blurb")
    # Voting controls stay hidden until a blurb has finished streaming.
    with gr.Row(visible=False) as voting_row:
        upvote_btn = gr.Button("👍 would read")
        downvote_btn = gr.Button("👎 wouldn't read")
    vote_output = gr.Textbox(label="Vote Status", interactive=False, visible=False)

    def generate_and_show(prompt):
        # Placeholder text while the stream starts; hide voting from the previous round.
        # NOTE(review): gr.Row.update is the Gradio 3.x API — confirm the pinned
        # gradio version before upgrading (Gradio 4 replaces it with gr.update/gr.Row()).
        return "Generating...", gr.Row.update(visible=False)

    def show_voting_buttons(blurb):
        return blurb, gr.Row.update(visible=True)

    # Chain: store a fresh prompt -> show placeholder -> stream blurb -> reveal voting.
    generate_btn.click(get_and_store_prompt, outputs=prompt_state).then(
        generate_and_show, inputs=prompt_state, outputs=[blurb_output, voting_row]
    ).then(generate_blurb, inputs=prompt_state, outputs=blurb_output).then(
        show_voting_buttons, inputs=blurb_output, outputs=[blurb_output, voting_row]
    )

    # Hidden textboxes carry the vote label; login_btn feeds gr.OAuthProfile | None
    # into log_blurb_and_vote's user_info parameter.
    upvote_btn.click(
        log_blurb_and_vote,
        inputs=[
            prompt_state,
            blurb_output,
            gr.Textbox(value="upvote", visible=False),
            login_btn,
        ],
        outputs=vote_output,
    )
    downvote_btn.click(
        log_blurb_and_vote,
        inputs=[
            prompt_state,
            blurb_output,
            gr.Textbox(value="downvote", visible=False),
            login_btn,
        ],
        outputs=vote_output,
    )
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the browser — acceptable for a demo Space.
    demo.launch(debug=True)