Spaces:
Sleeping
Sleeping
File size: 5,914 Bytes
b2e385a 88d9c40 2655ad8 b2e385a 5a3f658 b2e385a 3ef94a5 b2e385a a328dd2 2655ad8 19dd1b9 dce9e22 37a83d8 3ef94a5 37a83d8 dce9e22 5a3f658 37a83d8 5a3f658 37a83d8 5a3f658 b2e385a 5a3f658 3ef94a5 5a3f658 3ef94a5 b2e385a 37a83d8 dce9e22 37a83d8 f921425 dce9e22 f921425 37a83d8 f921425 37a83d8 f921425 dce9e22 37a83d8 88d9c40 b2e385a 37a83d8 0537fbe 37a83d8 b0ec887 b2e385a 0cd2bb2 3ef94a5 b2e385a 2655ad8 f921425 2655ad8 b0ec887 37a83d8 2655ad8 5a3f658 37a83d8 5a3f658 3ef94a5 b0ec887 37a83d8 88d9c40 dce9e22 4e8ec3f dce9e22 5a3f658 4e8ec3f b0ec887 203a7f7 5a3f658 88d9c40 dce9e22 f921425 dce9e22 b2e385a 37a83d8 f921425 dce9e22 88d9c40 f921425 3ef94a5 dce9e22 b2e385a 37a83d8 f921425 37a83d8 4e8ec3f f921425 b2e385a 37a83d8 dce9e22 f921425 4e8ec3f 2655ad8 f921425 37a83d8 0537fbe f921425 3ef94a5 4e8ec3f 2655ad8 f921425 37a83d8 0537fbe f921425 3ef94a5 4e8ec3f 88d9c40 b0ec887 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
import hashlib
import json
import os
import random
import uuid
from datetime import datetime
from pathlib import Path
import gradio as gr
from huggingface_hub import CommitScheduler, get_token, login
from openai import OpenAI
from prompts import basic_prompt, detailed_genre_description_prompt
from theme import TufteInspired
# Authenticate against the Hugging Face Hub with the token read from the
# HF_TOKEN environment variable (None is passed through if it is unset).
login(os.environ.get("HF_TOKEN"))
# Candidate chat models; one of these is picked at random for every blurb.
MODELS = [
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
]
def get_random_model():
    """Return one model id drawn uniformly at random from ``MODELS``."""
    chosen_model = random.choice(MODELS)
    return chosen_model
def create_client(model_id):
    """Build an ``OpenAI`` client whose base URL targets *model_id*.

    The client talks to the ``/v1`` route under the model's
    ``api-inference.huggingface.co`` URL and authenticates with the token
    returned by ``get_token()``.
    """
    endpoint = f"https://api-inference.huggingface.co/models/{model_id}/v1"
    return OpenAI(base_url=endpoint, api_key=get_token())
# Local folder that holds the JSONL vote logs; created on first run and
# reused (without error) when it already exists.
dataset_folder = Path("dataset")
dataset_folder.mkdir(parents=False, exist_ok=True)
# Look up the newest existing dataset file, if any
def get_latest_dataset_file():
    """Return the ``data_*.jsonl`` file in ``dataset_folder`` with the greatest
    ``os.path.getctime`` value, or ``None`` when no such file exists."""
    return max(
        dataset_folder.glob("data_*.jsonl"),
        key=os.path.getctime,
        default=None,
    )
# Reuse the newest dataset file when one exists; otherwise start a fresh,
# uniquely named one. (The new file is only named here, not created on disk.)
latest_file = get_latest_dataset_file()
if not latest_file:
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")
else:
    dataset_file = latest_file
    print(f"Appending to existing dataset file: {dataset_file}")
# Background uploader: periodically commits the contents of dataset_folder
# to the "data" directory of the dataset repo below.
repo_id = "davanstrien/summer-reading-preferences"
scheduler = CommitScheduler(
    repo_id=repo_id,
    repo_type="dataset",
    folder_path=dataset_folder,
    path_in_repo="data",
    every=1,  # upload interval: every minute
)
# In-memory registry of votes already cast, keyed by the id produced by
# generate_vote_id(); used to reject repeat votes on the same blurb.
votes: dict[str, str] = {}
def generate_prompt():
    """Return a prompt, picking the detailed-genre or basic variant at random."""
    use_detailed = random.choice([True, False])
    return detailed_genre_description_prompt() if use_detailed else basic_prompt()
def get_and_store_prompt():
    """Generate a new prompt, echo it to stdout, and return it.

    The returned value is what the UI stores in its prompt state.
    """
    new_prompt = generate_prompt()
    print(new_prompt)  # Keep this for debugging
    return new_prompt
def generate_blurb(prompt):
    """Stream a blurb for *prompt* from a randomly selected model.

    Args:
        prompt: Text sent as the single user message of the chat request.

    Yields:
        ``(text_so_far, model_id)`` after each streamed chunk, where
        ``text_so_far`` is all blurb text received up to that point.

    Returns:
        ``(full_text, model_id)`` as the generator's return value; this is
        only visible to callers that inspect ``StopIteration.value``.
    """
    model_id = get_random_model()
    client = create_client(model_id)
    # Randomise the length budget so blurbs vary in size.
    max_tokens = random.randint(100, 1000)
    chat_completion = client.chat.completions.create(
        model="tgi",
        messages=[
            {"role": "user", "content": prompt},
        ],
        stream=True,
        max_tokens=max_tokens,
    )
    full_text = ""
    for message in chat_completion:
        # A streamed chunk may arrive without any choices; there is nothing
        # to append in that case, so skip it instead of indexing into [].
        if not message.choices:
            continue
        # delta.content is optional in the OpenAI client and can be None;
        # concatenating None onto a str would raise TypeError.
        piece = message.choices[0].delta.content
        if piece:
            full_text += piece
        yield full_text, model_id
    return full_text, model_id  # Return final result with model_id
def generate_vote_id(user_id, blurb):
    """Return the hex MD5 digest of ``"<user_id>:<blurb>"``.

    The digest serves as a lookup key for spotting repeat votes; it is not
    used for anything security-sensitive in this file.
    """
    hasher = hashlib.md5()
    hasher.update(f"{user_id}:{blurb}".encode())
    return hasher.hexdigest()
def log_blurb_and_vote(
    prompt,
    blurb,
    vote,
    model_id,
    user_info: gr.OAuthProfile | None,
):
    """Record one vote on a blurb and hide the voting row.

    Args:
        prompt: Prompt that produced the blurb.
        blurb: Blurb text being voted on.
        vote: Vote value to store.
        model_id: Id of the model that generated the blurb.
        user_info: Profile of the voter, or ``None`` for an anonymous voter.

    Returns:
        ``(status_message, row_update)``. ``status_message`` is ``None`` when
        the same user id has already voted on this blurb; ``row_update``
        always hides the voting row.
    """
    if user_info is None:
        # No profile available: fall back to a fresh random identifier.
        user_id = str(uuid.uuid4())
    else:
        user_id = user_info.username

    vote_id = generate_vote_id(user_id, blurb)
    if vote_id in votes:
        gr.Info("You've already voted on this blurb!")
        return None, gr.Row.update(visible=False)

    votes[vote_id] = vote
    record = dict(
        timestamp=datetime.now().isoformat(),
        prompt=prompt,
        blurb=blurb,
        vote=vote,
        user_id=user_id,
        model_id=model_id,
    )
    # Hold the scheduler's lock while appending so the write happens under
    # the same lock the CommitScheduler exposes for this folder.
    with scheduler.lock, dataset_file.open("a") as sink:
        sink.write(f"{json.dumps(record)}\n")

    gr.Info("Thank you for voting! Your feedback will be synced to the dataset.")
    return f"Logged: {vote} by user {user_id}", gr.Row.update(visible=False)
# Create custom theme
tufte_theme = TufteInspired()

# Create Gradio interface
with gr.Blocks(theme=tufte_theme) as demo:
    gr.Markdown("<h1 style='text-align: center;'>Would you read this book?</h1>")
    # The link targets the dataset repo the votes are uploaded to (repo_id)
    # rather than a placeholder URL.
    gr.Markdown(
        "<p style='text-align: center;'>Looking for your next summer read?\n"
        "Would you read a book based on this LLM generated blurb? <br> "
        "Your vote will be added to "
        f'<a href="https://huggingface.co/datasets/{repo_id}">this</a>'
        " Hugging Face dataset</p>"
    )

    with gr.Row():
        login_btn = gr.LoginButton(size="sm")
    with gr.Row():
        generate_btn = gr.Button("Create a book", variant="primary")

    # Per-session values passed between the chained event handlers below.
    prompt_state = gr.State()
    blurb_output = gr.Markdown(label="Book blurb")
    user_state = gr.State()
    model_state = gr.State()

    # Voting buttons start hidden and are revealed once a blurb is complete.
    with gr.Row(visible=False) as voting_row:
        upvote_btn = gr.Button("👍 would read")
        downvote_btn = gr.Button("👎 wouldn't read")

    vote_output = gr.Textbox(label="Vote Status", interactive=False, visible=True)

    def generate_and_show(prompt, user_info):
        """Show a placeholder, hide the voting row, keep *user_info*, and
        clear the stored model id before generation starts."""
        return "Generating...", gr.Row.update(visible=False), user_info, None

    def show_voting_buttons(blurb, model_id):
        """Pass the finished blurb and model id through and reveal the
        voting row."""
        return blurb, gr.Row.update(visible=True), model_id

    # Pipeline: pick a prompt -> reset the UI -> stream the blurb -> show votes.
    generate_btn.click(get_and_store_prompt, outputs=prompt_state).then(
        generate_and_show,
        inputs=[prompt_state, login_btn],
        outputs=[blurb_output, voting_row, user_state, model_state],
    ).then(
        generate_blurb, inputs=prompt_state, outputs=[blurb_output, model_state]
    ).then(
        show_voting_buttons,
        inputs=[blurb_output, model_state],
        outputs=[blurb_output, voting_row, model_state],
    )

    # Each vote button supplies its vote value through a hidden textbox.
    upvote_btn.click(
        log_blurb_and_vote,
        inputs=[
            prompt_state,
            blurb_output,
            gr.Textbox(value="upvote", visible=False),
            model_state,
            user_state,
        ],
        outputs=[vote_output, voting_row],
    )
    downvote_btn.click(
        log_blurb_and_vote,
        inputs=[
            prompt_state,
            blurb_output,
            gr.Textbox(value="downvote", visible=False),
            model_state,
            user_state,
        ],
        outputs=[vote_output, voting_row],
    )

if __name__ == "__main__":
    demo.launch(debug=True)
|