Spaces:
Running
Running
File size: 6,064 Bytes
e6d141a 88d9c40 b2e385a e6d141a dd2079c e6d141a dd2079c a328dd2 2655ad8 e6d141a 299e22b dd2079c f0f10e0 dd2079c 299e22b 3ef94a5 e6d141a 5a3f658 37a83d8 dce9e22 37a83d8 f921425 dce9e22 f921425 37a83d8 f921425 299e22b f921425 dce9e22 e6d141a 37a83d8 88d9c40 e6d141a b0ec887 2655ad8 f921425 2655ad8 b0ec887 299e22b 2655ad8 dd2079c e6d141a b0ec887 37a83d8 88d9c40 dce9e22 4e8ec3f dce9e22 dd2079c 4e8ec3f b0ec887 e6d141a 88d9c40 dce9e22 f921425 dce9e22 f921425 dce9e22 88d9c40 f921425 e6d141a dce9e22 e6d141a 86629e2 f921425 e6d141a 86629e2 4e8ec3f f921425 e6d141a dce9e22 f921425 4e8ec3f 2655ad8 f921425 e6d141a f921425 e6d141a 4e8ec3f 2655ad8 f921425 e6d141a f921425 e6d141a 4e8ec3f 88d9c40 b0ec887 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 |
import gradio as gr
import json
from datetime import datetime
from theme import TufteInspired
import uuid
from huggingface_hub import InferenceClient, CommitScheduler, hf_hub_download
from openai import OpenAI
from huggingface_hub import get_token, login
from prompts import detailed_genre_description_prompt, basic_prompt
import random
import os
from pathlib import Path
# Authenticate against the Hugging Face Hub with whatever token get_token() finds.
login(token=get_token())

# Chat models a blurb can be generated with; get_random_model() picks from these.
MODELS = [
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
]

# Model id selected by the most recent get_random_model() call (None until then).
CHOSEN_MODEL = None

# Local directory that holds the JSONL vote logs; created on first start.
dataset_folder = Path("dataset")
dataset_folder.mkdir(exist_ok=True)
def get_latest_dataset_file():
    """Return the newest ``data_*.jsonl`` file in ``dataset_folder``.

    Files are ranked by ``os.path.getctime``. ``None`` is returned when the
    folder contains no matching file.
    """
    candidates = dataset_folder.glob("data_*.jsonl")
    return max(candidates, key=os.path.getctime, default=None)
# Reuse the newest local log file when there is one, otherwise start a new one.
# NOTE(review): this selection runs before download_existing_dataset() is
# called further down the file, so files fetched from the Hub are never
# candidates here -- confirm that this ordering is intended.
latest_file = get_latest_dataset_file()
if latest_file is None:
    dataset_file = dataset_folder / f"data_{uuid.uuid4()}.jsonl"
    print(f"Creating new dataset file: {dataset_file}")
else:
    dataset_file = latest_file
    print(f"Appending to existing dataset file: {dataset_file}")

# Hub dataset repository that receives the vote logs.
# Replace with your desired dataset repo.
repo_id = "davanstrien/summer-reading-preference"

# Background uploader: pushes the content of ``dataset_folder`` to the
# ``data`` directory of the dataset repository.
scheduler = CommitScheduler(
    folder_path=dataset_folder,
    path_in_repo="data",
    repo_id=repo_id,
    repo_type="dataset",
    every=5,  # Upload every 5 minutes
)
def download_existing_dataset():
    """Copy JSONL files already stored in the Hub dataset into ``dataset_folder``.

    The files under ``data/`` of the dataset repository ``repo_id`` are fetched
    with ``snapshot_download`` and every ``*.jsonl`` file that does not yet
    exist locally is copied into ``dataset_folder``. Existing local files are
    never overwritten.

    ``hf_hub_download`` fetches a single file and accepts no ``recursive``
    argument, so it cannot be used to retrieve the whole ``data`` directory;
    ``snapshot_download`` with ``allow_patterns`` is used instead.

    The download is best effort: any failure (missing repository, no network,
    missing credentials, ...) is reported on stdout and otherwise ignored so
    that the app can still start.
    """
    # Imported here so the function does not rely on the file-level import list.
    from huggingface_hub import snapshot_download

    try:
        snapshot_dir = snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            allow_patterns="data/*.jsonl",
        )
        # The scheduler uploads to ``data`` in the repo, so look there.
        for file in (Path(snapshot_dir) / "data").glob("*.jsonl"):
            dest_file = dataset_folder / file.name
            if not dest_file.exists():
                dest_file.write_bytes(file.read_bytes())
                print(f"Downloaded existing dataset file: {dest_file}")
    except Exception as e:
        # Deliberately broad: a failed download must not prevent startup.
        print(f"Error downloading existing dataset: {e}")


# Download existing dataset files at startup
download_existing_dataset()
def get_random_model():
    """Pick a random entry of ``MODELS``, remember it, and return it.

    The choice is stored in the module-level ``CHOSEN_MODEL`` so that it can
    be read later by other functions in this file.
    """
    global CHOSEN_MODEL
    CHOSEN_MODEL = random.choice(MODELS)
    return CHOSEN_MODEL
def create_client(model_id):
    """Build an ``OpenAI`` client that targets ``model_id`` on the HF Inference API.

    The client is pointed at the ``/v1`` endpoint of the given model and
    authenticates with the token returned by ``get_token()``.
    """
    endpoint = f"https://api-inference.huggingface.co/models/{model_id}/v1"
    return OpenAI(base_url=endpoint, api_key=get_token())
# Default module-level client, bound to the Llama 3 70B Instruct endpoint.
# generate_blurb() creates its own per-request client and does not use this one.
client = create_client("meta-llama/Meta-Llama-3-70B-Instruct")
def generate_prompt():
    """Return a prompt from one of the two prompt builders, chosen at random.

    A coin flip decides between ``detailed_genre_description_prompt`` and
    ``basic_prompt``; the selected builder is called and its result returned.
    """
    use_detailed = random.choice([True, False])
    build_prompt = detailed_genre_description_prompt if use_detailed else basic_prompt
    return build_prompt()
def get_and_store_prompt():
    """Generate a fresh prompt, echo it to stdout, and return it.

    The return value is what the UI stores in its prompt state.
    """
    new_prompt = generate_prompt()
    # Keep this for debugging
    print(new_prompt)
    return new_prompt
def generate_blurb(prompt):
    """Stream a book blurb for ``prompt`` from a randomly chosen model.

    A model is picked with ``get_random_model()`` and queried through a client
    from ``create_client()`` with a random ``max_tokens`` between 100 and 1000.

    Args:
        prompt: The user message sent to the chat model.

    Yields:
        The text accumulated so far, once per received stream chunk, so the
        UI can render the blurb while it is still being generated.
    """
    model_id = get_random_model()
    client = create_client(model_id)
    max_tokens = random.randint(100, 1000)
    chat_completion = client.chat.completions.create(
        model="tgi",
        messages=[
            {"role": "user", "content": prompt},
        ],
        stream=True,
        max_tokens=max_tokens,
    )
    full_text = ""
    for message in chat_completion:
        # Some chunks carry no choices at all; there is nothing to append.
        if not message.choices:
            continue
        # ``delta.content`` can be None (e.g. role-only or final chunk), and
        # concatenating None to a str would raise TypeError mid-stream.
        full_text += message.choices[0].delta.content or ""
        yield full_text
def log_blurb_and_vote(prompt, blurb, vote, user_info: gr.OAuthProfile | None, *args):
    """Append one vote record to the dataset file and acknowledge it.

    Args:
        prompt: Prompt the blurb was generated from.
        blurb: The blurb text that was voted on.
        vote: The vote value to record.
        user_info: Profile of the logged-in user, or ``None`` for anonymous
            visitors, in which case a random UUID is used as the user id.
        *args: Extra positional values; accepted and ignored.

    Returns:
        A status string naming the vote and the user id it was logged under.
    """
    if user_info is not None:
        user_id = user_info.username
    else:
        user_id = str(uuid.uuid4())

    # NOTE(review): CHOSEN_MODEL is a module-level global written by
    # get_random_model(); with several users generating at once it may not be
    # the model that produced *this* blurb -- confirm whether that matters.
    record = dict(
        timestamp=datetime.now().isoformat(),
        prompt=prompt,
        blurb=blurb,
        vote=vote,
        user_id=user_id,
        model=CHOSEN_MODEL,
    )

    # Hold the scheduler lock while appending the JSON line to the log file.
    with scheduler.lock, dataset_file.open("a") as log_file:
        log_file.write(f"{json.dumps(record)}\n")

    gr.Info("Thank you for voting!")
    return f"Logged: {vote} by user {user_id}"
# Create custom theme
tufte_theme = TufteInspired()

# Create Gradio interface
with gr.Blocks(theme=tufte_theme) as demo:
    gr.Markdown("<h1 style='text-align: center;'>Would you read this book?</h1>")
    # The link targets the dataset repo the votes are actually uploaded to
    # (``repo_id``) instead of a placeholder URL.
    gr.Markdown(
        f"""<p style='text-align: center;'>Looking for your next summer read?
Would you read a book based on this LLM generated blurb? <br> Your vote will be added to <a href="https://huggingface.co/datasets/{repo_id}">this</a> Hugging Face dataset</p>"""
    )

    # Add the login button
    login_btn = gr.LoginButton()

    with gr.Row():
        generate_btn = gr.Button("Create a book", variant="primary")

    # Holds the prompt of the current blurb between the chained callbacks.
    prompt_state = gr.State()
    blurb_output = gr.Markdown(label="Book blurb")

    # Voting buttons stay hidden until a blurb has been fully generated.
    with gr.Row(visible=False) as voting_row:
        upvote_btn = gr.Button("👍 would read")
        downvote_btn = gr.Button("👎 wouldn't read")

    vote_output = gr.Textbox(label="Vote Status", interactive=False, visible=False)

    def generate_and_show(prompt):
        """Show a placeholder and hide the voting row while a blurb is generated."""
        # gr.update() is used because the per-component ``gr.Row.update``
        # classmethod no longer exists in Gradio 4.
        return "Generating...", gr.update(visible=False)

    def show_voting_buttons(blurb):
        """Pass the finished blurb through and reveal the voting row."""
        return blurb, gr.update(visible=True)

    # 1) create a prompt, 2) show the placeholder, 3) stream the blurb,
    # 4) reveal the voting buttons.
    generate_btn.click(get_and_store_prompt, outputs=prompt_state).then(
        generate_and_show, inputs=prompt_state, outputs=[blurb_output, voting_row]
    ).then(generate_blurb, inputs=prompt_state, outputs=blurb_output).then(
        show_voting_buttons, inputs=blurb_output, outputs=[blurb_output, voting_row]
    )

    # Hidden textboxes carry the constant vote value into the shared handler.
    upvote_btn.click(
        log_blurb_and_vote,
        inputs=[
            prompt_state,
            blurb_output,
            gr.Textbox(value="upvote", visible=False),
            login_btn,
        ],
        outputs=vote_output,
    )
    downvote_btn.click(
        log_blurb_and_vote,
        inputs=[
            prompt_state,
            blurb_output,
            gr.Textbox(value="downvote", visible=False),
            login_btn,
        ],
        outputs=vote_output,
    )

if __name__ == "__main__":
    demo.launch(debug=True)
|