Spaces:
Runtime error
Runtime error
File size: 2,745 Bytes
953bf3d debfcf8 2f5c740 d7f1630 2f5c740 d7f1630 ce419f1 b1d6e77 d7f1630 730ef21 d552000 b463202 2f5c740 d7f1630 dfc8b82 debfcf8 b2b46dc debfcf8 c75952a debfcf8 2f5c740 9226230 80ae653 dfc8b82 b1d6e77 debfcf8 80ae653 debfcf8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import os
import random

import gradio as gr
import pandas as pd
from datasets import load_dataset
from transformers import T5ForConditionalGeneration, T5Tokenizer
# Hub id of the SteamSHP preference model; the tokenizer and the weights are
# both loaded from this one checkpoint.
_REWARD_CHECKPOINT = 'stanfordnlp/SteamSHP-flan-t5-large'

# Inference runs on CPU as configured here; use 'cuda' if you have a GPU.
device = 'cpu'
tokenizer = T5Tokenizer.from_pretrained(_REWARD_CHECKPOINT)
model = T5ForConditionalGeneration.from_pretrained(_REWARD_CHECKPOINT)
model = model.to(device)

# Models offered in the two dropdowns of the UI.
model_list = [
    'google/flan-t5-xxl',
    'bigscience/bloomz-7b1',
    'facebook/opt-iml-max-30b',
    'allenai/tk-instruct-11b-def-pos',
]

# Access token read from the environment (None when the variable is unset).
HF_TOKEN = os.environ.get("HF_TOKEN")
OUTPUTS_DATASET = "HuggingFaceH4/instruction-pilot-outputs-filtered"
ds = load_dataset(
    OUTPUTS_DATASET,
    split="train",
    use_auth_token=HF_TOKEN,
)
def process(model_A, model_B):
    """Let the SteamSHP reward model pick between two models on a random prompt.

    One random row is drawn from the module-level dataset ``ds``. The outputs
    stored in that row for ``model_A`` and ``model_B`` are inserted into the
    "POST / RESPONSE A / RESPONSE B" prompt, and the reward model generates a
    single token that names the response it prefers.

    Args:
        model_A: Value of the ``model`` field identifying the first model.
        model_B: Value of the ``model`` field identifying the second model.

    Returns:
        A ``(prompt, responses, preferred)`` tuple: the sampled prompt text,
        a DataFrame with the rows belonging to either model, and the name of
        the preferred model (``model_A`` when the reward model answers "A",
        otherwise ``model_B``).

    Raises:
        gr.Error: If the sampled row holds no output for one of the models.
    """
    # Index one random row directly instead of shuffling the whole dataset
    # on every request.
    sample = ds[random.randrange(len(ds))]
    prompt = sample["prompt"]
    df = pd.DataFrame.from_records(sample["filtered_outputs"])

    # Look up the first recorded output of each model; fail with a readable
    # message rather than a bare IndexError/KeyError when one is missing.
    responses = {}
    for name in (model_A, model_B):
        if "model" in df.columns:
            outputs = df.loc[df["model"] == name, "output"]
        else:
            outputs = pd.Series(dtype=object)
        if outputs.empty:
            raise gr.Error(
                f"The sampled prompt has no output from '{name}'. "
                "Please try again or pick a different model."
            )
        responses[name] = outputs.iloc[0]
    response_A = responses[model_A]
    response_B = responses[model_B]

    input_text = (
        f"POST: {prompt}"
        f"\n\n RESPONSE A: {response_A}"
        f"\n\n RESPONSE B: {response_B}"
        "\n\n Which response is better? RESPONSE"
    )
    input_ids = tokenizer([input_text], return_tensors='pt').input_ids.to(device)
    # A single new token is enough: it is the label of the preferred response.
    generated = model.generate(input_ids, max_new_tokens=1)
    preferred = tokenizer.batch_decode(generated, skip_special_tokens=True)[0].strip()

    result = model_A if preferred == 'A' else model_B
    return prompt, df[df['model'].isin([model_A, model_B])], result
# Text shown at the top of the Gradio page.
title = "Compare Instruction Models to see which one is more helpful"
# The dataset link is an f-string segment so the URL contains the actual
# repository id rather than a literal "{OUTPUTS_DATASET}" placeholder.
description = (
    "This app compares the outputs of various open-source, instruction-trained models from a "
    f"[dataset](https://huggingface.co/datasets/{OUTPUTS_DATASET}) of human demonstrations "
    "using the SteamSHP reward model trained on the "
    "[Stanford Human Preferences Dataset (SHP)](https://huggingface.co/datasets/stanfordnlp/SHP). "
    "Hit the button below to view a few random samples from the generated outputs"
)

# Two dropdowns choose the models to compare; `process` fills the prompt,
# the table of both models' responses, and the preferred model's name.
interface = gr.Interface(
    fn=process,
    inputs=[
        gr.Dropdown(choices=model_list, value=model_list[0], label='Model A'),
        gr.Dropdown(choices=model_list, value=model_list[1], label='Model B'),
    ],
    outputs=[
        gr.Textbox(label="Prompt"),
        gr.DataFrame(label="Model Responses"),
        gr.Textbox(label="Preferred Option"),
    ],
    title=title,
    description=description,
)
interface.launch(debug=True)