File size: 2,745 Bytes
953bf3d
 
debfcf8
 
 
 
 
 
 
 
 
 
 
2f5c740
 
 
 
 
 
d7f1630
 
 
 
 
 
2f5c740
d7f1630
ce419f1
b1d6e77
d7f1630
 
730ef21
 
 
d552000
 
b463202
2f5c740
d7f1630
dfc8b82
debfcf8
 
b2b46dc
 
 
debfcf8
 
c75952a
debfcf8
2f5c740
 
9226230
80ae653
dfc8b82
b1d6e77
 
debfcf8
 
80ae653
 
debfcf8
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import gradio as gr
import os

import pandas as pd
from datasets import load_dataset


# SteamSHP: a FLAN-T5 model fine-tuned to judge which of two responses is preferred.
from transformers import T5ForConditionalGeneration, T5Tokenizer
device = 'cpu' # change to 'cuda' if you have a GPU

tokenizer = T5Tokenizer.from_pretrained('stanfordnlp/SteamSHP-flan-t5-large')
model = T5ForConditionalGeneration.from_pretrained('stanfordnlp/SteamSHP-flan-t5-large').to(device)

# Instruction-tuned models whose pre-generated outputs will be compared.
model_list = [
 'google/flan-t5-xxl',
'bigscience/bloomz-7b1',
'facebook/opt-iml-max-30b',
'allenai/tk-instruct-11b-def-pos']

# Hugging Face access token; presumably provided as a Space secret — verify in deployment config.
HF_TOKEN = os.getenv("HF_TOKEN")

OUTPUTS_DATASET = "HuggingFaceH4/instruction-pilot-outputs-filtered"

# Dataset of prompts with pre-generated outputs per model (see "filtered_outputs" usage below).
ds = load_dataset(OUTPUTS_DATASET, split="train", use_auth_token=HF_TOKEN)

def process(model_A, model_B):
    """Sample one random prompt and ask SteamSHP which model's response is better.

    Args:
        model_A: model id whose output is presented as RESPONSE A.
        model_B: model id whose output is presented as RESPONSE B.

    Returns:
        Tuple of (prompt text, DataFrame with the two models' rows,
        preferred model id).

    Raises:
        ValueError: if the sampled prompt has no stored output for one of
            the selected models (previously surfaced as a bare IndexError).
    """
    # Draw a single random example from the outputs dataset.
    sample = ds.shuffle().select(range(1))[0]
    prompt = sample["prompt"]

    df = pd.DataFrame.from_records(sample["filtered_outputs"])
    response_A_series = df[df['model'] == model_A]["output"]
    response_B_series = df[df['model'] == model_B]["output"]
    # Guard against a sampled prompt missing an output for a chosen model.
    if response_A_series.empty or response_B_series.empty:
        raise ValueError(
            f"No stored output for one of the selected models: {model_A}, {model_B}"
        )

    response_A = response_A_series.values[0]
    response_B = response_B_series.values[0]

    # SteamSHP expects this exact POST / RESPONSE A / RESPONSE B prompt template.
    input_text = (
        f"POST: {prompt}\n\n RESPONSE A: {response_A}\n\n RESPONSE B: {response_B}"
        "\n\n Which response is better? RESPONSE"
    )
    x = tokenizer([input_text], return_tensors='pt').input_ids.to(device)
    # A single new token suffices: the model answers with 'A' or 'B'.
    y = model.generate(x, max_new_tokens=1)
    preferred = tokenizer.batch_decode(y, skip_special_tokens=True)[0]
    result = model_A if preferred == 'A' else model_B
    return prompt, df[df['model'].isin([model_A, model_B])], result

title = "Compare Instruction Models to see which one is more helpful"
# NOTE: must be an f-string — the original plain string rendered the literal
# text "{OUTPUTS_DATASET}" instead of the dataset id in the markdown link.
description = f"This app compares the outputs of various open-source, instruction-trained models from a [dataset](https://huggingface.co/datasets/{OUTPUTS_DATASET}) of human demonstrations using the SteamSHP reward model trained on the [Stanford Human Preferences Dataset (SHP)](https://huggingface.co/datasets/stanfordnlp/SHP).  Hit the button below to view a few random samples from the generated outputs"

# Two dropdowns select the models to compare; outputs show the prompt, the
# two responses side by side, and the reward model's preferred option.
interface = gr.Interface(
    fn=process,
    inputs=[
        gr.Dropdown(choices=model_list, value=model_list[0], label='Model A'),
        gr.Dropdown(choices=model_list, value=model_list[1], label='Model B'),
    ],
    outputs=[
        gr.Textbox(label="Prompt"),
        gr.DataFrame(label="Model Responses"),
        gr.Textbox(label="Preferred Option"),
    ],
    title=title,
    description=description,
)

interface.launch(debug=True)