import gradio as gr
import os
import pandas as pd
from datasets import load_dataset
from transformers import T5ForConditionalGeneration, T5Tokenizer

device = 'cpu'  # change to 'cuda' if you have a GPU

# SteamSHP is a preference model trained to judge which of two responses is more helpful.
tokenizer = T5Tokenizer.from_pretrained('stanfordnlp/SteamSHP-flan-t5-large')
model = T5ForConditionalGeneration.from_pretrained('stanfordnlp/SteamSHP-flan-t5-large').to(device)

model_list = [
    'google/flan-t5-xxl',
    'bigscience/bloomz-7b1',
    'facebook/opt-iml-max-30b',
    'allenai/tk-instruct-11b-def-pos',
]

HF_TOKEN = os.getenv("HF_TOKEN")
OUTPUTS_DATASET = "HuggingFaceH4/instruction-pilot-outputs-filtered"
ds = load_dataset(OUTPUTS_DATASET, split="train", use_auth_token=HF_TOKEN)


def process(model_A, model_B):
    # Sample one random prompt along with the outputs each model produced for it.
    sample_ds = ds.shuffle().select(range(1))
    sample = sample_ds[0]
    prompt = sample["prompt"]
    df = pd.DataFrame.from_records(sample["filtered_outputs"])

    # Pull out the response text for each selected model
    # (assumes each record carries 'model' and 'output' fields).
    response_A = df[df['model'] == model_A]['output'].iloc[0]
    response_B = df[df['model'] == model_B]['output'].iloc[0]

    # Build the input in the format SteamSHP expects:
    # POST: <prompt> \n\n RESPONSE A: ... \n\n RESPONSE B: ... \n\n Which response is better? RESPONSE
    input_text = (
        "POST: " + prompt
        + "\n\n RESPONSE A: " + response_A
        + "\n\n RESPONSE B: " + response_B
        + "\n\n Which response is better? RESPONSE"
    )

    x = tokenizer([input_text], return_tensors='pt').input_ids.to(device)
    # SteamSHP answers with a single token: 'A' or 'B'.
    y = model.generate(x, max_new_tokens=1)
    preferred = tokenizer.batch_decode(y, skip_special_tokens=True)[0]
    return preferred, df


title = "Compare Instruction Models to see which one is more helpful"
interface = gr.Interface(
    fn=process,
    inputs=[
        gr.Dropdown(choices=model_list, value=model_list[0], label='Model A'),
        gr.Dropdown(choices=model_list, value=model_list[1], label='Model B'),
    ],
    outputs=[
        gr.Textbox(label="Preferred Option"),
        gr.DataFrame(label="Model Responses"),
    ],
    title=title,
)
interface.launch(debug=True)