import os
import re

import gradio as gr
import torch
from datasets import load_dataset
from peft import LoraConfig, PeftConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    Trainer,
    logging,
    pipeline,
)
from trl import SFTTrainer
# The model that you want to train from the Hugging Face hub
model_name = "bigscience/bloomz-7b1"
# The instruction dataset to use
# Fine-tuned model name
new_model = "bigscience/bloomz-7b1"
# LoRA attention dimension
lora_r = 16
# Alpha parameter for LoRA scaling
lora_alpha = 16
# Dropout probability for LoRA layers
lora_dropout = 0.05

# Activate 4-bit precision base model loading
use_4bit = True
# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"
# Activate nested (double) quantization for 4-bit base models
use_nested_quant = False
# Output directory where model predictions and checkpoints are stored
output_dir = "./results"
# Number of training epochs
num_train_epochs = 1
# Enable fp16/bf16 training
fp16 = False
bf16 = False
# Batch size per GPU for training
per_device_train_batch_size = 1
# Batch size per GPU for evaluation
per_device_eval_batch_size = 4
# Number of update steps to accumulate gradients for
gradient_accumulation_steps = 8
# Enable gradient checkpointing
gradient_checkpointing = True
# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3
# Initial learning rate
learning_rate = 5e-5
# Weight decay applied to all layers except bias/LayerNorm weights
weight_decay = 0.001
# Optimizer to use
optim = "paged_adamw_8bit"
# Learning rate schedule
lr_scheduler_type = "constant"
# Number of training steps (overrides num_train_epochs when > 0)
max_steps = -1
# Ratio of steps for linear warmup
warmup_ratio = 0.03
# Group sequences of similar length into batches (saves memory, speeds up training)
group_by_length = True
# Save checkpoint every X update steps
save_steps = 100
# Log every X update steps
logging_steps = 25
# Maximum sequence length (None lets the trainer decide)
max_seq_length = None
# Pack multiple short examples into the same input sequence
packing = False
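
# Note: the LoRA and training hyperparameters above appear to be carried over from the
# QLoRA fine-tuning script; this Space only runs inference, so they are not used below.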
# device_map = {"": 0}
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)
# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)
# Load base model (4-bit) and the essay-checker LoRA adapter
config = PeftConfig.from_pretrained("mohamedemam/essay_checker")
model = AutoModelForCausalLM.from_pretrained(
    "nfaheem/Marcoroni-7b-DPO-Merge",
    quantization_config=bnb_config,
)
model = PeftModel.from_pretrained(model, "mohamedemam/essay_checker")
model.eval()
# The original script never loaded a tokenizer; assume the base model's tokenizer here
tokenizer = AutoTokenizer.from_pretrained("nfaheem/Marcoroni-7b-DPO-Merge")
def chat_format(context, question, answer):
    # Prompt template; the wording (including "quetion") is kept as the adapter was trained on it
    return ("Instruction:\n check answer is true or false of next quetion using context below:\n"
            + "context: " + context + "\nquetion:" + question + ".\n#student answer: " + answer + ".\n#response:")
# Function to check a student answer against a question and context, with a configurable token budget
def generate_qa(context, question, answer, max_new_token):
    input_text = chat_format(context, question, answer)
    inputs = tokenizer(text=input_text, return_tensors='pt').to(model.device)
    # Generate with configurable parameters
    output = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=int(max_new_token),
    )
    generated_text = tokenizer.batch_decode(output, skip_special_tokens=True)
    formatted_output = "\n\n".join(set(generated_text))
    return formatted_output
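
# Example call with hypothetical inputs (the adapter is expected to continue after "#response:"):
# generate_qa("Cairo is the capital of Egypt.",
#             "What is the capital of Egypt?",
#             "Cairo", 5)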
# Build the Gradio interface
iface = gr.Interface(
    fn=generate_qa,
    inputs=[
        gr.Textbox(label="context"),
        gr.Textbox(label="question"),
        gr.Textbox(label="student answer"),
        gr.Slider(minimum=1, maximum=100, value=3, step=1, label="max new tokens"),
    ],
    outputs=gr.Textbox(label="Generated Output"),
    title="Check answers",
    description="Provide a context, a question, and the student's answer.",
)

# Launch the interface
iface.launch()
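
# When running outside Hugging Face Spaces (e.g. in a notebook), iface.launch(share=True)
# can be used instead to get a temporary public link.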