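# essay_checker / app.py
# Author: mohamedemam
# Commit: "Update app.py" (92482c7, verified) - 3.63 kB
# NOTE: the lines above were page chrome from the Hugging Face file viewer
# ("raw", "history blame"); kept here as comments so the module parses.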
import gradio as gr
from transformers import AutoTokenizer
import re
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM
from transformers import BitsAndBytesConfig
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM
# The model that you want to train from the Hugging Face hub
import os
import torch
from datasets import load_dataset
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
HfArgumentParser,
TrainingArguments,
pipeline,
logging,
Trainer
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
# ---------------------------------------------------------------------------
# Settings carried over from the fine-tuning script.
# In the visible part of this file only the four quantization values
# (use_4bit, bnb_4bit_compute_dtype, bnb_4bit_quant_type, use_nested_quant)
# are read; the rest are kept so that anything importing them keeps working.
# ---------------------------------------------------------------------------

# --- Hub identifiers ---
model_name = "bigscience/bloomz-7b1"
new_model = "bigscience/bloomz-7b1"  # fine-tuned model name (same id as above)

# --- LoRA ---
lora_r = lora_alpha = 16  # attention dimension / alpha
lora_dropout = 0.05       # dropout probability for LoRA layers

# --- 4-bit quantization (consumed by BitsAndBytesConfig) ---
use_4bit = True
bnb_4bit_compute_dtype = "float16"  # compute dtype for 4-bit base models
bnb_4bit_quant_type = "nf4"         # quantization type (fp4 or nf4)
use_nested_quant = False

# --- Training-style options (names mirror trainer arguments) ---
output_dir = "./results"
num_train_epochs = 1
fp16 = bf16 = False
per_device_train_batch_size = 1
per_device_eval_batch_size = 4
gradient_accumulation_steps = 8
gradient_checkpointing = True
max_grad_norm = 0.3
learning_rate = 5e-5
weight_decay = 0.001
optim = "paged_adamw_8bit"
lr_scheduler_type = "constant"
max_steps = -1
warmup_ratio = 0.03
group_by_length = True
save_steps = 100
logging_steps = 25

# --- Sequence handling ---
max_seq_length = False
packing = False
#device_map = {"": 0}
# Hub ids used below: the base causal LM and the LoRA adapter applied to it.
BASE_MODEL_ID = "nfaheem/Marcoroni-7b-DPO-Merge"
ADAPTER_ID = "mohamedemam/essay_checker"

# Resolve the configured dtype name (e.g. "float16") to the torch dtype object.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Quantization settings passed to the base model at load time.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16.
# The is_available() guard matters: get_device_capability() raises when no
# CUDA device is present, which would abort start-up before the model loads.
if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model, then wrap it with the fine-tuned adapter weights.
config = PeftConfig.from_pretrained(ADAPTER_ID)
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, quantization_config=bnb_config)
model = PeftModel.from_pretrained(model, ADAPTER_ID)
model.eval()  # inference only: switch off dropout and other train-time behaviour

# generate_qa() relies on a module-level `tokenizer`; it was never created.
# Loaded from the base model repository so it matches the model's vocabulary.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
def chat_Format(context, quetion, answer):
    """Build the instruction prompt sent to the model.

    The wording (including its spelling) is reproduced exactly, because the
    prompt text is part of what the model receives at runtime.

    Args:
        context: Reference passage the answer is checked against.
        quetion: The question that was asked.
        answer: The student's answer to evaluate.

    Returns:
        The prompt string, ending in ``#response:`` so the model's
        continuation is the verdict.
    """
    # A single f-string replaces the chain of ``+`` concatenations; it also
    # formats non-str values instead of raising TypeError.
    return (
        "Instruction:\n check answer is true or false of next quetion using context below:\n"
        f"context: {context}\n"
        f"quetion:{quetion}.\n"
        f"#student answer: {answer}.\n"
        "#response:"
    )
# Gradio callback: build the grading prompt, run the model on it, and
# return the decoded text. The generation length is configurable.
def generate_qa(context, quetion, answer, max_new_token):
    """Run the model on a grading prompt and return the decoded text.

    Args:
        context: Reference passage the answer is checked against.
        quetion: The question that was asked.
        answer: The student's answer to evaluate.
        max_new_token: Upper bound on the number of tokens to generate
            (supplied by the UI slider).

    Returns:
        The decoded output sequences, de-duplicated and joined by blank lines.
    """
    prompt = chat_Format(context, quetion, answer)
    encoded = tokenizer(text=prompt, return_tensors='pt')
    # Inputs must live on the same device as the model weights.
    encoded = encoded.to(model.device)

    # Generate with configurable parameters.
    # The attention mask comes from the same tokenizer output as the ids
    # (the original referenced an undefined name here).
    output_ids = model.generate(
        input_ids=encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        max_new_tokens=int(max_new_token),  # slider values may arrive as float
    )

    decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # joined result is deterministic (a set has no defined order).
    return "\n\n".join(dict.fromkeys(decoded))
# Web UI: three free-text fields (context, question, student answer) and a
# slider for the generation length, all passed positionally to generate_qa.
# Components come from the top-level gradio namespace with `value=` for the
# default; the legacy `gr.inputs` / `gr.outputs` modules and the
# "text2" / "text3" shortcuts are not available in current Gradio releases.
# The former theme="red-black" argument is dropped because it does not name
# a built-in theme.
iface = gr.Interface(
    fn=generate_qa,
    inputs=[
        gr.Textbox(label="Context"),
        gr.Textbox(label="Question"),
        gr.Textbox(label="Student answer"),
        gr.Slider(minimum=1, maximum=100, value=3, step=1, label="max token"),
    ],
    outputs=gr.Textbox(label="Generated Output"),
    title="check answers",
    description="put you context ",
)

# Launch the interface
iface.launch()