import os
import re
import torch
import gradio as gr
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
    Trainer,
)
from peft import LoraConfig, PeftConfig, PeftModel
from trl import SFTTrainer
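# This Space runs inference only: it loads a 4-bit quantized base model, applies
# the "mohamedemam/essay_checker" LoRA adapter with PEFT, and serves it through a
# small Gradio interface. The LoRA/training hyperparameters below are kept from
# the fine-tuning setup and are not used at inference time.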
# The base model from the Hugging Face hub (kept from the fine-tuning script)
model_name = "bigscience/bloomz-7b1"
# Fine-tuned model name
new_model = "bigscience/bloomz-7b1"
# LoRA attention dimension
lora_r = 16
# Alpha parameter for LoRA scaling
lora_alpha = 16
# Dropout probability for LoRA layers
lora_dropout = 0.05
# Activate 4-bit precision base model loading
use_4bit = True
# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"
# Activate nested (double) quantization for 4-bit base models
use_nested_quant = False
# Output directory where checkpoints would be stored
output_dir = "./results"
# Number of training epochs
num_train_epochs = 1
# Enable fp16/bf16 training
fp16 = False
bf16 = False
# Batch size per GPU for training
per_device_train_batch_size = 1
# Batch size per GPU for evaluation
per_device_eval_batch_size = 4
# Number of update steps to accumulate gradients for
gradient_accumulation_steps = 8
# Enable gradient checkpointing
gradient_checkpointing = True
# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3
# Initial learning rate (AdamW optimizer)
learning_rate = 5e-5
# Weight decay
weight_decay = 0.001
# Optimizer to use
optim = "paged_adamw_8bit"
# Learning rate schedule
lr_scheduler_type = "constant"
# Number of training steps (-1 means use num_train_epochs instead)
max_steps = -1
# Ratio of steps used for linear warmup
warmup_ratio = 0.03
# Group sequences of similar length into batches
group_by_length = True
# Save a checkpoint every X update steps
save_steps = 100
# Log every X update steps
logging_steps = 25
# Maximum sequence length (None lets the trainer choose a default)
max_seq_length = None
# Pack multiple short examples into the same input sequence
packing = False
#device_map = {"": 0}
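# Resolve the compute dtype string to a torch dtype and build the 4-bit (NF4)
# quantization config used when loading the base model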
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
load_in_4bit=use_4bit,
bnb_4bit_quant_type=bnb_4bit_quant_type,
bnb_4bit_compute_dtype=compute_dtype,
bnb_4bit_use_double_quant=use_nested_quant,
)
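# Note: 4-bit loading through bitsandbytes generally requires a CUDA-capable GPU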
# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)
# Load the base model in 4-bit and apply the essay_checker LoRA adapter
config = PeftConfig.from_pretrained("mohamedemam/essay_checker")
model = AutoModelForCausalLM.from_pretrained("nfaheem/Marcoroni-7b-DPO-Merge", quantization_config=bnb_config)
model = PeftModel.from_pretrained(model, "mohamedemam/essay_checker")
model.eval()
# Tokenizer for the prompt; assumed here to be the base model's tokenizer
tokenizer = AutoTokenizer.from_pretrained("nfaheem/Marcoroni-7b-DPO-Merge")
def chat_format(context, question, answer):
    # Prompt template; the wording is kept exactly as used for fine-tuning
    return "Instruction:\n check answer is true or false of next quetion using context below:\n" + "context: " + context + "\nquetion:" + question + ".\n#student answer: " + answer + ".\n#response:"
# Generate the model's verdict for a (context, question, answer) triple,
# with a configurable number of new tokens
def generate_qa(context, question, answer, max_new_token):
    input_text = chat_format(context, question, answer)
    inputs = tokenizer(text=input_text, return_tensors='pt').to(model.device)
    output = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=max_new_token,
    )
    generated_text = tokenizer.batch_decode(output, skip_special_tokens=True)
    formatted_output = "\n\n".join(set(generated_text))
    return formatted_output
iface = gr.Interface(
    fn=generate_qa,
    inputs=[
        gr.Textbox(label="context"),
        gr.Textbox(label="question"),
        gr.Textbox(label="student answer"),
        gr.Slider(minimum=1, maximum=100, value=3, step=1, label="max new tokens"),
    ],
    # theme="red-black",  # not a built-in Gradio theme, so it is left disabled
    outputs=gr.Textbox(label="Generated Output"),
    title="check answers",
    description="Paste your context, question and student answer, then submit.",
)
# Launch the interface
iface.launch()
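# Example of calling the checker directly (a sketch with made-up values; the
# Gradio UI above is what the Space actually exposes):
#   context  = "Water boils at 100 degrees Celsius at sea level."
#   question = "At what temperature does water boil at sea level?"
#   answer   = "It boils at 100 C."
#   print(generate_qa(context, question, answer, max_new_token=10))
# The returned text echoes the prompt and appends the model's verdict after
# the trailing "#response:" marker.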