import os
import re
import torch
import gradio as gr
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
    Trainer,
)
from peft import LoraConfig, PeftConfig, PeftModel
from trl import SFTTrainer
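# This Space runs inference only: it loads a 4-bit quantized base model, applies
# the "mohamedemam/essay_checker" LoRA adapter with PEFT, and serves it through a
# small Gradio interface. The LoRA/training hyperparameters below are kept from
# the fine-tuning setup and are not used at inference time.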
# The base model from the Hugging Face hub (kept from the fine-tuning script)
model_name = "bigscience/bloomz-7b1"
# Fine-tuned model name
new_model = "bigscience/bloomz-7b1"
# LoRA attention dimension
lora_r = 16
# Alpha parameter for LoRA scaling
lora_alpha = 16
# Dropout probability for LoRA layers
lora_dropout = 0.05
# Activate 4-bit precision base model loading
use_4bit = True
# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"
# Activate nested (double) quantization for 4-bit base models
use_nested_quant = False
# Output directory where checkpoints would be stored
output_dir = "./results"
# Number of training epochs
num_train_epochs = 1
# Enable fp16/bf16 training
fp16 = False
bf16 = False
# Batch size per GPU for training
per_device_train_batch_size = 1
# Batch size per GPU for evaluation
per_device_eval_batch_size = 4
# Number of update steps to accumulate gradients for
gradient_accumulation_steps = 8
# Enable gradient checkpointing
gradient_checkpointing = True
# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3
# Initial learning rate (AdamW optimizer)
learning_rate = 5e-5
# Weight decay
weight_decay = 0.001
# Optimizer to use
optim = "paged_adamw_8bit"
# Learning rate schedule
lr_scheduler_type = "constant"
# Number of training steps (-1 means use num_train_epochs instead)
max_steps = -1
# Ratio of steps used for linear warmup
warmup_ratio = 0.03
# Group sequences of similar length into batches
group_by_length = True
# Save a checkpoint every X update steps
save_steps = 100
# Log every X update steps
logging_steps = 25
# Maximum sequence length (None lets the trainer choose a default)
max_seq_length = None
# Pack multiple short examples into the same input sequence
packing = False
#device_map = {"": 0}
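# Resolve the compute dtype string to a torch dtype and build the 4-bit (NF4)
# quantization config used when loading the base model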
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
load_in_4bit=use_4bit,
bnb_4bit_quant_type=bnb_4bit_quant_type,
bnb_4bit_compute_dtype=compute_dtype,
bnb_4bit_use_double_quant=use_nested_quant,
)
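# Note: 4-bit loading through bitsandbytes generally requires a CUDA-capable GPU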
# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)
# Load the base model in 4-bit and apply the essay_checker LoRA adapter
config = PeftConfig.from_pretrained("mohamedemam/essay_checker")
model = AutoModelForCausalLM.from_pretrained("nfaheem/Marcoroni-7b-DPO-Merge", quantization_config=bnb_config)
model = PeftModel.from_pretrained(model, "mohamedemam/essay_checker")
model.eval()
# Tokenizer for the prompt; assumed here to be the base model's tokenizer
tokenizer = AutoTokenizer.from_pretrained("nfaheem/Marcoroni-7b-DPO-Merge")
def chat_format(context, question, answer):
    # Prompt template; the wording is kept exactly as used for fine-tuning
    return "Instruction:\n check answer is true or false of next quetion using context below:\n" + "context: " + context + "\nquetion:" + question + ".\n#student answer: " + answer + ".\n#response:"
# Generate the model's verdict for a (context, question, answer) triple,
# with a configurable number of new tokens
def generate_qa(context, question, answer, max_new_token):
    input_text = chat_format(context, question, answer)
    inputs = tokenizer(text=input_text, return_tensors='pt').to(model.device)
    output = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=max_new_token,
    )
    generated_text = tokenizer.batch_decode(output, skip_special_tokens=True)
    formatted_output = "\n\n".join(set(generated_text))
    return formatted_output
iface = gr.Interface(
    fn=generate_qa,
    inputs=[
        gr.Textbox(label="context"),
        gr.Textbox(label="question"),
        gr.Textbox(label="student answer"),
        gr.Slider(minimum=1, maximum=100, value=3, step=1, label="max new tokens"),
    ],
    # theme="red-black",  # not a built-in Gradio theme, so it is left disabled
    outputs=gr.Textbox(label="Generated Output"),
    title="check answers",
    description="Paste your context, question and student answer, then submit.",
)
# Launch the interface
iface.launch()
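# Example of calling the checker directly (a sketch with made-up values; the
# Gradio UI above is what the Space actually exposes):
#   context  = "Water boils at 100 degrees Celsius at sea level."
#   question = "At what temperature does water boil at sea level?"
#   answer   = "It boils at 100 C."
#   print(generate_qa(context, question, answer, max_new_token=10))
# The returned text echoes the prompt and appends the model's verdict after
# the trailing "#response:" marker.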