mohamedemam committed on
Commit
92482c7
·
verified ·
1 Parent(s): 321ca54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -0
app.py CHANGED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer
3
+ import re
4
+ from peft import PeftModel, PeftConfig
5
+ from transformers import AutoModelForCausalLM
6
+ from transformers import BitsAndBytesConfig
7
+ import torch
8
+
9
+ from peft import PeftModel, PeftConfig
10
+ from transformers import AutoModelForCausalLM
11
+ # The model that you want to train from the Hugging Face hub
12
+ import os
13
+ import torch
14
+ from datasets import load_dataset
15
+ from transformers import (
16
+ AutoModelForCausalLM,
17
+ AutoTokenizer,
18
+ BitsAndBytesConfig,
19
+ HfArgumentParser,
20
+ TrainingArguments,
21
+ pipeline,
22
+ logging,
23
+ Trainer
24
+ )
25
+ from peft import LoraConfig, PeftModel
26
+ from trl import SFTTrainer
27
# ---------------------------------------------------------------------------
# Configuration constants.
#
# Most of these are fine-tuning hyperparameters; in the visible part of this
# file only the quantization settings (use_4bit, bnb_4bit_*, use_nested_quant,
# compute_dtype) are actually consumed.
# ---------------------------------------------------------------------------

# Hub identifiers: base model and fine-tuned model name.
model_name = "bigscience/bloomz-7b1"
new_model = "bigscience/bloomz-7b1"

# LoRA: attention dimension (rank), scaling factor, dropout probability.
lora_r = 16
lora_alpha = 16
lora_dropout = 0.05

# bitsandbytes quantization.
use_4bit = True
bnb_4bit_compute_dtype = "float16"  # name of a torch dtype, resolved below
bnb_4bit_quant_type = "nf4"  # quantization type (fp4 or nf4)
use_nested_quant = False

# Training output location and schedule.
output_dir = "./results"
num_train_epochs = 1
fp16 = False
bf16 = False
per_device_train_batch_size = 1
per_device_eval_batch_size = 4
gradient_accumulation_steps = 8
gradient_checkpointing = True
max_grad_norm = 0.3
learning_rate = 5e-5
weight_decay = 0.001
optim = "paged_adamw_8bit"
lr_scheduler_type = "constant"
max_steps = -1
warmup_ratio = 0.03
group_by_length = True
save_steps = 100
logging_steps = 25

# Sequence handling.
max_seq_length = False
packing = False
# device_map = {"": 0}

# Resolve the dtype name above to the actual torch dtype object.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
86
+
87
# Quantization settings passed to the base model loader.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16.
# The capability query raises when no CUDA device is present, so only run it
# when CUDA is actually available; the hint is purely informational.
if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)
101
+
102
# Load the adapter config, the quantized base model, and the adapter weights.
_ADAPTER_ID = "mohamedemam/essay_checker"
_BASE_MODEL_ID = "nfaheem/Marcoroni-7b-DPO-Merge"

config = PeftConfig.from_pretrained(_ADAPTER_ID)
model = AutoModelForCausalLM.from_pretrained(_BASE_MODEL_ID, quantization_config=bnb_config)
model = PeftModel.from_pretrained(model, _ADAPTER_ID)
model.eval()

# generate_qa() reads a module-level `tokenizer`; without this line every
# request fails with NameError. It is loaded from the same checkpoint as the
# base model so the vocabulary matches the weights.
tokenizer = AutoTokenizer.from_pretrained(_BASE_MODEL_ID)
107
def chat_Format(context, quetion, answer):
    """Build the grading prompt for one context/question/answer triple.

    All three arguments must be strings. The prompt ends with ``#response:``
    so the model's continuation is the verdict.
    """
    pieces = (
        "Instruction:\n check answer is true or false of next quetion using context below:\n",
        "context: ",
        context,
        "\nquetion:",
        quetion,
        ".\n#student answer: ",
        answer,
        ".\n#response:",
    )
    return "".join(pieces)
109
+ # Create a Wikipedia API instance
110
+
111
+
112
+ # Function to check a student's answer against the context, with a configurable generation length
113
def generate_qa(context, quetion, answer, max_new_token):
    """Ask the model whether a student's answer is correct for a question.

    Args:
        context: Reference text the answer is judged against.
        quetion: The question that was asked.
        answer: The student's answer to evaluate.
        max_new_token: Upper bound on the number of tokens to generate.

    Returns:
        The decoded model output (prompt plus continuation); distinct
        sequences are joined by blank lines.
    """
    input_text = chat_Format(context, quetion, answer)
    # Move the encoded prompt to the model's device; the tokenizer returns
    # CPU tensors, while the quantized model may live on an accelerator.
    encoded = tokenizer(text=input_text, return_tensors="pt").to(model.device)

    # The original passed `w['attention_mask']`, an undefined name; the mask
    # comes from the same tokenizer output as the input ids.
    output = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        # UI sliders may deliver a float; generate() expects an integer.
        max_new_tokens=int(max_new_token),
    )
    generated_text = tokenizer.batch_decode(output, skip_special_tokens=True)

    # De-duplicate while keeping generation order (a set has arbitrary order).
    return "\n\n".join(dict.fromkeys(generated_text))
126
# Web UI: three free-text inputs (context, question, student answer) plus a
# slider for the generation length, mapped positionally onto generate_qa().
# "text2"/"text3" are not valid component shortcuts, and gr.inputs/gr.outputs
# are the removed legacy namespaces, so the components are built explicitly.
iface = gr.Interface(
    fn=generate_qa,
    inputs=[
        gr.Textbox(label="context"),
        gr.Textbox(label="question"),
        gr.Textbox(label="student answer"),
        gr.Slider(minimum=1, maximum=100, value=3, step=1, label="max token"),
    ],
    outputs=gr.Textbox(label="Generated Output"),
    theme="red-black",
    title="check answers",
    description="put you context ",
)
# Launch the interface
iface.launch()