EITD committed on
Commit 1975e5f · 1 Parent(s): 7b75d63
Files changed (2)
  1. app.py +42 -49
  2. requirements.txt +8 -8
app.py CHANGED
@@ -1,25 +1,29 @@
- import gradio as gr
- # from huggingface_hub import InferenceClient
- # from peft import AutoPeftModelForCausalLM
- # from transformers import AutoTokenizer, TextStreamer, BitsAndBytesConfig
- from transformers import AutoTokenizer, AutoModelForCausalLM
+ from peft import AutoPeftModelForCausalLM
+ from transformers import AutoTokenizer
  """
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
  """
  # client = InferenceClient("EITD/lora_model", token=os.getenv("HF_TOKEN"))
 
- # model_name = "lora_model"
- # model = AutoPeftModelForCausalLM.from_pretrained(
- #     model_name,
- #     load_in_4bit = True,
+ model = AutoPeftModelForCausalLM.from_pretrained(
+     "EITD/lora_model_1", # YOUR MODEL YOU USED FOR TRAINING
+     load_in_4bit = False,
+ )
+ tokenizer = AutoTokenizer.from_pretrained("EITD/lora_model_1")
+
+ # messages = [{"role": "user", "content": "Continue the Fibonacci sequence: 1, 1, 2, 3, 5, 8,"},]
+
+ # inputs = tokenizer.apply_chat_template(
+ #     messages,
+ #     tokenize = True,
+ #     add_generation_prompt = True, # Must add for generation
+ #     return_tensors = "pt",
  # )
- # tokenizer = AutoTokenizer.from_pretrained(model_name)
 
- model_id = "EITD/model"
- filename = "unsloth.Q4_K_M.gguf"
+ # outputs = model.generate(input_ids = inputs, max_new_tokens = 64, use_cache = True,
+ #                          temperature = 1.5, min_p = 0.1)
 
- tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
- model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
+ # print(tokenizer.batch_decode(outputs))
 
  def respond(
      message,
@@ -53,24 +57,13 @@ def respond(
          # response += token
          # yield response
 
-     # inputs = tokenizer.apply_chat_template(
-     #     messages,
-     #     tokenize = True,
-     #     add_generation_prompt = True, # Must add for generation
-     #     return_tensors = "pt",
-     # )
-     conversation = ''
-     for msg in messages:
-         if msg['role'] == 'user':
-             conversation += f"User: {msg['content']}\n"
-         elif msg['role'] == 'assistant':
-             conversation += f"Assistant: {msg['content']}\n"
-     conversation += "Assistant: "
-     inputs = tokenizer.encode(conversation, return_tensors='pt')
-     # text_streamer = TextStreamer(tokenizer, skip_prompt = True)
-     # for response in model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = max_tokens, use_cache = True,
-     #                                temperature = temperature, min_p = top_p):
-     #     yield response
+     inputs = tokenizer.apply_chat_template(
+         messages,
+         tokenize = True,
+         add_generation_prompt = True, # Must add for generation
+         return_tensors = "pt",
+     )
+
      outputs = model.generate(input_ids = inputs, max_new_tokens = max_tokens, use_cache = True,
                               temperature = temperature, min_p = top_p)
      return tokenizer.batch_decode(outputs)
@@ -79,22 +72,22 @@ def respond(
  """
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
  """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )
+ # demo = gr.ChatInterface(
+ #     respond,
+ #     additional_inputs=[
+ #         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+ #         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+ #         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+ #         gr.Slider(
+ #             minimum=0.1,
+ #             maximum=1.0,
+ #             value=0.95,
+ #             step=0.05,
+ #             label="Top-p (nucleus sampling)",
+ #         ),
+ #     ],
+ # )
 
 
- if __name__ == "__main__":
-     demo.launch()
+ # if __name__ == "__main__":
+ #     demo.launch()
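For reference, the new loading and generation path can be exercised outside Gradio roughly as follows. This is a minimal sketch, not part of the commit: the respond() signature (message, history, system_message, max_tokens, temperature, top_p) is assumed from the original gr.ChatInterface wiring, and history is assumed to be a list of (user, assistant) pairs as that interface passed it.

# Minimal sketch (assumptions noted above), mirroring the committed PEFT + chat-template path.
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

model = AutoPeftModelForCausalLM.from_pretrained("EITD/lora_model_1", load_in_4bit=False)
tokenizer = AutoTokenizer.from_pretrained("EITD/lora_model_1")

def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Build role/content dicts, which apply_chat_template expects.
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:  # assumed (user, assistant) tuples
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Same flow as the committed respond(): template -> generate -> decode.
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=max_tokens,
        use_cache=True,
        temperature=temperature,
        min_p=top_p,
    )
    return tokenizer.batch_decode(outputs)

print(respond("Continue the Fibonacci sequence: 1, 1, 2, 3, 5, 8,", [], "You are a friendly Chatbot.", 64, 0.7, 0.95))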
requirements.txt CHANGED
@@ -1,10 +1,10 @@
- huggingface_hub==0.25.2
- # peft==0.13.2
+ # huggingface_hub==0.25.2
+ peft==0.13.2
  transformers==4.46.3
  # bitsandbytes==0.42.0
- torch==2.5.1
- gguf==0.10.0
- sentencepiece==0.2.0
- numpy<2.0.0
- accelerate==1.1.1
- unsloth==2024.11.9
+ # torch==2.5.1
+ # gguf==0.10.0
+ # sentencepiece==0.2.0
+ # numpy==1.26.4
+ # accelerate==1.1.1
+ # unsloth==2024.11.9