EITD committed
Commit 9e0025f · 1 Parent(s): dbd38f1

test tokenizer

Files changed (1):
  1. app.py +13 -26
app.py CHANGED
@@ -2,10 +2,7 @@ import gradio as gr
 # from huggingface_hub import InferenceClient
 # from peft import AutoPeftModelForCausalLM
 # from transformers import AutoTokenizer, TextStreamer, BitsAndBytesConfig
-# from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
-import os
-os.environ["CUDA_VISIBLE_DEVICES"] = ""
-from unsloth import FastLanguageModel
+from transformers import AutoTokenizer, AutoModelForCausalLM
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
@@ -18,23 +15,11 @@ For more information on `huggingface_hub` Inference API support, please check th
 # )
 # tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-# model_id = "EITD/model"
-# filename = "unsloth.Q4_K_M.gguf"
+model_id = "EITD/model"
+filename = "unsloth.Q4_K_M.gguf"
 
-# tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
-# model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
-
-max_seq_length = 2048
-dtype = None # or torch.float32
-load_in_4bit = False
-
-model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name = "EITD/lora_model", # YOUR MODEL YOU USED FOR TRAINING
-    max_seq_length = max_seq_length,
-    dtype = dtype,
-    load_in_4bit = load_in_4bit,
-)
-FastLanguageModel.for_inference(model)
+tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
+model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)
 
 def respond(
     message,
@@ -67,12 +52,14 @@ def respond(
 
     # response += token
     # yield response
-    inputs = tokenizer.apply_chat_template(
-        messages,
-        tokenize = True,
-        add_generation_prompt = True, # Must add for generation
-        return_tensors = "pt",
-    )
+
+    # inputs = tokenizer.apply_chat_template(
+    #     messages,
+    #     tokenize = True,
+    #     add_generation_prompt = True, # Must add for generation
+    #     return_tensors = "pt",
+    # )
+    inputs = tokenizer.encode(messages, return_tensors='pt')
     # text_streamer = TextStreamer(tokenizer, skip_prompt = True)
     # for response in model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = max_tokens, use_cache = True,
     #     temperature = temperature, min_p = top_p):
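As a quick sanity check of the new loading path in the second hunk, the snippet below loads the same GGUF checkpoint through transformers and runs a short generation. This is a minimal sketch, assuming the repo and filename from the diff ("EITD/model", "unsloth.Q4_K_M.gguf") and a transformers version with GGUF support (the `gguf` package installed); the prompt and generation settings are illustrative only.

from transformers import AutoTokenizer, AutoModelForCausalLM

# Assumed from the diff: repo id and GGUF filename.
model_id = "EITD/model"
filename = "unsloth.Q4_K_M.gguf"

# transformers dequantizes the GGUF weights into regular torch tensors at load
# time, so this is heavier than serving the same file through llama.cpp.
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename)

# One-off generation to confirm the tokenizer/model pair round-trips text.
prompt = "Hello, who are you?"
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
output_ids = model.generate(input_ids, max_new_tokens=32)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))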
 
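On the tokenizer change itself (the point of this "test tokenizer" commit): in the stock gradio ChatInterface handler, `messages` is built as a list of role/content dicts, while `tokenizer.encode` expects a plain string, so passing the list directly may fail or mis-tokenize. The commented-out `apply_chat_template` call is the usual bridge. A minimal sketch, assuming that message format and that the tokenizer ships a chat template:

# `messages` is assumed to be [{"role": "...", "content": "..."}, ...]
# as built earlier in respond().

# One step: render the chat template and tokenize straight to tensors.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,  # append the assistant turn marker before generating
    return_tensors="pt",
)

# Equivalent two-step form, if a plain string prompt is wanted first:
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer.encode(prompt, return_tensors="pt")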