aizanlabs committed
Commit 8cbdf09 · verified · 1 Parent(s): 2021133

Update app.py

Files changed (1): app.py +14 -16
app.py CHANGED
@@ -13,7 +13,14 @@ from datetime import datetime
 import json
 import gradio as gr
 import re
-from unsloth import FastLanguageModel
+# from unsloth import FastLanguageModel
+
+import transformers
+from transformers import BloomForCausalLM
+from transformers import BloomForTokenClassification
+from transformers import BloomForTokenClassification
+from transformers import BloomTokenizerFast
+import torch
 class DocumentRetrievalAndGeneration:
     def __init__(self, embedding_model_name, lm_model_id, data_folder):
         # hf_token = os.getenv('HF_TOKEN')
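Review note: the added import block pulls in `BloomForTokenClassification` twice, and neither that class nor the bare `import transformers` is used anywhere in the changed hunks. A trimmed import set, assuming the only new dependency really is the Bloom causal-LM path (only these hunks are visible), might look like:

```python
# Minimal imports for the Bloom path added in this commit.
# BloomForTokenClassification is dropped: it is imported twice above
# and never called in any changed hunk.
import torch  # kept in case generation is later moved to a GPU device
from transformers import BloomForCausalLM, BloomTokenizerFast
```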
@@ -64,10 +71,10 @@ class DocumentRetrievalAndGeneration:
         return generate_text
     def initialize_llm2(self,model_id):
 
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        model = AutoModelForCausalLM.from_pretrained(model_id)
-        FastLanguageModel.for_inference(model) # Enable native 2x faster inference
-
+        tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-1b3", local_files_only=True)
+        model = BloomForCausalLM.from_pretrained("bigscience/bloom-1b3", local_files_only=True)
+        result_length = 200
+        inputs = tokenizer(prompt, return_tensors="pt")
         # return generate_text
 
     def generate_response_with_timeout(self, model_inputs):
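Review note: in the new `initialize_llm2`, `tokenizer`, `model`, `result_length`, and `inputs` are method locals that vanish when the method returns, the `model_id` parameter is ignored in favor of a hard-coded checkpoint, and `prompt` is not defined in this scope, so the final line raises `NameError` as soon as the method runs. A sketch of the same initialization with the objects kept on `self` (assuming later methods are meant to reuse them):

```python
def initialize_llm2(self, model_id):
    # Keep the loaded objects on self so the generation code can
    # reach them later instead of re-creating them.
    # local_files_only=True is kept from the diff and assumes the
    # checkpoint is already present in the local cache.
    self.tokenizer = BloomTokenizerFast.from_pretrained(
        "bigscience/bloom-1b3", local_files_only=True
    )
    self.model = BloomForCausalLM.from_pretrained(
        "bigscience/bloom-1b3", local_files_only=True
    )
    # Length budget applied at generation time; tokenizing the prompt
    # is deferred until a prompt actually exists.
    self.result_length = 200
```

Passing `model_id` through to `from_pretrained` instead of the hard-coded name would also restore the parameter's purpose.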
@@ -131,18 +138,9 @@ class DocumentRetrievalAndGeneration:
 
         # decoded = self.llm.tokenizer.batch_decode(generated_ids)
         # generated_response = decoded[0]
-
-        inputs = tokenizer(
-        [
-            alpaca_prompt.format(
-                "", # instruction
-                prompt, # input
-                "", # output - leave this blank for generation!
-            )
-        ], return_tensors = "pt")#.to("cuda")
+        generated_response=tokenizer.decode(model.generate(inputs["input_ids"], max_length=result_length,no_repeat_ngram_size=2)[0])
+        print(generated_response)
 
-        outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
-        tokenizer.batch_decode(outputs)
         match1 = re.search(r'\[/INST\](.*?)</s>', generated_response, re.DOTALL)
 
         match2 = re.search(r'Solution:(.*?)</s>', generated_response, re.DOTALL | re.IGNORECASE)
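Review note: the new generation line reuses the bare names `tokenizer`, `model`, `inputs`, and `result_length`, which were locals of `initialize_llm2`, so as written in these hunks it raises `NameError`. Separately, `max_length` counts the prompt tokens too, so a long retrieved context can leave no room for new text; `max_new_tokens` budgets only the completion. A sketch of this step against the `self.`-stored objects from the previous note:

```python
# Tokenize the prompt here, where it exists, rather than at init time.
inputs = self.tokenizer(prompt, return_tensors="pt")

output_ids = self.model.generate(
    inputs["input_ids"],
    max_new_tokens=self.result_length,  # budget the completion only
    no_repeat_ngram_size=2,             # kept from the diff
)
generated_response = self.tokenizer.decode(output_ids[0])
print(generated_response)
```

One caveat on the regexes that follow: `[/INST]` comes from the Llama/Mistral chat format, so Bloom will only echo it if the prompt itself contains it; `</s>` can still appear, since it is Bloom's end-of-sequence token, but `match1` may stop matching after this change.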
 