aizanlabs committed on
Commit 9c2bb60 · verified · 1 Parent(s): 89e6c61

Update app.py

Files changed (1)
  1. app.py +45 -40
app.py CHANGED
@@ -71,13 +71,13 @@ class DocumentRetrievalAndGeneration:
         return generate_text
     def initialize_llm2(self,model_id):
 
-        model_name = "mistralai/Mistral-7B-Instruct-v0.2"
-        pipeline = transformers.pipeline(
-            "text-generation",
-            model=model_name,
-            model_kwargs={"torch_dtype": torch.bfloat16},
-            device="cpu",
-        )
+        # model_name = "mistralai/Mistral-7B-Instruct-v0.2"
+        # pipeline = transformers.pipeline(
+        #     "text-generation",
+        #     model=model_name,
+        #     model_kwargs={"torch_dtype": torch.bfloat16},
+        #     device="cpu",
+        # )
 
 
         # return generate_text
@@ -144,7 +144,11 @@ class DocumentRetrievalAndGeneration:
         Solution:"NO SOLUTION AVAILABLE"
         </s>
         """
-
+        messages = [
+            {"role": "user", "content": prompt},
+        ]
+        pipe = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
+        generated_response=pipe(messages)
         # messages = [{"role": "user", "content": prompt}]
         # encodeds = self.llm.tokenizer.apply_chat_template(messages, return_tensors="pt")
         # model_inputs = encodeds.to(self.llm.device)
@@ -155,44 +159,45 @@ class DocumentRetrievalAndGeneration:
 
         # decoded = self.llm.tokenizer.batch_decode(generated_ids)
         # generated_response = decoded[0]
-        messages = []
-        # Check if history is None or empty and handle accordingly
-        if history:
-            for user_msg, assistant_msg in history:
-                messages.append({"role": "user", "content": user_msg})
-                messages.append({"role": "assistant", "content": assistant_msg})
+        #########################################################
+        # messages = []
+        # # Check if history is None or empty and handle accordingly
+        # if history:
+        #     for user_msg, assistant_msg in history:
+        #         messages.append({"role": "user", "content": user_msg})
+        #         messages.append({"role": "assistant", "content": assistant_msg})
 
-        # Always add the current user message
-        messages.append({"role": "user", "content": message})
+        # # Always add the current user message
+        # messages.append({"role": "user", "content": message})
 
-        # Construct the prompt using the pipeline's tokenizer
-        prompt = pipeline.tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True
-        )
+        # # Construct the prompt using the pipeline's tokenizer
+        # prompt = pipeline.tokenizer.apply_chat_template(
+        #     messages,
+        #     tokenize=False,
+        #     add_generation_prompt=True
+        # )
 
-        # Generate the response
-        terminators = [
-            pipeline.tokenizer.eos_token_id,
-            pipeline.tokenizer.convert_tokens_to_ids("")
-        ]
+        # # Generate the response
+        # terminators = [
+        #     pipeline.tokenizer.eos_token_id,
+        #     pipeline.tokenizer.convert_tokens_to_ids("")
+        # ]
 
-        # Adjust the temperature slightly above given to ensure variety
-        adjusted_temp = temperature + 0.1
+        # # Adjust the temperature slightly above given to ensure variety
+        # adjusted_temp = temperature + 0.1
 
-        # Generate outputs with adjusted parameters
-        outputs = pipeline(
-            prompt,
-            max_new_tokens=max_new_tokens,
-            do_sample=True,
-            temperature=adjusted_temp,
-            top_p=0.9
-        )
+        # # Generate outputs with adjusted parameters
+        # outputs = pipeline(
+        #     prompt,
+        #     max_new_tokens=max_new_tokens,
+        #     do_sample=True,
+        #     temperature=adjusted_temp,
+        #     top_p=0.9
+        # )
 
-        # Extract the generated text, skipping the length of the prompt
-        generated_text = outputs[0]["generated_text"]
-        generated_response = generated_text[len(prompt):]
+        # # Extract the generated text, skipping the length of the prompt
+        # generated_text = outputs[0]["generated_text"]
+        # generated_response = generated_text[len(prompt):]
 
         match1 = re.search(r'\[/INST\](.*?)</s>', generated_response, re.DOTALL)
 
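
For reference, the new code path relies on the transformers text-generation pipeline accepting chat-style message lists directly. Below is a minimal, self-contained sketch of that call pattern; it assumes a recent transformers release with chat-aware pipelines, and the example prompt text and max_new_tokens value are illustrative rather than taken from app.py. When chat messages are passed in, the pipeline returns a list with one dict whose "generated_text" field holds the conversation with the new assistant turn appended, so the reply has to be indexed out to get a plain string before regex post-processing like the re.search call above.

from transformers import pipeline

# Build the chat pipeline once and reuse it (the model is downloaded on first use).
pipe = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")

# A single-turn chat request; the content string here is only an example.
messages = [{"role": "user", "content": "Summarize the retrieved context in one sentence."}]

# For chat input, outputs[0]["generated_text"] is the message list including the
# newly generated assistant turn; take its "content" to get a plain string.
outputs = pipe(messages, max_new_tokens=256, do_sample=False)
assistant_reply = outputs[0]["generated_text"][-1]["content"]
print(assistant_reply)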