Update app.py
app.py
CHANGED
@@ -71,13 +71,13 @@ class DocumentRetrievalAndGeneration:
         return generate_text
     def initialize_llm2(self,model_id):
 
-        model_name = "mistralai/Mistral-7B-Instruct-v0.2"
-        pipeline = transformers.pipeline(
-            "text-generation",
-            model=model_name,
-            model_kwargs={"torch_dtype": torch.bfloat16},
-            device="cpu",
-        )
+        # model_name = "mistralai/Mistral-7B-Instruct-v0.2"
+        # pipeline = transformers.pipeline(
+        #     "text-generation",
+        #     model=model_name,
+        #     model_kwargs={"torch_dtype": torch.bfloat16},
+        #     device="cpu",
+        # )
 
 
         # return generate_text
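For reference, the block commented out above is the Hugging Face text-generation pipeline setup for the Mistral instruct checkpoint. A minimal standalone sketch of that setup, assuming transformers and torch are installed and that there is enough memory to hold a 7B model on CPU (roughly 14 GB in bfloat16):

import torch
import transformers

# Sketch of the generator that initialize_llm2 previously built (now commented
# out by this commit). bfloat16 on CPU keeps the memory footprint down, but
# generation will be slow; this is illustrative rather than a production config.
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
generator = transformers.pipeline(
    "text-generation",
    model=model_name,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cpu",
)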
@@ -144,7 +144,11 @@ class DocumentRetrievalAndGeneration:
         Solution:"NO SOLUTION AVAILABLE"
         </s>
         """
-
+        messages = [
+            {"role": "user", "content": prompt},
+        ]
+        pipe = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
+        generated_response=pipe(messages)
         # messages = [{"role": "user", "content": prompt}]
         # encodeds = self.llm.tokenizer.apply_chat_template(messages, return_tensors="pt")
         # model_inputs = encodeds.to(self.llm.device)
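The replacement path above hands chat-style messages straight to a fresh pipeline. In recent transformers releases, calling a text-generation pipeline with a list of role/content dicts returns a list whose "generated_text" entry is the whole conversation (a list of message dicts) rather than a plain string, so downstream string handling such as the re.search on generated_response later in this method generally needs the assistant text pulled out first. A minimal sketch under that assumption; extract_reply is an illustrative helper, not something defined in app.py:

from transformers import pipeline

def extract_reply(outputs):
    # Chat-style calls return the conversation as a list of message dicts,
    # with the model's reply last; plain string prompts return a string.
    generated = outputs[0]["generated_text"]
    if isinstance(generated, str):
        return generated
    return generated[-1]["content"]

pipe = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2")
messages = [{"role": "user", "content": "Example question standing in for the constructed prompt"}]
outputs = pipe(messages, max_new_tokens=256)
reply_text = extract_reply(outputs)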
@@ -155,44 +159,45 @@ class DocumentRetrievalAndGeneration:
 
         # decoded = self.llm.tokenizer.batch_decode(generated_ids)
         # generated_response = decoded[0]
-        messages = []
-        # Check if history is None or empty and handle accordingly
-        if history:
-            for user_msg, assistant_msg in history:
-                messages.append({"role": "user", "content": user_msg})
-                messages.append({"role": "assistant", "content": assistant_msg})
+        #########################################################
+        # messages = []
+        # # Check if history is None or empty and handle accordingly
+        # if history:
+        #     for user_msg, assistant_msg in history:
+        #         messages.append({"role": "user", "content": user_msg})
+        #         messages.append({"role": "assistant", "content": assistant_msg})
 
-        # Always add the current user message
-        messages.append({"role": "user", "content": message})
+        # # Always add the current user message
+        # messages.append({"role": "user", "content": message})
 
-        # Construct the prompt using the pipeline's tokenizer
-        prompt = pipeline.tokenizer.apply_chat_template(
-            messages,
-            tokenize=False,
-            add_generation_prompt=True
-        )
+        # # Construct the prompt using the pipeline's tokenizer
+        # prompt = pipeline.tokenizer.apply_chat_template(
+        #     messages,
+        #     tokenize=False,
+        #     add_generation_prompt=True
+        # )
 
-        # Generate the response
-        terminators = [
-            pipeline.tokenizer.eos_token_id,
-            pipeline.tokenizer.convert_tokens_to_ids("")
-        ]
+        # # Generate the response
+        # terminators = [
+        #     pipeline.tokenizer.eos_token_id,
+        #     pipeline.tokenizer.convert_tokens_to_ids("")
+        # ]
 
-        # Adjust the temperature slightly above given to ensure variety
-        adjusted_temp = temperature + 0.1
+        # # Adjust the temperature slightly above given to ensure variety
+        # adjusted_temp = temperature + 0.1
 
-        # Generate outputs with adjusted parameters
-        outputs = pipeline(
-            prompt,
-            max_new_tokens=max_new_tokens,
-            do_sample=True,
-            temperature=adjusted_temp,
-            top_p=0.9
-        )
+        # # Generate outputs with adjusted parameters
+        # outputs = pipeline(
+        #     prompt,
+        #     max_new_tokens=max_new_tokens,
+        #     do_sample=True,
+        #     temperature=adjusted_temp,
+        #     top_p=0.9
+        # )
 
-        # Extract the generated text, skipping the length of the prompt
-        generated_text = outputs[0]["generated_text"]
-        generated_response = generated_text[len(prompt):]
+        # # Extract the generated text, skipping the length of the prompt
+        # generated_text = outputs[0]["generated_text"]
+        # generated_response = generated_text[len(prompt):]
 
         match1 = re.search(r'\[/INST\](.*?)</s>', generated_response, re.DOTALL)
 
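The block commented out in this hunk was the prompt-string path: rebuild the conversation from the (user, assistant) history pairs, render it with the tokenizer's chat template, sample a completion, and slice the prompt off the front of the output. A condensed sketch of that flow, assuming a text-generation pipeline like the generator above; the function name and default arguments are illustrative, not part of app.py:

def generate_with_history(generator, message, history, temperature=0.6, max_new_tokens=512):
    # Rebuild the conversation: prior (user, assistant) turns plus the new message.
    messages = []
    for user_msg, assistant_msg in history or []:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Render the chat template into a plain prompt string.
    prompt = generator.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Sample a completion and keep only the newly generated text.
    outputs = generator(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature + 0.1,  # the same slight bump the original block applied
        top_p=0.9,
    )
    return outputs[0]["generated_text"][len(prompt):]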
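The regex that survives the change extracts the answer from Mistral-style instruct output, where the reply sits between the closing [/INST] tag and </s>. A small self-contained version of that parsing step, with an illustrative fallback for outputs that never emit </s>:

import re

def parse_mistral_reply(generated_response: str) -> str:
    # The answer follows the final [/INST] tag and normally ends with </s>.
    match = re.search(r'\[/INST\](.*?)</s>', generated_response, re.DOTALL)
    if match:
        return match.group(1).strip()
    # Fallback (illustrative): keep everything after the last [/INST].
    return generated_response.rsplit('[/INST]', 1)[-1].strip()

print(parse_mistral_reply("<s>[INST] Summarise the context. [/INST] The context describes the device's reset behaviour.</s>"))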