Update app.py
app.py
CHANGED
@@ -72,7 +72,7 @@ class DocumentRetrievalAndGeneration:
         return generate_text
     def initialize_llm2(self,model_id):

-        client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+        self.client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
         # except:
         #     try:
         #         pipe = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
@@ -156,7 +156,7 @@ class DocumentRetrievalAndGeneration:
         messages.append({"role": "user", "content": query})
         response = ""

-        for message in client.chat_completion(messages,max_tokens=2048,stream=True,temperature=0.7):
+        for message in self.client.chat_completion(messages,max_tokens=2048,stream=True,temperature=0.7):
            token = message.choices[0].delta.content
            response += token
            # yield response
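The commit promotes the InferenceClient handle from a method-local variable to an instance attribute, so the streaming loop in the second hunk (which runs in a different method) can reach the client created in initialize_llm2; with a bare local `client`, the name would be undefined there at call time. A minimal sketch of the resulting pattern, assuming the same huggingface_hub client as the diff; the stream_answer method name and its signature are hypothetical stand-ins for the surrounding code in app.py:

# Sketch only: stream_answer is a hypothetical caller standing in for
# the method that contains the second hunk of this diff.
from huggingface_hub import InferenceClient

class DocumentRetrievalAndGeneration:
    def initialize_llm2(self, model_id):
        # Stored on self so other methods can reach the same client;
        # a plain local variable would go out of scope on return.
        self.client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

    def stream_answer(self, messages):
        response = ""
        # chat_completion with stream=True yields chunks whose
        # .choices[0].delta.content carries the next token of text.
        for message in self.client.chat_completion(
            messages, max_tokens=2048, stream=True, temperature=0.7
        ):
            token = message.choices[0].delta.content
            response += token
        return response

Storing the client on self also means the (relatively expensive) client construction happens once in initialize_llm2 rather than on every generation call.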