alexkueck committed on
Commit
d8dd36b
·
1 Parent(s): 0ddfcd1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -86,7 +86,7 @@ MODEL_NAME = "gpt-3.5-turbo-16k"
86
 
87
  #HuggingFace--------------------------------
88
  #repo_id = "meta-llama/Llama-2-13b-chat-hf"
89
- repo_id = "HuggingFaceH4/zephyr-7b-alpha" #das Modell ist echt gut!!!
90
  #repo_id = "meta-llama/Llama-2-70b-chat-hf"
91
  #repo_id = "tiiuae/falcon-40b"
92
  #repo_id = "Vicuna-33b"
@@ -277,7 +277,7 @@ def generate_prompt_with_history_langchain(prompt, history):
277
 
278
  ###################################################
279
  #Funktion von Gradio aus, die den dort eingegebenen Prompt annimmt und weiterverarbeitet
280
- def invoke (prompt, history, rag_option, openai_api_key, temperature=0.9, max_new_tokens=512, top_p=0.6, repetition_penalty=1.3,):
281
  global splittet
282
  print(splittet)
283
  #Prompt an history anhängen und einen Text daraus machen
@@ -304,12 +304,14 @@ def invoke (prompt, history, rag_option, openai_api_key, temperature=0.9, max_n
304
  ###########################
305
  #LLM auswählen (OpenAI oder HF)
306
  ###########################
307
- #Anfrage an OpenAI ----------------------------
308
- llm = ChatOpenAI(model_name = MODEL_NAME, openai_api_key = openai_api_key, temperature=temperature)#, top_p = top_p)
309
- #oder an Hugging Face --------------------------
310
- #llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_length": 64})
311
- #llm = HuggingFaceHub(url_??? = "https://wdgsjd6zf201mufn.us-east-1.aws.endpoints.huggingface.cloud", model_kwargs={"temperature": 0.5, "max_length": 64})
312
- #llm = HuggingFaceTextGenInference( inference_server_url="http://localhost:8010/", max_new_tokens=max_new_tokens,top_k=10,top_p=top_p,typical_p=0.95,temperature=temperature,repetition_penalty=repetition_penalty,)
 
 
313
 
314
 
315
  #zusätzliche Dokumenten Splits aus DB zum Prompt hinzufügen (aus VektorDB - Chroma oder Mongo DB)
@@ -355,6 +357,7 @@ def vote(data: gr.LikeData):
355
  additional_inputs = [
356
  #gr.Radio(["Off", "Chroma", "MongoDB"], label="Retrieval Augmented Generation", value = "Off"),
357
  gr.Radio(["Aus", "An"], label="RAG - LI Erweiterungen", value = "Aus"),
 
358
  gr.Textbox(label = "OpenAI API Key", value = "sk-", lines = 1),
359
  gr.Slider(label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Höhere Werte erzeugen diversere Antworten", visible=False),
360
  gr.Slider(label="Max new tokens", value=256, minimum=0, maximum=4096, step=64, interactive=True, info="Maximale Anzahl neuer Tokens", visible=False),
 
86
 
87
  #HuggingFace--------------------------------
88
  #repo_id = "meta-llama/Llama-2-13b-chat-hf"
89
+ repo_id = "HuggingFaceH4/zephyr-7b-alpha" #das Modell ist echt gut!!! Vom MIT
90
  #repo_id = "meta-llama/Llama-2-70b-chat-hf"
91
  #repo_id = "tiiuae/falcon-40b"
92
  #repo_id = "Vicuna-33b"
 
277
 
278
  ###################################################
279
  #Funktion von Gradio aus, die den dort eingegebenen Prompt annimmt und weiterverarbeitet
280
+ def invoke (prompt, history, rag_option, model_option, openai_api_key, temperature=0.9, max_new_tokens=512, top_p=0.6, repetition_penalty=1.3,):
281
  global splittet
282
  print(splittet)
283
  #Prompt an history anhängen und einen Text daraus machen
 
304
  ###########################
305
  #LLM auswählen (OpenAI oder HF)
306
  ###########################
307
+ if (model_option == "OpenAI"):
308
+ #Anfrage an OpenAI ----------------------------
309
+ llm = ChatOpenAI(model_name = MODEL_NAME, openai_api_key = openai_api_key, temperature=temperature)#, top_p = top_p)
310
+ else:
311
+ #oder an Hugging Face --------------------------
312
+ llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.5, "max_length": 64})
313
+ #llm = HuggingFaceHub(url_??? = "https://wdgsjd6zf201mufn.us-east-1.aws.endpoints.huggingface.cloud", model_kwargs={"temperature": 0.5, "max_length": 64})
314
+ #llm = HuggingFaceTextGenInference( inference_server_url="http://localhost:8010/", max_new_tokens=max_new_tokens,top_k=10,top_p=top_p,typical_p=0.95,temperature=temperature,repetition_penalty=repetition_penalty,)
315
 
316
 
317
  #zusätzliche Dokumenten Splits aus DB zum Prompt hinzufügen (aus VektorDB - Chroma oder Mongo DB)
 
357
  additional_inputs = [
358
  #gr.Radio(["Off", "Chroma", "MongoDB"], label="Retrieval Augmented Generation", value = "Off"),
359
  gr.Radio(["Aus", "An"], label="RAG - LI Erweiterungen", value = "Aus"),
360
+ gr.Radio(["OpenAI", "HuggingFace"], label="Modellauswahl", value = "OpenAI"),
361
  gr.Textbox(label = "OpenAI API Key", value = "sk-", lines = 1),
362
  gr.Slider(label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Höhere Werte erzeugen diversere Antworten", visible=False),
363
  gr.Slider(label="Max new tokens", value=256, minimum=0, maximum=4096, step=64, interactive=True, info="Maximale Anzahl neuer Tokens", visible=False),