Spaces (Runtime error)

Roger Condori committed: add new features app.py

app.py CHANGED
@@ -1,3 +1,15 @@
+import torch
+import os
+try:
+    from llama_cpp import Llama
+except:
+    if torch.cuda.is_available():
+        print("CUDA is available on this system.")
+        os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose')
+    else:
+        print("CUDA is not available on this system.")
+        os.system('pip install llama-cpp-python')
+
 import gradio as gr
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
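Note: the commit moves the llama-cpp-python setup inside a try/except, so the package is only (re)installed when the import fails instead of on every startup. A minimal sketch of the same pattern, with the bare except narrowed to ImportError and the import retried after installation (both details are assumptions, not part of this commit):

import os
import torch

try:
    from llama_cpp import Llama
except ImportError:
    if torch.cuda.is_available():
        # Build against cuBLAS when a GPU is present so inference can run on it.
        os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir')
    else:
        os.system('pip install llama-cpp-python')  # CPU-only build
    from llama_cpp import Llama  # retry now that the package is installed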
@@ -21,30 +33,25 @@ from langchain.document_loaders import (
     PyPDFLoader,
 )
 import param
-import os
-import torch
 from conversadocs.bones import DocChat
+from conversadocs.llm_chess import ChessGame
+
+My_hf_token = os.getenv("My_hf_token")
 
 dc = DocChat()
+cg = ChessGame(dc)
 
 ##### GRADIO CONFIG ####
 
-if torch.cuda.is_available():
-    print("CUDA is available on this system.")
-    os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose')
-else:
-    print("CUDA is not available on this system.")
-    os.system('pip install llama-cpp-python')
-
 css="""
 #col-container {max-width: 1500px; margin-left: auto; margin-right: auto;}
 """
 
 title = """
 <div style="text-align: center;max-width: 1500px;">
-<h2>Chat with Documents 📖 - Falcon</h2>
-<p style="text-align: center;">Upload txt, pdf, doc, docx, enex, epub, html, md, odt, ppt and pptx.
-Wait for the Status to show Loaded documents, start typing your questions.
+<h2>Chat with Documents 📖 - Falcon, Llama-2 and OpenAI</h2>
+<p style="text-align: center;">Upload txt, pdf, doc, docx, enex, epub, html, md, odt, ppt and pptx.
+Wait for the Status to show Loaded documents, start typing your questions. Official Repository <a href="https://github.com/R3gm/ConversaDocs">ConversaDocs</a>.<br /></p>
 </div>
 """
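The new My_hf_token = os.getenv("My_hf_token") line reads the Hugging Face token from the environment; on Spaces such values are typically configured as repository secrets and exposed to the app as environment variables. A small sketch of guarding against the secret being unset (the warning is illustrative, not in the commit):

import os

My_hf_token = os.getenv("My_hf_token")  # None when the Space secret is missing
if My_hf_token is None:
    # Without a token, loading FALCON 7B-Instruct via the HF API may fail.
    print("Warning: My_hf_token is not set.")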
@@ -55,11 +62,25 @@ description = """
 
 - Official Repository [![a](https://img.shields.io/badge/GitHub-Repository-black?style=flat-square&logo=github)](https://github.com/R3gm/ConversaDocs/)
 
--
+- You can upload multiple documents at once to a single database.
+
+- Every time a new database is created, the previous one is deleted.
 
--
+- For maximum privacy, you can click "Load LLAMA GGML Model" to use a Llama 2 model. By default, the model llama-2_7B-Chat is loaded.
+
+- This application works on both CPU and GPU. For fast inference with GGML models, use the GPU.
 
 - For more information about what GGML models are, you can visit this notebook [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/R3gm/InsightSolver-Colab/blob/main/LLM_Inference_with_llama_cpp_python__Llama_2_13b_chat.ipynb)
+
+## 📖 News
+
+🔥 2023/07/24: Document summarization was added.
+
+🔥 2023/07/29: Error with llama 70B was fixed.
+
+🔥 2023/08/07: ♟️ Chessboard was added for playing with an LLM.
+
+
 """
 
 theme='aliabid94/new-theme'
@@ -73,7 +94,6 @@ def upload_file(files, max_docs):
 
 def predict(message, chat_history, max_k, check_memory):
     print(message)
-    print(check_memory)
     bot_message = dc.convchain(message, max_k, check_memory)
     print(bot_message)
     return "", dc.get_chats()
@@ -94,9 +114,8 @@ def convert():
 def clear_api_key(api_key):
     return 'api_key...', dc.openai_model(api_key)
 
-
 # Max values in generation
-DOC_DB_LIMIT =
+DOC_DB_LIMIT = 5
 MAX_NEW_TOKENS = 2048
 
 # Limit in HF, no need to set it
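DOC_DB_LIMIT now gets the concrete value 5, next to MAX_NEW_TOKENS = 2048 under "Max values in generation". How these caps are enforced is not shown in this diff; a hypothetical helper illustrating the intent (name and usage are assumptions):

def clamp_generation_params(k, new_tokens):
    # Cap retrieved documents and generated tokens at the configured maxima.
    return min(k, DOC_DB_LIMIT), min(new_tokens, MAX_NEW_TOKENS)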
@@ -124,20 +143,43 @@ with gr.Blocks(theme=theme, css=css) as demo:
     sou = gr.HTML("")
 
     clear_button.click(flag,[],[link_output]).then(dc.clr_history,[], [link_output]).then(lambda: None, None, chatbot, queue=False)
-    upload_button.upload(flag,[],[file_output]).then(upload_file, [upload_button, max_docs], file_output).then(dc.clr_history,[], [link_output])
-
-    with gr.Tab("
-        gr.HTML("<
+    upload_button.upload(flag,[],[file_output]).then(upload_file, [upload_button, max_docs], file_output).then(dc.clr_history,[], [link_output])
+
+    with gr.Tab("Experimental Summarization"):
+        default_model = gr.HTML("<hr>From DB<br>It may take approximately 5 minutes to complete 15 pages in GPU. Please use files with fewer pages if you want to use summarization.<br></h2>")
+        summarize_button = gr.Button("Start summarization")
+
+        summarize_verify = gr.HTML(" ")
+        summarize_button.click(dc.summarize, [], [summarize_verify])
+
+    with gr.Tab("♟️ Chess Game with an LLM"):
+        with gr.Column():
+            gr.HTML('<div style="display: flex; justify-content: center; align-items: center; height: 100vh;"><div>♟️ Click to start the Chessboard ♟️</div></div>')
+            start_chess = gr.Button("START GAME")
+            board_chess = gr.HTML()
+            info_chess = gr.HTML()
+            input_chess = gr.Textbox(label="Type a valid move", placeholder="")
+
+        start_chess.click(cg.start_game,[],[board_chess, info_chess])
+        input_chess.submit(cg.user_move,[input_chess],[board_chess, info_chess, input_chess])
+
+    with gr.Tab("Config llama-2 model"):
+        gr.HTML("<h3>Only models from the GGML library are accepted. To apply the new configurations, please reload the model.</h3>")
         repo_ = gr.Textbox(label="Repository" ,value="TheBloke/Llama-2-7B-Chat-GGML")
-        file_ = gr.Textbox(label="File name" ,value="llama-2-7b-chat.ggmlv3.
-        max_tokens = gr.inputs.Slider(1,
+        file_ = gr.Textbox(label="File name" ,value="llama-2-7b-chat.ggmlv3.q5_1.bin")
+        max_tokens = gr.inputs.Slider(1, 2048, default=256, label="Max new tokens", step=1)
         temperature = gr.inputs.Slider(0.1, 1., default=0.2, label="Temperature", step=0.1)
         top_k = gr.inputs.Slider(0.01, 1., default=0.95, label="Top K", step=0.01)
         top_p = gr.inputs.Slider(0, 100, default=50, label="Top P", step=1)
         repeat_penalty = gr.inputs.Slider(0.1, 100., default=1.2, label="Repeat penalty", step=0.1)
-        change_model_button = gr.Button("Load GGML Model")
-
-
+        change_model_button = gr.Button("Load Llama GGML Model")
+
+        model_verify_ggml = gr.HTML("Loaded model Llama-2")
+
+    with gr.Tab("API Models"):
+
+        default_model = gr.HTML("<hr>Falcon Model</h2>")
+        hf_key = gr.Textbox(label="HF TOKEN", value=My_hf_token, visible=False)
         falcon_button = gr.Button("Load FALCON 7B-Instruct")
 
         openai_gpt_model = gr.HTML("<hr>OpenAI Model gpt-3.5-turbo</h2>")
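The chess tab wires cg.start_game (no inputs; outputs board and info HTML) and cg.user_move (one textbox input; outputs board, info, and an empty string that clears the textbox). conversadocs.llm_chess itself is not part of this diff, so the following interface sketch is an assumption, using python-chess for move validation and board rendering:

import chess
import chess.svg

class ChessGame:
    def __init__(self, docchat):
        self.dc = docchat          # LLM backend that would choose reply moves
        self.board = chess.Board()

    def start_game(self):
        # Return values map to the [board_chess, info_chess] outputs.
        self.board = chess.Board()
        return chess.svg.board(self.board), "Game started. You play white."

    def user_move(self, move):
        # Return values map to [board_chess, info_chess, input_chess];
        # the trailing "" clears the move textbox after each submit.
        try:
            self.board.push_uci(move)
        except ValueError:
            return chess.svg.board(self.board), "Invalid move, try again.", ""
        # ...here the LLM would be asked for a reply move...
        return chess.svg.board(self.board), "Move played.", ""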
@@ -145,16 +187,16 @@ with gr.Blocks(theme=theme, css=css) as demo:
         openai_button = gr.Button("Load gpt-3.5-turbo")
 
         line_ = gr.HTML("<hr> </h2>")
-        model_verify = gr.HTML("
+        model_verify = gr.HTML(" ")
 
-    with gr.Tab("
+    with gr.Tab("Help"):
         description_md = gr.Markdown(description)
 
     msg.submit(predict,[msg, chatbot, max_docs, check_memory],[msg, chatbot]).then(convert,[],[sou])
 
-    change_model_button.click(dc.change_llm,[repo_, file_, max_tokens, temperature, top_p, top_k, repeat_penalty, max_docs],[
+    change_model_button.click(dc.change_llm,[repo_, file_, max_tokens, temperature, top_p, top_k, repeat_penalty, max_docs],[model_verify_ggml])
 
-    falcon_button.click(dc.default_falcon_model, [], [model_verify])
+    falcon_button.click(dc.default_falcon_model, [hf_key], [model_verify])
     openai_button.click(clear_api_key, [api_key], [api_key, model_verify])
-
+
 demo.launch(enable_queue=True)
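The submit handler follows the usual Blocks pattern: predict returns an empty string as its first output, which clears the msg textbox, while the second output refreshes the chatbot. A minimal self-contained sketch of that pattern (assuming Gradio 3.x, where enable_queue and the gr.inputs namespace are still accepted but already deprecated):

import gradio as gr

def predict(message, chat_history):
    reply = "You said: " + message   # stand-in for dc.convchain(...)
    chat_history.append((message, reply))
    return "", chat_history          # "" clears the input textbox

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Question")
    msg.submit(predict, [msg, chatbot], [msg, chatbot])

demo.launch()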