Spaces:
Running
Running
Sean-Case
committed on
Commit
·
aa0ad5d
1
Parent(s):
0b0054b
Cleaned up code a bit, added user icons, thumbs up/down
Browse files
- Link to images.txt +4 -0
- app.py +9 -19
- bot.png +0 -0
- chatfuncs/chatfuncs.py +50 -148
- requirements.txt +2 -2
- user.jfif +0 -0
Link to images.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Robot emoji: https://upload.wikimedia.org/wikipedia/commons/thumb/5/50/Fluent_Emoji_high_contrast_1f916.svg/32px-Fluent_Emoji_high_contrast_1f916.svg.png
|
2 |
+
|
3 |
+
Bing smile emoji: https://www.bing.com/images/create/a-black-and-white-emoji-with-a-simple-smile2c-black/6523d2c320df409581e85bec80ef3ba8?id=KTdVbixG8oRqR9BzF6AblQ%3d%3d&view=detailv2&idpp=genimg&idpclose=1&FORM=SYDBIC
|
4 |
+
|
app.py
CHANGED
@@ -65,35 +65,23 @@ def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
|
|
65 |
print(docs_out)
|
66 |
|
67 |
vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
|
68 |
-
|
69 |
-
'''
|
70 |
-
#with open("vectorstore.pkl", "wb") as f:
|
71 |
-
#pickle.dump(vectorstore, f)
|
72 |
-
'''
|
73 |
-
|
74 |
-
#if Path(save_to).exists():
|
75 |
-
# vectorstore_func.save_local(folder_path=save_to)
|
76 |
-
#else:
|
77 |
-
# os.mkdir(save_to)
|
78 |
-
# vectorstore_func.save_local(folder_path=save_to)
|
79 |
-
|
80 |
-
#global vectorstore
|
81 |
|
82 |
-
#vectorstore = vectorstore_func
|
83 |
|
84 |
chatf.vectorstore = vectorstore_func
|
85 |
|
86 |
out_message = "Document processing complete"
|
87 |
|
88 |
-
#print(out_message)
|
89 |
-
#print(f"> Saved to: {save_to}")
|
90 |
-
|
91 |
return out_message, vectorstore_func
|
92 |
|
93 |
# Gradio chat
|
94 |
|
95 |
import gradio as gr
|
96 |
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
block = gr.Blocks(theme = gr.themes.Base())#css=".gradio-container {background-color: black}")
|
99 |
|
@@ -117,8 +105,8 @@ with block:
|
|
117 |
with gr.Tab("Chatbot"):
|
118 |
|
119 |
with gr.Row():
|
120 |
-
chat_height =
|
121 |
-
chatbot = gr.Chatbot(height=chat_height)
|
122 |
sources = gr.HTML(value = "Source paragraphs where I looked for answers will appear here", height=chat_height)
|
123 |
|
124 |
with gr.Row():
|
@@ -194,6 +182,8 @@ with block:
|
|
194 |
clear.click(chatf.clear_chat, inputs=[chat_history_state, sources, message, current_topic], outputs=[chat_history_state, sources, message, current_topic])
|
195 |
clear.click(lambda: None, None, chatbot, queue=False)
|
196 |
|
|
|
|
|
197 |
block.queue(concurrency_count=1).launch(debug=True)
|
198 |
# -
|
199 |
|
|
|
65 |
print(docs_out)
|
66 |
|
67 |
vectorstore_func = FAISS.from_documents(documents=docs_out, embedding=embeddings)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
|
|
69 |
|
70 |
chatf.vectorstore = vectorstore_func
|
71 |
|
72 |
out_message = "Document processing complete"
|
73 |
|
|
|
|
|
|
|
74 |
return out_message, vectorstore_func
|
75 |
|
76 |
# Gradio chat
|
77 |
|
78 |
import gradio as gr
|
79 |
|
80 |
+
def vote(data: gr.LikeData):
|
81 |
+
if data.liked:
|
82 |
+
print("You upvoted this response: " + data.value)
|
83 |
+
else:
|
84 |
+
print("You downvoted this response: " + data.value)
|
85 |
|
86 |
block = gr.Blocks(theme = gr.themes.Base())#css=".gradio-container {background-color: black}")
|
87 |
|
|
|
105 |
with gr.Tab("Chatbot"):
|
106 |
|
107 |
with gr.Row():
|
108 |
+
chat_height = 550
|
109 |
+
chatbot = gr.Chatbot(height=chat_height, avatar_images=('user.jfif', 'bot.jpg'),bubble_full_width = False)
|
110 |
sources = gr.HTML(value = "Source paragraphs where I looked for answers will appear here", height=chat_height)
|
111 |
|
112 |
with gr.Row():
|
|
|
182 |
clear.click(chatf.clear_chat, inputs=[chat_history_state, sources, message, current_topic], outputs=[chat_history_state, sources, message, current_topic])
|
183 |
clear.click(lambda: None, None, chatbot, queue=False)
|
184 |
|
185 |
+
chatbot.like(vote, None, None)
|
186 |
+
|
187 |
block.queue(concurrency_count=1).launch(debug=True)
|
188 |
# -
|
189 |
|
bot.png
ADDED
chatfuncs/chatfuncs.py
CHANGED
@@ -12,9 +12,7 @@ from threading import Thread
|
|
12 |
from transformers import AutoTokenizer, pipeline, TextIteratorStreamer
|
13 |
|
14 |
# Alternative model sources
|
15 |
-
from gpt4all import GPT4All
|
16 |
from ctransformers import AutoModelForCausalLM#, AutoTokenizer
|
17 |
-
|
18 |
from dataclasses import asdict, dataclass
|
19 |
|
20 |
# Langchain functions
|
@@ -33,8 +31,6 @@ from nltk.tokenize import RegexpTokenizer
|
|
33 |
from nltk.stem import WordNetLemmatizer
|
34 |
import keybert
|
35 |
|
36 |
-
#from transformers.pipelines import pipeline
|
37 |
-
|
38 |
# For Name Entity Recognition model
|
39 |
from span_marker import SpanMarkerModel
|
40 |
|
@@ -69,6 +65,7 @@ temperature: float = 0.1
|
|
69 |
top_k: int = 3
|
70 |
top_p: float = 1
|
71 |
repetition_penalty: float = 1.05
|
|
|
72 |
last_n_tokens: int = 64
|
73 |
max_new_tokens: int = 125
|
74 |
#seed: int = 42
|
@@ -77,7 +74,7 @@ stream: bool = True
|
|
77 |
threads: int = threads
|
78 |
batch_size:int = 512
|
79 |
context_length:int = 4096
|
80 |
-
gpu_layers:int = 0#5#gpu_layers
|
81 |
sample = True
|
82 |
|
83 |
@dataclass
|
@@ -99,7 +96,7 @@ class GenerationConfig:
|
|
99 |
|
100 |
|
101 |
## Highlight text constants
|
102 |
-
hlt_chunk_size =
|
103 |
hlt_strat = [" ", ".", "!", "?", ":", "\n\n", "\n", ","]
|
104 |
hlt_overlap = 0
|
105 |
|
@@ -110,51 +107,47 @@ ner_model = SpanMarkerModel.from_pretrained("tomaarsen/span-marker-mbert-base-mu
|
|
110 |
# Used to pull out keywords from chat history to add to user queries behind the scenes
|
111 |
kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")
|
112 |
|
113 |
-
|
|
|
114 |
|
115 |
## Chat models ##
|
116 |
-
ctrans_llm = [] # Not loaded by default
|
117 |
-
ctrans_llm = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))
|
118 |
-
#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/vicuna-13B-v1.5-16K-GGUF', model_type='llama', model_file='vicuna-13b-v1.5-16k.Q4_K_M.gguf')
|
119 |
-
#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeUp-Llama-2-13B-Chat-HF-GGUF', model_type='llama', model_file='codeup-llama-2-13b-chat-hf.Q4_K_M.gguf')
|
120 |
-
#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeLlama-13B-Instruct-GGUF', model_type='llama', model_file='codellama-13b-instruct.Q4_K_M.gguf')
|
121 |
-
#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-Instruct-v0.1-GGUF', model_type='mistral', model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf')
|
122 |
-
#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **asdict(GenerationConfig()))
|
123 |
-
#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q2_K.gguf', **asdict(GenerationConfig()))
|
124 |
-
|
125 |
|
126 |
-
|
|
|
|
|
|
|
127 |
|
128 |
-
|
129 |
-
#
|
130 |
-
hf_checkpoint = '
|
131 |
-
|
132 |
-
|
|
|
133 |
|
134 |
-
|
135 |
|
136 |
-
# model_id = model_name
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
|
|
|
|
143 |
else:
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
else:
|
151 |
-
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
|
152 |
|
153 |
-
|
154 |
|
155 |
-
|
156 |
|
157 |
-
|
158 |
|
159 |
# Vectorstore funcs
|
160 |
|
@@ -439,7 +432,6 @@ def hybrid_retrieval(new_question_kworded, vectorstore, embeddings, k_val, out_p
|
|
439 |
|
440 |
return docs_keep_as_doc, doc_df, docs_keep_out
|
441 |
|
442 |
-
|
443 |
def get_expanded_passages(vectorstore, docs, width):
|
444 |
|
445 |
"""
|
@@ -524,86 +516,6 @@ def get_expanded_passages(vectorstore, docs, width):
|
|
524 |
|
525 |
return expanded_docs, doc_df
|
526 |
|
527 |
-
|
528 |
-
def get_expanded_passages_orig(vectorstore, docs, width):
|
529 |
-
|
530 |
-
"""
|
531 |
-
Extracts expanded passages based on given documents and a width for context.
|
532 |
-
|
533 |
-
Parameters:
|
534 |
-
- vectorstore: The primary data source.
|
535 |
-
- docs: List of documents to be expanded.
|
536 |
-
- width: Number of documents to expand around a given document for context.
|
537 |
-
|
538 |
-
Returns:
|
539 |
-
- expanded_docs: List of expanded Document objects.
|
540 |
-
- doc_df: DataFrame representation of expanded_docs.
|
541 |
-
"""
|
542 |
-
|
543 |
-
from collections import defaultdict
|
544 |
-
|
545 |
-
def get_docs_from_vstore(vectorstore):
|
546 |
-
vector = vectorstore.docstore._dict
|
547 |
-
return list(vector.items())
|
548 |
-
|
549 |
-
def extract_details(docs_list):
|
550 |
-
docs_list_out = [tup[1] for tup in docs_list]
|
551 |
-
content = [doc.page_content for doc in docs_list_out]
|
552 |
-
meta = [doc.metadata for doc in docs_list_out]
|
553 |
-
return ''.join(content), meta[0], meta[-1]
|
554 |
-
|
555 |
-
def get_parent_content_and_meta(vstore_docs, width, target):
|
556 |
-
target_range = range(max(0, target - width), min(len(vstore_docs), target + width + 1))
|
557 |
-
parent_vstore_out = [vstore_docs[i] for i in target_range]
|
558 |
-
|
559 |
-
content_str_out, meta_first_out, meta_last_out = [], [], []
|
560 |
-
for _ in parent_vstore_out:
|
561 |
-
content_str, meta_first, meta_last = extract_details(parent_vstore_out)
|
562 |
-
content_str_out.append(content_str)
|
563 |
-
meta_first_out.append(meta_first)
|
564 |
-
meta_last_out.append(meta_last)
|
565 |
-
return content_str_out, meta_first_out, meta_last_out
|
566 |
-
|
567 |
-
def merge_dicts_except_source(d1, d2):
|
568 |
-
merged = {}
|
569 |
-
for key in d1:
|
570 |
-
if key != "source":
|
571 |
-
merged[key] = str(d1[key]) + " to " + str(d2[key])
|
572 |
-
else:
|
573 |
-
merged[key] = d1[key] # or d2[key], based on preference
|
574 |
-
return merged
|
575 |
-
|
576 |
-
def merge_two_lists_of_dicts(list1, list2):
|
577 |
-
return [merge_dicts_except_source(d1, d2) for d1, d2 in zip(list1, list2)]
|
578 |
-
|
579 |
-
vstore_docs = get_docs_from_vstore(vectorstore)
|
580 |
-
|
581 |
-
parent_vstore_meta_section = [doc.metadata['page_section'] for _, doc in vstore_docs]
|
582 |
-
|
583 |
-
#print(docs)
|
584 |
-
|
585 |
-
expanded_docs = []
|
586 |
-
for doc, score in docs:
|
587 |
-
search_section = doc.metadata['page_section']
|
588 |
-
search_index = parent_vstore_meta_section.index(search_section) if search_section in parent_vstore_meta_section else -1
|
589 |
-
|
590 |
-
content_str, meta_first, meta_last = get_parent_content_and_meta(vstore_docs, width, search_index)
|
591 |
-
#print("Meta first:")
|
592 |
-
#print(meta_first)
|
593 |
-
#print("Meta last:")
|
594 |
-
#print(meta_last)
|
595 |
-
#print("Meta last end.")
|
596 |
-
meta_full = merge_two_lists_of_dicts(meta_first, meta_last)
|
597 |
-
|
598 |
-
#print(meta_full)
|
599 |
-
|
600 |
-
expanded_doc = (Document(page_content=content_str[0], metadata=meta_full[0]), score)
|
601 |
-
expanded_docs.append(expanded_doc)
|
602 |
-
|
603 |
-
doc_df = create_doc_df(expanded_docs) # Assuming you've defined the 'create_doc_df' function elsewhere
|
604 |
-
|
605 |
-
return expanded_docs, doc_df
|
606 |
-
|
607 |
def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings): # ,
|
608 |
|
609 |
question = inputs["question"]
|
@@ -838,18 +750,6 @@ def highlight_found_text(search_text: str, full_text: str, hlt_chunk_size:int=hl
|
|
838 |
return "".join(pos_tokens)
|
839 |
|
840 |
# # Chat functions
|
841 |
-
def produce_streaming_answer_chatbot_gpt4all(history, full_prompt):
|
842 |
-
|
843 |
-
print("The question is: ")
|
844 |
-
print(full_prompt)
|
845 |
-
|
846 |
-
# Pull the generated text from the streamer, and update the model output.
|
847 |
-
history[-1][1] = ""
|
848 |
-
for new_text in gpt4all_model.generate(full_prompt, max_tokens=2000, streaming=True):
|
849 |
-
if new_text == None: new_text = ""
|
850 |
-
history[-1][1] += new_text
|
851 |
-
yield history
|
852 |
-
|
853 |
def produce_streaming_answer_chatbot_hf(history, full_prompt):
|
854 |
|
855 |
#print("The question is: ")
|
@@ -866,7 +766,7 @@ def produce_streaming_answer_chatbot_hf(history, full_prompt):
|
|
866 |
streamer=streamer,
|
867 |
max_new_tokens=max_new_tokens,
|
868 |
do_sample=sample,
|
869 |
-
repetition_penalty=
|
870 |
top_p=top_p,
|
871 |
temperature=temperature,
|
872 |
top_k=top_k
|
@@ -902,26 +802,28 @@ def produce_streaming_answer_chatbot_ctrans(history, full_prompt):
|
|
902 |
|
903 |
tokens = ctrans_llm.tokenize(full_prompt)
|
904 |
|
905 |
-
#
|
906 |
-
#from loguru import logger
|
907 |
-
|
908 |
-
#_ = [elm for elm in full_prompt.splitlines() if elm.strip()]
|
909 |
-
#stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
|
910 |
-
#print(stop_string)
|
911 |
-
|
912 |
-
#logger.debug(f"{stop_string=} not used")
|
913 |
-
|
914 |
-
#_ = psutil.cpu_count(logical=False) - 1
|
915 |
-
#cpu_count: int = int(_) if _ else 1
|
916 |
-
#logger.debug(f"{cpu_count=}")
|
917 |
|
918 |
# Pull the generated text from the streamer, and update the model output.
|
919 |
-
|
|
|
|
|
|
|
|
|
920 |
history[-1][1] = ""
|
921 |
for new_text in ctrans_llm.generate(tokens, top_k=top_k, temperature=temperature, repetition_penalty=repetition_penalty): #ctrans_generate(prompt=tokens, config=config):
|
922 |
if new_text == None: new_text = ""
|
923 |
history[-1][1] += ctrans_llm.detokenize(new_text) #new_text
|
|
|
924 |
yield history
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
925 |
|
926 |
|
927 |
def ctrans_generate(
|
|
|
12 |
from transformers import AutoTokenizer, pipeline, TextIteratorStreamer
|
13 |
|
14 |
# Alternative model sources
|
|
|
15 |
from ctransformers import AutoModelForCausalLM#, AutoTokenizer
|
|
|
16 |
from dataclasses import asdict, dataclass
|
17 |
|
18 |
# Langchain functions
|
|
|
31 |
from nltk.stem import WordNetLemmatizer
|
32 |
import keybert
|
33 |
|
|
|
|
|
34 |
# For Name Entity Recognition model
|
35 |
from span_marker import SpanMarkerModel
|
36 |
|
|
|
65 |
top_k: int = 3
|
66 |
top_p: float = 1
|
67 |
repetition_penalty: float = 1.05
|
68 |
+
flan_alpaca_repetition_penalty: float = 1.3
|
69 |
last_n_tokens: int = 64
|
70 |
max_new_tokens: int = 125
|
71 |
#seed: int = 42
|
|
|
74 |
threads: int = threads
|
75 |
batch_size:int = 512
|
76 |
context_length:int = 4096
|
77 |
+
gpu_layers:int = 0#5#gpu_layers For serving on Hugging Face, set to 0 because it runs on a free CPU instance
|
78 |
sample = True
|
79 |
|
80 |
@dataclass
|
|
|
96 |
|
97 |
|
98 |
## Highlight text constants
|
99 |
+
hlt_chunk_size = 15
|
100 |
hlt_strat = [" ", ".", "!", "?", ":", "\n\n", "\n", ","]
|
101 |
hlt_overlap = 0
|
102 |
|
|
|
107 |
# Used to pull out keywords from chat history to add to user queries behind the scenes
|
108 |
kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniLM-L6-v2")
|
109 |
|
110 |
+
## Set model type ##
|
111 |
+
model_type = "ctrans"
|
112 |
|
113 |
## Chat models ##
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
+
if model_type == "ctrans":
|
116 |
+
ctrans_llm = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))
|
117 |
+
#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **asdict(GenerationConfig()))
|
118 |
+
#ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q2_K.gguf', **asdict(GenerationConfig()))
|
119 |
|
120 |
+
if model_type == "hf":
|
121 |
+
# Huggingface chat model
|
122 |
+
#hf_checkpoint = 'jphme/phi-1_5_Wizard_Vicuna_uncensored'
|
123 |
+
hf_checkpoint = 'declare-lab/flan-alpaca-large'
|
124 |
+
|
125 |
+
def create_hf_model(model_name):
|
126 |
|
127 |
+
from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM
|
128 |
|
129 |
+
# model_id = model_name
|
130 |
+
|
131 |
+
if torch_device == "cuda":
|
132 |
+
if "flan" in model_name:
|
133 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
|
134 |
+
elif "mpt" in model_name:
|
135 |
+
model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto", trust_remote_code=True)
|
136 |
+
else:
|
137 |
+
model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, device_map="auto")
|
138 |
else:
|
139 |
+
if "flan" in model_name:
|
140 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
141 |
+
elif "mpt" in model_name:
|
142 |
+
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
|
143 |
+
else:
|
144 |
+
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
|
|
|
|
|
145 |
|
146 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = 2048)
|
147 |
|
148 |
+
return model, tokenizer, torch_device
|
149 |
|
150 |
+
model, tokenizer, torch_device = create_hf_model(model_name = hf_checkpoint)
|
151 |
|
152 |
# Vectorstore funcs
|
153 |
|
|
|
432 |
|
433 |
return docs_keep_as_doc, doc_df, docs_keep_out
|
434 |
|
|
|
435 |
def get_expanded_passages(vectorstore, docs, width):
|
436 |
|
437 |
"""
|
|
|
516 |
|
517 |
return expanded_docs, doc_df
|
518 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
519 |
def create_final_prompt(inputs: Dict[str, str], instruction_prompt, content_prompt, extracted_memory, vectorstore, embeddings): # ,
|
520 |
|
521 |
question = inputs["question"]
|
|
|
750 |
return "".join(pos_tokens)
|
751 |
|
752 |
# # Chat functions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
753 |
def produce_streaming_answer_chatbot_hf(history, full_prompt):
|
754 |
|
755 |
#print("The question is: ")
|
|
|
766 |
streamer=streamer,
|
767 |
max_new_tokens=max_new_tokens,
|
768 |
do_sample=sample,
|
769 |
+
repetition_penalty=flan_alpaca_repetition_penalty,
|
770 |
top_p=top_p,
|
771 |
temperature=temperature,
|
772 |
top_k=top_k
|
|
|
802 |
|
803 |
tokens = ctrans_llm.tokenize(full_prompt)
|
804 |
|
805 |
+
#config = GenerationConfig(reset=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
806 |
|
807 |
# Pull the generated text from the streamer, and update the model output.
|
808 |
+
import time
|
809 |
+
start = time.time()
|
810 |
+
NUM_TOKENS=0
|
811 |
+
print('-'*4+'Start Generation'+'-'*4)
|
812 |
+
|
813 |
history[-1][1] = ""
|
814 |
for new_text in ctrans_llm.generate(tokens, top_k=top_k, temperature=temperature, repetition_penalty=repetition_penalty): #ctrans_generate(prompt=tokens, config=config):
|
815 |
if new_text == None: new_text = ""
|
816 |
history[-1][1] += ctrans_llm.detokenize(new_text) #new_text
|
817 |
+
NUM_TOKENS+=1
|
818 |
yield history
|
819 |
+
|
820 |
+
time_generate = time.time() - start
|
821 |
+
print('\n')
|
822 |
+
print('-'*4+'End Generation'+'-'*4)
|
823 |
+
print(f'Num of generated tokens: {NUM_TOKENS}')
|
824 |
+
print(f'Time for complete generation: {time_generate}s')
|
825 |
+
print(f'Tokens per secound: {NUM_TOKENS/time_generate}')
|
826 |
+
print(f'Time per token: {(time_generate/NUM_TOKENS)*1000}ms')
|
827 |
|
828 |
|
829 |
def ctrans_generate(
|
requirements.txt
CHANGED
@@ -13,8 +13,8 @@ bitsandbytes
|
|
13 |
accelerate
|
14 |
optimum
|
15 |
pypdf
|
16 |
-
gradio
|
17 |
-
gradio_client==0.
|
18 |
python-docx
|
19 |
gpt4all
|
20 |
ctransformers[cuda]
|
|
|
13 |
accelerate
|
14 |
optimum
|
15 |
pypdf
|
16 |
+
gradio==3.47.1
|
17 |
+
gradio_client==0.6.0
|
18 |
python-docx
|
19 |
gpt4all
|
20 |
ctransformers[cuda]
|
user.jfif
ADDED
Binary file (53.4 kB). View file
|
|