Sean-Case committed on
Commit
e4df9f2
·
1 Parent(s): d2ddc62

Added support for Mistral Orca

Browse files
Files changed (1) hide show
  1. chatfuncs/chatfuncs.py +15 -14
chatfuncs/chatfuncs.py CHANGED
@@ -47,7 +47,7 @@ import gradio as gr
47
 
48
  if torch.cuda.is_available():
49
  torch_device = "cuda"
50
- gpu_layers = 1
51
  else: torch_device = "cpu"
52
 
53
  print("Running on device:", torch_device)
@@ -76,8 +76,8 @@ reset: bool = False
76
  stream: bool = True
77
  threads: int = threads
78
  batch_size:int = 512
79
- context_length:int = 2048
80
- gpu_layers:int = 0#10#gpu_layers
81
  sample = True
82
 
83
  @dataclass
@@ -114,13 +114,13 @@ kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniL
114
 
115
  ## Chat models ##
116
  ctrans_llm = [] # Not loaded by default
117
- #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/orca_mini_3B-GGML', model_type='llama', model_file='orca-mini-3b.ggmlv3.q4_0.bin')
118
  ctrans_llm = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))
119
  #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/vicuna-13B-v1.5-16K-GGUF', model_type='llama', model_file='vicuna-13b-v1.5-16k.Q4_K_M.gguf')
120
  #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeUp-Llama-2-13B-Chat-HF-GGUF', model_type='llama', model_file='codeup-llama-2-13b-chat-hf.Q4_K_M.gguf')
121
  #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeLlama-13B-Instruct-GGUF', model_type='llama', model_file='codellama-13b-instruct.Q4_K_M.gguf')
122
  #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-Instruct-v0.1-GGUF', model_type='mistral', model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf')
123
- #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf')
 
124
 
125
 
126
  #ctokenizer = AutoTokenizer.from_pretrained(ctrans_llm)
@@ -222,16 +222,14 @@ def create_prompt_templates():
222
 
223
  ### Response:"""
224
 
225
- instruction_prompt_template_orca_input = """
226
- ### System:
227
- You are an AI assistant that follows instruction extremely well. Help as much as you can.
228
- ### User:
229
- Answer the QUESTION using information from the following input.
230
- ### Input:
231
- {summaries}
232
- QUESTION: {question}
233
 
234
- ### Response:"""
 
 
 
 
 
 
235
 
236
 
237
 
@@ -986,6 +984,9 @@ def _get_chat_history(chat_history: List[Tuple[str, str]], max_memory_length:int
986
 
987
  def add_inputs_answer_to_history(user_message, history, current_topic):
988
 
 
 
 
989
  #history.append((user_message, [-1]))
990
 
991
  chat_history_str, chat_history_first_q, chat_history_first_ans, max_memory_length = _get_chat_history(history)
 
47
 
48
  if torch.cuda.is_available():
49
  torch_device = "cuda"
50
+ gpu_layers = 5
51
  else: torch_device = "cpu"
52
 
53
  print("Running on device:", torch_device)
 
76
  stream: bool = True
77
  threads: int = threads
78
  batch_size:int = 512
79
+ context_length:int = 4096
80
+ gpu_layers:int = 0#5#gpu_layers
81
  sample = True
82
 
83
  @dataclass
 
114
 
115
  ## Chat models ##
116
  ctrans_llm = [] # Not loaded by default
 
117
  ctrans_llm = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(GenerationConfig()))
118
  #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/vicuna-13B-v1.5-16K-GGUF', model_type='llama', model_file='vicuna-13b-v1.5-16k.Q4_K_M.gguf')
119
  #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeUp-Llama-2-13B-Chat-HF-GGUF', model_type='llama', model_file='codeup-llama-2-13b-chat-hf.Q4_K_M.gguf')
120
  #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/CodeLlama-13B-Instruct-GGUF', model_type='llama', model_file='codellama-13b-instruct.Q4_K_M.gguf')
121
  #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-Instruct-v0.1-GGUF', model_type='mistral', model_file='mistral-7b-instruct-v0.1.Q4_K_M.gguf')
122
+ #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **asdict(GenerationConfig()))
123
+ #ctrans_llm = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q2_K.gguf', **asdict(GenerationConfig()))
124
 
125
 
126
  #ctokenizer = AutoTokenizer.from_pretrained(ctrans_llm)
 
222
 
223
  ### Response:"""
224
 
 
 
 
 
 
 
 
 
225
 
226
+ instruction_prompt_mistral_orca = """<|im_start|>system\n
227
+ You are an AI assistant that follows instruction extremely well. Help as much as you can.
228
+ <|im_start|>user\n
229
+ Answer the QUESTION using information from the following CONTENT.
230
+ CONTENT: {summaries}
231
+ QUESTION: {question}\n
232
+ <|im_end|>"""
233
 
234
 
235
 
 
984
 
985
  def add_inputs_answer_to_history(user_message, history, current_topic):
986
 
987
+ if history is None:
988
+ history = [("","")]
989
+
990
  #history.append((user_message, [-1]))
991
 
992
  chat_history_str, chat_history_first_q, chat_history_first_ans, max_memory_length = _get_chat_history(history)