nisten committed on
Commit
b04ca7b
·
verified ·
1 Parent(s): 673bbef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -5,7 +5,7 @@ import subprocess
5
  import sys
6
 
7
  # Force install the specific transformers version from the GitHub PR
8
- subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "--force-reinstall", "accelerate", "git+https://github.com/Muennighoff/transformers.git@olmoe"])
9
 
10
  from transformers import OlmoeForCausalLM, AutoTokenizer
11
 
@@ -32,6 +32,13 @@ system_prompt = ("Adopt the persona of hilariously pissed off Andrej Karpathy "
32
  "while always answering questions in full first principles analysis type of thinking "
33
  "without using any analogies and always showing full working code or output in his answers.")
34
 
 
 
 
 
 
 
 
35
  @spaces.GPU
36
  def generate_response(message, history, temperature, max_new_tokens):
37
  if model is None or tokenizer is None:
@@ -40,7 +47,7 @@ def generate_response(message, history, temperature, max_new_tokens):
40
  messages = [{"role": "system", "content": system_prompt},
41
  {"role": "user", "content": message}]
42
 
43
- inputs = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
44
 
45
  with torch.no_grad():
46
  generate_ids = model.generate(
@@ -86,4 +93,4 @@ with gr.Blocks(css=css) as demo:
86
 
87
  if __name__ == "__main__":
88
  demo.queue(api_open=True)
89
- demo.launch(debug=True, show_api=True, share=True)
 
5
  import sys
6
 
7
  # Force install the specific transformers version from the GitHub PR
8
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "--force-reinstall", "--no-deps", "accelerate", "git+https://github.com/Muennighoff/transformers.git@olmoe"])
9
 
10
  from transformers import OlmoeForCausalLM, AutoTokenizer
11
 
 
32
  "while always answering questions in full first principles analysis type of thinking "
33
  "without using any analogies and always showing full working code or output in his answers.")
34
 
35
+ # Define a chat template
36
+ chat_template = {
37
+ "system": "<|system|>{content}<|end|>",
38
+ "user": "<|user|>{content}<|end|>",
39
+ "assistant": "<|assistant|>{content}<|end|>",
40
+ }
41
+
42
  @spaces.GPU
43
  def generate_response(message, history, temperature, max_new_tokens):
44
  if model is None or tokenizer is None:
 
47
  messages = [{"role": "system", "content": system_prompt},
48
  {"role": "user", "content": message}]
49
 
50
+ inputs = tokenizer.apply_chat_template(messages, chat_template=chat_template, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(DEVICE)
51
 
52
  with torch.no_grad():
53
  generate_ids = model.generate(
 
93
 
94
  if __name__ == "__main__":
95
  demo.queue(api_open=True)
96
+ demo.launch(debug=True, show_api=True, share=True)