AtAndDev committed
Commit 063dc40 · verified · Parent: e2429ca

Update app.py

Files changed (1): app.py (+6 -11)
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import json
 import subprocess
 from threading import Thread
@@ -10,10 +9,10 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-MODEL_ID = "nikravan/Marco_o1_q4"
+MODEL_ID = "AtAndDev/marco-qwq-7B"
 CHAT_TEMPLATE = "ChatML"
 MODEL_NAME = MODEL_ID.split("/")[-1]
-CONTEXT_LENGTH = 16000
+CONTEXT_LENGTH = 32000
 
 # Setting values directly for the variables
 COLOR = "blue"  # Default interface color
@@ -75,7 +74,7 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k,
     else:
         raise Exception("Incorrect chat template, select 'Auto', 'ChatML' or 'Mistral Instruct'")
     print(instruction)
-
+
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     enc = tokenizer(instruction, return_tensors="pt", padding=True, truncation=True)
     input_ids, attention_mask = enc.input_ids, enc.attention_mask
@@ -104,14 +103,13 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k,
             break
         yield "".join(outputs)
 
-
 # Load model
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_compute_dtype=torch.bfloat16
 )
-tokenizer = AutoTokenizer.from_pretrained('AIDC-AI/Marco-o1')
+tokenizer = AutoTokenizer.from_pretrained('AtAndDev/marco-qwq-7B')
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
@@ -124,12 +122,10 @@ gr.ChatInterface(
     predict,
     title=EMOJI + " " + MODEL_NAME,
     description=DESCRIPTION,
-
 
-
     additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False),
     additional_inputs=[
-        gr.Textbox("You are a code assistant.", label="System prompt"),
+        gr.Textbox("You are a helpful assistant.", label="System prompt"),
         gr.Slider(0, 1, 0.3, label="Temperature"),
         gr.Slider(128, 4096, 1024, label="Max new tokens"),
         gr.Slider(1, 80, 40, label="Top K sampling"),
@@ -137,5 +133,4 @@ gr.ChatInterface(
     gr.Slider(0, 1, 0.95, label="Top P sampling"),
     ],
     theme=gr.themes.Soft(primary_hue=COLOR),
-).queue().launch()
-
+).queue().launch()
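Taken together, the hunks above swap the Space from the 4-bit Marco-o1 checkpoint to AtAndDev/marco-qwq-7B, double the context window to 32000, load the tokenizer from the same repo as the model, and soften the default system prompt. For reference, the model-setup portion of app.py after this commit reads roughly as follows. This is a minimal sketch reconstructed from the visible hunks; the quantization_config argument to from_pretrained is an assumption, since the model-load hunk is cut off before the end of the call.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

MODEL_ID = "AtAndDev/marco-qwq-7B"
CHAT_TEMPLATE = "ChatML"
MODEL_NAME = MODEL_ID.split("/")[-1]  # "marco-qwq-7B", shown in the UI title
CONTEXT_LENGTH = 32000                # doubled from 16000 by this commit

# 4-bit quantization so the 7B model fits in modest GPU memory
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Tokenizer now comes from the same repo as the model (previously AIDC-AI/Marco-o1)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=quantization_config,  # assumed: not visible in the hunk
)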
 
 
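The predict hunks also show the standard Transformers streaming setup: model.generate runs on a worker thread while the Gradio generator drains a TextIteratorStreamer. A minimal sketch of that pattern, assuming the sampling sliders are passed through to generate (the actual model.generate call falls outside the visible hunks, so the kwargs and the predict_sketch name below are illustrative):

from threading import Thread
from transformers import TextIteratorStreamer

def predict_sketch(instruction, temperature, max_new_tokens, top_k, top_p):
    # The streamer yields decoded text chunks as generate() produces tokens
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    enc = tokenizer(instruction, return_tensors="pt", padding=True, truncation=True)

    generate_kwargs = dict(
        input_ids=enc.input_ids.to(model.device),
        attention_mask=enc.attention_mask.to(model.device),
        streamer=streamer,
        do_sample=True,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_k=top_k,
        top_p=top_p,
    )

    # generate() blocks, so it runs on a background thread while this
    # generator accumulates chunks and yields the growing reply to Gradio
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    outputs = []
    for chunk in streamer:
        outputs.append(chunk)
        yield "".join(outputs)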