nikravan committed
Commit ab382f0 (verified) · Parent: 8ec3e64

LaTeX support

Files changed (1):
  app.py +67 -2
app.py CHANGED
@@ -1,3 +1,57 @@
+import os
+import json
+import subprocess
+from threading import Thread
+
+import torch
+import spaces
+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
+
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+MODEL_ID = "nikravan/Marco-O1-q4"
+CHAT_TEMPLATE = "ChatML"
+MODEL_NAME = MODEL_ID.split("/")[-1]
+CONTEXT_LENGTH = 16000
+
+# Setting values directly for the variables
+COLOR = "blue"
+EMOJI = "🤖"
+DESCRIPTION = f"This is the {MODEL_NAME} model designed for testing thinking for general AI tasks."
+
+latex_delimiters_set = [{
+    "left": "\\(",
+    "right": "\\)",
+    "display": False
+}, {
+    "left": "\\begin{equation}",
+    "right": "\\end{equation}",
+    "display": True
+}, {
+    "left": "\\begin{align}",
+    "right": "\\end{align}",
+    "display": True
+}, {
+    "left": "\\begin{alignat}",
+    "right": "\\end{alignat}",
+    "display": True
+}, {
+    "left": "\\begin{gather}",
+    "right": "\\end{gather}",
+    "display": True
+}, {
+    "left": "\\begin{CD}",
+    "right": "\\end{CD}",
+    "display": True
+}, {
+    "left": "\\[",
+    "right": "\\]",
+    "display": True
+}]
+
+
+@spaces.GPU()
 def predict(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
     if CHAT_TEMPLATE == "Auto":
         stop_tokens = [tokenizer.eos_token_id]
@@ -48,11 +102,22 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k,
         if new_token in stop_tokens:
             break
     result = "".join(outputs)
-    # Wrapping result in Markdown for LaTeX rendering
     yield f"$$ {result} $$"
 
 
-# Create Gradio interface
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.bfloat16
+)
+tokenizer = AutoTokenizer.from_pretrained('AIDC-AI/Marco-o1')
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    device_map="auto",
+    quantization_config=quantization_config,
+    attn_implementation="flash_attention_2",
+)
+
 gr.ChatInterface(
     predict,
     title=EMOJI + " " + MODEL_NAME,
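
The diff is cut off after the `title=` argument, so the commit does not show how `latex_delimiters_set` actually reaches the UI. Below is a minimal sketch, assuming the usual Gradio wiring: the delimiter list goes into a `gr.Chatbot` passed through `gr.ChatInterface`'s `chatbot` argument. The `chatbot=` line, the `additional_inputs` defaults, and the trailing `.queue().launch()` are assumptions for illustration, not part of this commit.

# Sketch only (not part of the commit): plausible wiring of
# latex_delimiters_set into the interface via Gradio's Chatbot API.
gr.ChatInterface(
    predict,
    title=EMOJI + " " + MODEL_NAME,
    description=DESCRIPTION,
    # gr.Chatbot only renders LaTeX for the delimiter pairs it is given,
    # so the set defined at the top of app.py is passed here.
    chatbot=gr.Chatbot(latex_delimiters=latex_delimiters_set),
    additional_inputs=[
        # Defaults below are assumed, matching predict's extra parameters
        # (system_prompt, temperature, max_new_tokens, top_k,
        # repetition_penalty, top_p) in order.
        gr.Textbox("You are a helpful assistant.", label="System prompt"),
        gr.Slider(0, 1, 0.8, label="Temperature"),
        gr.Slider(0, CONTEXT_LENGTH, 1024, label="Max new tokens"),
        gr.Slider(1, 80, 40, label="Top K sampling"),
        gr.Slider(0, 2, 1.1, label="Repetition penalty"),
        gr.Slider(0, 1, 0.95, label="Top P sampling"),
    ],
).queue().launch()

One caveat: passing `latex_delimiters` replaces Gradio's default `$$ ... $$` pair, which is exactly what `predict` wraps its output in, so that pair would likely need to be appended to `latex_delimiters_set` for the streamed `$$ {result} $$` replies to render as math.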