AdithyaSK committed on
Commit
a916ab9
·
1 Parent(s): 9f47d1c

Create app.py

Files changed (1)
  1. app.py +155 -0
app.py ADDED
@@ -0,0 +1,155 @@
+ import os
+ from threading import Thread
+ from typing import Iterator
+
+ import gradio as gr
+ import spaces
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+
+ MAX_MAX_NEW_TOKENS = 2048
+ DEFAULT_MAX_NEW_TOKENS = 1024
+ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
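+ # Note: the input-length cap can be raised per deployment by setting the
+ # MAX_INPUT_TOKEN_LENGTH environment variable (e.g. in the Space's settings).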
+
+ description = """
+ <h1><center>LLM Finetuned on TaoScience</center></h1>
+ <h3><center>TaoGPT is an LLM fine-tuned on Tao Science by Dr. Rulin Xu and Dr. Zhi Gang Sha. <br> Check out the <a href='https://github.com/agencyxr/taogpt7B'>GitHub repo</a> for more information. 💬</center></h3>
+ """
+
+ NOMIC = """
+ <!DOCTYPE html>
+ <html>
+ <head>
+     <title>TaoGPT - DataMap</title>
+     <style>
+         iframe {
+             width: 100%;
+             height: 600px; /* Adjust the height as needed */
+             border: 0;
+         }
+     </style>
+ </head>
+ <body>
+     <iframe
+         src="https://atlas.nomic.ai/map/c1ce06f4-7ed0-4c02-88a4-dd3b47bdf878/f2941fb8-0f36-4a23-8cbe-40dbf76ca9e4?xs=-41.09135&xf=41.12038&ys=-22.50394&yf=23.67273"
+     ></iframe>
+ </body>
+ </html>
+ """
+
+ if not torch.cuda.is_available():
+     description += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
+
+
+ if torch.cuda.is_available():
+     model_id = "agency888/TaoGPT-v1"
+     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
+     tokenizer = AutoTokenizer.from_pretrained(model_id)
+     tokenizer.use_default_system_prompt = False
+
+
+ @spaces.GPU
+ def generate(
+     message: str,
+     chat_history: list[tuple[str, str]],
+     system_prompt: str,
+     max_new_tokens: int = 1024,
+     temperature: float = 0.6,
+     top_p: float = 0.9,
+     top_k: int = 50,
+     repetition_penalty: float = 1.2,
+ ) -> Iterator[str]:
+     conversation = []
+     if system_prompt:
+         conversation.append({"role": "system", "content": system_prompt})
+     for user, assistant in chat_history:
+         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
+     conversation.append({"role": "user", "content": message})
+
+     # Encode the full conversation, including system prompt and chat history;
+     # assumes the tokenizer defines a chat template.
+     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
+     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+     input_ids = input_ids.to(model.device)
+
+     # Stream tokens as they are produced: model.generate runs in a background
+     # thread and feeds the TextIteratorStreamer consumed by the loop below.
+     streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
+     generate_kwargs = dict(
+         input_ids=input_ids,
+         streamer=streamer,
+         max_new_tokens=max_new_tokens,
+         do_sample=True,
+         top_p=top_p,
+         top_k=top_k,
+         temperature=temperature,
+         num_beams=1,
+         repetition_penalty=repetition_penalty,
+         pad_token_id=2,
+         eos_token_id=2,
+     )
+     t = Thread(target=model.generate, kwargs=generate_kwargs)
+     t.start()
+
+     outputs = []
+     for text in streamer:
+         outputs.append(text)
+         yield "".join(outputs)
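+
+ # Hypothetical smoke test (not part of the app's wiring): the generator can be
+ # iterated directly to watch the partial outputs grow, e.g.
+ #     for partial in generate("What is TaoScience?", [], "", max_new_tokens=64):
+ #         print(partial)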
+
+
+ chat_interface = gr.ChatInterface(
+     fn=generate,
+     additional_inputs=[
+         gr.Textbox(label="System prompt", lines=6),
+         gr.Slider(
+             label="Max new tokens",
+             minimum=1,
+             maximum=MAX_MAX_NEW_TOKENS,
+             step=1,
+             value=DEFAULT_MAX_NEW_TOKENS,
+         ),
+         gr.Slider(
+             label="Temperature",
+             minimum=0.1,
+             maximum=4.0,
+             step=0.1,
+             value=0.6,
+         ),
+         gr.Slider(
+             label="Top-p (nucleus sampling)",
+             minimum=0.05,
+             maximum=1.0,
+             step=0.05,
+             value=0.9,
+         ),
+         gr.Slider(
+             label="Top-k",
+             minimum=1,
+             maximum=1000,
+             step=1,
+             value=50,
+         ),
+         gr.Slider(
+             label="Repetition penalty",
+             minimum=1.0,
+             maximum=2.0,
+             step=0.05,
+             value=1.2,
+         ),
+     ],
+     stop_btn=None,
+     examples=[
+         ["What is TaoScience?"],
+         ["Who wrote TaoScience?"],
+         ["Tell me more about TaoScience"],
+     ],
+ )
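+ # ChatInterface passes the additional_inputs positionally to `generate` after
+ # (message, chat_history): system_prompt, max_new_tokens, temperature, top_p,
+ # top_k, repetition_penalty, matching the signature above.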
+
+ with gr.Blocks() as demo:
+     gr.HTML(description)
+     with gr.Column():
+         with gr.Accordion(label="Visualise Training Data"):
+             gr.HTML(NOMIC)
+         chat_interface.render()
+         gr.Markdown("The model is prone to hallucination and may not always be factual.")
+
+ if __name__ == "__main__":
+     demo.queue(max_size=20).launch(share=True)