Paulie-Aditya committed
Commit 9926c93 · 1 Parent(s): efaba2f

updated model

Files changed (2):
  1. app.py (+26 -51)
  2. requirements.txt (+4 -0)
app.py CHANGED
@@ -1,51 +1,39 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+import torch
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 class Assistant:
-    def __init__(self, model_name="ContactDoctor/Bio-Medical-Llama-3-8B"):
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModelForCausalLM.from_pretrained(model_name)
-        self.pipe = pipeline("text-generation", model="ContactDoctor/Bio-Medical-Llama-3-8B")
+    def __init__(self):
+        model_name = "ruslanmv/Medical-Llama3-8B"
+        # Quantize to 4-bit NF4 so the 8B model fits in a single GPU's memory.
+        bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float16,
+        )
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            quantization_config=bnb_config,
+            trust_remote_code=True,
+            use_cache=False,
+            device_map="auto",
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+        self.tokenizer.pad_token = self.tokenizer.eos_token
 
 
     def respond(
         self,
         message,
-        history: list[tuple[str, str]],
-        system_message,
-        max_tokens,
-        temperature,
-        top_p,
+        history,  # gr.ChatInterface calls fn(message, history); unused in this single-turn flow
     ):
-        messages = [{"role": "system", "content": system_message}]
-
-        for val in history:
-            if val[0]:
-                messages.append({"role": "user", "content": val[0]})
-            if val[1]:
-                messages.append({"role": "assistant", "content": val[1]})
-
-        messages.append({"role": "user", "content": message})
-
-        response = ""
-
-        for message in client.chat_completion(
-            messages,
-            max_tokens=max_tokens,
-            stream=True,
-            temperature=temperature,
-            top_p=top_p,
-        ):
-            token = message.choices[0].delta.content
-
-            response += token
-            yield response
+        sys_message = '''
+        You are an AI Medical Assistant trained on a vast dataset of health information. Please be thorough and
+        provide an informative answer. If you don't know the answer to a specific medical inquiry, advise seeking professional help.
+        '''
+        messages = [{"role": "system", "content": sys_message}, {"role": "user", "content": message}]
+
+        # Apply the model's chat template, then generate on the GPU.
+        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        inputs = self.tokenizer(prompt, return_tensors="pt").to("cuda")
+        outputs = self.model.generate(**inputs, max_new_tokens=100, use_cache=True)
+
+        # Extract and return the generated text, removing the prompt.
+        response_text = self.tokenizer.batch_decode(outputs)[0].strip()
+        answer = response_text.split('<|im_start|>assistant')[-1].strip()
+        return answer
 
 
 """
@@ -54,22 +42,9 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h
 assistant = Assistant()
 
 demo = gr.ChatInterface(
-    assistant.respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
+    assistant.respond
 )
 
 
 if __name__ == "__main__":
-    # Assistant()
     demo.launch()
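Two details of the new respond method are worth noting. gr.ChatInterface invokes its callback as fn(message, history), which is why the signature above keeps a history parameter even though the new single-turn flow ignores it. And splitting the decoded output on '<|im_start|>assistant' assumes a ChatML-style chat template; a template-agnostic alternative (a sketch, not part of this commit, written as a drop-in replacement for the last two lines of respond) strips the prompt by token count instead:

    # generate() returns the prompt tokens followed by the completion tokens,
    # so slicing off the first prompt_len tokens leaves only the answer,
    # regardless of which chat template the tokenizer uses.
    prompt_len = inputs["input_ids"].shape[-1]
    answer = self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
    return answer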
requirements.txt CHANGED
@@ -20,6 +20,8 @@ Jinja2==3.1.5
 markdown-it-py==3.0.0
 MarkupSafe==2.1.5
 mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.4.2
 numpy==2.2.1
 orjson==3.10.13
 packaging==24.2
@@ -44,8 +46,10 @@ shellingham==1.5.4
 six==1.17.0
 sniffio==1.3.1
 starlette==0.41.3
+sympy==1.13.1
 tokenizers==0.21.0
 tomlkit==0.13.2
+torch==2.5.1
 tqdm==4.67.1
 transformers==4.47.1
 typer==0.15.1
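The four new pins follow from running the model locally instead of through the Inference API: torch 2.5.1 plus its transitive dependencies (sympy, which in turn requires mpmath, and networkx). The 4-bit load in app.py additionally needs bitsandbytes at runtime, which does not appear in the visible hunks. A quick post-install sanity check (a sketch, assuming the pins above and a CUDA machine):

    # Confirm the new pins resolve and a CUDA device is visible,
    # since app.py moves its inputs to "cuda" unconditionally.
    import torch

    print(torch.__version__)          # expect 2.5.1
    print(torch.cuda.is_available())  # must be True for app.py to work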