marks committed on
Commit 1a61dc5 · 1 Parent(s): bb9a4a3

Added Docker file and chat

Files changed (2)
  1. Dockerfile +4 -0
  2. chat.py +33 -0
Dockerfile ADDED
@@ -0,0 +1,4 @@
+ FROM nvidia/cuda:12.6.2-cudnn-devel-ubuntu22.04
+ RUN wget --header="Authorization: Bearer YOUR_HF_TOKEN" https://huggingface.co/nvidia/Hymba-1.5B-Base/resolve/main/setup.sh
+ RUN bash setup.sh
+ CMD [ "python", "chat.py" ]
chat.py ADDED
@@ -0,0 +1,33 @@
+ from transformers import AutoModelForCausalLM, AutoTokenizer, StopStringCriteria, StoppingCriteriaList
+ import torch
+
+ # Load the tokenizer and model
+ repo_name = "nvidia/Hymba-1.5B-Instruct"
+
+ tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)
+ model = AutoModelForCausalLM.from_pretrained(repo_name, trust_remote_code=True)
+ model = model.cuda().to(torch.bfloat16)
+
+ # Chat with Hymba
+ prompt = input()
+
+ messages = [
+     {"role": "system", "content": "You are a helpful assistant."}
+ ]
+ messages.append({"role": "user", "content": prompt})
+
+ # Apply chat template
+ tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
+ stopping_criteria = StoppingCriteriaList([StopStringCriteria(tokenizer=tokenizer, stop_strings="</s>")])
+ outputs = model.generate(
+     tokenized_chat,
+     max_new_tokens=256,
+     do_sample=False,
+     temperature=0.7,
+     use_cache=True,
+     stopping_criteria=stopping_criteria
+ )
+ input_length = tokenized_chat.shape[1]
+ response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
+
+ print(f"Model response: {response}")
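The committed chat.py reads one prompt from stdin, generates a single reply, and exits, which matches the Dockerfile's CMD. For readers who want an interactive session, the sketch below shows one way it could be extended into a multi-turn loop; it is not part of this commit. The repo name, bfloat16 dtype, generation settings, and "</s>" stopping string are copied from chat.py above, while the loop structure, the "You: " input prompt, and the "quit" sentinel are illustrative assumptions.

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    StoppingCriteriaList,
    StopStringCriteria,
)

# Same model and setup as the committed chat.py
repo_name = "nvidia/Hymba-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo_name, trust_remote_code=True)
model = model.cuda().to(torch.bfloat16)

stopping_criteria = StoppingCriteriaList(
    [StopStringCriteria(tokenizer=tokenizer, stop_strings="</s>")]
)
messages = [{"role": "system", "content": "You are a helpful assistant."}]

while True:
    prompt = input("You: ")
    if prompt.strip().lower() == "quit":  # hypothetical exit sentinel, not in the commit
        break
    messages.append({"role": "user", "content": prompt})

    # Re-apply the chat template over the full conversation history each turn
    tokenized_chat = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to("cuda")
    outputs = model.generate(
        tokenized_chat,
        max_new_tokens=256,
        do_sample=False,  # greedy decoding, as in chat.py; temperature has no effect without sampling
        use_cache=True,
        stopping_criteria=stopping_criteria,
    )
    # Decode only the newly generated tokens, as chat.py does
    response = tokenizer.decode(
        outputs[0][tokenized_chat.shape[1]:], skip_special_tokens=True
    )
    print(f"Model response: {response}")

    # Keep the assistant reply in the history so later turns have context
    messages.append({"role": "assistant", "content": response})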