Update README.md
README.md CHANGED

@@ -89,14 +89,36 @@ prompt = f"System: {system_messages[lang_code]}\nUser: {user}\nAssistant:<s>"
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-
+
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-
-
-
-
-
+
+model_name = "openGPT-X/Teuken-7B-instruct-v0.4"
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    trust_remote_code=True,
+    torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
+)
+model = model.to(device).eval()
+tokenizer = AutoTokenizer.from_pretrained(
+    model_name,
+    use_fast=False,
+    trust_remote_code=True,
+)
+
+messages = [{"role": "User", "content": "Wer bist du?"}]
+prompt_ids = tokenizer.apply_chat_template(messages, chat_template="DE", tokenize=True, add_generation_prompt=True, return_tensors="pt")
+prediction = model.generate(
+    prompt_ids.to(model.device),
+    max_length=512,
+    do_sample=True,
+    top_k=50,
+    top_p=0.95,
+    temperature=0.7,
+    num_return_sequences=1,
+)
+prediction_text = tokenizer.decode(prediction[0])
+print(prediction_text)
 ```
 
 This example demonstrates how to load the model and tokenizer, prepare input, generate text, and print the result.
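One optional follow-up, shown here as a minimal sketch rather than part of the committed README: since `model.generate` returns the prompt tokens together with the continuation, decoding only the newly generated portion (reusing the `tokenizer`, `prompt_ids`, and `prediction` variables from the example above) prints just the model's answer.

```python
# Sketch (not in the diff above): decode only the newly generated tokens,
# reusing `prompt_ids`, `prediction`, and `tokenizer` from the README example.
generated = prediction[0][prompt_ids.shape[-1]:]
print(tokenizer.decode(generated, skip_special_tokens=True))
```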