avoroshilov
/

DeepSeek-R1-Distill-Qwen-14B-GPTQ_4bit-128g

Text Generation

text-generation-inference

Inference Endpoints

4-bit precision

Model card · Files and versions · Community

avoroshilov committed 14 days ago

Commit

31c9d54

·

verified ·

1 Parent(s): ba6b752

Update README.md

Files changed (1) hide show

README.md +1 -1

README.md CHANGED Viewed

@@ -29,7 +29,7 @@ model_name = "avoroshilov/DeepSeek-R1-Distill-Qwen-14B-GPTQ_4bit-128g"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 quantized_model = AutoModelForCausalLM.from_pretrained(model_name, device_map='cuda')
-chat = [{"role": "user", "content": "Why is the grass green?"},]
 question_tokens = tokenizer.apply_chat_template(chat, add_generation_prompt=True, return_tensors="pt").to(quantized_model.device)
 answer_tokens = quantized_model.generate(question_tokens, generation_config=GenerationConfig(max_length=2048, ))[0]

 tokenizer = AutoTokenizer.from_pretrained(model_name)
 quantized_model = AutoModelForCausalLM.from_pretrained(model_name, device_map='cuda')
+chat = [{"role": "user", "content": "Why is grass green?"},]
 question_tokens = tokenizer.apply_chat_template(chat, add_generation_prompt=True, return_tensors="pt").to(quantized_model.device)
 answer_tokens = quantized_model.generate(question_tokens, generation_config=GenerationConfig(max_length=2048, ))[0]