import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer

model_name = "Crystalcareai/Quiet-Star-Custom"

# The model uses custom modeling code, so trust_remote_code must be enabled.
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, ignore_mismatched_sizes=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Attach the tokenizer to the model, as the custom generation code expects.
model.tokenizer = tokenizer

# Run on GPU when available; otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

prompt_template = "[INST] {prompt} [/INST]"

prompt = "This is a reasoning problem. You're standing on the surface of the Earth. " \
         "You walk one mile south, one mile west, and one mile north. " \
         "You end up exactly where you started. Where exactly on Earth are you?"

# Wrap the question in the instruction template before tokenizing.
input_text = prompt_template.format(prompt=prompt)
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
attention_mask = torch.ones_like(input_ids).to(device)

# Stream tokens to stdout as they are generated.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

with torch.no_grad():
    generated_outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=1024,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=True,  # sample so temperature takes effect; greedy decoding ignores it
        use_cache=True,
        num_beams=1,
        temperature=0.2,
        repetition_penalty=1.2,
        length_penalty=1.0,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        output_attentions=False,
        output_hidden_states=False,
        return_dict_in_generate=True,
        streamer=streamer,
    )

generated_text = tokenizer.decode(generated_outputs.sequences[0], skip_special_tokens=True)

print("Generated output:")
print(generated_text)
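
# Note: `generated_text` above still includes the [INST] prompt, since the decoded
# sequence starts with the input tokens. A minimal sketch for recovering only the
# model's reply, assuming the custom generate() prepends the prompt tokens the same
# way standard transformers generation does:
answer_ids = generated_outputs.sequences[0][input_ids.shape[-1]:]
print(tokenizer.decode(answer_ids, skip_special_tokens=True))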