# Quiet-Star-Custom / inference.py
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
# Load the pre-trained model and tokenizer
model_name = "Crystalcareai/Quiet-Star-Custom"
device = "cuda" if torch.cuda.is_available() else "cpu"

# trust_remote_code=True pulls in the repo's custom Quiet-STaR modeling code
model = AutoModelForCausalLM.from_pretrained(
    model_name, trust_remote_code=True, ignore_mismatched_sizes=True
).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Attach the tokenizer to the model; the custom modeling code expects it
model.tokenizer = tokenizer
prompt_template = "[INST] {prompt} [/INST]"
prompt = (
    "This is a reasoning problem. You're standing on the surface of the Earth. "
    "You walk one mile south, one mile west, and one mile north. "
    "You end up exactly where you started. Where EXACTLY on Earth are you?"
)

# Tokenize the templated prompt once and build a matching attention mask
input_ids = tokenizer.encode(
    prompt_template.format(prompt=prompt), return_tensors="pt"
).to(device)
attention_mask = torch.ones_like(input_ids)

# Stream decoded tokens to stdout as they are generated
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
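# Alternative (an assumption, untested for this checkpoint): if the tokenizer
# ships a chat template, the [INST] wrapper above can be built with
# tokenizer.apply_chat_template instead of the hard-coded string:
#     messages = [{"role": "user", "content": prompt}]
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     ).to(device)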
# Generate the output, streaming tokens as they arrive
with torch.no_grad():
    generated_outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=1024,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        use_cache=True,
        do_sample=True,  # sampling must be enabled for temperature to apply
        temperature=0.2,
        repetition_penalty=1.2,
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        return_dict_in_generate=True,
        streamer=streamer,
    )
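# With return_dict_in_generate=True, generate() returns an output object whose
# .sequences tensor holds the prompt plus completion token ids.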
# Decode the generated output
generated_text = tokenizer.decode(generated_outputs.sequences[0], skip_special_tokens=True)
# Print the generated output
print("Generated output:")
print(generated_text)
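
# Optional: since .sequences includes the prompt tokens, slicing them off
# before decoding yields only the model's completion (a minimal sketch, not
# part of the original script):
completion = tokenizer.decode(
    generated_outputs.sequences[0][input_ids.shape[-1]:],
    skip_special_tokens=True,
)
print("Completion only:")
print(completion)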