import torch
from accelerate import dispatch_model, infer_auto_device_map, init_empty_weights
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

# Hugging Face Hub repository id of the checkpoint to load.
model_path = "Crystalcareai/Quiet-Star-Custom"

# Thought-related hyperparameters. They are combined into `max_thoughts`
# (n_ahead + n_ahead_talk + 1) when the model is loaded.
# NOTE(review): presumably the Quiet-STaR "think" / "talk" look-ahead token
# counts -- confirm against the model's remote code.
n_ahead = 8
n_ahead_talk = 4

# Forwarded unchanged to `from_pretrained` as `merged_talk_heads`.
merged_talk_heads = True

# Load the checkpoint. `trust_remote_code=True` lets transformers run the
# modelling code shipped in the repository; the thought/talk-head keyword
# arguments below are not standard transformers options.
# NOTE(review): presumably they are consumed by that remote config/model
# code -- confirm.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    max_thoughts=n_ahead + n_ahead_talk + 1,
    merged_talk_heads=merged_talk_heads,
    merged_lm_and_talk_heads=False,
    merged_lm_and_think_heads=True,
    use_concat_talk_head=True,
    use_shallow_think=True,
    use_shallow_talk=False,
    use_complex_think_head=False,
    use_complex_talk_head=True,
    use_weighted_talk_head=True,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # load weights in bfloat16
    device_map="auto",  # let accelerate decide weight placement
)

# Inference only: put the module in evaluation mode.
model.eval()

# Tokenizer that ships with the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Attach the tokenizer to the model object.
# NOTE(review): presumably the remote modelling code reads `model.tokenizer`
# during generation -- confirm.
model.tokenizer = tokenizer

# Streams decoded text as it is generated; the prompt itself is not echoed
# and special tokens are kept in the streamed text.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=False)

# Instruction wrapper applied to the raw prompt before tokenization.
prompt_template = "[INST] {prompt} [/INST]"

# Example question: a formal-logic validity check. Adjacent string literals
# are concatenated by the parser, so the value is one single-line string.
prompt = (
    "It is not always easy to see who is related to whom -- and in which ways. "
    "The following argument pertains to this question: "
    "To begin with, Lesley is a close friend of Fernando. "
    "Moreover, being a close friend of Fernando or a schoolmate of Lowell "
    "is sufficient for being a great-grandfather of Leroy. "
    "It follows that Lesley is a great-grandfather of Leroy. "
    "Is the argument, given the explicitly stated premises, "
    "deductively valid or invalid?"
)

# Tokenize the formatted prompt into a PyTorch tensor and move it to the
# device reported by `model.device`.
input_ids = tokenizer(
    prompt_template.format(prompt=prompt),
    return_tensors="pt",
).input_ids.to(model.device)

# A single prompt has no padding, so every position is attended to.
attention_mask = torch.ones_like(input_ids)

# Length limit passed to `generate` as `max_length`.
max_length = 1024

# Generate while streaming text to stdout. `no_grad` guarantees no autograd
# state is recorded, regardless of how the model's `generate` is implemented.
# This model's `generate` returns a pair; only the token ids are used here.
with torch.no_grad():
    output_ids, _ = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        streamer=streamer,
    )

# Print the first returned sequence in full, keeping special tokens visible.
print(tokenizer.decode(output_ids[0], skip_special_tokens=False))